1 /* 2 * Copyright (c) 2013 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include <linux/capability.h> 22 #include <linux/module.h> 23 #include <linux/types.h> 24 #include <linux/kernel.h> 25 #include <linux/slab.h> 26 #include <linux/uaccess.h> 27 #include <linux/skbuff.h> 28 #include <linux/netdevice.h> 29 #include <linux/in.h> 30 #include <linux/tcp.h> 31 #include <linux/udp.h> 32 #include <linux/if_arp.h> 33 #include <linux/init.h> 34 #include <linux/in6.h> 35 #include <linux/inetdevice.h> 36 #include <linux/igmp.h> 37 #include <linux/netfilter_ipv4.h> 38 #include <linux/etherdevice.h> 39 #include <linux/if_ether.h> 40 #include <linux/if_vlan.h> 41 #include <linux/rculist.h> 42 #include <linux/err.h> 43 44 #include <net/sock.h> 45 #include <net/ip.h> 46 #include <net/icmp.h> 47 #include <net/protocol.h> 48 #include <net/ip_tunnels.h> 49 #include <net/arp.h> 50 #include <net/checksum.h> 51 #include <net/dsfield.h> 52 #include <net/inet_ecn.h> 53 #include <net/xfrm.h> 54 #include <net/net_namespace.h> 55 #include <net/netns/generic.h> 56 #include <net/rtnetlink.h> 57 #include <net/udp.h> 58 59 #if IS_ENABLED(CONFIG_IPV6) 60 #include <net/ipv6.h> 61 #include <net/ip6_fib.h> 62 #include <net/ip6_route.h> 63 #endif 64 65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) 66 { 67 return hash_32((__force u32)key ^ (__force u32)remote, 68 IP_TNL_HASH_BITS); 69 } 70 71 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, 72 __be16 flags, __be32 key) 73 { 74 if (p->i_flags & TUNNEL_KEY) { 75 if (flags & TUNNEL_KEY) 76 return key == p->i_key; 77 else 78 /* key expected, none present */ 79 return false; 80 } else 81 return !(flags & TUNNEL_KEY); 82 } 83 84 /* Fallback tunnel: no source, no destination, no key, no options 85 86 Tunnel hash table: 87 We require exact key match i.e. if a key is present in packet 88 it will match only tunnel with the same key; if it is not present, 89 it will match only keyless tunnel. 90 91 All keysless packets, if not matched configured keyless tunnels 92 will match fallback tunnel. 93 Given src, dst and key, find appropriate for input tunnel. 94 */ 95 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, 96 int link, __be16 flags, 97 __be32 remote, __be32 local, 98 __be32 key) 99 { 100 unsigned int hash; 101 struct ip_tunnel *t, *cand = NULL; 102 struct hlist_head *head; 103 104 hash = ip_tunnel_hash(key, remote); 105 head = &itn->tunnels[hash]; 106 107 hlist_for_each_entry_rcu(t, head, hash_node) { 108 if (local != t->parms.iph.saddr || 109 remote != t->parms.iph.daddr || 110 !(t->dev->flags & IFF_UP)) 111 continue; 112 113 if (!ip_tunnel_key_match(&t->parms, flags, key)) 114 continue; 115 116 if (t->parms.link == link) 117 return t; 118 else 119 cand = t; 120 } 121 122 hlist_for_each_entry_rcu(t, head, hash_node) { 123 if (remote != t->parms.iph.daddr || 124 t->parms.iph.saddr != 0 || 125 !(t->dev->flags & IFF_UP)) 126 continue; 127 128 if (!ip_tunnel_key_match(&t->parms, flags, key)) 129 continue; 130 131 if (t->parms.link == link) 132 return t; 133 else if (!cand) 134 cand = t; 135 } 136 137 hash = ip_tunnel_hash(key, 0); 138 head = &itn->tunnels[hash]; 139 140 hlist_for_each_entry_rcu(t, head, hash_node) { 141 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && 142 (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) 143 continue; 144 145 if (!(t->dev->flags & IFF_UP)) 146 continue; 147 148 if (!ip_tunnel_key_match(&t->parms, flags, key)) 149 continue; 150 151 if (t->parms.link == link) 152 return t; 153 else if (!cand) 154 cand = t; 155 } 156 157 if (flags & TUNNEL_NO_KEY) 158 goto skip_key_lookup; 159 160 hlist_for_each_entry_rcu(t, head, hash_node) { 161 if (t->parms.i_key != key || 162 t->parms.iph.saddr != 0 || 163 t->parms.iph.daddr != 0 || 164 !(t->dev->flags & IFF_UP)) 165 continue; 166 167 if (t->parms.link == link) 168 return t; 169 else if (!cand) 170 cand = t; 171 } 172 173 skip_key_lookup: 174 if (cand) 175 return cand; 176 177 t = rcu_dereference(itn->collect_md_tun); 178 if (t) 179 return t; 180 181 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) 182 return netdev_priv(itn->fb_tunnel_dev); 183 184 return NULL; 185 } 186 EXPORT_SYMBOL_GPL(ip_tunnel_lookup); 187 188 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, 189 struct ip_tunnel_parm *parms) 190 { 191 unsigned int h; 192 __be32 remote; 193 __be32 i_key = parms->i_key; 194 195 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) 196 remote = parms->iph.daddr; 197 else 198 remote = 0; 199 200 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) 201 i_key = 0; 202 203 h = ip_tunnel_hash(i_key, remote); 204 return &itn->tunnels[h]; 205 } 206 207 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) 208 { 209 struct hlist_head *head = ip_bucket(itn, &t->parms); 210 211 if (t->collect_md) 212 rcu_assign_pointer(itn->collect_md_tun, t); 213 hlist_add_head_rcu(&t->hash_node, head); 214 } 215 216 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t) 217 { 218 if (t->collect_md) 219 rcu_assign_pointer(itn->collect_md_tun, NULL); 220 hlist_del_init_rcu(&t->hash_node); 221 } 222 223 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, 224 struct ip_tunnel_parm *parms, 225 int type) 226 { 227 __be32 remote = parms->iph.daddr; 228 __be32 local = parms->iph.saddr; 229 __be32 key = parms->i_key; 230 __be16 flags = parms->i_flags; 231 int link = parms->link; 232 struct ip_tunnel *t = NULL; 233 struct hlist_head *head = ip_bucket(itn, parms); 234 235 hlist_for_each_entry_rcu(t, head, hash_node) { 236 if (local == t->parms.iph.saddr && 237 remote == t->parms.iph.daddr && 238 link == t->parms.link && 239 type == t->dev->type && 240 ip_tunnel_key_match(&t->parms, flags, key)) 241 break; 242 } 243 return t; 244 } 245 246 static struct net_device *__ip_tunnel_create(struct net *net, 247 const struct rtnl_link_ops *ops, 248 struct ip_tunnel_parm *parms) 249 { 250 int err; 251 struct ip_tunnel *tunnel; 252 struct net_device *dev; 253 char name[IFNAMSIZ]; 254 255 if (parms->name[0]) 256 strlcpy(name, parms->name, IFNAMSIZ); 257 else { 258 if (strlen(ops->kind) > (IFNAMSIZ - 3)) { 259 err = -E2BIG; 260 goto failed; 261 } 262 strlcpy(name, ops->kind, IFNAMSIZ); 263 strncat(name, "%d", 2); 264 } 265 266 ASSERT_RTNL(); 267 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); 268 if (!dev) { 269 err = -ENOMEM; 270 goto failed; 271 } 272 dev_net_set(dev, net); 273 274 dev->rtnl_link_ops = ops; 275 276 tunnel = netdev_priv(dev); 277 tunnel->parms = *parms; 278 tunnel->net = net; 279 280 err = register_netdevice(dev); 281 if (err) 282 goto failed_free; 283 284 return dev; 285 286 failed_free: 287 free_netdev(dev); 288 failed: 289 return ERR_PTR(err); 290 } 291 292 static inline void init_tunnel_flow(struct flowi4 *fl4, 293 int proto, 294 __be32 daddr, __be32 saddr, 295 __be32 key, __u8 tos, int oif) 296 { 297 memset(fl4, 0, sizeof(*fl4)); 298 fl4->flowi4_oif = oif; 299 fl4->daddr = daddr; 300 fl4->saddr = saddr; 301 fl4->flowi4_tos = tos; 302 fl4->flowi4_proto = proto; 303 fl4->fl4_gre_key = key; 304 } 305 306 static int ip_tunnel_bind_dev(struct net_device *dev) 307 { 308 struct net_device *tdev = NULL; 309 struct ip_tunnel *tunnel = netdev_priv(dev); 310 const struct iphdr *iph; 311 int hlen = LL_MAX_HEADER; 312 int mtu = ETH_DATA_LEN; 313 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 314 315 iph = &tunnel->parms.iph; 316 317 /* Guess output device to choose reasonable mtu and needed_headroom */ 318 if (iph->daddr) { 319 struct flowi4 fl4; 320 struct rtable *rt; 321 322 init_tunnel_flow(&fl4, iph->protocol, iph->daddr, 323 iph->saddr, tunnel->parms.o_key, 324 RT_TOS(iph->tos), tunnel->parms.link); 325 rt = ip_route_output_key(tunnel->net, &fl4); 326 327 if (!IS_ERR(rt)) { 328 tdev = rt->dst.dev; 329 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, 330 fl4.saddr); 331 ip_rt_put(rt); 332 } 333 if (dev->type != ARPHRD_ETHER) 334 dev->flags |= IFF_POINTOPOINT; 335 } 336 337 if (!tdev && tunnel->parms.link) 338 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); 339 340 if (tdev) { 341 hlen = tdev->hard_header_len + tdev->needed_headroom; 342 mtu = tdev->mtu; 343 } 344 345 dev->needed_headroom = t_hlen + hlen; 346 mtu -= (dev->hard_header_len + t_hlen); 347 348 if (mtu < 68) 349 mtu = 68; 350 351 return mtu; 352 } 353 354 static struct ip_tunnel *ip_tunnel_create(struct net *net, 355 struct ip_tunnel_net *itn, 356 struct ip_tunnel_parm *parms) 357 { 358 struct ip_tunnel *nt; 359 struct net_device *dev; 360 361 BUG_ON(!itn->fb_tunnel_dev); 362 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); 363 if (IS_ERR(dev)) 364 return ERR_CAST(dev); 365 366 dev->mtu = ip_tunnel_bind_dev(dev); 367 368 nt = netdev_priv(dev); 369 ip_tunnel_add(itn, nt); 370 return nt; 371 } 372 373 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, 374 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, 375 bool log_ecn_error) 376 { 377 struct pcpu_sw_netstats *tstats; 378 const struct iphdr *iph = ip_hdr(skb); 379 int err; 380 381 #ifdef CONFIG_NET_IPGRE_BROADCAST 382 if (ipv4_is_multicast(iph->daddr)) { 383 tunnel->dev->stats.multicast++; 384 skb->pkt_type = PACKET_BROADCAST; 385 } 386 #endif 387 388 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || 389 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { 390 tunnel->dev->stats.rx_crc_errors++; 391 tunnel->dev->stats.rx_errors++; 392 goto drop; 393 } 394 395 if (tunnel->parms.i_flags&TUNNEL_SEQ) { 396 if (!(tpi->flags&TUNNEL_SEQ) || 397 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { 398 tunnel->dev->stats.rx_fifo_errors++; 399 tunnel->dev->stats.rx_errors++; 400 goto drop; 401 } 402 tunnel->i_seqno = ntohl(tpi->seq) + 1; 403 } 404 405 skb_reset_network_header(skb); 406 407 err = IP_ECN_decapsulate(iph, skb); 408 if (unlikely(err)) { 409 if (log_ecn_error) 410 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", 411 &iph->saddr, iph->tos); 412 if (err > 1) { 413 ++tunnel->dev->stats.rx_frame_errors; 414 ++tunnel->dev->stats.rx_errors; 415 goto drop; 416 } 417 } 418 419 tstats = this_cpu_ptr(tunnel->dev->tstats); 420 u64_stats_update_begin(&tstats->syncp); 421 tstats->rx_packets++; 422 tstats->rx_bytes += skb->len; 423 u64_stats_update_end(&tstats->syncp); 424 425 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); 426 427 if (tunnel->dev->type == ARPHRD_ETHER) { 428 skb->protocol = eth_type_trans(skb, tunnel->dev); 429 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 430 } else { 431 skb->dev = tunnel->dev; 432 } 433 434 if (tun_dst) 435 skb_dst_set(skb, (struct dst_entry *)tun_dst); 436 437 gro_cells_receive(&tunnel->gro_cells, skb); 438 return 0; 439 440 drop: 441 kfree_skb(skb); 442 return 0; 443 } 444 EXPORT_SYMBOL_GPL(ip_tunnel_rcv); 445 446 static int ip_encap_hlen(struct ip_tunnel_encap *e) 447 { 448 const struct ip_tunnel_encap_ops *ops; 449 int hlen = -EINVAL; 450 451 if (e->type == TUNNEL_ENCAP_NONE) 452 return 0; 453 454 if (e->type >= MAX_IPTUN_ENCAP_OPS) 455 return -EINVAL; 456 457 rcu_read_lock(); 458 ops = rcu_dereference(iptun_encaps[e->type]); 459 if (likely(ops && ops->encap_hlen)) 460 hlen = ops->encap_hlen(e); 461 rcu_read_unlock(); 462 463 return hlen; 464 } 465 466 const struct ip_tunnel_encap_ops __rcu * 467 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; 468 469 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, 470 unsigned int num) 471 { 472 if (num >= MAX_IPTUN_ENCAP_OPS) 473 return -ERANGE; 474 475 return !cmpxchg((const struct ip_tunnel_encap_ops **) 476 &iptun_encaps[num], 477 NULL, ops) ? 0 : -1; 478 } 479 EXPORT_SYMBOL(ip_tunnel_encap_add_ops); 480 481 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, 482 unsigned int num) 483 { 484 int ret; 485 486 if (num >= MAX_IPTUN_ENCAP_OPS) 487 return -ERANGE; 488 489 ret = (cmpxchg((const struct ip_tunnel_encap_ops **) 490 &iptun_encaps[num], 491 ops, NULL) == ops) ? 0 : -1; 492 493 synchronize_net(); 494 495 return ret; 496 } 497 EXPORT_SYMBOL(ip_tunnel_encap_del_ops); 498 499 int ip_tunnel_encap_setup(struct ip_tunnel *t, 500 struct ip_tunnel_encap *ipencap) 501 { 502 int hlen; 503 504 memset(&t->encap, 0, sizeof(t->encap)); 505 506 hlen = ip_encap_hlen(ipencap); 507 if (hlen < 0) 508 return hlen; 509 510 t->encap.type = ipencap->type; 511 t->encap.sport = ipencap->sport; 512 t->encap.dport = ipencap->dport; 513 t->encap.flags = ipencap->flags; 514 515 t->encap_hlen = hlen; 516 t->hlen = t->encap_hlen + t->tun_hlen; 517 518 return 0; 519 } 520 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); 521 522 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, 523 u8 *protocol, struct flowi4 *fl4) 524 { 525 const struct ip_tunnel_encap_ops *ops; 526 int ret = -EINVAL; 527 528 if (t->encap.type == TUNNEL_ENCAP_NONE) 529 return 0; 530 531 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) 532 return -EINVAL; 533 534 rcu_read_lock(); 535 ops = rcu_dereference(iptun_encaps[t->encap.type]); 536 if (likely(ops && ops->build_header)) 537 ret = ops->build_header(skb, &t->encap, protocol, fl4); 538 rcu_read_unlock(); 539 540 return ret; 541 } 542 EXPORT_SYMBOL(ip_tunnel_encap); 543 544 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, 545 struct rtable *rt, __be16 df, 546 const struct iphdr *inner_iph) 547 { 548 struct ip_tunnel *tunnel = netdev_priv(dev); 549 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; 550 int mtu; 551 552 if (df) 553 mtu = dst_mtu(&rt->dst) - dev->hard_header_len 554 - sizeof(struct iphdr) - tunnel->hlen; 555 else 556 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 557 558 if (skb_dst(skb)) 559 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 560 561 if (skb->protocol == htons(ETH_P_IP)) { 562 if (!skb_is_gso(skb) && 563 (inner_iph->frag_off & htons(IP_DF)) && 564 mtu < pkt_size) { 565 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 566 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 567 return -E2BIG; 568 } 569 } 570 #if IS_ENABLED(CONFIG_IPV6) 571 else if (skb->protocol == htons(ETH_P_IPV6)) { 572 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); 573 574 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && 575 mtu >= IPV6_MIN_MTU) { 576 if ((tunnel->parms.iph.daddr && 577 !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 578 rt6->rt6i_dst.plen == 128) { 579 rt6->rt6i_flags |= RTF_MODIFIED; 580 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); 581 } 582 } 583 584 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && 585 mtu < pkt_size) { 586 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 587 return -E2BIG; 588 } 589 } 590 #endif 591 return 0; 592 } 593 594 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, 595 const struct iphdr *tnl_params, u8 protocol) 596 { 597 struct ip_tunnel *tunnel = netdev_priv(dev); 598 const struct iphdr *inner_iph; 599 struct flowi4 fl4; 600 u8 tos, ttl; 601 __be16 df; 602 struct rtable *rt; /* Route to the other host */ 603 unsigned int max_headroom; /* The extra header space needed */ 604 __be32 dst; 605 bool connected; 606 607 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 608 connected = (tunnel->parms.iph.daddr != 0); 609 610 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 611 612 dst = tnl_params->daddr; 613 if (dst == 0) { 614 /* NBMA tunnel */ 615 616 if (!skb_dst(skb)) { 617 dev->stats.tx_fifo_errors++; 618 goto tx_error; 619 } 620 621 if (skb->protocol == htons(ETH_P_IP)) { 622 rt = skb_rtable(skb); 623 dst = rt_nexthop(rt, inner_iph->daddr); 624 } 625 #if IS_ENABLED(CONFIG_IPV6) 626 else if (skb->protocol == htons(ETH_P_IPV6)) { 627 const struct in6_addr *addr6; 628 struct neighbour *neigh; 629 bool do_tx_error_icmp; 630 int addr_type; 631 632 neigh = dst_neigh_lookup(skb_dst(skb), 633 &ipv6_hdr(skb)->daddr); 634 if (!neigh) 635 goto tx_error; 636 637 addr6 = (const struct in6_addr *)&neigh->primary_key; 638 addr_type = ipv6_addr_type(addr6); 639 640 if (addr_type == IPV6_ADDR_ANY) { 641 addr6 = &ipv6_hdr(skb)->daddr; 642 addr_type = ipv6_addr_type(addr6); 643 } 644 645 if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 646 do_tx_error_icmp = true; 647 else { 648 do_tx_error_icmp = false; 649 dst = addr6->s6_addr32[3]; 650 } 651 neigh_release(neigh); 652 if (do_tx_error_icmp) 653 goto tx_error_icmp; 654 } 655 #endif 656 else 657 goto tx_error; 658 659 connected = false; 660 } 661 662 tos = tnl_params->tos; 663 if (tos & 0x1) { 664 tos &= ~0x1; 665 if (skb->protocol == htons(ETH_P_IP)) { 666 tos = inner_iph->tos; 667 connected = false; 668 } else if (skb->protocol == htons(ETH_P_IPV6)) { 669 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); 670 connected = false; 671 } 672 } 673 674 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, 675 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); 676 677 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) 678 goto tx_error; 679 680 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) : 681 NULL; 682 683 if (!rt) { 684 rt = ip_route_output_key(tunnel->net, &fl4); 685 686 if (IS_ERR(rt)) { 687 dev->stats.tx_carrier_errors++; 688 goto tx_error; 689 } 690 if (connected) 691 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, 692 fl4.saddr); 693 } 694 695 if (rt->dst.dev == dev) { 696 ip_rt_put(rt); 697 dev->stats.collisions++; 698 goto tx_error; 699 } 700 701 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { 702 ip_rt_put(rt); 703 goto tx_error; 704 } 705 706 if (tunnel->err_count > 0) { 707 if (time_before(jiffies, 708 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 709 tunnel->err_count--; 710 711 dst_link_failure(skb); 712 } else 713 tunnel->err_count = 0; 714 } 715 716 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); 717 ttl = tnl_params->ttl; 718 if (ttl == 0) { 719 if (skb->protocol == htons(ETH_P_IP)) 720 ttl = inner_iph->ttl; 721 #if IS_ENABLED(CONFIG_IPV6) 722 else if (skb->protocol == htons(ETH_P_IPV6)) 723 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; 724 #endif 725 else 726 ttl = ip4_dst_hoplimit(&rt->dst); 727 } 728 729 df = tnl_params->frag_off; 730 if (skb->protocol == htons(ETH_P_IP)) 731 df |= (inner_iph->frag_off&htons(IP_DF)); 732 733 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) 734 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); 735 if (max_headroom > dev->needed_headroom) 736 dev->needed_headroom = max_headroom; 737 738 if (skb_cow_head(skb, dev->needed_headroom)) { 739 ip_rt_put(rt); 740 dev->stats.tx_dropped++; 741 kfree_skb(skb); 742 return; 743 } 744 745 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, 746 df, !net_eq(tunnel->net, dev_net(dev))); 747 return; 748 749 #if IS_ENABLED(CONFIG_IPV6) 750 tx_error_icmp: 751 dst_link_failure(skb); 752 #endif 753 tx_error: 754 dev->stats.tx_errors++; 755 kfree_skb(skb); 756 } 757 EXPORT_SYMBOL_GPL(ip_tunnel_xmit); 758 759 static void ip_tunnel_update(struct ip_tunnel_net *itn, 760 struct ip_tunnel *t, 761 struct net_device *dev, 762 struct ip_tunnel_parm *p, 763 bool set_mtu) 764 { 765 ip_tunnel_del(itn, t); 766 t->parms.iph.saddr = p->iph.saddr; 767 t->parms.iph.daddr = p->iph.daddr; 768 t->parms.i_key = p->i_key; 769 t->parms.o_key = p->o_key; 770 if (dev->type != ARPHRD_ETHER) { 771 memcpy(dev->dev_addr, &p->iph.saddr, 4); 772 memcpy(dev->broadcast, &p->iph.daddr, 4); 773 } 774 ip_tunnel_add(itn, t); 775 776 t->parms.iph.ttl = p->iph.ttl; 777 t->parms.iph.tos = p->iph.tos; 778 t->parms.iph.frag_off = p->iph.frag_off; 779 780 if (t->parms.link != p->link) { 781 int mtu; 782 783 t->parms.link = p->link; 784 mtu = ip_tunnel_bind_dev(dev); 785 if (set_mtu) 786 dev->mtu = mtu; 787 } 788 dst_cache_reset(&t->dst_cache); 789 netdev_state_change(dev); 790 } 791 792 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) 793 { 794 int err = 0; 795 struct ip_tunnel *t = netdev_priv(dev); 796 struct net *net = t->net; 797 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); 798 799 BUG_ON(!itn->fb_tunnel_dev); 800 switch (cmd) { 801 case SIOCGETTUNNEL: 802 if (dev == itn->fb_tunnel_dev) { 803 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 804 if (!t) 805 t = netdev_priv(dev); 806 } 807 memcpy(p, &t->parms, sizeof(*p)); 808 break; 809 810 case SIOCADDTUNNEL: 811 case SIOCCHGTUNNEL: 812 err = -EPERM; 813 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 814 goto done; 815 if (p->iph.ttl) 816 p->iph.frag_off |= htons(IP_DF); 817 if (!(p->i_flags & VTI_ISVTI)) { 818 if (!(p->i_flags & TUNNEL_KEY)) 819 p->i_key = 0; 820 if (!(p->o_flags & TUNNEL_KEY)) 821 p->o_key = 0; 822 } 823 824 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 825 826 if (cmd == SIOCADDTUNNEL) { 827 if (!t) { 828 t = ip_tunnel_create(net, itn, p); 829 err = PTR_ERR_OR_ZERO(t); 830 break; 831 } 832 833 err = -EEXIST; 834 break; 835 } 836 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 837 if (t) { 838 if (t->dev != dev) { 839 err = -EEXIST; 840 break; 841 } 842 } else { 843 unsigned int nflags = 0; 844 845 if (ipv4_is_multicast(p->iph.daddr)) 846 nflags = IFF_BROADCAST; 847 else if (p->iph.daddr) 848 nflags = IFF_POINTOPOINT; 849 850 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 851 err = -EINVAL; 852 break; 853 } 854 855 t = netdev_priv(dev); 856 } 857 } 858 859 if (t) { 860 err = 0; 861 ip_tunnel_update(itn, t, dev, p, true); 862 } else { 863 err = -ENOENT; 864 } 865 break; 866 867 case SIOCDELTUNNEL: 868 err = -EPERM; 869 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 870 goto done; 871 872 if (dev == itn->fb_tunnel_dev) { 873 err = -ENOENT; 874 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 875 if (!t) 876 goto done; 877 err = -EPERM; 878 if (t == netdev_priv(itn->fb_tunnel_dev)) 879 goto done; 880 dev = t->dev; 881 } 882 unregister_netdevice(dev); 883 err = 0; 884 break; 885 886 default: 887 err = -EINVAL; 888 } 889 890 done: 891 return err; 892 } 893 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); 894 895 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) 896 { 897 struct ip_tunnel *tunnel = netdev_priv(dev); 898 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 899 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; 900 901 if (new_mtu < 68) 902 return -EINVAL; 903 904 if (new_mtu > max_mtu) { 905 if (strict) 906 return -EINVAL; 907 908 new_mtu = max_mtu; 909 } 910 911 dev->mtu = new_mtu; 912 return 0; 913 } 914 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); 915 916 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 917 { 918 return __ip_tunnel_change_mtu(dev, new_mtu, true); 919 } 920 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); 921 922 static void ip_tunnel_dev_free(struct net_device *dev) 923 { 924 struct ip_tunnel *tunnel = netdev_priv(dev); 925 926 gro_cells_destroy(&tunnel->gro_cells); 927 dst_cache_destroy(&tunnel->dst_cache); 928 free_percpu(dev->tstats); 929 free_netdev(dev); 930 } 931 932 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) 933 { 934 struct ip_tunnel *tunnel = netdev_priv(dev); 935 struct ip_tunnel_net *itn; 936 937 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); 938 939 if (itn->fb_tunnel_dev != dev) { 940 ip_tunnel_del(itn, netdev_priv(dev)); 941 unregister_netdevice_queue(dev, head); 942 } 943 } 944 EXPORT_SYMBOL_GPL(ip_tunnel_dellink); 945 946 struct net *ip_tunnel_get_link_net(const struct net_device *dev) 947 { 948 struct ip_tunnel *tunnel = netdev_priv(dev); 949 950 return tunnel->net; 951 } 952 EXPORT_SYMBOL(ip_tunnel_get_link_net); 953 954 int ip_tunnel_get_iflink(const struct net_device *dev) 955 { 956 struct ip_tunnel *tunnel = netdev_priv(dev); 957 958 return tunnel->parms.link; 959 } 960 EXPORT_SYMBOL(ip_tunnel_get_iflink); 961 962 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, 963 struct rtnl_link_ops *ops, char *devname) 964 { 965 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); 966 struct ip_tunnel_parm parms; 967 unsigned int i; 968 969 for (i = 0; i < IP_TNL_HASH_SIZE; i++) 970 INIT_HLIST_HEAD(&itn->tunnels[i]); 971 972 if (!ops) { 973 itn->fb_tunnel_dev = NULL; 974 return 0; 975 } 976 977 memset(&parms, 0, sizeof(parms)); 978 if (devname) 979 strlcpy(parms.name, devname, IFNAMSIZ); 980 981 rtnl_lock(); 982 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); 983 /* FB netdevice is special: we have one, and only one per netns. 984 * Allowing to move it to another netns is clearly unsafe. 985 */ 986 if (!IS_ERR(itn->fb_tunnel_dev)) { 987 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; 988 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); 989 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); 990 } 991 rtnl_unlock(); 992 993 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); 994 } 995 EXPORT_SYMBOL_GPL(ip_tunnel_init_net); 996 997 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, 998 struct rtnl_link_ops *ops) 999 { 1000 struct net *net = dev_net(itn->fb_tunnel_dev); 1001 struct net_device *dev, *aux; 1002 int h; 1003 1004 for_each_netdev_safe(net, dev, aux) 1005 if (dev->rtnl_link_ops == ops) 1006 unregister_netdevice_queue(dev, head); 1007 1008 for (h = 0; h < IP_TNL_HASH_SIZE; h++) { 1009 struct ip_tunnel *t; 1010 struct hlist_node *n; 1011 struct hlist_head *thead = &itn->tunnels[h]; 1012 1013 hlist_for_each_entry_safe(t, n, thead, hash_node) 1014 /* If dev is in the same netns, it has already 1015 * been added to the list by the previous loop. 1016 */ 1017 if (!net_eq(dev_net(t->dev), net)) 1018 unregister_netdevice_queue(t->dev, head); 1019 } 1020 } 1021 1022 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) 1023 { 1024 LIST_HEAD(list); 1025 1026 rtnl_lock(); 1027 ip_tunnel_destroy(itn, &list, ops); 1028 unregister_netdevice_many(&list); 1029 rtnl_unlock(); 1030 } 1031 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); 1032 1033 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], 1034 struct ip_tunnel_parm *p) 1035 { 1036 struct ip_tunnel *nt; 1037 struct net *net = dev_net(dev); 1038 struct ip_tunnel_net *itn; 1039 int mtu; 1040 int err; 1041 1042 nt = netdev_priv(dev); 1043 itn = net_generic(net, nt->ip_tnl_net_id); 1044 1045 if (nt->collect_md) { 1046 if (rtnl_dereference(itn->collect_md_tun)) 1047 return -EEXIST; 1048 } else { 1049 if (ip_tunnel_find(itn, p, dev->type)) 1050 return -EEXIST; 1051 } 1052 1053 nt->net = net; 1054 nt->parms = *p; 1055 err = register_netdevice(dev); 1056 if (err) 1057 goto out; 1058 1059 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) 1060 eth_hw_addr_random(dev); 1061 1062 mtu = ip_tunnel_bind_dev(dev); 1063 if (!tb[IFLA_MTU]) 1064 dev->mtu = mtu; 1065 1066 ip_tunnel_add(itn, nt); 1067 out: 1068 return err; 1069 } 1070 EXPORT_SYMBOL_GPL(ip_tunnel_newlink); 1071 1072 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], 1073 struct ip_tunnel_parm *p) 1074 { 1075 struct ip_tunnel *t; 1076 struct ip_tunnel *tunnel = netdev_priv(dev); 1077 struct net *net = tunnel->net; 1078 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); 1079 1080 if (dev == itn->fb_tunnel_dev) 1081 return -EINVAL; 1082 1083 t = ip_tunnel_find(itn, p, dev->type); 1084 1085 if (t) { 1086 if (t->dev != dev) 1087 return -EEXIST; 1088 } else { 1089 t = tunnel; 1090 1091 if (dev->type != ARPHRD_ETHER) { 1092 unsigned int nflags = 0; 1093 1094 if (ipv4_is_multicast(p->iph.daddr)) 1095 nflags = IFF_BROADCAST; 1096 else if (p->iph.daddr) 1097 nflags = IFF_POINTOPOINT; 1098 1099 if ((dev->flags ^ nflags) & 1100 (IFF_POINTOPOINT | IFF_BROADCAST)) 1101 return -EINVAL; 1102 } 1103 } 1104 1105 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); 1106 return 0; 1107 } 1108 EXPORT_SYMBOL_GPL(ip_tunnel_changelink); 1109 1110 int ip_tunnel_init(struct net_device *dev) 1111 { 1112 struct ip_tunnel *tunnel = netdev_priv(dev); 1113 struct iphdr *iph = &tunnel->parms.iph; 1114 int err; 1115 1116 dev->destructor = ip_tunnel_dev_free; 1117 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 1118 if (!dev->tstats) 1119 return -ENOMEM; 1120 1121 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); 1122 if (err) { 1123 free_percpu(dev->tstats); 1124 return err; 1125 } 1126 1127 err = gro_cells_init(&tunnel->gro_cells, dev); 1128 if (err) { 1129 dst_cache_destroy(&tunnel->dst_cache); 1130 free_percpu(dev->tstats); 1131 return err; 1132 } 1133 1134 tunnel->dev = dev; 1135 tunnel->net = dev_net(dev); 1136 strcpy(tunnel->parms.name, dev->name); 1137 iph->version = 4; 1138 iph->ihl = 5; 1139 1140 if (tunnel->collect_md) { 1141 dev->features |= NETIF_F_NETNS_LOCAL; 1142 netif_keep_dst(dev); 1143 } 1144 return 0; 1145 } 1146 EXPORT_SYMBOL_GPL(ip_tunnel_init); 1147 1148 void ip_tunnel_uninit(struct net_device *dev) 1149 { 1150 struct ip_tunnel *tunnel = netdev_priv(dev); 1151 struct net *net = tunnel->net; 1152 struct ip_tunnel_net *itn; 1153 1154 itn = net_generic(net, tunnel->ip_tnl_net_id); 1155 /* fb_tunnel_dev will be unregisted in net-exit call. */ 1156 if (itn->fb_tunnel_dev != dev) 1157 ip_tunnel_del(itn, netdev_priv(dev)); 1158 1159 dst_cache_reset(&tunnel->dst_cache); 1160 } 1161 EXPORT_SYMBOL_GPL(ip_tunnel_uninit); 1162 1163 /* Do least required initialization, rest of init is done in tunnel_init call */ 1164 void ip_tunnel_setup(struct net_device *dev, int net_id) 1165 { 1166 struct ip_tunnel *tunnel = netdev_priv(dev); 1167 tunnel->ip_tnl_net_id = net_id; 1168 } 1169 EXPORT_SYMBOL_GPL(ip_tunnel_setup); 1170 1171 MODULE_LICENSE("GPL"); 1172