/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}

static noinline void tunnel_dst_set(struct ip_tunnel *t,
				    struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
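/* Match a received packet's key against a tunnel's configured i_flags/i_key.
 * The match is exact in both directions: a keyed tunnel accepts only packets
 * carrying the same key, and a keyless tunnel accepts only packets without
 * TUNNEL_KEY set.
 */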
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options
 *
 * Tunnel hash table:
 * We require an exact key match, i.e. if a key is present in the packet
 * it will match only a tunnel with the same key; if it is not present,
 * it will match only a keyless tunnel.
 *
 * All keyless packets, if not matched against a configured keyless
 * tunnel, will match the fallback tunnel.
 * Given src, dst and key, find the appropriate tunnel for the input packet.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
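/* Find a tunnel whose configuration exactly matches @parms (address pair,
 * link, device type and key).  Unlike ip_tunnel_lookup(), this runs on the
 * control path (ioctl/netlink) to detect duplicate configurations, not to
 * classify received packets.
 */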
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
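/* Common receive path for decapsulated tunnel packets.  The caller has
 * already matched @tunnel via ip_tunnel_lookup() and parsed the tunnel
 * header into @tpi.  This enforces the tunnel's checksum and sequence
 * number policy, performs ECN decapsulation, updates per-cpu stats and
 * hands the packet to the stack through GRO cells.
 */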
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	const struct ip_tunnel_encap_ops *ops;
	int hlen = -EINVAL;

	if (e->type == TUNNEL_ENCAP_NONE)
		return 0;

	if (e->type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[e->type]);
	if (likely(ops && ops->encap_hlen))
		hlen = ops->encap_hlen(e);
	rcu_read_unlock();

	return hlen;
}

const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
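/* Copy the requested secondary encapsulation parameters into the tunnel
 * and recompute the cached header lengths.  The encapsulation module
 * (for example FOU) is expected to have registered its ops in
 * iptun_encaps[] via ip_tunnel_encap_add_ops() beforehand; otherwise
 * ip_encap_hlen() returns -EINVAL and the setup is rejected.
 */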
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
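/* Common transmit path for IPv4 tunnels.  Resolves the outer destination
 * (using the inner packet's route for NBMA tunnels with no fixed daddr),
 * consults the per-cpu dst cache for connected tunnels, applies any
 * configured secondary encapsulation, handles PMTU discovery and
 * TOS/TTL/DF inheritance, and finally pushes the outer IP header via
 * iptunnel_xmit().
 */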
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
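/* Apply a new set of parameters to an existing tunnel, under RTNL.  The
 * tunnel is unhashed and re-hashed because the endpoint addresses and key
 * determine its hash bucket, and the per-cpu dst cache is reset since the
 * cached routes may no longer match the new endpoints.
 */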
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
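/* rtnl_link_ops dellink helper: queue @dev for unregistration unless it is
 * the per-netns fallback device, which is only torn down when its network
 * namespace exits.
 */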
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
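/* Collect every tunnel belonging to @itn for batched unregistration on
 * netns exit: first every device in this netns created with @ops, then any
 * tunnel hashed here whose device lives in a different netns and would be
 * missed by the first walk.
 */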
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
	if (!tunnel->dst_cache) {
		free_percpu(dev->tstats);
		return -ENOMEM;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		free_percpu(tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
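/* ndo_uninit counterpart of ip_tunnel_init(): unhash the tunnel (except the
 * fallback device) and drop any cached routes.  The memory itself is freed
 * later by ip_tunnel_dev_free() via dev->destructor.
 */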
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the minimum required initialization; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");