1 /* 2 * Copyright (c) 2013 Nicira, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 16 * 02110-1301, USA 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include <linux/capability.h> 22 #include <linux/module.h> 23 #include <linux/types.h> 24 #include <linux/kernel.h> 25 #include <linux/slab.h> 26 #include <linux/uaccess.h> 27 #include <linux/skbuff.h> 28 #include <linux/netdevice.h> 29 #include <linux/in.h> 30 #include <linux/tcp.h> 31 #include <linux/udp.h> 32 #include <linux/if_arp.h> 33 #include <linux/mroute.h> 34 #include <linux/init.h> 35 #include <linux/in6.h> 36 #include <linux/inetdevice.h> 37 #include <linux/igmp.h> 38 #include <linux/netfilter_ipv4.h> 39 #include <linux/etherdevice.h> 40 #include <linux/if_ether.h> 41 #include <linux/if_vlan.h> 42 #include <linux/rculist.h> 43 #include <linux/err.h> 44 45 #include <net/sock.h> 46 #include <net/ip.h> 47 #include <net/icmp.h> 48 #include <net/protocol.h> 49 #include <net/ip_tunnels.h> 50 #include <net/arp.h> 51 #include <net/checksum.h> 52 #include <net/dsfield.h> 53 #include <net/inet_ecn.h> 54 #include <net/xfrm.h> 55 #include <net/net_namespace.h> 56 #include <net/netns/generic.h> 57 #include <net/rtnetlink.h> 58 #include <net/udp.h> 59 60 #if IS_ENABLED(CONFIG_IPV6) 61 #include <net/ipv6.h> 62 #include <net/ip6_fib.h> 63 #include <net/ip6_route.h> 64 #endif 65 66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) 67 { 68 return hash_32((__force u32)key ^ (__force u32)remote, 69 IP_TNL_HASH_BITS); 70 } 71 72 static void __tunnel_dst_set(struct ip_tunnel_dst *idst, 73 struct dst_entry *dst, __be32 saddr) 74 { 75 struct dst_entry *old_dst; 76 77 dst_clone(dst); 78 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); 79 dst_release(old_dst); 80 idst->saddr = saddr; 81 } 82 83 static noinline void tunnel_dst_set(struct ip_tunnel *t, 84 struct dst_entry *dst, __be32 saddr) 85 { 86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr); 87 } 88 89 static void tunnel_dst_reset(struct ip_tunnel *t) 90 { 91 tunnel_dst_set(t, NULL, 0); 92 } 93 94 void ip_tunnel_dst_reset_all(struct ip_tunnel *t) 95 { 96 int i; 97 98 for_each_possible_cpu(i) 99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0); 100 } 101 EXPORT_SYMBOL(ip_tunnel_dst_reset_all); 102 103 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, 104 u32 cookie, __be32 *saddr) 105 { 106 struct ip_tunnel_dst *idst; 107 struct dst_entry *dst; 108 109 rcu_read_lock(); 110 idst = raw_cpu_ptr(t->dst_cache); 111 dst = rcu_dereference(idst->dst); 112 if (dst && !atomic_inc_not_zero(&dst->__refcnt)) 113 dst = NULL; 114 if (dst) { 115 if (!dst->obsolete || dst->ops->check(dst, cookie)) { 116 *saddr = idst->saddr; 117 } else { 118 tunnel_dst_reset(t); 119 dst_release(dst); 120 dst = NULL; 121 } 122 } 123 rcu_read_unlock(); 124 return (struct rtable *)dst; 125 } 126 127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, 128 __be16 flags, __be32 key) 129 { 130 if (p->i_flags & TUNNEL_KEY) { 131 if (flags & TUNNEL_KEY) 132 return key == p->i_key; 133 else 134 /* key expected, none present */ 135 return false; 136 } else 137 return !(flags & TUNNEL_KEY); 138 } 139 140 /* Fallback tunnel: no source, no destination, no key, no options 141 142 Tunnel hash table: 143 We require exact key match i.e. if a key is present in packet 144 it will match only tunnel with the same key; if it is not present, 145 it will match only keyless tunnel. 146 147 All keysless packets, if not matched configured keyless tunnels 148 will match fallback tunnel. 149 Given src, dst and key, find appropriate for input tunnel. 150 */ 151 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, 152 int link, __be16 flags, 153 __be32 remote, __be32 local, 154 __be32 key) 155 { 156 unsigned int hash; 157 struct ip_tunnel *t, *cand = NULL; 158 struct hlist_head *head; 159 160 hash = ip_tunnel_hash(key, remote); 161 head = &itn->tunnels[hash]; 162 163 hlist_for_each_entry_rcu(t, head, hash_node) { 164 if (local != t->parms.iph.saddr || 165 remote != t->parms.iph.daddr || 166 !(t->dev->flags & IFF_UP)) 167 continue; 168 169 if (!ip_tunnel_key_match(&t->parms, flags, key)) 170 continue; 171 172 if (t->parms.link == link) 173 return t; 174 else 175 cand = t; 176 } 177 178 hlist_for_each_entry_rcu(t, head, hash_node) { 179 if (remote != t->parms.iph.daddr || 180 t->parms.iph.saddr != 0 || 181 !(t->dev->flags & IFF_UP)) 182 continue; 183 184 if (!ip_tunnel_key_match(&t->parms, flags, key)) 185 continue; 186 187 if (t->parms.link == link) 188 return t; 189 else if (!cand) 190 cand = t; 191 } 192 193 hash = ip_tunnel_hash(key, 0); 194 head = &itn->tunnels[hash]; 195 196 hlist_for_each_entry_rcu(t, head, hash_node) { 197 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && 198 (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) 199 continue; 200 201 if (!(t->dev->flags & IFF_UP)) 202 continue; 203 204 if (!ip_tunnel_key_match(&t->parms, flags, key)) 205 continue; 206 207 if (t->parms.link == link) 208 return t; 209 else if (!cand) 210 cand = t; 211 } 212 213 if (flags & TUNNEL_NO_KEY) 214 goto skip_key_lookup; 215 216 hlist_for_each_entry_rcu(t, head, hash_node) { 217 if (t->parms.i_key != key || 218 t->parms.iph.saddr != 0 || 219 t->parms.iph.daddr != 0 || 220 !(t->dev->flags & IFF_UP)) 221 continue; 222 223 if (t->parms.link == link) 224 return t; 225 else if (!cand) 226 cand = t; 227 } 228 229 skip_key_lookup: 230 if (cand) 231 return cand; 232 233 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) 234 return netdev_priv(itn->fb_tunnel_dev); 235 236 237 return NULL; 238 } 239 EXPORT_SYMBOL_GPL(ip_tunnel_lookup); 240 241 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, 242 struct ip_tunnel_parm *parms) 243 { 244 unsigned int h; 245 __be32 remote; 246 __be32 i_key = parms->i_key; 247 248 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) 249 remote = parms->iph.daddr; 250 else 251 remote = 0; 252 253 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) 254 i_key = 0; 255 256 h = ip_tunnel_hash(i_key, remote); 257 return &itn->tunnels[h]; 258 } 259 260 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) 261 { 262 struct hlist_head *head = ip_bucket(itn, &t->parms); 263 264 hlist_add_head_rcu(&t->hash_node, head); 265 } 266 267 static void ip_tunnel_del(struct ip_tunnel *t) 268 { 269 hlist_del_init_rcu(&t->hash_node); 270 } 271 272 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, 273 struct ip_tunnel_parm *parms, 274 int type) 275 { 276 __be32 remote = parms->iph.daddr; 277 __be32 local = parms->iph.saddr; 278 __be32 key = parms->i_key; 279 __be16 flags = parms->i_flags; 280 int link = parms->link; 281 struct ip_tunnel *t = NULL; 282 struct hlist_head *head = ip_bucket(itn, parms); 283 284 hlist_for_each_entry_rcu(t, head, hash_node) { 285 if (local == t->parms.iph.saddr && 286 remote == t->parms.iph.daddr && 287 link == t->parms.link && 288 type == t->dev->type && 289 ip_tunnel_key_match(&t->parms, flags, key)) 290 break; 291 } 292 return t; 293 } 294 295 static struct net_device *__ip_tunnel_create(struct net *net, 296 const struct rtnl_link_ops *ops, 297 struct ip_tunnel_parm *parms) 298 { 299 int err; 300 struct ip_tunnel *tunnel; 301 struct net_device *dev; 302 char name[IFNAMSIZ]; 303 304 if (parms->name[0]) 305 strlcpy(name, parms->name, IFNAMSIZ); 306 else { 307 if (strlen(ops->kind) > (IFNAMSIZ - 3)) { 308 err = -E2BIG; 309 goto failed; 310 } 311 strlcpy(name, ops->kind, IFNAMSIZ); 312 strncat(name, "%d", 2); 313 } 314 315 ASSERT_RTNL(); 316 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); 317 if (!dev) { 318 err = -ENOMEM; 319 goto failed; 320 } 321 dev_net_set(dev, net); 322 323 dev->rtnl_link_ops = ops; 324 325 tunnel = netdev_priv(dev); 326 tunnel->parms = *parms; 327 tunnel->net = net; 328 329 err = register_netdevice(dev); 330 if (err) 331 goto failed_free; 332 333 return dev; 334 335 failed_free: 336 free_netdev(dev); 337 failed: 338 return ERR_PTR(err); 339 } 340 341 static inline void init_tunnel_flow(struct flowi4 *fl4, 342 int proto, 343 __be32 daddr, __be32 saddr, 344 __be32 key, __u8 tos, int oif) 345 { 346 memset(fl4, 0, sizeof(*fl4)); 347 fl4->flowi4_oif = oif; 348 fl4->daddr = daddr; 349 fl4->saddr = saddr; 350 fl4->flowi4_tos = tos; 351 fl4->flowi4_proto = proto; 352 fl4->fl4_gre_key = key; 353 } 354 355 static int ip_tunnel_bind_dev(struct net_device *dev) 356 { 357 struct net_device *tdev = NULL; 358 struct ip_tunnel *tunnel = netdev_priv(dev); 359 const struct iphdr *iph; 360 int hlen = LL_MAX_HEADER; 361 int mtu = ETH_DATA_LEN; 362 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 363 364 iph = &tunnel->parms.iph; 365 366 /* Guess output device to choose reasonable mtu and needed_headroom */ 367 if (iph->daddr) { 368 struct flowi4 fl4; 369 struct rtable *rt; 370 371 init_tunnel_flow(&fl4, iph->protocol, iph->daddr, 372 iph->saddr, tunnel->parms.o_key, 373 RT_TOS(iph->tos), tunnel->parms.link); 374 rt = ip_route_output_key(tunnel->net, &fl4); 375 376 if (!IS_ERR(rt)) { 377 tdev = rt->dst.dev; 378 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 379 ip_rt_put(rt); 380 } 381 if (dev->type != ARPHRD_ETHER) 382 dev->flags |= IFF_POINTOPOINT; 383 } 384 385 if (!tdev && tunnel->parms.link) 386 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); 387 388 if (tdev) { 389 hlen = tdev->hard_header_len + tdev->needed_headroom; 390 mtu = tdev->mtu; 391 } 392 dev->iflink = tunnel->parms.link; 393 394 dev->needed_headroom = t_hlen + hlen; 395 mtu -= (dev->hard_header_len + t_hlen); 396 397 if (mtu < 68) 398 mtu = 68; 399 400 return mtu; 401 } 402 403 static struct ip_tunnel *ip_tunnel_create(struct net *net, 404 struct ip_tunnel_net *itn, 405 struct ip_tunnel_parm *parms) 406 { 407 struct ip_tunnel *nt; 408 struct net_device *dev; 409 410 BUG_ON(!itn->fb_tunnel_dev); 411 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); 412 if (IS_ERR(dev)) 413 return ERR_CAST(dev); 414 415 dev->mtu = ip_tunnel_bind_dev(dev); 416 417 nt = netdev_priv(dev); 418 ip_tunnel_add(itn, nt); 419 return nt; 420 } 421 422 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, 423 const struct tnl_ptk_info *tpi, bool log_ecn_error) 424 { 425 struct pcpu_sw_netstats *tstats; 426 const struct iphdr *iph = ip_hdr(skb); 427 int err; 428 429 #ifdef CONFIG_NET_IPGRE_BROADCAST 430 if (ipv4_is_multicast(iph->daddr)) { 431 tunnel->dev->stats.multicast++; 432 skb->pkt_type = PACKET_BROADCAST; 433 } 434 #endif 435 436 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || 437 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { 438 tunnel->dev->stats.rx_crc_errors++; 439 tunnel->dev->stats.rx_errors++; 440 goto drop; 441 } 442 443 if (tunnel->parms.i_flags&TUNNEL_SEQ) { 444 if (!(tpi->flags&TUNNEL_SEQ) || 445 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { 446 tunnel->dev->stats.rx_fifo_errors++; 447 tunnel->dev->stats.rx_errors++; 448 goto drop; 449 } 450 tunnel->i_seqno = ntohl(tpi->seq) + 1; 451 } 452 453 skb_reset_network_header(skb); 454 455 err = IP_ECN_decapsulate(iph, skb); 456 if (unlikely(err)) { 457 if (log_ecn_error) 458 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", 459 &iph->saddr, iph->tos); 460 if (err > 1) { 461 ++tunnel->dev->stats.rx_frame_errors; 462 ++tunnel->dev->stats.rx_errors; 463 goto drop; 464 } 465 } 466 467 tstats = this_cpu_ptr(tunnel->dev->tstats); 468 u64_stats_update_begin(&tstats->syncp); 469 tstats->rx_packets++; 470 tstats->rx_bytes += skb->len; 471 u64_stats_update_end(&tstats->syncp); 472 473 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); 474 475 if (tunnel->dev->type == ARPHRD_ETHER) { 476 skb->protocol = eth_type_trans(skb, tunnel->dev); 477 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 478 } else { 479 skb->dev = tunnel->dev; 480 } 481 482 gro_cells_receive(&tunnel->gro_cells, skb); 483 return 0; 484 485 drop: 486 kfree_skb(skb); 487 return 0; 488 } 489 EXPORT_SYMBOL_GPL(ip_tunnel_rcv); 490 491 static int ip_encap_hlen(struct ip_tunnel_encap *e) 492 { 493 const struct ip_tunnel_encap_ops *ops; 494 int hlen = -EINVAL; 495 496 if (e->type == TUNNEL_ENCAP_NONE) 497 return 0; 498 499 if (e->type >= MAX_IPTUN_ENCAP_OPS) 500 return -EINVAL; 501 502 rcu_read_lock(); 503 ops = rcu_dereference(iptun_encaps[e->type]); 504 if (likely(ops && ops->encap_hlen)) 505 hlen = ops->encap_hlen(e); 506 rcu_read_unlock(); 507 508 return hlen; 509 } 510 511 const struct ip_tunnel_encap_ops __rcu * 512 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; 513 514 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, 515 unsigned int num) 516 { 517 if (num >= MAX_IPTUN_ENCAP_OPS) 518 return -ERANGE; 519 520 return !cmpxchg((const struct ip_tunnel_encap_ops **) 521 &iptun_encaps[num], 522 NULL, ops) ? 0 : -1; 523 } 524 EXPORT_SYMBOL(ip_tunnel_encap_add_ops); 525 526 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, 527 unsigned int num) 528 { 529 int ret; 530 531 if (num >= MAX_IPTUN_ENCAP_OPS) 532 return -ERANGE; 533 534 ret = (cmpxchg((const struct ip_tunnel_encap_ops **) 535 &iptun_encaps[num], 536 ops, NULL) == ops) ? 0 : -1; 537 538 synchronize_net(); 539 540 return ret; 541 } 542 EXPORT_SYMBOL(ip_tunnel_encap_del_ops); 543 544 int ip_tunnel_encap_setup(struct ip_tunnel *t, 545 struct ip_tunnel_encap *ipencap) 546 { 547 int hlen; 548 549 memset(&t->encap, 0, sizeof(t->encap)); 550 551 hlen = ip_encap_hlen(ipencap); 552 if (hlen < 0) 553 return hlen; 554 555 t->encap.type = ipencap->type; 556 t->encap.sport = ipencap->sport; 557 t->encap.dport = ipencap->dport; 558 t->encap.flags = ipencap->flags; 559 560 t->encap_hlen = hlen; 561 t->hlen = t->encap_hlen + t->tun_hlen; 562 563 return 0; 564 } 565 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); 566 567 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, 568 u8 *protocol, struct flowi4 *fl4) 569 { 570 const struct ip_tunnel_encap_ops *ops; 571 int ret = -EINVAL; 572 573 if (t->encap.type == TUNNEL_ENCAP_NONE) 574 return 0; 575 576 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) 577 return -EINVAL; 578 579 rcu_read_lock(); 580 ops = rcu_dereference(iptun_encaps[t->encap.type]); 581 if (likely(ops && ops->build_header)) 582 ret = ops->build_header(skb, &t->encap, protocol, fl4); 583 rcu_read_unlock(); 584 585 return ret; 586 } 587 EXPORT_SYMBOL(ip_tunnel_encap); 588 589 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, 590 struct rtable *rt, __be16 df) 591 { 592 struct ip_tunnel *tunnel = netdev_priv(dev); 593 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; 594 int mtu; 595 596 if (df) 597 mtu = dst_mtu(&rt->dst) - dev->hard_header_len 598 - sizeof(struct iphdr) - tunnel->hlen; 599 else 600 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 601 602 if (skb_dst(skb)) 603 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 604 605 if (skb->protocol == htons(ETH_P_IP)) { 606 if (!skb_is_gso(skb) && 607 (df & htons(IP_DF)) && mtu < pkt_size) { 608 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 609 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 610 return -E2BIG; 611 } 612 } 613 #if IS_ENABLED(CONFIG_IPV6) 614 else if (skb->protocol == htons(ETH_P_IPV6)) { 615 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); 616 617 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && 618 mtu >= IPV6_MIN_MTU) { 619 if ((tunnel->parms.iph.daddr && 620 !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 621 rt6->rt6i_dst.plen == 128) { 622 rt6->rt6i_flags |= RTF_MODIFIED; 623 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); 624 } 625 } 626 627 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && 628 mtu < pkt_size) { 629 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 630 return -E2BIG; 631 } 632 } 633 #endif 634 return 0; 635 } 636 637 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, 638 const struct iphdr *tnl_params, u8 protocol) 639 { 640 struct ip_tunnel *tunnel = netdev_priv(dev); 641 const struct iphdr *inner_iph; 642 struct flowi4 fl4; 643 u8 tos, ttl; 644 __be16 df; 645 struct rtable *rt; /* Route to the other host */ 646 unsigned int max_headroom; /* The extra header space needed */ 647 __be32 dst; 648 int err; 649 bool connected; 650 651 inner_iph = (const struct iphdr *)skb_inner_network_header(skb); 652 connected = (tunnel->parms.iph.daddr != 0); 653 654 dst = tnl_params->daddr; 655 if (dst == 0) { 656 /* NBMA tunnel */ 657 658 if (skb_dst(skb) == NULL) { 659 dev->stats.tx_fifo_errors++; 660 goto tx_error; 661 } 662 663 if (skb->protocol == htons(ETH_P_IP)) { 664 rt = skb_rtable(skb); 665 dst = rt_nexthop(rt, inner_iph->daddr); 666 } 667 #if IS_ENABLED(CONFIG_IPV6) 668 else if (skb->protocol == htons(ETH_P_IPV6)) { 669 const struct in6_addr *addr6; 670 struct neighbour *neigh; 671 bool do_tx_error_icmp; 672 int addr_type; 673 674 neigh = dst_neigh_lookup(skb_dst(skb), 675 &ipv6_hdr(skb)->daddr); 676 if (neigh == NULL) 677 goto tx_error; 678 679 addr6 = (const struct in6_addr *)&neigh->primary_key; 680 addr_type = ipv6_addr_type(addr6); 681 682 if (addr_type == IPV6_ADDR_ANY) { 683 addr6 = &ipv6_hdr(skb)->daddr; 684 addr_type = ipv6_addr_type(addr6); 685 } 686 687 if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 688 do_tx_error_icmp = true; 689 else { 690 do_tx_error_icmp = false; 691 dst = addr6->s6_addr32[3]; 692 } 693 neigh_release(neigh); 694 if (do_tx_error_icmp) 695 goto tx_error_icmp; 696 } 697 #endif 698 else 699 goto tx_error; 700 701 connected = false; 702 } 703 704 tos = tnl_params->tos; 705 if (tos & 0x1) { 706 tos &= ~0x1; 707 if (skb->protocol == htons(ETH_P_IP)) { 708 tos = inner_iph->tos; 709 connected = false; 710 } else if (skb->protocol == htons(ETH_P_IPV6)) { 711 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); 712 connected = false; 713 } 714 } 715 716 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, 717 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); 718 719 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) 720 goto tx_error; 721 722 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; 723 724 if (!rt) { 725 rt = ip_route_output_key(tunnel->net, &fl4); 726 727 if (IS_ERR(rt)) { 728 dev->stats.tx_carrier_errors++; 729 goto tx_error; 730 } 731 if (connected) 732 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); 733 } 734 735 if (rt->dst.dev == dev) { 736 ip_rt_put(rt); 737 dev->stats.collisions++; 738 goto tx_error; 739 } 740 741 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { 742 ip_rt_put(rt); 743 goto tx_error; 744 } 745 746 if (tunnel->err_count > 0) { 747 if (time_before(jiffies, 748 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 749 tunnel->err_count--; 750 751 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 752 dst_link_failure(skb); 753 } else 754 tunnel->err_count = 0; 755 } 756 757 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); 758 ttl = tnl_params->ttl; 759 if (ttl == 0) { 760 if (skb->protocol == htons(ETH_P_IP)) 761 ttl = inner_iph->ttl; 762 #if IS_ENABLED(CONFIG_IPV6) 763 else if (skb->protocol == htons(ETH_P_IPV6)) 764 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; 765 #endif 766 else 767 ttl = ip4_dst_hoplimit(&rt->dst); 768 } 769 770 df = tnl_params->frag_off; 771 if (skb->protocol == htons(ETH_P_IP)) 772 df |= (inner_iph->frag_off&htons(IP_DF)); 773 774 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) 775 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); 776 if (max_headroom > dev->needed_headroom) 777 dev->needed_headroom = max_headroom; 778 779 if (skb_cow_head(skb, dev->needed_headroom)) { 780 ip_rt_put(rt); 781 dev->stats.tx_dropped++; 782 kfree_skb(skb); 783 return; 784 } 785 786 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, 787 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); 788 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 789 790 return; 791 792 #if IS_ENABLED(CONFIG_IPV6) 793 tx_error_icmp: 794 dst_link_failure(skb); 795 #endif 796 tx_error: 797 dev->stats.tx_errors++; 798 kfree_skb(skb); 799 } 800 EXPORT_SYMBOL_GPL(ip_tunnel_xmit); 801 802 static void ip_tunnel_update(struct ip_tunnel_net *itn, 803 struct ip_tunnel *t, 804 struct net_device *dev, 805 struct ip_tunnel_parm *p, 806 bool set_mtu) 807 { 808 ip_tunnel_del(t); 809 t->parms.iph.saddr = p->iph.saddr; 810 t->parms.iph.daddr = p->iph.daddr; 811 t->parms.i_key = p->i_key; 812 t->parms.o_key = p->o_key; 813 if (dev->type != ARPHRD_ETHER) { 814 memcpy(dev->dev_addr, &p->iph.saddr, 4); 815 memcpy(dev->broadcast, &p->iph.daddr, 4); 816 } 817 ip_tunnel_add(itn, t); 818 819 t->parms.iph.ttl = p->iph.ttl; 820 t->parms.iph.tos = p->iph.tos; 821 t->parms.iph.frag_off = p->iph.frag_off; 822 823 if (t->parms.link != p->link) { 824 int mtu; 825 826 t->parms.link = p->link; 827 mtu = ip_tunnel_bind_dev(dev); 828 if (set_mtu) 829 dev->mtu = mtu; 830 } 831 ip_tunnel_dst_reset_all(t); 832 netdev_state_change(dev); 833 } 834 835 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) 836 { 837 int err = 0; 838 struct ip_tunnel *t = netdev_priv(dev); 839 struct net *net = t->net; 840 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); 841 842 BUG_ON(!itn->fb_tunnel_dev); 843 switch (cmd) { 844 case SIOCGETTUNNEL: 845 if (dev == itn->fb_tunnel_dev) { 846 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 847 if (t == NULL) 848 t = netdev_priv(dev); 849 } 850 memcpy(p, &t->parms, sizeof(*p)); 851 break; 852 853 case SIOCADDTUNNEL: 854 case SIOCCHGTUNNEL: 855 err = -EPERM; 856 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 857 goto done; 858 if (p->iph.ttl) 859 p->iph.frag_off |= htons(IP_DF); 860 if (!(p->i_flags & VTI_ISVTI)) { 861 if (!(p->i_flags & TUNNEL_KEY)) 862 p->i_key = 0; 863 if (!(p->o_flags & TUNNEL_KEY)) 864 p->o_key = 0; 865 } 866 867 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 868 869 if (cmd == SIOCADDTUNNEL) { 870 if (!t) { 871 t = ip_tunnel_create(net, itn, p); 872 err = PTR_ERR_OR_ZERO(t); 873 break; 874 } 875 876 err = -EEXIST; 877 break; 878 } 879 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 880 if (t != NULL) { 881 if (t->dev != dev) { 882 err = -EEXIST; 883 break; 884 } 885 } else { 886 unsigned int nflags = 0; 887 888 if (ipv4_is_multicast(p->iph.daddr)) 889 nflags = IFF_BROADCAST; 890 else if (p->iph.daddr) 891 nflags = IFF_POINTOPOINT; 892 893 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 894 err = -EINVAL; 895 break; 896 } 897 898 t = netdev_priv(dev); 899 } 900 } 901 902 if (t) { 903 err = 0; 904 ip_tunnel_update(itn, t, dev, p, true); 905 } else { 906 err = -ENOENT; 907 } 908 break; 909 910 case SIOCDELTUNNEL: 911 err = -EPERM; 912 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 913 goto done; 914 915 if (dev == itn->fb_tunnel_dev) { 916 err = -ENOENT; 917 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); 918 if (t == NULL) 919 goto done; 920 err = -EPERM; 921 if (t == netdev_priv(itn->fb_tunnel_dev)) 922 goto done; 923 dev = t->dev; 924 } 925 unregister_netdevice(dev); 926 err = 0; 927 break; 928 929 default: 930 err = -EINVAL; 931 } 932 933 done: 934 return err; 935 } 936 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); 937 938 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 939 { 940 struct ip_tunnel *tunnel = netdev_priv(dev); 941 int t_hlen = tunnel->hlen + sizeof(struct iphdr); 942 943 if (new_mtu < 68 || 944 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) 945 return -EINVAL; 946 dev->mtu = new_mtu; 947 return 0; 948 } 949 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); 950 951 static void ip_tunnel_dev_free(struct net_device *dev) 952 { 953 struct ip_tunnel *tunnel = netdev_priv(dev); 954 955 gro_cells_destroy(&tunnel->gro_cells); 956 free_percpu(tunnel->dst_cache); 957 free_percpu(dev->tstats); 958 free_netdev(dev); 959 } 960 961 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) 962 { 963 struct ip_tunnel *tunnel = netdev_priv(dev); 964 struct ip_tunnel_net *itn; 965 966 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); 967 968 if (itn->fb_tunnel_dev != dev) { 969 ip_tunnel_del(netdev_priv(dev)); 970 unregister_netdevice_queue(dev, head); 971 } 972 } 973 EXPORT_SYMBOL_GPL(ip_tunnel_dellink); 974 975 struct net *ip_tunnel_get_link_net(const struct net_device *dev) 976 { 977 struct ip_tunnel *tunnel = netdev_priv(dev); 978 979 return tunnel->net; 980 } 981 EXPORT_SYMBOL(ip_tunnel_get_link_net); 982 983 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, 984 struct rtnl_link_ops *ops, char *devname) 985 { 986 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); 987 struct ip_tunnel_parm parms; 988 unsigned int i; 989 990 for (i = 0; i < IP_TNL_HASH_SIZE; i++) 991 INIT_HLIST_HEAD(&itn->tunnels[i]); 992 993 if (!ops) { 994 itn->fb_tunnel_dev = NULL; 995 return 0; 996 } 997 998 memset(&parms, 0, sizeof(parms)); 999 if (devname) 1000 strlcpy(parms.name, devname, IFNAMSIZ); 1001 1002 rtnl_lock(); 1003 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); 1004 /* FB netdevice is special: we have one, and only one per netns. 1005 * Allowing to move it to another netns is clearly unsafe. 1006 */ 1007 if (!IS_ERR(itn->fb_tunnel_dev)) { 1008 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; 1009 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); 1010 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); 1011 } 1012 rtnl_unlock(); 1013 1014 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); 1015 } 1016 EXPORT_SYMBOL_GPL(ip_tunnel_init_net); 1017 1018 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, 1019 struct rtnl_link_ops *ops) 1020 { 1021 struct net *net = dev_net(itn->fb_tunnel_dev); 1022 struct net_device *dev, *aux; 1023 int h; 1024 1025 for_each_netdev_safe(net, dev, aux) 1026 if (dev->rtnl_link_ops == ops) 1027 unregister_netdevice_queue(dev, head); 1028 1029 for (h = 0; h < IP_TNL_HASH_SIZE; h++) { 1030 struct ip_tunnel *t; 1031 struct hlist_node *n; 1032 struct hlist_head *thead = &itn->tunnels[h]; 1033 1034 hlist_for_each_entry_safe(t, n, thead, hash_node) 1035 /* If dev is in the same netns, it has already 1036 * been added to the list by the previous loop. 1037 */ 1038 if (!net_eq(dev_net(t->dev), net)) 1039 unregister_netdevice_queue(t->dev, head); 1040 } 1041 } 1042 1043 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) 1044 { 1045 LIST_HEAD(list); 1046 1047 rtnl_lock(); 1048 ip_tunnel_destroy(itn, &list, ops); 1049 unregister_netdevice_many(&list); 1050 rtnl_unlock(); 1051 } 1052 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); 1053 1054 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], 1055 struct ip_tunnel_parm *p) 1056 { 1057 struct ip_tunnel *nt; 1058 struct net *net = dev_net(dev); 1059 struct ip_tunnel_net *itn; 1060 int mtu; 1061 int err; 1062 1063 nt = netdev_priv(dev); 1064 itn = net_generic(net, nt->ip_tnl_net_id); 1065 1066 if (ip_tunnel_find(itn, p, dev->type)) 1067 return -EEXIST; 1068 1069 nt->net = net; 1070 nt->parms = *p; 1071 err = register_netdevice(dev); 1072 if (err) 1073 goto out; 1074 1075 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) 1076 eth_hw_addr_random(dev); 1077 1078 mtu = ip_tunnel_bind_dev(dev); 1079 if (!tb[IFLA_MTU]) 1080 dev->mtu = mtu; 1081 1082 ip_tunnel_add(itn, nt); 1083 1084 out: 1085 return err; 1086 } 1087 EXPORT_SYMBOL_GPL(ip_tunnel_newlink); 1088 1089 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], 1090 struct ip_tunnel_parm *p) 1091 { 1092 struct ip_tunnel *t; 1093 struct ip_tunnel *tunnel = netdev_priv(dev); 1094 struct net *net = tunnel->net; 1095 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); 1096 1097 if (dev == itn->fb_tunnel_dev) 1098 return -EINVAL; 1099 1100 t = ip_tunnel_find(itn, p, dev->type); 1101 1102 if (t) { 1103 if (t->dev != dev) 1104 return -EEXIST; 1105 } else { 1106 t = tunnel; 1107 1108 if (dev->type != ARPHRD_ETHER) { 1109 unsigned int nflags = 0; 1110 1111 if (ipv4_is_multicast(p->iph.daddr)) 1112 nflags = IFF_BROADCAST; 1113 else if (p->iph.daddr) 1114 nflags = IFF_POINTOPOINT; 1115 1116 if ((dev->flags ^ nflags) & 1117 (IFF_POINTOPOINT | IFF_BROADCAST)) 1118 return -EINVAL; 1119 } 1120 } 1121 1122 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); 1123 return 0; 1124 } 1125 EXPORT_SYMBOL_GPL(ip_tunnel_changelink); 1126 1127 int ip_tunnel_init(struct net_device *dev) 1128 { 1129 struct ip_tunnel *tunnel = netdev_priv(dev); 1130 struct iphdr *iph = &tunnel->parms.iph; 1131 int err; 1132 1133 dev->destructor = ip_tunnel_dev_free; 1134 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 1135 if (!dev->tstats) 1136 return -ENOMEM; 1137 1138 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); 1139 if (!tunnel->dst_cache) { 1140 free_percpu(dev->tstats); 1141 return -ENOMEM; 1142 } 1143 1144 err = gro_cells_init(&tunnel->gro_cells, dev); 1145 if (err) { 1146 free_percpu(tunnel->dst_cache); 1147 free_percpu(dev->tstats); 1148 return err; 1149 } 1150 1151 tunnel->dev = dev; 1152 tunnel->net = dev_net(dev); 1153 strcpy(tunnel->parms.name, dev->name); 1154 iph->version = 4; 1155 iph->ihl = 5; 1156 1157 return 0; 1158 } 1159 EXPORT_SYMBOL_GPL(ip_tunnel_init); 1160 1161 void ip_tunnel_uninit(struct net_device *dev) 1162 { 1163 struct ip_tunnel *tunnel = netdev_priv(dev); 1164 struct net *net = tunnel->net; 1165 struct ip_tunnel_net *itn; 1166 1167 itn = net_generic(net, tunnel->ip_tnl_net_id); 1168 /* fb_tunnel_dev will be unregisted in net-exit call. */ 1169 if (itn->fb_tunnel_dev != dev) 1170 ip_tunnel_del(netdev_priv(dev)); 1171 1172 ip_tunnel_dst_reset_all(tunnel); 1173 } 1174 EXPORT_SYMBOL_GPL(ip_tunnel_uninit); 1175 1176 /* Do least required initialization, rest of init is done in tunnel_init call */ 1177 void ip_tunnel_setup(struct net_device *dev, int net_id) 1178 { 1179 struct ip_tunnel *tunnel = netdev_priv(dev); 1180 tunnel->ip_tnl_net_id = net_id; 1181 } 1182 EXPORT_SYMBOL_GPL(ip_tunnel_setup); 1183 1184 MODULE_LICENSE("GPL"); 1185