// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options
 *
 * Tunnel hash table:
 * We require an exact key match, i.e. if a key is present in the packet
 * it will match only a tunnel with the same key; if no key is present,
 * it will match only a keyless tunnel.
 *
 * All keyless packets that do not match a configured keyless tunnel
 * will match the fallback tunnel.
 * Given src, dst and key, find the appropriate tunnel for an input packet.
 */

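/* Lookup precedence, best match first (a worked illustration, not part
 * of the original comment): a tunnel matching (saddr, daddr, key, link)
 * exactly wins; next comes a daddr-only match, then a local-address or
 * multicast match, and finally a fully wildcarded keyed tunnel. A
 * tunnel that matches on addresses and key but not on the link is kept
 * as a candidate ("cand") and returned only if no later pass produces
 * a full match.
 */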
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

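/* Allocate and register a tunnel net_device for the given parameters.
 * When no explicit name is supplied, "<kind>%d" lets the core pick the
 * first free index (e.g. gre1, gre2, ...). Runs under RTNL.
 */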
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen;

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

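/* Common receive path for all IPv4 tunnels. Checksum and sequence
 * number expectations must agree between what the packet carried (tpi)
 * and what the tunnel was configured with; ECN decapsulation may also
 * veto the packet. The skb is always consumed: 0 is returned both on
 * delivery (to GRO cells) and on drop.
 */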
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

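/* Path MTU handling for the encapsulated flow. With DF set, the usable
 * inner MTU is the route MTU minus the outer IP header and the tunnel
 * overhead; as an illustration (numbers not from this file), a keyed
 * GRE tunnel over a 1500-byte path leaves 1500 - (20 + 8) = 1472 bytes
 * for the inner packet, and a larger DF packet is bounced with
 * ICMP_FRAG_NEEDED (or ICMPV6_PKT_TOOBIG for inner IPv6).
 */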
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph,
			   int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;

	if (df)
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
	else
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

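/* Transmit path for classically configured tunnels, where the outer
 * addresses come from the device's own parameters (tnl_params) rather
 * than from per-packet metadata. A daddr of zero means an NBMA-style
 * tunnel: the outer destination is then derived per packet from the
 * inner route, the inner IPv6 neighbour, or attached tx metadata.
 */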
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;	/* Route to the other host */
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb));

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						   &fl4.saddr) : NULL;
	}

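	/* Cache miss: do a full routing lookup. The result is remembered
	 * on the metadata dst for external (collect_md) flows, or on the
	 * tunnel's own dst_cache when the destination is fixed, so that
	 * subsequent packets can skip the FIB walk.
	 */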
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

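/* Backend for ndo_tunnel_ctl, shared by the legacy
 * SIOC{GET,ADD,CHG,DEL}TUNNEL ioctls. Modifying operations require
 * CAP_NET_ADMIN in the tunnel's user namespace; a GET or DEL issued on
 * the fallback device is redirected to the tunnel matching *p instead.
 */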
int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

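/* ip_tunnel_ioctl() below is the thin copy-in/copy-out wrapper that
 * drivers plug into their ioctl handler. A minimal userspace sketch of
 * the interface (illustrative only; names and addresses are made up):
 *
 *	struct ip_tunnel_parm p = { .iph = { .version = 4, .ihl = 5,
 *					     .protocol = IPPROTO_GRE } };
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strcpy(p.name, "gre1");
 *	p.iph.saddr = inet_addr("192.0.2.1");
 *	p.iph.daddr = inet_addr("192.0.2.2");
 *	strcpy(ifr.ifr_name, "gre0");	// talk to the fallback device
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */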
int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

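/* Per-netns setup shared by all IPv4 tunnel types: initialize the hash
 * table and, unless fallback tunnels are suppressed for this netns
 * (see the net.core.fb_tunnels_only_for_init_net sysctl behind
 * net_has_fallback_tunnels()), create the catch-all fallback device
 * (e.g. gre0, tunl0) under RTNL.
 */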
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

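/* ndo_init-time setup shared by all IPv4 tunnel drivers: per-CPU
 * stats, the per-tunnel dst cache and the GRO cells are allocated
 * here and released again by ip_tunnel_dev_free() via
 * dev->priv_destructor.
 */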
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization here; the rest is done in the
 * tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");