/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
        return hash_32((__force u32)key ^ (__force u32)remote,
                       IP_TNL_HASH_BITS);
}

static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
                             struct dst_entry *dst)
{
        struct dst_entry *old_dst;

        if (dst) {
                if (dst->flags & DST_NOCACHE)
                        dst = NULL;
                else
                        dst_clone(dst);
        }
        old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
        dst_release(old_dst);
}

static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
        __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
}

static void tunnel_dst_reset(struct ip_tunnel *t)
{
        tunnel_dst_set(t, NULL);
}

void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
        int i;

        for_each_possible_cpu(i)
                __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);

static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
        struct dst_entry *dst;

        rcu_read_lock();
        dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
        if (dst) {
                if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                        rcu_read_unlock();
                        tunnel_dst_reset(t);
                        return NULL;
                }
                dst_hold(dst);
        }
        rcu_read_unlock();
        return (struct rtable *)dst;
}

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
                                __be16 flags, __be32 key)
{
        if (p->i_flags & TUNNEL_KEY) {
                if (flags & TUNNEL_KEY)
                        return key == p->i_key;
                else
                        /* key expected, none present */
                        return false;
        } else
                return !(flags & TUNNEL_KEY);
}
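
/*
 * A sketch of ip_tunnel_key_match() semantics, for illustration only
 * (the values below are hypothetical, not taken from this file):
 *
 *	struct ip_tunnel_parm p = { .i_flags = TUNNEL_KEY,
 *				    .i_key   = htonl(42) };
 *
 *	ip_tunnel_key_match(&p, TUNNEL_KEY, htonl(42));	-> true
 *	ip_tunnel_key_match(&p, TUNNEL_KEY, htonl(7));	-> false (wrong key)
 *	ip_tunnel_key_match(&p, 0, 0);			-> false (key expected,
 *							          none present)
 *	p.i_flags = 0;
 *	ip_tunnel_key_match(&p, 0, 0);			-> true  (both keyless)
 *	ip_tunnel_key_match(&p, TUNNEL_KEY, htonl(42));	-> false (unexpected key)
 */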

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
                                   int link, __be16 flags,
                                   __be32 remote, __be32 local,
                                   __be32 key)
{
        unsigned int hash;
        struct ip_tunnel *t, *cand = NULL;
        struct hlist_head *head;

        hash = ip_tunnel_hash(key, remote);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else
                        cand = t;
        }

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (remote != t->parms.iph.daddr ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        hash = ip_tunnel_hash(key, 0);
        head = &itn->tunnels[hash];

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if ((local != t->parms.iph.saddr &&
                     (local != t->parms.iph.daddr ||
                      !ipv4_is_multicast(local))) ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        if (flags & TUNNEL_NO_KEY)
                goto skip_key_lookup;

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (t->parms.i_key != key ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

skip_key_lookup:
        if (cand)
                return cand;

        if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
                return netdev_priv(itn->fb_tunnel_dev);

        return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
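
/*
 * Lookup precedence, as implemented above: (1) exact saddr+daddr match,
 * (2) daddr-only match, (3) local-address or multicast-destination match
 * in the wildcard-remote bucket, (4) key-only match; each pass prefers an
 * exact parms.link match over a link mismatch (kept as "cand"), and the
 * fallback device is returned last, if it is up.  A protocol receive
 * handler would typically call this under rcu_read_lock(), roughly like
 * the following sketch (hypothetical caller, not part of this file):
 *
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *				  iph->saddr, iph->daddr, tpi->key);
 *	if (tunnel)
 *		return ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
 */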

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
                                    struct ip_tunnel_parm *parms)
{
        unsigned int h;
        __be32 remote;

        if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
                remote = parms->iph.daddr;
        else
                remote = 0;

        h = ip_tunnel_hash(parms->i_key, remote);
        return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
        struct hlist_head *head = ip_bucket(itn, &t->parms);

        hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel *t)
{
        hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
                                        struct ip_tunnel_parm *parms,
                                        int type)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        int link = parms->link;
        struct ip_tunnel *t = NULL;
        struct hlist_head *head = ip_bucket(itn, parms);

        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr &&
                    key == t->parms.i_key &&
                    link == t->parms.link &&
                    type == t->dev->type)
                        break;
        }
        return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
                                             const struct rtnl_link_ops *ops,
                                             struct ip_tunnel_parm *parms)
{
        int err;
        struct ip_tunnel *tunnel;
        struct net_device *dev;
        char name[IFNAMSIZ];

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else {
                if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
                        err = -E2BIG;
                        goto failed;
                }
                strlcpy(name, ops->kind, IFNAMSIZ);
                strncat(name, "%d", 2);
        }

        ASSERT_RTNL();
        dev = alloc_netdev(ops->priv_size, name, ops->setup);
        if (!dev) {
                err = -ENOMEM;
                goto failed;
        }
        dev_net_set(dev, net);

        dev->rtnl_link_ops = ops;

        tunnel = netdev_priv(dev);
        tunnel->parms = *parms;
        tunnel->net = net;

        err = register_netdevice(dev);
        if (err)
                goto failed_free;

        return dev;

failed_free:
        free_netdev(dev);
failed:
        return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
                                    int proto,
                                    __be32 daddr, __be32 saddr,
                                    __be32 key, __u8 tos, int oif)
{
        memset(fl4, 0, sizeof(*fl4));
        fl4->flowi4_oif = oif;
        fl4->daddr = daddr;
        fl4->saddr = saddr;
        fl4->flowi4_tos = tos;
        fl4->flowi4_proto = proto;
        fl4->fl4_gre_key = key;
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *iph;
        int hlen = LL_MAX_HEADER;
        int mtu = ETH_DATA_LEN;
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        iph = &tunnel->parms.iph;

        /* Guess output device to choose reasonable mtu and needed_headroom */
        if (iph->daddr) {
                struct flowi4 fl4;
                struct rtable *rt;

                init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
                                 iph->saddr, tunnel->parms.o_key,
                                 RT_TOS(iph->tos), tunnel->parms.link);
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        tunnel_dst_set(tunnel, &rt->dst);
                        ip_rt_put(rt);
                }
                if (dev->type != ARPHRD_ETHER)
                        dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

        if (tdev) {
                hlen = tdev->hard_header_len + tdev->needed_headroom;
                mtu = tdev->mtu;
        }
        dev->iflink = tunnel->parms.link;

        dev->needed_headroom = t_hlen + hlen;
        mtu -= (dev->hard_header_len + t_hlen);

        if (mtu < 68)
                mtu = 68;

        return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
                                          struct ip_tunnel_net *itn,
                                          struct ip_tunnel_parm *parms)
{
        struct ip_tunnel *nt, *fbt;
        struct net_device *dev;

        BUG_ON(!itn->fb_tunnel_dev);
        fbt = netdev_priv(itn->fb_tunnel_dev);
        dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
        if (IS_ERR(dev))
                return NULL;

        dev->mtu = ip_tunnel_bind_dev(dev);

        nt = netdev_priv(dev);
        ip_tunnel_add(itn, nt);
        return nt;
}
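
/*
 * Worked example of the MTU arithmetic in ip_tunnel_bind_dev(), under
 * assumed (hypothetical) values: an underlay device with mtu == 1500 and
 * a GRE-style tunnel with tunnel->hlen == 4.  Then
 *
 *	t_hlen = tunnel->hlen + sizeof(struct iphdr) = 4 + 20 = 24
 *	mtu    = 1500 - (dev->hard_header_len + t_hlen) = 1476
 *
 * (taking hard_header_len == 0 for a point-to-point tunnel device),
 * which matches the classic 1476-byte GRE tunnel MTU.  The result is
 * clamped to no less than 68, the minimum datagram size every IPv4
 * host must accept (RFC 791).
 */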

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
                  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
        struct pcpu_sw_netstats *tstats;
        const struct iphdr *iph = ip_hdr(skb);
        int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(iph->daddr)) {
                /* Looped back packet, drop it! */
                if (rt_is_output_route(skb_rtable(skb)))
                        goto drop;
                tunnel->dev->stats.multicast++;
                skb->pkt_type = PACKET_BROADCAST;
        }
#endif

        if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
            ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
                tunnel->dev->stats.rx_crc_errors++;
                tunnel->dev->stats.rx_errors++;
                goto drop;
        }

        if (tunnel->parms.i_flags&TUNNEL_SEQ) {
                if (!(tpi->flags&TUNNEL_SEQ) ||
                    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
                        tunnel->dev->stats.rx_fifo_errors++;
                        tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                tunnel->i_seqno = ntohl(tpi->seq) + 1;
        }

        err = IP_ECN_decapsulate(iph, skb);
        if (unlikely(err)) {
                if (log_ecn_error)
                        net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
                                             &iph->saddr, iph->tos);
                if (err > 1) {
                        ++tunnel->dev->stats.rx_frame_errors;
                        ++tunnel->dev->stats.rx_errors;
                        goto drop;
                }
        }

        tstats = this_cpu_ptr(tunnel->dev->tstats);
        u64_stats_update_begin(&tstats->syncp);
        tstats->rx_packets++;
        tstats->rx_bytes += skb->len;
        u64_stats_update_end(&tstats->syncp);

        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

        if (tunnel->dev->type == ARPHRD_ETHER) {
                skb->protocol = eth_type_trans(skb, tunnel->dev);
                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
        } else {
                skb->dev = tunnel->dev;
        }

        gro_cells_receive(&tunnel->gro_cells, skb);
        return 0;

drop:
        kfree_skb(skb);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
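
/*
 * Note on the TUNNEL_SEQ check above: the comparison
 *
 *	(s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0
 *
 * is done in signed 32-bit arithmetic so that sequence numbers compare
 * correctly across wraparound (serial-number arithmetic).  For example,
 * with i_seqno == 0xffffffff an incoming seq of 0 yields (s32)1 > 0 and
 * is accepted, while a stale seq of 0xfffffffe yields (s32)-1 and is
 * dropped as out of order.
 */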

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
                           struct rtable *rt, __be16 df)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
        int mtu;

        if (df)
                mtu = dst_mtu(&rt->dst) - dev->hard_header_len
                                        - sizeof(struct iphdr) - tunnel->hlen;
        else
                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                if (!skb_is_gso(skb) &&
                    (df & htons(IP_DF)) && mtu < pkt_size) {
                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        return -E2BIG;
                }
        }
#if IS_ENABLED(CONFIG_IPV6)
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

                if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
                    mtu >= IPV6_MIN_MTU) {
                        if ((tunnel->parms.iph.daddr &&
                            !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
                        }
                }

                if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
                    mtu < pkt_size) {
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        return -E2BIG;
                }
        }
#endif
        return 0;
}
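
/*
 * Worked example for tnl_update_pmtu(), with hypothetical numbers: if
 * the route to the tunnel endpoint has dst_mtu() == 1500 and this is a
 * 4-byte-header tunnel (tunnel->hlen == 4, hard_header_len == 0), a
 * DF-set inner IPv4 packet larger than
 *
 *	1500 - 0 - 20 - 4 = 1476 bytes
 *
 * triggers icmp_send(ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED) with that
 * 1476-byte MTU, so the sender's path-MTU discovery converges on the
 * tunnel path.  IPv6 is analogous via ICMPV6_PKT_TOOBIG, except the
 * error is only generated when the computed MTU is at least
 * IPV6_MIN_MTU (1280).
 */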

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                    const struct iphdr *tnl_params, const u8 protocol)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *inner_iph;
        struct flowi4 fl4;
        u8 tos, ttl;
        __be16 df;
        struct rtable *rt;              /* Route to the other host */
        unsigned int max_headroom;      /* The extra header space needed */
        __be32 dst;
        int err;
        bool connected = true;

        inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

        dst = tnl_params->daddr;
        if (dst == 0) {
                /* NBMA tunnel */

                if (skb_dst(skb) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }

                if (skb->protocol == htons(ETH_P_IP)) {
                        rt = skb_rtable(skb);
                        dst = rt_nexthop(rt, inner_iph->daddr);
                }
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        const struct in6_addr *addr6;
                        struct neighbour *neigh;
                        bool do_tx_error_icmp;
                        int addr_type;

                        neigh = dst_neigh_lookup(skb_dst(skb),
                                                 &ipv6_hdr(skb)->daddr);
                        if (neigh == NULL)
                                goto tx_error;

                        addr6 = (const struct in6_addr *)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                do_tx_error_icmp = true;
                        else {
                                do_tx_error_icmp = false;
                                dst = addr6->s6_addr32[3];
                        }
                        neigh_release(neigh);
                        if (do_tx_error_icmp)
                                goto tx_error_icmp;
                }
#endif
                else
                        goto tx_error;

                connected = false;
        }

        tos = tnl_params->tos;
        if (tos & 0x1) {
                tos &= ~0x1;
                if (skb->protocol == htons(ETH_P_IP)) {
                        tos = inner_iph->tos;
                        connected = false;
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
                        connected = false;
                }
        }

        init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
                         tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

        rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

        if (!rt) {
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (IS_ERR(rt)) {
                        dev->stats.tx_carrier_errors++;
                        goto tx_error;
                }
                if (connected)
                        tunnel_dst_set(tunnel, &rt->dst);
        }

        if (rt->dst.dev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
                ip_rt_put(rt);
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;

                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = tnl_params->ttl;
        if (ttl == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
                else
                        ttl = ip4_dst_hoplimit(&rt->dst);
        }

        df = tnl_params->frag_off;
        if (skb->protocol == htons(ETH_P_IP))
                df |= (inner_iph->frag_off&htons(IP_DF));

        max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
                        + rt->dst.header_len;
        if (max_headroom > dev->needed_headroom)
                dev->needed_headroom = max_headroom;

        if (skb_cow_head(skb, dev->needed_headroom)) {
                dev->stats.tx_dropped++;
                kfree_skb(skb);
                return;
        }

        err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
                            tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
        iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

        return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
        dst_link_failure(skb);
#endif
tx_error:
        dev->stats.tx_errors++;
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

static void ip_tunnel_update(struct ip_tunnel_net *itn,
                             struct ip_tunnel *t,
                             struct net_device *dev,
                             struct ip_tunnel_parm *p,
                             bool set_mtu)
{
        ip_tunnel_del(t);
        t->parms.iph.saddr = p->iph.saddr;
        t->parms.iph.daddr = p->iph.daddr;
        t->parms.i_key = p->i_key;
        t->parms.o_key = p->o_key;
        if (dev->type != ARPHRD_ETHER) {
                memcpy(dev->dev_addr, &p->iph.saddr, 4);
                memcpy(dev->broadcast, &p->iph.daddr, 4);
        }
        ip_tunnel_add(itn, t);

        t->parms.iph.ttl = p->iph.ttl;
        t->parms.iph.tos = p->iph.tos;
        t->parms.iph.frag_off = p->iph.frag_off;

        if (t->parms.link != p->link) {
                int mtu;

                t->parms.link = p->link;
                mtu = ip_tunnel_bind_dev(dev);
                if (set_mtu)
                        dev->mtu = mtu;
        }
        ip_tunnel_dst_reset_all(t);
        netdev_state_change(dev);
}
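
/*
 * ip_tunnel_update() must unhash and rehash the tunnel (ip_tunnel_del()
 * followed by ip_tunnel_add()) because changing daddr or i_key can move
 * it to a different ip_bucket(); readers walking the old chain under RCU
 * remain safe thanks to hlist_del_init_rcu()/hlist_add_head_rcu().  The
 * per-cpu cached routes are invalidated as well, since a new saddr,
 * daddr or link may resolve to a different output route.
 */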

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
        int err = 0;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

        BUG_ON(!itn->fb_tunnel_dev);
        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == itn->fb_tunnel_dev)
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(p, &t->parms, sizeof(*p));
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;
                if (p->iph.ttl)
                        p->iph.frag_off |= htons(IP_DF);
                if (!(p->i_flags&TUNNEL_KEY))
                        p->i_key = 0;
                if (!(p->o_flags&TUNNEL_KEY))
                        p->o_key = 0;

                t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

                if (!t && (cmd == SIOCADDTUNNEL))
                        t = ip_tunnel_create(net, itn, p);

                if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                unsigned int nflags = 0;

                                if (ipv4_is_multicast(p->iph.daddr))
                                        nflags = IFF_BROADCAST;
                                else if (p->iph.daddr)
                                        nflags = IFF_POINTOPOINT;

                                if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
                                        err = -EINVAL;
                                        break;
                                }

                                t = netdev_priv(dev);
                        }
                }

                if (t) {
                        err = 0;
                        ip_tunnel_update(itn, t, dev, p, true);
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                if (dev == itn->fb_tunnel_dev) {
                        err = -ENOENT;
                        t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
                        if (t == NULL)
                                goto done;
                        err = -EPERM;
                        if (t == netdev_priv(itn->fb_tunnel_dev))
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        if (new_mtu < 68 ||
            new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        gro_cells_destroy(&tunnel->gro_cells);
        free_percpu(tunnel->dst_cache);
        free_percpu(dev->tstats);
        free_netdev(dev);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct ip_tunnel_net *itn;

        itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

        if (itn->fb_tunnel_dev != dev) {
                ip_tunnel_del(netdev_priv(dev));
                unregister_netdevice_queue(dev, head);
        }
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
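
/*
 * The bounds in ip_tunnel_change_mtu() come from IPv4 itself: 68 is the
 * smallest MTU an IPv4 host must accept (RFC 791), and 0xFFF8 (65528)
 * is the largest 8-byte-aligned size that fits the 16-bit tot_len
 * field, from which the encapsulation overhead is subtracted.  Worked
 * example with hypothetical values, a 4-byte-header tunnel
 * (t_hlen = 4 + 20 = 24) on a device with hard_header_len == 0:
 *
 *	68 <= new_mtu <= 65528 - 0 - 24 = 65504
 */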

int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
                       struct rtnl_link_ops *ops, char *devname)
{
        struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
        struct ip_tunnel_parm parms;
        unsigned int i;

        for (i = 0; i < IP_TNL_HASH_SIZE; i++)
                INIT_HLIST_HEAD(&itn->tunnels[i]);

        if (!ops) {
                itn->fb_tunnel_dev = NULL;
                return 0;
        }

        memset(&parms, 0, sizeof(parms));
        if (devname)
                strlcpy(parms.name, devname, IFNAMSIZ);

        rtnl_lock();
        itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
        /* FB netdevice is special: we have one, and only one per netns.
         * Allowing to move it to another netns is clearly unsafe.
         */
        if (!IS_ERR(itn->fb_tunnel_dev)) {
                itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
                ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
        }
        rtnl_unlock();

        return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
                              struct rtnl_link_ops *ops)
{
        struct net *net = dev_net(itn->fb_tunnel_dev);
        struct net_device *dev, *aux;
        int h;

        for_each_netdev_safe(net, dev, aux)
                if (dev->rtnl_link_ops == ops)
                        unregister_netdevice_queue(dev, head);

        for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
                struct ip_tunnel *t;
                struct hlist_node *n;
                struct hlist_head *thead = &itn->tunnels[h];

                hlist_for_each_entry_safe(t, n, thead, hash_node)
                        /* If dev is in the same netns, it has already
                         * been added to the list by the previous loop.
                         */
                        if (!net_eq(dev_net(t->dev), net))
                                unregister_netdevice_queue(t->dev, head);
        }
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
        LIST_HEAD(list);

        rtnl_lock();
        ip_tunnel_destroy(itn, &list, ops);
        unregister_netdevice_many(&list);
        rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
                      struct ip_tunnel_parm *p)
{
        struct ip_tunnel *nt;
        struct net *net = dev_net(dev);
        struct ip_tunnel_net *itn;
        int mtu;
        int err;

        nt = netdev_priv(dev);
        itn = net_generic(net, nt->ip_tnl_net_id);

        if (ip_tunnel_find(itn, p, dev->type))
                return -EEXIST;

        nt->net = net;
        nt->parms = *p;
        err = register_netdevice(dev);
        if (err)
                goto out;

        if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
                eth_hw_addr_random(dev);

        mtu = ip_tunnel_bind_dev(dev);
        if (!tb[IFLA_MTU])
                dev->mtu = mtu;

        ip_tunnel_add(itn, nt);

out:
        return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
                         struct ip_tunnel_parm *p)
{
        struct ip_tunnel *t;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

        if (dev == itn->fb_tunnel_dev)
                return -EINVAL;

        t = ip_tunnel_find(itn, p, dev->type);

        if (t) {
                if (t->dev != dev)
                        return -EEXIST;
        } else {
                t = tunnel;

                if (dev->type != ARPHRD_ETHER) {
                        unsigned int nflags = 0;

                        if (ipv4_is_multicast(p->iph.daddr))
                                nflags = IFF_BROADCAST;
                        else if (p->iph.daddr)
                                nflags = IFF_POINTOPOINT;

                        if ((dev->flags ^ nflags) &
                            (IFF_POINTOPOINT | IFF_BROADCAST))
                                return -EINVAL;
                }
        }

        ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
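
/*
 * A protocol driver's rtnl "newlink" op is expected to parse its
 * netlink attributes into an ip_tunnel_parm and delegate here, roughly
 * as follows (hypothetical sketch modelled on the GRE/ipip drivers;
 * foo_netlink_parms() is an illustrative, driver-specific helper):
 *
 *	static int foo_newlink(struct net *src_net, struct net_device *dev,
 *			       struct nlattr *tb[], struct nlattr *data[])
 *	{
 *		struct ip_tunnel_parm p;
 *
 *		foo_netlink_parms(data, &p);
 *		return ip_tunnel_newlink(dev, tb, &p);
 *	}
 */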

int ip_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        int i, err;

        dev->destructor = ip_tunnel_dev_free;
        dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
        if (!dev->tstats)
                return -ENOMEM;

        for_each_possible_cpu(i) {
                struct pcpu_sw_netstats *ipt_stats;

                ipt_stats = per_cpu_ptr(dev->tstats, i);
                u64_stats_init(&ipt_stats->syncp);
        }

        tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
        if (!tunnel->dst_cache) {
                free_percpu(dev->tstats);
                return -ENOMEM;
        }

        err = gro_cells_init(&tunnel->gro_cells, dev);
        if (err) {
                free_percpu(tunnel->dst_cache);
                free_percpu(dev->tstats);
                return err;
        }

        tunnel->dev = dev;
        tunnel->net = dev_net(dev);
        strcpy(tunnel->parms.name, dev->name);
        iph->version = 4;
        iph->ihl = 5;

        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net *net = tunnel->net;
        struct ip_tunnel_net *itn;

        itn = net_generic(net, tunnel->ip_tnl_net_id);
        /* fb_tunnel_dev will be unregistered in net-exit call. */
        if (itn->fb_tunnel_dev != dev)
                ip_tunnel_del(netdev_priv(dev));

        ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");
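
/*
 * Taken together, a driver built on this library typically wires the
 * helpers up as follows (hypothetical sketch; the foo_* names are
 * illustrative, not part of this file):
 *
 *	static const struct net_device_ops foo_netdev_ops = {
 *		.ndo_init	= ip_tunnel_init,
 *		.ndo_uninit	= ip_tunnel_uninit,
 *		.ndo_start_xmit	= foo_xmit,	(calls ip_tunnel_xmit())
 *		.ndo_do_ioctl	= foo_ioctl,	(calls ip_tunnel_ioctl())
 *		.ndo_change_mtu	= ip_tunnel_change_mtu,
 *	};
 *
 *	static void foo_tunnel_setup(struct net_device *dev)
 *	{
 *		dev->netdev_ops = &foo_netdev_ops;
 *		ip_tunnel_setup(dev, foo_net_id);
 *	}
 */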