1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux NET3: GRE over IP protocol decoder. 4 * 5 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/capability.h> 11 #include <linux/module.h> 12 #include <linux/types.h> 13 #include <linux/kernel.h> 14 #include <linux/slab.h> 15 #include <linux/uaccess.h> 16 #include <linux/skbuff.h> 17 #include <linux/netdevice.h> 18 #include <linux/in.h> 19 #include <linux/tcp.h> 20 #include <linux/udp.h> 21 #include <linux/if_arp.h> 22 #include <linux/if_vlan.h> 23 #include <linux/init.h> 24 #include <linux/in6.h> 25 #include <linux/inetdevice.h> 26 #include <linux/igmp.h> 27 #include <linux/netfilter_ipv4.h> 28 #include <linux/etherdevice.h> 29 #include <linux/if_ether.h> 30 31 #include <net/sock.h> 32 #include <net/ip.h> 33 #include <net/icmp.h> 34 #include <net/protocol.h> 35 #include <net/ip_tunnels.h> 36 #include <net/arp.h> 37 #include <net/checksum.h> 38 #include <net/dsfield.h> 39 #include <net/inet_ecn.h> 40 #include <net/xfrm.h> 41 #include <net/net_namespace.h> 42 #include <net/netns/generic.h> 43 #include <net/rtnetlink.h> 44 #include <net/gre.h> 45 #include <net/dst_metadata.h> 46 #include <net/erspan.h> 47 48 /* 49 Problems & solutions 50 -------------------- 51 52 1. The most important issue is detecting local dead loops. 53 They would cause complete host lockup in transmit, which 54 would be "resolved" by stack overflow or, if queueing is enabled, 55 with infinite looping in net_bh. 56 57 We cannot track such dead loops during route installation, 58 it is infeasible task. The most general solutions would be 59 to keep skb->encapsulation counter (sort of local ttl), 60 and silently drop packet when it expires. It is a good 61 solution, but it supposes maintaining new variable in ALL 62 skb, even if no tunneling is used. 63 64 Current solution: xmit_recursion breaks dead loops. 
This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT

   2. Networking dead loops would not kill routers, but would really
   kill network. IP hop limit plays role of "t->recursion" in this case,
   if we copy it from packet being encapsulated to upper header.
   It is very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
   rapidly degrades to value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   fatal route to network, even if it were you who configured
   fatal static route: you are innocent.
:-) 101 102 Alexey Kuznetsov. 103 */ 104 105 static bool log_ecn_error = true; 106 module_param(log_ecn_error, bool, 0644); 107 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 108 109 static struct rtnl_link_ops ipgre_link_ops __read_mostly; 110 static int ipgre_tunnel_init(struct net_device *dev); 111 static void erspan_build_header(struct sk_buff *skb, 112 u32 id, u32 index, 113 bool truncate, bool is_ipv4); 114 115 static unsigned int ipgre_net_id __read_mostly; 116 static unsigned int gre_tap_net_id __read_mostly; 117 static unsigned int erspan_net_id __read_mostly; 118 119 static int ipgre_err(struct sk_buff *skb, u32 info, 120 const struct tnl_ptk_info *tpi) 121 { 122 123 /* All the routers (except for Linux) return only 124 8 bytes of packet payload. It means, that precise relaying of 125 ICMP in the real Internet is absolutely infeasible. 126 127 Moreover, Cisco "wise men" put GRE key to the third word 128 in GRE header. It makes impossible maintaining even soft 129 state for keyed GRE tunnels with enabled checksum. Tell 130 them "thank you". 131 132 Well, I wonder, rfc1812 was written by Cisco employee, 133 what the hell these idiots break standards established 134 by themselves??? 
135 */ 136 struct net *net = dev_net(skb->dev); 137 struct ip_tunnel_net *itn; 138 const struct iphdr *iph; 139 const int type = icmp_hdr(skb)->type; 140 const int code = icmp_hdr(skb)->code; 141 unsigned int data_len = 0; 142 struct ip_tunnel *t; 143 144 if (tpi->proto == htons(ETH_P_TEB)) 145 itn = net_generic(net, gre_tap_net_id); 146 else if (tpi->proto == htons(ETH_P_ERSPAN) || 147 tpi->proto == htons(ETH_P_ERSPAN2)) 148 itn = net_generic(net, erspan_net_id); 149 else 150 itn = net_generic(net, ipgre_net_id); 151 152 iph = (const struct iphdr *)(icmp_hdr(skb) + 1); 153 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 154 iph->daddr, iph->saddr, tpi->key); 155 156 if (!t) 157 return -ENOENT; 158 159 switch (type) { 160 default: 161 case ICMP_PARAMETERPROB: 162 return 0; 163 164 case ICMP_DEST_UNREACH: 165 switch (code) { 166 case ICMP_SR_FAILED: 167 case ICMP_PORT_UNREACH: 168 /* Impossible event. */ 169 return 0; 170 default: 171 /* All others are translated to HOST_UNREACH. 172 rfc2003 contains "deep thoughts" about NET_UNREACH, 173 I believe they are just ether pollution. 
--ANK 174 */ 175 break; 176 } 177 break; 178 179 case ICMP_TIME_EXCEEDED: 180 if (code != ICMP_EXC_TTL) 181 return 0; 182 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ 183 break; 184 185 case ICMP_REDIRECT: 186 break; 187 } 188 189 #if IS_ENABLED(CONFIG_IPV6) 190 if (tpi->proto == htons(ETH_P_IPV6) && 191 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, 192 type, data_len)) 193 return 0; 194 #endif 195 196 if (t->parms.iph.daddr == 0 || 197 ipv4_is_multicast(t->parms.iph.daddr)) 198 return 0; 199 200 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 201 return 0; 202 203 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 204 t->err_count++; 205 else 206 t->err_count = 1; 207 t->err_time = jiffies; 208 209 return 0; 210 } 211 212 static void gre_err(struct sk_buff *skb, u32 info) 213 { 214 /* All the routers (except for Linux) return only 215 * 8 bytes of packet payload. It means, that precise relaying of 216 * ICMP in the real Internet is absolutely infeasible. 217 * 218 * Moreover, Cisco "wise men" put GRE key to the third word 219 * in GRE header. It makes impossible maintaining even soft 220 * state for keyed 221 * GRE tunnels with enabled checksum. Tell them "thank you". 222 * 223 * Well, I wonder, rfc1812 was written by Cisco employee, 224 * what the hell these idiots break standards established 225 * by themselves??? 
226 */ 227 228 const struct iphdr *iph = (struct iphdr *)skb->data; 229 const int type = icmp_hdr(skb)->type; 230 const int code = icmp_hdr(skb)->code; 231 struct tnl_ptk_info tpi; 232 233 if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), 234 iph->ihl * 4) < 0) 235 return; 236 237 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 238 ipv4_update_pmtu(skb, dev_net(skb->dev), info, 239 skb->dev->ifindex, IPPROTO_GRE); 240 return; 241 } 242 if (type == ICMP_REDIRECT) { 243 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 244 IPPROTO_GRE); 245 return; 246 } 247 248 ipgre_err(skb, info, &tpi); 249 } 250 251 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, 252 int gre_hdr_len) 253 { 254 struct net *net = dev_net(skb->dev); 255 struct metadata_dst *tun_dst = NULL; 256 struct erspan_base_hdr *ershdr; 257 struct ip_tunnel_net *itn; 258 struct ip_tunnel *tunnel; 259 const struct iphdr *iph; 260 struct erspan_md2 *md2; 261 int ver; 262 int len; 263 264 itn = net_generic(net, erspan_net_id); 265 266 iph = ip_hdr(skb); 267 ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); 268 ver = ershdr->ver; 269 270 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, 271 tpi->flags | TUNNEL_KEY, 272 iph->saddr, iph->daddr, tpi->key); 273 274 if (tunnel) { 275 len = gre_hdr_len + erspan_hdr_len(ver); 276 if (unlikely(!pskb_may_pull(skb, len))) 277 return PACKET_REJECT; 278 279 if (__iptunnel_pull_header(skb, 280 len, 281 htons(ETH_P_TEB), 282 false, false) < 0) 283 goto drop; 284 285 if (tunnel->collect_md) { 286 struct erspan_metadata *pkt_md, *md; 287 struct ip_tunnel_info *info; 288 unsigned char *gh; 289 __be64 tun_id; 290 __be16 flags; 291 292 tpi->flags |= TUNNEL_KEY; 293 flags = tpi->flags; 294 tun_id = key32_to_tunnel_id(tpi->key); 295 296 tun_dst = ip_tun_rx_dst(skb, flags, 297 tun_id, sizeof(*md)); 298 if (!tun_dst) 299 return PACKET_REJECT; 300 301 /* skb can be uncloned in __iptunnel_pull_header, so 302 * old pkt_md is no 
longer valid and we need to reset 303 * it 304 */ 305 gh = skb_network_header(skb) + 306 skb_network_header_len(skb); 307 pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len + 308 sizeof(*ershdr)); 309 md = ip_tunnel_info_opts(&tun_dst->u.tun_info); 310 md->version = ver; 311 md2 = &md->u.md2; 312 memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE : 313 ERSPAN_V2_MDSIZE); 314 315 info = &tun_dst->u.tun_info; 316 info->key.tun_flags |= TUNNEL_ERSPAN_OPT; 317 info->options_len = sizeof(*md); 318 } 319 320 skb_reset_mac_header(skb); 321 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 322 return PACKET_RCVD; 323 } 324 return PACKET_REJECT; 325 326 drop: 327 kfree_skb(skb); 328 return PACKET_RCVD; 329 } 330 331 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 332 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) 333 { 334 struct metadata_dst *tun_dst = NULL; 335 const struct iphdr *iph; 336 struct ip_tunnel *tunnel; 337 338 iph = ip_hdr(skb); 339 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 340 iph->saddr, iph->daddr, tpi->key); 341 342 if (tunnel) { 343 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto, 344 raw_proto, false) < 0) 345 goto drop; 346 347 if (tunnel->dev->type != ARPHRD_NONE) 348 skb_pop_mac_header(skb); 349 else 350 skb_reset_mac_header(skb); 351 if (tunnel->collect_md) { 352 __be16 flags; 353 __be64 tun_id; 354 355 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); 356 tun_id = key32_to_tunnel_id(tpi->key); 357 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); 358 if (!tun_dst) 359 return PACKET_REJECT; 360 } 361 362 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 363 return PACKET_RCVD; 364 } 365 return PACKET_NEXT; 366 367 drop: 368 kfree_skb(skb); 369 return PACKET_RCVD; 370 } 371 372 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 373 int hdr_len) 374 { 375 struct net *net = dev_net(skb->dev); 376 struct ip_tunnel_net *itn; 377 int res; 378 379 if (tpi->proto 
== htons(ETH_P_TEB)) 380 itn = net_generic(net, gre_tap_net_id); 381 else 382 itn = net_generic(net, ipgre_net_id); 383 384 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false); 385 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) { 386 /* ipgre tunnels in collect metadata mode should receive 387 * also ETH_P_TEB traffic. 388 */ 389 itn = net_generic(net, ipgre_net_id); 390 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true); 391 } 392 return res; 393 } 394 395 static int gre_rcv(struct sk_buff *skb) 396 { 397 struct tnl_ptk_info tpi; 398 bool csum_err = false; 399 int hdr_len; 400 401 #ifdef CONFIG_NET_IPGRE_BROADCAST 402 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { 403 /* Looped back packet, drop it! */ 404 if (rt_is_output_route(skb_rtable(skb))) 405 goto drop; 406 } 407 #endif 408 409 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); 410 if (hdr_len < 0) 411 goto drop; 412 413 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || 414 tpi.proto == htons(ETH_P_ERSPAN2))) { 415 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 416 return 0; 417 goto out; 418 } 419 420 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 421 return 0; 422 423 out: 424 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 425 drop: 426 kfree_skb(skb); 427 return 0; 428 } 429 430 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, 431 const struct iphdr *tnl_params, 432 __be16 proto) 433 { 434 struct ip_tunnel *tunnel = netdev_priv(dev); 435 436 if (tunnel->parms.o_flags & TUNNEL_SEQ) 437 tunnel->o_seqno++; 438 439 /* Push GRE header. */ 440 gre_build_header(skb, tunnel->tun_hlen, 441 tunnel->parms.o_flags, proto, tunnel->parms.o_key, 442 htonl(tunnel->o_seqno)); 443 444 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); 445 } 446 447 static int gre_handle_offloads(struct sk_buff *skb, bool csum) 448 { 449 return iptunnel_handle_offloads(skb, csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); 450 } 451 452 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, 453 __be16 proto) 454 { 455 struct ip_tunnel *tunnel = netdev_priv(dev); 456 struct ip_tunnel_info *tun_info; 457 const struct ip_tunnel_key *key; 458 int tunnel_hlen; 459 __be16 flags; 460 461 tun_info = skb_tunnel_info(skb); 462 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 463 ip_tunnel_info_af(tun_info) != AF_INET)) 464 goto err_free_skb; 465 466 key = &tun_info->key; 467 tunnel_hlen = gre_calc_hlen(key->tun_flags); 468 469 if (skb_cow_head(skb, dev->needed_headroom)) 470 goto err_free_skb; 471 472 /* Push Tunnel header. */ 473 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) 474 goto err_free_skb; 475 476 flags = tun_info->key.tun_flags & 477 (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); 478 gre_build_header(skb, tunnel_hlen, flags, proto, 479 tunnel_id_to_key32(tun_info->key.tun_id), 480 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0); 481 482 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); 483 484 return; 485 486 err_free_skb: 487 kfree_skb(skb); 488 dev->stats.tx_dropped++; 489 } 490 491 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) 492 { 493 struct ip_tunnel *tunnel = netdev_priv(dev); 494 struct ip_tunnel_info *tun_info; 495 const struct ip_tunnel_key *key; 496 struct erspan_metadata *md; 497 bool truncate = false; 498 __be16 proto; 499 int tunnel_hlen; 500 int version; 501 int nhoff; 502 int thoff; 503 504 tun_info = skb_tunnel_info(skb); 505 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 506 ip_tunnel_info_af(tun_info) != AF_INET)) 507 goto err_free_skb; 508 509 key = &tun_info->key; 510 if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) 511 goto err_free_skb; 512 if (tun_info->options_len < sizeof(*md)) 513 goto err_free_skb; 514 md = ip_tunnel_info_opts(tun_info); 515 516 /* ERSPAN has fixed 8 byte GRE header */ 517 version = md->version; 
518 tunnel_hlen = 8 + erspan_hdr_len(version); 519 520 if (skb_cow_head(skb, dev->needed_headroom)) 521 goto err_free_skb; 522 523 if (gre_handle_offloads(skb, false)) 524 goto err_free_skb; 525 526 if (skb->len > dev->mtu + dev->hard_header_len) { 527 pskb_trim(skb, dev->mtu + dev->hard_header_len); 528 truncate = true; 529 } 530 531 nhoff = skb_network_header(skb) - skb_mac_header(skb); 532 if (skb->protocol == htons(ETH_P_IP) && 533 (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) 534 truncate = true; 535 536 thoff = skb_transport_header(skb) - skb_mac_header(skb); 537 if (skb->protocol == htons(ETH_P_IPV6) && 538 (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) 539 truncate = true; 540 541 if (version == 1) { 542 erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), 543 ntohl(md->u.index), truncate, true); 544 proto = htons(ETH_P_ERSPAN); 545 } else if (version == 2) { 546 erspan_build_header_v2(skb, 547 ntohl(tunnel_id_to_key32(key->tun_id)), 548 md->u.md2.dir, 549 get_hwid(&md->u.md2), 550 truncate, true); 551 proto = htons(ETH_P_ERSPAN2); 552 } else { 553 goto err_free_skb; 554 } 555 556 gre_build_header(skb, 8, TUNNEL_SEQ, 557 proto, 0, htonl(tunnel->o_seqno++)); 558 559 ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen); 560 561 return; 562 563 err_free_skb: 564 kfree_skb(skb); 565 dev->stats.tx_dropped++; 566 } 567 568 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 569 { 570 struct ip_tunnel_info *info = skb_tunnel_info(skb); 571 const struct ip_tunnel_key *key; 572 struct rtable *rt; 573 struct flowi4 fl4; 574 575 if (ip_tunnel_info_af(info) != AF_INET) 576 return -EINVAL; 577 578 key = &info->key; 579 ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, 580 tunnel_id_to_key32(key->tun_id), key->tos, 0, 581 skb->mark, skb_get_hash(skb)); 582 rt = ip_route_output_key(dev_net(dev), &fl4); 583 if (IS_ERR(rt)) 584 return PTR_ERR(rt); 585 586 ip_rt_put(rt); 587 info->key.u.ipv4.src = 
fl4.saddr; 588 return 0; 589 } 590 591 static netdev_tx_t ipgre_xmit(struct sk_buff *skb, 592 struct net_device *dev) 593 { 594 struct ip_tunnel *tunnel = netdev_priv(dev); 595 const struct iphdr *tnl_params; 596 597 if (!pskb_inet_may_pull(skb)) 598 goto free_skb; 599 600 if (tunnel->collect_md) { 601 gre_fb_xmit(skb, dev, skb->protocol); 602 return NETDEV_TX_OK; 603 } 604 605 if (dev->header_ops) { 606 /* Need space for new headers */ 607 if (skb_cow_head(skb, dev->needed_headroom - 608 (tunnel->hlen + sizeof(struct iphdr)))) 609 goto free_skb; 610 611 tnl_params = (const struct iphdr *)skb->data; 612 613 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing 614 * to gre header. 615 */ 616 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); 617 skb_reset_mac_header(skb); 618 } else { 619 if (skb_cow_head(skb, dev->needed_headroom)) 620 goto free_skb; 621 622 tnl_params = &tunnel->parms.iph; 623 } 624 625 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 626 goto free_skb; 627 628 __gre_xmit(skb, dev, tnl_params, skb->protocol); 629 return NETDEV_TX_OK; 630 631 free_skb: 632 kfree_skb(skb); 633 dev->stats.tx_dropped++; 634 return NETDEV_TX_OK; 635 } 636 637 static netdev_tx_t erspan_xmit(struct sk_buff *skb, 638 struct net_device *dev) 639 { 640 struct ip_tunnel *tunnel = netdev_priv(dev); 641 bool truncate = false; 642 __be16 proto; 643 644 if (!pskb_inet_may_pull(skb)) 645 goto free_skb; 646 647 if (tunnel->collect_md) { 648 erspan_fb_xmit(skb, dev); 649 return NETDEV_TX_OK; 650 } 651 652 if (gre_handle_offloads(skb, false)) 653 goto free_skb; 654 655 if (skb_cow_head(skb, dev->needed_headroom)) 656 goto free_skb; 657 658 if (skb->len > dev->mtu + dev->hard_header_len) { 659 pskb_trim(skb, dev->mtu + dev->hard_header_len); 660 truncate = true; 661 } 662 663 /* Push ERSPAN header */ 664 if (tunnel->erspan_ver == 1) { 665 erspan_build_header(skb, ntohl(tunnel->parms.o_key), 666 tunnel->index, 667 truncate, true); 668 proto = 
htons(ETH_P_ERSPAN); 669 } else if (tunnel->erspan_ver == 2) { 670 erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), 671 tunnel->dir, tunnel->hwid, 672 truncate, true); 673 proto = htons(ETH_P_ERSPAN2); 674 } else { 675 goto free_skb; 676 } 677 678 tunnel->parms.o_flags &= ~TUNNEL_KEY; 679 __gre_xmit(skb, dev, &tunnel->parms.iph, proto); 680 return NETDEV_TX_OK; 681 682 free_skb: 683 kfree_skb(skb); 684 dev->stats.tx_dropped++; 685 return NETDEV_TX_OK; 686 } 687 688 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, 689 struct net_device *dev) 690 { 691 struct ip_tunnel *tunnel = netdev_priv(dev); 692 693 if (!pskb_inet_may_pull(skb)) 694 goto free_skb; 695 696 if (tunnel->collect_md) { 697 gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); 698 return NETDEV_TX_OK; 699 } 700 701 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 702 goto free_skb; 703 704 if (skb_cow_head(skb, dev->needed_headroom)) 705 goto free_skb; 706 707 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); 708 return NETDEV_TX_OK; 709 710 free_skb: 711 kfree_skb(skb); 712 dev->stats.tx_dropped++; 713 return NETDEV_TX_OK; 714 } 715 716 static void ipgre_link_update(struct net_device *dev, bool set_mtu) 717 { 718 struct ip_tunnel *tunnel = netdev_priv(dev); 719 int len; 720 721 len = tunnel->tun_hlen; 722 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 723 len = tunnel->tun_hlen - len; 724 tunnel->hlen = tunnel->hlen + len; 725 726 dev->needed_headroom = dev->needed_headroom + len; 727 if (set_mtu) 728 dev->mtu = max_t(int, dev->mtu - len, 68); 729 730 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { 731 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || 732 tunnel->encap.type == TUNNEL_ENCAP_NONE) { 733 dev->features |= NETIF_F_GSO_SOFTWARE; 734 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 735 } else { 736 dev->features &= ~NETIF_F_GSO_SOFTWARE; 737 dev->hw_features &= ~NETIF_F_GSO_SOFTWARE; 738 } 739 dev->features |= NETIF_F_LLTX; 740 } else { 741 dev->hw_features &= 
~NETIF_F_GSO_SOFTWARE; 742 dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE); 743 } 744 } 745 746 static int ipgre_tunnel_ioctl(struct net_device *dev, 747 struct ifreq *ifr, int cmd) 748 { 749 struct ip_tunnel_parm p; 750 int err; 751 752 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 753 return -EFAULT; 754 755 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { 756 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 757 p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || 758 ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) 759 return -EINVAL; 760 } 761 762 p.i_flags = gre_flags_to_tnl_flags(p.i_flags); 763 p.o_flags = gre_flags_to_tnl_flags(p.o_flags); 764 765 err = ip_tunnel_ioctl(dev, &p, cmd); 766 if (err) 767 return err; 768 769 if (cmd == SIOCCHGTUNNEL) { 770 struct ip_tunnel *t = netdev_priv(dev); 771 772 t->parms.i_flags = p.i_flags; 773 t->parms.o_flags = p.o_flags; 774 775 if (strcmp(dev->rtnl_link_ops->kind, "erspan")) 776 ipgre_link_update(dev, true); 777 } 778 779 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); 780 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); 781 782 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 783 return -EFAULT; 784 785 return 0; 786 } 787 788 /* Nice toy. Unfortunately, useless in real life :-) 789 It allows to construct virtual multiprotocol broadcast "LAN" 790 over the Internet, provided multicast routing is tuned. 791 792 793 I have no idea was this bicycle invented before me, 794 so that I had to set ARPHRD_IPGRE to a random value. 795 I have an impression, that Cisco could make something similar, 796 but this feature is apparently missing in IOS<=11.2(8). 797 798 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 799 with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 800 801 ping -t 255 224.66.66.66 802 803 If nobody answers, mbone does not work. 
804 805 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 806 ip addr add 10.66.66.<somewhat>/24 dev Universe 807 ifconfig Universe up 808 ifconfig Universe add fe80::<Your_real_addr>/10 809 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 810 ftp 10.66.66.66 811 ... 812 ftp fec0:6666:6666::193.233.7.65 813 ... 814 */ 815 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 816 unsigned short type, 817 const void *daddr, const void *saddr, unsigned int len) 818 { 819 struct ip_tunnel *t = netdev_priv(dev); 820 struct iphdr *iph; 821 struct gre_base_hdr *greh; 822 823 iph = skb_push(skb, t->hlen + sizeof(*iph)); 824 greh = (struct gre_base_hdr *)(iph+1); 825 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); 826 greh->protocol = htons(type); 827 828 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 829 830 /* Set the source hardware address. */ 831 if (saddr) 832 memcpy(&iph->saddr, saddr, 4); 833 if (daddr) 834 memcpy(&iph->daddr, daddr, 4); 835 if (iph->daddr) 836 return t->hlen + sizeof(*iph); 837 838 return -(t->hlen + sizeof(*iph)); 839 } 840 841 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 842 { 843 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); 844 memcpy(haddr, &iph->saddr, 4); 845 return 4; 846 } 847 848 static const struct header_ops ipgre_header_ops = { 849 .create = ipgre_header, 850 .parse = ipgre_header_parse, 851 }; 852 853 #ifdef CONFIG_NET_IPGRE_BROADCAST 854 static int ipgre_open(struct net_device *dev) 855 { 856 struct ip_tunnel *t = netdev_priv(dev); 857 858 if (ipv4_is_multicast(t->parms.iph.daddr)) { 859 struct flowi4 fl4; 860 struct rtable *rt; 861 862 rt = ip_route_output_gre(t->net, &fl4, 863 t->parms.iph.daddr, 864 t->parms.iph.saddr, 865 t->parms.o_key, 866 RT_TOS(t->parms.iph.tos), 867 t->parms.link); 868 if (IS_ERR(rt)) 869 return -EADDRNOTAVAIL; 870 dev = rt->dst.dev; 871 ip_rt_put(rt); 872 if 
(!__in_dev_get_rtnl(dev)) 873 return -EADDRNOTAVAIL; 874 t->mlink = dev->ifindex; 875 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 876 } 877 return 0; 878 } 879 880 static int ipgre_close(struct net_device *dev) 881 { 882 struct ip_tunnel *t = netdev_priv(dev); 883 884 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 885 struct in_device *in_dev; 886 in_dev = inetdev_by_index(t->net, t->mlink); 887 if (in_dev) 888 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 889 } 890 return 0; 891 } 892 #endif 893 894 static const struct net_device_ops ipgre_netdev_ops = { 895 .ndo_init = ipgre_tunnel_init, 896 .ndo_uninit = ip_tunnel_uninit, 897 #ifdef CONFIG_NET_IPGRE_BROADCAST 898 .ndo_open = ipgre_open, 899 .ndo_stop = ipgre_close, 900 #endif 901 .ndo_start_xmit = ipgre_xmit, 902 .ndo_do_ioctl = ipgre_tunnel_ioctl, 903 .ndo_change_mtu = ip_tunnel_change_mtu, 904 .ndo_get_stats64 = ip_tunnel_get_stats64, 905 .ndo_get_iflink = ip_tunnel_get_iflink, 906 }; 907 908 #define GRE_FEATURES (NETIF_F_SG | \ 909 NETIF_F_FRAGLIST | \ 910 NETIF_F_HIGHDMA | \ 911 NETIF_F_HW_CSUM) 912 913 static void ipgre_tunnel_setup(struct net_device *dev) 914 { 915 dev->netdev_ops = &ipgre_netdev_ops; 916 dev->type = ARPHRD_IPGRE; 917 ip_tunnel_setup(dev, ipgre_net_id); 918 } 919 920 static void __gre_tunnel_init(struct net_device *dev) 921 { 922 struct ip_tunnel *tunnel; 923 924 tunnel = netdev_priv(dev); 925 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 926 tunnel->parms.iph.protocol = IPPROTO_GRE; 927 928 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; 929 930 dev->features |= GRE_FEATURES; 931 dev->hw_features |= GRE_FEATURES; 932 933 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { 934 /* TCP offload with GRE SEQ is not supported, nor 935 * can we support 2 levels of outer headers requiring 936 * an update. 
937 */ 938 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || 939 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { 940 dev->features |= NETIF_F_GSO_SOFTWARE; 941 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 942 } 943 944 /* Can use a lockless transmit, unless we generate 945 * output sequences 946 */ 947 dev->features |= NETIF_F_LLTX; 948 } 949 } 950 951 static int ipgre_tunnel_init(struct net_device *dev) 952 { 953 struct ip_tunnel *tunnel = netdev_priv(dev); 954 struct iphdr *iph = &tunnel->parms.iph; 955 956 __gre_tunnel_init(dev); 957 958 memcpy(dev->dev_addr, &iph->saddr, 4); 959 memcpy(dev->broadcast, &iph->daddr, 4); 960 961 dev->flags = IFF_NOARP; 962 netif_keep_dst(dev); 963 dev->addr_len = 4; 964 965 if (iph->daddr && !tunnel->collect_md) { 966 #ifdef CONFIG_NET_IPGRE_BROADCAST 967 if (ipv4_is_multicast(iph->daddr)) { 968 if (!iph->saddr) 969 return -EINVAL; 970 dev->flags = IFF_BROADCAST; 971 dev->header_ops = &ipgre_header_ops; 972 } 973 #endif 974 } else if (!tunnel->collect_md) { 975 dev->header_ops = &ipgre_header_ops; 976 } 977 978 return ip_tunnel_init(dev); 979 } 980 981 static const struct gre_protocol ipgre_protocol = { 982 .handler = gre_rcv, 983 .err_handler = gre_err, 984 }; 985 986 static int __net_init ipgre_init_net(struct net *net) 987 { 988 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); 989 } 990 991 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) 992 { 993 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); 994 } 995 996 static struct pernet_operations ipgre_net_ops = { 997 .init = ipgre_init_net, 998 .exit_batch = ipgre_exit_batch_net, 999 .id = &ipgre_net_id, 1000 .size = sizeof(struct ip_tunnel_net), 1001 }; 1002 1003 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], 1004 struct netlink_ext_ack *extack) 1005 { 1006 __be16 flags; 1007 1008 if (!data) 1009 return 0; 1010 1011 flags = 0; 1012 if (data[IFLA_GRE_IFLAGS]) 1013 flags |= 
nla_get_be16(data[IFLA_GRE_IFLAGS]); 1014 if (data[IFLA_GRE_OFLAGS]) 1015 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1016 if (flags & (GRE_VERSION|GRE_ROUTING)) 1017 return -EINVAL; 1018 1019 if (data[IFLA_GRE_COLLECT_METADATA] && 1020 data[IFLA_GRE_ENCAP_TYPE] && 1021 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) 1022 return -EINVAL; 1023 1024 return 0; 1025 } 1026 1027 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[], 1028 struct netlink_ext_ack *extack) 1029 { 1030 __be32 daddr; 1031 1032 if (tb[IFLA_ADDRESS]) { 1033 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1034 return -EINVAL; 1035 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1036 return -EADDRNOTAVAIL; 1037 } 1038 1039 if (!data) 1040 goto out; 1041 1042 if (data[IFLA_GRE_REMOTE]) { 1043 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); 1044 if (!daddr) 1045 return -EINVAL; 1046 } 1047 1048 out: 1049 return ipgre_tunnel_validate(tb, data, extack); 1050 } 1051 1052 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], 1053 struct netlink_ext_ack *extack) 1054 { 1055 __be16 flags = 0; 1056 int ret; 1057 1058 if (!data) 1059 return 0; 1060 1061 ret = ipgre_tap_validate(tb, data, extack); 1062 if (ret) 1063 return ret; 1064 1065 /* ERSPAN should only have GRE sequence and key flag */ 1066 if (data[IFLA_GRE_OFLAGS]) 1067 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1068 if (data[IFLA_GRE_IFLAGS]) 1069 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1070 if (!data[IFLA_GRE_COLLECT_METADATA] && 1071 flags != (GRE_SEQ | GRE_KEY)) 1072 return -EINVAL; 1073 1074 /* ERSPAN Session ID only has 10-bit. Since we reuse 1075 * 32-bit key field as ID, check it's range. 
1076 */ 1077 if (data[IFLA_GRE_IKEY] && 1078 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) 1079 return -EINVAL; 1080 1081 if (data[IFLA_GRE_OKEY] && 1082 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) 1083 return -EINVAL; 1084 1085 return 0; 1086 } 1087 1088 static int ipgre_netlink_parms(struct net_device *dev, 1089 struct nlattr *data[], 1090 struct nlattr *tb[], 1091 struct ip_tunnel_parm *parms, 1092 __u32 *fwmark) 1093 { 1094 struct ip_tunnel *t = netdev_priv(dev); 1095 1096 memset(parms, 0, sizeof(*parms)); 1097 1098 parms->iph.protocol = IPPROTO_GRE; 1099 1100 if (!data) 1101 return 0; 1102 1103 if (data[IFLA_GRE_LINK]) 1104 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 1105 1106 if (data[IFLA_GRE_IFLAGS]) 1107 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); 1108 1109 if (data[IFLA_GRE_OFLAGS]) 1110 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); 1111 1112 if (data[IFLA_GRE_IKEY]) 1113 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 1114 1115 if (data[IFLA_GRE_OKEY]) 1116 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); 1117 1118 if (data[IFLA_GRE_LOCAL]) 1119 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]); 1120 1121 if (data[IFLA_GRE_REMOTE]) 1122 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]); 1123 1124 if (data[IFLA_GRE_TTL]) 1125 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); 1126 1127 if (data[IFLA_GRE_TOS]) 1128 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); 1129 1130 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) { 1131 if (t->ignore_df) 1132 return -EINVAL; 1133 parms->iph.frag_off = htons(IP_DF); 1134 } 1135 1136 if (data[IFLA_GRE_COLLECT_METADATA]) { 1137 t->collect_md = true; 1138 if (dev->type == ARPHRD_IPGRE) 1139 dev->type = ARPHRD_NONE; 1140 } 1141 1142 if (data[IFLA_GRE_IGNORE_DF]) { 1143 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) 1144 && (parms->iph.frag_off & htons(IP_DF))) 1145 return -EINVAL; 1146 t->ignore_df = 
!!nla_get_u8(data[IFLA_GRE_IGNORE_DF]); 1147 } 1148 1149 if (data[IFLA_GRE_FWMARK]) 1150 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); 1151 1152 if (data[IFLA_GRE_ERSPAN_VER]) { 1153 t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); 1154 1155 if (t->erspan_ver != 1 && t->erspan_ver != 2) 1156 return -EINVAL; 1157 } 1158 1159 if (t->erspan_ver == 1) { 1160 if (data[IFLA_GRE_ERSPAN_INDEX]) { 1161 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); 1162 if (t->index & ~INDEX_MASK) 1163 return -EINVAL; 1164 } 1165 } else if (t->erspan_ver == 2) { 1166 if (data[IFLA_GRE_ERSPAN_DIR]) { 1167 t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]); 1168 if (t->dir & ~(DIR_MASK >> DIR_OFFSET)) 1169 return -EINVAL; 1170 } 1171 if (data[IFLA_GRE_ERSPAN_HWID]) { 1172 t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]); 1173 if (t->hwid & ~(HWID_MASK >> HWID_OFFSET)) 1174 return -EINVAL; 1175 } 1176 } 1177 1178 return 0; 1179 } 1180 1181 /* This function returns true when ENCAP attributes are present in the nl msg */ 1182 static bool ipgre_netlink_encap_parms(struct nlattr *data[], 1183 struct ip_tunnel_encap *ipencap) 1184 { 1185 bool ret = false; 1186 1187 memset(ipencap, 0, sizeof(*ipencap)); 1188 1189 if (!data) 1190 return ret; 1191 1192 if (data[IFLA_GRE_ENCAP_TYPE]) { 1193 ret = true; 1194 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); 1195 } 1196 1197 if (data[IFLA_GRE_ENCAP_FLAGS]) { 1198 ret = true; 1199 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); 1200 } 1201 1202 if (data[IFLA_GRE_ENCAP_SPORT]) { 1203 ret = true; 1204 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); 1205 } 1206 1207 if (data[IFLA_GRE_ENCAP_DPORT]) { 1208 ret = true; 1209 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); 1210 } 1211 1212 return ret; 1213 } 1214 1215 static int gre_tap_init(struct net_device *dev) 1216 { 1217 __gre_tunnel_init(dev); 1218 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1219 netif_keep_dst(dev); 1220 1221 return ip_tunnel_init(dev); 1222 } 
1223 1224 static const struct net_device_ops gre_tap_netdev_ops = { 1225 .ndo_init = gre_tap_init, 1226 .ndo_uninit = ip_tunnel_uninit, 1227 .ndo_start_xmit = gre_tap_xmit, 1228 .ndo_set_mac_address = eth_mac_addr, 1229 .ndo_validate_addr = eth_validate_addr, 1230 .ndo_change_mtu = ip_tunnel_change_mtu, 1231 .ndo_get_stats64 = ip_tunnel_get_stats64, 1232 .ndo_get_iflink = ip_tunnel_get_iflink, 1233 .ndo_fill_metadata_dst = gre_fill_metadata_dst, 1234 }; 1235 1236 static int erspan_tunnel_init(struct net_device *dev) 1237 { 1238 struct ip_tunnel *tunnel = netdev_priv(dev); 1239 1240 tunnel->tun_hlen = 8; 1241 tunnel->parms.iph.protocol = IPPROTO_GRE; 1242 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + 1243 erspan_hdr_len(tunnel->erspan_ver); 1244 1245 dev->features |= GRE_FEATURES; 1246 dev->hw_features |= GRE_FEATURES; 1247 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1248 netif_keep_dst(dev); 1249 1250 return ip_tunnel_init(dev); 1251 } 1252 1253 static const struct net_device_ops erspan_netdev_ops = { 1254 .ndo_init = erspan_tunnel_init, 1255 .ndo_uninit = ip_tunnel_uninit, 1256 .ndo_start_xmit = erspan_xmit, 1257 .ndo_set_mac_address = eth_mac_addr, 1258 .ndo_validate_addr = eth_validate_addr, 1259 .ndo_change_mtu = ip_tunnel_change_mtu, 1260 .ndo_get_stats64 = ip_tunnel_get_stats64, 1261 .ndo_get_iflink = ip_tunnel_get_iflink, 1262 .ndo_fill_metadata_dst = gre_fill_metadata_dst, 1263 }; 1264 1265 static void ipgre_tap_setup(struct net_device *dev) 1266 { 1267 ether_setup(dev); 1268 dev->max_mtu = 0; 1269 dev->netdev_ops = &gre_tap_netdev_ops; 1270 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1271 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1272 ip_tunnel_setup(dev, gre_tap_net_id); 1273 } 1274 1275 static int ipgre_newlink(struct net *src_net, struct net_device *dev, 1276 struct nlattr *tb[], struct nlattr *data[], 1277 struct netlink_ext_ack *extack) 1278 { 1279 struct ip_tunnel_parm p; 1280 struct ip_tunnel_encap ipencap; 1281 __u32 fwmark = 0; 1282 int err; 1283 
1284 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1285 struct ip_tunnel *t = netdev_priv(dev); 1286 err = ip_tunnel_encap_setup(t, &ipencap); 1287 1288 if (err < 0) 1289 return err; 1290 } 1291 1292 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1293 if (err < 0) 1294 return err; 1295 return ip_tunnel_newlink(dev, tb, &p, fwmark); 1296 } 1297 1298 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 1299 struct nlattr *data[], 1300 struct netlink_ext_ack *extack) 1301 { 1302 struct ip_tunnel *t = netdev_priv(dev); 1303 struct ip_tunnel_encap ipencap; 1304 __u32 fwmark = t->fwmark; 1305 struct ip_tunnel_parm p; 1306 int err; 1307 1308 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1309 err = ip_tunnel_encap_setup(t, &ipencap); 1310 1311 if (err < 0) 1312 return err; 1313 } 1314 1315 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1316 if (err < 0) 1317 return err; 1318 1319 err = ip_tunnel_changelink(dev, tb, &p, fwmark); 1320 if (err < 0) 1321 return err; 1322 1323 t->parms.i_flags = p.i_flags; 1324 t->parms.o_flags = p.o_flags; 1325 1326 if (strcmp(dev->rtnl_link_ops->kind, "erspan")) 1327 ipgre_link_update(dev, !tb[IFLA_MTU]); 1328 1329 return 0; 1330 } 1331 1332 static size_t ipgre_get_size(const struct net_device *dev) 1333 { 1334 return 1335 /* IFLA_GRE_LINK */ 1336 nla_total_size(4) + 1337 /* IFLA_GRE_IFLAGS */ 1338 nla_total_size(2) + 1339 /* IFLA_GRE_OFLAGS */ 1340 nla_total_size(2) + 1341 /* IFLA_GRE_IKEY */ 1342 nla_total_size(4) + 1343 /* IFLA_GRE_OKEY */ 1344 nla_total_size(4) + 1345 /* IFLA_GRE_LOCAL */ 1346 nla_total_size(4) + 1347 /* IFLA_GRE_REMOTE */ 1348 nla_total_size(4) + 1349 /* IFLA_GRE_TTL */ 1350 nla_total_size(1) + 1351 /* IFLA_GRE_TOS */ 1352 nla_total_size(1) + 1353 /* IFLA_GRE_PMTUDISC */ 1354 nla_total_size(1) + 1355 /* IFLA_GRE_ENCAP_TYPE */ 1356 nla_total_size(2) + 1357 /* IFLA_GRE_ENCAP_FLAGS */ 1358 nla_total_size(2) + 1359 /* IFLA_GRE_ENCAP_SPORT */ 1360 nla_total_size(2) + 1361 /* 
IFLA_GRE_ENCAP_DPORT */ 1362 nla_total_size(2) + 1363 /* IFLA_GRE_COLLECT_METADATA */ 1364 nla_total_size(0) + 1365 /* IFLA_GRE_IGNORE_DF */ 1366 nla_total_size(1) + 1367 /* IFLA_GRE_FWMARK */ 1368 nla_total_size(4) + 1369 /* IFLA_GRE_ERSPAN_INDEX */ 1370 nla_total_size(4) + 1371 /* IFLA_GRE_ERSPAN_VER */ 1372 nla_total_size(1) + 1373 /* IFLA_GRE_ERSPAN_DIR */ 1374 nla_total_size(1) + 1375 /* IFLA_GRE_ERSPAN_HWID */ 1376 nla_total_size(2) + 1377 0; 1378 } 1379 1380 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) 1381 { 1382 struct ip_tunnel *t = netdev_priv(dev); 1383 struct ip_tunnel_parm *p = &t->parms; 1384 __be16 o_flags = p->o_flags; 1385 1386 if (t->erspan_ver == 1 || t->erspan_ver == 2) { 1387 if (!t->collect_md) 1388 o_flags |= TUNNEL_KEY; 1389 1390 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) 1391 goto nla_put_failure; 1392 1393 if (t->erspan_ver == 1) { 1394 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) 1395 goto nla_put_failure; 1396 } else { 1397 if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) 1398 goto nla_put_failure; 1399 if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) 1400 goto nla_put_failure; 1401 } 1402 } 1403 1404 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 1405 nla_put_be16(skb, IFLA_GRE_IFLAGS, 1406 gre_tnl_flags_to_gre_flags(p->i_flags)) || 1407 nla_put_be16(skb, IFLA_GRE_OFLAGS, 1408 gre_tnl_flags_to_gre_flags(o_flags)) || 1409 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 1410 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 1411 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || 1412 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) || 1413 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || 1414 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || 1415 nla_put_u8(skb, IFLA_GRE_PMTUDISC, 1416 !!(p->iph.frag_off & htons(IP_DF))) || 1417 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark)) 1418 goto nla_put_failure; 1419 1420 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, 1421 t->encap.type) || 1422 
nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, 1423 t->encap.sport) || 1424 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, 1425 t->encap.dport) || 1426 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, 1427 t->encap.flags)) 1428 goto nla_put_failure; 1429 1430 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df)) 1431 goto nla_put_failure; 1432 1433 if (t->collect_md) { 1434 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) 1435 goto nla_put_failure; 1436 } 1437 1438 return 0; 1439 1440 nla_put_failure: 1441 return -EMSGSIZE; 1442 } 1443 1444 static void erspan_setup(struct net_device *dev) 1445 { 1446 struct ip_tunnel *t = netdev_priv(dev); 1447 1448 ether_setup(dev); 1449 dev->max_mtu = 0; 1450 dev->netdev_ops = &erspan_netdev_ops; 1451 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1452 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1453 ip_tunnel_setup(dev, erspan_net_id); 1454 t->erspan_ver = 1; 1455 } 1456 1457 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { 1458 [IFLA_GRE_LINK] = { .type = NLA_U32 }, 1459 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, 1460 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, 1461 [IFLA_GRE_IKEY] = { .type = NLA_U32 }, 1462 [IFLA_GRE_OKEY] = { .type = NLA_U32 }, 1463 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, 1464 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1465 [IFLA_GRE_TTL] = { .type = NLA_U8 }, 1466 [IFLA_GRE_TOS] = { .type = NLA_U8 }, 1467 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, 1468 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, 1469 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, 1470 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, 1471 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, 1472 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1473 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, 1474 [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, 1475 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, 1476 [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 }, 1477 [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 }, 1478 [IFLA_GRE_ERSPAN_HWID] = { .type = 
NLA_U16 }, 1479 }; 1480 1481 static struct rtnl_link_ops ipgre_link_ops __read_mostly = { 1482 .kind = "gre", 1483 .maxtype = IFLA_GRE_MAX, 1484 .policy = ipgre_policy, 1485 .priv_size = sizeof(struct ip_tunnel), 1486 .setup = ipgre_tunnel_setup, 1487 .validate = ipgre_tunnel_validate, 1488 .newlink = ipgre_newlink, 1489 .changelink = ipgre_changelink, 1490 .dellink = ip_tunnel_dellink, 1491 .get_size = ipgre_get_size, 1492 .fill_info = ipgre_fill_info, 1493 .get_link_net = ip_tunnel_get_link_net, 1494 }; 1495 1496 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { 1497 .kind = "gretap", 1498 .maxtype = IFLA_GRE_MAX, 1499 .policy = ipgre_policy, 1500 .priv_size = sizeof(struct ip_tunnel), 1501 .setup = ipgre_tap_setup, 1502 .validate = ipgre_tap_validate, 1503 .newlink = ipgre_newlink, 1504 .changelink = ipgre_changelink, 1505 .dellink = ip_tunnel_dellink, 1506 .get_size = ipgre_get_size, 1507 .fill_info = ipgre_fill_info, 1508 .get_link_net = ip_tunnel_get_link_net, 1509 }; 1510 1511 static struct rtnl_link_ops erspan_link_ops __read_mostly = { 1512 .kind = "erspan", 1513 .maxtype = IFLA_GRE_MAX, 1514 .policy = ipgre_policy, 1515 .priv_size = sizeof(struct ip_tunnel), 1516 .setup = erspan_setup, 1517 .validate = erspan_validate, 1518 .newlink = ipgre_newlink, 1519 .changelink = ipgre_changelink, 1520 .dellink = ip_tunnel_dellink, 1521 .get_size = ipgre_get_size, 1522 .fill_info = ipgre_fill_info, 1523 .get_link_net = ip_tunnel_get_link_net, 1524 }; 1525 1526 struct net_device *gretap_fb_dev_create(struct net *net, const char *name, 1527 u8 name_assign_type) 1528 { 1529 struct nlattr *tb[IFLA_MAX + 1]; 1530 struct net_device *dev; 1531 LIST_HEAD(list_kill); 1532 struct ip_tunnel *t; 1533 int err; 1534 1535 memset(&tb, 0, sizeof(tb)); 1536 1537 dev = rtnl_create_link(net, name, name_assign_type, 1538 &ipgre_tap_ops, tb, NULL); 1539 if (IS_ERR(dev)) 1540 return dev; 1541 1542 /* Configure flow based GRE device. 
*/ 1543 t = netdev_priv(dev); 1544 t->collect_md = true; 1545 1546 err = ipgre_newlink(net, dev, tb, NULL, NULL); 1547 if (err < 0) { 1548 free_netdev(dev); 1549 return ERR_PTR(err); 1550 } 1551 1552 /* openvswitch users expect packet sizes to be unrestricted, 1553 * so set the largest MTU we can. 1554 */ 1555 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); 1556 if (err) 1557 goto out; 1558 1559 err = rtnl_configure_link(dev, NULL); 1560 if (err < 0) 1561 goto out; 1562 1563 return dev; 1564 out: 1565 ip_tunnel_dellink(dev, &list_kill); 1566 unregister_netdevice_many(&list_kill); 1567 return ERR_PTR(err); 1568 } 1569 EXPORT_SYMBOL_GPL(gretap_fb_dev_create); 1570 1571 static int __net_init ipgre_tap_init_net(struct net *net) 1572 { 1573 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); 1574 } 1575 1576 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) 1577 { 1578 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); 1579 } 1580 1581 static struct pernet_operations ipgre_tap_net_ops = { 1582 .init = ipgre_tap_init_net, 1583 .exit_batch = ipgre_tap_exit_batch_net, 1584 .id = &gre_tap_net_id, 1585 .size = sizeof(struct ip_tunnel_net), 1586 }; 1587 1588 static int __net_init erspan_init_net(struct net *net) 1589 { 1590 return ip_tunnel_init_net(net, erspan_net_id, 1591 &erspan_link_ops, "erspan0"); 1592 } 1593 1594 static void __net_exit erspan_exit_batch_net(struct list_head *net_list) 1595 { 1596 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); 1597 } 1598 1599 static struct pernet_operations erspan_net_ops = { 1600 .init = erspan_init_net, 1601 .exit_batch = erspan_exit_batch_net, 1602 .id = &erspan_net_id, 1603 .size = sizeof(struct ip_tunnel_net), 1604 }; 1605 1606 static int __init ipgre_init(void) 1607 { 1608 int err; 1609 1610 pr_info("GRE over IPv4 tunneling driver\n"); 1611 1612 err = register_pernet_device(&ipgre_net_ops); 1613 if (err < 0) 1614 return err; 1615 1616 err = 
register_pernet_device(&ipgre_tap_net_ops); 1617 if (err < 0) 1618 goto pnet_tap_failed; 1619 1620 err = register_pernet_device(&erspan_net_ops); 1621 if (err < 0) 1622 goto pnet_erspan_failed; 1623 1624 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); 1625 if (err < 0) { 1626 pr_info("%s: can't add protocol\n", __func__); 1627 goto add_proto_failed; 1628 } 1629 1630 err = rtnl_link_register(&ipgre_link_ops); 1631 if (err < 0) 1632 goto rtnl_link_failed; 1633 1634 err = rtnl_link_register(&ipgre_tap_ops); 1635 if (err < 0) 1636 goto tap_ops_failed; 1637 1638 err = rtnl_link_register(&erspan_link_ops); 1639 if (err < 0) 1640 goto erspan_link_failed; 1641 1642 return 0; 1643 1644 erspan_link_failed: 1645 rtnl_link_unregister(&ipgre_tap_ops); 1646 tap_ops_failed: 1647 rtnl_link_unregister(&ipgre_link_ops); 1648 rtnl_link_failed: 1649 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 1650 add_proto_failed: 1651 unregister_pernet_device(&erspan_net_ops); 1652 pnet_erspan_failed: 1653 unregister_pernet_device(&ipgre_tap_net_ops); 1654 pnet_tap_failed: 1655 unregister_pernet_device(&ipgre_net_ops); 1656 return err; 1657 } 1658 1659 static void __exit ipgre_fini(void) 1660 { 1661 rtnl_link_unregister(&ipgre_tap_ops); 1662 rtnl_link_unregister(&ipgre_link_ops); 1663 rtnl_link_unregister(&erspan_link_ops); 1664 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 1665 unregister_pernet_device(&ipgre_tap_net_ops); 1666 unregister_pernet_device(&ipgre_net_ops); 1667 unregister_pernet_device(&erspan_net_ops); 1668 } 1669 1670 module_init(ipgre_init); 1671 module_exit(ipgre_fini); 1672 MODULE_LICENSE("GPL"); 1673 MODULE_ALIAS_RTNL_LINK("gre"); 1674 MODULE_ALIAS_RTNL_LINK("gretap"); 1675 MODULE_ALIAS_RTNL_LINK("erspan"); 1676 MODULE_ALIAS_NETDEV("gre0"); 1677 MODULE_ALIAS_NETDEV("gretap0"); 1678 MODULE_ALIAS_NETDEV("erspan0"); 1679