1 /* 2 * Linux NET3: GRE over IP protocol decoder. 3 * 4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 * 11 */ 12 13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 14 15 #include <linux/capability.h> 16 #include <linux/module.h> 17 #include <linux/types.h> 18 #include <linux/kernel.h> 19 #include <linux/slab.h> 20 #include <linux/uaccess.h> 21 #include <linux/skbuff.h> 22 #include <linux/netdevice.h> 23 #include <linux/in.h> 24 #include <linux/tcp.h> 25 #include <linux/udp.h> 26 #include <linux/if_arp.h> 27 #include <linux/if_vlan.h> 28 #include <linux/init.h> 29 #include <linux/in6.h> 30 #include <linux/inetdevice.h> 31 #include <linux/igmp.h> 32 #include <linux/netfilter_ipv4.h> 33 #include <linux/etherdevice.h> 34 #include <linux/if_ether.h> 35 36 #include <net/sock.h> 37 #include <net/ip.h> 38 #include <net/icmp.h> 39 #include <net/protocol.h> 40 #include <net/ip_tunnels.h> 41 #include <net/arp.h> 42 #include <net/checksum.h> 43 #include <net/dsfield.h> 44 #include <net/inet_ecn.h> 45 #include <net/xfrm.h> 46 #include <net/net_namespace.h> 47 #include <net/netns/generic.h> 48 #include <net/rtnetlink.h> 49 #include <net/gre.h> 50 #include <net/dst_metadata.h> 51 #include <net/erspan.h> 52 53 /* 54 Problems & solutions 55 -------------------- 56 57 1. The most important issue is detecting local dead loops. 58 They would cause complete host lockup in transmit, which 59 would be "resolved" by stack overflow or, if queueing is enabled, 60 with infinite looping in net_bh. 61 62 We cannot track such dead loops during route installation, 63 it is infeasible task. The most general solutions would be 64 to keep skb->encapsulation counter (sort of local ttl), 65 and silently drop packet when it expires. It is a good 66 solution, but it supposes maintaining new variable in ALL 67 skb, even if no tunneling is used. 68 69 Current solution: xmit_recursion breaks dead loops. This is a percpu 70 counter, since when we enter the first ndo_xmit(), cpu migration is 71 forbidden. We force an exit if this counter reaches RECURSION_LIMIT 72 73 2. Networking dead loops would not kill routers, but would really 74 kill network. IP hop limit plays role of "t->recursion" in this case, 75 if we copy it from packet being encapsulated to upper header. 76 It is very good solution, but it introduces two problems: 77 78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2), 79 do not work over tunnels. 80 - traceroute does not work. I planned to relay ICMP from tunnel, 81 so that this problem would be solved and traceroute output 82 would even more informative. This idea appeared to be wrong: 83 only Linux complies to rfc1812 now (yes, guys, Linux is the only 84 true router now :-)), all routers (at least, in neighbourhood of mine) 85 return only 8 bytes of payload. It is the end. 86 87 Hence, if we want that OSPF worked or traceroute said something reasonable, 88 we should search for another solution. 89 90 One of them is to parse packet trying to detect inner encapsulation 91 made by our node. It is difficult or even impossible, especially, 92 taking into account fragmentation. TO be short, ttl is not solution at all. 93 94 Current solution: The solution was UNEXPECTEDLY SIMPLE. 95 We force DF flag on tunnels with preconfigured hop limit, 96 that is ALL. :-) Well, it does not remove the problem completely, 97 but exponential growth of network traffic is changed to linear 98 (branches, that exceed pmtu are pruned) and tunnel mtu 99 rapidly degrades to value <68, where looping stops. 100 Yes, it is not good if there exists a router in the loop, 101 which does not force DF, even when encapsulating packets have DF set. 102 But it is not our problem! Nobody could accuse us, we made 103 all that we could make. Even if it is your gated who injected 104 fatal route to network, even if it were you who configured 105 fatal static route: you are innocent. :-) 106 107 Alexey Kuznetsov. 108 */ 109 110 static bool log_ecn_error = true; 111 module_param(log_ecn_error, bool, 0644); 112 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 113 114 static struct rtnl_link_ops ipgre_link_ops __read_mostly; 115 static int ipgre_tunnel_init(struct net_device *dev); 116 static void erspan_build_header(struct sk_buff *skb, 117 __be32 id, u32 index, bool truncate); 118 119 static unsigned int ipgre_net_id __read_mostly; 120 static unsigned int gre_tap_net_id __read_mostly; 121 static unsigned int erspan_net_id __read_mostly; 122 123 static void ipgre_err(struct sk_buff *skb, u32 info, 124 const struct tnl_ptk_info *tpi) 125 { 126 127 /* All the routers (except for Linux) return only 128 8 bytes of packet payload. It means, that precise relaying of 129 ICMP in the real Internet is absolutely infeasible. 130 131 Moreover, Cisco "wise men" put GRE key to the third word 132 in GRE header. It makes impossible maintaining even soft 133 state for keyed GRE tunnels with enabled checksum. Tell 134 them "thank you". 135 136 Well, I wonder, rfc1812 was written by Cisco employee, 137 what the hell these idiots break standards established 138 by themselves??? 139 */ 140 struct net *net = dev_net(skb->dev); 141 struct ip_tunnel_net *itn; 142 const struct iphdr *iph; 143 const int type = icmp_hdr(skb)->type; 144 const int code = icmp_hdr(skb)->code; 145 unsigned int data_len = 0; 146 struct ip_tunnel *t; 147 148 switch (type) { 149 default: 150 case ICMP_PARAMETERPROB: 151 return; 152 153 case ICMP_DEST_UNREACH: 154 switch (code) { 155 case ICMP_SR_FAILED: 156 case ICMP_PORT_UNREACH: 157 /* Impossible event. */ 158 return; 159 default: 160 /* All others are translated to HOST_UNREACH. 161 rfc2003 contains "deep thoughts" about NET_UNREACH, 162 I believe they are just ether pollution. --ANK 163 */ 164 break; 165 } 166 break; 167 168 case ICMP_TIME_EXCEEDED: 169 if (code != ICMP_EXC_TTL) 170 return; 171 data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ 172 break; 173 174 case ICMP_REDIRECT: 175 break; 176 } 177 178 if (tpi->proto == htons(ETH_P_TEB)) 179 itn = net_generic(net, gre_tap_net_id); 180 else 181 itn = net_generic(net, ipgre_net_id); 182 183 iph = (const struct iphdr *)(icmp_hdr(skb) + 1); 184 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 185 iph->daddr, iph->saddr, tpi->key); 186 187 if (!t) 188 return; 189 190 #if IS_ENABLED(CONFIG_IPV6) 191 if (tpi->proto == htons(ETH_P_IPV6) && 192 !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, 193 type, data_len)) 194 return; 195 #endif 196 197 if (t->parms.iph.daddr == 0 || 198 ipv4_is_multicast(t->parms.iph.daddr)) 199 return; 200 201 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 202 return; 203 204 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 205 t->err_count++; 206 else 207 t->err_count = 1; 208 t->err_time = jiffies; 209 } 210 211 static void gre_err(struct sk_buff *skb, u32 info) 212 { 213 /* All the routers (except for Linux) return only 214 * 8 bytes of packet payload. It means, that precise relaying of 215 * ICMP in the real Internet is absolutely infeasible. 216 * 217 * Moreover, Cisco "wise men" put GRE key to the third word 218 * in GRE header. It makes impossible maintaining even soft 219 * state for keyed 220 * GRE tunnels with enabled checksum. Tell them "thank you". 221 * 222 * Well, I wonder, rfc1812 was written by Cisco employee, 223 * what the hell these idiots break standards established 224 * by themselves??? 225 */ 226 227 const struct iphdr *iph = (struct iphdr *)skb->data; 228 const int type = icmp_hdr(skb)->type; 229 const int code = icmp_hdr(skb)->code; 230 struct tnl_ptk_info tpi; 231 bool csum_err = false; 232 233 if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 234 iph->ihl * 4) < 0) { 235 if (!csum_err) /* ignore csum errors. */ 236 return; 237 } 238 239 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 240 ipv4_update_pmtu(skb, dev_net(skb->dev), info, 241 skb->dev->ifindex, 0, IPPROTO_GRE, 0); 242 return; 243 } 244 if (type == ICMP_REDIRECT) { 245 ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, 246 IPPROTO_GRE, 0); 247 return; 248 } 249 250 ipgre_err(skb, info, &tpi); 251 } 252 253 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, 254 int gre_hdr_len) 255 { 256 struct net *net = dev_net(skb->dev); 257 struct metadata_dst *tun_dst = NULL; 258 struct ip_tunnel_net *itn; 259 struct ip_tunnel *tunnel; 260 struct erspanhdr *ershdr; 261 const struct iphdr *iph; 262 __be32 session_id; 263 __be32 index; 264 int len; 265 266 itn = net_generic(net, erspan_net_id); 267 len = gre_hdr_len + sizeof(*ershdr); 268 269 if (unlikely(!pskb_may_pull(skb, len))) 270 return -ENOMEM; 271 272 iph = ip_hdr(skb); 273 ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); 274 275 /* The original GRE header does not have key field, 276 * Use ERSPAN 10-bit session ID as key. 277 */ 278 session_id = cpu_to_be32(ntohs(ershdr->session_id)); 279 tpi->key = session_id; 280 index = ershdr->md.index; 281 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, 282 tpi->flags | TUNNEL_KEY, 283 iph->saddr, iph->daddr, tpi->key); 284 285 if (tunnel) { 286 if (__iptunnel_pull_header(skb, 287 gre_hdr_len + sizeof(*ershdr), 288 htons(ETH_P_TEB), 289 false, false) < 0) 290 goto drop; 291 292 if (tunnel->collect_md) { 293 struct ip_tunnel_info *info; 294 struct erspan_metadata *md; 295 __be64 tun_id; 296 __be16 flags; 297 298 tpi->flags |= TUNNEL_KEY; 299 flags = tpi->flags; 300 tun_id = key32_to_tunnel_id(tpi->key); 301 302 tun_dst = ip_tun_rx_dst(skb, flags, 303 tun_id, sizeof(*md)); 304 if (!tun_dst) 305 return PACKET_REJECT; 306 307 md = ip_tunnel_info_opts(&tun_dst->u.tun_info); 308 if (!md) 309 return PACKET_REJECT; 310 311 md->index = index; 312 info = &tun_dst->u.tun_info; 313 info->key.tun_flags |= TUNNEL_ERSPAN_OPT; 314 info->options_len = sizeof(*md); 315 } else { 316 tunnel->index = ntohl(index); 317 } 318 319 skb_reset_mac_header(skb); 320 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 321 return PACKET_RCVD; 322 } 323 drop: 324 kfree_skb(skb); 325 return PACKET_RCVD; 326 } 327 328 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 329 struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) 330 { 331 struct metadata_dst *tun_dst = NULL; 332 const struct iphdr *iph; 333 struct ip_tunnel *tunnel; 334 335 iph = ip_hdr(skb); 336 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, 337 iph->saddr, iph->daddr, tpi->key); 338 339 if (tunnel) { 340 if (__iptunnel_pull_header(skb, hdr_len, tpi->proto, 341 raw_proto, false) < 0) 342 goto drop; 343 344 if (tunnel->dev->type != ARPHRD_NONE) 345 skb_pop_mac_header(skb); 346 else 347 skb_reset_mac_header(skb); 348 if (tunnel->collect_md) { 349 __be16 flags; 350 __be64 tun_id; 351 352 flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); 353 tun_id = key32_to_tunnel_id(tpi->key); 354 tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); 355 if (!tun_dst) 356 return PACKET_REJECT; 357 } 358 359 ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); 360 return PACKET_RCVD; 361 } 362 return PACKET_NEXT; 363 364 drop: 365 kfree_skb(skb); 366 return PACKET_RCVD; 367 } 368 369 static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, 370 int hdr_len) 371 { 372 struct net *net = dev_net(skb->dev); 373 struct ip_tunnel_net *itn; 374 int res; 375 376 if (tpi->proto == htons(ETH_P_TEB)) 377 itn = net_generic(net, gre_tap_net_id); 378 else 379 itn = net_generic(net, ipgre_net_id); 380 381 res = __ipgre_rcv(skb, tpi, itn, hdr_len, false); 382 if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) { 383 /* ipgre tunnels in collect metadata mode should receive 384 * also ETH_P_TEB traffic. 385 */ 386 itn = net_generic(net, ipgre_net_id); 387 res = __ipgre_rcv(skb, tpi, itn, hdr_len, true); 388 } 389 return res; 390 } 391 392 static int gre_rcv(struct sk_buff *skb) 393 { 394 struct tnl_ptk_info tpi; 395 bool csum_err = false; 396 int hdr_len; 397 398 #ifdef CONFIG_NET_IPGRE_BROADCAST 399 if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { 400 /* Looped back packet, drop it! */ 401 if (rt_is_output_route(skb_rtable(skb))) 402 goto drop; 403 } 404 #endif 405 406 hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); 407 if (hdr_len < 0) 408 goto drop; 409 410 if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) { 411 if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 412 return 0; 413 } 414 415 if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) 416 return 0; 417 418 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 419 drop: 420 kfree_skb(skb); 421 return 0; 422 } 423 424 static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, 425 const struct iphdr *tnl_params, 426 __be16 proto) 427 { 428 struct ip_tunnel *tunnel = netdev_priv(dev); 429 430 if (tunnel->parms.o_flags & TUNNEL_SEQ) 431 tunnel->o_seqno++; 432 433 /* Push GRE header. */ 434 gre_build_header(skb, tunnel->tun_hlen, 435 tunnel->parms.o_flags, proto, tunnel->parms.o_key, 436 htonl(tunnel->o_seqno)); 437 438 ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); 439 } 440 441 static int gre_handle_offloads(struct sk_buff *skb, bool csum) 442 { 443 return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); 444 } 445 446 static struct rtable *gre_get_rt(struct sk_buff *skb, 447 struct net_device *dev, 448 struct flowi4 *fl, 449 const struct ip_tunnel_key *key) 450 { 451 struct net *net = dev_net(dev); 452 453 memset(fl, 0, sizeof(*fl)); 454 fl->daddr = key->u.ipv4.dst; 455 fl->saddr = key->u.ipv4.src; 456 fl->flowi4_tos = RT_TOS(key->tos); 457 fl->flowi4_mark = skb->mark; 458 fl->flowi4_proto = IPPROTO_GRE; 459 460 return ip_route_output_key(net, fl); 461 } 462 463 static struct rtable *prepare_fb_xmit(struct sk_buff *skb, 464 struct net_device *dev, 465 struct flowi4 *fl, 466 int tunnel_hlen) 467 { 468 struct ip_tunnel_info *tun_info; 469 const struct ip_tunnel_key *key; 470 struct rtable *rt = NULL; 471 int min_headroom; 472 bool use_cache; 473 int err; 474 475 tun_info = skb_tunnel_info(skb); 476 key = &tun_info->key; 477 use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); 478 479 if (use_cache) 480 rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr); 481 if (!rt) { 482 rt = gre_get_rt(skb, dev, fl, key); 483 if (IS_ERR(rt)) 484 goto err_free_skb; 485 if (use_cache) 486 dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, 487 fl->saddr); 488 } 489 490 min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len 491 + tunnel_hlen + sizeof(struct iphdr); 492 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { 493 int head_delta = SKB_DATA_ALIGN(min_headroom - 494 skb_headroom(skb) + 495 16); 496 err = pskb_expand_head(skb, max_t(int, head_delta, 0), 497 0, GFP_ATOMIC); 498 if (unlikely(err)) 499 goto err_free_rt; 500 } 501 return rt; 502 503 err_free_rt: 504 ip_rt_put(rt); 505 err_free_skb: 506 kfree_skb(skb); 507 dev->stats.tx_dropped++; 508 return NULL; 509 } 510 511 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, 512 __be16 proto) 513 { 514 struct ip_tunnel_info *tun_info; 515 const struct ip_tunnel_key *key; 516 struct rtable *rt = NULL; 517 struct flowi4 fl; 518 int tunnel_hlen; 519 __be16 df, flags; 520 521 tun_info = skb_tunnel_info(skb); 522 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 523 ip_tunnel_info_af(tun_info) != AF_INET)) 524 goto err_free_skb; 525 526 key = &tun_info->key; 527 tunnel_hlen = gre_calc_hlen(key->tun_flags); 528 529 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); 530 if (!rt) 531 return; 532 533 /* Push Tunnel header. */ 534 if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) 535 goto err_free_rt; 536 537 flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); 538 gre_build_header(skb, tunnel_hlen, flags, proto, 539 tunnel_id_to_key32(tun_info->key.tun_id), 0); 540 541 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 542 543 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, 544 key->tos, key->ttl, df, false); 545 return; 546 547 err_free_rt: 548 ip_rt_put(rt); 549 err_free_skb: 550 kfree_skb(skb); 551 dev->stats.tx_dropped++; 552 } 553 554 static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, 555 __be16 proto) 556 { 557 struct ip_tunnel *tunnel = netdev_priv(dev); 558 struct ip_tunnel_info *tun_info; 559 const struct ip_tunnel_key *key; 560 struct erspan_metadata *md; 561 struct rtable *rt = NULL; 562 bool truncate = false; 563 struct flowi4 fl; 564 int tunnel_hlen; 565 __be16 df; 566 567 tun_info = skb_tunnel_info(skb); 568 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || 569 ip_tunnel_info_af(tun_info) != AF_INET)) 570 goto err_free_skb; 571 572 key = &tun_info->key; 573 574 /* ERSPAN has fixed 8 byte GRE header */ 575 tunnel_hlen = 8 + sizeof(struct erspanhdr); 576 577 rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); 578 if (!rt) 579 return; 580 581 if (gre_handle_offloads(skb, false)) 582 goto err_free_rt; 583 584 if (skb->len > dev->mtu) { 585 pskb_trim(skb, dev->mtu); 586 truncate = true; 587 } 588 589 md = ip_tunnel_info_opts(tun_info); 590 if (!md) 591 goto err_free_rt; 592 593 erspan_build_header(skb, tunnel_id_to_key32(key->tun_id), 594 ntohl(md->index), truncate); 595 596 gre_build_header(skb, 8, TUNNEL_SEQ, 597 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); 598 599 df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; 600 601 iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, 602 key->tos, key->ttl, df, false); 603 return; 604 605 err_free_rt: 606 ip_rt_put(rt); 607 err_free_skb: 608 kfree_skb(skb); 609 dev->stats.tx_dropped++; 610 } 611 612 static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) 613 { 614 struct ip_tunnel_info *info = skb_tunnel_info(skb); 615 struct rtable *rt; 616 struct flowi4 fl4; 617 618 if (ip_tunnel_info_af(info) != AF_INET) 619 return -EINVAL; 620 621 rt = gre_get_rt(skb, dev, &fl4, &info->key); 622 if (IS_ERR(rt)) 623 return PTR_ERR(rt); 624 625 ip_rt_put(rt); 626 info->key.u.ipv4.src = fl4.saddr; 627 return 0; 628 } 629 630 static netdev_tx_t ipgre_xmit(struct sk_buff *skb, 631 struct net_device *dev) 632 { 633 struct ip_tunnel *tunnel = netdev_priv(dev); 634 const struct iphdr *tnl_params; 635 636 if (tunnel->collect_md) { 637 gre_fb_xmit(skb, dev, skb->protocol); 638 return NETDEV_TX_OK; 639 } 640 641 if (dev->header_ops) { 642 /* Need space for new headers */ 643 if (skb_cow_head(skb, dev->needed_headroom - 644 (tunnel->hlen + sizeof(struct iphdr)))) 645 goto free_skb; 646 647 tnl_params = (const struct iphdr *)skb->data; 648 649 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing 650 * to gre header. 651 */ 652 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); 653 skb_reset_mac_header(skb); 654 } else { 655 if (skb_cow_head(skb, dev->needed_headroom)) 656 goto free_skb; 657 658 tnl_params = &tunnel->parms.iph; 659 } 660 661 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 662 goto free_skb; 663 664 __gre_xmit(skb, dev, tnl_params, skb->protocol); 665 return NETDEV_TX_OK; 666 667 free_skb: 668 kfree_skb(skb); 669 dev->stats.tx_dropped++; 670 return NETDEV_TX_OK; 671 } 672 673 static inline u8 tos_to_cos(u8 tos) 674 { 675 u8 dscp, cos; 676 677 dscp = tos >> 2; 678 cos = dscp >> 3; 679 return cos; 680 } 681 682 static void erspan_build_header(struct sk_buff *skb, 683 __be32 id, u32 index, bool truncate) 684 { 685 struct iphdr *iphdr = ip_hdr(skb); 686 struct ethhdr *eth = eth_hdr(skb); 687 enum erspan_encap_type enc_type; 688 struct erspanhdr *ershdr; 689 struct qtag_prefix { 690 __be16 eth_type; 691 __be16 tci; 692 } *qp; 693 u16 vlan_tci = 0; 694 695 enc_type = ERSPAN_ENCAP_NOVLAN; 696 697 /* If mirrored packet has vlan tag, extract tci and 698 * perserve vlan header in the mirrored frame. 699 */ 700 if (eth->h_proto == htons(ETH_P_8021Q)) { 701 qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); 702 vlan_tci = ntohs(qp->tci); 703 enc_type = ERSPAN_ENCAP_INFRAME; 704 } 705 706 skb_push(skb, sizeof(*ershdr)); 707 ershdr = (struct erspanhdr *)skb->data; 708 memset(ershdr, 0, sizeof(*ershdr)); 709 710 ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) | 711 (ERSPAN_VERSION << VER_OFFSET)); 712 ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) | 713 ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) | 714 (enc_type << EN_OFFSET & EN_MASK) | 715 ((truncate << T_OFFSET) & T_MASK)); 716 ershdr->md.index = htonl(index & INDEX_MASK); 717 } 718 719 static netdev_tx_t erspan_xmit(struct sk_buff *skb, 720 struct net_device *dev) 721 { 722 struct ip_tunnel *tunnel = netdev_priv(dev); 723 bool truncate = false; 724 725 if (tunnel->collect_md) { 726 erspan_fb_xmit(skb, dev, skb->protocol); 727 return NETDEV_TX_OK; 728 } 729 730 if (gre_handle_offloads(skb, false)) 731 goto free_skb; 732 733 if (skb_cow_head(skb, dev->needed_headroom)) 734 goto free_skb; 735 736 if (skb->len > dev->mtu) { 737 pskb_trim(skb, dev->mtu); 738 truncate = true; 739 } 740 741 /* Push ERSPAN header */ 742 erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate); 743 tunnel->parms.o_flags &= ~TUNNEL_KEY; 744 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); 745 return NETDEV_TX_OK; 746 747 free_skb: 748 kfree_skb(skb); 749 dev->stats.tx_dropped++; 750 return NETDEV_TX_OK; 751 } 752 753 static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, 754 struct net_device *dev) 755 { 756 struct ip_tunnel *tunnel = netdev_priv(dev); 757 758 if (tunnel->collect_md) { 759 gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); 760 return NETDEV_TX_OK; 761 } 762 763 if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM))) 764 goto free_skb; 765 766 if (skb_cow_head(skb, dev->needed_headroom)) 767 goto free_skb; 768 769 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); 770 return NETDEV_TX_OK; 771 772 free_skb: 773 kfree_skb(skb); 774 dev->stats.tx_dropped++; 775 return NETDEV_TX_OK; 776 } 777 778 static int ipgre_tunnel_ioctl(struct net_device *dev, 779 struct ifreq *ifr, int cmd) 780 { 781 int err; 782 struct ip_tunnel_parm p; 783 784 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 785 return -EFAULT; 786 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { 787 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 788 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 789 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 790 return -EINVAL; 791 } 792 p.i_flags = gre_flags_to_tnl_flags(p.i_flags); 793 p.o_flags = gre_flags_to_tnl_flags(p.o_flags); 794 795 err = ip_tunnel_ioctl(dev, &p, cmd); 796 if (err) 797 return err; 798 799 p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); 800 p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); 801 802 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 803 return -EFAULT; 804 return 0; 805 } 806 807 /* Nice toy. Unfortunately, useless in real life :-) 808 It allows to construct virtual multiprotocol broadcast "LAN" 809 over the Internet, provided multicast routing is tuned. 810 811 812 I have no idea was this bicycle invented before me, 813 so that I had to set ARPHRD_IPGRE to a random value. 814 I have an impression, that Cisco could make something similar, 815 but this feature is apparently missing in IOS<=11.2(8). 816 817 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 818 with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 819 820 ping -t 255 224.66.66.66 821 822 If nobody answers, mbone does not work. 823 824 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 825 ip addr add 10.66.66.<somewhat>/24 dev Universe 826 ifconfig Universe up 827 ifconfig Universe add fe80::<Your_real_addr>/10 828 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 829 ftp 10.66.66.66 830 ... 831 ftp fec0:6666:6666::193.233.7.65 832 ... 833 */ 834 static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 835 unsigned short type, 836 const void *daddr, const void *saddr, unsigned int len) 837 { 838 struct ip_tunnel *t = netdev_priv(dev); 839 struct iphdr *iph; 840 struct gre_base_hdr *greh; 841 842 iph = skb_push(skb, t->hlen + sizeof(*iph)); 843 greh = (struct gre_base_hdr *)(iph+1); 844 greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags); 845 greh->protocol = htons(type); 846 847 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 848 849 /* Set the source hardware address. */ 850 if (saddr) 851 memcpy(&iph->saddr, saddr, 4); 852 if (daddr) 853 memcpy(&iph->daddr, daddr, 4); 854 if (iph->daddr) 855 return t->hlen + sizeof(*iph); 856 857 return -(t->hlen + sizeof(*iph)); 858 } 859 860 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 861 { 862 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb); 863 memcpy(haddr, &iph->saddr, 4); 864 return 4; 865 } 866 867 static const struct header_ops ipgre_header_ops = { 868 .create = ipgre_header, 869 .parse = ipgre_header_parse, 870 }; 871 872 #ifdef CONFIG_NET_IPGRE_BROADCAST 873 static int ipgre_open(struct net_device *dev) 874 { 875 struct ip_tunnel *t = netdev_priv(dev); 876 877 if (ipv4_is_multicast(t->parms.iph.daddr)) { 878 struct flowi4 fl4; 879 struct rtable *rt; 880 881 rt = ip_route_output_gre(t->net, &fl4, 882 t->parms.iph.daddr, 883 t->parms.iph.saddr, 884 t->parms.o_key, 885 RT_TOS(t->parms.iph.tos), 886 t->parms.link); 887 if (IS_ERR(rt)) 888 return -EADDRNOTAVAIL; 889 dev = rt->dst.dev; 890 ip_rt_put(rt); 891 if (!__in_dev_get_rtnl(dev)) 892 return -EADDRNOTAVAIL; 893 t->mlink = dev->ifindex; 894 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 895 } 896 return 0; 897 } 898 899 static int ipgre_close(struct net_device *dev) 900 { 901 struct ip_tunnel *t = netdev_priv(dev); 902 903 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 904 struct in_device *in_dev; 905 in_dev = inetdev_by_index(t->net, t->mlink); 906 if (in_dev) 907 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 908 } 909 return 0; 910 } 911 #endif 912 913 static const struct net_device_ops ipgre_netdev_ops = { 914 .ndo_init = ipgre_tunnel_init, 915 .ndo_uninit = ip_tunnel_uninit, 916 #ifdef CONFIG_NET_IPGRE_BROADCAST 917 .ndo_open = ipgre_open, 918 .ndo_stop = ipgre_close, 919 #endif 920 .ndo_start_xmit = ipgre_xmit, 921 .ndo_do_ioctl = ipgre_tunnel_ioctl, 922 .ndo_change_mtu = ip_tunnel_change_mtu, 923 .ndo_get_stats64 = ip_tunnel_get_stats64, 924 .ndo_get_iflink = ip_tunnel_get_iflink, 925 }; 926 927 #define GRE_FEATURES (NETIF_F_SG | \ 928 NETIF_F_FRAGLIST | \ 929 NETIF_F_HIGHDMA | \ 930 NETIF_F_HW_CSUM) 931 932 static void ipgre_tunnel_setup(struct net_device *dev) 933 { 934 dev->netdev_ops = &ipgre_netdev_ops; 935 dev->type = ARPHRD_IPGRE; 936 ip_tunnel_setup(dev, ipgre_net_id); 937 } 938 939 static void __gre_tunnel_init(struct net_device *dev) 940 { 941 struct ip_tunnel *tunnel; 942 int t_hlen; 943 944 tunnel = netdev_priv(dev); 945 tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); 946 tunnel->parms.iph.protocol = IPPROTO_GRE; 947 948 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; 949 950 t_hlen = tunnel->hlen + sizeof(struct iphdr); 951 952 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; 953 dev->mtu = ETH_DATA_LEN - t_hlen - 4; 954 955 dev->features |= GRE_FEATURES; 956 dev->hw_features |= GRE_FEATURES; 957 958 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { 959 /* TCP offload with GRE SEQ is not supported, nor 960 * can we support 2 levels of outer headers requiring 961 * an update. 962 */ 963 if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || 964 (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { 965 dev->features |= NETIF_F_GSO_SOFTWARE; 966 dev->hw_features |= NETIF_F_GSO_SOFTWARE; 967 } 968 969 /* Can use a lockless transmit, unless we generate 970 * output sequences 971 */ 972 dev->features |= NETIF_F_LLTX; 973 } 974 } 975 976 static int ipgre_tunnel_init(struct net_device *dev) 977 { 978 struct ip_tunnel *tunnel = netdev_priv(dev); 979 struct iphdr *iph = &tunnel->parms.iph; 980 981 __gre_tunnel_init(dev); 982 983 memcpy(dev->dev_addr, &iph->saddr, 4); 984 memcpy(dev->broadcast, &iph->daddr, 4); 985 986 dev->flags = IFF_NOARP; 987 netif_keep_dst(dev); 988 dev->addr_len = 4; 989 990 if (iph->daddr && !tunnel->collect_md) { 991 #ifdef CONFIG_NET_IPGRE_BROADCAST 992 if (ipv4_is_multicast(iph->daddr)) { 993 if (!iph->saddr) 994 return -EINVAL; 995 dev->flags = IFF_BROADCAST; 996 dev->header_ops = &ipgre_header_ops; 997 } 998 #endif 999 } else if (!tunnel->collect_md) { 1000 dev->header_ops = &ipgre_header_ops; 1001 } 1002 1003 return ip_tunnel_init(dev); 1004 } 1005 1006 static const struct gre_protocol ipgre_protocol = { 1007 .handler = gre_rcv, 1008 .err_handler = gre_err, 1009 }; 1010 1011 static int __net_init ipgre_init_net(struct net *net) 1012 { 1013 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); 1014 } 1015 1016 static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) 1017 { 1018 ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); 1019 } 1020 1021 static struct pernet_operations ipgre_net_ops = { 1022 .init = ipgre_init_net, 1023 .exit_batch = ipgre_exit_batch_net, 1024 .id = &ipgre_net_id, 1025 .size = sizeof(struct ip_tunnel_net), 1026 }; 1027 1028 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], 1029 struct netlink_ext_ack *extack) 1030 { 1031 __be16 flags; 1032 1033 if (!data) 1034 return 0; 1035 1036 flags = 0; 1037 if (data[IFLA_GRE_IFLAGS]) 1038 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1039 if (data[IFLA_GRE_OFLAGS]) 1040 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1041 if (flags & (GRE_VERSION|GRE_ROUTING)) 1042 return -EINVAL; 1043 1044 if (data[IFLA_GRE_COLLECT_METADATA] && 1045 data[IFLA_GRE_ENCAP_TYPE] && 1046 nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) 1047 return -EINVAL; 1048 1049 return 0; 1050 } 1051 1052 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[], 1053 struct netlink_ext_ack *extack) 1054 { 1055 __be32 daddr; 1056 1057 if (tb[IFLA_ADDRESS]) { 1058 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1059 return -EINVAL; 1060 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1061 return -EADDRNOTAVAIL; 1062 } 1063 1064 if (!data) 1065 goto out; 1066 1067 if (data[IFLA_GRE_REMOTE]) { 1068 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); 1069 if (!daddr) 1070 return -EINVAL; 1071 } 1072 1073 out: 1074 return ipgre_tunnel_validate(tb, data, extack); 1075 } 1076 1077 static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], 1078 struct netlink_ext_ack *extack) 1079 { 1080 __be16 flags = 0; 1081 int ret; 1082 1083 if (!data) 1084 return 0; 1085 1086 ret = ipgre_tap_validate(tb, data, extack); 1087 if (ret) 1088 return ret; 1089 1090 /* ERSPAN should only have GRE sequence and key flag */ 1091 if (data[IFLA_GRE_OFLAGS]) 1092 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1093 if (data[IFLA_GRE_IFLAGS]) 1094 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1095 if (!data[IFLA_GRE_COLLECT_METADATA] && 1096 flags != (GRE_SEQ | GRE_KEY)) 1097 return -EINVAL; 1098 1099 /* ERSPAN Session ID only has 10-bit. Since we reuse 1100 * 32-bit key field as ID, check it's range. 1101 */ 1102 if (data[IFLA_GRE_IKEY] && 1103 (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) 1104 return -EINVAL; 1105 1106 if (data[IFLA_GRE_OKEY] && 1107 (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) 1108 return -EINVAL; 1109 1110 return 0; 1111 } 1112 1113 static int ipgre_netlink_parms(struct net_device *dev, 1114 struct nlattr *data[], 1115 struct nlattr *tb[], 1116 struct ip_tunnel_parm *parms, 1117 __u32 *fwmark) 1118 { 1119 struct ip_tunnel *t = netdev_priv(dev); 1120 1121 memset(parms, 0, sizeof(*parms)); 1122 1123 parms->iph.protocol = IPPROTO_GRE; 1124 1125 if (!data) 1126 return 0; 1127 1128 if (data[IFLA_GRE_LINK]) 1129 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 1130 1131 if (data[IFLA_GRE_IFLAGS]) 1132 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); 1133 1134 if (data[IFLA_GRE_OFLAGS]) 1135 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); 1136 1137 if (data[IFLA_GRE_IKEY]) 1138 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 1139 1140 if (data[IFLA_GRE_OKEY]) 1141 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); 1142 1143 if (data[IFLA_GRE_LOCAL]) 1144 parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]); 1145 1146 if (data[IFLA_GRE_REMOTE]) 1147 parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]); 1148 1149 if (data[IFLA_GRE_TTL]) 1150 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); 1151 1152 if (data[IFLA_GRE_TOS]) 1153 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); 1154 1155 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) { 1156 if (t->ignore_df) 1157 return -EINVAL; 1158 parms->iph.frag_off = htons(IP_DF); 1159 } 1160 1161 if (data[IFLA_GRE_COLLECT_METADATA]) { 1162 t->collect_md = true; 1163 if (dev->type == ARPHRD_IPGRE) 1164 dev->type = ARPHRD_NONE; 1165 } 1166 1167 if (data[IFLA_GRE_IGNORE_DF]) { 1168 if (nla_get_u8(data[IFLA_GRE_IGNORE_DF]) 1169 && (parms->iph.frag_off & htons(IP_DF))) 1170 return -EINVAL; 1171 t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]); 1172 } 1173 1174 if (data[IFLA_GRE_FWMARK]) 1175 *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); 1176 1177 if (data[IFLA_GRE_ERSPAN_INDEX]) { 1178 t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); 1179 1180 if (t->index & ~INDEX_MASK) 1181 return -EINVAL; 1182 } 1183 1184 return 0; 1185 } 1186 1187 /* This function returns true when ENCAP attributes are present in the nl msg */ 1188 static bool ipgre_netlink_encap_parms(struct nlattr *data[], 1189 struct ip_tunnel_encap *ipencap) 1190 { 1191 bool ret = false; 1192 1193 memset(ipencap, 0, sizeof(*ipencap)); 1194 1195 if (!data) 1196 return ret; 1197 1198 if (data[IFLA_GRE_ENCAP_TYPE]) { 1199 ret = true; 1200 ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]); 1201 } 1202 1203 if (data[IFLA_GRE_ENCAP_FLAGS]) { 1204 ret = true; 1205 ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]); 1206 } 1207 1208 if (data[IFLA_GRE_ENCAP_SPORT]) { 1209 ret = true; 1210 ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]); 1211 } 1212 1213 if (data[IFLA_GRE_ENCAP_DPORT]) { 1214 ret = true; 1215 ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]); 1216 } 1217 1218 return ret; 1219 } 1220 1221 static int gre_tap_init(struct net_device *dev) 1222 { 1223 __gre_tunnel_init(dev); 1224 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1225 1226 return ip_tunnel_init(dev); 1227 } 1228 1229 static const struct net_device_ops gre_tap_netdev_ops = { 1230 .ndo_init = gre_tap_init, 1231 .ndo_uninit = ip_tunnel_uninit, 1232 .ndo_start_xmit = gre_tap_xmit, 1233 .ndo_set_mac_address = eth_mac_addr, 1234 .ndo_validate_addr = eth_validate_addr, 1235 .ndo_change_mtu = ip_tunnel_change_mtu, 1236 .ndo_get_stats64 = ip_tunnel_get_stats64, 1237 .ndo_get_iflink = ip_tunnel_get_iflink, 1238 .ndo_fill_metadata_dst = gre_fill_metadata_dst, 1239 }; 1240 1241 static int erspan_tunnel_init(struct net_device *dev) 1242 { 1243 struct ip_tunnel *tunnel = netdev_priv(dev); 1244 int t_hlen; 1245 1246 tunnel->tun_hlen = 8; 1247 tunnel->parms.iph.protocol = IPPROTO_GRE; 1248 t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr); 1249 1250 dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; 1251 dev->mtu = ETH_DATA_LEN - t_hlen - 4; 1252 dev->features |= GRE_FEATURES; 1253 dev->hw_features |= GRE_FEATURES; 1254 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1255 1256 return ip_tunnel_init(dev); 1257 } 1258 1259 static const struct net_device_ops erspan_netdev_ops = { 1260 .ndo_init = erspan_tunnel_init, 1261 .ndo_uninit = ip_tunnel_uninit, 1262 .ndo_start_xmit = erspan_xmit, 1263 .ndo_set_mac_address = eth_mac_addr, 1264 .ndo_validate_addr = eth_validate_addr, 1265 .ndo_change_mtu = ip_tunnel_change_mtu, 1266 .ndo_get_stats64 = ip_tunnel_get_stats64, 1267 .ndo_get_iflink = ip_tunnel_get_iflink, 1268 .ndo_fill_metadata_dst = gre_fill_metadata_dst, 1269 }; 1270 1271 static void ipgre_tap_setup(struct net_device *dev) 1272 { 1273 ether_setup(dev); 1274 dev->netdev_ops = &gre_tap_netdev_ops; 1275 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1276 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1277 ip_tunnel_setup(dev, gre_tap_net_id); 1278 } 1279 1280 static int ipgre_newlink(struct net *src_net, struct net_device *dev, 1281 struct nlattr *tb[], struct nlattr *data[], 1282 struct netlink_ext_ack *extack) 1283 { 1284 struct ip_tunnel_parm p; 1285 struct ip_tunnel_encap ipencap; 1286 __u32 fwmark = 0; 1287 int err; 1288 1289 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1290 struct ip_tunnel *t = netdev_priv(dev); 1291 err = ip_tunnel_encap_setup(t, &ipencap); 1292 1293 if (err < 0) 1294 return err; 1295 } 1296 1297 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1298 if (err < 0) 1299 return err; 1300 return ip_tunnel_newlink(dev, tb, &p, fwmark); 1301 } 1302 1303 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 1304 struct nlattr *data[], 1305 struct netlink_ext_ack *extack) 1306 { 1307 struct ip_tunnel *t = netdev_priv(dev); 1308 struct ip_tunnel_parm p; 1309 struct ip_tunnel_encap ipencap; 1310 __u32 fwmark = t->fwmark; 1311 int err; 1312 1313 if (ipgre_netlink_encap_parms(data, &ipencap)) { 1314 err = ip_tunnel_encap_setup(t, &ipencap); 1315 1316 if (err < 0) 1317 return err; 1318 } 1319 1320 err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); 1321 if (err < 0) 1322 return err; 1323 return ip_tunnel_changelink(dev, tb, &p, fwmark); 1324 } 1325 1326 static size_t ipgre_get_size(const struct net_device *dev) 1327 { 1328 return 1329 /* IFLA_GRE_LINK */ 1330 nla_total_size(4) + 1331 /* IFLA_GRE_IFLAGS */ 1332 nla_total_size(2) + 1333 /* IFLA_GRE_OFLAGS */ 1334 nla_total_size(2) + 1335 /* IFLA_GRE_IKEY */ 1336 nla_total_size(4) + 1337 /* IFLA_GRE_OKEY */ 1338 nla_total_size(4) + 1339 /* IFLA_GRE_LOCAL */ 1340 nla_total_size(4) + 1341 /* IFLA_GRE_REMOTE */ 1342 nla_total_size(4) + 1343 /* IFLA_GRE_TTL */ 1344 nla_total_size(1) + 1345 /* IFLA_GRE_TOS */ 1346 nla_total_size(1) + 1347 /* IFLA_GRE_PMTUDISC */ 1348 nla_total_size(1) + 1349 /* IFLA_GRE_ENCAP_TYPE */ 1350 nla_total_size(2) + 1351 /* IFLA_GRE_ENCAP_FLAGS */ 1352 nla_total_size(2) + 1353 /* IFLA_GRE_ENCAP_SPORT */ 1354 nla_total_size(2) + 1355 /* IFLA_GRE_ENCAP_DPORT */ 1356 nla_total_size(2) + 1357 /* IFLA_GRE_COLLECT_METADATA */ 1358 nla_total_size(0) + 1359 /* IFLA_GRE_IGNORE_DF */ 1360 nla_total_size(1) + 1361 /* IFLA_GRE_FWMARK */ 1362 nla_total_size(4) + 1363 /* IFLA_GRE_ERSPAN_INDEX */ 1364 nla_total_size(4) + 1365 0; 1366 } 1367 1368 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) 1369 { 1370 struct ip_tunnel *t = netdev_priv(dev); 1371 struct ip_tunnel_parm *p = &t->parms; 1372 1373 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 1374 nla_put_be16(skb, IFLA_GRE_IFLAGS, 1375 gre_tnl_flags_to_gre_flags(p->i_flags)) || 1376 nla_put_be16(skb, IFLA_GRE_OFLAGS, 1377 gre_tnl_flags_to_gre_flags(p->o_flags)) || 1378 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 1379 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 1380 nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || 1381 nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) || 1382 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) || 1383 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) || 1384 nla_put_u8(skb, IFLA_GRE_PMTUDISC, 1385 !!(p->iph.frag_off & htons(IP_DF))) || 1386 nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark)) 1387 goto nla_put_failure; 1388 1389 if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE, 1390 t->encap.type) || 1391 nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT, 1392 t->encap.sport) || 1393 nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT, 1394 t->encap.dport) || 1395 nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS, 1396 t->encap.flags)) 1397 goto nla_put_failure; 1398 1399 if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df)) 1400 goto nla_put_failure; 1401 1402 if (t->collect_md) { 1403 if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA)) 1404 goto nla_put_failure; 1405 } 1406 1407 if (t->index) 1408 if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) 1409 goto nla_put_failure; 1410 1411 return 0; 1412 1413 nla_put_failure: 1414 return -EMSGSIZE; 1415 } 1416 1417 static void erspan_setup(struct net_device *dev) 1418 { 1419 ether_setup(dev); 1420 dev->netdev_ops = &erspan_netdev_ops; 1421 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1422 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1423 ip_tunnel_setup(dev, erspan_net_id); 1424 } 1425 1426 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { 1427 [IFLA_GRE_LINK] = { .type = NLA_U32 }, 1428 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, 1429 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, 1430 [IFLA_GRE_IKEY] = { .type = NLA_U32 }, 1431 [IFLA_GRE_OKEY] = { .type = NLA_U32 }, 1432 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, 1433 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1434 [IFLA_GRE_TTL] = { .type = NLA_U8 }, 1435 [IFLA_GRE_TOS] = { .type = NLA_U8 }, 1436 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, 1437 [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 }, 1438 [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, 1439 [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, 1440 [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, 1441 [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, 1442 [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, 1443 [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, 1444 [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, 1445 }; 1446 1447 static struct rtnl_link_ops ipgre_link_ops __read_mostly = { 1448 .kind = "gre", 1449 .maxtype = IFLA_GRE_MAX, 1450 .policy = ipgre_policy, 1451 .priv_size = sizeof(struct ip_tunnel), 1452 .setup = ipgre_tunnel_setup, 1453 .validate = ipgre_tunnel_validate, 1454 .newlink = ipgre_newlink, 1455 .changelink = ipgre_changelink, 1456 .dellink = ip_tunnel_dellink, 1457 .get_size = ipgre_get_size, 1458 .fill_info = ipgre_fill_info, 1459 .get_link_net = ip_tunnel_get_link_net, 1460 }; 1461 1462 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { 1463 .kind = "gretap", 1464 .maxtype = IFLA_GRE_MAX, 1465 .policy = ipgre_policy, 1466 .priv_size = sizeof(struct ip_tunnel), 1467 .setup = ipgre_tap_setup, 1468 .validate = ipgre_tap_validate, 1469 .newlink = ipgre_newlink, 1470 .changelink = ipgre_changelink, 1471 .dellink = ip_tunnel_dellink, 1472 .get_size = ipgre_get_size, 1473 .fill_info = ipgre_fill_info, 1474 .get_link_net = ip_tunnel_get_link_net, 1475 }; 1476 1477 static struct rtnl_link_ops erspan_link_ops __read_mostly = { 1478 .kind = "erspan", 1479 .maxtype = IFLA_GRE_MAX, 1480 .policy = ipgre_policy, 1481 .priv_size = sizeof(struct ip_tunnel), 1482 .setup = erspan_setup, 1483 .validate = erspan_validate, 1484 .newlink = ipgre_newlink, 1485 .changelink = ipgre_changelink, 1486 .dellink = ip_tunnel_dellink, 1487 .get_size = ipgre_get_size, 1488 .fill_info = ipgre_fill_info, 1489 .get_link_net = ip_tunnel_get_link_net, 1490 }; 1491 1492 struct net_device *gretap_fb_dev_create(struct net *net, const char *name, 1493 u8 name_assign_type) 1494 { 1495 struct nlattr *tb[IFLA_MAX + 1]; 1496 struct net_device *dev; 1497 LIST_HEAD(list_kill); 1498 struct ip_tunnel *t; 1499 int err; 1500 1501 memset(&tb, 0, sizeof(tb)); 1502 1503 dev = rtnl_create_link(net, name, name_assign_type, 1504 &ipgre_tap_ops, tb); 1505 if (IS_ERR(dev)) 1506 return dev; 1507 1508 /* Configure flow based GRE device. */ 1509 t = netdev_priv(dev); 1510 t->collect_md = true; 1511 1512 err = ipgre_newlink(net, dev, tb, NULL, NULL); 1513 if (err < 0) { 1514 free_netdev(dev); 1515 return ERR_PTR(err); 1516 } 1517 1518 /* openvswitch users expect packet sizes to be unrestricted, 1519 * so set the largest MTU we can. 1520 */ 1521 err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); 1522 if (err) 1523 goto out; 1524 1525 err = rtnl_configure_link(dev, NULL); 1526 if (err < 0) 1527 goto out; 1528 1529 return dev; 1530 out: 1531 ip_tunnel_dellink(dev, &list_kill); 1532 unregister_netdevice_many(&list_kill); 1533 return ERR_PTR(err); 1534 } 1535 EXPORT_SYMBOL_GPL(gretap_fb_dev_create); 1536 1537 static int __net_init ipgre_tap_init_net(struct net *net) 1538 { 1539 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); 1540 } 1541 1542 static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) 1543 { 1544 ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); 1545 } 1546 1547 static struct pernet_operations ipgre_tap_net_ops = { 1548 .init = ipgre_tap_init_net, 1549 .exit_batch = ipgre_tap_exit_batch_net, 1550 .id = &gre_tap_net_id, 1551 .size = sizeof(struct ip_tunnel_net), 1552 }; 1553 1554 static int __net_init erspan_init_net(struct net *net) 1555 { 1556 return ip_tunnel_init_net(net, erspan_net_id, 1557 &erspan_link_ops, "erspan0"); 1558 } 1559 1560 static void __net_exit erspan_exit_batch_net(struct list_head *net_list) 1561 { 1562 ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); 1563 } 1564 1565 static struct pernet_operations erspan_net_ops = { 1566 .init = erspan_init_net, 1567 .exit_batch = erspan_exit_batch_net, 1568 .id = &erspan_net_id, 1569 .size = sizeof(struct ip_tunnel_net), 1570 }; 1571 1572 static int __init ipgre_init(void) 1573 { 1574 int err; 1575 1576 pr_info("GRE over IPv4 tunneling driver\n"); 1577 1578 err = register_pernet_device(&ipgre_net_ops); 1579 if (err < 0) 1580 return err; 1581 1582 err = register_pernet_device(&ipgre_tap_net_ops); 1583 if (err < 0) 1584 goto pnet_tap_failed; 1585 1586 err = register_pernet_device(&erspan_net_ops); 1587 if (err < 0) 1588 goto pnet_erspan_failed; 1589 1590 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); 1591 if (err < 0) { 1592 pr_info("%s: can't add protocol\n", __func__); 1593 goto add_proto_failed; 1594 } 1595 1596 err = rtnl_link_register(&ipgre_link_ops); 1597 if (err < 0) 1598 goto rtnl_link_failed; 1599 1600 err = rtnl_link_register(&ipgre_tap_ops); 1601 if (err < 0) 1602 goto tap_ops_failed; 1603 1604 err = rtnl_link_register(&erspan_link_ops); 1605 if (err < 0) 1606 goto erspan_link_failed; 1607 1608 return 0; 1609 1610 erspan_link_failed: 1611 rtnl_link_unregister(&ipgre_tap_ops); 1612 tap_ops_failed: 1613 rtnl_link_unregister(&ipgre_link_ops); 1614 rtnl_link_failed: 1615 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 1616 add_proto_failed: 1617 unregister_pernet_device(&erspan_net_ops); 1618 pnet_erspan_failed: 1619 unregister_pernet_device(&ipgre_tap_net_ops); 1620 pnet_tap_failed: 1621 unregister_pernet_device(&ipgre_net_ops); 1622 return err; 1623 } 1624 1625 static void __exit ipgre_fini(void) 1626 { 1627 rtnl_link_unregister(&ipgre_tap_ops); 1628 rtnl_link_unregister(&ipgre_link_ops); 1629 rtnl_link_unregister(&erspan_link_ops); 1630 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 1631 unregister_pernet_device(&ipgre_tap_net_ops); 1632 unregister_pernet_device(&ipgre_net_ops); 1633 unregister_pernet_device(&erspan_net_ops); 1634 } 1635 1636 module_init(ipgre_init); 1637 module_exit(ipgre_fini); 1638 MODULE_LICENSE("GPL"); 1639 MODULE_ALIAS_RTNL_LINK("gre"); 1640 MODULE_ALIAS_RTNL_LINK("gretap"); 1641 MODULE_ALIAS_RTNL_LINK("erspan"); 1642 MODULE_ALIAS_NETDEV("gre0"); 1643 MODULE_ALIAS_NETDEV("gretap0"); 1644 MODULE_ALIAS_NETDEV("erspan0"); 1645