/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#ifdef CONFIG_IPV6
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: t->recursion lock breaks dead loops. It looks
   like dev->tbusy flag, but I preferred a new variable, because
   the semantics is different. One day, when hard_start_xmit
   will be multithreaded we will have to use skb->encapsulation.



   2. Networking dead loops would not kill routers, but would really
   kill the network. IP hop limit plays the role of "t->recursion" in this
   case, if we copy it from the packet being encapsulated to the upper
   header. It is a very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want that OSPF worked or traceroute said something reasonable,
   we should search for another solution.

   One of them is to parse the packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at
   all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
   rapidly degrades to value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   fatal route to network, even if it were you who configured
   fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident, how to make them modular.
   sit is integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE	16

/* Per-namespace state: index into net_generic() and the hash tables plus
 * the "gre0" fallback device. */
static int ipgre_net_id;
struct ipgre_net {
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched to configured keyless tunnels,
   will match the fallback tunnel.
 */

/* Fold a 32-bit address (or key) into a HASH_SIZE-wide bucket index. */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]

/* Protects the tunnel hash chains; readers are the rx/err paths. */
static DEFINE_RWLOCK(ipgre_lock);

/* Given src, dst and key, find appropriate for input tunnel.
 */

/* Look up the receiving tunnel for a packet with the given outer
 * (remote, local) addresses, GRE key and protocol.  Scans the four hash
 * tables from most to least specific; an exact dev->type match wins,
 * otherwise the first ARPHRD_IPGRE candidate (t2) is remembered as a
 * fallback, and finally the per-namespace fallback device is used if up.
 * Caller must hold ipgre_lock for read. */
static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
					      __be32 remote, __be32 local,
					      __be32 key, __be16 gre_proto)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t;
	struct ip_tunnel *t2 = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;

	for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
				if (t->dev->type == dev_type)
					return t;
				if (t->dev->type == ARPHRD_IPGRE && !t2)
					t2 = t;
			}
		}
	}

	for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
		if (remote == t->parms.iph.daddr) {
			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
				if (t->dev->type == dev_type)
					return t;
				if (t->dev->type == ARPHRD_IPGRE && !t2)
					t2 = t;
			}
		}
	}

	for (t = ign->tunnels_l[h1]; t; t = t->next) {
		/* A multicast "local" address may appear as the tunnel's
		 * configured destination (broadcast-mode GRE). */
		if (local == t->parms.iph.saddr ||
		     (local == t->parms.iph.daddr &&
		      ipv4_is_multicast(local))) {
			if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
				if (t->dev->type == dev_type)
					return t;
				if (t->dev->type == ARPHRD_IPGRE && !t2)
					t2 = t;
			}
		}
	}

	for (t = ign->tunnels_wc[h1]; t; t = t->next) {
		if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
			if (t->dev->type == dev_type)
				return t;
			if (t->dev->type == ARPHRD_IPGRE && !t2)
				t2 = t;
		}
	}

	if (t2)
		return t2;

	if (ign->fb_tunnel_dev->flags&IFF_UP)
		return netdev_priv(ign->fb_tunnel_dev);
	return NULL;
}

/* Return the hash chain head for the given tunnel parameters: table
 * index (prio) encodes which of local/remote are set, bucket mixes the
 * key hash with the remote-address hash when remote is unicast. */
static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	unsigned h = HASH(key);
	int prio = 0;

	if (local)
		prio |= 1;
	if (remote && !ipv4_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH(remote);
	}

	return &ign->tunnels[prio][h];
}

static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}

/* Insert the tunnel at the head of its hash chain under ipgre_lock. */
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(ign, t);

	t->next = *tp;
	write_lock_bh(&ipgre_lock);
	*tp = t;
	write_unlock_bh(&ipgre_lock);
}

/* Remove the tunnel from its hash chain (no-op if not linked). */
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipgre_lock);
			*tp = t->next;
			write_unlock_bh(&ipgre_lock);
			break;
		}
	}
}

/* Find an existing tunnel with exactly matching addresses, key and
 * device type; returns NULL if none. */
static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	struct ip_tunnel *t, **tp;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    type == t->dev->type)
			break;

	return t;
}

/* Find a tunnel matching parms; if absent and 'create' is set, allocate
 * and register a new "gre%d" device and link it into the hash table.
 * Returns the tunnel or NULL on lookup miss / allocation failure. */
static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		sprintf(name, "gre%%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Reference dropped in ipgre_tunnel_uninit(). */
	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}

/* ndo_uninit: unhash the tunnel and drop the reference taken at
 * creation time. */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}


/* ICMP error handler for IPPROTO_GRE: update soft error state on the
 * matching tunnel so the transmit path can report link failures. */
static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standards established
   by themselves???
 */

	struct iphdr *iph = (struct iphdr *)skb->data;
	__be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipgre_lock);
	/* Note: the embedded header is our own transmitted packet, so its
	 * (daddr, saddr) are (remote, local) from our point of view. */
	t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipgre_lock);
	return;
}

/* Propagate congestion marking (CE) from the outer IP header to the
 * decapsulated inner IPv4/IPv6 packet. */
static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(iph->tos)) {
		if (skb->protocol == htons(ETH_P_IP)) {
			IP_ECN_set_ce(ip_hdr(skb));
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
	}
}

/* Compute the outer TOS/ECN byte from the tunnel TOS and the inner
 * packet's DS field (if inner is IPv4/IPv6). */
static inline u8
ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
{
	u8 inner = 0;
	if (skb->protocol == htons(ETH_P_IP))
		inner = old_iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
	return INET_ECN_encapsulate(tos, inner);
}

/* GRE receive handler: parse the GRE header (optional csum/key/seq),
 * find the matching tunnel, validate checksum/sequence, strip the
 * header and hand the inner packet to the stack via netif_rx().
 * Sends ICMP port-unreachable if no tunnel matches. */
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;
	__be16 gre_proto;
	unsigned int len;

	/* 16 = basic GRE header (4) + worst-case csum/key/seq options. */
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	read_lock(&ipgre_lock);
	if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it!
			 */
			if (skb->rtable->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/* A non-zero folded csum means the GRE checksum failed;
		 * a tunnel configured with GRE_CSUM also rejects packets
		 * that arrive without one. */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		len = skb->len;

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				stats->rx_length_errors++;
				stats->rx_errors++;
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		stats->rx_packets++;
		stats->rx_bytes += len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);

		skb_reset_network_header(skb);
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);
		read_unlock(&ipgre_lock);
		return(0);
	}
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	read_unlock(&ipgre_lock);
drop_nolock:
	kfree_skb(skb);
	return(0);
}

/* ndo_start_xmit: encapsulate the packet in IP+GRE and route it out.
 * Handles NBMA destination resolution, DF/PMTU propagation for inner
 * IPv4 and IPv6, headroom reallocation, and the optional GRE
 * key/csum/seq option words.  t->recursion guards against local
 * encapsulation dead loops (see the file-head commentary). */
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->dev->stats;
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *tiph;
	u8     tos;
	__be16 df;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;

	if (tunnel->recursion++) {
		stats->collisions++;
		goto tx_error;
	}

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		/* Header already built by ipgre_header(); it sits at the
		 * front of skb->data. */
		gre_hlen = 0;
		tiph = (struct iphdr *)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel */

		if (skb->dst == NULL) {
			stats->tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb->rtable;
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb->dst->neighbour;

			if (neigh == NULL)
				goto tx_error;

			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses carry a usable
			 * IPv4 destination in the low 32 bits. */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos&1) {
		/* Low bit set means "inherit TOS from the inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
		tos &= ~1;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		/* Route points back at ourselves: local dead loop. */
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb->dst;

		if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb->dst->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	/* Relay recently-received ICMP errors (see ipgre_err()) to the
	 * sender as link failures, decaying the count over time. */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb_reset_transport_header(skb);
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0) {
		/* TTL 0 means "inherit from the inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;

	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		/* Option words are written back-to-front: seq, key, csum
		 * (csum is first on the wire, per RFC 1701 ordering). */
		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}

/* Bind the tunnel to its likely output device, derive headroom and MTU,
 * and precompute the GRE header length (tunnel->hlen).  Returns the MTU
 * to assign to the tunnel device. */
static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len - addend;

	if (mtu < 68)
		mtu = 68;

	tunnel->hlen = addend;

	return mtu;
}

/* ndo_do_ioctl: legacy SIOC{GET,ADD,CHG,DEL}TUNNEL interface.
 * GET may be issued on the fallback device with explicit parms;
 * ADD/CHG/DEL require CAP_NET_ADMIN.  Parameter struct is copied
 * to/from userspace via ifr->ifr_ifru.ifru_data. */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		/* Fixed TTL implies DF: see "Problems & solutions" above. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Cannot change the link-layer mode of an
				 * existing device. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash under the new addresses/keys. */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted. */
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

/* ndo_change_mtu: bound the MTU between the IPv4 minimum (68) and the
 * maximum payload that still fits after link and GRE headers. */
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	if (new_mtu < 68 ||
	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows to construct virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea was this bicycle invented before me,
   so that I had to set ARPHRD_IPGRE to a random value.
   I have an impression, that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */

/* header_ops.create: prebuild the IP+GRE header for broadcast-mode
 * tunnels.  Returns t->hlen when the destination is fully resolved,
 * -t->hlen when address resolution is still needed. */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16*)(iph+1);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0]		= t->parms.o_flags;
	p[1]		= htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);

	if (daddr) {
		memcpy(&iph->daddr, daddr, 4);
		return t->hlen;
	}
	if (iph->daddr && !ipv4_is_multicast(iph->daddr))
		return t->hlen;

	return -t->hlen;
}

/* header_ops.parse: the "hardware address" of a GRE peer is its outer
 * IPv4 source address (4 bytes). */
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
/* ndo_open for broadcast-mode tunnels: join the multicast group on the
 * resolved output device; remember its ifindex in t->mlink so
 * ipgre_close() can leave the group. */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		dev = rt->u.dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}

/* ndo_stop: leave the multicast group joined in ipgre_open(). */
static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev) {
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
			in_dev_put(in_dev);
		}
	}
	return 0;
}

#endif

static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};

/* Default setup for a freshly allocated GRE tunnel netdevice. */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor 	= free_netdev;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}

/* ndo_init: wire the private ip_tunnel struct to its device and pick
 * header_ops/flags from the configured addresses: multicast daddr gets
 * broadcast mode (requires a saddr), NBMA (no daddr) gets header_ops
 * for on-demand header construction. */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}

/* Initialize the per-namespace fallback device ("gre0"): wildcard
 * addresses, no key, plain 4-byte GRE header; linked into the (*,*)
 * table directly. */
static void ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_GRE;
	iph->ihl		= 5;
	tunnel->hlen		= sizeof(struct iphdr) + 4;

	dev_hold(dev);
	ign->tunnels_wc[0]	= tunnel;
}


static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
	.netns_ok	=	1,
};

/* Unregister every tunnel device in the namespace (caller holds RTNL). */
static void ipgre_destroy_tunnels(struct ipgre_net *ign)
{
	int prio;

	for (prio = 0; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = ign->tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}

/* pernet init: allocate ipgre_net state and register the fallback
 * device.  Error paths unwind in reverse order of setup. */
static int ipgre_init_net(struct net *net)
{
	int err;
	struct ipgre_net *ign;

	err = -ENOMEM;
	ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
	if (ign == NULL)
		goto err_alloc;

	err = net_assign_generic(net, ipgre_net_id, ign);
	if (err < 0)
		goto err_assign;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
err_assign:
	kfree(ign);
err_alloc:
	return err;
}

/* pernet exit: tear down all tunnels and free the namespace state. */
static void ipgre_exit_net(struct net *net)
{
	struct ipgre_net *ign;

	ign = net_generic(net, ipgre_net_id);
	rtnl_lock();
	ipgre_destroy_tunnels(ign);
	rtnl_unlock();
	kfree(ign);
}

static struct
pernet_operations ipgre_net_ops = { 1303 .init = ipgre_init_net, 1304 .exit = ipgre_exit_net, 1305 }; 1306 1307 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 1308 { 1309 __be16 flags; 1310 1311 if (!data) 1312 return 0; 1313 1314 flags = 0; 1315 if (data[IFLA_GRE_IFLAGS]) 1316 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); 1317 if (data[IFLA_GRE_OFLAGS]) 1318 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1319 if (flags & (GRE_VERSION|GRE_ROUTING)) 1320 return -EINVAL; 1321 1322 return 0; 1323 } 1324 1325 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) 1326 { 1327 __be32 daddr; 1328 1329 if (tb[IFLA_ADDRESS]) { 1330 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1331 return -EINVAL; 1332 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1333 return -EADDRNOTAVAIL; 1334 } 1335 1336 if (!data) 1337 goto out; 1338 1339 if (data[IFLA_GRE_REMOTE]) { 1340 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); 1341 if (!daddr) 1342 return -EINVAL; 1343 } 1344 1345 out: 1346 return ipgre_tunnel_validate(tb, data); 1347 } 1348 1349 static void ipgre_netlink_parms(struct nlattr *data[], 1350 struct ip_tunnel_parm *parms) 1351 { 1352 memset(parms, 0, sizeof(*parms)); 1353 1354 parms->iph.protocol = IPPROTO_GRE; 1355 1356 if (!data) 1357 return; 1358 1359 if (data[IFLA_GRE_LINK]) 1360 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 1361 1362 if (data[IFLA_GRE_IFLAGS]) 1363 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); 1364 1365 if (data[IFLA_GRE_OFLAGS]) 1366 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); 1367 1368 if (data[IFLA_GRE_IKEY]) 1369 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 1370 1371 if (data[IFLA_GRE_OKEY]) 1372 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); 1373 1374 if (data[IFLA_GRE_LOCAL]) 1375 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); 1376 1377 if (data[IFLA_GRE_REMOTE]) 1378 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); 1379 1380 if (data[IFLA_GRE_TTL]) 1381 
parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); 1382 1383 if (data[IFLA_GRE_TOS]) 1384 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); 1385 1386 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) 1387 parms->iph.frag_off = htons(IP_DF); 1388 } 1389 1390 static int ipgre_tap_init(struct net_device *dev) 1391 { 1392 struct ip_tunnel *tunnel; 1393 1394 tunnel = netdev_priv(dev); 1395 1396 tunnel->dev = dev; 1397 strcpy(tunnel->parms.name, dev->name); 1398 1399 ipgre_tunnel_bind_dev(dev); 1400 1401 return 0; 1402 } 1403 1404 static const struct net_device_ops ipgre_tap_netdev_ops = { 1405 .ndo_init = ipgre_tap_init, 1406 .ndo_uninit = ipgre_tunnel_uninit, 1407 .ndo_start_xmit = ipgre_tunnel_xmit, 1408 .ndo_set_mac_address = eth_mac_addr, 1409 .ndo_validate_addr = eth_validate_addr, 1410 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1411 }; 1412 1413 static void ipgre_tap_setup(struct net_device *dev) 1414 { 1415 1416 ether_setup(dev); 1417 1418 dev->netdev_ops = &ipgre_netdev_ops; 1419 dev->destructor = free_netdev; 1420 1421 dev->iflink = 0; 1422 dev->features |= NETIF_F_NETNS_LOCAL; 1423 } 1424 1425 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], 1426 struct nlattr *data[]) 1427 { 1428 struct ip_tunnel *nt; 1429 struct net *net = dev_net(dev); 1430 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1431 int mtu; 1432 int err; 1433 1434 nt = netdev_priv(dev); 1435 ipgre_netlink_parms(data, &nt->parms); 1436 1437 if (ipgre_tunnel_find(net, &nt->parms, dev->type)) 1438 return -EEXIST; 1439 1440 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) 1441 random_ether_addr(dev->dev_addr); 1442 1443 mtu = ipgre_tunnel_bind_dev(dev); 1444 if (!tb[IFLA_MTU]) 1445 dev->mtu = mtu; 1446 1447 err = register_netdevice(dev); 1448 if (err) 1449 goto out; 1450 1451 dev_hold(dev); 1452 ipgre_tunnel_link(ign, nt); 1453 1454 out: 1455 return err; 1456 } 1457 1458 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 1459 
struct nlattr *data[]) 1460 { 1461 struct ip_tunnel *t, *nt; 1462 struct net *net = dev_net(dev); 1463 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1464 struct ip_tunnel_parm p; 1465 int mtu; 1466 1467 if (dev == ign->fb_tunnel_dev) 1468 return -EINVAL; 1469 1470 nt = netdev_priv(dev); 1471 ipgre_netlink_parms(data, &p); 1472 1473 t = ipgre_tunnel_locate(net, &p, 0); 1474 1475 if (t) { 1476 if (t->dev != dev) 1477 return -EEXIST; 1478 } else { 1479 unsigned nflags = 0; 1480 1481 t = nt; 1482 1483 if (ipv4_is_multicast(p.iph.daddr)) 1484 nflags = IFF_BROADCAST; 1485 else if (p.iph.daddr) 1486 nflags = IFF_POINTOPOINT; 1487 1488 if ((dev->flags ^ nflags) & 1489 (IFF_POINTOPOINT | IFF_BROADCAST)) 1490 return -EINVAL; 1491 1492 ipgre_tunnel_unlink(ign, t); 1493 t->parms.iph.saddr = p.iph.saddr; 1494 t->parms.iph.daddr = p.iph.daddr; 1495 t->parms.i_key = p.i_key; 1496 memcpy(dev->dev_addr, &p.iph.saddr, 4); 1497 memcpy(dev->broadcast, &p.iph.daddr, 4); 1498 ipgre_tunnel_link(ign, t); 1499 netdev_state_change(dev); 1500 } 1501 1502 t->parms.o_key = p.o_key; 1503 t->parms.iph.ttl = p.iph.ttl; 1504 t->parms.iph.tos = p.iph.tos; 1505 t->parms.iph.frag_off = p.iph.frag_off; 1506 1507 if (t->parms.link != p.link) { 1508 t->parms.link = p.link; 1509 mtu = ipgre_tunnel_bind_dev(dev); 1510 if (!tb[IFLA_MTU]) 1511 dev->mtu = mtu; 1512 netdev_state_change(dev); 1513 } 1514 1515 return 0; 1516 } 1517 1518 static size_t ipgre_get_size(const struct net_device *dev) 1519 { 1520 return 1521 /* IFLA_GRE_LINK */ 1522 nla_total_size(4) + 1523 /* IFLA_GRE_IFLAGS */ 1524 nla_total_size(2) + 1525 /* IFLA_GRE_OFLAGS */ 1526 nla_total_size(2) + 1527 /* IFLA_GRE_IKEY */ 1528 nla_total_size(4) + 1529 /* IFLA_GRE_OKEY */ 1530 nla_total_size(4) + 1531 /* IFLA_GRE_LOCAL */ 1532 nla_total_size(4) + 1533 /* IFLA_GRE_REMOTE */ 1534 nla_total_size(4) + 1535 /* IFLA_GRE_TTL */ 1536 nla_total_size(1) + 1537 /* IFLA_GRE_TOS */ 1538 nla_total_size(1) + 1539 /* IFLA_GRE_PMTUDISC */ 1540 
nla_total_size(1) + 1541 0; 1542 } 1543 1544 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) 1545 { 1546 struct ip_tunnel *t = netdev_priv(dev); 1547 struct ip_tunnel_parm *p = &t->parms; 1548 1549 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); 1550 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); 1551 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); 1552 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); 1553 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); 1554 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); 1555 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); 1556 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); 1557 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); 1558 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); 1559 1560 return 0; 1561 1562 nla_put_failure: 1563 return -EMSGSIZE; 1564 } 1565 1566 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { 1567 [IFLA_GRE_LINK] = { .type = NLA_U32 }, 1568 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, 1569 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, 1570 [IFLA_GRE_IKEY] = { .type = NLA_U32 }, 1571 [IFLA_GRE_OKEY] = { .type = NLA_U32 }, 1572 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, 1573 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1574 [IFLA_GRE_TTL] = { .type = NLA_U8 }, 1575 [IFLA_GRE_TOS] = { .type = NLA_U8 }, 1576 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, 1577 }; 1578 1579 static struct rtnl_link_ops ipgre_link_ops __read_mostly = { 1580 .kind = "gre", 1581 .maxtype = IFLA_GRE_MAX, 1582 .policy = ipgre_policy, 1583 .priv_size = sizeof(struct ip_tunnel), 1584 .setup = ipgre_tunnel_setup, 1585 .validate = ipgre_tunnel_validate, 1586 .newlink = ipgre_newlink, 1587 .changelink = ipgre_changelink, 1588 .get_size = ipgre_get_size, 1589 .fill_info = ipgre_fill_info, 1590 }; 1591 1592 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { 1593 .kind = "gretap", 1594 .maxtype = IFLA_GRE_MAX, 1595 .policy = ipgre_policy, 1596 .priv_size = 
sizeof(struct ip_tunnel), 1597 .setup = ipgre_tap_setup, 1598 .validate = ipgre_tap_validate, 1599 .newlink = ipgre_newlink, 1600 .changelink = ipgre_changelink, 1601 .get_size = ipgre_get_size, 1602 .fill_info = ipgre_fill_info, 1603 }; 1604 1605 /* 1606 * And now the modules code and kernel interface. 1607 */ 1608 1609 static int __init ipgre_init(void) 1610 { 1611 int err; 1612 1613 printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); 1614 1615 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { 1616 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1617 return -EAGAIN; 1618 } 1619 1620 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); 1621 if (err < 0) 1622 goto gen_device_failed; 1623 1624 err = rtnl_link_register(&ipgre_link_ops); 1625 if (err < 0) 1626 goto rtnl_link_failed; 1627 1628 err = rtnl_link_register(&ipgre_tap_ops); 1629 if (err < 0) 1630 goto tap_ops_failed; 1631 1632 out: 1633 return err; 1634 1635 tap_ops_failed: 1636 rtnl_link_unregister(&ipgre_link_ops); 1637 rtnl_link_failed: 1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); 1639 gen_device_failed: 1640 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1641 goto out; 1642 } 1643 1644 static void __exit ipgre_fini(void) 1645 { 1646 rtnl_link_unregister(&ipgre_tap_ops); 1647 rtnl_link_unregister(&ipgre_link_ops); 1648 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); 1649 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1650 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1651 } 1652 1653 module_init(ipgre_init); 1654 module_exit(ipgre_fini); 1655 MODULE_LICENSE("GPL"); 1656 MODULE_ALIAS_RTNL_LINK("gre"); 1657 MODULE_ALIAS_RTNL_LINK("gretap"); 1658