11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Linux NET3: GRE over IP protocol decoder. 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 71da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 81da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 91da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds */ 121da177e4SLinus Torvalds 134fc268d2SRandy Dunlap #include <linux/capability.h> 141da177e4SLinus Torvalds #include <linux/module.h> 151da177e4SLinus Torvalds #include <linux/types.h> 161da177e4SLinus Torvalds #include <linux/kernel.h> 171da177e4SLinus Torvalds #include <asm/uaccess.h> 181da177e4SLinus Torvalds #include <linux/skbuff.h> 191da177e4SLinus Torvalds #include <linux/netdevice.h> 201da177e4SLinus Torvalds #include <linux/in.h> 211da177e4SLinus Torvalds #include <linux/tcp.h> 221da177e4SLinus Torvalds #include <linux/udp.h> 231da177e4SLinus Torvalds #include <linux/if_arp.h> 241da177e4SLinus Torvalds #include <linux/mroute.h> 251da177e4SLinus Torvalds #include <linux/init.h> 261da177e4SLinus Torvalds #include <linux/in6.h> 271da177e4SLinus Torvalds #include <linux/inetdevice.h> 281da177e4SLinus Torvalds #include <linux/igmp.h> 291da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h> 3046f25dffSKris Katterjohn #include <linux/if_ether.h> 311da177e4SLinus Torvalds 321da177e4SLinus Torvalds #include <net/sock.h> 331da177e4SLinus Torvalds #include <net/ip.h> 341da177e4SLinus Torvalds #include <net/icmp.h> 351da177e4SLinus Torvalds #include <net/protocol.h> 361da177e4SLinus Torvalds #include <net/ipip.h> 371da177e4SLinus Torvalds #include <net/arp.h> 381da177e4SLinus Torvalds #include <net/checksum.h> 391da177e4SLinus Torvalds #include 
<net/dsfield.h> 401da177e4SLinus Torvalds #include <net/inet_ecn.h> 411da177e4SLinus Torvalds #include <net/xfrm.h> 4259a4c759SPavel Emelyanov #include <net/net_namespace.h> 4359a4c759SPavel Emelyanov #include <net/netns/generic.h> 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds #ifdef CONFIG_IPV6 461da177e4SLinus Torvalds #include <net/ipv6.h> 471da177e4SLinus Torvalds #include <net/ip6_fib.h> 481da177e4SLinus Torvalds #include <net/ip6_route.h> 491da177e4SLinus Torvalds #endif 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds /* 521da177e4SLinus Torvalds Problems & solutions 531da177e4SLinus Torvalds -------------------- 541da177e4SLinus Torvalds 551da177e4SLinus Torvalds 1. The most important issue is detecting local dead loops. 561da177e4SLinus Torvalds They would cause complete host lockup in transmit, which 571da177e4SLinus Torvalds would be "resolved" by stack overflow or, if queueing is enabled, 581da177e4SLinus Torvalds with infinite looping in net_bh. 591da177e4SLinus Torvalds 601da177e4SLinus Torvalds We cannot track such dead loops during route installation, 611da177e4SLinus Torvalds it is an infeasible task. The most general solution would be 621da177e4SLinus Torvalds to keep skb->encapsulation counter (sort of local ttl), 631da177e4SLinus Torvalds and silently drop packet when it expires. It is the best 641da177e4SLinus Torvalds solution, but it supposes maintaining a new variable in ALL 651da177e4SLinus Torvalds skb, even if no tunneling is used. 661da177e4SLinus Torvalds 671da177e4SLinus Torvalds Current solution: t->recursion lock breaks dead loops. It looks 681da177e4SLinus Torvalds like dev->tbusy flag, but I preferred new variable, because 691da177e4SLinus Torvalds the semantics is different. One day, when hard_start_xmit 701da177e4SLinus Torvalds will be multithreaded we will have to use skb->encapsulation. 711da177e4SLinus Torvalds 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds 741da177e4SLinus Torvalds 2.
Networking dead loops would not kill routers, but would really 751da177e4SLinus Torvalds kill network. IP hop limit plays role of "t->recursion" in this case, 761da177e4SLinus Torvalds if we copy it from packet being encapsulated to upper header. 771da177e4SLinus Torvalds It is very good solution, but it introduces two problems: 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds - Routing protocols, using packets with ttl=1 (OSPF, RIP2), 801da177e4SLinus Torvalds do not work over tunnels. 811da177e4SLinus Torvalds - traceroute does not work. I planned to relay ICMP from tunnel, 821da177e4SLinus Torvalds so that this problem would be solved and traceroute output 831da177e4SLinus Torvalds would be even more informative. This idea appeared to be wrong: 841da177e4SLinus Torvalds only Linux complies to rfc1812 now (yes, guys, Linux is the only 851da177e4SLinus Torvalds true router now :-)), all routers (at least, in neighbourhood of mine) 861da177e4SLinus Torvalds return only 8 bytes of payload. It is the end. 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds Hence, if we want that OSPF worked or traceroute said something reasonable, 891da177e4SLinus Torvalds we should search for another solution. 901da177e4SLinus Torvalds 911da177e4SLinus Torvalds One of them is to parse packet trying to detect inner encapsulation 921da177e4SLinus Torvalds made by our node. It is difficult or even impossible, especially, 931da177e4SLinus Torvalds taking into account fragmentation. To be short, it is not a solution at all. 941da177e4SLinus Torvalds 951da177e4SLinus Torvalds Current solution: The solution was UNEXPECTEDLY SIMPLE. 961da177e4SLinus Torvalds We force DF flag on tunnels with preconfigured hop limit, 971da177e4SLinus Torvalds that is ALL.
:-) Well, it does not remove the problem completely, 981da177e4SLinus Torvalds but exponential growth of network traffic is changed to linear 991da177e4SLinus Torvalds (branches, that exceed pmtu are pruned) and tunnel mtu 1001da177e4SLinus Torvalds fastly degrades to value <68, where looping stops. 1011da177e4SLinus Torvalds Yes, it is not good if there exists a router in the loop, 1021da177e4SLinus Torvalds which does not force DF, even when encapsulating packets have DF set. 1031da177e4SLinus Torvalds But it is not our problem! Nobody could accuse us, we made 1041da177e4SLinus Torvalds all that we could make. Even if it is your gated who injected 1051da177e4SLinus Torvalds fatal route to network, even if it were you who configured 1061da177e4SLinus Torvalds fatal static route: you are innocent. :-) 1071da177e4SLinus Torvalds 1081da177e4SLinus Torvalds 1091da177e4SLinus Torvalds 1101da177e4SLinus Torvalds 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain 1111da177e4SLinus Torvalds practically identical code. It would be good to glue them 1121da177e4SLinus Torvalds together, but it is not very evident, how to make them modular. 1131da177e4SLinus Torvalds sit is integral part of IPv6, ipip and gre are naturally modular. 1141da177e4SLinus Torvalds We could extract common parts (hash table, ioctl etc) 1151da177e4SLinus Torvalds to a separate module (ip_tunnel.c). 1161da177e4SLinus Torvalds 1171da177e4SLinus Torvalds Alexey Kuznetsov. 
1181da177e4SLinus Torvalds */ 1191da177e4SLinus Torvalds 1201da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev); 1211da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev); 1221da177e4SLinus Torvalds 1231da177e4SLinus Torvalds /* Fallback tunnel: no source, no destination, no key, no options */ 1241da177e4SLinus Torvalds 1251da177e4SLinus Torvalds static int ipgre_fb_tunnel_init(struct net_device *dev); 1261da177e4SLinus Torvalds 127eb8ce741SPavel Emelyanov #define HASH_SIZE 16 128eb8ce741SPavel Emelyanov 12959a4c759SPavel Emelyanov static int ipgre_net_id; 13059a4c759SPavel Emelyanov struct ipgre_net { 131eb8ce741SPavel Emelyanov struct ip_tunnel *tunnels[4][HASH_SIZE]; 132eb8ce741SPavel Emelyanov 1337daa0004SPavel Emelyanov struct net_device *fb_tunnel_dev; 13459a4c759SPavel Emelyanov }; 13559a4c759SPavel Emelyanov 1361da177e4SLinus Torvalds /* Tunnel hash table */ 1371da177e4SLinus Torvalds 1381da177e4SLinus Torvalds /* 1391da177e4SLinus Torvalds 4 hash tables: 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds 3: (remote,local) 1421da177e4SLinus Torvalds 2: (remote,*) 1431da177e4SLinus Torvalds 1: (*,local) 1441da177e4SLinus Torvalds 0: (*,*) 1451da177e4SLinus Torvalds 1461da177e4SLinus Torvalds We require exact key match i.e. if a key is present in packet 1471da177e4SLinus Torvalds it will match only tunnel with the same key; if it is not present, 1481da177e4SLinus Torvalds it will match only keyless tunnel. 1491da177e4SLinus Torvalds 1501da177e4SLinus Torvalds All keysless packets, if not matched configured keyless tunnels 1511da177e4SLinus Torvalds will match fallback tunnel. 
1521da177e4SLinus Torvalds */ 1531da177e4SLinus Torvalds 154d5a0a1e3SAl Viro #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 1551da177e4SLinus Torvalds 156eb8ce741SPavel Emelyanov #define tunnels_r_l tunnels[3] 157eb8ce741SPavel Emelyanov #define tunnels_r tunnels[2] 158eb8ce741SPavel Emelyanov #define tunnels_l tunnels[1] 159eb8ce741SPavel Emelyanov #define tunnels_wc tunnels[0] 1601da177e4SLinus Torvalds 1611da177e4SLinus Torvalds static DEFINE_RWLOCK(ipgre_lock); 1621da177e4SLinus Torvalds 1631da177e4SLinus Torvalds /* Given src, dst and key, find appropriate for input tunnel. */ 1641da177e4SLinus Torvalds 165f57e7d5aSPavel Emelyanov static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, 166f57e7d5aSPavel Emelyanov __be32 remote, __be32 local, __be32 key) 1671da177e4SLinus Torvalds { 1681da177e4SLinus Torvalds unsigned h0 = HASH(remote); 1691da177e4SLinus Torvalds unsigned h1 = HASH(key); 1701da177e4SLinus Torvalds struct ip_tunnel *t; 1717daa0004SPavel Emelyanov struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1721da177e4SLinus Torvalds 173eb8ce741SPavel Emelyanov for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { 1741da177e4SLinus Torvalds if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 1751da177e4SLinus Torvalds if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 1761da177e4SLinus Torvalds return t; 1771da177e4SLinus Torvalds } 1781da177e4SLinus Torvalds } 179eb8ce741SPavel Emelyanov for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { 1801da177e4SLinus Torvalds if (remote == t->parms.iph.daddr) { 1811da177e4SLinus Torvalds if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 1821da177e4SLinus Torvalds return t; 1831da177e4SLinus Torvalds } 1841da177e4SLinus Torvalds } 185eb8ce741SPavel Emelyanov for (t = ign->tunnels_l[h1]; t; t = t->next) { 1861da177e4SLinus Torvalds if (local == t->parms.iph.saddr || 187f97c1e0cSJoe Perches (local == t->parms.iph.daddr && 188f97c1e0cSJoe Perches 
ipv4_is_multicast(local))) { 1891da177e4SLinus Torvalds if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 1901da177e4SLinus Torvalds return t; 1911da177e4SLinus Torvalds } 1921da177e4SLinus Torvalds } 193eb8ce741SPavel Emelyanov for (t = ign->tunnels_wc[h1]; t; t = t->next) { 1941da177e4SLinus Torvalds if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 1951da177e4SLinus Torvalds return t; 1961da177e4SLinus Torvalds } 1971da177e4SLinus Torvalds 1987daa0004SPavel Emelyanov if (ign->fb_tunnel_dev->flags&IFF_UP) 1997daa0004SPavel Emelyanov return netdev_priv(ign->fb_tunnel_dev); 2001da177e4SLinus Torvalds return NULL; 2011da177e4SLinus Torvalds } 2021da177e4SLinus Torvalds 203f57e7d5aSPavel Emelyanov static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, 204f57e7d5aSPavel Emelyanov struct ip_tunnel_parm *parms) 2051da177e4SLinus Torvalds { 2065056a1efSYOSHIFUJI Hideaki __be32 remote = parms->iph.daddr; 2075056a1efSYOSHIFUJI Hideaki __be32 local = parms->iph.saddr; 2085056a1efSYOSHIFUJI Hideaki __be32 key = parms->i_key; 2091da177e4SLinus Torvalds unsigned h = HASH(key); 2101da177e4SLinus Torvalds int prio = 0; 2111da177e4SLinus Torvalds 2121da177e4SLinus Torvalds if (local) 2131da177e4SLinus Torvalds prio |= 1; 214f97c1e0cSJoe Perches if (remote && !ipv4_is_multicast(remote)) { 2151da177e4SLinus Torvalds prio |= 2; 2161da177e4SLinus Torvalds h ^= HASH(remote); 2171da177e4SLinus Torvalds } 2181da177e4SLinus Torvalds 219eb8ce741SPavel Emelyanov return &ign->tunnels[prio][h]; 2201da177e4SLinus Torvalds } 2211da177e4SLinus Torvalds 222f57e7d5aSPavel Emelyanov static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, 223f57e7d5aSPavel Emelyanov struct ip_tunnel *t) 2245056a1efSYOSHIFUJI Hideaki { 225f57e7d5aSPavel Emelyanov return __ipgre_bucket(ign, &t->parms); 2265056a1efSYOSHIFUJI Hideaki } 2275056a1efSYOSHIFUJI Hideaki 228f57e7d5aSPavel Emelyanov static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) 2291da177e4SLinus Torvalds 
{ 230f57e7d5aSPavel Emelyanov struct ip_tunnel **tp = ipgre_bucket(ign, t); 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds t->next = *tp; 2331da177e4SLinus Torvalds write_lock_bh(&ipgre_lock); 2341da177e4SLinus Torvalds *tp = t; 2351da177e4SLinus Torvalds write_unlock_bh(&ipgre_lock); 2361da177e4SLinus Torvalds } 2371da177e4SLinus Torvalds 238f57e7d5aSPavel Emelyanov static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 2391da177e4SLinus Torvalds { 2401da177e4SLinus Torvalds struct ip_tunnel **tp; 2411da177e4SLinus Torvalds 242f57e7d5aSPavel Emelyanov for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 2431da177e4SLinus Torvalds if (t == *tp) { 2441da177e4SLinus Torvalds write_lock_bh(&ipgre_lock); 2451da177e4SLinus Torvalds *tp = t->next; 2461da177e4SLinus Torvalds write_unlock_bh(&ipgre_lock); 2471da177e4SLinus Torvalds break; 2481da177e4SLinus Torvalds } 2491da177e4SLinus Torvalds } 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds 252f57e7d5aSPavel Emelyanov static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, 253f57e7d5aSPavel Emelyanov struct ip_tunnel_parm *parms, int create) 2541da177e4SLinus Torvalds { 255d5a0a1e3SAl Viro __be32 remote = parms->iph.daddr; 256d5a0a1e3SAl Viro __be32 local = parms->iph.saddr; 257d5a0a1e3SAl Viro __be32 key = parms->i_key; 2581da177e4SLinus Torvalds struct ip_tunnel *t, **tp, *nt; 2591da177e4SLinus Torvalds struct net_device *dev; 2601da177e4SLinus Torvalds char name[IFNAMSIZ]; 261f57e7d5aSPavel Emelyanov struct ipgre_net *ign = net_generic(net, ipgre_net_id); 2621da177e4SLinus Torvalds 263f57e7d5aSPavel Emelyanov for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) { 2641da177e4SLinus Torvalds if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 2651da177e4SLinus Torvalds if (key == t->parms.i_key) 2661da177e4SLinus Torvalds return t; 2671da177e4SLinus Torvalds } 2681da177e4SLinus Torvalds } 2691da177e4SLinus Torvalds if (!create) 
2701da177e4SLinus Torvalds return NULL; 2711da177e4SLinus Torvalds 2721da177e4SLinus Torvalds if (parms->name[0]) 2731da177e4SLinus Torvalds strlcpy(name, parms->name, IFNAMSIZ); 27434cc7ba6SPavel Emelyanov else 27534cc7ba6SPavel Emelyanov sprintf(name, "gre%%d"); 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); 2781da177e4SLinus Torvalds if (!dev) 2791da177e4SLinus Torvalds return NULL; 2801da177e4SLinus Torvalds 2810b67ecebSPavel Emelyanov dev_net_set(dev, net); 2820b67ecebSPavel Emelyanov 283b37d428bSPavel Emelyanov if (strchr(name, '%')) { 284b37d428bSPavel Emelyanov if (dev_alloc_name(dev, name) < 0) 285b37d428bSPavel Emelyanov goto failed_free; 286b37d428bSPavel Emelyanov } 287b37d428bSPavel Emelyanov 2881da177e4SLinus Torvalds dev->init = ipgre_tunnel_init; 2892941a486SPatrick McHardy nt = netdev_priv(dev); 2901da177e4SLinus Torvalds nt->parms = *parms; 2911da177e4SLinus Torvalds 292b37d428bSPavel Emelyanov if (register_netdevice(dev) < 0) 293b37d428bSPavel Emelyanov goto failed_free; 2941da177e4SLinus Torvalds 2951da177e4SLinus Torvalds dev_hold(dev); 296f57e7d5aSPavel Emelyanov ipgre_tunnel_link(ign, nt); 2971da177e4SLinus Torvalds return nt; 2981da177e4SLinus Torvalds 299b37d428bSPavel Emelyanov failed_free: 300b37d428bSPavel Emelyanov free_netdev(dev); 3011da177e4SLinus Torvalds return NULL; 3021da177e4SLinus Torvalds } 3031da177e4SLinus Torvalds 3041da177e4SLinus Torvalds static void ipgre_tunnel_uninit(struct net_device *dev) 3051da177e4SLinus Torvalds { 306f57e7d5aSPavel Emelyanov struct net *net = dev_net(dev); 307f57e7d5aSPavel Emelyanov struct ipgre_net *ign = net_generic(net, ipgre_net_id); 308f57e7d5aSPavel Emelyanov 309f57e7d5aSPavel Emelyanov ipgre_tunnel_unlink(ign, netdev_priv(dev)); 3101da177e4SLinus Torvalds dev_put(dev); 3111da177e4SLinus Torvalds } 3121da177e4SLinus Torvalds 3131da177e4SLinus Torvalds 3141da177e4SLinus Torvalds static void ipgre_err(struct sk_buff *skb, u32 
info) 3151da177e4SLinus Torvalds { 3161da177e4SLinus Torvalds 317071f92d0SRami Rosen /* All the routers (except for Linux) return only 3181da177e4SLinus Torvalds 8 bytes of packet payload. It means, that precise relaying of 3191da177e4SLinus Torvalds ICMP in the real Internet is absolutely infeasible. 3201da177e4SLinus Torvalds 3211da177e4SLinus Torvalds Moreover, Cisco "wise men" put GRE key to the third word 3221da177e4SLinus Torvalds in GRE header. It makes impossible maintaining even soft state for keyed 3231da177e4SLinus Torvalds GRE tunnels with enabled checksum. Tell them "thank you". 3241da177e4SLinus Torvalds 3251da177e4SLinus Torvalds Well, I wonder, rfc1812 was written by Cisco employee, 3261da177e4SLinus Torvalds what the hell these idiots break standrads established 3271da177e4SLinus Torvalds by themself??? 3281da177e4SLinus Torvalds */ 3291da177e4SLinus Torvalds 3301da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr*)skb->data; 331d5a0a1e3SAl Viro __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); 3321da177e4SLinus Torvalds int grehlen = (iph->ihl<<2) + 4; 33388c7664fSArnaldo Carvalho de Melo const int type = icmp_hdr(skb)->type; 33488c7664fSArnaldo Carvalho de Melo const int code = icmp_hdr(skb)->code; 3351da177e4SLinus Torvalds struct ip_tunnel *t; 336d5a0a1e3SAl Viro __be16 flags; 3371da177e4SLinus Torvalds 3381da177e4SLinus Torvalds flags = p[0]; 3391da177e4SLinus Torvalds if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 3401da177e4SLinus Torvalds if (flags&(GRE_VERSION|GRE_ROUTING)) 3411da177e4SLinus Torvalds return; 3421da177e4SLinus Torvalds if (flags&GRE_KEY) { 3431da177e4SLinus Torvalds grehlen += 4; 3441da177e4SLinus Torvalds if (flags&GRE_CSUM) 3451da177e4SLinus Torvalds grehlen += 4; 3461da177e4SLinus Torvalds } 3471da177e4SLinus Torvalds } 3481da177e4SLinus Torvalds 3491da177e4SLinus Torvalds /* If only 8 bytes returned, keyed message will be dropped here */ 3501da177e4SLinus Torvalds if (skb_headlen(skb) < grehlen) 
3511da177e4SLinus Torvalds return; 3521da177e4SLinus Torvalds 3531da177e4SLinus Torvalds switch (type) { 3541da177e4SLinus Torvalds default: 3551da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 3561da177e4SLinus Torvalds return; 3571da177e4SLinus Torvalds 3581da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 3591da177e4SLinus Torvalds switch (code) { 3601da177e4SLinus Torvalds case ICMP_SR_FAILED: 3611da177e4SLinus Torvalds case ICMP_PORT_UNREACH: 3621da177e4SLinus Torvalds /* Impossible event. */ 3631da177e4SLinus Torvalds return; 3641da177e4SLinus Torvalds case ICMP_FRAG_NEEDED: 3651da177e4SLinus Torvalds /* Soft state for pmtu is maintained by IP core. */ 3661da177e4SLinus Torvalds return; 3671da177e4SLinus Torvalds default: 3681da177e4SLinus Torvalds /* All others are translated to HOST_UNREACH. 3691da177e4SLinus Torvalds rfc2003 contains "deep thoughts" about NET_UNREACH, 3701da177e4SLinus Torvalds I believe they are just ether pollution. --ANK 3711da177e4SLinus Torvalds */ 3721da177e4SLinus Torvalds break; 3731da177e4SLinus Torvalds } 3741da177e4SLinus Torvalds break; 3751da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 3761da177e4SLinus Torvalds if (code != ICMP_EXC_TTL) 3771da177e4SLinus Torvalds return; 3781da177e4SLinus Torvalds break; 3791da177e4SLinus Torvalds } 3801da177e4SLinus Torvalds 3811da177e4SLinus Torvalds read_lock(&ipgre_lock); 3823b4667f3SPavel Emelyanov t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, 383f57e7d5aSPavel Emelyanov (flags&GRE_KEY) ? 
384f57e7d5aSPavel Emelyanov *(((__be32*)p) + (grehlen>>2) - 1) : 0); 385f97c1e0cSJoe Perches if (t == NULL || t->parms.iph.daddr == 0 || 386f97c1e0cSJoe Perches ipv4_is_multicast(t->parms.iph.daddr)) 3871da177e4SLinus Torvalds goto out; 3881da177e4SLinus Torvalds 3891da177e4SLinus Torvalds if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 3901da177e4SLinus Torvalds goto out; 3911da177e4SLinus Torvalds 3921da177e4SLinus Torvalds if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 3931da177e4SLinus Torvalds t->err_count++; 3941da177e4SLinus Torvalds else 3951da177e4SLinus Torvalds t->err_count = 1; 3961da177e4SLinus Torvalds t->err_time = jiffies; 3971da177e4SLinus Torvalds out: 3981da177e4SLinus Torvalds read_unlock(&ipgre_lock); 3991da177e4SLinus Torvalds return; 4001da177e4SLinus Torvalds } 4011da177e4SLinus Torvalds 4021da177e4SLinus Torvalds static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 4031da177e4SLinus Torvalds { 4041da177e4SLinus Torvalds if (INET_ECN_is_ce(iph->tos)) { 4051da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 406eddc9ec5SArnaldo Carvalho de Melo IP_ECN_set_ce(ip_hdr(skb)); 4071da177e4SLinus Torvalds } else if (skb->protocol == htons(ETH_P_IPV6)) { 4080660e03fSArnaldo Carvalho de Melo IP6_ECN_set_ce(ipv6_hdr(skb)); 4091da177e4SLinus Torvalds } 4101da177e4SLinus Torvalds } 4111da177e4SLinus Torvalds } 4121da177e4SLinus Torvalds 4131da177e4SLinus Torvalds static inline u8 4141da177e4SLinus Torvalds ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 4151da177e4SLinus Torvalds { 4161da177e4SLinus Torvalds u8 inner = 0; 4171da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 4181da177e4SLinus Torvalds inner = old_iph->tos; 4191da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) 4201da177e4SLinus Torvalds inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 4211da177e4SLinus Torvalds return INET_ECN_encapsulate(tos, inner); 4221da177e4SLinus Torvalds } 
4231da177e4SLinus Torvalds 4241da177e4SLinus Torvalds static int ipgre_rcv(struct sk_buff *skb) 4251da177e4SLinus Torvalds { 4261da177e4SLinus Torvalds struct iphdr *iph; 4271da177e4SLinus Torvalds u8 *h; 428d5a0a1e3SAl Viro __be16 flags; 429d3bc23e7SAl Viro __sum16 csum = 0; 430d5a0a1e3SAl Viro __be32 key = 0; 4311da177e4SLinus Torvalds u32 seqno = 0; 4321da177e4SLinus Torvalds struct ip_tunnel *tunnel; 4331da177e4SLinus Torvalds int offset = 4; 4341da177e4SLinus Torvalds 4351da177e4SLinus Torvalds if (!pskb_may_pull(skb, 16)) 4361da177e4SLinus Torvalds goto drop_nolock; 4371da177e4SLinus Torvalds 438eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 4391da177e4SLinus Torvalds h = skb->data; 440d5a0a1e3SAl Viro flags = *(__be16*)h; 4411da177e4SLinus Torvalds 4421da177e4SLinus Torvalds if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { 4431da177e4SLinus Torvalds /* - Version must be 0. 4441da177e4SLinus Torvalds - We do not support routing headers. 4451da177e4SLinus Torvalds */ 4461da177e4SLinus Torvalds if (flags&(GRE_VERSION|GRE_ROUTING)) 4471da177e4SLinus Torvalds goto drop_nolock; 4481da177e4SLinus Torvalds 4491da177e4SLinus Torvalds if (flags&GRE_CSUM) { 450fb286bb2SHerbert Xu switch (skb->ip_summed) { 45184fa7933SPatrick McHardy case CHECKSUM_COMPLETE: 452d3bc23e7SAl Viro csum = csum_fold(skb->csum); 453fb286bb2SHerbert Xu if (!csum) 454fb286bb2SHerbert Xu break; 455fb286bb2SHerbert Xu /* fall through */ 456fb286bb2SHerbert Xu case CHECKSUM_NONE: 457fb286bb2SHerbert Xu skb->csum = 0; 458fb286bb2SHerbert Xu csum = __skb_checksum_complete(skb); 45984fa7933SPatrick McHardy skb->ip_summed = CHECKSUM_COMPLETE; 4601da177e4SLinus Torvalds } 4611da177e4SLinus Torvalds offset += 4; 4621da177e4SLinus Torvalds } 4631da177e4SLinus Torvalds if (flags&GRE_KEY) { 464d5a0a1e3SAl Viro key = *(__be32*)(h + offset); 4651da177e4SLinus Torvalds offset += 4; 4661da177e4SLinus Torvalds } 4671da177e4SLinus Torvalds if (flags&GRE_SEQ) { 468d5a0a1e3SAl Viro seqno = 
ntohl(*(__be32*)(h + offset)); 4691da177e4SLinus Torvalds offset += 4; 4701da177e4SLinus Torvalds } 4711da177e4SLinus Torvalds } 4721da177e4SLinus Torvalds 4731da177e4SLinus Torvalds read_lock(&ipgre_lock); 4743b4667f3SPavel Emelyanov if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), 475f57e7d5aSPavel Emelyanov iph->saddr, iph->daddr, key)) != NULL) { 476addd68ebSPavel Emelyanov struct net_device_stats *stats = &tunnel->dev->stats; 477addd68ebSPavel Emelyanov 4781da177e4SLinus Torvalds secpath_reset(skb); 4791da177e4SLinus Torvalds 480d5a0a1e3SAl Viro skb->protocol = *(__be16*)(h + 2); 4811da177e4SLinus Torvalds /* WCCP version 1 and 2 protocol decoding. 4821da177e4SLinus Torvalds * - Change protocol to IP 4831da177e4SLinus Torvalds * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header 4841da177e4SLinus Torvalds */ 4851da177e4SLinus Torvalds if (flags == 0 && 486496c98dfSYOSHIFUJI Hideaki skb->protocol == htons(ETH_P_WCCP)) { 487496c98dfSYOSHIFUJI Hideaki skb->protocol = htons(ETH_P_IP); 4881da177e4SLinus Torvalds if ((*(h + offset) & 0xF0) != 0x40) 4891da177e4SLinus Torvalds offset += 4; 4901da177e4SLinus Torvalds } 4911da177e4SLinus Torvalds 4921d069167STimo Teras skb->mac_header = skb->network_header; 4934209fb60SArnaldo Carvalho de Melo __pskb_pull(skb, offset); 4944209fb60SArnaldo Carvalho de Melo skb_reset_network_header(skb); 4959c70220bSArnaldo Carvalho de Melo skb_postpull_rcsum(skb, skb_transport_header(skb), offset); 4961da177e4SLinus Torvalds skb->pkt_type = PACKET_HOST; 4971da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST 498f97c1e0cSJoe Perches if (ipv4_is_multicast(iph->daddr)) { 4991da177e4SLinus Torvalds /* Looped back packet, drop it! 
*/ 500ee6b9673SEric Dumazet if (skb->rtable->fl.iif == 0) 5011da177e4SLinus Torvalds goto drop; 502addd68ebSPavel Emelyanov stats->multicast++; 5031da177e4SLinus Torvalds skb->pkt_type = PACKET_BROADCAST; 5041da177e4SLinus Torvalds } 5051da177e4SLinus Torvalds #endif 5061da177e4SLinus Torvalds 5071da177e4SLinus Torvalds if (((flags&GRE_CSUM) && csum) || 5081da177e4SLinus Torvalds (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 509addd68ebSPavel Emelyanov stats->rx_crc_errors++; 510addd68ebSPavel Emelyanov stats->rx_errors++; 5111da177e4SLinus Torvalds goto drop; 5121da177e4SLinus Torvalds } 5131da177e4SLinus Torvalds if (tunnel->parms.i_flags&GRE_SEQ) { 5141da177e4SLinus Torvalds if (!(flags&GRE_SEQ) || 5151da177e4SLinus Torvalds (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 516addd68ebSPavel Emelyanov stats->rx_fifo_errors++; 517addd68ebSPavel Emelyanov stats->rx_errors++; 5181da177e4SLinus Torvalds goto drop; 5191da177e4SLinus Torvalds } 5201da177e4SLinus Torvalds tunnel->i_seqno = seqno + 1; 5211da177e4SLinus Torvalds } 522addd68ebSPavel Emelyanov stats->rx_packets++; 523addd68ebSPavel Emelyanov stats->rx_bytes += skb->len; 5241da177e4SLinus Torvalds skb->dev = tunnel->dev; 5251da177e4SLinus Torvalds dst_release(skb->dst); 5261da177e4SLinus Torvalds skb->dst = NULL; 5271da177e4SLinus Torvalds nf_reset(skb); 5281da177e4SLinus Torvalds ipgre_ecn_decapsulate(iph, skb); 5291da177e4SLinus Torvalds netif_rx(skb); 5301da177e4SLinus Torvalds read_unlock(&ipgre_lock); 5311da177e4SLinus Torvalds return(0); 5321da177e4SLinus Torvalds } 53345af08beSHerbert Xu icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 5341da177e4SLinus Torvalds 5351da177e4SLinus Torvalds drop: 5361da177e4SLinus Torvalds read_unlock(&ipgre_lock); 5371da177e4SLinus Torvalds drop_nolock: 5381da177e4SLinus Torvalds kfree_skb(skb); 5391da177e4SLinus Torvalds return(0); 5401da177e4SLinus Torvalds } 5411da177e4SLinus Torvalds 5421da177e4SLinus Torvalds static int 
ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 5431da177e4SLinus Torvalds { 5442941a486SPatrick McHardy struct ip_tunnel *tunnel = netdev_priv(dev); 545addd68ebSPavel Emelyanov struct net_device_stats *stats = &tunnel->dev->stats; 546eddc9ec5SArnaldo Carvalho de Melo struct iphdr *old_iph = ip_hdr(skb); 5471da177e4SLinus Torvalds struct iphdr *tiph; 5481da177e4SLinus Torvalds u8 tos; 549d5a0a1e3SAl Viro __be16 df; 5501da177e4SLinus Torvalds struct rtable *rt; /* Route to the other host */ 5511da177e4SLinus Torvalds struct net_device *tdev; /* Device to other host */ 5521da177e4SLinus Torvalds struct iphdr *iph; /* Our new IP header */ 553c2636b4dSChuck Lever unsigned int max_headroom; /* The extra header space needed */ 5541da177e4SLinus Torvalds int gre_hlen; 555d5a0a1e3SAl Viro __be32 dst; 5561da177e4SLinus Torvalds int mtu; 5571da177e4SLinus Torvalds 5581da177e4SLinus Torvalds if (tunnel->recursion++) { 559addd68ebSPavel Emelyanov stats->collisions++; 5601da177e4SLinus Torvalds goto tx_error; 5611da177e4SLinus Torvalds } 5621da177e4SLinus Torvalds 5633b04dddeSStephen Hemminger if (dev->header_ops) { 5641da177e4SLinus Torvalds gre_hlen = 0; 5651da177e4SLinus Torvalds tiph = (struct iphdr*)skb->data; 5661da177e4SLinus Torvalds } else { 5671da177e4SLinus Torvalds gre_hlen = tunnel->hlen; 5681da177e4SLinus Torvalds tiph = &tunnel->parms.iph; 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds 5711da177e4SLinus Torvalds if ((dst = tiph->daddr) == 0) { 5721da177e4SLinus Torvalds /* NBMA tunnel */ 5731da177e4SLinus Torvalds 5741da177e4SLinus Torvalds if (skb->dst == NULL) { 575addd68ebSPavel Emelyanov stats->tx_fifo_errors++; 5761da177e4SLinus Torvalds goto tx_error; 5771da177e4SLinus Torvalds } 5781da177e4SLinus Torvalds 5791da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 580ee6b9673SEric Dumazet rt = skb->rtable; 5811da177e4SLinus Torvalds if ((dst = rt->rt_gateway) == 0) 5821da177e4SLinus Torvalds goto tx_error_icmp; 
5831da177e4SLinus Torvalds } 5841da177e4SLinus Torvalds #ifdef CONFIG_IPV6 5851da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) { 5861da177e4SLinus Torvalds struct in6_addr *addr6; 5871da177e4SLinus Torvalds int addr_type; 5881da177e4SLinus Torvalds struct neighbour *neigh = skb->dst->neighbour; 5891da177e4SLinus Torvalds 5901da177e4SLinus Torvalds if (neigh == NULL) 5911da177e4SLinus Torvalds goto tx_error; 5921da177e4SLinus Torvalds 5931da177e4SLinus Torvalds addr6 = (struct in6_addr*)&neigh->primary_key; 5941da177e4SLinus Torvalds addr_type = ipv6_addr_type(addr6); 5951da177e4SLinus Torvalds 5961da177e4SLinus Torvalds if (addr_type == IPV6_ADDR_ANY) { 5970660e03fSArnaldo Carvalho de Melo addr6 = &ipv6_hdr(skb)->daddr; 5981da177e4SLinus Torvalds addr_type = ipv6_addr_type(addr6); 5991da177e4SLinus Torvalds } 6001da177e4SLinus Torvalds 6011da177e4SLinus Torvalds if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 6021da177e4SLinus Torvalds goto tx_error_icmp; 6031da177e4SLinus Torvalds 6041da177e4SLinus Torvalds dst = addr6->s6_addr32[3]; 6051da177e4SLinus Torvalds } 6061da177e4SLinus Torvalds #endif 6071da177e4SLinus Torvalds else 6081da177e4SLinus Torvalds goto tx_error; 6091da177e4SLinus Torvalds } 6101da177e4SLinus Torvalds 6111da177e4SLinus Torvalds tos = tiph->tos; 6121da177e4SLinus Torvalds if (tos&1) { 6131da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 6141da177e4SLinus Torvalds tos = old_iph->tos; 6151da177e4SLinus Torvalds tos &= ~1; 6161da177e4SLinus Torvalds } 6171da177e4SLinus Torvalds 6181da177e4SLinus Torvalds { 6191da177e4SLinus Torvalds struct flowi fl = { .oif = tunnel->parms.link, 6201da177e4SLinus Torvalds .nl_u = { .ip4_u = 6211da177e4SLinus Torvalds { .daddr = dst, 6221da177e4SLinus Torvalds .saddr = tiph->saddr, 6231da177e4SLinus Torvalds .tos = RT_TOS(tos) } }, 6241da177e4SLinus Torvalds .proto = IPPROTO_GRE }; 62596635522SPavel Emelyanov if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 626addd68ebSPavel Emelyanov 
stats->tx_carrier_errors++; 6271da177e4SLinus Torvalds goto tx_error; 6281da177e4SLinus Torvalds } 6291da177e4SLinus Torvalds } 6301da177e4SLinus Torvalds tdev = rt->u.dst.dev; 6311da177e4SLinus Torvalds 6321da177e4SLinus Torvalds if (tdev == dev) { 6331da177e4SLinus Torvalds ip_rt_put(rt); 634addd68ebSPavel Emelyanov stats->collisions++; 6351da177e4SLinus Torvalds goto tx_error; 6361da177e4SLinus Torvalds } 6371da177e4SLinus Torvalds 6381da177e4SLinus Torvalds df = tiph->frag_off; 6391da177e4SLinus Torvalds if (df) 640*c95b819aSHerbert Xu mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; 6411da177e4SLinus Torvalds else 6421da177e4SLinus Torvalds mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; 6431da177e4SLinus Torvalds 6441da177e4SLinus Torvalds if (skb->dst) 6451da177e4SLinus Torvalds skb->dst->ops->update_pmtu(skb->dst, mtu); 6461da177e4SLinus Torvalds 6471da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 6481da177e4SLinus Torvalds df |= (old_iph->frag_off&htons(IP_DF)); 6491da177e4SLinus Torvalds 6501da177e4SLinus Torvalds if ((old_iph->frag_off&htons(IP_DF)) && 6511da177e4SLinus Torvalds mtu < ntohs(old_iph->tot_len)) { 6521da177e4SLinus Torvalds icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 6531da177e4SLinus Torvalds ip_rt_put(rt); 6541da177e4SLinus Torvalds goto tx_error; 6551da177e4SLinus Torvalds } 6561da177e4SLinus Torvalds } 6571da177e4SLinus Torvalds #ifdef CONFIG_IPV6 6581da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) { 6591da177e4SLinus Torvalds struct rt6_info *rt6 = (struct rt6_info*)skb->dst; 6601da177e4SLinus Torvalds 6611da177e4SLinus Torvalds if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { 662f97c1e0cSJoe Perches if ((tunnel->parms.iph.daddr && 663f97c1e0cSJoe Perches !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 6641da177e4SLinus Torvalds rt6->rt6i_dst.plen == 128) { 6651da177e4SLinus Torvalds rt6->rt6i_flags |= RTF_MODIFIED; 6661da177e4SLinus Torvalds 
skb->dst->metrics[RTAX_MTU-1] = mtu; 6671da177e4SLinus Torvalds } 6681da177e4SLinus Torvalds } 6691da177e4SLinus Torvalds 6701da177e4SLinus Torvalds if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { 6711da177e4SLinus Torvalds icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 6721da177e4SLinus Torvalds ip_rt_put(rt); 6731da177e4SLinus Torvalds goto tx_error; 6741da177e4SLinus Torvalds } 6751da177e4SLinus Torvalds } 6761da177e4SLinus Torvalds #endif 6771da177e4SLinus Torvalds 6781da177e4SLinus Torvalds if (tunnel->err_count > 0) { 6791da177e4SLinus Torvalds if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 6801da177e4SLinus Torvalds tunnel->err_count--; 6811da177e4SLinus Torvalds 6821da177e4SLinus Torvalds dst_link_failure(skb); 6831da177e4SLinus Torvalds } else 6841da177e4SLinus Torvalds tunnel->err_count = 0; 6851da177e4SLinus Torvalds } 6861da177e4SLinus Torvalds 6871da177e4SLinus Torvalds max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; 6881da177e4SLinus Torvalds 689cfbba49dSPatrick McHardy if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 690cfbba49dSPatrick McHardy (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 6911da177e4SLinus Torvalds struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 6921da177e4SLinus Torvalds if (!new_skb) { 6931da177e4SLinus Torvalds ip_rt_put(rt); 6941da177e4SLinus Torvalds stats->tx_dropped++; 6951da177e4SLinus Torvalds dev_kfree_skb(skb); 6961da177e4SLinus Torvalds tunnel->recursion--; 6971da177e4SLinus Torvalds return 0; 6981da177e4SLinus Torvalds } 6991da177e4SLinus Torvalds if (skb->sk) 7001da177e4SLinus Torvalds skb_set_owner_w(new_skb, skb->sk); 7011da177e4SLinus Torvalds dev_kfree_skb(skb); 7021da177e4SLinus Torvalds skb = new_skb; 703eddc9ec5SArnaldo Carvalho de Melo old_iph = ip_hdr(skb); 7041da177e4SLinus Torvalds } 7051da177e4SLinus Torvalds 706b0e380b1SArnaldo Carvalho de Melo skb->transport_header = skb->network_header; 707e2d1bca7SArnaldo Carvalho de Melo 
skb_push(skb, gre_hlen); 708e2d1bca7SArnaldo Carvalho de Melo skb_reset_network_header(skb); 7091da177e4SLinus Torvalds memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 71048d5cad8SPatrick McHardy IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 71148d5cad8SPatrick McHardy IPSKB_REROUTED); 7121da177e4SLinus Torvalds dst_release(skb->dst); 7131da177e4SLinus Torvalds skb->dst = &rt->u.dst; 7141da177e4SLinus Torvalds 7151da177e4SLinus Torvalds /* 7161da177e4SLinus Torvalds * Push down and install the IPIP header. 7171da177e4SLinus Torvalds */ 7181da177e4SLinus Torvalds 719eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 7201da177e4SLinus Torvalds iph->version = 4; 7211da177e4SLinus Torvalds iph->ihl = sizeof(struct iphdr) >> 2; 7221da177e4SLinus Torvalds iph->frag_off = df; 7231da177e4SLinus Torvalds iph->protocol = IPPROTO_GRE; 7241da177e4SLinus Torvalds iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 7251da177e4SLinus Torvalds iph->daddr = rt->rt_dst; 7261da177e4SLinus Torvalds iph->saddr = rt->rt_src; 7271da177e4SLinus Torvalds 7281da177e4SLinus Torvalds if ((iph->ttl = tiph->ttl) == 0) { 7291da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 7301da177e4SLinus Torvalds iph->ttl = old_iph->ttl; 7311da177e4SLinus Torvalds #ifdef CONFIG_IPV6 7321da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) 7331da177e4SLinus Torvalds iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; 7341da177e4SLinus Torvalds #endif 7351da177e4SLinus Torvalds else 7361da177e4SLinus Torvalds iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); 7371da177e4SLinus Torvalds } 7381da177e4SLinus Torvalds 739d5a0a1e3SAl Viro ((__be16*)(iph+1))[0] = tunnel->parms.o_flags; 740d5a0a1e3SAl Viro ((__be16*)(iph+1))[1] = skb->protocol; 7411da177e4SLinus Torvalds 7421da177e4SLinus Torvalds if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { 743d5a0a1e3SAl Viro __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); 7441da177e4SLinus Torvalds 
7451da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_SEQ) { 7461da177e4SLinus Torvalds ++tunnel->o_seqno; 7471da177e4SLinus Torvalds *ptr = htonl(tunnel->o_seqno); 7481da177e4SLinus Torvalds ptr--; 7491da177e4SLinus Torvalds } 7501da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_KEY) { 7511da177e4SLinus Torvalds *ptr = tunnel->parms.o_key; 7521da177e4SLinus Torvalds ptr--; 7531da177e4SLinus Torvalds } 7541da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_CSUM) { 7551da177e4SLinus Torvalds *ptr = 0; 7565f92a738SAl Viro *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); 7571da177e4SLinus Torvalds } 7581da177e4SLinus Torvalds } 7591da177e4SLinus Torvalds 7601da177e4SLinus Torvalds nf_reset(skb); 7611da177e4SLinus Torvalds 7621da177e4SLinus Torvalds IPTUNNEL_XMIT(); 7631da177e4SLinus Torvalds tunnel->recursion--; 7641da177e4SLinus Torvalds return 0; 7651da177e4SLinus Torvalds 7661da177e4SLinus Torvalds tx_error_icmp: 7671da177e4SLinus Torvalds dst_link_failure(skb); 7681da177e4SLinus Torvalds 7691da177e4SLinus Torvalds tx_error: 7701da177e4SLinus Torvalds stats->tx_errors++; 7711da177e4SLinus Torvalds dev_kfree_skb(skb); 7721da177e4SLinus Torvalds tunnel->recursion--; 7731da177e4SLinus Torvalds return 0; 7741da177e4SLinus Torvalds } 7751da177e4SLinus Torvalds 776ee34c1ebSMichal Schmidt static void ipgre_tunnel_bind_dev(struct net_device *dev) 777ee34c1ebSMichal Schmidt { 778ee34c1ebSMichal Schmidt struct net_device *tdev = NULL; 779ee34c1ebSMichal Schmidt struct ip_tunnel *tunnel; 780ee34c1ebSMichal Schmidt struct iphdr *iph; 781ee34c1ebSMichal Schmidt int hlen = LL_MAX_HEADER; 782ee34c1ebSMichal Schmidt int mtu = ETH_DATA_LEN; 783ee34c1ebSMichal Schmidt int addend = sizeof(struct iphdr) + 4; 784ee34c1ebSMichal Schmidt 785ee34c1ebSMichal Schmidt tunnel = netdev_priv(dev); 786ee34c1ebSMichal Schmidt iph = &tunnel->parms.iph; 787ee34c1ebSMichal Schmidt 788*c95b819aSHerbert Xu /* Guess output device to choose reasonable mtu and 
needed_headroom */ 789ee34c1ebSMichal Schmidt 790ee34c1ebSMichal Schmidt if (iph->daddr) { 791ee34c1ebSMichal Schmidt struct flowi fl = { .oif = tunnel->parms.link, 792ee34c1ebSMichal Schmidt .nl_u = { .ip4_u = 793ee34c1ebSMichal Schmidt { .daddr = iph->daddr, 794ee34c1ebSMichal Schmidt .saddr = iph->saddr, 795ee34c1ebSMichal Schmidt .tos = RT_TOS(iph->tos) } }, 796ee34c1ebSMichal Schmidt .proto = IPPROTO_GRE }; 797ee34c1ebSMichal Schmidt struct rtable *rt; 79896635522SPavel Emelyanov if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 799ee34c1ebSMichal Schmidt tdev = rt->u.dst.dev; 800ee34c1ebSMichal Schmidt ip_rt_put(rt); 801ee34c1ebSMichal Schmidt } 802ee34c1ebSMichal Schmidt dev->flags |= IFF_POINTOPOINT; 803ee34c1ebSMichal Schmidt } 804ee34c1ebSMichal Schmidt 805ee34c1ebSMichal Schmidt if (!tdev && tunnel->parms.link) 80696635522SPavel Emelyanov tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 807ee34c1ebSMichal Schmidt 808ee34c1ebSMichal Schmidt if (tdev) { 809*c95b819aSHerbert Xu hlen = tdev->hard_header_len + tdev->needed_headroom; 810ee34c1ebSMichal Schmidt mtu = tdev->mtu; 811ee34c1ebSMichal Schmidt } 812ee34c1ebSMichal Schmidt dev->iflink = tunnel->parms.link; 813ee34c1ebSMichal Schmidt 814ee34c1ebSMichal Schmidt /* Precalculate GRE options length */ 815ee34c1ebSMichal Schmidt if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { 816ee34c1ebSMichal Schmidt if (tunnel->parms.o_flags&GRE_CSUM) 817ee34c1ebSMichal Schmidt addend += 4; 818ee34c1ebSMichal Schmidt if (tunnel->parms.o_flags&GRE_KEY) 819ee34c1ebSMichal Schmidt addend += 4; 820ee34c1ebSMichal Schmidt if (tunnel->parms.o_flags&GRE_SEQ) 821ee34c1ebSMichal Schmidt addend += 4; 822ee34c1ebSMichal Schmidt } 823*c95b819aSHerbert Xu dev->needed_headroom = addend + hlen; 824*c95b819aSHerbert Xu dev->mtu = mtu - dev->hard_header_len - addend; 825ee34c1ebSMichal Schmidt tunnel->hlen = addend; 826ee34c1ebSMichal Schmidt 827ee34c1ebSMichal Schmidt } 828ee34c1ebSMichal Schmidt 8291da177e4SLinus 
Torvalds static int 8301da177e4SLinus Torvalds ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 8311da177e4SLinus Torvalds { 8321da177e4SLinus Torvalds int err = 0; 8331da177e4SLinus Torvalds struct ip_tunnel_parm p; 8341da177e4SLinus Torvalds struct ip_tunnel *t; 835f57e7d5aSPavel Emelyanov struct net *net = dev_net(dev); 836f57e7d5aSPavel Emelyanov struct ipgre_net *ign = net_generic(net, ipgre_net_id); 8371da177e4SLinus Torvalds 8381da177e4SLinus Torvalds switch (cmd) { 8391da177e4SLinus Torvalds case SIOCGETTUNNEL: 8401da177e4SLinus Torvalds t = NULL; 8417daa0004SPavel Emelyanov if (dev == ign->fb_tunnel_dev) { 8421da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 8431da177e4SLinus Torvalds err = -EFAULT; 8441da177e4SLinus Torvalds break; 8451da177e4SLinus Torvalds } 846f57e7d5aSPavel Emelyanov t = ipgre_tunnel_locate(net, &p, 0); 8471da177e4SLinus Torvalds } 8481da177e4SLinus Torvalds if (t == NULL) 8492941a486SPatrick McHardy t = netdev_priv(dev); 8501da177e4SLinus Torvalds memcpy(&p, &t->parms, sizeof(p)); 8511da177e4SLinus Torvalds if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 8521da177e4SLinus Torvalds err = -EFAULT; 8531da177e4SLinus Torvalds break; 8541da177e4SLinus Torvalds 8551da177e4SLinus Torvalds case SIOCADDTUNNEL: 8561da177e4SLinus Torvalds case SIOCCHGTUNNEL: 8571da177e4SLinus Torvalds err = -EPERM; 8581da177e4SLinus Torvalds if (!capable(CAP_NET_ADMIN)) 8591da177e4SLinus Torvalds goto done; 8601da177e4SLinus Torvalds 8611da177e4SLinus Torvalds err = -EFAULT; 8621da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 8631da177e4SLinus Torvalds goto done; 8641da177e4SLinus Torvalds 8651da177e4SLinus Torvalds err = -EINVAL; 8661da177e4SLinus Torvalds if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 8671da177e4SLinus Torvalds p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 8681da177e4SLinus Torvalds 
((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 8691da177e4SLinus Torvalds goto done; 8701da177e4SLinus Torvalds if (p.iph.ttl) 8711da177e4SLinus Torvalds p.iph.frag_off |= htons(IP_DF); 8721da177e4SLinus Torvalds 8731da177e4SLinus Torvalds if (!(p.i_flags&GRE_KEY)) 8741da177e4SLinus Torvalds p.i_key = 0; 8751da177e4SLinus Torvalds if (!(p.o_flags&GRE_KEY)) 8761da177e4SLinus Torvalds p.o_key = 0; 8771da177e4SLinus Torvalds 878f57e7d5aSPavel Emelyanov t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); 8791da177e4SLinus Torvalds 8807daa0004SPavel Emelyanov if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 8811da177e4SLinus Torvalds if (t != NULL) { 8821da177e4SLinus Torvalds if (t->dev != dev) { 8831da177e4SLinus Torvalds err = -EEXIST; 8841da177e4SLinus Torvalds break; 8851da177e4SLinus Torvalds } 8861da177e4SLinus Torvalds } else { 8871da177e4SLinus Torvalds unsigned nflags=0; 8881da177e4SLinus Torvalds 8892941a486SPatrick McHardy t = netdev_priv(dev); 8901da177e4SLinus Torvalds 891f97c1e0cSJoe Perches if (ipv4_is_multicast(p.iph.daddr)) 8921da177e4SLinus Torvalds nflags = IFF_BROADCAST; 8931da177e4SLinus Torvalds else if (p.iph.daddr) 8941da177e4SLinus Torvalds nflags = IFF_POINTOPOINT; 8951da177e4SLinus Torvalds 8961da177e4SLinus Torvalds if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 8971da177e4SLinus Torvalds err = -EINVAL; 8981da177e4SLinus Torvalds break; 8991da177e4SLinus Torvalds } 900f57e7d5aSPavel Emelyanov ipgre_tunnel_unlink(ign, t); 9011da177e4SLinus Torvalds t->parms.iph.saddr = p.iph.saddr; 9021da177e4SLinus Torvalds t->parms.iph.daddr = p.iph.daddr; 9031da177e4SLinus Torvalds t->parms.i_key = p.i_key; 9041da177e4SLinus Torvalds t->parms.o_key = p.o_key; 9051da177e4SLinus Torvalds memcpy(dev->dev_addr, &p.iph.saddr, 4); 9061da177e4SLinus Torvalds memcpy(dev->broadcast, &p.iph.daddr, 4); 907f57e7d5aSPavel Emelyanov ipgre_tunnel_link(ign, t); 9081da177e4SLinus Torvalds netdev_state_change(dev); 9091da177e4SLinus Torvalds } 
9101da177e4SLinus Torvalds } 9111da177e4SLinus Torvalds 9121da177e4SLinus Torvalds if (t) { 9131da177e4SLinus Torvalds err = 0; 9141da177e4SLinus Torvalds if (cmd == SIOCCHGTUNNEL) { 9151da177e4SLinus Torvalds t->parms.iph.ttl = p.iph.ttl; 9161da177e4SLinus Torvalds t->parms.iph.tos = p.iph.tos; 9171da177e4SLinus Torvalds t->parms.iph.frag_off = p.iph.frag_off; 918ee34c1ebSMichal Schmidt if (t->parms.link != p.link) { 919ee34c1ebSMichal Schmidt t->parms.link = p.link; 920ee34c1ebSMichal Schmidt ipgre_tunnel_bind_dev(dev); 921ee34c1ebSMichal Schmidt netdev_state_change(dev); 922ee34c1ebSMichal Schmidt } 9231da177e4SLinus Torvalds } 9241da177e4SLinus Torvalds if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 9251da177e4SLinus Torvalds err = -EFAULT; 9261da177e4SLinus Torvalds } else 9271da177e4SLinus Torvalds err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); 9281da177e4SLinus Torvalds break; 9291da177e4SLinus Torvalds 9301da177e4SLinus Torvalds case SIOCDELTUNNEL: 9311da177e4SLinus Torvalds err = -EPERM; 9321da177e4SLinus Torvalds if (!capable(CAP_NET_ADMIN)) 9331da177e4SLinus Torvalds goto done; 9341da177e4SLinus Torvalds 9357daa0004SPavel Emelyanov if (dev == ign->fb_tunnel_dev) { 9361da177e4SLinus Torvalds err = -EFAULT; 9371da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 9381da177e4SLinus Torvalds goto done; 9391da177e4SLinus Torvalds err = -ENOENT; 940f57e7d5aSPavel Emelyanov if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) 9411da177e4SLinus Torvalds goto done; 9421da177e4SLinus Torvalds err = -EPERM; 9437daa0004SPavel Emelyanov if (t == netdev_priv(ign->fb_tunnel_dev)) 9441da177e4SLinus Torvalds goto done; 9451da177e4SLinus Torvalds dev = t->dev; 9461da177e4SLinus Torvalds } 94722f8cde5SStephen Hemminger unregister_netdevice(dev); 94822f8cde5SStephen Hemminger err = 0; 9491da177e4SLinus Torvalds break; 9501da177e4SLinus Torvalds 9511da177e4SLinus Torvalds default: 9521da177e4SLinus Torvalds err = -EINVAL; 
9531da177e4SLinus Torvalds } 9541da177e4SLinus Torvalds 9551da177e4SLinus Torvalds done: 9561da177e4SLinus Torvalds return err; 9571da177e4SLinus Torvalds } 9581da177e4SLinus Torvalds 9591da177e4SLinus Torvalds static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 9601da177e4SLinus Torvalds { 9612941a486SPatrick McHardy struct ip_tunnel *tunnel = netdev_priv(dev); 962*c95b819aSHerbert Xu if (new_mtu < 68 || 963*c95b819aSHerbert Xu new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) 9641da177e4SLinus Torvalds return -EINVAL; 9651da177e4SLinus Torvalds dev->mtu = new_mtu; 9661da177e4SLinus Torvalds return 0; 9671da177e4SLinus Torvalds } 9681da177e4SLinus Torvalds 9691da177e4SLinus Torvalds /* Nice toy. Unfortunately, useless in real life :-) 9701da177e4SLinus Torvalds It allows to construct virtual multiprotocol broadcast "LAN" 9711da177e4SLinus Torvalds over the Internet, provided multicast routing is tuned. 9721da177e4SLinus Torvalds 9731da177e4SLinus Torvalds 9741da177e4SLinus Torvalds I have no idea was this bicycle invented before me, 9751da177e4SLinus Torvalds so that I had to set ARPHRD_IPGRE to a random value. 9761da177e4SLinus Torvalds I have an impression, that Cisco could make something similar, 9771da177e4SLinus Torvalds but this feature is apparently missing in IOS<=11.2(8). 9781da177e4SLinus Torvalds 9791da177e4SLinus Torvalds I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 9801da177e4SLinus Torvalds with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 9811da177e4SLinus Torvalds 9821da177e4SLinus Torvalds ping -t 255 224.66.66.66 9831da177e4SLinus Torvalds 9841da177e4SLinus Torvalds If nobody answers, mbone does not work. 
9851da177e4SLinus Torvalds 9861da177e4SLinus Torvalds ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 9871da177e4SLinus Torvalds ip addr add 10.66.66.<somewhat>/24 dev Universe 9881da177e4SLinus Torvalds ifconfig Universe up 9891da177e4SLinus Torvalds ifconfig Universe add fe80::<Your_real_addr>/10 9901da177e4SLinus Torvalds ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 9911da177e4SLinus Torvalds ftp 10.66.66.66 9921da177e4SLinus Torvalds ... 9931da177e4SLinus Torvalds ftp fec0:6666:6666::193.233.7.65 9941da177e4SLinus Torvalds ... 9951da177e4SLinus Torvalds 9961da177e4SLinus Torvalds */ 9971da177e4SLinus Torvalds 9983b04dddeSStephen Hemminger static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 9993b04dddeSStephen Hemminger unsigned short type, 10003b04dddeSStephen Hemminger const void *daddr, const void *saddr, unsigned len) 10011da177e4SLinus Torvalds { 10022941a486SPatrick McHardy struct ip_tunnel *t = netdev_priv(dev); 10031da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1004d5a0a1e3SAl Viro __be16 *p = (__be16*)(iph+1); 10051da177e4SLinus Torvalds 10061da177e4SLinus Torvalds memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 10071da177e4SLinus Torvalds p[0] = t->parms.o_flags; 10081da177e4SLinus Torvalds p[1] = htons(type); 10091da177e4SLinus Torvalds 10101da177e4SLinus Torvalds /* 10111da177e4SLinus Torvalds * Set the source hardware address. 
10121da177e4SLinus Torvalds */ 10131da177e4SLinus Torvalds 10141da177e4SLinus Torvalds if (saddr) 10151da177e4SLinus Torvalds memcpy(&iph->saddr, saddr, 4); 10161da177e4SLinus Torvalds 10171da177e4SLinus Torvalds if (daddr) { 10181da177e4SLinus Torvalds memcpy(&iph->daddr, daddr, 4); 10191da177e4SLinus Torvalds return t->hlen; 10201da177e4SLinus Torvalds } 1021f97c1e0cSJoe Perches if (iph->daddr && !ipv4_is_multicast(iph->daddr)) 10221da177e4SLinus Torvalds return t->hlen; 10231da177e4SLinus Torvalds 10241da177e4SLinus Torvalds return -t->hlen; 10251da177e4SLinus Torvalds } 10261da177e4SLinus Torvalds 10276a5f44d7STimo Teras static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 10286a5f44d7STimo Teras { 10296a5f44d7STimo Teras struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); 10306a5f44d7STimo Teras memcpy(haddr, &iph->saddr, 4); 10316a5f44d7STimo Teras return 4; 10326a5f44d7STimo Teras } 10336a5f44d7STimo Teras 10343b04dddeSStephen Hemminger static const struct header_ops ipgre_header_ops = { 10353b04dddeSStephen Hemminger .create = ipgre_header, 10366a5f44d7STimo Teras .parse = ipgre_header_parse, 10373b04dddeSStephen Hemminger }; 10383b04dddeSStephen Hemminger 10396a5f44d7STimo Teras #ifdef CONFIG_NET_IPGRE_BROADCAST 10401da177e4SLinus Torvalds static int ipgre_open(struct net_device *dev) 10411da177e4SLinus Torvalds { 10422941a486SPatrick McHardy struct ip_tunnel *t = netdev_priv(dev); 10431da177e4SLinus Torvalds 1044f97c1e0cSJoe Perches if (ipv4_is_multicast(t->parms.iph.daddr)) { 10451da177e4SLinus Torvalds struct flowi fl = { .oif = t->parms.link, 10461da177e4SLinus Torvalds .nl_u = { .ip4_u = 10471da177e4SLinus Torvalds { .daddr = t->parms.iph.daddr, 10481da177e4SLinus Torvalds .saddr = t->parms.iph.saddr, 10491da177e4SLinus Torvalds .tos = RT_TOS(t->parms.iph.tos) } }, 10501da177e4SLinus Torvalds .proto = IPPROTO_GRE }; 10511da177e4SLinus Torvalds struct rtable *rt; 105296635522SPavel Emelyanov if 
(ip_route_output_key(dev_net(dev), &rt, &fl)) 10531da177e4SLinus Torvalds return -EADDRNOTAVAIL; 10541da177e4SLinus Torvalds dev = rt->u.dst.dev; 10551da177e4SLinus Torvalds ip_rt_put(rt); 1056e5ed6399SHerbert Xu if (__in_dev_get_rtnl(dev) == NULL) 10571da177e4SLinus Torvalds return -EADDRNOTAVAIL; 10581da177e4SLinus Torvalds t->mlink = dev->ifindex; 1059e5ed6399SHerbert Xu ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 10601da177e4SLinus Torvalds } 10611da177e4SLinus Torvalds return 0; 10621da177e4SLinus Torvalds } 10631da177e4SLinus Torvalds 10641da177e4SLinus Torvalds static int ipgre_close(struct net_device *dev) 10651da177e4SLinus Torvalds { 10662941a486SPatrick McHardy struct ip_tunnel *t = netdev_priv(dev); 1067f97c1e0cSJoe Perches if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 10687fee0ca2SDenis V. Lunev struct in_device *in_dev; 1069c346dca1SYOSHIFUJI Hideaki in_dev = inetdev_by_index(dev_net(dev), t->mlink); 10701da177e4SLinus Torvalds if (in_dev) { 10711da177e4SLinus Torvalds ip_mc_dec_group(in_dev, t->parms.iph.daddr); 10721da177e4SLinus Torvalds in_dev_put(in_dev); 10731da177e4SLinus Torvalds } 10741da177e4SLinus Torvalds } 10751da177e4SLinus Torvalds return 0; 10761da177e4SLinus Torvalds } 10771da177e4SLinus Torvalds 10781da177e4SLinus Torvalds #endif 10791da177e4SLinus Torvalds 10801da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev) 10811da177e4SLinus Torvalds { 10821da177e4SLinus Torvalds dev->uninit = ipgre_tunnel_uninit; 10831da177e4SLinus Torvalds dev->destructor = free_netdev; 10841da177e4SLinus Torvalds dev->hard_start_xmit = ipgre_tunnel_xmit; 10851da177e4SLinus Torvalds dev->do_ioctl = ipgre_tunnel_ioctl; 10861da177e4SLinus Torvalds dev->change_mtu = ipgre_tunnel_change_mtu; 10871da177e4SLinus Torvalds 10881da177e4SLinus Torvalds dev->type = ARPHRD_IPGRE; 1089*c95b819aSHerbert Xu dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 109046f25dffSKris Katterjohn dev->mtu = 
ETH_DATA_LEN - sizeof(struct iphdr) - 4; 10911da177e4SLinus Torvalds dev->flags = IFF_NOARP; 10921da177e4SLinus Torvalds dev->iflink = 0; 10931da177e4SLinus Torvalds dev->addr_len = 4; 10940b67ecebSPavel Emelyanov dev->features |= NETIF_F_NETNS_LOCAL; 10951da177e4SLinus Torvalds } 10961da177e4SLinus Torvalds 10971da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev) 10981da177e4SLinus Torvalds { 10991da177e4SLinus Torvalds struct ip_tunnel *tunnel; 11001da177e4SLinus Torvalds struct iphdr *iph; 11011da177e4SLinus Torvalds 11022941a486SPatrick McHardy tunnel = netdev_priv(dev); 11031da177e4SLinus Torvalds iph = &tunnel->parms.iph; 11041da177e4SLinus Torvalds 11051da177e4SLinus Torvalds tunnel->dev = dev; 11061da177e4SLinus Torvalds strcpy(tunnel->parms.name, dev->name); 11071da177e4SLinus Torvalds 11081da177e4SLinus Torvalds memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 11091da177e4SLinus Torvalds memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 11101da177e4SLinus Torvalds 1111ee34c1ebSMichal Schmidt ipgre_tunnel_bind_dev(dev); 11121da177e4SLinus Torvalds 11131da177e4SLinus Torvalds if (iph->daddr) { 11141da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST 1115f97c1e0cSJoe Perches if (ipv4_is_multicast(iph->daddr)) { 11161da177e4SLinus Torvalds if (!iph->saddr) 11171da177e4SLinus Torvalds return -EINVAL; 11181da177e4SLinus Torvalds dev->flags = IFF_BROADCAST; 11193b04dddeSStephen Hemminger dev->header_ops = &ipgre_header_ops; 11201da177e4SLinus Torvalds dev->open = ipgre_open; 11211da177e4SLinus Torvalds dev->stop = ipgre_close; 11221da177e4SLinus Torvalds } 11231da177e4SLinus Torvalds #endif 1124ee34c1ebSMichal Schmidt } else 11256a5f44d7STimo Teras dev->header_ops = &ipgre_header_ops; 11261da177e4SLinus Torvalds 11271da177e4SLinus Torvalds return 0; 11281da177e4SLinus Torvalds } 11291da177e4SLinus Torvalds 11307daa0004SPavel Emelyanov static int ipgre_fb_tunnel_init(struct net_device *dev) 11311da177e4SLinus Torvalds { 
11322941a486SPatrick McHardy struct ip_tunnel *tunnel = netdev_priv(dev); 11331da177e4SLinus Torvalds struct iphdr *iph = &tunnel->parms.iph; 1134eb8ce741SPavel Emelyanov struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id); 11351da177e4SLinus Torvalds 11361da177e4SLinus Torvalds tunnel->dev = dev; 11371da177e4SLinus Torvalds strcpy(tunnel->parms.name, dev->name); 11381da177e4SLinus Torvalds 11391da177e4SLinus Torvalds iph->version = 4; 11401da177e4SLinus Torvalds iph->protocol = IPPROTO_GRE; 11411da177e4SLinus Torvalds iph->ihl = 5; 11421da177e4SLinus Torvalds tunnel->hlen = sizeof(struct iphdr) + 4; 11431da177e4SLinus Torvalds 11441da177e4SLinus Torvalds dev_hold(dev); 1145eb8ce741SPavel Emelyanov ign->tunnels_wc[0] = tunnel; 11461da177e4SLinus Torvalds return 0; 11471da177e4SLinus Torvalds } 11481da177e4SLinus Torvalds 11491da177e4SLinus Torvalds 11501da177e4SLinus Torvalds static struct net_protocol ipgre_protocol = { 11511da177e4SLinus Torvalds .handler = ipgre_rcv, 11521da177e4SLinus Torvalds .err_handler = ipgre_err, 1153f96c148fSPavel Emelyanov .netns_ok = 1, 11541da177e4SLinus Torvalds }; 11551da177e4SLinus Torvalds 1156eb8ce741SPavel Emelyanov static void ipgre_destroy_tunnels(struct ipgre_net *ign) 1157eb8ce741SPavel Emelyanov { 1158eb8ce741SPavel Emelyanov int prio; 1159eb8ce741SPavel Emelyanov 1160eb8ce741SPavel Emelyanov for (prio = 0; prio < 4; prio++) { 1161eb8ce741SPavel Emelyanov int h; 1162eb8ce741SPavel Emelyanov for (h = 0; h < HASH_SIZE; h++) { 1163eb8ce741SPavel Emelyanov struct ip_tunnel *t; 1164eb8ce741SPavel Emelyanov while ((t = ign->tunnels[prio][h]) != NULL) 1165eb8ce741SPavel Emelyanov unregister_netdevice(t->dev); 1166eb8ce741SPavel Emelyanov } 1167eb8ce741SPavel Emelyanov } 1168eb8ce741SPavel Emelyanov } 1169eb8ce741SPavel Emelyanov 117059a4c759SPavel Emelyanov static int ipgre_init_net(struct net *net) 117159a4c759SPavel Emelyanov { 117259a4c759SPavel Emelyanov int err; 117359a4c759SPavel Emelyanov struct ipgre_net *ign; 
117459a4c759SPavel Emelyanov 117559a4c759SPavel Emelyanov err = -ENOMEM; 1176eb8ce741SPavel Emelyanov ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL); 117759a4c759SPavel Emelyanov if (ign == NULL) 117859a4c759SPavel Emelyanov goto err_alloc; 117959a4c759SPavel Emelyanov 118059a4c759SPavel Emelyanov err = net_assign_generic(net, ipgre_net_id, ign); 118159a4c759SPavel Emelyanov if (err < 0) 118259a4c759SPavel Emelyanov goto err_assign; 118359a4c759SPavel Emelyanov 11847daa0004SPavel Emelyanov ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", 11857daa0004SPavel Emelyanov ipgre_tunnel_setup); 11867daa0004SPavel Emelyanov if (!ign->fb_tunnel_dev) { 11877daa0004SPavel Emelyanov err = -ENOMEM; 11887daa0004SPavel Emelyanov goto err_alloc_dev; 11897daa0004SPavel Emelyanov } 11907daa0004SPavel Emelyanov 11917daa0004SPavel Emelyanov ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; 11927daa0004SPavel Emelyanov dev_net_set(ign->fb_tunnel_dev, net); 11937daa0004SPavel Emelyanov 11947daa0004SPavel Emelyanov if ((err = register_netdev(ign->fb_tunnel_dev))) 11957daa0004SPavel Emelyanov goto err_reg_dev; 11967daa0004SPavel Emelyanov 119759a4c759SPavel Emelyanov return 0; 119859a4c759SPavel Emelyanov 11997daa0004SPavel Emelyanov err_reg_dev: 12007daa0004SPavel Emelyanov free_netdev(ign->fb_tunnel_dev); 12017daa0004SPavel Emelyanov err_alloc_dev: 12027daa0004SPavel Emelyanov /* nothing */ 120359a4c759SPavel Emelyanov err_assign: 120459a4c759SPavel Emelyanov kfree(ign); 120559a4c759SPavel Emelyanov err_alloc: 120659a4c759SPavel Emelyanov return err; 120759a4c759SPavel Emelyanov } 120859a4c759SPavel Emelyanov 120959a4c759SPavel Emelyanov static void ipgre_exit_net(struct net *net) 121059a4c759SPavel Emelyanov { 121159a4c759SPavel Emelyanov struct ipgre_net *ign; 121259a4c759SPavel Emelyanov 121359a4c759SPavel Emelyanov ign = net_generic(net, ipgre_net_id); 12147daa0004SPavel Emelyanov rtnl_lock(); 1215eb8ce741SPavel Emelyanov ipgre_destroy_tunnels(ign); 
12167daa0004SPavel Emelyanov rtnl_unlock(); 121759a4c759SPavel Emelyanov kfree(ign); 121859a4c759SPavel Emelyanov } 121959a4c759SPavel Emelyanov 122059a4c759SPavel Emelyanov static struct pernet_operations ipgre_net_ops = { 122159a4c759SPavel Emelyanov .init = ipgre_init_net, 122259a4c759SPavel Emelyanov .exit = ipgre_exit_net, 122359a4c759SPavel Emelyanov }; 12241da177e4SLinus Torvalds 12251da177e4SLinus Torvalds /* 12261da177e4SLinus Torvalds * And now the modules code and kernel interface. 12271da177e4SLinus Torvalds */ 12281da177e4SLinus Torvalds 12291da177e4SLinus Torvalds static int __init ipgre_init(void) 12301da177e4SLinus Torvalds { 12311da177e4SLinus Torvalds int err; 12321da177e4SLinus Torvalds 12331da177e4SLinus Torvalds printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); 12341da177e4SLinus Torvalds 12351da177e4SLinus Torvalds if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { 12361da177e4SLinus Torvalds printk(KERN_INFO "ipgre init: can't add protocol\n"); 12371da177e4SLinus Torvalds return -EAGAIN; 12381da177e4SLinus Torvalds } 12391da177e4SLinus Torvalds 124059a4c759SPavel Emelyanov err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); 124159a4c759SPavel Emelyanov if (err < 0) 12421da177e4SLinus Torvalds inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 12437daa0004SPavel Emelyanov 12447daa0004SPavel Emelyanov return err; 12451da177e4SLinus Torvalds } 12461da177e4SLinus Torvalds 1247db44575fSAlexey Kuznetsov static void __exit ipgre_fini(void) 12481da177e4SLinus Torvalds { 12491da177e4SLinus Torvalds if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 12501da177e4SLinus Torvalds printk(KERN_INFO "ipgre close: can't remove protocol\n"); 12511da177e4SLinus Torvalds 125259a4c759SPavel Emelyanov unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); 12531da177e4SLinus Torvalds } 12541da177e4SLinus Torvalds 12551da177e4SLinus Torvalds module_init(ipgre_init); 12561da177e4SLinus Torvalds module_exit(ipgre_fini); 
12571da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1258