11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Linux NET3: GRE over IP protocol decoder. 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 71da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 81da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 91da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds */ 121da177e4SLinus Torvalds 134fc268d2SRandy Dunlap #include <linux/capability.h> 141da177e4SLinus Torvalds #include <linux/module.h> 151da177e4SLinus Torvalds #include <linux/types.h> 161da177e4SLinus Torvalds #include <linux/kernel.h> 171da177e4SLinus Torvalds #include <asm/uaccess.h> 181da177e4SLinus Torvalds #include <linux/skbuff.h> 191da177e4SLinus Torvalds #include <linux/netdevice.h> 201da177e4SLinus Torvalds #include <linux/in.h> 211da177e4SLinus Torvalds #include <linux/tcp.h> 221da177e4SLinus Torvalds #include <linux/udp.h> 231da177e4SLinus Torvalds #include <linux/if_arp.h> 241da177e4SLinus Torvalds #include <linux/mroute.h> 251da177e4SLinus Torvalds #include <linux/init.h> 261da177e4SLinus Torvalds #include <linux/in6.h> 271da177e4SLinus Torvalds #include <linux/inetdevice.h> 281da177e4SLinus Torvalds #include <linux/igmp.h> 291da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h> 30e1a80002SHerbert Xu #include <linux/etherdevice.h> 3146f25dffSKris Katterjohn #include <linux/if_ether.h> 321da177e4SLinus Torvalds 331da177e4SLinus Torvalds #include <net/sock.h> 341da177e4SLinus Torvalds #include <net/ip.h> 351da177e4SLinus Torvalds #include <net/icmp.h> 361da177e4SLinus Torvalds #include <net/protocol.h> 371da177e4SLinus Torvalds #include <net/ipip.h> 381da177e4SLinus Torvalds #include <net/arp.h> 391da177e4SLinus Torvalds #include 
<net/checksum.h> 401da177e4SLinus Torvalds #include <net/dsfield.h> 411da177e4SLinus Torvalds #include <net/inet_ecn.h> 421da177e4SLinus Torvalds #include <net/xfrm.h> 4359a4c759SPavel Emelyanov #include <net/net_namespace.h> 4459a4c759SPavel Emelyanov #include <net/netns/generic.h> 45c19e654dSHerbert Xu #include <net/rtnetlink.h> 461da177e4SLinus Torvalds 471da177e4SLinus Torvalds #ifdef CONFIG_IPV6 481da177e4SLinus Torvalds #include <net/ipv6.h> 491da177e4SLinus Torvalds #include <net/ip6_fib.h> 501da177e4SLinus Torvalds #include <net/ip6_route.h> 511da177e4SLinus Torvalds #endif 521da177e4SLinus Torvalds 531da177e4SLinus Torvalds /* 541da177e4SLinus Torvalds Problems & solutions 551da177e4SLinus Torvalds -------------------- 561da177e4SLinus Torvalds 571da177e4SLinus Torvalds 1. The most important issue is detecting local dead loops. 581da177e4SLinus Torvalds They would cause complete host lockup in transmit, which 591da177e4SLinus Torvalds would be "resolved" by stack overflow or, if queueing is enabled, 601da177e4SLinus Torvalds with infinite looping in net_bh. 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds We cannot track such dead loops during route installation, 631da177e4SLinus Torvalds it is infeasible task. The most general solutions would be 641da177e4SLinus Torvalds to keep skb->encapsulation counter (sort of local ttl), 651da177e4SLinus Torvalds and silently drop packet when it expires. It is the best 661da177e4SLinus Torvalds solution, but it supposes maintaing new variable in ALL 671da177e4SLinus Torvalds skb, even if no tunneling is used. 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds Current solution: t->recursion lock breaks dead loops. It looks 701da177e4SLinus Torvalds like dev->tbusy flag, but I preferred new variable, because 711da177e4SLinus Torvalds the semantics is different. One day, when hard_start_xmit 721da177e4SLinus Torvalds will be multithreaded we will have to use skb->encapsulation. 



   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking into account fragmentation. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL.
   :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we did
   all that we could do. Even if it is your gated who injected
   a fatal route to the network, even if it were you who configured
   a fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */

/* Forward declarations for objects and helpers defined later in this file. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);

/* Number of buckets in each of the tunnel hash tables. */
#define HASH_SIZE  16

/* Id passed to net_generic() to fetch this module's struct ipgre_net. */
static int ipgre_net_id;

/* GRE state looked up per struct net via net_generic(net, ipgre_net_id). */
struct ipgre_net {
	/* Four hash tables, indexed as described in the comment below. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Device returned by the lookup when no configured tunnel matches. */
	struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched by configured keyless tunnels,
   will match fallback tunnel.
1561da177e4SLinus Torvalds */ 1571da177e4SLinus Torvalds 158d5a0a1e3SAl Viro #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 1591da177e4SLinus Torvalds 160eb8ce741SPavel Emelyanov #define tunnels_r_l tunnels[3] 161eb8ce741SPavel Emelyanov #define tunnels_r tunnels[2] 162eb8ce741SPavel Emelyanov #define tunnels_l tunnels[1] 163eb8ce741SPavel Emelyanov #define tunnels_wc tunnels[0] 1641da177e4SLinus Torvalds 1651da177e4SLinus Torvalds static DEFINE_RWLOCK(ipgre_lock); 1661da177e4SLinus Torvalds 1671da177e4SLinus Torvalds /* Given src, dst and key, find appropriate for input tunnel. */ 1681da177e4SLinus Torvalds 169f57e7d5aSPavel Emelyanov static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, 170e1a80002SHerbert Xu __be32 remote, __be32 local, 171e1a80002SHerbert Xu __be32 key, __be16 gre_proto) 1721da177e4SLinus Torvalds { 1731da177e4SLinus Torvalds unsigned h0 = HASH(remote); 1741da177e4SLinus Torvalds unsigned h1 = HASH(key); 1751da177e4SLinus Torvalds struct ip_tunnel *t; 176e1a80002SHerbert Xu struct ip_tunnel *t2 = NULL; 1777daa0004SPavel Emelyanov struct ipgre_net *ign = net_generic(net, ipgre_net_id); 178e1a80002SHerbert Xu int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 
179e1a80002SHerbert Xu ARPHRD_ETHER : ARPHRD_IPGRE; 1801da177e4SLinus Torvalds 181eb8ce741SPavel Emelyanov for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { 1821da177e4SLinus Torvalds if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 183e1a80002SHerbert Xu if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 184e1a80002SHerbert Xu if (t->dev->type == dev_type) 1851da177e4SLinus Torvalds return t; 186e1a80002SHerbert Xu if (t->dev->type == ARPHRD_IPGRE && !t2) 187e1a80002SHerbert Xu t2 = t; 1881da177e4SLinus Torvalds } 1891da177e4SLinus Torvalds } 190e1a80002SHerbert Xu } 191e1a80002SHerbert Xu 192eb8ce741SPavel Emelyanov for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { 1931da177e4SLinus Torvalds if (remote == t->parms.iph.daddr) { 194e1a80002SHerbert Xu if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 195e1a80002SHerbert Xu if (t->dev->type == dev_type) 1961da177e4SLinus Torvalds return t; 197e1a80002SHerbert Xu if (t->dev->type == ARPHRD_IPGRE && !t2) 198e1a80002SHerbert Xu t2 = t; 1991da177e4SLinus Torvalds } 2001da177e4SLinus Torvalds } 201e1a80002SHerbert Xu } 202e1a80002SHerbert Xu 203eb8ce741SPavel Emelyanov for (t = ign->tunnels_l[h1]; t; t = t->next) { 2041da177e4SLinus Torvalds if (local == t->parms.iph.saddr || 205f97c1e0cSJoe Perches (local == t->parms.iph.daddr && 206f97c1e0cSJoe Perches ipv4_is_multicast(local))) { 207e1a80002SHerbert Xu if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 208e1a80002SHerbert Xu if (t->dev->type == dev_type) 2091da177e4SLinus Torvalds return t; 210e1a80002SHerbert Xu if (t->dev->type == ARPHRD_IPGRE && !t2) 211e1a80002SHerbert Xu t2 = t; 2121da177e4SLinus Torvalds } 2131da177e4SLinus Torvalds } 214e1a80002SHerbert Xu } 215e1a80002SHerbert Xu 216eb8ce741SPavel Emelyanov for (t = ign->tunnels_wc[h1]; t; t = t->next) { 217e1a80002SHerbert Xu if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 218e1a80002SHerbert Xu if (t->dev->type == dev_type) 2191da177e4SLinus Torvalds return t; 
220e1a80002SHerbert Xu if (t->dev->type == ARPHRD_IPGRE && !t2) 221e1a80002SHerbert Xu t2 = t; 2221da177e4SLinus Torvalds } 223e1a80002SHerbert Xu } 224e1a80002SHerbert Xu 225e1a80002SHerbert Xu if (t2) 226e1a80002SHerbert Xu return t2; 2271da177e4SLinus Torvalds 2287daa0004SPavel Emelyanov if (ign->fb_tunnel_dev->flags&IFF_UP) 2297daa0004SPavel Emelyanov return netdev_priv(ign->fb_tunnel_dev); 2301da177e4SLinus Torvalds return NULL; 2311da177e4SLinus Torvalds } 2321da177e4SLinus Torvalds 233f57e7d5aSPavel Emelyanov static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, 234f57e7d5aSPavel Emelyanov struct ip_tunnel_parm *parms) 2351da177e4SLinus Torvalds { 2365056a1efSYOSHIFUJI Hideaki __be32 remote = parms->iph.daddr; 2375056a1efSYOSHIFUJI Hideaki __be32 local = parms->iph.saddr; 2385056a1efSYOSHIFUJI Hideaki __be32 key = parms->i_key; 2391da177e4SLinus Torvalds unsigned h = HASH(key); 2401da177e4SLinus Torvalds int prio = 0; 2411da177e4SLinus Torvalds 2421da177e4SLinus Torvalds if (local) 2431da177e4SLinus Torvalds prio |= 1; 244f97c1e0cSJoe Perches if (remote && !ipv4_is_multicast(remote)) { 2451da177e4SLinus Torvalds prio |= 2; 2461da177e4SLinus Torvalds h ^= HASH(remote); 2471da177e4SLinus Torvalds } 2481da177e4SLinus Torvalds 249eb8ce741SPavel Emelyanov return &ign->tunnels[prio][h]; 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds 252f57e7d5aSPavel Emelyanov static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, 253f57e7d5aSPavel Emelyanov struct ip_tunnel *t) 2545056a1efSYOSHIFUJI Hideaki { 255f57e7d5aSPavel Emelyanov return __ipgre_bucket(ign, &t->parms); 2565056a1efSYOSHIFUJI Hideaki } 2575056a1efSYOSHIFUJI Hideaki 258f57e7d5aSPavel Emelyanov static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) 2591da177e4SLinus Torvalds { 260f57e7d5aSPavel Emelyanov struct ip_tunnel **tp = ipgre_bucket(ign, t); 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds t->next = *tp; 2631da177e4SLinus Torvalds 
write_lock_bh(&ipgre_lock); 2641da177e4SLinus Torvalds *tp = t; 2651da177e4SLinus Torvalds write_unlock_bh(&ipgre_lock); 2661da177e4SLinus Torvalds } 2671da177e4SLinus Torvalds 268f57e7d5aSPavel Emelyanov static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 2691da177e4SLinus Torvalds { 2701da177e4SLinus Torvalds struct ip_tunnel **tp; 2711da177e4SLinus Torvalds 272f57e7d5aSPavel Emelyanov for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 2731da177e4SLinus Torvalds if (t == *tp) { 2741da177e4SLinus Torvalds write_lock_bh(&ipgre_lock); 2751da177e4SLinus Torvalds *tp = t->next; 2761da177e4SLinus Torvalds write_unlock_bh(&ipgre_lock); 2771da177e4SLinus Torvalds break; 2781da177e4SLinus Torvalds } 2791da177e4SLinus Torvalds } 2801da177e4SLinus Torvalds } 2811da177e4SLinus Torvalds 282e1a80002SHerbert Xu static struct ip_tunnel *ipgre_tunnel_find(struct net *net, 283e1a80002SHerbert Xu struct ip_tunnel_parm *parms, 284e1a80002SHerbert Xu int type) 2851da177e4SLinus Torvalds { 286d5a0a1e3SAl Viro __be32 remote = parms->iph.daddr; 287d5a0a1e3SAl Viro __be32 local = parms->iph.saddr; 288d5a0a1e3SAl Viro __be32 key = parms->i_key; 289e1a80002SHerbert Xu struct ip_tunnel *t, **tp; 290e1a80002SHerbert Xu struct ipgre_net *ign = net_generic(net, ipgre_net_id); 291e1a80002SHerbert Xu 292e1a80002SHerbert Xu for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) 293e1a80002SHerbert Xu if (local == t->parms.iph.saddr && 294e1a80002SHerbert Xu remote == t->parms.iph.daddr && 295e1a80002SHerbert Xu key == t->parms.i_key && 296e1a80002SHerbert Xu type == t->dev->type) 297e1a80002SHerbert Xu break; 298e1a80002SHerbert Xu 299e1a80002SHerbert Xu return t; 300e1a80002SHerbert Xu } 301e1a80002SHerbert Xu 302e1a80002SHerbert Xu static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, 303e1a80002SHerbert Xu struct ip_tunnel_parm *parms, int create) 304e1a80002SHerbert Xu { 305e1a80002SHerbert Xu struct ip_tunnel *t, *nt; 3061da177e4SLinus 
Torvalds struct net_device *dev; 3071da177e4SLinus Torvalds char name[IFNAMSIZ]; 308f57e7d5aSPavel Emelyanov struct ipgre_net *ign = net_generic(net, ipgre_net_id); 3091da177e4SLinus Torvalds 310e1a80002SHerbert Xu t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); 311e1a80002SHerbert Xu if (t || !create) 3121da177e4SLinus Torvalds return t; 3131da177e4SLinus Torvalds 3141da177e4SLinus Torvalds if (parms->name[0]) 3151da177e4SLinus Torvalds strlcpy(name, parms->name, IFNAMSIZ); 31634cc7ba6SPavel Emelyanov else 31734cc7ba6SPavel Emelyanov sprintf(name, "gre%%d"); 3181da177e4SLinus Torvalds 3191da177e4SLinus Torvalds dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); 3201da177e4SLinus Torvalds if (!dev) 3211da177e4SLinus Torvalds return NULL; 3221da177e4SLinus Torvalds 3230b67ecebSPavel Emelyanov dev_net_set(dev, net); 3240b67ecebSPavel Emelyanov 325b37d428bSPavel Emelyanov if (strchr(name, '%')) { 326b37d428bSPavel Emelyanov if (dev_alloc_name(dev, name) < 0) 327b37d428bSPavel Emelyanov goto failed_free; 328b37d428bSPavel Emelyanov } 329b37d428bSPavel Emelyanov 3302941a486SPatrick McHardy nt = netdev_priv(dev); 3311da177e4SLinus Torvalds nt->parms = *parms; 332c19e654dSHerbert Xu dev->rtnl_link_ops = &ipgre_link_ops; 3331da177e4SLinus Torvalds 33442aa9162SHerbert Xu dev->mtu = ipgre_tunnel_bind_dev(dev); 33542aa9162SHerbert Xu 336b37d428bSPavel Emelyanov if (register_netdevice(dev) < 0) 337b37d428bSPavel Emelyanov goto failed_free; 3381da177e4SLinus Torvalds 3391da177e4SLinus Torvalds dev_hold(dev); 340f57e7d5aSPavel Emelyanov ipgre_tunnel_link(ign, nt); 3411da177e4SLinus Torvalds return nt; 3421da177e4SLinus Torvalds 343b37d428bSPavel Emelyanov failed_free: 344b37d428bSPavel Emelyanov free_netdev(dev); 3451da177e4SLinus Torvalds return NULL; 3461da177e4SLinus Torvalds } 3471da177e4SLinus Torvalds 3481da177e4SLinus Torvalds static void ipgre_tunnel_uninit(struct net_device *dev) 3491da177e4SLinus Torvalds { 350f57e7d5aSPavel Emelyanov struct net *net = 
dev_net(dev); 351f57e7d5aSPavel Emelyanov struct ipgre_net *ign = net_generic(net, ipgre_net_id); 352f57e7d5aSPavel Emelyanov 353f57e7d5aSPavel Emelyanov ipgre_tunnel_unlink(ign, netdev_priv(dev)); 3541da177e4SLinus Torvalds dev_put(dev); 3551da177e4SLinus Torvalds } 3561da177e4SLinus Torvalds 3571da177e4SLinus Torvalds 3581da177e4SLinus Torvalds static void ipgre_err(struct sk_buff *skb, u32 info) 3591da177e4SLinus Torvalds { 3601da177e4SLinus Torvalds 361071f92d0SRami Rosen /* All the routers (except for Linux) return only 3621da177e4SLinus Torvalds 8 bytes of packet payload. It means, that precise relaying of 3631da177e4SLinus Torvalds ICMP in the real Internet is absolutely infeasible. 3641da177e4SLinus Torvalds 3651da177e4SLinus Torvalds Moreover, Cisco "wise men" put GRE key to the third word 3661da177e4SLinus Torvalds in GRE header. It makes impossible maintaining even soft state for keyed 3671da177e4SLinus Torvalds GRE tunnels with enabled checksum. Tell them "thank you". 3681da177e4SLinus Torvalds 3691da177e4SLinus Torvalds Well, I wonder, rfc1812 was written by Cisco employee, 3701da177e4SLinus Torvalds what the hell these idiots break standrads established 3711da177e4SLinus Torvalds by themself??? 
3721da177e4SLinus Torvalds */ 3731da177e4SLinus Torvalds 3741da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr*)skb->data; 375d5a0a1e3SAl Viro __be16 *p = (__be16*)(skb->data+(iph->ihl<<2)); 3761da177e4SLinus Torvalds int grehlen = (iph->ihl<<2) + 4; 37788c7664fSArnaldo Carvalho de Melo const int type = icmp_hdr(skb)->type; 37888c7664fSArnaldo Carvalho de Melo const int code = icmp_hdr(skb)->code; 3791da177e4SLinus Torvalds struct ip_tunnel *t; 380d5a0a1e3SAl Viro __be16 flags; 3811da177e4SLinus Torvalds 3821da177e4SLinus Torvalds flags = p[0]; 3831da177e4SLinus Torvalds if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 3841da177e4SLinus Torvalds if (flags&(GRE_VERSION|GRE_ROUTING)) 3851da177e4SLinus Torvalds return; 3861da177e4SLinus Torvalds if (flags&GRE_KEY) { 3871da177e4SLinus Torvalds grehlen += 4; 3881da177e4SLinus Torvalds if (flags&GRE_CSUM) 3891da177e4SLinus Torvalds grehlen += 4; 3901da177e4SLinus Torvalds } 3911da177e4SLinus Torvalds } 3921da177e4SLinus Torvalds 3931da177e4SLinus Torvalds /* If only 8 bytes returned, keyed message will be dropped here */ 3941da177e4SLinus Torvalds if (skb_headlen(skb) < grehlen) 3951da177e4SLinus Torvalds return; 3961da177e4SLinus Torvalds 3971da177e4SLinus Torvalds switch (type) { 3981da177e4SLinus Torvalds default: 3991da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 4001da177e4SLinus Torvalds return; 4011da177e4SLinus Torvalds 4021da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 4031da177e4SLinus Torvalds switch (code) { 4041da177e4SLinus Torvalds case ICMP_SR_FAILED: 4051da177e4SLinus Torvalds case ICMP_PORT_UNREACH: 4061da177e4SLinus Torvalds /* Impossible event. */ 4071da177e4SLinus Torvalds return; 4081da177e4SLinus Torvalds case ICMP_FRAG_NEEDED: 4091da177e4SLinus Torvalds /* Soft state for pmtu is maintained by IP core. */ 4101da177e4SLinus Torvalds return; 4111da177e4SLinus Torvalds default: 4121da177e4SLinus Torvalds /* All others are translated to HOST_UNREACH. 
4131da177e4SLinus Torvalds rfc2003 contains "deep thoughts" about NET_UNREACH, 4141da177e4SLinus Torvalds I believe they are just ether pollution. --ANK 4151da177e4SLinus Torvalds */ 4161da177e4SLinus Torvalds break; 4171da177e4SLinus Torvalds } 4181da177e4SLinus Torvalds break; 4191da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4201da177e4SLinus Torvalds if (code != ICMP_EXC_TTL) 4211da177e4SLinus Torvalds return; 4221da177e4SLinus Torvalds break; 4231da177e4SLinus Torvalds } 4241da177e4SLinus Torvalds 4251da177e4SLinus Torvalds read_lock(&ipgre_lock); 4263b4667f3SPavel Emelyanov t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, 427e1a80002SHerbert Xu flags & GRE_KEY ? 428e1a80002SHerbert Xu *(((__be32 *)p) + (grehlen / 4) - 1) : 0, 429e1a80002SHerbert Xu p[1]); 430f97c1e0cSJoe Perches if (t == NULL || t->parms.iph.daddr == 0 || 431f97c1e0cSJoe Perches ipv4_is_multicast(t->parms.iph.daddr)) 4321da177e4SLinus Torvalds goto out; 4331da177e4SLinus Torvalds 4341da177e4SLinus Torvalds if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 4351da177e4SLinus Torvalds goto out; 4361da177e4SLinus Torvalds 4371da177e4SLinus Torvalds if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 4381da177e4SLinus Torvalds t->err_count++; 4391da177e4SLinus Torvalds else 4401da177e4SLinus Torvalds t->err_count = 1; 4411da177e4SLinus Torvalds t->err_time = jiffies; 4421da177e4SLinus Torvalds out: 4431da177e4SLinus Torvalds read_unlock(&ipgre_lock); 4441da177e4SLinus Torvalds return; 4451da177e4SLinus Torvalds } 4461da177e4SLinus Torvalds 4471da177e4SLinus Torvalds static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 4481da177e4SLinus Torvalds { 4491da177e4SLinus Torvalds if (INET_ECN_is_ce(iph->tos)) { 4501da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 451eddc9ec5SArnaldo Carvalho de Melo IP_ECN_set_ce(ip_hdr(skb)); 4521da177e4SLinus Torvalds } else if (skb->protocol == htons(ETH_P_IPV6)) { 4530660e03fSArnaldo Carvalho de Melo 
IP6_ECN_set_ce(ipv6_hdr(skb)); 4541da177e4SLinus Torvalds } 4551da177e4SLinus Torvalds } 4561da177e4SLinus Torvalds } 4571da177e4SLinus Torvalds 4581da177e4SLinus Torvalds static inline u8 4591da177e4SLinus Torvalds ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 4601da177e4SLinus Torvalds { 4611da177e4SLinus Torvalds u8 inner = 0; 4621da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 4631da177e4SLinus Torvalds inner = old_iph->tos; 4641da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) 4651da177e4SLinus Torvalds inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 4661da177e4SLinus Torvalds return INET_ECN_encapsulate(tos, inner); 4671da177e4SLinus Torvalds } 4681da177e4SLinus Torvalds 4691da177e4SLinus Torvalds static int ipgre_rcv(struct sk_buff *skb) 4701da177e4SLinus Torvalds { 4711da177e4SLinus Torvalds struct iphdr *iph; 4721da177e4SLinus Torvalds u8 *h; 473d5a0a1e3SAl Viro __be16 flags; 474d3bc23e7SAl Viro __sum16 csum = 0; 475d5a0a1e3SAl Viro __be32 key = 0; 4761da177e4SLinus Torvalds u32 seqno = 0; 4771da177e4SLinus Torvalds struct ip_tunnel *tunnel; 4781da177e4SLinus Torvalds int offset = 4; 479e1a80002SHerbert Xu __be16 gre_proto; 48064194c31SHerbert Xu unsigned int len; 4811da177e4SLinus Torvalds 4821da177e4SLinus Torvalds if (!pskb_may_pull(skb, 16)) 4831da177e4SLinus Torvalds goto drop_nolock; 4841da177e4SLinus Torvalds 485eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 4861da177e4SLinus Torvalds h = skb->data; 487d5a0a1e3SAl Viro flags = *(__be16*)h; 4881da177e4SLinus Torvalds 4891da177e4SLinus Torvalds if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { 4901da177e4SLinus Torvalds /* - Version must be 0. 4911da177e4SLinus Torvalds - We do not support routing headers. 
4921da177e4SLinus Torvalds */ 4931da177e4SLinus Torvalds if (flags&(GRE_VERSION|GRE_ROUTING)) 4941da177e4SLinus Torvalds goto drop_nolock; 4951da177e4SLinus Torvalds 4961da177e4SLinus Torvalds if (flags&GRE_CSUM) { 497fb286bb2SHerbert Xu switch (skb->ip_summed) { 49884fa7933SPatrick McHardy case CHECKSUM_COMPLETE: 499d3bc23e7SAl Viro csum = csum_fold(skb->csum); 500fb286bb2SHerbert Xu if (!csum) 501fb286bb2SHerbert Xu break; 502fb286bb2SHerbert Xu /* fall through */ 503fb286bb2SHerbert Xu case CHECKSUM_NONE: 504fb286bb2SHerbert Xu skb->csum = 0; 505fb286bb2SHerbert Xu csum = __skb_checksum_complete(skb); 50684fa7933SPatrick McHardy skb->ip_summed = CHECKSUM_COMPLETE; 5071da177e4SLinus Torvalds } 5081da177e4SLinus Torvalds offset += 4; 5091da177e4SLinus Torvalds } 5101da177e4SLinus Torvalds if (flags&GRE_KEY) { 511d5a0a1e3SAl Viro key = *(__be32*)(h + offset); 5121da177e4SLinus Torvalds offset += 4; 5131da177e4SLinus Torvalds } 5141da177e4SLinus Torvalds if (flags&GRE_SEQ) { 515d5a0a1e3SAl Viro seqno = ntohl(*(__be32*)(h + offset)); 5161da177e4SLinus Torvalds offset += 4; 5171da177e4SLinus Torvalds } 5181da177e4SLinus Torvalds } 5191da177e4SLinus Torvalds 520e1a80002SHerbert Xu gre_proto = *(__be16 *)(h + 2); 521e1a80002SHerbert Xu 5221da177e4SLinus Torvalds read_lock(&ipgre_lock); 5233b4667f3SPavel Emelyanov if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), 524e1a80002SHerbert Xu iph->saddr, iph->daddr, key, 525e1a80002SHerbert Xu gre_proto))) { 526addd68ebSPavel Emelyanov struct net_device_stats *stats = &tunnel->dev->stats; 527addd68ebSPavel Emelyanov 5281da177e4SLinus Torvalds secpath_reset(skb); 5291da177e4SLinus Torvalds 530e1a80002SHerbert Xu skb->protocol = gre_proto; 5311da177e4SLinus Torvalds /* WCCP version 1 and 2 protocol decoding. 
5321da177e4SLinus Torvalds * - Change protocol to IP 5331da177e4SLinus Torvalds * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header 5341da177e4SLinus Torvalds */ 535e1a80002SHerbert Xu if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { 536496c98dfSYOSHIFUJI Hideaki skb->protocol = htons(ETH_P_IP); 5371da177e4SLinus Torvalds if ((*(h + offset) & 0xF0) != 0x40) 5381da177e4SLinus Torvalds offset += 4; 5391da177e4SLinus Torvalds } 5401da177e4SLinus Torvalds 5411d069167STimo Teras skb->mac_header = skb->network_header; 5424209fb60SArnaldo Carvalho de Melo __pskb_pull(skb, offset); 5439c70220bSArnaldo Carvalho de Melo skb_postpull_rcsum(skb, skb_transport_header(skb), offset); 5441da177e4SLinus Torvalds skb->pkt_type = PACKET_HOST; 5451da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST 546f97c1e0cSJoe Perches if (ipv4_is_multicast(iph->daddr)) { 5471da177e4SLinus Torvalds /* Looped back packet, drop it! */ 548ee6b9673SEric Dumazet if (skb->rtable->fl.iif == 0) 5491da177e4SLinus Torvalds goto drop; 550addd68ebSPavel Emelyanov stats->multicast++; 5511da177e4SLinus Torvalds skb->pkt_type = PACKET_BROADCAST; 5521da177e4SLinus Torvalds } 5531da177e4SLinus Torvalds #endif 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds if (((flags&GRE_CSUM) && csum) || 5561da177e4SLinus Torvalds (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 557addd68ebSPavel Emelyanov stats->rx_crc_errors++; 558addd68ebSPavel Emelyanov stats->rx_errors++; 5591da177e4SLinus Torvalds goto drop; 5601da177e4SLinus Torvalds } 5611da177e4SLinus Torvalds if (tunnel->parms.i_flags&GRE_SEQ) { 5621da177e4SLinus Torvalds if (!(flags&GRE_SEQ) || 5631da177e4SLinus Torvalds (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 564addd68ebSPavel Emelyanov stats->rx_fifo_errors++; 565addd68ebSPavel Emelyanov stats->rx_errors++; 5661da177e4SLinus Torvalds goto drop; 5671da177e4SLinus Torvalds } 5681da177e4SLinus Torvalds tunnel->i_seqno = seqno + 1; 5691da177e4SLinus Torvalds } 
570e1a80002SHerbert Xu 57164194c31SHerbert Xu len = skb->len; 57264194c31SHerbert Xu 573e1a80002SHerbert Xu /* Warning: All skb pointers will be invalidated! */ 574e1a80002SHerbert Xu if (tunnel->dev->type == ARPHRD_ETHER) { 575e1a80002SHerbert Xu if (!pskb_may_pull(skb, ETH_HLEN)) { 576e1a80002SHerbert Xu stats->rx_length_errors++; 577e1a80002SHerbert Xu stats->rx_errors++; 578e1a80002SHerbert Xu goto drop; 579e1a80002SHerbert Xu } 580e1a80002SHerbert Xu 581e1a80002SHerbert Xu iph = ip_hdr(skb); 582e1a80002SHerbert Xu skb->protocol = eth_type_trans(skb, tunnel->dev); 583e1a80002SHerbert Xu skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 584e1a80002SHerbert Xu } 585e1a80002SHerbert Xu 586addd68ebSPavel Emelyanov stats->rx_packets++; 58764194c31SHerbert Xu stats->rx_bytes += len; 5881da177e4SLinus Torvalds skb->dev = tunnel->dev; 5891da177e4SLinus Torvalds dst_release(skb->dst); 5901da177e4SLinus Torvalds skb->dst = NULL; 5911da177e4SLinus Torvalds nf_reset(skb); 592e1a80002SHerbert Xu 593e1a80002SHerbert Xu skb_reset_network_header(skb); 5941da177e4SLinus Torvalds ipgre_ecn_decapsulate(iph, skb); 595e1a80002SHerbert Xu 5961da177e4SLinus Torvalds netif_rx(skb); 5971da177e4SLinus Torvalds read_unlock(&ipgre_lock); 5981da177e4SLinus Torvalds return(0); 5991da177e4SLinus Torvalds } 60045af08beSHerbert Xu icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 6011da177e4SLinus Torvalds 6021da177e4SLinus Torvalds drop: 6031da177e4SLinus Torvalds read_unlock(&ipgre_lock); 6041da177e4SLinus Torvalds drop_nolock: 6051da177e4SLinus Torvalds kfree_skb(skb); 6061da177e4SLinus Torvalds return(0); 6071da177e4SLinus Torvalds } 6081da177e4SLinus Torvalds 6091da177e4SLinus Torvalds static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 6101da177e4SLinus Torvalds { 6112941a486SPatrick McHardy struct ip_tunnel *tunnel = netdev_priv(dev); 612addd68ebSPavel Emelyanov struct net_device_stats *stats = &tunnel->dev->stats; 613eddc9ec5SArnaldo Carvalho de Melo struct 
iphdr *old_iph = ip_hdr(skb); 6141da177e4SLinus Torvalds struct iphdr *tiph; 6151da177e4SLinus Torvalds u8 tos; 616d5a0a1e3SAl Viro __be16 df; 6171da177e4SLinus Torvalds struct rtable *rt; /* Route to the other host */ 6181da177e4SLinus Torvalds struct net_device *tdev; /* Device to other host */ 6191da177e4SLinus Torvalds struct iphdr *iph; /* Our new IP header */ 620c2636b4dSChuck Lever unsigned int max_headroom; /* The extra header space needed */ 6211da177e4SLinus Torvalds int gre_hlen; 622d5a0a1e3SAl Viro __be32 dst; 6231da177e4SLinus Torvalds int mtu; 6241da177e4SLinus Torvalds 6251da177e4SLinus Torvalds if (tunnel->recursion++) { 626addd68ebSPavel Emelyanov stats->collisions++; 6271da177e4SLinus Torvalds goto tx_error; 6281da177e4SLinus Torvalds } 6291da177e4SLinus Torvalds 630e1a80002SHerbert Xu if (dev->type == ARPHRD_ETHER) 631e1a80002SHerbert Xu IPCB(skb)->flags = 0; 632e1a80002SHerbert Xu 633e1a80002SHerbert Xu if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 6341da177e4SLinus Torvalds gre_hlen = 0; 6351da177e4SLinus Torvalds tiph = (struct iphdr*)skb->data; 6361da177e4SLinus Torvalds } else { 6371da177e4SLinus Torvalds gre_hlen = tunnel->hlen; 6381da177e4SLinus Torvalds tiph = &tunnel->parms.iph; 6391da177e4SLinus Torvalds } 6401da177e4SLinus Torvalds 6411da177e4SLinus Torvalds if ((dst = tiph->daddr) == 0) { 6421da177e4SLinus Torvalds /* NBMA tunnel */ 6431da177e4SLinus Torvalds 6441da177e4SLinus Torvalds if (skb->dst == NULL) { 645addd68ebSPavel Emelyanov stats->tx_fifo_errors++; 6461da177e4SLinus Torvalds goto tx_error; 6471da177e4SLinus Torvalds } 6481da177e4SLinus Torvalds 6491da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 650ee6b9673SEric Dumazet rt = skb->rtable; 6511da177e4SLinus Torvalds if ((dst = rt->rt_gateway) == 0) 6521da177e4SLinus Torvalds goto tx_error_icmp; 6531da177e4SLinus Torvalds } 6541da177e4SLinus Torvalds #ifdef CONFIG_IPV6 6551da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) { 
6561da177e4SLinus Torvalds struct in6_addr *addr6; 6571da177e4SLinus Torvalds int addr_type; 6581da177e4SLinus Torvalds struct neighbour *neigh = skb->dst->neighbour; 6591da177e4SLinus Torvalds 6601da177e4SLinus Torvalds if (neigh == NULL) 6611da177e4SLinus Torvalds goto tx_error; 6621da177e4SLinus Torvalds 6631da177e4SLinus Torvalds addr6 = (struct in6_addr*)&neigh->primary_key; 6641da177e4SLinus Torvalds addr_type = ipv6_addr_type(addr6); 6651da177e4SLinus Torvalds 6661da177e4SLinus Torvalds if (addr_type == IPV6_ADDR_ANY) { 6670660e03fSArnaldo Carvalho de Melo addr6 = &ipv6_hdr(skb)->daddr; 6681da177e4SLinus Torvalds addr_type = ipv6_addr_type(addr6); 6691da177e4SLinus Torvalds } 6701da177e4SLinus Torvalds 6711da177e4SLinus Torvalds if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 6721da177e4SLinus Torvalds goto tx_error_icmp; 6731da177e4SLinus Torvalds 6741da177e4SLinus Torvalds dst = addr6->s6_addr32[3]; 6751da177e4SLinus Torvalds } 6761da177e4SLinus Torvalds #endif 6771da177e4SLinus Torvalds else 6781da177e4SLinus Torvalds goto tx_error; 6791da177e4SLinus Torvalds } 6801da177e4SLinus Torvalds 6811da177e4SLinus Torvalds tos = tiph->tos; 6821da177e4SLinus Torvalds if (tos&1) { 6831da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 6841da177e4SLinus Torvalds tos = old_iph->tos; 6851da177e4SLinus Torvalds tos &= ~1; 6861da177e4SLinus Torvalds } 6871da177e4SLinus Torvalds 6881da177e4SLinus Torvalds { 6891da177e4SLinus Torvalds struct flowi fl = { .oif = tunnel->parms.link, 6901da177e4SLinus Torvalds .nl_u = { .ip4_u = 6911da177e4SLinus Torvalds { .daddr = dst, 6921da177e4SLinus Torvalds .saddr = tiph->saddr, 6931da177e4SLinus Torvalds .tos = RT_TOS(tos) } }, 6941da177e4SLinus Torvalds .proto = IPPROTO_GRE }; 69596635522SPavel Emelyanov if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 696addd68ebSPavel Emelyanov stats->tx_carrier_errors++; 6971da177e4SLinus Torvalds goto tx_error; 6981da177e4SLinus Torvalds } 6991da177e4SLinus Torvalds } 7001da177e4SLinus 
Torvalds tdev = rt->u.dst.dev; 7011da177e4SLinus Torvalds 7021da177e4SLinus Torvalds if (tdev == dev) { 7031da177e4SLinus Torvalds ip_rt_put(rt); 704addd68ebSPavel Emelyanov stats->collisions++; 7051da177e4SLinus Torvalds goto tx_error; 7061da177e4SLinus Torvalds } 7071da177e4SLinus Torvalds 7081da177e4SLinus Torvalds df = tiph->frag_off; 7091da177e4SLinus Torvalds if (df) 710c95b819aSHerbert Xu mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; 7111da177e4SLinus Torvalds else 7121da177e4SLinus Torvalds mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; 7131da177e4SLinus Torvalds 7141da177e4SLinus Torvalds if (skb->dst) 7151da177e4SLinus Torvalds skb->dst->ops->update_pmtu(skb->dst, mtu); 7161da177e4SLinus Torvalds 7171da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 7181da177e4SLinus Torvalds df |= (old_iph->frag_off&htons(IP_DF)); 7191da177e4SLinus Torvalds 7201da177e4SLinus Torvalds if ((old_iph->frag_off&htons(IP_DF)) && 7211da177e4SLinus Torvalds mtu < ntohs(old_iph->tot_len)) { 7221da177e4SLinus Torvalds icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 7231da177e4SLinus Torvalds ip_rt_put(rt); 7241da177e4SLinus Torvalds goto tx_error; 7251da177e4SLinus Torvalds } 7261da177e4SLinus Torvalds } 7271da177e4SLinus Torvalds #ifdef CONFIG_IPV6 7281da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) { 7291da177e4SLinus Torvalds struct rt6_info *rt6 = (struct rt6_info*)skb->dst; 7301da177e4SLinus Torvalds 7311da177e4SLinus Torvalds if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { 732f97c1e0cSJoe Perches if ((tunnel->parms.iph.daddr && 733f97c1e0cSJoe Perches !ipv4_is_multicast(tunnel->parms.iph.daddr)) || 7341da177e4SLinus Torvalds rt6->rt6i_dst.plen == 128) { 7351da177e4SLinus Torvalds rt6->rt6i_flags |= RTF_MODIFIED; 7361da177e4SLinus Torvalds skb->dst->metrics[RTAX_MTU-1] = mtu; 7371da177e4SLinus Torvalds } 7381da177e4SLinus Torvalds } 7391da177e4SLinus Torvalds 7401da177e4SLinus Torvalds if (mtu 
>= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { 7411da177e4SLinus Torvalds icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 7421da177e4SLinus Torvalds ip_rt_put(rt); 7431da177e4SLinus Torvalds goto tx_error; 7441da177e4SLinus Torvalds } 7451da177e4SLinus Torvalds } 7461da177e4SLinus Torvalds #endif 7471da177e4SLinus Torvalds 7481da177e4SLinus Torvalds if (tunnel->err_count > 0) { 7491da177e4SLinus Torvalds if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 7501da177e4SLinus Torvalds tunnel->err_count--; 7511da177e4SLinus Torvalds 7521da177e4SLinus Torvalds dst_link_failure(skb); 7531da177e4SLinus Torvalds } else 7541da177e4SLinus Torvalds tunnel->err_count = 0; 7551da177e4SLinus Torvalds } 7561da177e4SLinus Torvalds 7571da177e4SLinus Torvalds max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; 7581da177e4SLinus Torvalds 759cfbba49dSPatrick McHardy if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 760cfbba49dSPatrick McHardy (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 7611da177e4SLinus Torvalds struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 7621da177e4SLinus Torvalds if (!new_skb) { 7631da177e4SLinus Torvalds ip_rt_put(rt); 7641da177e4SLinus Torvalds stats->tx_dropped++; 7651da177e4SLinus Torvalds dev_kfree_skb(skb); 7661da177e4SLinus Torvalds tunnel->recursion--; 7671da177e4SLinus Torvalds return 0; 7681da177e4SLinus Torvalds } 7691da177e4SLinus Torvalds if (skb->sk) 7701da177e4SLinus Torvalds skb_set_owner_w(new_skb, skb->sk); 7711da177e4SLinus Torvalds dev_kfree_skb(skb); 7721da177e4SLinus Torvalds skb = new_skb; 773eddc9ec5SArnaldo Carvalho de Melo old_iph = ip_hdr(skb); 7741da177e4SLinus Torvalds } 7751da177e4SLinus Torvalds 77664194c31SHerbert Xu skb_reset_transport_header(skb); 777e2d1bca7SArnaldo Carvalho de Melo skb_push(skb, gre_hlen); 778e2d1bca7SArnaldo Carvalho de Melo skb_reset_network_header(skb); 7791da177e4SLinus Torvalds memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 78048d5cad8SPatrick 
McHardy IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 78148d5cad8SPatrick McHardy IPSKB_REROUTED); 7821da177e4SLinus Torvalds dst_release(skb->dst); 7831da177e4SLinus Torvalds skb->dst = &rt->u.dst; 7841da177e4SLinus Torvalds 7851da177e4SLinus Torvalds /* 7861da177e4SLinus Torvalds * Push down and install the IPIP header. 7871da177e4SLinus Torvalds */ 7881da177e4SLinus Torvalds 789eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 7901da177e4SLinus Torvalds iph->version = 4; 7911da177e4SLinus Torvalds iph->ihl = sizeof(struct iphdr) >> 2; 7921da177e4SLinus Torvalds iph->frag_off = df; 7931da177e4SLinus Torvalds iph->protocol = IPPROTO_GRE; 7941da177e4SLinus Torvalds iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 7951da177e4SLinus Torvalds iph->daddr = rt->rt_dst; 7961da177e4SLinus Torvalds iph->saddr = rt->rt_src; 7971da177e4SLinus Torvalds 7981da177e4SLinus Torvalds if ((iph->ttl = tiph->ttl) == 0) { 7991da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 8001da177e4SLinus Torvalds iph->ttl = old_iph->ttl; 8011da177e4SLinus Torvalds #ifdef CONFIG_IPV6 8021da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) 8031da177e4SLinus Torvalds iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; 8041da177e4SLinus Torvalds #endif 8051da177e4SLinus Torvalds else 8061da177e4SLinus Torvalds iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); 8071da177e4SLinus Torvalds } 8081da177e4SLinus Torvalds 809d5a0a1e3SAl Viro ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; 810e1a80002SHerbert Xu ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? 
811e1a80002SHerbert Xu htons(ETH_P_TEB) : skb->protocol; 8121da177e4SLinus Torvalds 8131da177e4SLinus Torvalds if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { 814d5a0a1e3SAl Viro __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); 8151da177e4SLinus Torvalds 8161da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_SEQ) { 8171da177e4SLinus Torvalds ++tunnel->o_seqno; 8181da177e4SLinus Torvalds *ptr = htonl(tunnel->o_seqno); 8191da177e4SLinus Torvalds ptr--; 8201da177e4SLinus Torvalds } 8211da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_KEY) { 8221da177e4SLinus Torvalds *ptr = tunnel->parms.o_key; 8231da177e4SLinus Torvalds ptr--; 8241da177e4SLinus Torvalds } 8251da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_CSUM) { 8261da177e4SLinus Torvalds *ptr = 0; 8275f92a738SAl Viro *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); 8281da177e4SLinus Torvalds } 8291da177e4SLinus Torvalds } 8301da177e4SLinus Torvalds 8311da177e4SLinus Torvalds nf_reset(skb); 8321da177e4SLinus Torvalds 8331da177e4SLinus Torvalds IPTUNNEL_XMIT(); 8341da177e4SLinus Torvalds tunnel->recursion--; 8351da177e4SLinus Torvalds return 0; 8361da177e4SLinus Torvalds 8371da177e4SLinus Torvalds tx_error_icmp: 8381da177e4SLinus Torvalds dst_link_failure(skb); 8391da177e4SLinus Torvalds 8401da177e4SLinus Torvalds tx_error: 8411da177e4SLinus Torvalds stats->tx_errors++; 8421da177e4SLinus Torvalds dev_kfree_skb(skb); 8431da177e4SLinus Torvalds tunnel->recursion--; 8441da177e4SLinus Torvalds return 0; 8451da177e4SLinus Torvalds } 8461da177e4SLinus Torvalds 84742aa9162SHerbert Xu static int ipgre_tunnel_bind_dev(struct net_device *dev) 848ee34c1ebSMichal Schmidt { 849ee34c1ebSMichal Schmidt struct net_device *tdev = NULL; 850ee34c1ebSMichal Schmidt struct ip_tunnel *tunnel; 851ee34c1ebSMichal Schmidt struct iphdr *iph; 852ee34c1ebSMichal Schmidt int hlen = LL_MAX_HEADER; 853ee34c1ebSMichal Schmidt int mtu = ETH_DATA_LEN; 854ee34c1ebSMichal Schmidt int addend = 
sizeof(struct iphdr) + 4; 855ee34c1ebSMichal Schmidt 856ee34c1ebSMichal Schmidt tunnel = netdev_priv(dev); 857ee34c1ebSMichal Schmidt iph = &tunnel->parms.iph; 858ee34c1ebSMichal Schmidt 859c95b819aSHerbert Xu /* Guess output device to choose reasonable mtu and needed_headroom */ 860ee34c1ebSMichal Schmidt 861ee34c1ebSMichal Schmidt if (iph->daddr) { 862ee34c1ebSMichal Schmidt struct flowi fl = { .oif = tunnel->parms.link, 863ee34c1ebSMichal Schmidt .nl_u = { .ip4_u = 864ee34c1ebSMichal Schmidt { .daddr = iph->daddr, 865ee34c1ebSMichal Schmidt .saddr = iph->saddr, 866ee34c1ebSMichal Schmidt .tos = RT_TOS(iph->tos) } }, 867ee34c1ebSMichal Schmidt .proto = IPPROTO_GRE }; 868ee34c1ebSMichal Schmidt struct rtable *rt; 86996635522SPavel Emelyanov if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 870ee34c1ebSMichal Schmidt tdev = rt->u.dst.dev; 871ee34c1ebSMichal Schmidt ip_rt_put(rt); 872ee34c1ebSMichal Schmidt } 873e1a80002SHerbert Xu 874e1a80002SHerbert Xu if (dev->type != ARPHRD_ETHER) 875ee34c1ebSMichal Schmidt dev->flags |= IFF_POINTOPOINT; 876ee34c1ebSMichal Schmidt } 877ee34c1ebSMichal Schmidt 878ee34c1ebSMichal Schmidt if (!tdev && tunnel->parms.link) 87996635522SPavel Emelyanov tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 880ee34c1ebSMichal Schmidt 881ee34c1ebSMichal Schmidt if (tdev) { 882c95b819aSHerbert Xu hlen = tdev->hard_header_len + tdev->needed_headroom; 883ee34c1ebSMichal Schmidt mtu = tdev->mtu; 884ee34c1ebSMichal Schmidt } 885ee34c1ebSMichal Schmidt dev->iflink = tunnel->parms.link; 886ee34c1ebSMichal Schmidt 887ee34c1ebSMichal Schmidt /* Precalculate GRE options length */ 888ee34c1ebSMichal Schmidt if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { 889ee34c1ebSMichal Schmidt if (tunnel->parms.o_flags&GRE_CSUM) 890ee34c1ebSMichal Schmidt addend += 4; 891ee34c1ebSMichal Schmidt if (tunnel->parms.o_flags&GRE_KEY) 892ee34c1ebSMichal Schmidt addend += 4; 893ee34c1ebSMichal Schmidt if (tunnel->parms.o_flags&GRE_SEQ) 
894ee34c1ebSMichal Schmidt addend += 4; 895ee34c1ebSMichal Schmidt } 896c95b819aSHerbert Xu dev->needed_headroom = addend + hlen; 89742aa9162SHerbert Xu /* Tunnel MTU = underlying MTU minus our link header and the IP+GRE encapsulation overhead; both must be subtracted. */ mtu -= dev->hard_header_len + addend; 89842aa9162SHerbert Xu 89942aa9162SHerbert Xu if (mtu < 68) 90042aa9162SHerbert Xu mtu = 68; 90142aa9162SHerbert Xu 902ee34c1ebSMichal Schmidt tunnel->hlen = addend; 903ee34c1ebSMichal Schmidt 90442aa9162SHerbert Xu return mtu; 905ee34c1ebSMichal Schmidt } 906ee34c1ebSMichal Schmidt 9071da177e4SLinus Torvalds static int 9081da177e4SLinus Torvalds ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 9091da177e4SLinus Torvalds { 9101da177e4SLinus Torvalds int err = 0; 9111da177e4SLinus Torvalds struct ip_tunnel_parm p; 9121da177e4SLinus Torvalds struct ip_tunnel *t; 913f57e7d5aSPavel Emelyanov struct net *net = dev_net(dev); 914f57e7d5aSPavel Emelyanov struct ipgre_net *ign = net_generic(net, ipgre_net_id); 9151da177e4SLinus Torvalds 9161da177e4SLinus Torvalds switch (cmd) { 9171da177e4SLinus Torvalds case SIOCGETTUNNEL: 9181da177e4SLinus Torvalds t = NULL; 9197daa0004SPavel Emelyanov if (dev == ign->fb_tunnel_dev) { 9201da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 9211da177e4SLinus Torvalds err = -EFAULT; 9221da177e4SLinus Torvalds break; 9231da177e4SLinus Torvalds } 924f57e7d5aSPavel Emelyanov t = ipgre_tunnel_locate(net, &p, 0); 9251da177e4SLinus Torvalds } 9261da177e4SLinus Torvalds if (t == NULL) 9272941a486SPatrick McHardy t = netdev_priv(dev); 9281da177e4SLinus Torvalds memcpy(&p, &t->parms, sizeof(p)); 9291da177e4SLinus Torvalds if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 9301da177e4SLinus Torvalds err = -EFAULT; 9311da177e4SLinus Torvalds break; 9321da177e4SLinus Torvalds 9331da177e4SLinus Torvalds case SIOCADDTUNNEL: 9341da177e4SLinus Torvalds case SIOCCHGTUNNEL: 9351da177e4SLinus Torvalds err = -EPERM; 9361da177e4SLinus Torvalds if (!capable(CAP_NET_ADMIN)) 9371da177e4SLinus Torvalds goto done; 
9381da177e4SLinus Torvalds 9391da177e4SLinus Torvalds err = -EFAULT; 9401da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 9411da177e4SLinus Torvalds goto done; 9421da177e4SLinus Torvalds 9431da177e4SLinus Torvalds err = -EINVAL; 9441da177e4SLinus Torvalds if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 9451da177e4SLinus Torvalds p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 9461da177e4SLinus Torvalds ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 9471da177e4SLinus Torvalds goto done; 9481da177e4SLinus Torvalds if (p.iph.ttl) 9491da177e4SLinus Torvalds p.iph.frag_off |= htons(IP_DF); 9501da177e4SLinus Torvalds 9511da177e4SLinus Torvalds if (!(p.i_flags&GRE_KEY)) 9521da177e4SLinus Torvalds p.i_key = 0; 9531da177e4SLinus Torvalds if (!(p.o_flags&GRE_KEY)) 9541da177e4SLinus Torvalds p.o_key = 0; 9551da177e4SLinus Torvalds 956f57e7d5aSPavel Emelyanov t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); 9571da177e4SLinus Torvalds 9587daa0004SPavel Emelyanov if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 9591da177e4SLinus Torvalds if (t != NULL) { 9601da177e4SLinus Torvalds if (t->dev != dev) { 9611da177e4SLinus Torvalds err = -EEXIST; 9621da177e4SLinus Torvalds break; 9631da177e4SLinus Torvalds } 9641da177e4SLinus Torvalds } else { 9651da177e4SLinus Torvalds unsigned nflags=0; 9661da177e4SLinus Torvalds 9672941a486SPatrick McHardy t = netdev_priv(dev); 9681da177e4SLinus Torvalds 969f97c1e0cSJoe Perches if (ipv4_is_multicast(p.iph.daddr)) 9701da177e4SLinus Torvalds nflags = IFF_BROADCAST; 9711da177e4SLinus Torvalds else if (p.iph.daddr) 9721da177e4SLinus Torvalds nflags = IFF_POINTOPOINT; 9731da177e4SLinus Torvalds 9741da177e4SLinus Torvalds if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 9751da177e4SLinus Torvalds err = -EINVAL; 9761da177e4SLinus Torvalds break; 9771da177e4SLinus Torvalds } 978f57e7d5aSPavel Emelyanov ipgre_tunnel_unlink(ign, t); 9791da177e4SLinus Torvalds t->parms.iph.saddr = 
p.iph.saddr; 9801da177e4SLinus Torvalds t->parms.iph.daddr = p.iph.daddr; 9811da177e4SLinus Torvalds t->parms.i_key = p.i_key; 9821da177e4SLinus Torvalds t->parms.o_key = p.o_key; 9831da177e4SLinus Torvalds memcpy(dev->dev_addr, &p.iph.saddr, 4); 9841da177e4SLinus Torvalds memcpy(dev->broadcast, &p.iph.daddr, 4); 985f57e7d5aSPavel Emelyanov ipgre_tunnel_link(ign, t); 9861da177e4SLinus Torvalds netdev_state_change(dev); 9871da177e4SLinus Torvalds } 9881da177e4SLinus Torvalds } 9891da177e4SLinus Torvalds 9901da177e4SLinus Torvalds if (t) { 9911da177e4SLinus Torvalds err = 0; 9921da177e4SLinus Torvalds if (cmd == SIOCCHGTUNNEL) { 9931da177e4SLinus Torvalds t->parms.iph.ttl = p.iph.ttl; 9941da177e4SLinus Torvalds t->parms.iph.tos = p.iph.tos; 9951da177e4SLinus Torvalds t->parms.iph.frag_off = p.iph.frag_off; 996ee34c1ebSMichal Schmidt if (t->parms.link != p.link) { 997ee34c1ebSMichal Schmidt t->parms.link = p.link; 99842aa9162SHerbert Xu dev->mtu = ipgre_tunnel_bind_dev(dev); 999ee34c1ebSMichal Schmidt netdev_state_change(dev); 1000ee34c1ebSMichal Schmidt } 10011da177e4SLinus Torvalds } 10021da177e4SLinus Torvalds if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 10031da177e4SLinus Torvalds err = -EFAULT; 10041da177e4SLinus Torvalds } else 10051da177e4SLinus Torvalds err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); 10061da177e4SLinus Torvalds break; 10071da177e4SLinus Torvalds 10081da177e4SLinus Torvalds case SIOCDELTUNNEL: 10091da177e4SLinus Torvalds err = -EPERM; 10101da177e4SLinus Torvalds if (!capable(CAP_NET_ADMIN)) 10111da177e4SLinus Torvalds goto done; 10121da177e4SLinus Torvalds 10137daa0004SPavel Emelyanov if (dev == ign->fb_tunnel_dev) { 10141da177e4SLinus Torvalds err = -EFAULT; 10151da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 10161da177e4SLinus Torvalds goto done; 10171da177e4SLinus Torvalds err = -ENOENT; 1018f57e7d5aSPavel Emelyanov if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) 10191da177e4SLinus Torvalds goto done; 10201da177e4SLinus Torvalds err = -EPERM; 10217daa0004SPavel Emelyanov if (t == netdev_priv(ign->fb_tunnel_dev)) 10221da177e4SLinus Torvalds goto done; 10231da177e4SLinus Torvalds dev = t->dev; 10241da177e4SLinus Torvalds } 102522f8cde5SStephen Hemminger unregister_netdevice(dev); 102622f8cde5SStephen Hemminger err = 0; 10271da177e4SLinus Torvalds break; 10281da177e4SLinus Torvalds 10291da177e4SLinus Torvalds default: 10301da177e4SLinus Torvalds err = -EINVAL; 10311da177e4SLinus Torvalds } 10321da177e4SLinus Torvalds 10331da177e4SLinus Torvalds done: 10341da177e4SLinus Torvalds return err; 10351da177e4SLinus Torvalds } 10361da177e4SLinus Torvalds 10371da177e4SLinus Torvalds static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 10381da177e4SLinus Torvalds { 10392941a486SPatrick McHardy struct ip_tunnel *tunnel = netdev_priv(dev); 1040c95b819aSHerbert Xu if (new_mtu < 68 || 1041c95b819aSHerbert Xu new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) 10421da177e4SLinus Torvalds return -EINVAL; 10431da177e4SLinus Torvalds dev->mtu = new_mtu; 10441da177e4SLinus Torvalds return 0; 10451da177e4SLinus Torvalds } 10461da177e4SLinus Torvalds 10471da177e4SLinus Torvalds /* Nice toy. 
Unfortunately, useless in real life :-) 10481da177e4SLinus Torvalds It allows one to construct a virtual multiprotocol broadcast "LAN" 10491da177e4SLinus Torvalds over the Internet, provided multicast routing is tuned. 10501da177e4SLinus Torvalds 10511da177e4SLinus Torvalds 10521da177e4SLinus Torvalds I have no idea whether this bicycle was invented before me, 10531da177e4SLinus Torvalds so I had to set ARPHRD_IPGRE to a random value. 10541da177e4SLinus Torvalds I have the impression that Cisco could make something similar, 10551da177e4SLinus Torvalds but this feature is apparently missing in IOS<=11.2(8). 10561da177e4SLinus Torvalds 10571da177e4SLinus Torvalds I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 10581da177e4SLinus Torvalds with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 10591da177e4SLinus Torvalds 10601da177e4SLinus Torvalds ping -t 255 224.66.66.66 10611da177e4SLinus Torvalds 10621da177e4SLinus Torvalds If nobody answers, mbone does not work. 10631da177e4SLinus Torvalds 10641da177e4SLinus Torvalds ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 10651da177e4SLinus Torvalds ip addr add 10.66.66.<somewhat>/24 dev Universe 10661da177e4SLinus Torvalds ifconfig Universe up 10671da177e4SLinus Torvalds ifconfig Universe add fe80::<Your_real_addr>/10 10681da177e4SLinus Torvalds ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 10691da177e4SLinus Torvalds ftp 10.66.66.66 10701da177e4SLinus Torvalds ... 10711da177e4SLinus Torvalds ftp fec0:6666:6666::193.233.7.65 10721da177e4SLinus Torvalds ... 
10731da177e4SLinus Torvalds 10741da177e4SLinus Torvalds */ 10751da177e4SLinus Torvalds 10763b04dddeSStephen Hemminger static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 10773b04dddeSStephen Hemminger unsigned short type, 10783b04dddeSStephen Hemminger const void *daddr, const void *saddr, unsigned len) 10791da177e4SLinus Torvalds { 10802941a486SPatrick McHardy struct ip_tunnel *t = netdev_priv(dev); 10811da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1082d5a0a1e3SAl Viro __be16 *p = (__be16*)(iph+1); 10831da177e4SLinus Torvalds 10841da177e4SLinus Torvalds memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 10851da177e4SLinus Torvalds p[0] = t->parms.o_flags; 10861da177e4SLinus Torvalds p[1] = htons(type); 10871da177e4SLinus Torvalds 10881da177e4SLinus Torvalds /* 10891da177e4SLinus Torvalds * Set the source hardware address. 10901da177e4SLinus Torvalds */ 10911da177e4SLinus Torvalds 10921da177e4SLinus Torvalds if (saddr) 10931da177e4SLinus Torvalds memcpy(&iph->saddr, saddr, 4); 10941da177e4SLinus Torvalds 10951da177e4SLinus Torvalds if (daddr) { 10961da177e4SLinus Torvalds memcpy(&iph->daddr, daddr, 4); 10971da177e4SLinus Torvalds return t->hlen; 10981da177e4SLinus Torvalds } 1099f97c1e0cSJoe Perches if (iph->daddr && !ipv4_is_multicast(iph->daddr)) 11001da177e4SLinus Torvalds return t->hlen; 11011da177e4SLinus Torvalds 11021da177e4SLinus Torvalds return -t->hlen; 11031da177e4SLinus Torvalds } 11041da177e4SLinus Torvalds 11056a5f44d7STimo Teras static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 11066a5f44d7STimo Teras { 11076a5f44d7STimo Teras struct iphdr *iph = (struct iphdr*) skb_mac_header(skb); 11086a5f44d7STimo Teras memcpy(haddr, &iph->saddr, 4); 11096a5f44d7STimo Teras return 4; 11106a5f44d7STimo Teras } 11116a5f44d7STimo Teras 11123b04dddeSStephen Hemminger static const struct header_ops ipgre_header_ops = { 11133b04dddeSStephen Hemminger .create = ipgre_header, 
11146a5f44d7STimo Teras .parse = ipgre_header_parse, 11153b04dddeSStephen Hemminger }; 11163b04dddeSStephen Hemminger 11176a5f44d7STimo Teras #ifdef CONFIG_NET_IPGRE_BROADCAST 11181da177e4SLinus Torvalds static int ipgre_open(struct net_device *dev) 11191da177e4SLinus Torvalds { 11202941a486SPatrick McHardy struct ip_tunnel *t = netdev_priv(dev); 11211da177e4SLinus Torvalds 1122f97c1e0cSJoe Perches if (ipv4_is_multicast(t->parms.iph.daddr)) { 11231da177e4SLinus Torvalds struct flowi fl = { .oif = t->parms.link, 11241da177e4SLinus Torvalds .nl_u = { .ip4_u = 11251da177e4SLinus Torvalds { .daddr = t->parms.iph.daddr, 11261da177e4SLinus Torvalds .saddr = t->parms.iph.saddr, 11271da177e4SLinus Torvalds .tos = RT_TOS(t->parms.iph.tos) } }, 11281da177e4SLinus Torvalds .proto = IPPROTO_GRE }; 11291da177e4SLinus Torvalds struct rtable *rt; 113096635522SPavel Emelyanov if (ip_route_output_key(dev_net(dev), &rt, &fl)) 11311da177e4SLinus Torvalds return -EADDRNOTAVAIL; 11321da177e4SLinus Torvalds dev = rt->u.dst.dev; 11331da177e4SLinus Torvalds ip_rt_put(rt); 1134e5ed6399SHerbert Xu if (__in_dev_get_rtnl(dev) == NULL) 11351da177e4SLinus Torvalds return -EADDRNOTAVAIL; 11361da177e4SLinus Torvalds t->mlink = dev->ifindex; 1137e5ed6399SHerbert Xu ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); 11381da177e4SLinus Torvalds } 11391da177e4SLinus Torvalds return 0; 11401da177e4SLinus Torvalds } 11411da177e4SLinus Torvalds 11421da177e4SLinus Torvalds static int ipgre_close(struct net_device *dev) 11431da177e4SLinus Torvalds { 11442941a486SPatrick McHardy struct ip_tunnel *t = netdev_priv(dev); 1145f97c1e0cSJoe Perches if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 11467fee0ca2SDenis V. 
Lunev struct in_device *in_dev; 1147c346dca1SYOSHIFUJI Hideaki in_dev = inetdev_by_index(dev_net(dev), t->mlink); 11481da177e4SLinus Torvalds if (in_dev) { 11491da177e4SLinus Torvalds ip_mc_dec_group(in_dev, t->parms.iph.daddr); 11501da177e4SLinus Torvalds in_dev_put(in_dev); 11511da177e4SLinus Torvalds } 11521da177e4SLinus Torvalds } 11531da177e4SLinus Torvalds return 0; 11541da177e4SLinus Torvalds } 11551da177e4SLinus Torvalds 11561da177e4SLinus Torvalds #endif 11571da177e4SLinus Torvalds 11581da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev) 11591da177e4SLinus Torvalds { 1160c19e654dSHerbert Xu dev->init = ipgre_tunnel_init; 11611da177e4SLinus Torvalds dev->uninit = ipgre_tunnel_uninit; 11621da177e4SLinus Torvalds dev->destructor = free_netdev; 11631da177e4SLinus Torvalds dev->hard_start_xmit = ipgre_tunnel_xmit; 11641da177e4SLinus Torvalds dev->do_ioctl = ipgre_tunnel_ioctl; 11651da177e4SLinus Torvalds dev->change_mtu = ipgre_tunnel_change_mtu; 11661da177e4SLinus Torvalds 11671da177e4SLinus Torvalds dev->type = ARPHRD_IPGRE; 1168c95b819aSHerbert Xu dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 116946f25dffSKris Katterjohn dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; 11701da177e4SLinus Torvalds dev->flags = IFF_NOARP; 11711da177e4SLinus Torvalds dev->iflink = 0; 11721da177e4SLinus Torvalds dev->addr_len = 4; 11730b67ecebSPavel Emelyanov dev->features |= NETIF_F_NETNS_LOCAL; 11741da177e4SLinus Torvalds } 11751da177e4SLinus Torvalds 11761da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev) 11771da177e4SLinus Torvalds { 11781da177e4SLinus Torvalds struct ip_tunnel *tunnel; 11791da177e4SLinus Torvalds struct iphdr *iph; 11801da177e4SLinus Torvalds 11812941a486SPatrick McHardy tunnel = netdev_priv(dev); 11821da177e4SLinus Torvalds iph = &tunnel->parms.iph; 11831da177e4SLinus Torvalds 11841da177e4SLinus Torvalds tunnel->dev = dev; 11851da177e4SLinus Torvalds strcpy(tunnel->parms.name, 
dev->name); 11861da177e4SLinus Torvalds 11871da177e4SLinus Torvalds memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 11881da177e4SLinus Torvalds memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 11891da177e4SLinus Torvalds 11901da177e4SLinus Torvalds if (iph->daddr) { 11911da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST 1192f97c1e0cSJoe Perches if (ipv4_is_multicast(iph->daddr)) { 11931da177e4SLinus Torvalds if (!iph->saddr) 11941da177e4SLinus Torvalds return -EINVAL; 11951da177e4SLinus Torvalds dev->flags = IFF_BROADCAST; 11963b04dddeSStephen Hemminger dev->header_ops = &ipgre_header_ops; 11971da177e4SLinus Torvalds dev->open = ipgre_open; 11981da177e4SLinus Torvalds dev->stop = ipgre_close; 11991da177e4SLinus Torvalds } 12001da177e4SLinus Torvalds #endif 1201ee34c1ebSMichal Schmidt } else 12026a5f44d7STimo Teras dev->header_ops = &ipgre_header_ops; 12031da177e4SLinus Torvalds 12041da177e4SLinus Torvalds return 0; 12051da177e4SLinus Torvalds } 12061da177e4SLinus Torvalds 12077daa0004SPavel Emelyanov static int ipgre_fb_tunnel_init(struct net_device *dev) 12081da177e4SLinus Torvalds { 12092941a486SPatrick McHardy struct ip_tunnel *tunnel = netdev_priv(dev); 12101da177e4SLinus Torvalds struct iphdr *iph = &tunnel->parms.iph; 1211eb8ce741SPavel Emelyanov struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id); 12121da177e4SLinus Torvalds 12131da177e4SLinus Torvalds tunnel->dev = dev; 12141da177e4SLinus Torvalds strcpy(tunnel->parms.name, dev->name); 12151da177e4SLinus Torvalds 12161da177e4SLinus Torvalds iph->version = 4; 12171da177e4SLinus Torvalds iph->protocol = IPPROTO_GRE; 12181da177e4SLinus Torvalds iph->ihl = 5; 12191da177e4SLinus Torvalds tunnel->hlen = sizeof(struct iphdr) + 4; 12201da177e4SLinus Torvalds 12211da177e4SLinus Torvalds dev_hold(dev); 1222eb8ce741SPavel Emelyanov ign->tunnels_wc[0] = tunnel; 12231da177e4SLinus Torvalds return 0; 12241da177e4SLinus Torvalds } 12251da177e4SLinus Torvalds 12261da177e4SLinus Torvalds 
12271da177e4SLinus Torvalds static struct net_protocol ipgre_protocol = { 12281da177e4SLinus Torvalds .handler = ipgre_rcv, 12291da177e4SLinus Torvalds .err_handler = ipgre_err, 1230f96c148fSPavel Emelyanov .netns_ok = 1, 12311da177e4SLinus Torvalds }; 12321da177e4SLinus Torvalds 1233eb8ce741SPavel Emelyanov static void ipgre_destroy_tunnels(struct ipgre_net *ign) 1234eb8ce741SPavel Emelyanov { 1235eb8ce741SPavel Emelyanov int prio; 1236eb8ce741SPavel Emelyanov 1237eb8ce741SPavel Emelyanov for (prio = 0; prio < 4; prio++) { 1238eb8ce741SPavel Emelyanov int h; 1239eb8ce741SPavel Emelyanov for (h = 0; h < HASH_SIZE; h++) { 1240eb8ce741SPavel Emelyanov struct ip_tunnel *t; 1241eb8ce741SPavel Emelyanov while ((t = ign->tunnels[prio][h]) != NULL) 1242eb8ce741SPavel Emelyanov unregister_netdevice(t->dev); 1243eb8ce741SPavel Emelyanov } 1244eb8ce741SPavel Emelyanov } 1245eb8ce741SPavel Emelyanov } 1246eb8ce741SPavel Emelyanov 124759a4c759SPavel Emelyanov static int ipgre_init_net(struct net *net) 124859a4c759SPavel Emelyanov { 124959a4c759SPavel Emelyanov int err; 125059a4c759SPavel Emelyanov struct ipgre_net *ign; 125159a4c759SPavel Emelyanov 125259a4c759SPavel Emelyanov err = -ENOMEM; 1253eb8ce741SPavel Emelyanov ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL); 125459a4c759SPavel Emelyanov if (ign == NULL) 125559a4c759SPavel Emelyanov goto err_alloc; 125659a4c759SPavel Emelyanov 125759a4c759SPavel Emelyanov err = net_assign_generic(net, ipgre_net_id, ign); 125859a4c759SPavel Emelyanov if (err < 0) 125959a4c759SPavel Emelyanov goto err_assign; 126059a4c759SPavel Emelyanov 12617daa0004SPavel Emelyanov ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", 12627daa0004SPavel Emelyanov ipgre_tunnel_setup); 12637daa0004SPavel Emelyanov if (!ign->fb_tunnel_dev) { 12647daa0004SPavel Emelyanov err = -ENOMEM; 12657daa0004SPavel Emelyanov goto err_alloc_dev; 12667daa0004SPavel Emelyanov } 12677daa0004SPavel Emelyanov 12687daa0004SPavel Emelyanov 
ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; 12697daa0004SPavel Emelyanov dev_net_set(ign->fb_tunnel_dev, net); 1270c19e654dSHerbert Xu ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; 12717daa0004SPavel Emelyanov 12727daa0004SPavel Emelyanov if ((err = register_netdev(ign->fb_tunnel_dev))) 12737daa0004SPavel Emelyanov goto err_reg_dev; 12747daa0004SPavel Emelyanov 127559a4c759SPavel Emelyanov return 0; 127659a4c759SPavel Emelyanov 12777daa0004SPavel Emelyanov err_reg_dev: 12787daa0004SPavel Emelyanov free_netdev(ign->fb_tunnel_dev); 12797daa0004SPavel Emelyanov err_alloc_dev: 12807daa0004SPavel Emelyanov /* nothing */ 128159a4c759SPavel Emelyanov err_assign: 128259a4c759SPavel Emelyanov kfree(ign); 128359a4c759SPavel Emelyanov err_alloc: 128459a4c759SPavel Emelyanov return err; 128559a4c759SPavel Emelyanov } 128659a4c759SPavel Emelyanov 128759a4c759SPavel Emelyanov static void ipgre_exit_net(struct net *net) 128859a4c759SPavel Emelyanov { 128959a4c759SPavel Emelyanov struct ipgre_net *ign; 129059a4c759SPavel Emelyanov 129159a4c759SPavel Emelyanov ign = net_generic(net, ipgre_net_id); 12927daa0004SPavel Emelyanov rtnl_lock(); 1293eb8ce741SPavel Emelyanov ipgre_destroy_tunnels(ign); 12947daa0004SPavel Emelyanov rtnl_unlock(); 129559a4c759SPavel Emelyanov kfree(ign); 129659a4c759SPavel Emelyanov } 129759a4c759SPavel Emelyanov 129859a4c759SPavel Emelyanov static struct pernet_operations ipgre_net_ops = { 129959a4c759SPavel Emelyanov .init = ipgre_init_net, 130059a4c759SPavel Emelyanov .exit = ipgre_exit_net, 130159a4c759SPavel Emelyanov }; 13021da177e4SLinus Torvalds 1303c19e654dSHerbert Xu static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 1304c19e654dSHerbert Xu { 1305c19e654dSHerbert Xu __be16 flags; 1306c19e654dSHerbert Xu 1307c19e654dSHerbert Xu if (!data) 1308c19e654dSHerbert Xu return 0; 1309c19e654dSHerbert Xu 1310c19e654dSHerbert Xu flags = 0; 1311c19e654dSHerbert Xu if (data[IFLA_GRE_IFLAGS]) 1312c19e654dSHerbert Xu flags |= 
nla_get_be16(data[IFLA_GRE_IFLAGS]); 1313c19e654dSHerbert Xu if (data[IFLA_GRE_OFLAGS]) 1314c19e654dSHerbert Xu flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); 1315c19e654dSHerbert Xu if (flags & (GRE_VERSION|GRE_ROUTING)) 1316c19e654dSHerbert Xu return -EINVAL; 1317c19e654dSHerbert Xu 1318c19e654dSHerbert Xu return 0; 1319c19e654dSHerbert Xu } 1320c19e654dSHerbert Xu 1321e1a80002SHerbert Xu static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) 1322e1a80002SHerbert Xu { 1323e1a80002SHerbert Xu __be32 daddr; 1324e1a80002SHerbert Xu 1325e1a80002SHerbert Xu if (tb[IFLA_ADDRESS]) { 1326e1a80002SHerbert Xu if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1327e1a80002SHerbert Xu return -EINVAL; 1328e1a80002SHerbert Xu if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1329e1a80002SHerbert Xu return -EADDRNOTAVAIL; 1330e1a80002SHerbert Xu } 1331e1a80002SHerbert Xu 1332e1a80002SHerbert Xu if (!data) 1333e1a80002SHerbert Xu goto out; 1334e1a80002SHerbert Xu 1335e1a80002SHerbert Xu if (data[IFLA_GRE_REMOTE]) { 1336e1a80002SHerbert Xu memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); 1337e1a80002SHerbert Xu if (!daddr) 1338e1a80002SHerbert Xu return -EINVAL; 1339e1a80002SHerbert Xu } 1340e1a80002SHerbert Xu 1341e1a80002SHerbert Xu out: 1342e1a80002SHerbert Xu return ipgre_tunnel_validate(tb, data); 1343e1a80002SHerbert Xu } 1344e1a80002SHerbert Xu 1345c19e654dSHerbert Xu static void ipgre_netlink_parms(struct nlattr *data[], 1346c19e654dSHerbert Xu struct ip_tunnel_parm *parms) 1347c19e654dSHerbert Xu { 1348c19e654dSHerbert Xu memset(parms, 0, sizeof(parms)); 1349c19e654dSHerbert Xu 1350c19e654dSHerbert Xu parms->iph.protocol = IPPROTO_GRE; 1351c19e654dSHerbert Xu 1352c19e654dSHerbert Xu if (!data) 1353c19e654dSHerbert Xu return; 1354c19e654dSHerbert Xu 1355c19e654dSHerbert Xu if (data[IFLA_GRE_LINK]) 1356c19e654dSHerbert Xu parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 1357c19e654dSHerbert Xu 1358c19e654dSHerbert Xu if (data[IFLA_GRE_IFLAGS]) 
1359c19e654dSHerbert Xu parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); 1360c19e654dSHerbert Xu 1361c19e654dSHerbert Xu if (data[IFLA_GRE_OFLAGS]) 1362c19e654dSHerbert Xu parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); 1363c19e654dSHerbert Xu 1364c19e654dSHerbert Xu if (data[IFLA_GRE_IKEY]) 1365c19e654dSHerbert Xu parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 1366c19e654dSHerbert Xu 1367c19e654dSHerbert Xu if (data[IFLA_GRE_OKEY]) 1368c19e654dSHerbert Xu parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); 1369c19e654dSHerbert Xu 1370c19e654dSHerbert Xu if (data[IFLA_GRE_LOCAL]) 1371*4d74f8baSPatrick McHardy parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); 1372c19e654dSHerbert Xu 1373c19e654dSHerbert Xu if (data[IFLA_GRE_REMOTE]) 1374*4d74f8baSPatrick McHardy parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); 1375c19e654dSHerbert Xu 1376c19e654dSHerbert Xu if (data[IFLA_GRE_TTL]) 1377c19e654dSHerbert Xu parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); 1378c19e654dSHerbert Xu 1379c19e654dSHerbert Xu if (data[IFLA_GRE_TOS]) 1380c19e654dSHerbert Xu parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); 1381c19e654dSHerbert Xu 1382c19e654dSHerbert Xu if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) 1383c19e654dSHerbert Xu parms->iph.frag_off = htons(IP_DF); 1384c19e654dSHerbert Xu } 1385c19e654dSHerbert Xu 1386e1a80002SHerbert Xu static int ipgre_tap_init(struct net_device *dev) 1387e1a80002SHerbert Xu { 1388e1a80002SHerbert Xu struct ip_tunnel *tunnel; 1389e1a80002SHerbert Xu 1390e1a80002SHerbert Xu tunnel = netdev_priv(dev); 1391e1a80002SHerbert Xu 1392e1a80002SHerbert Xu tunnel->dev = dev; 1393e1a80002SHerbert Xu strcpy(tunnel->parms.name, dev->name); 1394e1a80002SHerbert Xu 1395e1a80002SHerbert Xu ipgre_tunnel_bind_dev(dev); 1396e1a80002SHerbert Xu 1397e1a80002SHerbert Xu return 0; 1398e1a80002SHerbert Xu } 1399e1a80002SHerbert Xu 1400e1a80002SHerbert Xu static void ipgre_tap_setup(struct net_device *dev) 1401e1a80002SHerbert 
Xu { 1402e1a80002SHerbert Xu 1403e1a80002SHerbert Xu ether_setup(dev); 1404e1a80002SHerbert Xu 1405e1a80002SHerbert Xu dev->init = ipgre_tap_init; 1406e1a80002SHerbert Xu dev->uninit = ipgre_tunnel_uninit; 1407e1a80002SHerbert Xu dev->destructor = free_netdev; 1408e1a80002SHerbert Xu dev->hard_start_xmit = ipgre_tunnel_xmit; 1409e1a80002SHerbert Xu dev->change_mtu = ipgre_tunnel_change_mtu; 1410e1a80002SHerbert Xu 1411e1a80002SHerbert Xu dev->iflink = 0; 1412e1a80002SHerbert Xu dev->features |= NETIF_F_NETNS_LOCAL; 1413e1a80002SHerbert Xu } 1414e1a80002SHerbert Xu 1415c19e654dSHerbert Xu static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], 1416c19e654dSHerbert Xu struct nlattr *data[]) 1417c19e654dSHerbert Xu { 1418c19e654dSHerbert Xu struct ip_tunnel *nt; 1419c19e654dSHerbert Xu struct net *net = dev_net(dev); 1420c19e654dSHerbert Xu struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1421c19e654dSHerbert Xu int mtu; 1422c19e654dSHerbert Xu int err; 1423c19e654dSHerbert Xu 1424c19e654dSHerbert Xu nt = netdev_priv(dev); 1425c19e654dSHerbert Xu ipgre_netlink_parms(data, &nt->parms); 1426c19e654dSHerbert Xu 1427e1a80002SHerbert Xu if (ipgre_tunnel_find(net, &nt->parms, dev->type)) 1428c19e654dSHerbert Xu return -EEXIST; 1429c19e654dSHerbert Xu 1430e1a80002SHerbert Xu if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) 1431e1a80002SHerbert Xu random_ether_addr(dev->dev_addr); 1432e1a80002SHerbert Xu 1433c19e654dSHerbert Xu mtu = ipgre_tunnel_bind_dev(dev); 1434c19e654dSHerbert Xu if (!tb[IFLA_MTU]) 1435c19e654dSHerbert Xu dev->mtu = mtu; 1436c19e654dSHerbert Xu 1437c19e654dSHerbert Xu err = register_netdevice(dev); 1438c19e654dSHerbert Xu if (err) 1439c19e654dSHerbert Xu goto out; 1440c19e654dSHerbert Xu 1441c19e654dSHerbert Xu dev_hold(dev); 1442c19e654dSHerbert Xu ipgre_tunnel_link(ign, nt); 1443c19e654dSHerbert Xu 1444c19e654dSHerbert Xu out: 1445c19e654dSHerbert Xu return err; 1446c19e654dSHerbert Xu } 1447c19e654dSHerbert Xu 
1448c19e654dSHerbert Xu static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 1449c19e654dSHerbert Xu struct nlattr *data[]) 1450c19e654dSHerbert Xu { 1451c19e654dSHerbert Xu struct ip_tunnel *t, *nt; 1452c19e654dSHerbert Xu struct net *net = dev_net(dev); 1453c19e654dSHerbert Xu struct ipgre_net *ign = net_generic(net, ipgre_net_id); 1454c19e654dSHerbert Xu struct ip_tunnel_parm p; 1455c19e654dSHerbert Xu int mtu; 1456c19e654dSHerbert Xu 1457c19e654dSHerbert Xu if (dev == ign->fb_tunnel_dev) 1458c19e654dSHerbert Xu return -EINVAL; 1459c19e654dSHerbert Xu 1460c19e654dSHerbert Xu nt = netdev_priv(dev); 1461c19e654dSHerbert Xu ipgre_netlink_parms(data, &p); 1462c19e654dSHerbert Xu 1463c19e654dSHerbert Xu t = ipgre_tunnel_locate(net, &p, 0); 1464c19e654dSHerbert Xu 1465c19e654dSHerbert Xu if (t) { 1466c19e654dSHerbert Xu if (t->dev != dev) 1467c19e654dSHerbert Xu return -EEXIST; 1468c19e654dSHerbert Xu } else { 1469c19e654dSHerbert Xu unsigned nflags = 0; 1470c19e654dSHerbert Xu 1471c19e654dSHerbert Xu t = nt; 1472c19e654dSHerbert Xu 1473c19e654dSHerbert Xu if (ipv4_is_multicast(p.iph.daddr)) 1474c19e654dSHerbert Xu nflags = IFF_BROADCAST; 1475c19e654dSHerbert Xu else if (p.iph.daddr) 1476c19e654dSHerbert Xu nflags = IFF_POINTOPOINT; 1477c19e654dSHerbert Xu 1478c19e654dSHerbert Xu if ((dev->flags ^ nflags) & 1479c19e654dSHerbert Xu (IFF_POINTOPOINT | IFF_BROADCAST)) 1480c19e654dSHerbert Xu return -EINVAL; 1481c19e654dSHerbert Xu 1482c19e654dSHerbert Xu ipgre_tunnel_unlink(ign, t); 1483c19e654dSHerbert Xu t->parms.iph.saddr = p.iph.saddr; 1484c19e654dSHerbert Xu t->parms.iph.daddr = p.iph.daddr; 1485c19e654dSHerbert Xu t->parms.i_key = p.i_key; 1486c19e654dSHerbert Xu memcpy(dev->dev_addr, &p.iph.saddr, 4); 1487c19e654dSHerbert Xu memcpy(dev->broadcast, &p.iph.daddr, 4); 1488c19e654dSHerbert Xu ipgre_tunnel_link(ign, t); 1489c19e654dSHerbert Xu netdev_state_change(dev); 1490c19e654dSHerbert Xu } 1491c19e654dSHerbert Xu 1492c19e654dSHerbert Xu 
t->parms.o_key = p.o_key; 1493c19e654dSHerbert Xu t->parms.iph.ttl = p.iph.ttl; 1494c19e654dSHerbert Xu t->parms.iph.tos = p.iph.tos; 1495c19e654dSHerbert Xu t->parms.iph.frag_off = p.iph.frag_off; 1496c19e654dSHerbert Xu 1497c19e654dSHerbert Xu if (t->parms.link != p.link) { 1498c19e654dSHerbert Xu t->parms.link = p.link; 1499c19e654dSHerbert Xu mtu = ipgre_tunnel_bind_dev(dev); 1500c19e654dSHerbert Xu if (!tb[IFLA_MTU]) 1501c19e654dSHerbert Xu dev->mtu = mtu; 1502c19e654dSHerbert Xu netdev_state_change(dev); 1503c19e654dSHerbert Xu } 1504c19e654dSHerbert Xu 1505c19e654dSHerbert Xu return 0; 1506c19e654dSHerbert Xu } 1507c19e654dSHerbert Xu 1508c19e654dSHerbert Xu static size_t ipgre_get_size(const struct net_device *dev) 1509c19e654dSHerbert Xu { 1510c19e654dSHerbert Xu return 1511c19e654dSHerbert Xu /* IFLA_GRE_LINK */ 1512c19e654dSHerbert Xu nla_total_size(4) + 1513c19e654dSHerbert Xu /* IFLA_GRE_IFLAGS */ 1514c19e654dSHerbert Xu nla_total_size(2) + 1515c19e654dSHerbert Xu /* IFLA_GRE_OFLAGS */ 1516c19e654dSHerbert Xu nla_total_size(2) + 1517c19e654dSHerbert Xu /* IFLA_GRE_IKEY */ 1518c19e654dSHerbert Xu nla_total_size(4) + 1519c19e654dSHerbert Xu /* IFLA_GRE_OKEY */ 1520c19e654dSHerbert Xu nla_total_size(4) + 1521c19e654dSHerbert Xu /* IFLA_GRE_LOCAL */ 1522c19e654dSHerbert Xu nla_total_size(4) + 1523c19e654dSHerbert Xu /* IFLA_GRE_REMOTE */ 1524c19e654dSHerbert Xu nla_total_size(4) + 1525c19e654dSHerbert Xu /* IFLA_GRE_TTL */ 1526c19e654dSHerbert Xu nla_total_size(1) + 1527c19e654dSHerbert Xu /* IFLA_GRE_TOS */ 1528c19e654dSHerbert Xu nla_total_size(1) + 1529c19e654dSHerbert Xu /* IFLA_GRE_PMTUDISC */ 1530c19e654dSHerbert Xu nla_total_size(1) + 1531c19e654dSHerbert Xu 0; 1532c19e654dSHerbert Xu } 1533c19e654dSHerbert Xu 1534c19e654dSHerbert Xu static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) 1535c19e654dSHerbert Xu { 1536c19e654dSHerbert Xu struct ip_tunnel *t = netdev_priv(dev); 1537c19e654dSHerbert Xu struct ip_tunnel_parm *p = 
&t->parms; 1538c19e654dSHerbert Xu 1539c19e654dSHerbert Xu NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); 1540c19e654dSHerbert Xu NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); 1541c19e654dSHerbert Xu NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); 1542ba9e64b1SPatrick McHardy NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); 1543ba9e64b1SPatrick McHardy NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); 1544*4d74f8baSPatrick McHardy NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); 1545*4d74f8baSPatrick McHardy NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); 1546c19e654dSHerbert Xu NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); 1547c19e654dSHerbert Xu NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); 1548c19e654dSHerbert Xu NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); 1549c19e654dSHerbert Xu 1550c19e654dSHerbert Xu return 0; 1551c19e654dSHerbert Xu 1552c19e654dSHerbert Xu nla_put_failure: 1553c19e654dSHerbert Xu return -EMSGSIZE; 1554c19e654dSHerbert Xu } 1555c19e654dSHerbert Xu 1556c19e654dSHerbert Xu static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { 1557c19e654dSHerbert Xu [IFLA_GRE_LINK] = { .type = NLA_U32 }, 1558c19e654dSHerbert Xu [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, 1559c19e654dSHerbert Xu [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, 1560c19e654dSHerbert Xu [IFLA_GRE_IKEY] = { .type = NLA_U32 }, 1561c19e654dSHerbert Xu [IFLA_GRE_OKEY] = { .type = NLA_U32 }, 1562*4d74f8baSPatrick McHardy [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, 1563*4d74f8baSPatrick McHardy [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 1564c19e654dSHerbert Xu [IFLA_GRE_TTL] = { .type = NLA_U8 }, 1565c19e654dSHerbert Xu [IFLA_GRE_TOS] = { .type = NLA_U8 }, 1566c19e654dSHerbert Xu [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, 1567c19e654dSHerbert Xu }; 1568c19e654dSHerbert Xu 1569c19e654dSHerbert Xu static struct rtnl_link_ops ipgre_link_ops __read_mostly = { 1570c19e654dSHerbert Xu .kind = "gre", 1571c19e654dSHerbert Xu .maxtype = 
IFLA_GRE_MAX, 1572c19e654dSHerbert Xu .policy = ipgre_policy, 1573c19e654dSHerbert Xu .priv_size = sizeof(struct ip_tunnel), 1574c19e654dSHerbert Xu .setup = ipgre_tunnel_setup, 1575c19e654dSHerbert Xu .validate = ipgre_tunnel_validate, 1576c19e654dSHerbert Xu .newlink = ipgre_newlink, 1577c19e654dSHerbert Xu .changelink = ipgre_changelink, 1578c19e654dSHerbert Xu .get_size = ipgre_get_size, 1579c19e654dSHerbert Xu .fill_info = ipgre_fill_info, 1580c19e654dSHerbert Xu }; 1581c19e654dSHerbert Xu 1582e1a80002SHerbert Xu static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { 1583e1a80002SHerbert Xu .kind = "gretap", 1584e1a80002SHerbert Xu .maxtype = IFLA_GRE_MAX, 1585e1a80002SHerbert Xu .policy = ipgre_policy, 1586e1a80002SHerbert Xu .priv_size = sizeof(struct ip_tunnel), 1587e1a80002SHerbert Xu .setup = ipgre_tap_setup, 1588e1a80002SHerbert Xu .validate = ipgre_tap_validate, 1589e1a80002SHerbert Xu .newlink = ipgre_newlink, 1590e1a80002SHerbert Xu .changelink = ipgre_changelink, 1591e1a80002SHerbert Xu .get_size = ipgre_get_size, 1592e1a80002SHerbert Xu .fill_info = ipgre_fill_info, 1593e1a80002SHerbert Xu }; 1594e1a80002SHerbert Xu 15951da177e4SLinus Torvalds /* 15961da177e4SLinus Torvalds * And now the modules code and kernel interface. 
15971da177e4SLinus Torvalds */ 15981da177e4SLinus Torvalds 15991da177e4SLinus Torvalds static int __init ipgre_init(void) 16001da177e4SLinus Torvalds { 16011da177e4SLinus Torvalds int err; 16021da177e4SLinus Torvalds 16031da177e4SLinus Torvalds printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); 16041da177e4SLinus Torvalds 16051da177e4SLinus Torvalds if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { 16061da177e4SLinus Torvalds printk(KERN_INFO "ipgre init: can't add protocol\n"); 16071da177e4SLinus Torvalds return -EAGAIN; 16081da177e4SLinus Torvalds } 16091da177e4SLinus Torvalds 161059a4c759SPavel Emelyanov err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); 161159a4c759SPavel Emelyanov if (err < 0) 1612c19e654dSHerbert Xu goto gen_device_failed; 16137daa0004SPavel Emelyanov 1614c19e654dSHerbert Xu err = rtnl_link_register(&ipgre_link_ops); 1615c19e654dSHerbert Xu if (err < 0) 1616c19e654dSHerbert Xu goto rtnl_link_failed; 1617c19e654dSHerbert Xu 1618e1a80002SHerbert Xu err = rtnl_link_register(&ipgre_tap_ops); 1619e1a80002SHerbert Xu if (err < 0) 1620e1a80002SHerbert Xu goto tap_ops_failed; 1621e1a80002SHerbert Xu 1622c19e654dSHerbert Xu out: 16237daa0004SPavel Emelyanov return err; 1624c19e654dSHerbert Xu 1625e1a80002SHerbert Xu tap_ops_failed: 1626e1a80002SHerbert Xu rtnl_link_unregister(&ipgre_link_ops); 1627c19e654dSHerbert Xu rtnl_link_failed: 1628c19e654dSHerbert Xu unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); 1629c19e654dSHerbert Xu gen_device_failed: 1630c19e654dSHerbert Xu inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1631c19e654dSHerbert Xu goto out; 16321da177e4SLinus Torvalds } 16331da177e4SLinus Torvalds 1634db44575fSAlexey Kuznetsov static void __exit ipgre_fini(void) 16351da177e4SLinus Torvalds { 1636e1a80002SHerbert Xu rtnl_link_unregister(&ipgre_tap_ops); 1637c19e654dSHerbert Xu rtnl_link_unregister(&ipgre_link_ops); 1638c19e654dSHerbert Xu unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); 
16391da177e4SLinus Torvalds if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 16401da177e4SLinus Torvalds printk(KERN_INFO "ipgre close: can't remove protocol\n"); 16411da177e4SLinus Torvalds } 16421da177e4SLinus Torvalds 16431da177e4SLinus Torvalds module_init(ipgre_init); 16441da177e4SLinus Torvalds module_exit(ipgre_fini); 16451da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1646*4d74f8baSPatrick McHardy MODULE_ALIAS_RTNL_LINK("gre"); 1647*4d74f8baSPatrick McHardy MODULE_ALIAS_RTNL_LINK("gretap"); 1648