1*1da177e4SLinus Torvalds /* 2*1da177e4SLinus Torvalds * Linux NET3: GRE over IP protocol decoder. 3*1da177e4SLinus Torvalds * 4*1da177e4SLinus Torvalds * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru) 5*1da177e4SLinus Torvalds * 6*1da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 7*1da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 8*1da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 9*1da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 10*1da177e4SLinus Torvalds * 11*1da177e4SLinus Torvalds */ 12*1da177e4SLinus Torvalds 13*1da177e4SLinus Torvalds #include <linux/config.h> 14*1da177e4SLinus Torvalds #include <linux/module.h> 15*1da177e4SLinus Torvalds #include <linux/types.h> 16*1da177e4SLinus Torvalds #include <linux/sched.h> 17*1da177e4SLinus Torvalds #include <linux/kernel.h> 18*1da177e4SLinus Torvalds #include <asm/uaccess.h> 19*1da177e4SLinus Torvalds #include <linux/skbuff.h> 20*1da177e4SLinus Torvalds #include <linux/netdevice.h> 21*1da177e4SLinus Torvalds #include <linux/in.h> 22*1da177e4SLinus Torvalds #include <linux/tcp.h> 23*1da177e4SLinus Torvalds #include <linux/udp.h> 24*1da177e4SLinus Torvalds #include <linux/if_arp.h> 25*1da177e4SLinus Torvalds #include <linux/mroute.h> 26*1da177e4SLinus Torvalds #include <linux/init.h> 27*1da177e4SLinus Torvalds #include <linux/in6.h> 28*1da177e4SLinus Torvalds #include <linux/inetdevice.h> 29*1da177e4SLinus Torvalds #include <linux/igmp.h> 30*1da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h> 31*1da177e4SLinus Torvalds 32*1da177e4SLinus Torvalds #include <net/sock.h> 33*1da177e4SLinus Torvalds #include <net/ip.h> 34*1da177e4SLinus Torvalds #include <net/icmp.h> 35*1da177e4SLinus Torvalds #include <net/protocol.h> 36*1da177e4SLinus Torvalds #include <net/ipip.h> 37*1da177e4SLinus Torvalds #include <net/arp.h> 38*1da177e4SLinus Torvalds #include <net/checksum.h> 
39*1da177e4SLinus Torvalds #include <net/dsfield.h> 40*1da177e4SLinus Torvalds #include <net/inet_ecn.h> 41*1da177e4SLinus Torvalds #include <net/xfrm.h> 42*1da177e4SLinus Torvalds 43*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6 44*1da177e4SLinus Torvalds #include <net/ipv6.h> 45*1da177e4SLinus Torvalds #include <net/ip6_fib.h> 46*1da177e4SLinus Torvalds #include <net/ip6_route.h> 47*1da177e4SLinus Torvalds #endif 48*1da177e4SLinus Torvalds 49*1da177e4SLinus Torvalds /* 50*1da177e4SLinus Torvalds Problems & solutions 51*1da177e4SLinus Torvalds -------------------- 52*1da177e4SLinus Torvalds 53*1da177e4SLinus Torvalds 1. The most important issue is detecting local dead loops. 54*1da177e4SLinus Torvalds They would cause complete host lockup in transmit, which 55*1da177e4SLinus Torvalds would be "resolved" by stack overflow or, if queueing is enabled, 56*1da177e4SLinus Torvalds with infinite looping in net_bh. 57*1da177e4SLinus Torvalds 58*1da177e4SLinus Torvalds We cannot track such dead loops during route installation, 59*1da177e4SLinus Torvalds it is infeasible task. The most general solutions would be 60*1da177e4SLinus Torvalds to keep skb->encapsulation counter (sort of local ttl), 61*1da177e4SLinus Torvalds and silently drop packet when it expires. It is the best 62*1da177e4SLinus Torvalds solution, but it supposes maintaing new variable in ALL 63*1da177e4SLinus Torvalds skb, even if no tunneling is used. 64*1da177e4SLinus Torvalds 65*1da177e4SLinus Torvalds Current solution: t->recursion lock breaks dead loops. It looks 66*1da177e4SLinus Torvalds like dev->tbusy flag, but I preferred new variable, because 67*1da177e4SLinus Torvalds the semantics is different. One day, when hard_start_xmit 68*1da177e4SLinus Torvalds will be multithreaded we will have to use skb->encapsulation. 69*1da177e4SLinus Torvalds 70*1da177e4SLinus Torvalds 71*1da177e4SLinus Torvalds 72*1da177e4SLinus Torvalds 2. 
Networking dead loops would not kill routers, but would really
   kill network. IP hop limit plays role of "t->recursion" in this case,
   if we copy it from packet being encapsulated to upper header.
   It is very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want that OSPF worked or traceroute said something reasonable,
   we should search for another solution.

   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL.
:-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   fatal route to network, even if it were you who configured
   fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident, how to make them modular.
   sit is integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
116*1da177e4SLinus Torvalds */ 117*1da177e4SLinus Torvalds 118*1da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev); 119*1da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev); 120*1da177e4SLinus Torvalds 121*1da177e4SLinus Torvalds /* Fallback tunnel: no source, no destination, no key, no options */ 122*1da177e4SLinus Torvalds 123*1da177e4SLinus Torvalds static int ipgre_fb_tunnel_init(struct net_device *dev); 124*1da177e4SLinus Torvalds 125*1da177e4SLinus Torvalds static struct net_device *ipgre_fb_tunnel_dev; 126*1da177e4SLinus Torvalds 127*1da177e4SLinus Torvalds /* Tunnel hash table */ 128*1da177e4SLinus Torvalds 129*1da177e4SLinus Torvalds /* 130*1da177e4SLinus Torvalds 4 hash tables: 131*1da177e4SLinus Torvalds 132*1da177e4SLinus Torvalds 3: (remote,local) 133*1da177e4SLinus Torvalds 2: (remote,*) 134*1da177e4SLinus Torvalds 1: (*,local) 135*1da177e4SLinus Torvalds 0: (*,*) 136*1da177e4SLinus Torvalds 137*1da177e4SLinus Torvalds We require exact key match i.e. if a key is present in packet 138*1da177e4SLinus Torvalds it will match only tunnel with the same key; if it is not present, 139*1da177e4SLinus Torvalds it will match only keyless tunnel. 140*1da177e4SLinus Torvalds 141*1da177e4SLinus Torvalds All keysless packets, if not matched configured keyless tunnels 142*1da177e4SLinus Torvalds will match fallback tunnel. 
143*1da177e4SLinus Torvalds */ 144*1da177e4SLinus Torvalds 145*1da177e4SLinus Torvalds #define HASH_SIZE 16 146*1da177e4SLinus Torvalds #define HASH(addr) ((addr^(addr>>4))&0xF) 147*1da177e4SLinus Torvalds 148*1da177e4SLinus Torvalds static struct ip_tunnel *tunnels[4][HASH_SIZE]; 149*1da177e4SLinus Torvalds 150*1da177e4SLinus Torvalds #define tunnels_r_l (tunnels[3]) 151*1da177e4SLinus Torvalds #define tunnels_r (tunnels[2]) 152*1da177e4SLinus Torvalds #define tunnels_l (tunnels[1]) 153*1da177e4SLinus Torvalds #define tunnels_wc (tunnels[0]) 154*1da177e4SLinus Torvalds 155*1da177e4SLinus Torvalds static DEFINE_RWLOCK(ipgre_lock); 156*1da177e4SLinus Torvalds 157*1da177e4SLinus Torvalds /* Given src, dst and key, find appropriate for input tunnel. */ 158*1da177e4SLinus Torvalds 159*1da177e4SLinus Torvalds static struct ip_tunnel * ipgre_tunnel_lookup(u32 remote, u32 local, u32 key) 160*1da177e4SLinus Torvalds { 161*1da177e4SLinus Torvalds unsigned h0 = HASH(remote); 162*1da177e4SLinus Torvalds unsigned h1 = HASH(key); 163*1da177e4SLinus Torvalds struct ip_tunnel *t; 164*1da177e4SLinus Torvalds 165*1da177e4SLinus Torvalds for (t = tunnels_r_l[h0^h1]; t; t = t->next) { 166*1da177e4SLinus Torvalds if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 167*1da177e4SLinus Torvalds if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 168*1da177e4SLinus Torvalds return t; 169*1da177e4SLinus Torvalds } 170*1da177e4SLinus Torvalds } 171*1da177e4SLinus Torvalds for (t = tunnels_r[h0^h1]; t; t = t->next) { 172*1da177e4SLinus Torvalds if (remote == t->parms.iph.daddr) { 173*1da177e4SLinus Torvalds if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 174*1da177e4SLinus Torvalds return t; 175*1da177e4SLinus Torvalds } 176*1da177e4SLinus Torvalds } 177*1da177e4SLinus Torvalds for (t = tunnels_l[h1]; t; t = t->next) { 178*1da177e4SLinus Torvalds if (local == t->parms.iph.saddr || 179*1da177e4SLinus Torvalds (local == t->parms.iph.daddr && MULTICAST(local))) { 
180*1da177e4SLinus Torvalds if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 181*1da177e4SLinus Torvalds return t; 182*1da177e4SLinus Torvalds } 183*1da177e4SLinus Torvalds } 184*1da177e4SLinus Torvalds for (t = tunnels_wc[h1]; t; t = t->next) { 185*1da177e4SLinus Torvalds if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) 186*1da177e4SLinus Torvalds return t; 187*1da177e4SLinus Torvalds } 188*1da177e4SLinus Torvalds 189*1da177e4SLinus Torvalds if (ipgre_fb_tunnel_dev->flags&IFF_UP) 190*1da177e4SLinus Torvalds return ipgre_fb_tunnel_dev->priv; 191*1da177e4SLinus Torvalds return NULL; 192*1da177e4SLinus Torvalds } 193*1da177e4SLinus Torvalds 194*1da177e4SLinus Torvalds static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t) 195*1da177e4SLinus Torvalds { 196*1da177e4SLinus Torvalds u32 remote = t->parms.iph.daddr; 197*1da177e4SLinus Torvalds u32 local = t->parms.iph.saddr; 198*1da177e4SLinus Torvalds u32 key = t->parms.i_key; 199*1da177e4SLinus Torvalds unsigned h = HASH(key); 200*1da177e4SLinus Torvalds int prio = 0; 201*1da177e4SLinus Torvalds 202*1da177e4SLinus Torvalds if (local) 203*1da177e4SLinus Torvalds prio |= 1; 204*1da177e4SLinus Torvalds if (remote && !MULTICAST(remote)) { 205*1da177e4SLinus Torvalds prio |= 2; 206*1da177e4SLinus Torvalds h ^= HASH(remote); 207*1da177e4SLinus Torvalds } 208*1da177e4SLinus Torvalds 209*1da177e4SLinus Torvalds return &tunnels[prio][h]; 210*1da177e4SLinus Torvalds } 211*1da177e4SLinus Torvalds 212*1da177e4SLinus Torvalds static void ipgre_tunnel_link(struct ip_tunnel *t) 213*1da177e4SLinus Torvalds { 214*1da177e4SLinus Torvalds struct ip_tunnel **tp = ipgre_bucket(t); 215*1da177e4SLinus Torvalds 216*1da177e4SLinus Torvalds t->next = *tp; 217*1da177e4SLinus Torvalds write_lock_bh(&ipgre_lock); 218*1da177e4SLinus Torvalds *tp = t; 219*1da177e4SLinus Torvalds write_unlock_bh(&ipgre_lock); 220*1da177e4SLinus Torvalds } 221*1da177e4SLinus Torvalds 222*1da177e4SLinus Torvalds static void ipgre_tunnel_unlink(struct 
ip_tunnel *t) 223*1da177e4SLinus Torvalds { 224*1da177e4SLinus Torvalds struct ip_tunnel **tp; 225*1da177e4SLinus Torvalds 226*1da177e4SLinus Torvalds for (tp = ipgre_bucket(t); *tp; tp = &(*tp)->next) { 227*1da177e4SLinus Torvalds if (t == *tp) { 228*1da177e4SLinus Torvalds write_lock_bh(&ipgre_lock); 229*1da177e4SLinus Torvalds *tp = t->next; 230*1da177e4SLinus Torvalds write_unlock_bh(&ipgre_lock); 231*1da177e4SLinus Torvalds break; 232*1da177e4SLinus Torvalds } 233*1da177e4SLinus Torvalds } 234*1da177e4SLinus Torvalds } 235*1da177e4SLinus Torvalds 236*1da177e4SLinus Torvalds static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int create) 237*1da177e4SLinus Torvalds { 238*1da177e4SLinus Torvalds u32 remote = parms->iph.daddr; 239*1da177e4SLinus Torvalds u32 local = parms->iph.saddr; 240*1da177e4SLinus Torvalds u32 key = parms->i_key; 241*1da177e4SLinus Torvalds struct ip_tunnel *t, **tp, *nt; 242*1da177e4SLinus Torvalds struct net_device *dev; 243*1da177e4SLinus Torvalds unsigned h = HASH(key); 244*1da177e4SLinus Torvalds int prio = 0; 245*1da177e4SLinus Torvalds char name[IFNAMSIZ]; 246*1da177e4SLinus Torvalds 247*1da177e4SLinus Torvalds if (local) 248*1da177e4SLinus Torvalds prio |= 1; 249*1da177e4SLinus Torvalds if (remote && !MULTICAST(remote)) { 250*1da177e4SLinus Torvalds prio |= 2; 251*1da177e4SLinus Torvalds h ^= HASH(remote); 252*1da177e4SLinus Torvalds } 253*1da177e4SLinus Torvalds for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) { 254*1da177e4SLinus Torvalds if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 255*1da177e4SLinus Torvalds if (key == t->parms.i_key) 256*1da177e4SLinus Torvalds return t; 257*1da177e4SLinus Torvalds } 258*1da177e4SLinus Torvalds } 259*1da177e4SLinus Torvalds if (!create) 260*1da177e4SLinus Torvalds return NULL; 261*1da177e4SLinus Torvalds 262*1da177e4SLinus Torvalds if (parms->name[0]) 263*1da177e4SLinus Torvalds strlcpy(name, parms->name, IFNAMSIZ); 264*1da177e4SLinus 
Torvalds else { 265*1da177e4SLinus Torvalds int i; 266*1da177e4SLinus Torvalds for (i=1; i<100; i++) { 267*1da177e4SLinus Torvalds sprintf(name, "gre%d", i); 268*1da177e4SLinus Torvalds if (__dev_get_by_name(name) == NULL) 269*1da177e4SLinus Torvalds break; 270*1da177e4SLinus Torvalds } 271*1da177e4SLinus Torvalds if (i==100) 272*1da177e4SLinus Torvalds goto failed; 273*1da177e4SLinus Torvalds } 274*1da177e4SLinus Torvalds 275*1da177e4SLinus Torvalds dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); 276*1da177e4SLinus Torvalds if (!dev) 277*1da177e4SLinus Torvalds return NULL; 278*1da177e4SLinus Torvalds 279*1da177e4SLinus Torvalds dev->init = ipgre_tunnel_init; 280*1da177e4SLinus Torvalds nt = dev->priv; 281*1da177e4SLinus Torvalds nt->parms = *parms; 282*1da177e4SLinus Torvalds 283*1da177e4SLinus Torvalds if (register_netdevice(dev) < 0) { 284*1da177e4SLinus Torvalds free_netdev(dev); 285*1da177e4SLinus Torvalds goto failed; 286*1da177e4SLinus Torvalds } 287*1da177e4SLinus Torvalds 288*1da177e4SLinus Torvalds nt = dev->priv; 289*1da177e4SLinus Torvalds nt->parms = *parms; 290*1da177e4SLinus Torvalds 291*1da177e4SLinus Torvalds dev_hold(dev); 292*1da177e4SLinus Torvalds ipgre_tunnel_link(nt); 293*1da177e4SLinus Torvalds /* Do not decrement MOD_USE_COUNT here. 
*/ 294*1da177e4SLinus Torvalds return nt; 295*1da177e4SLinus Torvalds 296*1da177e4SLinus Torvalds failed: 297*1da177e4SLinus Torvalds return NULL; 298*1da177e4SLinus Torvalds } 299*1da177e4SLinus Torvalds 300*1da177e4SLinus Torvalds static void ipgre_tunnel_uninit(struct net_device *dev) 301*1da177e4SLinus Torvalds { 302*1da177e4SLinus Torvalds ipgre_tunnel_unlink((struct ip_tunnel*)dev->priv); 303*1da177e4SLinus Torvalds dev_put(dev); 304*1da177e4SLinus Torvalds } 305*1da177e4SLinus Torvalds 306*1da177e4SLinus Torvalds 307*1da177e4SLinus Torvalds static void ipgre_err(struct sk_buff *skb, u32 info) 308*1da177e4SLinus Torvalds { 309*1da177e4SLinus Torvalds #ifndef I_WISH_WORLD_WERE_PERFECT 310*1da177e4SLinus Torvalds 311*1da177e4SLinus Torvalds /* It is not :-( All the routers (except for Linux) return only 312*1da177e4SLinus Torvalds 8 bytes of packet payload. It means, that precise relaying of 313*1da177e4SLinus Torvalds ICMP in the real Internet is absolutely infeasible. 314*1da177e4SLinus Torvalds 315*1da177e4SLinus Torvalds Moreover, Cisco "wise men" put GRE key to the third word 316*1da177e4SLinus Torvalds in GRE header. It makes impossible maintaining even soft state for keyed 317*1da177e4SLinus Torvalds GRE tunnels with enabled checksum. Tell them "thank you". 318*1da177e4SLinus Torvalds 319*1da177e4SLinus Torvalds Well, I wonder, rfc1812 was written by Cisco employee, 320*1da177e4SLinus Torvalds what the hell these idiots break standrads established 321*1da177e4SLinus Torvalds by themself??? 
322*1da177e4SLinus Torvalds */ 323*1da177e4SLinus Torvalds 324*1da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr*)skb->data; 325*1da177e4SLinus Torvalds u16 *p = (u16*)(skb->data+(iph->ihl<<2)); 326*1da177e4SLinus Torvalds int grehlen = (iph->ihl<<2) + 4; 327*1da177e4SLinus Torvalds int type = skb->h.icmph->type; 328*1da177e4SLinus Torvalds int code = skb->h.icmph->code; 329*1da177e4SLinus Torvalds struct ip_tunnel *t; 330*1da177e4SLinus Torvalds u16 flags; 331*1da177e4SLinus Torvalds 332*1da177e4SLinus Torvalds flags = p[0]; 333*1da177e4SLinus Torvalds if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 334*1da177e4SLinus Torvalds if (flags&(GRE_VERSION|GRE_ROUTING)) 335*1da177e4SLinus Torvalds return; 336*1da177e4SLinus Torvalds if (flags&GRE_KEY) { 337*1da177e4SLinus Torvalds grehlen += 4; 338*1da177e4SLinus Torvalds if (flags&GRE_CSUM) 339*1da177e4SLinus Torvalds grehlen += 4; 340*1da177e4SLinus Torvalds } 341*1da177e4SLinus Torvalds } 342*1da177e4SLinus Torvalds 343*1da177e4SLinus Torvalds /* If only 8 bytes returned, keyed message will be dropped here */ 344*1da177e4SLinus Torvalds if (skb_headlen(skb) < grehlen) 345*1da177e4SLinus Torvalds return; 346*1da177e4SLinus Torvalds 347*1da177e4SLinus Torvalds switch (type) { 348*1da177e4SLinus Torvalds default: 349*1da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 350*1da177e4SLinus Torvalds return; 351*1da177e4SLinus Torvalds 352*1da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 353*1da177e4SLinus Torvalds switch (code) { 354*1da177e4SLinus Torvalds case ICMP_SR_FAILED: 355*1da177e4SLinus Torvalds case ICMP_PORT_UNREACH: 356*1da177e4SLinus Torvalds /* Impossible event. */ 357*1da177e4SLinus Torvalds return; 358*1da177e4SLinus Torvalds case ICMP_FRAG_NEEDED: 359*1da177e4SLinus Torvalds /* Soft state for pmtu is maintained by IP core. */ 360*1da177e4SLinus Torvalds return; 361*1da177e4SLinus Torvalds default: 362*1da177e4SLinus Torvalds /* All others are translated to HOST_UNREACH. 
363*1da177e4SLinus Torvalds rfc2003 contains "deep thoughts" about NET_UNREACH, 364*1da177e4SLinus Torvalds I believe they are just ether pollution. --ANK 365*1da177e4SLinus Torvalds */ 366*1da177e4SLinus Torvalds break; 367*1da177e4SLinus Torvalds } 368*1da177e4SLinus Torvalds break; 369*1da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 370*1da177e4SLinus Torvalds if (code != ICMP_EXC_TTL) 371*1da177e4SLinus Torvalds return; 372*1da177e4SLinus Torvalds break; 373*1da177e4SLinus Torvalds } 374*1da177e4SLinus Torvalds 375*1da177e4SLinus Torvalds read_lock(&ipgre_lock); 376*1da177e4SLinus Torvalds t = ipgre_tunnel_lookup(iph->daddr, iph->saddr, (flags&GRE_KEY) ? *(((u32*)p) + (grehlen>>2) - 1) : 0); 377*1da177e4SLinus Torvalds if (t == NULL || t->parms.iph.daddr == 0 || MULTICAST(t->parms.iph.daddr)) 378*1da177e4SLinus Torvalds goto out; 379*1da177e4SLinus Torvalds 380*1da177e4SLinus Torvalds if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 381*1da177e4SLinus Torvalds goto out; 382*1da177e4SLinus Torvalds 383*1da177e4SLinus Torvalds if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 384*1da177e4SLinus Torvalds t->err_count++; 385*1da177e4SLinus Torvalds else 386*1da177e4SLinus Torvalds t->err_count = 1; 387*1da177e4SLinus Torvalds t->err_time = jiffies; 388*1da177e4SLinus Torvalds out: 389*1da177e4SLinus Torvalds read_unlock(&ipgre_lock); 390*1da177e4SLinus Torvalds return; 391*1da177e4SLinus Torvalds #else 392*1da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr*)dp; 393*1da177e4SLinus Torvalds struct iphdr *eiph; 394*1da177e4SLinus Torvalds u16 *p = (u16*)(dp+(iph->ihl<<2)); 395*1da177e4SLinus Torvalds int type = skb->h.icmph->type; 396*1da177e4SLinus Torvalds int code = skb->h.icmph->code; 397*1da177e4SLinus Torvalds int rel_type = 0; 398*1da177e4SLinus Torvalds int rel_code = 0; 399*1da177e4SLinus Torvalds int rel_info = 0; 400*1da177e4SLinus Torvalds u16 flags; 401*1da177e4SLinus Torvalds int grehlen = (iph->ihl<<2) + 4; 402*1da177e4SLinus Torvalds 
struct sk_buff *skb2; 403*1da177e4SLinus Torvalds struct flowi fl; 404*1da177e4SLinus Torvalds struct rtable *rt; 405*1da177e4SLinus Torvalds 406*1da177e4SLinus Torvalds if (p[1] != htons(ETH_P_IP)) 407*1da177e4SLinus Torvalds return; 408*1da177e4SLinus Torvalds 409*1da177e4SLinus Torvalds flags = p[0]; 410*1da177e4SLinus Torvalds if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 411*1da177e4SLinus Torvalds if (flags&(GRE_VERSION|GRE_ROUTING)) 412*1da177e4SLinus Torvalds return; 413*1da177e4SLinus Torvalds if (flags&GRE_CSUM) 414*1da177e4SLinus Torvalds grehlen += 4; 415*1da177e4SLinus Torvalds if (flags&GRE_KEY) 416*1da177e4SLinus Torvalds grehlen += 4; 417*1da177e4SLinus Torvalds if (flags&GRE_SEQ) 418*1da177e4SLinus Torvalds grehlen += 4; 419*1da177e4SLinus Torvalds } 420*1da177e4SLinus Torvalds if (len < grehlen + sizeof(struct iphdr)) 421*1da177e4SLinus Torvalds return; 422*1da177e4SLinus Torvalds eiph = (struct iphdr*)(dp + grehlen); 423*1da177e4SLinus Torvalds 424*1da177e4SLinus Torvalds switch (type) { 425*1da177e4SLinus Torvalds default: 426*1da177e4SLinus Torvalds return; 427*1da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 428*1da177e4SLinus Torvalds if (skb->h.icmph->un.gateway < (iph->ihl<<2)) 429*1da177e4SLinus Torvalds return; 430*1da177e4SLinus Torvalds 431*1da177e4SLinus Torvalds /* So... This guy found something strange INSIDE encapsulated 432*1da177e4SLinus Torvalds packet. Well, he is fool, but what can we do ? 433*1da177e4SLinus Torvalds */ 434*1da177e4SLinus Torvalds rel_type = ICMP_PARAMETERPROB; 435*1da177e4SLinus Torvalds rel_info = skb->h.icmph->un.gateway - grehlen; 436*1da177e4SLinus Torvalds break; 437*1da177e4SLinus Torvalds 438*1da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 439*1da177e4SLinus Torvalds switch (code) { 440*1da177e4SLinus Torvalds case ICMP_SR_FAILED: 441*1da177e4SLinus Torvalds case ICMP_PORT_UNREACH: 442*1da177e4SLinus Torvalds /* Impossible event. 
*/ 443*1da177e4SLinus Torvalds return; 444*1da177e4SLinus Torvalds case ICMP_FRAG_NEEDED: 445*1da177e4SLinus Torvalds /* And it is the only really necessary thing :-) */ 446*1da177e4SLinus Torvalds rel_info = ntohs(skb->h.icmph->un.frag.mtu); 447*1da177e4SLinus Torvalds if (rel_info < grehlen+68) 448*1da177e4SLinus Torvalds return; 449*1da177e4SLinus Torvalds rel_info -= grehlen; 450*1da177e4SLinus Torvalds /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ 451*1da177e4SLinus Torvalds if (rel_info > ntohs(eiph->tot_len)) 452*1da177e4SLinus Torvalds return; 453*1da177e4SLinus Torvalds break; 454*1da177e4SLinus Torvalds default: 455*1da177e4SLinus Torvalds /* All others are translated to HOST_UNREACH. 456*1da177e4SLinus Torvalds rfc2003 contains "deep thoughts" about NET_UNREACH, 457*1da177e4SLinus Torvalds I believe, it is just ether pollution. --ANK 458*1da177e4SLinus Torvalds */ 459*1da177e4SLinus Torvalds rel_type = ICMP_DEST_UNREACH; 460*1da177e4SLinus Torvalds rel_code = ICMP_HOST_UNREACH; 461*1da177e4SLinus Torvalds break; 462*1da177e4SLinus Torvalds } 463*1da177e4SLinus Torvalds break; 464*1da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 465*1da177e4SLinus Torvalds if (code != ICMP_EXC_TTL) 466*1da177e4SLinus Torvalds return; 467*1da177e4SLinus Torvalds break; 468*1da177e4SLinus Torvalds } 469*1da177e4SLinus Torvalds 470*1da177e4SLinus Torvalds /* Prepare fake skb to feed it to icmp_send */ 471*1da177e4SLinus Torvalds skb2 = skb_clone(skb, GFP_ATOMIC); 472*1da177e4SLinus Torvalds if (skb2 == NULL) 473*1da177e4SLinus Torvalds return; 474*1da177e4SLinus Torvalds dst_release(skb2->dst); 475*1da177e4SLinus Torvalds skb2->dst = NULL; 476*1da177e4SLinus Torvalds skb_pull(skb2, skb->data - (u8*)eiph); 477*1da177e4SLinus Torvalds skb2->nh.raw = skb2->data; 478*1da177e4SLinus Torvalds 479*1da177e4SLinus Torvalds /* Try to guess incoming interface */ 480*1da177e4SLinus Torvalds memset(&fl, 0, sizeof(fl)); 481*1da177e4SLinus Torvalds fl.fl4_dst = eiph->saddr; 
482*1da177e4SLinus Torvalds fl.fl4_tos = RT_TOS(eiph->tos); 483*1da177e4SLinus Torvalds fl.proto = IPPROTO_GRE; 484*1da177e4SLinus Torvalds if (ip_route_output_key(&rt, &fl)) { 485*1da177e4SLinus Torvalds kfree_skb(skb2); 486*1da177e4SLinus Torvalds return; 487*1da177e4SLinus Torvalds } 488*1da177e4SLinus Torvalds skb2->dev = rt->u.dst.dev; 489*1da177e4SLinus Torvalds 490*1da177e4SLinus Torvalds /* route "incoming" packet */ 491*1da177e4SLinus Torvalds if (rt->rt_flags&RTCF_LOCAL) { 492*1da177e4SLinus Torvalds ip_rt_put(rt); 493*1da177e4SLinus Torvalds rt = NULL; 494*1da177e4SLinus Torvalds fl.fl4_dst = eiph->daddr; 495*1da177e4SLinus Torvalds fl.fl4_src = eiph->saddr; 496*1da177e4SLinus Torvalds fl.fl4_tos = eiph->tos; 497*1da177e4SLinus Torvalds if (ip_route_output_key(&rt, &fl) || 498*1da177e4SLinus Torvalds rt->u.dst.dev->type != ARPHRD_IPGRE) { 499*1da177e4SLinus Torvalds ip_rt_put(rt); 500*1da177e4SLinus Torvalds kfree_skb(skb2); 501*1da177e4SLinus Torvalds return; 502*1da177e4SLinus Torvalds } 503*1da177e4SLinus Torvalds } else { 504*1da177e4SLinus Torvalds ip_rt_put(rt); 505*1da177e4SLinus Torvalds if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || 506*1da177e4SLinus Torvalds skb2->dst->dev->type != ARPHRD_IPGRE) { 507*1da177e4SLinus Torvalds kfree_skb(skb2); 508*1da177e4SLinus Torvalds return; 509*1da177e4SLinus Torvalds } 510*1da177e4SLinus Torvalds } 511*1da177e4SLinus Torvalds 512*1da177e4SLinus Torvalds /* change mtu on this route */ 513*1da177e4SLinus Torvalds if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 514*1da177e4SLinus Torvalds if (rel_info > dst_mtu(skb2->dst)) { 515*1da177e4SLinus Torvalds kfree_skb(skb2); 516*1da177e4SLinus Torvalds return; 517*1da177e4SLinus Torvalds } 518*1da177e4SLinus Torvalds skb2->dst->ops->update_pmtu(skb2->dst, rel_info); 519*1da177e4SLinus Torvalds rel_info = htonl(rel_info); 520*1da177e4SLinus Torvalds } else if (type == ICMP_TIME_EXCEEDED) { 521*1da177e4SLinus Torvalds struct 
ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv; 522*1da177e4SLinus Torvalds if (t->parms.iph.ttl) { 523*1da177e4SLinus Torvalds rel_type = ICMP_DEST_UNREACH; 524*1da177e4SLinus Torvalds rel_code = ICMP_HOST_UNREACH; 525*1da177e4SLinus Torvalds } 526*1da177e4SLinus Torvalds } 527*1da177e4SLinus Torvalds 528*1da177e4SLinus Torvalds icmp_send(skb2, rel_type, rel_code, rel_info); 529*1da177e4SLinus Torvalds kfree_skb(skb2); 530*1da177e4SLinus Torvalds #endif 531*1da177e4SLinus Torvalds } 532*1da177e4SLinus Torvalds 533*1da177e4SLinus Torvalds static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) 534*1da177e4SLinus Torvalds { 535*1da177e4SLinus Torvalds if (INET_ECN_is_ce(iph->tos)) { 536*1da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 537*1da177e4SLinus Torvalds IP_ECN_set_ce(skb->nh.iph); 538*1da177e4SLinus Torvalds } else if (skb->protocol == htons(ETH_P_IPV6)) { 539*1da177e4SLinus Torvalds IP6_ECN_set_ce(skb->nh.ipv6h); 540*1da177e4SLinus Torvalds } 541*1da177e4SLinus Torvalds } 542*1da177e4SLinus Torvalds } 543*1da177e4SLinus Torvalds 544*1da177e4SLinus Torvalds static inline u8 545*1da177e4SLinus Torvalds ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb) 546*1da177e4SLinus Torvalds { 547*1da177e4SLinus Torvalds u8 inner = 0; 548*1da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 549*1da177e4SLinus Torvalds inner = old_iph->tos; 550*1da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) 551*1da177e4SLinus Torvalds inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph); 552*1da177e4SLinus Torvalds return INET_ECN_encapsulate(tos, inner); 553*1da177e4SLinus Torvalds } 554*1da177e4SLinus Torvalds 555*1da177e4SLinus Torvalds static int ipgre_rcv(struct sk_buff *skb) 556*1da177e4SLinus Torvalds { 557*1da177e4SLinus Torvalds struct iphdr *iph; 558*1da177e4SLinus Torvalds u8 *h; 559*1da177e4SLinus Torvalds u16 flags; 560*1da177e4SLinus Torvalds u16 csum = 0; 561*1da177e4SLinus 
Torvalds u32 key = 0; 562*1da177e4SLinus Torvalds u32 seqno = 0; 563*1da177e4SLinus Torvalds struct ip_tunnel *tunnel; 564*1da177e4SLinus Torvalds int offset = 4; 565*1da177e4SLinus Torvalds 566*1da177e4SLinus Torvalds if (!pskb_may_pull(skb, 16)) 567*1da177e4SLinus Torvalds goto drop_nolock; 568*1da177e4SLinus Torvalds 569*1da177e4SLinus Torvalds iph = skb->nh.iph; 570*1da177e4SLinus Torvalds h = skb->data; 571*1da177e4SLinus Torvalds flags = *(u16*)h; 572*1da177e4SLinus Torvalds 573*1da177e4SLinus Torvalds if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { 574*1da177e4SLinus Torvalds /* - Version must be 0. 575*1da177e4SLinus Torvalds - We do not support routing headers. 576*1da177e4SLinus Torvalds */ 577*1da177e4SLinus Torvalds if (flags&(GRE_VERSION|GRE_ROUTING)) 578*1da177e4SLinus Torvalds goto drop_nolock; 579*1da177e4SLinus Torvalds 580*1da177e4SLinus Torvalds if (flags&GRE_CSUM) { 581*1da177e4SLinus Torvalds if (skb->ip_summed == CHECKSUM_HW) { 582*1da177e4SLinus Torvalds csum = (u16)csum_fold(skb->csum); 583*1da177e4SLinus Torvalds if (csum) 584*1da177e4SLinus Torvalds skb->ip_summed = CHECKSUM_NONE; 585*1da177e4SLinus Torvalds } 586*1da177e4SLinus Torvalds if (skb->ip_summed == CHECKSUM_NONE) { 587*1da177e4SLinus Torvalds skb->csum = skb_checksum(skb, 0, skb->len, 0); 588*1da177e4SLinus Torvalds skb->ip_summed = CHECKSUM_HW; 589*1da177e4SLinus Torvalds csum = (u16)csum_fold(skb->csum); 590*1da177e4SLinus Torvalds } 591*1da177e4SLinus Torvalds offset += 4; 592*1da177e4SLinus Torvalds } 593*1da177e4SLinus Torvalds if (flags&GRE_KEY) { 594*1da177e4SLinus Torvalds key = *(u32*)(h + offset); 595*1da177e4SLinus Torvalds offset += 4; 596*1da177e4SLinus Torvalds } 597*1da177e4SLinus Torvalds if (flags&GRE_SEQ) { 598*1da177e4SLinus Torvalds seqno = ntohl(*(u32*)(h + offset)); 599*1da177e4SLinus Torvalds offset += 4; 600*1da177e4SLinus Torvalds } 601*1da177e4SLinus Torvalds } 602*1da177e4SLinus Torvalds 603*1da177e4SLinus Torvalds 
read_lock(&ipgre_lock); 604*1da177e4SLinus Torvalds if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) { 605*1da177e4SLinus Torvalds secpath_reset(skb); 606*1da177e4SLinus Torvalds 607*1da177e4SLinus Torvalds skb->protocol = *(u16*)(h + 2); 608*1da177e4SLinus Torvalds /* WCCP version 1 and 2 protocol decoding. 609*1da177e4SLinus Torvalds * - Change protocol to IP 610*1da177e4SLinus Torvalds * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header 611*1da177e4SLinus Torvalds */ 612*1da177e4SLinus Torvalds if (flags == 0 && 613*1da177e4SLinus Torvalds skb->protocol == __constant_htons(ETH_P_WCCP)) { 614*1da177e4SLinus Torvalds skb->protocol = __constant_htons(ETH_P_IP); 615*1da177e4SLinus Torvalds if ((*(h + offset) & 0xF0) != 0x40) 616*1da177e4SLinus Torvalds offset += 4; 617*1da177e4SLinus Torvalds } 618*1da177e4SLinus Torvalds 619*1da177e4SLinus Torvalds skb->mac.raw = skb->nh.raw; 620*1da177e4SLinus Torvalds skb->nh.raw = __pskb_pull(skb, offset); 621*1da177e4SLinus Torvalds skb_postpull_rcsum(skb, skb->mac.raw, offset); 622*1da177e4SLinus Torvalds memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); 623*1da177e4SLinus Torvalds skb->pkt_type = PACKET_HOST; 624*1da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST 625*1da177e4SLinus Torvalds if (MULTICAST(iph->daddr)) { 626*1da177e4SLinus Torvalds /* Looped back packet, drop it! 
*/ 627*1da177e4SLinus Torvalds if (((struct rtable*)skb->dst)->fl.iif == 0) 628*1da177e4SLinus Torvalds goto drop; 629*1da177e4SLinus Torvalds tunnel->stat.multicast++; 630*1da177e4SLinus Torvalds skb->pkt_type = PACKET_BROADCAST; 631*1da177e4SLinus Torvalds } 632*1da177e4SLinus Torvalds #endif 633*1da177e4SLinus Torvalds 634*1da177e4SLinus Torvalds if (((flags&GRE_CSUM) && csum) || 635*1da177e4SLinus Torvalds (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 636*1da177e4SLinus Torvalds tunnel->stat.rx_crc_errors++; 637*1da177e4SLinus Torvalds tunnel->stat.rx_errors++; 638*1da177e4SLinus Torvalds goto drop; 639*1da177e4SLinus Torvalds } 640*1da177e4SLinus Torvalds if (tunnel->parms.i_flags&GRE_SEQ) { 641*1da177e4SLinus Torvalds if (!(flags&GRE_SEQ) || 642*1da177e4SLinus Torvalds (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 643*1da177e4SLinus Torvalds tunnel->stat.rx_fifo_errors++; 644*1da177e4SLinus Torvalds tunnel->stat.rx_errors++; 645*1da177e4SLinus Torvalds goto drop; 646*1da177e4SLinus Torvalds } 647*1da177e4SLinus Torvalds tunnel->i_seqno = seqno + 1; 648*1da177e4SLinus Torvalds } 649*1da177e4SLinus Torvalds tunnel->stat.rx_packets++; 650*1da177e4SLinus Torvalds tunnel->stat.rx_bytes += skb->len; 651*1da177e4SLinus Torvalds skb->dev = tunnel->dev; 652*1da177e4SLinus Torvalds dst_release(skb->dst); 653*1da177e4SLinus Torvalds skb->dst = NULL; 654*1da177e4SLinus Torvalds nf_reset(skb); 655*1da177e4SLinus Torvalds ipgre_ecn_decapsulate(iph, skb); 656*1da177e4SLinus Torvalds netif_rx(skb); 657*1da177e4SLinus Torvalds read_unlock(&ipgre_lock); 658*1da177e4SLinus Torvalds return(0); 659*1da177e4SLinus Torvalds } 660*1da177e4SLinus Torvalds icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); 661*1da177e4SLinus Torvalds 662*1da177e4SLinus Torvalds drop: 663*1da177e4SLinus Torvalds read_unlock(&ipgre_lock); 664*1da177e4SLinus Torvalds drop_nolock: 665*1da177e4SLinus Torvalds kfree_skb(skb); 666*1da177e4SLinus Torvalds return(0); 
667*1da177e4SLinus Torvalds } 668*1da177e4SLinus Torvalds 669*1da177e4SLinus Torvalds static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 670*1da177e4SLinus Torvalds { 671*1da177e4SLinus Torvalds struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 672*1da177e4SLinus Torvalds struct net_device_stats *stats = &tunnel->stat; 673*1da177e4SLinus Torvalds struct iphdr *old_iph = skb->nh.iph; 674*1da177e4SLinus Torvalds struct iphdr *tiph; 675*1da177e4SLinus Torvalds u8 tos; 676*1da177e4SLinus Torvalds u16 df; 677*1da177e4SLinus Torvalds struct rtable *rt; /* Route to the other host */ 678*1da177e4SLinus Torvalds struct net_device *tdev; /* Device to other host */ 679*1da177e4SLinus Torvalds struct iphdr *iph; /* Our new IP header */ 680*1da177e4SLinus Torvalds int max_headroom; /* The extra header space needed */ 681*1da177e4SLinus Torvalds int gre_hlen; 682*1da177e4SLinus Torvalds u32 dst; 683*1da177e4SLinus Torvalds int mtu; 684*1da177e4SLinus Torvalds 685*1da177e4SLinus Torvalds if (tunnel->recursion++) { 686*1da177e4SLinus Torvalds tunnel->stat.collisions++; 687*1da177e4SLinus Torvalds goto tx_error; 688*1da177e4SLinus Torvalds } 689*1da177e4SLinus Torvalds 690*1da177e4SLinus Torvalds if (dev->hard_header) { 691*1da177e4SLinus Torvalds gre_hlen = 0; 692*1da177e4SLinus Torvalds tiph = (struct iphdr*)skb->data; 693*1da177e4SLinus Torvalds } else { 694*1da177e4SLinus Torvalds gre_hlen = tunnel->hlen; 695*1da177e4SLinus Torvalds tiph = &tunnel->parms.iph; 696*1da177e4SLinus Torvalds } 697*1da177e4SLinus Torvalds 698*1da177e4SLinus Torvalds if ((dst = tiph->daddr) == 0) { 699*1da177e4SLinus Torvalds /* NBMA tunnel */ 700*1da177e4SLinus Torvalds 701*1da177e4SLinus Torvalds if (skb->dst == NULL) { 702*1da177e4SLinus Torvalds tunnel->stat.tx_fifo_errors++; 703*1da177e4SLinus Torvalds goto tx_error; 704*1da177e4SLinus Torvalds } 705*1da177e4SLinus Torvalds 706*1da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 707*1da177e4SLinus 
Torvalds rt = (struct rtable*)skb->dst; 708*1da177e4SLinus Torvalds if ((dst = rt->rt_gateway) == 0) 709*1da177e4SLinus Torvalds goto tx_error_icmp; 710*1da177e4SLinus Torvalds } 711*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6 712*1da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) { 713*1da177e4SLinus Torvalds struct in6_addr *addr6; 714*1da177e4SLinus Torvalds int addr_type; 715*1da177e4SLinus Torvalds struct neighbour *neigh = skb->dst->neighbour; 716*1da177e4SLinus Torvalds 717*1da177e4SLinus Torvalds if (neigh == NULL) 718*1da177e4SLinus Torvalds goto tx_error; 719*1da177e4SLinus Torvalds 720*1da177e4SLinus Torvalds addr6 = (struct in6_addr*)&neigh->primary_key; 721*1da177e4SLinus Torvalds addr_type = ipv6_addr_type(addr6); 722*1da177e4SLinus Torvalds 723*1da177e4SLinus Torvalds if (addr_type == IPV6_ADDR_ANY) { 724*1da177e4SLinus Torvalds addr6 = &skb->nh.ipv6h->daddr; 725*1da177e4SLinus Torvalds addr_type = ipv6_addr_type(addr6); 726*1da177e4SLinus Torvalds } 727*1da177e4SLinus Torvalds 728*1da177e4SLinus Torvalds if ((addr_type & IPV6_ADDR_COMPATv4) == 0) 729*1da177e4SLinus Torvalds goto tx_error_icmp; 730*1da177e4SLinus Torvalds 731*1da177e4SLinus Torvalds dst = addr6->s6_addr32[3]; 732*1da177e4SLinus Torvalds } 733*1da177e4SLinus Torvalds #endif 734*1da177e4SLinus Torvalds else 735*1da177e4SLinus Torvalds goto tx_error; 736*1da177e4SLinus Torvalds } 737*1da177e4SLinus Torvalds 738*1da177e4SLinus Torvalds tos = tiph->tos; 739*1da177e4SLinus Torvalds if (tos&1) { 740*1da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 741*1da177e4SLinus Torvalds tos = old_iph->tos; 742*1da177e4SLinus Torvalds tos &= ~1; 743*1da177e4SLinus Torvalds } 744*1da177e4SLinus Torvalds 745*1da177e4SLinus Torvalds { 746*1da177e4SLinus Torvalds struct flowi fl = { .oif = tunnel->parms.link, 747*1da177e4SLinus Torvalds .nl_u = { .ip4_u = 748*1da177e4SLinus Torvalds { .daddr = dst, 749*1da177e4SLinus Torvalds .saddr = tiph->saddr, 750*1da177e4SLinus Torvalds 
.tos = RT_TOS(tos) } }, 751*1da177e4SLinus Torvalds .proto = IPPROTO_GRE }; 752*1da177e4SLinus Torvalds if (ip_route_output_key(&rt, &fl)) { 753*1da177e4SLinus Torvalds tunnel->stat.tx_carrier_errors++; 754*1da177e4SLinus Torvalds goto tx_error; 755*1da177e4SLinus Torvalds } 756*1da177e4SLinus Torvalds } 757*1da177e4SLinus Torvalds tdev = rt->u.dst.dev; 758*1da177e4SLinus Torvalds 759*1da177e4SLinus Torvalds if (tdev == dev) { 760*1da177e4SLinus Torvalds ip_rt_put(rt); 761*1da177e4SLinus Torvalds tunnel->stat.collisions++; 762*1da177e4SLinus Torvalds goto tx_error; 763*1da177e4SLinus Torvalds } 764*1da177e4SLinus Torvalds 765*1da177e4SLinus Torvalds df = tiph->frag_off; 766*1da177e4SLinus Torvalds if (df) 767*1da177e4SLinus Torvalds mtu = dst_mtu(&rt->u.dst) - tunnel->hlen; 768*1da177e4SLinus Torvalds else 769*1da177e4SLinus Torvalds mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; 770*1da177e4SLinus Torvalds 771*1da177e4SLinus Torvalds if (skb->dst) 772*1da177e4SLinus Torvalds skb->dst->ops->update_pmtu(skb->dst, mtu); 773*1da177e4SLinus Torvalds 774*1da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) { 775*1da177e4SLinus Torvalds df |= (old_iph->frag_off&htons(IP_DF)); 776*1da177e4SLinus Torvalds 777*1da177e4SLinus Torvalds if ((old_iph->frag_off&htons(IP_DF)) && 778*1da177e4SLinus Torvalds mtu < ntohs(old_iph->tot_len)) { 779*1da177e4SLinus Torvalds icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 780*1da177e4SLinus Torvalds ip_rt_put(rt); 781*1da177e4SLinus Torvalds goto tx_error; 782*1da177e4SLinus Torvalds } 783*1da177e4SLinus Torvalds } 784*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6 785*1da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) { 786*1da177e4SLinus Torvalds struct rt6_info *rt6 = (struct rt6_info*)skb->dst; 787*1da177e4SLinus Torvalds 788*1da177e4SLinus Torvalds if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) { 789*1da177e4SLinus Torvalds if ((tunnel->parms.iph.daddr && 
!MULTICAST(tunnel->parms.iph.daddr)) || 790*1da177e4SLinus Torvalds rt6->rt6i_dst.plen == 128) { 791*1da177e4SLinus Torvalds rt6->rt6i_flags |= RTF_MODIFIED; 792*1da177e4SLinus Torvalds skb->dst->metrics[RTAX_MTU-1] = mtu; 793*1da177e4SLinus Torvalds } 794*1da177e4SLinus Torvalds } 795*1da177e4SLinus Torvalds 796*1da177e4SLinus Torvalds if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { 797*1da177e4SLinus Torvalds icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); 798*1da177e4SLinus Torvalds ip_rt_put(rt); 799*1da177e4SLinus Torvalds goto tx_error; 800*1da177e4SLinus Torvalds } 801*1da177e4SLinus Torvalds } 802*1da177e4SLinus Torvalds #endif 803*1da177e4SLinus Torvalds 804*1da177e4SLinus Torvalds if (tunnel->err_count > 0) { 805*1da177e4SLinus Torvalds if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 806*1da177e4SLinus Torvalds tunnel->err_count--; 807*1da177e4SLinus Torvalds 808*1da177e4SLinus Torvalds dst_link_failure(skb); 809*1da177e4SLinus Torvalds } else 810*1da177e4SLinus Torvalds tunnel->err_count = 0; 811*1da177e4SLinus Torvalds } 812*1da177e4SLinus Torvalds 813*1da177e4SLinus Torvalds max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen; 814*1da177e4SLinus Torvalds 815*1da177e4SLinus Torvalds if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { 816*1da177e4SLinus Torvalds struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 817*1da177e4SLinus Torvalds if (!new_skb) { 818*1da177e4SLinus Torvalds ip_rt_put(rt); 819*1da177e4SLinus Torvalds stats->tx_dropped++; 820*1da177e4SLinus Torvalds dev_kfree_skb(skb); 821*1da177e4SLinus Torvalds tunnel->recursion--; 822*1da177e4SLinus Torvalds return 0; 823*1da177e4SLinus Torvalds } 824*1da177e4SLinus Torvalds if (skb->sk) 825*1da177e4SLinus Torvalds skb_set_owner_w(new_skb, skb->sk); 826*1da177e4SLinus Torvalds dev_kfree_skb(skb); 827*1da177e4SLinus Torvalds skb = new_skb; 828*1da177e4SLinus Torvalds old_iph = skb->nh.iph; 829*1da177e4SLinus Torvalds } 
830*1da177e4SLinus Torvalds 831*1da177e4SLinus Torvalds skb->h.raw = skb->nh.raw; 832*1da177e4SLinus Torvalds skb->nh.raw = skb_push(skb, gre_hlen); 833*1da177e4SLinus Torvalds memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 834*1da177e4SLinus Torvalds dst_release(skb->dst); 835*1da177e4SLinus Torvalds skb->dst = &rt->u.dst; 836*1da177e4SLinus Torvalds 837*1da177e4SLinus Torvalds /* 838*1da177e4SLinus Torvalds * Push down and install the IPIP header. 839*1da177e4SLinus Torvalds */ 840*1da177e4SLinus Torvalds 841*1da177e4SLinus Torvalds iph = skb->nh.iph; 842*1da177e4SLinus Torvalds iph->version = 4; 843*1da177e4SLinus Torvalds iph->ihl = sizeof(struct iphdr) >> 2; 844*1da177e4SLinus Torvalds iph->frag_off = df; 845*1da177e4SLinus Torvalds iph->protocol = IPPROTO_GRE; 846*1da177e4SLinus Torvalds iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 847*1da177e4SLinus Torvalds iph->daddr = rt->rt_dst; 848*1da177e4SLinus Torvalds iph->saddr = rt->rt_src; 849*1da177e4SLinus Torvalds 850*1da177e4SLinus Torvalds if ((iph->ttl = tiph->ttl) == 0) { 851*1da177e4SLinus Torvalds if (skb->protocol == htons(ETH_P_IP)) 852*1da177e4SLinus Torvalds iph->ttl = old_iph->ttl; 853*1da177e4SLinus Torvalds #ifdef CONFIG_IPV6 854*1da177e4SLinus Torvalds else if (skb->protocol == htons(ETH_P_IPV6)) 855*1da177e4SLinus Torvalds iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit; 856*1da177e4SLinus Torvalds #endif 857*1da177e4SLinus Torvalds else 858*1da177e4SLinus Torvalds iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); 859*1da177e4SLinus Torvalds } 860*1da177e4SLinus Torvalds 861*1da177e4SLinus Torvalds ((u16*)(iph+1))[0] = tunnel->parms.o_flags; 862*1da177e4SLinus Torvalds ((u16*)(iph+1))[1] = skb->protocol; 863*1da177e4SLinus Torvalds 864*1da177e4SLinus Torvalds if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { 865*1da177e4SLinus Torvalds u32 *ptr = (u32*)(((u8*)iph) + tunnel->hlen - 4); 866*1da177e4SLinus Torvalds 867*1da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_SEQ) 
{ 868*1da177e4SLinus Torvalds ++tunnel->o_seqno; 869*1da177e4SLinus Torvalds *ptr = htonl(tunnel->o_seqno); 870*1da177e4SLinus Torvalds ptr--; 871*1da177e4SLinus Torvalds } 872*1da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_KEY) { 873*1da177e4SLinus Torvalds *ptr = tunnel->parms.o_key; 874*1da177e4SLinus Torvalds ptr--; 875*1da177e4SLinus Torvalds } 876*1da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_CSUM) { 877*1da177e4SLinus Torvalds *ptr = 0; 878*1da177e4SLinus Torvalds *(__u16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr)); 879*1da177e4SLinus Torvalds } 880*1da177e4SLinus Torvalds } 881*1da177e4SLinus Torvalds 882*1da177e4SLinus Torvalds nf_reset(skb); 883*1da177e4SLinus Torvalds 884*1da177e4SLinus Torvalds IPTUNNEL_XMIT(); 885*1da177e4SLinus Torvalds tunnel->recursion--; 886*1da177e4SLinus Torvalds return 0; 887*1da177e4SLinus Torvalds 888*1da177e4SLinus Torvalds tx_error_icmp: 889*1da177e4SLinus Torvalds dst_link_failure(skb); 890*1da177e4SLinus Torvalds 891*1da177e4SLinus Torvalds tx_error: 892*1da177e4SLinus Torvalds stats->tx_errors++; 893*1da177e4SLinus Torvalds dev_kfree_skb(skb); 894*1da177e4SLinus Torvalds tunnel->recursion--; 895*1da177e4SLinus Torvalds return 0; 896*1da177e4SLinus Torvalds } 897*1da177e4SLinus Torvalds 898*1da177e4SLinus Torvalds static int 899*1da177e4SLinus Torvalds ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 900*1da177e4SLinus Torvalds { 901*1da177e4SLinus Torvalds int err = 0; 902*1da177e4SLinus Torvalds struct ip_tunnel_parm p; 903*1da177e4SLinus Torvalds struct ip_tunnel *t; 904*1da177e4SLinus Torvalds 905*1da177e4SLinus Torvalds switch (cmd) { 906*1da177e4SLinus Torvalds case SIOCGETTUNNEL: 907*1da177e4SLinus Torvalds t = NULL; 908*1da177e4SLinus Torvalds if (dev == ipgre_fb_tunnel_dev) { 909*1da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { 910*1da177e4SLinus Torvalds err = -EFAULT; 911*1da177e4SLinus Torvalds break; 
912*1da177e4SLinus Torvalds } 913*1da177e4SLinus Torvalds t = ipgre_tunnel_locate(&p, 0); 914*1da177e4SLinus Torvalds } 915*1da177e4SLinus Torvalds if (t == NULL) 916*1da177e4SLinus Torvalds t = (struct ip_tunnel*)dev->priv; 917*1da177e4SLinus Torvalds memcpy(&p, &t->parms, sizeof(p)); 918*1da177e4SLinus Torvalds if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 919*1da177e4SLinus Torvalds err = -EFAULT; 920*1da177e4SLinus Torvalds break; 921*1da177e4SLinus Torvalds 922*1da177e4SLinus Torvalds case SIOCADDTUNNEL: 923*1da177e4SLinus Torvalds case SIOCCHGTUNNEL: 924*1da177e4SLinus Torvalds err = -EPERM; 925*1da177e4SLinus Torvalds if (!capable(CAP_NET_ADMIN)) 926*1da177e4SLinus Torvalds goto done; 927*1da177e4SLinus Torvalds 928*1da177e4SLinus Torvalds err = -EFAULT; 929*1da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 930*1da177e4SLinus Torvalds goto done; 931*1da177e4SLinus Torvalds 932*1da177e4SLinus Torvalds err = -EINVAL; 933*1da177e4SLinus Torvalds if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || 934*1da177e4SLinus Torvalds p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || 935*1da177e4SLinus Torvalds ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) 936*1da177e4SLinus Torvalds goto done; 937*1da177e4SLinus Torvalds if (p.iph.ttl) 938*1da177e4SLinus Torvalds p.iph.frag_off |= htons(IP_DF); 939*1da177e4SLinus Torvalds 940*1da177e4SLinus Torvalds if (!(p.i_flags&GRE_KEY)) 941*1da177e4SLinus Torvalds p.i_key = 0; 942*1da177e4SLinus Torvalds if (!(p.o_flags&GRE_KEY)) 943*1da177e4SLinus Torvalds p.o_key = 0; 944*1da177e4SLinus Torvalds 945*1da177e4SLinus Torvalds t = ipgre_tunnel_locate(&p, cmd == SIOCADDTUNNEL); 946*1da177e4SLinus Torvalds 947*1da177e4SLinus Torvalds if (dev != ipgre_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 948*1da177e4SLinus Torvalds if (t != NULL) { 949*1da177e4SLinus Torvalds if (t->dev != dev) { 950*1da177e4SLinus Torvalds err = -EEXIST; 951*1da177e4SLinus Torvalds break; 952*1da177e4SLinus 
Torvalds } 953*1da177e4SLinus Torvalds } else { 954*1da177e4SLinus Torvalds unsigned nflags=0; 955*1da177e4SLinus Torvalds 956*1da177e4SLinus Torvalds t = (struct ip_tunnel*)dev->priv; 957*1da177e4SLinus Torvalds 958*1da177e4SLinus Torvalds if (MULTICAST(p.iph.daddr)) 959*1da177e4SLinus Torvalds nflags = IFF_BROADCAST; 960*1da177e4SLinus Torvalds else if (p.iph.daddr) 961*1da177e4SLinus Torvalds nflags = IFF_POINTOPOINT; 962*1da177e4SLinus Torvalds 963*1da177e4SLinus Torvalds if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { 964*1da177e4SLinus Torvalds err = -EINVAL; 965*1da177e4SLinus Torvalds break; 966*1da177e4SLinus Torvalds } 967*1da177e4SLinus Torvalds ipgre_tunnel_unlink(t); 968*1da177e4SLinus Torvalds t->parms.iph.saddr = p.iph.saddr; 969*1da177e4SLinus Torvalds t->parms.iph.daddr = p.iph.daddr; 970*1da177e4SLinus Torvalds t->parms.i_key = p.i_key; 971*1da177e4SLinus Torvalds t->parms.o_key = p.o_key; 972*1da177e4SLinus Torvalds memcpy(dev->dev_addr, &p.iph.saddr, 4); 973*1da177e4SLinus Torvalds memcpy(dev->broadcast, &p.iph.daddr, 4); 974*1da177e4SLinus Torvalds ipgre_tunnel_link(t); 975*1da177e4SLinus Torvalds netdev_state_change(dev); 976*1da177e4SLinus Torvalds } 977*1da177e4SLinus Torvalds } 978*1da177e4SLinus Torvalds 979*1da177e4SLinus Torvalds if (t) { 980*1da177e4SLinus Torvalds err = 0; 981*1da177e4SLinus Torvalds if (cmd == SIOCCHGTUNNEL) { 982*1da177e4SLinus Torvalds t->parms.iph.ttl = p.iph.ttl; 983*1da177e4SLinus Torvalds t->parms.iph.tos = p.iph.tos; 984*1da177e4SLinus Torvalds t->parms.iph.frag_off = p.iph.frag_off; 985*1da177e4SLinus Torvalds } 986*1da177e4SLinus Torvalds if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) 987*1da177e4SLinus Torvalds err = -EFAULT; 988*1da177e4SLinus Torvalds } else 989*1da177e4SLinus Torvalds err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); 990*1da177e4SLinus Torvalds break; 991*1da177e4SLinus Torvalds 992*1da177e4SLinus Torvalds case SIOCDELTUNNEL: 993*1da177e4SLinus Torvalds err = -EPERM; 994*1da177e4SLinus Torvalds if (!capable(CAP_NET_ADMIN)) 995*1da177e4SLinus Torvalds goto done; 996*1da177e4SLinus Torvalds 997*1da177e4SLinus Torvalds if (dev == ipgre_fb_tunnel_dev) { 998*1da177e4SLinus Torvalds err = -EFAULT; 999*1da177e4SLinus Torvalds if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 1000*1da177e4SLinus Torvalds goto done; 1001*1da177e4SLinus Torvalds err = -ENOENT; 1002*1da177e4SLinus Torvalds if ((t = ipgre_tunnel_locate(&p, 0)) == NULL) 1003*1da177e4SLinus Torvalds goto done; 1004*1da177e4SLinus Torvalds err = -EPERM; 1005*1da177e4SLinus Torvalds if (t == ipgre_fb_tunnel_dev->priv) 1006*1da177e4SLinus Torvalds goto done; 1007*1da177e4SLinus Torvalds dev = t->dev; 1008*1da177e4SLinus Torvalds } 1009*1da177e4SLinus Torvalds err = unregister_netdevice(dev); 1010*1da177e4SLinus Torvalds break; 1011*1da177e4SLinus Torvalds 1012*1da177e4SLinus Torvalds default: 1013*1da177e4SLinus Torvalds err = -EINVAL; 1014*1da177e4SLinus Torvalds } 1015*1da177e4SLinus Torvalds 1016*1da177e4SLinus Torvalds done: 1017*1da177e4SLinus Torvalds return err; 1018*1da177e4SLinus Torvalds } 1019*1da177e4SLinus Torvalds 1020*1da177e4SLinus Torvalds static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) 1021*1da177e4SLinus Torvalds { 1022*1da177e4SLinus Torvalds return &(((struct ip_tunnel*)dev->priv)->stat); 1023*1da177e4SLinus Torvalds } 1024*1da177e4SLinus Torvalds 1025*1da177e4SLinus Torvalds static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 1026*1da177e4SLinus Torvalds { 1027*1da177e4SLinus Torvalds struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 1028*1da177e4SLinus Torvalds if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) 1029*1da177e4SLinus Torvalds return -EINVAL; 1030*1da177e4SLinus Torvalds dev->mtu = new_mtu; 
1031*1da177e4SLinus Torvalds return 0; 1032*1da177e4SLinus Torvalds } 1033*1da177e4SLinus Torvalds 1034*1da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST 1035*1da177e4SLinus Torvalds /* Nice toy. Unfortunately, useless in real life :-) 1036*1da177e4SLinus Torvalds It allows to construct virtual multiprotocol broadcast "LAN" 1037*1da177e4SLinus Torvalds over the Internet, provided multicast routing is tuned. 1038*1da177e4SLinus Torvalds 1039*1da177e4SLinus Torvalds 1040*1da177e4SLinus Torvalds I have no idea was this bicycle invented before me, 1041*1da177e4SLinus Torvalds so that I had to set ARPHRD_IPGRE to a random value. 1042*1da177e4SLinus Torvalds I have an impression, that Cisco could make something similar, 1043*1da177e4SLinus Torvalds but this feature is apparently missing in IOS<=11.2(8). 1044*1da177e4SLinus Torvalds 1045*1da177e4SLinus Torvalds I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks 1046*1da177e4SLinus Torvalds with broadcast 224.66.66.66. If you have access to mbone, play with me :-) 1047*1da177e4SLinus Torvalds 1048*1da177e4SLinus Torvalds ping -t 255 224.66.66.66 1049*1da177e4SLinus Torvalds 1050*1da177e4SLinus Torvalds If nobody answers, mbone does not work. 1051*1da177e4SLinus Torvalds 1052*1da177e4SLinus Torvalds ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255 1053*1da177e4SLinus Torvalds ip addr add 10.66.66.<somewhat>/24 dev Universe 1054*1da177e4SLinus Torvalds ifconfig Universe up 1055*1da177e4SLinus Torvalds ifconfig Universe add fe80::<Your_real_addr>/10 1056*1da177e4SLinus Torvalds ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96 1057*1da177e4SLinus Torvalds ftp 10.66.66.66 1058*1da177e4SLinus Torvalds ... 1059*1da177e4SLinus Torvalds ftp fec0:6666:6666::193.233.7.65 1060*1da177e4SLinus Torvalds ... 
1061*1da177e4SLinus Torvalds 1062*1da177e4SLinus Torvalds */ 1063*1da177e4SLinus Torvalds 1064*1da177e4SLinus Torvalds static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, 1065*1da177e4SLinus Torvalds void *daddr, void *saddr, unsigned len) 1066*1da177e4SLinus Torvalds { 1067*1da177e4SLinus Torvalds struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1068*1da177e4SLinus Torvalds struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1069*1da177e4SLinus Torvalds u16 *p = (u16*)(iph+1); 1070*1da177e4SLinus Torvalds 1071*1da177e4SLinus Torvalds memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 1072*1da177e4SLinus Torvalds p[0] = t->parms.o_flags; 1073*1da177e4SLinus Torvalds p[1] = htons(type); 1074*1da177e4SLinus Torvalds 1075*1da177e4SLinus Torvalds /* 1076*1da177e4SLinus Torvalds * Set the source hardware address. 1077*1da177e4SLinus Torvalds */ 1078*1da177e4SLinus Torvalds 1079*1da177e4SLinus Torvalds if (saddr) 1080*1da177e4SLinus Torvalds memcpy(&iph->saddr, saddr, 4); 1081*1da177e4SLinus Torvalds 1082*1da177e4SLinus Torvalds if (daddr) { 1083*1da177e4SLinus Torvalds memcpy(&iph->daddr, daddr, 4); 1084*1da177e4SLinus Torvalds return t->hlen; 1085*1da177e4SLinus Torvalds } 1086*1da177e4SLinus Torvalds if (iph->daddr && !MULTICAST(iph->daddr)) 1087*1da177e4SLinus Torvalds return t->hlen; 1088*1da177e4SLinus Torvalds 1089*1da177e4SLinus Torvalds return -t->hlen; 1090*1da177e4SLinus Torvalds } 1091*1da177e4SLinus Torvalds 1092*1da177e4SLinus Torvalds static int ipgre_open(struct net_device *dev) 1093*1da177e4SLinus Torvalds { 1094*1da177e4SLinus Torvalds struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1095*1da177e4SLinus Torvalds 1096*1da177e4SLinus Torvalds if (MULTICAST(t->parms.iph.daddr)) { 1097*1da177e4SLinus Torvalds struct flowi fl = { .oif = t->parms.link, 1098*1da177e4SLinus Torvalds .nl_u = { .ip4_u = 1099*1da177e4SLinus Torvalds { .daddr = t->parms.iph.daddr, 1100*1da177e4SLinus Torvalds .saddr = 
t->parms.iph.saddr, 1101*1da177e4SLinus Torvalds .tos = RT_TOS(t->parms.iph.tos) } }, 1102*1da177e4SLinus Torvalds .proto = IPPROTO_GRE }; 1103*1da177e4SLinus Torvalds struct rtable *rt; 1104*1da177e4SLinus Torvalds if (ip_route_output_key(&rt, &fl)) 1105*1da177e4SLinus Torvalds return -EADDRNOTAVAIL; 1106*1da177e4SLinus Torvalds dev = rt->u.dst.dev; 1107*1da177e4SLinus Torvalds ip_rt_put(rt); 1108*1da177e4SLinus Torvalds if (__in_dev_get(dev) == NULL) 1109*1da177e4SLinus Torvalds return -EADDRNOTAVAIL; 1110*1da177e4SLinus Torvalds t->mlink = dev->ifindex; 1111*1da177e4SLinus Torvalds ip_mc_inc_group(__in_dev_get(dev), t->parms.iph.daddr); 1112*1da177e4SLinus Torvalds } 1113*1da177e4SLinus Torvalds return 0; 1114*1da177e4SLinus Torvalds } 1115*1da177e4SLinus Torvalds 1116*1da177e4SLinus Torvalds static int ipgre_close(struct net_device *dev) 1117*1da177e4SLinus Torvalds { 1118*1da177e4SLinus Torvalds struct ip_tunnel *t = (struct ip_tunnel*)dev->priv; 1119*1da177e4SLinus Torvalds if (MULTICAST(t->parms.iph.daddr) && t->mlink) { 1120*1da177e4SLinus Torvalds struct in_device *in_dev = inetdev_by_index(t->mlink); 1121*1da177e4SLinus Torvalds if (in_dev) { 1122*1da177e4SLinus Torvalds ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1123*1da177e4SLinus Torvalds in_dev_put(in_dev); 1124*1da177e4SLinus Torvalds } 1125*1da177e4SLinus Torvalds } 1126*1da177e4SLinus Torvalds return 0; 1127*1da177e4SLinus Torvalds } 1128*1da177e4SLinus Torvalds 1129*1da177e4SLinus Torvalds #endif 1130*1da177e4SLinus Torvalds 1131*1da177e4SLinus Torvalds static void ipgre_tunnel_setup(struct net_device *dev) 1132*1da177e4SLinus Torvalds { 1133*1da177e4SLinus Torvalds SET_MODULE_OWNER(dev); 1134*1da177e4SLinus Torvalds dev->uninit = ipgre_tunnel_uninit; 1135*1da177e4SLinus Torvalds dev->destructor = free_netdev; 1136*1da177e4SLinus Torvalds dev->hard_start_xmit = ipgre_tunnel_xmit; 1137*1da177e4SLinus Torvalds dev->get_stats = ipgre_tunnel_get_stats; 1138*1da177e4SLinus Torvalds dev->do_ioctl = 
ipgre_tunnel_ioctl; 1139*1da177e4SLinus Torvalds dev->change_mtu = ipgre_tunnel_change_mtu; 1140*1da177e4SLinus Torvalds 1141*1da177e4SLinus Torvalds dev->type = ARPHRD_IPGRE; 1142*1da177e4SLinus Torvalds dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1143*1da177e4SLinus Torvalds dev->mtu = 1500 - sizeof(struct iphdr) - 4; 1144*1da177e4SLinus Torvalds dev->flags = IFF_NOARP; 1145*1da177e4SLinus Torvalds dev->iflink = 0; 1146*1da177e4SLinus Torvalds dev->addr_len = 4; 1147*1da177e4SLinus Torvalds } 1148*1da177e4SLinus Torvalds 1149*1da177e4SLinus Torvalds static int ipgre_tunnel_init(struct net_device *dev) 1150*1da177e4SLinus Torvalds { 1151*1da177e4SLinus Torvalds struct net_device *tdev = NULL; 1152*1da177e4SLinus Torvalds struct ip_tunnel *tunnel; 1153*1da177e4SLinus Torvalds struct iphdr *iph; 1154*1da177e4SLinus Torvalds int hlen = LL_MAX_HEADER; 1155*1da177e4SLinus Torvalds int mtu = 1500; 1156*1da177e4SLinus Torvalds int addend = sizeof(struct iphdr) + 4; 1157*1da177e4SLinus Torvalds 1158*1da177e4SLinus Torvalds tunnel = (struct ip_tunnel*)dev->priv; 1159*1da177e4SLinus Torvalds iph = &tunnel->parms.iph; 1160*1da177e4SLinus Torvalds 1161*1da177e4SLinus Torvalds tunnel->dev = dev; 1162*1da177e4SLinus Torvalds strcpy(tunnel->parms.name, dev->name); 1163*1da177e4SLinus Torvalds 1164*1da177e4SLinus Torvalds memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 1165*1da177e4SLinus Torvalds memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 1166*1da177e4SLinus Torvalds 1167*1da177e4SLinus Torvalds /* Guess output device to choose reasonable mtu and hard_header_len */ 1168*1da177e4SLinus Torvalds 1169*1da177e4SLinus Torvalds if (iph->daddr) { 1170*1da177e4SLinus Torvalds struct flowi fl = { .oif = tunnel->parms.link, 1171*1da177e4SLinus Torvalds .nl_u = { .ip4_u = 1172*1da177e4SLinus Torvalds { .daddr = iph->daddr, 1173*1da177e4SLinus Torvalds .saddr = iph->saddr, 1174*1da177e4SLinus Torvalds .tos = RT_TOS(iph->tos) } }, 1175*1da177e4SLinus 
Torvalds .proto = IPPROTO_GRE }; 1176*1da177e4SLinus Torvalds struct rtable *rt; 1177*1da177e4SLinus Torvalds if (!ip_route_output_key(&rt, &fl)) { 1178*1da177e4SLinus Torvalds tdev = rt->u.dst.dev; 1179*1da177e4SLinus Torvalds ip_rt_put(rt); 1180*1da177e4SLinus Torvalds } 1181*1da177e4SLinus Torvalds 1182*1da177e4SLinus Torvalds dev->flags |= IFF_POINTOPOINT; 1183*1da177e4SLinus Torvalds 1184*1da177e4SLinus Torvalds #ifdef CONFIG_NET_IPGRE_BROADCAST 1185*1da177e4SLinus Torvalds if (MULTICAST(iph->daddr)) { 1186*1da177e4SLinus Torvalds if (!iph->saddr) 1187*1da177e4SLinus Torvalds return -EINVAL; 1188*1da177e4SLinus Torvalds dev->flags = IFF_BROADCAST; 1189*1da177e4SLinus Torvalds dev->hard_header = ipgre_header; 1190*1da177e4SLinus Torvalds dev->open = ipgre_open; 1191*1da177e4SLinus Torvalds dev->stop = ipgre_close; 1192*1da177e4SLinus Torvalds } 1193*1da177e4SLinus Torvalds #endif 1194*1da177e4SLinus Torvalds } 1195*1da177e4SLinus Torvalds 1196*1da177e4SLinus Torvalds if (!tdev && tunnel->parms.link) 1197*1da177e4SLinus Torvalds tdev = __dev_get_by_index(tunnel->parms.link); 1198*1da177e4SLinus Torvalds 1199*1da177e4SLinus Torvalds if (tdev) { 1200*1da177e4SLinus Torvalds hlen = tdev->hard_header_len; 1201*1da177e4SLinus Torvalds mtu = tdev->mtu; 1202*1da177e4SLinus Torvalds } 1203*1da177e4SLinus Torvalds dev->iflink = tunnel->parms.link; 1204*1da177e4SLinus Torvalds 1205*1da177e4SLinus Torvalds /* Precalculate GRE options length */ 1206*1da177e4SLinus Torvalds if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { 1207*1da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_CSUM) 1208*1da177e4SLinus Torvalds addend += 4; 1209*1da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_KEY) 1210*1da177e4SLinus Torvalds addend += 4; 1211*1da177e4SLinus Torvalds if (tunnel->parms.o_flags&GRE_SEQ) 1212*1da177e4SLinus Torvalds addend += 4; 1213*1da177e4SLinus Torvalds } 1214*1da177e4SLinus Torvalds dev->hard_header_len = hlen + addend; 1215*1da177e4SLinus Torvalds dev->mtu 
= mtu - addend; 1216*1da177e4SLinus Torvalds tunnel->hlen = addend; 1217*1da177e4SLinus Torvalds return 0; 1218*1da177e4SLinus Torvalds } 1219*1da177e4SLinus Torvalds 1220*1da177e4SLinus Torvalds int __init ipgre_fb_tunnel_init(struct net_device *dev) 1221*1da177e4SLinus Torvalds { 1222*1da177e4SLinus Torvalds struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv; 1223*1da177e4SLinus Torvalds struct iphdr *iph = &tunnel->parms.iph; 1224*1da177e4SLinus Torvalds 1225*1da177e4SLinus Torvalds tunnel->dev = dev; 1226*1da177e4SLinus Torvalds strcpy(tunnel->parms.name, dev->name); 1227*1da177e4SLinus Torvalds 1228*1da177e4SLinus Torvalds iph->version = 4; 1229*1da177e4SLinus Torvalds iph->protocol = IPPROTO_GRE; 1230*1da177e4SLinus Torvalds iph->ihl = 5; 1231*1da177e4SLinus Torvalds tunnel->hlen = sizeof(struct iphdr) + 4; 1232*1da177e4SLinus Torvalds 1233*1da177e4SLinus Torvalds dev_hold(dev); 1234*1da177e4SLinus Torvalds tunnels_wc[0] = tunnel; 1235*1da177e4SLinus Torvalds return 0; 1236*1da177e4SLinus Torvalds } 1237*1da177e4SLinus Torvalds 1238*1da177e4SLinus Torvalds 1239*1da177e4SLinus Torvalds static struct net_protocol ipgre_protocol = { 1240*1da177e4SLinus Torvalds .handler = ipgre_rcv, 1241*1da177e4SLinus Torvalds .err_handler = ipgre_err, 1242*1da177e4SLinus Torvalds }; 1243*1da177e4SLinus Torvalds 1244*1da177e4SLinus Torvalds 1245*1da177e4SLinus Torvalds /* 1246*1da177e4SLinus Torvalds * And now the modules code and kernel interface. 
1247*1da177e4SLinus Torvalds */ 1248*1da177e4SLinus Torvalds 1249*1da177e4SLinus Torvalds static int __init ipgre_init(void) 1250*1da177e4SLinus Torvalds { 1251*1da177e4SLinus Torvalds int err; 1252*1da177e4SLinus Torvalds 1253*1da177e4SLinus Torvalds printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); 1254*1da177e4SLinus Torvalds 1255*1da177e4SLinus Torvalds if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) { 1256*1da177e4SLinus Torvalds printk(KERN_INFO "ipgre init: can't add protocol\n"); 1257*1da177e4SLinus Torvalds return -EAGAIN; 1258*1da177e4SLinus Torvalds } 1259*1da177e4SLinus Torvalds 1260*1da177e4SLinus Torvalds ipgre_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", 1261*1da177e4SLinus Torvalds ipgre_tunnel_setup); 1262*1da177e4SLinus Torvalds if (!ipgre_fb_tunnel_dev) { 1263*1da177e4SLinus Torvalds err = -ENOMEM; 1264*1da177e4SLinus Torvalds goto err1; 1265*1da177e4SLinus Torvalds } 1266*1da177e4SLinus Torvalds 1267*1da177e4SLinus Torvalds ipgre_fb_tunnel_dev->init = ipgre_fb_tunnel_init; 1268*1da177e4SLinus Torvalds 1269*1da177e4SLinus Torvalds if ((err = register_netdev(ipgre_fb_tunnel_dev))) 1270*1da177e4SLinus Torvalds goto err2; 1271*1da177e4SLinus Torvalds out: 1272*1da177e4SLinus Torvalds return err; 1273*1da177e4SLinus Torvalds err2: 1274*1da177e4SLinus Torvalds free_netdev(ipgre_fb_tunnel_dev); 1275*1da177e4SLinus Torvalds err1: 1276*1da177e4SLinus Torvalds inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1277*1da177e4SLinus Torvalds goto out; 1278*1da177e4SLinus Torvalds } 1279*1da177e4SLinus Torvalds 1280*1da177e4SLinus Torvalds static void ipgre_fini(void) 1281*1da177e4SLinus Torvalds { 1282*1da177e4SLinus Torvalds if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1283*1da177e4SLinus Torvalds printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1284*1da177e4SLinus Torvalds 1285*1da177e4SLinus Torvalds unregister_netdev(ipgre_fb_tunnel_dev); 1286*1da177e4SLinus Torvalds } 1287*1da177e4SLinus Torvalds 
1288*1da177e4SLinus Torvalds module_init(ipgre_init); 1289*1da177e4SLinus Torvalds module_exit(ipgre_fini); 1290*1da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1291