xref: /linux/net/ipv4/ipip.c (revision 153f0943382e9ae0bff7caa110a1a4656088d0d4)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux NET3:	IP/IP protocol decoder.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *	Authors:
51da177e4SLinus Torvalds  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  *	Fixes:
 *		Alan Cox	:	Merged and made usable non modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
101da177e4SLinus Torvalds  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
111da177e4SLinus Torvalds  *					to keep ip_forward happy.
121da177e4SLinus Torvalds  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
131da177e4SLinus Torvalds  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
141da177e4SLinus Torvalds  *              David Woodhouse :       Perform some basic ICMP handling.
151da177e4SLinus Torvalds  *                                      IPIP Routing without decapsulation.
161da177e4SLinus Torvalds  *              Carlos Picoto   :       GRE over IP support
171da177e4SLinus Torvalds  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
181da177e4SLinus Torvalds  *					I do not want to merge them together.
191da177e4SLinus Torvalds  *
201da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
211da177e4SLinus Torvalds  *	modify it under the terms of the GNU General Public License
221da177e4SLinus Torvalds  *	as published by the Free Software Foundation; either version
231da177e4SLinus Torvalds  *	2 of the License, or (at your option) any later version.
241da177e4SLinus Torvalds  *
251da177e4SLinus Torvalds  */
261da177e4SLinus Torvalds 
271da177e4SLinus Torvalds /* tunnel.c: an IP tunnel driver
281da177e4SLinus Torvalds 
291da177e4SLinus Torvalds 	The purpose of this driver is to provide an IP tunnel through
301da177e4SLinus Torvalds 	which you can tunnel network traffic transparently across subnets.
311da177e4SLinus Torvalds 
321da177e4SLinus Torvalds 	This was written by looking at Nick Holloway's dummy driver
331da177e4SLinus Torvalds 	Thanks for the great code!
341da177e4SLinus Torvalds 
351da177e4SLinus Torvalds 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 	Minor tweaks:
381da177e4SLinus Torvalds 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
391da177e4SLinus Torvalds 		dev->hard_header/hard_header_len changed to use no headers.
401da177e4SLinus Torvalds 		Comments/bracketing tweaked.
411da177e4SLinus Torvalds 		Made the tunnels use dev->name not tunnel: when error reporting.
421da177e4SLinus Torvalds 		Added tx_dropped stat
431da177e4SLinus Torvalds 
44113aa838SAlan Cox 		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
451da177e4SLinus Torvalds 
461da177e4SLinus Torvalds 	Reworked:
471da177e4SLinus Torvalds 		Changed to tunnel to destination gateway in addition to the
481da177e4SLinus Torvalds 			tunnel's pointopoint address
491da177e4SLinus Torvalds 		Almost completely rewritten
501da177e4SLinus Torvalds 		Note:  There is currently no firewall or ICMP handling done.
511da177e4SLinus Torvalds 
521da177e4SLinus Torvalds 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
531da177e4SLinus Torvalds 
541da177e4SLinus Torvalds */
551da177e4SLinus Torvalds 
561da177e4SLinus Torvalds /* Things I wish I had known when writing the tunnel driver:
571da177e4SLinus Torvalds 
581da177e4SLinus Torvalds 	When the tunnel_xmit() function is called, the skb contains the
591da177e4SLinus Torvalds 	packet to be sent (plus a great deal of extra info), and dev
601da177e4SLinus Torvalds 	contains the tunnel device that _we_ are.
611da177e4SLinus Torvalds 
621da177e4SLinus Torvalds 	When we are passed a packet, we are expected to fill in the
631da177e4SLinus Torvalds 	source address with our source IP address.
641da177e4SLinus Torvalds 
651da177e4SLinus Torvalds 	What is the proper way to allocate, copy and free a buffer?
661da177e4SLinus Torvalds 	After you allocate it, it is a "0 length" chunk of memory
671da177e4SLinus Torvalds 	starting at zero.  If you want to add headers to the buffer
681da177e4SLinus Torvalds 	later, you'll have to call "skb_reserve(skb, amount)" with
691da177e4SLinus Torvalds 	the amount of memory you want reserved.  Then, you call
701da177e4SLinus Torvalds 	"skb_put(skb, amount)" with the amount of space you want in
711da177e4SLinus Torvalds 	the buffer.  skb_put() returns a pointer to the top (#0) of
721da177e4SLinus Torvalds 	that buffer.  skb->len is set to the amount of space you have
731da177e4SLinus Torvalds 	"allocated" with skb_put().  You can then write up to skb->len
741da177e4SLinus Torvalds 	bytes to that buffer.  If you need more, you can call skb_put()
751da177e4SLinus Torvalds 	again with the additional amount of space you need.  You can
761da177e4SLinus Torvalds 	find out how much more space you can allocate by calling
771da177e4SLinus Torvalds 	"skb_tailroom(skb)".
781da177e4SLinus Torvalds 	Now, to add header space, call "skb_push(skb, header_len)".
791da177e4SLinus Torvalds 	This creates space at the beginning of the buffer and returns
801da177e4SLinus Torvalds 	a pointer to this new space.  If later you need to strip a
811da177e4SLinus Torvalds 	header from a buffer, call "skb_pull(skb, header_len)".
821da177e4SLinus Torvalds 	skb_headroom() will return how much space is left at the top
831da177e4SLinus Torvalds 	of the buffer (before the main data).  Remember, this headroom
841da177e4SLinus Torvalds 	space must be reserved before the skb_put() function is called.
851da177e4SLinus Torvalds 	*/
861da177e4SLinus Torvalds 
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */
921da177e4SLinus Torvalds 
931da177e4SLinus Torvalds 
944fc268d2SRandy Dunlap #include <linux/capability.h>
951da177e4SLinus Torvalds #include <linux/module.h>
961da177e4SLinus Torvalds #include <linux/types.h>
971da177e4SLinus Torvalds #include <linux/kernel.h>
985a0e3ad6STejun Heo #include <linux/slab.h>
991da177e4SLinus Torvalds #include <asm/uaccess.h>
1001da177e4SLinus Torvalds #include <linux/skbuff.h>
1011da177e4SLinus Torvalds #include <linux/netdevice.h>
1021da177e4SLinus Torvalds #include <linux/in.h>
1031da177e4SLinus Torvalds #include <linux/tcp.h>
1041da177e4SLinus Torvalds #include <linux/udp.h>
1051da177e4SLinus Torvalds #include <linux/if_arp.h>
1061da177e4SLinus Torvalds #include <linux/mroute.h>
1071da177e4SLinus Torvalds #include <linux/init.h>
1081da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
10946f25dffSKris Katterjohn #include <linux/if_ether.h>
1101da177e4SLinus Torvalds 
1111da177e4SLinus Torvalds #include <net/sock.h>
1121da177e4SLinus Torvalds #include <net/ip.h>
1131da177e4SLinus Torvalds #include <net/icmp.h>
1141da177e4SLinus Torvalds #include <net/ipip.h>
1151da177e4SLinus Torvalds #include <net/inet_ecn.h>
1161da177e4SLinus Torvalds #include <net/xfrm.h>
11710dc4c7bSPavel Emelyanov #include <net/net_namespace.h>
11810dc4c7bSPavel Emelyanov #include <net/netns/generic.h>
1191da177e4SLinus Torvalds 
/* Number of buckets in each per-namespace tunnel hash table (power of two). */
#define HASH_SIZE  16

/*
 * Fold a __be32 address into a bucket index in [0, HASH_SIZE).
 *
 * The argument is parenthesised so that an expression argument (for
 * example "a | b") is hashed by value instead of being re-associated by
 * operator precedence inside the expansion.  The mask is derived from
 * HASH_SIZE so the two can never drift apart.
 *
 * Note: addr is evaluated twice, so it must be free of side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
1221da177e4SLinus Torvalds 
123f99189b1SEric Dumazet static int ipip_net_id __read_mostly;
12410dc4c7bSPavel Emelyanov struct ipip_net {
125b7285b79SEric Dumazet 	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
126b7285b79SEric Dumazet 	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
127b7285b79SEric Dumazet 	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
128b7285b79SEric Dumazet 	struct ip_tunnel __rcu *tunnels_wc[1];
129b7285b79SEric Dumazet 	struct ip_tunnel __rcu **tunnels[4];
13044d3c299SPavel Emelyanov 
131b9855c54SPavel Emelyanov 	struct net_device *fb_tunnel_dev;
13210dc4c7bSPavel Emelyanov };
13310dc4c7bSPavel Emelyanov 
/* Forward declarations for helpers defined later in this file. */
static void ipip_tunnel_setup(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
1371da177e4SLinus Torvalds 
/*
 * Locking: the hash tables are protected by RCU and RTNL.
 * Lookups walk the chains with rcu_dereference(); link/unlink use
 * rtnl_dereference().
 */

/*
 * Walk an RCU-protected tunnel chain beginning at 'start'.
 * The cursor is a variable named 't' that the caller must declare.
 */
#define for_each_ip_tunnel_rcu(start)		\
	for (t = rcu_dereference(start);	\
	     t;					\
	     t = rcu_dereference(t->next))
1441da177e4SLinus Torvalds 
/*
 * Frequently updated counters are kept per CPU; the remaining ones are
 * shared and live in netdev->stats.
 */
struct pcpu_tstats {
	unsigned long	rx_packets;	/* packets received */
	unsigned long	rx_bytes;	/* bytes received */
	unsigned long	tx_packets;	/* packets transmitted */
	unsigned long	tx_bytes;	/* bytes transmitted */
};
1523c97af99SEric Dumazet 
1533c97af99SEric Dumazet static struct net_device_stats *ipip_get_stats(struct net_device *dev)
1543c97af99SEric Dumazet {
1553c97af99SEric Dumazet 	struct pcpu_tstats sum = { 0 };
1563c97af99SEric Dumazet 	int i;
1573c97af99SEric Dumazet 
1583c97af99SEric Dumazet 	for_each_possible_cpu(i) {
1593c97af99SEric Dumazet 		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
1603c97af99SEric Dumazet 
1613c97af99SEric Dumazet 		sum.rx_packets += tstats->rx_packets;
1623c97af99SEric Dumazet 		sum.rx_bytes   += tstats->rx_bytes;
1633c97af99SEric Dumazet 		sum.tx_packets += tstats->tx_packets;
1643c97af99SEric Dumazet 		sum.tx_bytes   += tstats->tx_bytes;
1653c97af99SEric Dumazet 	}
1663c97af99SEric Dumazet 	dev->stats.rx_packets = sum.rx_packets;
1673c97af99SEric Dumazet 	dev->stats.rx_bytes   = sum.rx_bytes;
1683c97af99SEric Dumazet 	dev->stats.tx_packets = sum.tx_packets;
1693c97af99SEric Dumazet 	dev->stats.tx_bytes   = sum.tx_bytes;
1703c97af99SEric Dumazet 	return &dev->stats;
1713c97af99SEric Dumazet }
1723c97af99SEric Dumazet 
173b9fae5c9SPavel Emelyanov static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
174b9fae5c9SPavel Emelyanov 		__be32 remote, __be32 local)
1751da177e4SLinus Torvalds {
176b7285b79SEric Dumazet 	unsigned int h0 = HASH(remote);
177b7285b79SEric Dumazet 	unsigned int h1 = HASH(local);
1781da177e4SLinus Torvalds 	struct ip_tunnel *t;
17944d3c299SPavel Emelyanov 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
1801da177e4SLinus Torvalds 
1818f95dd63SEric Dumazet 	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
1821da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr &&
1831da177e4SLinus Torvalds 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
1841da177e4SLinus Torvalds 			return t;
1858f95dd63SEric Dumazet 
1868f95dd63SEric Dumazet 	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
1871da177e4SLinus Torvalds 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
1881da177e4SLinus Torvalds 			return t;
1898f95dd63SEric Dumazet 
1908f95dd63SEric Dumazet 	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
1911da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
1921da177e4SLinus Torvalds 			return t;
1938f95dd63SEric Dumazet 
1948f95dd63SEric Dumazet 	t = rcu_dereference(ipn->tunnels_wc[0]);
1958f95dd63SEric Dumazet 	if (t && (t->dev->flags&IFF_UP))
1961da177e4SLinus Torvalds 		return t;
1971da177e4SLinus Torvalds 	return NULL;
1981da177e4SLinus Torvalds }
1991da177e4SLinus Torvalds 
200b7285b79SEric Dumazet static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
201b9fae5c9SPavel Emelyanov 		struct ip_tunnel_parm *parms)
2021da177e4SLinus Torvalds {
20387d1a164SYOSHIFUJI Hideaki 	__be32 remote = parms->iph.daddr;
20487d1a164SYOSHIFUJI Hideaki 	__be32 local = parms->iph.saddr;
205b7285b79SEric Dumazet 	unsigned int h = 0;
2061da177e4SLinus Torvalds 	int prio = 0;
2071da177e4SLinus Torvalds 
2081da177e4SLinus Torvalds 	if (remote) {
2091da177e4SLinus Torvalds 		prio |= 2;
2101da177e4SLinus Torvalds 		h ^= HASH(remote);
2111da177e4SLinus Torvalds 	}
2121da177e4SLinus Torvalds 	if (local) {
2131da177e4SLinus Torvalds 		prio |= 1;
2141da177e4SLinus Torvalds 		h ^= HASH(local);
2151da177e4SLinus Torvalds 	}
21644d3c299SPavel Emelyanov 	return &ipn->tunnels[prio][h];
2171da177e4SLinus Torvalds }
2181da177e4SLinus Torvalds 
219b7285b79SEric Dumazet static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
220b9fae5c9SPavel Emelyanov 		struct ip_tunnel *t)
22187d1a164SYOSHIFUJI Hideaki {
222b9fae5c9SPavel Emelyanov 	return __ipip_bucket(ipn, &t->parms);
22387d1a164SYOSHIFUJI Hideaki }
2241da177e4SLinus Torvalds 
225b9fae5c9SPavel Emelyanov static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
2261da177e4SLinus Torvalds {
227b7285b79SEric Dumazet 	struct ip_tunnel __rcu **tp;
228b7285b79SEric Dumazet 	struct ip_tunnel *iter;
2291da177e4SLinus Torvalds 
230b7285b79SEric Dumazet 	for (tp = ipip_bucket(ipn, t);
231b7285b79SEric Dumazet 	     (iter = rtnl_dereference(*tp)) != NULL;
232b7285b79SEric Dumazet 	     tp = &iter->next) {
233b7285b79SEric Dumazet 		if (t == iter) {
234b7285b79SEric Dumazet 			rcu_assign_pointer(*tp, t->next);
2351da177e4SLinus Torvalds 			break;
2361da177e4SLinus Torvalds 		}
2371da177e4SLinus Torvalds 	}
2381da177e4SLinus Torvalds }
2391da177e4SLinus Torvalds 
240b9fae5c9SPavel Emelyanov static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
2411da177e4SLinus Torvalds {
242b7285b79SEric Dumazet 	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
2431da177e4SLinus Torvalds 
244b7285b79SEric Dumazet 	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
2458f95dd63SEric Dumazet 	rcu_assign_pointer(*tp, t);
2461da177e4SLinus Torvalds }
2471da177e4SLinus Torvalds 
248b9fae5c9SPavel Emelyanov static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
249b9fae5c9SPavel Emelyanov 		struct ip_tunnel_parm *parms, int create)
2501da177e4SLinus Torvalds {
251d5a0a1e3SAl Viro 	__be32 remote = parms->iph.daddr;
252d5a0a1e3SAl Viro 	__be32 local = parms->iph.saddr;
253b7285b79SEric Dumazet 	struct ip_tunnel *t, *nt;
254b7285b79SEric Dumazet 	struct ip_tunnel __rcu **tp;
2551da177e4SLinus Torvalds 	struct net_device *dev;
2561da177e4SLinus Torvalds 	char name[IFNAMSIZ];
257b9fae5c9SPavel Emelyanov 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
2581da177e4SLinus Torvalds 
259b7285b79SEric Dumazet 	for (tp = __ipip_bucket(ipn, parms);
260b7285b79SEric Dumazet 		 (t = rtnl_dereference(*tp)) != NULL;
261b7285b79SEric Dumazet 		 tp = &t->next) {
2621da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
2631da177e4SLinus Torvalds 			return t;
2641da177e4SLinus Torvalds 	}
2651da177e4SLinus Torvalds 	if (!create)
2661da177e4SLinus Torvalds 		return NULL;
2671da177e4SLinus Torvalds 
2681da177e4SLinus Torvalds 	if (parms->name[0])
2691da177e4SLinus Torvalds 		strlcpy(name, parms->name, IFNAMSIZ);
27034cc7ba6SPavel Emelyanov 	else
2713c97af99SEric Dumazet 		strcpy(name, "tunl%d");
2721da177e4SLinus Torvalds 
2731da177e4SLinus Torvalds 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
2741da177e4SLinus Torvalds 	if (dev == NULL)
2751da177e4SLinus Torvalds 		return NULL;
2761da177e4SLinus Torvalds 
2770a826406SPavel Emelyanov 	dev_net_set(dev, net);
2780a826406SPavel Emelyanov 
279b37d428bSPavel Emelyanov 	if (strchr(name, '%')) {
280b37d428bSPavel Emelyanov 		if (dev_alloc_name(dev, name) < 0)
281b37d428bSPavel Emelyanov 			goto failed_free;
282b37d428bSPavel Emelyanov 	}
283b37d428bSPavel Emelyanov 
2842941a486SPatrick McHardy 	nt = netdev_priv(dev);
2851da177e4SLinus Torvalds 	nt->parms = *parms;
2861da177e4SLinus Torvalds 
2873c97af99SEric Dumazet 	if (ipip_tunnel_init(dev) < 0)
2883c97af99SEric Dumazet 		goto failed_free;
28923a12b14SStephen Hemminger 
290b37d428bSPavel Emelyanov 	if (register_netdevice(dev) < 0)
291b37d428bSPavel Emelyanov 		goto failed_free;
2921da177e4SLinus Torvalds 
2931da177e4SLinus Torvalds 	dev_hold(dev);
294b9fae5c9SPavel Emelyanov 	ipip_tunnel_link(ipn, nt);
2951da177e4SLinus Torvalds 	return nt;
2961da177e4SLinus Torvalds 
297b37d428bSPavel Emelyanov failed_free:
2983c97af99SEric Dumazet 	ipip_dev_free(dev);
2991da177e4SLinus Torvalds 	return NULL;
3001da177e4SLinus Torvalds }
3011da177e4SLinus Torvalds 
302b7285b79SEric Dumazet /* called with RTNL */
3031da177e4SLinus Torvalds static void ipip_tunnel_uninit(struct net_device *dev)
3041da177e4SLinus Torvalds {
305b9855c54SPavel Emelyanov 	struct net *net = dev_net(dev);
306b9855c54SPavel Emelyanov 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
307b9855c54SPavel Emelyanov 
308b7285b79SEric Dumazet 	if (dev == ipn->fb_tunnel_dev)
309b7285b79SEric Dumazet 		rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
310b7285b79SEric Dumazet 	else
311b9fae5c9SPavel Emelyanov 		ipip_tunnel_unlink(ipn, netdev_priv(dev));
3121da177e4SLinus Torvalds 	dev_put(dev);
3131da177e4SLinus Torvalds }
3141da177e4SLinus Torvalds 
315d2acc347SHerbert Xu static int ipip_err(struct sk_buff *skb, u32 info)
3161da177e4SLinus Torvalds {
3171da177e4SLinus Torvalds 
318071f92d0SRami Rosen /* All the routers (except for Linux) return only
3191da177e4SLinus Torvalds    8 bytes of packet payload. It means, that precise relaying of
3201da177e4SLinus Torvalds    ICMP in the real Internet is absolutely infeasible.
3211da177e4SLinus Torvalds  */
3221da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr *)skb->data;
32388c7664fSArnaldo Carvalho de Melo 	const int type = icmp_hdr(skb)->type;
32488c7664fSArnaldo Carvalho de Melo 	const int code = icmp_hdr(skb)->code;
3251da177e4SLinus Torvalds 	struct ip_tunnel *t;
326d2acc347SHerbert Xu 	int err;
3271da177e4SLinus Torvalds 
3281da177e4SLinus Torvalds 	switch (type) {
3291da177e4SLinus Torvalds 	default:
3301da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
331d2acc347SHerbert Xu 		return 0;
3321da177e4SLinus Torvalds 
3331da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
3341da177e4SLinus Torvalds 		switch (code) {
3351da177e4SLinus Torvalds 		case ICMP_SR_FAILED:
3361da177e4SLinus Torvalds 		case ICMP_PORT_UNREACH:
3371da177e4SLinus Torvalds 			/* Impossible event. */
338d2acc347SHerbert Xu 			return 0;
3391da177e4SLinus Torvalds 		case ICMP_FRAG_NEEDED:
3401da177e4SLinus Torvalds 			/* Soft state for pmtu is maintained by IP core. */
341d2acc347SHerbert Xu 			return 0;
3421da177e4SLinus Torvalds 		default:
3431da177e4SLinus Torvalds 			/* All others are translated to HOST_UNREACH.
3441da177e4SLinus Torvalds 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
3451da177e4SLinus Torvalds 			   I believe they are just ether pollution. --ANK
3461da177e4SLinus Torvalds 			 */
3471da177e4SLinus Torvalds 			break;
3481da177e4SLinus Torvalds 		}
3491da177e4SLinus Torvalds 		break;
3501da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
3511da177e4SLinus Torvalds 		if (code != ICMP_EXC_TTL)
352d2acc347SHerbert Xu 			return 0;
3531da177e4SLinus Torvalds 		break;
3541da177e4SLinus Torvalds 	}
3551da177e4SLinus Torvalds 
356d2acc347SHerbert Xu 	err = -ENOENT;
357d2acc347SHerbert Xu 
3588f95dd63SEric Dumazet 	rcu_read_lock();
359cec3ffaeSPavel Emelyanov 	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
3601da177e4SLinus Torvalds 	if (t == NULL || t->parms.iph.daddr == 0)
3611da177e4SLinus Torvalds 		goto out;
362d2acc347SHerbert Xu 
363d2acc347SHerbert Xu 	err = 0;
3641da177e4SLinus Torvalds 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
3651da177e4SLinus Torvalds 		goto out;
3661da177e4SLinus Torvalds 
36726d94b46SWei Yongjun 	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
3681da177e4SLinus Torvalds 		t->err_count++;
3691da177e4SLinus Torvalds 	else
3701da177e4SLinus Torvalds 		t->err_count = 1;
3711da177e4SLinus Torvalds 	t->err_time = jiffies;
3721da177e4SLinus Torvalds out:
3738f95dd63SEric Dumazet 	rcu_read_unlock();
374d2acc347SHerbert Xu 	return err;
3751da177e4SLinus Torvalds }
3761da177e4SLinus Torvalds 
377eddc9ec5SArnaldo Carvalho de Melo static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
378eddc9ec5SArnaldo Carvalho de Melo 					struct sk_buff *skb)
3791da177e4SLinus Torvalds {
380eddc9ec5SArnaldo Carvalho de Melo 	struct iphdr *inner_iph = ip_hdr(skb);
3811da177e4SLinus Torvalds 
3821da177e4SLinus Torvalds 	if (INET_ECN_is_ce(outer_iph->tos))
3831da177e4SLinus Torvalds 		IP_ECN_set_ce(inner_iph);
3841da177e4SLinus Torvalds }
3851da177e4SLinus Torvalds 
3861da177e4SLinus Torvalds static int ipip_rcv(struct sk_buff *skb)
3871da177e4SLinus Torvalds {
3881da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
389eddc9ec5SArnaldo Carvalho de Melo 	const struct iphdr *iph = ip_hdr(skb);
3901da177e4SLinus Torvalds 
3918f95dd63SEric Dumazet 	rcu_read_lock();
3923c97af99SEric Dumazet 	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
3933c97af99SEric Dumazet 	if (tunnel != NULL) {
3943c97af99SEric Dumazet 		struct pcpu_tstats *tstats;
3953c97af99SEric Dumazet 
3961da177e4SLinus Torvalds 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
3978f95dd63SEric Dumazet 			rcu_read_unlock();
3981da177e4SLinus Torvalds 			kfree_skb(skb);
3991da177e4SLinus Torvalds 			return 0;
4001da177e4SLinus Torvalds 		}
4011da177e4SLinus Torvalds 
4021da177e4SLinus Torvalds 		secpath_reset(skb);
4031da177e4SLinus Torvalds 
404b0e380b1SArnaldo Carvalho de Melo 		skb->mac_header = skb->network_header;
405c1d2bbe1SArnaldo Carvalho de Melo 		skb_reset_network_header(skb);
4061da177e4SLinus Torvalds 		skb->protocol = htons(ETH_P_IP);
4071da177e4SLinus Torvalds 		skb->pkt_type = PACKET_HOST;
4081da177e4SLinus Torvalds 
4093c97af99SEric Dumazet 		tstats = this_cpu_ptr(tunnel->dev->tstats);
4103c97af99SEric Dumazet 		tstats->rx_packets++;
4113c97af99SEric Dumazet 		tstats->rx_bytes += skb->len;
4123c97af99SEric Dumazet 
4133c97af99SEric Dumazet 		__skb_tunnel_rx(skb, tunnel->dev);
414d19d56ddSEric Dumazet 
4151da177e4SLinus Torvalds 		ipip_ecn_decapsulate(iph, skb);
4168990f468SEric Dumazet 
4178990f468SEric Dumazet 		if (netif_rx(skb) == NET_RX_DROP)
4188990f468SEric Dumazet 			tunnel->dev->stats.rx_dropped++;
4198990f468SEric Dumazet 
4208f95dd63SEric Dumazet 		rcu_read_unlock();
4211da177e4SLinus Torvalds 		return 0;
4221da177e4SLinus Torvalds 	}
4238f95dd63SEric Dumazet 	rcu_read_unlock();
4241da177e4SLinus Torvalds 
4251da177e4SLinus Torvalds 	return -1;
4261da177e4SLinus Torvalds }
4271da177e4SLinus Torvalds 
4281da177e4SLinus Torvalds /*
4291da177e4SLinus Torvalds  *	This function assumes it is being called from dev_queue_xmit()
4301da177e4SLinus Torvalds  *	and that skb is filled properly by that function.
4311da177e4SLinus Torvalds  */
4321da177e4SLinus Torvalds 
4336fef4c0cSStephen Hemminger static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
4341da177e4SLinus Torvalds {
4352941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
4363c97af99SEric Dumazet 	struct pcpu_tstats *tstats;
4371da177e4SLinus Torvalds 	struct iphdr  *tiph = &tunnel->parms.iph;
4381da177e4SLinus Torvalds 	u8     tos = tunnel->parms.iph.tos;
439d5a0a1e3SAl Viro 	__be16 df = tiph->frag_off;
4401da177e4SLinus Torvalds 	struct rtable *rt;     			/* Route to the other host */
4411da177e4SLinus Torvalds 	struct net_device *tdev;		/* Device to other host */
442eddc9ec5SArnaldo Carvalho de Melo 	struct iphdr  *old_iph = ip_hdr(skb);
4431da177e4SLinus Torvalds 	struct iphdr  *iph;			/* Our new IP header */
444c2636b4dSChuck Lever 	unsigned int max_headroom;		/* The extra header space needed */
445d5a0a1e3SAl Viro 	__be32 dst = tiph->daddr;
4461da177e4SLinus Torvalds 	int    mtu;
4471da177e4SLinus Torvalds 
4481da177e4SLinus Torvalds 	if (skb->protocol != htons(ETH_P_IP))
4491da177e4SLinus Torvalds 		goto tx_error;
4501da177e4SLinus Torvalds 
4511da177e4SLinus Torvalds 	if (tos & 1)
4521da177e4SLinus Torvalds 		tos = old_iph->tos;
4531da177e4SLinus Torvalds 
4541da177e4SLinus Torvalds 	if (!dst) {
4551da177e4SLinus Torvalds 		/* NBMA tunnel */
456511c3f92SEric Dumazet 		if ((rt = skb_rtable(skb)) == NULL) {
4573c97af99SEric Dumazet 			dev->stats.tx_fifo_errors++;
4581da177e4SLinus Torvalds 			goto tx_error;
4591da177e4SLinus Torvalds 		}
4601da177e4SLinus Torvalds 		if ((dst = rt->rt_gateway) == 0)
4611da177e4SLinus Torvalds 			goto tx_error_icmp;
4621da177e4SLinus Torvalds 	}
4631da177e4SLinus Torvalds 
4641da177e4SLinus Torvalds 	{
4653c97af99SEric Dumazet 		struct flowi fl = {
4663c97af99SEric Dumazet 			.oif = tunnel->parms.link,
4673c97af99SEric Dumazet 			.nl_u = {
4683c97af99SEric Dumazet 				.ip4_u = {
4693c97af99SEric Dumazet 					.daddr = dst,
4701da177e4SLinus Torvalds 					.saddr = tiph->saddr,
4713c97af99SEric Dumazet 					.tos = RT_TOS(tos)
4723c97af99SEric Dumazet 				}
4733c97af99SEric Dumazet 			},
4743c97af99SEric Dumazet 			.proto = IPPROTO_IPIP
4753c97af99SEric Dumazet 		};
4763c97af99SEric Dumazet 
477b99f0152SPavel Emelyanov 		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
4783c97af99SEric Dumazet 			dev->stats.tx_carrier_errors++;
4791da177e4SLinus Torvalds 			goto tx_error_icmp;
4801da177e4SLinus Torvalds 		}
4811da177e4SLinus Torvalds 	}
482d8d1f30bSChangli Gao 	tdev = rt->dst.dev;
4831da177e4SLinus Torvalds 
4841da177e4SLinus Torvalds 	if (tdev == dev) {
4851da177e4SLinus Torvalds 		ip_rt_put(rt);
4863c97af99SEric Dumazet 		dev->stats.collisions++;
4871da177e4SLinus Torvalds 		goto tx_error;
4881da177e4SLinus Torvalds 	}
4891da177e4SLinus Torvalds 
49023ca0c98SHerbert Xu 	df |= old_iph->frag_off & htons(IP_DF);
49123ca0c98SHerbert Xu 
49223ca0c98SHerbert Xu 	if (df) {
493d8d1f30bSChangli Gao 		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
4941da177e4SLinus Torvalds 
4951da177e4SLinus Torvalds 		if (mtu < 68) {
4963c97af99SEric Dumazet 			dev->stats.collisions++;
4971da177e4SLinus Torvalds 			ip_rt_put(rt);
4981da177e4SLinus Torvalds 			goto tx_error;
4991da177e4SLinus Torvalds 		}
50023ca0c98SHerbert Xu 
501adf30907SEric Dumazet 		if (skb_dst(skb))
502adf30907SEric Dumazet 			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
5031da177e4SLinus Torvalds 
50423ca0c98SHerbert Xu 		if ((old_iph->frag_off & htons(IP_DF)) &&
50523ca0c98SHerbert Xu 		    mtu < ntohs(old_iph->tot_len)) {
50623ca0c98SHerbert Xu 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
50723ca0c98SHerbert Xu 				  htonl(mtu));
5081da177e4SLinus Torvalds 			ip_rt_put(rt);
5091da177e4SLinus Torvalds 			goto tx_error;
5101da177e4SLinus Torvalds 		}
51123ca0c98SHerbert Xu 	}
5121da177e4SLinus Torvalds 
5131da177e4SLinus Torvalds 	if (tunnel->err_count > 0) {
51426d94b46SWei Yongjun 		if (time_before(jiffies,
51526d94b46SWei Yongjun 				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
5161da177e4SLinus Torvalds 			tunnel->err_count--;
5171da177e4SLinus Torvalds 			dst_link_failure(skb);
5181da177e4SLinus Torvalds 		} else
5191da177e4SLinus Torvalds 			tunnel->err_count = 0;
5201da177e4SLinus Torvalds 	}
5211da177e4SLinus Torvalds 
5221da177e4SLinus Torvalds 	/*
5231da177e4SLinus Torvalds 	 * Okay, now see if we can stuff it in the buffer as-is.
5241da177e4SLinus Torvalds 	 */
5251da177e4SLinus Torvalds 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
5261da177e4SLinus Torvalds 
527cfbba49dSPatrick McHardy 	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
528cfbba49dSPatrick McHardy 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
5291da177e4SLinus Torvalds 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
5301da177e4SLinus Torvalds 		if (!new_skb) {
5311da177e4SLinus Torvalds 			ip_rt_put(rt);
5323c97af99SEric Dumazet 			dev->stats.tx_dropped++;
5331da177e4SLinus Torvalds 			dev_kfree_skb(skb);
5346ed10654SPatrick McHardy 			return NETDEV_TX_OK;
5351da177e4SLinus Torvalds 		}
5361da177e4SLinus Torvalds 		if (skb->sk)
5371da177e4SLinus Torvalds 			skb_set_owner_w(new_skb, skb->sk);
5381da177e4SLinus Torvalds 		dev_kfree_skb(skb);
5391da177e4SLinus Torvalds 		skb = new_skb;
540eddc9ec5SArnaldo Carvalho de Melo 		old_iph = ip_hdr(skb);
5411da177e4SLinus Torvalds 	}
5421da177e4SLinus Torvalds 
543b0e380b1SArnaldo Carvalho de Melo 	skb->transport_header = skb->network_header;
544e2d1bca7SArnaldo Carvalho de Melo 	skb_push(skb, sizeof(struct iphdr));
545e2d1bca7SArnaldo Carvalho de Melo 	skb_reset_network_header(skb);
5461da177e4SLinus Torvalds 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
54748d5cad8SPatrick McHardy 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
54848d5cad8SPatrick McHardy 			      IPSKB_REROUTED);
549adf30907SEric Dumazet 	skb_dst_drop(skb);
550d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
5511da177e4SLinus Torvalds 
5521da177e4SLinus Torvalds 	/*
5531da177e4SLinus Torvalds 	 *	Push down and install the IPIP header.
5541da177e4SLinus Torvalds 	 */
5551da177e4SLinus Torvalds 
556eddc9ec5SArnaldo Carvalho de Melo 	iph 			=	ip_hdr(skb);
5571da177e4SLinus Torvalds 	iph->version		=	4;
5581da177e4SLinus Torvalds 	iph->ihl		=	sizeof(struct iphdr)>>2;
5591da177e4SLinus Torvalds 	iph->frag_off		=	df;
5601da177e4SLinus Torvalds 	iph->protocol		=	IPPROTO_IPIP;
5611da177e4SLinus Torvalds 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
5621da177e4SLinus Torvalds 	iph->daddr		=	rt->rt_dst;
5631da177e4SLinus Torvalds 	iph->saddr		=	rt->rt_src;
5641da177e4SLinus Torvalds 
5651da177e4SLinus Torvalds 	if ((iph->ttl = tiph->ttl) == 0)
5661da177e4SLinus Torvalds 		iph->ttl	=	old_iph->ttl;
5671da177e4SLinus Torvalds 
5681da177e4SLinus Torvalds 	nf_reset(skb);
5693c97af99SEric Dumazet 	tstats = this_cpu_ptr(dev->tstats);
5703c97af99SEric Dumazet 	__IPTUNNEL_XMIT(tstats, &dev->stats);
5716ed10654SPatrick McHardy 	return NETDEV_TX_OK;
5721da177e4SLinus Torvalds 
5731da177e4SLinus Torvalds tx_error_icmp:
5741da177e4SLinus Torvalds 	dst_link_failure(skb);
5751da177e4SLinus Torvalds tx_error:
5763c97af99SEric Dumazet 	dev->stats.tx_errors++;
5771da177e4SLinus Torvalds 	dev_kfree_skb(skb);
5786ed10654SPatrick McHardy 	return NETDEV_TX_OK;
5791da177e4SLinus Torvalds }
5801da177e4SLinus Torvalds 
5815533995bSMichal Schmidt static void ipip_tunnel_bind_dev(struct net_device *dev)
5825533995bSMichal Schmidt {
5835533995bSMichal Schmidt 	struct net_device *tdev = NULL;
5845533995bSMichal Schmidt 	struct ip_tunnel *tunnel;
5855533995bSMichal Schmidt 	struct iphdr *iph;
5865533995bSMichal Schmidt 
5875533995bSMichal Schmidt 	tunnel = netdev_priv(dev);
5885533995bSMichal Schmidt 	iph = &tunnel->parms.iph;
5895533995bSMichal Schmidt 
5905533995bSMichal Schmidt 	if (iph->daddr) {
5913c97af99SEric Dumazet 		struct flowi fl = {
5923c97af99SEric Dumazet 			.oif = tunnel->parms.link,
5933c97af99SEric Dumazet 			.nl_u = {
5943c97af99SEric Dumazet 				.ip4_u = {
5953c97af99SEric Dumazet 					.daddr = iph->daddr,
5965533995bSMichal Schmidt 					.saddr = iph->saddr,
5973c97af99SEric Dumazet 					.tos = RT_TOS(iph->tos)
5983c97af99SEric Dumazet 				}
5993c97af99SEric Dumazet 			},
6003c97af99SEric Dumazet 			.proto = IPPROTO_IPIP
6013c97af99SEric Dumazet 		};
6025533995bSMichal Schmidt 		struct rtable *rt;
6033c97af99SEric Dumazet 
604b99f0152SPavel Emelyanov 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
605d8d1f30bSChangli Gao 			tdev = rt->dst.dev;
6065533995bSMichal Schmidt 			ip_rt_put(rt);
6075533995bSMichal Schmidt 		}
6085533995bSMichal Schmidt 		dev->flags |= IFF_POINTOPOINT;
6095533995bSMichal Schmidt 	}
6105533995bSMichal Schmidt 
6115533995bSMichal Schmidt 	if (!tdev && tunnel->parms.link)
612b99f0152SPavel Emelyanov 		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
6135533995bSMichal Schmidt 
6145533995bSMichal Schmidt 	if (tdev) {
6155533995bSMichal Schmidt 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
6165533995bSMichal Schmidt 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
6175533995bSMichal Schmidt 	}
6185533995bSMichal Schmidt 	dev->iflink = tunnel->parms.link;
6195533995bSMichal Schmidt }
6205533995bSMichal Schmidt 
6211da177e4SLinus Torvalds static int
6221da177e4SLinus Torvalds ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
6231da177e4SLinus Torvalds {
6241da177e4SLinus Torvalds 	int err = 0;
6251da177e4SLinus Torvalds 	struct ip_tunnel_parm p;
6261da177e4SLinus Torvalds 	struct ip_tunnel *t;
627b9855c54SPavel Emelyanov 	struct net *net = dev_net(dev);
628b9855c54SPavel Emelyanov 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
6291da177e4SLinus Torvalds 
6301da177e4SLinus Torvalds 	switch (cmd) {
6311da177e4SLinus Torvalds 	case SIOCGETTUNNEL:
6321da177e4SLinus Torvalds 		t = NULL;
633b9855c54SPavel Emelyanov 		if (dev == ipn->fb_tunnel_dev) {
6341da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
6351da177e4SLinus Torvalds 				err = -EFAULT;
6361da177e4SLinus Torvalds 				break;
6371da177e4SLinus Torvalds 			}
638b9fae5c9SPavel Emelyanov 			t = ipip_tunnel_locate(net, &p, 0);
6391da177e4SLinus Torvalds 		}
6401da177e4SLinus Torvalds 		if (t == NULL)
6412941a486SPatrick McHardy 			t = netdev_priv(dev);
6421da177e4SLinus Torvalds 		memcpy(&p, &t->parms, sizeof(p));
6431da177e4SLinus Torvalds 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
6441da177e4SLinus Torvalds 			err = -EFAULT;
6451da177e4SLinus Torvalds 		break;
6461da177e4SLinus Torvalds 
6471da177e4SLinus Torvalds 	case SIOCADDTUNNEL:
6481da177e4SLinus Torvalds 	case SIOCCHGTUNNEL:
6491da177e4SLinus Torvalds 		err = -EPERM;
6501da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
6511da177e4SLinus Torvalds 			goto done;
6521da177e4SLinus Torvalds 
6531da177e4SLinus Torvalds 		err = -EFAULT;
6541da177e4SLinus Torvalds 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
6551da177e4SLinus Torvalds 			goto done;
6561da177e4SLinus Torvalds 
6571da177e4SLinus Torvalds 		err = -EINVAL;
6581da177e4SLinus Torvalds 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
6591da177e4SLinus Torvalds 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
6601da177e4SLinus Torvalds 			goto done;
6611da177e4SLinus Torvalds 		if (p.iph.ttl)
6621da177e4SLinus Torvalds 			p.iph.frag_off |= htons(IP_DF);
6631da177e4SLinus Torvalds 
664b9fae5c9SPavel Emelyanov 		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
6651da177e4SLinus Torvalds 
666b9855c54SPavel Emelyanov 		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
6671da177e4SLinus Torvalds 			if (t != NULL) {
6681da177e4SLinus Torvalds 				if (t->dev != dev) {
6691da177e4SLinus Torvalds 					err = -EEXIST;
6701da177e4SLinus Torvalds 					break;
6711da177e4SLinus Torvalds 				}
6721da177e4SLinus Torvalds 			} else {
6731da177e4SLinus Torvalds 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
6741da177e4SLinus Torvalds 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
6751da177e4SLinus Torvalds 					err = -EINVAL;
6761da177e4SLinus Torvalds 					break;
6771da177e4SLinus Torvalds 				}
6782941a486SPatrick McHardy 				t = netdev_priv(dev);
679b9fae5c9SPavel Emelyanov 				ipip_tunnel_unlink(ipn, t);
6801da177e4SLinus Torvalds 				t->parms.iph.saddr = p.iph.saddr;
6811da177e4SLinus Torvalds 				t->parms.iph.daddr = p.iph.daddr;
6821da177e4SLinus Torvalds 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
6831da177e4SLinus Torvalds 				memcpy(dev->broadcast, &p.iph.daddr, 4);
684b9fae5c9SPavel Emelyanov 				ipip_tunnel_link(ipn, t);
6851da177e4SLinus Torvalds 				netdev_state_change(dev);
6861da177e4SLinus Torvalds 			}
6871da177e4SLinus Torvalds 		}
6881da177e4SLinus Torvalds 
6891da177e4SLinus Torvalds 		if (t) {
6901da177e4SLinus Torvalds 			err = 0;
6911da177e4SLinus Torvalds 			if (cmd == SIOCCHGTUNNEL) {
6921da177e4SLinus Torvalds 				t->parms.iph.ttl = p.iph.ttl;
6931da177e4SLinus Torvalds 				t->parms.iph.tos = p.iph.tos;
6941da177e4SLinus Torvalds 				t->parms.iph.frag_off = p.iph.frag_off;
6955533995bSMichal Schmidt 				if (t->parms.link != p.link) {
6965533995bSMichal Schmidt 					t->parms.link = p.link;
6975533995bSMichal Schmidt 					ipip_tunnel_bind_dev(dev);
6985533995bSMichal Schmidt 					netdev_state_change(dev);
6995533995bSMichal Schmidt 				}
7001da177e4SLinus Torvalds 			}
7011da177e4SLinus Torvalds 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
7021da177e4SLinus Torvalds 				err = -EFAULT;
7031da177e4SLinus Torvalds 		} else
7041da177e4SLinus Torvalds 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
7051da177e4SLinus Torvalds 		break;
7061da177e4SLinus Torvalds 
7071da177e4SLinus Torvalds 	case SIOCDELTUNNEL:
7081da177e4SLinus Torvalds 		err = -EPERM;
7091da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
7101da177e4SLinus Torvalds 			goto done;
7111da177e4SLinus Torvalds 
712b9855c54SPavel Emelyanov 		if (dev == ipn->fb_tunnel_dev) {
7131da177e4SLinus Torvalds 			err = -EFAULT;
7141da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
7151da177e4SLinus Torvalds 				goto done;
7161da177e4SLinus Torvalds 			err = -ENOENT;
717b9fae5c9SPavel Emelyanov 			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
7181da177e4SLinus Torvalds 				goto done;
7191da177e4SLinus Torvalds 			err = -EPERM;
720b9855c54SPavel Emelyanov 			if (t->dev == ipn->fb_tunnel_dev)
7211da177e4SLinus Torvalds 				goto done;
7221da177e4SLinus Torvalds 			dev = t->dev;
7231da177e4SLinus Torvalds 		}
72422f8cde5SStephen Hemminger 		unregister_netdevice(dev);
72522f8cde5SStephen Hemminger 		err = 0;
7261da177e4SLinus Torvalds 		break;
7271da177e4SLinus Torvalds 
7281da177e4SLinus Torvalds 	default:
7291da177e4SLinus Torvalds 		err = -EINVAL;
7301da177e4SLinus Torvalds 	}
7311da177e4SLinus Torvalds 
7321da177e4SLinus Torvalds done:
7331da177e4SLinus Torvalds 	return err;
7341da177e4SLinus Torvalds }
7351da177e4SLinus Torvalds 
7361da177e4SLinus Torvalds static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
7371da177e4SLinus Torvalds {
7381da177e4SLinus Torvalds 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
7391da177e4SLinus Torvalds 		return -EINVAL;
7401da177e4SLinus Torvalds 	dev->mtu = new_mtu;
7411da177e4SLinus Torvalds 	return 0;
7421da177e4SLinus Torvalds }
7431da177e4SLinus Torvalds 
74423a12b14SStephen Hemminger static const struct net_device_ops ipip_netdev_ops = {
74523a12b14SStephen Hemminger 	.ndo_uninit	= ipip_tunnel_uninit,
74623a12b14SStephen Hemminger 	.ndo_start_xmit	= ipip_tunnel_xmit,
74723a12b14SStephen Hemminger 	.ndo_do_ioctl	= ipip_tunnel_ioctl,
74823a12b14SStephen Hemminger 	.ndo_change_mtu	= ipip_tunnel_change_mtu,
7493c97af99SEric Dumazet 	.ndo_get_stats  = ipip_get_stats,
75023a12b14SStephen Hemminger };
75123a12b14SStephen Hemminger 
7523c97af99SEric Dumazet static void ipip_dev_free(struct net_device *dev)
7533c97af99SEric Dumazet {
7543c97af99SEric Dumazet 	free_percpu(dev->tstats);
7553c97af99SEric Dumazet 	free_netdev(dev);
7563c97af99SEric Dumazet }
7573c97af99SEric Dumazet 
7581da177e4SLinus Torvalds static void ipip_tunnel_setup(struct net_device *dev)
7591da177e4SLinus Torvalds {
76023a12b14SStephen Hemminger 	dev->netdev_ops		= &ipip_netdev_ops;
7613c97af99SEric Dumazet 	dev->destructor		= ipip_dev_free;
7621da177e4SLinus Torvalds 
7631da177e4SLinus Torvalds 	dev->type		= ARPHRD_TUNNEL;
7641da177e4SLinus Torvalds 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
76546f25dffSKris Katterjohn 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
7661da177e4SLinus Torvalds 	dev->flags		= IFF_NOARP;
7671da177e4SLinus Torvalds 	dev->iflink		= 0;
7681da177e4SLinus Torvalds 	dev->addr_len		= 4;
7690a826406SPavel Emelyanov 	dev->features		|= NETIF_F_NETNS_LOCAL;
770*153f0943SEric Dumazet 	dev->features		|= NETIF_F_LLTX;
77128e72216SEric Dumazet 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
7721da177e4SLinus Torvalds }
7731da177e4SLinus Torvalds 
7743c97af99SEric Dumazet static int ipip_tunnel_init(struct net_device *dev)
7751da177e4SLinus Torvalds {
77623a12b14SStephen Hemminger 	struct ip_tunnel *tunnel = netdev_priv(dev);
7771da177e4SLinus Torvalds 
7781da177e4SLinus Torvalds 	tunnel->dev = dev;
7791da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
7801da177e4SLinus Torvalds 
7811da177e4SLinus Torvalds 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
7821da177e4SLinus Torvalds 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
7831da177e4SLinus Torvalds 
7845533995bSMichal Schmidt 	ipip_tunnel_bind_dev(dev);
7853c97af99SEric Dumazet 
7863c97af99SEric Dumazet 	dev->tstats = alloc_percpu(struct pcpu_tstats);
7873c97af99SEric Dumazet 	if (!dev->tstats)
7883c97af99SEric Dumazet 		return -ENOMEM;
7893c97af99SEric Dumazet 
7903c97af99SEric Dumazet 	return 0;
7911da177e4SLinus Torvalds }
7921da177e4SLinus Torvalds 
793fada5636SEric Dumazet static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
7941da177e4SLinus Torvalds {
7952941a486SPatrick McHardy 	struct ip_tunnel *tunnel = netdev_priv(dev);
7961da177e4SLinus Torvalds 	struct iphdr *iph = &tunnel->parms.iph;
79744d3c299SPavel Emelyanov 	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
7981da177e4SLinus Torvalds 
7991da177e4SLinus Torvalds 	tunnel->dev = dev;
8001da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
8011da177e4SLinus Torvalds 
8021da177e4SLinus Torvalds 	iph->version		= 4;
8031da177e4SLinus Torvalds 	iph->protocol		= IPPROTO_IPIP;
8041da177e4SLinus Torvalds 	iph->ihl		= 5;
8051da177e4SLinus Torvalds 
806fada5636SEric Dumazet 	dev->tstats = alloc_percpu(struct pcpu_tstats);
807fada5636SEric Dumazet 	if (!dev->tstats)
808fada5636SEric Dumazet 		return -ENOMEM;
809fada5636SEric Dumazet 
8101da177e4SLinus Torvalds 	dev_hold(dev);
811b7285b79SEric Dumazet 	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
812fada5636SEric Dumazet 	return 0;
8131da177e4SLinus Torvalds }
8141da177e4SLinus Torvalds 
8156dcd814bSEric Dumazet static struct xfrm_tunnel ipip_handler __read_mostly = {
8161da177e4SLinus Torvalds 	.handler	=	ipip_rcv,
8171da177e4SLinus Torvalds 	.err_handler	=	ipip_err,
818d2acc347SHerbert Xu 	.priority	=	1,
8191da177e4SLinus Torvalds };
8201da177e4SLinus Torvalds 
8215747a1aaSStephen Hemminger static const char banner[] __initconst =
8221da177e4SLinus Torvalds 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
8231da177e4SLinus Torvalds 
8240694c4c0SEric Dumazet static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
82544d3c299SPavel Emelyanov {
82644d3c299SPavel Emelyanov 	int prio;
82744d3c299SPavel Emelyanov 
82844d3c299SPavel Emelyanov 	for (prio = 1; prio < 4; prio++) {
82944d3c299SPavel Emelyanov 		int h;
83044d3c299SPavel Emelyanov 		for (h = 0; h < HASH_SIZE; h++) {
831b7285b79SEric Dumazet 			struct ip_tunnel *t;
8320694c4c0SEric Dumazet 
833b7285b79SEric Dumazet 			t = rtnl_dereference(ipn->tunnels[prio][h]);
8340694c4c0SEric Dumazet 			while (t != NULL) {
8350694c4c0SEric Dumazet 				unregister_netdevice_queue(t->dev, head);
836b7285b79SEric Dumazet 				t = rtnl_dereference(t->next);
8370694c4c0SEric Dumazet 			}
83844d3c299SPavel Emelyanov 		}
83944d3c299SPavel Emelyanov 	}
84044d3c299SPavel Emelyanov }
84144d3c299SPavel Emelyanov 
8422c8c1e72SAlexey Dobriyan static int __net_init ipip_init_net(struct net *net)
84310dc4c7bSPavel Emelyanov {
84486de8a63SEric W. Biederman 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
84510dc4c7bSPavel Emelyanov 	int err;
84610dc4c7bSPavel Emelyanov 
84744d3c299SPavel Emelyanov 	ipn->tunnels[0] = ipn->tunnels_wc;
84844d3c299SPavel Emelyanov 	ipn->tunnels[1] = ipn->tunnels_l;
84944d3c299SPavel Emelyanov 	ipn->tunnels[2] = ipn->tunnels_r;
85044d3c299SPavel Emelyanov 	ipn->tunnels[3] = ipn->tunnels_r_l;
85144d3c299SPavel Emelyanov 
852b9855c54SPavel Emelyanov 	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
853b9855c54SPavel Emelyanov 					   "tunl0",
854b9855c54SPavel Emelyanov 					   ipip_tunnel_setup);
855b9855c54SPavel Emelyanov 	if (!ipn->fb_tunnel_dev) {
856b9855c54SPavel Emelyanov 		err = -ENOMEM;
857b9855c54SPavel Emelyanov 		goto err_alloc_dev;
858b9855c54SPavel Emelyanov 	}
859be77e593SAlexey Dobriyan 	dev_net_set(ipn->fb_tunnel_dev, net);
860b9855c54SPavel Emelyanov 
861fada5636SEric Dumazet 	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
862fada5636SEric Dumazet 	if (err)
863fada5636SEric Dumazet 		goto err_reg_dev;
864b9855c54SPavel Emelyanov 
865b9855c54SPavel Emelyanov 	if ((err = register_netdev(ipn->fb_tunnel_dev)))
866b9855c54SPavel Emelyanov 		goto err_reg_dev;
867b9855c54SPavel Emelyanov 
86810dc4c7bSPavel Emelyanov 	return 0;
86910dc4c7bSPavel Emelyanov 
870b9855c54SPavel Emelyanov err_reg_dev:
871fada5636SEric Dumazet 	ipip_dev_free(ipn->fb_tunnel_dev);
872b9855c54SPavel Emelyanov err_alloc_dev:
873b9855c54SPavel Emelyanov 	/* nothing */
87410dc4c7bSPavel Emelyanov 	return err;
87510dc4c7bSPavel Emelyanov }
87610dc4c7bSPavel Emelyanov 
8772c8c1e72SAlexey Dobriyan static void __net_exit ipip_exit_net(struct net *net)
87810dc4c7bSPavel Emelyanov {
87986de8a63SEric W. Biederman 	struct ipip_net *ipn = net_generic(net, ipip_net_id);
8800694c4c0SEric Dumazet 	LIST_HEAD(list);
88110dc4c7bSPavel Emelyanov 
882b9855c54SPavel Emelyanov 	rtnl_lock();
8830694c4c0SEric Dumazet 	ipip_destroy_tunnels(ipn, &list);
8840694c4c0SEric Dumazet 	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
8850694c4c0SEric Dumazet 	unregister_netdevice_many(&list);
886b9855c54SPavel Emelyanov 	rtnl_unlock();
88710dc4c7bSPavel Emelyanov }
88810dc4c7bSPavel Emelyanov 
88910dc4c7bSPavel Emelyanov static struct pernet_operations ipip_net_ops = {
89010dc4c7bSPavel Emelyanov 	.init = ipip_init_net,
89110dc4c7bSPavel Emelyanov 	.exit = ipip_exit_net,
89286de8a63SEric W. Biederman 	.id   = &ipip_net_id,
89386de8a63SEric W. Biederman 	.size = sizeof(struct ipip_net),
89410dc4c7bSPavel Emelyanov };
89510dc4c7bSPavel Emelyanov 
8961da177e4SLinus Torvalds static int __init ipip_init(void)
8971da177e4SLinus Torvalds {
8981da177e4SLinus Torvalds 	int err;
8991da177e4SLinus Torvalds 
9001da177e4SLinus Torvalds 	printk(banner);
9011da177e4SLinus Torvalds 
90286de8a63SEric W. Biederman 	err = register_pernet_device(&ipip_net_ops);
903d5aa407fSAlexey Dobriyan 	if (err < 0)
904d5aa407fSAlexey Dobriyan 		return err;
905d5aa407fSAlexey Dobriyan 	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
906d5aa407fSAlexey Dobriyan 	if (err < 0) {
907d5aa407fSAlexey Dobriyan 		unregister_pernet_device(&ipip_net_ops);
908d5aa407fSAlexey Dobriyan 		printk(KERN_INFO "ipip init: can't register tunnel\n");
909d5aa407fSAlexey Dobriyan 	}
910b9855c54SPavel Emelyanov 	return err;
9111da177e4SLinus Torvalds }
9121da177e4SLinus Torvalds 
9131da177e4SLinus Torvalds static void __exit ipip_fini(void)
9141da177e4SLinus Torvalds {
915c0d56408SKazunori MIYAZAWA 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
9161da177e4SLinus Torvalds 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
9171da177e4SLinus Torvalds 
91886de8a63SEric W. Biederman 	unregister_pernet_device(&ipip_net_ops);
9191da177e4SLinus Torvalds }
9201da177e4SLinus Torvalds 
9211da177e4SLinus Torvalds module_init(ipip_init);
9221da177e4SLinus Torvalds module_exit(ipip_fini);
9231da177e4SLinus Torvalds MODULE_LICENSE("GPL");
924