xref: /linux/net/ipv4/ipip.c (revision 46f25dffbaba48c571d75f5f574f31978287b8d2)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux NET3:	IP/IP protocol decoder.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *	Authors:
71da177e4SLinus Torvalds  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  *	Fixes:
101da177e4SLinus Torvalds  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
111da177e4SLinus Torvalds  *					a module taking up 2 pages).
121da177e4SLinus Torvalds  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
131da177e4SLinus Torvalds  *					to keep ip_forward happy.
141da177e4SLinus Torvalds  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
151da177e4SLinus Torvalds  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
161da177e4SLinus Torvalds  *              David Woodhouse :       Perform some basic ICMP handling.
171da177e4SLinus Torvalds  *                                      IPIP Routing without decapsulation.
181da177e4SLinus Torvalds  *              Carlos Picoto   :       GRE over IP support
191da177e4SLinus Torvalds  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
201da177e4SLinus Torvalds  *					I do not want to merge them together.
211da177e4SLinus Torvalds  *
221da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
231da177e4SLinus Torvalds  *	modify it under the terms of the GNU General Public License
241da177e4SLinus Torvalds  *	as published by the Free Software Foundation; either version
251da177e4SLinus Torvalds  *	2 of the License, or (at your option) any later version.
261da177e4SLinus Torvalds  *
271da177e4SLinus Torvalds  */
281da177e4SLinus Torvalds 
291da177e4SLinus Torvalds /* tunnel.c: an IP tunnel driver
301da177e4SLinus Torvalds 
311da177e4SLinus Torvalds 	The purpose of this driver is to provide an IP tunnel through
321da177e4SLinus Torvalds 	which you can tunnel network traffic transparently across subnets.
331da177e4SLinus Torvalds 
341da177e4SLinus Torvalds 	This was written by looking at Nick Holloway's dummy driver
351da177e4SLinus Torvalds 	Thanks for the great code!
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
381da177e4SLinus Torvalds 
391da177e4SLinus Torvalds 	Minor tweaks:
401da177e4SLinus Torvalds 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
411da177e4SLinus Torvalds 		dev->hard_header/hard_header_len changed to use no headers.
421da177e4SLinus Torvalds 		Comments/bracketing tweaked.
431da177e4SLinus Torvalds 		Made the tunnels use dev->name not tunnel: when error reporting.
441da177e4SLinus Torvalds 		Added tx_dropped stat
451da177e4SLinus Torvalds 
461da177e4SLinus Torvalds 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
471da177e4SLinus Torvalds 
481da177e4SLinus Torvalds 	Reworked:
491da177e4SLinus Torvalds 		Changed to tunnel to destination gateway in addition to the
501da177e4SLinus Torvalds 			tunnel's pointopoint address
511da177e4SLinus Torvalds 		Almost completely rewritten
521da177e4SLinus Torvalds 		Note:  There is currently no firewall or ICMP handling done.
531da177e4SLinus Torvalds 
541da177e4SLinus Torvalds 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
551da177e4SLinus Torvalds 
561da177e4SLinus Torvalds */
571da177e4SLinus Torvalds 
581da177e4SLinus Torvalds /* Things I wish I had known when writing the tunnel driver:
591da177e4SLinus Torvalds 
601da177e4SLinus Torvalds 	When the tunnel_xmit() function is called, the skb contains the
611da177e4SLinus Torvalds 	packet to be sent (plus a great deal of extra info), and dev
621da177e4SLinus Torvalds 	contains the tunnel device that _we_ are.
631da177e4SLinus Torvalds 
641da177e4SLinus Torvalds 	When we are passed a packet, we are expected to fill in the
651da177e4SLinus Torvalds 	source address with our source IP address.
661da177e4SLinus Torvalds 
671da177e4SLinus Torvalds 	What is the proper way to allocate, copy and free a buffer?
681da177e4SLinus Torvalds 	After you allocate it, it is a "0 length" chunk of memory
691da177e4SLinus Torvalds 	starting at zero.  If you want to add headers to the buffer
701da177e4SLinus Torvalds 	later, you'll have to call "skb_reserve(skb, amount)" with
711da177e4SLinus Torvalds 	the amount of memory you want reserved.  Then, you call
721da177e4SLinus Torvalds 	"skb_put(skb, amount)" with the amount of space you want in
731da177e4SLinus Torvalds 	the buffer.  skb_put() returns a pointer to the top (#0) of
741da177e4SLinus Torvalds 	that buffer.  skb->len is set to the amount of space you have
751da177e4SLinus Torvalds 	"allocated" with skb_put().  You can then write up to skb->len
761da177e4SLinus Torvalds 	bytes to that buffer.  If you need more, you can call skb_put()
771da177e4SLinus Torvalds 	again with the additional amount of space you need.  You can
781da177e4SLinus Torvalds 	find out how much more space you can allocate by calling
791da177e4SLinus Torvalds 	"skb_tailroom(skb)".
801da177e4SLinus Torvalds 	Now, to add header space, call "skb_push(skb, header_len)".
811da177e4SLinus Torvalds 	This creates space at the beginning of the buffer and returns
821da177e4SLinus Torvalds 	a pointer to this new space.  If later you need to strip a
831da177e4SLinus Torvalds 	header from a buffer, call "skb_pull(skb, header_len)".
841da177e4SLinus Torvalds 	skb_headroom() will return how much space is left at the top
851da177e4SLinus Torvalds 	of the buffer (before the main data).  Remember, this headroom
861da177e4SLinus Torvalds 	space must be reserved before the skb_put() function is called.
871da177e4SLinus Torvalds 	*/
881da177e4SLinus Torvalds 
891da177e4SLinus Torvalds /*
901da177e4SLinus Torvalds    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds    For comments look at net/ipv4/ip_gre.c --ANK
931da177e4SLinus Torvalds  */
941da177e4SLinus Torvalds 
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds #include <linux/config.h>
971da177e4SLinus Torvalds #include <linux/module.h>
981da177e4SLinus Torvalds #include <linux/types.h>
991da177e4SLinus Torvalds #include <linux/sched.h>
1001da177e4SLinus Torvalds #include <linux/kernel.h>
1011da177e4SLinus Torvalds #include <asm/uaccess.h>
1021da177e4SLinus Torvalds #include <linux/skbuff.h>
1031da177e4SLinus Torvalds #include <linux/netdevice.h>
1041da177e4SLinus Torvalds #include <linux/in.h>
1051da177e4SLinus Torvalds #include <linux/tcp.h>
1061da177e4SLinus Torvalds #include <linux/udp.h>
1071da177e4SLinus Torvalds #include <linux/if_arp.h>
1081da177e4SLinus Torvalds #include <linux/mroute.h>
1091da177e4SLinus Torvalds #include <linux/init.h>
1101da177e4SLinus Torvalds #include <linux/netfilter_ipv4.h>
111*46f25dffSKris Katterjohn #include <linux/if_ether.h>
1121da177e4SLinus Torvalds 
1131da177e4SLinus Torvalds #include <net/sock.h>
1141da177e4SLinus Torvalds #include <net/ip.h>
1151da177e4SLinus Torvalds #include <net/icmp.h>
1161da177e4SLinus Torvalds #include <net/protocol.h>
1171da177e4SLinus Torvalds #include <net/ipip.h>
1181da177e4SLinus Torvalds #include <net/inet_ecn.h>
1191da177e4SLinus Torvalds #include <net/xfrm.h>
1201da177e4SLinus Torvalds 
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range [0, HASH_SIZE).
 *
 * The argument is parenthesized at every use so that an expression
 * argument such as HASH(a | b) hashes the value of the whole expression
 * instead of being torn apart by operator precedence.  Note that the
 * argument is still evaluated twice, so it must be free of side effects.
 */
#define HASH(addr) (((addr) ^ ((addr) >> 4)) & 0xF)
1231da177e4SLinus Torvalds 
1241da177e4SLinus Torvalds static int ipip_fb_tunnel_init(struct net_device *dev);
1251da177e4SLinus Torvalds static int ipip_tunnel_init(struct net_device *dev);
1261da177e4SLinus Torvalds static void ipip_tunnel_setup(struct net_device *dev);
1271da177e4SLinus Torvalds 
1281da177e4SLinus Torvalds static struct net_device *ipip_fb_tunnel_dev;
1291da177e4SLinus Torvalds 
1301da177e4SLinus Torvalds static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
1311da177e4SLinus Torvalds static struct ip_tunnel *tunnels_r[HASH_SIZE];
1321da177e4SLinus Torvalds static struct ip_tunnel *tunnels_l[HASH_SIZE];
1331da177e4SLinus Torvalds static struct ip_tunnel *tunnels_wc[1];
1341da177e4SLinus Torvalds static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
1351da177e4SLinus Torvalds 
1361da177e4SLinus Torvalds static DEFINE_RWLOCK(ipip_lock);
1371da177e4SLinus Torvalds 
1381da177e4SLinus Torvalds static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
1391da177e4SLinus Torvalds {
1401da177e4SLinus Torvalds 	unsigned h0 = HASH(remote);
1411da177e4SLinus Torvalds 	unsigned h1 = HASH(local);
1421da177e4SLinus Torvalds 	struct ip_tunnel *t;
1431da177e4SLinus Torvalds 
1441da177e4SLinus Torvalds 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
1451da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr &&
1461da177e4SLinus Torvalds 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
1471da177e4SLinus Torvalds 			return t;
1481da177e4SLinus Torvalds 	}
1491da177e4SLinus Torvalds 	for (t = tunnels_r[h0]; t; t = t->next) {
1501da177e4SLinus Torvalds 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
1511da177e4SLinus Torvalds 			return t;
1521da177e4SLinus Torvalds 	}
1531da177e4SLinus Torvalds 	for (t = tunnels_l[h1]; t; t = t->next) {
1541da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
1551da177e4SLinus Torvalds 			return t;
1561da177e4SLinus Torvalds 	}
1571da177e4SLinus Torvalds 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
1581da177e4SLinus Torvalds 		return t;
1591da177e4SLinus Torvalds 	return NULL;
1601da177e4SLinus Torvalds }
1611da177e4SLinus Torvalds 
1621da177e4SLinus Torvalds static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
1631da177e4SLinus Torvalds {
1641da177e4SLinus Torvalds 	u32 remote = t->parms.iph.daddr;
1651da177e4SLinus Torvalds 	u32 local = t->parms.iph.saddr;
1661da177e4SLinus Torvalds 	unsigned h = 0;
1671da177e4SLinus Torvalds 	int prio = 0;
1681da177e4SLinus Torvalds 
1691da177e4SLinus Torvalds 	if (remote) {
1701da177e4SLinus Torvalds 		prio |= 2;
1711da177e4SLinus Torvalds 		h ^= HASH(remote);
1721da177e4SLinus Torvalds 	}
1731da177e4SLinus Torvalds 	if (local) {
1741da177e4SLinus Torvalds 		prio |= 1;
1751da177e4SLinus Torvalds 		h ^= HASH(local);
1761da177e4SLinus Torvalds 	}
1771da177e4SLinus Torvalds 	return &tunnels[prio][h];
1781da177e4SLinus Torvalds }
1791da177e4SLinus Torvalds 
1801da177e4SLinus Torvalds 
1811da177e4SLinus Torvalds static void ipip_tunnel_unlink(struct ip_tunnel *t)
1821da177e4SLinus Torvalds {
1831da177e4SLinus Torvalds 	struct ip_tunnel **tp;
1841da177e4SLinus Torvalds 
1851da177e4SLinus Torvalds 	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
1861da177e4SLinus Torvalds 		if (t == *tp) {
1871da177e4SLinus Torvalds 			write_lock_bh(&ipip_lock);
1881da177e4SLinus Torvalds 			*tp = t->next;
1891da177e4SLinus Torvalds 			write_unlock_bh(&ipip_lock);
1901da177e4SLinus Torvalds 			break;
1911da177e4SLinus Torvalds 		}
1921da177e4SLinus Torvalds 	}
1931da177e4SLinus Torvalds }
1941da177e4SLinus Torvalds 
1951da177e4SLinus Torvalds static void ipip_tunnel_link(struct ip_tunnel *t)
1961da177e4SLinus Torvalds {
1971da177e4SLinus Torvalds 	struct ip_tunnel **tp = ipip_bucket(t);
1981da177e4SLinus Torvalds 
1991da177e4SLinus Torvalds 	t->next = *tp;
2001da177e4SLinus Torvalds 	write_lock_bh(&ipip_lock);
2011da177e4SLinus Torvalds 	*tp = t;
2021da177e4SLinus Torvalds 	write_unlock_bh(&ipip_lock);
2031da177e4SLinus Torvalds }
2041da177e4SLinus Torvalds 
2051da177e4SLinus Torvalds static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
2061da177e4SLinus Torvalds {
2071da177e4SLinus Torvalds 	u32 remote = parms->iph.daddr;
2081da177e4SLinus Torvalds 	u32 local = parms->iph.saddr;
2091da177e4SLinus Torvalds 	struct ip_tunnel *t, **tp, *nt;
2101da177e4SLinus Torvalds 	struct net_device *dev;
2111da177e4SLinus Torvalds 	unsigned h = 0;
2121da177e4SLinus Torvalds 	int prio = 0;
2131da177e4SLinus Torvalds 	char name[IFNAMSIZ];
2141da177e4SLinus Torvalds 
2151da177e4SLinus Torvalds 	if (remote) {
2161da177e4SLinus Torvalds 		prio |= 2;
2171da177e4SLinus Torvalds 		h ^= HASH(remote);
2181da177e4SLinus Torvalds 	}
2191da177e4SLinus Torvalds 	if (local) {
2201da177e4SLinus Torvalds 		prio |= 1;
2211da177e4SLinus Torvalds 		h ^= HASH(local);
2221da177e4SLinus Torvalds 	}
2231da177e4SLinus Torvalds 	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
2241da177e4SLinus Torvalds 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
2251da177e4SLinus Torvalds 			return t;
2261da177e4SLinus Torvalds 	}
2271da177e4SLinus Torvalds 	if (!create)
2281da177e4SLinus Torvalds 		return NULL;
2291da177e4SLinus Torvalds 
2301da177e4SLinus Torvalds 	if (parms->name[0])
2311da177e4SLinus Torvalds 		strlcpy(name, parms->name, IFNAMSIZ);
2321da177e4SLinus Torvalds 	else {
2331da177e4SLinus Torvalds 		int i;
2341da177e4SLinus Torvalds 		for (i=1; i<100; i++) {
2351da177e4SLinus Torvalds 			sprintf(name, "tunl%d", i);
2361da177e4SLinus Torvalds 			if (__dev_get_by_name(name) == NULL)
2371da177e4SLinus Torvalds 				break;
2381da177e4SLinus Torvalds 		}
2391da177e4SLinus Torvalds 		if (i==100)
2401da177e4SLinus Torvalds 			goto failed;
2411da177e4SLinus Torvalds 	}
2421da177e4SLinus Torvalds 
2431da177e4SLinus Torvalds 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
2441da177e4SLinus Torvalds 	if (dev == NULL)
2451da177e4SLinus Torvalds 		return NULL;
2461da177e4SLinus Torvalds 
2471da177e4SLinus Torvalds 	nt = dev->priv;
2481da177e4SLinus Torvalds 	SET_MODULE_OWNER(dev);
2491da177e4SLinus Torvalds 	dev->init = ipip_tunnel_init;
2501da177e4SLinus Torvalds 	nt->parms = *parms;
2511da177e4SLinus Torvalds 
2521da177e4SLinus Torvalds 	if (register_netdevice(dev) < 0) {
2531da177e4SLinus Torvalds 		free_netdev(dev);
2541da177e4SLinus Torvalds 		goto failed;
2551da177e4SLinus Torvalds 	}
2561da177e4SLinus Torvalds 
2571da177e4SLinus Torvalds 	dev_hold(dev);
2581da177e4SLinus Torvalds 	ipip_tunnel_link(nt);
2591da177e4SLinus Torvalds 	return nt;
2601da177e4SLinus Torvalds 
2611da177e4SLinus Torvalds failed:
2621da177e4SLinus Torvalds 	return NULL;
2631da177e4SLinus Torvalds }
2641da177e4SLinus Torvalds 
2651da177e4SLinus Torvalds static void ipip_tunnel_uninit(struct net_device *dev)
2661da177e4SLinus Torvalds {
2671da177e4SLinus Torvalds 	if (dev == ipip_fb_tunnel_dev) {
2681da177e4SLinus Torvalds 		write_lock_bh(&ipip_lock);
2691da177e4SLinus Torvalds 		tunnels_wc[0] = NULL;
2701da177e4SLinus Torvalds 		write_unlock_bh(&ipip_lock);
2711da177e4SLinus Torvalds 	} else
2721da177e4SLinus Torvalds 		ipip_tunnel_unlink((struct ip_tunnel*)dev->priv);
2731da177e4SLinus Torvalds 	dev_put(dev);
2741da177e4SLinus Torvalds }
2751da177e4SLinus Torvalds 
2760303770dSPatrick McHardy static void ipip_err(struct sk_buff *skb, u32 info)
2771da177e4SLinus Torvalds {
2781da177e4SLinus Torvalds #ifndef I_WISH_WORLD_WERE_PERFECT
2791da177e4SLinus Torvalds 
2801da177e4SLinus Torvalds /* It is not :-( All the routers (except for Linux) return only
2811da177e4SLinus Torvalds    8 bytes of packet payload. It means, that precise relaying of
2821da177e4SLinus Torvalds    ICMP in the real Internet is absolutely infeasible.
2831da177e4SLinus Torvalds  */
2841da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr*)skb->data;
2851da177e4SLinus Torvalds 	int type = skb->h.icmph->type;
2861da177e4SLinus Torvalds 	int code = skb->h.icmph->code;
2871da177e4SLinus Torvalds 	struct ip_tunnel *t;
2881da177e4SLinus Torvalds 
2891da177e4SLinus Torvalds 	switch (type) {
2901da177e4SLinus Torvalds 	default:
2911da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
2921da177e4SLinus Torvalds 		return;
2931da177e4SLinus Torvalds 
2941da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
2951da177e4SLinus Torvalds 		switch (code) {
2961da177e4SLinus Torvalds 		case ICMP_SR_FAILED:
2971da177e4SLinus Torvalds 		case ICMP_PORT_UNREACH:
2981da177e4SLinus Torvalds 			/* Impossible event. */
2991da177e4SLinus Torvalds 			return;
3001da177e4SLinus Torvalds 		case ICMP_FRAG_NEEDED:
3011da177e4SLinus Torvalds 			/* Soft state for pmtu is maintained by IP core. */
3021da177e4SLinus Torvalds 			return;
3031da177e4SLinus Torvalds 		default:
3041da177e4SLinus Torvalds 			/* All others are translated to HOST_UNREACH.
3051da177e4SLinus Torvalds 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
3061da177e4SLinus Torvalds 			   I believe they are just ether pollution. --ANK
3071da177e4SLinus Torvalds 			 */
3081da177e4SLinus Torvalds 			break;
3091da177e4SLinus Torvalds 		}
3101da177e4SLinus Torvalds 		break;
3111da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
3121da177e4SLinus Torvalds 		if (code != ICMP_EXC_TTL)
3131da177e4SLinus Torvalds 			return;
3141da177e4SLinus Torvalds 		break;
3151da177e4SLinus Torvalds 	}
3161da177e4SLinus Torvalds 
3171da177e4SLinus Torvalds 	read_lock(&ipip_lock);
3181da177e4SLinus Torvalds 	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
3191da177e4SLinus Torvalds 	if (t == NULL || t->parms.iph.daddr == 0)
3201da177e4SLinus Torvalds 		goto out;
3211da177e4SLinus Torvalds 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
3221da177e4SLinus Torvalds 		goto out;
3231da177e4SLinus Torvalds 
3241da177e4SLinus Torvalds 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
3251da177e4SLinus Torvalds 		t->err_count++;
3261da177e4SLinus Torvalds 	else
3271da177e4SLinus Torvalds 		t->err_count = 1;
3281da177e4SLinus Torvalds 	t->err_time = jiffies;
3291da177e4SLinus Torvalds out:
3301da177e4SLinus Torvalds 	read_unlock(&ipip_lock);
3311da177e4SLinus Torvalds 	return;
3321da177e4SLinus Torvalds #else
3331da177e4SLinus Torvalds 	struct iphdr *iph = (struct iphdr*)dp;
3341da177e4SLinus Torvalds 	int hlen = iph->ihl<<2;
3351da177e4SLinus Torvalds 	struct iphdr *eiph;
3361da177e4SLinus Torvalds 	int type = skb->h.icmph->type;
3371da177e4SLinus Torvalds 	int code = skb->h.icmph->code;
3381da177e4SLinus Torvalds 	int rel_type = 0;
3391da177e4SLinus Torvalds 	int rel_code = 0;
3401da177e4SLinus Torvalds 	int rel_info = 0;
3411da177e4SLinus Torvalds 	struct sk_buff *skb2;
3421da177e4SLinus Torvalds 	struct flowi fl;
3431da177e4SLinus Torvalds 	struct rtable *rt;
3441da177e4SLinus Torvalds 
3451da177e4SLinus Torvalds 	if (len < hlen + sizeof(struct iphdr))
3461da177e4SLinus Torvalds 		return;
3471da177e4SLinus Torvalds 	eiph = (struct iphdr*)(dp + hlen);
3481da177e4SLinus Torvalds 
3491da177e4SLinus Torvalds 	switch (type) {
3501da177e4SLinus Torvalds 	default:
3511da177e4SLinus Torvalds 		return;
3521da177e4SLinus Torvalds 	case ICMP_PARAMETERPROB:
3531da177e4SLinus Torvalds 		if (skb->h.icmph->un.gateway < hlen)
3541da177e4SLinus Torvalds 			return;
3551da177e4SLinus Torvalds 
3561da177e4SLinus Torvalds 		/* So... This guy found something strange INSIDE encapsulated
3571da177e4SLinus Torvalds 		   packet. Well, he is fool, but what can we do ?
3581da177e4SLinus Torvalds 		 */
3591da177e4SLinus Torvalds 		rel_type = ICMP_PARAMETERPROB;
3601da177e4SLinus Torvalds 		rel_info = skb->h.icmph->un.gateway - hlen;
3611da177e4SLinus Torvalds 		break;
3621da177e4SLinus Torvalds 
3631da177e4SLinus Torvalds 	case ICMP_DEST_UNREACH:
3641da177e4SLinus Torvalds 		switch (code) {
3651da177e4SLinus Torvalds 		case ICMP_SR_FAILED:
3661da177e4SLinus Torvalds 		case ICMP_PORT_UNREACH:
3671da177e4SLinus Torvalds 			/* Impossible event. */
3681da177e4SLinus Torvalds 			return;
3691da177e4SLinus Torvalds 		case ICMP_FRAG_NEEDED:
3701da177e4SLinus Torvalds 			/* And it is the only really necessary thing :-) */
3711da177e4SLinus Torvalds 			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
3721da177e4SLinus Torvalds 			if (rel_info < hlen+68)
3731da177e4SLinus Torvalds 				return;
3741da177e4SLinus Torvalds 			rel_info -= hlen;
3751da177e4SLinus Torvalds 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
3761da177e4SLinus Torvalds 			if (rel_info > ntohs(eiph->tot_len))
3771da177e4SLinus Torvalds 				return;
3781da177e4SLinus Torvalds 			break;
3791da177e4SLinus Torvalds 		default:
3801da177e4SLinus Torvalds 			/* All others are translated to HOST_UNREACH.
3811da177e4SLinus Torvalds 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
3821da177e4SLinus Torvalds 			   I believe, it is just ether pollution. --ANK
3831da177e4SLinus Torvalds 			 */
3841da177e4SLinus Torvalds 			rel_type = ICMP_DEST_UNREACH;
3851da177e4SLinus Torvalds 			rel_code = ICMP_HOST_UNREACH;
3861da177e4SLinus Torvalds 			break;
3871da177e4SLinus Torvalds 		}
3881da177e4SLinus Torvalds 		break;
3891da177e4SLinus Torvalds 	case ICMP_TIME_EXCEEDED:
3901da177e4SLinus Torvalds 		if (code != ICMP_EXC_TTL)
3911da177e4SLinus Torvalds 			return;
3921da177e4SLinus Torvalds 		break;
3931da177e4SLinus Torvalds 	}
3941da177e4SLinus Torvalds 
3951da177e4SLinus Torvalds 	/* Prepare fake skb to feed it to icmp_send */
3961da177e4SLinus Torvalds 	skb2 = skb_clone(skb, GFP_ATOMIC);
3971da177e4SLinus Torvalds 	if (skb2 == NULL)
3981da177e4SLinus Torvalds 		return;
3991da177e4SLinus Torvalds 	dst_release(skb2->dst);
4001da177e4SLinus Torvalds 	skb2->dst = NULL;
4011da177e4SLinus Torvalds 	skb_pull(skb2, skb->data - (u8*)eiph);
4021da177e4SLinus Torvalds 	skb2->nh.raw = skb2->data;
4031da177e4SLinus Torvalds 
4041da177e4SLinus Torvalds 	/* Try to guess incoming interface */
4051da177e4SLinus Torvalds 	memset(&fl, 0, sizeof(fl));
4061da177e4SLinus Torvalds 	fl.fl4_daddr = eiph->saddr;
4071da177e4SLinus Torvalds 	fl.fl4_tos = RT_TOS(eiph->tos);
4081da177e4SLinus Torvalds 	fl.proto = IPPROTO_IPIP;
4091da177e4SLinus Torvalds 	if (ip_route_output_key(&rt, &key)) {
4101da177e4SLinus Torvalds 		kfree_skb(skb2);
4111da177e4SLinus Torvalds 		return;
4121da177e4SLinus Torvalds 	}
4131da177e4SLinus Torvalds 	skb2->dev = rt->u.dst.dev;
4141da177e4SLinus Torvalds 
4151da177e4SLinus Torvalds 	/* route "incoming" packet */
4161da177e4SLinus Torvalds 	if (rt->rt_flags&RTCF_LOCAL) {
4171da177e4SLinus Torvalds 		ip_rt_put(rt);
4181da177e4SLinus Torvalds 		rt = NULL;
4191da177e4SLinus Torvalds 		fl.fl4_daddr = eiph->daddr;
4201da177e4SLinus Torvalds 		fl.fl4_src = eiph->saddr;
4211da177e4SLinus Torvalds 		fl.fl4_tos = eiph->tos;
4221da177e4SLinus Torvalds 		if (ip_route_output_key(&rt, &fl) ||
4231da177e4SLinus Torvalds 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
4241da177e4SLinus Torvalds 			ip_rt_put(rt);
4251da177e4SLinus Torvalds 			kfree_skb(skb2);
4261da177e4SLinus Torvalds 			return;
4271da177e4SLinus Torvalds 		}
4281da177e4SLinus Torvalds 	} else {
4291da177e4SLinus Torvalds 		ip_rt_put(rt);
4301da177e4SLinus Torvalds 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
4311da177e4SLinus Torvalds 		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
4321da177e4SLinus Torvalds 			kfree_skb(skb2);
4331da177e4SLinus Torvalds 			return;
4341da177e4SLinus Torvalds 		}
4351da177e4SLinus Torvalds 	}
4361da177e4SLinus Torvalds 
4371da177e4SLinus Torvalds 	/* change mtu on this route */
4381da177e4SLinus Torvalds 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
4391da177e4SLinus Torvalds 		if (rel_info > dst_mtu(skb2->dst)) {
4401da177e4SLinus Torvalds 			kfree_skb(skb2);
4411da177e4SLinus Torvalds 			return;
4421da177e4SLinus Torvalds 		}
4431da177e4SLinus Torvalds 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
4441da177e4SLinus Torvalds 		rel_info = htonl(rel_info);
4451da177e4SLinus Torvalds 	} else if (type == ICMP_TIME_EXCEEDED) {
4461da177e4SLinus Torvalds 		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
4471da177e4SLinus Torvalds 		if (t->parms.iph.ttl) {
4481da177e4SLinus Torvalds 			rel_type = ICMP_DEST_UNREACH;
4491da177e4SLinus Torvalds 			rel_code = ICMP_HOST_UNREACH;
4501da177e4SLinus Torvalds 		}
4511da177e4SLinus Torvalds 	}
4521da177e4SLinus Torvalds 
4531da177e4SLinus Torvalds 	icmp_send(skb2, rel_type, rel_code, rel_info);
4541da177e4SLinus Torvalds 	kfree_skb(skb2);
4551da177e4SLinus Torvalds 	return;
4561da177e4SLinus Torvalds #endif
4571da177e4SLinus Torvalds }
4581da177e4SLinus Torvalds 
4591da177e4SLinus Torvalds static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
4601da177e4SLinus Torvalds {
4611da177e4SLinus Torvalds 	struct iphdr *inner_iph = skb->nh.iph;
4621da177e4SLinus Torvalds 
4631da177e4SLinus Torvalds 	if (INET_ECN_is_ce(outer_iph->tos))
4641da177e4SLinus Torvalds 		IP_ECN_set_ce(inner_iph);
4651da177e4SLinus Torvalds }
4661da177e4SLinus Torvalds 
4671da177e4SLinus Torvalds static int ipip_rcv(struct sk_buff *skb)
4681da177e4SLinus Torvalds {
4691da177e4SLinus Torvalds 	struct iphdr *iph;
4701da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
4711da177e4SLinus Torvalds 
4721da177e4SLinus Torvalds 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
4731da177e4SLinus Torvalds 		goto out;
4741da177e4SLinus Torvalds 
4751da177e4SLinus Torvalds 	iph = skb->nh.iph;
4761da177e4SLinus Torvalds 
4771da177e4SLinus Torvalds 	read_lock(&ipip_lock);
4781da177e4SLinus Torvalds 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
4791da177e4SLinus Torvalds 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
4801da177e4SLinus Torvalds 			read_unlock(&ipip_lock);
4811da177e4SLinus Torvalds 			kfree_skb(skb);
4821da177e4SLinus Torvalds 			return 0;
4831da177e4SLinus Torvalds 		}
4841da177e4SLinus Torvalds 
4851da177e4SLinus Torvalds 		secpath_reset(skb);
4861da177e4SLinus Torvalds 
4871da177e4SLinus Torvalds 		skb->mac.raw = skb->nh.raw;
4881da177e4SLinus Torvalds 		skb->nh.raw = skb->data;
4891da177e4SLinus Torvalds 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
4901da177e4SLinus Torvalds 		skb->protocol = htons(ETH_P_IP);
4911da177e4SLinus Torvalds 		skb->pkt_type = PACKET_HOST;
4921da177e4SLinus Torvalds 
4931da177e4SLinus Torvalds 		tunnel->stat.rx_packets++;
4941da177e4SLinus Torvalds 		tunnel->stat.rx_bytes += skb->len;
4951da177e4SLinus Torvalds 		skb->dev = tunnel->dev;
4961da177e4SLinus Torvalds 		dst_release(skb->dst);
4971da177e4SLinus Torvalds 		skb->dst = NULL;
4981da177e4SLinus Torvalds 		nf_reset(skb);
4991da177e4SLinus Torvalds 		ipip_ecn_decapsulate(iph, skb);
5001da177e4SLinus Torvalds 		netif_rx(skb);
5011da177e4SLinus Torvalds 		read_unlock(&ipip_lock);
5021da177e4SLinus Torvalds 		return 0;
5031da177e4SLinus Torvalds 	}
5041da177e4SLinus Torvalds 	read_unlock(&ipip_lock);
5051da177e4SLinus Torvalds 
5061da177e4SLinus Torvalds out:
5071da177e4SLinus Torvalds 	return -1;
5081da177e4SLinus Torvalds }
5091da177e4SLinus Torvalds 
5101da177e4SLinus Torvalds /*
5111da177e4SLinus Torvalds  *	This function assumes it is being called from dev_queue_xmit()
5121da177e4SLinus Torvalds  *	and that skb is filled properly by that function.
5131da177e4SLinus Torvalds  */
5141da177e4SLinus Torvalds 
5151da177e4SLinus Torvalds static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
5161da177e4SLinus Torvalds {
5171da177e4SLinus Torvalds 	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
5181da177e4SLinus Torvalds 	struct net_device_stats *stats = &tunnel->stat;
5191da177e4SLinus Torvalds 	struct iphdr  *tiph = &tunnel->parms.iph;
5201da177e4SLinus Torvalds 	u8     tos = tunnel->parms.iph.tos;
5211da177e4SLinus Torvalds 	u16    df = tiph->frag_off;
5221da177e4SLinus Torvalds 	struct rtable *rt;     			/* Route to the other host */
5231da177e4SLinus Torvalds 	struct net_device *tdev;			/* Device to other host */
5241da177e4SLinus Torvalds 	struct iphdr  *old_iph = skb->nh.iph;
5251da177e4SLinus Torvalds 	struct iphdr  *iph;			/* Our new IP header */
5261da177e4SLinus Torvalds 	int    max_headroom;			/* The extra header space needed */
5271da177e4SLinus Torvalds 	u32    dst = tiph->daddr;
5281da177e4SLinus Torvalds 	int    mtu;
5291da177e4SLinus Torvalds 
5301da177e4SLinus Torvalds 	if (tunnel->recursion++) {
5311da177e4SLinus Torvalds 		tunnel->stat.collisions++;
5321da177e4SLinus Torvalds 		goto tx_error;
5331da177e4SLinus Torvalds 	}
5341da177e4SLinus Torvalds 
5351da177e4SLinus Torvalds 	if (skb->protocol != htons(ETH_P_IP))
5361da177e4SLinus Torvalds 		goto tx_error;
5371da177e4SLinus Torvalds 
5381da177e4SLinus Torvalds 	if (tos&1)
5391da177e4SLinus Torvalds 		tos = old_iph->tos;
5401da177e4SLinus Torvalds 
5411da177e4SLinus Torvalds 	if (!dst) {
5421da177e4SLinus Torvalds 		/* NBMA tunnel */
5431da177e4SLinus Torvalds 		if ((rt = (struct rtable*)skb->dst) == NULL) {
5441da177e4SLinus Torvalds 			tunnel->stat.tx_fifo_errors++;
5451da177e4SLinus Torvalds 			goto tx_error;
5461da177e4SLinus Torvalds 		}
5471da177e4SLinus Torvalds 		if ((dst = rt->rt_gateway) == 0)
5481da177e4SLinus Torvalds 			goto tx_error_icmp;
5491da177e4SLinus Torvalds 	}
5501da177e4SLinus Torvalds 
5511da177e4SLinus Torvalds 	{
5521da177e4SLinus Torvalds 		struct flowi fl = { .oif = tunnel->parms.link,
5531da177e4SLinus Torvalds 				    .nl_u = { .ip4_u =
5541da177e4SLinus Torvalds 					      { .daddr = dst,
5551da177e4SLinus Torvalds 						.saddr = tiph->saddr,
5561da177e4SLinus Torvalds 						.tos = RT_TOS(tos) } },
5571da177e4SLinus Torvalds 				    .proto = IPPROTO_IPIP };
5581da177e4SLinus Torvalds 		if (ip_route_output_key(&rt, &fl)) {
5591da177e4SLinus Torvalds 			tunnel->stat.tx_carrier_errors++;
5601da177e4SLinus Torvalds 			goto tx_error_icmp;
5611da177e4SLinus Torvalds 		}
5621da177e4SLinus Torvalds 	}
5631da177e4SLinus Torvalds 	tdev = rt->u.dst.dev;
5641da177e4SLinus Torvalds 
5651da177e4SLinus Torvalds 	if (tdev == dev) {
5661da177e4SLinus Torvalds 		ip_rt_put(rt);
5671da177e4SLinus Torvalds 		tunnel->stat.collisions++;
5681da177e4SLinus Torvalds 		goto tx_error;
5691da177e4SLinus Torvalds 	}
5701da177e4SLinus Torvalds 
5711da177e4SLinus Torvalds 	if (tiph->frag_off)
5721da177e4SLinus Torvalds 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
5731da177e4SLinus Torvalds 	else
5741da177e4SLinus Torvalds 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
5751da177e4SLinus Torvalds 
5761da177e4SLinus Torvalds 	if (mtu < 68) {
5771da177e4SLinus Torvalds 		tunnel->stat.collisions++;
5781da177e4SLinus Torvalds 		ip_rt_put(rt);
5791da177e4SLinus Torvalds 		goto tx_error;
5801da177e4SLinus Torvalds 	}
5811da177e4SLinus Torvalds 	if (skb->dst)
5821da177e4SLinus Torvalds 		skb->dst->ops->update_pmtu(skb->dst, mtu);
5831da177e4SLinus Torvalds 
5841da177e4SLinus Torvalds 	df |= (old_iph->frag_off&htons(IP_DF));
5851da177e4SLinus Torvalds 
5861da177e4SLinus Torvalds 	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
5871da177e4SLinus Torvalds 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
5881da177e4SLinus Torvalds 		ip_rt_put(rt);
5891da177e4SLinus Torvalds 		goto tx_error;
5901da177e4SLinus Torvalds 	}
5911da177e4SLinus Torvalds 
5921da177e4SLinus Torvalds 	if (tunnel->err_count > 0) {
5931da177e4SLinus Torvalds 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
5941da177e4SLinus Torvalds 			tunnel->err_count--;
5951da177e4SLinus Torvalds 			dst_link_failure(skb);
5961da177e4SLinus Torvalds 		} else
5971da177e4SLinus Torvalds 			tunnel->err_count = 0;
5981da177e4SLinus Torvalds 	}
5991da177e4SLinus Torvalds 
6001da177e4SLinus Torvalds 	/*
6011da177e4SLinus Torvalds 	 * Okay, now see if we can stuff it in the buffer as-is.
6021da177e4SLinus Torvalds 	 */
6031da177e4SLinus Torvalds 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
6041da177e4SLinus Torvalds 
6051da177e4SLinus Torvalds 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
6061da177e4SLinus Torvalds 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
6071da177e4SLinus Torvalds 		if (!new_skb) {
6081da177e4SLinus Torvalds 			ip_rt_put(rt);
6091da177e4SLinus Torvalds   			stats->tx_dropped++;
6101da177e4SLinus Torvalds 			dev_kfree_skb(skb);
6111da177e4SLinus Torvalds 			tunnel->recursion--;
6121da177e4SLinus Torvalds 			return 0;
6131da177e4SLinus Torvalds 		}
6141da177e4SLinus Torvalds 		if (skb->sk)
6151da177e4SLinus Torvalds 			skb_set_owner_w(new_skb, skb->sk);
6161da177e4SLinus Torvalds 		dev_kfree_skb(skb);
6171da177e4SLinus Torvalds 		skb = new_skb;
6181da177e4SLinus Torvalds 		old_iph = skb->nh.iph;
6191da177e4SLinus Torvalds 	}
6201da177e4SLinus Torvalds 
6211da177e4SLinus Torvalds 	skb->h.raw = skb->nh.raw;
6221da177e4SLinus Torvalds 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
6231da177e4SLinus Torvalds 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
6241da177e4SLinus Torvalds 	dst_release(skb->dst);
6251da177e4SLinus Torvalds 	skb->dst = &rt->u.dst;
6261da177e4SLinus Torvalds 
6271da177e4SLinus Torvalds 	/*
6281da177e4SLinus Torvalds 	 *	Push down and install the IPIP header.
6291da177e4SLinus Torvalds 	 */
6301da177e4SLinus Torvalds 
6311da177e4SLinus Torvalds 	iph 			=	skb->nh.iph;
6321da177e4SLinus Torvalds 	iph->version		=	4;
6331da177e4SLinus Torvalds 	iph->ihl		=	sizeof(struct iphdr)>>2;
6341da177e4SLinus Torvalds 	iph->frag_off		=	df;
6351da177e4SLinus Torvalds 	iph->protocol		=	IPPROTO_IPIP;
6361da177e4SLinus Torvalds 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
6371da177e4SLinus Torvalds 	iph->daddr		=	rt->rt_dst;
6381da177e4SLinus Torvalds 	iph->saddr		=	rt->rt_src;
6391da177e4SLinus Torvalds 
6401da177e4SLinus Torvalds 	if ((iph->ttl = tiph->ttl) == 0)
6411da177e4SLinus Torvalds 		iph->ttl	=	old_iph->ttl;
6421da177e4SLinus Torvalds 
6431da177e4SLinus Torvalds 	nf_reset(skb);
6441da177e4SLinus Torvalds 
6451da177e4SLinus Torvalds 	IPTUNNEL_XMIT();
6461da177e4SLinus Torvalds 	tunnel->recursion--;
6471da177e4SLinus Torvalds 	return 0;
6481da177e4SLinus Torvalds 
6491da177e4SLinus Torvalds tx_error_icmp:
6501da177e4SLinus Torvalds 	dst_link_failure(skb);
6511da177e4SLinus Torvalds tx_error:
6521da177e4SLinus Torvalds 	stats->tx_errors++;
6531da177e4SLinus Torvalds 	dev_kfree_skb(skb);
6541da177e4SLinus Torvalds 	tunnel->recursion--;
6551da177e4SLinus Torvalds 	return 0;
6561da177e4SLinus Torvalds }
6571da177e4SLinus Torvalds 
6581da177e4SLinus Torvalds static int
6591da177e4SLinus Torvalds ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
6601da177e4SLinus Torvalds {
6611da177e4SLinus Torvalds 	int err = 0;
6621da177e4SLinus Torvalds 	struct ip_tunnel_parm p;
6631da177e4SLinus Torvalds 	struct ip_tunnel *t;
6641da177e4SLinus Torvalds 
6651da177e4SLinus Torvalds 	switch (cmd) {
6661da177e4SLinus Torvalds 	case SIOCGETTUNNEL:
6671da177e4SLinus Torvalds 		t = NULL;
6681da177e4SLinus Torvalds 		if (dev == ipip_fb_tunnel_dev) {
6691da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
6701da177e4SLinus Torvalds 				err = -EFAULT;
6711da177e4SLinus Torvalds 				break;
6721da177e4SLinus Torvalds 			}
6731da177e4SLinus Torvalds 			t = ipip_tunnel_locate(&p, 0);
6741da177e4SLinus Torvalds 		}
6751da177e4SLinus Torvalds 		if (t == NULL)
6761da177e4SLinus Torvalds 			t = (struct ip_tunnel*)dev->priv;
6771da177e4SLinus Torvalds 		memcpy(&p, &t->parms, sizeof(p));
6781da177e4SLinus Torvalds 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
6791da177e4SLinus Torvalds 			err = -EFAULT;
6801da177e4SLinus Torvalds 		break;
6811da177e4SLinus Torvalds 
6821da177e4SLinus Torvalds 	case SIOCADDTUNNEL:
6831da177e4SLinus Torvalds 	case SIOCCHGTUNNEL:
6841da177e4SLinus Torvalds 		err = -EPERM;
6851da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
6861da177e4SLinus Torvalds 			goto done;
6871da177e4SLinus Torvalds 
6881da177e4SLinus Torvalds 		err = -EFAULT;
6891da177e4SLinus Torvalds 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
6901da177e4SLinus Torvalds 			goto done;
6911da177e4SLinus Torvalds 
6921da177e4SLinus Torvalds 		err = -EINVAL;
6931da177e4SLinus Torvalds 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
6941da177e4SLinus Torvalds 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
6951da177e4SLinus Torvalds 			goto done;
6961da177e4SLinus Torvalds 		if (p.iph.ttl)
6971da177e4SLinus Torvalds 			p.iph.frag_off |= htons(IP_DF);
6981da177e4SLinus Torvalds 
6991da177e4SLinus Torvalds 		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
7001da177e4SLinus Torvalds 
7011da177e4SLinus Torvalds 		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
7021da177e4SLinus Torvalds 			if (t != NULL) {
7031da177e4SLinus Torvalds 				if (t->dev != dev) {
7041da177e4SLinus Torvalds 					err = -EEXIST;
7051da177e4SLinus Torvalds 					break;
7061da177e4SLinus Torvalds 				}
7071da177e4SLinus Torvalds 			} else {
7081da177e4SLinus Torvalds 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
7091da177e4SLinus Torvalds 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
7101da177e4SLinus Torvalds 					err = -EINVAL;
7111da177e4SLinus Torvalds 					break;
7121da177e4SLinus Torvalds 				}
7131da177e4SLinus Torvalds 				t = (struct ip_tunnel*)dev->priv;
7141da177e4SLinus Torvalds 				ipip_tunnel_unlink(t);
7151da177e4SLinus Torvalds 				t->parms.iph.saddr = p.iph.saddr;
7161da177e4SLinus Torvalds 				t->parms.iph.daddr = p.iph.daddr;
7171da177e4SLinus Torvalds 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
7181da177e4SLinus Torvalds 				memcpy(dev->broadcast, &p.iph.daddr, 4);
7191da177e4SLinus Torvalds 				ipip_tunnel_link(t);
7201da177e4SLinus Torvalds 				netdev_state_change(dev);
7211da177e4SLinus Torvalds 			}
7221da177e4SLinus Torvalds 		}
7231da177e4SLinus Torvalds 
7241da177e4SLinus Torvalds 		if (t) {
7251da177e4SLinus Torvalds 			err = 0;
7261da177e4SLinus Torvalds 			if (cmd == SIOCCHGTUNNEL) {
7271da177e4SLinus Torvalds 				t->parms.iph.ttl = p.iph.ttl;
7281da177e4SLinus Torvalds 				t->parms.iph.tos = p.iph.tos;
7291da177e4SLinus Torvalds 				t->parms.iph.frag_off = p.iph.frag_off;
7301da177e4SLinus Torvalds 			}
7311da177e4SLinus Torvalds 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
7321da177e4SLinus Torvalds 				err = -EFAULT;
7331da177e4SLinus Torvalds 		} else
7341da177e4SLinus Torvalds 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
7351da177e4SLinus Torvalds 		break;
7361da177e4SLinus Torvalds 
7371da177e4SLinus Torvalds 	case SIOCDELTUNNEL:
7381da177e4SLinus Torvalds 		err = -EPERM;
7391da177e4SLinus Torvalds 		if (!capable(CAP_NET_ADMIN))
7401da177e4SLinus Torvalds 			goto done;
7411da177e4SLinus Torvalds 
7421da177e4SLinus Torvalds 		if (dev == ipip_fb_tunnel_dev) {
7431da177e4SLinus Torvalds 			err = -EFAULT;
7441da177e4SLinus Torvalds 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
7451da177e4SLinus Torvalds 				goto done;
7461da177e4SLinus Torvalds 			err = -ENOENT;
7471da177e4SLinus Torvalds 			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
7481da177e4SLinus Torvalds 				goto done;
7491da177e4SLinus Torvalds 			err = -EPERM;
7501da177e4SLinus Torvalds 			if (t->dev == ipip_fb_tunnel_dev)
7511da177e4SLinus Torvalds 				goto done;
7521da177e4SLinus Torvalds 			dev = t->dev;
7531da177e4SLinus Torvalds 		}
7541da177e4SLinus Torvalds 		err = unregister_netdevice(dev);
7551da177e4SLinus Torvalds 		break;
7561da177e4SLinus Torvalds 
7571da177e4SLinus Torvalds 	default:
7581da177e4SLinus Torvalds 		err = -EINVAL;
7591da177e4SLinus Torvalds 	}
7601da177e4SLinus Torvalds 
7611da177e4SLinus Torvalds done:
7621da177e4SLinus Torvalds 	return err;
7631da177e4SLinus Torvalds }
7641da177e4SLinus Torvalds 
7651da177e4SLinus Torvalds static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
7661da177e4SLinus Torvalds {
7671da177e4SLinus Torvalds 	return &(((struct ip_tunnel*)dev->priv)->stat);
7681da177e4SLinus Torvalds }
7691da177e4SLinus Torvalds 
7701da177e4SLinus Torvalds static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
7711da177e4SLinus Torvalds {
7721da177e4SLinus Torvalds 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
7731da177e4SLinus Torvalds 		return -EINVAL;
7741da177e4SLinus Torvalds 	dev->mtu = new_mtu;
7751da177e4SLinus Torvalds 	return 0;
7761da177e4SLinus Torvalds }
7771da177e4SLinus Torvalds 
7781da177e4SLinus Torvalds static void ipip_tunnel_setup(struct net_device *dev)
7791da177e4SLinus Torvalds {
7801da177e4SLinus Torvalds 	SET_MODULE_OWNER(dev);
7811da177e4SLinus Torvalds 	dev->uninit		= ipip_tunnel_uninit;
7821da177e4SLinus Torvalds 	dev->hard_start_xmit	= ipip_tunnel_xmit;
7831da177e4SLinus Torvalds 	dev->get_stats		= ipip_tunnel_get_stats;
7841da177e4SLinus Torvalds 	dev->do_ioctl		= ipip_tunnel_ioctl;
7851da177e4SLinus Torvalds 	dev->change_mtu		= ipip_tunnel_change_mtu;
7861da177e4SLinus Torvalds 	dev->destructor		= free_netdev;
7871da177e4SLinus Torvalds 
7881da177e4SLinus Torvalds 	dev->type		= ARPHRD_TUNNEL;
7891da177e4SLinus Torvalds 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
790*46f25dffSKris Katterjohn 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
7911da177e4SLinus Torvalds 	dev->flags		= IFF_NOARP;
7921da177e4SLinus Torvalds 	dev->iflink		= 0;
7931da177e4SLinus Torvalds 	dev->addr_len		= 4;
7941da177e4SLinus Torvalds }
7951da177e4SLinus Torvalds 
7961da177e4SLinus Torvalds static int ipip_tunnel_init(struct net_device *dev)
7971da177e4SLinus Torvalds {
7981da177e4SLinus Torvalds 	struct net_device *tdev = NULL;
7991da177e4SLinus Torvalds 	struct ip_tunnel *tunnel;
8001da177e4SLinus Torvalds 	struct iphdr *iph;
8011da177e4SLinus Torvalds 
8021da177e4SLinus Torvalds 	tunnel = (struct ip_tunnel*)dev->priv;
8031da177e4SLinus Torvalds 	iph = &tunnel->parms.iph;
8041da177e4SLinus Torvalds 
8051da177e4SLinus Torvalds 	tunnel->dev = dev;
8061da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
8071da177e4SLinus Torvalds 
8081da177e4SLinus Torvalds 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
8091da177e4SLinus Torvalds 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
8101da177e4SLinus Torvalds 
8111da177e4SLinus Torvalds 	if (iph->daddr) {
8121da177e4SLinus Torvalds 		struct flowi fl = { .oif = tunnel->parms.link,
8131da177e4SLinus Torvalds 				    .nl_u = { .ip4_u =
8141da177e4SLinus Torvalds 					      { .daddr = iph->daddr,
8151da177e4SLinus Torvalds 						.saddr = iph->saddr,
8161da177e4SLinus Torvalds 						.tos = RT_TOS(iph->tos) } },
8171da177e4SLinus Torvalds 				    .proto = IPPROTO_IPIP };
8181da177e4SLinus Torvalds 		struct rtable *rt;
8191da177e4SLinus Torvalds 		if (!ip_route_output_key(&rt, &fl)) {
8201da177e4SLinus Torvalds 			tdev = rt->u.dst.dev;
8211da177e4SLinus Torvalds 			ip_rt_put(rt);
8221da177e4SLinus Torvalds 		}
8231da177e4SLinus Torvalds 		dev->flags |= IFF_POINTOPOINT;
8241da177e4SLinus Torvalds 	}
8251da177e4SLinus Torvalds 
8261da177e4SLinus Torvalds 	if (!tdev && tunnel->parms.link)
8271da177e4SLinus Torvalds 		tdev = __dev_get_by_index(tunnel->parms.link);
8281da177e4SLinus Torvalds 
8291da177e4SLinus Torvalds 	if (tdev) {
8301da177e4SLinus Torvalds 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
8311da177e4SLinus Torvalds 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
8321da177e4SLinus Torvalds 	}
8331da177e4SLinus Torvalds 	dev->iflink = tunnel->parms.link;
8341da177e4SLinus Torvalds 
8351da177e4SLinus Torvalds 	return 0;
8361da177e4SLinus Torvalds }
8371da177e4SLinus Torvalds 
8381da177e4SLinus Torvalds static int __init ipip_fb_tunnel_init(struct net_device *dev)
8391da177e4SLinus Torvalds {
8401da177e4SLinus Torvalds 	struct ip_tunnel *tunnel = dev->priv;
8411da177e4SLinus Torvalds 	struct iphdr *iph = &tunnel->parms.iph;
8421da177e4SLinus Torvalds 
8431da177e4SLinus Torvalds 	tunnel->dev = dev;
8441da177e4SLinus Torvalds 	strcpy(tunnel->parms.name, dev->name);
8451da177e4SLinus Torvalds 
8461da177e4SLinus Torvalds 	iph->version		= 4;
8471da177e4SLinus Torvalds 	iph->protocol		= IPPROTO_IPIP;
8481da177e4SLinus Torvalds 	iph->ihl		= 5;
8491da177e4SLinus Torvalds 
8501da177e4SLinus Torvalds 	dev_hold(dev);
8511da177e4SLinus Torvalds 	tunnels_wc[0]		= tunnel;
8521da177e4SLinus Torvalds 	return 0;
8531da177e4SLinus Torvalds }
8541da177e4SLinus Torvalds 
8550303770dSPatrick McHardy #ifdef CONFIG_INET_TUNNEL
8561da177e4SLinus Torvalds static struct xfrm_tunnel ipip_handler = {
8571da177e4SLinus Torvalds 	.handler	=	ipip_rcv,
8581da177e4SLinus Torvalds 	.err_handler	=	ipip_err,
8591da177e4SLinus Torvalds };
8601da177e4SLinus Torvalds 
8610303770dSPatrick McHardy static inline int ipip_register(void)
8620303770dSPatrick McHardy {
8630303770dSPatrick McHardy 	return xfrm4_tunnel_register(&ipip_handler);
8640303770dSPatrick McHardy }
8650303770dSPatrick McHardy 
8660303770dSPatrick McHardy static inline int ipip_unregister(void)
8670303770dSPatrick McHardy {
8680303770dSPatrick McHardy 	return xfrm4_tunnel_deregister(&ipip_handler);
8690303770dSPatrick McHardy }
8700303770dSPatrick McHardy #else
8710303770dSPatrick McHardy static struct net_protocol ipip_protocol = {
8720303770dSPatrick McHardy 	.handler	=	ipip_rcv,
8730303770dSPatrick McHardy 	.err_handler	=	ipip_err,
8740303770dSPatrick McHardy 	.no_policy	=	1,
8750303770dSPatrick McHardy };
8760303770dSPatrick McHardy 
8770303770dSPatrick McHardy static inline int ipip_register(void)
8780303770dSPatrick McHardy {
8790303770dSPatrick McHardy 	return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
8800303770dSPatrick McHardy }
8810303770dSPatrick McHardy 
8820303770dSPatrick McHardy static inline int ipip_unregister(void)
8830303770dSPatrick McHardy {
8840303770dSPatrick McHardy 	return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
8850303770dSPatrick McHardy }
8860303770dSPatrick McHardy #endif
8870303770dSPatrick McHardy 
8881da177e4SLinus Torvalds static char banner[] __initdata =
8891da177e4SLinus Torvalds 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
8901da177e4SLinus Torvalds 
8911da177e4SLinus Torvalds static int __init ipip_init(void)
8921da177e4SLinus Torvalds {
8931da177e4SLinus Torvalds 	int err;
8941da177e4SLinus Torvalds 
8951da177e4SLinus Torvalds 	printk(banner);
8961da177e4SLinus Torvalds 
8970303770dSPatrick McHardy 	if (ipip_register() < 0) {
8981da177e4SLinus Torvalds 		printk(KERN_INFO "ipip init: can't register tunnel\n");
8991da177e4SLinus Torvalds 		return -EAGAIN;
9001da177e4SLinus Torvalds 	}
9011da177e4SLinus Torvalds 
9021da177e4SLinus Torvalds 	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
9031da177e4SLinus Torvalds 					   "tunl0",
9041da177e4SLinus Torvalds 					   ipip_tunnel_setup);
9051da177e4SLinus Torvalds 	if (!ipip_fb_tunnel_dev) {
9061da177e4SLinus Torvalds 		err = -ENOMEM;
9071da177e4SLinus Torvalds 		goto err1;
9081da177e4SLinus Torvalds 	}
9091da177e4SLinus Torvalds 
9101da177e4SLinus Torvalds 	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
9111da177e4SLinus Torvalds 
9121da177e4SLinus Torvalds 	if ((err = register_netdev(ipip_fb_tunnel_dev)))
9131da177e4SLinus Torvalds 		goto err2;
9141da177e4SLinus Torvalds  out:
9151da177e4SLinus Torvalds 	return err;
9161da177e4SLinus Torvalds  err2:
9171da177e4SLinus Torvalds 	free_netdev(ipip_fb_tunnel_dev);
9181da177e4SLinus Torvalds  err1:
9190303770dSPatrick McHardy 	ipip_unregister();
9201da177e4SLinus Torvalds 	goto out;
9211da177e4SLinus Torvalds }
9221da177e4SLinus Torvalds 
923db44575fSAlexey Kuznetsov static void __exit ipip_destroy_tunnels(void)
924db44575fSAlexey Kuznetsov {
925db44575fSAlexey Kuznetsov 	int prio;
926db44575fSAlexey Kuznetsov 
927db44575fSAlexey Kuznetsov 	for (prio = 1; prio < 4; prio++) {
928db44575fSAlexey Kuznetsov 		int h;
929db44575fSAlexey Kuznetsov 		for (h = 0; h < HASH_SIZE; h++) {
930db44575fSAlexey Kuznetsov 			struct ip_tunnel *t;
931db44575fSAlexey Kuznetsov 			while ((t = tunnels[prio][h]) != NULL)
932db44575fSAlexey Kuznetsov 				unregister_netdevice(t->dev);
933db44575fSAlexey Kuznetsov 		}
934db44575fSAlexey Kuznetsov 	}
935db44575fSAlexey Kuznetsov }
936db44575fSAlexey Kuznetsov 
9371da177e4SLinus Torvalds static void __exit ipip_fini(void)
9381da177e4SLinus Torvalds {
9390303770dSPatrick McHardy 	if (ipip_unregister() < 0)
9401da177e4SLinus Torvalds 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
9411da177e4SLinus Torvalds 
942db44575fSAlexey Kuznetsov 	rtnl_lock();
943db44575fSAlexey Kuznetsov 	ipip_destroy_tunnels();
944db44575fSAlexey Kuznetsov 	unregister_netdevice(ipip_fb_tunnel_dev);
945db44575fSAlexey Kuznetsov 	rtnl_unlock();
9461da177e4SLinus Torvalds }
9471da177e4SLinus Torvalds 
9481da177e4SLinus Torvalds module_init(ipip_init);
9491da177e4SLinus Torvalds module_exit(ipip_fini);
9501da177e4SLinus Torvalds MODULE_LICENSE("GPL");
951