xref: /linux/net/ipv4/ipip.c (revision 6e8331ac6973435b1e7604c30f2ad394035b46e1)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *	Authors:
7  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *	Fixes:
10  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
11  *					a module taking up 2 pages).
12  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *					to keep ip_forward happy.
14  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *					I do not want to merge them together.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *	modify it under the terms of the GNU General Public License
24  *	as published by the Free Software Foundation; either version
25  *	2 of the License, or (at your option) any later version.
26  *
27  */
28 
29 /* tunnel.c: an IP tunnel driver
30 
31 	The purpose of this driver is to provide an IP tunnel through
32 	which you can tunnel network traffic transparently across subnets.
33 
34 	This was written by looking at Nick Holloway's dummy driver
35 	Thanks for the great code!
36 
37 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
38 
39 	Minor tweaks:
40 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 		dev->hard_header/hard_header_len changed to use no headers.
42 		Comments/bracketing tweaked.
43 		Made the tunnels use dev->name not tunnel: when error reporting.
44 		Added tx_dropped stat
45 
46 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
47 
48 	Reworked:
49 		Changed to tunnel to destination gateway in addition to the
50 			tunnel's pointopoint address
51 		Almost completely rewritten
52 		Note:  There is currently no firewall or ICMP handling done.
53 
54 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
55 
56 */
57 
58 /* Things I wish I had known when writing the tunnel driver:
59 
60 	When the tunnel_xmit() function is called, the skb contains the
61 	packet to be sent (plus a great deal of extra info), and dev
62 	contains the tunnel device that _we_ are.
63 
64 	When we are passed a packet, we are expected to fill in the
65 	source address with our source IP address.
66 
67 	What is the proper way to allocate, copy and free a buffer?
68 	After you allocate it, it is a "0 length" chunk of memory
69 	starting at zero.  If you want to add headers to the buffer
70 	later, you'll have to call "skb_reserve(skb, amount)" with
71 	the amount of memory you want reserved.  Then, you call
72 	"skb_put(skb, amount)" with the amount of space you want in
73 	the buffer.  skb_put() returns a pointer to the top (#0) of
74 	that buffer.  skb->len is set to the amount of space you have
75 	"allocated" with skb_put().  You can then write up to skb->len
76 	bytes to that buffer.  If you need more, you can call skb_put()
77 	again with the additional amount of space you need.  You can
78 	find out how much more space you can allocate by calling
79 	"skb_tailroom(skb)".
80 	Now, to add header space, call "skb_push(skb, header_len)".
81 	This creates space at the beginning of the buffer and returns
82 	a pointer to this new space.  If later you need to strip a
83 	header from a buffer, call "skb_pull(skb, header_len)".
84 	skb_headroom() will return how much space is left at the top
85 	of the buffer (before the main data).  Remember, this headroom
86 	space must be reserved before the skb_put() function is called.
87 	*/
88 
89 /*
90    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
91 
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94 
95 
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/sched.h>
100 #include <linux/kernel.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <linux/in.h>
105 #include <linux/tcp.h>
106 #include <linux/udp.h>
107 #include <linux/if_arp.h>
108 #include <linux/mroute.h>
109 #include <linux/init.h>
110 #include <linux/netfilter_ipv4.h>
111 #include <linux/if_ether.h>
112 
113 #include <net/sock.h>
114 #include <net/ip.h>
115 #include <net/icmp.h>
116 #include <net/ipip.h>
117 #include <net/inet_ecn.h>
118 #include <net/xfrm.h>
119 
#define HASH_SIZE  16
/* 4-bit hash of an IPv4 address: fold the two nibbles of the low byte.
 * Arguments are fully parenthesized so the macro is safe for arbitrary
 * expressions, not just plain identifiers. */
#define HASH(addr) (((addr)^((addr)>>4))&0xF)

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The fallback device "tunl0", created at module load; also acts as the
 * control device for the tunnel ioctls. */
static struct net_device *ipip_fb_tunnel_dev;

/* Lookup tables keyed by which endpoints a tunnel has configured:
 * both remote and local, remote only, local only, or neither (wildcard).
 * tunnels[] indexes them by that two-bit "priority". */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Guards the hash chains above: readers on the packet paths, writers
 * when tunnels are linked/unlinked. */
static DEFINE_RWLOCK(ipip_lock);
136 
137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
138 {
139 	unsigned h0 = HASH(remote);
140 	unsigned h1 = HASH(local);
141 	struct ip_tunnel *t;
142 
143 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
144 		if (local == t->parms.iph.saddr &&
145 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
146 			return t;
147 	}
148 	for (t = tunnels_r[h0]; t; t = t->next) {
149 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 			return t;
151 	}
152 	for (t = tunnels_l[h1]; t; t = t->next) {
153 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
154 			return t;
155 	}
156 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
157 		return t;
158 	return NULL;
159 }
160 
161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
162 {
163 	u32 remote = t->parms.iph.daddr;
164 	u32 local = t->parms.iph.saddr;
165 	unsigned h = 0;
166 	int prio = 0;
167 
168 	if (remote) {
169 		prio |= 2;
170 		h ^= HASH(remote);
171 	}
172 	if (local) {
173 		prio |= 1;
174 		h ^= HASH(local);
175 	}
176 	return &tunnels[prio][h];
177 }
178 
179 
/*
 * Remove a tunnel from its hash chain.  The chain walk itself is done
 * without the lock and only the single pointer splice is made under
 * write_lock_bh(&ipip_lock); NOTE(review): this presumes writers are
 * serialized at a higher level (e.g. RTNL) — confirm, the walk has no
 * protection against a concurrent writer here.
 */
static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	/* Find the link that points at t and splice it out of the chain. */
	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipip_lock);
			*tp = t->next;
			write_unlock_bh(&ipip_lock);
			break;
		}
	}
}
193 
/*
 * Insert a tunnel at the head of its hash chain.  t->next is filled in
 * before the head pointer is published under the write lock, so readers
 * holding read_lock(&ipip_lock) never observe a half-linked entry.
 */
static void ipip_tunnel_link(struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipip_bucket(t);

	t->next = *tp;
	write_lock_bh(&ipip_lock);
	*tp = t;
	write_unlock_bh(&ipip_lock);
}
203 
/*
 * Look up a tunnel by the exact (saddr, daddr) pair in *parms.  If no
 * such tunnel exists and 'create' is set, allocate, register and link a
 * new tunnel device.  Returns the tunnel, or NULL on a lookup miss
 * (create == 0) or any creation failure.
 */
static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	u32 remote = parms->iph.daddr;
	u32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	unsigned h = 0;
	int prio = 0;
	char name[IFNAMSIZ];

	/* Same table/slot selection as ipip_bucket(), applied to *parms. */
	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	/* Device name: the caller's choice, or the first free "tunl%d". */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		int i;
		for (i=1; i<100; i++) {
			sprintf(name, "tunl%d", i);
			if (__dev_get_by_name(name) == NULL)
				break;
		}
		if (i==100)
			goto failed;
	}

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	nt = netdev_priv(dev);
	SET_MODULE_OWNER(dev);
	dev->init = ipip_tunnel_init;	/* invoked from register_netdevice() */
	nt->parms = *parms;

	if (register_netdevice(dev) < 0) {
		free_netdev(dev);
		goto failed;
	}

	/* Reference held on behalf of the hash tables; dropped by
	   ipip_tunnel_uninit() when the device is unregistered. */
	dev_hold(dev);
	ipip_tunnel_link(nt);
	return nt;

failed:
	return NULL;
}
263 
/*
 * netdev uninit callback: detach the tunnel from the lookup structures
 * and drop the reference taken when it was linked in.
 */
static void ipip_tunnel_uninit(struct net_device *dev)
{
	/* The fallback tunnel sits in the dedicated wildcard slot rather
	   than on a hash chain. */
	if (dev == ipip_fb_tunnel_dev) {
		write_lock_bh(&ipip_lock);
		tunnels_wc[0] = NULL;
		write_unlock_bh(&ipip_lock);
	} else
		ipip_tunnel_unlink(netdev_priv(dev));
	/* Matches the dev_hold() done when the tunnel was linked/created. */
	dev_put(dev);
}
274 
275 static int ipip_err(struct sk_buff *skb, u32 info)
276 {
277 #ifndef I_WISH_WORLD_WERE_PERFECT
278 
279 /* It is not :-( All the routers (except for Linux) return only
280    8 bytes of packet payload. It means, that precise relaying of
281    ICMP in the real Internet is absolutely infeasible.
282  */
283 	struct iphdr *iph = (struct iphdr*)skb->data;
284 	int type = skb->h.icmph->type;
285 	int code = skb->h.icmph->code;
286 	struct ip_tunnel *t;
287 	int err;
288 
289 	switch (type) {
290 	default:
291 	case ICMP_PARAMETERPROB:
292 		return 0;
293 
294 	case ICMP_DEST_UNREACH:
295 		switch (code) {
296 		case ICMP_SR_FAILED:
297 		case ICMP_PORT_UNREACH:
298 			/* Impossible event. */
299 			return 0;
300 		case ICMP_FRAG_NEEDED:
301 			/* Soft state for pmtu is maintained by IP core. */
302 			return 0;
303 		default:
304 			/* All others are translated to HOST_UNREACH.
305 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
306 			   I believe they are just ether pollution. --ANK
307 			 */
308 			break;
309 		}
310 		break;
311 	case ICMP_TIME_EXCEEDED:
312 		if (code != ICMP_EXC_TTL)
313 			return 0;
314 		break;
315 	}
316 
317 	err = -ENOENT;
318 
319 	read_lock(&ipip_lock);
320 	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
321 	if (t == NULL || t->parms.iph.daddr == 0)
322 		goto out;
323 
324 	err = 0;
325 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
326 		goto out;
327 
328 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
329 		t->err_count++;
330 	else
331 		t->err_count = 1;
332 	t->err_time = jiffies;
333 out:
334 	read_unlock(&ipip_lock);
335 	return err;
336 #else
337 	struct iphdr *iph = (struct iphdr*)dp;
338 	int hlen = iph->ihl<<2;
339 	struct iphdr *eiph;
340 	int type = skb->h.icmph->type;
341 	int code = skb->h.icmph->code;
342 	int rel_type = 0;
343 	int rel_code = 0;
344 	int rel_info = 0;
345 	struct sk_buff *skb2;
346 	struct flowi fl;
347 	struct rtable *rt;
348 
349 	if (len < hlen + sizeof(struct iphdr))
350 		return 0;
351 	eiph = (struct iphdr*)(dp + hlen);
352 
353 	switch (type) {
354 	default:
355 		return 0;
356 	case ICMP_PARAMETERPROB:
357 		if (skb->h.icmph->un.gateway < hlen)
358 			return 0;
359 
360 		/* So... This guy found something strange INSIDE encapsulated
361 		   packet. Well, he is fool, but what can we do ?
362 		 */
363 		rel_type = ICMP_PARAMETERPROB;
364 		rel_info = skb->h.icmph->un.gateway - hlen;
365 		break;
366 
367 	case ICMP_DEST_UNREACH:
368 		switch (code) {
369 		case ICMP_SR_FAILED:
370 		case ICMP_PORT_UNREACH:
371 			/* Impossible event. */
372 			return 0;
373 		case ICMP_FRAG_NEEDED:
374 			/* And it is the only really necessary thing :-) */
375 			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
376 			if (rel_info < hlen+68)
377 				return 0;
378 			rel_info -= hlen;
379 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
380 			if (rel_info > ntohs(eiph->tot_len))
381 				return 0;
382 			break;
383 		default:
384 			/* All others are translated to HOST_UNREACH.
385 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
386 			   I believe, it is just ether pollution. --ANK
387 			 */
388 			rel_type = ICMP_DEST_UNREACH;
389 			rel_code = ICMP_HOST_UNREACH;
390 			break;
391 		}
392 		break;
393 	case ICMP_TIME_EXCEEDED:
394 		if (code != ICMP_EXC_TTL)
395 			return 0;
396 		break;
397 	}
398 
399 	/* Prepare fake skb to feed it to icmp_send */
400 	skb2 = skb_clone(skb, GFP_ATOMIC);
401 	if (skb2 == NULL)
402 		return 0;
403 	dst_release(skb2->dst);
404 	skb2->dst = NULL;
405 	skb_pull(skb2, skb->data - (u8*)eiph);
406 	skb2->nh.raw = skb2->data;
407 
408 	/* Try to guess incoming interface */
409 	memset(&fl, 0, sizeof(fl));
410 	fl.fl4_daddr = eiph->saddr;
411 	fl.fl4_tos = RT_TOS(eiph->tos);
412 	fl.proto = IPPROTO_IPIP;
413 	if (ip_route_output_key(&rt, &key)) {
414 		kfree_skb(skb2);
415 		return 0;
416 	}
417 	skb2->dev = rt->u.dst.dev;
418 
419 	/* route "incoming" packet */
420 	if (rt->rt_flags&RTCF_LOCAL) {
421 		ip_rt_put(rt);
422 		rt = NULL;
423 		fl.fl4_daddr = eiph->daddr;
424 		fl.fl4_src = eiph->saddr;
425 		fl.fl4_tos = eiph->tos;
426 		if (ip_route_output_key(&rt, &fl) ||
427 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
428 			ip_rt_put(rt);
429 			kfree_skb(skb2);
430 			return 0;
431 		}
432 	} else {
433 		ip_rt_put(rt);
434 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
435 		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
436 			kfree_skb(skb2);
437 			return 0;
438 		}
439 	}
440 
441 	/* change mtu on this route */
442 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
443 		if (rel_info > dst_mtu(skb2->dst)) {
444 			kfree_skb(skb2);
445 			return 0;
446 		}
447 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
448 		rel_info = htonl(rel_info);
449 	} else if (type == ICMP_TIME_EXCEEDED) {
450 		struct ip_tunnel *t = netdev_priv(skb2->dev);
451 		if (t->parms.iph.ttl) {
452 			rel_type = ICMP_DEST_UNREACH;
453 			rel_code = ICMP_HOST_UNREACH;
454 		}
455 	}
456 
457 	icmp_send(skb2, rel_type, rel_code, rel_info);
458 	kfree_skb(skb2);
459 	return 0;
460 #endif
461 }
462 
463 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
464 {
465 	struct iphdr *inner_iph = skb->nh.iph;
466 
467 	if (INET_ECN_is_ce(outer_iph->tos))
468 		IP_ECN_set_ce(inner_iph);
469 }
470 
/*
 * Receive handler for IPPROTO_IPIP packets (installed via
 * xfrm4_tunnel_register).  Returns 0 when the packet was consumed
 * (delivered or dropped), -1 when no matching tunnel exists.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ip_tunnel *tunnel;

	/* Outer header: saddr is the remote endpoint, daddr ours. */
	iph = skb->nh.iph;

	read_lock(&ipip_lock);
	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			read_unlock(&ipip_lock);
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		/* Re-point the skb at the inner packet: the old network
		   header becomes the "MAC header" slot, the inner IP
		   header now starts at skb->data. */
		skb->mac.raw = skb->nh.raw;
		skb->nh.raw = skb->data;
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		/* Account and hand off as if received on the tunnel dev. */
		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip_lock);
		return 0;
	}
	read_unlock(&ipip_lock);

	return -1;
}
508 
509 /*
510  *	This function assumes it is being called from dev_queue_xmit()
511  *	and that skb is filled properly by that function.
512  */
513 
/*
 * Transmit path: route to the tunnel endpoint, prepend an outer IPv4
 * header, and hand the packet back to the IP layer (IPTUNNEL_XMIT).
 * Always returns 0; on error the skb is freed and tx_errors bumped.
 */
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;	/* configured outer-header template */
	u8     tos = tunnel->parms.iph.tos;
	u16    df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *old_iph = skb->nh.iph;
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	u32    dst = tiph->daddr;
	int    mtu;

	/* Guard against the tunnel routing back into itself. */
	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* Bit 0 of the configured tos means "inherit from inner header". */
	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = (struct rtable*)skb->dst) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(&rt, &fl)) {
			tunnel->stat.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	/* With a fixed (DF) frag_off the outer route's MTU governs;
	   otherwise defer to the inner route's MTU. */
	if (tiph->frag_off)
		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
	else
		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

	if (mtu < 68) {		/* below the IPv4 minimum MTU */
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);

	df |= (old_iph->frag_off&htons(IP_DF));

	/* Inner packet has DF set and does not fit: report FRAG_NEEDED. */
	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Relay recent ICMP errors recorded by ipip_err() to the sender. */
	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
  			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = skb->nh.iph;
	}

	/* Inner header becomes the transport header; push the outer one. */
	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	/* A configured TTL of 0 means "inherit from inner header". */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
658 
/*
 * Tunnel configuration ioctls (SIOC{GET,ADD,CHG,DEL}TUNNEL).  The
 * fallback device doubles as the control device: requests issued on it
 * operate on the tunnel identified by the user-supplied ip_tunnel_parm
 * rather than on the fallback itself.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipip_fb_tunnel_dev) {
			/* Control device: look up the tunnel the user named. */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Outer header must be plain IPv4-in-IPv4 without options,
		   and only the DF bit may be set in frag_off. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* A fixed (non-zero) TTL forces DF on the outer header. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Requested endpoints belong to another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Re-keying this device: presence of a remote
				   endpoint must match its point-to-point flag. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				/* Move the tunnel to its new hash chain. */
				ipip_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
			}
			/* Hand the (possibly updated) parameters back. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipip_fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may never be deleted. */
			if (t->dev == ipip_fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		err = unregister_netdevice(dev);
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
765 
766 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
767 {
768 	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
769 }
770 
771 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
772 {
773 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
774 		return -EINVAL;
775 	dev->mtu = new_mtu;
776 	return 0;
777 }
778 
/*
 * netdev setup callback (passed to alloc_netdev): wires up the tunnel
 * operations and gives the device tunnel-appropriate link-layer
 * parameters — an ARPHRD_TUNNEL device that needs no ARP and reserves
 * room for one outer IP header.
 */
static void ipip_tunnel_setup(struct net_device *dev)
{
	SET_MODULE_OWNER(dev);
	dev->uninit		= ipip_tunnel_uninit;
	dev->hard_start_xmit	= ipip_tunnel_xmit;
	dev->get_stats		= ipip_tunnel_get_stats;
	dev->do_ioctl		= ipip_tunnel_ioctl;
	dev->change_mtu		= ipip_tunnel_change_mtu;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* dev_addr/broadcast hold raw IPv4 addresses */
}
796 
/*
 * netdev init callback for ordinary tunnels (runs from
 * register_netdevice()).  Copies the endpoint addresses into the
 * device address fields and, when the egress device can be determined,
 * derives hard_header_len and mtu from it.
 */
static int ipip_tunnel_init(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
		/* Fixed remote endpoint: probe the route to find the
		   device the encapsulated packets will leave through. */
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		struct rtable *rt;
		if (!ip_route_output_key(&rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	/* Fall back to the explicitly bound link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->parms.link);

	if (tdev) {
		/* Leave room for the outer IP header on the egress device. */
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;

	return 0;
}
838 
839 static int __init ipip_fb_tunnel_init(struct net_device *dev)
840 {
841 	struct ip_tunnel *tunnel = netdev_priv(dev);
842 	struct iphdr *iph = &tunnel->parms.iph;
843 
844 	tunnel->dev = dev;
845 	strcpy(tunnel->parms.name, dev->name);
846 
847 	iph->version		= 4;
848 	iph->protocol		= IPPROTO_IPIP;
849 	iph->ihl		= 5;
850 
851 	dev_hold(dev);
852 	tunnels_wc[0]		= tunnel;
853 	return 0;
854 }
855 
/* Handler table registered with the IPv4 stack for IPPROTO_IPIP
 * traffic; NOTE(review): priority 1 — relative ordering against other
 * registered tunnel handlers is defined by xfrm4_tunnel_register(),
 * not visible here. */
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

/* Printed once at module load. */
static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
864 
/*
 * Module load: register the IPPROTO_IPIP handler, then create and
 * register the fallback device "tunl0".  On failure the steps already
 * taken are undone in reverse order (err2 -> err1).
 */
static int __init ipip_init(void)
{
	int err;

	printk(banner);

	if (xfrm4_tunnel_register(&ipip_handler)) {
		printk(KERN_INFO "ipip init: can't register tunnel\n");
		return -EAGAIN;
	}

	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipip_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	/* The fallback device uses its own init, not ipip_tunnel_init. */
	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;

	if ((err = register_netdev(ipip_fb_tunnel_dev)))
		goto err2;
 out:
	return err;
 err2:
	free_netdev(ipip_fb_tunnel_dev);
 err1:
	xfrm4_tunnel_deregister(&ipip_handler);
	goto out;
}
896 
897 static void __exit ipip_destroy_tunnels(void)
898 {
899 	int prio;
900 
901 	for (prio = 1; prio < 4; prio++) {
902 		int h;
903 		for (h = 0; h < HASH_SIZE; h++) {
904 			struct ip_tunnel *t;
905 			while ((t = tunnels[prio][h]) != NULL)
906 				unregister_netdevice(t->dev);
907 		}
908 	}
909 }
910 
/*
 * Module unload: unhook the protocol handler first so no new packets
 * are delivered to us, then tear down every tunnel device under RTNL.
 */
static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	rtnl_lock();
	ipip_destroy_tunnels();
	unregister_netdevice(ipip_fb_tunnel_dev);
	rtnl_unlock();
}
921 
/* Standard module entry/exit hooks and license declaration. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
925