xref: /linux/net/ipv4/ipip.c (revision c537b994505099b7197e7d3125b942ecbcc51eb6)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *	Authors:
7  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *	Fixes:
10  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
11  *					a module taking up 2 pages).
12  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *					to keep ip_forward happy.
14  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *					I do not want to merge them together.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *	modify it under the terms of the GNU General Public License
24  *	as published by the Free Software Foundation; either version
25  *	2 of the License, or (at your option) any later version.
26  *
27  */
28 
29 /* tunnel.c: an IP tunnel driver
30 
31 	The purpose of this driver is to provide an IP tunnel through
32 	which you can tunnel network traffic transparently across subnets.
33 
34 	This was written by looking at Nick Holloway's dummy driver
35 	Thanks for the great code!
36 
37 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
38 
39 	Minor tweaks:
40 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 		dev->hard_header/hard_header_len changed to use no headers.
42 		Comments/bracketing tweaked.
43 		Made the tunnels use dev->name not tunnel: when error reporting.
44 		Added tx_dropped stat
45 
46 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
47 
48 	Reworked:
49 		Changed to tunnel to destination gateway in addition to the
50 			tunnel's pointopoint address
51 		Almost completely rewritten
52 		Note:  There is currently no firewall or ICMP handling done.
53 
54 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
55 
56 */
57 
58 /* Things I wish I had known when writing the tunnel driver:
59 
60 	When the tunnel_xmit() function is called, the skb contains the
61 	packet to be sent (plus a great deal of extra info), and dev
62 	contains the tunnel device that _we_ are.
63 
64 	When we are passed a packet, we are expected to fill in the
65 	source address with our source IP address.
66 
67 	What is the proper way to allocate, copy and free a buffer?
68 	After you allocate it, it is a "0 length" chunk of memory
69 	starting at zero.  If you want to add headers to the buffer
70 	later, you'll have to call "skb_reserve(skb, amount)" with
71 	the amount of memory you want reserved.  Then, you call
72 	"skb_put(skb, amount)" with the amount of space you want in
73 	the buffer.  skb_put() returns a pointer to the top (#0) of
74 	that buffer.  skb->len is set to the amount of space you have
75 	"allocated" with skb_put().  You can then write up to skb->len
76 	bytes to that buffer.  If you need more, you can call skb_put()
77 	again with the additional amount of space you need.  You can
78 	find out how much more space you can allocate by calling
79 	"skb_tailroom(skb)".
80 	Now, to add header space, call "skb_push(skb, header_len)".
81 	This creates space at the beginning of the buffer and returns
82 	a pointer to this new space.  If later you need to strip a
83 	header from a buffer, call "skb_pull(skb, header_len)".
84 	skb_headroom() will return how much space is left at the top
85 	of the buffer (before the main data).  Remember, this headroom
86 	space must be reserved before the skb_put() function is called.
87 	*/
88 
89 /*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91 
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94 
95 
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
111 
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 
/*
 * Every hashed tunnel table in this file has HASH_SIZE buckets.
 *
 * HASH() folds the two lowest nibbles of an address into a bucket index
 * in the range 0 .. HASH_SIZE-1.  The argument is fully parenthesized so
 * that an expression such as HASH(a | b) hashes the whole expression and
 * not just its first operand.  The argument is evaluated twice, so it
 * must not have side effects.
 */
#define HASH_SIZE  16
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
121 
122 static int ipip_fb_tunnel_init(struct net_device *dev);
123 static int ipip_tunnel_init(struct net_device *dev);
124 static void ipip_tunnel_setup(struct net_device *dev);
125 
126 static struct net_device *ipip_fb_tunnel_dev;
127 
128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
131 static struct ip_tunnel *tunnels_wc[1];
132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
133 
134 static DEFINE_RWLOCK(ipip_lock);
135 
136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
137 {
138 	unsigned h0 = HASH(remote);
139 	unsigned h1 = HASH(local);
140 	struct ip_tunnel *t;
141 
142 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 		if (local == t->parms.iph.saddr &&
144 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 			return t;
146 	}
147 	for (t = tunnels_r[h0]; t; t = t->next) {
148 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 			return t;
150 	}
151 	for (t = tunnels_l[h1]; t; t = t->next) {
152 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 			return t;
154 	}
155 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 		return t;
157 	return NULL;
158 }
159 
160 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
161 {
162 	__be32 remote = t->parms.iph.daddr;
163 	__be32 local = t->parms.iph.saddr;
164 	unsigned h = 0;
165 	int prio = 0;
166 
167 	if (remote) {
168 		prio |= 2;
169 		h ^= HASH(remote);
170 	}
171 	if (local) {
172 		prio |= 1;
173 		h ^= HASH(local);
174 	}
175 	return &tunnels[prio][h];
176 }
177 
178 
179 static void ipip_tunnel_unlink(struct ip_tunnel *t)
180 {
181 	struct ip_tunnel **tp;
182 
183 	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
184 		if (t == *tp) {
185 			write_lock_bh(&ipip_lock);
186 			*tp = t->next;
187 			write_unlock_bh(&ipip_lock);
188 			break;
189 		}
190 	}
191 }
192 
193 static void ipip_tunnel_link(struct ip_tunnel *t)
194 {
195 	struct ip_tunnel **tp = ipip_bucket(t);
196 
197 	t->next = *tp;
198 	write_lock_bh(&ipip_lock);
199 	*tp = t;
200 	write_unlock_bh(&ipip_lock);
201 }
202 
203 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
204 {
205 	__be32 remote = parms->iph.daddr;
206 	__be32 local = parms->iph.saddr;
207 	struct ip_tunnel *t, **tp, *nt;
208 	struct net_device *dev;
209 	unsigned h = 0;
210 	int prio = 0;
211 	char name[IFNAMSIZ];
212 
213 	if (remote) {
214 		prio |= 2;
215 		h ^= HASH(remote);
216 	}
217 	if (local) {
218 		prio |= 1;
219 		h ^= HASH(local);
220 	}
221 	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
222 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
223 			return t;
224 	}
225 	if (!create)
226 		return NULL;
227 
228 	if (parms->name[0])
229 		strlcpy(name, parms->name, IFNAMSIZ);
230 	else {
231 		int i;
232 		for (i=1; i<100; i++) {
233 			sprintf(name, "tunl%d", i);
234 			if (__dev_get_by_name(name) == NULL)
235 				break;
236 		}
237 		if (i==100)
238 			goto failed;
239 	}
240 
241 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
242 	if (dev == NULL)
243 		return NULL;
244 
245 	nt = netdev_priv(dev);
246 	SET_MODULE_OWNER(dev);
247 	dev->init = ipip_tunnel_init;
248 	nt->parms = *parms;
249 
250 	if (register_netdevice(dev) < 0) {
251 		free_netdev(dev);
252 		goto failed;
253 	}
254 
255 	dev_hold(dev);
256 	ipip_tunnel_link(nt);
257 	return nt;
258 
259 failed:
260 	return NULL;
261 }
262 
263 static void ipip_tunnel_uninit(struct net_device *dev)
264 {
265 	if (dev == ipip_fb_tunnel_dev) {
266 		write_lock_bh(&ipip_lock);
267 		tunnels_wc[0] = NULL;
268 		write_unlock_bh(&ipip_lock);
269 	} else
270 		ipip_tunnel_unlink(netdev_priv(dev));
271 	dev_put(dev);
272 }
273 
274 static int ipip_err(struct sk_buff *skb, u32 info)
275 {
276 #ifndef I_WISH_WORLD_WERE_PERFECT
277 
278 /* It is not :-( All the routers (except for Linux) return only
279    8 bytes of packet payload. It means, that precise relaying of
280    ICMP in the real Internet is absolutely infeasible.
281  */
282 	struct iphdr *iph = (struct iphdr*)skb->data;
283 	int type = skb->h.icmph->type;
284 	int code = skb->h.icmph->code;
285 	struct ip_tunnel *t;
286 	int err;
287 
288 	switch (type) {
289 	default:
290 	case ICMP_PARAMETERPROB:
291 		return 0;
292 
293 	case ICMP_DEST_UNREACH:
294 		switch (code) {
295 		case ICMP_SR_FAILED:
296 		case ICMP_PORT_UNREACH:
297 			/* Impossible event. */
298 			return 0;
299 		case ICMP_FRAG_NEEDED:
300 			/* Soft state for pmtu is maintained by IP core. */
301 			return 0;
302 		default:
303 			/* All others are translated to HOST_UNREACH.
304 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
305 			   I believe they are just ether pollution. --ANK
306 			 */
307 			break;
308 		}
309 		break;
310 	case ICMP_TIME_EXCEEDED:
311 		if (code != ICMP_EXC_TTL)
312 			return 0;
313 		break;
314 	}
315 
316 	err = -ENOENT;
317 
318 	read_lock(&ipip_lock);
319 	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
320 	if (t == NULL || t->parms.iph.daddr == 0)
321 		goto out;
322 
323 	err = 0;
324 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
325 		goto out;
326 
327 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
328 		t->err_count++;
329 	else
330 		t->err_count = 1;
331 	t->err_time = jiffies;
332 out:
333 	read_unlock(&ipip_lock);
334 	return err;
335 #else
336 	struct iphdr *iph = (struct iphdr*)dp;
337 	int hlen = iph->ihl<<2;
338 	struct iphdr *eiph;
339 	int type = skb->h.icmph->type;
340 	int code = skb->h.icmph->code;
341 	int rel_type = 0;
342 	int rel_code = 0;
343 	__be32 rel_info = 0;
344 	__u32 n = 0;
345 	struct sk_buff *skb2;
346 	struct flowi fl;
347 	struct rtable *rt;
348 
349 	if (len < hlen + sizeof(struct iphdr))
350 		return 0;
351 	eiph = (struct iphdr*)(dp + hlen);
352 
353 	switch (type) {
354 	default:
355 		return 0;
356 	case ICMP_PARAMETERPROB:
357 		n = ntohl(skb->h.icmph->un.gateway) >> 24;
358 		if (n < hlen)
359 			return 0;
360 
361 		/* So... This guy found something strange INSIDE encapsulated
362 		   packet. Well, he is fool, but what can we do ?
363 		 */
364 		rel_type = ICMP_PARAMETERPROB;
365 		rel_info = htonl((n - hlen) << 24);
366 		break;
367 
368 	case ICMP_DEST_UNREACH:
369 		switch (code) {
370 		case ICMP_SR_FAILED:
371 		case ICMP_PORT_UNREACH:
372 			/* Impossible event. */
373 			return 0;
374 		case ICMP_FRAG_NEEDED:
375 			/* And it is the only really necessary thing :-) */
376 			n = ntohs(skb->h.icmph->un.frag.mtu);
377 			if (n < hlen+68)
378 				return 0;
379 			n -= hlen;
380 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
381 			if (n > ntohs(eiph->tot_len))
382 				return 0;
383 			rel_info = htonl(n);
384 			break;
385 		default:
386 			/* All others are translated to HOST_UNREACH.
387 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
388 			   I believe, it is just ether pollution. --ANK
389 			 */
390 			rel_type = ICMP_DEST_UNREACH;
391 			rel_code = ICMP_HOST_UNREACH;
392 			break;
393 		}
394 		break;
395 	case ICMP_TIME_EXCEEDED:
396 		if (code != ICMP_EXC_TTL)
397 			return 0;
398 		break;
399 	}
400 
401 	/* Prepare fake skb to feed it to icmp_send */
402 	skb2 = skb_clone(skb, GFP_ATOMIC);
403 	if (skb2 == NULL)
404 		return 0;
405 	dst_release(skb2->dst);
406 	skb2->dst = NULL;
407 	skb_pull(skb2, skb->data - (u8*)eiph);
408 	skb2->nh.raw = skb2->data;
409 
410 	/* Try to guess incoming interface */
411 	memset(&fl, 0, sizeof(fl));
412 	fl.fl4_daddr = eiph->saddr;
413 	fl.fl4_tos = RT_TOS(eiph->tos);
414 	fl.proto = IPPROTO_IPIP;
415 	if (ip_route_output_key(&rt, &key)) {
416 		kfree_skb(skb2);
417 		return 0;
418 	}
419 	skb2->dev = rt->u.dst.dev;
420 
421 	/* route "incoming" packet */
422 	if (rt->rt_flags&RTCF_LOCAL) {
423 		ip_rt_put(rt);
424 		rt = NULL;
425 		fl.fl4_daddr = eiph->daddr;
426 		fl.fl4_src = eiph->saddr;
427 		fl.fl4_tos = eiph->tos;
428 		if (ip_route_output_key(&rt, &fl) ||
429 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
430 			ip_rt_put(rt);
431 			kfree_skb(skb2);
432 			return 0;
433 		}
434 	} else {
435 		ip_rt_put(rt);
436 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
437 		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
438 			kfree_skb(skb2);
439 			return 0;
440 		}
441 	}
442 
443 	/* change mtu on this route */
444 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
445 		if (n > dst_mtu(skb2->dst)) {
446 			kfree_skb(skb2);
447 			return 0;
448 		}
449 		skb2->dst->ops->update_pmtu(skb2->dst, n);
450 	} else if (type == ICMP_TIME_EXCEEDED) {
451 		struct ip_tunnel *t = netdev_priv(skb2->dev);
452 		if (t->parms.iph.ttl) {
453 			rel_type = ICMP_DEST_UNREACH;
454 			rel_code = ICMP_HOST_UNREACH;
455 		}
456 	}
457 
458 	icmp_send(skb2, rel_type, rel_code, rel_info);
459 	kfree_skb(skb2);
460 	return 0;
461 #endif
462 }
463 
464 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
465 {
466 	struct iphdr *inner_iph = skb->nh.iph;
467 
468 	if (INET_ECN_is_ce(outer_iph->tos))
469 		IP_ECN_set_ce(inner_iph);
470 }
471 
472 static int ipip_rcv(struct sk_buff *skb)
473 {
474 	struct iphdr *iph;
475 	struct ip_tunnel *tunnel;
476 
477 	iph = skb->nh.iph;
478 
479 	read_lock(&ipip_lock);
480 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
481 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
482 			read_unlock(&ipip_lock);
483 			kfree_skb(skb);
484 			return 0;
485 		}
486 
487 		secpath_reset(skb);
488 
489 		skb->mac.raw = skb->nh.raw;
490 		skb->nh.raw = skb->data;
491 		skb->protocol = htons(ETH_P_IP);
492 		skb->pkt_type = PACKET_HOST;
493 
494 		tunnel->stat.rx_packets++;
495 		tunnel->stat.rx_bytes += skb->len;
496 		skb->dev = tunnel->dev;
497 		dst_release(skb->dst);
498 		skb->dst = NULL;
499 		nf_reset(skb);
500 		ipip_ecn_decapsulate(iph, skb);
501 		netif_rx(skb);
502 		read_unlock(&ipip_lock);
503 		return 0;
504 	}
505 	read_unlock(&ipip_lock);
506 
507 	return -1;
508 }
509 
510 /*
511  *	This function assumes it is being called from dev_queue_xmit()
512  *	and that skb is filled properly by that function.
513  */
514 
515 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
516 {
517 	struct ip_tunnel *tunnel = netdev_priv(dev);
518 	struct net_device_stats *stats = &tunnel->stat;
519 	struct iphdr  *tiph = &tunnel->parms.iph;
520 	u8     tos = tunnel->parms.iph.tos;
521 	__be16 df = tiph->frag_off;
522 	struct rtable *rt;     			/* Route to the other host */
523 	struct net_device *tdev;			/* Device to other host */
524 	struct iphdr  *old_iph = skb->nh.iph;
525 	struct iphdr  *iph;			/* Our new IP header */
526 	int    max_headroom;			/* The extra header space needed */
527 	__be32 dst = tiph->daddr;
528 	int    mtu;
529 
530 	if (tunnel->recursion++) {
531 		tunnel->stat.collisions++;
532 		goto tx_error;
533 	}
534 
535 	if (skb->protocol != htons(ETH_P_IP))
536 		goto tx_error;
537 
538 	if (tos&1)
539 		tos = old_iph->tos;
540 
541 	if (!dst) {
542 		/* NBMA tunnel */
543 		if ((rt = (struct rtable*)skb->dst) == NULL) {
544 			tunnel->stat.tx_fifo_errors++;
545 			goto tx_error;
546 		}
547 		if ((dst = rt->rt_gateway) == 0)
548 			goto tx_error_icmp;
549 	}
550 
551 	{
552 		struct flowi fl = { .oif = tunnel->parms.link,
553 				    .nl_u = { .ip4_u =
554 					      { .daddr = dst,
555 						.saddr = tiph->saddr,
556 						.tos = RT_TOS(tos) } },
557 				    .proto = IPPROTO_IPIP };
558 		if (ip_route_output_key(&rt, &fl)) {
559 			tunnel->stat.tx_carrier_errors++;
560 			goto tx_error_icmp;
561 		}
562 	}
563 	tdev = rt->u.dst.dev;
564 
565 	if (tdev == dev) {
566 		ip_rt_put(rt);
567 		tunnel->stat.collisions++;
568 		goto tx_error;
569 	}
570 
571 	if (tiph->frag_off)
572 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
573 	else
574 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
575 
576 	if (mtu < 68) {
577 		tunnel->stat.collisions++;
578 		ip_rt_put(rt);
579 		goto tx_error;
580 	}
581 	if (skb->dst)
582 		skb->dst->ops->update_pmtu(skb->dst, mtu);
583 
584 	df |= (old_iph->frag_off&htons(IP_DF));
585 
586 	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
587 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
588 		ip_rt_put(rt);
589 		goto tx_error;
590 	}
591 
592 	if (tunnel->err_count > 0) {
593 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
594 			tunnel->err_count--;
595 			dst_link_failure(skb);
596 		} else
597 			tunnel->err_count = 0;
598 	}
599 
600 	/*
601 	 * Okay, now see if we can stuff it in the buffer as-is.
602 	 */
603 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
604 
605 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
606 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
607 		if (!new_skb) {
608 			ip_rt_put(rt);
609 			stats->tx_dropped++;
610 			dev_kfree_skb(skb);
611 			tunnel->recursion--;
612 			return 0;
613 		}
614 		if (skb->sk)
615 			skb_set_owner_w(new_skb, skb->sk);
616 		dev_kfree_skb(skb);
617 		skb = new_skb;
618 		old_iph = skb->nh.iph;
619 	}
620 
621 	skb->h.raw = skb->nh.raw;
622 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
623 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
624 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
625 			      IPSKB_REROUTED);
626 	dst_release(skb->dst);
627 	skb->dst = &rt->u.dst;
628 
629 	/*
630 	 *	Push down and install the IPIP header.
631 	 */
632 
633 	iph 			=	skb->nh.iph;
634 	iph->version		=	4;
635 	iph->ihl		=	sizeof(struct iphdr)>>2;
636 	iph->frag_off		=	df;
637 	iph->protocol		=	IPPROTO_IPIP;
638 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
639 	iph->daddr		=	rt->rt_dst;
640 	iph->saddr		=	rt->rt_src;
641 
642 	if ((iph->ttl = tiph->ttl) == 0)
643 		iph->ttl	=	old_iph->ttl;
644 
645 	nf_reset(skb);
646 
647 	IPTUNNEL_XMIT();
648 	tunnel->recursion--;
649 	return 0;
650 
651 tx_error_icmp:
652 	dst_link_failure(skb);
653 tx_error:
654 	stats->tx_errors++;
655 	dev_kfree_skb(skb);
656 	tunnel->recursion--;
657 	return 0;
658 }
659 
660 static int
661 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
662 {
663 	int err = 0;
664 	struct ip_tunnel_parm p;
665 	struct ip_tunnel *t;
666 
667 	switch (cmd) {
668 	case SIOCGETTUNNEL:
669 		t = NULL;
670 		if (dev == ipip_fb_tunnel_dev) {
671 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
672 				err = -EFAULT;
673 				break;
674 			}
675 			t = ipip_tunnel_locate(&p, 0);
676 		}
677 		if (t == NULL)
678 			t = netdev_priv(dev);
679 		memcpy(&p, &t->parms, sizeof(p));
680 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
681 			err = -EFAULT;
682 		break;
683 
684 	case SIOCADDTUNNEL:
685 	case SIOCCHGTUNNEL:
686 		err = -EPERM;
687 		if (!capable(CAP_NET_ADMIN))
688 			goto done;
689 
690 		err = -EFAULT;
691 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
692 			goto done;
693 
694 		err = -EINVAL;
695 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
696 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
697 			goto done;
698 		if (p.iph.ttl)
699 			p.iph.frag_off |= htons(IP_DF);
700 
701 		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
702 
703 		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
704 			if (t != NULL) {
705 				if (t->dev != dev) {
706 					err = -EEXIST;
707 					break;
708 				}
709 			} else {
710 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
711 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
712 					err = -EINVAL;
713 					break;
714 				}
715 				t = netdev_priv(dev);
716 				ipip_tunnel_unlink(t);
717 				t->parms.iph.saddr = p.iph.saddr;
718 				t->parms.iph.daddr = p.iph.daddr;
719 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
720 				memcpy(dev->broadcast, &p.iph.daddr, 4);
721 				ipip_tunnel_link(t);
722 				netdev_state_change(dev);
723 			}
724 		}
725 
726 		if (t) {
727 			err = 0;
728 			if (cmd == SIOCCHGTUNNEL) {
729 				t->parms.iph.ttl = p.iph.ttl;
730 				t->parms.iph.tos = p.iph.tos;
731 				t->parms.iph.frag_off = p.iph.frag_off;
732 			}
733 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
734 				err = -EFAULT;
735 		} else
736 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
737 		break;
738 
739 	case SIOCDELTUNNEL:
740 		err = -EPERM;
741 		if (!capable(CAP_NET_ADMIN))
742 			goto done;
743 
744 		if (dev == ipip_fb_tunnel_dev) {
745 			err = -EFAULT;
746 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
747 				goto done;
748 			err = -ENOENT;
749 			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
750 				goto done;
751 			err = -EPERM;
752 			if (t->dev == ipip_fb_tunnel_dev)
753 				goto done;
754 			dev = t->dev;
755 		}
756 		unregister_netdevice(dev);
757 		err = 0;
758 		break;
759 
760 	default:
761 		err = -EINVAL;
762 	}
763 
764 done:
765 	return err;
766 }
767 
768 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
769 {
770 	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
771 }
772 
773 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
774 {
775 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
776 		return -EINVAL;
777 	dev->mtu = new_mtu;
778 	return 0;
779 }
780 
781 static void ipip_tunnel_setup(struct net_device *dev)
782 {
783 	SET_MODULE_OWNER(dev);
784 	dev->uninit		= ipip_tunnel_uninit;
785 	dev->hard_start_xmit	= ipip_tunnel_xmit;
786 	dev->get_stats		= ipip_tunnel_get_stats;
787 	dev->do_ioctl		= ipip_tunnel_ioctl;
788 	dev->change_mtu		= ipip_tunnel_change_mtu;
789 	dev->destructor		= free_netdev;
790 
791 	dev->type		= ARPHRD_TUNNEL;
792 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
793 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
794 	dev->flags		= IFF_NOARP;
795 	dev->iflink		= 0;
796 	dev->addr_len		= 4;
797 }
798 
799 static int ipip_tunnel_init(struct net_device *dev)
800 {
801 	struct net_device *tdev = NULL;
802 	struct ip_tunnel *tunnel;
803 	struct iphdr *iph;
804 
805 	tunnel = netdev_priv(dev);
806 	iph = &tunnel->parms.iph;
807 
808 	tunnel->dev = dev;
809 	strcpy(tunnel->parms.name, dev->name);
810 
811 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
812 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
813 
814 	if (iph->daddr) {
815 		struct flowi fl = { .oif = tunnel->parms.link,
816 				    .nl_u = { .ip4_u =
817 					      { .daddr = iph->daddr,
818 						.saddr = iph->saddr,
819 						.tos = RT_TOS(iph->tos) } },
820 				    .proto = IPPROTO_IPIP };
821 		struct rtable *rt;
822 		if (!ip_route_output_key(&rt, &fl)) {
823 			tdev = rt->u.dst.dev;
824 			ip_rt_put(rt);
825 		}
826 		dev->flags |= IFF_POINTOPOINT;
827 	}
828 
829 	if (!tdev && tunnel->parms.link)
830 		tdev = __dev_get_by_index(tunnel->parms.link);
831 
832 	if (tdev) {
833 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
834 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
835 	}
836 	dev->iflink = tunnel->parms.link;
837 
838 	return 0;
839 }
840 
841 static int __init ipip_fb_tunnel_init(struct net_device *dev)
842 {
843 	struct ip_tunnel *tunnel = netdev_priv(dev);
844 	struct iphdr *iph = &tunnel->parms.iph;
845 
846 	tunnel->dev = dev;
847 	strcpy(tunnel->parms.name, dev->name);
848 
849 	iph->version		= 4;
850 	iph->protocol		= IPPROTO_IPIP;
851 	iph->ihl		= 5;
852 
853 	dev_hold(dev);
854 	tunnels_wc[0]		= tunnel;
855 	return 0;
856 }
857 
858 static struct xfrm_tunnel ipip_handler = {
859 	.handler	=	ipip_rcv,
860 	.err_handler	=	ipip_err,
861 	.priority	=	1,
862 };
863 
864 static char banner[] __initdata =
865 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
866 
867 static int __init ipip_init(void)
868 {
869 	int err;
870 
871 	printk(banner);
872 
873 	if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
874 		printk(KERN_INFO "ipip init: can't register tunnel\n");
875 		return -EAGAIN;
876 	}
877 
878 	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
879 					   "tunl0",
880 					   ipip_tunnel_setup);
881 	if (!ipip_fb_tunnel_dev) {
882 		err = -ENOMEM;
883 		goto err1;
884 	}
885 
886 	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
887 
888 	if ((err = register_netdev(ipip_fb_tunnel_dev)))
889 		goto err2;
890  out:
891 	return err;
892  err2:
893 	free_netdev(ipip_fb_tunnel_dev);
894  err1:
895 	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
896 	goto out;
897 }
898 
899 static void __exit ipip_destroy_tunnels(void)
900 {
901 	int prio;
902 
903 	for (prio = 1; prio < 4; prio++) {
904 		int h;
905 		for (h = 0; h < HASH_SIZE; h++) {
906 			struct ip_tunnel *t;
907 			while ((t = tunnels[prio][h]) != NULL)
908 				unregister_netdevice(t->dev);
909 		}
910 	}
911 }
912 
913 static void __exit ipip_fini(void)
914 {
915 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
916 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
917 
918 	rtnl_lock();
919 	ipip_destroy_tunnels();
920 	unregister_netdevice(ipip_fb_tunnel_dev);
921 	rtnl_unlock();
922 }
923 
924 module_init(ipip_init);
925 module_exit(ipip_fini);
926 MODULE_LICENSE("GPL");
927