xref: /linux/net/ipv4/ipip.c (revision 4f1933620f57145212cdbb1ac6ce099eeeb21c5a)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *	Authors:
7  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *	Fixes:
10  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
11  *					a module taking up 2 pages).
12  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *					to keep ip_forward happy.
14  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *					I do not want to merge them together.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *	modify it under the terms of the GNU General Public License
24  *	as published by the Free Software Foundation; either version
25  *	2 of the License, or (at your option) any later version.
26  *
27  */
28 
29 /* tunnel.c: an IP tunnel driver
30 
31 	The purpose of this driver is to provide an IP tunnel through
32 	which you can tunnel network traffic transparently across subnets.
33 
34 	This was written by looking at Nick Holloway's dummy driver
35 	Thanks for the great code!
36 
37 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
38 
39 	Minor tweaks:
40 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 		dev->hard_header/hard_header_len changed to use no headers.
42 		Comments/bracketing tweaked.
43 		Made the tunnels use dev->name not tunnel: when error reporting.
44 		Added tx_dropped stat
45 
46 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
47 
48 	Reworked:
49 		Changed to tunnel to destination gateway in addition to the
50 			tunnel's pointopoint address
51 		Almost completely rewritten
52 		Note:  There is currently no firewall or ICMP handling done.
53 
54 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
55 
56 */
57 
58 /* Things I wish I had known when writing the tunnel driver:
59 
60 	When the tunnel_xmit() function is called, the skb contains the
61 	packet to be sent (plus a great deal of extra info), and dev
62 	contains the tunnel device that _we_ are.
63 
64 	When we are passed a packet, we are expected to fill in the
65 	source address with our source IP address.
66 
67 	What is the proper way to allocate, copy and free a buffer?
68 	After you allocate it, it is a "0 length" chunk of memory
69 	starting at zero.  If you want to add headers to the buffer
70 	later, you'll have to call "skb_reserve(skb, amount)" with
71 	the amount of memory you want reserved.  Then, you call
72 	"skb_put(skb, amount)" with the amount of space you want in
73 	the buffer.  skb_put() returns a pointer to the top (#0) of
74 	that buffer.  skb->len is set to the amount of space you have
75 	"allocated" with skb_put().  You can then write up to skb->len
76 	bytes to that buffer.  If you need more, you can call skb_put()
77 	again with the additional amount of space you need.  You can
78 	find out how much more space you can allocate by calling
79 	"skb_tailroom(skb)".
80 	Now, to add header space, call "skb_push(skb, header_len)".
81 	This creates space at the beginning of the buffer and returns
82 	a pointer to this new space.  If later you need to strip a
83 	header from a buffer, call "skb_pull(skb, header_len)".
84 	skb_headroom() will return how much space is left at the top
85 	of the buffer (before the main data).  Remember, this headroom
86 	space must be reserved before the skb_put() function is called.
87 	*/
88 
89 /*
90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91 
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94 
95 
96 #include <linux/capability.h>
97 #include <linux/config.h>
98 #include <linux/module.h>
99 #include <linux/types.h>
100 #include <linux/sched.h>
101 #include <linux/kernel.h>
102 #include <asm/uaccess.h>
103 #include <linux/skbuff.h>
104 #include <linux/netdevice.h>
105 #include <linux/in.h>
106 #include <linux/tcp.h>
107 #include <linux/udp.h>
108 #include <linux/if_arp.h>
109 #include <linux/mroute.h>
110 #include <linux/init.h>
111 #include <linux/netfilter_ipv4.h>
112 #include <linux/if_ether.h>
113 
114 #include <net/sock.h>
115 #include <net/ip.h>
116 #include <net/icmp.h>
117 #include <net/protocol.h>
118 #include <net/ipip.h>
119 #include <net/inet_ecn.h>
120 #include <net/xfrm.h>
121 
/* Number of buckets in each per-address tunnel hash table (power of two). */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range [0, HASH_SIZE).
 *
 * The argument is parenthesized so that compound expressions such as
 * HASH(a | b) hash the value the caller wrote, and the mask is derived
 * from HASH_SIZE so the two definitions cannot drift apart.
 * Note: addr is evaluated twice, so it must not have side effects.
 */
#define HASH(addr) (((addr) ^ ((addr) >> 4)) & (HASH_SIZE - 1))
124 
125 static int ipip_fb_tunnel_init(struct net_device *dev);
126 static int ipip_tunnel_init(struct net_device *dev);
127 static void ipip_tunnel_setup(struct net_device *dev);
128 
129 static struct net_device *ipip_fb_tunnel_dev;
130 
131 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
132 static struct ip_tunnel *tunnels_r[HASH_SIZE];
133 static struct ip_tunnel *tunnels_l[HASH_SIZE];
134 static struct ip_tunnel *tunnels_wc[1];
135 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
136 
137 static DEFINE_RWLOCK(ipip_lock);
138 
139 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
140 {
141 	unsigned h0 = HASH(remote);
142 	unsigned h1 = HASH(local);
143 	struct ip_tunnel *t;
144 
145 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
146 		if (local == t->parms.iph.saddr &&
147 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
148 			return t;
149 	}
150 	for (t = tunnels_r[h0]; t; t = t->next) {
151 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
152 			return t;
153 	}
154 	for (t = tunnels_l[h1]; t; t = t->next) {
155 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
156 			return t;
157 	}
158 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
159 		return t;
160 	return NULL;
161 }
162 
163 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
164 {
165 	u32 remote = t->parms.iph.daddr;
166 	u32 local = t->parms.iph.saddr;
167 	unsigned h = 0;
168 	int prio = 0;
169 
170 	if (remote) {
171 		prio |= 2;
172 		h ^= HASH(remote);
173 	}
174 	if (local) {
175 		prio |= 1;
176 		h ^= HASH(local);
177 	}
178 	return &tunnels[prio][h];
179 }
180 
181 
182 static void ipip_tunnel_unlink(struct ip_tunnel *t)
183 {
184 	struct ip_tunnel **tp;
185 
186 	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
187 		if (t == *tp) {
188 			write_lock_bh(&ipip_lock);
189 			*tp = t->next;
190 			write_unlock_bh(&ipip_lock);
191 			break;
192 		}
193 	}
194 }
195 
196 static void ipip_tunnel_link(struct ip_tunnel *t)
197 {
198 	struct ip_tunnel **tp = ipip_bucket(t);
199 
200 	t->next = *tp;
201 	write_lock_bh(&ipip_lock);
202 	*tp = t;
203 	write_unlock_bh(&ipip_lock);
204 }
205 
206 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
207 {
208 	u32 remote = parms->iph.daddr;
209 	u32 local = parms->iph.saddr;
210 	struct ip_tunnel *t, **tp, *nt;
211 	struct net_device *dev;
212 	unsigned h = 0;
213 	int prio = 0;
214 	char name[IFNAMSIZ];
215 
216 	if (remote) {
217 		prio |= 2;
218 		h ^= HASH(remote);
219 	}
220 	if (local) {
221 		prio |= 1;
222 		h ^= HASH(local);
223 	}
224 	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
225 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
226 			return t;
227 	}
228 	if (!create)
229 		return NULL;
230 
231 	if (parms->name[0])
232 		strlcpy(name, parms->name, IFNAMSIZ);
233 	else {
234 		int i;
235 		for (i=1; i<100; i++) {
236 			sprintf(name, "tunl%d", i);
237 			if (__dev_get_by_name(name) == NULL)
238 				break;
239 		}
240 		if (i==100)
241 			goto failed;
242 	}
243 
244 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
245 	if (dev == NULL)
246 		return NULL;
247 
248 	nt = netdev_priv(dev);
249 	SET_MODULE_OWNER(dev);
250 	dev->init = ipip_tunnel_init;
251 	nt->parms = *parms;
252 
253 	if (register_netdevice(dev) < 0) {
254 		free_netdev(dev);
255 		goto failed;
256 	}
257 
258 	dev_hold(dev);
259 	ipip_tunnel_link(nt);
260 	return nt;
261 
262 failed:
263 	return NULL;
264 }
265 
266 static void ipip_tunnel_uninit(struct net_device *dev)
267 {
268 	if (dev == ipip_fb_tunnel_dev) {
269 		write_lock_bh(&ipip_lock);
270 		tunnels_wc[0] = NULL;
271 		write_unlock_bh(&ipip_lock);
272 	} else
273 		ipip_tunnel_unlink(netdev_priv(dev));
274 	dev_put(dev);
275 }
276 
/*
 * ICMP error handler for IPIP (installed as .err_handler further down).
 *
 * In the default build (I_WISH_WORLD_WERE_PERFECT not defined) the error
 * is not relayed to the inner sender.  For the ICMP types/codes that are
 * not filtered out by the switch below, the tunnel matching the embedded
 * header is looked up and its soft error state is updated: err_count is
 * incremented (or restarted at 1 when the previous error is older than
 * IPTUNNEL_ERR_TIMEO) and err_time is set to the current jiffies.
 * ipip_tunnel_xmit() later consumes err_count.
 *
 * @skb:  the ICMP packet; skb->data is cast to struct iphdr here
 *        (presumably the header of the packet that triggered the error
 *        -- confirm against the caller).
 * @info: unused in the default build.
 */
277 static void ipip_err(struct sk_buff *skb, u32 info)
278 {
279 #ifndef I_WISH_WORLD_WERE_PERFECT
280 
281 /* It is not :-( All the routers (except for Linux) return only
282    8 bytes of packet payload. It means, that precise relaying of
283    ICMP in the real Internet is absolutely infeasible.
284  */
285 	struct iphdr *iph = (struct iphdr*)skb->data;
286 	int type = skb->h.icmph->type;
287 	int code = skb->h.icmph->code;
288 	struct ip_tunnel *t;
289 
	/* Filter: return early for every type/code that must not be recorded. */
290 	switch (type) {
291 	default:
292 	case ICMP_PARAMETERPROB:
293 		return;
294 
295 	case ICMP_DEST_UNREACH:
296 		switch (code) {
297 		case ICMP_SR_FAILED:
298 		case ICMP_PORT_UNREACH:
299 			/* Impossible event. */
300 			return;
301 		case ICMP_FRAG_NEEDED:
302 			/* Soft state for pmtu is maintained by IP core. */
303 			return;
304 		default:
305 			/* All others are translated to HOST_UNREACH.
306 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
307 			   I believe they are just ether pollution. --ANK
308 			 */
309 			break;
310 		}
311 		break;
312 	case ICMP_TIME_EXCEEDED:
313 		if (code != ICMP_EXC_TTL)
314 			return;
315 		break;
316 	}
317 
	/*
	 * ipip_tunnel_lookup() takes (remote, local): the destination of the
	 * embedded header is used as the tunnel's remote end and its source
	 * as the local end.
	 */
318 	read_lock(&ipip_lock);
319 	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	/* Tunnels without a configured remote address keep no error state. */
320 	if (t == NULL || t->parms.iph.daddr == 0)
321 		goto out;
	/* Configured ttl of 0: TIME_EXCEEDED is not recorded for the tunnel. */
322 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
323 		goto out;
324 
	/* Note: the counters are written while holding only the read lock. */
325 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
326 		t->err_count++;
327 	else
328 		t->err_count = 1;
329 	t->err_time = jiffies;
330 out:
331 	read_unlock(&ipip_lock);
332 	return;
333 #else
	/*
	 * NOTE(review): this branch is only compiled when
	 * I_WISH_WORLD_WERE_PERFECT is defined.  It references "dp", "len"
	 * and "key", none of which is declared in this function or passed
	 * as a parameter, so it does not look buildable as written (the
	 * route lookup below presumably means &fl rather than &key --
	 * confirm before enabling).  Left untouched.
	 */
334 	struct iphdr *iph = (struct iphdr*)dp;
335 	int hlen = iph->ihl<<2;
336 	struct iphdr *eiph;
337 	int type = skb->h.icmph->type;
338 	int code = skb->h.icmph->code;
339 	int rel_type = 0;
340 	int rel_code = 0;
341 	int rel_info = 0;
342 	struct sk_buff *skb2;
343 	struct flowi fl;
344 	struct rtable *rt;
345 
346 	if (len < hlen + sizeof(struct iphdr))
347 		return;
348 	eiph = (struct iphdr*)(dp + hlen);
349 
350 	switch (type) {
351 	default:
352 		return;
353 	case ICMP_PARAMETERPROB:
354 		if (skb->h.icmph->un.gateway < hlen)
355 			return;
356 
357 		/* So... This guy found something strange INSIDE encapsulated
358 		   packet. Well, he is fool, but what can we do ?
359 		 */
360 		rel_type = ICMP_PARAMETERPROB;
361 		rel_info = skb->h.icmph->un.gateway - hlen;
362 		break;
363 
364 	case ICMP_DEST_UNREACH:
365 		switch (code) {
366 		case ICMP_SR_FAILED:
367 		case ICMP_PORT_UNREACH:
368 			/* Impossible event. */
369 			return;
370 		case ICMP_FRAG_NEEDED:
371 			/* And it is the only really necessary thing :-) */
372 			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
373 			if (rel_info < hlen+68)
374 				return;
375 			rel_info -= hlen;
376 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
377 			if (rel_info > ntohs(eiph->tot_len))
378 				return;
379 			break;
380 		default:
381 			/* All others are translated to HOST_UNREACH.
382 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
383 			   I believe, it is just ether pollution. --ANK
384 			 */
385 			rel_type = ICMP_DEST_UNREACH;
386 			rel_code = ICMP_HOST_UNREACH;
387 			break;
388 		}
389 		break;
390 	case ICMP_TIME_EXCEEDED:
391 		if (code != ICMP_EXC_TTL)
392 			return;
393 		break;
394 	}
395 
396 	/* Prepare fake skb to feed it to icmp_send */
397 	skb2 = skb_clone(skb, GFP_ATOMIC);
398 	if (skb2 == NULL)
399 		return;
400 	dst_release(skb2->dst);
401 	skb2->dst = NULL;
402 	skb_pull(skb2, skb->data - (u8*)eiph);
403 	skb2->nh.raw = skb2->data;
404 
405 	/* Try to guess incoming interface */
406 	memset(&fl, 0, sizeof(fl));
407 	fl.fl4_daddr = eiph->saddr;
408 	fl.fl4_tos = RT_TOS(eiph->tos);
409 	fl.proto = IPPROTO_IPIP;
410 	if (ip_route_output_key(&rt, &key)) {
411 		kfree_skb(skb2);
412 		return;
413 	}
414 	skb2->dev = rt->u.dst.dev;
415 
416 	/* route "incoming" packet */
417 	if (rt->rt_flags&RTCF_LOCAL) {
418 		ip_rt_put(rt);
419 		rt = NULL;
420 		fl.fl4_daddr = eiph->daddr;
421 		fl.fl4_src = eiph->saddr;
422 		fl.fl4_tos = eiph->tos;
423 		if (ip_route_output_key(&rt, &fl) ||
424 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
425 			ip_rt_put(rt);
426 			kfree_skb(skb2);
427 			return;
428 		}
429 	} else {
430 		ip_rt_put(rt);
431 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
432 		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
433 			kfree_skb(skb2);
434 			return;
435 		}
436 	}
437 
438 	/* change mtu on this route */
439 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
440 		if (rel_info > dst_mtu(skb2->dst)) {
441 			kfree_skb(skb2);
442 			return;
443 		}
444 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
445 		rel_info = htonl(rel_info);
446 	} else if (type == ICMP_TIME_EXCEEDED) {
447 		struct ip_tunnel *t = netdev_priv(skb2->dev);
448 		if (t->parms.iph.ttl) {
449 			rel_type = ICMP_DEST_UNREACH;
450 			rel_code = ICMP_HOST_UNREACH;
451 		}
452 	}
453 
454 	icmp_send(skb2, rel_type, rel_code, rel_info);
455 	kfree_skb(skb2);
456 	return;
457 #endif
458 }
459 
460 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
461 {
462 	struct iphdr *inner_iph = skb->nh.iph;
463 
464 	if (INET_ECN_is_ce(outer_iph->tos))
465 		IP_ECN_set_ce(inner_iph);
466 }
467 
468 static int ipip_rcv(struct sk_buff *skb)
469 {
470 	struct iphdr *iph;
471 	struct ip_tunnel *tunnel;
472 
473 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
474 		goto out;
475 
476 	iph = skb->nh.iph;
477 
478 	read_lock(&ipip_lock);
479 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
480 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
481 			read_unlock(&ipip_lock);
482 			kfree_skb(skb);
483 			return 0;
484 		}
485 
486 		secpath_reset(skb);
487 
488 		skb->mac.raw = skb->nh.raw;
489 		skb->nh.raw = skb->data;
490 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
491 		skb->protocol = htons(ETH_P_IP);
492 		skb->pkt_type = PACKET_HOST;
493 
494 		tunnel->stat.rx_packets++;
495 		tunnel->stat.rx_bytes += skb->len;
496 		skb->dev = tunnel->dev;
497 		dst_release(skb->dst);
498 		skb->dst = NULL;
499 		nf_reset(skb);
500 		ipip_ecn_decapsulate(iph, skb);
501 		netif_rx(skb);
502 		read_unlock(&ipip_lock);
503 		return 0;
504 	}
505 	read_unlock(&ipip_lock);
506 
507 out:
508 	return -1;
509 }
510 
511 /*
512  *	This function assumes it is being called from dev_queue_xmit()
513  *	and that skb is filled properly by that function.
514  */
515 
/*
 * hard_start_xmit handler for IPIP devices (installed by
 * ipip_tunnel_setup()).
 *
 * Prepends a new outer IPv4 header with protocol IPPROTO_IPIP to the
 * IPv4 packet in skb and sends it along a route to the tunnel's remote
 * address, or, when no remote is configured, to the gateway of the
 * route already attached to the skb.
 *
 * Always returns 0.  On every failure path the skb is freed here and a
 * counter in tunnel->stat is incremented.  tunnel->recursion is
 * incremented on entry and decremented on every exit.
 */
516 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
517 {
518 	struct ip_tunnel *tunnel = netdev_priv(dev);
519 	struct net_device_stats *stats = &tunnel->stat;
520 	struct iphdr  *tiph = &tunnel->parms.iph;
521 	u8     tos = tunnel->parms.iph.tos;
522 	u16    df = tiph->frag_off;
523 	struct rtable *rt;     			/* Route to the other host */
524 	struct net_device *tdev;			/* Device to other host */
525 	struct iphdr  *old_iph = skb->nh.iph;
526 	struct iphdr  *iph;			/* Our new IP header */
527 	int    max_headroom;			/* The extra header space needed */
528 	u32    dst = tiph->daddr;
529 	int    mtu;
530 
	/* Non-zero on entry means this tunnel is already transmitting. */
531 	if (tunnel->recursion++) {
532 		tunnel->stat.collisions++;
533 		goto tx_error;
534 	}
535 
	/* Only IPv4 payloads are encapsulated. */
536 	if (skb->protocol != htons(ETH_P_IP))
537 		goto tx_error;
538 
	/* Low bit of the configured TOS set: use the inner packet's TOS. */
539 	if (tos&1)
540 		tos = old_iph->tos;
541 
542 	if (!dst) {
543 		/* NBMA tunnel */
		/* No configured remote: take the gateway of the skb's route. */
544 		if ((rt = (struct rtable*)skb->dst) == NULL) {
545 			tunnel->stat.tx_fifo_errors++;
546 			goto tx_error;
547 		}
548 		if ((dst = rt->rt_gateway) == 0)
549 			goto tx_error_icmp;
550 	}
551 
	/* Route the outer packet; from here on rt must be released on error. */
552 	{
553 		struct flowi fl = { .oif = tunnel->parms.link,
554 				    .nl_u = { .ip4_u =
555 					      { .daddr = dst,
556 						.saddr = tiph->saddr,
557 						.tos = RT_TOS(tos) } },
558 				    .proto = IPPROTO_IPIP };
559 		if (ip_route_output_key(&rt, &fl)) {
560 			tunnel->stat.tx_carrier_errors++;
561 			goto tx_error_icmp;
562 		}
563 	}
564 	tdev = rt->u.dst.dev;
565 
	/* The outer route leads back into this very tunnel device. */
566 	if (tdev == dev) {
567 		ip_rt_put(rt);
568 		tunnel->stat.collisions++;
569 		goto tx_error;
570 	}
571 
	/*
	 * With a non-zero configured frag_off (the ioctl handler only lets
	 * IP_DF through) the usable mtu is the outer route's mtu minus the
	 * outer header; otherwise the inner route's mtu, or the device mtu
	 * when the skb has no route.
	 */
572 	if (tiph->frag_off)
573 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
574 	else
575 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
576 
577 	if (mtu < 68) {
578 		tunnel->stat.collisions++;
579 		ip_rt_put(rt);
580 		goto tx_error;
581 	}
582 	if (skb->dst)
583 		skb->dst->ops->update_pmtu(skb->dst, mtu);
584 
	/* The outer header inherits DF from the inner header. */
585 	df |= (old_iph->frag_off&htons(IP_DF));
586 
	/* Inner packet has DF set and does not fit: report the mtu and drop. */
587 	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
588 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
589 		ip_rt_put(rt);
590 		goto tx_error;
591 	}
592 
	/*
	 * Errors recorded by ipip_err(): while they are recent, each one is
	 * reported against one outgoing packet via dst_link_failure(); the
	 * packet itself is still transmitted below.  Stale counts are reset.
	 */
593 	if (tunnel->err_count > 0) {
594 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
595 			tunnel->err_count--;
596 			dst_link_failure(skb);
597 		} else
598 			tunnel->err_count = 0;
599 	}
600 
601 	/*
602 	 * Okay, now see if we can stuff it in the buffer as-is.
603 	 */
604 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
605 
	/* Too little headroom, or the buffer is cloned/shared: reallocate. */
606 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
607 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
608 		if (!new_skb) {
609 			ip_rt_put(rt);
610   			stats->tx_dropped++;
611 			dev_kfree_skb(skb);
612 			tunnel->recursion--;
613 			return 0;
614 		}
615 		if (skb->sk)
616 			skb_set_owner_w(new_skb, skb->sk);
617 		dev_kfree_skb(skb);
618 		skb = new_skb;
		/* The header pointer must be re-read from the new buffer. */
619 		old_iph = skb->nh.iph;
620 	}
621 
	/* Inner header becomes the transport header; push the outer header. */
622 	skb->h.raw = skb->nh.raw;
623 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
624 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
625 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
626 			      IPSKB_REROUTED);
	/* The skb takes over the route reference obtained above. */
627 	dst_release(skb->dst);
628 	skb->dst = &rt->u.dst;
629 
630 	/*
631 	 *	Push down and install the IPIP header.
632 	 */
633 
634 	iph 			=	skb->nh.iph;
635 	iph->version		=	4;
636 	iph->ihl		=	sizeof(struct iphdr)>>2;
637 	iph->frag_off		=	df;
638 	iph->protocol		=	IPPROTO_IPIP;
639 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
640 	iph->daddr		=	rt->rt_dst;
641 	iph->saddr		=	rt->rt_src;
642 
	/* Configured ttl of 0 means: copy the ttl of the inner header. */
643 	if ((iph->ttl = tiph->ttl) == 0)
644 		iph->ttl	=	old_iph->ttl;
645 
646 	nf_reset(skb);
647 
	/*
	 * NOTE(review): IPTUNNEL_XMIT() is defined outside this file and
	 * takes no arguments, so it presumably operates on locals of this
	 * function (skb, iph, rt, stats) -- confirm before renaming any.
	 */
648 	IPTUNNEL_XMIT();
649 	tunnel->recursion--;
650 	return 0;
651 
652 tx_error_icmp:
653 	dst_link_failure(skb);
654 tx_error:
655 	stats->tx_errors++;
656 	dev_kfree_skb(skb);
657 	tunnel->recursion--;
658 	return 0;
659 }
660 
661 static int
662 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
663 {
664 	int err = 0;
665 	struct ip_tunnel_parm p;
666 	struct ip_tunnel *t;
667 
668 	switch (cmd) {
669 	case SIOCGETTUNNEL:
670 		t = NULL;
671 		if (dev == ipip_fb_tunnel_dev) {
672 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
673 				err = -EFAULT;
674 				break;
675 			}
676 			t = ipip_tunnel_locate(&p, 0);
677 		}
678 		if (t == NULL)
679 			t = netdev_priv(dev);
680 		memcpy(&p, &t->parms, sizeof(p));
681 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
682 			err = -EFAULT;
683 		break;
684 
685 	case SIOCADDTUNNEL:
686 	case SIOCCHGTUNNEL:
687 		err = -EPERM;
688 		if (!capable(CAP_NET_ADMIN))
689 			goto done;
690 
691 		err = -EFAULT;
692 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
693 			goto done;
694 
695 		err = -EINVAL;
696 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
697 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
698 			goto done;
699 		if (p.iph.ttl)
700 			p.iph.frag_off |= htons(IP_DF);
701 
702 		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
703 
704 		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
705 			if (t != NULL) {
706 				if (t->dev != dev) {
707 					err = -EEXIST;
708 					break;
709 				}
710 			} else {
711 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
712 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
713 					err = -EINVAL;
714 					break;
715 				}
716 				t = netdev_priv(dev);
717 				ipip_tunnel_unlink(t);
718 				t->parms.iph.saddr = p.iph.saddr;
719 				t->parms.iph.daddr = p.iph.daddr;
720 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
721 				memcpy(dev->broadcast, &p.iph.daddr, 4);
722 				ipip_tunnel_link(t);
723 				netdev_state_change(dev);
724 			}
725 		}
726 
727 		if (t) {
728 			err = 0;
729 			if (cmd == SIOCCHGTUNNEL) {
730 				t->parms.iph.ttl = p.iph.ttl;
731 				t->parms.iph.tos = p.iph.tos;
732 				t->parms.iph.frag_off = p.iph.frag_off;
733 			}
734 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
735 				err = -EFAULT;
736 		} else
737 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
738 		break;
739 
740 	case SIOCDELTUNNEL:
741 		err = -EPERM;
742 		if (!capable(CAP_NET_ADMIN))
743 			goto done;
744 
745 		if (dev == ipip_fb_tunnel_dev) {
746 			err = -EFAULT;
747 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
748 				goto done;
749 			err = -ENOENT;
750 			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
751 				goto done;
752 			err = -EPERM;
753 			if (t->dev == ipip_fb_tunnel_dev)
754 				goto done;
755 			dev = t->dev;
756 		}
757 		err = unregister_netdevice(dev);
758 		break;
759 
760 	default:
761 		err = -EINVAL;
762 	}
763 
764 done:
765 	return err;
766 }
767 
768 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
769 {
770 	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
771 }
772 
773 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
774 {
775 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
776 		return -EINVAL;
777 	dev->mtu = new_mtu;
778 	return 0;
779 }
780 
781 static void ipip_tunnel_setup(struct net_device *dev)
782 {
783 	SET_MODULE_OWNER(dev);
784 	dev->uninit		= ipip_tunnel_uninit;
785 	dev->hard_start_xmit	= ipip_tunnel_xmit;
786 	dev->get_stats		= ipip_tunnel_get_stats;
787 	dev->do_ioctl		= ipip_tunnel_ioctl;
788 	dev->change_mtu		= ipip_tunnel_change_mtu;
789 	dev->destructor		= free_netdev;
790 
791 	dev->type		= ARPHRD_TUNNEL;
792 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
793 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
794 	dev->flags		= IFF_NOARP;
795 	dev->iflink		= 0;
796 	dev->addr_len		= 4;
797 }
798 
799 static int ipip_tunnel_init(struct net_device *dev)
800 {
801 	struct net_device *tdev = NULL;
802 	struct ip_tunnel *tunnel;
803 	struct iphdr *iph;
804 
805 	tunnel = netdev_priv(dev);
806 	iph = &tunnel->parms.iph;
807 
808 	tunnel->dev = dev;
809 	strcpy(tunnel->parms.name, dev->name);
810 
811 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
812 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
813 
814 	if (iph->daddr) {
815 		struct flowi fl = { .oif = tunnel->parms.link,
816 				    .nl_u = { .ip4_u =
817 					      { .daddr = iph->daddr,
818 						.saddr = iph->saddr,
819 						.tos = RT_TOS(iph->tos) } },
820 				    .proto = IPPROTO_IPIP };
821 		struct rtable *rt;
822 		if (!ip_route_output_key(&rt, &fl)) {
823 			tdev = rt->u.dst.dev;
824 			ip_rt_put(rt);
825 		}
826 		dev->flags |= IFF_POINTOPOINT;
827 	}
828 
829 	if (!tdev && tunnel->parms.link)
830 		tdev = __dev_get_by_index(tunnel->parms.link);
831 
832 	if (tdev) {
833 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
834 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
835 	}
836 	dev->iflink = tunnel->parms.link;
837 
838 	return 0;
839 }
840 
841 static int __init ipip_fb_tunnel_init(struct net_device *dev)
842 {
843 	struct ip_tunnel *tunnel = netdev_priv(dev);
844 	struct iphdr *iph = &tunnel->parms.iph;
845 
846 	tunnel->dev = dev;
847 	strcpy(tunnel->parms.name, dev->name);
848 
849 	iph->version		= 4;
850 	iph->protocol		= IPPROTO_IPIP;
851 	iph->ihl		= 5;
852 
853 	dev_hold(dev);
854 	tunnels_wc[0]		= tunnel;
855 	return 0;
856 }
857 
858 #ifdef CONFIG_INET_TUNNEL
859 static struct xfrm_tunnel ipip_handler = {
860 	.handler	=	ipip_rcv,
861 	.err_handler	=	ipip_err,
862 };
863 
864 static inline int ipip_register(void)
865 {
866 	return xfrm4_tunnel_register(&ipip_handler);
867 }
868 
869 static inline int ipip_unregister(void)
870 {
871 	return xfrm4_tunnel_deregister(&ipip_handler);
872 }
873 #else
874 static struct net_protocol ipip_protocol = {
875 	.handler	=	ipip_rcv,
876 	.err_handler	=	ipip_err,
877 	.no_policy	=	1,
878 };
879 
880 static inline int ipip_register(void)
881 {
882 	return inet_add_protocol(&ipip_protocol, IPPROTO_IPIP);
883 }
884 
885 static inline int ipip_unregister(void)
886 {
887 	return inet_del_protocol(&ipip_protocol, IPPROTO_IPIP);
888 }
889 #endif
890 
891 static char banner[] __initdata =
892 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
893 
894 static int __init ipip_init(void)
895 {
896 	int err;
897 
898 	printk(banner);
899 
900 	if (ipip_register() < 0) {
901 		printk(KERN_INFO "ipip init: can't register tunnel\n");
902 		return -EAGAIN;
903 	}
904 
905 	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
906 					   "tunl0",
907 					   ipip_tunnel_setup);
908 	if (!ipip_fb_tunnel_dev) {
909 		err = -ENOMEM;
910 		goto err1;
911 	}
912 
913 	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
914 
915 	if ((err = register_netdev(ipip_fb_tunnel_dev)))
916 		goto err2;
917  out:
918 	return err;
919  err2:
920 	free_netdev(ipip_fb_tunnel_dev);
921  err1:
922 	ipip_unregister();
923 	goto out;
924 }
925 
926 static void __exit ipip_destroy_tunnels(void)
927 {
928 	int prio;
929 
930 	for (prio = 1; prio < 4; prio++) {
931 		int h;
932 		for (h = 0; h < HASH_SIZE; h++) {
933 			struct ip_tunnel *t;
934 			while ((t = tunnels[prio][h]) != NULL)
935 				unregister_netdevice(t->dev);
936 		}
937 	}
938 }
939 
940 static void __exit ipip_fini(void)
941 {
942 	if (ipip_unregister() < 0)
943 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
944 
945 	rtnl_lock();
946 	ipip_destroy_tunnels();
947 	unregister_netdevice(ipip_fb_tunnel_dev);
948 	rtnl_unlock();
949 }
950 
/* Module entry point, exit point and license declaration. */
951 module_init(ipip_init);
952 module_exit(ipip_fini);
953 MODULE_LICENSE("GPL");
954