xref: /linux/net/ipv4/ipip.c (revision 14b42963f64b98ab61fa9723c03d71aa5ef4f862)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *	Authors:
7  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *	Fixes:
10  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
11  *					a module taking up 2 pages).
12  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *					to keep ip_forward happy.
14  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *					I do not want to merge them together.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *	modify it under the terms of the GNU General Public License
24  *	as published by the Free Software Foundation; either version
25  *	2 of the License, or (at your option) any later version.
26  *
27  */
28 
29 /* tunnel.c: an IP tunnel driver
30 
31 	The purpose of this driver is to provide an IP tunnel through
32 	which you can tunnel network traffic transparently across subnets.
33 
34 	This was written by looking at Nick Holloway's dummy driver
35 	Thanks for the great code!
36 
37 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
38 
39 	Minor tweaks:
40 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 		dev->hard_header/hard_header_len changed to use no headers.
42 		Comments/bracketing tweaked.
43 		Made the tunnels use dev->name not tunnel: when error reporting.
44 		Added tx_dropped stat
45 
46 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
47 
48 	Reworked:
49 		Changed to tunnel to destination gateway in addition to the
50 			tunnel's pointopoint address
51 		Almost completely rewritten
52 		Note:  There is currently no firewall or ICMP handling done.
53 
54 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
55 
56 */
57 
58 /* Things I wish I had known when writing the tunnel driver:
59 
60 	When the tunnel_xmit() function is called, the skb contains the
61 	packet to be sent (plus a great deal of extra info), and dev
62 	contains the tunnel device that _we_ are.
63 
64 	When we are passed a packet, we are expected to fill in the
65 	source address with our source IP address.
66 
67 	What is the proper way to allocate, copy and free a buffer?
68 	After you allocate it, it is a "0 length" chunk of memory
69 	starting at zero.  If you want to add headers to the buffer
70 	later, you'll have to call "skb_reserve(skb, amount)" with
71 	the amount of memory you want reserved.  Then, you call
72 	"skb_put(skb, amount)" with the amount of space you want in
73 	the buffer.  skb_put() returns a pointer to the top (#0) of
74 	that buffer.  skb->len is set to the amount of space you have
75 	"allocated" with skb_put().  You can then write up to skb->len
76 	bytes to that buffer.  If you need more, you can call skb_put()
77 	again with the additional amount of space you need.  You can
78 	find out how much more space you can allocate by calling
79 	"skb_tailroom(skb)".
80 	Now, to add header space, call "skb_push(skb, header_len)".
81 	This creates space at the beginning of the buffer and returns
82 	a pointer to this new space.  If later you need to strip a
83 	header from a buffer, call "skb_pull(skb, header_len)".
84 	skb_headroom() will return how much space is left at the top
85 	of the buffer (before the main data).  Remember, this headroom
86 	space must be reserved before the skb_put() function is called.
87 	*/
88 
89 /*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91 
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94 
95 
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <asm/uaccess.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
119 
/* Tunnels are kept in four hash tables, selected by which endpoint
 * addresses are configured; HASH() folds an IPv4 address into a 4-bit
 * bucket index.
 */
#define HASH_SIZE  16
#define HASH(addr) ((addr^(addr>>4))&0xF)

static int ipip_fb_tunnel_init(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);

/* The always-present fallback device "tunl0" (created in ipip_init()). */
static struct net_device *ipip_fb_tunnel_dev;

/* Chains keyed by (remote,local), remote-only, local-only, and a single
 * wildcard slot for the fallback tunnel.  tunnels[prio] indexes these by
 * a 2-bit "which addresses are set" value — see ipip_bucket().
 */
static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
static struct ip_tunnel *tunnels_r[HASH_SIZE];
static struct ip_tunnel *tunnels_l[HASH_SIZE];
static struct ip_tunnel *tunnels_wc[1];
static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };

/* Protects the hash chains above: lookups on the rx/err paths take the
 * read side; chain updates take the write side.
 */
static DEFINE_RWLOCK(ipip_lock);
136 
137 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
138 {
139 	unsigned h0 = HASH(remote);
140 	unsigned h1 = HASH(local);
141 	struct ip_tunnel *t;
142 
143 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
144 		if (local == t->parms.iph.saddr &&
145 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
146 			return t;
147 	}
148 	for (t = tunnels_r[h0]; t; t = t->next) {
149 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
150 			return t;
151 	}
152 	for (t = tunnels_l[h1]; t; t = t->next) {
153 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
154 			return t;
155 	}
156 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
157 		return t;
158 	return NULL;
159 }
160 
161 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
162 {
163 	u32 remote = t->parms.iph.daddr;
164 	u32 local = t->parms.iph.saddr;
165 	unsigned h = 0;
166 	int prio = 0;
167 
168 	if (remote) {
169 		prio |= 2;
170 		h ^= HASH(remote);
171 	}
172 	if (local) {
173 		prio |= 1;
174 		h ^= HASH(local);
175 	}
176 	return &tunnels[prio][h];
177 }
178 
179 
/* Remove @t from its hash chain.  Note the chain walk itself runs
 * without ipip_lock; only the single pointer update that concurrent
 * readers may observe is covered by the write lock.  This assumes all
 * list mutations are already serialized by the caller (RTNL) — see
 * rtnl_lock() in ipip_fini(); TODO confirm for every caller.
 */
static void ipip_tunnel_unlink(struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			write_lock_bh(&ipip_lock);
			*tp = t->next;
			write_unlock_bh(&ipip_lock);
			break;
		}
	}
}
193 
194 static void ipip_tunnel_link(struct ip_tunnel *t)
195 {
196 	struct ip_tunnel **tp = ipip_bucket(t);
197 
198 	t->next = *tp;
199 	write_lock_bh(&ipip_lock);
200 	*tp = t;
201 	write_unlock_bh(&ipip_lock);
202 }
203 
/* Find the tunnel whose endpoint addresses exactly match @parms.  When
 * no match exists and @create is set, allocate, initialize and register
 * a new tunnel device, named from parms->name or the first free
 * "tunl%d".  Returns the tunnel, or NULL on lookup miss / any failure.
 * Assumes caller serializes device creation (RTNL) — TODO confirm.
 */
static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
{
	u32 remote = parms->iph.daddr;
	u32 local = parms->iph.saddr;
	struct ip_tunnel *t, **tp, *nt;
	struct net_device *dev;
	unsigned h = 0;
	int prio = 0;
	char name[IFNAMSIZ];

	/* Same table/slot selection as ipip_bucket(), computed from
	 * @parms since no tunnel object exists yet. */
	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		int i;
		/* Pick the first unused "tunl%d" name; give up after 99. */
		for (i=1; i<100; i++) {
			sprintf(name, "tunl%d", i);
			if (__dev_get_by_name(name) == NULL)
				break;
		}
		if (i==100)
			goto failed;
	}

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	nt = netdev_priv(dev);
	SET_MODULE_OWNER(dev);
	dev->init = ipip_tunnel_init;	/* invoked by register_netdevice() */
	nt->parms = *parms;

	if (register_netdevice(dev) < 0) {
		free_netdev(dev);
		goto failed;
	}

	/* Reference released in ipip_tunnel_uninit(). */
	dev_hold(dev);
	ipip_tunnel_link(nt);
	return nt;

failed:
	return NULL;
}
263 
264 static void ipip_tunnel_uninit(struct net_device *dev)
265 {
266 	if (dev == ipip_fb_tunnel_dev) {
267 		write_lock_bh(&ipip_lock);
268 		tunnels_wc[0] = NULL;
269 		write_unlock_bh(&ipip_lock);
270 	} else
271 		ipip_tunnel_unlink(netdev_priv(dev));
272 	dev_put(dev);
273 }
274 
275 static int ipip_err(struct sk_buff *skb, u32 info)
276 {
277 #ifndef I_WISH_WORLD_WERE_PERFECT
278 
279 /* It is not :-( All the routers (except for Linux) return only
280    8 bytes of packet payload. It means, that precise relaying of
281    ICMP in the real Internet is absolutely infeasible.
282  */
283 	struct iphdr *iph = (struct iphdr*)skb->data;
284 	int type = skb->h.icmph->type;
285 	int code = skb->h.icmph->code;
286 	struct ip_tunnel *t;
287 	int err;
288 
289 	switch (type) {
290 	default:
291 	case ICMP_PARAMETERPROB:
292 		return 0;
293 
294 	case ICMP_DEST_UNREACH:
295 		switch (code) {
296 		case ICMP_SR_FAILED:
297 		case ICMP_PORT_UNREACH:
298 			/* Impossible event. */
299 			return 0;
300 		case ICMP_FRAG_NEEDED:
301 			/* Soft state for pmtu is maintained by IP core. */
302 			return 0;
303 		default:
304 			/* All others are translated to HOST_UNREACH.
305 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
306 			   I believe they are just ether pollution. --ANK
307 			 */
308 			break;
309 		}
310 		break;
311 	case ICMP_TIME_EXCEEDED:
312 		if (code != ICMP_EXC_TTL)
313 			return 0;
314 		break;
315 	}
316 
317 	err = -ENOENT;
318 
319 	read_lock(&ipip_lock);
320 	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
321 	if (t == NULL || t->parms.iph.daddr == 0)
322 		goto out;
323 
324 	err = 0;
325 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
326 		goto out;
327 
328 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
329 		t->err_count++;
330 	else
331 		t->err_count = 1;
332 	t->err_time = jiffies;
333 out:
334 	read_unlock(&ipip_lock);
335 	return err;
336 #else
337 	struct iphdr *iph = (struct iphdr*)dp;
338 	int hlen = iph->ihl<<2;
339 	struct iphdr *eiph;
340 	int type = skb->h.icmph->type;
341 	int code = skb->h.icmph->code;
342 	int rel_type = 0;
343 	int rel_code = 0;
344 	int rel_info = 0;
345 	struct sk_buff *skb2;
346 	struct flowi fl;
347 	struct rtable *rt;
348 
349 	if (len < hlen + sizeof(struct iphdr))
350 		return 0;
351 	eiph = (struct iphdr*)(dp + hlen);
352 
353 	switch (type) {
354 	default:
355 		return 0;
356 	case ICMP_PARAMETERPROB:
357 		if (skb->h.icmph->un.gateway < hlen)
358 			return 0;
359 
360 		/* So... This guy found something strange INSIDE encapsulated
361 		   packet. Well, he is fool, but what can we do ?
362 		 */
363 		rel_type = ICMP_PARAMETERPROB;
364 		rel_info = skb->h.icmph->un.gateway - hlen;
365 		break;
366 
367 	case ICMP_DEST_UNREACH:
368 		switch (code) {
369 		case ICMP_SR_FAILED:
370 		case ICMP_PORT_UNREACH:
371 			/* Impossible event. */
372 			return 0;
373 		case ICMP_FRAG_NEEDED:
374 			/* And it is the only really necessary thing :-) */
375 			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
376 			if (rel_info < hlen+68)
377 				return 0;
378 			rel_info -= hlen;
379 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
380 			if (rel_info > ntohs(eiph->tot_len))
381 				return 0;
382 			break;
383 		default:
384 			/* All others are translated to HOST_UNREACH.
385 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
386 			   I believe, it is just ether pollution. --ANK
387 			 */
388 			rel_type = ICMP_DEST_UNREACH;
389 			rel_code = ICMP_HOST_UNREACH;
390 			break;
391 		}
392 		break;
393 	case ICMP_TIME_EXCEEDED:
394 		if (code != ICMP_EXC_TTL)
395 			return 0;
396 		break;
397 	}
398 
399 	/* Prepare fake skb to feed it to icmp_send */
400 	skb2 = skb_clone(skb, GFP_ATOMIC);
401 	if (skb2 == NULL)
402 		return 0;
403 	dst_release(skb2->dst);
404 	skb2->dst = NULL;
405 	skb_pull(skb2, skb->data - (u8*)eiph);
406 	skb2->nh.raw = skb2->data;
407 
408 	/* Try to guess incoming interface */
409 	memset(&fl, 0, sizeof(fl));
410 	fl.fl4_daddr = eiph->saddr;
411 	fl.fl4_tos = RT_TOS(eiph->tos);
412 	fl.proto = IPPROTO_IPIP;
413 	if (ip_route_output_key(&rt, &key)) {
414 		kfree_skb(skb2);
415 		return 0;
416 	}
417 	skb2->dev = rt->u.dst.dev;
418 
419 	/* route "incoming" packet */
420 	if (rt->rt_flags&RTCF_LOCAL) {
421 		ip_rt_put(rt);
422 		rt = NULL;
423 		fl.fl4_daddr = eiph->daddr;
424 		fl.fl4_src = eiph->saddr;
425 		fl.fl4_tos = eiph->tos;
426 		if (ip_route_output_key(&rt, &fl) ||
427 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
428 			ip_rt_put(rt);
429 			kfree_skb(skb2);
430 			return 0;
431 		}
432 	} else {
433 		ip_rt_put(rt);
434 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
435 		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
436 			kfree_skb(skb2);
437 			return 0;
438 		}
439 	}
440 
441 	/* change mtu on this route */
442 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
443 		if (rel_info > dst_mtu(skb2->dst)) {
444 			kfree_skb(skb2);
445 			return 0;
446 		}
447 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
448 		rel_info = htonl(rel_info);
449 	} else if (type == ICMP_TIME_EXCEEDED) {
450 		struct ip_tunnel *t = netdev_priv(skb2->dev);
451 		if (t->parms.iph.ttl) {
452 			rel_type = ICMP_DEST_UNREACH;
453 			rel_code = ICMP_HOST_UNREACH;
454 		}
455 	}
456 
457 	icmp_send(skb2, rel_type, rel_code, rel_info);
458 	kfree_skb(skb2);
459 	return 0;
460 #endif
461 }
462 
463 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
464 {
465 	struct iphdr *inner_iph = skb->nh.iph;
466 
467 	if (INET_ECN_is_ce(outer_iph->tos))
468 		IP_ECN_set_ce(inner_iph);
469 }
470 
/* Receive handler registered via xfrm4_tunnel.  On entry skb->nh points
 * at the outer IPv4 header and skb->data at the inner packet (see the
 * raw-pointer reassignments below).  Returns 0 when the packet was
 * consumed, -1 when no tunnel matched (packet not consumed).
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ip_tunnel *tunnel;

	/* Outer header: lookup is by its source/destination addresses. */
	iph = skb->nh.iph;

	read_lock(&ipip_lock);
	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			read_unlock(&ipip_lock);
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		/* Re-point the header offsets at the inner packet and
		 * present it as freshly received IPv4. */
		skb->mac.raw = skb->nh.raw;
		skb->nh.raw = skb->data;
		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tunnel->stat.rx_packets++;
		tunnel->stat.rx_bytes += skb->len;
		skb->dev = tunnel->dev;
		/* Drop routing and netfilter state of the outer packet. */
		dst_release(skb->dst);
		skb->dst = NULL;
		nf_reset(skb);
		/* Carry congestion marking over from the outer header. */
		ipip_ecn_decapsulate(iph, skb);
		netif_rx(skb);
		read_unlock(&ipip_lock);
		return 0;
	}
	read_unlock(&ipip_lock);

	return -1;
}
509 
510 /*
511  *	This function assumes it is being called from dev_queue_xmit()
512  *	and that skb is filled properly by that function.
513  */
514 
515 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
516 {
517 	struct ip_tunnel *tunnel = netdev_priv(dev);
518 	struct net_device_stats *stats = &tunnel->stat;
519 	struct iphdr  *tiph = &tunnel->parms.iph;
520 	u8     tos = tunnel->parms.iph.tos;
521 	u16    df = tiph->frag_off;
522 	struct rtable *rt;     			/* Route to the other host */
523 	struct net_device *tdev;			/* Device to other host */
524 	struct iphdr  *old_iph = skb->nh.iph;
525 	struct iphdr  *iph;			/* Our new IP header */
526 	int    max_headroom;			/* The extra header space needed */
527 	u32    dst = tiph->daddr;
528 	int    mtu;
529 
530 	if (tunnel->recursion++) {
531 		tunnel->stat.collisions++;
532 		goto tx_error;
533 	}
534 
535 	if (skb->protocol != htons(ETH_P_IP))
536 		goto tx_error;
537 
538 	if (tos&1)
539 		tos = old_iph->tos;
540 
541 	if (!dst) {
542 		/* NBMA tunnel */
543 		if ((rt = (struct rtable*)skb->dst) == NULL) {
544 			tunnel->stat.tx_fifo_errors++;
545 			goto tx_error;
546 		}
547 		if ((dst = rt->rt_gateway) == 0)
548 			goto tx_error_icmp;
549 	}
550 
551 	{
552 		struct flowi fl = { .oif = tunnel->parms.link,
553 				    .nl_u = { .ip4_u =
554 					      { .daddr = dst,
555 						.saddr = tiph->saddr,
556 						.tos = RT_TOS(tos) } },
557 				    .proto = IPPROTO_IPIP };
558 		if (ip_route_output_key(&rt, &fl)) {
559 			tunnel->stat.tx_carrier_errors++;
560 			goto tx_error_icmp;
561 		}
562 	}
563 	tdev = rt->u.dst.dev;
564 
565 	if (tdev == dev) {
566 		ip_rt_put(rt);
567 		tunnel->stat.collisions++;
568 		goto tx_error;
569 	}
570 
571 	if (tiph->frag_off)
572 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
573 	else
574 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
575 
576 	if (mtu < 68) {
577 		tunnel->stat.collisions++;
578 		ip_rt_put(rt);
579 		goto tx_error;
580 	}
581 	if (skb->dst)
582 		skb->dst->ops->update_pmtu(skb->dst, mtu);
583 
584 	df |= (old_iph->frag_off&htons(IP_DF));
585 
586 	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
587 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
588 		ip_rt_put(rt);
589 		goto tx_error;
590 	}
591 
592 	if (tunnel->err_count > 0) {
593 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
594 			tunnel->err_count--;
595 			dst_link_failure(skb);
596 		} else
597 			tunnel->err_count = 0;
598 	}
599 
600 	/*
601 	 * Okay, now see if we can stuff it in the buffer as-is.
602 	 */
603 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
604 
605 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
606 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
607 		if (!new_skb) {
608 			ip_rt_put(rt);
609   			stats->tx_dropped++;
610 			dev_kfree_skb(skb);
611 			tunnel->recursion--;
612 			return 0;
613 		}
614 		if (skb->sk)
615 			skb_set_owner_w(new_skb, skb->sk);
616 		dev_kfree_skb(skb);
617 		skb = new_skb;
618 		old_iph = skb->nh.iph;
619 	}
620 
621 	skb->h.raw = skb->nh.raw;
622 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
623 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
624 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
625 			      IPSKB_REROUTED);
626 	dst_release(skb->dst);
627 	skb->dst = &rt->u.dst;
628 
629 	/*
630 	 *	Push down and install the IPIP header.
631 	 */
632 
633 	iph 			=	skb->nh.iph;
634 	iph->version		=	4;
635 	iph->ihl		=	sizeof(struct iphdr)>>2;
636 	iph->frag_off		=	df;
637 	iph->protocol		=	IPPROTO_IPIP;
638 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
639 	iph->daddr		=	rt->rt_dst;
640 	iph->saddr		=	rt->rt_src;
641 
642 	if ((iph->ttl = tiph->ttl) == 0)
643 		iph->ttl	=	old_iph->ttl;
644 
645 	nf_reset(skb);
646 
647 	IPTUNNEL_XMIT();
648 	tunnel->recursion--;
649 	return 0;
650 
651 tx_error_icmp:
652 	dst_link_failure(skb);
653 tx_error:
654 	stats->tx_errors++;
655 	dev_kfree_skb(skb);
656 	tunnel->recursion--;
657 	return 0;
658 }
659 
/* Tunnel configuration ioctls (SIOC{GET,ADD,CHG,DEL}TUNNEL), reached
 * through dev->do_ioctl.  The ip_tunnel_parm argument is exchanged with
 * userspace via ifr->ifr_ifru.ifru_data.  Operations on the fallback
 * device "tunl0" address tunnels by parameters; operations on a
 * specific tunnel device act on that device directly.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipip_fb_tunnel_dev) {
			/* Query via tunl0: look the tunnel up by the
			 * parameters userspace passed in. */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(&p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Only a plain IPv4-in-IPv4 outer header is acceptable:
		 * version 4, no options (ihl 5), and at most the DF bit
		 * in frag_off. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);

		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			/* Changing an existing device's endpoints: refuse
			 * if the new addresses belong to another tunnel,
			 * otherwise re-hash the device under its new
			 * addresses. */
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip_tunnel_unlink(t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
			}
			/* Return the (possibly updated) parameters. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipip_fb_tunnel_dev) {
			/* Delete via tunl0: select the victim by its
			 * parameters; tunl0 itself may not be deleted. */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipip_fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		err = unregister_netdevice(dev);
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
766 
767 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
768 {
769 	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
770 }
771 
772 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
773 {
774 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
775 		return -EINVAL;
776 	dev->mtu = new_mtu;
777 	return 0;
778 }
779 
/* netdev setup callback (passed to alloc_netdev()): wire up the tunnel
 * operations and the link-level defaults for a new ipip device.
 */
static void ipip_tunnel_setup(struct net_device *dev)
{
	SET_MODULE_OWNER(dev);
	dev->uninit		= ipip_tunnel_uninit;
	dev->hard_start_xmit	= ipip_tunnel_xmit;
	dev->get_stats		= ipip_tunnel_get_stats;
	dev->do_ioctl		= ipip_tunnel_ioctl;
	dev->change_mtu		= ipip_tunnel_change_mtu;
	dev->destructor		= free_netdev;

	/* Leave room for one outer IPv4 header; no ARP on a tunnel.
	 * addr_len == 4: dev_addr/broadcast hold the IPv4 endpoints. */
	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
}
797 
/* dev->init hook for configured tunnels: record the endpoint addresses
 * on the device and derive mtu/hard_header_len from the underlying
 * device the tunnel route resolves to.
 */
static int ipip_tunnel_init(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* dev_addr/broadcast carry the tunnel endpoints (addr_len == 4). */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		struct rtable *rt;
		/* Probe which device packets to the remote endpoint
		 * would leave by. */
		if (!ip_route_output_key(&rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->parms.link);

	if (tdev) {
		/* Reserve room for the outer IPv4 header. */
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;

	return 0;
}
839 
840 static int __init ipip_fb_tunnel_init(struct net_device *dev)
841 {
842 	struct ip_tunnel *tunnel = netdev_priv(dev);
843 	struct iphdr *iph = &tunnel->parms.iph;
844 
845 	tunnel->dev = dev;
846 	strcpy(tunnel->parms.name, dev->name);
847 
848 	iph->version		= 4;
849 	iph->protocol		= IPPROTO_IPIP;
850 	iph->ihl		= 5;
851 
852 	dev_hold(dev);
853 	tunnels_wc[0]		= tunnel;
854 	return 0;
855 }
856 
/* Hook into the xfrm4 tunnel stack: ipip_rcv() receives decapsulated
 * IPPROTO_IPIP packets, ipip_err() the matching ICMP errors.  priority
 * orders this handler relative to other xfrm4 tunnel handlers.
 */
static struct xfrm_tunnel ipip_handler = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

static char banner[] __initdata =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
865 
/* Module entry point: register the IPIP protocol handler first, then
 * create and register the fallback device "tunl0".  Error paths unwind
 * in reverse order of setup.
 */
static int __init ipip_init(void)
{
	int err;

	printk(banner);

	if (xfrm4_tunnel_register(&ipip_handler)) {
		printk(KERN_INFO "ipip init: can't register tunnel\n");
		return -EAGAIN;
	}

	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipip_fb_tunnel_dev) {
		err = -ENOMEM;
		goto err1;
	}

	/* register_netdev() invokes this via dev->init. */
	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;

	if ((err = register_netdev(ipip_fb_tunnel_dev)))
		goto err2;
 out:
	return err;
 err2:
	free_netdev(ipip_fb_tunnel_dev);
 err1:
	xfrm4_tunnel_deregister(&ipip_handler);
	goto out;
}
897 
/* Unregister every tunnel device still linked in the hash tables.
 * Called with RTNL held (see ipip_fini()).  Starts at prio 1: slot 0
 * (tunnels_wc) holds only the fallback device, which ipip_fini()
 * unregisters itself.  The while-loop re-reads the chain head each
 * iteration because unregistering a device unlinks it from the chain
 * via ipip_tunnel_uninit().
 */
static void __exit ipip_destroy_tunnels(void)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;
			while ((t = tunnels[prio][h]) != NULL)
				unregister_netdevice(t->dev);
		}
	}
}
911 
/* Module exit: unhook the protocol handler first so no new packets can
 * reach the tunnels, then tear down all devices under RTNL.
 */
static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	rtnl_lock();
	ipip_destroy_tunnels();
	unregister_netdevice(ipip_fb_tunnel_dev);
	rtnl_unlock();
}
922 
923 module_init(ipip_init);
924 module_exit(ipip_fini);
925 MODULE_LICENSE("GPL");
926