xref: /linux/net/ipv4/ipip.c (revision 8b4a40809e5330c9da5d20107d693d92d73b31dc)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *	Authors:
7  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *	Fixes:
10  *		Alan Cox	:	Merged and made usable non-modular (it's so tiny it's silly as
11  *					a module taking up 2 pages).
12  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *					to keep ip_forward happy.
14  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *		Alexey Kuznetsov:	Reworked. Really, it is now a truncated version of ipv4/ip_gre.c.
20  *					I do not want to merge them together.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *	modify it under the terms of the GNU General Public License
24  *	as published by the Free Software Foundation; either version
25  *	2 of the License, or (at your option) any later version.
26  *
27  */
28 
29 /* tunnel.c: an IP tunnel driver
30 
31 	The purpose of this driver is to provide an IP tunnel through
32 	which you can tunnel network traffic transparently across subnets.
33 
34 	This was written by looking at Nick Holloway's dummy driver
35 	Thanks for the great code!
36 
37 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
38 
39 	Minor tweaks:
40 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 		dev->hard_header/hard_header_len changed to use no headers.
42 		Comments/bracketing tweaked.
43 		Made the tunnels use dev->name not tunnel: when error reporting.
44 		Added tx_dropped stat
45 
46 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
47 
48 	Reworked:
49 		Changed to tunnel to destination gateway in addition to the
50 			tunnel's pointopoint address
51 		Almost completely rewritten
52 		Note:  There is currently no firewall or ICMP handling done.
53 
54 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
55 
56 */
57 
58 /* Things I wish I had known when writing the tunnel driver:
59 
60 	When the tunnel_xmit() function is called, the skb contains the
61 	packet to be sent (plus a great deal of extra info), and dev
62 	contains the tunnel device that _we_ are.
63 
64 	When we are passed a packet, we are expected to fill in the
65 	source address with our source IP address.
66 
67 	What is the proper way to allocate, copy and free a buffer?
68 	After you allocate it, it is a "0 length" chunk of memory
69 	starting at zero.  If you want to add headers to the buffer
70 	later, you'll have to call "skb_reserve(skb, amount)" with
71 	the amount of memory you want reserved.  Then, you call
72 	"skb_put(skb, amount)" with the amount of space you want in
73 	the buffer.  skb_put() returns a pointer to the top (#0) of
74 	that buffer.  skb->len is set to the amount of space you have
75 	"allocated" with skb_put().  You can then write up to skb->len
76 	bytes to that buffer.  If you need more, you can call skb_put()
77 	again with the additional amount of space you need.  You can
78 	find out how much more space you can allocate by calling
79 	"skb_tailroom(skb)".
80 	Now, to add header space, call "skb_push(skb, header_len)".
81 	This creates space at the beginning of the buffer and returns
82 	a pointer to this new space.  If later you need to strip a
83 	header from a buffer, call "skb_pull(skb, header_len)".
84 	skb_headroom() will return how much space is left at the top
85 	of the buffer (before the main data).  Remember, this headroom
86 	space must be reserved before the skb_put() function is called.
87 	*/
88 
89 /*
90    This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
91 
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94 
95 
96 #include <linux/capability.h>
97 #include <linux/module.h>
98 #include <linux/types.h>
99 #include <linux/kernel.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <linux/in.h>
104 #include <linux/tcp.h>
105 #include <linux/udp.h>
106 #include <linux/if_arp.h>
107 #include <linux/mroute.h>
108 #include <linux/init.h>
109 #include <linux/netfilter_ipv4.h>
110 #include <linux/if_ether.h>
111 
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ipip.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 
/* Number of buckets in each address-keyed tunnel table (a power of two). */
#define HASH_SIZE  16

/*
 * Fold a 32-bit IPv4 address into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesised so that an expression argument
 * (for example HASH(a | b)) is hashed as a whole instead of being torn
 * apart by operator precedence.  Note that the argument is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
121 
122 static int ipip_fb_tunnel_init(struct net_device *dev);
123 static int ipip_tunnel_init(struct net_device *dev);
124 static void ipip_tunnel_setup(struct net_device *dev);
125 
126 static struct net_device *ipip_fb_tunnel_dev;
127 
128 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
129 static struct ip_tunnel *tunnels_r[HASH_SIZE];
130 static struct ip_tunnel *tunnels_l[HASH_SIZE];
131 static struct ip_tunnel *tunnels_wc[1];
132 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
133 
134 static DEFINE_RWLOCK(ipip_lock);
135 
136 static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
137 {
138 	unsigned h0 = HASH(remote);
139 	unsigned h1 = HASH(local);
140 	struct ip_tunnel *t;
141 
142 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
143 		if (local == t->parms.iph.saddr &&
144 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
145 			return t;
146 	}
147 	for (t = tunnels_r[h0]; t; t = t->next) {
148 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
149 			return t;
150 	}
151 	for (t = tunnels_l[h1]; t; t = t->next) {
152 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
153 			return t;
154 	}
155 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
156 		return t;
157 	return NULL;
158 }
159 
160 static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
161 {
162 	__be32 remote = parms->iph.daddr;
163 	__be32 local = parms->iph.saddr;
164 	unsigned h = 0;
165 	int prio = 0;
166 
167 	if (remote) {
168 		prio |= 2;
169 		h ^= HASH(remote);
170 	}
171 	if (local) {
172 		prio |= 1;
173 		h ^= HASH(local);
174 	}
175 	return &tunnels[prio][h];
176 }
177 
178 static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
179 {
180 	return __ipip_bucket(&t->parms);
181 }
182 
183 static void ipip_tunnel_unlink(struct ip_tunnel *t)
184 {
185 	struct ip_tunnel **tp;
186 
187 	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
188 		if (t == *tp) {
189 			write_lock_bh(&ipip_lock);
190 			*tp = t->next;
191 			write_unlock_bh(&ipip_lock);
192 			break;
193 		}
194 	}
195 }
196 
197 static void ipip_tunnel_link(struct ip_tunnel *t)
198 {
199 	struct ip_tunnel **tp = ipip_bucket(t);
200 
201 	t->next = *tp;
202 	write_lock_bh(&ipip_lock);
203 	*tp = t;
204 	write_unlock_bh(&ipip_lock);
205 }
206 
207 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
208 {
209 	__be32 remote = parms->iph.daddr;
210 	__be32 local = parms->iph.saddr;
211 	struct ip_tunnel *t, **tp, *nt;
212 	struct net_device *dev;
213 	char name[IFNAMSIZ];
214 
215 	for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
216 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
217 			return t;
218 	}
219 	if (!create)
220 		return NULL;
221 
222 	if (parms->name[0])
223 		strlcpy(name, parms->name, IFNAMSIZ);
224 	else {
225 		int i;
226 		for (i=1; i<100; i++) {
227 			sprintf(name, "tunl%d", i);
228 			if (__dev_get_by_name(name) == NULL)
229 				break;
230 		}
231 		if (i==100)
232 			goto failed;
233 	}
234 
235 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
236 	if (dev == NULL)
237 		return NULL;
238 
239 	nt = netdev_priv(dev);
240 	SET_MODULE_OWNER(dev);
241 	dev->init = ipip_tunnel_init;
242 	nt->parms = *parms;
243 
244 	if (register_netdevice(dev) < 0) {
245 		free_netdev(dev);
246 		goto failed;
247 	}
248 
249 	dev_hold(dev);
250 	ipip_tunnel_link(nt);
251 	return nt;
252 
253 failed:
254 	return NULL;
255 }
256 
257 static void ipip_tunnel_uninit(struct net_device *dev)
258 {
259 	if (dev == ipip_fb_tunnel_dev) {
260 		write_lock_bh(&ipip_lock);
261 		tunnels_wc[0] = NULL;
262 		write_unlock_bh(&ipip_lock);
263 	} else
264 		ipip_tunnel_unlink(netdev_priv(dev));
265 	dev_put(dev);
266 }
267 
268 static int ipip_err(struct sk_buff *skb, u32 info)
269 {
270 #ifndef I_WISH_WORLD_WERE_PERFECT
271 
272 /* It is not :-( All the routers (except for Linux) return only
273    8 bytes of packet payload. It means, that precise relaying of
274    ICMP in the real Internet is absolutely infeasible.
275  */
276 	struct iphdr *iph = (struct iphdr*)skb->data;
277 	const int type = icmp_hdr(skb)->type;
278 	const int code = icmp_hdr(skb)->code;
279 	struct ip_tunnel *t;
280 	int err;
281 
282 	switch (type) {
283 	default:
284 	case ICMP_PARAMETERPROB:
285 		return 0;
286 
287 	case ICMP_DEST_UNREACH:
288 		switch (code) {
289 		case ICMP_SR_FAILED:
290 		case ICMP_PORT_UNREACH:
291 			/* Impossible event. */
292 			return 0;
293 		case ICMP_FRAG_NEEDED:
294 			/* Soft state for pmtu is maintained by IP core. */
295 			return 0;
296 		default:
297 			/* All others are translated to HOST_UNREACH.
298 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
299 			   I believe they are just ether pollution. --ANK
300 			 */
301 			break;
302 		}
303 		break;
304 	case ICMP_TIME_EXCEEDED:
305 		if (code != ICMP_EXC_TTL)
306 			return 0;
307 		break;
308 	}
309 
310 	err = -ENOENT;
311 
312 	read_lock(&ipip_lock);
313 	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
314 	if (t == NULL || t->parms.iph.daddr == 0)
315 		goto out;
316 
317 	err = 0;
318 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
319 		goto out;
320 
321 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
322 		t->err_count++;
323 	else
324 		t->err_count = 1;
325 	t->err_time = jiffies;
326 out:
327 	read_unlock(&ipip_lock);
328 	return err;
329 #else
330 	struct iphdr *iph = (struct iphdr*)dp;
331 	int hlen = iph->ihl<<2;
332 	struct iphdr *eiph;
333 	const int type = icmp_hdr(skb)->type;
334 	const int code = icmp_hdr(skb)->code;
335 	int rel_type = 0;
336 	int rel_code = 0;
337 	__be32 rel_info = 0;
338 	__u32 n = 0;
339 	struct sk_buff *skb2;
340 	struct flowi fl;
341 	struct rtable *rt;
342 
343 	if (len < hlen + sizeof(struct iphdr))
344 		return 0;
345 	eiph = (struct iphdr*)(dp + hlen);
346 
347 	switch (type) {
348 	default:
349 		return 0;
350 	case ICMP_PARAMETERPROB:
351 		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
352 		if (n < hlen)
353 			return 0;
354 
355 		/* So... This guy found something strange INSIDE encapsulated
356 		   packet. Well, he is fool, but what can we do ?
357 		 */
358 		rel_type = ICMP_PARAMETERPROB;
359 		rel_info = htonl((n - hlen) << 24);
360 		break;
361 
362 	case ICMP_DEST_UNREACH:
363 		switch (code) {
364 		case ICMP_SR_FAILED:
365 		case ICMP_PORT_UNREACH:
366 			/* Impossible event. */
367 			return 0;
368 		case ICMP_FRAG_NEEDED:
369 			/* And it is the only really necessary thing :-) */
370 			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
371 			if (n < hlen+68)
372 				return 0;
373 			n -= hlen;
374 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
375 			if (n > ntohs(eiph->tot_len))
376 				return 0;
377 			rel_info = htonl(n);
378 			break;
379 		default:
380 			/* All others are translated to HOST_UNREACH.
381 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
382 			   I believe, it is just ether pollution. --ANK
383 			 */
384 			rel_type = ICMP_DEST_UNREACH;
385 			rel_code = ICMP_HOST_UNREACH;
386 			break;
387 		}
388 		break;
389 	case ICMP_TIME_EXCEEDED:
390 		if (code != ICMP_EXC_TTL)
391 			return 0;
392 		break;
393 	}
394 
395 	/* Prepare fake skb to feed it to icmp_send */
396 	skb2 = skb_clone(skb, GFP_ATOMIC);
397 	if (skb2 == NULL)
398 		return 0;
399 	dst_release(skb2->dst);
400 	skb2->dst = NULL;
401 	skb_pull(skb2, skb->data - (u8*)eiph);
402 	skb_reset_network_header(skb2);
403 
404 	/* Try to guess incoming interface */
405 	memset(&fl, 0, sizeof(fl));
406 	fl.fl4_daddr = eiph->saddr;
407 	fl.fl4_tos = RT_TOS(eiph->tos);
408 	fl.proto = IPPROTO_IPIP;
409 	if (ip_route_output_key(&rt, &key)) {
410 		kfree_skb(skb2);
411 		return 0;
412 	}
413 	skb2->dev = rt->u.dst.dev;
414 
415 	/* route "incoming" packet */
416 	if (rt->rt_flags&RTCF_LOCAL) {
417 		ip_rt_put(rt);
418 		rt = NULL;
419 		fl.fl4_daddr = eiph->daddr;
420 		fl.fl4_src = eiph->saddr;
421 		fl.fl4_tos = eiph->tos;
422 		if (ip_route_output_key(&rt, &fl) ||
423 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
424 			ip_rt_put(rt);
425 			kfree_skb(skb2);
426 			return 0;
427 		}
428 	} else {
429 		ip_rt_put(rt);
430 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
431 		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
432 			kfree_skb(skb2);
433 			return 0;
434 		}
435 	}
436 
437 	/* change mtu on this route */
438 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
439 		if (n > dst_mtu(skb2->dst)) {
440 			kfree_skb(skb2);
441 			return 0;
442 		}
443 		skb2->dst->ops->update_pmtu(skb2->dst, n);
444 	} else if (type == ICMP_TIME_EXCEEDED) {
445 		struct ip_tunnel *t = netdev_priv(skb2->dev);
446 		if (t->parms.iph.ttl) {
447 			rel_type = ICMP_DEST_UNREACH;
448 			rel_code = ICMP_HOST_UNREACH;
449 		}
450 	}
451 
452 	icmp_send(skb2, rel_type, rel_code, rel_info);
453 	kfree_skb(skb2);
454 	return 0;
455 #endif
456 }
457 
458 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
459 					struct sk_buff *skb)
460 {
461 	struct iphdr *inner_iph = ip_hdr(skb);
462 
463 	if (INET_ECN_is_ce(outer_iph->tos))
464 		IP_ECN_set_ce(inner_iph);
465 }
466 
467 static int ipip_rcv(struct sk_buff *skb)
468 {
469 	struct ip_tunnel *tunnel;
470 	const struct iphdr *iph = ip_hdr(skb);
471 
472 	read_lock(&ipip_lock);
473 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
474 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
475 			read_unlock(&ipip_lock);
476 			kfree_skb(skb);
477 			return 0;
478 		}
479 
480 		secpath_reset(skb);
481 
482 		skb->mac_header = skb->network_header;
483 		skb_reset_network_header(skb);
484 		skb->protocol = htons(ETH_P_IP);
485 		skb->pkt_type = PACKET_HOST;
486 
487 		tunnel->stat.rx_packets++;
488 		tunnel->stat.rx_bytes += skb->len;
489 		skb->dev = tunnel->dev;
490 		dst_release(skb->dst);
491 		skb->dst = NULL;
492 		nf_reset(skb);
493 		ipip_ecn_decapsulate(iph, skb);
494 		netif_rx(skb);
495 		read_unlock(&ipip_lock);
496 		return 0;
497 	}
498 	read_unlock(&ipip_lock);
499 
500 	return -1;
501 }
502 
503 /*
504  *	This function assumes it is being called from dev_queue_xmit()
505  *	and that skb is filled properly by that function.
506  */
507 
508 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
509 {
510 	struct ip_tunnel *tunnel = netdev_priv(dev);
511 	struct net_device_stats *stats = &tunnel->stat;
512 	struct iphdr  *tiph = &tunnel->parms.iph;
513 	u8     tos = tunnel->parms.iph.tos;
514 	__be16 df = tiph->frag_off;
515 	struct rtable *rt;     			/* Route to the other host */
516 	struct net_device *tdev;			/* Device to other host */
517 	struct iphdr  *old_iph = ip_hdr(skb);
518 	struct iphdr  *iph;			/* Our new IP header */
519 	int    max_headroom;			/* The extra header space needed */
520 	__be32 dst = tiph->daddr;
521 	int    mtu;
522 
523 	if (tunnel->recursion++) {
524 		tunnel->stat.collisions++;
525 		goto tx_error;
526 	}
527 
528 	if (skb->protocol != htons(ETH_P_IP))
529 		goto tx_error;
530 
531 	if (tos&1)
532 		tos = old_iph->tos;
533 
534 	if (!dst) {
535 		/* NBMA tunnel */
536 		if ((rt = (struct rtable*)skb->dst) == NULL) {
537 			tunnel->stat.tx_fifo_errors++;
538 			goto tx_error;
539 		}
540 		if ((dst = rt->rt_gateway) == 0)
541 			goto tx_error_icmp;
542 	}
543 
544 	{
545 		struct flowi fl = { .oif = tunnel->parms.link,
546 				    .nl_u = { .ip4_u =
547 					      { .daddr = dst,
548 						.saddr = tiph->saddr,
549 						.tos = RT_TOS(tos) } },
550 				    .proto = IPPROTO_IPIP };
551 		if (ip_route_output_key(&rt, &fl)) {
552 			tunnel->stat.tx_carrier_errors++;
553 			goto tx_error_icmp;
554 		}
555 	}
556 	tdev = rt->u.dst.dev;
557 
558 	if (tdev == dev) {
559 		ip_rt_put(rt);
560 		tunnel->stat.collisions++;
561 		goto tx_error;
562 	}
563 
564 	if (tiph->frag_off)
565 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
566 	else
567 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
568 
569 	if (mtu < 68) {
570 		tunnel->stat.collisions++;
571 		ip_rt_put(rt);
572 		goto tx_error;
573 	}
574 	if (skb->dst)
575 		skb->dst->ops->update_pmtu(skb->dst, mtu);
576 
577 	df |= (old_iph->frag_off&htons(IP_DF));
578 
579 	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
580 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
581 		ip_rt_put(rt);
582 		goto tx_error;
583 	}
584 
585 	if (tunnel->err_count > 0) {
586 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
587 			tunnel->err_count--;
588 			dst_link_failure(skb);
589 		} else
590 			tunnel->err_count = 0;
591 	}
592 
593 	/*
594 	 * Okay, now see if we can stuff it in the buffer as-is.
595 	 */
596 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
597 
598 	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
599 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
600 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
601 		if (!new_skb) {
602 			ip_rt_put(rt);
603 			stats->tx_dropped++;
604 			dev_kfree_skb(skb);
605 			tunnel->recursion--;
606 			return 0;
607 		}
608 		if (skb->sk)
609 			skb_set_owner_w(new_skb, skb->sk);
610 		dev_kfree_skb(skb);
611 		skb = new_skb;
612 		old_iph = ip_hdr(skb);
613 	}
614 
615 	skb->transport_header = skb->network_header;
616 	skb_push(skb, sizeof(struct iphdr));
617 	skb_reset_network_header(skb);
618 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
619 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
620 			      IPSKB_REROUTED);
621 	dst_release(skb->dst);
622 	skb->dst = &rt->u.dst;
623 
624 	/*
625 	 *	Push down and install the IPIP header.
626 	 */
627 
628 	iph 			=	ip_hdr(skb);
629 	iph->version		=	4;
630 	iph->ihl		=	sizeof(struct iphdr)>>2;
631 	iph->frag_off		=	df;
632 	iph->protocol		=	IPPROTO_IPIP;
633 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
634 	iph->daddr		=	rt->rt_dst;
635 	iph->saddr		=	rt->rt_src;
636 
637 	if ((iph->ttl = tiph->ttl) == 0)
638 		iph->ttl	=	old_iph->ttl;
639 
640 	nf_reset(skb);
641 
642 	IPTUNNEL_XMIT();
643 	tunnel->recursion--;
644 	return 0;
645 
646 tx_error_icmp:
647 	dst_link_failure(skb);
648 tx_error:
649 	stats->tx_errors++;
650 	dev_kfree_skb(skb);
651 	tunnel->recursion--;
652 	return 0;
653 }
654 
655 static int
656 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
657 {
658 	int err = 0;
659 	struct ip_tunnel_parm p;
660 	struct ip_tunnel *t;
661 
662 	switch (cmd) {
663 	case SIOCGETTUNNEL:
664 		t = NULL;
665 		if (dev == ipip_fb_tunnel_dev) {
666 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
667 				err = -EFAULT;
668 				break;
669 			}
670 			t = ipip_tunnel_locate(&p, 0);
671 		}
672 		if (t == NULL)
673 			t = netdev_priv(dev);
674 		memcpy(&p, &t->parms, sizeof(p));
675 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
676 			err = -EFAULT;
677 		break;
678 
679 	case SIOCADDTUNNEL:
680 	case SIOCCHGTUNNEL:
681 		err = -EPERM;
682 		if (!capable(CAP_NET_ADMIN))
683 			goto done;
684 
685 		err = -EFAULT;
686 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
687 			goto done;
688 
689 		err = -EINVAL;
690 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
691 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
692 			goto done;
693 		if (p.iph.ttl)
694 			p.iph.frag_off |= htons(IP_DF);
695 
696 		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
697 
698 		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
699 			if (t != NULL) {
700 				if (t->dev != dev) {
701 					err = -EEXIST;
702 					break;
703 				}
704 			} else {
705 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
706 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
707 					err = -EINVAL;
708 					break;
709 				}
710 				t = netdev_priv(dev);
711 				ipip_tunnel_unlink(t);
712 				t->parms.iph.saddr = p.iph.saddr;
713 				t->parms.iph.daddr = p.iph.daddr;
714 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
715 				memcpy(dev->broadcast, &p.iph.daddr, 4);
716 				ipip_tunnel_link(t);
717 				netdev_state_change(dev);
718 			}
719 		}
720 
721 		if (t) {
722 			err = 0;
723 			if (cmd == SIOCCHGTUNNEL) {
724 				t->parms.iph.ttl = p.iph.ttl;
725 				t->parms.iph.tos = p.iph.tos;
726 				t->parms.iph.frag_off = p.iph.frag_off;
727 			}
728 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
729 				err = -EFAULT;
730 		} else
731 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
732 		break;
733 
734 	case SIOCDELTUNNEL:
735 		err = -EPERM;
736 		if (!capable(CAP_NET_ADMIN))
737 			goto done;
738 
739 		if (dev == ipip_fb_tunnel_dev) {
740 			err = -EFAULT;
741 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
742 				goto done;
743 			err = -ENOENT;
744 			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
745 				goto done;
746 			err = -EPERM;
747 			if (t->dev == ipip_fb_tunnel_dev)
748 				goto done;
749 			dev = t->dev;
750 		}
751 		unregister_netdevice(dev);
752 		err = 0;
753 		break;
754 
755 	default:
756 		err = -EINVAL;
757 	}
758 
759 done:
760 	return err;
761 }
762 
763 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
764 {
765 	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
766 }
767 
768 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
769 {
770 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
771 		return -EINVAL;
772 	dev->mtu = new_mtu;
773 	return 0;
774 }
775 
776 static void ipip_tunnel_setup(struct net_device *dev)
777 {
778 	SET_MODULE_OWNER(dev);
779 	dev->uninit		= ipip_tunnel_uninit;
780 	dev->hard_start_xmit	= ipip_tunnel_xmit;
781 	dev->get_stats		= ipip_tunnel_get_stats;
782 	dev->do_ioctl		= ipip_tunnel_ioctl;
783 	dev->change_mtu		= ipip_tunnel_change_mtu;
784 	dev->destructor		= free_netdev;
785 
786 	dev->type		= ARPHRD_TUNNEL;
787 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
788 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
789 	dev->flags		= IFF_NOARP;
790 	dev->iflink		= 0;
791 	dev->addr_len		= 4;
792 }
793 
794 static int ipip_tunnel_init(struct net_device *dev)
795 {
796 	struct net_device *tdev = NULL;
797 	struct ip_tunnel *tunnel;
798 	struct iphdr *iph;
799 
800 	tunnel = netdev_priv(dev);
801 	iph = &tunnel->parms.iph;
802 
803 	tunnel->dev = dev;
804 	strcpy(tunnel->parms.name, dev->name);
805 
806 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
807 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
808 
809 	if (iph->daddr) {
810 		struct flowi fl = { .oif = tunnel->parms.link,
811 				    .nl_u = { .ip4_u =
812 					      { .daddr = iph->daddr,
813 						.saddr = iph->saddr,
814 						.tos = RT_TOS(iph->tos) } },
815 				    .proto = IPPROTO_IPIP };
816 		struct rtable *rt;
817 		if (!ip_route_output_key(&rt, &fl)) {
818 			tdev = rt->u.dst.dev;
819 			ip_rt_put(rt);
820 		}
821 		dev->flags |= IFF_POINTOPOINT;
822 	}
823 
824 	if (!tdev && tunnel->parms.link)
825 		tdev = __dev_get_by_index(tunnel->parms.link);
826 
827 	if (tdev) {
828 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
829 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
830 	}
831 	dev->iflink = tunnel->parms.link;
832 
833 	return 0;
834 }
835 
836 static int __init ipip_fb_tunnel_init(struct net_device *dev)
837 {
838 	struct ip_tunnel *tunnel = netdev_priv(dev);
839 	struct iphdr *iph = &tunnel->parms.iph;
840 
841 	tunnel->dev = dev;
842 	strcpy(tunnel->parms.name, dev->name);
843 
844 	iph->version		= 4;
845 	iph->protocol		= IPPROTO_IPIP;
846 	iph->ihl		= 5;
847 
848 	dev_hold(dev);
849 	tunnels_wc[0]		= tunnel;
850 	return 0;
851 }
852 
853 static struct xfrm_tunnel ipip_handler = {
854 	.handler	=	ipip_rcv,
855 	.err_handler	=	ipip_err,
856 	.priority	=	1,
857 };
858 
859 static char banner[] __initdata =
860 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
861 
862 static int __init ipip_init(void)
863 {
864 	int err;
865 
866 	printk(banner);
867 
868 	if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
869 		printk(KERN_INFO "ipip init: can't register tunnel\n");
870 		return -EAGAIN;
871 	}
872 
873 	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
874 					   "tunl0",
875 					   ipip_tunnel_setup);
876 	if (!ipip_fb_tunnel_dev) {
877 		err = -ENOMEM;
878 		goto err1;
879 	}
880 
881 	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
882 
883 	if ((err = register_netdev(ipip_fb_tunnel_dev)))
884 		goto err2;
885  out:
886 	return err;
887  err2:
888 	free_netdev(ipip_fb_tunnel_dev);
889  err1:
890 	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
891 	goto out;
892 }
893 
894 static void __exit ipip_destroy_tunnels(void)
895 {
896 	int prio;
897 
898 	for (prio = 1; prio < 4; prio++) {
899 		int h;
900 		for (h = 0; h < HASH_SIZE; h++) {
901 			struct ip_tunnel *t;
902 			while ((t = tunnels[prio][h]) != NULL)
903 				unregister_netdevice(t->dev);
904 		}
905 	}
906 }
907 
908 static void __exit ipip_fini(void)
909 {
910 	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
911 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
912 
913 	rtnl_lock();
914 	ipip_destroy_tunnels();
915 	unregister_netdevice(ipip_fb_tunnel_dev);
916 	rtnl_unlock();
917 }
918 
919 module_init(ipip_init);
920 module_exit(ipip_fini);
921 MODULE_LICENSE("GPL");
922