xref: /linux/net/ipv4/ipip.c (revision de2fe5e07d58424bc286fff3fd3c1b0bf933cd58)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
5  *
6  *	Authors:
7  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
8  *
9  *	Fixes:
10  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
11  *					a module taking up 2 pages).
12  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
13  *					to keep ip_forward happy.
14  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
15  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
16  *              David Woodhouse :       Perform some basic ICMP handling.
17  *                                      IPIP Routing without decapsulation.
18  *              Carlos Picoto   :       GRE over IP support
19  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
20  *					I do not want to merge them together.
21  *
22  *	This program is free software; you can redistribute it and/or
23  *	modify it under the terms of the GNU General Public License
24  *	as published by the Free Software Foundation; either version
25  *	2 of the License, or (at your option) any later version.
26  *
27  */
28 
29 /* tunnel.c: an IP tunnel driver
30 
31 	The purpose of this driver is to provide an IP tunnel through
32 	which you can tunnel network traffic transparently across subnets.
33 
34 	This was written by looking at Nick Holloway's dummy driver
35 	Thanks for the great code!
36 
37 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
38 
39 	Minor tweaks:
40 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
41 		dev->hard_header/hard_header_len changed to use no headers.
42 		Comments/bracketing tweaked.
43 		Made the tunnels use dev->name not tunnel: when error reporting.
44 		Added tx_dropped stat
45 
46 		-Alan Cox	(Alan.Cox@linux.org) 21 March 95
47 
48 	Reworked:
49 		Changed to tunnel to destination gateway in addition to the
50 			tunnel's pointopoint address
51 		Almost completely rewritten
52 		Note:  There is currently no firewall or ICMP handling done.
53 
54 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
55 
56 */
57 
58 /* Things I wish I had known when writing the tunnel driver:
59 
60 	When the tunnel_xmit() function is called, the skb contains the
61 	packet to be sent (plus a great deal of extra info), and dev
62 	contains the tunnel device that _we_ are.
63 
64 	When we are passed a packet, we are expected to fill in the
65 	source address with our source IP address.
66 
67 	What is the proper way to allocate, copy and free a buffer?
68 	After you allocate it, it is a "0 length" chunk of memory
69 	starting at zero.  If you want to add headers to the buffer
70 	later, you'll have to call "skb_reserve(skb, amount)" with
71 	the amount of memory you want reserved.  Then, you call
72 	"skb_put(skb, amount)" with the amount of space you want in
73 	the buffer.  skb_put() returns a pointer to the top (#0) of
74 	that buffer.  skb->len is set to the amount of space you have
75 	"allocated" with skb_put().  You can then write up to skb->len
76 	bytes to that buffer.  If you need more, you can call skb_put()
77 	again with the additional amount of space you need.  You can
78 	find out how much more space you can allocate by calling
79 	"skb_tailroom(skb)".
80 	Now, to add header space, call "skb_push(skb, header_len)".
81 	This creates space at the beginning of the buffer and returns
82 	a pointer to this new space.  If later you need to strip a
83 	header from a buffer, call "skb_pull(skb, header_len)".
84 	skb_headroom() will return how much space is left at the top
85 	of the buffer (before the main data).  Remember, this headroom
86 	space must be reserved before the skb_put() function is called.
87 	*/
88 
89 /*
90    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
91 
92    For comments look at net/ipv4/ip_gre.c --ANK
93  */
94 
95 
96 #include <linux/capability.h>
97 #include <linux/config.h>
98 #include <linux/module.h>
99 #include <linux/types.h>
100 #include <linux/sched.h>
101 #include <linux/kernel.h>
102 #include <asm/uaccess.h>
103 #include <linux/skbuff.h>
104 #include <linux/netdevice.h>
105 #include <linux/in.h>
106 #include <linux/tcp.h>
107 #include <linux/udp.h>
108 #include <linux/if_arp.h>
109 #include <linux/mroute.h>
110 #include <linux/init.h>
111 #include <linux/netfilter_ipv4.h>
112 #include <linux/if_ether.h>
113 
114 #include <net/sock.h>
115 #include <net/ip.h>
116 #include <net/icmp.h>
117 #include <net/ipip.h>
118 #include <net/inet_ecn.h>
119 #include <net/xfrm.h>
120 
/* Number of chains in each of the per-endpoint tunnel hash tables. */
#define HASH_SIZE  16
/*
 * Fold a 32-bit address into a chain index in [0, HASH_SIZE).
 * The argument is fully parenthesized so that expression arguments
 * (e.g. HASH(a | b)) hash the whole expression; note that it is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) (((addr) ^ ((addr) >> 4)) & (HASH_SIZE - 1))
123 
124 static int ipip_fb_tunnel_init(struct net_device *dev);
125 static int ipip_tunnel_init(struct net_device *dev);
126 static void ipip_tunnel_setup(struct net_device *dev);
127 
128 static struct net_device *ipip_fb_tunnel_dev;
129 
130 static struct ip_tunnel *tunnels_r_l[HASH_SIZE];
131 static struct ip_tunnel *tunnels_r[HASH_SIZE];
132 static struct ip_tunnel *tunnels_l[HASH_SIZE];
133 static struct ip_tunnel *tunnels_wc[1];
134 static struct ip_tunnel **tunnels[4] = { tunnels_wc, tunnels_l, tunnels_r, tunnels_r_l };
135 
136 static DEFINE_RWLOCK(ipip_lock);
137 
138 static struct ip_tunnel * ipip_tunnel_lookup(u32 remote, u32 local)
139 {
140 	unsigned h0 = HASH(remote);
141 	unsigned h1 = HASH(local);
142 	struct ip_tunnel *t;
143 
144 	for (t = tunnels_r_l[h0^h1]; t; t = t->next) {
145 		if (local == t->parms.iph.saddr &&
146 		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
147 			return t;
148 	}
149 	for (t = tunnels_r[h0]; t; t = t->next) {
150 		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
151 			return t;
152 	}
153 	for (t = tunnels_l[h1]; t; t = t->next) {
154 		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
155 			return t;
156 	}
157 	if ((t = tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
158 		return t;
159 	return NULL;
160 }
161 
162 static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
163 {
164 	u32 remote = t->parms.iph.daddr;
165 	u32 local = t->parms.iph.saddr;
166 	unsigned h = 0;
167 	int prio = 0;
168 
169 	if (remote) {
170 		prio |= 2;
171 		h ^= HASH(remote);
172 	}
173 	if (local) {
174 		prio |= 1;
175 		h ^= HASH(local);
176 	}
177 	return &tunnels[prio][h];
178 }
179 
180 
181 static void ipip_tunnel_unlink(struct ip_tunnel *t)
182 {
183 	struct ip_tunnel **tp;
184 
185 	for (tp = ipip_bucket(t); *tp; tp = &(*tp)->next) {
186 		if (t == *tp) {
187 			write_lock_bh(&ipip_lock);
188 			*tp = t->next;
189 			write_unlock_bh(&ipip_lock);
190 			break;
191 		}
192 	}
193 }
194 
195 static void ipip_tunnel_link(struct ip_tunnel *t)
196 {
197 	struct ip_tunnel **tp = ipip_bucket(t);
198 
199 	t->next = *tp;
200 	write_lock_bh(&ipip_lock);
201 	*tp = t;
202 	write_unlock_bh(&ipip_lock);
203 }
204 
205 static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
206 {
207 	u32 remote = parms->iph.daddr;
208 	u32 local = parms->iph.saddr;
209 	struct ip_tunnel *t, **tp, *nt;
210 	struct net_device *dev;
211 	unsigned h = 0;
212 	int prio = 0;
213 	char name[IFNAMSIZ];
214 
215 	if (remote) {
216 		prio |= 2;
217 		h ^= HASH(remote);
218 	}
219 	if (local) {
220 		prio |= 1;
221 		h ^= HASH(local);
222 	}
223 	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
224 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
225 			return t;
226 	}
227 	if (!create)
228 		return NULL;
229 
230 	if (parms->name[0])
231 		strlcpy(name, parms->name, IFNAMSIZ);
232 	else {
233 		int i;
234 		for (i=1; i<100; i++) {
235 			sprintf(name, "tunl%d", i);
236 			if (__dev_get_by_name(name) == NULL)
237 				break;
238 		}
239 		if (i==100)
240 			goto failed;
241 	}
242 
243 	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
244 	if (dev == NULL)
245 		return NULL;
246 
247 	nt = netdev_priv(dev);
248 	SET_MODULE_OWNER(dev);
249 	dev->init = ipip_tunnel_init;
250 	nt->parms = *parms;
251 
252 	if (register_netdevice(dev) < 0) {
253 		free_netdev(dev);
254 		goto failed;
255 	}
256 
257 	dev_hold(dev);
258 	ipip_tunnel_link(nt);
259 	return nt;
260 
261 failed:
262 	return NULL;
263 }
264 
265 static void ipip_tunnel_uninit(struct net_device *dev)
266 {
267 	if (dev == ipip_fb_tunnel_dev) {
268 		write_lock_bh(&ipip_lock);
269 		tunnels_wc[0] = NULL;
270 		write_unlock_bh(&ipip_lock);
271 	} else
272 		ipip_tunnel_unlink(netdev_priv(dev));
273 	dev_put(dev);
274 }
275 
/*
 * ipip_err - ICMP error handler, installed as ipip_handler.err_handler.
 * @skb:  buffer whose data is interpreted as the IP header the error
 *        refers to; skb->h.icmph supplies the ICMP type and code
 * @info: not used by either branch of this function
 *
 * Only the first preprocessor branch is built unless
 * I_WISH_WORLD_WERE_PERFECT is defined.  That branch never relays the
 * error; it only records it on the matching tunnel (err_count/err_time),
 * which ipip_tunnel_xmit() later consumes.
 *
 * Returns 0 when the error is ignored or recorded, -ENOENT when no tunnel
 * matches or the matching tunnel has no remote address configured.
 */
276 static int ipip_err(struct sk_buff *skb, u32 info)
277 {
278 #ifndef I_WISH_WORLD_WERE_PERFECT
279 
280 /* It is not :-( All the routers (except for Linux) return only
281    8 bytes of packet payload. It means, that precise relaying of
282    ICMP in the real Internet is absolutely infeasible.
283  */
284 	struct iphdr *iph = (struct iphdr*)skb->data;
285 	int type = skb->h.icmph->type;
286 	int code = skb->h.icmph->code;
287 	struct ip_tunnel *t;
288 	int err;
289 
	/*
	 * Filter: only ICMP_DEST_UNREACH (except the three codes handled
	 * below) and ICMP_TIME_EXCEEDED with code ICMP_EXC_TTL get past
	 * this switch; everything else returns 0 immediately.
	 */
290 	switch (type) {
291 	default:
292 	case ICMP_PARAMETERPROB:
293 		return 0;
294 
295 	case ICMP_DEST_UNREACH:
296 		switch (code) {
297 		case ICMP_SR_FAILED:
298 		case ICMP_PORT_UNREACH:
299 			/* Impossible event. */
300 			return 0;
301 		case ICMP_FRAG_NEEDED:
302 			/* Soft state for pmtu is maintained by IP core. */
303 			return 0;
304 		default:
305 			/* All others are translated to HOST_UNREACH.
306 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
307 			   I believe they are just ether pollution. --ANK
308 			 */
309 			break;
310 		}
311 		break;
312 	case ICMP_TIME_EXCEEDED:
313 		if (code != ICMP_EXC_TTL)
314 			return 0;
315 		break;
316 	}
317 
318 	err = -ENOENT;
319 
320 	read_lock(&ipip_lock);
	/*
	 * Arguments are swapped relative to ipip_rcv(): the header's daddr
	 * is passed as the tunnel's remote and its saddr as the local
	 * (presumably because this is the header of a packet we sent).
	 */
321 	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
322 	if (t == NULL || t->parms.iph.daddr == 0)
323 		goto out;
324 
325 	err = 0;
	/* A tunnel with ttl 0 does not record TIME_EXCEEDED errors. */
326 	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
327 		goto out;
328 
	/*
	 * Count errors that arrive within IPTUNNEL_ERR_TIMEO of the
	 * previous one; otherwise restart the count at 1.
	 * NOTE(review): these fields are written while holding ipip_lock
	 * only for reading.
	 */
329 	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
330 		t->err_count++;
331 	else
332 		t->err_count = 1;
333 	t->err_time = jiffies;
334 out:
335 	read_unlock(&ipip_lock);
336 	return err;
337 #else
	/*
	 * NOTE(review): this branch uses 'dp', 'len' and 'key', none of
	 * which are declared in this function, so it would not compile as
	 * written if I_WISH_WORLD_WERE_PERFECT were defined.  Its intent
	 * is to translate the error and relay it to the inner packet's
	 * sender via icmp_send().
	 */
338 	struct iphdr *iph = (struct iphdr*)dp;
339 	int hlen = iph->ihl<<2;
340 	struct iphdr *eiph;
341 	int type = skb->h.icmph->type;
342 	int code = skb->h.icmph->code;
343 	int rel_type = 0;
344 	int rel_code = 0;
345 	int rel_info = 0;
346 	struct sk_buff *skb2;
347 	struct flowi fl;
348 	struct rtable *rt;
349 
350 	if (len < hlen + sizeof(struct iphdr))
351 		return 0;
352 	eiph = (struct iphdr*)(dp + hlen);
353 
354 	switch (type) {
355 	default:
356 		return 0;
357 	case ICMP_PARAMETERPROB:
358 		if (skb->h.icmph->un.gateway < hlen)
359 			return 0;
360 
361 		/* So... This guy found something strange INSIDE encapsulated
362 		   packet. Well, he is fool, but what can we do ?
363 		 */
364 		rel_type = ICMP_PARAMETERPROB;
365 		rel_info = skb->h.icmph->un.gateway - hlen;
366 		break;
367 
368 	case ICMP_DEST_UNREACH:
369 		switch (code) {
370 		case ICMP_SR_FAILED:
371 		case ICMP_PORT_UNREACH:
372 			/* Impossible event. */
373 			return 0;
374 		case ICMP_FRAG_NEEDED:
375 			/* And it is the only really necessary thing :-) */
376 			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
377 			if (rel_info < hlen+68)
378 				return 0;
379 			rel_info -= hlen;
380 			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
381 			if (rel_info > ntohs(eiph->tot_len))
382 				return 0;
383 			break;
384 		default:
385 			/* All others are translated to HOST_UNREACH.
386 			   rfc2003 contains "deep thoughts" about NET_UNREACH,
387 			   I believe, it is just ether pollution. --ANK
388 			 */
389 			rel_type = ICMP_DEST_UNREACH;
390 			rel_code = ICMP_HOST_UNREACH;
391 			break;
392 		}
393 		break;
394 	case ICMP_TIME_EXCEEDED:
395 		if (code != ICMP_EXC_TTL)
396 			return 0;
397 		break;
398 	}
399 
400 	/* Prepare fake skb to feed it to icmp_send */
401 	skb2 = skb_clone(skb, GFP_ATOMIC);
402 	if (skb2 == NULL)
403 		return 0;
404 	dst_release(skb2->dst);
405 	skb2->dst = NULL;
406 	skb_pull(skb2, skb->data - (u8*)eiph);
407 	skb2->nh.raw = skb2->data;
408 
409 	/* Try to guess incoming interface */
410 	memset(&fl, 0, sizeof(fl));
411 	fl.fl4_daddr = eiph->saddr;
412 	fl.fl4_tos = RT_TOS(eiph->tos);
413 	fl.proto = IPPROTO_IPIP;
414 	if (ip_route_output_key(&rt, &key)) {
415 		kfree_skb(skb2);
416 		return 0;
417 	}
418 	skb2->dev = rt->u.dst.dev;
419 
420 	/* route "incoming" packet */
421 	if (rt->rt_flags&RTCF_LOCAL) {
422 		ip_rt_put(rt);
423 		rt = NULL;
424 		fl.fl4_daddr = eiph->daddr;
425 		fl.fl4_src = eiph->saddr;
426 		fl.fl4_tos = eiph->tos;
427 		if (ip_route_output_key(&rt, &fl) ||
428 		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
429 			ip_rt_put(rt);
430 			kfree_skb(skb2);
431 			return 0;
432 		}
433 	} else {
434 		ip_rt_put(rt);
435 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
436 		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
437 			kfree_skb(skb2);
438 			return 0;
439 		}
440 	}
441 
442 	/* change mtu on this route */
443 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
444 		if (rel_info > dst_mtu(skb2->dst)) {
445 			kfree_skb(skb2);
446 			return 0;
447 		}
448 		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
449 		rel_info = htonl(rel_info);
450 	} else if (type == ICMP_TIME_EXCEEDED) {
451 		struct ip_tunnel *t = netdev_priv(skb2->dev);
452 		if (t->parms.iph.ttl) {
453 			rel_type = ICMP_DEST_UNREACH;
454 			rel_code = ICMP_HOST_UNREACH;
455 		}
456 	}
457 
458 	icmp_send(skb2, rel_type, rel_code, rel_info);
459 	kfree_skb(skb2);
460 	return 0;
461 #endif
462 }
463 
464 static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
465 {
466 	struct iphdr *inner_iph = skb->nh.iph;
467 
468 	if (INET_ECN_is_ce(outer_iph->tos))
469 		IP_ECN_set_ce(inner_iph);
470 }
471 
472 static int ipip_rcv(struct sk_buff *skb)
473 {
474 	struct iphdr *iph;
475 	struct ip_tunnel *tunnel;
476 
477 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
478 		goto out;
479 
480 	iph = skb->nh.iph;
481 
482 	read_lock(&ipip_lock);
483 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
484 		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
485 			read_unlock(&ipip_lock);
486 			kfree_skb(skb);
487 			return 0;
488 		}
489 
490 		secpath_reset(skb);
491 
492 		skb->mac.raw = skb->nh.raw;
493 		skb->nh.raw = skb->data;
494 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
495 		skb->protocol = htons(ETH_P_IP);
496 		skb->pkt_type = PACKET_HOST;
497 
498 		tunnel->stat.rx_packets++;
499 		tunnel->stat.rx_bytes += skb->len;
500 		skb->dev = tunnel->dev;
501 		dst_release(skb->dst);
502 		skb->dst = NULL;
503 		nf_reset(skb);
504 		ipip_ecn_decapsulate(iph, skb);
505 		netif_rx(skb);
506 		read_unlock(&ipip_lock);
507 		return 0;
508 	}
509 	read_unlock(&ipip_lock);
510 
511 out:
512 	return -1;
513 }
514 
515 /*
516  *	This function assumes it is being called from dev_queue_xmit()
517  *	and that skb is filled properly by that function.
518  */
519 
/*
 * ipip_tunnel_xmit - hard_start_xmit handler: wrap an IPv4 packet in a new
 * outer IPv4 header (protocol IPPROTO_IPIP) and send it on.
 * @skb: packet to transmit; on every path it is either freed here or
 *       passed to IPTUNNEL_XMIT()
 * @dev: the tunnel device
 *
 * Always returns 0; failures are reported through the tunnel's statistics
 * (tx_errors, collisions, tx_fifo_errors, tx_carrier_errors, tx_dropped)
 * and, on some paths, dst_link_failure()/icmp_send().
 */
520 static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
521 {
522 	struct ip_tunnel *tunnel = netdev_priv(dev);
523 	struct net_device_stats *stats = &tunnel->stat;
524 	struct iphdr  *tiph = &tunnel->parms.iph;
525 	u8     tos = tunnel->parms.iph.tos;
526 	u16    df = tiph->frag_off;
527 	struct rtable *rt;     			/* Route to the other host */
528 	struct net_device *tdev;			/* Device to other host */
529 	struct iphdr  *old_iph = skb->nh.iph;
530 	struct iphdr  *iph;			/* Our new IP header */
531 	int    max_headroom;			/* The extra header space needed */
532 	u32    dst = tiph->daddr;
533 	int    mtu;
534 
	/* Re-entry guard; every exit path below decrements it again. */
535 	if (tunnel->recursion++) {
536 		tunnel->stat.collisions++;
537 		goto tx_error;
538 	}
539 
540 	if (skb->protocol != htons(ETH_P_IP))
541 		goto tx_error;
542 
	/* Low bit of the configured TOS: take the TOS from the inner header. */
543 	if (tos&1)
544 		tos = old_iph->tos;
545 
546 	if (!dst) {
547 		/* NBMA tunnel */
		/* No fixed remote: use the gateway of the packet's own route. */
548 		if ((rt = (struct rtable*)skb->dst) == NULL) {
549 			tunnel->stat.tx_fifo_errors++;
550 			goto tx_error;
551 		}
552 		if ((dst = rt->rt_gateway) == 0)
553 			goto tx_error_icmp;
554 	}
555 
556 	{
557 		struct flowi fl = { .oif = tunnel->parms.link,
558 				    .nl_u = { .ip4_u =
559 					      { .daddr = dst,
560 						.saddr = tiph->saddr,
561 						.tos = RT_TOS(tos) } },
562 				    .proto = IPPROTO_IPIP };
563 		if (ip_route_output_key(&rt, &fl)) {
564 			tunnel->stat.tx_carrier_errors++;
565 			goto tx_error_icmp;
566 		}
567 	}
568 	tdev = rt->u.dst.dev;
569 
	/* The outer route leads back to this very device. */
570 	if (tdev == dev) {
571 		ip_rt_put(rt);
572 		tunnel->stat.collisions++;
573 		goto tx_error;
574 	}
575 
	/*
	 * With frag_off configured on the tunnel the MTU comes from the
	 * outer route minus the added header; otherwise from the inner
	 * packet's route or, failing that, the device.
	 */
576 	if (tiph->frag_off)
577 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
578 	else
579 		mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
580 
581 	if (mtu < 68) {
582 		tunnel->stat.collisions++;
583 		ip_rt_put(rt);
584 		goto tx_error;
585 	}
586 	if (skb->dst)
587 		skb->dst->ops->update_pmtu(skb->dst, mtu);
588 
	/* The outer header also carries DF when the inner header has it. */
589 	df |= (old_iph->frag_off&htons(IP_DF));
590 
591 	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
592 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
593 		ip_rt_put(rt);
594 		goto tx_error;
595 	}
596 
	/*
	 * Errors recorded by ipip_err(): while they are recent, report one
	 * link failure per transmitted packet; once stale, forget them.
	 */
597 	if (tunnel->err_count > 0) {
598 		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
599 			tunnel->err_count--;
600 			dst_link_failure(skb);
601 		} else
602 			tunnel->err_count = 0;
603 	}
604 
605 	/*
606 	 * Okay, now see if we can stuff it in the buffer as-is.
607 	 */
608 	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
609 
610 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
611 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
612 		if (!new_skb) {
613 			ip_rt_put(rt);
614   			stats->tx_dropped++;
615 			dev_kfree_skb(skb);
616 			tunnel->recursion--;
617 			return 0;
618 		}
619 		if (skb->sk)
620 			skb_set_owner_w(new_skb, skb->sk);
621 		dev_kfree_skb(skb);
622 		skb = new_skb;
		/* Re-read: the inner header now lives in the new buffer. */
623 		old_iph = skb->nh.iph;
624 	}
625 
	/* Inner header becomes the "transport" header; outer one is pushed. */
626 	skb->h.raw = skb->nh.raw;
627 	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
628 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
629 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
630 			      IPSKB_REROUTED);
	/* The skb takes over the reference on rt obtained above. */
631 	dst_release(skb->dst);
632 	skb->dst = &rt->u.dst;
633 
634 	/*
635 	 *	Push down and install the IPIP header.
636 	 */
637 
638 	iph 			=	skb->nh.iph;
639 	iph->version		=	4;
640 	iph->ihl		=	sizeof(struct iphdr)>>2;
641 	iph->frag_off		=	df;
642 	iph->protocol		=	IPPROTO_IPIP;
643 	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
644 	iph->daddr		=	rt->rt_dst;
645 	iph->saddr		=	rt->rt_src;
646 
	/* Configured TTL of 0 means: copy the TTL of the inner header. */
647 	if ((iph->ttl = tiph->ttl) == 0)
648 		iph->ttl	=	old_iph->ttl;
649 
650 	nf_reset(skb);
651 
	/*
	 * NOTE(review): IPTUNNEL_XMIT() is defined outside this file and
	 * takes no arguments, so it presumably refers to locals here by
	 * name (e.g. skb, iph, rt, stats) - confirm before renaming any.
	 */
652 	IPTUNNEL_XMIT();
653 	tunnel->recursion--;
654 	return 0;
655 
656 tx_error_icmp:
657 	dst_link_failure(skb);
658 tx_error:
659 	stats->tx_errors++;
660 	dev_kfree_skb(skb);
661 	tunnel->recursion--;
662 	return 0;
663 }
664 
/*
 * ipip_tunnel_ioctl - do_ioctl handler: get, add, change or delete a tunnel.
 * @dev: device the ioctl was issued on; when it is the fallback device the
 *       target tunnel is looked up from the user-supplied parameters
 * @ifr: request; ifr->ifr_ifru.ifru_data is a user-space pointer to a
 *       struct ip_tunnel_parm that is read and/or written back
 * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
 *
 * Returns 0 on success, otherwise -EPERM (no CAP_NET_ADMIN, or an attempt
 * to delete the fallback tunnel), -EFAULT (user copy failed), -EINVAL (bad
 * parameters or unknown @cmd), -EEXIST, -ENOBUFS, -ENOENT, or the result
 * of unregister_netdevice() for SIOCDELTUNNEL.
 */
665 static int
666 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
667 {
668 	int err = 0;
669 	struct ip_tunnel_parm p;
670 	struct ip_tunnel *t;
671 
672 	switch (cmd) {
673 	case SIOCGETTUNNEL:
674 		t = NULL;
		/* On the fallback device, report the tunnel named by the
		   caller's addresses; fall back to the device's own tunnel. */
675 		if (dev == ipip_fb_tunnel_dev) {
676 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
677 				err = -EFAULT;
678 				break;
679 			}
680 			t = ipip_tunnel_locate(&p, 0);
681 		}
682 		if (t == NULL)
683 			t = netdev_priv(dev);
684 		memcpy(&p, &t->parms, sizeof(p));
685 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
686 			err = -EFAULT;
687 		break;
688 
689 	case SIOCADDTUNNEL:
690 	case SIOCCHGTUNNEL:
		/* err is preset before each check so 'goto done' reports it. */
691 		err = -EPERM;
692 		if (!capable(CAP_NET_ADMIN))
693 			goto done;
694 
695 		err = -EFAULT;
696 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
697 			goto done;
698 
		/* Only a plain IPv4 IPIP header template is accepted, and DF
		   is the only frag_off bit the caller may set. */
699 		err = -EINVAL;
700 		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
701 		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
702 			goto done;
		/* A fixed TTL forces DF on the outer header. */
703 		if (p.iph.ttl)
704 			p.iph.frag_off |= htons(IP_DF);
705 
		/* Creates the tunnel only for SIOCADDTUNNEL. */
706 		t = ipip_tunnel_locate(&p, cmd == SIOCADDTUNNEL);
707 
708 		if (dev != ipip_fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
709 			if (t != NULL) {
				/* The new address pair already belongs to
				   a different device. */
710 				if (t->dev != dev) {
711 					err = -EEXIST;
712 					break;
713 				}
714 			} else {
				/* A change may not add or remove the remote
				   address relative to IFF_POINTOPOINT. */
715 				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
716 				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
717 					err = -EINVAL;
718 					break;
719 				}
				/* Re-hash: unlink under the old addresses,
				   update them, link under the new ones. */
720 				t = netdev_priv(dev);
721 				ipip_tunnel_unlink(t);
722 				t->parms.iph.saddr = p.iph.saddr;
723 				t->parms.iph.daddr = p.iph.daddr;
724 				memcpy(dev->dev_addr, &p.iph.saddr, 4);
725 				memcpy(dev->broadcast, &p.iph.daddr, 4);
726 				ipip_tunnel_link(t);
727 				netdev_state_change(dev);
728 			}
729 		}
730 
731 		if (t) {
732 			err = 0;
733 			if (cmd == SIOCCHGTUNNEL) {
734 				t->parms.iph.ttl = p.iph.ttl;
735 				t->parms.iph.tos = p.iph.tos;
736 				t->parms.iph.frag_off = p.iph.frag_off;
737 			}
			/* Hand the resulting parameters back to the caller. */
738 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
739 				err = -EFAULT;
740 		} else
741 			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
742 		break;
743 
744 	case SIOCDELTUNNEL:
745 		err = -EPERM;
746 		if (!capable(CAP_NET_ADMIN))
747 			goto done;
748 
		/* Via the fallback device the victim is looked up from the
		   caller's parameters; the fallback tunnel itself cannot be
		   deleted this way. */
749 		if (dev == ipip_fb_tunnel_dev) {
750 			err = -EFAULT;
751 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
752 				goto done;
753 			err = -ENOENT;
754 			if ((t = ipip_tunnel_locate(&p, 0)) == NULL)
755 				goto done;
756 			err = -EPERM;
757 			if (t->dev == ipip_fb_tunnel_dev)
758 				goto done;
759 			dev = t->dev;
760 		}
761 		err = unregister_netdevice(dev);
762 		break;
763 
764 	default:
765 		err = -EINVAL;
766 	}
767 
768 done:
769 	return err;
770 }
771 
772 static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
773 {
774 	return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
775 }
776 
777 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
778 {
779 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
780 		return -EINVAL;
781 	dev->mtu = new_mtu;
782 	return 0;
783 }
784 
785 static void ipip_tunnel_setup(struct net_device *dev)
786 {
787 	SET_MODULE_OWNER(dev);
788 	dev->uninit		= ipip_tunnel_uninit;
789 	dev->hard_start_xmit	= ipip_tunnel_xmit;
790 	dev->get_stats		= ipip_tunnel_get_stats;
791 	dev->do_ioctl		= ipip_tunnel_ioctl;
792 	dev->change_mtu		= ipip_tunnel_change_mtu;
793 	dev->destructor		= free_netdev;
794 
795 	dev->type		= ARPHRD_TUNNEL;
796 	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
797 	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
798 	dev->flags		= IFF_NOARP;
799 	dev->iflink		= 0;
800 	dev->addr_len		= 4;
801 }
802 
803 static int ipip_tunnel_init(struct net_device *dev)
804 {
805 	struct net_device *tdev = NULL;
806 	struct ip_tunnel *tunnel;
807 	struct iphdr *iph;
808 
809 	tunnel = netdev_priv(dev);
810 	iph = &tunnel->parms.iph;
811 
812 	tunnel->dev = dev;
813 	strcpy(tunnel->parms.name, dev->name);
814 
815 	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
816 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
817 
818 	if (iph->daddr) {
819 		struct flowi fl = { .oif = tunnel->parms.link,
820 				    .nl_u = { .ip4_u =
821 					      { .daddr = iph->daddr,
822 						.saddr = iph->saddr,
823 						.tos = RT_TOS(iph->tos) } },
824 				    .proto = IPPROTO_IPIP };
825 		struct rtable *rt;
826 		if (!ip_route_output_key(&rt, &fl)) {
827 			tdev = rt->u.dst.dev;
828 			ip_rt_put(rt);
829 		}
830 		dev->flags |= IFF_POINTOPOINT;
831 	}
832 
833 	if (!tdev && tunnel->parms.link)
834 		tdev = __dev_get_by_index(tunnel->parms.link);
835 
836 	if (tdev) {
837 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
838 		dev->mtu = tdev->mtu - sizeof(struct iphdr);
839 	}
840 	dev->iflink = tunnel->parms.link;
841 
842 	return 0;
843 }
844 
845 static int __init ipip_fb_tunnel_init(struct net_device *dev)
846 {
847 	struct ip_tunnel *tunnel = netdev_priv(dev);
848 	struct iphdr *iph = &tunnel->parms.iph;
849 
850 	tunnel->dev = dev;
851 	strcpy(tunnel->parms.name, dev->name);
852 
853 	iph->version		= 4;
854 	iph->protocol		= IPPROTO_IPIP;
855 	iph->ihl		= 5;
856 
857 	dev_hold(dev);
858 	tunnels_wc[0]		= tunnel;
859 	return 0;
860 }
861 
862 static struct xfrm_tunnel ipip_handler = {
863 	.handler	=	ipip_rcv,
864 	.err_handler	=	ipip_err,
865 	.priority	=	1,
866 };
867 
868 static char banner[] __initdata =
869 	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
870 
871 static int __init ipip_init(void)
872 {
873 	int err;
874 
875 	printk(banner);
876 
877 	if (xfrm4_tunnel_register(&ipip_handler)) {
878 		printk(KERN_INFO "ipip init: can't register tunnel\n");
879 		return -EAGAIN;
880 	}
881 
882 	ipip_fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
883 					   "tunl0",
884 					   ipip_tunnel_setup);
885 	if (!ipip_fb_tunnel_dev) {
886 		err = -ENOMEM;
887 		goto err1;
888 	}
889 
890 	ipip_fb_tunnel_dev->init = ipip_fb_tunnel_init;
891 
892 	if ((err = register_netdev(ipip_fb_tunnel_dev)))
893 		goto err2;
894  out:
895 	return err;
896  err2:
897 	free_netdev(ipip_fb_tunnel_dev);
898  err1:
899 	xfrm4_tunnel_deregister(&ipip_handler);
900 	goto out;
901 }
902 
903 static void __exit ipip_destroy_tunnels(void)
904 {
905 	int prio;
906 
907 	for (prio = 1; prio < 4; prio++) {
908 		int h;
909 		for (h = 0; h < HASH_SIZE; h++) {
910 			struct ip_tunnel *t;
911 			while ((t = tunnels[prio][h]) != NULL)
912 				unregister_netdevice(t->dev);
913 		}
914 	}
915 }
916 
917 static void __exit ipip_fini(void)
918 {
919 	if (xfrm4_tunnel_deregister(&ipip_handler))
920 		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
921 
922 	rtnl_lock();
923 	ipip_destroy_tunnels();
924 	unregister_netdevice(ipip_fb_tunnel_dev);
925 	rtnl_unlock();
926 }
927 
928 module_init(ipip_init);
929 module_exit(ipip_fini);
930 MODULE_LICENSE("GPL");
931