/*
 *	Linux NET3:	IP/IP protocol decoder.
 *
 *	Authors:
 *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *	Fixes:
 *		Alan Cox	:	Merged and made usable non-modular (it's so tiny it's silly as
 *					a module taking up 2 pages).
 *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *					to keep ip_forward happy.
 *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
 *		David Woodhouse :	Perform some basic ICMP handling.
 *					IPIP Routing without decapsulation.
 *		Carlos Picoto	:	GRE over IP support
 *		Alexey Kuznetsov:	Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *					I do not want to merge them together.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

	The purpose of this driver is to provide an IP tunnel through
	which you can tunnel network traffic transparently across subnets.

	This was written by looking at Nick Holloway's dummy driver.
	Thanks for the great code!

		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95

	Minor tweaks:
		Cleaned up the code a little and added some pre-1.3.0 tweaks.
		dev->hard_header/hard_header_len changed to use no headers.
		Comments/bracketing tweaked.
		Made the tunnels use dev->name rather than "tunnel:" when reporting errors.
		Added tx_dropped stat.

		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95

	Reworked:
		Changed to tunnel to the destination gateway in addition to the
			tunnel's pointopoint address.
		Almost completely rewritten.
		Note:  There is currently no firewall or ICMP handling done.

		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

	When the tunnel_xmit() function is called, the skb contains the
	packet to be sent (plus a great deal of extra info), and dev
	contains the tunnel device that _we_ are.

	When we are passed a packet, we are expected to fill in the
	source address with our source IP address.

	What is the proper way to allocate, copy and free a buffer?
	After you allocate it, it is a "0 length" chunk of memory
	starting at zero.  If you want to add headers to the buffer
	later, you'll have to call "skb_reserve(skb, amount)" with
	the amount of memory you want reserved.  Then, you call
	"skb_put(skb, amount)" with the amount of space you want in
	the buffer.  skb_put() returns a pointer to the top (#0) of
	that buffer.  skb->len is set to the amount of space you have
	"allocated" with skb_put().  You can then write up to skb->len
	bytes to that buffer.  If you need more, you can call skb_put()
	again with the additional amount of space you need.  You can
	find out how much more space you can allocate by calling
	"skb_tailroom(skb)".
	Now, to add header space, call "skb_push(skb, header_len)".
	This creates space at the beginning of the buffer and returns
	a pointer to this new space.  If later you need to strip a
	header from a buffer, call "skb_pull(skb, header_len)".
	skb_headroom() will return how much space is left at the top
	of the buffer (before the main data).  Remember, this headroom
	space must be reserved before the skb_put() function is called.
	*/

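/*
 * A minimal sketch of the allocate/reserve/put/push sequence described
 * above, under the illustrative assumption of a 1500-byte payload and a
 * 16-byte link-level header; compiled out since it is documentation only.
 */
#if 0
static struct sk_buff *example_build_skb(void)
{
	struct sk_buff *skb = alloc_skb(16 + 1500, GFP_ATOMIC);
	unsigned char *data, *hdr;

	if (!skb)
		return NULL;
	skb_reserve(skb, 16);		/* headroom for headers added later */
	data = skb_put(skb, 1500);	/* claim payload space; skb->len grows */
	memset(data, 0, 1500);		/* payload may now be written */
	hdr = skb_push(skb, 16);	/* prepend header space from the headroom */
	memset(hdr, 0, 16);
	return skb;
}
#endif
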
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#define HASH_SIZE  16
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)

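/*
 * The fold above XORs the address's low nibble with the nibble beside it
 * and masks to 4 bits, so HASH() always yields a bucket in the range
 * 0..HASH_SIZE-1.  E.g. a raw value of 0x12345678 hashes to
 * (0x8 ^ 0x7) & 0xF = 0xF.
 */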
static int ipip_net_id __read_mostly;
struct ipip_net {
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_wc[1];
	struct ip_tunnel __rcu **tunnels[4];

	struct net_device *fb_tunnel_dev;
};

static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);

/*
 * Locking: hash tables are protected by RCU and RTNL
 */

#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

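/*
 * Note that for_each_ip_tunnel_rcu() expands to use a list cursor named
 * "t" that the caller must have declared, and that callers must be inside
 * an rcu_read_lock() section for the rcu_dereference() calls to be legal.
 */
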
/* often modified stats are per cpu, others are shared (netdev->stats) */
struct pcpu_tstats {
	unsigned long	rx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_bytes;
};

static struct net_device_stats *ipip_get_stats(struct net_device *dev)
{
	struct pcpu_tstats sum = { 0 };
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);

		sum.rx_packets += tstats->rx_packets;
		sum.rx_bytes   += tstats->rx_bytes;
		sum.tx_packets += tstats->tx_packets;
		sum.tx_bytes   += tstats->tx_bytes;
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes   = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes   = sum.tx_bytes;
	return &dev->stats;
}

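/*
 * Tunnel lookup precedence: a fully keyed (remote, local) match wins over
 * a remote-only match, which wins over a local-only match, with the single
 * wildcard tunnel (tunl0) as the fallback.  Runs under rcu_read_lock().
 */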
static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local)
{
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(local);
	struct ip_tunnel *t;
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags & IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags & IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags & IFF_UP))
			return t;

	t = rcu_dereference(ipn->tunnels_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
		return t;
	return NULL;
}

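/*
 * Bucket selection: bit 1 of prio is set when the tunnel is keyed on a
 * remote address and bit 0 when keyed on a local address, so prio indexes
 * tunnels[] as 0 = wildcard, 1 = local only, 2 = remote only, 3 = both.
 */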
static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	unsigned int h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &ipn->tunnels[prio][h];
}

static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel *t)
{
	return __ipip_bucket(ipn, &t->parms);
}

static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipip_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}

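/*
 * Publication order matters here: the new tunnel's next pointer is set
 * before the bucket head is switched with rcu_assign_pointer(), so a
 * concurrent reader always sees a well-formed chain.
 */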
static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for (tp = __ipip_bucket(ipn, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "tunl%d");

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (ipip_tunnel_init(dev) < 0)
		goto failed_free;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);
	ipip_tunnel_link(ipn, nt);
	return nt;

failed_free:
	ipip_dev_free(dev);
	return NULL;
}

/* called with RTNL */
static void ipip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	if (dev == ipn->fb_tunnel_dev)
		rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
	else
		ipip_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}

static int ipip_err(struct sk_buff *skb, u32 info)
{
/* All the routers (except for Linux) return only
   8 bytes of packet payload.  This means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   RFC 2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	rcu_read_lock();
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
	return err;
}

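/*
 * On decapsulation, a Congestion Experienced mark on the outer header is
 * propagated to the inner header, per the ECN tunnelling rules, so the
 * congestion signal survives the tunnel.
 */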
static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
					struct sk_buff *skb)
{
	struct iphdr *inner_iph = ip_hdr(skb);

	if (INET_ECN_is_ce(outer_iph->tos))
		IP_ECN_set_ce(inner_iph);
}

static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	rcu_read_lock();
	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			rcu_read_unlock();
			kfree_skb(skb);
			return 0;
		}

		secpath_reset(skb);

		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;

		__skb_tunnel_rx(skb, tunnel->dev);

		ipip_ecn_decapsulate(iph, skb);

		netif_rx(skb);

		rcu_read_unlock();
		return 0;
	}
	rcu_read_unlock();

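	/* Not ours: returning non-zero lets the next registered tunnel
	 * handler try the packet; if none claims it, the stack generates
	 * an error instead.
	 */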
	return -1;
}

/*
 *	This function assumes it is being called from dev_queue_xmit()
 *	and that skb is filled properly by that function.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	int    mtu;

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	if (tos & 1)		/* bit 0 of the configured TOS means "inherit" */
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = skb_rtable(skb)) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	{
		struct flowi fl = {
			.oif = tunnel->parms.link,
			.nl_u = {
				.ip4_u = {
					.daddr = dst,
					.saddr = tiph->saddr,
					.tos = RT_TOS(tos)
				}
			},
			.proto = IPPROTO_IPIP
		};

		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error_icmp;
		}
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

		if (mtu < 68) {
			dev->stats.collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);
	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static void ipip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct flowi fl = {
			.oif = tunnel->parms.link,
			.nl_u = {
				.ip4_u = {
					.daddr = iph->daddr,
					.saddr = iph->saddr,
					.tos = RT_TOS(iph->tos)
				}
			},
			.proto = IPPROTO_IPIP
		};
		struct rtable *rt;

		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;
}

static int
ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				ipip_tunnel_unlink(ipn, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

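/*
 * Userspace view, for orientation: tunnels are managed with the
 * SIOC{ADD,CHG,DEL,GET}TUNNEL ioctls on an AF_INET socket, aimed at the
 * fallback device "tunl0" (this is what "ip tunnel add" does).  A minimal
 * sketch, assuming <linux/if_tunnel.h> for struct ip_tunnel_parm and
 * SIOCADDTUNNEL, and illustrative endpoint addresses:
 *
 *	struct ip_tunnel_parm p = { .iph = { .version = 4, .ihl = 5,
 *					     .protocol = IPPROTO_IPIP } };
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strcpy(p.name, "tunl1");
 *	p.iph.saddr = inet_addr("192.0.2.1");		(local endpoint)
 *	p.iph.daddr = inet_addr("198.51.100.1");	(remote endpoint)
 *	strcpy(ifr.ifr_name, "tunl0");
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */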
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	/* 68 is the minimum IPv4 MTU; the upper bound keeps the outer
	 * packet's 16-bit tot_len valid once our header is added.
	 */
	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
	.ndo_get_stats	= ipip_get_stats,
};

static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

static void ipip_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipip_netdev_ops;
	dev->destructor		= ipip_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static int ipip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}

static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}

static struct xfrm_tunnel ipip_handler __read_mostly = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";

static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;

			t = rtnl_dereference(ipn->tunnels[prio][h]);
			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}

static int __net_init ipip_init_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	int err;

	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "tunl0",
					  ipip_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;

	if ((err = register_netdev(ipn->fb_tunnel_dev)))
		goto err_reg_dev;

	return 0;

err_reg_dev:
	ipip_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}

static void __net_exit ipip_exit_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	ipip_destroy_tunnels(ipn, &list);
	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
};

static int __init ipip_init(void)
{
	int err;

	printk(banner);

	err = register_pernet_device(&ipip_net_ops);
	if (err < 0)
		return err;
	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
	if (err < 0) {
		unregister_pernet_device(&ipip_net_ops);
		printk(KERN_INFO "ipip init: can't register tunnel\n");
	}
	return err;
}

static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		printk(KERN_INFO "ipip close: can't deregister tunnel\n");

	unregister_pernet_device(&ipip_net_ops);
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");