xref: /linux/net/ipv4/ipip.c (revision f49f4ab95c301dbccad0efe85296d908b8ae7ad4)
1 /*
2  *	Linux NET3:	IP/IP protocol decoder.
3  *
4  *	Authors:
5  *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *	Fixes:
8  *		Alan Cox	:	Merged and made usable non modular (its so tiny its silly as
9  *					a module taking up 2 pages).
10  *		Alan Cox	: 	Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *					to keep ip_forward happy.
12  *		Alan Cox	:	More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *		Kai Schulte	:	Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *		Alexey Kuznetsov:	Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *					I do not want to merge them together.
19  *
20  *	This program is free software; you can redistribute it and/or
21  *	modify it under the terms of the GNU General Public License
22  *	as published by the Free Software Foundation; either version
23  *	2 of the License, or (at your option) any later version.
24  *
25  */
26 
27 /* tunnel.c: an IP tunnel driver
28 
29 	The purpose of this driver is to provide an IP tunnel through
30 	which you can tunnel network traffic transparently across subnets.
31 
32 	This was written by looking at Nick Holloway's dummy driver
33 	Thanks for the great code!
34 
35 		-Sam Lantinga	(slouken@cs.ucdavis.edu)  02/01/95
36 
37 	Minor tweaks:
38 		Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 		dev->hard_header/hard_header_len changed to use no headers.
40 		Comments/bracketing tweaked.
41 		Made the tunnels use dev->name not tunnel: when error reporting.
42 		Added tx_dropped stat
43 
44 		-Alan Cox	(alan@lxorguk.ukuu.org.uk) 21 March 95
45 
46 	Reworked:
47 		Changed to tunnel to destination gateway in addition to the
48 			tunnel's pointopoint address
49 		Almost completely rewritten
50 		Note:  There is currently no firewall or ICMP handling done.
51 
52 		-Sam Lantinga	(slouken@cs.ucdavis.edu) 02/13/96
53 
54 */
55 
56 /* Things I wish I had known when writing the tunnel driver:
57 
58 	When the tunnel_xmit() function is called, the skb contains the
59 	packet to be sent (plus a great deal of extra info), and dev
60 	contains the tunnel device that _we_ are.
61 
62 	When we are passed a packet, we are expected to fill in the
63 	source address with our source IP address.
64 
65 	What is the proper way to allocate, copy and free a buffer?
66 	After you allocate it, it is a "0 length" chunk of memory
67 	starting at zero.  If you want to add headers to the buffer
68 	later, you'll have to call "skb_reserve(skb, amount)" with
69 	the amount of memory you want reserved.  Then, you call
70 	"skb_put(skb, amount)" with the amount of space you want in
71 	the buffer.  skb_put() returns a pointer to the top (#0) of
72 	that buffer.  skb->len is set to the amount of space you have
73 	"allocated" with skb_put().  You can then write up to skb->len
74 	bytes to that buffer.  If you need more, you can call skb_put()
75 	again with the additional amount of space you need.  You can
76 	find out how much more space you can allocate by calling
77 	"skb_tailroom(skb)".
78 	Now, to add header space, call "skb_push(skb, header_len)".
79 	This creates space at the beginning of the buffer and returns
80 	a pointer to this new space.  If later you need to strip a
81 	header from a buffer, call "skb_pull(skb, header_len)".
82 	skb_headroom() will return how much space is left at the top
83 	of the buffer (before the main data).  Remember, this headroom
84 	space must be reserved before the skb_put() function is called.
85 	*/
86 
87 /*
88    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89 
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92 
93 
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110 
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119 
#define HASH_SIZE  16
/* Fold a __be32 address into a bucket index in [0, HASH_SIZE). */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

/* Module parameter: rate-limited logging of decapsulated packets whose
 * outer header carried invalid ECN marking (see ipip_rcv). */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 
static int ipip_net_id __read_mostly;
/* Per-network-namespace state, allocated via net_generic(ipip_net_id). */
struct ipip_net {
	/* Hash chains split by which tunnel endpoints are configured:
	 * r_l = remote+local, r = remote only, l = local only,
	 * wc = fully wildcarded (a single chain). */
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_wc[1];
	/* Indexed by "prio" from __ipip_bucket(): 0=wc, 1=l, 2=r, 3=r_l;
	 * wired up in ipip_init_net(). */
	struct ip_tunnel __rcu **tunnels[4];

	struct net_device *fb_tunnel_dev;	/* the fallback "tunl0" device */
};
137 
138 static int ipip_tunnel_init(struct net_device *dev);
139 static void ipip_tunnel_setup(struct net_device *dev);
140 static void ipip_dev_free(struct net_device *dev);
141 
142 /*
143  * Locking : hash tables are protected by RCU and RTNL
144  */
145 
/* Walk one hash chain under rcu_read_lock().  Iterates a cursor variable
 * named "t" that the caller must have declared in scope. */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
148 
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
	u64	rx_packets;
	u64	rx_bytes;
	u64	tx_packets;
	u64	tx_bytes;
	/* Makes the u64 counters readable consistently on 32-bit SMP. */
	struct u64_stats_sync	syncp;
};
157 
/*
 * ndo_get_stats64: sum the per-cpu rx/tx counters into @tot and copy the
 * infrequently-updated error counters from netdev->stats.  Returns @tot.
 */
static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
						  struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		/* Retry loop: re-read if the writer updated the counters
		 * while we were snapshotting them (32-bit consistency). */
		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	/* Error counters are only touched in slow paths and live in the
	 * shared netdev stats rather than the per-cpu block. */
	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;
	tot->collisions = dev->stats.collisions;

	return tot;
}
191 
/*
 * Find the tunnel matching an outer header with the given endpoints.
 * Probes the hash tables from most to least specific (remote+local,
 * remote only, local only, wildcard) so the best match wins, and only
 * returns tunnels whose device is up.  Caller must hold rcu_read_lock().
 * Returns NULL when nothing matches.
 */
static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local)
{
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(local);
	struct ip_tunnel *t;
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;

	t = rcu_dereference(ipn->tunnels_wc[0]);
	if (t && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}
218 
219 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
220 		struct ip_tunnel_parm *parms)
221 {
222 	__be32 remote = parms->iph.daddr;
223 	__be32 local = parms->iph.saddr;
224 	unsigned int h = 0;
225 	int prio = 0;
226 
227 	if (remote) {
228 		prio |= 2;
229 		h ^= HASH(remote);
230 	}
231 	if (local) {
232 		prio |= 1;
233 		h ^= HASH(local);
234 	}
235 	return &ipn->tunnels[prio][h];
236 }
237 
/* Convenience wrapper: bucket for an existing tunnel's own parameters. */
static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel *t)
{
	return __ipip_bucket(ipn, &t->parms);
}
243 
/*
 * Remove @t from its hash chain.  Caller must hold RTNL (the chains are
 * written under RTNL and read under RCU, hence rtnl_dereference here).
 * Silently does nothing if @t is not on the chain.
 */
static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipip_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			/* Splice t out; concurrent RCU readers keep a valid
			 * view either way. */
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
258 
/*
 * Insert @t at the head of its hash chain.  Caller must hold RTNL.
 * t->next is set before the chain head is published so RCU readers
 * never see a half-linked entry.
 */
static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}
266 
/*
 * Find a tunnel with exactly the endpoints in @parms; when none exists
 * and @create is non-zero, allocate, initialize, register and link a new
 * tunnel device.  Caller must hold RTNL.  Returns the tunnel, or NULL
 * on lookup miss (!create) or on any allocation/registration failure.
 */
static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for (tp = __ipip_bucket(ipn, parms);
		 (t = rtnl_dereference(*tp)) != NULL;
		 tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	/* Use the requested name, or let the core pick tunl<N>. */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "tunl%d");

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (ipip_tunnel_init(dev) < 0)
		goto failed_free;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Record the name actually assigned (a %d template was expanded). */
	strcpy(nt->parms.name, dev->name);

	/* Reference dropped in ipip_tunnel_uninit(). */
	dev_hold(dev);
	ipip_tunnel_link(ipn, nt);
	return nt;

failed_free:
	ipip_dev_free(dev);
	return NULL;
}
317 
/* called with RTNL */
/* ndo_uninit: unlink the tunnel from the hash tables (or clear the
 * wildcard slot for the fallback device) and drop the reference taken
 * at link time. */
static void ipip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	if (dev == ipn->fb_tunnel_dev)
		RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
	else
		ipip_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}
330 
/*
 * ICMP error handler for IPIP packets we originated (xfrm_tunnel
 * .err_handler).  skb->data points at the outer IP header we sent.
 * Relays PMTU and redirect information to the routing code and records
 * an error window on the tunnel so the xmit path can signal link
 * failure.  Returns 0, or -ENOENT if no matching tunnel exists.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	/* Classify: only unreachable/time-exceeded/redirect are acted on. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	case ICMP_REDIRECT:
		break;
	}

	err = -ENOENT;
	/* The embedded header is one we sent, so its daddr is the peer
	 * (our "remote") and saddr is our "local". */
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
			      IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	/* NBMA tunnels (no fixed remote) get no error bookkeeping. */
	if (t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* TTL-inheriting tunnels expect occasional TTL expiry; ignore. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors inside the IPTUNNEL_ERR_TIMEO window; the xmit
	 * path drains err_count by reporting link failures. */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:

	return err;
}
406 
/*
 * Receive handler (xfrm_tunnel .handler) for protocol IPPROTO_IPIP.
 * Looks up the tunnel by the *outer* saddr/daddr, strips the outer
 * header state, propagates ECN from the outer header, updates per-cpu
 * stats and hands the inner IPv4 packet back to the stack.
 * Returns 0 when consumed, -1 to let the next handler try.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;

		secpath_reset(skb);

		/* Re-point the header offsets at the inner IPv4 packet. */
		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		__skb_tunnel_rx(skb, tunnel->dev);

		/* err > 1 means the outer/inner ECN combination is invalid
		 * (e.g. CE outer, non-ECT inner) and the packet must drop. */
		err = IP_ECN_decapsulate(iph, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &iph->saddr, iph->tos);
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;
				goto drop;
			}
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		netif_rx(skb);
		return 0;
	}

	/* Not ours: give other IPIP handlers a chance. */
	return -1;

drop:
	kfree_skb(skb);
	return 0;
}
457 
458 /*
459  *	This function assumes it is being called from dev_queue_xmit()
460  *	and that skb is filled properly by that function.
461  */
462 
/*
 * ndo_start_xmit: encapsulate an outgoing IPv4 skb in an outer IPv4
 * header and send it toward the tunnel's remote endpoint.  Always
 * returns NETDEV_TX_OK; failures are accounted in dev->stats and the
 * skb is freed.
 */
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	const struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	const struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	__be32 dst = tiph->daddr;
	struct flowi4 fl4;
	int    mtu;

	/* Only IPv4 payloads can be carried by this driver. */
	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* Low bit of the configured TOS means "inherit inner TOS". */
	if (tos & 1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = skb_rtable(skb)) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}
		dst = rt_nexthop(rt, old_iph->daddr);
	}

	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
				   dst, tiph->saddr,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(tos),
				   tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->dst.dev;

	/* Routing back through ourselves would recurse. */
	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

		if (mtu < 68) {		/* below the minimum IPv4 MTU */
			dev->stats.collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

		/* DF set and the payload doesn't fit: tell the sender. */
		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	/* Drain the ICMP error budget recorded by ipip_err(): while it
	 * lasts (and is recent), report link failure for each packet. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);	/* header moved with the copy */
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	fl4.daddr;
	iph->saddr		=	fl4.saddr;

	/* Configured TTL 0 means "inherit the inner TTL". */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);
	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
601 
/*
 * Derive the tunnel device's link properties (MTU, hard_header_len,
 * iflink) from the underlying device the encapsulated traffic will
 * actually use: the route to the remote endpoint if one is configured,
 * otherwise the explicitly bound link.
 */
static void ipip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct rtable *rt;
		struct flowi4 fl4;

		/* Probe the route to the remote endpoint to find the
		 * egress device. */
		rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
					   iph->daddr, iph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos),
					   tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	/* No route (or no fixed remote): fall back to the bound link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		/* Reserve room for the extra outer IPv4 header. */
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;
}
637 
/*
 * ndo_do_ioctl: SIOC{GET,ADD,CHG,DEL}TUNNEL handler.  Copies an
 * ip_tunnel_parm to/from userspace and creates, reconfigures, queries
 * or deletes tunnels accordingly.  Add/chg/del require CAP_NET_ADMIN.
 * NOTE(review): this uses capable(), not a namespace-aware check —
 * later kernels moved to ns_capable(); confirm the intended semantics.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up whichever tunnel the
		 * user described; on a real tunnel, report that tunnel. */
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity-check the user-supplied outer header template:
		 * must be plain IPv4/IPIP with no options, and only the
		 * DF bit allowed in frag_off. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* A fixed TTL implies PMTU discovery, so force DF. */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Endpoints already owned by another device. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing endpoints may not toggle the
				 * device between p-t-p and NBMA. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash under the new endpoints; readers
				 * are quiesced between unlink and relink. */
				t = netdev_priv(dev);
				ipip_tunnel_unlink(ipn, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Return the effective parameters to the caller. */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may not be deleted. */
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
753 
754 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
755 {
756 	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
757 		return -EINVAL;
758 	dev->mtu = new_mtu;
759 	return 0;
760 }
761 
/* net_device callbacks installed by ipip_tunnel_setup(). */
static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
	.ndo_get_stats64 = ipip_get_stats64,
};
769 
/* Device destructor: release the per-cpu stats then the netdev itself.
 * free_percpu(NULL) is a no-op, so this is safe before tstats exists. */
static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
775 
/* alloc_netdev() setup callback: configure defaults for a tunnel device
 * (tunnel ARP type, headroom for the outer header, no ARP, lockless TX,
 * and keeping the device inside its network namespace). */
static void ipip_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipip_netdev_ops;
	dev->destructor		= ipip_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* addresses are raw IPv4 endpoints */
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}
791 
/*
 * Per-device init for a regular (non-fallback) tunnel: expose the
 * endpoints as dev_addr/broadcast, bind link properties to the
 * underlying device and allocate per-cpu stats.
 * Returns 0 or -ENOMEM.
 */
static int ipip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
809 
/*
 * Init for the per-namespace fallback device ("tunl0"): both endpoints
 * stay wildcarded, so it is published in the wildcard slot and catches
 * IPIP traffic no explicit tunnel matches.  Returns 0 or -ENOMEM.
 */
static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	/* Minimal outer-header template; addresses remain 0 (wildcard). */
	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	/* Reference dropped in ipip_tunnel_uninit(). */
	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}
831 
/* Protocol hook for IPPROTO_IPIP, registered in ipip_init().  Priority 1
 * leaves room for higher-priority handlers (e.g. xfrm tunnel mode). */
static struct xfrm_tunnel ipip_handler __read_mostly = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};

static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
840 
841 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
842 {
843 	int prio;
844 
845 	for (prio = 1; prio < 4; prio++) {
846 		int h;
847 		for (h = 0; h < HASH_SIZE; h++) {
848 			struct ip_tunnel *t;
849 
850 			t = rtnl_dereference(ipn->tunnels[prio][h]);
851 			while (t != NULL) {
852 				unregister_netdevice_queue(t->dev, head);
853 				t = rtnl_dereference(t->next);
854 			}
855 		}
856 	}
857 }
858 
/*
 * Per-namespace init: wire the prio-indexed table view over the four
 * hash tables (order must match __ipip_bucket: 0=wc, 1=l, 2=r, 3=r_l),
 * then create and register the fallback "tunl0" device.
 * Returns 0 or a negative errno.
 */
static int __net_init ipip_init_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	struct ip_tunnel *t;
	int err;

	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					   "tunl0",
					   ipip_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;

	if ((err = register_netdev(ipn->fb_tunnel_dev)))
		goto err_reg_dev;

	t = netdev_priv(ipn->fb_tunnel_dev);

	strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
	return 0;

err_reg_dev:
	/* Frees tstats (if allocated) and the netdev itself. */
	ipip_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}
897 
/* Per-namespace teardown: batch-unregister every tunnel plus the
 * fallback device in a single unregister_netdevice_many() call to
 * amortize the RCU grace periods. */
static void __net_exit ipip_exit_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	ipip_destroy_tunnels(ipn, &list);
	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
909 
/* Registers ipip's per-namespace state and (de)init callbacks. */
static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
};
916 
917 static int __init ipip_init(void)
918 {
919 	int err;
920 
921 	printk(banner);
922 
923 	err = register_pernet_device(&ipip_net_ops);
924 	if (err < 0)
925 		return err;
926 	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
927 	if (err < 0) {
928 		unregister_pernet_device(&ipip_net_ops);
929 		pr_info("%s: can't register tunnel\n", __func__);
930 	}
931 	return err;
932 }
933 
/* Module exit: release the IPPROTO_IPIP slot, then tear down the
 * per-namespace state (reverse of ipip_init()). */
static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		pr_info("%s: can't deregister tunnel\n", __func__);

	unregister_pernet_device(&ipip_net_ops);
}
941 
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
/* Autoload this module when the "tunl0" device is requested. */
MODULE_ALIAS_NETDEV("tunl0");
946