xref: /linux/net/ipv4/devinet.c (revision d39d0ed196aa1685bb24771e92f78633c66ac9cb)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65 
66 static struct ipv4_devconf ipv4_devconf = {
67 	.data = {
68 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72 	},
73 };
74 
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76 	.data = {
77 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82 	},
83 };
84 
/*
 * Access devconf attribute @attr in the devconf_dflt set of namespace @net.
 *
 * @net is wrapped in parentheses so that any pointer-valued expression
 * (for example "base + 1") can be passed; without them the expansion
 * "*base + 1->ipv4..." would not even parse.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87 
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89 	[IFA_LOCAL]     	= { .type = NLA_U32 },
90 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
91 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
92 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94 
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99 			 int destroy);
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
103 #else
104 static inline void devinet_sysctl_register(struct in_device *idev)
105 {
106 }
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
108 {
109 }
110 #endif
111 
112 /* Locks all the inet devices. */
113 
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117 }
118 
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 	if (ifa->ifa_dev)
123 		in_dev_put(ifa->ifa_dev);
124 	kfree(ifa);
125 }
126 
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131 
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134 	struct net_device *dev = idev->dev;
135 
136 	WARN_ON(idev->ifa_list);
137 	WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 	       idev, dev ? dev->name : "NIL");
141 #endif
142 	dev_put(dev);
143 	if (!idev->dead)
144 		pr_err("Freeing alive in_device %p\n", idev);
145 	else
146 		kfree(idev);
147 }
148 EXPORT_SYMBOL(in_dev_finish_destroy);
149 
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152 	struct in_device *in_dev;
153 
154 	ASSERT_RTNL();
155 
156 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157 	if (!in_dev)
158 		goto out;
159 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160 			sizeof(in_dev->cnf));
161 	in_dev->cnf.sysctl = NULL;
162 	in_dev->dev = dev;
163 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164 	if (!in_dev->arp_parms)
165 		goto out_kfree;
166 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167 		dev_disable_lro(dev);
168 	/* Reference in_dev->dev */
169 	dev_hold(dev);
170 	/* Account for reference dev->ip_ptr (below) */
171 	in_dev_hold(in_dev);
172 
173 	devinet_sysctl_register(in_dev);
174 	ip_mc_init_dev(in_dev);
175 	if (dev->flags & IFF_UP)
176 		ip_mc_up(in_dev);
177 
178 	/* we can receive as soon as ip_ptr is set -- do this last */
179 	rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181 	return in_dev;
182 out_kfree:
183 	kfree(in_dev);
184 	in_dev = NULL;
185 	goto out;
186 }
187 
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
191 	in_dev_put(idev);
192 }
193 
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196 	struct in_ifaddr *ifa;
197 	struct net_device *dev;
198 
199 	ASSERT_RTNL();
200 
201 	dev = in_dev->dev;
202 
203 	in_dev->dead = 1;
204 
205 	ip_mc_destroy_dev(in_dev);
206 
207 	while ((ifa = in_dev->ifa_list) != NULL) {
208 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209 		inet_free_ifa(ifa);
210 	}
211 
212 	dev->ip_ptr = NULL;
213 
214 	devinet_sysctl_unregister(in_dev);
215 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216 	arp_ifdown(dev);
217 
218 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 }
220 
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 {
223 	rcu_read_lock();
224 	for_primary_ifa(in_dev) {
225 		if (inet_ifa_match(a, ifa)) {
226 			if (!b || inet_ifa_match(b, ifa)) {
227 				rcu_read_unlock();
228 				return 1;
229 			}
230 		}
231 	} endfor_ifa(in_dev);
232 	rcu_read_unlock();
233 	return 0;
234 }
235 
236 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
237 			 int destroy, struct nlmsghdr *nlh, u32 pid)
238 {
239 	struct in_ifaddr *promote = NULL;
240 	struct in_ifaddr *ifa, *ifa1 = *ifap;
241 	struct in_ifaddr *last_prim = in_dev->ifa_list;
242 	struct in_ifaddr *prev_prom = NULL;
243 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
244 
245 	ASSERT_RTNL();
246 
247 	/* 1. Deleting primary ifaddr forces deletion all secondaries
248 	 * unless alias promotion is set
249 	 **/
250 
251 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
252 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
253 
254 		while ((ifa = *ifap1) != NULL) {
255 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
256 			    ifa1->ifa_scope <= ifa->ifa_scope)
257 				last_prim = ifa;
258 
259 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
260 			    ifa1->ifa_mask != ifa->ifa_mask ||
261 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
262 				ifap1 = &ifa->ifa_next;
263 				prev_prom = ifa;
264 				continue;
265 			}
266 
267 			if (!do_promote) {
268 				*ifap1 = ifa->ifa_next;
269 
270 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
271 				blocking_notifier_call_chain(&inetaddr_chain,
272 						NETDEV_DOWN, ifa);
273 				inet_free_ifa(ifa);
274 			} else {
275 				promote = ifa;
276 				break;
277 			}
278 		}
279 	}
280 
281 	/* 2. Unlink it */
282 
283 	*ifap = ifa1->ifa_next;
284 
285 	/* 3. Announce address deletion */
286 
287 	/* Send message first, then call notifier.
288 	   At first sight, FIB update triggered by notifier
289 	   will refer to already deleted ifaddr, that could confuse
290 	   netlink listeners. It is not true: look, gated sees
291 	   that route deleted and if it still thinks that ifaddr
292 	   is valid, it will try to restore deleted routes... Grr.
293 	   So that, this order is correct.
294 	 */
295 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
296 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
297 
298 	if (promote) {
299 
300 		if (prev_prom) {
301 			prev_prom->ifa_next = promote->ifa_next;
302 			promote->ifa_next = last_prim->ifa_next;
303 			last_prim->ifa_next = promote;
304 		}
305 
306 		promote->ifa_flags &= ~IFA_F_SECONDARY;
307 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
308 		blocking_notifier_call_chain(&inetaddr_chain,
309 				NETDEV_UP, promote);
310 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
311 			if (ifa1->ifa_mask != ifa->ifa_mask ||
312 			    !inet_ifa_match(ifa1->ifa_address, ifa))
313 					continue;
314 			fib_add_ifaddr(ifa);
315 		}
316 
317 	}
318 	if (destroy)
319 		inet_free_ifa(ifa1);
320 }
321 
322 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
323 			 int destroy)
324 {
325 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
326 }
327 
328 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
329 			     u32 pid)
330 {
331 	struct in_device *in_dev = ifa->ifa_dev;
332 	struct in_ifaddr *ifa1, **ifap, **last_primary;
333 
334 	ASSERT_RTNL();
335 
336 	if (!ifa->ifa_local) {
337 		inet_free_ifa(ifa);
338 		return 0;
339 	}
340 
341 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
342 	last_primary = &in_dev->ifa_list;
343 
344 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
345 	     ifap = &ifa1->ifa_next) {
346 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
347 		    ifa->ifa_scope <= ifa1->ifa_scope)
348 			last_primary = &ifa1->ifa_next;
349 		if (ifa1->ifa_mask == ifa->ifa_mask &&
350 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
351 			if (ifa1->ifa_local == ifa->ifa_local) {
352 				inet_free_ifa(ifa);
353 				return -EEXIST;
354 			}
355 			if (ifa1->ifa_scope != ifa->ifa_scope) {
356 				inet_free_ifa(ifa);
357 				return -EINVAL;
358 			}
359 			ifa->ifa_flags |= IFA_F_SECONDARY;
360 		}
361 	}
362 
363 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
364 		net_srandom(ifa->ifa_local);
365 		ifap = last_primary;
366 	}
367 
368 	ifa->ifa_next = *ifap;
369 	*ifap = ifa;
370 
371 	/* Send message first, then call notifier.
372 	   Notifier will trigger FIB update, so that
373 	   listeners of netlink will know about new ifaddr */
374 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
375 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
376 
377 	return 0;
378 }
379 
380 static int inet_insert_ifa(struct in_ifaddr *ifa)
381 {
382 	return __inet_insert_ifa(ifa, NULL, 0);
383 }
384 
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 {
387 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
388 
389 	ASSERT_RTNL();
390 
391 	if (!in_dev) {
392 		inet_free_ifa(ifa);
393 		return -ENOBUFS;
394 	}
395 	ipv4_devconf_setall(in_dev);
396 	if (ifa->ifa_dev != in_dev) {
397 		WARN_ON(ifa->ifa_dev);
398 		in_dev_hold(in_dev);
399 		ifa->ifa_dev = in_dev;
400 	}
401 	if (ipv4_is_loopback(ifa->ifa_local))
402 		ifa->ifa_scope = RT_SCOPE_HOST;
403 	return inet_insert_ifa(ifa);
404 }
405 
406 struct in_device *inetdev_by_index(struct net *net, int ifindex)
407 {
408 	struct net_device *dev;
409 	struct in_device *in_dev = NULL;
410 
411 	rcu_read_lock();
412 	dev = dev_get_by_index_rcu(net, ifindex);
413 	if (dev)
414 		in_dev = in_dev_get(dev);
415 	rcu_read_unlock();
416 	return in_dev;
417 }
418 EXPORT_SYMBOL(inetdev_by_index);
419 
420 /* Called only from RTNL semaphored context. No locks. */
421 
422 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
423 				    __be32 mask)
424 {
425 	ASSERT_RTNL();
426 
427 	for_primary_ifa(in_dev) {
428 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
429 			return ifa;
430 	} endfor_ifa(in_dev);
431 	return NULL;
432 }
433 
434 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
435 {
436 	struct net *net = sock_net(skb->sk);
437 	struct nlattr *tb[IFA_MAX+1];
438 	struct in_device *in_dev;
439 	struct ifaddrmsg *ifm;
440 	struct in_ifaddr *ifa, **ifap;
441 	int err = -EINVAL;
442 
443 	ASSERT_RTNL();
444 
445 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
446 	if (err < 0)
447 		goto errout;
448 
449 	ifm = nlmsg_data(nlh);
450 	in_dev = inetdev_by_index(net, ifm->ifa_index);
451 	if (in_dev == NULL) {
452 		err = -ENODEV;
453 		goto errout;
454 	}
455 
456 	__in_dev_put(in_dev);
457 
458 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
459 	     ifap = &ifa->ifa_next) {
460 		if (tb[IFA_LOCAL] &&
461 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
462 			continue;
463 
464 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
465 			continue;
466 
467 		if (tb[IFA_ADDRESS] &&
468 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
469 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
470 			continue;
471 
472 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
473 		return 0;
474 	}
475 
476 	err = -EADDRNOTAVAIL;
477 errout:
478 	return err;
479 }
480 
481 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
482 {
483 	struct nlattr *tb[IFA_MAX+1];
484 	struct in_ifaddr *ifa;
485 	struct ifaddrmsg *ifm;
486 	struct net_device *dev;
487 	struct in_device *in_dev;
488 	int err;
489 
490 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
491 	if (err < 0)
492 		goto errout;
493 
494 	ifm = nlmsg_data(nlh);
495 	err = -EINVAL;
496 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
497 		goto errout;
498 
499 	dev = __dev_get_by_index(net, ifm->ifa_index);
500 	err = -ENODEV;
501 	if (dev == NULL)
502 		goto errout;
503 
504 	in_dev = __in_dev_get_rtnl(dev);
505 	err = -ENOBUFS;
506 	if (in_dev == NULL)
507 		goto errout;
508 
509 	ifa = inet_alloc_ifa();
510 	if (ifa == NULL)
511 		/*
512 		 * A potential indev allocation can be left alive, it stays
513 		 * assigned to its device and is destroy with it.
514 		 */
515 		goto errout;
516 
517 	ipv4_devconf_setall(in_dev);
518 	in_dev_hold(in_dev);
519 
520 	if (tb[IFA_ADDRESS] == NULL)
521 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
522 
523 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
524 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
525 	ifa->ifa_flags = ifm->ifa_flags;
526 	ifa->ifa_scope = ifm->ifa_scope;
527 	ifa->ifa_dev = in_dev;
528 
529 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
530 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
531 
532 	if (tb[IFA_BROADCAST])
533 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
534 
535 	if (tb[IFA_LABEL])
536 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
537 	else
538 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
539 
540 	return ifa;
541 
542 errout:
543 	return ERR_PTR(err);
544 }
545 
546 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
547 {
548 	struct net *net = sock_net(skb->sk);
549 	struct in_ifaddr *ifa;
550 
551 	ASSERT_RTNL();
552 
553 	ifa = rtm_to_ifaddr(net, nlh);
554 	if (IS_ERR(ifa))
555 		return PTR_ERR(ifa);
556 
557 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
558 }
559 
560 /*
561  *	Determine a default network mask, based on the IP address.
562  */
563 
564 static inline int inet_abc_len(__be32 addr)
565 {
566 	int rc = -1;	/* Something else, probably a multicast. */
567 
568 	if (ipv4_is_zeronet(addr))
569 		rc = 0;
570 	else {
571 		__u32 haddr = ntohl(addr);
572 
573 		if (IN_CLASSA(haddr))
574 			rc = 8;
575 		else if (IN_CLASSB(haddr))
576 			rc = 16;
577 		else if (IN_CLASSC(haddr))
578 			rc = 24;
579 	}
580 
581 	return rc;
582 }
583 
584 
585 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
586 {
587 	struct ifreq ifr;
588 	struct sockaddr_in sin_orig;
589 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
590 	struct in_device *in_dev;
591 	struct in_ifaddr **ifap = NULL;
592 	struct in_ifaddr *ifa = NULL;
593 	struct net_device *dev;
594 	char *colon;
595 	int ret = -EFAULT;
596 	int tryaddrmatch = 0;
597 
598 	/*
599 	 *	Fetch the caller's info block into kernel space
600 	 */
601 
602 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
603 		goto out;
604 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
605 
606 	/* save original address for comparison */
607 	memcpy(&sin_orig, sin, sizeof(*sin));
608 
609 	colon = strchr(ifr.ifr_name, ':');
610 	if (colon)
611 		*colon = 0;
612 
613 	dev_load(net, ifr.ifr_name);
614 
615 	switch (cmd) {
616 	case SIOCGIFADDR:	/* Get interface address */
617 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
618 	case SIOCGIFDSTADDR:	/* Get the destination address */
619 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
620 		/* Note that these ioctls will not sleep,
621 		   so that we do not impose a lock.
622 		   One day we will be forced to put shlock here (I mean SMP)
623 		 */
624 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
625 		memset(sin, 0, sizeof(*sin));
626 		sin->sin_family = AF_INET;
627 		break;
628 
629 	case SIOCSIFFLAGS:
630 		ret = -EACCES;
631 		if (!capable(CAP_NET_ADMIN))
632 			goto out;
633 		break;
634 	case SIOCSIFADDR:	/* Set interface address (and family) */
635 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
636 	case SIOCSIFDSTADDR:	/* Set the destination address */
637 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
638 		ret = -EACCES;
639 		if (!capable(CAP_NET_ADMIN))
640 			goto out;
641 		ret = -EINVAL;
642 		if (sin->sin_family != AF_INET)
643 			goto out;
644 		break;
645 	default:
646 		ret = -EINVAL;
647 		goto out;
648 	}
649 
650 	rtnl_lock();
651 
652 	ret = -ENODEV;
653 	dev = __dev_get_by_name(net, ifr.ifr_name);
654 	if (!dev)
655 		goto done;
656 
657 	if (colon)
658 		*colon = ':';
659 
660 	in_dev = __in_dev_get_rtnl(dev);
661 	if (in_dev) {
662 		if (tryaddrmatch) {
663 			/* Matthias Andree */
664 			/* compare label and address (4.4BSD style) */
665 			/* note: we only do this for a limited set of ioctls
666 			   and only if the original address family was AF_INET.
667 			   This is checked above. */
668 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
669 			     ifap = &ifa->ifa_next) {
670 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
671 				    sin_orig.sin_addr.s_addr ==
672 							ifa->ifa_address) {
673 					break; /* found */
674 				}
675 			}
676 		}
677 		/* we didn't get a match, maybe the application is
678 		   4.3BSD-style and passed in junk so we fall back to
679 		   comparing just the label */
680 		if (!ifa) {
681 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
682 			     ifap = &ifa->ifa_next)
683 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
684 					break;
685 		}
686 	}
687 
688 	ret = -EADDRNOTAVAIL;
689 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
690 		goto done;
691 
692 	switch (cmd) {
693 	case SIOCGIFADDR:	/* Get interface address */
694 		sin->sin_addr.s_addr = ifa->ifa_local;
695 		goto rarok;
696 
697 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
698 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
699 		goto rarok;
700 
701 	case SIOCGIFDSTADDR:	/* Get the destination address */
702 		sin->sin_addr.s_addr = ifa->ifa_address;
703 		goto rarok;
704 
705 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
706 		sin->sin_addr.s_addr = ifa->ifa_mask;
707 		goto rarok;
708 
709 	case SIOCSIFFLAGS:
710 		if (colon) {
711 			ret = -EADDRNOTAVAIL;
712 			if (!ifa)
713 				break;
714 			ret = 0;
715 			if (!(ifr.ifr_flags & IFF_UP))
716 				inet_del_ifa(in_dev, ifap, 1);
717 			break;
718 		}
719 		ret = dev_change_flags(dev, ifr.ifr_flags);
720 		break;
721 
722 	case SIOCSIFADDR:	/* Set interface address (and family) */
723 		ret = -EINVAL;
724 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
725 			break;
726 
727 		if (!ifa) {
728 			ret = -ENOBUFS;
729 			ifa = inet_alloc_ifa();
730 			if (!ifa)
731 				break;
732 			if (colon)
733 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
734 			else
735 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
736 		} else {
737 			ret = 0;
738 			if (ifa->ifa_local == sin->sin_addr.s_addr)
739 				break;
740 			inet_del_ifa(in_dev, ifap, 0);
741 			ifa->ifa_broadcast = 0;
742 			ifa->ifa_scope = 0;
743 		}
744 
745 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
746 
747 		if (!(dev->flags & IFF_POINTOPOINT)) {
748 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
749 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
750 			if ((dev->flags & IFF_BROADCAST) &&
751 			    ifa->ifa_prefixlen < 31)
752 				ifa->ifa_broadcast = ifa->ifa_address |
753 						     ~ifa->ifa_mask;
754 		} else {
755 			ifa->ifa_prefixlen = 32;
756 			ifa->ifa_mask = inet_make_mask(32);
757 		}
758 		ret = inet_set_ifa(dev, ifa);
759 		break;
760 
761 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
762 		ret = 0;
763 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
764 			inet_del_ifa(in_dev, ifap, 0);
765 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
766 			inet_insert_ifa(ifa);
767 		}
768 		break;
769 
770 	case SIOCSIFDSTADDR:	/* Set the destination address */
771 		ret = 0;
772 		if (ifa->ifa_address == sin->sin_addr.s_addr)
773 			break;
774 		ret = -EINVAL;
775 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
776 			break;
777 		ret = 0;
778 		inet_del_ifa(in_dev, ifap, 0);
779 		ifa->ifa_address = sin->sin_addr.s_addr;
780 		inet_insert_ifa(ifa);
781 		break;
782 
783 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
784 
785 		/*
786 		 *	The mask we set must be legal.
787 		 */
788 		ret = -EINVAL;
789 		if (bad_mask(sin->sin_addr.s_addr, 0))
790 			break;
791 		ret = 0;
792 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
793 			__be32 old_mask = ifa->ifa_mask;
794 			inet_del_ifa(in_dev, ifap, 0);
795 			ifa->ifa_mask = sin->sin_addr.s_addr;
796 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
797 
798 			/* See if current broadcast address matches
799 			 * with current netmask, then recalculate
800 			 * the broadcast address. Otherwise it's a
801 			 * funny address, so don't touch it since
802 			 * the user seems to know what (s)he's doing...
803 			 */
804 			if ((dev->flags & IFF_BROADCAST) &&
805 			    (ifa->ifa_prefixlen < 31) &&
806 			    (ifa->ifa_broadcast ==
807 			     (ifa->ifa_local|~old_mask))) {
808 				ifa->ifa_broadcast = (ifa->ifa_local |
809 						      ~sin->sin_addr.s_addr);
810 			}
811 			inet_insert_ifa(ifa);
812 		}
813 		break;
814 	}
815 done:
816 	rtnl_unlock();
817 out:
818 	return ret;
819 rarok:
820 	rtnl_unlock();
821 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
822 	goto out;
823 }
824 
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826 {
827 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
828 	struct in_ifaddr *ifa;
829 	struct ifreq ifr;
830 	int done = 0;
831 
832 	if (!in_dev)
833 		goto out;
834 
835 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
836 		if (!buf) {
837 			done += sizeof(ifr);
838 			continue;
839 		}
840 		if (len < (int) sizeof(ifr))
841 			break;
842 		memset(&ifr, 0, sizeof(struct ifreq));
843 		if (ifa->ifa_label)
844 			strcpy(ifr.ifr_name, ifa->ifa_label);
845 		else
846 			strcpy(ifr.ifr_name, dev->name);
847 
848 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
850 								ifa->ifa_local;
851 
852 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
853 			done = -EFAULT;
854 			break;
855 		}
856 		buf  += sizeof(struct ifreq);
857 		len  -= sizeof(struct ifreq);
858 		done += sizeof(struct ifreq);
859 	}
860 out:
861 	return done;
862 }
863 
864 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
865 {
866 	__be32 addr = 0;
867 	struct in_device *in_dev;
868 	struct net *net = dev_net(dev);
869 
870 	rcu_read_lock();
871 	in_dev = __in_dev_get_rcu(dev);
872 	if (!in_dev)
873 		goto no_in_dev;
874 
875 	for_primary_ifa(in_dev) {
876 		if (ifa->ifa_scope > scope)
877 			continue;
878 		if (!dst || inet_ifa_match(dst, ifa)) {
879 			addr = ifa->ifa_local;
880 			break;
881 		}
882 		if (!addr)
883 			addr = ifa->ifa_local;
884 	} endfor_ifa(in_dev);
885 
886 	if (addr)
887 		goto out_unlock;
888 no_in_dev:
889 
890 	/* Not loopback addresses on loopback should be preferred
891 	   in this case. It is importnat that lo is the first interface
892 	   in dev_base list.
893 	 */
894 	for_each_netdev_rcu(net, dev) {
895 		in_dev = __in_dev_get_rcu(dev);
896 		if (!in_dev)
897 			continue;
898 
899 		for_primary_ifa(in_dev) {
900 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
901 			    ifa->ifa_scope <= scope) {
902 				addr = ifa->ifa_local;
903 				goto out_unlock;
904 			}
905 		} endfor_ifa(in_dev);
906 	}
907 out_unlock:
908 	rcu_read_unlock();
909 	return addr;
910 }
911 EXPORT_SYMBOL(inet_select_addr);
912 
913 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
914 			      __be32 local, int scope)
915 {
916 	int same = 0;
917 	__be32 addr = 0;
918 
919 	for_ifa(in_dev) {
920 		if (!addr &&
921 		    (local == ifa->ifa_local || !local) &&
922 		    ifa->ifa_scope <= scope) {
923 			addr = ifa->ifa_local;
924 			if (same)
925 				break;
926 		}
927 		if (!same) {
928 			same = (!local || inet_ifa_match(local, ifa)) &&
929 				(!dst || inet_ifa_match(dst, ifa));
930 			if (same && addr) {
931 				if (local || !dst)
932 					break;
933 				/* Is the selected addr into dst subnet? */
934 				if (inet_ifa_match(addr, ifa))
935 					break;
936 				/* No, then can we use new local src? */
937 				if (ifa->ifa_scope <= scope) {
938 					addr = ifa->ifa_local;
939 					break;
940 				}
941 				/* search for large dst subnet for addr */
942 				same = 0;
943 			}
944 		}
945 	} endfor_ifa(in_dev);
946 
947 	return same ? addr : 0;
948 }
949 
950 /*
951  * Confirm that local IP address exists using wildcards:
952  * - in_dev: only on this interface, 0=any interface
953  * - dst: only in the same subnet as dst, 0=any dst
954  * - local: address, 0=autoselect the local address
955  * - scope: maximum allowed scope value for the local address
956  */
957 __be32 inet_confirm_addr(struct in_device *in_dev,
958 			 __be32 dst, __be32 local, int scope)
959 {
960 	__be32 addr = 0;
961 	struct net_device *dev;
962 	struct net *net;
963 
964 	if (scope != RT_SCOPE_LINK)
965 		return confirm_addr_indev(in_dev, dst, local, scope);
966 
967 	net = dev_net(in_dev->dev);
968 	rcu_read_lock();
969 	for_each_netdev_rcu(net, dev) {
970 		in_dev = __in_dev_get_rcu(dev);
971 		if (in_dev) {
972 			addr = confirm_addr_indev(in_dev, dst, local, scope);
973 			if (addr)
974 				break;
975 		}
976 	}
977 	rcu_read_unlock();
978 
979 	return addr;
980 }
981 
982 /*
983  *	Device notifier
984  */
985 
986 int register_inetaddr_notifier(struct notifier_block *nb)
987 {
988 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
989 }
990 EXPORT_SYMBOL(register_inetaddr_notifier);
991 
992 int unregister_inetaddr_notifier(struct notifier_block *nb)
993 {
994 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
995 }
996 EXPORT_SYMBOL(unregister_inetaddr_notifier);
997 
998 /* Rename ifa_labels for a device name change. Make some effort to preserve
999  * existing alias numbering and to create unique labels if possible.
1000 */
1001 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1002 {
1003 	struct in_ifaddr *ifa;
1004 	int named = 0;
1005 
1006 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1007 		char old[IFNAMSIZ], *dot;
1008 
1009 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1010 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1011 		if (named++ == 0)
1012 			goto skip;
1013 		dot = strchr(old, ':');
1014 		if (dot == NULL) {
1015 			sprintf(old, ":%d", named);
1016 			dot = old;
1017 		}
1018 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1019 			strcat(ifa->ifa_label, dot);
1020 		else
1021 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1022 skip:
1023 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1024 	}
1025 }
1026 
/* An MTU is accepted for IPv4 only if it is at least 68 bytes. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return !(mtu < 68);
}
1031 
1032 /* Called only under RTNL semaphore */
1033 
1034 static int inetdev_event(struct notifier_block *this, unsigned long event,
1035 			 void *ptr)
1036 {
1037 	struct net_device *dev = ptr;
1038 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1039 
1040 	ASSERT_RTNL();
1041 
1042 	if (!in_dev) {
1043 		if (event == NETDEV_REGISTER) {
1044 			in_dev = inetdev_init(dev);
1045 			if (!in_dev)
1046 				return notifier_from_errno(-ENOMEM);
1047 			if (dev->flags & IFF_LOOPBACK) {
1048 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1049 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1050 			}
1051 		} else if (event == NETDEV_CHANGEMTU) {
1052 			/* Re-enabling IP */
1053 			if (inetdev_valid_mtu(dev->mtu))
1054 				in_dev = inetdev_init(dev);
1055 		}
1056 		goto out;
1057 	}
1058 
1059 	switch (event) {
1060 	case NETDEV_REGISTER:
1061 		printk(KERN_DEBUG "inetdev_event: bug\n");
1062 		dev->ip_ptr = NULL;
1063 		break;
1064 	case NETDEV_UP:
1065 		if (!inetdev_valid_mtu(dev->mtu))
1066 			break;
1067 		if (dev->flags & IFF_LOOPBACK) {
1068 			struct in_ifaddr *ifa = inet_alloc_ifa();
1069 
1070 			if (ifa) {
1071 				ifa->ifa_local =
1072 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1073 				ifa->ifa_prefixlen = 8;
1074 				ifa->ifa_mask = inet_make_mask(8);
1075 				in_dev_hold(in_dev);
1076 				ifa->ifa_dev = in_dev;
1077 				ifa->ifa_scope = RT_SCOPE_HOST;
1078 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1079 				inet_insert_ifa(ifa);
1080 			}
1081 		}
1082 		ip_mc_up(in_dev);
1083 		/* fall through */
1084 	case NETDEV_NOTIFY_PEERS:
1085 	case NETDEV_CHANGEADDR:
1086 		/* Send gratuitous ARP to notify of link change */
1087 		if (IN_DEV_ARP_NOTIFY(in_dev)) {
1088 			struct in_ifaddr *ifa = in_dev->ifa_list;
1089 
1090 			if (ifa)
1091 				arp_send(ARPOP_REQUEST, ETH_P_ARP,
1092 					 ifa->ifa_address, dev,
1093 					 ifa->ifa_address, NULL,
1094 					 dev->dev_addr, NULL);
1095 		}
1096 		break;
1097 	case NETDEV_DOWN:
1098 		ip_mc_down(in_dev);
1099 		break;
1100 	case NETDEV_PRE_TYPE_CHANGE:
1101 		ip_mc_unmap(in_dev);
1102 		break;
1103 	case NETDEV_POST_TYPE_CHANGE:
1104 		ip_mc_remap(in_dev);
1105 		break;
1106 	case NETDEV_CHANGEMTU:
1107 		if (inetdev_valid_mtu(dev->mtu))
1108 			break;
1109 		/* disable IP when MTU is not enough */
1110 	case NETDEV_UNREGISTER:
1111 		inetdev_destroy(in_dev);
1112 		break;
1113 	case NETDEV_CHANGENAME:
1114 		/* Do not notify about label change, this event is
1115 		 * not interesting to applications using netlink.
1116 		 */
1117 		inetdev_changename(dev, in_dev);
1118 
1119 		devinet_sysctl_unregister(in_dev);
1120 		devinet_sysctl_register(in_dev);
1121 		break;
1122 	}
1123 out:
1124 	return NOTIFY_DONE;
1125 }
1126 
1127 static struct notifier_block ip_netdev_notifier = {
1128 	.notifier_call = inetdev_event,
1129 };
1130 
1131 static inline size_t inet_nlmsg_size(void)
1132 {
1133 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1134 	       + nla_total_size(4) /* IFA_ADDRESS */
1135 	       + nla_total_size(4) /* IFA_LOCAL */
1136 	       + nla_total_size(4) /* IFA_BROADCAST */
1137 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1138 }
1139 
1140 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1141 			    u32 pid, u32 seq, int event, unsigned int flags)
1142 {
1143 	struct ifaddrmsg *ifm;
1144 	struct nlmsghdr  *nlh;
1145 
1146 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1147 	if (nlh == NULL)
1148 		return -EMSGSIZE;
1149 
1150 	ifm = nlmsg_data(nlh);
1151 	ifm->ifa_family = AF_INET;
1152 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1153 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1154 	ifm->ifa_scope = ifa->ifa_scope;
1155 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1156 
1157 	if (ifa->ifa_address)
1158 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1159 
1160 	if (ifa->ifa_local)
1161 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1162 
1163 	if (ifa->ifa_broadcast)
1164 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1165 
1166 	if (ifa->ifa_label[0])
1167 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1168 
1169 	return nlmsg_end(skb, nlh);
1170 
1171 nla_put_failure:
1172 	nlmsg_cancel(skb, nlh);
1173 	return -EMSGSIZE;
1174 }
1175 
1176 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1177 {
1178 	struct net *net = sock_net(skb->sk);
1179 	int h, s_h;
1180 	int idx, s_idx;
1181 	int ip_idx, s_ip_idx;
1182 	struct net_device *dev;
1183 	struct in_device *in_dev;
1184 	struct in_ifaddr *ifa;
1185 	struct hlist_head *head;
1186 	struct hlist_node *node;
1187 
1188 	s_h = cb->args[0];
1189 	s_idx = idx = cb->args[1];
1190 	s_ip_idx = ip_idx = cb->args[2];
1191 
1192 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1193 		idx = 0;
1194 		head = &net->dev_index_head[h];
1195 		rcu_read_lock();
1196 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1197 			if (idx < s_idx)
1198 				goto cont;
1199 			if (h > s_h || idx > s_idx)
1200 				s_ip_idx = 0;
1201 			in_dev = __in_dev_get_rcu(dev);
1202 			if (!in_dev)
1203 				goto cont;
1204 
1205 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1206 			     ifa = ifa->ifa_next, ip_idx++) {
1207 				if (ip_idx < s_ip_idx)
1208 					continue;
1209 				if (inet_fill_ifaddr(skb, ifa,
1210 					     NETLINK_CB(cb->skb).pid,
1211 					     cb->nlh->nlmsg_seq,
1212 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1213 					rcu_read_unlock();
1214 					goto done;
1215 				}
1216 			}
1217 cont:
1218 			idx++;
1219 		}
1220 		rcu_read_unlock();
1221 	}
1222 
1223 done:
1224 	cb->args[0] = h;
1225 	cb->args[1] = idx;
1226 	cb->args[2] = ip_idx;
1227 
1228 	return skb->len;
1229 }
1230 
1231 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1232 		      u32 pid)
1233 {
1234 	struct sk_buff *skb;
1235 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1236 	int err = -ENOBUFS;
1237 	struct net *net;
1238 
1239 	net = dev_net(ifa->ifa_dev->dev);
1240 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1241 	if (skb == NULL)
1242 		goto errout;
1243 
1244 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1245 	if (err < 0) {
1246 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1247 		WARN_ON(err == -EMSGSIZE);
1248 		kfree_skb(skb);
1249 		goto errout;
1250 	}
1251 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1252 	return;
1253 errout:
1254 	if (err < 0)
1255 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1256 }
1257 
1258 #ifdef CONFIG_SYSCTL
1259 
1260 static void devinet_copy_dflt_conf(struct net *net, int i)
1261 {
1262 	struct net_device *dev;
1263 
1264 	rcu_read_lock();
1265 	for_each_netdev_rcu(net, dev) {
1266 		struct in_device *in_dev;
1267 
1268 		in_dev = __in_dev_get_rcu(dev);
1269 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1270 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1271 	}
1272 	rcu_read_unlock();
1273 }
1274 
1275 /* called with RTNL locked */
1276 static void inet_forward_change(struct net *net)
1277 {
1278 	struct net_device *dev;
1279 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1280 
1281 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1282 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1283 
1284 	for_each_netdev(net, dev) {
1285 		struct in_device *in_dev;
1286 		if (on)
1287 			dev_disable_lro(dev);
1288 		rcu_read_lock();
1289 		in_dev = __in_dev_get_rcu(dev);
1290 		if (in_dev)
1291 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1292 		rcu_read_unlock();
1293 	}
1294 }
1295 
1296 static int devinet_conf_proc(ctl_table *ctl, int write,
1297 			     void __user *buffer,
1298 			     size_t *lenp, loff_t *ppos)
1299 {
1300 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1301 
1302 	if (write) {
1303 		struct ipv4_devconf *cnf = ctl->extra1;
1304 		struct net *net = ctl->extra2;
1305 		int i = (int *)ctl->data - cnf->data;
1306 
1307 		set_bit(i, cnf->state);
1308 
1309 		if (cnf == net->ipv4.devconf_dflt)
1310 			devinet_copy_dflt_conf(net, i);
1311 	}
1312 
1313 	return ret;
1314 }
1315 
1316 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1317 				  void __user *buffer,
1318 				  size_t *lenp, loff_t *ppos)
1319 {
1320 	int *valp = ctl->data;
1321 	int val = *valp;
1322 	loff_t pos = *ppos;
1323 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1324 
1325 	if (write && *valp != val) {
1326 		struct net *net = ctl->extra2;
1327 
1328 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1329 			if (!rtnl_trylock()) {
1330 				/* Restore the original values before restarting */
1331 				*valp = val;
1332 				*ppos = pos;
1333 				return restart_syscall();
1334 			}
1335 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1336 				inet_forward_change(net);
1337 			} else if (*valp) {
1338 				struct ipv4_devconf *cnf = ctl->extra1;
1339 				struct in_device *idev =
1340 					container_of(cnf, struct in_device, cnf);
1341 				dev_disable_lro(idev->dev);
1342 			}
1343 			rtnl_unlock();
1344 			rt_cache_flush(net, 0);
1345 		}
1346 	}
1347 
1348 	return ret;
1349 }
1350 
1351 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1352 			 void __user *buffer,
1353 			 size_t *lenp, loff_t *ppos)
1354 {
1355 	int *valp = ctl->data;
1356 	int val = *valp;
1357 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1358 	struct net *net = ctl->extra2;
1359 
1360 	if (write && *valp != val)
1361 		rt_cache_flush(net, 0);
1362 
1363 	return ret;
1364 }
1365 
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 * .data and .extra1 refer to the global ipv4_devconf; the other
 * arguments give the proc name, file mode and handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Writable (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Writable (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Writable (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
1388 
1389 static struct devinet_sysctl_table {
1390 	struct ctl_table_header *sysctl_header;
1391 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1392 	char *dev_name;
1393 } devinet_sysctl = {
1394 	.devinet_vars = {
1395 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1396 					     devinet_sysctl_forward),
1397 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1398 
1399 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1400 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1401 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1402 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1403 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1404 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1405 					"accept_source_route"),
1406 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1407 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1408 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1409 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1410 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1411 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1412 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1413 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1414 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1415 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1416 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1417 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1418 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1419 
1420 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1421 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1422 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1423 					      "force_igmp_version"),
1424 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1425 					      "promote_secondaries"),
1426 	},
1427 };
1428 
1429 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1430 					struct ipv4_devconf *p)
1431 {
1432 	int i;
1433 	struct devinet_sysctl_table *t;
1434 
1435 #define DEVINET_CTL_PATH_DEV	3
1436 
1437 	struct ctl_path devinet_ctl_path[] = {
1438 		{ .procname = "net",  },
1439 		{ .procname = "ipv4", },
1440 		{ .procname = "conf", },
1441 		{ /* to be set */ },
1442 		{ },
1443 	};
1444 
1445 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1446 	if (!t)
1447 		goto out;
1448 
1449 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1450 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1451 		t->devinet_vars[i].extra1 = p;
1452 		t->devinet_vars[i].extra2 = net;
1453 	}
1454 
1455 	/*
1456 	 * Make a copy of dev_name, because '.procname' is regarded as const
1457 	 * by sysctl and we wouldn't want anyone to change it under our feet
1458 	 * (see SIOCSIFNAME).
1459 	 */
1460 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1461 	if (!t->dev_name)
1462 		goto free;
1463 
1464 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1465 
1466 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1467 			t->devinet_vars);
1468 	if (!t->sysctl_header)
1469 		goto free_procname;
1470 
1471 	p->sysctl = t;
1472 	return 0;
1473 
1474 free_procname:
1475 	kfree(t->dev_name);
1476 free:
1477 	kfree(t);
1478 out:
1479 	return -ENOBUFS;
1480 }
1481 
1482 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1483 {
1484 	struct devinet_sysctl_table *t = cnf->sysctl;
1485 
1486 	if (t == NULL)
1487 		return;
1488 
1489 	cnf->sysctl = NULL;
1490 	unregister_sysctl_table(t->sysctl_header);
1491 	kfree(t->dev_name);
1492 	kfree(t);
1493 }
1494 
1495 static void devinet_sysctl_register(struct in_device *idev)
1496 {
1497 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1498 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1499 					&idev->cnf);
1500 }
1501 
1502 static void devinet_sysctl_unregister(struct in_device *idev)
1503 {
1504 	__devinet_sysctl_unregister(&idev->cnf);
1505 	neigh_sysctl_unregister(idev->arp_parms);
1506 }
1507 
1508 static struct ctl_table ctl_forward_entry[] = {
1509 	{
1510 		.procname	= "ip_forward",
1511 		.data		= &ipv4_devconf.data[
1512 					IPV4_DEVCONF_FORWARDING - 1],
1513 		.maxlen		= sizeof(int),
1514 		.mode		= 0644,
1515 		.proc_handler	= devinet_sysctl_forward,
1516 		.extra1		= &ipv4_devconf,
1517 		.extra2		= &init_net,
1518 	},
1519 	{ },
1520 };
1521 
1522 static __net_initdata struct ctl_path net_ipv4_path[] = {
1523 	{ .procname = "net", },
1524 	{ .procname = "ipv4", },
1525 	{ },
1526 };
1527 #endif
1528 
1529 static __net_init int devinet_init_net(struct net *net)
1530 {
1531 	int err;
1532 	struct ipv4_devconf *all, *dflt;
1533 #ifdef CONFIG_SYSCTL
1534 	struct ctl_table *tbl = ctl_forward_entry;
1535 	struct ctl_table_header *forw_hdr;
1536 #endif
1537 
1538 	err = -ENOMEM;
1539 	all = &ipv4_devconf;
1540 	dflt = &ipv4_devconf_dflt;
1541 
1542 	if (!net_eq(net, &init_net)) {
1543 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1544 		if (all == NULL)
1545 			goto err_alloc_all;
1546 
1547 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1548 		if (dflt == NULL)
1549 			goto err_alloc_dflt;
1550 
1551 #ifdef CONFIG_SYSCTL
1552 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1553 		if (tbl == NULL)
1554 			goto err_alloc_ctl;
1555 
1556 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1557 		tbl[0].extra1 = all;
1558 		tbl[0].extra2 = net;
1559 #endif
1560 	}
1561 
1562 #ifdef CONFIG_SYSCTL
1563 	err = __devinet_sysctl_register(net, "all", all);
1564 	if (err < 0)
1565 		goto err_reg_all;
1566 
1567 	err = __devinet_sysctl_register(net, "default", dflt);
1568 	if (err < 0)
1569 		goto err_reg_dflt;
1570 
1571 	err = -ENOMEM;
1572 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1573 	if (forw_hdr == NULL)
1574 		goto err_reg_ctl;
1575 	net->ipv4.forw_hdr = forw_hdr;
1576 #endif
1577 
1578 	net->ipv4.devconf_all = all;
1579 	net->ipv4.devconf_dflt = dflt;
1580 	return 0;
1581 
1582 #ifdef CONFIG_SYSCTL
1583 err_reg_ctl:
1584 	__devinet_sysctl_unregister(dflt);
1585 err_reg_dflt:
1586 	__devinet_sysctl_unregister(all);
1587 err_reg_all:
1588 	if (tbl != ctl_forward_entry)
1589 		kfree(tbl);
1590 err_alloc_ctl:
1591 #endif
1592 	if (dflt != &ipv4_devconf_dflt)
1593 		kfree(dflt);
1594 err_alloc_dflt:
1595 	if (all != &ipv4_devconf)
1596 		kfree(all);
1597 err_alloc_all:
1598 	return err;
1599 }
1600 
1601 static __net_exit void devinet_exit_net(struct net *net)
1602 {
1603 #ifdef CONFIG_SYSCTL
1604 	struct ctl_table *tbl;
1605 
1606 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1607 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1608 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1609 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1610 	kfree(tbl);
1611 #endif
1612 	kfree(net->ipv4.devconf_dflt);
1613 	kfree(net->ipv4.devconf_all);
1614 }
1615 
1616 static __net_initdata struct pernet_operations devinet_ops = {
1617 	.init = devinet_init_net,
1618 	.exit = devinet_exit_net,
1619 };
1620 
1621 void __init devinet_init(void)
1622 {
1623 	register_pernet_subsys(&devinet_ops);
1624 
1625 	register_gifconf(PF_INET, inet_gifconf);
1626 	register_netdevice_notifier(&ip_netdev_notifier);
1627 
1628 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1629 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1630 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1631 }
1632 
1633