xref: /linux/net/ipv4/devinet.c (revision f3a8b6645dc2e60d11f20c1c23afd964ff4e55ae)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 	},
79 };
80 
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 	.data = {
83 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90 	},
91 };
92 
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95 
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 	[IFA_LOCAL]     	= { .type = NLA_U32 },
98 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102 	[IFA_FLAGS]		= { .type = NLA_U32 },
103 };
104 
105 #define IN4_ADDR_HSIZE_SHIFT	8
106 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107 
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112 	u32 val = (__force u32) addr ^ net_hash_mix(net);
113 
114 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116 
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
120 
121 	ASSERT_RTNL();
122 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124 
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127 	ASSERT_RTNL();
128 	hlist_del_init_rcu(&ifa->hash);
129 }
130 
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU or RTNL.
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141 	u32 hash = inet_addr_hash(net, addr);
142 	struct net_device *result = NULL;
143 	struct in_ifaddr *ifa;
144 
145 	rcu_read_lock();
146 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 		if (ifa->ifa_local == addr) {
148 			struct net_device *dev = ifa->ifa_dev->dev;
149 
150 			if (!net_eq(dev_net(dev), net))
151 				continue;
152 			result = dev;
153 			break;
154 		}
155 	}
156 	if (!result) {
157 		struct flowi4 fl4 = { .daddr = addr };
158 		struct fib_result res = { 0 };
159 		struct fib_table *local;
160 
161 		/* Fall back to the FIB local table so that communication
162 		 * over loopback subnets works.
163 		 */
164 		local = fib_get_table(net, RT_TABLE_LOCAL);
165 		if (local &&
166 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 		    res.type == RTN_LOCAL)
168 			result = FIB_RES_DEV(res);
169 	}
170 	if (result && devref)
171 		dev_hold(result);
172 	rcu_read_unlock();
173 	return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176 
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178 
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 			 int destroy);
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static int devinet_sysctl_register(struct in_device *idev)
187 {
188 	return 0;
189 }
190 static void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194 
195 /* Allocation and freeing of in_ifaddr structures. */
196 
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201 
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205 	if (ifa->ifa_dev)
206 		in_dev_put(ifa->ifa_dev);
207 	kfree(ifa);
208 }
209 
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214 
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217 	struct net_device *dev = idev->dev;
218 
219 	WARN_ON(idev->ifa_list);
220 	WARN_ON(idev->mc_list);
221 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225 	dev_put(dev);
226 	if (!idev->dead)
227 		pr_err("Freeing alive in_device %p\n", idev);
228 	else
229 		kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232 
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235 	struct in_device *in_dev;
236 	int err = -ENOMEM;
237 
238 	ASSERT_RTNL();
239 
240 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241 	if (!in_dev)
242 		goto out;
243 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 			sizeof(in_dev->cnf));
245 	in_dev->cnf.sysctl = NULL;
246 	in_dev->dev = dev;
247 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 	if (!in_dev->arp_parms)
249 		goto out_kfree;
250 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 		dev_disable_lro(dev);
252 	/* Reference in_dev->dev */
253 	dev_hold(dev);
254 	/* Account for reference dev->ip_ptr (below) */
255 	in_dev_hold(in_dev);
256 
257 	err = devinet_sysctl_register(in_dev);
258 	if (err) {
259 		in_dev->dead = 1;
260 		in_dev_put(in_dev);
261 		in_dev = NULL;
262 		goto out;
263 	}
264 	ip_mc_init_dev(in_dev);
265 	if (dev->flags & IFF_UP)
266 		ip_mc_up(in_dev);
267 
268 	/* we can receive as soon as ip_ptr is set -- do this last */
269 	rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271 	return in_dev ?: ERR_PTR(err);
272 out_kfree:
273 	kfree(in_dev);
274 	in_dev = NULL;
275 	goto out;
276 }
277 
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
281 	in_dev_put(idev);
282 }
283 
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286 	struct in_ifaddr *ifa;
287 	struct net_device *dev;
288 
289 	ASSERT_RTNL();
290 
291 	dev = in_dev->dev;
292 
293 	in_dev->dead = 1;
294 
295 	ip_mc_destroy_dev(in_dev);
296 
297 	while ((ifa = in_dev->ifa_list) != NULL) {
298 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299 		inet_free_ifa(ifa);
300 	}
301 
302 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
303 
304 	devinet_sysctl_unregister(in_dev);
305 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306 	arp_ifdown(dev);
307 
308 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310 
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313 	rcu_read_lock();
314 	for_primary_ifa(in_dev) {
315 		if (inet_ifa_match(a, ifa)) {
316 			if (!b || inet_ifa_match(b, ifa)) {
317 				rcu_read_unlock();
318 				return 1;
319 			}
320 		}
321 	} endfor_ifa(in_dev);
322 	rcu_read_unlock();
323 	return 0;
324 }
325 
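/*
 * Remove *ifap from in_dev's address list.  Deleting a primary address
 * normally takes its secondaries with it; if promote_secondaries is
 * enabled, the first secondary on the same subnet is instead promoted to
 * primary and the remaining secondaries get their routes re-added with
 * the new primary as preferred source.
 */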
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327 			 int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
329 	struct in_ifaddr *promote = NULL;
330 	struct in_ifaddr *ifa, *ifa1 = *ifap;
331 	struct in_ifaddr *last_prim = in_dev->ifa_list;
332 	struct in_ifaddr *prev_prom = NULL;
333 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334 
335 	ASSERT_RTNL();
336 
337 	if (in_dev->dead)
338 		goto no_promotions;
339 
340 	/* 1. Deleting a primary ifaddr forces deletion of all its secondaries
341 	 * unless alias promotion is set.
342 	 */
343 
344 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
345 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
346 
347 		while ((ifa = *ifap1) != NULL) {
348 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
349 			    ifa1->ifa_scope <= ifa->ifa_scope)
350 				last_prim = ifa;
351 
352 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
353 			    ifa1->ifa_mask != ifa->ifa_mask ||
354 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
355 				ifap1 = &ifa->ifa_next;
356 				prev_prom = ifa;
357 				continue;
358 			}
359 
360 			if (!do_promote) {
361 				inet_hash_remove(ifa);
362 				*ifap1 = ifa->ifa_next;
363 
364 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
365 				blocking_notifier_call_chain(&inetaddr_chain,
366 						NETDEV_DOWN, ifa);
367 				inet_free_ifa(ifa);
368 			} else {
369 				promote = ifa;
370 				break;
371 			}
372 		}
373 	}
374 
375 	/* On promotion all secondaries from the subnet are changing
376 	 * their primary IP; we must remove all their routes silently
377 	 * and later add them back with the new prefsrc. Do this
378 	 * while all addresses are still on the device list.
379 	 */
380 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
381 		if (ifa1->ifa_mask == ifa->ifa_mask &&
382 		    inet_ifa_match(ifa1->ifa_address, ifa))
383 			fib_del_ifaddr(ifa, ifa1);
384 	}
385 
386 no_promotions:
387 	/* 2. Unlink it */
388 
389 	*ifap = ifa1->ifa_next;
390 	inet_hash_remove(ifa1);
391 
392 	/* 3. Announce address deletion */
393 
394 	/* Send message first, then call notifier.
395 	   At first sight, the FIB update triggered by the notifier
396 	   will refer to an already deleted ifaddr, which could confuse
397 	   netlink listeners. In fact it cannot: gated sees that the
398 	   route is deleted and, if it still thinks the ifaddr is
399 	   valid, it will try to restore the deleted routes... Grr.
400 	   So this order is correct.
401 	 */
402 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
403 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
404 
405 	if (promote) {
406 		struct in_ifaddr *next_sec = promote->ifa_next;
407 
408 		if (prev_prom) {
409 			prev_prom->ifa_next = promote->ifa_next;
410 			promote->ifa_next = last_prim->ifa_next;
411 			last_prim->ifa_next = promote;
412 		}
413 
414 		promote->ifa_flags &= ~IFA_F_SECONDARY;
415 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
416 		blocking_notifier_call_chain(&inetaddr_chain,
417 				NETDEV_UP, promote);
418 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
419 			if (ifa1->ifa_mask != ifa->ifa_mask ||
420 			    !inet_ifa_match(ifa1->ifa_address, ifa))
421 					continue;
422 			fib_add_ifaddr(ifa);
423 		}
424 
425 	}
426 	if (destroy)
427 		inet_free_ifa(ifa1);
428 }
429 
430 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
431 			 int destroy)
432 {
433 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
434 }
435 
436 static void check_lifetime(struct work_struct *work);
437 
438 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
439 
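/*
 * Link a new address into its device's list.  If the subnet already has a
 * primary address the new one is flagged IFA_F_SECONDARY and appended at
 * the tail; otherwise it is spliced in among the existing primaries
 * according to scope.  A duplicate local address returns -EEXIST.
 */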
440 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
441 			     u32 portid)
442 {
443 	struct in_device *in_dev = ifa->ifa_dev;
444 	struct in_ifaddr *ifa1, **ifap, **last_primary;
445 
446 	ASSERT_RTNL();
447 
448 	if (!ifa->ifa_local) {
449 		inet_free_ifa(ifa);
450 		return 0;
451 	}
452 
453 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
454 	last_primary = &in_dev->ifa_list;
455 
456 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
457 	     ifap = &ifa1->ifa_next) {
458 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
459 		    ifa->ifa_scope <= ifa1->ifa_scope)
460 			last_primary = &ifa1->ifa_next;
461 		if (ifa1->ifa_mask == ifa->ifa_mask &&
462 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
463 			if (ifa1->ifa_local == ifa->ifa_local) {
464 				inet_free_ifa(ifa);
465 				return -EEXIST;
466 			}
467 			if (ifa1->ifa_scope != ifa->ifa_scope) {
468 				inet_free_ifa(ifa);
469 				return -EINVAL;
470 			}
471 			ifa->ifa_flags |= IFA_F_SECONDARY;
472 		}
473 	}
474 
475 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
476 		prandom_seed((__force u32) ifa->ifa_local);
477 		ifap = last_primary;
478 	}
479 
480 	ifa->ifa_next = *ifap;
481 	*ifap = ifa;
482 
483 	inet_hash_insert(dev_net(in_dev->dev), ifa);
484 
485 	cancel_delayed_work(&check_lifetime_work);
486 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
487 
488 	/* Send message first, then call notifier.
489 	   The notifier will trigger a FIB update, so that
490 	   netlink listeners will know about the new ifaddr. */
491 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
492 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
493 
494 	return 0;
495 }
496 
497 static int inet_insert_ifa(struct in_ifaddr *ifa)
498 {
499 	return __inet_insert_ifa(ifa, NULL, 0);
500 }
501 
502 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
503 {
504 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
505 
506 	ASSERT_RTNL();
507 
508 	if (!in_dev) {
509 		inet_free_ifa(ifa);
510 		return -ENOBUFS;
511 	}
512 	ipv4_devconf_setall(in_dev);
513 	neigh_parms_data_state_setall(in_dev->arp_parms);
514 	if (ifa->ifa_dev != in_dev) {
515 		WARN_ON(ifa->ifa_dev);
516 		in_dev_hold(in_dev);
517 		ifa->ifa_dev = in_dev;
518 	}
519 	if (ipv4_is_loopback(ifa->ifa_local))
520 		ifa->ifa_scope = RT_SCOPE_HOST;
521 	return inet_insert_ifa(ifa);
522 }
523 
524 /* Caller must hold RCU or RTNL:
525  * we don't take a reference on the found in_device.
526  */
527 struct in_device *inetdev_by_index(struct net *net, int ifindex)
528 {
529 	struct net_device *dev;
530 	struct in_device *in_dev = NULL;
531 
532 	rcu_read_lock();
533 	dev = dev_get_by_index_rcu(net, ifindex);
534 	if (dev)
535 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
536 	rcu_read_unlock();
537 	return in_dev;
538 }
539 EXPORT_SYMBOL(inetdev_by_index);
540 
541 /* Called only under the RTNL semaphore. No other locks are taken. */
542 
543 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
544 				    __be32 mask)
545 {
546 	ASSERT_RTNL();
547 
548 	for_primary_ifa(in_dev) {
549 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
550 			return ifa;
551 	} endfor_ifa(in_dev);
552 	return NULL;
553 }
554 
555 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
556 {
557 	struct ip_mreqn mreq = {
558 		.imr_multiaddr.s_addr = ifa->ifa_address,
559 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
560 	};
561 	int ret;
562 
563 	ASSERT_RTNL();
564 
565 	lock_sock(sk);
566 	if (join)
567 		ret = ip_mc_join_group(sk, &mreq);
568 	else
569 		ret = ip_mc_leave_group(sk, &mreq);
570 	release_sock(sk);
571 
572 	return ret;
573 }
574 
575 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
576 {
577 	struct net *net = sock_net(skb->sk);
578 	struct nlattr *tb[IFA_MAX+1];
579 	struct in_device *in_dev;
580 	struct ifaddrmsg *ifm;
581 	struct in_ifaddr *ifa, **ifap;
582 	int err = -EINVAL;
583 
584 	ASSERT_RTNL();
585 
586 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
587 	if (err < 0)
588 		goto errout;
589 
590 	ifm = nlmsg_data(nlh);
591 	in_dev = inetdev_by_index(net, ifm->ifa_index);
592 	if (!in_dev) {
593 		err = -ENODEV;
594 		goto errout;
595 	}
596 
597 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
598 	     ifap = &ifa->ifa_next) {
599 		if (tb[IFA_LOCAL] &&
600 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
601 			continue;
602 
603 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
604 			continue;
605 
606 		if (tb[IFA_ADDRESS] &&
607 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
608 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
609 			continue;
610 
611 		if (ipv4_is_multicast(ifa->ifa_address))
612 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
613 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
614 		return 0;
615 	}
616 
617 	err = -EADDRNOTAVAIL;
618 errout:
619 	return err;
620 }
621 
622 #define INFINITY_LIFE_TIME	0xFFFFFFFF
623 
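/*
 * Walk every hash bucket and expire or deprecate addresses whose valid or
 * preferred lifetime has elapsed.  The first pass over a bucket runs under
 * RCU only; the RTNL is taken for a second pass only when that bucket
 * actually contains an address that needs changing.  The work is then
 * rescheduled for the next soonest expiry, clamped to at least
 * ADDRCONF_TIMER_FUZZ_MAX from now.
 */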
624 static void check_lifetime(struct work_struct *work)
625 {
626 	unsigned long now, next, next_sec, next_sched;
627 	struct in_ifaddr *ifa;
628 	struct hlist_node *n;
629 	int i;
630 
631 	now = jiffies;
632 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
633 
634 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
635 		bool change_needed = false;
636 
637 		rcu_read_lock();
638 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
639 			unsigned long age;
640 
641 			if (ifa->ifa_flags & IFA_F_PERMANENT)
642 				continue;
643 
644 			/* We try to batch several events at once. */
645 			age = (now - ifa->ifa_tstamp +
646 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
647 
648 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
649 			    age >= ifa->ifa_valid_lft) {
650 				change_needed = true;
651 			} else if (ifa->ifa_preferred_lft ==
652 				   INFINITY_LIFE_TIME) {
653 				continue;
654 			} else if (age >= ifa->ifa_preferred_lft) {
655 				if (time_before(ifa->ifa_tstamp +
656 						ifa->ifa_valid_lft * HZ, next))
657 					next = ifa->ifa_tstamp +
658 					       ifa->ifa_valid_lft * HZ;
659 
660 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
661 					change_needed = true;
662 			} else if (time_before(ifa->ifa_tstamp +
663 					       ifa->ifa_preferred_lft * HZ,
664 					       next)) {
665 				next = ifa->ifa_tstamp +
666 				       ifa->ifa_preferred_lft * HZ;
667 			}
668 		}
669 		rcu_read_unlock();
670 		if (!change_needed)
671 			continue;
672 		rtnl_lock();
673 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
674 			unsigned long age;
675 
676 			if (ifa->ifa_flags & IFA_F_PERMANENT)
677 				continue;
678 
679 			/* We try to batch several events at once. */
680 			age = (now - ifa->ifa_tstamp +
681 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
682 
683 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
684 			    age >= ifa->ifa_valid_lft) {
685 				struct in_ifaddr **ifap;
686 
687 				for (ifap = &ifa->ifa_dev->ifa_list;
688 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
689 					if (*ifap == ifa) {
690 						inet_del_ifa(ifa->ifa_dev,
691 							     ifap, 1);
692 						break;
693 					}
694 				}
695 			} else if (ifa->ifa_preferred_lft !=
696 				   INFINITY_LIFE_TIME &&
697 				   age >= ifa->ifa_preferred_lft &&
698 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
699 				ifa->ifa_flags |= IFA_F_DEPRECATED;
700 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
701 			}
702 		}
703 		rtnl_unlock();
704 	}
705 
706 	next_sec = round_jiffies_up(next);
707 	next_sched = next;
708 
709 	/* If rounded timeout is accurate enough, accept it. */
710 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
711 		next_sched = next_sec;
712 
713 	now = jiffies;
714 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
715 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
716 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
717 
718 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
719 			next_sched - now);
720 }
721 
722 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
723 			     __u32 prefered_lft)
724 {
725 	unsigned long timeout;
726 
727 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
728 
729 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
730 	if (addrconf_finite_timeout(timeout))
731 		ifa->ifa_valid_lft = timeout;
732 	else
733 		ifa->ifa_flags |= IFA_F_PERMANENT;
734 
735 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
736 	if (addrconf_finite_timeout(timeout)) {
737 		if (timeout == 0)
738 			ifa->ifa_flags |= IFA_F_DEPRECATED;
739 		ifa->ifa_preferred_lft = timeout;
740 	}
741 	ifa->ifa_tstamp = jiffies;
742 	if (!ifa->ifa_cstamp)
743 		ifa->ifa_cstamp = ifa->ifa_tstamp;
744 }
745 
746 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
747 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
748 {
749 	struct nlattr *tb[IFA_MAX+1];
750 	struct in_ifaddr *ifa;
751 	struct ifaddrmsg *ifm;
752 	struct net_device *dev;
753 	struct in_device *in_dev;
754 	int err;
755 
756 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
757 	if (err < 0)
758 		goto errout;
759 
760 	ifm = nlmsg_data(nlh);
761 	err = -EINVAL;
762 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
763 		goto errout;
764 
765 	dev = __dev_get_by_index(net, ifm->ifa_index);
766 	err = -ENODEV;
767 	if (!dev)
768 		goto errout;
769 
770 	in_dev = __in_dev_get_rtnl(dev);
771 	err = -ENOBUFS;
772 	if (!in_dev)
773 		goto errout;
774 
775 	ifa = inet_alloc_ifa();
776 	if (!ifa)
777 		/*
778 		 * A potential in_dev allocation can be left alive; it stays
779 		 * assigned to its device and is destroyed with it.
780 		 */
781 		goto errout;
782 
783 	ipv4_devconf_setall(in_dev);
784 	neigh_parms_data_state_setall(in_dev->arp_parms);
785 	in_dev_hold(in_dev);
786 
787 	if (!tb[IFA_ADDRESS])
788 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
789 
790 	INIT_HLIST_NODE(&ifa->hash);
791 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
792 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
793 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
794 					 ifm->ifa_flags;
795 	ifa->ifa_scope = ifm->ifa_scope;
796 	ifa->ifa_dev = in_dev;
797 
798 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
799 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
800 
801 	if (tb[IFA_BROADCAST])
802 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
803 
804 	if (tb[IFA_LABEL])
805 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
806 	else
807 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
808 
809 	if (tb[IFA_CACHEINFO]) {
810 		struct ifa_cacheinfo *ci;
811 
812 		ci = nla_data(tb[IFA_CACHEINFO]);
813 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
814 			err = -EINVAL;
815 			goto errout_free;
816 		}
817 		*pvalid_lft = ci->ifa_valid;
818 		*pprefered_lft = ci->ifa_prefered;
819 	}
820 
821 	return ifa;
822 
823 errout_free:
824 	inet_free_ifa(ifa);
825 errout:
826 	return ERR_PTR(err);
827 }
828 
829 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
830 {
831 	struct in_device *in_dev = ifa->ifa_dev;
832 	struct in_ifaddr *ifa1, **ifap;
833 
834 	if (!ifa->ifa_local)
835 		return NULL;
836 
837 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
838 	     ifap = &ifa1->ifa_next) {
839 		if (ifa1->ifa_mask == ifa->ifa_mask &&
840 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
841 		    ifa1->ifa_local == ifa->ifa_local)
842 			return ifa1;
843 	}
844 	return NULL;
845 }
846 
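/*
 * RTM_NEWADDR handler -- the path exercised by e.g. "ip addr add
 * 192.0.2.1/24 dev eth0".  An already existing identical address is only
 * updated (lifetimes refreshed) when NLM_F_REPLACE is set and NLM_F_EXCL
 * is not.
 */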
847 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
848 {
849 	struct net *net = sock_net(skb->sk);
850 	struct in_ifaddr *ifa;
851 	struct in_ifaddr *ifa_existing;
852 	__u32 valid_lft = INFINITY_LIFE_TIME;
853 	__u32 prefered_lft = INFINITY_LIFE_TIME;
854 
855 	ASSERT_RTNL();
856 
857 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
858 	if (IS_ERR(ifa))
859 		return PTR_ERR(ifa);
860 
861 	ifa_existing = find_matching_ifa(ifa);
862 	if (!ifa_existing) {
863 		/* It would be best to check for !NLM_F_CREATE here but
864 		 * userspace already relies on not having to provide this.
865 		 */
866 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
867 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
868 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
869 					       true, ifa);
870 
871 			if (ret < 0) {
872 				inet_free_ifa(ifa);
873 				return ret;
874 			}
875 		}
876 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
877 	} else {
878 		inet_free_ifa(ifa);
879 
880 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
881 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
882 			return -EEXIST;
883 		ifa = ifa_existing;
884 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
885 		cancel_delayed_work(&check_lifetime_work);
886 		queue_delayed_work(system_power_efficient_wq,
887 				&check_lifetime_work, 0);
888 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
889 	}
890 	return 0;
891 }
892 
893 /*
894  *	Determine a default network mask, based on the IP address.
895  */
896 
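/*
 * Classful defaults: e.g. 10.0.0.1 -> 8, 172.16.0.1 -> 16, 192.168.1.1 -> 24;
 * 0.0.0.0 -> 0; multicast and other addresses -> -1.
 */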
897 static int inet_abc_len(__be32 addr)
898 {
899 	int rc = -1;	/* Something else, probably a multicast. */
900 
901 	if (ipv4_is_zeronet(addr))
902 		rc = 0;
903 	else {
904 		__u32 haddr = ntohl(addr);
905 
906 		if (IN_CLASSA(haddr))
907 			rc = 8;
908 		else if (IN_CLASSB(haddr))
909 			rc = 16;
910 		else if (IN_CLASSC(haddr))
911 			rc = 24;
912 	}
913 
914 	return rc;
915 }
916 
917 
918 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
919 {
920 	struct ifreq ifr;
921 	struct sockaddr_in sin_orig;
922 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
923 	struct in_device *in_dev;
924 	struct in_ifaddr **ifap = NULL;
925 	struct in_ifaddr *ifa = NULL;
926 	struct net_device *dev;
927 	char *colon;
928 	int ret = -EFAULT;
929 	int tryaddrmatch = 0;
930 
931 	/*
932 	 *	Fetch the caller's info block into kernel space
933 	 */
934 
935 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
936 		goto out;
937 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
938 
939 	/* save original address for comparison */
940 	memcpy(&sin_orig, sin, sizeof(*sin));
941 
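	/*
	 * ifr_name may carry a 4.4BSD-style alias label such as "eth0:1".
	 * Strip the ":1" suffix so the base device can be looked up; the
	 * colon is restored below before the label comparisons.
	 */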
942 	colon = strchr(ifr.ifr_name, ':');
943 	if (colon)
944 		*colon = 0;
945 
946 	dev_load(net, ifr.ifr_name);
947 
948 	switch (cmd) {
949 	case SIOCGIFADDR:	/* Get interface address */
950 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
951 	case SIOCGIFDSTADDR:	/* Get the destination address */
952 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
953 		/* Note that these ioctls will not sleep,
954 		   so that we do not impose a lock.
955 		   One day we will be forced to put shlock here (I mean SMP)
956 		 */
957 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
958 		memset(sin, 0, sizeof(*sin));
959 		sin->sin_family = AF_INET;
960 		break;
961 
962 	case SIOCSIFFLAGS:
963 		ret = -EPERM;
964 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
965 			goto out;
966 		break;
967 	case SIOCSIFADDR:	/* Set interface address (and family) */
968 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
969 	case SIOCSIFDSTADDR:	/* Set the destination address */
970 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
971 		ret = -EPERM;
972 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
973 			goto out;
974 		ret = -EINVAL;
975 		if (sin->sin_family != AF_INET)
976 			goto out;
977 		break;
978 	default:
979 		ret = -EINVAL;
980 		goto out;
981 	}
982 
983 	rtnl_lock();
984 
985 	ret = -ENODEV;
986 	dev = __dev_get_by_name(net, ifr.ifr_name);
987 	if (!dev)
988 		goto done;
989 
990 	if (colon)
991 		*colon = ':';
992 
993 	in_dev = __in_dev_get_rtnl(dev);
994 	if (in_dev) {
995 		if (tryaddrmatch) {
996 			/* Matthias Andree */
997 			/* compare label and address (4.4BSD style) */
998 			/* note: we only do this for a limited set of ioctls
999 			   and only if the original address family was AF_INET.
1000 			   This is checked above. */
1001 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1002 			     ifap = &ifa->ifa_next) {
1003 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1004 				    sin_orig.sin_addr.s_addr ==
1005 							ifa->ifa_local) {
1006 					break; /* found */
1007 				}
1008 			}
1009 		}
1010 		/* We didn't get a match; maybe the application is
1011 		   4.3BSD-style and passed in junk, so we fall back to
1012 		   comparing just the label. */
1013 		if (!ifa) {
1014 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1015 			     ifap = &ifa->ifa_next)
1016 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1017 					break;
1018 		}
1019 	}
1020 
1021 	ret = -EADDRNOTAVAIL;
1022 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1023 		goto done;
1024 
1025 	switch (cmd) {
1026 	case SIOCGIFADDR:	/* Get interface address */
1027 		sin->sin_addr.s_addr = ifa->ifa_local;
1028 		goto rarok;
1029 
1030 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1031 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1032 		goto rarok;
1033 
1034 	case SIOCGIFDSTADDR:	/* Get the destination address */
1035 		sin->sin_addr.s_addr = ifa->ifa_address;
1036 		goto rarok;
1037 
1038 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1039 		sin->sin_addr.s_addr = ifa->ifa_mask;
1040 		goto rarok;
1041 
1042 	case SIOCSIFFLAGS:
1043 		if (colon) {
1044 			ret = -EADDRNOTAVAIL;
1045 			if (!ifa)
1046 				break;
1047 			ret = 0;
1048 			if (!(ifr.ifr_flags & IFF_UP))
1049 				inet_del_ifa(in_dev, ifap, 1);
1050 			break;
1051 		}
1052 		ret = dev_change_flags(dev, ifr.ifr_flags);
1053 		break;
1054 
1055 	case SIOCSIFADDR:	/* Set interface address (and family) */
1056 		ret = -EINVAL;
1057 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1058 			break;
1059 
1060 		if (!ifa) {
1061 			ret = -ENOBUFS;
1062 			ifa = inet_alloc_ifa();
1063 			if (!ifa)
1064 				break;
1065 			INIT_HLIST_NODE(&ifa->hash);
1066 			if (colon)
1067 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1068 			else
1069 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1070 		} else {
1071 			ret = 0;
1072 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1073 				break;
1074 			inet_del_ifa(in_dev, ifap, 0);
1075 			ifa->ifa_broadcast = 0;
1076 			ifa->ifa_scope = 0;
1077 		}
1078 
1079 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1080 
1081 		if (!(dev->flags & IFF_POINTOPOINT)) {
1082 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1083 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1084 			if ((dev->flags & IFF_BROADCAST) &&
1085 			    ifa->ifa_prefixlen < 31)
1086 				ifa->ifa_broadcast = ifa->ifa_address |
1087 						     ~ifa->ifa_mask;
1088 		} else {
1089 			ifa->ifa_prefixlen = 32;
1090 			ifa->ifa_mask = inet_make_mask(32);
1091 		}
1092 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1093 		ret = inet_set_ifa(dev, ifa);
1094 		break;
1095 
1096 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1097 		ret = 0;
1098 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1099 			inet_del_ifa(in_dev, ifap, 0);
1100 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1101 			inet_insert_ifa(ifa);
1102 		}
1103 		break;
1104 
1105 	case SIOCSIFDSTADDR:	/* Set the destination address */
1106 		ret = 0;
1107 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1108 			break;
1109 		ret = -EINVAL;
1110 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1111 			break;
1112 		ret = 0;
1113 		inet_del_ifa(in_dev, ifap, 0);
1114 		ifa->ifa_address = sin->sin_addr.s_addr;
1115 		inet_insert_ifa(ifa);
1116 		break;
1117 
1118 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1119 
1120 		/*
1121 		 *	The mask we set must be legal.
1122 		 */
1123 		ret = -EINVAL;
1124 		if (bad_mask(sin->sin_addr.s_addr, 0))
1125 			break;
1126 		ret = 0;
1127 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1128 			__be32 old_mask = ifa->ifa_mask;
1129 			inet_del_ifa(in_dev, ifap, 0);
1130 			ifa->ifa_mask = sin->sin_addr.s_addr;
1131 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1132 
1133 			/* If the current broadcast address was derived
1134 			 * from the old netmask, recalculate it for the
1135 			 * new one. Otherwise it's a funny address, so
1136 			 * don't touch it since the user seems to know
1137 			 * what (s)he's doing...
1138 			 */
1139 			if ((dev->flags & IFF_BROADCAST) &&
1140 			    (ifa->ifa_prefixlen < 31) &&
1141 			    (ifa->ifa_broadcast ==
1142 			     (ifa->ifa_local|~old_mask))) {
1143 				ifa->ifa_broadcast = (ifa->ifa_local |
1144 						      ~sin->sin_addr.s_addr);
1145 			}
1146 			inet_insert_ifa(ifa);
1147 		}
1148 		break;
1149 	}
1150 done:
1151 	rtnl_unlock();
1152 out:
1153 	return ret;
1154 rarok:
1155 	rtnl_unlock();
1156 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1157 	goto out;
1158 }
1159 
1160 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1161 {
1162 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1163 	struct in_ifaddr *ifa;
1164 	struct ifreq ifr;
1165 	int done = 0;
1166 
1167 	if (!in_dev)
1168 		goto out;
1169 
1170 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1171 		if (!buf) {
1172 			done += sizeof(ifr);
1173 			continue;
1174 		}
1175 		if (len < (int) sizeof(ifr))
1176 			break;
1177 		memset(&ifr, 0, sizeof(struct ifreq));
1178 		strcpy(ifr.ifr_name, ifa->ifa_label);
1179 
1180 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1181 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1182 								ifa->ifa_local;
1183 
1184 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1185 			done = -EFAULT;
1186 			break;
1187 		}
1188 		buf  += sizeof(struct ifreq);
1189 		len  -= sizeof(struct ifreq);
1190 		done += sizeof(struct ifreq);
1191 	}
1192 out:
1193 	return done;
1194 }
1195 
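/*
 * Pick a source address on @dev for talking to @dst: the first primary
 * address within the requested scope whose subnet contains @dst, or
 * failing that any suitable primary.  If the device has no usable
 * address, fall back to other devices in the same L3 master domain
 * (for VRFs the master device itself is preferred over loopback).
 */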
1196 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1197 {
1198 	__be32 addr = 0;
1199 	struct in_device *in_dev;
1200 	struct net *net = dev_net(dev);
1201 	int master_idx;
1202 
1203 	rcu_read_lock();
1204 	in_dev = __in_dev_get_rcu(dev);
1205 	if (!in_dev)
1206 		goto no_in_dev;
1207 
1208 	for_primary_ifa(in_dev) {
1209 		if (ifa->ifa_scope > scope)
1210 			continue;
1211 		if (!dst || inet_ifa_match(dst, ifa)) {
1212 			addr = ifa->ifa_local;
1213 			break;
1214 		}
1215 		if (!addr)
1216 			addr = ifa->ifa_local;
1217 	} endfor_ifa(in_dev);
1218 
1219 	if (addr)
1220 		goto out_unlock;
1221 no_in_dev:
1222 	master_idx = l3mdev_master_ifindex_rcu(dev);
1223 
1224 	/* For VRFs, the VRF device takes the place of the loopback device,
1225 	 * with addresses on it being preferred.  Note in such cases the
1226 	 * loopback device will be among the devices that fail the master_idx
1227 	 * equality check in the loop below.
1228 	 */
1229 	if (master_idx &&
1230 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1231 	    (in_dev = __in_dev_get_rcu(dev))) {
1232 		for_primary_ifa(in_dev) {
1233 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1234 			    ifa->ifa_scope <= scope) {
1235 				addr = ifa->ifa_local;
1236 				goto out_unlock;
1237 			}
1238 		} endfor_ifa(in_dev);
1239 	}
1240 
1241 	/* Non-loopback addresses on the loopback device should be preferred
1242 	   in this case. It is important that lo is the first interface
1243 	   in the dev_base list.
1244 	 */
1245 	for_each_netdev_rcu(net, dev) {
1246 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1247 			continue;
1248 
1249 		in_dev = __in_dev_get_rcu(dev);
1250 		if (!in_dev)
1251 			continue;
1252 
1253 		for_primary_ifa(in_dev) {
1254 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1255 			    ifa->ifa_scope <= scope) {
1256 				addr = ifa->ifa_local;
1257 				goto out_unlock;
1258 			}
1259 		} endfor_ifa(in_dev);
1260 	}
1261 out_unlock:
1262 	rcu_read_unlock();
1263 	return addr;
1264 }
1265 EXPORT_SYMBOL(inet_select_addr);
1266 
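/*
 * Roughly: check whether @local (or, if zero, any address) is configured
 * on this in_device within @scope; when @dst is given, prefer an address
 * whose subnet also covers @dst.  Returns the confirmed address or 0.
 */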
1267 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1268 			      __be32 local, int scope)
1269 {
1270 	int same = 0;
1271 	__be32 addr = 0;
1272 
1273 	for_ifa(in_dev) {
1274 		if (!addr &&
1275 		    (local == ifa->ifa_local || !local) &&
1276 		    ifa->ifa_scope <= scope) {
1277 			addr = ifa->ifa_local;
1278 			if (same)
1279 				break;
1280 		}
1281 		if (!same) {
1282 			same = (!local || inet_ifa_match(local, ifa)) &&
1283 				(!dst || inet_ifa_match(dst, ifa));
1284 			if (same && addr) {
1285 				if (local || !dst)
1286 					break;
1287 				/* Is the selected addr in the dst subnet? */
1288 				if (inet_ifa_match(addr, ifa))
1289 					break;
1290 				/* No, then can we use new local src? */
1291 				if (ifa->ifa_scope <= scope) {
1292 					addr = ifa->ifa_local;
1293 					break;
1294 				}
1295 				/* search for large dst subnet for addr */
1296 				same = 0;
1297 			}
1298 		}
1299 	} endfor_ifa(in_dev);
1300 
1301 	return same ? addr : 0;
1302 }
1303 
1304 /*
1305  * Confirm that local IP address exists using wildcards:
1306  * - net: netns to check, cannot be NULL
1307  * - in_dev: only on this interface, NULL=any interface
1308  * - dst: only in the same subnet as dst, 0=any dst
1309  * - local: address, 0=autoselect the local address
1310  * - scope: maximum allowed scope value for the local address
1311  */
1312 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1313 			 __be32 dst, __be32 local, int scope)
1314 {
1315 	__be32 addr = 0;
1316 	struct net_device *dev;
1317 
1318 	if (in_dev)
1319 		return confirm_addr_indev(in_dev, dst, local, scope);
1320 
1321 	rcu_read_lock();
1322 	for_each_netdev_rcu(net, dev) {
1323 		in_dev = __in_dev_get_rcu(dev);
1324 		if (in_dev) {
1325 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1326 			if (addr)
1327 				break;
1328 		}
1329 	}
1330 	rcu_read_unlock();
1331 
1332 	return addr;
1333 }
1334 EXPORT_SYMBOL(inet_confirm_addr);
1335 
1336 /*
1337  *	Device notifier
1338  */
1339 
1340 int register_inetaddr_notifier(struct notifier_block *nb)
1341 {
1342 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1343 }
1344 EXPORT_SYMBOL(register_inetaddr_notifier);
1345 
1346 int unregister_inetaddr_notifier(struct notifier_block *nb)
1347 {
1348 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1349 }
1350 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1351 
1352 /* Rename ifa_labels for a device name change. Make some effort to preserve
1353  * existing alias numbering and to create unique labels if possible.
1354 */
1355 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1356 {
1357 	struct in_ifaddr *ifa;
1358 	int named = 0;
1359 
1360 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1361 		char old[IFNAMSIZ], *dot;
1362 
1363 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1364 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1365 		if (named++ == 0)
1366 			goto skip;
1367 		dot = strchr(old, ':');
1368 		if (!dot) {
1369 			sprintf(old, ":%d", named);
1370 			dot = old;
1371 		}
1372 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1373 			strcat(ifa->ifa_label, dot);
1374 		else
1375 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1376 skip:
1377 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1378 	}
1379 }
1380 
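/*
 * 68 is the minimum IPv4 MTU: a maximal 60-byte header plus the 8-byte
 * minimum fragment mandated by RFC 791.
 */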
1381 static bool inetdev_valid_mtu(unsigned int mtu)
1382 {
1383 	return mtu >= 68;
1384 }
1385 
1386 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1387 					struct in_device *in_dev)
1388 
1389 {
1390 	struct in_ifaddr *ifa;
1391 
1392 	for (ifa = in_dev->ifa_list; ifa;
1393 	     ifa = ifa->ifa_next) {
1394 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1395 			 ifa->ifa_local, dev,
1396 			 ifa->ifa_local, NULL,
1397 			 dev->dev_addr, NULL);
1398 	}
1399 }
1400 
1401 /* Called only under RTNL semaphore */
1402 
1403 static int inetdev_event(struct notifier_block *this, unsigned long event,
1404 			 void *ptr)
1405 {
1406 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1407 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1408 
1409 	ASSERT_RTNL();
1410 
1411 	if (!in_dev) {
1412 		if (event == NETDEV_REGISTER) {
1413 			in_dev = inetdev_init(dev);
1414 			if (IS_ERR(in_dev))
1415 				return notifier_from_errno(PTR_ERR(in_dev));
1416 			if (dev->flags & IFF_LOOPBACK) {
1417 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1418 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1419 			}
1420 		} else if (event == NETDEV_CHANGEMTU) {
1421 			/* Re-enabling IP */
1422 			if (inetdev_valid_mtu(dev->mtu))
1423 				in_dev = inetdev_init(dev);
1424 		}
1425 		goto out;
1426 	}
1427 
1428 	switch (event) {
1429 	case NETDEV_REGISTER:
1430 		pr_debug("%s: bug\n", __func__);
1431 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1432 		break;
1433 	case NETDEV_UP:
1434 		if (!inetdev_valid_mtu(dev->mtu))
1435 			break;
1436 		if (dev->flags & IFF_LOOPBACK) {
1437 			struct in_ifaddr *ifa = inet_alloc_ifa();
1438 
1439 			if (ifa) {
1440 				INIT_HLIST_NODE(&ifa->hash);
1441 				ifa->ifa_local =
1442 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1443 				ifa->ifa_prefixlen = 8;
1444 				ifa->ifa_mask = inet_make_mask(8);
1445 				in_dev_hold(in_dev);
1446 				ifa->ifa_dev = in_dev;
1447 				ifa->ifa_scope = RT_SCOPE_HOST;
1448 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1449 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1450 						 INFINITY_LIFE_TIME);
1451 				ipv4_devconf_setall(in_dev);
1452 				neigh_parms_data_state_setall(in_dev->arp_parms);
1453 				inet_insert_ifa(ifa);
1454 			}
1455 		}
1456 		ip_mc_up(in_dev);
1457 		/* fall through */
1458 	case NETDEV_CHANGEADDR:
1459 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1460 			break;
1461 		/* fall through */
1462 	case NETDEV_NOTIFY_PEERS:
1463 		/* Send gratuitous ARP to notify of link change */
1464 		inetdev_send_gratuitous_arp(dev, in_dev);
1465 		break;
1466 	case NETDEV_DOWN:
1467 		ip_mc_down(in_dev);
1468 		break;
1469 	case NETDEV_PRE_TYPE_CHANGE:
1470 		ip_mc_unmap(in_dev);
1471 		break;
1472 	case NETDEV_POST_TYPE_CHANGE:
1473 		ip_mc_remap(in_dev);
1474 		break;
1475 	case NETDEV_CHANGEMTU:
1476 		if (inetdev_valid_mtu(dev->mtu))
1477 			break;
1478 		/* disable IP when the MTU is too small: fall through and destroy the in_device */
1479 	case NETDEV_UNREGISTER:
1480 		inetdev_destroy(in_dev);
1481 		break;
1482 	case NETDEV_CHANGENAME:
1483 		/* Do not notify about the label change; this event is
1484 		 * not interesting to applications using netlink.
1485 		 */
1486 		inetdev_changename(dev, in_dev);
1487 
1488 		devinet_sysctl_unregister(in_dev);
1489 		devinet_sysctl_register(in_dev);
1490 		break;
1491 	}
1492 out:
1493 	return NOTIFY_DONE;
1494 }
1495 
1496 static struct notifier_block ip_netdev_notifier = {
1497 	.notifier_call = inetdev_event,
1498 };
1499 
1500 static size_t inet_nlmsg_size(void)
1501 {
1502 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1503 	       + nla_total_size(4) /* IFA_ADDRESS */
1504 	       + nla_total_size(4) /* IFA_LOCAL */
1505 	       + nla_total_size(4) /* IFA_BROADCAST */
1506 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1507 	       + nla_total_size(4)  /* IFA_FLAGS */
1508 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1509 }
1510 
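/*
 * Netlink cacheinfo timestamps are reported in hundredths of a second,
 * measured from INITIAL_JIFFIES.
 */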
1511 static inline u32 cstamp_delta(unsigned long cstamp)
1512 {
1513 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1514 }
1515 
1516 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1517 			 unsigned long tstamp, u32 preferred, u32 valid)
1518 {
1519 	struct ifa_cacheinfo ci;
1520 
1521 	ci.cstamp = cstamp_delta(cstamp);
1522 	ci.tstamp = cstamp_delta(tstamp);
1523 	ci.ifa_prefered = preferred;
1524 	ci.ifa_valid = valid;
1525 
1526 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1527 }
1528 
1529 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1530 			    u32 portid, u32 seq, int event, unsigned int flags)
1531 {
1532 	struct ifaddrmsg *ifm;
1533 	struct nlmsghdr  *nlh;
1534 	u32 preferred, valid;
1535 
1536 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1537 	if (!nlh)
1538 		return -EMSGSIZE;
1539 
1540 	ifm = nlmsg_data(nlh);
1541 	ifm->ifa_family = AF_INET;
1542 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1543 	ifm->ifa_flags = ifa->ifa_flags;
1544 	ifm->ifa_scope = ifa->ifa_scope;
1545 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1546 
1547 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1548 		preferred = ifa->ifa_preferred_lft;
1549 		valid = ifa->ifa_valid_lft;
1550 		if (preferred != INFINITY_LIFE_TIME) {
1551 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1552 
1553 			if (preferred > tval)
1554 				preferred -= tval;
1555 			else
1556 				preferred = 0;
1557 			if (valid != INFINITY_LIFE_TIME) {
1558 				if (valid > tval)
1559 					valid -= tval;
1560 				else
1561 					valid = 0;
1562 			}
1563 		}
1564 	} else {
1565 		preferred = INFINITY_LIFE_TIME;
1566 		valid = INFINITY_LIFE_TIME;
1567 	}
1568 	if ((ifa->ifa_address &&
1569 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1570 	    (ifa->ifa_local &&
1571 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1572 	    (ifa->ifa_broadcast &&
1573 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1574 	    (ifa->ifa_label[0] &&
1575 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1576 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1577 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1578 			  preferred, valid))
1579 		goto nla_put_failure;
1580 
1581 	nlmsg_end(skb, nlh);
1582 	return 0;
1583 
1584 nla_put_failure:
1585 	nlmsg_cancel(skb, nlh);
1586 	return -EMSGSIZE;
1587 }
1588 
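/*
 * RTM_GETADDR dump.  cb->args[] carries the resume point -- hash bucket,
 * device index within the bucket and address index within the device --
 * so a dump that overflows one skb can continue where it left off.
 */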
1589 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1590 {
1591 	struct net *net = sock_net(skb->sk);
1592 	int h, s_h;
1593 	int idx, s_idx;
1594 	int ip_idx, s_ip_idx;
1595 	struct net_device *dev;
1596 	struct in_device *in_dev;
1597 	struct in_ifaddr *ifa;
1598 	struct hlist_head *head;
1599 
1600 	s_h = cb->args[0];
1601 	s_idx = idx = cb->args[1];
1602 	s_ip_idx = ip_idx = cb->args[2];
1603 
1604 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1605 		idx = 0;
1606 		head = &net->dev_index_head[h];
1607 		rcu_read_lock();
1608 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1609 			  net->dev_base_seq;
1610 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1611 			if (idx < s_idx)
1612 				goto cont;
1613 			if (h > s_h || idx > s_idx)
1614 				s_ip_idx = 0;
1615 			in_dev = __in_dev_get_rcu(dev);
1616 			if (!in_dev)
1617 				goto cont;
1618 
1619 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1620 			     ifa = ifa->ifa_next, ip_idx++) {
1621 				if (ip_idx < s_ip_idx)
1622 					continue;
1623 				if (inet_fill_ifaddr(skb, ifa,
1624 					     NETLINK_CB(cb->skb).portid,
1625 					     cb->nlh->nlmsg_seq,
1626 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1627 					rcu_read_unlock();
1628 					goto done;
1629 				}
1630 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1631 			}
1632 cont:
1633 			idx++;
1634 		}
1635 		rcu_read_unlock();
1636 	}
1637 
1638 done:
1639 	cb->args[0] = h;
1640 	cb->args[1] = idx;
1641 	cb->args[2] = ip_idx;
1642 
1643 	return skb->len;
1644 }
1645 
1646 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1647 		      u32 portid)
1648 {
1649 	struct sk_buff *skb;
1650 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1651 	int err = -ENOBUFS;
1652 	struct net *net;
1653 
1654 	net = dev_net(ifa->ifa_dev->dev);
1655 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1656 	if (!skb)
1657 		goto errout;
1658 
1659 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1660 	if (err < 0) {
1661 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1662 		WARN_ON(err == -EMSGSIZE);
1663 		kfree_skb(skb);
1664 		goto errout;
1665 	}
1666 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1667 	return;
1668 errout:
1669 	if (err < 0)
1670 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1671 }
1672 
1673 static size_t inet_get_link_af_size(const struct net_device *dev,
1674 				    u32 ext_filter_mask)
1675 {
1676 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1677 
1678 	if (!in_dev)
1679 		return 0;
1680 
1681 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1682 }
1683 
1684 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1685 			     u32 ext_filter_mask)
1686 {
1687 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1688 	struct nlattr *nla;
1689 	int i;
1690 
1691 	if (!in_dev)
1692 		return -ENODATA;
1693 
1694 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1695 	if (!nla)
1696 		return -EMSGSIZE;
1697 
1698 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1699 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1700 
1701 	return 0;
1702 }
1703 
1704 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1705 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1706 };
1707 
1708 static int inet_validate_link_af(const struct net_device *dev,
1709 				 const struct nlattr *nla)
1710 {
1711 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1712 	int err, rem;
1713 
1714 	if (dev && !__in_dev_get_rtnl(dev))
1715 		return -EAFNOSUPPORT;
1716 
1717 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1718 	if (err < 0)
1719 		return err;
1720 
1721 	if (tb[IFLA_INET_CONF]) {
1722 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1723 			int cfgid = nla_type(a);
1724 
1725 			if (nla_len(a) < 4)
1726 				return -EINVAL;
1727 
1728 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1729 				return -EINVAL;
1730 		}
1731 	}
1732 
1733 	return 0;
1734 }
1735 
1736 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1737 {
1738 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1739 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1740 	int rem;
1741 
1742 	if (!in_dev)
1743 		return -EAFNOSUPPORT;
1744 
1745 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1746 		BUG();
1747 
1748 	if (tb[IFLA_INET_CONF]) {
1749 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1750 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1751 	}
1752 
1753 	return 0;
1754 }
1755 
1756 static int inet_netconf_msgsize_devconf(int type)
1757 {
1758 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1759 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1760 	bool all = false;
1761 
1762 	if (type == NETCONFA_ALL)
1763 		all = true;
1764 
1765 	if (all || type == NETCONFA_FORWARDING)
1766 		size += nla_total_size(4);
1767 	if (all || type == NETCONFA_RP_FILTER)
1768 		size += nla_total_size(4);
1769 	if (all || type == NETCONFA_MC_FORWARDING)
1770 		size += nla_total_size(4);
1771 	if (all || type == NETCONFA_PROXY_NEIGH)
1772 		size += nla_total_size(4);
1773 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1774 		size += nla_total_size(4);
1775 
1776 	return size;
1777 }
1778 
1779 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1780 				     struct ipv4_devconf *devconf, u32 portid,
1781 				     u32 seq, int event, unsigned int flags,
1782 				     int type)
1783 {
1784 	struct nlmsghdr  *nlh;
1785 	struct netconfmsg *ncm;
1786 	bool all = false;
1787 
1788 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1789 			flags);
1790 	if (!nlh)
1791 		return -EMSGSIZE;
1792 
1793 	if (type == NETCONFA_ALL)
1794 		all = true;
1795 
1796 	ncm = nlmsg_data(nlh);
1797 	ncm->ncm_family = AF_INET;
1798 
1799 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1800 		goto nla_put_failure;
1801 
1802 	if ((all || type == NETCONFA_FORWARDING) &&
1803 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1804 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1805 		goto nla_put_failure;
1806 	if ((all || type == NETCONFA_RP_FILTER) &&
1807 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1808 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1809 		goto nla_put_failure;
1810 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1811 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1812 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1813 		goto nla_put_failure;
1814 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1815 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1816 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1817 		goto nla_put_failure;
1818 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1819 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1820 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1821 		goto nla_put_failure;
1822 
1823 	nlmsg_end(skb, nlh);
1824 	return 0;
1825 
1826 nla_put_failure:
1827 	nlmsg_cancel(skb, nlh);
1828 	return -EMSGSIZE;
1829 }
1830 
1831 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1832 				 struct ipv4_devconf *devconf)
1833 {
1834 	struct sk_buff *skb;
1835 	int err = -ENOBUFS;
1836 
1837 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1838 	if (!skb)
1839 		goto errout;
1840 
1841 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1842 					RTM_NEWNETCONF, 0, type);
1843 	if (err < 0) {
1844 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1845 		WARN_ON(err == -EMSGSIZE);
1846 		kfree_skb(skb);
1847 		goto errout;
1848 	}
1849 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1850 	return;
1851 errout:
1852 	if (err < 0)
1853 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1854 }
1855 
1856 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1857 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1858 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1859 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1860 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1861 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
1862 };
1863 
1864 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1865 				    struct nlmsghdr *nlh)
1866 {
1867 	struct net *net = sock_net(in_skb->sk);
1868 	struct nlattr *tb[NETCONFA_MAX+1];
1869 	struct netconfmsg *ncm;
1870 	struct sk_buff *skb;
1871 	struct ipv4_devconf *devconf;
1872 	struct in_device *in_dev;
1873 	struct net_device *dev;
1874 	int ifindex;
1875 	int err;
1876 
1877 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1878 			  devconf_ipv4_policy);
1879 	if (err < 0)
1880 		goto errout;
1881 
1882 	err = -EINVAL;
1883 	if (!tb[NETCONFA_IFINDEX])
1884 		goto errout;
1885 
1886 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1887 	switch (ifindex) {
1888 	case NETCONFA_IFINDEX_ALL:
1889 		devconf = net->ipv4.devconf_all;
1890 		break;
1891 	case NETCONFA_IFINDEX_DEFAULT:
1892 		devconf = net->ipv4.devconf_dflt;
1893 		break;
1894 	default:
1895 		dev = __dev_get_by_index(net, ifindex);
1896 		if (!dev)
1897 			goto errout;
1898 		in_dev = __in_dev_get_rtnl(dev);
1899 		if (!in_dev)
1900 			goto errout;
1901 		devconf = &in_dev->cnf;
1902 		break;
1903 	}
1904 
1905 	err = -ENOBUFS;
1906 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1907 	if (!skb)
1908 		goto errout;
1909 
1910 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1911 					NETLINK_CB(in_skb).portid,
1912 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1913 					NETCONFA_ALL);
1914 	if (err < 0) {
1915 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1916 		WARN_ON(err == -EMSGSIZE);
1917 		kfree_skb(skb);
1918 		goto errout;
1919 	}
1920 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1921 errout:
1922 	return err;
1923 }
1924 
1925 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1926 				     struct netlink_callback *cb)
1927 {
1928 	struct net *net = sock_net(skb->sk);
1929 	int h, s_h;
1930 	int idx, s_idx;
1931 	struct net_device *dev;
1932 	struct in_device *in_dev;
1933 	struct hlist_head *head;
1934 
1935 	s_h = cb->args[0];
1936 	s_idx = idx = cb->args[1];
1937 
1938 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1939 		idx = 0;
1940 		head = &net->dev_index_head[h];
1941 		rcu_read_lock();
1942 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1943 			  net->dev_base_seq;
1944 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1945 			if (idx < s_idx)
1946 				goto cont;
1947 			in_dev = __in_dev_get_rcu(dev);
1948 			if (!in_dev)
1949 				goto cont;
1950 
1951 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1952 						      &in_dev->cnf,
1953 						      NETLINK_CB(cb->skb).portid,
1954 						      cb->nlh->nlmsg_seq,
1955 						      RTM_NEWNETCONF,
1956 						      NLM_F_MULTI,
1957 						      NETCONFA_ALL) < 0) {
1958 				rcu_read_unlock();
1959 				goto done;
1960 			}
1961 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1962 cont:
1963 			idx++;
1964 		}
1965 		rcu_read_unlock();
1966 	}
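	/* Two pseudo-buckets follow the device hash so that a resumed dump
	 * does not emit the "all" and "default" configs twice.
	 */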
1967 	if (h == NETDEV_HASHENTRIES) {
1968 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1969 					      net->ipv4.devconf_all,
1970 					      NETLINK_CB(cb->skb).portid,
1971 					      cb->nlh->nlmsg_seq,
1972 					      RTM_NEWNETCONF, NLM_F_MULTI,
1973 					      NETCONFA_ALL) < 0)
1974 			goto done;
1975 		else
1976 			h++;
1977 	}
1978 	if (h == NETDEV_HASHENTRIES + 1) {
1979 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1980 					      net->ipv4.devconf_dflt,
1981 					      NETLINK_CB(cb->skb).portid,
1982 					      cb->nlh->nlmsg_seq,
1983 					      RTM_NEWNETCONF, NLM_F_MULTI,
1984 					      NETCONFA_ALL) < 0)
1985 			goto done;
1986 		else
1987 			h++;
1988 	}
1989 done:
1990 	cb->args[0] = h;
1991 	cb->args[1] = idx;
1992 
1993 	return skb->len;
1994 }
1995 
1996 #ifdef CONFIG_SYSCTL
1997 
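/* A value was written to the "default" config: copy option @i to every
 * in_device that has not set it explicitly (its cnf.state bit is still
 * clear, see devinet_conf_proc()).
 */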
1998 static void devinet_copy_dflt_conf(struct net *net, int i)
1999 {
2000 	struct net_device *dev;
2001 
2002 	rcu_read_lock();
2003 	for_each_netdev_rcu(net, dev) {
2004 		struct in_device *in_dev;
2005 
2006 		in_dev = __in_dev_get_rcu(dev);
2007 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2008 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2009 	}
2010 	rcu_read_unlock();
2011 }
2012 
2013 /* Called with RTNL locked.  Pushes the new "all" forwarding value to the
     * "default" config and to every device, disabling LRO where forwarding is
     * enabled, and notifies listeners of each change.
     */
2014 static void inet_forward_change(struct net *net)
2015 {
2016 	struct net_device *dev;
2017 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2018 
2019 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2020 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2021 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2022 				    NETCONFA_IFINDEX_ALL,
2023 				    net->ipv4.devconf_all);
2024 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2025 				    NETCONFA_IFINDEX_DEFAULT,
2026 				    net->ipv4.devconf_dflt);
2027 
2028 	for_each_netdev(net, dev) {
2029 		struct in_device *in_dev;
2030 
2031 		if (on)
2032 			dev_disable_lro(dev);
2033 
2034 		in_dev = __in_dev_get_rtnl(dev);
2035 		if (in_dev) {
2036 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2037 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2038 						    dev->ifindex, &in_dev->cnf);
2039 		}
2040 	}
2041 }
2042 
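/* Map a devconf back to the ifindex reported in netconf notifications:
 * the "default" and "all" pseudo-indices, or the owning device's ifindex.
 */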
2043 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2044 {
2045 	if (cnf == net->ipv4.devconf_dflt)
2046 		return NETCONFA_IFINDEX_DEFAULT;
2047 	else if (cnf == net->ipv4.devconf_all)
2048 		return NETCONFA_IFINDEX_ALL;
2049 	else {
2050 		struct in_device *idev
2051 			= container_of(cnf, struct in_device, cnf);
2052 		return idev->dev->ifindex;
2053 	}
2054 }
2055 
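/* proc handler shared by the per-device sysctls (DEVINET_SYSCTL_RW_ENTRY).
 * On a write it records that the option was set explicitly, propagates
 * writes to the "default" config, flushes the route cache when accept_local
 * or route_localnet is switched off, and emits netconf notifications for
 * rp_filter, proxy_arp and ignore_routes_with_linkdown changes.
 */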
2056 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2057 			     void __user *buffer,
2058 			     size_t *lenp, loff_t *ppos)
2059 {
2060 	int old_value = *(int *)ctl->data;
2061 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2062 	int new_value = *(int *)ctl->data;
2063 
2064 	if (write) {
2065 		struct ipv4_devconf *cnf = ctl->extra1;
2066 		struct net *net = ctl->extra2;
2067 		int i = (int *)ctl->data - cnf->data;
2068 		int ifindex;
2069 
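		/* Remember that this option was set explicitly; it will no
		 * longer be overwritten by devinet_copy_dflt_conf().
		 */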
2070 		set_bit(i, cnf->state);
2071 
2072 		if (cnf == net->ipv4.devconf_dflt)
2073 			devinet_copy_dflt_conf(net, i);
2074 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2075 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2076 			if ((new_value == 0) && (old_value != 0))
2077 				rt_cache_flush(net);
2078 
2079 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2080 		    new_value != old_value) {
2081 			ifindex = devinet_conf_ifindex(net, cnf);
2082 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2083 						    ifindex, cnf);
2084 		}
2085 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2086 		    new_value != old_value) {
2087 			ifindex = devinet_conf_ifindex(net, cnf);
2088 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2089 						    ifindex, cnf);
2090 		}
2091 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2092 		    new_value != old_value) {
2093 			ifindex = devinet_conf_ifindex(net, cnf);
2094 			inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2095 						    ifindex, cnf);
2096 		}
2097 	}
2098 
2099 	return ret;
2100 }
2101 
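/* Handler for the "forwarding" entries and net.ipv4.ip_forward.  Writes
 * other than to the "default" value need the RTNL lock; if it cannot be
 * taken, the old value is restored and the syscall restarted.  A write to
 * "all" fans out through inet_forward_change(), a per-device write
 * disables LRO when enabling forwarding and notifies listeners, and the
 * route cache is flushed afterwards; writing the "default" value only
 * triggers a notification.
 */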
2102 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2103 				  void __user *buffer,
2104 				  size_t *lenp, loff_t *ppos)
2105 {
2106 	int *valp = ctl->data;
2107 	int val = *valp;
2108 	loff_t pos = *ppos;
2109 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2110 
2111 	if (write && *valp != val) {
2112 		struct net *net = ctl->extra2;
2113 
2114 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2115 			if (!rtnl_trylock()) {
2116 				/* Restore the original values before restarting */
2117 				*valp = val;
2118 				*ppos = pos;
2119 				return restart_syscall();
2120 			}
2121 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2122 				inet_forward_change(net);
2123 			} else {
2124 				struct ipv4_devconf *cnf = ctl->extra1;
2125 				struct in_device *idev =
2126 					container_of(cnf, struct in_device, cnf);
2127 				if (*valp)
2128 					dev_disable_lro(idev->dev);
2129 				inet_netconf_notify_devconf(net,
2130 							    NETCONFA_FORWARDING,
2131 							    idev->dev->ifindex,
2132 							    cnf);
2133 			}
2134 			rtnl_unlock();
2135 			rt_cache_flush(net);
2136 		} else
2137 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2138 						    NETCONFA_IFINDEX_DEFAULT,
2139 						    net->ipv4.devconf_dflt);
2140 	}
2141 
2142 	return ret;
2143 }
2144 
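/* Plain integer handler for the DEVINET_SYSCTL_FLUSHING_ENTRY sysctls:
 * any change of value invalidates the routing cache.
 */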
2145 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2146 				void __user *buffer,
2147 				size_t *lenp, loff_t *ppos)
2148 {
2149 	int *valp = ctl->data;
2150 	int val = *valp;
2151 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2152 	struct net *net = ctl->extra2;
2153 
2154 	if (write && *valp != val)
2155 		rt_cache_flush(net);
2156 
2157 	return ret;
2158 }
2159 
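/* Template for the per-interface sysctl tables.  The static entries point
 * into ipv4_devconf; __devinet_sysctl_register() duplicates the table and
 * rebases .data, .extra1 and .extra2 onto the devconf instance and netns
 * actually being registered.
 */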
2160 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2161 	{ \
2162 		.procname	= name, \
2163 		.data		= ipv4_devconf.data + \
2164 				  IPV4_DEVCONF_ ## attr - 1, \
2165 		.maxlen		= sizeof(int), \
2166 		.mode		= mval, \
2167 		.proc_handler	= proc, \
2168 		.extra1		= &ipv4_devconf, \
2169 	}
2170 
2171 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2172 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2173 
2174 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2175 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2176 
2177 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2178 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2179 
2180 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2181 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2182 
2183 static struct devinet_sysctl_table {
2184 	struct ctl_table_header *sysctl_header;
2185 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2186 } devinet_sysctl = {
2187 	.devinet_vars = {
2188 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2189 					     devinet_sysctl_forward),
2190 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2191 
2192 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2193 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2194 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2195 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2196 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2197 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2198 					"accept_source_route"),
2199 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2200 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2201 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2202 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2203 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2204 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2205 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2206 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2207 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2208 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2209 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2210 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2211 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2212 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2213 					"force_igmp_version"),
2214 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2215 					"igmpv2_unsolicited_report_interval"),
2216 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2217 					"igmpv3_unsolicited_report_interval"),
2218 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2219 					"ignore_routes_with_linkdown"),
2220 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2221 					"drop_gratuitous_arp"),
2222 
2223 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2224 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2225 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2226 					      "promote_secondaries"),
2227 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2228 					      "route_localnet"),
2229 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2230 					      "drop_unicast_in_l2_multicast"),
2231 	},
2232 };
2233 
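/* Duplicate the template table, rebase it onto @p, and register it under
 * net/ipv4/conf/<dev_name>.  A NETCONFA_ALL notification announces the
 * new configuration.
 */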
2234 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2235 				     int ifindex, struct ipv4_devconf *p)
2236 {
2237 	int i;
2238 	struct devinet_sysctl_table *t;
2239 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2240 
2241 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2242 	if (!t)
2243 		goto out;
2244 
2245 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2246 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2247 		t->devinet_vars[i].extra1 = p;
2248 		t->devinet_vars[i].extra2 = net;
2249 	}
2250 
2251 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2252 
2253 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2254 	if (!t->sysctl_header)
2255 		goto free;
2256 
2257 	p->sysctl = t;
2258 
2259 	inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
2260 	return 0;
2261 
2262 free:
2263 	kfree(t);
2264 out:
2265 	return -ENOBUFS;
2266 }
2267 
2268 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2269 {
2270 	struct devinet_sysctl_table *t = cnf->sysctl;
2271 
2272 	if (!t)
2273 		return;
2274 
2275 	cnf->sysctl = NULL;
2276 	unregister_net_sysctl_table(t->sysctl_header);
2277 	kfree(t);
2278 }
2279 
2280 static int devinet_sysctl_register(struct in_device *idev)
2281 {
2282 	int err;
2283 
2284 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2285 		return -EINVAL;
2286 
2287 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2288 	if (err)
2289 		return err;
2290 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2291 					idev->dev->ifindex, &idev->cnf);
2292 	if (err)
2293 		neigh_sysctl_unregister(idev->arp_parms);
2294 	return err;
2295 }
2296 
2297 static void devinet_sysctl_unregister(struct in_device *idev)
2298 {
2299 	__devinet_sysctl_unregister(&idev->cnf);
2300 	neigh_sysctl_unregister(idev->arp_parms);
2301 }
2302 
2303 static struct ctl_table ctl_forward_entry[] = {
2304 	{
2305 		.procname	= "ip_forward",
2306 		.data		= &ipv4_devconf.data[
2307 					IPV4_DEVCONF_FORWARDING - 1],
2308 		.maxlen		= sizeof(int),
2309 		.mode		= 0644,
2310 		.proc_handler	= devinet_sysctl_forward,
2311 		.extra1		= &ipv4_devconf,
2312 		.extra2		= &init_net,
2313 	},
2314 	{ },
2315 };
2316 #endif
2317 
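/* Per-namespace setup: init_net uses the static ipv4_devconf and
 * ipv4_devconf_dflt directly, other namespaces work on kmemdup()ed
 * copies.  With CONFIG_SYSCTL the "all" and "default" conf directories
 * and the net.ipv4.ip_forward entry are registered as well.
 */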
2318 static __net_init int devinet_init_net(struct net *net)
2319 {
2320 	int err;
2321 	struct ipv4_devconf *all, *dflt;
2322 #ifdef CONFIG_SYSCTL
2323 	struct ctl_table *tbl = ctl_forward_entry;
2324 	struct ctl_table_header *forw_hdr;
2325 #endif
2326 
2327 	err = -ENOMEM;
2328 	all = &ipv4_devconf;
2329 	dflt = &ipv4_devconf_dflt;
2330 
2331 	if (!net_eq(net, &init_net)) {
2332 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2333 		if (!all)
2334 			goto err_alloc_all;
2335 
2336 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2337 		if (!dflt)
2338 			goto err_alloc_dflt;
2339 
2340 #ifdef CONFIG_SYSCTL
2341 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2342 		if (!tbl)
2343 			goto err_alloc_ctl;
2344 
2345 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2346 		tbl[0].extra1 = all;
2347 		tbl[0].extra2 = net;
2348 #endif
2349 	}
2350 
2351 #ifdef CONFIG_SYSCTL
2352 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2353 	if (err < 0)
2354 		goto err_reg_all;
2355 
2356 	err = __devinet_sysctl_register(net, "default",
2357 					NETCONFA_IFINDEX_DEFAULT, dflt);
2358 	if (err < 0)
2359 		goto err_reg_dflt;
2360 
2361 	err = -ENOMEM;
2362 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2363 	if (!forw_hdr)
2364 		goto err_reg_ctl;
2365 	net->ipv4.forw_hdr = forw_hdr;
2366 #endif
2367 
2368 	net->ipv4.devconf_all = all;
2369 	net->ipv4.devconf_dflt = dflt;
2370 	return 0;
2371 
2372 #ifdef CONFIG_SYSCTL
2373 err_reg_ctl:
2374 	__devinet_sysctl_unregister(dflt);
2375 err_reg_dflt:
2376 	__devinet_sysctl_unregister(all);
2377 err_reg_all:
2378 	if (tbl != ctl_forward_entry)
2379 		kfree(tbl);
2380 err_alloc_ctl:
2381 #endif
2382 	if (dflt != &ipv4_devconf_dflt)
2383 		kfree(dflt);
2384 err_alloc_dflt:
2385 	if (all != &ipv4_devconf)
2386 		kfree(all);
2387 err_alloc_all:
2388 	return err;
2389 }
2390 
2391 static __net_exit void devinet_exit_net(struct net *net)
2392 {
2393 #ifdef CONFIG_SYSCTL
2394 	struct ctl_table *tbl;
2395 
2396 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2397 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2398 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2399 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2400 	kfree(tbl);
2401 #endif
2402 	kfree(net->ipv4.devconf_dflt);
2403 	kfree(net->ipv4.devconf_all);
2404 }
2405 
2406 static __net_initdata struct pernet_operations devinet_ops = {
2407 	.init = devinet_init_net,
2408 	.exit = devinet_exit_net,
2409 };
2410 
2411 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2412 	.family		  = AF_INET,
2413 	.fill_link_af	  = inet_fill_link_af,
2414 	.get_link_af_size = inet_get_link_af_size,
2415 	.validate_link_af = inet_validate_link_af,
2416 	.set_link_af	  = inet_set_link_af,
2417 };
2418 
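/* Boot-time initialisation: set up the inet address hash, register the
 * per-namespace operations, gifconf handler and netdevice notifier,
 * start the periodic address-lifetime work, and hook up the rtnetlink
 * address and netconf handlers.
 */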
2419 void __init devinet_init(void)
2420 {
2421 	int i;
2422 
2423 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2424 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2425 
2426 	register_pernet_subsys(&devinet_ops);
2427 
2428 	register_gifconf(PF_INET, inet_gifconf);
2429 	register_netdevice_notifier(&ip_netdev_notifier);
2430 
2431 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2432 
2433 	rtnl_af_register(&inet_af_ops);
2434 
2435 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2436 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2437 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2438 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2439 		      inet_netconf_dump_devconf, NULL);
2440 }
2441