xref: /linux/net/ipv4/devinet.c (revision e5c86679d5e864947a52fb31e45a425dea3e7fa9)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 };
103 
104 #define IN4_ADDR_HSIZE_SHIFT	8
105 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
106 
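/* Global hash table of all configured IPv4 addresses, keyed by local
 * address and network namespace (see inet_addr_hash() below); 256 buckets.
 */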
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 	u32 val = (__force u32) addr ^ net_hash_mix(net);
112 
113 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115 
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
119 
120 	ASSERT_RTNL();
121 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123 
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126 	ASSERT_RTNL();
127 	hlist_del_init_rcu(&ifa->hash);
128 }
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	u32 hash = inet_addr_hash(net, addr);
141 	struct net_device *result = NULL;
142 	struct in_ifaddr *ifa;
143 
144 	rcu_read_lock();
145 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146 		if (ifa->ifa_local == addr) {
147 			struct net_device *dev = ifa->ifa_dev->dev;
148 
149 			if (!net_eq(dev_net(dev), net))
150 				continue;
151 			result = dev;
152 			break;
153 		}
154 	}
155 	if (!result) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fall back to the FIB local table so that communication
161 		 * over loopback subnets works.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	}
169 	if (result && devref)
170 		dev_hold(result);
171 	rcu_read_unlock();
172 	return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
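/* Illustrative usage sketch (not part of this file): with devref=false the
 * caller must itself hold RCU (or RTNL) while it uses the result, e.g.:
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, addr, false);
 *	if (dev)
 *		...use dev without holding a reference...
 *	rcu_read_unlock();
 *
 * With devref=true the device is returned with a reference held, which the
 * caller must later drop with dev_put().
 */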
175 
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180 			 int destroy);
181 #ifdef CONFIG_SYSCTL
182 static int devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static int devinet_sysctl_register(struct in_device *idev)
186 {
187 	return 0;
188 }
189 static void devinet_sysctl_unregister(struct in_device *idev)
190 {
191 }
192 #endif
193 
194 /* Locks all the inet devices. */
195 
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200 
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 	if (ifa->ifa_dev)
205 		in_dev_put(ifa->ifa_dev);
206 	kfree(ifa);
207 }
208 
209 static void inet_free_ifa(struct in_ifaddr *ifa)
210 {
211 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
212 }
213 
214 void in_dev_finish_destroy(struct in_device *idev)
215 {
216 	struct net_device *dev = idev->dev;
217 
218 	WARN_ON(idev->ifa_list);
219 	WARN_ON(idev->mc_list);
220 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
221 #ifdef NET_REFCNT_DEBUG
222 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 #endif
224 	dev_put(dev);
225 	if (!idev->dead)
226 		pr_err("Freeing alive in_device %p\n", idev);
227 	else
228 		kfree(idev);
229 }
230 EXPORT_SYMBOL(in_dev_finish_destroy);
231 
232 static struct in_device *inetdev_init(struct net_device *dev)
233 {
234 	struct in_device *in_dev;
235 	int err = -ENOMEM;
236 
237 	ASSERT_RTNL();
238 
239 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
240 	if (!in_dev)
241 		goto out;
242 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
243 			sizeof(in_dev->cnf));
244 	in_dev->cnf.sysctl = NULL;
245 	in_dev->dev = dev;
246 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
247 	if (!in_dev->arp_parms)
248 		goto out_kfree;
249 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
250 		dev_disable_lro(dev);
251 	/* Reference in_dev->dev */
252 	dev_hold(dev);
253 	/* Account for reference dev->ip_ptr (below) */
254 	in_dev_hold(in_dev);
255 
256 	err = devinet_sysctl_register(in_dev);
257 	if (err) {
258 		in_dev->dead = 1;
259 		in_dev_put(in_dev);
260 		in_dev = NULL;
261 		goto out;
262 	}
263 	ip_mc_init_dev(in_dev);
264 	if (dev->flags & IFF_UP)
265 		ip_mc_up(in_dev);
266 
267 	/* we can receive as soon as ip_ptr is set -- do this last */
268 	rcu_assign_pointer(dev->ip_ptr, in_dev);
269 out:
270 	return in_dev ?: ERR_PTR(err);
271 out_kfree:
272 	kfree(in_dev);
273 	in_dev = NULL;
274 	goto out;
275 }
276 
277 static void in_dev_rcu_put(struct rcu_head *head)
278 {
279 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
280 	in_dev_put(idev);
281 }
282 
283 static void inetdev_destroy(struct in_device *in_dev)
284 {
285 	struct in_ifaddr *ifa;
286 	struct net_device *dev;
287 
288 	ASSERT_RTNL();
289 
290 	dev = in_dev->dev;
291 
292 	in_dev->dead = 1;
293 
294 	ip_mc_destroy_dev(in_dev);
295 
296 	while ((ifa = in_dev->ifa_list) != NULL) {
297 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
298 		inet_free_ifa(ifa);
299 	}
300 
301 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
302 
303 	devinet_sysctl_unregister(in_dev);
304 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
305 	arp_ifdown(dev);
306 
307 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
308 }
309 
310 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
311 {
312 	rcu_read_lock();
313 	for_primary_ifa(in_dev) {
314 		if (inet_ifa_match(a, ifa)) {
315 			if (!b || inet_ifa_match(b, ifa)) {
316 				rcu_read_unlock();
317 				return 1;
318 			}
319 		}
320 	} endfor_ifa(in_dev);
321 	rcu_read_unlock();
322 	return 0;
323 }
324 
325 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
326 			 int destroy, struct nlmsghdr *nlh, u32 portid)
327 {
328 	struct in_ifaddr *promote = NULL;
329 	struct in_ifaddr *ifa, *ifa1 = *ifap;
330 	struct in_ifaddr *last_prim = in_dev->ifa_list;
331 	struct in_ifaddr *prev_prom = NULL;
332 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
333 
334 	ASSERT_RTNL();
335 
336 	if (in_dev->dead)
337 		goto no_promotions;
338 
339 	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
340 	 * unless alias promotion is enabled.
341 	 */
342 
343 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
344 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
345 
346 		while ((ifa = *ifap1) != NULL) {
347 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
348 			    ifa1->ifa_scope <= ifa->ifa_scope)
349 				last_prim = ifa;
350 
351 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
352 			    ifa1->ifa_mask != ifa->ifa_mask ||
353 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
354 				ifap1 = &ifa->ifa_next;
355 				prev_prom = ifa;
356 				continue;
357 			}
358 
359 			if (!do_promote) {
360 				inet_hash_remove(ifa);
361 				*ifap1 = ifa->ifa_next;
362 
363 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
364 				blocking_notifier_call_chain(&inetaddr_chain,
365 						NETDEV_DOWN, ifa);
366 				inet_free_ifa(ifa);
367 			} else {
368 				promote = ifa;
369 				break;
370 			}
371 		}
372 	}
373 
374 	/* On promotion all secondaries from the subnet are changing
375 	 * the primary IP, so we must remove all their routes silently
376 	 * and later add them back with the new prefsrc. Do this
377 	 * while all addresses are still on the device list.
378 	 */
379 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
380 		if (ifa1->ifa_mask == ifa->ifa_mask &&
381 		    inet_ifa_match(ifa1->ifa_address, ifa))
382 			fib_del_ifaddr(ifa, ifa1);
383 	}
384 
385 no_promotions:
386 	/* 2. Unlink it */
387 
388 	*ifap = ifa1->ifa_next;
389 	inet_hash_remove(ifa1);
390 
391 	/* 3. Announce address deletion */
392 
393 	/* Send the message first, then call the notifier.
394 	   At first sight, the FIB update triggered by the notifier
395 	   will refer to an already deleted ifaddr, which could confuse
396 	   netlink listeners. This is not actually a problem: gated sees
397 	   the route being deleted and, if it still thinks the ifaddr
398 	   is valid, it will try to restore the deleted routes... Grr.
399 	   So this order is correct.
400 	 */
401 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
402 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
403 
404 	if (promote) {
405 		struct in_ifaddr *next_sec = promote->ifa_next;
406 
407 		if (prev_prom) {
408 			prev_prom->ifa_next = promote->ifa_next;
409 			promote->ifa_next = last_prim->ifa_next;
410 			last_prim->ifa_next = promote;
411 		}
412 
413 		promote->ifa_flags &= ~IFA_F_SECONDARY;
414 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
415 		blocking_notifier_call_chain(&inetaddr_chain,
416 				NETDEV_UP, promote);
417 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
418 			if (ifa1->ifa_mask != ifa->ifa_mask ||
419 			    !inet_ifa_match(ifa1->ifa_address, ifa))
420 					continue;
421 			fib_add_ifaddr(ifa);
422 		}
423 
424 	}
425 	if (destroy)
426 		inet_free_ifa(ifa1);
427 }
428 
429 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
430 			 int destroy)
431 {
432 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
433 }
434 
435 static void check_lifetime(struct work_struct *work);
436 
437 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
438 
439 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
440 			     u32 portid)
441 {
442 	struct in_device *in_dev = ifa->ifa_dev;
443 	struct in_ifaddr *ifa1, **ifap, **last_primary;
444 
445 	ASSERT_RTNL();
446 
447 	if (!ifa->ifa_local) {
448 		inet_free_ifa(ifa);
449 		return 0;
450 	}
451 
452 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
453 	last_primary = &in_dev->ifa_list;
454 
455 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
456 	     ifap = &ifa1->ifa_next) {
457 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
458 		    ifa->ifa_scope <= ifa1->ifa_scope)
459 			last_primary = &ifa1->ifa_next;
460 		if (ifa1->ifa_mask == ifa->ifa_mask &&
461 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
462 			if (ifa1->ifa_local == ifa->ifa_local) {
463 				inet_free_ifa(ifa);
464 				return -EEXIST;
465 			}
466 			if (ifa1->ifa_scope != ifa->ifa_scope) {
467 				inet_free_ifa(ifa);
468 				return -EINVAL;
469 			}
470 			ifa->ifa_flags |= IFA_F_SECONDARY;
471 		}
472 	}
473 
474 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
475 		prandom_seed((__force u32) ifa->ifa_local);
476 		ifap = last_primary;
477 	}
478 
479 	ifa->ifa_next = *ifap;
480 	*ifap = ifa;
481 
482 	inet_hash_insert(dev_net(in_dev->dev), ifa);
483 
484 	cancel_delayed_work(&check_lifetime_work);
485 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
486 
487 	/* Send the message first, then call the notifier.
488 	   The notifier will trigger the FIB update, so netlink
489 	   listeners will know about the new ifaddr. */
490 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
491 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
492 
493 	return 0;
494 }
495 
496 static int inet_insert_ifa(struct in_ifaddr *ifa)
497 {
498 	return __inet_insert_ifa(ifa, NULL, 0);
499 }
500 
501 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
502 {
503 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
504 
505 	ASSERT_RTNL();
506 
507 	if (!in_dev) {
508 		inet_free_ifa(ifa);
509 		return -ENOBUFS;
510 	}
511 	ipv4_devconf_setall(in_dev);
512 	neigh_parms_data_state_setall(in_dev->arp_parms);
513 	if (ifa->ifa_dev != in_dev) {
514 		WARN_ON(ifa->ifa_dev);
515 		in_dev_hold(in_dev);
516 		ifa->ifa_dev = in_dev;
517 	}
518 	if (ipv4_is_loopback(ifa->ifa_local))
519 		ifa->ifa_scope = RT_SCOPE_HOST;
520 	return inet_insert_ifa(ifa);
521 }
522 
523 /* Caller must hold RCU or RTNL:
524  * we don't take a reference on the found in_device.
525  */
526 struct in_device *inetdev_by_index(struct net *net, int ifindex)
527 {
528 	struct net_device *dev;
529 	struct in_device *in_dev = NULL;
530 
531 	rcu_read_lock();
532 	dev = dev_get_by_index_rcu(net, ifindex);
533 	if (dev)
534 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
535 	rcu_read_unlock();
536 	return in_dev;
537 }
538 EXPORT_SYMBOL(inetdev_by_index);
539 
540 /* Called only from RTNL-locked context. No other locks. */
541 
542 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
543 				    __be32 mask)
544 {
545 	ASSERT_RTNL();
546 
547 	for_primary_ifa(in_dev) {
548 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
549 			return ifa;
550 	} endfor_ifa(in_dev);
551 	return NULL;
552 }
553 
554 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
555 {
556 	struct ip_mreqn mreq = {
557 		.imr_multiaddr.s_addr = ifa->ifa_address,
558 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
559 	};
560 	int ret;
561 
562 	ASSERT_RTNL();
563 
564 	lock_sock(sk);
565 	if (join)
566 		ret = ip_mc_join_group(sk, &mreq);
567 	else
568 		ret = ip_mc_leave_group(sk, &mreq);
569 	release_sock(sk);
570 
571 	return ret;
572 }
573 
574 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
575 {
576 	struct net *net = sock_net(skb->sk);
577 	struct nlattr *tb[IFA_MAX+1];
578 	struct in_device *in_dev;
579 	struct ifaddrmsg *ifm;
580 	struct in_ifaddr *ifa, **ifap;
581 	int err = -EINVAL;
582 
583 	ASSERT_RTNL();
584 
585 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
586 	if (err < 0)
587 		goto errout;
588 
589 	ifm = nlmsg_data(nlh);
590 	in_dev = inetdev_by_index(net, ifm->ifa_index);
591 	if (!in_dev) {
592 		err = -ENODEV;
593 		goto errout;
594 	}
595 
596 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
597 	     ifap = &ifa->ifa_next) {
598 		if (tb[IFA_LOCAL] &&
599 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
600 			continue;
601 
602 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
603 			continue;
604 
605 		if (tb[IFA_ADDRESS] &&
606 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
607 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
608 			continue;
609 
610 		if (ipv4_is_multicast(ifa->ifa_address))
611 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
612 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
613 		return 0;
614 	}
615 
616 	err = -EADDRNOTAVAIL;
617 errout:
618 	return err;
619 }
620 
621 #define INFINITY_LIFE_TIME	0xFFFFFFFF
622 
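/* Periodic worker: scan the address hash and, for addresses with finite
 * lifetimes, delete those whose valid lifetime has expired and mark those
 * past their preferred lifetime as IFA_F_DEPRECATED, then reschedule.
 */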
623 static void check_lifetime(struct work_struct *work)
624 {
625 	unsigned long now, next, next_sec, next_sched;
626 	struct in_ifaddr *ifa;
627 	struct hlist_node *n;
628 	int i;
629 
630 	now = jiffies;
631 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
632 
633 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
634 		bool change_needed = false;
635 
636 		rcu_read_lock();
637 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
638 			unsigned long age;
639 
640 			if (ifa->ifa_flags & IFA_F_PERMANENT)
641 				continue;
642 
643 			/* We try to batch several events at once. */
644 			age = (now - ifa->ifa_tstamp +
645 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
646 
647 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
648 			    age >= ifa->ifa_valid_lft) {
649 				change_needed = true;
650 			} else if (ifa->ifa_preferred_lft ==
651 				   INFINITY_LIFE_TIME) {
652 				continue;
653 			} else if (age >= ifa->ifa_preferred_lft) {
654 				if (time_before(ifa->ifa_tstamp +
655 						ifa->ifa_valid_lft * HZ, next))
656 					next = ifa->ifa_tstamp +
657 					       ifa->ifa_valid_lft * HZ;
658 
659 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
660 					change_needed = true;
661 			} else if (time_before(ifa->ifa_tstamp +
662 					       ifa->ifa_preferred_lft * HZ,
663 					       next)) {
664 				next = ifa->ifa_tstamp +
665 				       ifa->ifa_preferred_lft * HZ;
666 			}
667 		}
668 		rcu_read_unlock();
669 		if (!change_needed)
670 			continue;
671 		rtnl_lock();
672 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
673 			unsigned long age;
674 
675 			if (ifa->ifa_flags & IFA_F_PERMANENT)
676 				continue;
677 
678 			/* We try to batch several events at once. */
679 			age = (now - ifa->ifa_tstamp +
680 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
681 
682 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
683 			    age >= ifa->ifa_valid_lft) {
684 				struct in_ifaddr **ifap;
685 
686 				for (ifap = &ifa->ifa_dev->ifa_list;
687 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
688 					if (*ifap == ifa) {
689 						inet_del_ifa(ifa->ifa_dev,
690 							     ifap, 1);
691 						break;
692 					}
693 				}
694 			} else if (ifa->ifa_preferred_lft !=
695 				   INFINITY_LIFE_TIME &&
696 				   age >= ifa->ifa_preferred_lft &&
697 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
698 				ifa->ifa_flags |= IFA_F_DEPRECATED;
699 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
700 			}
701 		}
702 		rtnl_unlock();
703 	}
704 
705 	next_sec = round_jiffies_up(next);
706 	next_sched = next;
707 
708 	/* If rounded timeout is accurate enough, accept it. */
709 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
710 		next_sched = next_sec;
711 
712 	now = jiffies;
713 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
714 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
715 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
716 
717 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
718 			next_sched - now);
719 }
720 
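/* Translate netlink valid/preferred lifetimes into ifa state: an infinite
 * valid lifetime marks the address IFA_F_PERMANENT, a zero preferred
 * lifetime marks it IFA_F_DEPRECATED, and the timestamps are refreshed.
 */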
721 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
722 			     __u32 prefered_lft)
723 {
724 	unsigned long timeout;
725 
726 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
727 
728 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
729 	if (addrconf_finite_timeout(timeout))
730 		ifa->ifa_valid_lft = timeout;
731 	else
732 		ifa->ifa_flags |= IFA_F_PERMANENT;
733 
734 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
735 	if (addrconf_finite_timeout(timeout)) {
736 		if (timeout == 0)
737 			ifa->ifa_flags |= IFA_F_DEPRECATED;
738 		ifa->ifa_preferred_lft = timeout;
739 	}
740 	ifa->ifa_tstamp = jiffies;
741 	if (!ifa->ifa_cstamp)
742 		ifa->ifa_cstamp = ifa->ifa_tstamp;
743 }
744 
745 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
746 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
747 {
748 	struct nlattr *tb[IFA_MAX+1];
749 	struct in_ifaddr *ifa;
750 	struct ifaddrmsg *ifm;
751 	struct net_device *dev;
752 	struct in_device *in_dev;
753 	int err;
754 
755 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
756 	if (err < 0)
757 		goto errout;
758 
759 	ifm = nlmsg_data(nlh);
760 	err = -EINVAL;
761 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
762 		goto errout;
763 
764 	dev = __dev_get_by_index(net, ifm->ifa_index);
765 	err = -ENODEV;
766 	if (!dev)
767 		goto errout;
768 
769 	in_dev = __in_dev_get_rtnl(dev);
770 	err = -ENOBUFS;
771 	if (!in_dev)
772 		goto errout;
773 
774 	ifa = inet_alloc_ifa();
775 	if (!ifa)
776 		/*
777 		 * A potential in_dev allocation can be left alive; it stays
778 		 * assigned to its device and is destroyed with it.
779 		 */
780 		goto errout;
781 
782 	ipv4_devconf_setall(in_dev);
783 	neigh_parms_data_state_setall(in_dev->arp_parms);
784 	in_dev_hold(in_dev);
785 
786 	if (!tb[IFA_ADDRESS])
787 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
788 
789 	INIT_HLIST_NODE(&ifa->hash);
790 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
791 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
792 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
793 					 ifm->ifa_flags;
794 	ifa->ifa_scope = ifm->ifa_scope;
795 	ifa->ifa_dev = in_dev;
796 
797 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
798 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
799 
800 	if (tb[IFA_BROADCAST])
801 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
802 
803 	if (tb[IFA_LABEL])
804 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
805 	else
806 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
807 
808 	if (tb[IFA_CACHEINFO]) {
809 		struct ifa_cacheinfo *ci;
810 
811 		ci = nla_data(tb[IFA_CACHEINFO]);
812 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
813 			err = -EINVAL;
814 			goto errout_free;
815 		}
816 		*pvalid_lft = ci->ifa_valid;
817 		*pprefered_lft = ci->ifa_prefered;
818 	}
819 
820 	return ifa;
821 
822 errout_free:
823 	inet_free_ifa(ifa);
824 errout:
825 	return ERR_PTR(err);
826 }
827 
828 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
829 {
830 	struct in_device *in_dev = ifa->ifa_dev;
831 	struct in_ifaddr *ifa1, **ifap;
832 
833 	if (!ifa->ifa_local)
834 		return NULL;
835 
836 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
837 	     ifap = &ifa1->ifa_next) {
838 		if (ifa1->ifa_mask == ifa->ifa_mask &&
839 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
840 		    ifa1->ifa_local == ifa->ifa_local)
841 			return ifa1;
842 	}
843 	return NULL;
844 }
845 
846 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
847 {
848 	struct net *net = sock_net(skb->sk);
849 	struct in_ifaddr *ifa;
850 	struct in_ifaddr *ifa_existing;
851 	__u32 valid_lft = INFINITY_LIFE_TIME;
852 	__u32 prefered_lft = INFINITY_LIFE_TIME;
853 
854 	ASSERT_RTNL();
855 
856 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
857 	if (IS_ERR(ifa))
858 		return PTR_ERR(ifa);
859 
860 	ifa_existing = find_matching_ifa(ifa);
861 	if (!ifa_existing) {
862 		/* It would be best to check for !NLM_F_CREATE here but
863 		 * userspace already relies on not having to provide this.
864 		 */
865 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
866 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
867 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
868 					       true, ifa);
869 
870 			if (ret < 0) {
871 				inet_free_ifa(ifa);
872 				return ret;
873 			}
874 		}
875 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
876 	} else {
877 		inet_free_ifa(ifa);
878 
879 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
880 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
881 			return -EEXIST;
882 		ifa = ifa_existing;
883 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
884 		cancel_delayed_work(&check_lifetime_work);
885 		queue_delayed_work(system_power_efficient_wq,
886 				&check_lifetime_work, 0);
887 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
888 	}
889 	return 0;
890 }
891 
892 /*
893  *	Determine a default network mask, based on the IP address.
894  */
895 
896 static int inet_abc_len(__be32 addr)
897 {
898 	int rc = -1;	/* Something else, probably a multicast. */
899 
900 	if (ipv4_is_zeronet(addr))
901 		rc = 0;
902 	else {
903 		__u32 haddr = ntohl(addr);
904 
905 		if (IN_CLASSA(haddr))
906 			rc = 8;
907 		else if (IN_CLASSB(haddr))
908 			rc = 16;
909 		else if (IN_CLASSC(haddr))
910 			rc = 24;
911 	}
912 
913 	return rc;
914 }
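/* Illustrative classful examples: 10.0.0.1 (class A) maps to 8, 172.16.0.1
 * (class B) to 16, 192.168.1.1 (class C) to 24; 0.0.0.0 yields 0 and
 * multicast or other addresses yield -1.
 */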
915 
916 
917 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
918 {
919 	struct ifreq ifr;
920 	struct sockaddr_in sin_orig;
921 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
922 	struct in_device *in_dev;
923 	struct in_ifaddr **ifap = NULL;
924 	struct in_ifaddr *ifa = NULL;
925 	struct net_device *dev;
926 	char *colon;
927 	int ret = -EFAULT;
928 	int tryaddrmatch = 0;
929 
930 	/*
931 	 *	Fetch the caller's info block into kernel space
932 	 */
933 
934 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
935 		goto out;
936 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
937 
938 	/* save original address for comparison */
939 	memcpy(&sin_orig, sin, sizeof(*sin));
940 
941 	colon = strchr(ifr.ifr_name, ':');
942 	if (colon)
943 		*colon = 0;
944 
945 	dev_load(net, ifr.ifr_name);
946 
947 	switch (cmd) {
948 	case SIOCGIFADDR:	/* Get interface address */
949 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
950 	case SIOCGIFDSTADDR:	/* Get the destination address */
951 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
952 		/* Note that these ioctls will not sleep,
953 		   so we do not impose a lock.
954 		   One day we will be forced to put a shared lock here (I mean SMP).
955 		 */
956 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
957 		memset(sin, 0, sizeof(*sin));
958 		sin->sin_family = AF_INET;
959 		break;
960 
961 	case SIOCSIFFLAGS:
962 		ret = -EPERM;
963 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
964 			goto out;
965 		break;
966 	case SIOCSIFADDR:	/* Set interface address (and family) */
967 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
968 	case SIOCSIFDSTADDR:	/* Set the destination address */
969 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
970 		ret = -EPERM;
971 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
972 			goto out;
973 		ret = -EINVAL;
974 		if (sin->sin_family != AF_INET)
975 			goto out;
976 		break;
977 	default:
978 		ret = -EINVAL;
979 		goto out;
980 	}
981 
982 	rtnl_lock();
983 
984 	ret = -ENODEV;
985 	dev = __dev_get_by_name(net, ifr.ifr_name);
986 	if (!dev)
987 		goto done;
988 
989 	if (colon)
990 		*colon = ':';
991 
992 	in_dev = __in_dev_get_rtnl(dev);
993 	if (in_dev) {
994 		if (tryaddrmatch) {
995 			/* Matthias Andree */
996 			/* compare label and address (4.4BSD style) */
997 			/* note: we only do this for a limited set of ioctls
998 			   and only if the original address family was AF_INET.
999 			   This is checked above. */
1000 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1001 			     ifap = &ifa->ifa_next) {
1002 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1003 				    sin_orig.sin_addr.s_addr ==
1004 							ifa->ifa_local) {
1005 					break; /* found */
1006 				}
1007 			}
1008 		}
1009 		/* We didn't get a match; maybe the application is
1010 		   4.3BSD-style and passed in junk, so we fall back to
1011 		   comparing just the label. */
1012 		if (!ifa) {
1013 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1014 			     ifap = &ifa->ifa_next)
1015 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1016 					break;
1017 		}
1018 	}
1019 
1020 	ret = -EADDRNOTAVAIL;
1021 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1022 		goto done;
1023 
1024 	switch (cmd) {
1025 	case SIOCGIFADDR:	/* Get interface address */
1026 		sin->sin_addr.s_addr = ifa->ifa_local;
1027 		goto rarok;
1028 
1029 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1030 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1031 		goto rarok;
1032 
1033 	case SIOCGIFDSTADDR:	/* Get the destination address */
1034 		sin->sin_addr.s_addr = ifa->ifa_address;
1035 		goto rarok;
1036 
1037 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1038 		sin->sin_addr.s_addr = ifa->ifa_mask;
1039 		goto rarok;
1040 
1041 	case SIOCSIFFLAGS:
1042 		if (colon) {
1043 			ret = -EADDRNOTAVAIL;
1044 			if (!ifa)
1045 				break;
1046 			ret = 0;
1047 			if (!(ifr.ifr_flags & IFF_UP))
1048 				inet_del_ifa(in_dev, ifap, 1);
1049 			break;
1050 		}
1051 		ret = dev_change_flags(dev, ifr.ifr_flags);
1052 		break;
1053 
1054 	case SIOCSIFADDR:	/* Set interface address (and family) */
1055 		ret = -EINVAL;
1056 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1057 			break;
1058 
1059 		if (!ifa) {
1060 			ret = -ENOBUFS;
1061 			ifa = inet_alloc_ifa();
1062 			if (!ifa)
1063 				break;
1064 			INIT_HLIST_NODE(&ifa->hash);
1065 			if (colon)
1066 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1067 			else
1068 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1069 		} else {
1070 			ret = 0;
1071 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1072 				break;
1073 			inet_del_ifa(in_dev, ifap, 0);
1074 			ifa->ifa_broadcast = 0;
1075 			ifa->ifa_scope = 0;
1076 		}
1077 
1078 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1079 
1080 		if (!(dev->flags & IFF_POINTOPOINT)) {
1081 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1082 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1083 			if ((dev->flags & IFF_BROADCAST) &&
1084 			    ifa->ifa_prefixlen < 31)
1085 				ifa->ifa_broadcast = ifa->ifa_address |
1086 						     ~ifa->ifa_mask;
1087 		} else {
1088 			ifa->ifa_prefixlen = 32;
1089 			ifa->ifa_mask = inet_make_mask(32);
1090 		}
1091 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1092 		ret = inet_set_ifa(dev, ifa);
1093 		break;
1094 
1095 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1096 		ret = 0;
1097 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1098 			inet_del_ifa(in_dev, ifap, 0);
1099 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1100 			inet_insert_ifa(ifa);
1101 		}
1102 		break;
1103 
1104 	case SIOCSIFDSTADDR:	/* Set the destination address */
1105 		ret = 0;
1106 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1107 			break;
1108 		ret = -EINVAL;
1109 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1110 			break;
1111 		ret = 0;
1112 		inet_del_ifa(in_dev, ifap, 0);
1113 		ifa->ifa_address = sin->sin_addr.s_addr;
1114 		inet_insert_ifa(ifa);
1115 		break;
1116 
1117 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1118 
1119 		/*
1120 		 *	The mask we set must be legal.
1121 		 */
1122 		ret = -EINVAL;
1123 		if (bad_mask(sin->sin_addr.s_addr, 0))
1124 			break;
1125 		ret = 0;
1126 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1127 			__be32 old_mask = ifa->ifa_mask;
1128 			inet_del_ifa(in_dev, ifap, 0);
1129 			ifa->ifa_mask = sin->sin_addr.s_addr;
1130 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1131 
1132 			/* If the current broadcast address was derived
1133 			 * from the old netmask, recalculate the broadcast
1134 			 * address. Otherwise it's a funny address, so
1135 			 * don't touch it since the user seems to know
1136 			 * what (s)he's doing...
1137 			 */
1138 			if ((dev->flags & IFF_BROADCAST) &&
1139 			    (ifa->ifa_prefixlen < 31) &&
1140 			    (ifa->ifa_broadcast ==
1141 			     (ifa->ifa_local|~old_mask))) {
1142 				ifa->ifa_broadcast = (ifa->ifa_local |
1143 						      ~sin->sin_addr.s_addr);
1144 			}
1145 			inet_insert_ifa(ifa);
1146 		}
1147 		break;
1148 	}
1149 done:
1150 	rtnl_unlock();
1151 out:
1152 	return ret;
1153 rarok:
1154 	rtnl_unlock();
1155 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1156 	goto out;
1157 }
1158 
1159 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1160 {
1161 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1162 	struct in_ifaddr *ifa;
1163 	struct ifreq ifr;
1164 	int done = 0;
1165 
1166 	if (!in_dev)
1167 		goto out;
1168 
1169 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1170 		if (!buf) {
1171 			done += sizeof(ifr);
1172 			continue;
1173 		}
1174 		if (len < (int) sizeof(ifr))
1175 			break;
1176 		memset(&ifr, 0, sizeof(struct ifreq));
1177 		strcpy(ifr.ifr_name, ifa->ifa_label);
1178 
1179 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1180 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1181 								ifa->ifa_local;
1182 
1183 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1184 			done = -EFAULT;
1185 			break;
1186 		}
1187 		buf  += sizeof(struct ifreq);
1188 		len  -= sizeof(struct ifreq);
1189 		done += sizeof(struct ifreq);
1190 	}
1191 out:
1192 	return done;
1193 }
1194 
1195 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1196 {
1197 	__be32 addr = 0;
1198 	struct in_device *in_dev;
1199 	struct net *net = dev_net(dev);
1200 	int master_idx;
1201 
1202 	rcu_read_lock();
1203 	in_dev = __in_dev_get_rcu(dev);
1204 	if (!in_dev)
1205 		goto no_in_dev;
1206 
1207 	for_primary_ifa(in_dev) {
1208 		if (ifa->ifa_scope > scope)
1209 			continue;
1210 		if (!dst || inet_ifa_match(dst, ifa)) {
1211 			addr = ifa->ifa_local;
1212 			break;
1213 		}
1214 		if (!addr)
1215 			addr = ifa->ifa_local;
1216 	} endfor_ifa(in_dev);
1217 
1218 	if (addr)
1219 		goto out_unlock;
1220 no_in_dev:
1221 	master_idx = l3mdev_master_ifindex_rcu(dev);
1222 
1223 	/* For VRFs, the VRF device takes the place of the loopback device,
1224 	 * with addresses on it being preferred.  Note in such cases the
1225 	 * loopback device will be among the devices that fail the master_idx
1226 	 * equality check in the loop below.
1227 	 */
1228 	if (master_idx &&
1229 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1230 	    (in_dev = __in_dev_get_rcu(dev))) {
1231 		for_primary_ifa(in_dev) {
1232 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1233 			    ifa->ifa_scope <= scope) {
1234 				addr = ifa->ifa_local;
1235 				goto out_unlock;
1236 			}
1237 		} endfor_ifa(in_dev);
1238 	}
1239 
1240 	/* Non-loopback addresses configured on the loopback device should be
1241 	   preferred in this case. It is important that lo is the first
1242 	   interface in the dev_base list.
1243 	 */
1244 	for_each_netdev_rcu(net, dev) {
1245 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1246 			continue;
1247 
1248 		in_dev = __in_dev_get_rcu(dev);
1249 		if (!in_dev)
1250 			continue;
1251 
1252 		for_primary_ifa(in_dev) {
1253 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1254 			    ifa->ifa_scope <= scope) {
1255 				addr = ifa->ifa_local;
1256 				goto out_unlock;
1257 			}
1258 		} endfor_ifa(in_dev);
1259 	}
1260 out_unlock:
1261 	rcu_read_unlock();
1262 	return addr;
1263 }
1264 EXPORT_SYMBOL(inet_select_addr);
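/* Illustrative usage sketch (not part of this file): pick a source address
 * on dev that is suitable for reaching dst at the given scope:
 *
 *	__be32 src = inet_select_addr(dev, dst, RT_SCOPE_UNIVERSE);
 *	if (!src)
 *		...no usable local address in this namespace...
 */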
1265 
1266 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1267 			      __be32 local, int scope)
1268 {
1269 	int same = 0;
1270 	__be32 addr = 0;
1271 
1272 	for_ifa(in_dev) {
1273 		if (!addr &&
1274 		    (local == ifa->ifa_local || !local) &&
1275 		    ifa->ifa_scope <= scope) {
1276 			addr = ifa->ifa_local;
1277 			if (same)
1278 				break;
1279 		}
1280 		if (!same) {
1281 			same = (!local || inet_ifa_match(local, ifa)) &&
1282 				(!dst || inet_ifa_match(dst, ifa));
1283 			if (same && addr) {
1284 				if (local || !dst)
1285 					break;
1286 				/* Is the selected addr in the dst subnet? */
1287 				if (inet_ifa_match(addr, ifa))
1288 					break;
1289 				/* No; can we use the new local src instead? */
1290 				if (ifa->ifa_scope <= scope) {
1291 					addr = ifa->ifa_local;
1292 					break;
1293 				}
1294 				/* keep searching for a dst subnet large enough for addr */
1295 				same = 0;
1296 			}
1297 		}
1298 	} endfor_ifa(in_dev);
1299 
1300 	return same ? addr : 0;
1301 }
1302 
1303 /*
1304  * Confirm that a local IP address exists, using wildcards:
1305  * - net: netns to check, cannot be NULL
1306  * - in_dev: only on this interface, NULL=any interface
1307  * - dst: only in the same subnet as dst, 0=any dst
1308  * - local: address, 0=autoselect the local address
1309  * - scope: maximum allowed scope value for the local address
1310  */
1311 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1312 			 __be32 dst, __be32 local, int scope)
1313 {
1314 	__be32 addr = 0;
1315 	struct net_device *dev;
1316 
1317 	if (in_dev)
1318 		return confirm_addr_indev(in_dev, dst, local, scope);
1319 
1320 	rcu_read_lock();
1321 	for_each_netdev_rcu(net, dev) {
1322 		in_dev = __in_dev_get_rcu(dev);
1323 		if (in_dev) {
1324 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1325 			if (addr)
1326 				break;
1327 		}
1328 	}
1329 	rcu_read_unlock();
1330 
1331 	return addr;
1332 }
1333 EXPORT_SYMBOL(inet_confirm_addr);
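/* Illustrative usage sketch (not part of this file): check whether a local
 * address is configured anywhere in the namespace, at link scope or tighter:
 *
 *	if (inet_confirm_addr(net, NULL, 0, local, RT_SCOPE_LINK))
 *		...local exists on some interface...
 */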
1334 
1335 /*
1336  *	Device notifier
1337  */
1338 
1339 int register_inetaddr_notifier(struct notifier_block *nb)
1340 {
1341 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1342 }
1343 EXPORT_SYMBOL(register_inetaddr_notifier);
1344 
1345 int unregister_inetaddr_notifier(struct notifier_block *nb)
1346 {
1347 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1348 }
1349 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1350 
1351 /* Rename ifa_labels for a device name change. Make some effort to preserve
1352  * existing alias numbering and to create unique labels if possible.
1353  */
1354 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1355 {
1356 	struct in_ifaddr *ifa;
1357 	int named = 0;
1358 
1359 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1360 		char old[IFNAMSIZ], *dot;
1361 
1362 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1363 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1364 		if (named++ == 0)
1365 			goto skip;
1366 		dot = strchr(old, ':');
1367 		if (!dot) {
1368 			sprintf(old, ":%d", named);
1369 			dot = old;
1370 		}
1371 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1372 			strcat(ifa->ifa_label, dot);
1373 		else
1374 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1375 skip:
1376 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1377 	}
1378 }
1379 
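/* 68 bytes is the minimum MTU an IPv4 link must support (RFC 791). */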
1380 static bool inetdev_valid_mtu(unsigned int mtu)
1381 {
1382 	return mtu >= 68;
1383 }
1384 
1385 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1386 					struct in_device *in_dev)
1387 
1388 {
1389 	struct in_ifaddr *ifa;
1390 
1391 	for (ifa = in_dev->ifa_list; ifa;
1392 	     ifa = ifa->ifa_next) {
1393 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1394 			 ifa->ifa_local, dev,
1395 			 ifa->ifa_local, NULL,
1396 			 dev->dev_addr, NULL);
1397 	}
1398 }
1399 
1400 /* Called only under RTNL semaphore */
1401 
1402 static int inetdev_event(struct notifier_block *this, unsigned long event,
1403 			 void *ptr)
1404 {
1405 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1406 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1407 
1408 	ASSERT_RTNL();
1409 
1410 	if (!in_dev) {
1411 		if (event == NETDEV_REGISTER) {
1412 			in_dev = inetdev_init(dev);
1413 			if (IS_ERR(in_dev))
1414 				return notifier_from_errno(PTR_ERR(in_dev));
1415 			if (dev->flags & IFF_LOOPBACK) {
1416 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1417 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1418 			}
1419 		} else if (event == NETDEV_CHANGEMTU) {
1420 			/* Re-enabling IP */
1421 			if (inetdev_valid_mtu(dev->mtu))
1422 				in_dev = inetdev_init(dev);
1423 		}
1424 		goto out;
1425 	}
1426 
1427 	switch (event) {
1428 	case NETDEV_REGISTER:
1429 		pr_debug("%s: bug\n", __func__);
1430 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1431 		break;
1432 	case NETDEV_UP:
1433 		if (!inetdev_valid_mtu(dev->mtu))
1434 			break;
1435 		if (dev->flags & IFF_LOOPBACK) {
1436 			struct in_ifaddr *ifa = inet_alloc_ifa();
1437 
1438 			if (ifa) {
1439 				INIT_HLIST_NODE(&ifa->hash);
1440 				ifa->ifa_local =
1441 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1442 				ifa->ifa_prefixlen = 8;
1443 				ifa->ifa_mask = inet_make_mask(8);
1444 				in_dev_hold(in_dev);
1445 				ifa->ifa_dev = in_dev;
1446 				ifa->ifa_scope = RT_SCOPE_HOST;
1447 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1448 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1449 						 INFINITY_LIFE_TIME);
1450 				ipv4_devconf_setall(in_dev);
1451 				neigh_parms_data_state_setall(in_dev->arp_parms);
1452 				inet_insert_ifa(ifa);
1453 			}
1454 		}
1455 		ip_mc_up(in_dev);
1456 		/* fall through */
1457 	case NETDEV_CHANGEADDR:
1458 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1459 			break;
1460 		/* fall through */
1461 	case NETDEV_NOTIFY_PEERS:
1462 		/* Send gratuitous ARP to notify of link change */
1463 		inetdev_send_gratuitous_arp(dev, in_dev);
1464 		break;
1465 	case NETDEV_DOWN:
1466 		ip_mc_down(in_dev);
1467 		break;
1468 	case NETDEV_PRE_TYPE_CHANGE:
1469 		ip_mc_unmap(in_dev);
1470 		break;
1471 	case NETDEV_POST_TYPE_CHANGE:
1472 		ip_mc_remap(in_dev);
1473 		break;
1474 	case NETDEV_CHANGEMTU:
1475 		if (inetdev_valid_mtu(dev->mtu))
1476 			break;
1477 		/* disable IP when the MTU is too small (fall through) */
1478 	case NETDEV_UNREGISTER:
1479 		inetdev_destroy(in_dev);
1480 		break;
1481 	case NETDEV_CHANGENAME:
1482 		/* Do not notify about the label change; this event is
1483 		 * not interesting to applications using netlink.
1484 		 */
1485 		inetdev_changename(dev, in_dev);
1486 
1487 		devinet_sysctl_unregister(in_dev);
1488 		devinet_sysctl_register(in_dev);
1489 		break;
1490 	}
1491 out:
1492 	return NOTIFY_DONE;
1493 }
1494 
1495 static struct notifier_block ip_netdev_notifier = {
1496 	.notifier_call = inetdev_event,
1497 };
1498 
1499 static size_t inet_nlmsg_size(void)
1500 {
1501 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1502 	       + nla_total_size(4) /* IFA_ADDRESS */
1503 	       + nla_total_size(4) /* IFA_LOCAL */
1504 	       + nla_total_size(4) /* IFA_BROADCAST */
1505 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1506 	       + nla_total_size(4)  /* IFA_FLAGS */
1507 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1508 }
1509 
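/* Convert a jiffies timestamp (relative to INITIAL_JIFFIES) to the
 * hundredths of a second reported in struct ifa_cacheinfo.
 */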
1510 static inline u32 cstamp_delta(unsigned long cstamp)
1511 {
1512 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1513 }
1514 
1515 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1516 			 unsigned long tstamp, u32 preferred, u32 valid)
1517 {
1518 	struct ifa_cacheinfo ci;
1519 
1520 	ci.cstamp = cstamp_delta(cstamp);
1521 	ci.tstamp = cstamp_delta(tstamp);
1522 	ci.ifa_prefered = preferred;
1523 	ci.ifa_valid = valid;
1524 
1525 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1526 }
1527 
1528 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1529 			    u32 portid, u32 seq, int event, unsigned int flags)
1530 {
1531 	struct ifaddrmsg *ifm;
1532 	struct nlmsghdr  *nlh;
1533 	u32 preferred, valid;
1534 
1535 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1536 	if (!nlh)
1537 		return -EMSGSIZE;
1538 
1539 	ifm = nlmsg_data(nlh);
1540 	ifm->ifa_family = AF_INET;
1541 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1542 	ifm->ifa_flags = ifa->ifa_flags;
1543 	ifm->ifa_scope = ifa->ifa_scope;
1544 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1545 
1546 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1547 		preferred = ifa->ifa_preferred_lft;
1548 		valid = ifa->ifa_valid_lft;
1549 		if (preferred != INFINITY_LIFE_TIME) {
1550 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1551 
1552 			if (preferred > tval)
1553 				preferred -= tval;
1554 			else
1555 				preferred = 0;
1556 			if (valid != INFINITY_LIFE_TIME) {
1557 				if (valid > tval)
1558 					valid -= tval;
1559 				else
1560 					valid = 0;
1561 			}
1562 		}
1563 	} else {
1564 		preferred = INFINITY_LIFE_TIME;
1565 		valid = INFINITY_LIFE_TIME;
1566 	}
1567 	if ((ifa->ifa_address &&
1568 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1569 	    (ifa->ifa_local &&
1570 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1571 	    (ifa->ifa_broadcast &&
1572 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1573 	    (ifa->ifa_label[0] &&
1574 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1575 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1576 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1577 			  preferred, valid))
1578 		goto nla_put_failure;
1579 
1580 	nlmsg_end(skb, nlh);
1581 	return 0;
1582 
1583 nla_put_failure:
1584 	nlmsg_cancel(skb, nlh);
1585 	return -EMSGSIZE;
1586 }
1587 
1588 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1589 {
1590 	struct net *net = sock_net(skb->sk);
1591 	int h, s_h;
1592 	int idx, s_idx;
1593 	int ip_idx, s_ip_idx;
1594 	struct net_device *dev;
1595 	struct in_device *in_dev;
1596 	struct in_ifaddr *ifa;
1597 	struct hlist_head *head;
1598 
1599 	s_h = cb->args[0];
1600 	s_idx = idx = cb->args[1];
1601 	s_ip_idx = ip_idx = cb->args[2];
1602 
1603 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1604 		idx = 0;
1605 		head = &net->dev_index_head[h];
1606 		rcu_read_lock();
1607 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1608 			  net->dev_base_seq;
1609 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1610 			if (idx < s_idx)
1611 				goto cont;
1612 			if (h > s_h || idx > s_idx)
1613 				s_ip_idx = 0;
1614 			in_dev = __in_dev_get_rcu(dev);
1615 			if (!in_dev)
1616 				goto cont;
1617 
1618 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1619 			     ifa = ifa->ifa_next, ip_idx++) {
1620 				if (ip_idx < s_ip_idx)
1621 					continue;
1622 				if (inet_fill_ifaddr(skb, ifa,
1623 					     NETLINK_CB(cb->skb).portid,
1624 					     cb->nlh->nlmsg_seq,
1625 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1626 					rcu_read_unlock();
1627 					goto done;
1628 				}
1629 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1630 			}
1631 cont:
1632 			idx++;
1633 		}
1634 		rcu_read_unlock();
1635 	}
1636 
1637 done:
1638 	cb->args[0] = h;
1639 	cb->args[1] = idx;
1640 	cb->args[2] = ip_idx;
1641 
1642 	return skb->len;
1643 }
1644 
1645 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1646 		      u32 portid)
1647 {
1648 	struct sk_buff *skb;
1649 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1650 	int err = -ENOBUFS;
1651 	struct net *net;
1652 
1653 	net = dev_net(ifa->ifa_dev->dev);
1654 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1655 	if (!skb)
1656 		goto errout;
1657 
1658 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1659 	if (err < 0) {
1660 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1661 		WARN_ON(err == -EMSGSIZE);
1662 		kfree_skb(skb);
1663 		goto errout;
1664 	}
1665 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1666 	return;
1667 errout:
1668 	if (err < 0)
1669 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1670 }
1671 
1672 static size_t inet_get_link_af_size(const struct net_device *dev,
1673 				    u32 ext_filter_mask)
1674 {
1675 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1676 
1677 	if (!in_dev)
1678 		return 0;
1679 
1680 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1681 }
1682 
1683 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1684 			     u32 ext_filter_mask)
1685 {
1686 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1687 	struct nlattr *nla;
1688 	int i;
1689 
1690 	if (!in_dev)
1691 		return -ENODATA;
1692 
1693 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1694 	if (!nla)
1695 		return -EMSGSIZE;
1696 
1697 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1698 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1699 
1700 	return 0;
1701 }
1702 
1703 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1704 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1705 };
1706 
1707 static int inet_validate_link_af(const struct net_device *dev,
1708 				 const struct nlattr *nla)
1709 {
1710 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1711 	int err, rem;
1712 
1713 	if (dev && !__in_dev_get_rtnl(dev))
1714 		return -EAFNOSUPPORT;
1715 
1716 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1717 	if (err < 0)
1718 		return err;
1719 
1720 	if (tb[IFLA_INET_CONF]) {
1721 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1722 			int cfgid = nla_type(a);
1723 
1724 			if (nla_len(a) < 4)
1725 				return -EINVAL;
1726 
1727 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1728 				return -EINVAL;
1729 		}
1730 	}
1731 
1732 	return 0;
1733 }
1734 
1735 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1736 {
1737 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1738 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1739 	int rem;
1740 
1741 	if (!in_dev)
1742 		return -EAFNOSUPPORT;
1743 
1744 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1745 		BUG();
1746 
1747 	if (tb[IFLA_INET_CONF]) {
1748 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1749 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1750 	}
1751 
1752 	return 0;
1753 }
1754 
1755 static int inet_netconf_msgsize_devconf(int type)
1756 {
1757 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1758 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1759 	bool all = false;
1760 
1761 	if (type == NETCONFA_ALL)
1762 		all = true;
1763 
1764 	if (all || type == NETCONFA_FORWARDING)
1765 		size += nla_total_size(4);
1766 	if (all || type == NETCONFA_RP_FILTER)
1767 		size += nla_total_size(4);
1768 	if (all || type == NETCONFA_MC_FORWARDING)
1769 		size += nla_total_size(4);
1770 	if (all || type == NETCONFA_PROXY_NEIGH)
1771 		size += nla_total_size(4);
1772 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1773 		size += nla_total_size(4);
1774 
1775 	return size;
1776 }
1777 
1778 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1779 				     struct ipv4_devconf *devconf, u32 portid,
1780 				     u32 seq, int event, unsigned int flags,
1781 				     int type)
1782 {
1783 	struct nlmsghdr  *nlh;
1784 	struct netconfmsg *ncm;
1785 	bool all = false;
1786 
1787 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1788 			flags);
1789 	if (!nlh)
1790 		return -EMSGSIZE;
1791 
1792 	if (type == NETCONFA_ALL)
1793 		all = true;
1794 
1795 	ncm = nlmsg_data(nlh);
1796 	ncm->ncm_family = AF_INET;
1797 
1798 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1799 		goto nla_put_failure;
1800 
1801 	if ((all || type == NETCONFA_FORWARDING) &&
1802 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1803 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1804 		goto nla_put_failure;
1805 	if ((all || type == NETCONFA_RP_FILTER) &&
1806 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1807 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1808 		goto nla_put_failure;
1809 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1810 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1811 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1812 		goto nla_put_failure;
1813 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1814 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1815 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1816 		goto nla_put_failure;
1817 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1818 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1819 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1820 		goto nla_put_failure;
1821 
1822 	nlmsg_end(skb, nlh);
1823 	return 0;
1824 
1825 nla_put_failure:
1826 	nlmsg_cancel(skb, nlh);
1827 	return -EMSGSIZE;
1828 }
1829 
1830 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1831 				 struct ipv4_devconf *devconf)
1832 {
1833 	struct sk_buff *skb;
1834 	int err = -ENOBUFS;
1835 
1836 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1837 	if (!skb)
1838 		goto errout;
1839 
1840 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1841 					RTM_NEWNETCONF, 0, type);
1842 	if (err < 0) {
1843 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1844 		WARN_ON(err == -EMSGSIZE);
1845 		kfree_skb(skb);
1846 		goto errout;
1847 	}
1848 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1849 	return;
1850 errout:
1851 	if (err < 0)
1852 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1853 }
1854 
1855 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1856 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1857 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1858 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1859 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1860 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
1861 };
1862 
1863 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1864 				    struct nlmsghdr *nlh)
1865 {
1866 	struct net *net = sock_net(in_skb->sk);
1867 	struct nlattr *tb[NETCONFA_MAX+1];
1868 	struct netconfmsg *ncm;
1869 	struct sk_buff *skb;
1870 	struct ipv4_devconf *devconf;
1871 	struct in_device *in_dev;
1872 	struct net_device *dev;
1873 	int ifindex;
1874 	int err;
1875 
1876 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1877 			  devconf_ipv4_policy);
1878 	if (err < 0)
1879 		goto errout;
1880 
1881 	err = -EINVAL;
1882 	if (!tb[NETCONFA_IFINDEX])
1883 		goto errout;
1884 
1885 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1886 	switch (ifindex) {
1887 	case NETCONFA_IFINDEX_ALL:
1888 		devconf = net->ipv4.devconf_all;
1889 		break;
1890 	case NETCONFA_IFINDEX_DEFAULT:
1891 		devconf = net->ipv4.devconf_dflt;
1892 		break;
1893 	default:
1894 		dev = __dev_get_by_index(net, ifindex);
1895 		if (!dev)
1896 			goto errout;
1897 		in_dev = __in_dev_get_rtnl(dev);
1898 		if (!in_dev)
1899 			goto errout;
1900 		devconf = &in_dev->cnf;
1901 		break;
1902 	}
1903 
1904 	err = -ENOBUFS;
1905 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1906 	if (!skb)
1907 		goto errout;
1908 
1909 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1910 					NETLINK_CB(in_skb).portid,
1911 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1912 					NETCONFA_ALL);
1913 	if (err < 0) {
1914 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1915 		WARN_ON(err == -EMSGSIZE);
1916 		kfree_skb(skb);
1917 		goto errout;
1918 	}
1919 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1920 errout:
1921 	return err;
1922 }
1923 
1924 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1925 				     struct netlink_callback *cb)
1926 {
1927 	struct net *net = sock_net(skb->sk);
1928 	int h, s_h;
1929 	int idx, s_idx;
1930 	struct net_device *dev;
1931 	struct in_device *in_dev;
1932 	struct hlist_head *head;
1933 
1934 	s_h = cb->args[0];
1935 	s_idx = idx = cb->args[1];
1936 
1937 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1938 		idx = 0;
1939 		head = &net->dev_index_head[h];
1940 		rcu_read_lock();
1941 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1942 			  net->dev_base_seq;
1943 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1944 			if (idx < s_idx)
1945 				goto cont;
1946 			in_dev = __in_dev_get_rcu(dev);
1947 			if (!in_dev)
1948 				goto cont;
1949 
1950 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1951 						      &in_dev->cnf,
1952 						      NETLINK_CB(cb->skb).portid,
1953 						      cb->nlh->nlmsg_seq,
1954 						      RTM_NEWNETCONF,
1955 						      NLM_F_MULTI,
1956 						      NETCONFA_ALL) < 0) {
1957 				rcu_read_unlock();
1958 				goto done;
1959 			}
1960 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1961 cont:
1962 			idx++;
1963 		}
1964 		rcu_read_unlock();
1965 	}
1966 	if (h == NETDEV_HASHENTRIES) {
1967 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1968 					      net->ipv4.devconf_all,
1969 					      NETLINK_CB(cb->skb).portid,
1970 					      cb->nlh->nlmsg_seq,
1971 					      RTM_NEWNETCONF, NLM_F_MULTI,
1972 					      NETCONFA_ALL) < 0)
1973 			goto done;
1974 		else
1975 			h++;
1976 	}
1977 	if (h == NETDEV_HASHENTRIES + 1) {
1978 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1979 					      net->ipv4.devconf_dflt,
1980 					      NETLINK_CB(cb->skb).portid,
1981 					      cb->nlh->nlmsg_seq,
1982 					      RTM_NEWNETCONF, NLM_F_MULTI,
1983 					      NETCONFA_ALL) < 0)
1984 			goto done;
1985 		else
1986 			h++;
1987 	}
1988 done:
1989 	cb->args[0] = h;
1990 	cb->args[1] = idx;
1991 
1992 	return skb->len;
1993 }
1994 
1995 #ifdef CONFIG_SYSCTL
1996 
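/*
 * Propagate a freshly written "default" value to every in_device in
 * the namespace that has not explicitly overridden entry @i; per-entry
 * overrides are tracked in the cnf.state bitmap by devinet_conf_proc().
 */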
1997 static void devinet_copy_dflt_conf(struct net *net, int i)
1998 {
1999 	struct net_device *dev;
2000 
2001 	rcu_read_lock();
2002 	for_each_netdev_rcu(net, dev) {
2003 		struct in_device *in_dev;
2004 
2005 		in_dev = __in_dev_get_rcu(dev);
2006 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2007 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2008 	}
2009 	rcu_read_unlock();
2010 }
2011 
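/*
 * Namespace-wide FORWARDING toggle: flips ACCEPT_REDIRECTS in the
 * "all" table, mirrors the new value into "default" and every
 * in_device, disables LRO on each device when forwarding is switched
 * on, and sends netconf notifications for everything that changed.
 */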
2012 /* Called with the RTNL lock held. */
2013 static void inet_forward_change(struct net *net)
2014 {
2015 	struct net_device *dev;
2016 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2017 
2018 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2019 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2020 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2021 				    NETCONFA_IFINDEX_ALL,
2022 				    net->ipv4.devconf_all);
2023 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2024 				    NETCONFA_IFINDEX_DEFAULT,
2025 				    net->ipv4.devconf_dflt);
2026 
2027 	for_each_netdev(net, dev) {
2028 		struct in_device *in_dev;
2029 
2030 		if (on)
2031 			dev_disable_lro(dev);
2032 
2033 		in_dev = __in_dev_get_rtnl(dev);
2034 		if (in_dev) {
2035 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2036 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2037 						    dev->ifindex, &in_dev->cnf);
2038 		}
2039 	}
2040 }
2041 
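/*
 * Map a devconf back to the ifindex reported in netconf
 * notifications: the "default" and "all" tables use their reserved
 * NETCONFA_IFINDEX_* values, anything else lives inside an in_device
 * and reports its device's ifindex.
 */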
2042 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2043 {
2044 	if (cnf == net->ipv4.devconf_dflt)
2045 		return NETCONFA_IFINDEX_DEFAULT;
2046 	else if (cnf == net->ipv4.devconf_all)
2047 		return NETCONFA_IFINDEX_ALL;
2048 	else {
2049 		struct in_device *idev
2050 			= container_of(cnf, struct in_device, cnf);
2051 		return idev->dev->ifindex;
2052 	}
2053 }
2054 
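/*
 * Generic proc handler for the per-interface conf entries.  A write
 * marks the entry as explicitly set, copies a change of the "default"
 * table down to devices still using the default, flushes the route
 * cache when ACCEPT_LOCAL or ROUTE_LOCALNET is cleared, and sends
 * netconf notifications for RP_FILTER, PROXY_ARP and
 * IGNORE_ROUTES_WITH_LINKDOWN changes.
 */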
2055 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2056 			     void __user *buffer,
2057 			     size_t *lenp, loff_t *ppos)
2058 {
2059 	int old_value = *(int *)ctl->data;
2060 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2061 	int new_value = *(int *)ctl->data;
2062 
2063 	if (write) {
2064 		struct ipv4_devconf *cnf = ctl->extra1;
2065 		struct net *net = ctl->extra2;
2066 		int i = (int *)ctl->data - cnf->data;
2067 		int ifindex;
2068 
2069 		set_bit(i, cnf->state);
2070 
2071 		if (cnf == net->ipv4.devconf_dflt)
2072 			devinet_copy_dflt_conf(net, i);
2073 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2074 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2075 			if ((new_value == 0) && (old_value != 0))
2076 				rt_cache_flush(net);
2077 
2078 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2079 		    new_value != old_value) {
2080 			ifindex = devinet_conf_ifindex(net, cnf);
2081 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2082 						    ifindex, cnf);
2083 		}
2084 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2085 		    new_value != old_value) {
2086 			ifindex = devinet_conf_ifindex(net, cnf);
2087 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2088 						    ifindex, cnf);
2089 		}
2090 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2091 		    new_value != old_value) {
2092 			ifindex = devinet_conf_ifindex(net, cnf);
2093 			inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2094 						    ifindex, cnf);
2095 		}
2096 	}
2097 
2098 	return ret;
2099 }
2100 
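/*
 * proc handler for "forwarding".  Any change except to the "default"
 * entry must run under RTNL: if the lock cannot be taken, the written
 * value and file position are rolled back and the syscall restarted
 * so userspace retries transparently.
 */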
2101 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2102 				  void __user *buffer,
2103 				  size_t *lenp, loff_t *ppos)
2104 {
2105 	int *valp = ctl->data;
2106 	int val = *valp;
2107 	loff_t pos = *ppos;
2108 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2109 
2110 	if (write && *valp != val) {
2111 		struct net *net = ctl->extra2;
2112 
2113 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2114 			if (!rtnl_trylock()) {
2115 				/* Restore the original values before restarting */
2116 				*valp = val;
2117 				*ppos = pos;
2118 				return restart_syscall();
2119 			}
2120 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2121 				inet_forward_change(net);
2122 			} else {
2123 				struct ipv4_devconf *cnf = ctl->extra1;
2124 				struct in_device *idev =
2125 					container_of(cnf, struct in_device, cnf);
2126 				if (*valp)
2127 					dev_disable_lro(idev->dev);
2128 				inet_netconf_notify_devconf(net,
2129 							    NETCONFA_FORWARDING,
2130 							    idev->dev->ifindex,
2131 							    cnf);
2132 			}
2133 			rtnl_unlock();
2134 			rt_cache_flush(net);
2135 		} else
2136 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2137 						    NETCONFA_IFINDEX_DEFAULT,
2138 						    net->ipv4.devconf_dflt);
2139 	}
2140 
2141 	return ret;
2142 }
2143 
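/* Plain integer sysctl handler that flushes the IPv4 route cache on change. */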
2144 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2145 				void __user *buffer,
2146 				size_t *lenp, loff_t *ppos)
2147 {
2148 	int *valp = ctl->data;
2149 	int val = *valp;
2150 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2151 	struct net *net = ctl->extra2;
2152 
2153 	if (write && *valp != val)
2154 		rt_cache_flush(net);
2155 
2156 	return ret;
2157 }
2158 
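/*
 * Template sysctl table for net/ipv4/conf/<dev>/ (plus conf/all and
 * conf/default).  Each .data pointer refers to the static
 * ipv4_devconf template; __devinet_sysctl_register() rebases the
 * entries onto the per-device or per-namespace copy before
 * registering them.
 */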
2159 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2160 	{ \
2161 		.procname	= name, \
2162 		.data		= ipv4_devconf.data + \
2163 				  IPV4_DEVCONF_ ## attr - 1, \
2164 		.maxlen		= sizeof(int), \
2165 		.mode		= mval, \
2166 		.proc_handler	= proc, \
2167 		.extra1		= &ipv4_devconf, \
2168 	}
2169 
2170 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2171 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2172 
2173 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2174 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2175 
2176 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2177 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2178 
2179 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2180 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2181 
2182 static struct devinet_sysctl_table {
2183 	struct ctl_table_header *sysctl_header;
2184 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2185 } devinet_sysctl = {
2186 	.devinet_vars = {
2187 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2188 					     devinet_sysctl_forward),
2189 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2190 
2191 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2192 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2193 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2194 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2195 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2196 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2197 					"accept_source_route"),
2198 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2199 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2200 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2201 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2202 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2203 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2204 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2205 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2206 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2207 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2208 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2209 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2210 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2211 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2212 					"force_igmp_version"),
2213 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2214 					"igmpv2_unsolicited_report_interval"),
2215 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2216 					"igmpv3_unsolicited_report_interval"),
2217 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2218 					"ignore_routes_with_linkdown"),
2219 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2220 					"drop_gratuitous_arp"),
2221 
2222 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2223 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2224 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2225 					      "promote_secondaries"),
2226 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2227 					      "route_localnet"),
2228 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2229 					      "drop_unicast_in_l2_multicast"),
2230 	},
2231 };
2232 
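/*
 * Duplicate the template table, rebase .data/.extra1/.extra2 onto the
 * given devconf and namespace, register it under
 * net/ipv4/conf/<dev_name>, and announce the full configuration
 * (NETCONFA_ALL) over netlink.
 */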
2233 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2234 				     int ifindex, struct ipv4_devconf *p)
2235 {
2236 	int i;
2237 	struct devinet_sysctl_table *t;
2238 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2239 
2240 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2241 	if (!t)
2242 		goto out;
2243 
2244 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2245 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2246 		t->devinet_vars[i].extra1 = p;
2247 		t->devinet_vars[i].extra2 = net;
2248 	}
2249 
2250 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2251 
2252 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2253 	if (!t->sysctl_header)
2254 		goto free;
2255 
2256 	p->sysctl = t;
2257 
2258 	inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
2259 	return 0;
2260 
2261 free:
2262 	kfree(t);
2263 out:
2264 	return -ENOBUFS;
2265 }
2266 
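/* Tear down and free a table registered by __devinet_sysctl_register(). */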
2267 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2268 {
2269 	struct devinet_sysctl_table *t = cnf->sysctl;
2270 
2271 	if (!t)
2272 		return;
2273 
2274 	cnf->sysctl = NULL;
2275 	unregister_net_sysctl_table(t->sysctl_header);
2276 	kfree(t);
2277 }
2278 
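/*
 * Register both the neighbour (ARP) and devinet sysctls for a device.
 * Device names that cannot form a valid sysctl path are rejected.
 */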
2279 static int devinet_sysctl_register(struct in_device *idev)
2280 {
2281 	int err;
2282 
2283 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2284 		return -EINVAL;
2285 
2286 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2287 	if (err)
2288 		return err;
2289 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2290 					idev->dev->ifindex, &idev->cnf);
2291 	if (err)
2292 		neigh_sysctl_unregister(idev->arp_parms);
2293 	return err;
2294 }
2295 
2296 static void devinet_sysctl_unregister(struct in_device *idev)
2297 {
2298 	__devinet_sysctl_unregister(&idev->cnf);
2299 	neigh_sysctl_unregister(idev->arp_parms);
2300 }
2301 
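/*
 * The global net.ipv4.ip_forward knob.  It aliases the FORWARDING
 * entry of the "all" devconf; devinet_init_net() rebases the pointers
 * for namespaces other than init_net.
 */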
2302 static struct ctl_table ctl_forward_entry[] = {
2303 	{
2304 		.procname	= "ip_forward",
2305 		.data		= &ipv4_devconf.data[
2306 					IPV4_DEVCONF_FORWARDING - 1],
2307 		.maxlen		= sizeof(int),
2308 		.mode		= 0644,
2309 		.proc_handler	= devinet_sysctl_forward,
2310 		.extra1		= &ipv4_devconf,
2311 		.extra2		= &init_net,
2312 	},
2313 	{ },
2314 };
2315 #endif
2316 
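/*
 * Per-namespace setup.  init_net keeps the static "all"/"default"
 * templates; other namespaces get kmemdup()ed copies.  The conf/all,
 * conf/default and ip_forward sysctl tables are then registered for
 * the namespace.
 */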
2317 static __net_init int devinet_init_net(struct net *net)
2318 {
2319 	int err;
2320 	struct ipv4_devconf *all, *dflt;
2321 #ifdef CONFIG_SYSCTL
2322 	struct ctl_table *tbl = ctl_forward_entry;
2323 	struct ctl_table_header *forw_hdr;
2324 #endif
2325 
2326 	err = -ENOMEM;
2327 	all = &ipv4_devconf;
2328 	dflt = &ipv4_devconf_dflt;
2329 
2330 	if (!net_eq(net, &init_net)) {
2331 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2332 		if (!all)
2333 			goto err_alloc_all;
2334 
2335 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2336 		if (!dflt)
2337 			goto err_alloc_dflt;
2338 
2339 #ifdef CONFIG_SYSCTL
2340 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2341 		if (!tbl)
2342 			goto err_alloc_ctl;
2343 
2344 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2345 		tbl[0].extra1 = all;
2346 		tbl[0].extra2 = net;
2347 #endif
2348 	}
2349 
2350 #ifdef CONFIG_SYSCTL
2351 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2352 	if (err < 0)
2353 		goto err_reg_all;
2354 
2355 	err = __devinet_sysctl_register(net, "default",
2356 					NETCONFA_IFINDEX_DEFAULT, dflt);
2357 	if (err < 0)
2358 		goto err_reg_dflt;
2359 
2360 	err = -ENOMEM;
2361 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2362 	if (!forw_hdr)
2363 		goto err_reg_ctl;
2364 	net->ipv4.forw_hdr = forw_hdr;
2365 #endif
2366 
2367 	net->ipv4.devconf_all = all;
2368 	net->ipv4.devconf_dflt = dflt;
2369 	return 0;
2370 
2371 #ifdef CONFIG_SYSCTL
2372 err_reg_ctl:
2373 	__devinet_sysctl_unregister(dflt);
2374 err_reg_dflt:
2375 	__devinet_sysctl_unregister(all);
2376 err_reg_all:
2377 	if (tbl != ctl_forward_entry)
2378 		kfree(tbl);
2379 err_alloc_ctl:
2380 #endif
2381 	if (dflt != &ipv4_devconf_dflt)
2382 		kfree(dflt);
2383 err_alloc_dflt:
2384 	if (all != &ipv4_devconf)
2385 		kfree(all);
2386 err_alloc_all:
2387 	return err;
2388 }
2389 
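/* Per-namespace teardown: unregister the sysctls and free the conf copies. */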
2390 static __net_exit void devinet_exit_net(struct net *net)
2391 {
2392 #ifdef CONFIG_SYSCTL
2393 	struct ctl_table *tbl;
2394 
2395 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2396 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2397 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2398 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2399 	kfree(tbl);
2400 #endif
2401 	kfree(net->ipv4.devconf_dflt);
2402 	kfree(net->ipv4.devconf_all);
2403 }
2404 
2405 static __net_initdata struct pernet_operations devinet_ops = {
2406 	.init = devinet_init_net,
2407 	.exit = devinet_exit_net,
2408 };
2409 
2410 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2411 	.family		  = AF_INET,
2412 	.fill_link_af	  = inet_fill_link_af,
2413 	.get_link_af_size = inet_get_link_af_size,
2414 	.validate_link_af = inet_validate_link_af,
2415 	.set_link_af	  = inet_set_link_af,
2416 };
2417 
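/*
 * Boot-time initialisation: set up the inet address hash, the pernet
 * operations, the PF_INET gifconf handler and netdevice notifier,
 * kick off the address-lifetime worker, and register the rtnetlink
 * address and netconf handlers.
 */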
2418 void __init devinet_init(void)
2419 {
2420 	int i;
2421 
2422 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2423 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2424 
2425 	register_pernet_subsys(&devinet_ops);
2426 
2427 	register_gifconf(PF_INET, inet_gifconf);
2428 	register_netdevice_notifier(&ip_netdev_notifier);
2429 
2430 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2431 
2432 	rtnl_af_register(&inet_af_ops);
2433 
2434 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2435 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2436 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2437 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2438 		      inet_netconf_dump_devconf, NULL);
2439 }
2440