xref: /linux/net/ipv4/devinet.c (revision b896c54e8d7bbf6d5d48f9296b26c9d3f10ec795)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
/*
 * Read attribute @attr from the devconf_dflt structure of namespace @net.
 * @net is parenthesised so that an expression argument (e.g. "np + 1")
 * is dereferenced as a whole rather than being split by operator precedence.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 };
103 
104 #define IN4_ADDR_HSIZE_SHIFT	8
105 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
106 
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 	u32 val = (__force u32) addr ^ net_hash_mix(net);
112 
113 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115 
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
119 
120 	ASSERT_RTNL();
121 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123 
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126 	ASSERT_RTNL();
127 	hlist_del_init_rcu(&ifa->hash);
128 }
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	ifa = inet_lookup_ifaddr_rcu(net, addr);
145 	if (!ifa) {
146 		struct flowi4 fl4 = { .daddr = addr };
147 		struct fib_result res = { 0 };
148 		struct fib_table *local;
149 
150 		/* Fallback to FIB local table so that communication
151 		 * over loopback subnets work.
152 		 */
153 		local = fib_get_table(net, RT_TABLE_LOCAL);
154 		if (local &&
155 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
156 		    res.type == RTN_LOCAL)
157 			result = FIB_RES_DEV(res);
158 	} else {
159 		result = ifa->ifa_dev->dev;
160 	}
161 	if (result && devref)
162 		dev_hold(result);
163 	rcu_read_unlock();
164 	return result;
165 }
166 EXPORT_SYMBOL(__ip_dev_find);
167 
168 /* called under RCU lock */
169 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
170 {
171 	u32 hash = inet_addr_hash(net, addr);
172 	struct in_ifaddr *ifa;
173 
174 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
175 		if (ifa->ifa_local == addr &&
176 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
177 			return ifa;
178 
179 	return NULL;
180 }
181 
182 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
183 
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
185 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
186 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
187 			 int destroy);
188 #ifdef CONFIG_SYSCTL
189 static int devinet_sysctl_register(struct in_device *idev);
190 static void devinet_sysctl_unregister(struct in_device *idev);
191 #else
192 static int devinet_sysctl_register(struct in_device *idev)
193 {
194 	return 0;
195 }
196 static void devinet_sysctl_unregister(struct in_device *idev)
197 {
198 }
199 #endif
200 
201 /* Locks all the inet devices. */
202 
203 static struct in_ifaddr *inet_alloc_ifa(void)
204 {
205 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
206 }
207 
208 static void inet_rcu_free_ifa(struct rcu_head *head)
209 {
210 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
211 	if (ifa->ifa_dev)
212 		in_dev_put(ifa->ifa_dev);
213 	kfree(ifa);
214 }
215 
216 static void inet_free_ifa(struct in_ifaddr *ifa)
217 {
218 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
219 }
220 
221 void in_dev_finish_destroy(struct in_device *idev)
222 {
223 	struct net_device *dev = idev->dev;
224 
225 	WARN_ON(idev->ifa_list);
226 	WARN_ON(idev->mc_list);
227 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
228 #ifdef NET_REFCNT_DEBUG
229 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
230 #endif
231 	dev_put(dev);
232 	if (!idev->dead)
233 		pr_err("Freeing alive in_device %p\n", idev);
234 	else
235 		kfree(idev);
236 }
237 EXPORT_SYMBOL(in_dev_finish_destroy);
238 
239 static struct in_device *inetdev_init(struct net_device *dev)
240 {
241 	struct in_device *in_dev;
242 	int err = -ENOMEM;
243 
244 	ASSERT_RTNL();
245 
246 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
247 	if (!in_dev)
248 		goto out;
249 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
250 			sizeof(in_dev->cnf));
251 	in_dev->cnf.sysctl = NULL;
252 	in_dev->dev = dev;
253 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
254 	if (!in_dev->arp_parms)
255 		goto out_kfree;
256 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
257 		dev_disable_lro(dev);
258 	/* Reference in_dev->dev */
259 	dev_hold(dev);
260 	/* Account for reference dev->ip_ptr (below) */
261 	refcount_set(&in_dev->refcnt, 1);
262 
263 	err = devinet_sysctl_register(in_dev);
264 	if (err) {
265 		in_dev->dead = 1;
266 		in_dev_put(in_dev);
267 		in_dev = NULL;
268 		goto out;
269 	}
270 	ip_mc_init_dev(in_dev);
271 	if (dev->flags & IFF_UP)
272 		ip_mc_up(in_dev);
273 
274 	/* we can receive as soon as ip_ptr is set -- do this last */
275 	rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277 	return in_dev ?: ERR_PTR(err);
278 out_kfree:
279 	kfree(in_dev);
280 	in_dev = NULL;
281 	goto out;
282 }
283 
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
287 	in_dev_put(idev);
288 }
289 
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292 	struct in_ifaddr *ifa;
293 	struct net_device *dev;
294 
295 	ASSERT_RTNL();
296 
297 	dev = in_dev->dev;
298 
299 	in_dev->dead = 1;
300 
301 	ip_mc_destroy_dev(in_dev);
302 
303 	while ((ifa = in_dev->ifa_list) != NULL) {
304 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305 		inet_free_ifa(ifa);
306 	}
307 
308 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
309 
310 	devinet_sysctl_unregister(in_dev);
311 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312 	arp_ifdown(dev);
313 
314 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316 
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319 	rcu_read_lock();
320 	for_primary_ifa(in_dev) {
321 		if (inet_ifa_match(a, ifa)) {
322 			if (!b || inet_ifa_match(b, ifa)) {
323 				rcu_read_unlock();
324 				return 1;
325 			}
326 		}
327 	} endfor_ifa(in_dev);
328 	rcu_read_unlock();
329 	return 0;
330 }
331 
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333 			 int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335 	struct in_ifaddr *promote = NULL;
336 	struct in_ifaddr *ifa, *ifa1 = *ifap;
337 	struct in_ifaddr *last_prim = in_dev->ifa_list;
338 	struct in_ifaddr *prev_prom = NULL;
339 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340 
341 	ASSERT_RTNL();
342 
343 	if (in_dev->dead)
344 		goto no_promotions;
345 
346 	/* 1. Deleting primary ifaddr forces deletion all secondaries
347 	 * unless alias promotion is set
348 	 **/
349 
350 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352 
353 		while ((ifa = *ifap1) != NULL) {
354 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355 			    ifa1->ifa_scope <= ifa->ifa_scope)
356 				last_prim = ifa;
357 
358 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359 			    ifa1->ifa_mask != ifa->ifa_mask ||
360 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
361 				ifap1 = &ifa->ifa_next;
362 				prev_prom = ifa;
363 				continue;
364 			}
365 
366 			if (!do_promote) {
367 				inet_hash_remove(ifa);
368 				*ifap1 = ifa->ifa_next;
369 
370 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371 				blocking_notifier_call_chain(&inetaddr_chain,
372 						NETDEV_DOWN, ifa);
373 				inet_free_ifa(ifa);
374 			} else {
375 				promote = ifa;
376 				break;
377 			}
378 		}
379 	}
380 
381 	/* On promotion all secondaries from subnet are changing
382 	 * the primary IP, we must remove all their routes silently
383 	 * and later to add them back with new prefsrc. Do this
384 	 * while all addresses are on the device list.
385 	 */
386 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387 		if (ifa1->ifa_mask == ifa->ifa_mask &&
388 		    inet_ifa_match(ifa1->ifa_address, ifa))
389 			fib_del_ifaddr(ifa, ifa1);
390 	}
391 
392 no_promotions:
393 	/* 2. Unlink it */
394 
395 	*ifap = ifa1->ifa_next;
396 	inet_hash_remove(ifa1);
397 
398 	/* 3. Announce address deletion */
399 
400 	/* Send message first, then call notifier.
401 	   At first sight, FIB update triggered by notifier
402 	   will refer to already deleted ifaddr, that could confuse
403 	   netlink listeners. It is not true: look, gated sees
404 	   that route deleted and if it still thinks that ifaddr
405 	   is valid, it will try to restore deleted routes... Grr.
406 	   So that, this order is correct.
407 	 */
408 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410 
411 	if (promote) {
412 		struct in_ifaddr *next_sec = promote->ifa_next;
413 
414 		if (prev_prom) {
415 			prev_prom->ifa_next = promote->ifa_next;
416 			promote->ifa_next = last_prim->ifa_next;
417 			last_prim->ifa_next = promote;
418 		}
419 
420 		promote->ifa_flags &= ~IFA_F_SECONDARY;
421 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422 		blocking_notifier_call_chain(&inetaddr_chain,
423 				NETDEV_UP, promote);
424 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425 			if (ifa1->ifa_mask != ifa->ifa_mask ||
426 			    !inet_ifa_match(ifa1->ifa_address, ifa))
427 					continue;
428 			fib_add_ifaddr(ifa);
429 		}
430 
431 	}
432 	if (destroy)
433 		inet_free_ifa(ifa1);
434 }
435 
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437 			 int destroy)
438 {
439 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441 
/* Delayed work item whose handler is check_lifetime(). */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
445 
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447 			     u32 portid, struct netlink_ext_ack *extack)
448 {
449 	struct in_device *in_dev = ifa->ifa_dev;
450 	struct in_ifaddr *ifa1, **ifap, **last_primary;
451 	struct in_validator_info ivi;
452 	int ret;
453 
454 	ASSERT_RTNL();
455 
456 	if (!ifa->ifa_local) {
457 		inet_free_ifa(ifa);
458 		return 0;
459 	}
460 
461 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
462 	last_primary = &in_dev->ifa_list;
463 
464 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
465 	     ifap = &ifa1->ifa_next) {
466 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
467 		    ifa->ifa_scope <= ifa1->ifa_scope)
468 			last_primary = &ifa1->ifa_next;
469 		if (ifa1->ifa_mask == ifa->ifa_mask &&
470 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
471 			if (ifa1->ifa_local == ifa->ifa_local) {
472 				inet_free_ifa(ifa);
473 				return -EEXIST;
474 			}
475 			if (ifa1->ifa_scope != ifa->ifa_scope) {
476 				inet_free_ifa(ifa);
477 				return -EINVAL;
478 			}
479 			ifa->ifa_flags |= IFA_F_SECONDARY;
480 		}
481 	}
482 
483 	/* Allow any devices that wish to register ifaddr validtors to weigh
484 	 * in now, before changes are committed.  The rntl lock is serializing
485 	 * access here, so the state should not change between a validator call
486 	 * and a final notify on commit.  This isn't invoked on promotion under
487 	 * the assumption that validators are checking the address itself, and
488 	 * not the flags.
489 	 */
490 	ivi.ivi_addr = ifa->ifa_address;
491 	ivi.ivi_dev = ifa->ifa_dev;
492 	ivi.extack = extack;
493 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
494 					   NETDEV_UP, &ivi);
495 	ret = notifier_to_errno(ret);
496 	if (ret) {
497 		inet_free_ifa(ifa);
498 		return ret;
499 	}
500 
501 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
502 		prandom_seed((__force u32) ifa->ifa_local);
503 		ifap = last_primary;
504 	}
505 
506 	ifa->ifa_next = *ifap;
507 	*ifap = ifa;
508 
509 	inet_hash_insert(dev_net(in_dev->dev), ifa);
510 
511 	cancel_delayed_work(&check_lifetime_work);
512 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
513 
514 	/* Send message first, then call notifier.
515 	   Notifier will trigger FIB update, so that
516 	   listeners of netlink will know about new ifaddr */
517 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
518 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
519 
520 	return 0;
521 }
522 
523 static int inet_insert_ifa(struct in_ifaddr *ifa)
524 {
525 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
526 }
527 
528 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
529 {
530 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
531 
532 	ASSERT_RTNL();
533 
534 	if (!in_dev) {
535 		inet_free_ifa(ifa);
536 		return -ENOBUFS;
537 	}
538 	ipv4_devconf_setall(in_dev);
539 	neigh_parms_data_state_setall(in_dev->arp_parms);
540 	if (ifa->ifa_dev != in_dev) {
541 		WARN_ON(ifa->ifa_dev);
542 		in_dev_hold(in_dev);
543 		ifa->ifa_dev = in_dev;
544 	}
545 	if (ipv4_is_loopback(ifa->ifa_local))
546 		ifa->ifa_scope = RT_SCOPE_HOST;
547 	return inet_insert_ifa(ifa);
548 }
549 
550 /* Caller must hold RCU or RTNL :
551  * We dont take a reference on found in_device
552  */
553 struct in_device *inetdev_by_index(struct net *net, int ifindex)
554 {
555 	struct net_device *dev;
556 	struct in_device *in_dev = NULL;
557 
558 	rcu_read_lock();
559 	dev = dev_get_by_index_rcu(net, ifindex);
560 	if (dev)
561 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
562 	rcu_read_unlock();
563 	return in_dev;
564 }
565 EXPORT_SYMBOL(inetdev_by_index);
566 
567 /* Called only from RTNL semaphored context. No locks. */
568 
569 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
570 				    __be32 mask)
571 {
572 	ASSERT_RTNL();
573 
574 	for_primary_ifa(in_dev) {
575 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
576 			return ifa;
577 	} endfor_ifa(in_dev);
578 	return NULL;
579 }
580 
581 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
582 {
583 	struct ip_mreqn mreq = {
584 		.imr_multiaddr.s_addr = ifa->ifa_address,
585 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
586 	};
587 	int ret;
588 
589 	ASSERT_RTNL();
590 
591 	lock_sock(sk);
592 	if (join)
593 		ret = ip_mc_join_group(sk, &mreq);
594 	else
595 		ret = ip_mc_leave_group(sk, &mreq);
596 	release_sock(sk);
597 
598 	return ret;
599 }
600 
601 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
602 			    struct netlink_ext_ack *extack)
603 {
604 	struct net *net = sock_net(skb->sk);
605 	struct nlattr *tb[IFA_MAX+1];
606 	struct in_device *in_dev;
607 	struct ifaddrmsg *ifm;
608 	struct in_ifaddr *ifa, **ifap;
609 	int err = -EINVAL;
610 
611 	ASSERT_RTNL();
612 
613 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
614 			  extack);
615 	if (err < 0)
616 		goto errout;
617 
618 	ifm = nlmsg_data(nlh);
619 	in_dev = inetdev_by_index(net, ifm->ifa_index);
620 	if (!in_dev) {
621 		err = -ENODEV;
622 		goto errout;
623 	}
624 
625 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
626 	     ifap = &ifa->ifa_next) {
627 		if (tb[IFA_LOCAL] &&
628 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
629 			continue;
630 
631 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
632 			continue;
633 
634 		if (tb[IFA_ADDRESS] &&
635 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
636 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
637 			continue;
638 
639 		if (ipv4_is_multicast(ifa->ifa_address))
640 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
641 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
642 		return 0;
643 	}
644 
645 	err = -EADDRNOTAVAIL;
646 errout:
647 	return err;
648 }
649 
650 #define INFINITY_LIFE_TIME	0xFFFFFFFF
651 
652 static void check_lifetime(struct work_struct *work)
653 {
654 	unsigned long now, next, next_sec, next_sched;
655 	struct in_ifaddr *ifa;
656 	struct hlist_node *n;
657 	int i;
658 
659 	now = jiffies;
660 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
661 
662 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
663 		bool change_needed = false;
664 
665 		rcu_read_lock();
666 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
667 			unsigned long age;
668 
669 			if (ifa->ifa_flags & IFA_F_PERMANENT)
670 				continue;
671 
672 			/* We try to batch several events at once. */
673 			age = (now - ifa->ifa_tstamp +
674 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
675 
676 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
677 			    age >= ifa->ifa_valid_lft) {
678 				change_needed = true;
679 			} else if (ifa->ifa_preferred_lft ==
680 				   INFINITY_LIFE_TIME) {
681 				continue;
682 			} else if (age >= ifa->ifa_preferred_lft) {
683 				if (time_before(ifa->ifa_tstamp +
684 						ifa->ifa_valid_lft * HZ, next))
685 					next = ifa->ifa_tstamp +
686 					       ifa->ifa_valid_lft * HZ;
687 
688 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
689 					change_needed = true;
690 			} else if (time_before(ifa->ifa_tstamp +
691 					       ifa->ifa_preferred_lft * HZ,
692 					       next)) {
693 				next = ifa->ifa_tstamp +
694 				       ifa->ifa_preferred_lft * HZ;
695 			}
696 		}
697 		rcu_read_unlock();
698 		if (!change_needed)
699 			continue;
700 		rtnl_lock();
701 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
702 			unsigned long age;
703 
704 			if (ifa->ifa_flags & IFA_F_PERMANENT)
705 				continue;
706 
707 			/* We try to batch several events at once. */
708 			age = (now - ifa->ifa_tstamp +
709 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
710 
711 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
712 			    age >= ifa->ifa_valid_lft) {
713 				struct in_ifaddr **ifap;
714 
715 				for (ifap = &ifa->ifa_dev->ifa_list;
716 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
717 					if (*ifap == ifa) {
718 						inet_del_ifa(ifa->ifa_dev,
719 							     ifap, 1);
720 						break;
721 					}
722 				}
723 			} else if (ifa->ifa_preferred_lft !=
724 				   INFINITY_LIFE_TIME &&
725 				   age >= ifa->ifa_preferred_lft &&
726 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
727 				ifa->ifa_flags |= IFA_F_DEPRECATED;
728 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
729 			}
730 		}
731 		rtnl_unlock();
732 	}
733 
734 	next_sec = round_jiffies_up(next);
735 	next_sched = next;
736 
737 	/* If rounded timeout is accurate enough, accept it. */
738 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
739 		next_sched = next_sec;
740 
741 	now = jiffies;
742 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
743 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
744 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
745 
746 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
747 			next_sched - now);
748 }
749 
750 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
751 			     __u32 prefered_lft)
752 {
753 	unsigned long timeout;
754 
755 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
756 
757 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
758 	if (addrconf_finite_timeout(timeout))
759 		ifa->ifa_valid_lft = timeout;
760 	else
761 		ifa->ifa_flags |= IFA_F_PERMANENT;
762 
763 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
764 	if (addrconf_finite_timeout(timeout)) {
765 		if (timeout == 0)
766 			ifa->ifa_flags |= IFA_F_DEPRECATED;
767 		ifa->ifa_preferred_lft = timeout;
768 	}
769 	ifa->ifa_tstamp = jiffies;
770 	if (!ifa->ifa_cstamp)
771 		ifa->ifa_cstamp = ifa->ifa_tstamp;
772 }
773 
774 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
775 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
776 {
777 	struct nlattr *tb[IFA_MAX+1];
778 	struct in_ifaddr *ifa;
779 	struct ifaddrmsg *ifm;
780 	struct net_device *dev;
781 	struct in_device *in_dev;
782 	int err;
783 
784 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
785 			  NULL);
786 	if (err < 0)
787 		goto errout;
788 
789 	ifm = nlmsg_data(nlh);
790 	err = -EINVAL;
791 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
792 		goto errout;
793 
794 	dev = __dev_get_by_index(net, ifm->ifa_index);
795 	err = -ENODEV;
796 	if (!dev)
797 		goto errout;
798 
799 	in_dev = __in_dev_get_rtnl(dev);
800 	err = -ENOBUFS;
801 	if (!in_dev)
802 		goto errout;
803 
804 	ifa = inet_alloc_ifa();
805 	if (!ifa)
806 		/*
807 		 * A potential indev allocation can be left alive, it stays
808 		 * assigned to its device and is destroy with it.
809 		 */
810 		goto errout;
811 
812 	ipv4_devconf_setall(in_dev);
813 	neigh_parms_data_state_setall(in_dev->arp_parms);
814 	in_dev_hold(in_dev);
815 
816 	if (!tb[IFA_ADDRESS])
817 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
818 
819 	INIT_HLIST_NODE(&ifa->hash);
820 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
821 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
822 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
823 					 ifm->ifa_flags;
824 	ifa->ifa_scope = ifm->ifa_scope;
825 	ifa->ifa_dev = in_dev;
826 
827 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
828 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
829 
830 	if (tb[IFA_BROADCAST])
831 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
832 
833 	if (tb[IFA_LABEL])
834 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
835 	else
836 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837 
838 	if (tb[IFA_CACHEINFO]) {
839 		struct ifa_cacheinfo *ci;
840 
841 		ci = nla_data(tb[IFA_CACHEINFO]);
842 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
843 			err = -EINVAL;
844 			goto errout_free;
845 		}
846 		*pvalid_lft = ci->ifa_valid;
847 		*pprefered_lft = ci->ifa_prefered;
848 	}
849 
850 	return ifa;
851 
852 errout_free:
853 	inet_free_ifa(ifa);
854 errout:
855 	return ERR_PTR(err);
856 }
857 
858 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
859 {
860 	struct in_device *in_dev = ifa->ifa_dev;
861 	struct in_ifaddr *ifa1, **ifap;
862 
863 	if (!ifa->ifa_local)
864 		return NULL;
865 
866 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
867 	     ifap = &ifa1->ifa_next) {
868 		if (ifa1->ifa_mask == ifa->ifa_mask &&
869 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
870 		    ifa1->ifa_local == ifa->ifa_local)
871 			return ifa1;
872 	}
873 	return NULL;
874 }
875 
876 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
877 			    struct netlink_ext_ack *extack)
878 {
879 	struct net *net = sock_net(skb->sk);
880 	struct in_ifaddr *ifa;
881 	struct in_ifaddr *ifa_existing;
882 	__u32 valid_lft = INFINITY_LIFE_TIME;
883 	__u32 prefered_lft = INFINITY_LIFE_TIME;
884 
885 	ASSERT_RTNL();
886 
887 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
888 	if (IS_ERR(ifa))
889 		return PTR_ERR(ifa);
890 
891 	ifa_existing = find_matching_ifa(ifa);
892 	if (!ifa_existing) {
893 		/* It would be best to check for !NLM_F_CREATE here but
894 		 * userspace already relies on not having to provide this.
895 		 */
896 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
897 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
898 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
899 					       true, ifa);
900 
901 			if (ret < 0) {
902 				inet_free_ifa(ifa);
903 				return ret;
904 			}
905 		}
906 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
907 					 extack);
908 	} else {
909 		inet_free_ifa(ifa);
910 
911 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
912 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
913 			return -EEXIST;
914 		ifa = ifa_existing;
915 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
916 		cancel_delayed_work(&check_lifetime_work);
917 		queue_delayed_work(system_power_efficient_wq,
918 				&check_lifetime_work, 0);
919 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
920 	}
921 	return 0;
922 }
923 
924 /*
925  *	Determine a default network mask, based on the IP address.
926  */
927 
928 static int inet_abc_len(__be32 addr)
929 {
930 	int rc = -1;	/* Something else, probably a multicast. */
931 
932 	if (ipv4_is_zeronet(addr))
933 		rc = 0;
934 	else {
935 		__u32 haddr = ntohl(addr);
936 
937 		if (IN_CLASSA(haddr))
938 			rc = 8;
939 		else if (IN_CLASSB(haddr))
940 			rc = 16;
941 		else if (IN_CLASSC(haddr))
942 			rc = 24;
943 	}
944 
945 	return rc;
946 }
947 
948 
949 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
950 {
951 	struct sockaddr_in sin_orig;
952 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
953 	struct in_device *in_dev;
954 	struct in_ifaddr **ifap = NULL;
955 	struct in_ifaddr *ifa = NULL;
956 	struct net_device *dev;
957 	char *colon;
958 	int ret = -EFAULT;
959 	int tryaddrmatch = 0;
960 
961 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
962 
963 	/* save original address for comparison */
964 	memcpy(&sin_orig, sin, sizeof(*sin));
965 
966 	colon = strchr(ifr->ifr_name, ':');
967 	if (colon)
968 		*colon = 0;
969 
970 	dev_load(net, ifr->ifr_name);
971 
972 	switch (cmd) {
973 	case SIOCGIFADDR:	/* Get interface address */
974 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
975 	case SIOCGIFDSTADDR:	/* Get the destination address */
976 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
977 		/* Note that these ioctls will not sleep,
978 		   so that we do not impose a lock.
979 		   One day we will be forced to put shlock here (I mean SMP)
980 		 */
981 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
982 		memset(sin, 0, sizeof(*sin));
983 		sin->sin_family = AF_INET;
984 		break;
985 
986 	case SIOCSIFFLAGS:
987 		ret = -EPERM;
988 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
989 			goto out;
990 		break;
991 	case SIOCSIFADDR:	/* Set interface address (and family) */
992 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
993 	case SIOCSIFDSTADDR:	/* Set the destination address */
994 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
995 		ret = -EPERM;
996 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
997 			goto out;
998 		ret = -EINVAL;
999 		if (sin->sin_family != AF_INET)
1000 			goto out;
1001 		break;
1002 	default:
1003 		ret = -EINVAL;
1004 		goto out;
1005 	}
1006 
1007 	rtnl_lock();
1008 
1009 	ret = -ENODEV;
1010 	dev = __dev_get_by_name(net, ifr->ifr_name);
1011 	if (!dev)
1012 		goto done;
1013 
1014 	if (colon)
1015 		*colon = ':';
1016 
1017 	in_dev = __in_dev_get_rtnl(dev);
1018 	if (in_dev) {
1019 		if (tryaddrmatch) {
1020 			/* Matthias Andree */
1021 			/* compare label and address (4.4BSD style) */
1022 			/* note: we only do this for a limited set of ioctls
1023 			   and only if the original address family was AF_INET.
1024 			   This is checked above. */
1025 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1026 			     ifap = &ifa->ifa_next) {
1027 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1028 				    sin_orig.sin_addr.s_addr ==
1029 							ifa->ifa_local) {
1030 					break; /* found */
1031 				}
1032 			}
1033 		}
1034 		/* we didn't get a match, maybe the application is
1035 		   4.3BSD-style and passed in junk so we fall back to
1036 		   comparing just the label */
1037 		if (!ifa) {
1038 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1039 			     ifap = &ifa->ifa_next)
1040 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1041 					break;
1042 		}
1043 	}
1044 
1045 	ret = -EADDRNOTAVAIL;
1046 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1047 		goto done;
1048 
1049 	switch (cmd) {
1050 	case SIOCGIFADDR:	/* Get interface address */
1051 		ret = 0;
1052 		sin->sin_addr.s_addr = ifa->ifa_local;
1053 		break;
1054 
1055 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1056 		ret = 0;
1057 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1058 		break;
1059 
1060 	case SIOCGIFDSTADDR:	/* Get the destination address */
1061 		ret = 0;
1062 		sin->sin_addr.s_addr = ifa->ifa_address;
1063 		break;
1064 
1065 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1066 		ret = 0;
1067 		sin->sin_addr.s_addr = ifa->ifa_mask;
1068 		break;
1069 
1070 	case SIOCSIFFLAGS:
1071 		if (colon) {
1072 			ret = -EADDRNOTAVAIL;
1073 			if (!ifa)
1074 				break;
1075 			ret = 0;
1076 			if (!(ifr->ifr_flags & IFF_UP))
1077 				inet_del_ifa(in_dev, ifap, 1);
1078 			break;
1079 		}
1080 		ret = dev_change_flags(dev, ifr->ifr_flags);
1081 		break;
1082 
1083 	case SIOCSIFADDR:	/* Set interface address (and family) */
1084 		ret = -EINVAL;
1085 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1086 			break;
1087 
1088 		if (!ifa) {
1089 			ret = -ENOBUFS;
1090 			ifa = inet_alloc_ifa();
1091 			if (!ifa)
1092 				break;
1093 			INIT_HLIST_NODE(&ifa->hash);
1094 			if (colon)
1095 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1096 			else
1097 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1098 		} else {
1099 			ret = 0;
1100 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1101 				break;
1102 			inet_del_ifa(in_dev, ifap, 0);
1103 			ifa->ifa_broadcast = 0;
1104 			ifa->ifa_scope = 0;
1105 		}
1106 
1107 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1108 
1109 		if (!(dev->flags & IFF_POINTOPOINT)) {
1110 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1111 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1112 			if ((dev->flags & IFF_BROADCAST) &&
1113 			    ifa->ifa_prefixlen < 31)
1114 				ifa->ifa_broadcast = ifa->ifa_address |
1115 						     ~ifa->ifa_mask;
1116 		} else {
1117 			ifa->ifa_prefixlen = 32;
1118 			ifa->ifa_mask = inet_make_mask(32);
1119 		}
1120 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1121 		ret = inet_set_ifa(dev, ifa);
1122 		break;
1123 
1124 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1125 		ret = 0;
1126 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1127 			inet_del_ifa(in_dev, ifap, 0);
1128 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1129 			inet_insert_ifa(ifa);
1130 		}
1131 		break;
1132 
1133 	case SIOCSIFDSTADDR:	/* Set the destination address */
1134 		ret = 0;
1135 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1136 			break;
1137 		ret = -EINVAL;
1138 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1139 			break;
1140 		ret = 0;
1141 		inet_del_ifa(in_dev, ifap, 0);
1142 		ifa->ifa_address = sin->sin_addr.s_addr;
1143 		inet_insert_ifa(ifa);
1144 		break;
1145 
1146 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1147 
1148 		/*
1149 		 *	The mask we set must be legal.
1150 		 */
1151 		ret = -EINVAL;
1152 		if (bad_mask(sin->sin_addr.s_addr, 0))
1153 			break;
1154 		ret = 0;
1155 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1156 			__be32 old_mask = ifa->ifa_mask;
1157 			inet_del_ifa(in_dev, ifap, 0);
1158 			ifa->ifa_mask = sin->sin_addr.s_addr;
1159 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1160 
1161 			/* See if current broadcast address matches
1162 			 * with current netmask, then recalculate
1163 			 * the broadcast address. Otherwise it's a
1164 			 * funny address, so don't touch it since
1165 			 * the user seems to know what (s)he's doing...
1166 			 */
1167 			if ((dev->flags & IFF_BROADCAST) &&
1168 			    (ifa->ifa_prefixlen < 31) &&
1169 			    (ifa->ifa_broadcast ==
1170 			     (ifa->ifa_local|~old_mask))) {
1171 				ifa->ifa_broadcast = (ifa->ifa_local |
1172 						      ~sin->sin_addr.s_addr);
1173 			}
1174 			inet_insert_ifa(ifa);
1175 		}
1176 		break;
1177 	}
1178 done:
1179 	rtnl_unlock();
1180 out:
1181 	return ret;
1182 }
1183 
1184 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1185 {
1186 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1187 	struct in_ifaddr *ifa;
1188 	struct ifreq ifr;
1189 	int done = 0;
1190 
1191 	if (WARN_ON(size > sizeof(struct ifreq)))
1192 		goto out;
1193 
1194 	if (!in_dev)
1195 		goto out;
1196 
1197 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1198 		if (!buf) {
1199 			done += size;
1200 			continue;
1201 		}
1202 		if (len < size)
1203 			break;
1204 		memset(&ifr, 0, sizeof(struct ifreq));
1205 		strcpy(ifr.ifr_name, ifa->ifa_label);
1206 
1207 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1208 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1209 								ifa->ifa_local;
1210 
1211 		if (copy_to_user(buf + done, &ifr, size)) {
1212 			done = -EFAULT;
1213 			break;
1214 		}
1215 		len  -= size;
1216 		done += size;
1217 	}
1218 out:
1219 	return done;
1220 }
1221 
1222 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1223 				 int scope)
1224 {
1225 	for_primary_ifa(in_dev) {
1226 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1227 		    ifa->ifa_scope <= scope)
1228 			return ifa->ifa_local;
1229 	} endfor_ifa(in_dev);
1230 
1231 	return 0;
1232 }
1233 
1234 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1235 {
1236 	__be32 addr = 0;
1237 	struct in_device *in_dev;
1238 	struct net *net = dev_net(dev);
1239 	int master_idx;
1240 
1241 	rcu_read_lock();
1242 	in_dev = __in_dev_get_rcu(dev);
1243 	if (!in_dev)
1244 		goto no_in_dev;
1245 
1246 	for_primary_ifa(in_dev) {
1247 		if (ifa->ifa_scope > scope)
1248 			continue;
1249 		if (!dst || inet_ifa_match(dst, ifa)) {
1250 			addr = ifa->ifa_local;
1251 			break;
1252 		}
1253 		if (!addr)
1254 			addr = ifa->ifa_local;
1255 	} endfor_ifa(in_dev);
1256 
1257 	if (addr)
1258 		goto out_unlock;
1259 no_in_dev:
1260 	master_idx = l3mdev_master_ifindex_rcu(dev);
1261 
1262 	/* For VRFs, the VRF device takes the place of the loopback device,
1263 	 * with addresses on it being preferred.  Note in such cases the
1264 	 * loopback device will be among the devices that fail the master_idx
1265 	 * equality check in the loop below.
1266 	 */
1267 	if (master_idx &&
1268 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1269 	    (in_dev = __in_dev_get_rcu(dev))) {
1270 		addr = in_dev_select_addr(in_dev, scope);
1271 		if (addr)
1272 			goto out_unlock;
1273 	}
1274 
1275 	/* Not loopback addresses on loopback should be preferred
1276 	   in this case. It is important that lo is the first interface
1277 	   in dev_base list.
1278 	 */
1279 	for_each_netdev_rcu(net, dev) {
1280 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1281 			continue;
1282 
1283 		in_dev = __in_dev_get_rcu(dev);
1284 		if (!in_dev)
1285 			continue;
1286 
1287 		addr = in_dev_select_addr(in_dev, scope);
1288 		if (addr)
1289 			goto out_unlock;
1290 	}
1291 out_unlock:
1292 	rcu_read_unlock();
1293 	return addr;
1294 }
1295 EXPORT_SYMBOL(inet_select_addr);
1296 
1297 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1298 			      __be32 local, int scope)
1299 {
1300 	int same = 0;
1301 	__be32 addr = 0;
1302 
1303 	for_ifa(in_dev) {
1304 		if (!addr &&
1305 		    (local == ifa->ifa_local || !local) &&
1306 		    ifa->ifa_scope <= scope) {
1307 			addr = ifa->ifa_local;
1308 			if (same)
1309 				break;
1310 		}
1311 		if (!same) {
1312 			same = (!local || inet_ifa_match(local, ifa)) &&
1313 				(!dst || inet_ifa_match(dst, ifa));
1314 			if (same && addr) {
1315 				if (local || !dst)
1316 					break;
1317 				/* Is the selected addr into dst subnet? */
1318 				if (inet_ifa_match(addr, ifa))
1319 					break;
1320 				/* No, then can we use new local src? */
1321 				if (ifa->ifa_scope <= scope) {
1322 					addr = ifa->ifa_local;
1323 					break;
1324 				}
1325 				/* search for large dst subnet for addr */
1326 				same = 0;
1327 			}
1328 		}
1329 	} endfor_ifa(in_dev);
1330 
1331 	return same ? addr : 0;
1332 }
1333 
1334 /*
1335  * Confirm that local IP address exists using wildcards:
1336  * - net: netns to check, cannot be NULL
1337  * - in_dev: only on this interface, NULL=any interface
1338  * - dst: only in the same subnet as dst, 0=any dst
1339  * - local: address, 0=autoselect the local address
1340  * - scope: maximum allowed scope value for the local address
1341  */
1342 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1343 			 __be32 dst, __be32 local, int scope)
1344 {
1345 	__be32 addr = 0;
1346 	struct net_device *dev;
1347 
1348 	if (in_dev)
1349 		return confirm_addr_indev(in_dev, dst, local, scope);
1350 
1351 	rcu_read_lock();
1352 	for_each_netdev_rcu(net, dev) {
1353 		in_dev = __in_dev_get_rcu(dev);
1354 		if (in_dev) {
1355 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1356 			if (addr)
1357 				break;
1358 		}
1359 	}
1360 	rcu_read_unlock();
1361 
1362 	return addr;
1363 }
1364 EXPORT_SYMBOL(inet_confirm_addr);
1365 
1366 /*
1367  *	Device notifier
1368  */
1369 
1370 int register_inetaddr_notifier(struct notifier_block *nb)
1371 {
1372 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1373 }
1374 EXPORT_SYMBOL(register_inetaddr_notifier);
1375 
1376 int unregister_inetaddr_notifier(struct notifier_block *nb)
1377 {
1378 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1379 }
1380 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1381 
1382 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1383 {
1384 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1385 }
1386 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1387 
1388 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1389 {
1390 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1391 	    nb);
1392 }
1393 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1394 
1395 /* Rename ifa_labels for a device name change. Make some effort to preserve
1396  * existing alias numbering and to create unique labels if possible.
1397 */
1398 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1399 {
1400 	struct in_ifaddr *ifa;
1401 	int named = 0;
1402 
1403 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1404 		char old[IFNAMSIZ], *dot;
1405 
1406 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1407 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1408 		if (named++ == 0)
1409 			goto skip;
1410 		dot = strchr(old, ':');
1411 		if (!dot) {
1412 			sprintf(old, ":%d", named);
1413 			dot = old;
1414 		}
1415 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1416 			strcat(ifa->ifa_label, dot);
1417 		else
1418 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1419 skip:
1420 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1421 	}
1422 }
1423 
1424 static bool inetdev_valid_mtu(unsigned int mtu)
1425 {
1426 	return mtu >= IPV4_MIN_MTU;
1427 }
1428 
1429 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1430 					struct in_device *in_dev)
1431 
1432 {
1433 	struct in_ifaddr *ifa;
1434 
1435 	for (ifa = in_dev->ifa_list; ifa;
1436 	     ifa = ifa->ifa_next) {
1437 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1438 			 ifa->ifa_local, dev,
1439 			 ifa->ifa_local, NULL,
1440 			 dev->dev_addr, NULL);
1441 	}
1442 }
1443 
1444 /* Called only under RTNL semaphore */
1445 
/*
 * Netdevice notifier callback that keeps the IPv4 per-device state
 * (struct in_device) in step with device events.
 *
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Returns NOTIFY_DONE on every path except one: if creating the in_device
 * fails on NETDEV_REGISTER, the error is returned via notifier_from_errno().
 */
1446 static int inetdev_event(struct notifier_block *this, unsigned long event,
1447 			 void *ptr)
1448 {
1449 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1450 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1451 
1452 	ASSERT_RTNL();
1453 
	/* No IPv4 state yet: only REGISTER and CHANGEMTU can create it, every
	 * other event is ignored for such a device.
	 */
1454 	if (!in_dev) {
1455 		if (event == NETDEV_REGISTER) {
1456 			in_dev = inetdev_init(dev);
1457 			if (IS_ERR(in_dev))
1458 				return notifier_from_errno(PTR_ERR(in_dev));
			/* Loopback devices get NOXFRM and NOPOLICY set. */
1459 			if (dev->flags & IFF_LOOPBACK) {
1460 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1461 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1462 			}
1463 		} else if (event == NETDEV_CHANGEMTU) {
1464 			/* Re-enabling IP */
			/* NOTE(review): the inetdev_init() result is not checked
			 * here; a failure is silently ignored.
			 */
1465 			if (inetdev_valid_mtu(dev->mtu))
1466 				in_dev = inetdev_init(dev);
1467 		}
1468 		goto out;
1469 	}
1470 
1471 	switch (event) {
1472 	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device is
		 * treated as a bug: log it and clear dev->ip_ptr.
		 */
1473 		pr_debug("%s: bug\n", __func__);
1474 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1475 		break;
1476 	case NETDEV_UP:
1477 		if (!inetdev_valid_mtu(dev->mtu))
1478 			break;
		/* A loopback device coming up gets INADDR_LOOPBACK with an
		 * 8-bit prefix, host scope and infinite lifetimes.  Allocation
		 * failure is tolerated: the address is simply not added.
		 */
1479 		if (dev->flags & IFF_LOOPBACK) {
1480 			struct in_ifaddr *ifa = inet_alloc_ifa();
1481 
1482 			if (ifa) {
1483 				INIT_HLIST_NODE(&ifa->hash);
1484 				ifa->ifa_local =
1485 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1486 				ifa->ifa_prefixlen = 8;
1487 				ifa->ifa_mask = inet_make_mask(8);
1488 				in_dev_hold(in_dev);
1489 				ifa->ifa_dev = in_dev;
1490 				ifa->ifa_scope = RT_SCOPE_HOST;
1491 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1492 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1493 						 INFINITY_LIFE_TIME);
1494 				ipv4_devconf_setall(in_dev);
1495 				neigh_parms_data_state_setall(in_dev->arp_parms);
1496 				inet_insert_ifa(ifa);
1497 			}
1498 		}
1499 		ip_mc_up(in_dev);
1500 		/* fall through */
1501 	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR announce themselves via ARP only when the
		 * ARP_NOTIFY setting is enabled for this device.
		 */
1502 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1503 			break;
1504 		/* fall through */
1505 	case NETDEV_NOTIFY_PEERS:
1506 		/* Send gratuitous ARP to notify of link change */
1507 		inetdev_send_gratuitous_arp(dev, in_dev);
1508 		break;
1509 	case NETDEV_DOWN:
1510 		ip_mc_down(in_dev);
1511 		break;
1512 	case NETDEV_PRE_TYPE_CHANGE:
1513 		ip_mc_unmap(in_dev);
1514 		break;
1515 	case NETDEV_POST_TYPE_CHANGE:
1516 		ip_mc_remap(in_dev);
1517 		break;
1518 	case NETDEV_CHANGEMTU:
1519 		if (inetdev_valid_mtu(dev->mtu))
1520 			break;
1521 		/* disable IP when MTU is not enough */
1522 		/* fall through */
1523 	case NETDEV_UNREGISTER:
1524 		inetdev_destroy(in_dev);
1525 		break;
1526 	case NETDEV_CHANGENAME:
1527 		/* Do not notify about label change, this event is
1528 		 * not interesting to applications using netlink.
1529 		 */
		/* NOTE(review): inetdev_changename() in this file does call
		 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, which seems
		 * to contradict the comment above -- confirm which is intended.
		 */
1530 		inetdev_changename(dev, in_dev);
1531 
		/* Re-register the sysctl entries for this in_device after the
		 * rename.
		 */
1532 		devinet_sysctl_unregister(in_dev);
1533 		devinet_sysctl_register(in_dev);
1534 		break;
1535 	}
1536 out:
1537 	return NOTIFY_DONE;
1538 }
1539 
/* Notifier block whose callback is inetdev_event(). */
1540 static struct notifier_block ip_netdev_notifier = {
1541 	.notifier_call = inetdev_event,
1542 };
1543 
1544 static size_t inet_nlmsg_size(void)
1545 {
1546 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1547 	       + nla_total_size(4) /* IFA_ADDRESS */
1548 	       + nla_total_size(4) /* IFA_LOCAL */
1549 	       + nla_total_size(4) /* IFA_BROADCAST */
1550 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1551 	       + nla_total_size(4)  /* IFA_FLAGS */
1552 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1553 }
1554 
1555 static inline u32 cstamp_delta(unsigned long cstamp)
1556 {
1557 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1558 }
1559 
1560 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1561 			 unsigned long tstamp, u32 preferred, u32 valid)
1562 {
1563 	struct ifa_cacheinfo ci;
1564 
1565 	ci.cstamp = cstamp_delta(cstamp);
1566 	ci.tstamp = cstamp_delta(tstamp);
1567 	ci.ifa_prefered = preferred;
1568 	ci.ifa_valid = valid;
1569 
1570 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1571 }
1572 
1573 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1574 			    u32 portid, u32 seq, int event, unsigned int flags)
1575 {
1576 	struct ifaddrmsg *ifm;
1577 	struct nlmsghdr  *nlh;
1578 	u32 preferred, valid;
1579 
1580 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1581 	if (!nlh)
1582 		return -EMSGSIZE;
1583 
1584 	ifm = nlmsg_data(nlh);
1585 	ifm->ifa_family = AF_INET;
1586 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1587 	ifm->ifa_flags = ifa->ifa_flags;
1588 	ifm->ifa_scope = ifa->ifa_scope;
1589 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1590 
1591 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1592 		preferred = ifa->ifa_preferred_lft;
1593 		valid = ifa->ifa_valid_lft;
1594 		if (preferred != INFINITY_LIFE_TIME) {
1595 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1596 
1597 			if (preferred > tval)
1598 				preferred -= tval;
1599 			else
1600 				preferred = 0;
1601 			if (valid != INFINITY_LIFE_TIME) {
1602 				if (valid > tval)
1603 					valid -= tval;
1604 				else
1605 					valid = 0;
1606 			}
1607 		}
1608 	} else {
1609 		preferred = INFINITY_LIFE_TIME;
1610 		valid = INFINITY_LIFE_TIME;
1611 	}
1612 	if ((ifa->ifa_address &&
1613 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1614 	    (ifa->ifa_local &&
1615 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1616 	    (ifa->ifa_broadcast &&
1617 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1618 	    (ifa->ifa_label[0] &&
1619 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1620 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1621 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1622 			  preferred, valid))
1623 		goto nla_put_failure;
1624 
1625 	nlmsg_end(skb, nlh);
1626 	return 0;
1627 
1628 nla_put_failure:
1629 	nlmsg_cancel(skb, nlh);
1630 	return -EMSGSIZE;
1631 }
1632 
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (NLM_F_MULTI) for
 * every IPv4 address of every device in the socket's network namespace.
 *
 * The dump is resumable.  The position reached is stored in @cb->args:
 *   args[0] - bucket index into net->dev_index_head
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 * and is read back on the next invocation.
 *
 * Returns skb->len, i.e. the amount of data placed in @skb so far.
 */
1633 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1634 {
1635 	struct net *net = sock_net(skb->sk);
1636 	int h, s_h;
1637 	int idx, s_idx;
1638 	int ip_idx, s_ip_idx;
1639 	struct net_device *dev;
1640 	struct in_device *in_dev;
1641 	struct in_ifaddr *ifa;
1642 	struct hlist_head *head;
1643 
	/* Restore the position saved by the previous invocation. */
1644 	s_h = cb->args[0];
1645 	s_idx = idx = cb->args[1];
1646 	s_ip_idx = ip_idx = cb->args[2];
1647 
	/* s_idx only applies to the first bucket visited; it is reset to 0
	 * by the loop increment for every later bucket.
	 */
1648 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1649 		idx = 0;
1650 		head = &net->dev_index_head[h];
1651 		rcu_read_lock();
		/* Sequence value later checked by nl_dump_check_consistent(). */
1652 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1653 			  net->dev_base_seq;
1654 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped in a previous call. */
1655 			if (idx < s_idx)
1656 				goto cont;
			/* Past the device we resumed on: start each following
			 * device from its first address.
			 */
1657 			if (h > s_h || idx > s_idx)
1658 				s_ip_idx = 0;
1659 			in_dev = __in_dev_get_rcu(dev);
1660 			if (!in_dev)
1661 				goto cont;
1662 
1663 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1664 			     ifa = ifa->ifa_next, ip_idx++) {
1665 				if (ip_idx < s_ip_idx)
1666 					continue;
				/* A fill failure ends this pass; h, idx and
				 * ip_idx then identify the entry to retry.
				 */
1667 				if (inet_fill_ifaddr(skb, ifa,
1668 					     NETLINK_CB(cb->skb).portid,
1669 					     cb->nlh->nlmsg_seq,
1670 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1671 					rcu_read_unlock();
1672 					goto done;
1673 				}
1674 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1675 			}
1676 cont:
1677 			idx++;
1678 		}
1679 		rcu_read_unlock();
1680 	}
1681 
1682 done:
	/* Save the position for the next invocation. */
1683 	cb->args[0] = h;
1684 	cb->args[1] = idx;
1685 	cb->args[2] = ip_idx;
1686 
1687 	return skb->len;
1688 }
1689 
1690 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1691 		      u32 portid)
1692 {
1693 	struct sk_buff *skb;
1694 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1695 	int err = -ENOBUFS;
1696 	struct net *net;
1697 
1698 	net = dev_net(ifa->ifa_dev->dev);
1699 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1700 	if (!skb)
1701 		goto errout;
1702 
1703 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1704 	if (err < 0) {
1705 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1706 		WARN_ON(err == -EMSGSIZE);
1707 		kfree_skb(skb);
1708 		goto errout;
1709 	}
1710 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1711 	return;
1712 errout:
1713 	if (err < 0)
1714 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1715 }
1716 
1717 static size_t inet_get_link_af_size(const struct net_device *dev,
1718 				    u32 ext_filter_mask)
1719 {
1720 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1721 
1722 	if (!in_dev)
1723 		return 0;
1724 
1725 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1726 }
1727 
1728 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1729 			     u32 ext_filter_mask)
1730 {
1731 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1732 	struct nlattr *nla;
1733 	int i;
1734 
1735 	if (!in_dev)
1736 		return -ENODATA;
1737 
1738 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1739 	if (!nla)
1740 		return -EMSGSIZE;
1741 
1742 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1743 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1744 
1745 	return 0;
1746 }
1747 
/*
 * Validation policy for the AF_INET link attribute; used by
 * inet_validate_link_af().  IFLA_INET_CONF must be a nested attribute.
 */
1748 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1749 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1750 };
1751 
1752 static int inet_validate_link_af(const struct net_device *dev,
1753 				 const struct nlattr *nla)
1754 {
1755 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1756 	int err, rem;
1757 
1758 	if (dev && !__in_dev_get_rcu(dev))
1759 		return -EAFNOSUPPORT;
1760 
1761 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1762 	if (err < 0)
1763 		return err;
1764 
1765 	if (tb[IFLA_INET_CONF]) {
1766 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1767 			int cfgid = nla_type(a);
1768 
1769 			if (nla_len(a) < 4)
1770 				return -EINVAL;
1771 
1772 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1773 				return -EINVAL;
1774 		}
1775 	}
1776 
1777 	return 0;
1778 }
1779 
1780 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1781 {
1782 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1783 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1784 	int rem;
1785 
1786 	if (!in_dev)
1787 		return -EAFNOSUPPORT;
1788 
1789 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1790 		BUG();
1791 
1792 	if (tb[IFLA_INET_CONF]) {
1793 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1794 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1795 	}
1796 
1797 	return 0;
1798 }
1799 
1800 static int inet_netconf_msgsize_devconf(int type)
1801 {
1802 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1803 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1804 	bool all = false;
1805 
1806 	if (type == NETCONFA_ALL)
1807 		all = true;
1808 
1809 	if (all || type == NETCONFA_FORWARDING)
1810 		size += nla_total_size(4);
1811 	if (all || type == NETCONFA_RP_FILTER)
1812 		size += nla_total_size(4);
1813 	if (all || type == NETCONFA_MC_FORWARDING)
1814 		size += nla_total_size(4);
1815 	if (all || type == NETCONFA_PROXY_NEIGH)
1816 		size += nla_total_size(4);
1817 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1818 		size += nla_total_size(4);
1819 
1820 	return size;
1821 }
1822 
1823 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1824 				     struct ipv4_devconf *devconf, u32 portid,
1825 				     u32 seq, int event, unsigned int flags,
1826 				     int type)
1827 {
1828 	struct nlmsghdr  *nlh;
1829 	struct netconfmsg *ncm;
1830 	bool all = false;
1831 
1832 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1833 			flags);
1834 	if (!nlh)
1835 		return -EMSGSIZE;
1836 
1837 	if (type == NETCONFA_ALL)
1838 		all = true;
1839 
1840 	ncm = nlmsg_data(nlh);
1841 	ncm->ncm_family = AF_INET;
1842 
1843 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1844 		goto nla_put_failure;
1845 
1846 	if (!devconf)
1847 		goto out;
1848 
1849 	if ((all || type == NETCONFA_FORWARDING) &&
1850 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1851 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1852 		goto nla_put_failure;
1853 	if ((all || type == NETCONFA_RP_FILTER) &&
1854 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1855 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1856 		goto nla_put_failure;
1857 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1858 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1859 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1860 		goto nla_put_failure;
1861 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1862 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1863 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1864 		goto nla_put_failure;
1865 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1866 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1867 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1868 		goto nla_put_failure;
1869 
1870 out:
1871 	nlmsg_end(skb, nlh);
1872 	return 0;
1873 
1874 nla_put_failure:
1875 	nlmsg_cancel(skb, nlh);
1876 	return -EMSGSIZE;
1877 }
1878 
1879 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1880 				 int ifindex, struct ipv4_devconf *devconf)
1881 {
1882 	struct sk_buff *skb;
1883 	int err = -ENOBUFS;
1884 
1885 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1886 	if (!skb)
1887 		goto errout;
1888 
1889 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1890 					event, 0, type);
1891 	if (err < 0) {
1892 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1893 		WARN_ON(err == -EMSGSIZE);
1894 		kfree_skb(skb);
1895 		goto errout;
1896 	}
1897 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1898 	return;
1899 errout:
1900 	if (err < 0)
1901 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1902 }
1903 
/*
 * Attribute policy passed to nlmsg_parse() in inet_netconf_get_devconf().
 * Every listed attribute is given a length of sizeof(int).
 * NOTE(review): NETCONFA_MC_FORWARDING is emitted by
 * inet_netconf_fill_devconf() but has no entry here -- confirm whether that
 * is intentional.
 */
1904 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1905 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1906 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1907 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1908 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1909 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
1910 };
1911 
1912 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1913 				    struct nlmsghdr *nlh,
1914 				    struct netlink_ext_ack *extack)
1915 {
1916 	struct net *net = sock_net(in_skb->sk);
1917 	struct nlattr *tb[NETCONFA_MAX+1];
1918 	struct netconfmsg *ncm;
1919 	struct sk_buff *skb;
1920 	struct ipv4_devconf *devconf;
1921 	struct in_device *in_dev;
1922 	struct net_device *dev;
1923 	int ifindex;
1924 	int err;
1925 
1926 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1927 			  devconf_ipv4_policy, extack);
1928 	if (err < 0)
1929 		goto errout;
1930 
1931 	err = -EINVAL;
1932 	if (!tb[NETCONFA_IFINDEX])
1933 		goto errout;
1934 
1935 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1936 	switch (ifindex) {
1937 	case NETCONFA_IFINDEX_ALL:
1938 		devconf = net->ipv4.devconf_all;
1939 		break;
1940 	case NETCONFA_IFINDEX_DEFAULT:
1941 		devconf = net->ipv4.devconf_dflt;
1942 		break;
1943 	default:
1944 		dev = __dev_get_by_index(net, ifindex);
1945 		if (!dev)
1946 			goto errout;
1947 		in_dev = __in_dev_get_rtnl(dev);
1948 		if (!in_dev)
1949 			goto errout;
1950 		devconf = &in_dev->cnf;
1951 		break;
1952 	}
1953 
1954 	err = -ENOBUFS;
1955 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1956 	if (!skb)
1957 		goto errout;
1958 
1959 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1960 					NETLINK_CB(in_skb).portid,
1961 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1962 					NETCONFA_ALL);
1963 	if (err < 0) {
1964 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1965 		WARN_ON(err == -EMSGSIZE);
1966 		kfree_skb(skb);
1967 		goto errout;
1968 	}
1969 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1970 errout:
1971 	return err;
1972 }
1973 
/*
 * Netlink dump callback: emit one RTM_NEWNETCONF message (NLM_F_MULTI,
 * NETCONFA_ALL) for every device with IPv4 state, followed by one for the
 * namespace-wide "all" settings and one for the "default" settings.
 *
 * The dump is resumable.  @cb->args[0] holds the bucket index into
 * net->dev_index_head -- with NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1
 * standing for the "all" and "default" entries -- and @cb->args[1] holds the
 * device index within the bucket.
 *
 * Returns skb->len, i.e. the amount of data placed in @skb so far.
 */
1974 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1975 				     struct netlink_callback *cb)
1976 {
1977 	struct net *net = sock_net(skb->sk);
1978 	int h, s_h;
1979 	int idx, s_idx;
1980 	struct net_device *dev;
1981 	struct in_device *in_dev;
1982 	struct hlist_head *head;
1983 
	/* Restore the position saved by the previous invocation. */
1984 	s_h = cb->args[0];
1985 	s_idx = idx = cb->args[1];
1986 
	/* s_idx only applies to the first bucket visited. */
1987 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1988 		idx = 0;
1989 		head = &net->dev_index_head[h];
1990 		rcu_read_lock();
		/* Sequence value later checked by nl_dump_check_consistent(). */
1991 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1992 			  net->dev_base_seq;
1993 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped in a previous call. */
1994 			if (idx < s_idx)
1995 				goto cont;
1996 			in_dev = __in_dev_get_rcu(dev);
1997 			if (!in_dev)
1998 				goto cont;
1999 
			/* A fill failure ends this pass; h and idx then
			 * identify the device to retry.
			 */
2000 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2001 						      &in_dev->cnf,
2002 						      NETLINK_CB(cb->skb).portid,
2003 						      cb->nlh->nlmsg_seq,
2004 						      RTM_NEWNETCONF,
2005 						      NLM_F_MULTI,
2006 						      NETCONFA_ALL) < 0) {
2007 				rcu_read_unlock();
2008 				goto done;
2009 			}
2010 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2011 cont:
2012 			idx++;
2013 		}
2014 		rcu_read_unlock();
2015 	}
	/* All buckets done: emit the "all" pseudo-entry, advancing h only
	 * once it has been written so that a failure retries it.
	 */
2016 	if (h == NETDEV_HASHENTRIES) {
2017 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2018 					      net->ipv4.devconf_all,
2019 					      NETLINK_CB(cb->skb).portid,
2020 					      cb->nlh->nlmsg_seq,
2021 					      RTM_NEWNETCONF, NLM_F_MULTI,
2022 					      NETCONFA_ALL) < 0)
2023 			goto done;
2024 		else
2025 			h++;
2026 	}
	/* Then the "default" pseudo-entry, with the same retry scheme. */
2027 	if (h == NETDEV_HASHENTRIES + 1) {
2028 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2029 					      net->ipv4.devconf_dflt,
2030 					      NETLINK_CB(cb->skb).portid,
2031 					      cb->nlh->nlmsg_seq,
2032 					      RTM_NEWNETCONF, NLM_F_MULTI,
2033 					      NETCONFA_ALL) < 0)
2034 			goto done;
2035 		else
2036 			h++;
2037 	}
2038 done:
	/* Save the position for the next invocation. */
2039 	cb->args[0] = h;
2040 	cb->args[1] = idx;
2041 
2042 	return skb->len;
2043 }
2044 
2045 #ifdef CONFIG_SYSCTL
2046 
2047 static void devinet_copy_dflt_conf(struct net *net, int i)
2048 {
2049 	struct net_device *dev;
2050 
2051 	rcu_read_lock();
2052 	for_each_netdev_rcu(net, dev) {
2053 		struct in_device *in_dev;
2054 
2055 		in_dev = __in_dev_get_rcu(dev);
2056 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2057 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2058 	}
2059 	rcu_read_unlock();
2060 }
2061 
2062 /* called with RTNL locked */
2063 static void inet_forward_change(struct net *net)
2064 {
2065 	struct net_device *dev;
2066 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2067 
2068 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2069 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2070 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2071 				    NETCONFA_FORWARDING,
2072 				    NETCONFA_IFINDEX_ALL,
2073 				    net->ipv4.devconf_all);
2074 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2075 				    NETCONFA_FORWARDING,
2076 				    NETCONFA_IFINDEX_DEFAULT,
2077 				    net->ipv4.devconf_dflt);
2078 
2079 	for_each_netdev(net, dev) {
2080 		struct in_device *in_dev;
2081 
2082 		if (on)
2083 			dev_disable_lro(dev);
2084 
2085 		in_dev = __in_dev_get_rtnl(dev);
2086 		if (in_dev) {
2087 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2088 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2089 						    NETCONFA_FORWARDING,
2090 						    dev->ifindex, &in_dev->cnf);
2091 		}
2092 	}
2093 }
2094 
2095 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2096 {
2097 	if (cnf == net->ipv4.devconf_dflt)
2098 		return NETCONFA_IFINDEX_DEFAULT;
2099 	else if (cnf == net->ipv4.devconf_all)
2100 		return NETCONFA_IFINDEX_ALL;
2101 	else {
2102 		struct in_device *idev
2103 			= container_of(cnf, struct in_device, cnf);
2104 		return idev->dev->ifindex;
2105 	}
2106 }
2107 
2108 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2109 			     void __user *buffer,
2110 			     size_t *lenp, loff_t *ppos)
2111 {
2112 	int old_value = *(int *)ctl->data;
2113 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2114 	int new_value = *(int *)ctl->data;
2115 
2116 	if (write) {
2117 		struct ipv4_devconf *cnf = ctl->extra1;
2118 		struct net *net = ctl->extra2;
2119 		int i = (int *)ctl->data - cnf->data;
2120 		int ifindex;
2121 
2122 		set_bit(i, cnf->state);
2123 
2124 		if (cnf == net->ipv4.devconf_dflt)
2125 			devinet_copy_dflt_conf(net, i);
2126 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2127 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2128 			if ((new_value == 0) && (old_value != 0))
2129 				rt_cache_flush(net);
2130 
2131 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2132 		    new_value != old_value) {
2133 			ifindex = devinet_conf_ifindex(net, cnf);
2134 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2135 						    NETCONFA_RP_FILTER,
2136 						    ifindex, cnf);
2137 		}
2138 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2139 		    new_value != old_value) {
2140 			ifindex = devinet_conf_ifindex(net, cnf);
2141 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2142 						    NETCONFA_PROXY_NEIGH,
2143 						    ifindex, cnf);
2144 		}
2145 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2146 		    new_value != old_value) {
2147 			ifindex = devinet_conf_ifindex(net, cnf);
2148 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2149 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2150 						    ifindex, cnf);
2151 		}
2152 	}
2153 
2154 	return ret;
2155 }
2156 
2157 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2158 				  void __user *buffer,
2159 				  size_t *lenp, loff_t *ppos)
2160 {
2161 	int *valp = ctl->data;
2162 	int val = *valp;
2163 	loff_t pos = *ppos;
2164 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2165 
2166 	if (write && *valp != val) {
2167 		struct net *net = ctl->extra2;
2168 
2169 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2170 			if (!rtnl_trylock()) {
2171 				/* Restore the original values before restarting */
2172 				*valp = val;
2173 				*ppos = pos;
2174 				return restart_syscall();
2175 			}
2176 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2177 				inet_forward_change(net);
2178 			} else {
2179 				struct ipv4_devconf *cnf = ctl->extra1;
2180 				struct in_device *idev =
2181 					container_of(cnf, struct in_device, cnf);
2182 				if (*valp)
2183 					dev_disable_lro(idev->dev);
2184 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2185 							    NETCONFA_FORWARDING,
2186 							    idev->dev->ifindex,
2187 							    cnf);
2188 			}
2189 			rtnl_unlock();
2190 			rt_cache_flush(net);
2191 		} else
2192 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2193 						    NETCONFA_FORWARDING,
2194 						    NETCONFA_IFINDEX_DEFAULT,
2195 						    net->ipv4.devconf_dflt);
2196 	}
2197 
2198 	return ret;
2199 }
2200 
2201 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2202 				void __user *buffer,
2203 				size_t *lenp, loff_t *ppos)
2204 {
2205 	int *valp = ctl->data;
2206 	int val = *valp;
2207 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2208 	struct net *net = ctl->extra2;
2209 
2210 	if (write && *valp != val)
2211 		rt_cache_flush(net);
2212 
2213 	return ret;
2214 }
2215 
/*
 * Build one struct ctl_table initialiser for the devconf attribute
 * IPV4_DEVCONF_<attr>:
 *   attr - attribute suffix, pasted onto IPV4_DEVCONF_
 *   name - procfs file name
 *   mval - file mode
 *   proc - proc_handler to install
 * .data and .extra1 refer to the global ipv4_devconf; .data addresses slot
 * (IPV4_DEVCONF_<attr> - 1) of its data array.  .extra2 is left unset here.
 */
2216 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2217 	{ \
2218 		.procname	= name, \
2219 		.data		= ipv4_devconf.data + \
2220 				  IPV4_DEVCONF_ ## attr - 1, \
2221 		.maxlen		= sizeof(int), \
2222 		.mode		= mval, \
2223 		.proc_handler	= proc, \
2224 		.extra1		= &ipv4_devconf, \
2225 	}
2226 
/* Mode 0644 entry handled by devinet_conf_proc. */
2227 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2228 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2229 
/* Mode 0444 (read-only) entry handled by devinet_conf_proc. */
2230 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2231 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2232 
/* Mode 0644 entry with a caller-supplied handler. */
2233 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2234 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2235 
/* Mode 0644 entry handled by ipv4_doint_and_flush. */
2236 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2237 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2238 
/*
 * Template for the net/ipv4/conf/<name>/ sysctl directory.
 *
 * Every entry initially points into the global ipv4_devconf (see
 * DEVINET_SYSCTL_ENTRY).  __devinet_sysctl_register() duplicates this
 * template and rebases .data/.extra1/.extra2 of the copy onto the devconf
 * being registered; sysctl_header of the copy then holds the registration.
 * Array slots without an explicit initialiser are zero-filled.
 */
2239 static struct devinet_sysctl_table {
2240 	struct ctl_table_header *sysctl_header;
2241 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2242 } devinet_sysctl = {
2243 	.devinet_vars = {
		/* forwarding uses its own handler, devinet_sysctl_forward */
2244 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2245 					     devinet_sysctl_forward),
		/* read-only (mode 0444) */
2246 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2247 
		/* read-write entries handled by devinet_conf_proc */
2248 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2249 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2250 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2251 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2252 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2253 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2254 					"accept_source_route"),
2255 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2256 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2257 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2258 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2259 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2260 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2261 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2262 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2263 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2264 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2265 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2266 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2267 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2268 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2269 					"force_igmp_version"),
2270 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2271 					"igmpv2_unsolicited_report_interval"),
2272 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2273 					"igmpv3_unsolicited_report_interval"),
2274 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2275 					"ignore_routes_with_linkdown"),
2276 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2277 					"drop_gratuitous_arp"),
2278 
		/*
		 * Entries handled by ipv4_doint_and_flush: a write that
		 * changes the value also flushes the route cache.
		 */
2279 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2280 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2281 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2282 					      "promote_secondaries"),
2283 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2284 					      "route_localnet"),
2285 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2286 					      "drop_unicast_in_l2_multicast"),
2287 	},
2288 };
2289 
2290 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2291 				     int ifindex, struct ipv4_devconf *p)
2292 {
2293 	int i;
2294 	struct devinet_sysctl_table *t;
2295 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2296 
2297 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2298 	if (!t)
2299 		goto out;
2300 
2301 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2302 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2303 		t->devinet_vars[i].extra1 = p;
2304 		t->devinet_vars[i].extra2 = net;
2305 	}
2306 
2307 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2308 
2309 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2310 	if (!t->sysctl_header)
2311 		goto free;
2312 
2313 	p->sysctl = t;
2314 
2315 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2316 				    ifindex, p);
2317 	return 0;
2318 
2319 free:
2320 	kfree(t);
2321 out:
2322 	return -ENOBUFS;
2323 }
2324 
2325 static void __devinet_sysctl_unregister(struct net *net,
2326 					struct ipv4_devconf *cnf, int ifindex)
2327 {
2328 	struct devinet_sysctl_table *t = cnf->sysctl;
2329 
2330 	if (t) {
2331 		cnf->sysctl = NULL;
2332 		unregister_net_sysctl_table(t->sysctl_header);
2333 		kfree(t);
2334 	}
2335 
2336 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2337 }
2338 
2339 static int devinet_sysctl_register(struct in_device *idev)
2340 {
2341 	int err;
2342 
2343 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2344 		return -EINVAL;
2345 
2346 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2347 	if (err)
2348 		return err;
2349 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2350 					idev->dev->ifindex, &idev->cnf);
2351 	if (err)
2352 		neigh_sysctl_unregister(idev->arp_parms);
2353 	return err;
2354 }
2355 
2356 static void devinet_sysctl_unregister(struct in_device *idev)
2357 {
2358 	struct net *net = dev_net(idev->dev);
2359 
2360 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2361 	neigh_sysctl_unregister(idev->arp_parms);
2362 }
2363 
/*
 * Table for the "ip_forward" sysctl, registered under "net/ipv4" by
 * devinet_init_net().  It is backed by the FORWARDING slot of the global
 * ipv4_devconf and handled by devinet_sysctl_forward.  The values below are
 * the init_net ones; for other namespaces devinet_init_net() duplicates the
 * table and repoints data/extra1/extra2 at that namespace's objects.
 */
2364 static struct ctl_table ctl_forward_entry[] = {
2365 	{
2366 		.procname	= "ip_forward",
2367 		.data		= &ipv4_devconf.data[
2368 					IPV4_DEVCONF_FORWARDING - 1],
2369 		.maxlen		= sizeof(int),
2370 		.mode		= 0644,
2371 		.proc_handler	= devinet_sysctl_forward,
2372 		.extra1		= &ipv4_devconf,
2373 		.extra2		= &init_net,
2374 	},
	/* empty terminating entry */
2375 	{ },
2376 };
2377 #endif
2378 
2379 static __net_init int devinet_init_net(struct net *net)
2380 {
2381 	int err;
2382 	struct ipv4_devconf *all, *dflt;
2383 #ifdef CONFIG_SYSCTL
2384 	struct ctl_table *tbl = ctl_forward_entry;
2385 	struct ctl_table_header *forw_hdr;
2386 #endif
2387 
2388 	err = -ENOMEM;
2389 	all = &ipv4_devconf;
2390 	dflt = &ipv4_devconf_dflt;
2391 
2392 	if (!net_eq(net, &init_net)) {
2393 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2394 		if (!all)
2395 			goto err_alloc_all;
2396 
2397 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2398 		if (!dflt)
2399 			goto err_alloc_dflt;
2400 
2401 #ifdef CONFIG_SYSCTL
2402 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2403 		if (!tbl)
2404 			goto err_alloc_ctl;
2405 
2406 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2407 		tbl[0].extra1 = all;
2408 		tbl[0].extra2 = net;
2409 #endif
2410 	}
2411 
2412 #ifdef CONFIG_SYSCTL
2413 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2414 	if (err < 0)
2415 		goto err_reg_all;
2416 
2417 	err = __devinet_sysctl_register(net, "default",
2418 					NETCONFA_IFINDEX_DEFAULT, dflt);
2419 	if (err < 0)
2420 		goto err_reg_dflt;
2421 
2422 	err = -ENOMEM;
2423 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2424 	if (!forw_hdr)
2425 		goto err_reg_ctl;
2426 	net->ipv4.forw_hdr = forw_hdr;
2427 #endif
2428 
2429 	net->ipv4.devconf_all = all;
2430 	net->ipv4.devconf_dflt = dflt;
2431 	return 0;
2432 
2433 #ifdef CONFIG_SYSCTL
2434 err_reg_ctl:
2435 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2436 err_reg_dflt:
2437 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2438 err_reg_all:
2439 	if (tbl != ctl_forward_entry)
2440 		kfree(tbl);
2441 err_alloc_ctl:
2442 #endif
2443 	if (dflt != &ipv4_devconf_dflt)
2444 		kfree(dflt);
2445 err_alloc_dflt:
2446 	if (all != &ipv4_devconf)
2447 		kfree(all);
2448 err_alloc_all:
2449 	return err;
2450 }
2451 
2452 static __net_exit void devinet_exit_net(struct net *net)
2453 {
2454 #ifdef CONFIG_SYSCTL
2455 	struct ctl_table *tbl;
2456 
2457 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2458 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2459 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2460 				    NETCONFA_IFINDEX_DEFAULT);
2461 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2462 				    NETCONFA_IFINDEX_ALL);
2463 	kfree(tbl);
2464 #endif
2465 	kfree(net->ipv4.devconf_dflt);
2466 	kfree(net->ipv4.devconf_all);
2467 }
2468 
/*
 * Per-network-namespace hooks: devinet_init_net() on namespace setup,
 * devinet_exit_net() on teardown.  Registered from devinet_init() with
 * register_pernet_subsys().
 */
2469 static __net_initdata struct pernet_operations devinet_ops = {
2470 	.init = devinet_init_net,
2471 	.exit = devinet_exit_net,
2472 };
2473 
/*
 * AF_INET link-attribute callbacks (fill, size, validate, set), registered
 * from devinet_init() with rtnl_af_register().  The callbacks themselves are
 * defined elsewhere in this file.
 */
2474 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2475 	.family		  = AF_INET,
2476 	.fill_link_af	  = inet_fill_link_af,
2477 	.get_link_af_size = inet_get_link_af_size,
2478 	.validate_link_af = inet_validate_link_af,
2479 	.set_link_af	  = inet_set_link_af,
2480 };
2481 
2482 void __init devinet_init(void)
2483 {
2484 	int i;
2485 
2486 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2487 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2488 
2489 	register_pernet_subsys(&devinet_ops);
2490 
2491 	register_gifconf(PF_INET, inet_gifconf);
2492 	register_netdevice_notifier(&ip_netdev_notifier);
2493 
2494 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2495 
2496 	rtnl_af_register(&inet_af_ops);
2497 
2498 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2499 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2500 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2501 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2502 		      inet_netconf_dump_devconf, 0);
2503 }
2504