xref: /linux/net/ipv4/devinet.c (revision a6cdeeb16bff89c8486324f53577db058cbe81ba)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* Compile-time defaults for the per-device ("all") IPv4 configuration.
 * IPV4_DEVCONF_* identifiers are 1-based, hence the "- 1" when indexing
 * the zero-based data[] array.  Unlisted entries default to 0.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
75 
/* Template copied into each new in_device (see inetdev_init()); differs
 * from ipv4_devconf above by also enabling ACCEPT_SOURCE_ROUTE.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};

/* Access a field of the per-namespace default devconf. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
90 
/* Netlink attribute validation policy for RTM_NEWADDR/RTM_DELADDR
 * (struct ifaddrmsg) requests; addresses are raw 32-bit big-endian
 * values carried as NLA_U32.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
};
101 
/* Parameters threaded through the netlink address-dump/fill helpers. */
struct inet_fill_args {
	u32 portid;		/* destination netlink port */
	u32 seq;		/* netlink sequence number */
	int event;		/* RTM_* message type to emit */
	unsigned int flags;	/* NLM_F_* message flags */
	int netnsid;		/* target netns id, or default */
	int ifindex;		/* restrict dump to this ifindex, 0 = all */
};

/* Global hash of all IPv4 addresses, keyed by ifa_local (see
 * inet_addr_hash()); 256 buckets.
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
115 
116 static u32 inet_addr_hash(const struct net *net, __be32 addr)
117 {
118 	u32 val = (__force u32) addr ^ net_hash_mix(net);
119 
120 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
121 }
122 
/* Link @ifa into the global address hash (RCU-safe for readers).
 * Caller must hold RTNL; keyed on ifa_local.
 */
static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
	u32 hash = inet_addr_hash(net, ifa->ifa_local);

	ASSERT_RTNL();
	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
}
130 
/* Unlink @ifa from the global address hash under RTNL; hlist_del_init_rcu
 * leaves the node reinitialized so a second removal is harmless.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
136 
/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 *
 * Returns the device owning @addr, or NULL if none is found.
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;

	rcu_read_lock();
	ifa = inet_lookup_ifaddr_rcu(net, addr);
	if (!ifa) {
		struct flowi4 fl4 = { .daddr = addr };
		struct fib_result res = { 0 };
		struct fib_table *local;

		/* Fallback to FIB local table so that communication
		 * over loopback subnets work.
		 */
		local = fib_get_table(net, RT_TABLE_LOCAL);
		if (local &&
		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
		    res.type == RTN_LOCAL)
			result = FIB_RES_DEV(res);
	} else {
		result = ifa->ifa_dev->dev;
	}
	/* dev_hold() must happen before rcu_read_unlock() so the device
	 * cannot go away between lookup and refcount bump.
	 */
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);
174 
/* called under RCU lock */
/* Look up the in_ifaddr whose local address is @addr in namespace @net,
 * walking the global hash bucket; returns NULL if not configured.
 */
struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
{
	u32 hash = inet_addr_hash(net, addr);
	struct in_ifaddr *ifa;

	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
		if (ifa->ifa_local == addr &&
		    net_eq(dev_net(ifa->ifa_dev->dev), net))
			return ifa;

	return NULL;
}
188 
/* Emit an RTM_NEWADDR/RTM_DELADDR netlink notification (defined later). */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chains fired on address add/remove; the validator chain runs
 * before an address is committed and may veto it.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* No-op stubs when sysctl support is compiled out. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
208 
209 /* Locks all the inet devices. */
210 
211 static struct in_ifaddr *inet_alloc_ifa(void)
212 {
213 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
214 }
215 
/* RCU callback: drop the in_device reference taken when the ifa was
 * bound to a device, then free the ifa itself.
 */
static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}
223 
/* Schedule @ifa for freeing after a grace period, so RCU readers walking
 * the address lists never see it disappear under them.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
228 
/* Final teardown of an in_device once its refcount hits zero: releases
 * the multicast hash and the device reference, and frees the structure.
 * Called via in_dev_put(); idev->dead must already be set.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	/* Both lists must have been emptied by inetdev_destroy(). */
	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
246 
/* Create and attach the IPv4-specific state (in_device) for @dev under
 * RTNL: copy the namespace default devconf, allocate ARP parameters,
 * register sysctls and init multicast, then publish via dev->ip_ptr.
 * Returns the new in_device or an ERR_PTR on failure.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;
	int err = -ENOMEM;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	/* LRO is incompatible with forwarding; see dev_disable_lro(). */
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	refcount_set(&in_dev->refcnt, 1);

	err = devinet_sysctl_register(in_dev);
	if (err) {
		/* Mark dead so in_dev_put() frees instead of warning. */
		in_dev->dead = 1;
		in_dev_put(in_dev);
		in_dev = NULL;
		goto out;
	}
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	/* gcc ?: extension: in_dev if non-NULL, else ERR_PTR(err). */
	return in_dev ?: ERR_PTR(err);
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}
291 
/* RCU callback: drop the reference that dev->ip_ptr used to hold. */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
297 
/* Detach and tear down an in_device under RTNL: delete every address,
 * unpublish dev->ip_ptr, release sysctls/ARP state, and defer the final
 * put past an RCU grace period so readers holding dev->ip_ptr are safe.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct net_device *dev;
	struct in_ifaddr *ifa;

	ASSERT_RTNL();

	dev = in_dev->dev;

	/* Marks the device dying; inet_del_ifa() skips promotions. */
	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
324 
325 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
326 {
327 	const struct in_ifaddr *ifa;
328 
329 	rcu_read_lock();
330 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
331 		if (inet_ifa_match(a, ifa)) {
332 			if (!b || inet_ifa_match(b, ifa)) {
333 				rcu_read_unlock();
334 				return 1;
335 			}
336 		}
337 	}
338 	rcu_read_unlock();
339 	return 0;
340 }
341 
/* Delete the address *@ifap from @in_dev under RTNL.  If it is a primary
 * address, its secondaries on the same subnet are either deleted too or,
 * when promote_secondaries is enabled, one of them is promoted to primary
 * and re-linked after the last remaining primary of equal-or-lower scope.
 * Sends RTM_DELADDR/RTM_NEWADDR (with @nlh/@portid) and fires the
 * inetaddr notifier chain; frees @ifa1 only when @destroy is set.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1;
	struct in_ifaddr *last_prim;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	ifa1 = rtnl_dereference(*ifap);
	last_prim = rtnl_dereference(in_dev->ifa_list);
	/* Device is going away entirely: no point promoting anything. */
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Track the last primary that can precede the
			 * promoted address in the list.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip addresses outside ifa1's subnet. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: delete the secondary too. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* Move the promoted entry to just after the last
			 * primary so list ordering stays primary-first.
			 */
			last_sec = rtnl_dereference(last_prim->ifa_next);
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(last_prim->ifa_next, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries with the
		 * new primary as prefsrc.
		 */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
453 
/* Kernel-internal address deletion: no originating netlink message. */
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
460 
static void check_lifetime(struct work_struct *work);

/* Periodic worker expiring/deprecating addresses with finite lifetimes. */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
464 
/* Insert @ifa into its device's address list under RTNL.  The address is
 * classified as secondary if another address already covers its subnet;
 * duplicates return -EEXIST, scope mismatches -EINVAL.  On success the
 * address is hashed, lifetime checking is (re)scheduled, RTM_NEWADDR is
 * sent and the inetaddr notifier chain fires.  Consumes @ifa on failure.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid, struct netlink_ext_ack *extack)
{
	struct in_ifaddr __rcu **last_primary, **ifap;
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_validator_info ivi;
	struct in_ifaddr *ifa1;
	int ret;

	ASSERT_RTNL();

	/* An all-zero local address is treated as "nothing to add". */
	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	ifap = &in_dev->ifa_list;
	ifa1 = rtnl_dereference(*ifap);

	while (ifa1) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}

		ifap = &ifa1->ifa_next;
		ifa1 = rtnl_dereference(*ifap);
	}

	/* Allow any devices that wish to register ifaddr validators to weigh
	 * in now, before changes are committed.  The rtnl lock is serializing
	 * access here, so the state should not change between a validator call
	 * and a final notify on commit.  This isn't invoked on promotion under
	 * the assumption that validators are checking the address itself, and
	 * not the flags.
	 */
	ivi.ivi_addr = ifa->ifa_address;
	ivi.ivi_dev = ifa->ifa_dev;
	ivi.extack = extack;
	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
					   NETDEV_UP, &ivi);
	ret = notifier_to_errno(ret);
	if (ret) {
		inet_free_ifa(ifa);
		return ret;
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		/* New primaries go after the last primary of suitable scope;
		 * secondaries append at the tail (ifap from the walk above).
		 */
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	rcu_assign_pointer(ifa->ifa_next, *ifap);
	rcu_assign_pointer(*ifap, ifa);

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Re-arm lifetime expiry to run immediately for the new address. */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
547 
/* Kernel-internal address insertion: no originating netlink message. */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
552 
/* Bind @ifa to @dev's in_device (taking a reference) and insert it.
 * Loopback-range local addresses are forced to host scope.  Consumes
 * @ifa on error; returns -ENOBUFS if the device has no in_device.
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	if (ifa->ifa_dev != in_dev) {
		/* A freshly allocated ifa should not be bound yet. */
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}
574 
/* Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	/* Unreferenced pointer: only valid while the caller's RCU/RTNL
	 * protection lasts.
	 */
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);
591 
/* Called only from RTNL semaphored context. No locks. */

/* Return the first address on @in_dev whose mask equals @mask and whose
 * subnet covers @prefix, or NULL.
 */
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	struct in_ifaddr *ifa;

	ASSERT_RTNL();

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	}
	return NULL;
}
607 
608 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
609 {
610 	struct ip_mreqn mreq = {
611 		.imr_multiaddr.s_addr = ifa->ifa_address,
612 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
613 	};
614 	int ret;
615 
616 	ASSERT_RTNL();
617 
618 	lock_sock(sk);
619 	if (join)
620 		ret = ip_mc_join_group(sk, &mreq);
621 	else
622 		ret = ip_mc_leave_group(sk, &mreq);
623 	release_sock(sk);
624 
625 	return ret;
626 }
627 
/* RTM_DELADDR handler: find the first address on the requested device
 * matching the supplied IFA_LOCAL / IFA_LABEL / IFA_ADDRESS+prefixlen
 * filters and delete it.  Multicast addresses are also left via the
 * autojoin socket.  Returns -EADDRNOTAVAIL if nothing matched.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr __rcu **ifap;
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa;

	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
				     ifa_ipv4_policy, extack);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (!in_dev) {
		err = -ENODEV;
		goto errout;
	}

	/* Each supplied attribute narrows the match; absent ones are
	 * wildcards.
	 */
	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
			continue;

		if (ipv4_is_multicast(ifa->ifa_address))
			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}
678 
/* Sentinel meaning "address never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF

/* Delayed-work handler: walk every hash bucket and expire addresses whose
 * valid lifetime has passed, or mark IFA_F_DEPRECATED those past their
 * preferred lifetime.  A cheap RCU pass first decides whether the bucket
 * needs changes; only then is RTNL taken for the mutating pass.  Finally
 * the work is re-queued for the next interesting deadline.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Read-only pass under RCU: compute the next deadline and
		 * detect whether any address in this bucket needs action.
		 */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Mutating pass under RTNL; ages are recomputed because
		 * time may have passed and the list may have changed.
		 */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr __rcu **ifap;
				struct in_ifaddr *tmp;

				/* Locate ifa within its device list so
				 * inet_del_ifa() gets the right link.
				 */
				ifap = &ifa->ifa_dev->ifa_list;
				tmp = rtnl_dereference(*ifap);
				while (tmp) {
					tmp = rtnl_dereference(tmp->ifa_next);
					if (rtnl_dereference(*ifap) == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
					ifap = &tmp->ifa_next;
					tmp = rtnl_dereference(*ifap);
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
783 
/* Apply valid/preferred lifetimes (in seconds; INFINITY_LIFE_TIME means
 * forever) to @ifa and stamp its modification/creation times.  An
 * infinite valid lifetime sets IFA_F_PERMANENT; a zero preferred
 * lifetime marks the address IFA_F_DEPRECATED immediately.
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long timeout;

	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout))
		ifa->ifa_valid_lft = timeout;
	else
		ifa->ifa_flags |= IFA_F_PERMANENT;

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
		ifa->ifa_preferred_lft = timeout;
	}
	ifa->ifa_tstamp = jiffies;
	/* Creation time is set only once, on the first call. */
	if (!ifa->ifa_cstamp)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}
807 
/* Build an in_ifaddr from an RTM_NEWADDR request.  Validates attributes
 * against ifa_ipv4_policy, requires IFA_LOCAL and a prefixlen <= 32, and
 * copies lifetimes from IFA_CACHEINFO into *@pvalid_lft/*@pprefered_lft.
 * Returns the new (unlinked) ifa holding a reference on the in_device,
 * or an ERR_PTR.
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft,
				       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
				     ifa_ipv4_policy, extack);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* Reference for ifa->ifa_dev, dropped by inet_rcu_free_ifa(). */
	in_dev_hold(in_dev);

	/* IFA_ADDRESS defaults to IFA_LOCAL (non point-to-point case). */
	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* IFA_FLAGS (32-bit) supersedes the 8-bit header flags field. */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_RT_PRIORITY])
		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* Preferred lifetime may not exceed valid lifetime. */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
895 
/* Return an existing address on @ifa's device with the same mask, subnet
 * and local address as @ifa, or NULL (also NULL for a zero local address).
 * Used to distinguish "create" from "replace" in inet_rtm_newaddr().
 */
static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1;

	if (!ifa->ifa_local)
		return NULL;

	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa) &&
		    ifa1->ifa_local == ifa->ifa_local)
			return ifa1;
	}
	return NULL;
}
912 
/* RTM_NEWADDR handler: create a new address (auto-joining multicast
 * groups when IFA_F_MCAUTOJOIN is set) or, with NLM_F_REPLACE, update an
 * existing one's metric and lifetimes.  An existing address without
 * NLM_F_REPLACE (or with NLM_F_EXCL) yields -EEXIST.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
					       true, ifa);

			if (ret < 0) {
				inet_free_ifa(ifa);
				return ret;
			}
		}
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
					 extack);
	} else {
		/* Replace path: keep the existing entry, free the new one. */
		u32 new_metric = ifa->ifa_rt_priority;

		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
		ifa = ifa_existing;

		if (ifa->ifa_rt_priority != new_metric) {
			fib_modify_prefix_metric(ifa, new_metric);
			ifa->ifa_rt_priority = new_metric;
		}

		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		cancel_delayed_work(&check_lifetime_work);
		queue_delayed_work(system_power_efficient_wq,
				&check_lifetime_work, 0);
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
	}
	return 0;
}
968 
969 /*
970  *	Determine a default network mask, based on the IP address.
971  */
972 
973 static int inet_abc_len(__be32 addr)
974 {
975 	int rc = -1;	/* Something else, probably a multicast. */
976 
977 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
978 		rc = 0;
979 	else {
980 		__u32 haddr = ntohl(addr);
981 		if (IN_CLASSA(haddr))
982 			rc = 8;
983 		else if (IN_CLASSB(haddr))
984 			rc = 16;
985 		else if (IN_CLASSC(haddr))
986 			rc = 24;
987 		else if (IN_CLASSE(haddr))
988 			rc = 32;
989 	}
990 
991 	return rc;
992 }
993 
994 
995 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
996 {
997 	struct sockaddr_in sin_orig;
998 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
999 	struct in_ifaddr __rcu **ifap = NULL;
1000 	struct in_device *in_dev;
1001 	struct in_ifaddr *ifa = NULL;
1002 	struct net_device *dev;
1003 	char *colon;
1004 	int ret = -EFAULT;
1005 	int tryaddrmatch = 0;
1006 
1007 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1008 
1009 	/* save original address for comparison */
1010 	memcpy(&sin_orig, sin, sizeof(*sin));
1011 
1012 	colon = strchr(ifr->ifr_name, ':');
1013 	if (colon)
1014 		*colon = 0;
1015 
1016 	dev_load(net, ifr->ifr_name);
1017 
1018 	switch (cmd) {
1019 	case SIOCGIFADDR:	/* Get interface address */
1020 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1021 	case SIOCGIFDSTADDR:	/* Get the destination address */
1022 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1023 		/* Note that these ioctls will not sleep,
1024 		   so that we do not impose a lock.
1025 		   One day we will be forced to put shlock here (I mean SMP)
1026 		 */
1027 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1028 		memset(sin, 0, sizeof(*sin));
1029 		sin->sin_family = AF_INET;
1030 		break;
1031 
1032 	case SIOCSIFFLAGS:
1033 		ret = -EPERM;
1034 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1035 			goto out;
1036 		break;
1037 	case SIOCSIFADDR:	/* Set interface address (and family) */
1038 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1039 	case SIOCSIFDSTADDR:	/* Set the destination address */
1040 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1041 		ret = -EPERM;
1042 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1043 			goto out;
1044 		ret = -EINVAL;
1045 		if (sin->sin_family != AF_INET)
1046 			goto out;
1047 		break;
1048 	default:
1049 		ret = -EINVAL;
1050 		goto out;
1051 	}
1052 
1053 	rtnl_lock();
1054 
1055 	ret = -ENODEV;
1056 	dev = __dev_get_by_name(net, ifr->ifr_name);
1057 	if (!dev)
1058 		goto done;
1059 
1060 	if (colon)
1061 		*colon = ':';
1062 
1063 	in_dev = __in_dev_get_rtnl(dev);
1064 	if (in_dev) {
1065 		if (tryaddrmatch) {
1066 			/* Matthias Andree */
1067 			/* compare label and address (4.4BSD style) */
1068 			/* note: we only do this for a limited set of ioctls
1069 			   and only if the original address family was AF_INET.
1070 			   This is checked above. */
1071 
1072 			for (ifap = &in_dev->ifa_list;
1073 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1074 			     ifap = &ifa->ifa_next) {
1075 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1076 				    sin_orig.sin_addr.s_addr ==
1077 							ifa->ifa_local) {
1078 					break; /* found */
1079 				}
1080 			}
1081 		}
1082 		/* we didn't get a match, maybe the application is
1083 		   4.3BSD-style and passed in junk so we fall back to
1084 		   comparing just the label */
1085 		if (!ifa) {
1086 			for (ifap = &in_dev->ifa_list;
1087 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1088 			     ifap = &ifa->ifa_next)
1089 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1090 					break;
1091 		}
1092 	}
1093 
1094 	ret = -EADDRNOTAVAIL;
1095 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1096 		goto done;
1097 
1098 	switch (cmd) {
1099 	case SIOCGIFADDR:	/* Get interface address */
1100 		ret = 0;
1101 		sin->sin_addr.s_addr = ifa->ifa_local;
1102 		break;
1103 
1104 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1105 		ret = 0;
1106 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1107 		break;
1108 
1109 	case SIOCGIFDSTADDR:	/* Get the destination address */
1110 		ret = 0;
1111 		sin->sin_addr.s_addr = ifa->ifa_address;
1112 		break;
1113 
1114 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1115 		ret = 0;
1116 		sin->sin_addr.s_addr = ifa->ifa_mask;
1117 		break;
1118 
1119 	case SIOCSIFFLAGS:
1120 		if (colon) {
1121 			ret = -EADDRNOTAVAIL;
1122 			if (!ifa)
1123 				break;
1124 			ret = 0;
1125 			if (!(ifr->ifr_flags & IFF_UP))
1126 				inet_del_ifa(in_dev, ifap, 1);
1127 			break;
1128 		}
1129 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1130 		break;
1131 
1132 	case SIOCSIFADDR:	/* Set interface address (and family) */
1133 		ret = -EINVAL;
1134 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1135 			break;
1136 
1137 		if (!ifa) {
1138 			ret = -ENOBUFS;
1139 			ifa = inet_alloc_ifa();
1140 			if (!ifa)
1141 				break;
1142 			INIT_HLIST_NODE(&ifa->hash);
1143 			if (colon)
1144 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1145 			else
1146 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1147 		} else {
1148 			ret = 0;
1149 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1150 				break;
1151 			inet_del_ifa(in_dev, ifap, 0);
1152 			ifa->ifa_broadcast = 0;
1153 			ifa->ifa_scope = 0;
1154 		}
1155 
1156 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1157 
1158 		if (!(dev->flags & IFF_POINTOPOINT)) {
1159 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1160 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1161 			if ((dev->flags & IFF_BROADCAST) &&
1162 			    ifa->ifa_prefixlen < 31)
1163 				ifa->ifa_broadcast = ifa->ifa_address |
1164 						     ~ifa->ifa_mask;
1165 		} else {
1166 			ifa->ifa_prefixlen = 32;
1167 			ifa->ifa_mask = inet_make_mask(32);
1168 		}
1169 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1170 		ret = inet_set_ifa(dev, ifa);
1171 		break;
1172 
1173 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1174 		ret = 0;
1175 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1176 			inet_del_ifa(in_dev, ifap, 0);
1177 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1178 			inet_insert_ifa(ifa);
1179 		}
1180 		break;
1181 
1182 	case SIOCSIFDSTADDR:	/* Set the destination address */
1183 		ret = 0;
1184 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1185 			break;
1186 		ret = -EINVAL;
1187 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1188 			break;
1189 		ret = 0;
1190 		inet_del_ifa(in_dev, ifap, 0);
1191 		ifa->ifa_address = sin->sin_addr.s_addr;
1192 		inet_insert_ifa(ifa);
1193 		break;
1194 
1195 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1196 
1197 		/*
1198 		 *	The mask we set must be legal.
1199 		 */
1200 		ret = -EINVAL;
1201 		if (bad_mask(sin->sin_addr.s_addr, 0))
1202 			break;
1203 		ret = 0;
1204 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1205 			__be32 old_mask = ifa->ifa_mask;
1206 			inet_del_ifa(in_dev, ifap, 0);
1207 			ifa->ifa_mask = sin->sin_addr.s_addr;
1208 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1209 
1210 			/* See if current broadcast address matches
1211 			 * with current netmask, then recalculate
1212 			 * the broadcast address. Otherwise it's a
1213 			 * funny address, so don't touch it since
1214 			 * the user seems to know what (s)he's doing...
1215 			 */
1216 			if ((dev->flags & IFF_BROADCAST) &&
1217 			    (ifa->ifa_prefixlen < 31) &&
1218 			    (ifa->ifa_broadcast ==
1219 			     (ifa->ifa_local|~old_mask))) {
1220 				ifa->ifa_broadcast = (ifa->ifa_local |
1221 						      ~sin->sin_addr.s_addr);
1222 			}
1223 			inet_insert_ifa(ifa);
1224 		}
1225 		break;
1226 	}
1227 done:
1228 	rtnl_unlock();
1229 out:
1230 	return ret;
1231 }
1232 
1233 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1234 {
1235 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1236 	const struct in_ifaddr *ifa;
1237 	struct ifreq ifr;
1238 	int done = 0;
1239 
1240 	if (WARN_ON(size > sizeof(struct ifreq)))
1241 		goto out;
1242 
1243 	if (!in_dev)
1244 		goto out;
1245 
1246 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1247 		if (!buf) {
1248 			done += size;
1249 			continue;
1250 		}
1251 		if (len < size)
1252 			break;
1253 		memset(&ifr, 0, sizeof(struct ifreq));
1254 		strcpy(ifr.ifr_name, ifa->ifa_label);
1255 
1256 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1257 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1258 								ifa->ifa_local;
1259 
1260 		if (copy_to_user(buf + done, &ifr, size)) {
1261 			done = -EFAULT;
1262 			break;
1263 		}
1264 		len  -= size;
1265 		done += size;
1266 	}
1267 out:
1268 	return done;
1269 }
1270 
1271 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1272 				 int scope)
1273 {
1274 	const struct in_ifaddr *ifa;
1275 
1276 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1277 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1278 			continue;
1279 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1280 		    ifa->ifa_scope <= scope)
1281 			return ifa->ifa_local;
1282 	}
1283 
1284 	return 0;
1285 }
1286 
/* Select a source address on @dev suitable for talking to @dst (0 = any
 * destination) within @scope.  Prefers a primary address whose subnet
 * matches @dst; falls back to the VRF master device and then to any
 * device in the namespace sharing the same L3 master.  Returns 0 when
 * nothing qualifies.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first in-scope address as a fallback in
		 * case no subnet match is found.
		 */
		if (!addr)
			addr = ifa->ifa_local;
	}

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1352 
/* Check on @in_dev whether (@local, @dst) can be confirmed: "addr"
 * tracks a candidate local address within @scope, "same" records that
 * an entry matching both the @local and @dst subnet constraints was
 * seen.  Returns the confirmed local address, or 0 if the combination
 * cannot be satisfied on this device.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		/* First in-scope entry matching @local (or any entry when
		 * @local is 0) becomes the candidate address.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1390 
1391 /*
1392  * Confirm that local IP address exists using wildcards:
1393  * - net: netns to check, cannot be NULL
1394  * - in_dev: only on this interface, NULL=any interface
1395  * - dst: only in the same subnet as dst, 0=any dst
1396  * - local: address, 0=autoselect the local address
1397  * - scope: maximum allowed scope value for the local address
1398  */
1399 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1400 			 __be32 dst, __be32 local, int scope)
1401 {
1402 	__be32 addr = 0;
1403 	struct net_device *dev;
1404 
1405 	if (in_dev)
1406 		return confirm_addr_indev(in_dev, dst, local, scope);
1407 
1408 	rcu_read_lock();
1409 	for_each_netdev_rcu(net, dev) {
1410 		in_dev = __in_dev_get_rcu(dev);
1411 		if (in_dev) {
1412 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1413 			if (addr)
1414 				break;
1415 		}
1416 	}
1417 	rcu_read_unlock();
1418 
1419 	return addr;
1420 }
1421 EXPORT_SYMBOL(inet_confirm_addr);
1422 
1423 /*
1424  *	Device notifier
1425  */
1426 
/* Register @nb on the blocking inetaddr notifier chain. */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1432 
/* Remove @nb from the blocking inetaddr notifier chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1438 
/* Register @nb on the blocking inetaddr validator notifier chain. */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1444 
/* Remove @nb from the blocking inetaddr validator notifier chain. */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
	    nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1451 
1452 /* Rename ifa_labels for a device name change. Make some effort to preserve
1453  * existing alias numbering and to create unique labels if possible.
1454 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* The first address keeps the bare device name. */
		if (named++ == 0)
			goto skip;
		/* Preserve the old ":alias" suffix; synthesize ":<n>" for
		 * labels that had none, to keep labels unique.
		 */
		dot = strchr(old, ':');
		if (!dot) {
			sprintf(old, ":%d", named);
			dot = old;
		}
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			/* No room: overwrite the tail of the new name so the
			 * suffix still fits within IFNAMSIZ.
			 */
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1480 
/* IPv4 is only usable on devices whose MTU is at least IPV4_MIN_MTU. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	return mtu >= IPV4_MIN_MTU;
}
1485 
/* Send one gratuitous ARP request per address on @in_dev: sender and
 * target IP are both ifa_local, so neighbours can refresh their caches.
 */
static void inetdev_send_gratuitous_arp(struct net_device *dev,
					struct in_device *in_dev)

{
	const struct in_ifaddr *ifa;

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		arp_send(ARPOP_REQUEST, ETH_P_ARP,
			 ifa->ifa_local, dev,
			 ifa->ifa_local, NULL,
			 dev->dev_addr, NULL);
	}
}
1499 
1500 /* Called only under RTNL semaphore */
1501 
/* Netdevice notifier callback: create, adjust or tear down the
 * per-device IPv4 state (struct in_device) as the device goes through
 * its lifecycle.  Runs with RTNL held (see ASSERT_RTNL below).
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only registration (or an MTU change that
	 * makes IP usable again) creates one.
	 */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Auto-configure 127.0.0.1/8 when loopback comes up. */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1595 
/* Netdevice event hook; dispatches to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1599 
1600 static size_t inet_nlmsg_size(void)
1601 {
1602 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1603 	       + nla_total_size(4) /* IFA_ADDRESS */
1604 	       + nla_total_size(4) /* IFA_LOCAL */
1605 	       + nla_total_size(4) /* IFA_BROADCAST */
1606 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1607 	       + nla_total_size(4)  /* IFA_FLAGS */
1608 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1609 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1610 }
1611 
/* Convert a jiffies timestamp to hundredths of a second since boot
 * (the INITIAL_JIFFIES offset is removed) for netlink cacheinfo.
 */
static inline u32 cstamp_delta(unsigned long cstamp)
{
	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
1616 
1617 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1618 			 unsigned long tstamp, u32 preferred, u32 valid)
1619 {
1620 	struct ifa_cacheinfo ci;
1621 
1622 	ci.cstamp = cstamp_delta(cstamp);
1623 	ci.tstamp = cstamp_delta(tstamp);
1624 	ci.ifa_prefered = preferred;
1625 	ci.ifa_valid = valid;
1626 
1627 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1628 }
1629 
/* Build one ifaddr netlink message (event taken from @args) for @ifa.
 * Returns 0 on success or -EMSGSIZE when @skb lacks room, in which case
 * the partially-built message is cancelled.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    struct inet_fill_args *args)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
			args->flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (args->netnsid >= 0 &&
	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
		goto nla_put_failure;

	/* For non-permanent addresses report the remaining lifetimes,
	 * clamped at zero once they have elapsed.
	 */
	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* Zero-valued address attributes and an empty label are omitted. */
	if ((ifa->ifa_address &&
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    (ifa->ifa_rt_priority &&
	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1696 
/* Strict-mode validation of an RTM_GETADDR dump request.  Fills in
 * @fillargs (ifindex filter, netnsid, dump flags); when a target
 * namespace is requested, returns it in @tgt_net with a reference held
 * that the caller must drop.  Returns 0 or a negative errno.
 */
static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
				      struct inet_fill_args *fillargs,
				      struct net **tgt_net, struct sock *sk,
				      struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[IFA_MAX+1];
	struct ifaddrmsg *ifm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
		return -EINVAL;
	}

	/* A dump request must leave all header fields but ifa_index zero. */
	ifm = nlmsg_data(nlh);
	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
		return -EINVAL;
	}

	fillargs->ifindex = ifm->ifa_index;
	if (fillargs->ifindex) {
		cb->answer_flags |= NLM_F_DUMP_FILTERED;
		fillargs->flags |= NLM_F_DUMP_FILTERED;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
					    ifa_ipv4_policy, extack);
	if (err < 0)
		return err;

	/* IFA_TARGET_NETNSID is the only attribute allowed in a dump. */
	for (i = 0; i <= IFA_MAX; ++i) {
		if (!tb[i])
			continue;

		if (i == IFA_TARGET_NETNSID) {
			struct net *net;

			fillargs->netnsid = nla_get_s32(tb[i]);

			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
			if (IS_ERR(net)) {
				fillargs->netnsid = -1;
				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
				return PTR_ERR(net);
			}
			*tgt_net = net;
		} else {
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	return 0;
}
1753 
1754 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1755 			    struct netlink_callback *cb, int s_ip_idx,
1756 			    struct inet_fill_args *fillargs)
1757 {
1758 	struct in_ifaddr *ifa;
1759 	int ip_idx = 0;
1760 	int err;
1761 
1762 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1763 		if (ip_idx < s_ip_idx) {
1764 			ip_idx++;
1765 			continue;
1766 		}
1767 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1768 		if (err < 0)
1769 			goto done;
1770 
1771 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1772 		ip_idx++;
1773 	}
1774 	err = 0;
1775 
1776 done:
1777 	cb->args[2] = ip_idx;
1778 
1779 	return err;
1780 }
1781 
/* RTM_GETADDR dump handler: walk every device in the target namespace
 * (or just one, when strict-check filtering gave an ifindex) and emit
 * one RTM_NEWADDR message per IPv4 address.  Resume state lives in
 * cb->args[]: [0] device hash bucket, [1] device index within the
 * bucket, [2] address index within the device.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		/* Filtered dump: handle the single requested device here. */
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence number lets userspace detect a changed address
		 * list across an interrupted dump.
		 */
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Only resume mid-device on the exact saved position. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1865 
/* Broadcast an address-change notification (@event) for @ifa to the
 * RTNLGRP_IPV4_IFADDR netlink group.  @nlh and @portid identify the
 * triggering request; @nlh may be NULL (sequence number 0 is used).
 */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 portid)
{
	struct inet_fill_args fillargs = {
		.portid = portid,
		.seq = nlh ? nlh->nlmsg_seq : 0,
		.event = event,
		.flags = 0,
		.netnsid = -1,
	};
	struct sk_buff *skb;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, &fillargs);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	/* Record the failure so listeners learn they missed an event. */
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
1898 
1899 static size_t inet_get_link_af_size(const struct net_device *dev,
1900 				    u32 ext_filter_mask)
1901 {
1902 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1903 
1904 	if (!in_dev)
1905 		return 0;
1906 
1907 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1908 }
1909 
1910 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1911 			     u32 ext_filter_mask)
1912 {
1913 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1914 	struct nlattr *nla;
1915 	int i;
1916 
1917 	if (!in_dev)
1918 		return -ENODATA;
1919 
1920 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1921 	if (!nla)
1922 		return -EMSGSIZE;
1923 
1924 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1925 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1926 
1927 	return 0;
1928 }
1929 
/* Netlink policy for per-link IPv4 attributes (see inet_validate_link_af). */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1933 
1934 static int inet_validate_link_af(const struct net_device *dev,
1935 				 const struct nlattr *nla)
1936 {
1937 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1938 	int err, rem;
1939 
1940 	if (dev && !__in_dev_get_rcu(dev))
1941 		return -EAFNOSUPPORT;
1942 
1943 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1944 					  inet_af_policy, NULL);
1945 	if (err < 0)
1946 		return err;
1947 
1948 	if (tb[IFLA_INET_CONF]) {
1949 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1950 			int cfgid = nla_type(a);
1951 
1952 			if (nla_len(a) < 4)
1953 				return -EINVAL;
1954 
1955 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1956 				return -EINVAL;
1957 		}
1958 	}
1959 
1960 	return 0;
1961 }
1962 
/* Apply the IFLA_INET_CONF settings from a link change request to the
 * device's devconf.  The attributes are expected to have been validated
 * earlier (see inet_validate_link_af), hence the BUG() on a re-parse
 * failure.
 */
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
		BUG();

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}
1982 
1983 static int inet_netconf_msgsize_devconf(int type)
1984 {
1985 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1986 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1987 	bool all = false;
1988 
1989 	if (type == NETCONFA_ALL)
1990 		all = true;
1991 
1992 	if (all || type == NETCONFA_FORWARDING)
1993 		size += nla_total_size(4);
1994 	if (all || type == NETCONFA_RP_FILTER)
1995 		size += nla_total_size(4);
1996 	if (all || type == NETCONFA_MC_FORWARDING)
1997 		size += nla_total_size(4);
1998 	if (all || type == NETCONFA_BC_FORWARDING)
1999 		size += nla_total_size(4);
2000 	if (all || type == NETCONFA_PROXY_NEIGH)
2001 		size += nla_total_size(4);
2002 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2003 		size += nla_total_size(4);
2004 
2005 	return size;
2006 }
2007 
/* Compose one RTM_NEWNETCONF message for @devconf into @skb.  @type
 * selects a single NETCONFA_* attribute or NETCONFA_ALL for all of
 * them; a NULL @devconf emits only the ifindex.  Returns 0 on success
 * or -EMSGSIZE (the partial message is cancelled).
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	if (!devconf)
		goto out;

	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_BC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2067 
/* Notify RTNLGRP_IPV4_NETCONF listeners that the @type knob of the
 * devconf for @ifindex changed, sending an @event message.
 */
void inet_netconf_notify_devconf(struct net *net, int event, int type,
				 int ifindex, struct ipv4_devconf *devconf)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
					event, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	/* Record the failure so listeners learn they missed an event. */
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
2092 
/* Netlink policy for NETCONFA_* attributes in netconf requests. */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2100 
/* Validate an RTM_GETNETCONF request and parse its attributes into @tb.
 * Non-strict sockets get a plain parse; strict sockets additionally
 * reject any attribute other than NETCONFA_IFINDEX.
 */
static int inet_netconf_valid_get_req(struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
				      struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
					      tb, NETCONFA_MAX,
					      devconf_ipv4_policy, extack);

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
					    tb, NETCONFA_MAX,
					    devconf_ipv4_policy, extack);
	if (err)
		return err;

	for (i = 0; i <= NETCONFA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETCONFA_IFINDEX:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
			return -EINVAL;
		}
	}

	return 0;
}
2139 
/* RTM_GETNETCONF handler: look up the devconf selected by the request's
 * NETCONFA_IFINDEX (a real ifindex, or the "all"/"default" pseudo
 * indices) and unicast a full NETCONFA_ALL snapshot to the requester.
 */
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX+1];
	struct sk_buff *skb;
	struct ipv4_devconf *devconf;
	struct in_device *in_dev;
	struct net_device *dev;
	int ifindex;
	int err;

	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
	if (err)
		goto errout;

	err = -EINVAL;
	if (!tb[NETCONFA_IFINDEX])
		goto errout;

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	switch (ifindex) {
	case NETCONFA_IFINDEX_ALL:
		devconf = net->ipv4.devconf_all;
		break;
	case NETCONFA_IFINDEX_DEFAULT:
		devconf = net->ipv4.devconf_dflt;
		break;
	default:
		dev = __dev_get_by_index(net, ifindex);
		if (!dev)
			goto errout;
		in_dev = __in_dev_get_rtnl(dev);
		if (!in_dev)
			goto errout;
		devconf = &in_dev->cnf;
		break;
	}

	err = -ENOBUFS;
	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
2199 
/* RTM_GETNETCONF dump handler: walk every net_device via the per-netns
 * dev_index hash and emit one RTM_NEWNETCONF record per in_device,
 * followed by two pseudo-entries for the "all" and "default" conf
 * tables.  Resume state across partial dumps lives in cb->args[0]
 * (hash bucket; NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 mark the
 * pseudo-entries) and cb->args[1] (index within the bucket).
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	/* Strict checking: a dump request must be a bare netconfmsg with
	 * no trailing attributes (dumps are never filtered).
	 */
	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
			return -EINVAL;
		}

		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Generation cookie so nl_dump_check_consistent() can flag
		 * dumps interleaved with address/device changes.
		 */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				/* skb full: stop here and resume from
				 * (h, idx) on the next invocation.
				 */
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* Pseudo-entry for the netns-wide "all" configuration. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Pseudo-entry for the "default" (future devices) configuration. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2286 
2287 #ifdef CONFIG_SYSCTL
2288 
2289 static void devinet_copy_dflt_conf(struct net *net, int i)
2290 {
2291 	struct net_device *dev;
2292 
2293 	rcu_read_lock();
2294 	for_each_netdev_rcu(net, dev) {
2295 		struct in_device *in_dev;
2296 
2297 		in_dev = __in_dev_get_rcu(dev);
2298 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2299 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2300 	}
2301 	rcu_read_unlock();
2302 }
2303 
2304 /* called with RTNL locked */
2305 static void inet_forward_change(struct net *net)
2306 {
2307 	struct net_device *dev;
2308 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2309 
2310 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2311 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2312 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2313 				    NETCONFA_FORWARDING,
2314 				    NETCONFA_IFINDEX_ALL,
2315 				    net->ipv4.devconf_all);
2316 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2317 				    NETCONFA_FORWARDING,
2318 				    NETCONFA_IFINDEX_DEFAULT,
2319 				    net->ipv4.devconf_dflt);
2320 
2321 	for_each_netdev(net, dev) {
2322 		struct in_device *in_dev;
2323 
2324 		if (on)
2325 			dev_disable_lro(dev);
2326 
2327 		in_dev = __in_dev_get_rtnl(dev);
2328 		if (in_dev) {
2329 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2330 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2331 						    NETCONFA_FORWARDING,
2332 						    dev->ifindex, &in_dev->cnf);
2333 		}
2334 	}
2335 }
2336 
2337 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2338 {
2339 	if (cnf == net->ipv4.devconf_dflt)
2340 		return NETCONFA_IFINDEX_DEFAULT;
2341 	else if (cnf == net->ipv4.devconf_all)
2342 		return NETCONFA_IFINDEX_ALL;
2343 	else {
2344 		struct in_device *idev
2345 			= container_of(cnf, struct in_device, cnf);
2346 		return idev->dev->ifindex;
2347 	}
2348 }
2349 
/* Generic proc handler for the per-option devconf sysctls.
 *
 * Wraps proc_dointvec() and, on a successful write, performs the side
 * effects a changed value requires: marking the option as explicitly
 * set, propagating a "default" write to unconfigured devices, flushing
 * the route cache where cached decisions become stale, and emitting
 * RTM_NEWNETCONF notifications for the options userspace tracks via
 * netconf.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* Option index: offset of this entry inside cnf->data[]. */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		/* Remember the option was set explicitly so later changes
		 * of the netns default no longer override it (see
		 * devinet_copy_dflt_conf()).
		 */
		set_bit(i, cnf->state);

		/* A write to the "default" table is copied to all devices
		 * still following the default.
		 */
		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		/* Disabling accept_local/route_localnet invalidates cached
		 * routing decisions.
		 */
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
		    new_value != old_value)
			rt_cache_flush(net);

		/* The options below are mirrored to userspace via netconf;
		 * notify only on an actual value change.
		 */
		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
						    ifindex, cnf);
		}
	}

	return ret;
}
2402 
/* proc handler for the "forwarding" sysctls ("all", "default" and
 * per-device, plus the legacy net.ipv4.ip_forward alias).
 *
 * A forwarding change has netns-wide consequences (LRO must be
 * disabled, per-device values follow "all", routes are flushed), so
 * that work runs under RTNL.  rtnl_trylock() is used rather than
 * rtnl_lock(); on contention the old value and file position are
 * restored and the write is retried via restart_syscall().
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				/* "all": flip every device in the netns. */
				inet_forward_change(net);
			} else {
				/* Per-device change: the conf table is
				 * embedded in its in_device.
				 */
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			/* "default" affects only future devices: just
			 * notify netconf listeners.
			 */
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2446 
2447 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2448 				void __user *buffer,
2449 				size_t *lenp, loff_t *ppos)
2450 {
2451 	int *valp = ctl->data;
2452 	int val = *valp;
2453 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2454 	struct net *net = ctl->extra2;
2455 
2456 	if (write && *valp != val)
2457 		rt_cache_flush(net);
2458 
2459 	return ret;
2460 }
2461 
/* Template for one per-option entry in a conf/<dev> sysctl directory.
 * .data points into the global ipv4_devconf template and is rebased
 * onto the actual ipv4_devconf instance when the whole table is
 * kmemdup'ed in __devinet_sysctl_register(); .extra1/.extra2 are
 * likewise filled in there.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Ordinary read-write option. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only option (0444): visible but not writable via sysctl. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write option with a custom proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write option whose change flushes the route cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)

/* Template sysctl table: one entry per IPV4_DEVCONF_* option; the
 * array is sized __IPV4_DEVCONF_MAX so the unlisted trailing slot
 * stays zeroed and terminates the ctl_table.  Cloned per devconf
 * instance by __devinet_sysctl_register().
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2536 
2537 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2538 				     int ifindex, struct ipv4_devconf *p)
2539 {
2540 	int i;
2541 	struct devinet_sysctl_table *t;
2542 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2543 
2544 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2545 	if (!t)
2546 		goto out;
2547 
2548 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2549 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2550 		t->devinet_vars[i].extra1 = p;
2551 		t->devinet_vars[i].extra2 = net;
2552 	}
2553 
2554 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2555 
2556 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2557 	if (!t->sysctl_header)
2558 		goto free;
2559 
2560 	p->sysctl = t;
2561 
2562 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2563 				    ifindex, p);
2564 	return 0;
2565 
2566 free:
2567 	kfree(t);
2568 out:
2569 	return -ENOBUFS;
2570 }
2571 
2572 static void __devinet_sysctl_unregister(struct net *net,
2573 					struct ipv4_devconf *cnf, int ifindex)
2574 {
2575 	struct devinet_sysctl_table *t = cnf->sysctl;
2576 
2577 	if (t) {
2578 		cnf->sysctl = NULL;
2579 		unregister_net_sysctl_table(t->sysctl_header);
2580 		kfree(t);
2581 	}
2582 
2583 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2584 }
2585 
2586 static int devinet_sysctl_register(struct in_device *idev)
2587 {
2588 	int err;
2589 
2590 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2591 		return -EINVAL;
2592 
2593 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2594 	if (err)
2595 		return err;
2596 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2597 					idev->dev->ifindex, &idev->cnf);
2598 	if (err)
2599 		neigh_sysctl_unregister(idev->arp_parms);
2600 	return err;
2601 }
2602 
2603 static void devinet_sysctl_unregister(struct in_device *idev)
2604 {
2605 	struct net *net = dev_net(idev->dev);
2606 
2607 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2608 	neigh_sysctl_unregister(idev->arp_parms);
2609 }
2610 
/* Legacy net.ipv4.ip_forward sysctl: an alias for the FORWARDING slot
 * of the "all" devconf table.  This static copy targets init_net; it
 * is kmemdup'ed and re-pointed per netns in devinet_init_net().
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
2624 #endif
2625 
/* Per-netns init: allocate this netns's "all" and "default" devconf
 * tables (cloned from the compiled-in templates, then optionally
 * overwritten with init_net's current values) and register the
 * corresponding sysctl trees plus the legacy ip_forward entry.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;

	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;

	/* Re-point the ip_forward alias at this netns's "all" table. */
	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
#endif

	/* A new (non-init) netns inherits init_net's current settings,
	 * unless devconf_inherit_init_net == 2, which keeps the
	 * compiled-in defaults instead.
	 */
	if ((!IS_ENABLED(CONFIG_SYSCTL) ||
	     sysctl_devconf_inherit_init_net != 2) &&
	    !net_eq(net, &init_net)) {
		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Unwind in strict reverse order of the registrations above. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	kfree(tbl);
err_alloc_ctl:
#endif
	kfree(dflt);
err_alloc_dflt:
	kfree(all);
err_alloc_all:
	return err;
}
2697 
/* Per-netns teardown: unregister the sysctl trees set up by
 * devinet_init_net() in reverse order, then free the conf tables.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* The ip_forward table was kmemdup'ed in devinet_init_net();
	 * grab it via ctl_table_arg before the header goes away.
	 */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
2714 
/* Pernet hooks: one devconf setup/teardown per network namespace. */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2719 
/* AF_INET hooks for rtnetlink IFLA_AF_SPEC handling on link messages. */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2727 
/* Boot-time init: set up the inet address hash, per-netns devconf
 * handling, the netdev notifier, the address-lifetime worker and the
 * rtnetlink message handlers for addresses and netconf.
 */
void __init devinet_init(void)
{
	int i;

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Kick off the periodic address-lifetime scan immediately;
	 * check_lifetime_work is defined earlier in this file.
	 */
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, 0);
}
2750