xref: /linux/net/ipv4/devinet.c (revision f72aa1b276281b4e4f75261af8425bc99d903f3e)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
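/*
 * Editor's illustration (not in the original source): a typical RTM_NEWADDR
 * request, e.g. the one "ip addr add 192.0.2.1/24 dev eth0" is expected to
 * generate, carries ifa_prefixlen = 24 in the ifaddrmsg header plus
 * IFA_LOCAL and IFA_ADDRESS (both 192.0.2.1 when no peer is given), and
 * optionally IFA_BROADCAST and IFA_LABEL. The policy above is what
 * validates those attributes when such a request is parsed.
 */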
109 
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static u32 inet_addr_hash(const struct net *net, __be32 addr)
123 {
124 	u32 val = __ipv4_addr_hash(addr, net_hash_mix(net));
125 
126 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
127 }
128 
129 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
130 {
131 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
132 
133 	ASSERT_RTNL();
134 	hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
135 }
136 
137 static void inet_hash_remove(struct in_ifaddr *ifa)
138 {
139 	ASSERT_RTNL();
140 	hlist_del_init_rcu(&ifa->addr_lst);
141 }
142 
143 /**
144  * __ip_dev_find - find the first device with a given source address.
145  * @net: the net namespace
146  * @addr: the source address
147  * @devref: if true, take a reference on the found device
148  *
149  * If a caller uses devref=false, it should be protected by RCU, or RTNL
150  */
151 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
152 {
153 	struct net_device *result = NULL;
154 	struct in_ifaddr *ifa;
155 
156 	rcu_read_lock();
157 	ifa = inet_lookup_ifaddr_rcu(net, addr);
158 	if (!ifa) {
159 		struct flowi4 fl4 = { .daddr = addr };
160 		struct fib_result res = { 0 };
161 		struct fib_table *local;
162 
163 		/* Fall back to the FIB local table so that communication
164 		 * over loopback subnets works.
165 		 */
166 		local = fib_get_table(net, RT_TABLE_LOCAL);
167 		if (local &&
168 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169 		    res.type == RTN_LOCAL)
170 			result = FIB_RES_DEV(res);
171 	} else {
172 		result = ifa->ifa_dev->dev;
173 	}
174 	if (result && devref)
175 		dev_hold(result);
176 	rcu_read_unlock();
177 	return result;
178 }
179 EXPORT_SYMBOL(__ip_dev_find);
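/*
 * Usage sketch (editor's illustration, not part of the original file):
 * an RCU-side lookup that takes no device reference; "addr" stands for a
 * hypothetical local address in network byte order:
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, addr, false);
 *	if (dev)
 *		...	use dev only inside this RCU section
 *	rcu_read_unlock();
 *
 * With devref == true the device is returned with a reference held via
 * dev_hold(), which the caller must drop with dev_put() when done.
 */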
180 
181 /* called under RCU lock */
182 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
183 {
184 	u32 hash = inet_addr_hash(net, addr);
185 	struct in_ifaddr *ifa;
186 
187 	hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
188 		if (ifa->ifa_local == addr)
189 			return ifa;
190 
191 	return NULL;
192 }
193 
194 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
195 
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
197 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
198 static void inet_del_ifa(struct in_device *in_dev,
199 			 struct in_ifaddr __rcu **ifap,
200 			 int destroy);
201 #ifdef CONFIG_SYSCTL
202 static int devinet_sysctl_register(struct in_device *idev);
203 static void devinet_sysctl_unregister(struct in_device *idev);
204 #else
205 static int devinet_sysctl_register(struct in_device *idev)
206 {
207 	return 0;
208 }
209 static void devinet_sysctl_unregister(struct in_device *idev)
210 {
211 }
212 #endif
213 
214 /* Locks all the inet devices. */
215 
216 static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
217 {
218 	struct in_ifaddr *ifa;
219 
220 	ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
221 	if (!ifa)
222 		return NULL;
223 
224 	in_dev_hold(in_dev);
225 	ifa->ifa_dev = in_dev;
226 
227 	INIT_HLIST_NODE(&ifa->addr_lst);
228 
229 	return ifa;
230 }
231 
232 static void inet_rcu_free_ifa(struct rcu_head *head)
233 {
234 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
235 
236 	in_dev_put(ifa->ifa_dev);
237 	kfree(ifa);
238 }
239 
240 static void inet_free_ifa(struct in_ifaddr *ifa)
241 {
242 	/* Our reference to ifa->ifa_dev must be dropped ASAP
243 	 * so that the reference to the netdev is released the same way:
244 	 * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
245 	 */
246 	call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
247 }
248 
249 static void in_dev_free_rcu(struct rcu_head *head)
250 {
251 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
252 
253 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
254 	kfree(idev);
255 }
256 
257 void in_dev_finish_destroy(struct in_device *idev)
258 {
259 	struct net_device *dev = idev->dev;
260 
261 	WARN_ON(idev->ifa_list);
262 	WARN_ON(idev->mc_list);
263 #ifdef NET_REFCNT_DEBUG
264 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
265 #endif
266 	netdev_put(dev, &idev->dev_tracker);
267 	if (!idev->dead)
268 		pr_err("Freeing alive in_device %p\n", idev);
269 	else
270 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
271 }
272 EXPORT_SYMBOL(in_dev_finish_destroy);
273 
274 static struct in_device *inetdev_init(struct net_device *dev)
275 {
276 	struct in_device *in_dev;
277 	int err = -ENOMEM;
278 
279 	ASSERT_RTNL();
280 
281 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
282 	if (!in_dev)
283 		goto out;
284 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
285 			sizeof(in_dev->cnf));
286 	in_dev->cnf.sysctl = NULL;
287 	in_dev->dev = dev;
288 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
289 	if (!in_dev->arp_parms)
290 		goto out_kfree;
291 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
292 		dev_disable_lro(dev);
293 	/* Reference in_dev->dev */
294 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
295 	/* Account for reference dev->ip_ptr (below) */
296 	refcount_set(&in_dev->refcnt, 1);
297 
298 	if (dev != blackhole_netdev) {
299 		err = devinet_sysctl_register(in_dev);
300 		if (err) {
301 			in_dev->dead = 1;
302 			neigh_parms_release(&arp_tbl, in_dev->arp_parms);
303 			in_dev_put(in_dev);
304 			in_dev = NULL;
305 			goto out;
306 		}
307 		ip_mc_init_dev(in_dev);
308 		if (dev->flags & IFF_UP)
309 			ip_mc_up(in_dev);
310 	}
311 
312 	/* we can receive as soon as ip_ptr is set -- do this last */
313 	rcu_assign_pointer(dev->ip_ptr, in_dev);
314 out:
315 	return in_dev ?: ERR_PTR(err);
316 out_kfree:
317 	kfree(in_dev);
318 	in_dev = NULL;
319 	goto out;
320 }
321 
322 static void inetdev_destroy(struct in_device *in_dev)
323 {
324 	struct net_device *dev;
325 	struct in_ifaddr *ifa;
326 
327 	ASSERT_RTNL();
328 
329 	dev = in_dev->dev;
330 
331 	in_dev->dead = 1;
332 
333 	ip_mc_destroy_dev(in_dev);
334 
335 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
336 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
337 		inet_free_ifa(ifa);
338 	}
339 
340 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
341 
342 	devinet_sysctl_unregister(in_dev);
343 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
344 	arp_ifdown(dev);
345 
346 	in_dev_put(in_dev);
347 }
348 
349 static int __init inet_blackhole_dev_init(void)
350 {
351 	int err = 0;
352 
353 	rtnl_lock();
354 	if (!inetdev_init(blackhole_netdev))
355 		err = -ENOMEM;
356 	rtnl_unlock();
357 
358 	return err;
359 }
360 late_initcall(inet_blackhole_dev_init);
361 
362 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
363 {
364 	const struct in_ifaddr *ifa;
365 
366 	rcu_read_lock();
367 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
368 		if (inet_ifa_match(a, ifa)) {
369 			if (!b || inet_ifa_match(b, ifa)) {
370 				rcu_read_unlock();
371 				return 1;
372 			}
373 		}
374 	}
375 	rcu_read_unlock();
376 	return 0;
377 }
378 
379 static void __inet_del_ifa(struct in_device *in_dev,
380 			   struct in_ifaddr __rcu **ifap,
381 			   int destroy, struct nlmsghdr *nlh, u32 portid)
382 {
383 	struct in_ifaddr *promote = NULL;
384 	struct in_ifaddr *ifa, *ifa1;
385 	struct in_ifaddr __rcu **last_prim;
386 	struct in_ifaddr *prev_prom = NULL;
387 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
388 
389 	ASSERT_RTNL();
390 
391 	ifa1 = rtnl_dereference(*ifap);
392 	last_prim = ifap;
393 	if (in_dev->dead)
394 		goto no_promotions;
395 
396 	/* 1. Deleting a primary ifaddr forces deletion of all secondaries
397 	 * unless alias promotion is enabled.
398 	 */
399 
400 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
401 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
402 
403 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
404 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
405 			    ifa1->ifa_scope <= ifa->ifa_scope)
406 				last_prim = &ifa->ifa_next;
407 
408 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
409 			    ifa1->ifa_mask != ifa->ifa_mask ||
410 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
411 				ifap1 = &ifa->ifa_next;
412 				prev_prom = ifa;
413 				continue;
414 			}
415 
416 			if (!do_promote) {
417 				inet_hash_remove(ifa);
418 				*ifap1 = ifa->ifa_next;
419 
420 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
421 				blocking_notifier_call_chain(&inetaddr_chain,
422 						NETDEV_DOWN, ifa);
423 				inet_free_ifa(ifa);
424 			} else {
425 				promote = ifa;
426 				break;
427 			}
428 		}
429 	}
430 
431 	/* On promotion, all secondaries from the subnet change their
432 	 * primary IP; we must remove all their routes silently
433 	 * and later add them back with the new prefsrc. Do this
434 	 * while all addresses are on the device list.
435 	 */
436 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
437 		if (ifa1->ifa_mask == ifa->ifa_mask &&
438 		    inet_ifa_match(ifa1->ifa_address, ifa))
439 			fib_del_ifaddr(ifa, ifa1);
440 	}
441 
442 no_promotions:
443 	/* 2. Unlink it */
444 
445 	*ifap = ifa1->ifa_next;
446 	inet_hash_remove(ifa1);
447 
448 	/* 3. Announce address deletion */
449 
450 	/* Send message first, then call notifier.
451 	   At first sight, the FIB update triggered by the notifier
452 	   will refer to an already deleted ifaddr, which could confuse
453 	   netlink listeners. It is not true: look, gated sees
454 	   that the route is deleted and, if it still thinks the ifaddr
455 	   is valid, it will try to restore the deleted routes... Grr.
456 	   So this order is correct.
457 	 */
458 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
459 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
460 
461 	if (promote) {
462 		struct in_ifaddr *next_sec;
463 
464 		next_sec = rtnl_dereference(promote->ifa_next);
465 		if (prev_prom) {
466 			struct in_ifaddr *last_sec;
467 
468 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
469 
470 			last_sec = rtnl_dereference(*last_prim);
471 			rcu_assign_pointer(promote->ifa_next, last_sec);
472 			rcu_assign_pointer(*last_prim, promote);
473 		}
474 
475 		promote->ifa_flags &= ~IFA_F_SECONDARY;
476 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
477 		blocking_notifier_call_chain(&inetaddr_chain,
478 				NETDEV_UP, promote);
479 		for (ifa = next_sec; ifa;
480 		     ifa = rtnl_dereference(ifa->ifa_next)) {
481 			if (ifa1->ifa_mask != ifa->ifa_mask ||
482 			    !inet_ifa_match(ifa1->ifa_address, ifa))
483 					continue;
484 			fib_add_ifaddr(ifa);
485 		}
486 
487 	}
488 	if (destroy)
489 		inet_free_ifa(ifa1);
490 }
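/*
 * Editor's illustration (not in the original source): with the
 * promote_secondaries sysctl enabled and 192.0.2.1/24 (primary) plus
 * 192.0.2.2/24 (secondary) on the same device, deleting 192.0.2.1 does not
 * delete 192.0.2.2; the loop above instead clears its IFA_F_SECONDARY flag,
 * announces it with RTM_NEWADDR, and re-adds the subnet routes with the new
 * prefsrc. With the sysctl disabled, all matching secondaries are deleted
 * together with the primary.
 */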
491 
492 static void inet_del_ifa(struct in_device *in_dev,
493 			 struct in_ifaddr __rcu **ifap,
494 			 int destroy)
495 {
496 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
497 }
498 
499 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
500 			     u32 portid, struct netlink_ext_ack *extack)
501 {
502 	struct in_ifaddr __rcu **last_primary, **ifap;
503 	struct in_device *in_dev = ifa->ifa_dev;
504 	struct net *net = dev_net(in_dev->dev);
505 	struct in_validator_info ivi;
506 	struct in_ifaddr *ifa1;
507 	int ret;
508 
509 	ASSERT_RTNL();
510 
511 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
512 	last_primary = &in_dev->ifa_list;
513 
514 	/* Don't set IPv6 only flags to IPv4 addresses */
515 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
516 
517 	ifap = &in_dev->ifa_list;
518 	ifa1 = rtnl_dereference(*ifap);
519 
520 	while (ifa1) {
521 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
522 		    ifa->ifa_scope <= ifa1->ifa_scope)
523 			last_primary = &ifa1->ifa_next;
524 		if (ifa1->ifa_mask == ifa->ifa_mask &&
525 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
526 			if (ifa1->ifa_local == ifa->ifa_local) {
527 				inet_free_ifa(ifa);
528 				return -EEXIST;
529 			}
530 			if (ifa1->ifa_scope != ifa->ifa_scope) {
531 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
532 				inet_free_ifa(ifa);
533 				return -EINVAL;
534 			}
535 			ifa->ifa_flags |= IFA_F_SECONDARY;
536 		}
537 
538 		ifap = &ifa1->ifa_next;
539 		ifa1 = rtnl_dereference(*ifap);
540 	}
541 
542 	/* Allow any devices that wish to register ifaddr validators to weigh
543 	 * in now, before changes are committed.  The rtnl lock is serializing
544 	 * access here, so the state should not change between a validator call
545 	 * and a final notify on commit.  This isn't invoked on promotion under
546 	 * the assumption that validators are checking the address itself, and
547 	 * not the flags.
548 	 */
549 	ivi.ivi_addr = ifa->ifa_address;
550 	ivi.ivi_dev = ifa->ifa_dev;
551 	ivi.extack = extack;
552 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
553 					   NETDEV_UP, &ivi);
554 	ret = notifier_to_errno(ret);
555 	if (ret) {
556 		inet_free_ifa(ifa);
557 		return ret;
558 	}
559 
560 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
561 		ifap = last_primary;
562 
563 	rcu_assign_pointer(ifa->ifa_next, *ifap);
564 	rcu_assign_pointer(*ifap, ifa);
565 
566 	inet_hash_insert(dev_net(in_dev->dev), ifa);
567 
568 	cancel_delayed_work(&net->ipv4.addr_chk_work);
569 	queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
570 
571 	/* Send message first, then call notifier.
572 	   The notifier will trigger a FIB update, so that
573 	   netlink listeners will know about the new ifaddr. */
574 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
575 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
576 
577 	return 0;
578 }
579 
580 static int inet_insert_ifa(struct in_ifaddr *ifa)
581 {
582 	if (!ifa->ifa_local) {
583 		inet_free_ifa(ifa);
584 		return 0;
585 	}
586 
587 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
588 }
589 
590 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
591 {
592 	struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
593 
594 	ipv4_devconf_setall(in_dev);
595 	neigh_parms_data_state_setall(in_dev->arp_parms);
596 
597 	if (ipv4_is_loopback(ifa->ifa_local))
598 		ifa->ifa_scope = RT_SCOPE_HOST;
599 	return inet_insert_ifa(ifa);
600 }
601 
602 /* Caller must hold RCU or RTNL:
603  * we don't take a reference on the found in_device
604  */
605 struct in_device *inetdev_by_index(struct net *net, int ifindex)
606 {
607 	struct net_device *dev;
608 	struct in_device *in_dev = NULL;
609 
610 	rcu_read_lock();
611 	dev = dev_get_by_index_rcu(net, ifindex);
612 	if (dev)
613 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
614 	rcu_read_unlock();
615 	return in_dev;
616 }
617 EXPORT_SYMBOL(inetdev_by_index);
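/*
 * Usage sketch (editor's illustration, not part of the original file):
 *
 *	rcu_read_lock();
 *	in_dev = inetdev_by_index(net, ifindex);
 *	if (in_dev)
 *		saddr = inet_select_addr(in_dev->dev, 0, RT_SCOPE_LINK);
 *	rcu_read_unlock();
 *
 * No reference is taken on the returned in_device, so it may only be used
 * inside the RCU section (or under RTNL).
 */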
618 
619 /* Called only from RTNL semaphored context. No locks. */
620 
621 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
622 				    __be32 mask)
623 {
624 	struct in_ifaddr *ifa;
625 
626 	ASSERT_RTNL();
627 
628 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
629 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
630 			return ifa;
631 	}
632 	return NULL;
633 }
634 
635 static int ip_mc_autojoin_config(struct net *net, bool join,
636 				 const struct in_ifaddr *ifa)
637 {
638 #if defined(CONFIG_IP_MULTICAST)
639 	struct ip_mreqn mreq = {
640 		.imr_multiaddr.s_addr = ifa->ifa_address,
641 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
642 	};
643 	struct sock *sk = net->ipv4.mc_autojoin_sk;
644 	int ret;
645 
646 	ASSERT_RTNL_NET(net);
647 
648 	lock_sock(sk);
649 	if (join)
650 		ret = ip_mc_join_group(sk, &mreq);
651 	else
652 		ret = ip_mc_leave_group(sk, &mreq);
653 	release_sock(sk);
654 
655 	return ret;
656 #else
657 	return -EOPNOTSUPP;
658 #endif
659 }
660 
661 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
662 			    struct netlink_ext_ack *extack)
663 {
664 	struct net *net = sock_net(skb->sk);
665 	struct in_ifaddr __rcu **ifap;
666 	struct nlattr *tb[IFA_MAX+1];
667 	struct in_device *in_dev;
668 	struct ifaddrmsg *ifm;
669 	struct in_ifaddr *ifa;
670 	int err;
671 
672 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
673 				     ifa_ipv4_policy, extack);
674 	if (err < 0)
675 		goto out;
676 
677 	ifm = nlmsg_data(nlh);
678 
679 	rtnl_net_lock(net);
680 
681 	in_dev = inetdev_by_index(net, ifm->ifa_index);
682 	if (!in_dev) {
683 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
684 		err = -ENODEV;
685 		goto unlock;
686 	}
687 
688 	for (ifap = &in_dev->ifa_list;
689 	     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
690 	     ifap = &ifa->ifa_next) {
691 		if (tb[IFA_LOCAL] &&
692 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
693 			continue;
694 
695 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
696 			continue;
697 
698 		if (tb[IFA_ADDRESS] &&
699 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
700 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
701 			continue;
702 
703 		if (ipv4_is_multicast(ifa->ifa_address))
704 			ip_mc_autojoin_config(net, false, ifa);
705 
706 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
707 		goto unlock;
708 	}
709 
710 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
711 	err = -EADDRNOTAVAIL;
712 unlock:
713 	rtnl_net_unlock(net);
714 out:
715 	return err;
716 }
717 
718 static void check_lifetime(struct work_struct *work)
719 {
720 	unsigned long now, next, next_sec, next_sched;
721 	struct in_ifaddr *ifa;
722 	struct hlist_node *n;
723 	struct net *net;
724 	int i;
725 
726 	net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
727 	now = jiffies;
728 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
729 
730 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
731 		struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
732 		bool change_needed = false;
733 
734 		rcu_read_lock();
735 		hlist_for_each_entry_rcu(ifa, head, addr_lst) {
736 			unsigned long age, tstamp;
737 			u32 preferred_lft;
738 			u32 valid_lft;
739 			u32 flags;
740 
741 			flags = READ_ONCE(ifa->ifa_flags);
742 			if (flags & IFA_F_PERMANENT)
743 				continue;
744 
745 			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
746 			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
747 			tstamp = READ_ONCE(ifa->ifa_tstamp);
748 			/* We try to batch several events at once. */
749 			age = (now - tstamp +
750 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
751 
752 			if (valid_lft != INFINITY_LIFE_TIME &&
753 			    age >= valid_lft) {
754 				change_needed = true;
755 			} else if (preferred_lft ==
756 				   INFINITY_LIFE_TIME) {
757 				continue;
758 			} else if (age >= preferred_lft) {
759 				if (time_before(tstamp + valid_lft * HZ, next))
760 					next = tstamp + valid_lft * HZ;
761 
762 				if (!(flags & IFA_F_DEPRECATED))
763 					change_needed = true;
764 			} else if (time_before(tstamp + preferred_lft * HZ,
765 					       next)) {
766 				next = tstamp + preferred_lft * HZ;
767 			}
768 		}
769 		rcu_read_unlock();
770 		if (!change_needed)
771 			continue;
772 
773 		rtnl_net_lock(net);
774 		hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
775 			unsigned long age;
776 
777 			if (ifa->ifa_flags & IFA_F_PERMANENT)
778 				continue;
779 
780 			/* We try to batch several events at once. */
781 			age = (now - ifa->ifa_tstamp +
782 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
783 
784 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
785 			    age >= ifa->ifa_valid_lft) {
786 				struct in_ifaddr __rcu **ifap;
787 				struct in_ifaddr *tmp;
788 
789 				ifap = &ifa->ifa_dev->ifa_list;
790 				tmp = rtnl_net_dereference(net, *ifap);
791 				while (tmp) {
792 					if (tmp == ifa) {
793 						inet_del_ifa(ifa->ifa_dev,
794 							     ifap, 1);
795 						break;
796 					}
797 					ifap = &tmp->ifa_next;
798 					tmp = rtnl_net_dereference(net, *ifap);
799 				}
800 			} else if (ifa->ifa_preferred_lft !=
801 				   INFINITY_LIFE_TIME &&
802 				   age >= ifa->ifa_preferred_lft &&
803 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
804 				ifa->ifa_flags |= IFA_F_DEPRECATED;
805 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
806 			}
807 		}
808 		rtnl_net_unlock(net);
809 	}
810 
811 	next_sec = round_jiffies_up(next);
812 	next_sched = next;
813 
814 	/* If rounded timeout is accurate enough, accept it. */
815 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
816 		next_sched = next_sec;
817 
818 	now = jiffies;
819 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
820 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
821 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
822 
823 	queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
824 			   next_sched - now);
825 }
826 
827 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
828 			     __u32 prefered_lft)
829 {
830 	unsigned long timeout;
831 	u32 flags;
832 
833 	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
834 
835 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
836 	if (addrconf_finite_timeout(timeout))
837 		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
838 	else
839 		flags |= IFA_F_PERMANENT;
840 
841 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
842 	if (addrconf_finite_timeout(timeout)) {
843 		if (timeout == 0)
844 			flags |= IFA_F_DEPRECATED;
845 		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
846 	}
847 	WRITE_ONCE(ifa->ifa_flags, flags);
848 	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
849 	if (!ifa->ifa_cstamp)
850 		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
851 }
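/*
 * Editor's note (not in the original source): lifetimes arrive from
 * userspace in seconds, e.g. "ip addr add 192.0.2.1/24 dev eth0
 * valid_lft 600 preferred_lft 300". An infinite valid_lft maps to
 * IFA_F_PERMANENT above, a preferred_lft of 0 marks the address
 * IFA_F_DEPRECATED immediately, and finite values are stored here and
 * aged later by check_lifetime().
 */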
852 
853 static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb,
854 			     struct netlink_ext_ack *extack,
855 			     __u32 *valid_lft, __u32 *prefered_lft)
856 {
857 	struct ifaddrmsg *ifm = nlmsg_data(nlh);
858 	int err;
859 
860 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
861 				     ifa_ipv4_policy, extack);
862 	if (err < 0)
863 		return err;
864 
865 	if (ifm->ifa_prefixlen > 32) {
866 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
867 		return -EINVAL;
868 	}
869 
870 	if (!tb[IFA_LOCAL]) {
871 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
872 		return -EINVAL;
873 	}
874 
875 	if (tb[IFA_CACHEINFO]) {
876 		struct ifa_cacheinfo *ci;
877 
878 		ci = nla_data(tb[IFA_CACHEINFO]);
879 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
880 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
881 			return -EINVAL;
882 		}
883 
884 		*valid_lft = ci->ifa_valid;
885 		*prefered_lft = ci->ifa_prefered;
886 	}
887 
888 	return 0;
889 }
890 
891 static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh,
892 					 struct nlattr **tb,
893 					 struct netlink_ext_ack *extack)
894 {
895 	struct ifaddrmsg *ifm = nlmsg_data(nlh);
896 	struct in_device *in_dev;
897 	struct net_device *dev;
898 	struct in_ifaddr *ifa;
899 	int err;
900 
901 	dev = __dev_get_by_index(net, ifm->ifa_index);
902 	err = -ENODEV;
903 	if (!dev) {
904 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
905 		goto errout;
906 	}
907 
908 	in_dev = __in_dev_get_rtnl_net(dev);
909 	err = -ENOBUFS;
910 	if (!in_dev)
911 		goto errout;
912 
913 	ifa = inet_alloc_ifa(in_dev);
914 	if (!ifa)
915 		/*
916 		 * A potential in_dev allocation can be left alive; it stays
917 		 * assigned to its device and is destroyed with it.
918 		 */
919 		goto errout;
920 
921 	ipv4_devconf_setall(in_dev);
922 	neigh_parms_data_state_setall(in_dev->arp_parms);
923 
924 	if (!tb[IFA_ADDRESS])
925 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
926 
927 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
928 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
929 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
930 					 ifm->ifa_flags;
931 	ifa->ifa_scope = ifm->ifa_scope;
932 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
933 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
934 
935 	if (tb[IFA_BROADCAST])
936 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
937 
938 	if (tb[IFA_LABEL])
939 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
940 	else
941 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
942 
943 	if (tb[IFA_RT_PRIORITY])
944 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
945 
946 	if (tb[IFA_PROTO])
947 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
948 
949 	return ifa;
950 
951 errout:
952 	return ERR_PTR(err);
953 }
954 
955 static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa)
956 {
957 	struct in_device *in_dev = ifa->ifa_dev;
958 	struct in_ifaddr *ifa1;
959 
960 	in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) {
961 		if (ifa1->ifa_mask == ifa->ifa_mask &&
962 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
963 		    ifa1->ifa_local == ifa->ifa_local)
964 			return ifa1;
965 	}
966 
967 	return NULL;
968 }
969 
970 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
971 			    struct netlink_ext_ack *extack)
972 {
973 	__u32 prefered_lft = INFINITY_LIFE_TIME;
974 	__u32 valid_lft = INFINITY_LIFE_TIME;
975 	struct net *net = sock_net(skb->sk);
976 	struct in_ifaddr *ifa_existing;
977 	struct nlattr *tb[IFA_MAX + 1];
978 	struct in_ifaddr *ifa;
979 	int ret;
980 
981 	ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft);
982 	if (ret < 0)
983 		return ret;
984 
985 	if (!nla_get_in_addr(tb[IFA_LOCAL]))
986 		return 0;
987 
988 	rtnl_net_lock(net);
989 
990 	ifa = inet_rtm_to_ifa(net, nlh, tb, extack);
991 	if (IS_ERR(ifa)) {
992 		ret = PTR_ERR(ifa);
993 		goto unlock;
994 	}
995 
996 	ifa_existing = find_matching_ifa(net, ifa);
997 	if (!ifa_existing) {
998 		/* It would be best to check for !NLM_F_CREATE here but
999 		 * userspace already relies on not having to provide this.
1000 		 */
1001 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1002 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
1003 			ret = ip_mc_autojoin_config(net, true, ifa);
1004 			if (ret < 0) {
1005 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
1006 				inet_free_ifa(ifa);
1007 				goto unlock;
1008 			}
1009 		}
1010 
1011 		ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack);
1012 	} else {
1013 		u32 new_metric = ifa->ifa_rt_priority;
1014 		u8 new_proto = ifa->ifa_proto;
1015 
1016 		inet_free_ifa(ifa);
1017 
1018 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
1019 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
1020 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
1021 			ret = -EEXIST;
1022 			goto unlock;
1023 		}
1024 		ifa = ifa_existing;
1025 
1026 		if (ifa->ifa_rt_priority != new_metric) {
1027 			fib_modify_prefix_metric(ifa, new_metric);
1028 			ifa->ifa_rt_priority = new_metric;
1029 		}
1030 
1031 		ifa->ifa_proto = new_proto;
1032 
1033 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1034 		cancel_delayed_work(&net->ipv4.addr_chk_work);
1035 		queue_delayed_work(system_power_efficient_wq,
1036 				   &net->ipv4.addr_chk_work, 0);
1037 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1038 	}
1039 
1040 unlock:
1041 	rtnl_net_unlock(net);
1042 
1043 	return ret;
1044 }
1045 
1046 /*
1047  *	Determine a default network mask, based on the IP address.
1048  */
1049 
1050 static int inet_abc_len(__be32 addr)
1051 {
1052 	int rc = -1;	/* Something else, probably a multicast. */
1053 
1054 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1055 		rc = 0;
1056 	else {
1057 		__u32 haddr = ntohl(addr);
1058 		if (IN_CLASSA(haddr))
1059 			rc = 8;
1060 		else if (IN_CLASSB(haddr))
1061 			rc = 16;
1062 		else if (IN_CLASSC(haddr))
1063 			rc = 24;
1064 		else if (IN_CLASSE(haddr))
1065 			rc = 32;
1066 	}
1067 
1068 	return rc;
1069 }
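/*
 * Editor's illustration (not in the original source): with these classful
 * defaults, 10.0.0.1 (class A) yields 8, 172.16.0.1 (class B) yields 16,
 * 192.0.2.1 (class C) yields 24, and a class D (multicast) address falls
 * through to -1, which the ioctl handler below rejects as invalid.
 */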
1070 
1071 
1072 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1073 {
1074 	struct sockaddr_in sin_orig;
1075 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1076 	struct in_ifaddr __rcu **ifap = NULL;
1077 	struct in_device *in_dev;
1078 	struct in_ifaddr *ifa = NULL;
1079 	struct net_device *dev;
1080 	char *colon;
1081 	int ret = -EFAULT;
1082 	int tryaddrmatch = 0;
1083 
1084 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1085 
1086 	/* save original address for comparison */
1087 	memcpy(&sin_orig, sin, sizeof(*sin));
1088 
1089 	colon = strchr(ifr->ifr_name, ':');
1090 	if (colon)
1091 		*colon = 0;
1092 
1093 	dev_load(net, ifr->ifr_name);
1094 
1095 	switch (cmd) {
1096 	case SIOCGIFADDR:	/* Get interface address */
1097 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1098 	case SIOCGIFDSTADDR:	/* Get the destination address */
1099 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1100 		/* Note that these ioctls will not sleep,
1101 		   so that we do not impose a lock.
1102 		   One day we will be forced to put shlock here (I mean SMP)
1103 		 */
1104 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1105 		memset(sin, 0, sizeof(*sin));
1106 		sin->sin_family = AF_INET;
1107 		break;
1108 
1109 	case SIOCSIFFLAGS:
1110 		ret = -EPERM;
1111 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1112 			goto out;
1113 		break;
1114 	case SIOCSIFADDR:	/* Set interface address (and family) */
1115 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1116 	case SIOCSIFDSTADDR:	/* Set the destination address */
1117 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1118 		ret = -EPERM;
1119 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1120 			goto out;
1121 		ret = -EINVAL;
1122 		if (sin->sin_family != AF_INET)
1123 			goto out;
1124 		break;
1125 	default:
1126 		ret = -EINVAL;
1127 		goto out;
1128 	}
1129 
1130 	rtnl_net_lock(net);
1131 
1132 	ret = -ENODEV;
1133 	dev = __dev_get_by_name(net, ifr->ifr_name);
1134 	if (!dev)
1135 		goto done;
1136 
1137 	if (colon)
1138 		*colon = ':';
1139 
1140 	in_dev = __in_dev_get_rtnl_net(dev);
1141 	if (in_dev) {
1142 		if (tryaddrmatch) {
1143 			/* Matthias Andree */
1144 			/* compare label and address (4.4BSD style) */
1145 			/* note: we only do this for a limited set of ioctls
1146 			   and only if the original address family was AF_INET.
1147 			   This is checked above. */
1148 
1149 			for (ifap = &in_dev->ifa_list;
1150 			     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1151 			     ifap = &ifa->ifa_next) {
1152 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1153 				    sin_orig.sin_addr.s_addr ==
1154 							ifa->ifa_local) {
1155 					break; /* found */
1156 				}
1157 			}
1158 		}
1159 		/* we didn't get a match, maybe the application is
1160 		   4.3BSD-style and passed in junk so we fall back to
1161 		   comparing just the label */
1162 		if (!ifa) {
1163 			for (ifap = &in_dev->ifa_list;
1164 			     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1165 			     ifap = &ifa->ifa_next)
1166 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1167 					break;
1168 		}
1169 	}
1170 
1171 	ret = -EADDRNOTAVAIL;
1172 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1173 		goto done;
1174 
1175 	switch (cmd) {
1176 	case SIOCGIFADDR:	/* Get interface address */
1177 		ret = 0;
1178 		sin->sin_addr.s_addr = ifa->ifa_local;
1179 		break;
1180 
1181 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1182 		ret = 0;
1183 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1184 		break;
1185 
1186 	case SIOCGIFDSTADDR:	/* Get the destination address */
1187 		ret = 0;
1188 		sin->sin_addr.s_addr = ifa->ifa_address;
1189 		break;
1190 
1191 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1192 		ret = 0;
1193 		sin->sin_addr.s_addr = ifa->ifa_mask;
1194 		break;
1195 
1196 	case SIOCSIFFLAGS:
1197 		if (colon) {
1198 			ret = -EADDRNOTAVAIL;
1199 			if (!ifa)
1200 				break;
1201 			ret = 0;
1202 			if (!(ifr->ifr_flags & IFF_UP))
1203 				inet_del_ifa(in_dev, ifap, 1);
1204 			break;
1205 		}
1206 
1207 		/* NETDEV_UP/DOWN/CHANGE could touch a peer dev */
1208 		ASSERT_RTNL();
1209 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1210 		break;
1211 
1212 	case SIOCSIFADDR:	/* Set interface address (and family) */
1213 		ret = -EINVAL;
1214 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1215 			break;
1216 
1217 		if (!ifa) {
1218 			ret = -ENOBUFS;
1219 			if (!in_dev)
1220 				break;
1221 			ifa = inet_alloc_ifa(in_dev);
1222 			if (!ifa)
1223 				break;
1224 
1225 			if (colon)
1226 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1227 			else
1228 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1229 		} else {
1230 			ret = 0;
1231 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1232 				break;
1233 			inet_del_ifa(in_dev, ifap, 0);
1234 			ifa->ifa_broadcast = 0;
1235 			ifa->ifa_scope = 0;
1236 		}
1237 
1238 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1239 
1240 		if (!(dev->flags & IFF_POINTOPOINT)) {
1241 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1242 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1243 			if ((dev->flags & IFF_BROADCAST) &&
1244 			    ifa->ifa_prefixlen < 31)
1245 				ifa->ifa_broadcast = ifa->ifa_address |
1246 						     ~ifa->ifa_mask;
1247 		} else {
1248 			ifa->ifa_prefixlen = 32;
1249 			ifa->ifa_mask = inet_make_mask(32);
1250 		}
1251 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1252 		ret = inet_set_ifa(dev, ifa);
1253 		break;
1254 
1255 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1256 		ret = 0;
1257 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1258 			inet_del_ifa(in_dev, ifap, 0);
1259 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1260 			inet_insert_ifa(ifa);
1261 		}
1262 		break;
1263 
1264 	case SIOCSIFDSTADDR:	/* Set the destination address */
1265 		ret = 0;
1266 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1267 			break;
1268 		ret = -EINVAL;
1269 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1270 			break;
1271 		ret = 0;
1272 		inet_del_ifa(in_dev, ifap, 0);
1273 		ifa->ifa_address = sin->sin_addr.s_addr;
1274 		inet_insert_ifa(ifa);
1275 		break;
1276 
1277 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1278 
1279 		/*
1280 		 *	The mask we set must be legal.
1281 		 */
1282 		ret = -EINVAL;
1283 		if (bad_mask(sin->sin_addr.s_addr, 0))
1284 			break;
1285 		ret = 0;
1286 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1287 			__be32 old_mask = ifa->ifa_mask;
1288 			inet_del_ifa(in_dev, ifap, 0);
1289 			ifa->ifa_mask = sin->sin_addr.s_addr;
1290 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1291 
1292 			/* If the current broadcast address matches
1293 			 * the old netmask, recalculate the broadcast
1294 			 * address for the new mask. Otherwise it's a
1295 			 * funny address, so don't touch it since
1296 			 * the user seems to know what (s)he's doing...
1297 			 */
1298 			if ((dev->flags & IFF_BROADCAST) &&
1299 			    (ifa->ifa_prefixlen < 31) &&
1300 			    (ifa->ifa_broadcast ==
1301 			     (ifa->ifa_local|~old_mask))) {
1302 				ifa->ifa_broadcast = (ifa->ifa_local |
1303 						      ~sin->sin_addr.s_addr);
1304 			}
1305 			inet_insert_ifa(ifa);
1306 		}
1307 		break;
1308 	}
1309 done:
1310 	rtnl_net_unlock(net);
1311 out:
1312 	return ret;
1313 }
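/*
 * Userspace sketch (editor's illustration, not part of the original file):
 * querying an interface address through this handler; "eth0" is only an
 * example name:
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFADDR, &ifr) == 0)
 *		addr = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr;
 *
 * On success sin_addr holds the primary IPv4 address of the interface.
 */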
1314 
1315 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1316 {
1317 	struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
1318 	const struct in_ifaddr *ifa;
1319 	struct ifreq ifr;
1320 	int done = 0;
1321 
1322 	if (WARN_ON(size > sizeof(struct ifreq)))
1323 		goto out;
1324 
1325 	if (!in_dev)
1326 		goto out;
1327 
1328 	in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) {
1329 		if (!buf) {
1330 			done += size;
1331 			continue;
1332 		}
1333 		if (len < size)
1334 			break;
1335 		memset(&ifr, 0, sizeof(struct ifreq));
1336 		strcpy(ifr.ifr_name, ifa->ifa_label);
1337 
1338 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1339 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1340 								ifa->ifa_local;
1341 
1342 		if (copy_to_user(buf + done, &ifr, size)) {
1343 			done = -EFAULT;
1344 			break;
1345 		}
1346 		len  -= size;
1347 		done += size;
1348 	}
1349 out:
1350 	return done;
1351 }
1352 
1353 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1354 				 int scope)
1355 {
1356 	const struct in_ifaddr *ifa;
1357 
1358 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1359 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1360 			continue;
1361 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1362 		    ifa->ifa_scope <= scope)
1363 			return ifa->ifa_local;
1364 	}
1365 
1366 	return 0;
1367 }
1368 
1369 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1370 {
1371 	const struct in_ifaddr *ifa;
1372 	__be32 addr = 0;
1373 	unsigned char localnet_scope = RT_SCOPE_HOST;
1374 	struct in_device *in_dev;
1375 	struct net *net = dev_net(dev);
1376 	int master_idx;
1377 
1378 	rcu_read_lock();
1379 	in_dev = __in_dev_get_rcu(dev);
1380 	if (!in_dev)
1381 		goto no_in_dev;
1382 
1383 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1384 		localnet_scope = RT_SCOPE_LINK;
1385 
1386 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1387 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1388 			continue;
1389 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1390 			continue;
1391 		if (!dst || inet_ifa_match(dst, ifa)) {
1392 			addr = ifa->ifa_local;
1393 			break;
1394 		}
1395 		if (!addr)
1396 			addr = ifa->ifa_local;
1397 	}
1398 
1399 	if (addr)
1400 		goto out_unlock;
1401 no_in_dev:
1402 	master_idx = l3mdev_master_ifindex_rcu(dev);
1403 
1404 	/* For VRFs, the VRF device takes the place of the loopback device,
1405 	 * with addresses on it being preferred.  Note in such cases the
1406 	 * loopback device will be among the devices that fail the master_idx
1407 	 * equality check in the loop below.
1408 	 */
1409 	if (master_idx &&
1410 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1411 	    (in_dev = __in_dev_get_rcu(dev))) {
1412 		addr = in_dev_select_addr(in_dev, scope);
1413 		if (addr)
1414 			goto out_unlock;
1415 	}
1416 
1417 	/* Non-loopback addresses on the loopback device should be preferred
1418 	   in this case. It is important that lo is the first interface
1419 	   in the dev_base list.
1420 	 */
1421 	for_each_netdev_rcu(net, dev) {
1422 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1423 			continue;
1424 
1425 		in_dev = __in_dev_get_rcu(dev);
1426 		if (!in_dev)
1427 			continue;
1428 
1429 		addr = in_dev_select_addr(in_dev, scope);
1430 		if (addr)
1431 			goto out_unlock;
1432 	}
1433 out_unlock:
1434 	rcu_read_unlock();
1435 	return addr;
1436 }
1437 EXPORT_SYMBOL(inet_select_addr);
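/*
 * Usage sketch (editor's illustration, not part of the original file):
 * picking a source address for traffic leaving "dev" toward "daddr":
 *
 *	saddr = inet_select_addr(dev, daddr, RT_SCOPE_UNIVERSE);
 *
 * A zero return means no suitable primary address of the requested scope
 * was found on the device or, via the fallbacks above, elsewhere in the
 * namespace.
 */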
1438 
1439 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1440 			      __be32 local, int scope)
1441 {
1442 	unsigned char localnet_scope = RT_SCOPE_HOST;
1443 	const struct in_ifaddr *ifa;
1444 	__be32 addr = 0;
1445 	int same = 0;
1446 
1447 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1448 		localnet_scope = RT_SCOPE_LINK;
1449 
1450 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1451 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1452 
1453 		if (!addr &&
1454 		    (local == ifa->ifa_local || !local) &&
1455 		    min_scope <= scope) {
1456 			addr = ifa->ifa_local;
1457 			if (same)
1458 				break;
1459 		}
1460 		if (!same) {
1461 			same = (!local || inet_ifa_match(local, ifa)) &&
1462 				(!dst || inet_ifa_match(dst, ifa));
1463 			if (same && addr) {
1464 				if (local || !dst)
1465 					break;
1466 				/* Is the selected addr in the dst subnet? */
1467 				if (inet_ifa_match(addr, ifa))
1468 					break;
1469 				/* No, then can we use new local src? */
1470 				if (min_scope <= scope) {
1471 					addr = ifa->ifa_local;
1472 					break;
1473 				}
1474 				/* search for large dst subnet for addr */
1475 				same = 0;
1476 			}
1477 		}
1478 	}
1479 
1480 	return same ? addr : 0;
1481 }
1482 
1483 /*
1484  * Confirm that local IP address exists using wildcards:
1485  * - net: netns to check, cannot be NULL
1486  * - in_dev: only on this interface, NULL=any interface
1487  * - dst: only in the same subnet as dst, 0=any dst
1488  * - local: address, 0=autoselect the local address
1489  * - scope: maximum allowed scope value for the local address
1490  */
1491 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1492 			 __be32 dst, __be32 local, int scope)
1493 {
1494 	__be32 addr = 0;
1495 	struct net_device *dev;
1496 
1497 	if (in_dev)
1498 		return confirm_addr_indev(in_dev, dst, local, scope);
1499 
1500 	rcu_read_lock();
1501 	for_each_netdev_rcu(net, dev) {
1502 		in_dev = __in_dev_get_rcu(dev);
1503 		if (in_dev) {
1504 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1505 			if (addr)
1506 				break;
1507 		}
1508 	}
1509 	rcu_read_unlock();
1510 
1511 	return addr;
1512 }
1513 EXPORT_SYMBOL(inet_confirm_addr);
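/*
 * Usage sketch (editor's illustration, not part of the original file):
 * using the wildcards documented above to check whether "sip" is configured
 * as a local address anywhere in the namespace:
 *
 *	if (inet_confirm_addr(net, NULL, 0, sip, RT_SCOPE_HOST))
 *		...	sip belongs to some local interface
 */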
1514 
1515 /*
1516  *	Device notifier
1517  */
1518 
1519 int register_inetaddr_notifier(struct notifier_block *nb)
1520 {
1521 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1522 }
1523 EXPORT_SYMBOL(register_inetaddr_notifier);
1524 
1525 int unregister_inetaddr_notifier(struct notifier_block *nb)
1526 {
1527 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1528 }
1529 EXPORT_SYMBOL(unregister_inetaddr_notifier);
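/*
 * Usage sketch (editor's illustration, not part of the original file; the
 * names below are hypothetical). The callback runs from the blocking chain
 * in process context, and "ptr" is the struct in_ifaddr being added
 * (NETDEV_UP) or removed (NETDEV_DOWN), as seen in the calls above:
 *
 *	static int my_inetaddr_event(struct notifier_block *nb,
 *				     unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_info("address added on %s\n", ifa->ifa_label);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_inetaddr_nb = {
 *		.notifier_call = my_inetaddr_event,
 *	};
 *
 *	register_inetaddr_notifier(&my_inetaddr_nb);
 */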
1530 
1531 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1532 {
1533 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1534 }
1535 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1536 
1537 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1538 {
1539 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1540 	    nb);
1541 }
1542 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1543 
1544 /* Rename ifa_labels for a device name change. Make some effort to preserve
1545  * existing alias numbering and to create unique labels if possible.
1546  */
1547 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1548 {
1549 	struct in_ifaddr *ifa;
1550 	int named = 0;
1551 
1552 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1553 		char old[IFNAMSIZ], *dot;
1554 
1555 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1556 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1557 		if (named++ == 0)
1558 			goto skip;
1559 		dot = strchr(old, ':');
1560 		if (!dot) {
1561 			sprintf(old, ":%d", named);
1562 			dot = old;
1563 		}
1564 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1565 			strcat(ifa->ifa_label, dot);
1566 		else
1567 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1568 skip:
1569 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1570 	}
1571 }
1572 
1573 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1574 					struct in_device *in_dev)
1575 
1576 {
1577 	const struct in_ifaddr *ifa;
1578 
1579 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1580 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1581 			 ifa->ifa_local, dev,
1582 			 ifa->ifa_local, NULL,
1583 			 dev->dev_addr, NULL);
1584 	}
1585 }
1586 
1587 /* Called only under RTNL semaphore */
1588 
1589 static int inetdev_event(struct notifier_block *this, unsigned long event,
1590 			 void *ptr)
1591 {
1592 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1593 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1594 
1595 	ASSERT_RTNL();
1596 
1597 	if (!in_dev) {
1598 		if (event == NETDEV_REGISTER) {
1599 			in_dev = inetdev_init(dev);
1600 			if (IS_ERR(in_dev))
1601 				return notifier_from_errno(PTR_ERR(in_dev));
1602 			if (dev->flags & IFF_LOOPBACK) {
1603 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1604 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1605 			}
1606 		} else if (event == NETDEV_CHANGEMTU) {
1607 			/* Re-enabling IP */
1608 			if (inetdev_valid_mtu(dev->mtu))
1609 				in_dev = inetdev_init(dev);
1610 		}
1611 		goto out;
1612 	}
1613 
1614 	switch (event) {
1615 	case NETDEV_REGISTER:
1616 		pr_debug("%s: bug\n", __func__);
1617 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1618 		break;
1619 	case NETDEV_UP:
1620 		if (!inetdev_valid_mtu(dev->mtu))
1621 			break;
1622 		if (dev->flags & IFF_LOOPBACK) {
1623 			struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);
1624 
1625 			if (ifa) {
1626 				ifa->ifa_local =
1627 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1628 				ifa->ifa_prefixlen = 8;
1629 				ifa->ifa_mask = inet_make_mask(8);
1630 				ifa->ifa_scope = RT_SCOPE_HOST;
1631 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1632 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1633 						 INFINITY_LIFE_TIME);
1634 				ipv4_devconf_setall(in_dev);
1635 				neigh_parms_data_state_setall(in_dev->arp_parms);
1636 				inet_insert_ifa(ifa);
1637 			}
1638 		}
1639 		ip_mc_up(in_dev);
1640 		fallthrough;
1641 	case NETDEV_CHANGEADDR:
1642 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1643 			break;
1644 		fallthrough;
1645 	case NETDEV_NOTIFY_PEERS:
1646 		/* Send gratuitous ARP to notify of link change */
1647 		inetdev_send_gratuitous_arp(dev, in_dev);
1648 		break;
1649 	case NETDEV_DOWN:
1650 		ip_mc_down(in_dev);
1651 		break;
1652 	case NETDEV_PRE_TYPE_CHANGE:
1653 		ip_mc_unmap(in_dev);
1654 		break;
1655 	case NETDEV_POST_TYPE_CHANGE:
1656 		ip_mc_remap(in_dev);
1657 		break;
1658 	case NETDEV_CHANGEMTU:
1659 		if (inetdev_valid_mtu(dev->mtu))
1660 			break;
1661 		/* disable IP when the MTU is too small */
1662 		fallthrough;
1663 	case NETDEV_UNREGISTER:
1664 		inetdev_destroy(in_dev);
1665 		break;
1666 	case NETDEV_CHANGENAME:
1667 		/* Do not notify about label change, this event is
1668 		 * not interesting to applications using netlink.
1669 		 */
1670 		inetdev_changename(dev, in_dev);
1671 
1672 		devinet_sysctl_unregister(in_dev);
1673 		devinet_sysctl_register(in_dev);
1674 		break;
1675 	}
1676 out:
1677 	return NOTIFY_DONE;
1678 }
1679 
1680 static struct notifier_block ip_netdev_notifier = {
1681 	.notifier_call = inetdev_event,
1682 };
1683 
1684 static size_t inet_nlmsg_size(void)
1685 {
1686 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1687 	       + nla_total_size(4) /* IFA_ADDRESS */
1688 	       + nla_total_size(4) /* IFA_LOCAL */
1689 	       + nla_total_size(4) /* IFA_BROADCAST */
1690 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1691 	       + nla_total_size(4)  /* IFA_FLAGS */
1692 	       + nla_total_size(1)  /* IFA_PROTO */
1693 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1694 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1695 }
1696 
1697 static inline u32 cstamp_delta(unsigned long cstamp)
1698 {
1699 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1700 }
1701 
1702 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1703 			 unsigned long tstamp, u32 preferred, u32 valid)
1704 {
1705 	struct ifa_cacheinfo ci;
1706 
1707 	ci.cstamp = cstamp_delta(cstamp);
1708 	ci.tstamp = cstamp_delta(tstamp);
1709 	ci.ifa_prefered = preferred;
1710 	ci.ifa_valid = valid;
1711 
1712 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1713 }
1714 
1715 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1716 			    struct inet_fill_args *args)
1717 {
1718 	struct ifaddrmsg *ifm;
1719 	struct nlmsghdr  *nlh;
1720 	unsigned long tstamp;
1721 	u32 preferred, valid;
1722 	u32 flags;
1723 
1724 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1725 			args->flags);
1726 	if (!nlh)
1727 		return -EMSGSIZE;
1728 
1729 	ifm = nlmsg_data(nlh);
1730 	ifm->ifa_family = AF_INET;
1731 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1732 
1733 	flags = READ_ONCE(ifa->ifa_flags);
1734 	/* Warning: ifm->ifa_flags is a __u8, it holds only 8 bits.
1735 	 * The 32-bit value is given in the IFA_FLAGS attribute.
1736 	 */
1737 	ifm->ifa_flags = (__u8)flags;
1738 
1739 	ifm->ifa_scope = ifa->ifa_scope;
1740 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1741 
1742 	if (args->netnsid >= 0 &&
1743 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1744 		goto nla_put_failure;
1745 
1746 	tstamp = READ_ONCE(ifa->ifa_tstamp);
1747 	if (!(flags & IFA_F_PERMANENT)) {
1748 		preferred = READ_ONCE(ifa->ifa_preferred_lft);
1749 		valid = READ_ONCE(ifa->ifa_valid_lft);
1750 		if (preferred != INFINITY_LIFE_TIME) {
1751 			long tval = (jiffies - tstamp) / HZ;
1752 
1753 			if (preferred > tval)
1754 				preferred -= tval;
1755 			else
1756 				preferred = 0;
1757 			if (valid != INFINITY_LIFE_TIME) {
1758 				if (valid > tval)
1759 					valid -= tval;
1760 				else
1761 					valid = 0;
1762 			}
1763 		}
1764 	} else {
1765 		preferred = INFINITY_LIFE_TIME;
1766 		valid = INFINITY_LIFE_TIME;
1767 	}
1768 	if ((ifa->ifa_address &&
1769 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1770 	    (ifa->ifa_local &&
1771 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1772 	    (ifa->ifa_broadcast &&
1773 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1774 	    (ifa->ifa_label[0] &&
1775 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1776 	    (ifa->ifa_proto &&
1777 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1778 	    nla_put_u32(skb, IFA_FLAGS, flags) ||
1779 	    (ifa->ifa_rt_priority &&
1780 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1781 	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1782 			  preferred, valid))
1783 		goto nla_put_failure;
1784 
1785 	nlmsg_end(skb, nlh);
1786 	return 0;
1787 
1788 nla_put_failure:
1789 	nlmsg_cancel(skb, nlh);
1790 	return -EMSGSIZE;
1791 }
1792 
1793 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1794 				      struct inet_fill_args *fillargs,
1795 				      struct net **tgt_net, struct sock *sk,
1796 				      struct netlink_callback *cb)
1797 {
1798 	struct netlink_ext_ack *extack = cb->extack;
1799 	struct nlattr *tb[IFA_MAX+1];
1800 	struct ifaddrmsg *ifm;
1801 	int err, i;
1802 
1803 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1804 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1805 		return -EINVAL;
1806 	}
1807 
1808 	ifm = nlmsg_data(nlh);
1809 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1810 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1811 		return -EINVAL;
1812 	}
1813 
1814 	fillargs->ifindex = ifm->ifa_index;
1815 	if (fillargs->ifindex) {
1816 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1817 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1818 	}
1819 
1820 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1821 					    ifa_ipv4_policy, extack);
1822 	if (err < 0)
1823 		return err;
1824 
1825 	for (i = 0; i <= IFA_MAX; ++i) {
1826 		if (!tb[i])
1827 			continue;
1828 
1829 		if (i == IFA_TARGET_NETNSID) {
1830 			struct net *net;
1831 
1832 			fillargs->netnsid = nla_get_s32(tb[i]);
1833 
1834 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1835 			if (IS_ERR(net)) {
1836 				fillargs->netnsid = -1;
1837 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1838 				return PTR_ERR(net);
1839 			}
1840 			*tgt_net = net;
1841 		} else {
1842 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1843 			return -EINVAL;
1844 		}
1845 	}
1846 
1847 	return 0;
1848 }
1849 
1850 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1851 			    struct netlink_callback *cb, int *s_ip_idx,
1852 			    struct inet_fill_args *fillargs)
1853 {
1854 	struct in_ifaddr *ifa;
1855 	int ip_idx = 0;
1856 	int err;
1857 
1858 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1859 		if (ip_idx < *s_ip_idx) {
1860 			ip_idx++;
1861 			continue;
1862 		}
1863 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1864 		if (err < 0)
1865 			goto done;
1866 
1867 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1868 		ip_idx++;
1869 	}
1870 	err = 0;
1871 	ip_idx = 0;
1872 done:
1873 	*s_ip_idx = ip_idx;
1874 
1875 	return err;
1876 }
1877 
1878 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1879  */
1880 static u32 inet_base_seq(const struct net *net)
1881 {
1882 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1883 		  READ_ONCE(net->dev_base_seq);
1884 
1885 	/* Must not return 0 (see nl_dump_check_consistent()).
1886 	 * Choose a value far away from 0.
1887 	 */
1888 	if (!res)
1889 		res = 0x80000000;
1890 	return res;
1891 }
1892 
1893 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1894 {
1895 	const struct nlmsghdr *nlh = cb->nlh;
1896 	struct inet_fill_args fillargs = {
1897 		.portid = NETLINK_CB(cb->skb).portid,
1898 		.seq = nlh->nlmsg_seq,
1899 		.event = RTM_NEWADDR,
1900 		.flags = NLM_F_MULTI,
1901 		.netnsid = -1,
1902 	};
1903 	struct net *net = sock_net(skb->sk);
1904 	struct net *tgt_net = net;
1905 	struct {
1906 		unsigned long ifindex;
1907 		int ip_idx;
1908 	} *ctx = (void *)cb->ctx;
1909 	struct in_device *in_dev;
1910 	struct net_device *dev;
1911 	int err = 0;
1912 
1913 	rcu_read_lock();
1914 	if (cb->strict_check) {
1915 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1916 						 skb->sk, cb);
1917 		if (err < 0)
1918 			goto done;
1919 
1920 		if (fillargs.ifindex) {
1921 			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1922 			if (!dev) {
1923 				err = -ENODEV;
1924 				goto done;
1925 			}
1926 			in_dev = __in_dev_get_rcu(dev);
1927 			if (!in_dev)
1928 				goto done;
1929 			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1930 					       &fillargs);
1931 			goto done;
1932 		}
1933 	}
1934 
1935 	cb->seq = inet_base_seq(tgt_net);
1936 
1937 	for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1938 		in_dev = __in_dev_get_rcu(dev);
1939 		if (!in_dev)
1940 			continue;
1941 		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1942 				       &fillargs);
1943 		if (err < 0)
1944 			goto done;
1945 	}
1946 done:
1947 	if (fillargs.netnsid >= 0)
1948 		put_net(tgt_net);
1949 	rcu_read_unlock();
1950 	return err;
1951 }
1952 
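/* Notify RTNLGRP_IPV4_IFADDR listeners about an added, changed or
 * deleted address.
 */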
1953 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1954 		      u32 portid)
1955 {
1956 	struct inet_fill_args fillargs = {
1957 		.portid = portid,
1958 		.seq = nlh ? nlh->nlmsg_seq : 0,
1959 		.event = event,
1960 		.flags = 0,
1961 		.netnsid = -1,
1962 	};
1963 	struct sk_buff *skb;
1964 	int err = -ENOBUFS;
1965 	struct net *net;
1966 
1967 	net = dev_net(ifa->ifa_dev->dev);
1968 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1969 	if (!skb)
1970 		goto errout;
1971 
1972 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1973 	if (err < 0) {
1974 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1975 		WARN_ON(err == -EMSGSIZE);
1976 		kfree_skb(skb);
1977 		goto errout;
1978 	}
1979 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1980 	return;
1981 errout:
1982 	rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1983 }
1984 
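/* rtnl_af_ops helpers: per-device IPv4 configuration is exposed in
 * link messages as a flat array of u32 values (IFLA_INET_CONF) inside
 * the AF_INET part of IFLA_AF_SPEC.
 */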
1985 static size_t inet_get_link_af_size(const struct net_device *dev,
1986 				    u32 ext_filter_mask)
1987 {
1988 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1989 
1990 	if (!in_dev)
1991 		return 0;
1992 
1993 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1994 }
1995 
1996 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1997 			     u32 ext_filter_mask)
1998 {
1999 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
2000 	struct nlattr *nla;
2001 	int i;
2002 
2003 	if (!in_dev)
2004 		return -ENODATA;
2005 
2006 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
2007 	if (!nla)
2008 		return -EMSGSIZE;
2009 
2010 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
2011 		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
2012 
2013 	return 0;
2014 }
2015 
2016 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
2017 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
2018 };
2019 
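/* Validate IFLA_INET_CONF from a link change request: each nested
 * attribute must be at least four bytes long and use a valid devconf
 * index as its type.
 */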
2020 static int inet_validate_link_af(const struct net_device *dev,
2021 				 const struct nlattr *nla,
2022 				 struct netlink_ext_ack *extack)
2023 {
2024 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2025 	int err, rem;
2026 
2027 	if (dev && !__in_dev_get_rtnl(dev))
2028 		return -EAFNOSUPPORT;
2029 
2030 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2031 					  inet_af_policy, extack);
2032 	if (err < 0)
2033 		return err;
2034 
2035 	if (tb[IFLA_INET_CONF]) {
2036 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2037 			int cfgid = nla_type(a);
2038 
2039 			if (nla_len(a) < 4)
2040 				return -EINVAL;
2041 
2042 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2043 				return -EINVAL;
2044 		}
2045 	}
2046 
2047 	return 0;
2048 }
2049 
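/* Apply previously validated IFLA_INET_CONF values to the device's
 * ipv4_devconf.
 */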
2050 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2051 			    struct netlink_ext_ack *extack)
2052 {
2053 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2054 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2055 	int rem;
2056 
2057 	if (!in_dev)
2058 		return -EAFNOSUPPORT;
2059 
2060 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2061 		return -EINVAL;
2062 
2063 	if (tb[IFLA_INET_CONF]) {
2064 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2065 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2066 	}
2067 
2068 	return 0;
2069 }
2070 
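/* Worst-case size of a netconf message carrying the given attribute
 * type, or all of them for NETCONFA_ALL.
 */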
2071 static int inet_netconf_msgsize_devconf(int type)
2072 {
2073 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2074 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2075 	bool all = false;
2076 
2077 	if (type == NETCONFA_ALL)
2078 		all = true;
2079 
2080 	if (all || type == NETCONFA_FORWARDING)
2081 		size += nla_total_size(4);
2082 	if (all || type == NETCONFA_RP_FILTER)
2083 		size += nla_total_size(4);
2084 	if (all || type == NETCONFA_MC_FORWARDING)
2085 		size += nla_total_size(4);
2086 	if (all || type == NETCONFA_BC_FORWARDING)
2087 		size += nla_total_size(4);
2088 	if (all || type == NETCONFA_PROXY_NEIGH)
2089 		size += nla_total_size(4);
2090 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2091 		size += nla_total_size(4);
2092 
2093 	return size;
2094 }
2095 
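/* Fill one netconf message for the requested attribute type (or all
 * attributes for NETCONFA_ALL).  A NULL devconf produces a message that
 * carries only the ifindex, as used for RTM_DELNETCONF.
 */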
2096 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2097 				     const struct ipv4_devconf *devconf,
2098 				     u32 portid, u32 seq, int event,
2099 				     unsigned int flags, int type)
2100 {
2101 	struct nlmsghdr  *nlh;
2102 	struct nlmsghdr *nlh;
2103 	bool all = false;
2104 
2105 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2106 			flags);
2107 	if (!nlh)
2108 		return -EMSGSIZE;
2109 
2110 	if (type == NETCONFA_ALL)
2111 		all = true;
2112 
2113 	ncm = nlmsg_data(nlh);
2114 	ncm->ncm_family = AF_INET;
2115 
2116 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2117 		goto nla_put_failure;
2118 
2119 	if (!devconf)
2120 		goto out;
2121 
2122 	if ((all || type == NETCONFA_FORWARDING) &&
2123 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2124 			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2125 		goto nla_put_failure;
2126 	if ((all || type == NETCONFA_RP_FILTER) &&
2127 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2128 			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2129 		goto nla_put_failure;
2130 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2131 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2132 			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2133 		goto nla_put_failure;
2134 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2135 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2136 			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2137 		goto nla_put_failure;
2138 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2139 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2140 			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2141 		goto nla_put_failure;
2142 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2143 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2144 			IPV4_DEVCONF_RO(*devconf,
2145 					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2146 		goto nla_put_failure;
2147 
2148 out:
2149 	nlmsg_end(skb, nlh);
2150 	return 0;
2151 
2152 nla_put_failure:
2153 	nlmsg_cancel(skb, nlh);
2154 	return -EMSGSIZE;
2155 }
2156 
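/* Broadcast a netconf change (RTM_NEWNETCONF or RTM_DELNETCONF) for one
 * interface, or for the "all"/"default" pseudo entries, on
 * RTNLGRP_IPV4_NETCONF.
 */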
2157 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2158 				 int ifindex, struct ipv4_devconf *devconf)
2159 {
2160 	struct sk_buff *skb;
2161 	int err = -ENOBUFS;
2162 
2163 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2164 	if (!skb)
2165 		goto errout;
2166 
2167 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2168 					event, 0, type);
2169 	if (err < 0) {
2170 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2171 		WARN_ON(err == -EMSGSIZE);
2172 		kfree_skb(skb);
2173 		goto errout;
2174 	}
2175 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2176 	return;
2177 errout:
2178 	rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2179 }
2180 
2181 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2182 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2183 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2184 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2185 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2186 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2187 };
2188 
2189 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2190 				      const struct nlmsghdr *nlh,
2191 				      struct nlattr **tb,
2192 				      struct netlink_ext_ack *extack)
2193 {
2194 	int i, err;
2195 
2196 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2197 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2198 		return -EINVAL;
2199 	}
2200 
2201 	if (!netlink_strict_get_check(skb))
2202 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2203 					      tb, NETCONFA_MAX,
2204 					      devconf_ipv4_policy, extack);
2205 
2206 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2207 					    tb, NETCONFA_MAX,
2208 					    devconf_ipv4_policy, extack);
2209 	if (err)
2210 		return err;
2211 
2212 	for (i = 0; i <= NETCONFA_MAX; i++) {
2213 		if (!tb[i])
2214 			continue;
2215 
2216 		switch (i) {
2217 		case NETCONFA_IFINDEX:
2218 			break;
2219 		default:
2220 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2221 			return -EINVAL;
2222 		}
2223 	}
2224 
2225 	return 0;
2226 }
2227 
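/* RTM_GETNETCONF doit handler: reply with the netconf state of a single
 * device, of NETCONFA_IFINDEX_ALL or of NETCONFA_IFINDEX_DEFAULT.
 */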
2228 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2229 				    struct nlmsghdr *nlh,
2230 				    struct netlink_ext_ack *extack)
2231 {
2232 	struct net *net = sock_net(in_skb->sk);
2233 	struct nlattr *tb[NETCONFA_MAX + 1];
2234 	const struct ipv4_devconf *devconf;
2235 	struct in_device *in_dev = NULL;
2236 	struct net_device *dev = NULL;
2237 	struct sk_buff *skb;
2238 	int ifindex;
2239 	int err;
2240 
2241 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2242 	if (err)
2243 		return err;
2244 
2245 	if (!tb[NETCONFA_IFINDEX])
2246 		return -EINVAL;
2247 
2248 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2249 	switch (ifindex) {
2250 	case NETCONFA_IFINDEX_ALL:
2251 		devconf = net->ipv4.devconf_all;
2252 		break;
2253 	case NETCONFA_IFINDEX_DEFAULT:
2254 		devconf = net->ipv4.devconf_dflt;
2255 		break;
2256 	default:
2257 		err = -ENODEV;
2258 		dev = dev_get_by_index(net, ifindex);
2259 		if (dev)
2260 			in_dev = in_dev_get(dev);
2261 		if (!in_dev)
2262 			goto errout;
2263 		devconf = &in_dev->cnf;
2264 		break;
2265 	}
2266 
2267 	err = -ENOBUFS;
2268 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2269 	if (!skb)
2270 		goto errout;
2271 
2272 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2273 					NETLINK_CB(in_skb).portid,
2274 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2275 					NETCONFA_ALL);
2276 	if (err < 0) {
2277 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2278 		WARN_ON(err == -EMSGSIZE);
2279 		kfree_skb(skb);
2280 		goto errout;
2281 	}
2282 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2283 errout:
2284 	if (in_dev)
2285 		in_dev_put(in_dev);
2286 	dev_put(dev);
2287 	return err;
2288 }
2289 
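/* RTM_GETNETCONF dump handler (used e.g. by "ip netconf"): walk every
 * device under RCU, then append the "all" and "default" entries,
 * tracking progress in cb->ctx.
 */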
2290 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2291 				     struct netlink_callback *cb)
2292 {
2293 	const struct nlmsghdr *nlh = cb->nlh;
2294 	struct net *net = sock_net(skb->sk);
2295 	struct {
2296 		unsigned long ifindex;
2297 		unsigned int all_default;
2298 	} *ctx = (void *)cb->ctx;
2299 	const struct in_device *in_dev;
2300 	struct net_device *dev;
2301 	int err = 0;
2302 
2303 	if (cb->strict_check) {
2304 		struct netlink_ext_ack *extack = cb->extack;
2305 		struct netconfmsg *ncm;
2306 
2307 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2308 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2309 			return -EINVAL;
2310 		}
2311 
2312 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2313 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2314 			return -EINVAL;
2315 		}
2316 	}
2317 
2318 	rcu_read_lock();
2319 	for_each_netdev_dump(net, dev, ctx->ifindex) {
2320 		in_dev = __in_dev_get_rcu(dev);
2321 		if (!in_dev)
2322 			continue;
2323 		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2324 						&in_dev->cnf,
2325 						NETLINK_CB(cb->skb).portid,
2326 						nlh->nlmsg_seq,
2327 						RTM_NEWNETCONF, NLM_F_MULTI,
2328 						NETCONFA_ALL);
2329 		if (err < 0)
2330 			goto done;
2331 	}
2332 	if (ctx->all_default == 0) {
2333 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2334 						net->ipv4.devconf_all,
2335 						NETLINK_CB(cb->skb).portid,
2336 						nlh->nlmsg_seq,
2337 						RTM_NEWNETCONF, NLM_F_MULTI,
2338 						NETCONFA_ALL);
2339 		if (err < 0)
2340 			goto done;
2341 		ctx->all_default++;
2342 	}
2343 	if (ctx->all_default == 1) {
2344 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2345 						net->ipv4.devconf_dflt,
2346 						NETLINK_CB(cb->skb).portid,
2347 						nlh->nlmsg_seq,
2348 						RTM_NEWNETCONF, NLM_F_MULTI,
2349 						NETCONFA_ALL);
2350 		if (err < 0)
2351 			goto done;
2352 		ctx->all_default++;
2353 	}
2354 done:
2355 	rcu_read_unlock();
2356 	return err;
2357 }
2358 
2359 #ifdef CONFIG_SYSCTL
2360 
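/* Propagate a changed "default" value to every device that has not
 * explicitly overridden that entry (tracked in the cnf->state bitmap).
 */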
2361 static void devinet_copy_dflt_conf(struct net *net, int i)
2362 {
2363 	struct net_device *dev;
2364 
2365 	rcu_read_lock();
2366 	for_each_netdev_rcu(net, dev) {
2367 		struct in_device *in_dev;
2368 
2369 		in_dev = __in_dev_get_rcu(dev);
2370 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2371 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2372 	}
2373 	rcu_read_unlock();
2374 }
2375 
2376 /* called with RTNL locked */
2377 static void inet_forward_change(struct net *net)
2378 {
2379 	struct net_device *dev;
2380 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2381 
2382 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2383 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2384 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2385 				    NETCONFA_FORWARDING,
2386 				    NETCONFA_IFINDEX_ALL,
2387 				    net->ipv4.devconf_all);
2388 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2389 				    NETCONFA_FORWARDING,
2390 				    NETCONFA_IFINDEX_DEFAULT,
2391 				    net->ipv4.devconf_dflt);
2392 
2393 	for_each_netdev(net, dev) {
2394 		struct in_device *in_dev;
2395 
2396 		if (on)
2397 			dev_disable_lro(dev);
2398 
2399 		in_dev = __in_dev_get_rtnl_net(dev);
2400 		if (in_dev) {
2401 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2402 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2403 						    NETCONFA_FORWARDING,
2404 						    dev->ifindex, &in_dev->cnf);
2405 		}
2406 	}
2407 }
2408 
2409 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2410 {
2411 	if (cnf == net->ipv4.devconf_dflt)
2412 		return NETCONFA_IFINDEX_DEFAULT;
2413 	else if (cnf == net->ipv4.devconf_all)
2414 		return NETCONFA_IFINDEX_ALL;
2415 	else {
2416 		struct in_device *idev
2417 			= container_of(cnf, struct in_device, cnf);
2418 		return idev->dev->ifindex;
2419 	}
2420 }
2421 
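/* Shared proc handler for the per-device devconf sysctls: mark the
 * entry as explicitly set, propagate changes made on "default", flush
 * the route cache where required, and emit RTM_NEWNETCONF for the
 * attributes exported via netconf.
 */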
2422 static int devinet_conf_proc(const struct ctl_table *ctl, int write,
2423 			     void *buffer, size_t *lenp, loff_t *ppos)
2424 {
2425 	int old_value = *(int *)ctl->data;
2426 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2427 	int new_value = *(int *)ctl->data;
2428 
2429 	if (write) {
2430 		struct ipv4_devconf *cnf = ctl->extra1;
2431 		struct net *net = ctl->extra2;
2432 		int i = (int *)ctl->data - cnf->data;
2433 		int ifindex;
2434 
2435 		set_bit(i, cnf->state);
2436 
2437 		if (cnf == net->ipv4.devconf_dflt)
2438 			devinet_copy_dflt_conf(net, i);
2439 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2440 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2441 			if ((new_value == 0) && (old_value != 0))
2442 				rt_cache_flush(net);
2443 
2444 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2445 		    new_value != old_value)
2446 			rt_cache_flush(net);
2447 
2448 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2449 		    new_value != old_value) {
2450 			ifindex = devinet_conf_ifindex(net, cnf);
2451 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2452 						    NETCONFA_RP_FILTER,
2453 						    ifindex, cnf);
2454 		}
2455 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2456 		    new_value != old_value) {
2457 			ifindex = devinet_conf_ifindex(net, cnf);
2458 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2459 						    NETCONFA_PROXY_NEIGH,
2460 						    ifindex, cnf);
2461 		}
2462 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2463 		    new_value != old_value) {
2464 			ifindex = devinet_conf_ifindex(net, cnf);
2465 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2466 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2467 						    ifindex, cnf);
2468 		}
2469 	}
2470 
2471 	return ret;
2472 }
2473 
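/* proc handler for the "forwarding" sysctls.  Writes need CAP_NET_ADMIN
 * in the owning user namespace.  Changing "all" fans out to every
 * device via inet_forward_change(); a per-device change disables LRO
 * when forwarding is enabled; both flush the route cache.  "default"
 * only triggers a notification.
 */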
2474 static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
2475 				  void *buffer, size_t *lenp, loff_t *ppos)
2476 {
2477 	int *valp = ctl->data;
2478 	int val = *valp;
2479 	loff_t pos = *ppos;
2480 	struct net *net = ctl->extra2;
2481 	int ret;
2482 
2483 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2484 		return -EPERM;
2485 
2486 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2487 
2488 	if (write && *valp != val) {
2489 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2490 			if (!rtnl_net_trylock(net)) {
2491 				/* Restore the original values before restarting */
2492 				*valp = val;
2493 				*ppos = pos;
2494 				return restart_syscall();
2495 			}
2496 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2497 				inet_forward_change(net);
2498 			} else {
2499 				struct ipv4_devconf *cnf = ctl->extra1;
2500 				struct in_device *idev =
2501 					container_of(cnf, struct in_device, cnf);
2502 				if (*valp)
2503 					dev_disable_lro(idev->dev);
2504 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2505 							    NETCONFA_FORWARDING,
2506 							    idev->dev->ifindex,
2507 							    cnf);
2508 			}
2509 			rtnl_net_unlock(net);
2510 			rt_cache_flush(net);
2511 		} else
2512 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2513 						    NETCONFA_FORWARDING,
2514 						    NETCONFA_IFINDEX_DEFAULT,
2515 						    net->ipv4.devconf_dflt);
2516 	}
2517 
2518 	return ret;
2519 }
2520 
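/* proc handler for entries whose change invalidates cached routes. */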
2521 static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write,
2522 				void *buffer, size_t *lenp, loff_t *ppos)
2523 {
2524 	int *valp = ctl->data;
2525 	int val = *valp;
2526 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2527 	struct net *net = ctl->extra2;
2528 
2529 	if (write && *valp != val)
2530 		rt_cache_flush(net);
2531 
2532 	return ret;
2533 }
2534 
2535 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2536 	{ \
2537 		.procname	= name, \
2538 		.data		= ipv4_devconf.data + \
2539 				  IPV4_DEVCONF_ ## attr - 1, \
2540 		.maxlen		= sizeof(int), \
2541 		.mode		= mval, \
2542 		.proc_handler	= proc, \
2543 		.extra1		= &ipv4_devconf, \
2544 	}
2545 
2546 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2547 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2548 
2549 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2550 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2551 
2552 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2553 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2554 
2555 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2556 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2557 
2558 static struct devinet_sysctl_table {
2559 	struct ctl_table_header *sysctl_header;
2560 	struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
2561 } devinet_sysctl = {
2562 	.devinet_vars = {
2563 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2564 					     devinet_sysctl_forward),
2565 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2566 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2567 
2568 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2569 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2570 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2571 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2572 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2573 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2574 					"accept_source_route"),
2575 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2576 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2577 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2578 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2579 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2580 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2581 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2582 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2583 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2584 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2585 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2586 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2587 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2588 					"arp_evict_nocarrier"),
2589 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2590 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2591 					"force_igmp_version"),
2592 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2593 					"igmpv2_unsolicited_report_interval"),
2594 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2595 					"igmpv3_unsolicited_report_interval"),
2596 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2597 					"ignore_routes_with_linkdown"),
2598 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2599 					"drop_gratuitous_arp"),
2600 
2601 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2602 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2603 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2604 					      "promote_secondaries"),
2605 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2606 					      "route_localnet"),
2607 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2608 					      "drop_unicast_in_l2_multicast"),
2609 	},
2610 };
2611 
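/* Register the devconf sysctl tree under net/ipv4/conf/<dev_name>
 * (also used for the "all" and "default" pseudo devices) and announce
 * its initial state via RTM_NEWNETCONF.
 */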
2612 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2613 				     int ifindex, struct ipv4_devconf *p)
2614 {
2615 	int i;
2616 	struct devinet_sysctl_table *t;
2617 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2618 
2619 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2620 	if (!t)
2621 		goto out;
2622 
2623 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2624 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2625 		t->devinet_vars[i].extra1 = p;
2626 		t->devinet_vars[i].extra2 = net;
2627 	}
2628 
2629 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2630 
2631 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2632 	if (!t->sysctl_header)
2633 		goto free;
2634 
2635 	p->sysctl = t;
2636 
2637 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2638 				    ifindex, p);
2639 	return 0;
2640 
2641 free:
2642 	kfree(t);
2643 out:
2644 	return -ENOMEM;
2645 }
2646 
2647 static void __devinet_sysctl_unregister(struct net *net,
2648 					struct ipv4_devconf *cnf, int ifindex)
2649 {
2650 	struct devinet_sysctl_table *t = cnf->sysctl;
2651 
2652 	if (t) {
2653 		cnf->sysctl = NULL;
2654 		unregister_net_sysctl_table(t->sysctl_header);
2655 		kfree(t);
2656 	}
2657 
2658 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2659 }
2660 
2661 static int devinet_sysctl_register(struct in_device *idev)
2662 {
2663 	int err;
2664 
2665 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2666 		return -EINVAL;
2667 
2668 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2669 	if (err)
2670 		return err;
2671 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2672 					idev->dev->ifindex, &idev->cnf);
2673 	if (err)
2674 		neigh_sysctl_unregister(idev->arp_parms);
2675 	return err;
2676 }
2677 
2678 static void devinet_sysctl_unregister(struct in_device *idev)
2679 {
2680 	struct net *net = dev_net(idev->dev);
2681 
2682 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2683 	neigh_sysctl_unregister(idev->arp_parms);
2684 }
2685 
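/* Template for the global net.ipv4.ip_forward entry; devinet_init_net()
 * points each namespace's copy at that namespace's "all" forwarding
 * value.
 */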
2686 static struct ctl_table ctl_forward_entry[] = {
2687 	{
2688 		.procname	= "ip_forward",
2689 		.data		= &ipv4_devconf.data[
2690 					IPV4_DEVCONF_FORWARDING - 1],
2691 		.maxlen		= sizeof(int),
2692 		.mode		= 0644,
2693 		.proc_handler	= devinet_sysctl_forward,
2694 		.extra1		= &ipv4_devconf,
2695 		.extra2		= &init_net,
2696 	},
2697 };
2698 #endif
2699 
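/* Per-netns setup: allocate the address hash table and the "all" and
 * "default" devconf copies (child namespaces inherit them according to
 * the devconf_inherit_init_net policy), then, with CONFIG_SYSCTL, hook
 * up the conf/ trees and the ip_forward entry.
 */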
2700 static __net_init int devinet_init_net(struct net *net)
2701 {
2702 #ifdef CONFIG_SYSCTL
2703 	struct ctl_table_header *forw_hdr;
2704 	struct ctl_table *tbl;
2705 #endif
2706 	struct ipv4_devconf *all, *dflt;
2707 	int err;
2708 	int i;
2709 
2710 	err = -ENOMEM;
2711 	net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
2712 						sizeof(struct hlist_head),
2713 						GFP_KERNEL);
2714 	if (!net->ipv4.inet_addr_lst)
2715 		goto err_alloc_hash;
2716 
2717 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2718 	if (!all)
2719 		goto err_alloc_all;
2720 
2721 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2722 	if (!dflt)
2723 		goto err_alloc_dflt;
2724 
2725 #ifdef CONFIG_SYSCTL
2726 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2727 	if (!tbl)
2728 		goto err_alloc_ctl;
2729 
2730 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2731 	tbl[0].extra1 = all;
2732 	tbl[0].extra2 = net;
2733 #endif
2734 
2735 	if (!net_eq(net, &init_net)) {
2736 		switch (net_inherit_devconf()) {
2737 		case 3:
2738 			/* copy from the current netns */
2739 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2740 			       sizeof(ipv4_devconf));
2741 			memcpy(dflt,
2742 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2743 			       sizeof(ipv4_devconf_dflt));
2744 			break;
2745 		case 0:
2746 		case 1:
2747 			/* copy from init_net */
2748 			memcpy(all, init_net.ipv4.devconf_all,
2749 			       sizeof(ipv4_devconf));
2750 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2751 			       sizeof(ipv4_devconf_dflt));
2752 			break;
2753 		case 2:
2754 			/* use compiled values */
2755 			break;
2756 		}
2757 	}
2758 
2759 #ifdef CONFIG_SYSCTL
2760 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2761 	if (err < 0)
2762 		goto err_reg_all;
2763 
2764 	err = __devinet_sysctl_register(net, "default",
2765 					NETCONFA_IFINDEX_DEFAULT, dflt);
2766 	if (err < 0)
2767 		goto err_reg_dflt;
2768 
2769 	err = -ENOMEM;
2770 	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2771 					  ARRAY_SIZE(ctl_forward_entry));
2772 	if (!forw_hdr)
2773 		goto err_reg_ctl;
2774 	net->ipv4.forw_hdr = forw_hdr;
2775 #endif
2776 
2777 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2778 		INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);
2779 
2780 	INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);
2781 
2782 	net->ipv4.devconf_all = all;
2783 	net->ipv4.devconf_dflt = dflt;
2784 	return 0;
2785 
2786 #ifdef CONFIG_SYSCTL
2787 err_reg_ctl:
2788 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2789 err_reg_dflt:
2790 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2791 err_reg_all:
2792 	kfree(tbl);
2793 err_alloc_ctl:
2794 #endif
2795 	kfree(dflt);
2796 err_alloc_dflt:
2797 	kfree(all);
2798 err_alloc_all:
2799 	kfree(net->ipv4.inet_addr_lst);
2800 err_alloc_hash:
2801 	return err;
2802 }
2803 
2804 static __net_exit void devinet_exit_net(struct net *net)
2805 {
2806 #ifdef CONFIG_SYSCTL
2807 	const struct ctl_table *tbl;
2808 #endif
2809 
2810 	cancel_delayed_work_sync(&net->ipv4.addr_chk_work);
2811 
2812 #ifdef CONFIG_SYSCTL
2813 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2814 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2815 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2816 				    NETCONFA_IFINDEX_DEFAULT);
2817 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2818 				    NETCONFA_IFINDEX_ALL);
2819 	kfree(tbl);
2820 #endif
2821 	kfree(net->ipv4.devconf_dflt);
2822 	kfree(net->ipv4.devconf_all);
2823 	kfree(net->ipv4.inet_addr_lst);
2824 }
2825 
2826 static __net_initdata struct pernet_operations devinet_ops = {
2827 	.init = devinet_init_net,
2828 	.exit = devinet_exit_net,
2829 };
2830 
2831 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2832 	.family		  = AF_INET,
2833 	.fill_link_af	  = inet_fill_link_af,
2834 	.get_link_af_size = inet_get_link_af_size,
2835 	.validate_link_af = inet_validate_link_af,
2836 	.set_link_af	  = inet_set_link_af,
2837 };
2838 
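/* rtnetlink handlers registered at boot: the address doit handlers run
 * under the per-netns RTNL lock, while the GETADDR and netconf handlers
 * run without taking RTNL.
 */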
2839 static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
2840 	{.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr,
2841 	 .flags = RTNL_FLAG_DOIT_PERNET},
2842 	{.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr,
2843 	 .flags = RTNL_FLAG_DOIT_PERNET},
2844 	{.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr,
2845 	 .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
2846 	{.protocol = PF_INET, .msgtype = RTM_GETNETCONF,
2847 	 .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf,
2848 	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
2849 };
2850 
2851 void __init devinet_init(void)
2852 {
2853 	register_pernet_subsys(&devinet_ops);
2854 	register_netdevice_notifier(&ip_netdev_notifier);
2855 
2856 	if (rtnl_af_register(&inet_af_ops))
2857 		panic("Unable to register inet_af_ops\n");
2858 
2859 	rtnl_register_many(devinet_rtnl_msg_handlers);
2860 }
2861