xref: /linux/net/ipv4/devinet.c (revision 17e548405a81665fd14cee960db7d093d1396400)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include "igmp_internal.h"
50 #include <linux/slab.h>
51 #include <linux/hash.h>
52 #ifdef CONFIG_SYSCTL
53 #include <linux/sysctl.h>
54 #endif
55 #include <linux/kmod.h>
56 #include <linux/netconf.h>
57 
58 #include <net/arp.h>
59 #include <net/ip.h>
60 #include <net/route.h>
61 #include <net/ip_fib.h>
62 #include <net/rtnetlink.h>
63 #include <net/net_namespace.h>
64 #include <net/addrconf.h>
65 
/* Address flags that only make sense for IPv6 addresses.  They are masked
 * out of ifa_flags when an IPv4 address is inserted, see
 * __inet_insert_ifa().
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
70 
/* Built-in device configuration values.  The data[] array is indexed by
 * IPV4_DEVCONF_<attr> - 1; every entry not listed here starts out as 0.
 * NOTE(review): the users of this object are outside this excerpt --
 * presumably it backs the "all" settings; confirm against the rest of
 * the file.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
82 
/* Built-in "default" device configuration values.  Same layout as
 * ipv4_devconf (data[] indexed by IPV4_DEVCONF_<attr> - 1); the only
 * difference in initial values is that ACCEPT_SOURCE_ROUTE is enabled.
 * NOTE(review): presumably this seeds net->ipv4.devconf_dflt, which
 * inetdev_init() copies into every new in_device -- the wiring is outside
 * this excerpt; confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
95 
/* Access setting @attr in the per-namespace default device configuration
 * (net->ipv4.devconf_dflt).  @net is parenthesized so that any pointer
 * expression may be passed; @attr is the IPV4_DEVCONF_ suffix and is
 * forwarded untouched to IPV4_DEVCONF().
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
98 
/* Netlink attribute policy for IFA_* attributes of IPv4 address messages.
 * It is handed to nlmsg_parse_deprecated() by the RTM_NEWADDR/RTM_DELADDR
 * handlers in this file.  Labels are limited to IFNAMSIZ - 1 characters.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
	[IFA_PROTO]		= { .type = NLA_U8 },
};
110 
/* Geometry of the per-namespace local-address hash table
 * (net->ipv4.inet_addr_lst): inet_addr_hash() passes the shift to
 * hash_32(), and check_lifetime() walks all IN4_ADDR_HSIZE (1 << 8)
 * buckets.
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
113 
114 static u32 inet_addr_hash(const struct net *net, __be32 addr)
115 {
116 	u32 val = __ipv4_addr_hash(addr, net_hash_mix(net));
117 
118 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
119 }
120 
121 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
122 {
123 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
124 
125 	ASSERT_RTNL();
126 	hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
127 }
128 
/* Unhash @ifa from the per-namespace address hash.  Must be called with
 * RTNL held (asserted below).
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->addr_lst);
}
134 
135 /**
136  * __ip_dev_find - find the first device with a given source address.
137  * @net: the net namespace
138  * @addr: the source address
139  * @devref: if true, take a reference on the found device
140  *
141  * If a caller uses devref=false, it should be protected by RCU, or RTNL
142  */
143 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
144 {
145 	struct net_device *result = NULL;
146 	struct in_ifaddr *ifa;
147 
148 	rcu_read_lock();
149 	ifa = inet_lookup_ifaddr_rcu(net, addr);
150 	if (!ifa) {
151 		struct flowi4 fl4 = { .daddr = addr };
152 		struct fib_result res = { 0 };
153 		struct fib_table *local;
154 
155 		/* Fallback to FIB local table so that communication
156 		 * over loopback subnets work.
157 		 */
158 		local = fib_get_table(net, RT_TABLE_LOCAL);
159 		if (local &&
160 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
161 		    res.type == RTN_LOCAL)
162 			result = FIB_RES_DEV(res);
163 	} else {
164 		result = ifa->ifa_dev->dev;
165 	}
166 	if (result && devref)
167 		dev_hold(result);
168 	rcu_read_unlock();
169 	return result;
170 }
171 EXPORT_SYMBOL(__ip_dev_find);
172 
173 /* called under RCU lock */
174 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
175 {
176 	u32 hash = inet_addr_hash(net, addr);
177 	struct in_ifaddr *ifa;
178 
179 	hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
180 		if (ifa->ifa_local == addr)
181 			return ifa;
182 
183 	return NULL;
184 }
185 
/* Defined later in the file; used here to send RTM_NEWADDR/RTM_DELADDR
 * notifications for an address.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* inetaddr_chain is called with NETDEV_UP/NETDEV_DOWN and the in_ifaddr
 * when an address is added or removed; inetaddr_validator_chain is called
 * with NETDEV_UP and a struct in_validator_info before an address is
 * committed, so that listeners can veto it.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL there is nothing to register: succeed trivially. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
205 
206 /* Locks all the inet devices. */
207 
208 static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
209 {
210 	struct in_ifaddr *ifa;
211 
212 	ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
213 	if (!ifa)
214 		return NULL;
215 
216 	in_dev_hold(in_dev);
217 	ifa->ifa_dev = in_dev;
218 
219 	INIT_HLIST_NODE(&ifa->addr_lst);
220 
221 	return ifa;
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 
228 	in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
/* Schedule @ifa for freeing after an RCU grace period; the actual work
 * (in_dev_put() + kfree()) happens in inet_rcu_free_ifa().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	/* Our reference to ifa->ifa_dev must be freed ASAP
	 * to release the reference to the netdev the same way.
	 * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
	 */
	call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
}
240 
241 static void in_dev_free_rcu(struct rcu_head *head)
242 {
243 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
244 
245 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
246 	kfree(idev);
247 }
248 
249 void in_dev_finish_destroy(struct in_device *idev)
250 {
251 	struct net_device *dev = idev->dev;
252 
253 	WARN_ON(idev->ifa_list);
254 	WARN_ON(idev->mc_list);
255 #ifdef NET_REFCNT_DEBUG
256 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
257 #endif
258 	netdev_put(dev, &idev->dev_tracker);
259 	if (!idev->dead)
260 		pr_err("Freeing alive in_device %p\n", idev);
261 	else
262 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
263 }
264 EXPORT_SYMBOL(in_dev_finish_destroy);
265 
266 static struct in_device *inetdev_init(struct net_device *dev)
267 {
268 	struct in_device *in_dev;
269 	int err = -ENOMEM;
270 
271 	ASSERT_RTNL();
272 
273 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
274 	if (!in_dev)
275 		goto out;
276 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
277 			sizeof(in_dev->cnf));
278 	in_dev->cnf.sysctl = NULL;
279 	in_dev->dev = dev;
280 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
281 	if (!in_dev->arp_parms)
282 		goto out_kfree;
283 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
284 		netif_disable_lro(dev);
285 	/* Reference in_dev->dev */
286 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
287 	/* Account for reference dev->ip_ptr (below) */
288 	refcount_set(&in_dev->refcnt, 1);
289 
290 	if (dev != blackhole_netdev) {
291 		err = devinet_sysctl_register(in_dev);
292 		if (err) {
293 			in_dev->dead = 1;
294 			neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295 			in_dev_put(in_dev);
296 			in_dev = NULL;
297 			goto out;
298 		}
299 		ip_mc_init_dev(in_dev);
300 		if (dev->flags & IFF_UP)
301 			ip_mc_up(in_dev);
302 	}
303 
304 	/* we can receive as soon as ip_ptr is set -- do this last */
305 	rcu_assign_pointer(dev->ip_ptr, in_dev);
306 out:
307 	return in_dev ?: ERR_PTR(err);
308 out_kfree:
309 	kfree(in_dev);
310 	in_dev = NULL;
311 	goto out;
312 }
313 
314 static void inetdev_destroy(struct in_device *in_dev)
315 {
316 	struct net_device *dev;
317 	struct in_ifaddr *ifa;
318 
319 	ASSERT_RTNL();
320 
321 	dev = in_dev->dev;
322 
323 	in_dev->dead = 1;
324 
325 	ip_mc_destroy_dev(in_dev);
326 
327 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
328 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
329 		inet_free_ifa(ifa);
330 	}
331 
332 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
333 
334 	devinet_sysctl_unregister(in_dev);
335 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
336 	arp_ifdown(dev);
337 
338 	in_dev_put(in_dev);
339 }
340 
341 static int __init inet_blackhole_dev_init(void)
342 {
343 	int err = 0;
344 
345 	rtnl_lock();
346 	if (!inetdev_init(blackhole_netdev))
347 		err = -ENOMEM;
348 	rtnl_unlock();
349 
350 	return err;
351 }
352 late_initcall(inet_blackhole_dev_init);
353 
354 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
355 {
356 	const struct in_ifaddr *ifa;
357 
358 	rcu_read_lock();
359 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
360 		if (inet_ifa_match(a, ifa)) {
361 			if (!b || inet_ifa_match(b, ifa)) {
362 				rcu_read_unlock();
363 				return 1;
364 			}
365 		}
366 	}
367 	rcu_read_unlock();
368 	return 0;
369 }
370 
/* Remove the address that *@ifap points to from @in_dev.
 *
 * @in_dev:  device owning the address list
 * @ifap:    link (list head or a predecessor's ifa_next) referencing the
 *           address to delete
 * @destroy: if non-zero, the deleted address is also freed here;
 *           otherwise the caller frees it
 * @nlh, @portid: forwarded to rtmsg_ifa() for the netlink notifications
 *
 * When a primary address is deleted, the secondaries of the same subnet
 * are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set,
 * the first of them is promoted to primary.  None of this happens when
 * the device is already marked dead.
 *
 * Must be called with RTNL held.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1;
	struct in_ifaddr __rcu **last_prim;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* ifa1 is the address being deleted */
	ifa1 = rtnl_dereference(*ifap);
	last_prim = ifap;
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Remember the link after the last primary whose
			 * scope is not smaller than ifa1's: a promoted
			 * address is re-inserted there.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = &ifa->ifa_next;

			/* Skip primaries and addresses of other subnets,
			 * tracking the predecessor of a possible promotion
			 * candidate.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Secondary of ifa1: unlink, announce, free.
				 * ifap1 is not advanced because *ifap1 now
				 * already refers to the next entry.
				 */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* Move the promoted address from its current
			 * position to right behind the last primary.
			 */
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);

			last_sec = rtnl_dereference(*last_prim);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(*last_prim, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of the
		 * subnet that were removed above.
		 */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
483 
/* Delete the address referenced by *@ifap on behalf of the kernel itself:
 * same as __inet_del_ifa() with no originating netlink message (nlh is
 * NULL and portid is 0).
 */
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
490 
491 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
492 			     u32 portid, struct netlink_ext_ack *extack)
493 {
494 	struct in_ifaddr __rcu **last_primary, **ifap;
495 	struct in_device *in_dev = ifa->ifa_dev;
496 	struct net *net = dev_net(in_dev->dev);
497 	struct in_validator_info ivi;
498 	struct in_ifaddr *ifa1;
499 	int ret;
500 
501 	ASSERT_RTNL();
502 
503 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
504 	last_primary = &in_dev->ifa_list;
505 
506 	/* Don't set IPv6 only flags to IPv4 addresses */
507 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
508 
509 	ifap = &in_dev->ifa_list;
510 	ifa1 = rtnl_dereference(*ifap);
511 
512 	while (ifa1) {
513 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
514 		    ifa->ifa_scope <= ifa1->ifa_scope)
515 			last_primary = &ifa1->ifa_next;
516 		if (ifa1->ifa_mask == ifa->ifa_mask &&
517 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
518 			if (ifa1->ifa_local == ifa->ifa_local) {
519 				inet_free_ifa(ifa);
520 				return -EEXIST;
521 			}
522 			if (ifa1->ifa_scope != ifa->ifa_scope) {
523 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
524 				inet_free_ifa(ifa);
525 				return -EINVAL;
526 			}
527 			ifa->ifa_flags |= IFA_F_SECONDARY;
528 		}
529 
530 		ifap = &ifa1->ifa_next;
531 		ifa1 = rtnl_dereference(*ifap);
532 	}
533 
534 	/* Allow any devices that wish to register ifaddr validtors to weigh
535 	 * in now, before changes are committed.  The rntl lock is serializing
536 	 * access here, so the state should not change between a validator call
537 	 * and a final notify on commit.  This isn't invoked on promotion under
538 	 * the assumption that validators are checking the address itself, and
539 	 * not the flags.
540 	 */
541 	ivi.ivi_addr = ifa->ifa_address;
542 	ivi.ivi_dev = ifa->ifa_dev;
543 	ivi.extack = extack;
544 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
545 					   NETDEV_UP, &ivi);
546 	ret = notifier_to_errno(ret);
547 	if (ret) {
548 		inet_free_ifa(ifa);
549 		return ret;
550 	}
551 
552 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
553 		ifap = last_primary;
554 
555 	rcu_assign_pointer(ifa->ifa_next, *ifap);
556 	rcu_assign_pointer(*ifap, ifa);
557 
558 	inet_hash_insert(dev_net(in_dev->dev), ifa);
559 
560 	cancel_delayed_work(&net->ipv4.addr_chk_work);
561 	queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
562 
563 	/* Send message first, then call notifier.
564 	   Notifier will trigger FIB update, so that
565 	   listeners of netlink will know about new ifaddr */
566 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
567 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
568 
569 	return 0;
570 }
571 
572 static int inet_insert_ifa(struct in_ifaddr *ifa)
573 {
574 	if (!ifa->ifa_local) {
575 		inet_free_ifa(ifa);
576 		return 0;
577 	}
578 
579 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
580 }
581 
582 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
583 {
584 	struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
585 
586 	ipv4_devconf_setall(in_dev);
587 	neigh_parms_data_state_setall(in_dev->arp_parms);
588 
589 	if (ipv4_is_loopback(ifa->ifa_local))
590 		ifa->ifa_scope = RT_SCOPE_HOST;
591 	return inet_insert_ifa(ifa);
592 }
593 
594 /* Caller must hold RCU or RTNL :
595  * We dont take a reference on found in_device
596  */
597 struct in_device *inetdev_by_index(struct net *net, int ifindex)
598 {
599 	struct net_device *dev;
600 	struct in_device *in_dev = NULL;
601 
602 	rcu_read_lock();
603 	dev = dev_get_by_index_rcu(net, ifindex);
604 	if (dev)
605 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
606 	rcu_read_unlock();
607 	return in_dev;
608 }
609 EXPORT_SYMBOL(inetdev_by_index);
610 
611 /* Called only from RTNL semaphored context. No locks. */
612 
613 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
614 				    __be32 mask)
615 {
616 	struct in_ifaddr *ifa;
617 
618 	ASSERT_RTNL();
619 
620 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
621 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
622 			return ifa;
623 	}
624 	return NULL;
625 }
626 
627 static int ip_mc_autojoin_config(struct net *net, bool join,
628 				 const struct in_ifaddr *ifa)
629 {
630 #if defined(CONFIG_IP_MULTICAST)
631 	struct ip_mreqn mreq = {
632 		.imr_multiaddr.s_addr = ifa->ifa_address,
633 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
634 	};
635 	struct sock *sk = net->ipv4.mc_autojoin_sk;
636 	int ret;
637 
638 	ASSERT_RTNL_NET(net);
639 
640 	lock_sock(sk);
641 	if (join)
642 		ret = ip_mc_join_group(sk, &mreq);
643 	else
644 		ret = ip_mc_leave_group(sk, &mreq);
645 	release_sock(sk);
646 
647 	return ret;
648 #else
649 	return -EOPNOTSUPP;
650 #endif
651 }
652 
653 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
654 			    struct netlink_ext_ack *extack)
655 {
656 	struct net *net = sock_net(skb->sk);
657 	struct in_ifaddr __rcu **ifap;
658 	struct nlattr *tb[IFA_MAX+1];
659 	struct in_device *in_dev;
660 	struct ifaddrmsg *ifm;
661 	struct in_ifaddr *ifa;
662 	int err;
663 
664 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
665 				     ifa_ipv4_policy, extack);
666 	if (err < 0)
667 		goto out;
668 
669 	ifm = nlmsg_data(nlh);
670 
671 	rtnl_net_lock(net);
672 
673 	in_dev = inetdev_by_index(net, ifm->ifa_index);
674 	if (!in_dev) {
675 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
676 		err = -ENODEV;
677 		goto unlock;
678 	}
679 
680 	for (ifap = &in_dev->ifa_list;
681 	     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
682 	     ifap = &ifa->ifa_next) {
683 		if (tb[IFA_LOCAL] &&
684 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
685 			continue;
686 
687 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
688 			continue;
689 
690 		if (tb[IFA_ADDRESS] &&
691 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
692 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
693 			continue;
694 
695 		if (ipv4_is_multicast(ifa->ifa_address))
696 			ip_mc_autojoin_config(net, false, ifa);
697 
698 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
699 		goto unlock;
700 	}
701 
702 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
703 	err = -EADDRNOTAVAIL;
704 unlock:
705 	rtnl_net_unlock(net);
706 out:
707 	return err;
708 }
709 
/* Delayed work (net->ipv4.addr_chk_work) that enforces address lifetimes
 * in one namespace.
 *
 * Every hash bucket is first scanned under RCU only, to find out whether
 * any non-permanent address has to be deleted or deprecated and to work
 * out when the next check is due.  Only if a change is needed is the
 * bucket walked a second time under the per-netns RTNL lock, where
 * expired addresses are deleted and addresses past their preferred
 * lifetime are flagged IFA_F_DEPRECATED.  Finally the work re-queues
 * itself.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	struct net *net;
	int i;

	net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
	now = jiffies;
	/* latest point in time at which the next run must happen */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
		bool change_needed = false;

		/* Pass 1: lockless scan; fields are sampled with READ_ONCE()
		 * because they may be updated concurrently.
		 */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, head, addr_lst) {
			unsigned long age, tstamp;
			u32 preferred_lft;
			u32 valid_lft;
			u32 flags;

			flags = READ_ONCE(ifa->ifa_flags);
			if (flags & IFA_F_PERMANENT)
				continue;

			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
			tstamp = READ_ONCE(ifa->ifa_tstamp);
			/* We try to batch several events at once. */
			age = (now - tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (valid_lft != INFINITY_LIFE_TIME &&
			    age >= valid_lft) {
				/* expired: must be deleted */
				change_needed = true;
			} else if (preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= preferred_lft) {
				/* past preferred lifetime: next event for
				 * this address is its expiry
				 */
				if (time_before(tstamp + valid_lft * HZ, next))
					next = tstamp + valid_lft * HZ;

				if (!(flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(tstamp + preferred_lft * HZ,
					       next)) {
				/* next event is the deprecation */
				next = tstamp + preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;

		/* Pass 2: re-evaluate under the lock and apply the changes.
		 * The _safe iterator is needed because entries may be
		 * unhashed while walking.
		 */
		rtnl_net_lock(net);
		hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr __rcu **ifap;
				struct in_ifaddr *tmp;

				/* inet_del_ifa() needs the link that points
				 * at ifa, so search the device list for it.
				 */
				ifap = &ifa->ifa_dev->ifa_list;
				tmp = rtnl_net_dereference(net, *ifap);
				while (tmp) {
					if (tmp == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
					ifap = &tmp->ifa_next;
					tmp = rtnl_net_dereference(net, *ifap);
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_net_unlock(net);
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
			   next_sched - now);
}
818 
819 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
820 			     __u32 prefered_lft)
821 {
822 	unsigned long timeout;
823 	u32 flags;
824 
825 	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
826 
827 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
828 	if (addrconf_finite_timeout(timeout))
829 		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
830 	else
831 		flags |= IFA_F_PERMANENT;
832 
833 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
834 	if (addrconf_finite_timeout(timeout)) {
835 		if (timeout == 0)
836 			flags |= IFA_F_DEPRECATED;
837 		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
838 	}
839 	WRITE_ONCE(ifa->ifa_flags, flags);
840 	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
841 	if (!ifa->ifa_cstamp)
842 		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
843 }
844 
845 static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb,
846 			     struct netlink_ext_ack *extack,
847 			     __u32 *valid_lft, __u32 *prefered_lft)
848 {
849 	struct ifaddrmsg *ifm = nlmsg_data(nlh);
850 	int err;
851 
852 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
853 				     ifa_ipv4_policy, extack);
854 	if (err < 0)
855 		return err;
856 
857 	if (ifm->ifa_prefixlen > 32) {
858 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
859 		return -EINVAL;
860 	}
861 
862 	if (!tb[IFA_LOCAL]) {
863 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
864 		return -EINVAL;
865 	}
866 
867 	if (tb[IFA_CACHEINFO]) {
868 		struct ifa_cacheinfo *ci;
869 
870 		ci = nla_data(tb[IFA_CACHEINFO]);
871 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
872 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
873 			return -EINVAL;
874 		}
875 
876 		*valid_lft = ci->ifa_valid;
877 		*prefered_lft = ci->ifa_prefered;
878 	}
879 
880 	return 0;
881 }
882 
883 static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh,
884 					 struct nlattr **tb,
885 					 struct netlink_ext_ack *extack)
886 {
887 	struct ifaddrmsg *ifm = nlmsg_data(nlh);
888 	struct in_device *in_dev;
889 	struct net_device *dev;
890 	struct in_ifaddr *ifa;
891 	int err;
892 
893 	dev = __dev_get_by_index(net, ifm->ifa_index);
894 	err = -ENODEV;
895 	if (!dev) {
896 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
897 		goto errout;
898 	}
899 
900 	in_dev = __in_dev_get_rtnl_net(dev);
901 	err = -ENOBUFS;
902 	if (!in_dev)
903 		goto errout;
904 
905 	ifa = inet_alloc_ifa(in_dev);
906 	if (!ifa)
907 		/*
908 		 * A potential indev allocation can be left alive, it stays
909 		 * assigned to its device and is destroy with it.
910 		 */
911 		goto errout;
912 
913 	ipv4_devconf_setall(in_dev);
914 	neigh_parms_data_state_setall(in_dev->arp_parms);
915 
916 	if (!tb[IFA_ADDRESS])
917 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
918 
919 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
920 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
921 	ifa->ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
922 	ifa->ifa_scope = ifm->ifa_scope;
923 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
924 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
925 
926 	if (tb[IFA_BROADCAST])
927 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
928 
929 	if (tb[IFA_LABEL])
930 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
931 	else
932 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
933 
934 	if (tb[IFA_RT_PRIORITY])
935 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
936 
937 	if (tb[IFA_PROTO])
938 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
939 
940 	return ifa;
941 
942 errout:
943 	return ERR_PTR(err);
944 }
945 
946 static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa)
947 {
948 	struct in_device *in_dev = ifa->ifa_dev;
949 	struct in_ifaddr *ifa1;
950 
951 	in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) {
952 		if (ifa1->ifa_mask == ifa->ifa_mask &&
953 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
954 		    ifa1->ifa_local == ifa->ifa_local)
955 			return ifa1;
956 	}
957 
958 	return NULL;
959 }
960 
961 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
962 			    struct netlink_ext_ack *extack)
963 {
964 	__u32 prefered_lft = INFINITY_LIFE_TIME;
965 	__u32 valid_lft = INFINITY_LIFE_TIME;
966 	struct net *net = sock_net(skb->sk);
967 	struct in_ifaddr *ifa_existing;
968 	struct nlattr *tb[IFA_MAX + 1];
969 	struct in_ifaddr *ifa;
970 	int ret;
971 
972 	ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft);
973 	if (ret < 0)
974 		return ret;
975 
976 	if (!nla_get_in_addr(tb[IFA_LOCAL]))
977 		return 0;
978 
979 	rtnl_net_lock(net);
980 
981 	ifa = inet_rtm_to_ifa(net, nlh, tb, extack);
982 	if (IS_ERR(ifa)) {
983 		ret = PTR_ERR(ifa);
984 		goto unlock;
985 	}
986 
987 	ifa_existing = find_matching_ifa(net, ifa);
988 	if (!ifa_existing) {
989 		/* It would be best to check for !NLM_F_CREATE here but
990 		 * userspace already relies on not having to provide this.
991 		 */
992 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
993 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
994 			ret = ip_mc_autojoin_config(net, true, ifa);
995 			if (ret < 0) {
996 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
997 				inet_free_ifa(ifa);
998 				goto unlock;
999 			}
1000 		}
1001 
1002 		ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack);
1003 	} else {
1004 		u32 new_metric = ifa->ifa_rt_priority;
1005 		u8 new_proto = ifa->ifa_proto;
1006 
1007 		inet_free_ifa(ifa);
1008 
1009 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
1010 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
1011 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
1012 			ret = -EEXIST;
1013 			goto unlock;
1014 		}
1015 		ifa = ifa_existing;
1016 
1017 		if (ifa->ifa_rt_priority != new_metric) {
1018 			fib_modify_prefix_metric(ifa, new_metric);
1019 			ifa->ifa_rt_priority = new_metric;
1020 		}
1021 
1022 		ifa->ifa_proto = new_proto;
1023 
1024 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1025 		cancel_delayed_work(&net->ipv4.addr_chk_work);
1026 		queue_delayed_work(system_power_efficient_wq,
1027 				   &net->ipv4.addr_chk_work, 0);
1028 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1029 	}
1030 
1031 unlock:
1032 	rtnl_net_unlock(net);
1033 
1034 	return ret;
1035 }
1036 
1037 /*
1038  *	Determine a default network mask, based on the IP address.
1039  */
1040 
1041 static int inet_abc_len(__be32 addr)
1042 {
1043 	int rc = -1;	/* Something else, probably a multicast. */
1044 
1045 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1046 		rc = 0;
1047 	else {
1048 		__u32 haddr = ntohl(addr);
1049 		if (IN_CLASSA(haddr))
1050 			rc = 8;
1051 		else if (IN_CLASSB(haddr))
1052 			rc = 16;
1053 		else if (IN_CLASSC(haddr))
1054 			rc = 24;
1055 		else if (IN_CLASSE(haddr))
1056 			rc = 32;
1057 	}
1058 
1059 	return rc;
1060 }
1061 
1062 
/*
 * devinet_ioctl - handle the IPv4 per-interface address ioctls.
 * @net: network namespace the request applies to
 * @cmd: SIOC{G,S}IF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @ifr: request; ifr_name selects the device (optionally "dev:alias"),
 *       ifr_addr / ifr_flags carry the value to read or write
 *
 * The "get" commands overwrite ifr_addr with an AF_INET sockaddr holding the
 * requested field of the matching address. The "set" commands require
 * CAP_NET_ADMIN in net->user_ns and, except for SIOCSIFFLAGS, an AF_INET
 * ifr_addr.
 *
 * Returns 0 on success or a negative errno (-EPERM, -EINVAL, -ENODEV,
 * -EADDRNOTAVAIL, -ENOBUFS, or the result of dev_change_flags() /
 * inet_set_ifa()).
 */
1063 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1064 {
1065 	struct sockaddr_in sin_orig;
1066 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1067 	struct in_ifaddr __rcu **ifap = NULL;
1068 	struct in_device *in_dev;
1069 	struct in_ifaddr *ifa = NULL;
1070 	struct net_device *dev;
1071 	char *colon;
1072 	int ret = -EFAULT;
1073 	int tryaddrmatch = 0;
1074 
	/* Force NUL termination of the caller-supplied name. */
1075 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1076 
1077 	/* save original address for comparison */
1078 	memcpy(&sin_orig, sin, sizeof(*sin));
1079 
	/* Temporarily cut an alias suffix ("dev:N") so that the device
	 * lookups below see the bare device name; it is restored once the
	 * device has been found.
	 */
1080 	colon = strchr(ifr->ifr_name, ':');
1081 	if (colon)
1082 		*colon = 0;
1083 
1084 	dev_load(net, ifr->ifr_name);
1085 
	/* First pass over @cmd: permission and argument checks only. */
1086 	switch (cmd) {
1087 	case SIOCGIFADDR:	/* Get interface address */
1088 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1089 	case SIOCGIFDSTADDR:	/* Get the destination address */
1090 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1091 		/* Note that these ioctls will not sleep,
1092 		   so that we do not impose a lock.
1093 		   One day we will be forced to put shlock here (I mean SMP)
1094 		 */
1095 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1096 		memset(sin, 0, sizeof(*sin));
1097 		sin->sin_family = AF_INET;
1098 		break;
1099 
1100 	case SIOCSIFFLAGS:
1101 		ret = -EPERM;
1102 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1103 			goto out;
1104 		break;
1105 	case SIOCSIFADDR:	/* Set interface address (and family) */
1106 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1107 	case SIOCSIFDSTADDR:	/* Set the destination address */
1108 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1109 		ret = -EPERM;
1110 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1111 			goto out;
1112 		ret = -EINVAL;
1113 		if (sin->sin_family != AF_INET)
1114 			goto out;
1115 		break;
1116 	default:
1117 		ret = -EINVAL;
1118 		goto out;
1119 	}
1120 
1121 	rtnl_net_lock(net);
1122 
1123 	ret = -ENODEV;
1124 	dev = __dev_get_by_name(net, ifr->ifr_name);
1125 	if (!dev)
1126 		goto done;
1127 
	/* Put the alias separator back: labels are compared in full. */
1128 	if (colon)
1129 		*colon = ':';
1130 
1131 	in_dev = __in_dev_get_rtnl_net(dev);
1132 	if (in_dev) {
1133 		if (tryaddrmatch) {
1134 			/* Matthias Andree */
1135 			/* compare label and address (4.4BSD style) */
1136 			/* note: we only do this for a limited set of ioctls
1137 			   and only if the original address family was AF_INET.
1138 			   This is checked above. */
1139 
1140 			for (ifap = &in_dev->ifa_list;
1141 			     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1142 			     ifap = &ifa->ifa_next) {
1143 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1144 				    sin_orig.sin_addr.s_addr ==
1145 							ifa->ifa_local) {
1146 					break; /* found */
1147 				}
1148 			}
1149 		}
1150 		/* we didn't get a match, maybe the application is
1151 		   4.3BSD-style and passed in junk so we fall back to
1152 		   comparing just the label */
1153 		if (!ifa) {
1154 			for (ifap = &in_dev->ifa_list;
1155 			     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1156 			     ifap = &ifa->ifa_next)
1157 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1158 					break;
1159 		}
1160 	}
1161 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address entry.
	 */
1162 	ret = -EADDRNOTAVAIL;
1163 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1164 		goto done;
1165 
	/* Second pass over @cmd: perform the operation. */
1166 	switch (cmd) {
1167 	case SIOCGIFADDR:	/* Get interface address */
1168 		ret = 0;
1169 		sin->sin_addr.s_addr = ifa->ifa_local;
1170 		break;
1171 
1172 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1173 		ret = 0;
1174 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1175 		break;
1176 
1177 	case SIOCGIFDSTADDR:	/* Get the destination address */
1178 		ret = 0;
1179 		sin->sin_addr.s_addr = ifa->ifa_address;
1180 		break;
1181 
1182 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1183 		ret = 0;
1184 		sin->sin_addr.s_addr = ifa->ifa_mask;
1185 		break;
1186 
1187 	case SIOCSIFFLAGS:
		/* With an alias name the request targets a single address:
		 * clearing IFF_UP deletes that address, anything else is a
		 * successful no-op.
		 */
1188 		if (colon) {
1189 			ret = -EADDRNOTAVAIL;
1190 			if (!ifa)
1191 				break;
1192 			ret = 0;
1193 			if (!(ifr->ifr_flags & IFF_UP))
1194 				inet_del_ifa(in_dev, ifap, 1);
1195 			break;
1196 		}
1197 
1198 		/* NETDEV_UP/DOWN/CHANGE could touch a peer dev */
1199 		ASSERT_RTNL();
1200 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1201 		break;
1202 
1203 	case SIOCSIFADDR:	/* Set interface address (and family) */
1204 		ret = -EINVAL;
1205 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1206 			break;
1207 
1208 		if (!ifa) {
			/* No entry for this label yet: allocate a new one. */
1209 			ret = -ENOBUFS;
1210 			if (!in_dev)
1211 				break;
1212 			ifa = inet_alloc_ifa(in_dev);
1213 			if (!ifa)
1214 				break;
1215 
1216 			if (colon)
1217 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1218 			else
1219 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1220 		} else {
			/* Existing entry: nothing to do if the address is
			 * unchanged, otherwise unlink it (second argument 0)
			 * and reuse the same structure below.
			 */
1221 			ret = 0;
1222 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1223 				break;
1224 			inet_del_ifa(in_dev, ifap, 0);
1225 			ifa->ifa_broadcast = 0;
1226 			ifa->ifa_scope = 0;
1227 		}
1228 
1229 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1230 
		/* Non point-to-point devices get the class-based default
		 * prefix from inet_abc_len(); point-to-point devices get /32.
		 */
1231 		if (!(dev->flags & IFF_POINTOPOINT)) {
1232 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1233 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1234 			if ((dev->flags & IFF_BROADCAST) &&
1235 			    ifa->ifa_prefixlen < 31)
1236 				ifa->ifa_broadcast = ifa->ifa_address |
1237 						     ~ifa->ifa_mask;
1238 		} else {
1239 			ifa->ifa_prefixlen = 32;
1240 			ifa->ifa_mask = inet_make_mask(32);
1241 		}
1242 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1243 		ret = inet_set_ifa(dev, ifa);
1244 		break;
1245 
1246 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1247 		ret = 0;
1248 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1249 			inet_del_ifa(in_dev, ifap, 0);
1250 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1251 			inet_insert_ifa(ifa);
1252 		}
1253 		break;
1254 
1255 	case SIOCSIFDSTADDR:	/* Set the destination address */
1256 		ret = 0;
1257 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1258 			break;
1259 		ret = -EINVAL;
1260 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1261 			break;
1262 		ret = 0;
1263 		inet_del_ifa(in_dev, ifap, 0);
1264 		ifa->ifa_address = sin->sin_addr.s_addr;
1265 		inet_insert_ifa(ifa);
1266 		break;
1267 
1268 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1269 
1270 		/*
1271 		 *	The mask we set must be legal.
1272 		 */
1273 		ret = -EINVAL;
1274 		if (bad_mask(sin->sin_addr.s_addr, 0))
1275 			break;
1276 		ret = 0;
1277 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1278 			__be32 old_mask = ifa->ifa_mask;
1279 			inet_del_ifa(in_dev, ifap, 0);
1280 			ifa->ifa_mask = sin->sin_addr.s_addr;
1281 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1282 
1283 			/* See if current broadcast address matches
1284 			 * with current netmask, then recalculate
1285 			 * the broadcast address. Otherwise it's a
1286 			 * funny address, so don't touch it since
1287 			 * the user seems to know what (s)he's doing...
1288 			 */
1289 			if ((dev->flags & IFF_BROADCAST) &&
1290 			    (ifa->ifa_prefixlen < 31) &&
1291 			    (ifa->ifa_broadcast ==
1292 			     (ifa->ifa_local|~old_mask))) {
1293 				ifa->ifa_broadcast = (ifa->ifa_local |
1294 						      ~sin->sin_addr.s_addr);
1295 			}
1296 			inet_insert_ifa(ifa);
1297 		}
1298 		break;
1299 	}
1300 done:
1301 	rtnl_net_unlock(net);
1302 out:
1303 	return ret;
1304 }
1305 
1306 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1307 {
1308 	struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
1309 	const struct in_ifaddr *ifa;
1310 	struct ifreq ifr;
1311 	int done = 0;
1312 
1313 	if (WARN_ON(size > sizeof(struct ifreq)))
1314 		goto out;
1315 
1316 	if (!in_dev)
1317 		goto out;
1318 
1319 	in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) {
1320 		if (!buf) {
1321 			done += size;
1322 			continue;
1323 		}
1324 		if (len < size)
1325 			break;
1326 		memset(&ifr, 0, sizeof(struct ifreq));
1327 		strcpy(ifr.ifr_name, ifa->ifa_label);
1328 
1329 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1330 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1331 								ifa->ifa_local;
1332 
1333 		if (copy_to_user(buf + done, &ifr, size)) {
1334 			done = -EFAULT;
1335 			break;
1336 		}
1337 		len  -= size;
1338 		done += size;
1339 	}
1340 out:
1341 	return done;
1342 }
1343 
1344 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1345 				 int scope)
1346 {
1347 	const struct in_ifaddr *ifa;
1348 
1349 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1350 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1351 			continue;
1352 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1353 		    ifa->ifa_scope <= scope)
1354 			return ifa->ifa_local;
1355 	}
1356 
1357 	return 0;
1358 }
1359 
1360 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1361 {
1362 	const struct in_ifaddr *ifa;
1363 	__be32 addr = 0;
1364 	unsigned char localnet_scope = RT_SCOPE_HOST;
1365 	struct in_device *in_dev;
1366 	struct net *net;
1367 	int master_idx;
1368 
1369 	rcu_read_lock();
1370 	net = dev_net_rcu(dev);
1371 	in_dev = __in_dev_get_rcu(dev);
1372 	if (!in_dev)
1373 		goto no_in_dev;
1374 
1375 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1376 		localnet_scope = RT_SCOPE_LINK;
1377 
1378 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1379 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1380 			continue;
1381 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1382 			continue;
1383 		if (!dst || inet_ifa_match(dst, ifa)) {
1384 			addr = ifa->ifa_local;
1385 			break;
1386 		}
1387 		if (!addr)
1388 			addr = ifa->ifa_local;
1389 	}
1390 
1391 	if (addr)
1392 		goto out_unlock;
1393 no_in_dev:
1394 	master_idx = l3mdev_master_ifindex_rcu(dev);
1395 
1396 	/* For VRFs, the VRF device takes the place of the loopback device,
1397 	 * with addresses on it being preferred.  Note in such cases the
1398 	 * loopback device will be among the devices that fail the master_idx
1399 	 * equality check in the loop below.
1400 	 */
1401 	if (master_idx &&
1402 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1403 	    (in_dev = __in_dev_get_rcu(dev))) {
1404 		addr = in_dev_select_addr(in_dev, scope);
1405 		if (addr)
1406 			goto out_unlock;
1407 	}
1408 
1409 	/* Not loopback addresses on loopback should be preferred
1410 	   in this case. It is important that lo is the first interface
1411 	   in dev_base list.
1412 	 */
1413 	for_each_netdev_rcu(net, dev) {
1414 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1415 			continue;
1416 
1417 		in_dev = __in_dev_get_rcu(dev);
1418 		if (!in_dev)
1419 			continue;
1420 
1421 		addr = in_dev_select_addr(in_dev, scope);
1422 		if (addr)
1423 			goto out_unlock;
1424 	}
1425 out_unlock:
1426 	rcu_read_unlock();
1427 	return addr;
1428 }
1429 EXPORT_SYMBOL(inet_select_addr);
1430 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *  - addr: the first address that equals @local (or any address when @local
 *          is 0) and whose effective scope, min(ifa_scope, localnet_scope),
 *          does not exceed @scope;
 *  - same: whether some entry's subnet matches both @local and @dst (a zero
 *          value counts as a match).
 *
 * Returns addr only when a subnet match was also found, otherwise 0.
 */
1431 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1432 			      __be32 local, int scope)
1433 {
1434 	unsigned char localnet_scope = RT_SCOPE_HOST;
1435 	const struct in_ifaddr *ifa;
1436 	__be32 addr = 0;
1437 	int same = 0;
1438 
1439 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1440 		localnet_scope = RT_SCOPE_LINK;
1441 
1442 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1443 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1444 
		/* Candidate address not chosen yet: take this one if it fits. */
1445 		if (!addr &&
1446 		    (local == ifa->ifa_local || !local) &&
1447 		    min_scope <= scope) {
1448 			addr = ifa->ifa_local;
1449 			if (same)
1450 				break;
1451 		}
		/* Subnet match not found yet: test this entry. */
1452 		if (!same) {
1453 			same = (!local || inet_ifa_match(local, ifa)) &&
1454 				(!dst || inet_ifa_match(dst, ifa));
1455 			if (same && addr) {
1456 				if (local || !dst)
1457 					break;
1458 				/* Is the selected addr into dst subnet? */
1459 				if (inet_ifa_match(addr, ifa))
1460 					break;
1461 				/* No, then can we use new local src? */
1462 				if (min_scope <= scope) {
1463 					addr = ifa->ifa_local;
1464 					break;
1465 				}
1466 				/* search for large dst subnet for addr */
1467 				same = 0;
1468 			}
1469 		}
1470 	}
1471 
1472 	return same ? addr : 0;
1473 }
1474 
1475 /*
1476  * Confirm that local IP address exists using wildcards:
1477  * - net: netns to check, cannot be NULL
1478  * - in_dev: only on this interface, NULL=any interface
1479  * - dst: only in the same subnet as dst, 0=any dst
1480  * - local: address, 0=autoselect the local address
1481  * - scope: maximum allowed scope value for the local address
1482  */
1483 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1484 			 __be32 dst, __be32 local, int scope)
1485 {
1486 	__be32 addr = 0;
1487 	struct net_device *dev;
1488 
1489 	if (in_dev)
1490 		return confirm_addr_indev(in_dev, dst, local, scope);
1491 
1492 	rcu_read_lock();
1493 	for_each_netdev_rcu(net, dev) {
1494 		in_dev = __in_dev_get_rcu(dev);
1495 		if (in_dev) {
1496 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1497 			if (addr)
1498 				break;
1499 		}
1500 	}
1501 	rcu_read_unlock();
1502 
1503 	return addr;
1504 }
1505 EXPORT_SYMBOL(inet_confirm_addr);
1506 
1507 /*
1508  *	Device notifier
1509  */
1510 
1511 int register_inetaddr_notifier(struct notifier_block *nb)
1512 {
1513 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1514 }
1515 EXPORT_SYMBOL(register_inetaddr_notifier);
1516 
1517 int unregister_inetaddr_notifier(struct notifier_block *nb)
1518 {
1519 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1520 }
1521 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1522 
1523 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1524 {
1525 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1526 }
1527 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1528 
1529 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1530 {
1531 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1532 	    nb);
1533 }
1534 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1535 
1536 /* Rename ifa_labels for a device name change. Make some effort to preserve
1537  * existing alias numbering and to create unique labels if possible.
1538 */
1539 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1540 {
1541 	struct in_ifaddr *ifa;
1542 	int named = 0;
1543 
1544 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1545 		char old[IFNAMSIZ], *dot;
1546 
1547 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1548 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1549 		if (named++ == 0)
1550 			goto skip;
1551 		dot = strchr(old, ':');
1552 		if (!dot) {
1553 			sprintf(old, ":%d", named);
1554 			dot = old;
1555 		}
1556 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1557 			strcat(ifa->ifa_label, dot);
1558 		else
1559 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1560 skip:
1561 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1562 	}
1563 }
1564 
1565 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1566 					struct in_device *in_dev)
1567 
1568 {
1569 	const struct in_ifaddr *ifa;
1570 
1571 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1572 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1573 			 ifa->ifa_local, dev,
1574 			 ifa->ifa_local, NULL,
1575 			 dev->dev_addr, NULL);
1576 	}
1577 }
1578 
1579 /* Called only under RTNL semaphore */
1580 
/*
 * Netdevice notifier callback for the IPv4 per-device state.
 *
 * @this:  notifier block (unused here).
 * @event: NETDEV_* event code.
 * @ptr:   notifier info from which the affected device is extracted.
 *
 * When the device has no in_device yet, one is created on NETDEV_REGISTER,
 * or on NETDEV_CHANGEMTU if the new MTU passes inetdev_valid_mtu(). Otherwise
 * the event is dispatched by the switch below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case the error is passed through
 * notifier_from_errno().
 */
1581 static int inetdev_event(struct notifier_block *this, unsigned long event,
1582 			 void *ptr)
1583 {
1584 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1585 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1586 
1587 	ASSERT_RTNL();
1588 
1589 	if (!in_dev) {
1590 		if (event == NETDEV_REGISTER) {
1591 			in_dev = inetdev_init(dev);
1592 			if (IS_ERR(in_dev))
1593 				return notifier_from_errno(PTR_ERR(in_dev));
1594 			if (dev->flags & IFF_LOOPBACK) {
1595 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1596 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1597 			}
1598 		} else if (event == NETDEV_CHANGEMTU) {
1599 			/* Re-enabling IP */
1600 			if (inetdev_valid_mtu(dev->mtu))
1601 				in_dev = inetdev_init(dev);
1602 		}
1603 		goto out;
1604 	}
1605 
1606 	switch (event) {
1607 	case NETDEV_REGISTER:
		/* Only reached when an in_device already exists at
		 * registration time, which the debug message calls a bug.
		 */
1608 		pr_debug("%s: bug\n", __func__);
1609 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1610 		break;
1611 	case NETDEV_UP:
1612 		if (!inetdev_valid_mtu(dev->mtu))
1613 			break;
		/* A loopback device gets INADDR_LOOPBACK/8 with host scope
		 * installed when it comes up.
		 */
1614 		if (dev->flags & IFF_LOOPBACK) {
1615 			struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);
1616 
1617 			if (ifa) {
1618 				ifa->ifa_local =
1619 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1620 				ifa->ifa_prefixlen = 8;
1621 				ifa->ifa_mask = inet_make_mask(8);
1622 				ifa->ifa_scope = RT_SCOPE_HOST;
1623 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1624 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1625 						 INFINITY_LIFE_TIME);
1626 				ipv4_devconf_setall(in_dev);
1627 				neigh_parms_data_state_setall(in_dev->arp_parms);
1628 				inet_insert_ifa(ifa);
1629 			}
1630 		}
1631 		ip_mc_up(in_dev);
1632 		fallthrough;
1633 	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce themselves when the
		 * arp_notify setting is enabled; NOTIFY_PEERS always does.
		 */
1634 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1635 			break;
1636 		fallthrough;
1637 	case NETDEV_NOTIFY_PEERS:
1638 		/* Send gratuitous ARP to notify of link change */
1639 		inetdev_send_gratuitous_arp(dev, in_dev);
1640 		break;
1641 	case NETDEV_DOWN:
1642 		ip_mc_down(in_dev);
1643 		break;
1644 	case NETDEV_PRE_TYPE_CHANGE:
1645 		ip_mc_unmap(in_dev);
1646 		break;
1647 	case NETDEV_POST_TYPE_CHANGE:
1648 		ip_mc_remap(in_dev);
1649 		break;
1650 	case NETDEV_CHANGEMTU:
1651 		if (inetdev_valid_mtu(dev->mtu))
1652 			break;
1653 		/* disable IP when MTU is not enough */
1654 		fallthrough;
1655 	case NETDEV_UNREGISTER:
1656 		inetdev_destroy(in_dev);
1657 		break;
1658 	case NETDEV_CHANGENAME:
1659 		/* Do not notify about label change, this event is
1660 		 * not interesting to applications using netlink.
1661 		 */
1662 		inetdev_changename(dev, in_dev);
1663 
		/* Re-register the sysctl entries for this in_device after the
		 * rename.
		 */
1664 		devinet_sysctl_unregister(in_dev);
1665 		devinet_sysctl_register(in_dev);
1666 		break;
1667 	}
1668 out:
1669 	return NOTIFY_DONE;
1670 }
1671 
/* Notifier block whose callback is inetdev_event(). */
1672 static struct notifier_block ip_netdev_notifier = {
1673 	.notifier_call = inetdev_event,
1674 };
1675 
1676 static size_t inet_nlmsg_size(void)
1677 {
1678 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1679 	       + nla_total_size(4) /* IFA_ADDRESS */
1680 	       + nla_total_size(4) /* IFA_LOCAL */
1681 	       + nla_total_size(4) /* IFA_BROADCAST */
1682 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1683 	       + nla_total_size(4)  /* IFA_FLAGS */
1684 	       + nla_total_size(1)  /* IFA_PROTO */
1685 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1686 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1687 }
1688 
1689 static inline u32 cstamp_delta(unsigned long cstamp)
1690 {
1691 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1692 }
1693 
1694 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1695 			 unsigned long tstamp, u32 preferred, u32 valid)
1696 {
1697 	struct ifa_cacheinfo ci;
1698 
1699 	ci.cstamp = cstamp_delta(cstamp);
1700 	ci.tstamp = cstamp_delta(tstamp);
1701 	ci.ifa_prefered = preferred;
1702 	ci.ifa_valid = valid;
1703 
1704 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1705 }
1706 
1707 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1708 			    struct inet_fill_args *args)
1709 {
1710 	struct ifaddrmsg *ifm;
1711 	struct nlmsghdr  *nlh;
1712 	unsigned long tstamp;
1713 	u32 preferred, valid;
1714 	u32 flags;
1715 
1716 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1717 			args->flags);
1718 	if (!nlh)
1719 		return -EMSGSIZE;
1720 
1721 	ifm = nlmsg_data(nlh);
1722 	ifm->ifa_family = AF_INET;
1723 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1724 
1725 	flags = READ_ONCE(ifa->ifa_flags);
1726 	/* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
1727 	 * The 32bit value is given in IFA_FLAGS attribute.
1728 	 */
1729 	ifm->ifa_flags = (__u8)flags;
1730 
1731 	ifm->ifa_scope = ifa->ifa_scope;
1732 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1733 
1734 	if (args->netnsid >= 0 &&
1735 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1736 		goto nla_put_failure;
1737 
1738 	tstamp = READ_ONCE(ifa->ifa_tstamp);
1739 	if (!(flags & IFA_F_PERMANENT)) {
1740 		preferred = READ_ONCE(ifa->ifa_preferred_lft);
1741 		valid = READ_ONCE(ifa->ifa_valid_lft);
1742 		if (preferred != INFINITY_LIFE_TIME) {
1743 			long tval = (jiffies - tstamp) / HZ;
1744 
1745 			if (preferred > tval)
1746 				preferred -= tval;
1747 			else
1748 				preferred = 0;
1749 			if (valid != INFINITY_LIFE_TIME) {
1750 				if (valid > tval)
1751 					valid -= tval;
1752 				else
1753 					valid = 0;
1754 			}
1755 		}
1756 	} else {
1757 		preferred = INFINITY_LIFE_TIME;
1758 		valid = INFINITY_LIFE_TIME;
1759 	}
1760 	if ((ifa->ifa_address &&
1761 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1762 	    (ifa->ifa_local &&
1763 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1764 	    (ifa->ifa_broadcast &&
1765 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1766 	    (ifa->ifa_label[0] &&
1767 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1768 	    (ifa->ifa_proto &&
1769 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1770 	    nla_put_u32(skb, IFA_FLAGS, flags) ||
1771 	    (ifa->ifa_rt_priority &&
1772 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1773 	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1774 			  preferred, valid))
1775 		goto nla_put_failure;
1776 
1777 	nlmsg_end(skb, nlh);
1778 	return 0;
1779 
1780 nla_put_failure:
1781 	nlmsg_cancel(skb, nlh);
1782 	return -EMSGSIZE;
1783 }
1784 
1785 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1786 				      struct inet_fill_args *fillargs,
1787 				      struct net **tgt_net, struct sock *sk,
1788 				      struct netlink_callback *cb)
1789 {
1790 	struct netlink_ext_ack *extack = cb->extack;
1791 	struct nlattr *tb[IFA_MAX+1];
1792 	struct ifaddrmsg *ifm;
1793 	int err, i;
1794 
1795 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1796 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1797 		return -EINVAL;
1798 	}
1799 
1800 	ifm = nlmsg_data(nlh);
1801 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1802 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1803 		return -EINVAL;
1804 	}
1805 
1806 	fillargs->ifindex = ifm->ifa_index;
1807 	if (fillargs->ifindex) {
1808 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1809 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1810 	}
1811 
1812 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1813 					    ifa_ipv4_policy, extack);
1814 	if (err < 0)
1815 		return err;
1816 
1817 	for (i = 0; i <= IFA_MAX; ++i) {
1818 		if (!tb[i])
1819 			continue;
1820 
1821 		if (i == IFA_TARGET_NETNSID) {
1822 			struct net *net;
1823 
1824 			fillargs->netnsid = nla_get_s32(tb[i]);
1825 
1826 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1827 			if (IS_ERR(net)) {
1828 				fillargs->netnsid = -1;
1829 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1830 				return PTR_ERR(net);
1831 			}
1832 			*tgt_net = net;
1833 		} else {
1834 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1835 			return -EINVAL;
1836 		}
1837 	}
1838 
1839 	return 0;
1840 }
1841 
1842 static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb,
1843 				struct netlink_callback *cb, int *s_ip_idx,
1844 				struct inet_fill_args *fillargs)
1845 {
1846 	struct ip_mc_list *im;
1847 	int ip_idx = 0;
1848 	int err;
1849 
1850 	for (im = rcu_dereference(in_dev->mc_list);
1851 	     im;
1852 	     im = rcu_dereference(im->next_rcu)) {
1853 		if (ip_idx < *s_ip_idx) {
1854 			ip_idx++;
1855 			continue;
1856 		}
1857 		err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs);
1858 		if (err < 0)
1859 			goto done;
1860 
1861 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1862 		ip_idx++;
1863 	}
1864 	err = 0;
1865 	ip_idx = 0;
1866 done:
1867 	*s_ip_idx = ip_idx;
1868 	return err;
1869 }
1870 
1871 static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb,
1872 			      struct netlink_callback *cb, int *s_ip_idx,
1873 			      struct inet_fill_args *fillargs)
1874 {
1875 	struct in_ifaddr *ifa;
1876 	int ip_idx = 0;
1877 	int err;
1878 
1879 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1880 		if (ip_idx < *s_ip_idx) {
1881 			ip_idx++;
1882 			continue;
1883 		}
1884 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1885 		if (err < 0)
1886 			goto done;
1887 
1888 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1889 		ip_idx++;
1890 	}
1891 	err = 0;
1892 	ip_idx = 0;
1893 done:
1894 	*s_ip_idx = ip_idx;
1895 
1896 	return err;
1897 }
1898 
1899 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1900 			    struct netlink_callback *cb, int *s_ip_idx,
1901 			    struct inet_fill_args *fillargs)
1902 {
1903 	switch (fillargs->event) {
1904 	case RTM_NEWADDR:
1905 		return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs);
1906 	case RTM_GETMULTICAST:
1907 		return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx,
1908 					    fillargs);
1909 	default:
1910 		return -EINVAL;
1911 	}
1912 }
1913 
1914 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1915  */
1916 static u32 inet_base_seq(const struct net *net)
1917 {
1918 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1919 		  READ_ONCE(net->dev_base_seq);
1920 
1921 	/* Must not return 0 (see nl_dump_check_consistent()).
1922 	 * Chose a value far away from 0.
1923 	 */
1924 	if (!res)
1925 		res = 0x80000000;
1926 	return res;
1927 }
1928 
1929 static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
1930 			  int event)
1931 {
1932 	const struct nlmsghdr *nlh = cb->nlh;
1933 	struct inet_fill_args fillargs = {
1934 		.portid = NETLINK_CB(cb->skb).portid,
1935 		.seq = nlh->nlmsg_seq,
1936 		.event = event,
1937 		.flags = NLM_F_MULTI,
1938 		.netnsid = -1,
1939 	};
1940 	struct net *net = sock_net(skb->sk);
1941 	struct net *tgt_net = net;
1942 	struct {
1943 		unsigned long ifindex;
1944 		int ip_idx;
1945 	} *ctx = (void *)cb->ctx;
1946 	struct in_device *in_dev;
1947 	struct net_device *dev;
1948 	int err = 0;
1949 
1950 	rcu_read_lock();
1951 	if (cb->strict_check) {
1952 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1953 						 skb->sk, cb);
1954 		if (err < 0)
1955 			goto done;
1956 
1957 		if (fillargs.ifindex) {
1958 			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1959 			if (!dev) {
1960 				err = -ENODEV;
1961 				goto done;
1962 			}
1963 			in_dev = __in_dev_get_rcu(dev);
1964 			if (!in_dev)
1965 				goto done;
1966 			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1967 					       &fillargs);
1968 			goto done;
1969 		}
1970 	}
1971 
1972 	cb->seq = inet_base_seq(tgt_net);
1973 
1974 	for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1975 		in_dev = __in_dev_get_rcu(dev);
1976 		if (!in_dev)
1977 			continue;
1978 		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1979 				       &fillargs);
1980 		if (err < 0)
1981 			goto done;
1982 	}
1983 done:
1984 	if (fillargs.netnsid >= 0)
1985 		put_net(tgt_net);
1986 	rcu_read_unlock();
1987 	return err;
1988 }
1989 
1990 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1991 {
1992 	return inet_dump_addr(skb, cb, RTM_NEWADDR);
1993 }
1994 
1995 static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
1996 {
1997 	return inet_dump_addr(skb, cb, RTM_GETMULTICAST);
1998 }
1999 
2000 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
2001 		      u32 portid)
2002 {
2003 	struct inet_fill_args fillargs = {
2004 		.portid = portid,
2005 		.seq = nlh ? nlh->nlmsg_seq : 0,
2006 		.event = event,
2007 		.flags = 0,
2008 		.netnsid = -1,
2009 	};
2010 	struct sk_buff *skb;
2011 	int err = -ENOBUFS;
2012 	struct net *net;
2013 
2014 	net = dev_net(ifa->ifa_dev->dev);
2015 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
2016 	if (!skb)
2017 		goto errout;
2018 
2019 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
2020 	if (err < 0) {
2021 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
2022 		WARN_ON(err == -EMSGSIZE);
2023 		kfree_skb(skb);
2024 		goto errout;
2025 	}
2026 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
2027 	return;
2028 errout:
2029 	rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
2030 }
2031 
2032 static size_t inet_get_link_af_size(const struct net_device *dev,
2033 				    u32 ext_filter_mask)
2034 {
2035 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
2036 
2037 	if (!in_dev)
2038 		return 0;
2039 
2040 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
2041 }
2042 
2043 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
2044 			     u32 ext_filter_mask)
2045 {
2046 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
2047 	struct nlattr *nla;
2048 	int i;
2049 
2050 	if (!in_dev)
2051 		return -ENODATA;
2052 
2053 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
2054 	if (!nla)
2055 		return -EMSGSIZE;
2056 
2057 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
2058 		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
2059 
2060 	return 0;
2061 }
2062 
/* Attribute policy for the AF_INET link attributes: IFLA_INET_CONF must be
 * a nested attribute.
 */
2063 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
2064 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
2065 };
2066 
2067 static int inet_validate_link_af(const struct net_device *dev,
2068 				 const struct nlattr *nla,
2069 				 struct netlink_ext_ack *extack)
2070 {
2071 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2072 	int err, rem;
2073 
2074 	if (dev && !__in_dev_get_rtnl(dev))
2075 		return -EAFNOSUPPORT;
2076 
2077 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2078 					  inet_af_policy, extack);
2079 	if (err < 0)
2080 		return err;
2081 
2082 	if (tb[IFLA_INET_CONF]) {
2083 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2084 			int cfgid = nla_type(a);
2085 
2086 			if (nla_len(a) < 4)
2087 				return -EINVAL;
2088 
2089 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2090 				return -EINVAL;
2091 		}
2092 	}
2093 
2094 	return 0;
2095 }
2096 
2097 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2098 			    struct netlink_ext_ack *extack)
2099 {
2100 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2101 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2102 	int rem;
2103 
2104 	if (!in_dev)
2105 		return -EAFNOSUPPORT;
2106 
2107 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2108 		return -EINVAL;
2109 
2110 	if (tb[IFLA_INET_CONF]) {
2111 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2112 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2113 	}
2114 
2115 	return 0;
2116 }
2117 
2118 static int inet_netconf_msgsize_devconf(int type)
2119 {
2120 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2121 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2122 	bool all = false;
2123 
2124 	if (type == NETCONFA_ALL)
2125 		all = true;
2126 
2127 	if (all || type == NETCONFA_FORWARDING)
2128 		size += nla_total_size(4);
2129 	if (all || type == NETCONFA_RP_FILTER)
2130 		size += nla_total_size(4);
2131 	if (all || type == NETCONFA_MC_FORWARDING)
2132 		size += nla_total_size(4);
2133 	if (all || type == NETCONFA_BC_FORWARDING)
2134 		size += nla_total_size(4);
2135 	if (all || type == NETCONFA_PROXY_NEIGH)
2136 		size += nla_total_size(4);
2137 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2138 		size += nla_total_size(4);
2139 
2140 	return size;
2141 }
2142 
2143 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2144 				     const struct ipv4_devconf *devconf,
2145 				     u32 portid, u32 seq, int event,
2146 				     unsigned int flags, int type)
2147 {
2148 	struct nlmsghdr  *nlh;
2149 	struct netconfmsg *ncm;
2150 	bool all = false;
2151 
2152 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2153 			flags);
2154 	if (!nlh)
2155 		return -EMSGSIZE;
2156 
2157 	if (type == NETCONFA_ALL)
2158 		all = true;
2159 
2160 	ncm = nlmsg_data(nlh);
2161 	ncm->ncm_family = AF_INET;
2162 
2163 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2164 		goto nla_put_failure;
2165 
2166 	if (!devconf)
2167 		goto out;
2168 
2169 	if ((all || type == NETCONFA_FORWARDING) &&
2170 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2171 			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2172 		goto nla_put_failure;
2173 	if ((all || type == NETCONFA_RP_FILTER) &&
2174 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2175 			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2176 		goto nla_put_failure;
2177 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2178 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2179 			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2180 		goto nla_put_failure;
2181 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2182 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2183 			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2184 		goto nla_put_failure;
2185 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2186 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2187 			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2188 		goto nla_put_failure;
2189 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2190 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2191 			IPV4_DEVCONF_RO(*devconf,
2192 					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2193 		goto nla_put_failure;
2194 
2195 out:
2196 	nlmsg_end(skb, nlh);
2197 	return 0;
2198 
2199 nla_put_failure:
2200 	nlmsg_cancel(skb, nlh);
2201 	return -EMSGSIZE;
2202 }
2203 
2204 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2205 				 int ifindex, struct ipv4_devconf *devconf)
2206 {
2207 	struct sk_buff *skb;
2208 	int err = -ENOBUFS;
2209 
2210 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2211 	if (!skb)
2212 		goto errout;
2213 
2214 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2215 					event, 0, type);
2216 	if (err < 0) {
2217 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2218 		WARN_ON(err == -EMSGSIZE);
2219 		kfree_skb(skb);
2220 		goto errout;
2221 	}
2222 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2223 	return;
2224 errout:
2225 	rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2226 }
2227 
/*
 * Netlink attribute policy passed to the nlmsg_parse_deprecated*() calls in
 * inet_netconf_valid_get_req().  Each listed attribute is given a .len of
 * sizeof(int); attributes without an entry here are left zero-initialised.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2235 
2236 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2237 				      const struct nlmsghdr *nlh,
2238 				      struct nlattr **tb,
2239 				      struct netlink_ext_ack *extack)
2240 {
2241 	int i, err;
2242 
2243 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2244 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2245 		return -EINVAL;
2246 	}
2247 
2248 	if (!netlink_strict_get_check(skb))
2249 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2250 					      tb, NETCONFA_MAX,
2251 					      devconf_ipv4_policy, extack);
2252 
2253 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2254 					    tb, NETCONFA_MAX,
2255 					    devconf_ipv4_policy, extack);
2256 	if (err)
2257 		return err;
2258 
2259 	for (i = 0; i <= NETCONFA_MAX; i++) {
2260 		if (!tb[i])
2261 			continue;
2262 
2263 		switch (i) {
2264 		case NETCONFA_IFINDEX:
2265 			break;
2266 		default:
2267 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2268 			return -EINVAL;
2269 		}
2270 	}
2271 
2272 	return 0;
2273 }
2274 
2275 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2276 				    struct nlmsghdr *nlh,
2277 				    struct netlink_ext_ack *extack)
2278 {
2279 	struct net *net = sock_net(in_skb->sk);
2280 	struct nlattr *tb[NETCONFA_MAX + 1];
2281 	const struct ipv4_devconf *devconf;
2282 	struct in_device *in_dev = NULL;
2283 	struct net_device *dev = NULL;
2284 	struct sk_buff *skb;
2285 	int ifindex;
2286 	int err;
2287 
2288 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2289 	if (err)
2290 		return err;
2291 
2292 	if (!tb[NETCONFA_IFINDEX])
2293 		return -EINVAL;
2294 
2295 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2296 	switch (ifindex) {
2297 	case NETCONFA_IFINDEX_ALL:
2298 		devconf = net->ipv4.devconf_all;
2299 		break;
2300 	case NETCONFA_IFINDEX_DEFAULT:
2301 		devconf = net->ipv4.devconf_dflt;
2302 		break;
2303 	default:
2304 		err = -ENODEV;
2305 		dev = dev_get_by_index(net, ifindex);
2306 		if (dev)
2307 			in_dev = in_dev_get(dev);
2308 		if (!in_dev)
2309 			goto errout;
2310 		devconf = &in_dev->cnf;
2311 		break;
2312 	}
2313 
2314 	err = -ENOBUFS;
2315 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2316 	if (!skb)
2317 		goto errout;
2318 
2319 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2320 					NETLINK_CB(in_skb).portid,
2321 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2322 					NETCONFA_ALL);
2323 	if (err < 0) {
2324 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2325 		WARN_ON(err == -EMSGSIZE);
2326 		kfree_skb(skb);
2327 		goto errout;
2328 	}
2329 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2330 errout:
2331 	if (in_dev)
2332 		in_dev_put(in_dev);
2333 	dev_put(dev);
2334 	return err;
2335 }
2336 
2337 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2338 				     struct netlink_callback *cb)
2339 {
2340 	const struct nlmsghdr *nlh = cb->nlh;
2341 	struct net *net = sock_net(skb->sk);
2342 	struct {
2343 		unsigned long ifindex;
2344 		unsigned int all_default;
2345 	} *ctx = (void *)cb->ctx;
2346 	const struct in_device *in_dev;
2347 	struct net_device *dev;
2348 	int err = 0;
2349 
2350 	if (cb->strict_check) {
2351 		struct netlink_ext_ack *extack = cb->extack;
2352 		struct netconfmsg *ncm;
2353 
2354 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2355 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2356 			return -EINVAL;
2357 		}
2358 
2359 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2360 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2361 			return -EINVAL;
2362 		}
2363 	}
2364 
2365 	rcu_read_lock();
2366 	for_each_netdev_dump(net, dev, ctx->ifindex) {
2367 		in_dev = __in_dev_get_rcu(dev);
2368 		if (!in_dev)
2369 			continue;
2370 		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2371 						&in_dev->cnf,
2372 						NETLINK_CB(cb->skb).portid,
2373 						nlh->nlmsg_seq,
2374 						RTM_NEWNETCONF, NLM_F_MULTI,
2375 						NETCONFA_ALL);
2376 		if (err < 0)
2377 			goto done;
2378 	}
2379 	if (ctx->all_default == 0) {
2380 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2381 						net->ipv4.devconf_all,
2382 						NETLINK_CB(cb->skb).portid,
2383 						nlh->nlmsg_seq,
2384 						RTM_NEWNETCONF, NLM_F_MULTI,
2385 						NETCONFA_ALL);
2386 		if (err < 0)
2387 			goto done;
2388 		ctx->all_default++;
2389 	}
2390 	if (ctx->all_default == 1) {
2391 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2392 						net->ipv4.devconf_dflt,
2393 						NETLINK_CB(cb->skb).portid,
2394 						nlh->nlmsg_seq,
2395 						RTM_NEWNETCONF, NLM_F_MULTI,
2396 						NETCONFA_ALL);
2397 		if (err < 0)
2398 			goto done;
2399 		ctx->all_default++;
2400 	}
2401 done:
2402 	rcu_read_unlock();
2403 	return err;
2404 }
2405 
2406 #ifdef CONFIG_SYSCTL
2407 
2408 static void devinet_copy_dflt_conf(struct net *net, int i)
2409 {
2410 	struct net_device *dev;
2411 
2412 	rcu_read_lock();
2413 	for_each_netdev_rcu(net, dev) {
2414 		struct in_device *in_dev;
2415 
2416 		in_dev = __in_dev_get_rcu(dev);
2417 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2418 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2419 	}
2420 	rcu_read_unlock();
2421 }
2422 
2423 /* called with RTNL locked */
2424 static void inet_forward_change(struct net *net)
2425 {
2426 	struct net_device *dev;
2427 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2428 
2429 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2430 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2431 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2432 				    NETCONFA_FORWARDING,
2433 				    NETCONFA_IFINDEX_ALL,
2434 				    net->ipv4.devconf_all);
2435 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2436 				    NETCONFA_FORWARDING,
2437 				    NETCONFA_IFINDEX_DEFAULT,
2438 				    net->ipv4.devconf_dflt);
2439 
2440 	for_each_netdev(net, dev) {
2441 		struct in_device *in_dev;
2442 
2443 		if (on)
2444 			dev_disable_lro(dev);
2445 
2446 		in_dev = __in_dev_get_rtnl_net(dev);
2447 		if (in_dev) {
2448 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2449 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2450 						    NETCONFA_FORWARDING,
2451 						    dev->ifindex, &in_dev->cnf);
2452 		}
2453 	}
2454 }
2455 
2456 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2457 {
2458 	if (cnf == net->ipv4.devconf_dflt)
2459 		return NETCONFA_IFINDEX_DEFAULT;
2460 	else if (cnf == net->ipv4.devconf_all)
2461 		return NETCONFA_IFINDEX_ALL;
2462 	else {
2463 		struct in_device *idev
2464 			= container_of(cnf, struct in_device, cnf);
2465 		return idev->dev->ifindex;
2466 	}
2467 }
2468 
2469 static int devinet_conf_proc(const struct ctl_table *ctl, int write,
2470 			     void *buffer, size_t *lenp, loff_t *ppos)
2471 {
2472 	int old_value = *(int *)ctl->data;
2473 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2474 	int new_value = *(int *)ctl->data;
2475 
2476 	if (write) {
2477 		struct ipv4_devconf *cnf = ctl->extra1;
2478 		struct net *net = ctl->extra2;
2479 		int i = (int *)ctl->data - cnf->data;
2480 		int ifindex;
2481 
2482 		set_bit(i, cnf->state);
2483 
2484 		if (cnf == net->ipv4.devconf_dflt)
2485 			devinet_copy_dflt_conf(net, i);
2486 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2487 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2488 			if ((new_value == 0) && (old_value != 0))
2489 				rt_cache_flush(net);
2490 
2491 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2492 		    new_value != old_value)
2493 			rt_cache_flush(net);
2494 
2495 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2496 		    new_value != old_value) {
2497 			ifindex = devinet_conf_ifindex(net, cnf);
2498 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2499 						    NETCONFA_RP_FILTER,
2500 						    ifindex, cnf);
2501 		}
2502 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2503 		    new_value != old_value) {
2504 			ifindex = devinet_conf_ifindex(net, cnf);
2505 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2506 						    NETCONFA_PROXY_NEIGH,
2507 						    ifindex, cnf);
2508 		}
2509 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2510 		    new_value != old_value) {
2511 			ifindex = devinet_conf_ifindex(net, cnf);
2512 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2513 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2514 						    ifindex, cnf);
2515 		}
2516 	}
2517 
2518 	return ret;
2519 }
2520 
2521 static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
2522 				  void *buffer, size_t *lenp, loff_t *ppos)
2523 {
2524 	int *valp = ctl->data;
2525 	int val = *valp;
2526 	loff_t pos = *ppos;
2527 	struct net *net = ctl->extra2;
2528 	int ret;
2529 
2530 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2531 		return -EPERM;
2532 
2533 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2534 
2535 	if (write && *valp != val) {
2536 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2537 			if (!rtnl_net_trylock(net)) {
2538 				/* Restore the original values before restarting */
2539 				*valp = val;
2540 				*ppos = pos;
2541 				return restart_syscall();
2542 			}
2543 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2544 				inet_forward_change(net);
2545 			} else {
2546 				struct ipv4_devconf *cnf = ctl->extra1;
2547 				struct in_device *idev =
2548 					container_of(cnf, struct in_device, cnf);
2549 				if (*valp)
2550 					dev_disable_lro(idev->dev);
2551 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2552 							    NETCONFA_FORWARDING,
2553 							    idev->dev->ifindex,
2554 							    cnf);
2555 			}
2556 			rtnl_net_unlock(net);
2557 			rt_cache_flush(net);
2558 		} else
2559 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2560 						    NETCONFA_FORWARDING,
2561 						    NETCONFA_IFINDEX_DEFAULT,
2562 						    net->ipv4.devconf_dflt);
2563 	}
2564 
2565 	return ret;
2566 }
2567 
2568 static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write,
2569 				void *buffer, size_t *lenp, loff_t *ppos)
2570 {
2571 	int *valp = ctl->data;
2572 	int val = *valp;
2573 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2574 	struct net *net = ctl->extra2;
2575 
2576 	if (write && *valp != val)
2577 		rt_cache_flush(net);
2578 
2579 	return ret;
2580 }
2581 
/*
 * Initialiser for one ctl_table entry describing devconf attribute @attr.
 * @name is the procname, @mval the file mode and @proc the handler.  .data
 * and .extra1 point into the global ipv4_devconf here; IPV4_DEVCONF_<attr>
 * is 1-based, hence the "- 1" when indexing data[].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler @proc. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2604 
/*
 * Template for the per-devconf sysctl table.  __devinet_sysctl_register()
 * duplicates this object for each configuration set it registers and
 * re-targets the copies' .data/.extra1/.extra2 fields; sysctl_header holds
 * the registration handle of such a copy.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* "forwarding" has its own handler. */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		/* Entries below flush the route cache whenever they change. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2658 
2659 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2660 				     int ifindex, struct ipv4_devconf *p)
2661 {
2662 	int i;
2663 	struct devinet_sysctl_table *t;
2664 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2665 
2666 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2667 	if (!t)
2668 		goto out;
2669 
2670 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2671 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2672 		t->devinet_vars[i].extra1 = p;
2673 		t->devinet_vars[i].extra2 = net;
2674 	}
2675 
2676 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2677 
2678 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2679 	if (!t->sysctl_header)
2680 		goto free;
2681 
2682 	p->sysctl = t;
2683 
2684 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2685 				    ifindex, p);
2686 	return 0;
2687 
2688 free:
2689 	kfree(t);
2690 out:
2691 	return -ENOMEM;
2692 }
2693 
2694 static void __devinet_sysctl_unregister(struct net *net,
2695 					struct ipv4_devconf *cnf, int ifindex)
2696 {
2697 	struct devinet_sysctl_table *t = cnf->sysctl;
2698 
2699 	if (t) {
2700 		cnf->sysctl = NULL;
2701 		unregister_net_sysctl_table(t->sysctl_header);
2702 		kfree(t);
2703 	}
2704 
2705 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2706 }
2707 
2708 static int devinet_sysctl_register(struct in_device *idev)
2709 {
2710 	int err;
2711 
2712 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2713 		return -EINVAL;
2714 
2715 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2716 	if (err)
2717 		return err;
2718 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2719 					idev->dev->ifindex, &idev->cnf);
2720 	if (err)
2721 		neigh_sysctl_unregister(idev->arp_parms);
2722 	return err;
2723 }
2724 
2725 static void devinet_sysctl_unregister(struct in_device *idev)
2726 {
2727 	struct net *net = dev_net(idev->dev);
2728 
2729 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2730 	neigh_sysctl_unregister(idev->arp_parms);
2731 }
2732 
/*
 * Template for the "ip_forward" sysctl entry.  It targets the FORWARDING slot
 * of the global ipv4_devconf and init_net here; devinet_init_net() duplicates
 * it and re-points .data/.extra1/.extra2 at the netns being initialised.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
};
2745 #endif
2746 
/*
 * Per-netns init: allocate the IPv4 address hash table and the "all" and
 * "default" devconf sets, optionally seed them from another netns, and (with
 * CONFIG_SYSCTL) register the conf/all, conf/default and ip_forward sysctls.
 *
 * Returns 0 on success or a negative errno; on failure everything set up so
 * far is released in reverse order.
 */
static __net_init int devinet_init_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *forw_hdr;
	struct ctl_table *tbl;
#endif
	struct ipv4_devconf *all, *dflt;
	int err;
	int i;

	err = -ENOMEM;
	net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->ipv4.inet_addr_lst)
		goto err_alloc_hash;

	/* Start both sets from the compiled-in templates. */
	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;

	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;

	/* Point this netns' ip_forward entry at its own "all" set. */
	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
#endif

	/* For non-initial namespaces, pick the source of the initial values. */
	if (!net_eq(net, &init_net)) {
		switch (net_inherit_devconf()) {
		case 3:
			/* copy from the current netns */
			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt,
			       current->nsproxy->net_ns->ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 0:
		case 1:
			/* copy from init_net */
			memcpy(all, init_net.ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt, init_net.ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 2:
			/* use compiled values */
			break;
		}
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	/* register_net_sysctl_sz() reports failure only as a NULL header. */
	err = -ENOMEM;
	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
					  ARRAY_SIZE(ctl_forward_entry));
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	/* kmalloc_array() does not zero the buckets; initialise each head. */
	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);

	INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);

	/* Publish the two sets only once nothing can fail any more. */
	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Error unwind: each label releases what was set up before it. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	kfree(tbl);
err_alloc_ctl:
#endif
	kfree(dflt);
err_alloc_dflt:
	kfree(all);
err_alloc_all:
	kfree(net->ipv4.inet_addr_lst);
err_alloc_hash:
	return err;
}
2850 
/*
 * Per-netns exit: cancel the address lifetime work, unregister the sysctls
 * registered by devinet_init_net() (with CONFIG_SYSCTL), and free the devconf
 * sets and the address hash table.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	const struct ctl_table *tbl;
#endif

	cancel_delayed_work_sync(&net->ipv4.addr_chk_work);

#ifdef CONFIG_SYSCTL
	/* Fetch the table pointer before the header is unregistered. */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
	kfree(net->ipv4.inet_addr_lst);
}
2872 
/* Per-netns init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2877 
/* AF_INET link-af callbacks, registered via rtnl_af_register() in devinet_init(). */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2885 
/*
 * PF_INET rtnetlink handlers registered by devinet_init(): address add and
 * delete (doit), address dump, netconf get and dump, and multicast address
 * dump.
 */
static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
	{.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr,
	 .flags = RTNL_FLAG_DOIT_PERNET},
	{.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr,
	 .flags = RTNL_FLAG_DOIT_PERNET},
	{.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr,
	 .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
	{.protocol = PF_INET, .msgtype = RTM_GETNETCONF,
	 .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = RTM_GETMULTICAST,
	 .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED},
};
2899 
2900 void __init devinet_init(void)
2901 {
2902 	register_pernet_subsys(&devinet_ops);
2903 	register_netdevice_notifier(&ip_netdev_notifier);
2904 
2905 	if (rtnl_af_register(&inet_af_ops))
2906 		panic("Unable to register inet_af_ops\n");
2907 
2908 	rtnl_register_many(devinet_rtnl_msg_handlers);
2909 }
2910