xref: /linux/net/ipv4/devinet.c (revision 9c0fc36ec493d20599cf088d21b6bddcdc184242)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* Address flags that only make sense for IPv6 addresses; they are
 * stripped from IPv4 addresses in __inet_insert_ifa().
 */
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
/* System-wide IPv4 per-device defaults (the "all" settings).  Array
 * indices are the IPV4_DEVCONF_* identifiers minus one; entries not
 * listed here default to 0.
 */
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
/* Template copied into each newly created in_device (see inetdev_init()
 * memcpy from net->ipv4.devconf_dflt).  Differs from ipv4_devconf by
 * additionally enabling ACCEPT_SOURCE_ROUTE.
 */
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
/* Netlink attribute validation policy for RTM_NEWADDR/RTM_DELADDR
 * requests (parsed with nlmsg_parse_deprecated() below).
 */
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
/* Bundle of parameters threaded through the address dump/fill helpers
 * (netlink message identity plus dump filtering state).
 */
110 struct inet_fill_args {
111 	u32 portid;		/* requesting socket's netlink portid */
112 	u32 seq;		/* netlink sequence number to echo back */
113 	int event;		/* RTM_* message type being built */
114 	unsigned int flags;	/* NLM_F_* flags for the reply */
115 	int netnsid;		/* target netns id, or default */
116 	int ifindex;		/* restrict dump to this ifindex if set */
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static u32 inet_addr_hash(const struct net *net, __be32 addr)
123 {
124 	u32 val = (__force u32) addr ^ net_hash_mix(net);
125 
126 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
127 }
128 
129 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
130 {
131 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
132 
133 	ASSERT_RTNL();
134 	hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
135 }
136 
/* Unlink @ifa from the address hash.  RTNL must be held; concurrent
 * RCU readers may still observe the entry until a grace period passes.
 */
137 static void inet_hash_remove(struct in_ifaddr *ifa)
138 {
139 	ASSERT_RTNL();
140 	hlist_del_init_rcu(&ifa->addr_lst);
141 }
142 
143 /**
144  * __ip_dev_find - find the first device with a given source address.
145  * @net: the net namespace
146  * @addr: the source address
147  * @devref: if true, take a reference on the found device
148  *
149  * If a caller uses devref=false, it should be protected by RCU, or RTNL
150  */
151 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
152 {
153 	struct net_device *result = NULL;
154 	struct in_ifaddr *ifa;
155 
	/* Fast path: exact match in the per-netns address hash. */
156 	rcu_read_lock();
157 	ifa = inet_lookup_ifaddr_rcu(net, addr);
158 	if (!ifa) {
159 		struct flowi4 fl4 = { .daddr = addr };
160 		struct fib_result res = { 0 };
161 		struct fib_table *local;
162 
163 		/* Fallback to FIB local table so that communication
164 		 * over loopback subnets work.
165 		 */
166 		local = fib_get_table(net, RT_TABLE_LOCAL);
167 		if (local &&
168 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169 		    res.type == RTN_LOCAL)
170 			result = FIB_RES_DEV(res);
171 	} else {
172 		result = ifa->ifa_dev->dev;
173 	}
	/* Take the reference while still inside the RCU section so the
	 * device cannot disappear between lookup and dev_hold().
	 */
174 	if (result && devref)
175 		dev_hold(result);
176 	rcu_read_unlock();
177 	return result;
178 }
179 EXPORT_SYMBOL(__ip_dev_find);
180 
181 /* called under RCU lock */
182 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
183 {
184 	u32 hash = inet_addr_hash(net, addr);
185 	struct in_ifaddr *ifa;
186 
187 	hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
188 		if (ifa->ifa_local == addr)
189 			return ifa;
190 
191 	return NULL;
192 }
193 
194 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
195 
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
197 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
198 static void inet_del_ifa(struct in_device *in_dev,
199 			 struct in_ifaddr __rcu **ifap,
200 			 int destroy);
201 #ifdef CONFIG_SYSCTL
202 static int devinet_sysctl_register(struct in_device *idev);
203 static void devinet_sysctl_unregister(struct in_device *idev);
204 #else
/* No-op stubs used when CONFIG_SYSCTL is disabled. */
205 static int devinet_sysctl_register(struct in_device *idev)
206 {
207 	return 0;
208 }
209 static void devinet_sysctl_unregister(struct in_device *idev)
210 {
211 }
212 #endif
213 
214 /* in_ifaddr allocation and RCU-deferred freeing helpers. */
215 
216 static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
217 {
218 	struct in_ifaddr *ifa;
219 
220 	ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
221 	if (!ifa)
222 		return NULL;
223 
224 	in_dev_hold(in_dev);
225 	ifa->ifa_dev = in_dev;
226 
227 	INIT_HLIST_NODE(&ifa->addr_lst);
228 
229 	return ifa;
230 }
231 
232 static void inet_rcu_free_ifa(struct rcu_head *head)
233 {
234 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
235 
236 	in_dev_put(ifa->ifa_dev);
237 	kfree(ifa);
238 }
239 
/* Schedule RCU-deferred destruction of @ifa. */
240 static void inet_free_ifa(struct in_ifaddr *ifa)
241 {
242 	/* Our reference to ifa->ifa_dev must be freed ASAP
243 	 * to release the reference to the netdev the same way.
244 	 * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
245 	 */
246 	call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
247 }
248 
249 static void in_dev_free_rcu(struct rcu_head *head)
250 {
251 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
252 
253 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
254 	kfree(idev);
255 }
256 
/* Final teardown of an in_device once its refcount drops to zero (see
 * the in_dev_put() chain noted in inet_free_ifa()).  Releases the
 * netdev reference and frees the structure after an RCU grace period.
 * An in_device that was never marked dead indicates a refcounting bug;
 * it is reported and deliberately leaked rather than freed.
 */
257 void in_dev_finish_destroy(struct in_device *idev)
258 {
259 	struct net_device *dev = idev->dev;
260 
261 	WARN_ON(idev->ifa_list);
262 	WARN_ON(idev->mc_list);
263 #ifdef NET_REFCNT_DEBUG
264 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
265 #endif
266 	netdev_put(dev, &idev->dev_tracker);
267 	if (!idev->dead)
268 		pr_err("Freeing alive in_device %p\n", idev);
269 	else
270 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
271 }
272 EXPORT_SYMBOL(in_dev_finish_destroy);
273 
/* Allocate and attach an in_device to @dev, seeding its configuration
 * from the namespace defaults.  dev->ip_ptr is published last so the
 * receive path only ever sees a fully initialized structure.  Returns
 * the new in_device or an ERR_PTR on failure.  RTNL must be held.
 */
274 static struct in_device *inetdev_init(struct net_device *dev)
275 {
276 	struct in_device *in_dev;
277 	int err = -ENOMEM;
278 
279 	ASSERT_RTNL();
280 
281 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
282 	if (!in_dev)
283 		goto out;
284 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
285 			sizeof(in_dev->cnf));
286 	in_dev->cnf.sysctl = NULL;
287 	in_dev->dev = dev;
288 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
289 	if (!in_dev->arp_parms)
290 		goto out_kfree;
291 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
292 		dev_disable_lro(dev);
293 	/* Reference in_dev->dev */
294 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
295 	/* Account for reference dev->ip_ptr (below) */
296 	refcount_set(&in_dev->refcnt, 1);
297 
298 	err = devinet_sysctl_register(in_dev);
299 	if (err) {
		/* Past this point teardown must go through the refcount:
		 * mark the device dead and drop the initial reference.
		 */
300 		in_dev->dead = 1;
301 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
302 		in_dev_put(in_dev);
303 		in_dev = NULL;
304 		goto out;
305 	}
306 	ip_mc_init_dev(in_dev);
307 	if (dev->flags & IFF_UP)
308 		ip_mc_up(in_dev);
309 
310 	/* we can receive as soon as ip_ptr is set -- do this last */
311 	rcu_assign_pointer(dev->ip_ptr, in_dev);
312 out:
313 	return in_dev ?: ERR_PTR(err);
314 out_kfree:
315 	kfree(in_dev);
316 	in_dev = NULL;
317 	goto out;
318 }
319 
/* Tear down @in_dev: mark it dead, delete and free every address
 * (notifying listeners), unpublish dev->ip_ptr and drop the initial
 * reference.  The structure itself is freed via RCU once the refcount
 * reaches zero (in_dev_finish_destroy()).  RTNL must be held.
 */
320 static void inetdev_destroy(struct in_device *in_dev)
321 {
322 	struct net_device *dev;
323 	struct in_ifaddr *ifa;
324 
325 	ASSERT_RTNL();
326 
327 	dev = in_dev->dev;
328 
329 	in_dev->dead = 1;
330 
331 	ip_mc_destroy_dev(in_dev);
332 
	/* inet_del_ifa() unlinks the head entry; freeing is explicit here
	 * because destroy == 0.
	 */
333 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
334 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
335 		inet_free_ifa(ifa);
336 	}
337 
338 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
339 
340 	devinet_sysctl_unregister(in_dev);
341 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
342 	arp_ifdown(dev);
343 
344 	in_dev_put(in_dev);
345 }
346 
347 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
348 {
349 	const struct in_ifaddr *ifa;
350 
351 	rcu_read_lock();
352 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
353 		if (inet_ifa_match(a, ifa)) {
354 			if (!b || inet_ifa_match(b, ifa)) {
355 				rcu_read_unlock();
356 				return 1;
357 			}
358 		}
359 	}
360 	rcu_read_unlock();
361 	return 0;
362 }
363 
/* Remove the address *@ifap from @in_dev and the global hash, sending
 * RTM_DELADDR plus a NETDEV_DOWN notifier for it.  Deleting a primary
 * address also deletes all of its secondaries, unless
 * promote_secondaries is enabled, in which case the first matching
 * secondary is promoted to primary and the remaining secondaries'
 * routes are re-added with the new preferred source.  When @destroy is
 * set, the removed address is freed (RCU-deferred).  RTNL must be held.
 */
364 static void __inet_del_ifa(struct in_device *in_dev,
365 			   struct in_ifaddr __rcu **ifap,
366 			   int destroy, struct nlmsghdr *nlh, u32 portid)
367 {
368 	struct in_ifaddr *promote = NULL;
369 	struct in_ifaddr *ifa, *ifa1;
370 	struct in_ifaddr __rcu **last_prim;
371 	struct in_ifaddr *prev_prom = NULL;
372 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
373 
374 	ASSERT_RTNL();
375 
376 	ifa1 = rtnl_dereference(*ifap);
377 	last_prim = ifap;
378 	if (in_dev->dead)
379 		goto no_promotions;
380 
381 	/* 1. Deleting primary ifaddr forces deletion all secondaries
382 	 * unless alias promotion is set
383 	 **/
384 
385 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
386 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
387 
388 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Track where the last primary of compatible scope
			 * sits, so a promoted secondary can be re-linked
			 * after it below.
			 */
389 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
390 			    ifa1->ifa_scope <= ifa->ifa_scope)
391 				last_prim = &ifa->ifa_next;
392 
393 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
394 			    ifa1->ifa_mask != ifa->ifa_mask ||
395 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
396 				ifap1 = &ifa->ifa_next;
397 				prev_prom = ifa;
398 				continue;
399 			}
400 
401 			if (!do_promote) {
402 				inet_hash_remove(ifa);
403 				*ifap1 = ifa->ifa_next;
404 
405 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
406 				blocking_notifier_call_chain(&inetaddr_chain,
407 						NETDEV_DOWN, ifa);
408 				inet_free_ifa(ifa);
409 			} else {
410 				promote = ifa;
411 				break;
412 			}
413 		}
414 	}
415 
416 	/* On promotion all secondaries from subnet are changing
417 	 * the primary IP, we must remove all their routes silently
418 	 * and later to add them back with new prefsrc. Do this
419 	 * while all addresses are on the device list.
420 	 */
421 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
422 		if (ifa1->ifa_mask == ifa->ifa_mask &&
423 		    inet_ifa_match(ifa1->ifa_address, ifa))
424 			fib_del_ifaddr(ifa, ifa1);
425 	}
426 
427 no_promotions:
428 	/* 2. Unlink it */
429 
430 	*ifap = ifa1->ifa_next;
431 	inet_hash_remove(ifa1);
432 
433 	/* 3. Announce address deletion */
434 
435 	/* Send message first, then call notifier.
436 	   At first sight, FIB update triggered by notifier
437 	   will refer to already deleted ifaddr, that could confuse
438 	   netlink listeners. It is not true: look, gated sees
439 	   that route deleted and if it still thinks that ifaddr
440 	   is valid, it will try to restore deleted routes... Grr.
441 	   So that, this order is correct.
442 	 */
443 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
444 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
445 
446 	if (promote) {
447 		struct in_ifaddr *next_sec;
448 
449 		next_sec = rtnl_dereference(promote->ifa_next);
450 		if (prev_prom) {
451 			struct in_ifaddr *last_sec;
452 
			/* Move the promoted entry from its old position to
			 * just after the last compatible primary.
			 */
453 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
454 
455 			last_sec = rtnl_dereference(*last_prim);
456 			rcu_assign_pointer(promote->ifa_next, last_sec);
457 			rcu_assign_pointer(*last_prim, promote);
458 		}
459 
460 		promote->ifa_flags &= ~IFA_F_SECONDARY;
461 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
462 		blocking_notifier_call_chain(&inetaddr_chain,
463 				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries with the
		 * promoted address as their new preferred source.
		 */
464 		for (ifa = next_sec; ifa;
465 		     ifa = rtnl_dereference(ifa->ifa_next)) {
466 			if (ifa1->ifa_mask != ifa->ifa_mask ||
467 			    !inet_ifa_match(ifa1->ifa_address, ifa))
468 					continue;
469 			fib_add_ifaddr(ifa);
470 		}
471 
472 	}
473 	if (destroy)
474 		inet_free_ifa(ifa1);
475 }
476 
/* Delete *@ifap without a netlink request context (no nlh/portid). */
477 static void inet_del_ifa(struct in_device *in_dev,
478 			 struct in_ifaddr __rcu **ifap,
479 			 int destroy)
480 {
481 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
482 }
483 
/* Link @ifa into its device's address list (primaries ordered by scope,
 * secondaries appended at the tail) and into the global hash, then
 * announce it via RTM_NEWADDR and the NETDEV_UP notifier chain.
 * Consumes @ifa on every error path.  Returns 0, -EEXIST for a
 * duplicate local address, -EINVAL for a scope mismatch, or a
 * validator's errno.  RTNL must be held.
 */
484 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
485 			     u32 portid, struct netlink_ext_ack *extack)
486 {
487 	struct in_ifaddr __rcu **last_primary, **ifap;
488 	struct in_device *in_dev = ifa->ifa_dev;
489 	struct net *net = dev_net(in_dev->dev);
490 	struct in_validator_info ivi;
491 	struct in_ifaddr *ifa1;
492 	int ret;
493 
494 	ASSERT_RTNL();
495 
496 	if (!ifa->ifa_local) {
497 		inet_free_ifa(ifa);
498 		return 0;
499 	}
500 
501 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
502 	last_primary = &in_dev->ifa_list;
503 
504 	/* Don't set IPv6 only flags to IPv4 addresses */
505 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
506 
507 	ifap = &in_dev->ifa_list;
508 	ifa1 = rtnl_dereference(*ifap);
509 
	/* Walk the whole list: find the insertion point after the last
	 * primary of compatible scope, and classify @ifa as secondary if
	 * another address already covers the same subnet.
	 */
510 	while (ifa1) {
511 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
512 		    ifa->ifa_scope <= ifa1->ifa_scope)
513 			last_primary = &ifa1->ifa_next;
514 		if (ifa1->ifa_mask == ifa->ifa_mask &&
515 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
516 			if (ifa1->ifa_local == ifa->ifa_local) {
517 				inet_free_ifa(ifa);
518 				return -EEXIST;
519 			}
520 			if (ifa1->ifa_scope != ifa->ifa_scope) {
521 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
522 				inet_free_ifa(ifa);
523 				return -EINVAL;
524 			}
525 			ifa->ifa_flags |= IFA_F_SECONDARY;
526 		}
527 
528 		ifap = &ifa1->ifa_next;
529 		ifa1 = rtnl_dereference(*ifap);
530 	}
531 
532 	/* Allow any devices that wish to register ifaddr validators to weigh
533 	 * in now, before changes are committed.  The rtnl lock is serializing
534 	 * access here, so the state should not change between a validator call
535 	 * and a final notify on commit.  This isn't invoked on promotion under
536 	 * the assumption that validators are checking the address itself, and
537 	 * not the flags.
538 	 */
539 	ivi.ivi_addr = ifa->ifa_address;
540 	ivi.ivi_dev = ifa->ifa_dev;
541 	ivi.extack = extack;
542 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
543 					   NETDEV_UP, &ivi);
544 	ret = notifier_to_errno(ret);
545 	if (ret) {
546 		inet_free_ifa(ifa);
547 		return ret;
548 	}
549 
550 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
551 		ifap = last_primary;
552 
553 	rcu_assign_pointer(ifa->ifa_next, *ifap);
554 	rcu_assign_pointer(*ifap, ifa);
555 
556 	inet_hash_insert(dev_net(in_dev->dev), ifa);
557 
	/* Kick the lifetime worker so new finite lifetimes take effect. */
558 	cancel_delayed_work(&net->ipv4.addr_chk_work);
559 	queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
560 
561 	/* Send message first, then call notifier.
562 	   Notifier will trigger FIB update, so that
563 	   listeners of netlink will know about new ifaddr */
564 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
565 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
566 
567 	return 0;
568 }
569 
/* Insert @ifa without a netlink request context. */
570 static int inet_insert_ifa(struct in_ifaddr *ifa)
571 {
572 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
573 }
574 
575 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
576 {
577 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
578 
579 	ASSERT_RTNL();
580 
581 	ipv4_devconf_setall(in_dev);
582 	neigh_parms_data_state_setall(in_dev->arp_parms);
583 
584 	if (ipv4_is_loopback(ifa->ifa_local))
585 		ifa->ifa_scope = RT_SCOPE_HOST;
586 	return inet_insert_ifa(ifa);
587 }
588 
589 /* Caller must hold RCU or RTNL :
590  * We dont take a reference on found in_device
591  */
592 struct in_device *inetdev_by_index(struct net *net, int ifindex)
593 {
594 	struct net_device *dev;
595 	struct in_device *in_dev = NULL;
596 
597 	rcu_read_lock();
598 	dev = dev_get_by_index_rcu(net, ifindex);
599 	if (dev)
600 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
601 	rcu_read_unlock();
602 	return in_dev;
603 }
604 EXPORT_SYMBOL(inetdev_by_index);
605 
606 /* Called only from RTNL semaphored context. No locks. */
607 
608 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
609 				    __be32 mask)
610 {
611 	struct in_ifaddr *ifa;
612 
613 	ASSERT_RTNL();
614 
615 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
616 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
617 			return ifa;
618 	}
619 	return NULL;
620 }
621 
622 static int ip_mc_autojoin_config(struct net *net, bool join,
623 				 const struct in_ifaddr *ifa)
624 {
625 #if defined(CONFIG_IP_MULTICAST)
626 	struct ip_mreqn mreq = {
627 		.imr_multiaddr.s_addr = ifa->ifa_address,
628 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
629 	};
630 	struct sock *sk = net->ipv4.mc_autojoin_sk;
631 	int ret;
632 
633 	ASSERT_RTNL();
634 
635 	lock_sock(sk);
636 	if (join)
637 		ret = ip_mc_join_group(sk, &mreq);
638 	else
639 		ret = ip_mc_leave_group(sk, &mreq);
640 	release_sock(sk);
641 
642 	return ret;
643 #else
644 	return -EOPNOTSUPP;
645 #endif
646 }
647 
/* RTM_DELADDR handler: locate the address matching the request's local
 * address, label and/or address+prefix on the given interface and
 * delete it.  Autojoined multicast groups are left first.  Returns 0
 * on success, -ENODEV or -EADDRNOTAVAIL otherwise.
 */
648 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
649 			    struct netlink_ext_ack *extack)
650 {
651 	struct net *net = sock_net(skb->sk);
652 	struct in_ifaddr __rcu **ifap;
653 	struct nlattr *tb[IFA_MAX+1];
654 	struct in_device *in_dev;
655 	struct ifaddrmsg *ifm;
656 	struct in_ifaddr *ifa;
657 	int err;
658 
659 	ASSERT_RTNL();
660 
661 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
662 				     ifa_ipv4_policy, extack);
663 	if (err < 0)
664 		goto errout;
665 
666 	ifm = nlmsg_data(nlh);
667 	in_dev = inetdev_by_index(net, ifm->ifa_index);
668 	if (!in_dev) {
669 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
670 		err = -ENODEV;
671 		goto errout;
672 	}
673 
	/* Each supplied attribute narrows the match; absent attributes
	 * are wildcards.
	 */
674 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
675 	     ifap = &ifa->ifa_next) {
676 		if (tb[IFA_LOCAL] &&
677 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
678 			continue;
679 
680 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
681 			continue;
682 
683 		if (tb[IFA_ADDRESS] &&
684 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
685 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
686 			continue;
687 
688 		if (ipv4_is_multicast(ifa->ifa_address))
689 			ip_mc_autojoin_config(net, false, ifa);
690 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
691 		return 0;
692 	}
693 
694 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
695 	err = -EADDRNOTAVAIL;
696 errout:
697 	return err;
698 }
699 
/* Periodic worker enforcing address lifetimes.  A first pass scans each
 * hash chain under RCU (lockless, READ_ONCE) just to decide whether any
 * chain needs changes; only then is RTNL taken for a second pass that
 * deletes addresses whose valid_lft has elapsed and marks addresses
 * past preferred_lft as IFA_F_DEPRECATED.  Finally re-arms itself for
 * the earliest upcoming expiry, clamped to a minimum interval.
 */
700 static void check_lifetime(struct work_struct *work)
701 {
702 	unsigned long now, next, next_sec, next_sched;
703 	struct in_ifaddr *ifa;
704 	struct hlist_node *n;
705 	struct net *net;
706 	int i;
707 
708 	net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
709 	now = jiffies;
710 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
711 
712 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
713 		struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
714 		bool change_needed = false;
715 
		/* Pass 1: read-only scan under RCU. */
716 		rcu_read_lock();
717 		hlist_for_each_entry_rcu(ifa, head, addr_lst) {
718 			unsigned long age, tstamp;
719 			u32 preferred_lft;
720 			u32 valid_lft;
721 			u32 flags;
722 
723 			flags = READ_ONCE(ifa->ifa_flags);
724 			if (flags & IFA_F_PERMANENT)
725 				continue;
726 
727 			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
728 			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
729 			tstamp = READ_ONCE(ifa->ifa_tstamp);
730 			/* We try to batch several events at once. */
731 			age = (now - tstamp +
732 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
733 
734 			if (valid_lft != INFINITY_LIFE_TIME &&
735 			    age >= valid_lft) {
736 				change_needed = true;
737 			} else if (preferred_lft ==
738 				   INFINITY_LIFE_TIME) {
739 				continue;
740 			} else if (age >= preferred_lft) {
741 				if (time_before(tstamp + valid_lft * HZ, next))
742 					next = tstamp + valid_lft * HZ;
743 
744 				if (!(flags & IFA_F_DEPRECATED))
745 					change_needed = true;
746 			} else if (time_before(tstamp + preferred_lft * HZ,
747 					       next)) {
748 				next = tstamp + preferred_lft * HZ;
749 			}
750 		}
751 		rcu_read_unlock();
752 		if (!change_needed)
753 			continue;
		/* Pass 2: re-walk the chain under RTNL and apply changes. */
754 		rtnl_lock();
755 		hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
756 			unsigned long age;
757 
758 			if (ifa->ifa_flags & IFA_F_PERMANENT)
759 				continue;
760 
761 			/* We try to batch several events at once. */
762 			age = (now - ifa->ifa_tstamp +
763 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
764 
765 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
766 			    age >= ifa->ifa_valid_lft) {
767 				struct in_ifaddr __rcu **ifap;
768 				struct in_ifaddr *tmp;
769 
				/* Find ifa's predecessor on the device list
				 * so inet_del_ifa() can unlink it.
				 */
770 				ifap = &ifa->ifa_dev->ifa_list;
771 				tmp = rtnl_dereference(*ifap);
772 				while (tmp) {
773 					if (tmp == ifa) {
774 						inet_del_ifa(ifa->ifa_dev,
775 							     ifap, 1);
776 						break;
777 					}
778 					ifap = &tmp->ifa_next;
779 					tmp = rtnl_dereference(*ifap);
780 				}
781 			} else if (ifa->ifa_preferred_lft !=
782 				   INFINITY_LIFE_TIME &&
783 				   age >= ifa->ifa_preferred_lft &&
784 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
785 				ifa->ifa_flags |= IFA_F_DEPRECATED;
786 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
787 			}
788 		}
789 		rtnl_unlock();
790 	}
791 
792 	next_sec = round_jiffies_up(next);
793 	next_sched = next;
794 
795 	/* If rounded timeout is accurate enough, accept it. */
796 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
797 		next_sched = next_sec;
798 
799 	now = jiffies;
800 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
801 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
802 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
803 
804 	queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
805 			   next_sched - now);
806 }
807 
/* Apply valid/preferred lifetimes to @ifa and update its flags: an
 * infinite valid_lft makes the address IFA_F_PERMANENT, a zero
 * preferred_lft marks it IFA_F_DEPRECATED.  Also stamps ifa_tstamp,
 * and ifa_cstamp on first use.  WRITE_ONCE pairs with the lockless
 * READ_ONCE readers in check_lifetime().
 */
808 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
809 			     __u32 prefered_lft)
810 {
811 	unsigned long timeout;
812 	u32 flags;
813 
814 	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
815 
816 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
817 	if (addrconf_finite_timeout(timeout))
818 		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
819 	else
820 		flags |= IFA_F_PERMANENT;
821 
822 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
823 	if (addrconf_finite_timeout(timeout)) {
824 		if (timeout == 0)
825 			flags |= IFA_F_DEPRECATED;
826 		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
827 	}
828 	WRITE_ONCE(ifa->ifa_flags, flags);
829 	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
830 	if (!ifa->ifa_cstamp)
831 		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
832 }
833 
/* Parse an RTM_NEWADDR request into a freshly allocated in_ifaddr on
 * the target device.  Requested lifetimes are reported through
 * @pvalid_lft and @pprefered_lft (left untouched when no IFA_CACHEINFO
 * attribute is present).  Returns the new address or an ERR_PTR.
 */
834 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
835 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
836 				       struct netlink_ext_ack *extack)
837 {
838 	struct nlattr *tb[IFA_MAX+1];
839 	struct in_ifaddr *ifa;
840 	struct ifaddrmsg *ifm;
841 	struct net_device *dev;
842 	struct in_device *in_dev;
843 	int err;
844 
845 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
846 				     ifa_ipv4_policy, extack);
847 	if (err < 0)
848 		goto errout;
849 
850 	ifm = nlmsg_data(nlh);
851 	err = -EINVAL;
852 
853 	if (ifm->ifa_prefixlen > 32) {
854 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
855 		goto errout;
856 	}
857 
858 	if (!tb[IFA_LOCAL]) {
859 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
860 		goto errout;
861 	}
862 
863 	dev = __dev_get_by_index(net, ifm->ifa_index);
864 	err = -ENODEV;
865 	if (!dev) {
866 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
867 		goto errout;
868 	}
869 
870 	in_dev = __in_dev_get_rtnl(dev);
871 	err = -ENOBUFS;
872 	if (!in_dev)
873 		goto errout;
874 
875 	ifa = inet_alloc_ifa(in_dev);
876 	if (!ifa)
877 		/*
878 		 * A potential in_dev allocation can be left alive, it stays
879 		 * assigned to its device and is destroyed with it.
880 		 */
881 		goto errout;
882 
883 	ipv4_devconf_setall(in_dev);
884 	neigh_parms_data_state_setall(in_dev->arp_parms);
885 
	/* IFA_ADDRESS defaults to IFA_LOCAL (non-pointopoint case). */
886 	if (!tb[IFA_ADDRESS])
887 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
888 
889 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
890 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
891 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
892 					 ifm->ifa_flags;
893 	ifa->ifa_scope = ifm->ifa_scope;
894 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
895 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
896 
897 	if (tb[IFA_BROADCAST])
898 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
899 
900 	if (tb[IFA_LABEL])
901 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
902 	else
903 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
904 
905 	if (tb[IFA_RT_PRIORITY])
906 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
907 
908 	if (tb[IFA_PROTO])
909 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
910 
911 	if (tb[IFA_CACHEINFO]) {
912 		struct ifa_cacheinfo *ci;
913 
914 		ci = nla_data(tb[IFA_CACHEINFO]);
915 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
916 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
917 			err = -EINVAL;
918 			goto errout_free;
919 		}
920 		*pvalid_lft = ci->ifa_valid;
921 		*pprefered_lft = ci->ifa_prefered;
922 	}
923 
924 	return ifa;
925 
926 errout_free:
927 	inet_free_ifa(ifa);
928 errout:
929 	return ERR_PTR(err);
930 }
931 
932 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
933 {
934 	struct in_device *in_dev = ifa->ifa_dev;
935 	struct in_ifaddr *ifa1;
936 
937 	if (!ifa->ifa_local)
938 		return NULL;
939 
940 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
941 		if (ifa1->ifa_mask == ifa->ifa_mask &&
942 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
943 		    ifa1->ifa_local == ifa->ifa_local)
944 			return ifa1;
945 	}
946 	return NULL;
947 }
948 
/* RTM_NEWADDR handler: insert a new address, or — when NLM_F_REPLACE
 * is given and the address already exists — update its route metric,
 * protocol and lifetimes in place.  Returns 0 or a negative errno
 * (-EEXIST when the address exists and replace was not requested).
 */
949 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
950 			    struct netlink_ext_ack *extack)
951 {
952 	struct net *net = sock_net(skb->sk);
953 	struct in_ifaddr *ifa;
954 	struct in_ifaddr *ifa_existing;
955 	__u32 valid_lft = INFINITY_LIFE_TIME;
956 	__u32 prefered_lft = INFINITY_LIFE_TIME;
957 
958 	ASSERT_RTNL();
959 
960 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
961 	if (IS_ERR(ifa))
962 		return PTR_ERR(ifa);
963 
964 	ifa_existing = find_matching_ifa(ifa);
965 	if (!ifa_existing) {
966 		/* It would be best to check for !NLM_F_CREATE here but
967 		 * userspace already relies on not having to provide this.
968 		 */
969 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
970 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
971 			int ret = ip_mc_autojoin_config(net, true, ifa);
972 
973 			if (ret < 0) {
974 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
975 				inet_free_ifa(ifa);
976 				return ret;
977 			}
978 		}
979 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
980 					 extack);
981 	} else {
		/* Address exists: the freshly parsed ifa is only a carrier
		 * for the new attribute values; free it and update the
		 * existing entry in place.
		 */
982 		u32 new_metric = ifa->ifa_rt_priority;
983 		u8 new_proto = ifa->ifa_proto;
984 
985 		inet_free_ifa(ifa);
986 
987 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
988 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
989 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
990 			return -EEXIST;
991 		}
992 		ifa = ifa_existing;
993 
994 		if (ifa->ifa_rt_priority != new_metric) {
995 			fib_modify_prefix_metric(ifa, new_metric);
996 			ifa->ifa_rt_priority = new_metric;
997 		}
998 
999 		ifa->ifa_proto = new_proto;
1000 
1001 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		/* Re-run the lifetime worker so new lifetimes take effect. */
1002 		cancel_delayed_work(&net->ipv4.addr_chk_work);
1003 		queue_delayed_work(system_power_efficient_wq,
1004 				   &net->ipv4.addr_chk_work, 0);
1005 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1006 	}
1007 	return 0;
1008 }
1009 
1010 /*
1011  *	Determine a default network mask, based on the IP address.
1012  */
1013 
1014 static int inet_abc_len(__be32 addr)
1015 {
1016 	int rc = -1;	/* Something else, probably a multicast. */
1017 
1018 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1019 		rc = 0;
1020 	else {
1021 		__u32 haddr = ntohl(addr);
1022 		if (IN_CLASSA(haddr))
1023 			rc = 8;
1024 		else if (IN_CLASSB(haddr))
1025 			rc = 16;
1026 		else if (IN_CLASSC(haddr))
1027 			rc = 24;
1028 		else if (IN_CLASSE(haddr))
1029 			rc = 32;
1030 	}
1031 
1032 	return rc;
1033 }
1034 
1035 
/* devinet_ioctl - handle the classic SIOC{G,S}IF* ioctls for IPv4.
 * @net: namespace the request applies to
 * @cmd: SIOCGIFADDR/BRDADDR/DSTADDR/NETMASK, the matching SIOCSIF*
 *	 setters, or SIOCSIFFLAGS
 * @ifr: user-supplied request; ifr_name may carry a "dev:alias" label
 *
 * Returns 0 on success or a negative errno.  Set operations require
 * CAP_NET_ADMIN in the netns' user namespace.  All address-list
 * manipulation happens under RTNL.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
	struct in_ifaddr __rcu **ifap = NULL;
	struct in_device *in_dev;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/* guarantee NUL termination before any string handling */
	ifr->ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* strip the ":alias" suffix so device lookup sees the base name */
	colon = strchr(ifr->ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr->ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr->ifr_name);
	if (!dev)
		goto done;

	/* restore the alias suffix: label matching below needs it */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */

			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* SIOCSIFADDR may create the address and SIOCSIFFLAGS operates on
	 * the device itself, so only those may proceed without a match.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_local;
		break;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		break;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_address;
		break;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_mask;
		break;

	case SIOCSIFFLAGS:
		if (colon) {
			/* flag change on an alias: clearing IFF_UP
			 * deletes the alias address, nothing else to do
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr->ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			if (!in_dev)
				break;
			ifa = inet_alloc_ifa(in_dev);
			if (!ifa)
				break;

			if (colon)
				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* address changed: delete and re-insert so
			 * notifiers and routing stay consistent
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* derive a classful prefix; prefixes >= 31 get no
			 * broadcast address
			 */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
}
1275 
/* inet_gifconf - fill a SIOCGIFCONF-style buffer with @dev's IPv4 addresses.
 * @dev: device whose addresses are reported
 * @buf: user buffer, or NULL to only compute the space needed
 * @len: bytes remaining in @buf
 * @size: bytes to emit per entry (must be <= sizeof(struct ifreq))
 *
 * Returns the number of bytes written (or needed, when @buf is NULL),
 * or -EFAULT if a copy to userspace failed.  Uses RTNL-protected
 * address-list traversal.
 */
int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	const struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (WARN_ON(size > sizeof(struct ifreq)))
		goto out;

	if (!in_dev)
		goto out;

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		/* NULL buffer: caller only wants the required size */
		if (!buf) {
			done += size;
			continue;
		}
		if (len < size)
			break;
		memset(&ifr, 0, sizeof(struct ifreq));
		strcpy(ifr.ifr_name, ifa->ifa_label);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf + done, &ifr, size)) {
			done = -EFAULT;
			break;
		}
		len  -= size;
		done += size;
	}
out:
	return done;
}
1313 
1314 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1315 				 int scope)
1316 {
1317 	const struct in_ifaddr *ifa;
1318 
1319 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1320 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1321 			continue;
1322 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1323 		    ifa->ifa_scope <= scope)
1324 			return ifa->ifa_local;
1325 	}
1326 
1327 	return 0;
1328 }
1329 
/* inet_select_addr - choose an IPv4 source address on (or via) @dev.
 * @dev: preferred outgoing device
 * @dst: destination being routed to, or 0 for "any"
 * @scope: maximum acceptable address scope
 *
 * Primary addresses on @dev matching @dst's subnet win; otherwise the
 * first in-scope primary address is used.  If @dev yields nothing, the
 * VRF master (if any) is tried, then every device in the namespace.
 * Returns 0 when no address qualifies.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	unsigned char localnet_scope = RT_SCOPE_HOST;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	/* with route_localnet, host-scoped addresses act as link-scoped */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
			continue;
		if (min(ifa->ifa_scope, localnet_scope) > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* remember the first in-scope address as a fallback */
		if (!addr)
			addr = ifa->ifa_local;
	}

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1399 
/* confirm_addr_indev - search one in_device for a confirmable address.
 * @in_dev: device to scan (uses RCU list traversal)
 * @dst: if non-zero, a matching address must share @dst's subnet
 * @local: if non-zero, this exact local address must exist
 * @scope: maximum allowed scope when autoselecting (@local == 0)
 *
 * Two conditions must both be met: an in-scope candidate address
 * ("addr") and a subnet match for the wildcards ("same").  Returns the
 * confirmed address or 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	/* with route_localnet, host-scoped addresses act as link-scoped */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		/* first in-scope (or exactly-matching) local address */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1443 
1444 /*
1445  * Confirm that local IP address exists using wildcards:
1446  * - net: netns to check, cannot be NULL
1447  * - in_dev: only on this interface, NULL=any interface
1448  * - dst: only in the same subnet as dst, 0=any dst
1449  * - local: address, 0=autoselect the local address
1450  * - scope: maximum allowed scope value for the local address
1451  */
1452 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1453 			 __be32 dst, __be32 local, int scope)
1454 {
1455 	__be32 addr = 0;
1456 	struct net_device *dev;
1457 
1458 	if (in_dev)
1459 		return confirm_addr_indev(in_dev, dst, local, scope);
1460 
1461 	rcu_read_lock();
1462 	for_each_netdev_rcu(net, dev) {
1463 		in_dev = __in_dev_get_rcu(dev);
1464 		if (in_dev) {
1465 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1466 			if (addr)
1467 				break;
1468 		}
1469 	}
1470 	rcu_read_unlock();
1471 
1472 	return addr;
1473 }
1474 EXPORT_SYMBOL(inet_confirm_addr);
1475 
1476 /*
1477  *	Device notifier
1478  */
1479 
/* Subscribe @nb to IPv4 address change events (blocking chain). */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1485 
/* Remove @nb from the IPv4 address notification chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1491 
/* Subscribe @nb to the IPv4 address validator chain (blocking). */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1497 
/* Remove @nb from the IPv4 address validator chain. */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
	    nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1504 
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
 * Uses RTNL-protected traversal; emits RTM_NEWADDR for every address.
 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* first (primary) label is just the new device name */
		if (named++ == 0)
			goto skip;
		dot = strchr(old, ':');
		if (!dot) {
			/* old label had no alias suffix: synthesize one */
			sprintf(old, ":%d", named);
			dot = old;
		}
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			/* truncate the new name so the suffix still fits */
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1533 
/* Send a gratuitous ARP for every IPv4 address on @dev so that peers
 * can refresh their neighbour caches after a link or address change.
 */
static void inetdev_send_gratuitous_arp(struct net_device *dev,
					struct in_device *in_dev)

{
	const struct in_ifaddr *ifa;

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		/* sender and target IP are both our own address: GARP */
		arp_send(ARPOP_REQUEST, ETH_P_ARP,
			 ifa->ifa_local, dev,
			 ifa->ifa_local, NULL,
			 dev->dev_addr, NULL);
	}
}
1547 
/* Netdevice notifier: keeps the per-device IPv4 state (in_device) in
 * sync with device lifecycle events.  Called only under RTNL semaphore.
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				/* loopback skips xfrm policy processing */
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* should not happen with in_dev already present */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* auto-configure 127.0.0.1/8 on loopback */
			struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);

			if (ifa) {
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		fallthrough;
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		fallthrough;
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		fallthrough;
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1640 
/* Notifier block delivering netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1644 
1645 static size_t inet_nlmsg_size(void)
1646 {
1647 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1648 	       + nla_total_size(4) /* IFA_ADDRESS */
1649 	       + nla_total_size(4) /* IFA_LOCAL */
1650 	       + nla_total_size(4) /* IFA_BROADCAST */
1651 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1652 	       + nla_total_size(4)  /* IFA_FLAGS */
1653 	       + nla_total_size(1)  /* IFA_PROTO */
1654 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1655 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1656 }
1657 
1658 static inline u32 cstamp_delta(unsigned long cstamp)
1659 {
1660 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1661 }
1662 
1663 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1664 			 unsigned long tstamp, u32 preferred, u32 valid)
1665 {
1666 	struct ifa_cacheinfo ci;
1667 
1668 	ci.cstamp = cstamp_delta(cstamp);
1669 	ci.tstamp = cstamp_delta(tstamp);
1670 	ci.ifa_prefered = preferred;
1671 	ci.ifa_valid = valid;
1672 
1673 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1674 }
1675 
/* inet_fill_ifaddr - build one RTM address message describing @ifa.
 * @skb: destination buffer
 * @ifa: address to report
 * @args: portid/seq/event/flags plus an optional target-netns id
 *
 * Returns 0 on success or -EMSGSIZE when @skb lacks room; a partially
 * built message is cancelled before returning.  Fields that may change
 * concurrently (flags, timestamps, lifetimes) are read with READ_ONCE.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
			    struct inet_fill_args *args)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	unsigned long tstamp;
	u32 preferred, valid;
	u32 flags;

	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
			args->flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;

	flags = READ_ONCE(ifa->ifa_flags);
	/* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
	 * The 32bit value is given in IFA_FLAGS attribute.
	 */
	ifm->ifa_flags = (__u8)flags;

	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (args->netnsid >= 0 &&
	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
		goto nla_put_failure;

	tstamp = READ_ONCE(ifa->ifa_tstamp);
	if (!(flags & IFA_F_PERMANENT)) {
		/* convert stored lifetimes into "time remaining" */
		preferred = READ_ONCE(ifa->ifa_preferred_lft);
		valid = READ_ONCE(ifa->ifa_valid_lft);
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* zero-valued optional attributes are simply omitted */
	if ((ifa->ifa_address &&
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    (ifa->ifa_proto &&
	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
	    nla_put_u32(skb, IFA_FLAGS, flags) ||
	    (ifa->ifa_rt_priority &&
	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
			  preferred, valid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1753 
/* Strict validation of an RTM_GETADDR dump request.
 * Fills @fillargs from the header and, when IFA_TARGET_NETNSID is
 * present, points *@tgt_net at the requested namespace (a reference is
 * taken; the dump caller is responsible for put_net()).
 * Returns 0, or a negative errno with an extack message set.
 */
static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
				      struct inet_fill_args *fillargs,
				      struct net **tgt_net, struct sock *sk,
				      struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[IFA_MAX+1];
	struct ifaddrmsg *ifm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
		return -EINVAL;
	}

	ifm = nlmsg_data(nlh);
	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
		return -EINVAL;
	}

	fillargs->ifindex = ifm->ifa_index;
	if (fillargs->ifindex) {
		/* single-device dump: mark the reply as filtered */
		cb->answer_flags |= NLM_F_DUMP_FILTERED;
		fillargs->flags |= NLM_F_DUMP_FILTERED;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
					    ifa_ipv4_policy, extack);
	if (err < 0)
		return err;

	for (i = 0; i <= IFA_MAX; ++i) {
		if (!tb[i])
			continue;

		if (i == IFA_TARGET_NETNSID) {
			struct net *net;

			fillargs->netnsid = nla_get_s32(tb[i]);

			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
			if (IS_ERR(net)) {
				fillargs->netnsid = -1;
				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
				return PTR_ERR(net);
			}
			*tgt_net = net;
		} else {
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	return 0;
}
1810 
/* Dump the addresses of one in_device into @skb, resuming at *s_ip_idx.
 * On a complete traversal *s_ip_idx is reset to 0; when inet_fill_ifaddr()
 * fails (e.g. @skb full), it records how far we got so a later call can
 * resume.  Returns 0 or the negative error from inet_fill_ifaddr().
 */
static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
			    struct netlink_callback *cb, int *s_ip_idx,
			    struct inet_fill_args *fillargs)
{
	struct in_ifaddr *ifa;
	int ip_idx = 0;
	int err;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		/* skip entries already emitted in a previous pass */
		if (ip_idx < *s_ip_idx) {
			ip_idx++;
			continue;
		}
		err = inet_fill_ifaddr(skb, ifa, fillargs);
		if (err < 0)
			goto done;

		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
		ip_idx++;
	}
	err = 0;
	ip_idx = 0;
done:
	*s_ip_idx = ip_idx;

	return err;
}
1838 
1839 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1840  */
1841 static u32 inet_base_seq(const struct net *net)
1842 {
1843 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1844 		  READ_ONCE(net->dev_base_seq);
1845 
1846 	/* Must not return 0 (see nl_dump_check_consistent()).
1847 	 * Chose a value far away from 0.
1848 	 */
1849 	if (!res)
1850 		res = 0x80000000;
1851 	return res;
1852 }
1853 
/* RTM_GETADDR dump handler: walk (a filtered subset of) devices in the
 * target namespace and emit one message per address.  Resume state
 * (device ifindex, per-device address index) lives in cb->ctx.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	struct {
		unsigned long ifindex;
		int ip_idx;
	} *ctx = (void *)cb->ctx;
	struct in_device *in_dev;
	struct net_device *dev;
	int err = 0;

	rcu_read_lock();
	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto done;

		/* filtered dump: only the requested device */
		if (fillargs.ifindex) {
			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto done;
			}
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto done;
			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
					       &fillargs);
			goto done;
		}
	}

	/* lets userspace detect changes during a multi-part dump */
	cb->seq = inet_base_seq(tgt_net);

	for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;
		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
				       &fillargs);
		if (err < 0)
			goto done;
	}
done:
	/* drop the reference taken for IFA_TARGET_NETNSID */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);
	rcu_read_unlock();
	return err;
}
1913 
/* Notify RTNLGRP_IPV4_IFADDR listeners of an address event.
 * @event: the RTM_* address event to announce (e.g. RTM_NEWADDR)
 * @nlh/@portid: request that triggered the change, if any, so the
 *		 requester can match the echoed notification
 */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 portid)
{
	struct inet_fill_args fillargs = {
		.portid = portid,
		.seq = nlh ? nlh->nlmsg_seq : 0,
		.event = event,
		.flags = 0,
		.netnsid = -1,
	};
	struct sk_buff *skb;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, &fillargs);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
1945 
1946 static size_t inet_get_link_af_size(const struct net_device *dev,
1947 				    u32 ext_filter_mask)
1948 {
1949 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1950 
1951 	if (!in_dev)
1952 		return 0;
1953 
1954 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1955 }
1956 
/* Fill IFLA_INET_CONF (the full IPv4 devconf array) for a link message.
 * Returns 0, -ENODATA when the device has no IPv4 state, or -EMSGSIZE
 * when @skb is full.
 */
static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
			     u32 ext_filter_mask)
{
	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	struct nlattr *nla;
	int i;

	if (!in_dev)
		return -ENODATA;

	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
	if (!nla)
		return -EMSGSIZE;

	/* snapshot each knob; values may change concurrently (READ_ONCE) */
	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);

	return 0;
}
1976 
/* Policy for the IFLA_INET nested attribute on link messages. */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1980 
/* Validate an IFLA_INET nested attribute before it is applied: the
 * device must have IPv4 enabled, and every IFLA_INET_CONF entry must be
 * a known devconf id carrying at least 4 bytes of payload.
 */
static int inet_validate_link_af(const struct net_device *dev,
				 const struct nlattr *nla,
				 struct netlink_ext_ack *extack)
{
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int err, rem;

	if (dev && !__in_dev_get_rtnl(dev))
		return -EAFNOSUPPORT;

	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
					  inet_af_policy, extack);
	if (err < 0)
		return err;

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
			int cfgid = nla_type(a);

			if (nla_len(a) < 4)
				return -EINVAL;

			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
				return -EINVAL;
		}
	}

	return 0;
}
2010 
/* Apply an IFLA_INET attribute: copy each IFLA_INET_CONF entry into the
 * device's IPv4 devconf.  NOTE(review): parsing here uses no policy —
 * presumably validation already ran via inet_validate_link_af().
 */
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
			    struct netlink_ext_ack *extack)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
		return -EINVAL;

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}
2031 
2032 static int inet_netconf_msgsize_devconf(int type)
2033 {
2034 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2035 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2036 	bool all = false;
2037 
2038 	if (type == NETCONFA_ALL)
2039 		all = true;
2040 
2041 	if (all || type == NETCONFA_FORWARDING)
2042 		size += nla_total_size(4);
2043 	if (all || type == NETCONFA_RP_FILTER)
2044 		size += nla_total_size(4);
2045 	if (all || type == NETCONFA_MC_FORWARDING)
2046 		size += nla_total_size(4);
2047 	if (all || type == NETCONFA_BC_FORWARDING)
2048 		size += nla_total_size(4);
2049 	if (all || type == NETCONFA_PROXY_NEIGH)
2050 		size += nla_total_size(4);
2051 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2052 		size += nla_total_size(4);
2053 
2054 	return size;
2055 }
2056 
/* Build a netconf message for @devconf into @skb.  @type selects a
 * single attribute (or NETCONFA_ALL for every one); a NULL @devconf
 * yields a header-only message carrying just the ifindex.  Returns 0 or
 * -EMSGSIZE, cancelling the partial message on failure.
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     const struct ipv4_devconf *devconf,
				     u32 portid, u32 seq, int event,
				     unsigned int flags, int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	if (!devconf)
		goto out;

	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_BC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF_RO(*devconf,
					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2117 
/* Broadcast a devconf change (@type on @ifindex) to
 * RTNLGRP_IPV4_NETCONF listeners.
 */
void inet_netconf_notify_devconf(struct net *net, int event, int type,
				 int ifindex, struct ipv4_devconf *devconf)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
					event, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
2141 
/* Attribute policy for IPv4 netconf requests; all values are ints. */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2149 
/* Validate a netconf get request.  Non-strict sockets get the legacy
 * lenient parse; strict ones additionally reject every attribute other
 * than NETCONFA_IFINDEX.  Parsed attributes are returned in @tb.
 */
static int inet_netconf_valid_get_req(struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
				      struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
					      tb, NETCONFA_MAX,
					      devconf_ipv4_policy, extack);

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
					    tb, NETCONFA_MAX,
					    devconf_ipv4_policy, extack);
	if (err)
		return err;

	for (i = 0; i <= NETCONFA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETCONFA_IFINDEX:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
			return -EINVAL;
		}
	}

	return 0;
}
2188 
/* RTM_GETNETCONF doit handler: look up the devconf selected by the
 * mandatory NETCONFA_IFINDEX attribute (a real ifindex, or the pseudo
 * indexes NETCONFA_IFINDEX_ALL / NETCONFA_IFINDEX_DEFAULT) and unicast a
 * full (NETCONFA_ALL) snapshot back to the requester.
 */
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX + 1];
	const struct ipv4_devconf *devconf;
	struct in_device *in_dev = NULL;
	struct net_device *dev = NULL;
	struct sk_buff *skb;
	int ifindex;
	int err;

	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
	if (err)
		return err;

	if (!tb[NETCONFA_IFINDEX])
		return -EINVAL;

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	switch (ifindex) {
	case NETCONFA_IFINDEX_ALL:
		devconf = net->ipv4.devconf_all;
		break;
	case NETCONFA_IFINDEX_DEFAULT:
		devconf = net->ipv4.devconf_dflt;
		break;
	default:
		err = -ENODEV;
		/* Both references taken here are dropped at errout below. */
		dev = dev_get_by_index(net, ifindex);
		if (dev)
			in_dev = in_dev_get(dev);
		if (!in_dev)
			goto errout;
		devconf = &in_dev->cnf;
		break;
	}

	err = -ENOBUFS;
	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	/* dev_put() tolerates NULL; in_dev_put() does not, hence the check */
	if (in_dev)
		in_dev_put(in_dev);
	dev_put(dev);
	return err;
}
2250 
/* RTM_GETNETCONF dump handler: emit one RTM_NEWNETCONF message per
 * in_device in the netns, followed by the "all" and "default" pseudo
 * entries.  Resumable: the per-device walk position and how many pseudo
 * entries were already sent are kept in cb->ctx across invocations.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct {
		unsigned long ifindex;		/* device-walk resume point */
		unsigned int all_default;	/* 0/1/2 pseudo entries sent */
	} *ctx = (void *)cb->ctx;
	const struct in_device *in_dev;
	struct net_device *dev;
	int err = 0;

	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
			return -EINVAL;
		}

		/* A netconf dump takes no attributes at all. */
		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

	rcu_read_lock();
	for_each_netdev_dump(net, dev, ctx->ifindex) {
		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;
		err = inet_netconf_fill_devconf(skb, dev->ifindex,
						&in_dev->cnf,
						NETLINK_CB(cb->skb).portid,
						nlh->nlmsg_seq,
						RTM_NEWNETCONF, NLM_F_MULTI,
						NETCONFA_ALL);
		if (err < 0)
			goto done;
	}
	if (ctx->all_default == 0) {
		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
						net->ipv4.devconf_all,
						NETLINK_CB(cb->skb).portid,
						nlh->nlmsg_seq,
						RTM_NEWNETCONF, NLM_F_MULTI,
						NETCONFA_ALL);
		if (err < 0)
			goto done;
		ctx->all_default++;
	}
	if (ctx->all_default == 1) {
		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
						net->ipv4.devconf_dflt,
						NETLINK_CB(cb->skb).portid,
						nlh->nlmsg_seq,
						RTM_NEWNETCONF, NLM_F_MULTI,
						NETCONFA_ALL);
		if (err < 0)
			goto done;
		ctx->all_default++;
	}
done:
	rcu_read_unlock();
	return err;
}
2319 
2320 #ifdef CONFIG_SYSCTL
2321 
/* Propagate a changed "default" devconf value (entry index @i) to every
 * in_device in @net that has not had that entry explicitly set — explicit
 * per-device writes are tracked in the cnf.state bitmap.
 */
static void devinet_copy_dflt_conf(struct net *net, int i)
{
	struct net_device *dev;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct in_device *in_dev;

		in_dev = __in_dev_get_rcu(dev);
		/* skip devices whose value was set individually */
		if (in_dev && !test_bit(i, in_dev->cnf.state))
			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
	}
	rcu_read_unlock();
}
2336 
/* Apply a write to the "all" forwarding sysctl: mirror the new value into
 * the "default" table and every in_device, disable LRO when forwarding is
 * turned on, and send a netconf notification for each entry touched.
 * Called with RTNL locked.
 */
static void inet_forward_change(struct net *net)
{
	struct net_device *dev;
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);

	/* A forwarding host should not honour ICMP redirects. */
	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_ALL,
				    net->ipv4.devconf_all);
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_DEFAULT,
				    net->ipv4.devconf_dflt);

	for_each_netdev(net, dev) {
		struct in_device *in_dev;

		/* LRO breaks packets that must be forwarded unmodified */
		if (on)
			dev_disable_lro(dev);

		in_dev = __in_dev_get_rtnl(dev);
		if (in_dev) {
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    dev->ifindex, &in_dev->cnf);
		}
	}
}
2369 
2370 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2371 {
2372 	if (cnf == net->ipv4.devconf_dflt)
2373 		return NETCONFA_IFINDEX_DEFAULT;
2374 	else if (cnf == net->ipv4.devconf_all)
2375 		return NETCONFA_IFINDEX_ALL;
2376 	else {
2377 		struct in_device *idev
2378 			= container_of(cnf, struct in_device, cnf);
2379 		return idev->dev->ifindex;
2380 	}
2381 }
2382 
/* Generic proc handler for per-device IPv4 sysctls.  Performs the integer
 * read/write via proc_dointvec(), then on writes: marks the entry as
 * explicitly set, propagates "default" writes to unset devices, flushes
 * the route cache for entries that affect routing decisions, and emits
 * netconf notifications for the attributes exported over netlink.
 */
static int devinet_conf_proc(const struct ctl_table *ctl, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* index of this entry within cnf->data[] */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		/* remember that this entry was set explicitly */
		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		/* Disabling these invalidates cached routing decisions. */
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
		    new_value != old_value)
			rt_cache_flush(net);

		/* The remaining entries are exported via netconf. */
		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
						    ifindex, cnf);
		}
	}

	return ret;
}
2434 
/* proc handler for the forwarding sysctls (per-device, "all", "default"
 * and /proc/sys/net/ipv4/ip_forward).  Writes require CAP_NET_ADMIN in
 * the owning user namespace.  Side effects that touch other devices need
 * RTNL; if the lock cannot be taken the original value and file position
 * are restored and the syscall is restarted.
 */
static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	struct net *net = ctl->extra2;
	int ret;

	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				/* "all" fans out to every device */
				inet_forward_change(net);
			} else {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				/* LRO is incompatible with forwarding */
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			/* "default" only affects future devices: notify only */
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2481 
2482 static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write,
2483 				void *buffer, size_t *lenp, loff_t *ppos)
2484 {
2485 	int *valp = ctl->data;
2486 	int val = *valp;
2487 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2488 	struct net *net = ctl->extra2;
2489 
2490 	if (write && *valp != val)
2491 		rt_cache_flush(net);
2492 
2493 	return ret;
2494 }
2495 
/* Template for one devconf sysctl entry.  .data points into the global
 * ipv4_devconf template; __devinet_sysctl_register() rebases it onto the
 * actual per-netns/per-device devconf when the table is duplicated.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry using the generic devconf handler. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry (value is managed by the kernel, not userspace). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a custom proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry that flushes the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2518 
/* Master template for the net/ipv4/conf/<dev>/ sysctl directory; it is
 * duplicated (kmemdup) and rebased for each devconf instance registered.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2572 
2573 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2574 				     int ifindex, struct ipv4_devconf *p)
2575 {
2576 	int i;
2577 	struct devinet_sysctl_table *t;
2578 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2579 
2580 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2581 	if (!t)
2582 		goto out;
2583 
2584 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2585 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2586 		t->devinet_vars[i].extra1 = p;
2587 		t->devinet_vars[i].extra2 = net;
2588 	}
2589 
2590 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2591 
2592 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2593 	if (!t->sysctl_header)
2594 		goto free;
2595 
2596 	p->sysctl = t;
2597 
2598 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2599 				    ifindex, p);
2600 	return 0;
2601 
2602 free:
2603 	kfree(t);
2604 out:
2605 	return -ENOMEM;
2606 }
2607 
2608 static void __devinet_sysctl_unregister(struct net *net,
2609 					struct ipv4_devconf *cnf, int ifindex)
2610 {
2611 	struct devinet_sysctl_table *t = cnf->sysctl;
2612 
2613 	if (t) {
2614 		cnf->sysctl = NULL;
2615 		unregister_net_sysctl_table(t->sysctl_header);
2616 		kfree(t);
2617 	}
2618 
2619 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2620 }
2621 
2622 static int devinet_sysctl_register(struct in_device *idev)
2623 {
2624 	int err;
2625 
2626 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2627 		return -EINVAL;
2628 
2629 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2630 	if (err)
2631 		return err;
2632 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2633 					idev->dev->ifindex, &idev->cnf);
2634 	if (err)
2635 		neigh_sysctl_unregister(idev->arp_parms);
2636 	return err;
2637 }
2638 
2639 static void devinet_sysctl_unregister(struct in_device *idev)
2640 {
2641 	struct net *net = dev_net(idev->dev);
2642 
2643 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2644 	neigh_sysctl_unregister(idev->arp_parms);
2645 }
2646 
/* Template for /proc/sys/net/ipv4/ip_forward — an alias for the "all"
 * forwarding value.  devinet_init_net() duplicates it and repoints
 * .data/.extra1/.extra2 at the per-netns devconf_all and netns.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
};
2659 #endif
2660 
/* Per-netns init: allocate the inet address hash table and the "all" /
 * "default" devconf tables (seeded per the net.core.devconf_inherit_init_net
 * policy), register their sysctl trees plus ip_forward, and arm the address
 * lifetime checker.  Unwinds everything in reverse on failure.
 */
static __net_init int devinet_init_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *forw_hdr;
	struct ctl_table *tbl;
#endif
	struct ipv4_devconf *all, *dflt;
	int err;
	int i;

	err = -ENOMEM;
	net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->ipv4.inet_addr_lst)
		goto err_alloc_hash;

	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;

	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;

	/* point ip_forward at this netns' "all" forwarding value */
	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
#endif

	if (!net_eq(net, &init_net)) {
		switch (net_inherit_devconf()) {
		case 3:
			/* copy from the current netns */
			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt,
			       current->nsproxy->net_ns->ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 0:
		case 1:
			/* copy from init_net */
			memcpy(all, init_net.ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt, init_net.ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 2:
			/* use compiled values */
			break;
		}
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
					  ARRAY_SIZE(ctl_forward_entry));
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);

	INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	kfree(tbl);
err_alloc_ctl:
#endif
	kfree(dflt);
err_alloc_dflt:
	kfree(all);
err_alloc_all:
	kfree(net->ipv4.inet_addr_lst);
err_alloc_hash:
	return err;
}
2764 
/* Per-netns teardown: stop the address lifetime worker, unregister the
 * sysctl trees created in devinet_init_net(), and free the devconf tables
 * and address hash.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	const struct ctl_table *tbl;
#endif

	cancel_delayed_work_sync(&net->ipv4.addr_chk_work);

#ifdef CONFIG_SYSCTL
	/* grab the duplicated ip_forward table so it can be freed below */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
	kfree(net->ipv4.inet_addr_lst);
}
2786 
/* Per-network-namespace setup/teardown hooks. */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2791 
/* AF_INET hooks for the IFLA_AF_SPEC portion of RTM_*LINK messages. */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2799 
/* Boot-time init: wire up the pernet ops, the netdevice notifier, the
 * AF_INET link-af ops and the rtnetlink handlers for address and netconf
 * messages.
 */
void __init devinet_init(void)
{
	register_pernet_subsys(&devinet_ops);
	register_netdevice_notifier(&ip_netdev_notifier);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
		      RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf,
		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
}
2815