xref: /linux/net/ipv4/devinet.c (revision 4e887471e8e3a513607495d18333c44f59a82c5a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
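/* These flag bits are only meaningful for IPv6 addresses;
 * __inet_insert_ifa() masks them off so they can never end up set on an
 * IPv4 ifaddr.
 */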
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
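/* Every IPv4 address in the system is hashed here by its local address.
 * Insertion and removal require RTNL (inet_hash_insert() and
 * inet_hash_remove() below); lookups walk the chains under RCU.
 */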
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU or RTNL.
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fall back to the FIB local table so that communication
166 		 * over loopback subnets works.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
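/* Illustrative use only: a caller that only needs the device inside an
 * RCU section can pass devref=false,
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, addr, false);
 *	if (dev)
 *		...;
 *	rcu_read_unlock();
 *
 * whereas devref=true takes a reference that the caller must later drop
 * with dev_put(dev).
 */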
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198 
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 			 struct in_ifaddr __rcu **ifap,
203 			 int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210 	return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
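/* inet_free_ifa() defers the actual kfree() by an RCU grace period so
 * that readers still walking the ifa lists or the address hash under
 * rcu_read_lock() never see freed memory; the in_dev reference held by
 * the ifa is dropped at the same point.
 */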
236 
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240 
241 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242 	kfree(idev);
243 }
244 
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247 	struct net_device *dev = idev->dev;
248 
249 	WARN_ON(idev->ifa_list);
250 	WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254 	netdev_put(dev, &idev->dev_tracker);
255 	if (!idev->dead)
256 		pr_err("Freeing alive in_device %p\n", idev);
257 	else
258 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261 
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264 	struct in_device *in_dev;
265 	int err = -ENOMEM;
266 
267 	ASSERT_RTNL();
268 
269 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270 	if (!in_dev)
271 		goto out;
272 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 			sizeof(in_dev->cnf));
274 	in_dev->cnf.sysctl = NULL;
275 	in_dev->dev = dev;
276 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 	if (!in_dev->arp_parms)
278 		goto out_kfree;
279 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 		dev_disable_lro(dev);
281 	/* Reference in_dev->dev */
282 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 	/* Account for reference dev->ip_ptr (below) */
284 	refcount_set(&in_dev->refcnt, 1);
285 
286 	err = devinet_sysctl_register(in_dev);
287 	if (err) {
288 		in_dev->dead = 1;
289 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290 		in_dev_put(in_dev);
291 		in_dev = NULL;
292 		goto out;
293 	}
294 	ip_mc_init_dev(in_dev);
295 	if (dev->flags & IFF_UP)
296 		ip_mc_up(in_dev);
297 
298 	/* we can receive as soon as ip_ptr is set -- do this last */
299 	rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301 	return in_dev ?: ERR_PTR(err);
302 out_kfree:
303 	kfree(in_dev);
304 	in_dev = NULL;
305 	goto out;
306 }
307 
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310 	struct net_device *dev;
311 	struct in_ifaddr *ifa;
312 
313 	ASSERT_RTNL();
314 
315 	dev = in_dev->dev;
316 
317 	in_dev->dead = 1;
318 
319 	ip_mc_destroy_dev(in_dev);
320 
321 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323 		inet_free_ifa(ifa);
324 	}
325 
326 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 
328 	devinet_sysctl_unregister(in_dev);
329 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330 	arp_ifdown(dev);
331 
332 	in_dev_put(in_dev);
333 }
334 
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337 	const struct in_ifaddr *ifa;
338 
339 	rcu_read_lock();
340 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 		if (inet_ifa_match(a, ifa)) {
342 			if (!b || inet_ifa_match(b, ifa)) {
343 				rcu_read_unlock();
344 				return 1;
345 			}
346 		}
347 	}
348 	rcu_read_unlock();
349 	return 0;
350 }
351 
352 static void __inet_del_ifa(struct in_device *in_dev,
353 			   struct in_ifaddr __rcu **ifap,
354 			   int destroy, struct nlmsghdr *nlh, u32 portid)
355 {
356 	struct in_ifaddr *promote = NULL;
357 	struct in_ifaddr *ifa, *ifa1;
358 	struct in_ifaddr __rcu **last_prim;
359 	struct in_ifaddr *prev_prom = NULL;
360 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361 
362 	ASSERT_RTNL();
363 
364 	ifa1 = rtnl_dereference(*ifap);
365 	last_prim = ifap;
366 	if (in_dev->dead)
367 		goto no_promotions;
368 
369 	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
370 	 * unless alias promotion is enabled.
371 	 */
372 
373 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375 
376 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
377 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378 			    ifa1->ifa_scope <= ifa->ifa_scope)
379 				last_prim = &ifa->ifa_next;
380 
381 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382 			    ifa1->ifa_mask != ifa->ifa_mask ||
383 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
384 				ifap1 = &ifa->ifa_next;
385 				prev_prom = ifa;
386 				continue;
387 			}
388 
389 			if (!do_promote) {
390 				inet_hash_remove(ifa);
391 				*ifap1 = ifa->ifa_next;
392 
393 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394 				blocking_notifier_call_chain(&inetaddr_chain,
395 						NETDEV_DOWN, ifa);
396 				inet_free_ifa(ifa);
397 			} else {
398 				promote = ifa;
399 				break;
400 			}
401 		}
402 	}
403 
404 	/* On promotion all secondaries from the subnet change their
405 	 * primary IP; we must remove all their routes silently and
406 	 * later add them back with the new prefsrc. Do this while
407 	 * all addresses are still on the device list.
408 	 */
409 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410 		if (ifa1->ifa_mask == ifa->ifa_mask &&
411 		    inet_ifa_match(ifa1->ifa_address, ifa))
412 			fib_del_ifaddr(ifa, ifa1);
413 	}
414 
415 no_promotions:
416 	/* 2. Unlink it */
417 
418 	*ifap = ifa1->ifa_next;
419 	inet_hash_remove(ifa1);
420 
421 	/* 3. Announce address deletion */
422 
423 	/* Send the message first, then call the notifier.
424 	   At first sight, the FIB update triggered by the notifier
425 	   will refer to an already deleted ifaddr, which could confuse
426 	   netlink listeners. It does not: if gated sees the route deleted
427 	   while it still thinks the ifaddr is valid, it will try to
428 	   restore the deleted routes... Grr.
429 	   So this order is correct.
430 	 */
431 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433 
434 	if (promote) {
435 		struct in_ifaddr *next_sec;
436 
437 		next_sec = rtnl_dereference(promote->ifa_next);
438 		if (prev_prom) {
439 			struct in_ifaddr *last_sec;
440 
441 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442 
443 			last_sec = rtnl_dereference(*last_prim);
444 			rcu_assign_pointer(promote->ifa_next, last_sec);
445 			rcu_assign_pointer(*last_prim, promote);
446 		}
447 
448 		promote->ifa_flags &= ~IFA_F_SECONDARY;
449 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450 		blocking_notifier_call_chain(&inetaddr_chain,
451 				NETDEV_UP, promote);
452 		for (ifa = next_sec; ifa;
453 		     ifa = rtnl_dereference(ifa->ifa_next)) {
454 			if (ifa1->ifa_mask != ifa->ifa_mask ||
455 			    !inet_ifa_match(ifa1->ifa_address, ifa))
456 					continue;
457 			fib_add_ifaddr(ifa);
458 		}
459 
460 	}
461 	if (destroy)
462 		inet_free_ifa(ifa1);
463 }
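/* Rough sketch of the promotion case above, with documentation-only
 * example addresses: if promote_secondaries is enabled and the primary
 * 192.0.2.1/24 is deleted while 192.0.2.2/24 exists as a secondary,
 * 192.0.2.2 is promoted to primary (and its routes re-added with the
 * new prefsrc) instead of the whole subnet being torn down.
 */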
464 
465 static void inet_del_ifa(struct in_device *in_dev,
466 			 struct in_ifaddr __rcu **ifap,
467 			 int destroy)
468 {
469 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471 
472 static void check_lifetime(struct work_struct *work);
473 
474 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477 			     u32 portid, struct netlink_ext_ack *extack)
478 {
479 	struct in_ifaddr __rcu **last_primary, **ifap;
480 	struct in_device *in_dev = ifa->ifa_dev;
481 	struct in_validator_info ivi;
482 	struct in_ifaddr *ifa1;
483 	int ret;
484 
485 	ASSERT_RTNL();
486 
487 	if (!ifa->ifa_local) {
488 		inet_free_ifa(ifa);
489 		return 0;
490 	}
491 
492 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493 	last_primary = &in_dev->ifa_list;
494 
495 	/* Don't set IPv6-only flags on IPv4 addresses */
496 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497 
498 	ifap = &in_dev->ifa_list;
499 	ifa1 = rtnl_dereference(*ifap);
500 
501 	while (ifa1) {
502 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503 		    ifa->ifa_scope <= ifa1->ifa_scope)
504 			last_primary = &ifa1->ifa_next;
505 		if (ifa1->ifa_mask == ifa->ifa_mask &&
506 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507 			if (ifa1->ifa_local == ifa->ifa_local) {
508 				inet_free_ifa(ifa);
509 				return -EEXIST;
510 			}
511 			if (ifa1->ifa_scope != ifa->ifa_scope) {
512 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513 				inet_free_ifa(ifa);
514 				return -EINVAL;
515 			}
516 			ifa->ifa_flags |= IFA_F_SECONDARY;
517 		}
518 
519 		ifap = &ifa1->ifa_next;
520 		ifa1 = rtnl_dereference(*ifap);
521 	}
522 
523 	/* Allow any devices that wish to register ifaddr validators to weigh
524 	 * in now, before changes are committed.  The rtnl lock is serializing
525 	 * access here, so the state should not change between a validator call
526 	 * and a final notify on commit.  This isn't invoked on promotion under
527 	 * the assumption that validators are checking the address itself, and
528 	 * not the flags.
529 	 */
530 	ivi.ivi_addr = ifa->ifa_address;
531 	ivi.ivi_dev = ifa->ifa_dev;
532 	ivi.extack = extack;
533 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534 					   NETDEV_UP, &ivi);
535 	ret = notifier_to_errno(ret);
536 	if (ret) {
537 		inet_free_ifa(ifa);
538 		return ret;
539 	}
540 
541 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542 		ifap = last_primary;
543 
544 	rcu_assign_pointer(ifa->ifa_next, *ifap);
545 	rcu_assign_pointer(*ifap, ifa);
546 
547 	inet_hash_insert(dev_net(in_dev->dev), ifa);
548 
549 	cancel_delayed_work(&check_lifetime_work);
550 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551 
552 	/* Send the message first, then call the notifier.
553 	   The notifier will trigger a FIB update, so that
554 	   netlink listeners will know about the new ifaddr. */
555 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557 
558 	return 0;
559 }
560 
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
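/* Both the ioctl path (inet_set_ifa() below) and the netlink path
 * (inet_rtm_newaddr()) funnel into __inet_insert_ifa() above, always
 * under RTNL.
 */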
565 
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569 
570 	ASSERT_RTNL();
571 
572 	if (!in_dev) {
573 		inet_free_ifa(ifa);
574 		return -ENOBUFS;
575 	}
576 	ipv4_devconf_setall(in_dev);
577 	neigh_parms_data_state_setall(in_dev->arp_parms);
578 	if (ifa->ifa_dev != in_dev) {
579 		WARN_ON(ifa->ifa_dev);
580 		in_dev_hold(in_dev);
581 		ifa->ifa_dev = in_dev;
582 	}
583 	if (ipv4_is_loopback(ifa->ifa_local))
584 		ifa->ifa_scope = RT_SCOPE_HOST;
585 	return inet_insert_ifa(ifa);
586 }
587 
588 /* Caller must hold RCU or RTNL:
589  * we don't take a reference on the found in_device.
590  */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
592 {
593 	struct net_device *dev;
594 	struct in_device *in_dev = NULL;
595 
596 	rcu_read_lock();
597 	dev = dev_get_by_index_rcu(net, ifindex);
598 	if (dev)
599 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600 	rcu_read_unlock();
601 	return in_dev;
602 }
603 EXPORT_SYMBOL(inetdev_by_index);
604 
605 /* Called only from RTNL semaphored context. No locks. */
606 
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608 				    __be32 mask)
609 {
610 	struct in_ifaddr *ifa;
611 
612 	ASSERT_RTNL();
613 
614 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616 			return ifa;
617 	}
618 	return NULL;
619 }
620 
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622 				 const struct in_ifaddr *ifa)
623 {
624 #if defined(CONFIG_IP_MULTICAST)
625 	struct ip_mreqn mreq = {
626 		.imr_multiaddr.s_addr = ifa->ifa_address,
627 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
628 	};
629 	struct sock *sk = net->ipv4.mc_autojoin_sk;
630 	int ret;
631 
632 	ASSERT_RTNL();
633 
634 	lock_sock(sk);
635 	if (join)
636 		ret = ip_mc_join_group(sk, &mreq);
637 	else
638 		ret = ip_mc_leave_group(sk, &mreq);
639 	release_sock(sk);
640 
641 	return ret;
642 #else
643 	return -EOPNOTSUPP;
644 #endif
645 }
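/* For illustration: iproute2's "ip address add 239.1.1.1/32 dev eth0
 * autojoin" sets IFA_F_MCAUTOJOIN, which makes inet_rtm_newaddr() join
 * the group through this helper and inet_rtm_deladdr() leave it again
 * when the address is removed.  The address and device name are
 * examples only.
 */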
646 
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648 			    struct netlink_ext_ack *extack)
649 {
650 	struct net *net = sock_net(skb->sk);
651 	struct in_ifaddr __rcu **ifap;
652 	struct nlattr *tb[IFA_MAX+1];
653 	struct in_device *in_dev;
654 	struct ifaddrmsg *ifm;
655 	struct in_ifaddr *ifa;
656 	int err;
657 
658 	ASSERT_RTNL();
659 
660 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661 				     ifa_ipv4_policy, extack);
662 	if (err < 0)
663 		goto errout;
664 
665 	ifm = nlmsg_data(nlh);
666 	in_dev = inetdev_by_index(net, ifm->ifa_index);
667 	if (!in_dev) {
668 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669 		err = -ENODEV;
670 		goto errout;
671 	}
672 
673 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674 	     ifap = &ifa->ifa_next) {
675 		if (tb[IFA_LOCAL] &&
676 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677 			continue;
678 
679 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680 			continue;
681 
682 		if (tb[IFA_ADDRESS] &&
683 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685 			continue;
686 
687 		if (ipv4_is_multicast(ifa->ifa_address))
688 			ip_mc_autojoin_config(net, false, ifa);
689 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690 		return 0;
691 	}
692 
693 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694 	err = -EADDRNOTAVAIL;
695 errout:
696 	return err;
697 }
698 
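/* An all-ones lifetime means "forever": set_ifa_lifetime() below maps
 * it to IFA_F_PERMANENT instead of arming an expiry.
 */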
699 #define INFINITY_LIFE_TIME	0xFFFFFFFF
700 
701 static void check_lifetime(struct work_struct *work)
702 {
703 	unsigned long now, next, next_sec, next_sched;
704 	struct in_ifaddr *ifa;
705 	struct hlist_node *n;
706 	int i;
707 
708 	now = jiffies;
709 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
710 
711 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712 		bool change_needed = false;
713 
714 		rcu_read_lock();
715 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716 			unsigned long age, tstamp;
717 			u32 preferred_lft;
718 			u32 valid_lft;
719 			u32 flags;
720 
721 			flags = READ_ONCE(ifa->ifa_flags);
722 			if (flags & IFA_F_PERMANENT)
723 				continue;
724 
725 			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
726 			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
727 			tstamp = READ_ONCE(ifa->ifa_tstamp);
728 			/* We try to batch several events at once. */
729 			age = (now - tstamp +
730 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
731 
732 			if (valid_lft != INFINITY_LIFE_TIME &&
733 			    age >= valid_lft) {
734 				change_needed = true;
735 			} else if (preferred_lft ==
736 				   INFINITY_LIFE_TIME) {
737 				continue;
738 			} else if (age >= preferred_lft) {
739 				if (time_before(tstamp + valid_lft * HZ, next))
740 					next = tstamp + valid_lft * HZ;
741 
742 				if (!(flags & IFA_F_DEPRECATED))
743 					change_needed = true;
744 			} else if (time_before(tstamp + preferred_lft * HZ,
745 					       next)) {
746 				next = tstamp + preferred_lft * HZ;
747 			}
748 		}
749 		rcu_read_unlock();
750 		if (!change_needed)
751 			continue;
752 		rtnl_lock();
753 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
754 			unsigned long age;
755 
756 			if (ifa->ifa_flags & IFA_F_PERMANENT)
757 				continue;
758 
759 			/* We try to batch several events at once. */
760 			age = (now - ifa->ifa_tstamp +
761 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
762 
763 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
764 			    age >= ifa->ifa_valid_lft) {
765 				struct in_ifaddr __rcu **ifap;
766 				struct in_ifaddr *tmp;
767 
768 				ifap = &ifa->ifa_dev->ifa_list;
769 				tmp = rtnl_dereference(*ifap);
770 				while (tmp) {
771 					if (tmp == ifa) {
772 						inet_del_ifa(ifa->ifa_dev,
773 							     ifap, 1);
774 						break;
775 					}
776 					ifap = &tmp->ifa_next;
777 					tmp = rtnl_dereference(*ifap);
778 				}
779 			} else if (ifa->ifa_preferred_lft !=
780 				   INFINITY_LIFE_TIME &&
781 				   age >= ifa->ifa_preferred_lft &&
782 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
783 				ifa->ifa_flags |= IFA_F_DEPRECATED;
784 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
785 			}
786 		}
787 		rtnl_unlock();
788 	}
789 
790 	next_sec = round_jiffies_up(next);
791 	next_sched = next;
792 
793 	/* If rounded timeout is accurate enough, accept it. */
794 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
795 		next_sched = next_sec;
796 
797 	now = jiffies;
798 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
799 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
800 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
801 
802 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
803 			next_sched - now);
804 }
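/* check_lifetime() above deliberately works in two passes per hash
 * bucket: a cheap RCU walk that only decides whether anything expired,
 * and, only when needed, a second walk under RTNL that actually
 * deletes or deprecates addresses.
 */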
805 
806 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
807 			     __u32 prefered_lft)
808 {
809 	unsigned long timeout;
810 	u32 flags;
811 
812 	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
813 
814 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
815 	if (addrconf_finite_timeout(timeout))
816 		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
817 	else
818 		flags |= IFA_F_PERMANENT;
819 
820 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
821 	if (addrconf_finite_timeout(timeout)) {
822 		if (timeout == 0)
823 			flags |= IFA_F_DEPRECATED;
824 		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
825 	}
826 	WRITE_ONCE(ifa->ifa_flags, flags);
827 	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
828 	if (!ifa->ifa_cstamp)
829 		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
830 }
831 
832 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
833 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
834 				       struct netlink_ext_ack *extack)
835 {
836 	struct nlattr *tb[IFA_MAX+1];
837 	struct in_ifaddr *ifa;
838 	struct ifaddrmsg *ifm;
839 	struct net_device *dev;
840 	struct in_device *in_dev;
841 	int err;
842 
843 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
844 				     ifa_ipv4_policy, extack);
845 	if (err < 0)
846 		goto errout;
847 
848 	ifm = nlmsg_data(nlh);
849 	err = -EINVAL;
850 
851 	if (ifm->ifa_prefixlen > 32) {
852 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
853 		goto errout;
854 	}
855 
856 	if (!tb[IFA_LOCAL]) {
857 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
858 		goto errout;
859 	}
860 
861 	dev = __dev_get_by_index(net, ifm->ifa_index);
862 	err = -ENODEV;
863 	if (!dev) {
864 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
865 		goto errout;
866 	}
867 
868 	in_dev = __in_dev_get_rtnl(dev);
869 	err = -ENOBUFS;
870 	if (!in_dev)
871 		goto errout;
872 
873 	ifa = inet_alloc_ifa();
874 	if (!ifa)
875 		/*
876 		 * A potential in_dev allocation can be left alive; it stays
877 		 * assigned to its device and is destroyed with it.
878 		 */
879 		goto errout;
880 
881 	ipv4_devconf_setall(in_dev);
882 	neigh_parms_data_state_setall(in_dev->arp_parms);
883 	in_dev_hold(in_dev);
884 
885 	if (!tb[IFA_ADDRESS])
886 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
887 
888 	INIT_HLIST_NODE(&ifa->hash);
889 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
890 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
891 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
892 					 ifm->ifa_flags;
893 	ifa->ifa_scope = ifm->ifa_scope;
894 	ifa->ifa_dev = in_dev;
895 
896 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
897 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
898 
899 	if (tb[IFA_BROADCAST])
900 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
901 
902 	if (tb[IFA_LABEL])
903 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
904 	else
905 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
906 
907 	if (tb[IFA_RT_PRIORITY])
908 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
909 
910 	if (tb[IFA_PROTO])
911 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
912 
913 	if (tb[IFA_CACHEINFO]) {
914 		struct ifa_cacheinfo *ci;
915 
916 		ci = nla_data(tb[IFA_CACHEINFO]);
917 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
918 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
919 			err = -EINVAL;
920 			goto errout_free;
921 		}
922 		*pvalid_lft = ci->ifa_valid;
923 		*pprefered_lft = ci->ifa_prefered;
924 	}
925 
926 	return ifa;
927 
928 errout_free:
929 	inet_free_ifa(ifa);
930 errout:
931 	return ERR_PTR(err);
932 }
933 
934 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
935 {
936 	struct in_device *in_dev = ifa->ifa_dev;
937 	struct in_ifaddr *ifa1;
938 
939 	if (!ifa->ifa_local)
940 		return NULL;
941 
942 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
943 		if (ifa1->ifa_mask == ifa->ifa_mask &&
944 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
945 		    ifa1->ifa_local == ifa->ifa_local)
946 			return ifa1;
947 	}
948 	return NULL;
949 }
950 
951 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
952 			    struct netlink_ext_ack *extack)
953 {
954 	struct net *net = sock_net(skb->sk);
955 	struct in_ifaddr *ifa;
956 	struct in_ifaddr *ifa_existing;
957 	__u32 valid_lft = INFINITY_LIFE_TIME;
958 	__u32 prefered_lft = INFINITY_LIFE_TIME;
959 
960 	ASSERT_RTNL();
961 
962 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
963 	if (IS_ERR(ifa))
964 		return PTR_ERR(ifa);
965 
966 	ifa_existing = find_matching_ifa(ifa);
967 	if (!ifa_existing) {
968 		/* It would be best to check for !NLM_F_CREATE here but
969 		 * userspace already relies on not having to provide this.
970 		 */
971 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
972 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
973 			int ret = ip_mc_autojoin_config(net, true, ifa);
974 
975 			if (ret < 0) {
976 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
977 				inet_free_ifa(ifa);
978 				return ret;
979 			}
980 		}
981 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
982 					 extack);
983 	} else {
984 		u32 new_metric = ifa->ifa_rt_priority;
985 		u8 new_proto = ifa->ifa_proto;
986 
987 		inet_free_ifa(ifa);
988 
989 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
990 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
991 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
992 			return -EEXIST;
993 		}
994 		ifa = ifa_existing;
995 
996 		if (ifa->ifa_rt_priority != new_metric) {
997 			fib_modify_prefix_metric(ifa, new_metric);
998 			ifa->ifa_rt_priority = new_metric;
999 		}
1000 
1001 		ifa->ifa_proto = new_proto;
1002 
1003 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1004 		cancel_delayed_work(&check_lifetime_work);
1005 		queue_delayed_work(system_power_efficient_wq,
1006 				&check_lifetime_work, 0);
1007 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1008 	}
1009 	return 0;
1010 }
1011 
1012 /*
1013  *	Determine a default network mask, based on the IP address.
1014  */
1015 
1016 static int inet_abc_len(__be32 addr)
1017 {
1018 	int rc = -1;	/* Something else, probably a multicast. */
1019 
1020 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1021 		rc = 0;
1022 	else {
1023 		__u32 haddr = ntohl(addr);
1024 		if (IN_CLASSA(haddr))
1025 			rc = 8;
1026 		else if (IN_CLASSB(haddr))
1027 			rc = 16;
1028 		else if (IN_CLASSC(haddr))
1029 			rc = 24;
1030 		else if (IN_CLASSE(haddr))
1031 			rc = 32;
1032 	}
1033 
1034 	return rc;
1035 }
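/* Classful defaults, for illustration: 10.1.2.3 yields 8, 172.16.0.1
 * yields 16 and 192.168.0.1 yields 24, while a class-D (multicast)
 * address yields -1 so SIOCSIFADDR rejects it.  The addresses are
 * examples only.
 */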
1036 
1037 
1038 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1039 {
1040 	struct sockaddr_in sin_orig;
1041 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1042 	struct in_ifaddr __rcu **ifap = NULL;
1043 	struct in_device *in_dev;
1044 	struct in_ifaddr *ifa = NULL;
1045 	struct net_device *dev;
1046 	char *colon;
1047 	int ret = -EFAULT;
1048 	int tryaddrmatch = 0;
1049 
1050 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1051 
1052 	/* save original address for comparison */
1053 	memcpy(&sin_orig, sin, sizeof(*sin));
1054 
1055 	colon = strchr(ifr->ifr_name, ':');
1056 	if (colon)
1057 		*colon = 0;
1058 
1059 	dev_load(net, ifr->ifr_name);
1060 
1061 	switch (cmd) {
1062 	case SIOCGIFADDR:	/* Get interface address */
1063 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1064 	case SIOCGIFDSTADDR:	/* Get the destination address */
1065 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1066 		/* Note that these ioctls will not sleep,
1067 		   so that we do not impose a lock.
1068 		   One day we will be forced to put shlock here (I mean SMP)
1069 		 */
1070 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1071 		memset(sin, 0, sizeof(*sin));
1072 		sin->sin_family = AF_INET;
1073 		break;
1074 
1075 	case SIOCSIFFLAGS:
1076 		ret = -EPERM;
1077 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1078 			goto out;
1079 		break;
1080 	case SIOCSIFADDR:	/* Set interface address (and family) */
1081 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1082 	case SIOCSIFDSTADDR:	/* Set the destination address */
1083 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1084 		ret = -EPERM;
1085 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1086 			goto out;
1087 		ret = -EINVAL;
1088 		if (sin->sin_family != AF_INET)
1089 			goto out;
1090 		break;
1091 	default:
1092 		ret = -EINVAL;
1093 		goto out;
1094 	}
1095 
1096 	rtnl_lock();
1097 
1098 	ret = -ENODEV;
1099 	dev = __dev_get_by_name(net, ifr->ifr_name);
1100 	if (!dev)
1101 		goto done;
1102 
1103 	if (colon)
1104 		*colon = ':';
1105 
1106 	in_dev = __in_dev_get_rtnl(dev);
1107 	if (in_dev) {
1108 		if (tryaddrmatch) {
1109 			/* Matthias Andree */
1110 			/* compare label and address (4.4BSD style) */
1111 			/* note: we only do this for a limited set of ioctls
1112 			   and only if the original address family was AF_INET.
1113 			   This is checked above. */
1114 
1115 			for (ifap = &in_dev->ifa_list;
1116 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1117 			     ifap = &ifa->ifa_next) {
1118 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1119 				    sin_orig.sin_addr.s_addr ==
1120 							ifa->ifa_local) {
1121 					break; /* found */
1122 				}
1123 			}
1124 		}
1125 		/* we didn't get a match, maybe the application is
1126 		   4.3BSD-style and passed in junk so we fall back to
1127 		   comparing just the label */
1128 		if (!ifa) {
1129 			for (ifap = &in_dev->ifa_list;
1130 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1131 			     ifap = &ifa->ifa_next)
1132 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1133 					break;
1134 		}
1135 	}
1136 
1137 	ret = -EADDRNOTAVAIL;
1138 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1139 		goto done;
1140 
1141 	switch (cmd) {
1142 	case SIOCGIFADDR:	/* Get interface address */
1143 		ret = 0;
1144 		sin->sin_addr.s_addr = ifa->ifa_local;
1145 		break;
1146 
1147 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1148 		ret = 0;
1149 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1150 		break;
1151 
1152 	case SIOCGIFDSTADDR:	/* Get the destination address */
1153 		ret = 0;
1154 		sin->sin_addr.s_addr = ifa->ifa_address;
1155 		break;
1156 
1157 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1158 		ret = 0;
1159 		sin->sin_addr.s_addr = ifa->ifa_mask;
1160 		break;
1161 
1162 	case SIOCSIFFLAGS:
1163 		if (colon) {
1164 			ret = -EADDRNOTAVAIL;
1165 			if (!ifa)
1166 				break;
1167 			ret = 0;
1168 			if (!(ifr->ifr_flags & IFF_UP))
1169 				inet_del_ifa(in_dev, ifap, 1);
1170 			break;
1171 		}
1172 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1173 		break;
1174 
1175 	case SIOCSIFADDR:	/* Set interface address (and family) */
1176 		ret = -EINVAL;
1177 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1178 			break;
1179 
1180 		if (!ifa) {
1181 			ret = -ENOBUFS;
1182 			ifa = inet_alloc_ifa();
1183 			if (!ifa)
1184 				break;
1185 			INIT_HLIST_NODE(&ifa->hash);
1186 			if (colon)
1187 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1188 			else
1189 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1190 		} else {
1191 			ret = 0;
1192 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1193 				break;
1194 			inet_del_ifa(in_dev, ifap, 0);
1195 			ifa->ifa_broadcast = 0;
1196 			ifa->ifa_scope = 0;
1197 		}
1198 
1199 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1200 
1201 		if (!(dev->flags & IFF_POINTOPOINT)) {
1202 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1203 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1204 			if ((dev->flags & IFF_BROADCAST) &&
1205 			    ifa->ifa_prefixlen < 31)
1206 				ifa->ifa_broadcast = ifa->ifa_address |
1207 						     ~ifa->ifa_mask;
1208 		} else {
1209 			ifa->ifa_prefixlen = 32;
1210 			ifa->ifa_mask = inet_make_mask(32);
1211 		}
1212 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1213 		ret = inet_set_ifa(dev, ifa);
1214 		break;
1215 
1216 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1217 		ret = 0;
1218 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1219 			inet_del_ifa(in_dev, ifap, 0);
1220 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1221 			inet_insert_ifa(ifa);
1222 		}
1223 		break;
1224 
1225 	case SIOCSIFDSTADDR:	/* Set the destination address */
1226 		ret = 0;
1227 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1228 			break;
1229 		ret = -EINVAL;
1230 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1231 			break;
1232 		ret = 0;
1233 		inet_del_ifa(in_dev, ifap, 0);
1234 		ifa->ifa_address = sin->sin_addr.s_addr;
1235 		inet_insert_ifa(ifa);
1236 		break;
1237 
1238 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1239 
1240 		/*
1241 		 *	The mask we set must be legal.
1242 		 */
1243 		ret = -EINVAL;
1244 		if (bad_mask(sin->sin_addr.s_addr, 0))
1245 			break;
1246 		ret = 0;
1247 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1248 			__be32 old_mask = ifa->ifa_mask;
1249 			inet_del_ifa(in_dev, ifap, 0);
1250 			ifa->ifa_mask = sin->sin_addr.s_addr;
1251 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1252 
1253 			/* If the current broadcast address was derived
1254 			 * from the old netmask, recalculate it from the
1255 			 * new one. Otherwise it's a funny address, so
1256 			 * don't touch it since the user seems to know
1257 			 * what they're doing...
1258 			 */
1259 			if ((dev->flags & IFF_BROADCAST) &&
1260 			    (ifa->ifa_prefixlen < 31) &&
1261 			    (ifa->ifa_broadcast ==
1262 			     (ifa->ifa_local|~old_mask))) {
1263 				ifa->ifa_broadcast = (ifa->ifa_local |
1264 						      ~sin->sin_addr.s_addr);
1265 			}
1266 			inet_insert_ifa(ifa);
1267 		}
1268 		break;
1269 	}
1270 done:
1271 	rtnl_unlock();
1272 out:
1273 	return ret;
1274 }
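/* Note on the label handling above: an ifconfig-style alias name such
 * as "eth0:1" arrives in ifr_name; the part before the ':' selects the
 * device and the full string selects a specific ifaddr by label.
 */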
1275 
1276 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1277 {
1278 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1279 	const struct in_ifaddr *ifa;
1280 	struct ifreq ifr;
1281 	int done = 0;
1282 
1283 	if (WARN_ON(size > sizeof(struct ifreq)))
1284 		goto out;
1285 
1286 	if (!in_dev)
1287 		goto out;
1288 
1289 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1290 		if (!buf) {
1291 			done += size;
1292 			continue;
1293 		}
1294 		if (len < size)
1295 			break;
1296 		memset(&ifr, 0, sizeof(struct ifreq));
1297 		strcpy(ifr.ifr_name, ifa->ifa_label);
1298 
1299 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1300 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1301 								ifa->ifa_local;
1302 
1303 		if (copy_to_user(buf + done, &ifr, size)) {
1304 			done = -EFAULT;
1305 			break;
1306 		}
1307 		len  -= size;
1308 		done += size;
1309 	}
1310 out:
1311 	return done;
1312 }
1313 
1314 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1315 				 int scope)
1316 {
1317 	const struct in_ifaddr *ifa;
1318 
1319 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1320 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1321 			continue;
1322 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1323 		    ifa->ifa_scope <= scope)
1324 			return ifa->ifa_local;
1325 	}
1326 
1327 	return 0;
1328 }
1329 
1330 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1331 {
1332 	const struct in_ifaddr *ifa;
1333 	__be32 addr = 0;
1334 	unsigned char localnet_scope = RT_SCOPE_HOST;
1335 	struct in_device *in_dev;
1336 	struct net *net = dev_net(dev);
1337 	int master_idx;
1338 
1339 	rcu_read_lock();
1340 	in_dev = __in_dev_get_rcu(dev);
1341 	if (!in_dev)
1342 		goto no_in_dev;
1343 
1344 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1345 		localnet_scope = RT_SCOPE_LINK;
1346 
1347 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1348 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1349 			continue;
1350 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1351 			continue;
1352 		if (!dst || inet_ifa_match(dst, ifa)) {
1353 			addr = ifa->ifa_local;
1354 			break;
1355 		}
1356 		if (!addr)
1357 			addr = ifa->ifa_local;
1358 	}
1359 
1360 	if (addr)
1361 		goto out_unlock;
1362 no_in_dev:
1363 	master_idx = l3mdev_master_ifindex_rcu(dev);
1364 
1365 	/* For VRFs, the VRF device takes the place of the loopback device,
1366 	 * with addresses on it being preferred.  Note in such cases the
1367 	 * loopback device will be among the devices that fail the master_idx
1368 	 * equality check in the loop below.
1369 	 */
1370 	if (master_idx &&
1371 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1372 	    (in_dev = __in_dev_get_rcu(dev))) {
1373 		addr = in_dev_select_addr(in_dev, scope);
1374 		if (addr)
1375 			goto out_unlock;
1376 	}
1377 
1378 	/* Non-loopback addresses on the loopback device should be
1379 	   preferred in this case. It is important that lo is the first
1380 	   interface in the dev_base list.
1381 	 */
1382 	for_each_netdev_rcu(net, dev) {
1383 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1384 			continue;
1385 
1386 		in_dev = __in_dev_get_rcu(dev);
1387 		if (!in_dev)
1388 			continue;
1389 
1390 		addr = in_dev_select_addr(in_dev, scope);
1391 		if (addr)
1392 			goto out_unlock;
1393 	}
1394 out_unlock:
1395 	rcu_read_unlock();
1396 	return addr;
1397 }
1398 EXPORT_SYMBOL(inet_select_addr);
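/* inet_select_addr() prefers a primary address on the same subnet as
 * @dst with an acceptable scope, then any acceptable primary on the
 * device, and finally falls back to other devices in the same L3
 * master domain (or the whole namespace when there is none).
 */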
1399 
1400 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1401 			      __be32 local, int scope)
1402 {
1403 	unsigned char localnet_scope = RT_SCOPE_HOST;
1404 	const struct in_ifaddr *ifa;
1405 	__be32 addr = 0;
1406 	int same = 0;
1407 
1408 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1409 		localnet_scope = RT_SCOPE_LINK;
1410 
1411 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1412 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1413 
1414 		if (!addr &&
1415 		    (local == ifa->ifa_local || !local) &&
1416 		    min_scope <= scope) {
1417 			addr = ifa->ifa_local;
1418 			if (same)
1419 				break;
1420 		}
1421 		if (!same) {
1422 			same = (!local || inet_ifa_match(local, ifa)) &&
1423 				(!dst || inet_ifa_match(dst, ifa));
1424 			if (same && addr) {
1425 				if (local || !dst)
1426 					break;
1427 				/* Is the selected addr in the dst subnet? */
1428 				if (inet_ifa_match(addr, ifa))
1429 					break;
1430 				/* No, then can we use new local src? */
1431 				if (min_scope <= scope) {
1432 					addr = ifa->ifa_local;
1433 					break;
1434 				}
1435 				/* search for large dst subnet for addr */
1436 				same = 0;
1437 			}
1438 		}
1439 	}
1440 
1441 	return same ? addr : 0;
1442 }
1443 
1444 /*
1445  * Confirm that local IP address exists using wildcards:
1446  * - net: netns to check, cannot be NULL
1447  * - in_dev: only on this interface, NULL=any interface
1448  * - dst: only in the same subnet as dst, 0=any dst
1449  * - local: address, 0=autoselect the local address
1450  * - scope: maximum allowed scope value for the local address
1451  */
1452 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1453 			 __be32 dst, __be32 local, int scope)
1454 {
1455 	__be32 addr = 0;
1456 	struct net_device *dev;
1457 
1458 	if (in_dev)
1459 		return confirm_addr_indev(in_dev, dst, local, scope);
1460 
1461 	rcu_read_lock();
1462 	for_each_netdev_rcu(net, dev) {
1463 		in_dev = __in_dev_get_rcu(dev);
1464 		if (in_dev) {
1465 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1466 			if (addr)
1467 				break;
1468 		}
1469 	}
1470 	rcu_read_unlock();
1471 
1472 	return addr;
1473 }
1474 EXPORT_SYMBOL(inet_confirm_addr);
1475 
1476 /*
1477  *	Device notifier
1478  */
1479 
1480 int register_inetaddr_notifier(struct notifier_block *nb)
1481 {
1482 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1483 }
1484 EXPORT_SYMBOL(register_inetaddr_notifier);
1485 
1486 int unregister_inetaddr_notifier(struct notifier_block *nb)
1487 {
1488 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1489 }
1490 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1491 
1492 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1493 {
1494 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1495 }
1496 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1497 
1498 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1499 {
1500 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1501 	    nb);
1502 }
1503 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1504 
1505 /* Rename ifa_labels for a device name change. Make some effort to preserve
1506  * existing alias numbering and to create unique labels if possible.
1507 */
1508 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1509 {
1510 	struct in_ifaddr *ifa;
1511 	int named = 0;
1512 
1513 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1514 		char old[IFNAMSIZ], *dot;
1515 
1516 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1517 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1518 		if (named++ == 0)
1519 			goto skip;
1520 		dot = strchr(old, ':');
1521 		if (!dot) {
1522 			sprintf(old, ":%d", named);
1523 			dot = old;
1524 		}
1525 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1526 			strcat(ifa->ifa_label, dot);
1527 		else
1528 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1529 skip:
1530 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1531 	}
1532 }
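/* Example with hypothetical labels: when eth0 is renamed to eth1, the
 * label "eth0" becomes "eth1" and the alias label "eth0:1" becomes
 * "eth1:1"; additional labels without a ':' get a ":<n>" suffix to
 * keep them unique.
 */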
1533 
1534 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1535 					struct in_device *in_dev)
1536 
1537 {
1538 	const struct in_ifaddr *ifa;
1539 
1540 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1541 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1542 			 ifa->ifa_local, dev,
1543 			 ifa->ifa_local, NULL,
1544 			 dev->dev_addr, NULL);
1545 	}
1546 }
1547 
1548 /* Called only under RTNL semaphore */
1549 
1550 static int inetdev_event(struct notifier_block *this, unsigned long event,
1551 			 void *ptr)
1552 {
1553 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1554 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1555 
1556 	ASSERT_RTNL();
1557 
1558 	if (!in_dev) {
1559 		if (event == NETDEV_REGISTER) {
1560 			in_dev = inetdev_init(dev);
1561 			if (IS_ERR(in_dev))
1562 				return notifier_from_errno(PTR_ERR(in_dev));
1563 			if (dev->flags & IFF_LOOPBACK) {
1564 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1565 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1566 			}
1567 		} else if (event == NETDEV_CHANGEMTU) {
1568 			/* Re-enabling IP */
1569 			if (inetdev_valid_mtu(dev->mtu))
1570 				in_dev = inetdev_init(dev);
1571 		}
1572 		goto out;
1573 	}
1574 
1575 	switch (event) {
1576 	case NETDEV_REGISTER:
1577 		pr_debug("%s: bug\n", __func__);
1578 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1579 		break;
1580 	case NETDEV_UP:
1581 		if (!inetdev_valid_mtu(dev->mtu))
1582 			break;
1583 		if (dev->flags & IFF_LOOPBACK) {
1584 			struct in_ifaddr *ifa = inet_alloc_ifa();
1585 
1586 			if (ifa) {
1587 				INIT_HLIST_NODE(&ifa->hash);
1588 				ifa->ifa_local =
1589 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1590 				ifa->ifa_prefixlen = 8;
1591 				ifa->ifa_mask = inet_make_mask(8);
1592 				in_dev_hold(in_dev);
1593 				ifa->ifa_dev = in_dev;
1594 				ifa->ifa_scope = RT_SCOPE_HOST;
1595 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1596 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1597 						 INFINITY_LIFE_TIME);
1598 				ipv4_devconf_setall(in_dev);
1599 				neigh_parms_data_state_setall(in_dev->arp_parms);
1600 				inet_insert_ifa(ifa);
1601 			}
1602 		}
1603 		ip_mc_up(in_dev);
1604 		fallthrough;
1605 	case NETDEV_CHANGEADDR:
1606 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1607 			break;
1608 		fallthrough;
1609 	case NETDEV_NOTIFY_PEERS:
1610 		/* Send gratuitous ARP to notify of link change */
1611 		inetdev_send_gratuitous_arp(dev, in_dev);
1612 		break;
1613 	case NETDEV_DOWN:
1614 		ip_mc_down(in_dev);
1615 		break;
1616 	case NETDEV_PRE_TYPE_CHANGE:
1617 		ip_mc_unmap(in_dev);
1618 		break;
1619 	case NETDEV_POST_TYPE_CHANGE:
1620 		ip_mc_remap(in_dev);
1621 		break;
1622 	case NETDEV_CHANGEMTU:
1623 		if (inetdev_valid_mtu(dev->mtu))
1624 			break;
1625 		/* disable IP when MTU is not enough */
1626 		fallthrough;
1627 	case NETDEV_UNREGISTER:
1628 		inetdev_destroy(in_dev);
1629 		break;
1630 	case NETDEV_CHANGENAME:
1631 		/* Do not notify about the label change; this event is
1632 		 * not interesting to applications using netlink.
1633 		 */
1634 		inetdev_changename(dev, in_dev);
1635 
1636 		devinet_sysctl_unregister(in_dev);
1637 		devinet_sysctl_register(in_dev);
1638 		break;
1639 	}
1640 out:
1641 	return NOTIFY_DONE;
1642 }
1643 
1644 static struct notifier_block ip_netdev_notifier = {
1645 	.notifier_call = inetdev_event,
1646 };
1647 
1648 static size_t inet_nlmsg_size(void)
1649 {
1650 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1651 	       + nla_total_size(4) /* IFA_ADDRESS */
1652 	       + nla_total_size(4) /* IFA_LOCAL */
1653 	       + nla_total_size(4) /* IFA_BROADCAST */
1654 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1655 	       + nla_total_size(4)  /* IFA_FLAGS */
1656 	       + nla_total_size(1)  /* IFA_PROTO */
1657 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1658 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1659 }
1660 
1661 static inline u32 cstamp_delta(unsigned long cstamp)
1662 {
1663 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1664 }
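/* cstamp_delta() converts a jiffies timestamp into hundredths of a
 * second since boot, the unit userspace expects in struct
 * ifa_cacheinfo.
 */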
1665 
1666 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1667 			 unsigned long tstamp, u32 preferred, u32 valid)
1668 {
1669 	struct ifa_cacheinfo ci;
1670 
1671 	ci.cstamp = cstamp_delta(cstamp);
1672 	ci.tstamp = cstamp_delta(tstamp);
1673 	ci.ifa_prefered = preferred;
1674 	ci.ifa_valid = valid;
1675 
1676 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1677 }
1678 
1679 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1680 			    struct inet_fill_args *args)
1681 {
1682 	struct ifaddrmsg *ifm;
1683 	struct nlmsghdr  *nlh;
1684 	unsigned long tstamp;
1685 	u32 preferred, valid;
1686 
1687 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1688 			args->flags);
1689 	if (!nlh)
1690 		return -EMSGSIZE;
1691 
1692 	ifm = nlmsg_data(nlh);
1693 	ifm->ifa_family = AF_INET;
1694 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1695 	ifm->ifa_flags = READ_ONCE(ifa->ifa_flags);
1696 	ifm->ifa_scope = ifa->ifa_scope;
1697 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1698 
1699 	if (args->netnsid >= 0 &&
1700 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1701 		goto nla_put_failure;
1702 
1703 	tstamp = READ_ONCE(ifa->ifa_tstamp);
1704 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1705 		preferred = READ_ONCE(ifa->ifa_preferred_lft);
1706 		valid = READ_ONCE(ifa->ifa_valid_lft);
1707 		if (preferred != INFINITY_LIFE_TIME) {
1708 			long tval = (jiffies - tstamp) / HZ;
1709 
1710 			if (preferred > tval)
1711 				preferred -= tval;
1712 			else
1713 				preferred = 0;
1714 			if (valid != INFINITY_LIFE_TIME) {
1715 				if (valid > tval)
1716 					valid -= tval;
1717 				else
1718 					valid = 0;
1719 			}
1720 		}
1721 	} else {
1722 		preferred = INFINITY_LIFE_TIME;
1723 		valid = INFINITY_LIFE_TIME;
1724 	}
1725 	if ((ifa->ifa_address &&
1726 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1727 	    (ifa->ifa_local &&
1728 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1729 	    (ifa->ifa_broadcast &&
1730 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1731 	    (ifa->ifa_label[0] &&
1732 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1733 	    (ifa->ifa_proto &&
1734 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1735 	    nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) ||
1736 	    (ifa->ifa_rt_priority &&
1737 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1738 	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1739 			  preferred, valid))
1740 		goto nla_put_failure;
1741 
1742 	nlmsg_end(skb, nlh);
1743 	return 0;
1744 
1745 nla_put_failure:
1746 	nlmsg_cancel(skb, nlh);
1747 	return -EMSGSIZE;
1748 }
1749 
1750 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1751 				      struct inet_fill_args *fillargs,
1752 				      struct net **tgt_net, struct sock *sk,
1753 				      struct netlink_callback *cb)
1754 {
1755 	struct netlink_ext_ack *extack = cb->extack;
1756 	struct nlattr *tb[IFA_MAX+1];
1757 	struct ifaddrmsg *ifm;
1758 	int err, i;
1759 
1760 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1761 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1762 		return -EINVAL;
1763 	}
1764 
1765 	ifm = nlmsg_data(nlh);
1766 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1767 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1768 		return -EINVAL;
1769 	}
1770 
1771 	fillargs->ifindex = ifm->ifa_index;
1772 	if (fillargs->ifindex) {
1773 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1774 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1775 	}
1776 
1777 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1778 					    ifa_ipv4_policy, extack);
1779 	if (err < 0)
1780 		return err;
1781 
1782 	for (i = 0; i <= IFA_MAX; ++i) {
1783 		if (!tb[i])
1784 			continue;
1785 
1786 		if (i == IFA_TARGET_NETNSID) {
1787 			struct net *net;
1788 
1789 			fillargs->netnsid = nla_get_s32(tb[i]);
1790 
1791 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1792 			if (IS_ERR(net)) {
1793 				fillargs->netnsid = -1;
1794 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1795 				return PTR_ERR(net);
1796 			}
1797 			*tgt_net = net;
1798 		} else {
1799 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1800 			return -EINVAL;
1801 		}
1802 	}
1803 
1804 	return 0;
1805 }
1806 
1807 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1808 			    struct netlink_callback *cb, int *s_ip_idx,
1809 			    struct inet_fill_args *fillargs)
1810 {
1811 	struct in_ifaddr *ifa;
1812 	int ip_idx = 0;
1813 	int err;
1814 
1815 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1816 		if (ip_idx < *s_ip_idx) {
1817 			ip_idx++;
1818 			continue;
1819 		}
1820 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1821 		if (err < 0)
1822 			goto done;
1823 
1824 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1825 		ip_idx++;
1826 	}
1827 	err = 0;
1828 	ip_idx = 0;
1829 done:
1830 	*s_ip_idx = ip_idx;
1831 
1832 	return err;
1833 }
1834 
1835 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1836  */
1837 static u32 inet_base_seq(const struct net *net)
1838 {
1839 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1840 		  READ_ONCE(net->dev_base_seq);
1841 
1842 	/* Must not return 0 (see nl_dump_check_consistent()).
1843 	 * Choose a value far away from 0.
1844 	 */
1845 	if (!res)
1846 		res = 0x80000000;
1847 	return res;
1848 }
1849 
1850 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1851 {
1852 	const struct nlmsghdr *nlh = cb->nlh;
1853 	struct inet_fill_args fillargs = {
1854 		.portid = NETLINK_CB(cb->skb).portid,
1855 		.seq = nlh->nlmsg_seq,
1856 		.event = RTM_NEWADDR,
1857 		.flags = NLM_F_MULTI,
1858 		.netnsid = -1,
1859 	};
1860 	struct net *net = sock_net(skb->sk);
1861 	struct net *tgt_net = net;
1862 	struct {
1863 		unsigned long ifindex;
1864 		int ip_idx;
1865 	} *ctx = (void *)cb->ctx;
1866 	struct in_device *in_dev;
1867 	struct net_device *dev;
1868 	int err = 0;
1869 
1870 	rcu_read_lock();
1871 	if (cb->strict_check) {
1872 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1873 						 skb->sk, cb);
1874 		if (err < 0)
1875 			goto done;
1876 
1877 		if (fillargs.ifindex) {
1878 			err = -ENODEV;
1879 			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1880 			if (!dev)
1881 				goto done;
1882 			in_dev = __in_dev_get_rcu(dev);
1883 			if (!in_dev)
1884 				goto done;
1885 			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1886 					       &fillargs);
1887 			goto done;
1888 		}
1889 	}
1890 
1891 	cb->seq = inet_base_seq(tgt_net);
1892 
1893 	for_each_netdev_dump(net, dev, ctx->ifindex) {
1894 		in_dev = __in_dev_get_rcu(dev);
1895 		if (!in_dev)
1896 			continue;
1897 		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1898 				       &fillargs);
1899 		if (err < 0)
1900 			goto done;
1901 	}
1902 done:
1903 	if (err < 0 && likely(skb->len))
1904 		err = skb->len;
1905 	if (fillargs.netnsid >= 0)
1906 		put_net(tgt_net);
1907 	rcu_read_unlock();
1908 	return err;
1909 }
1910 
1911 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1912 		      u32 portid)
1913 {
1914 	struct inet_fill_args fillargs = {
1915 		.portid = portid,
1916 		.seq = nlh ? nlh->nlmsg_seq : 0,
1917 		.event = event,
1918 		.flags = 0,
1919 		.netnsid = -1,
1920 	};
1921 	struct sk_buff *skb;
1922 	int err = -ENOBUFS;
1923 	struct net *net;
1924 
1925 	net = dev_net(ifa->ifa_dev->dev);
1926 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1927 	if (!skb)
1928 		goto errout;
1929 
1930 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1931 	if (err < 0) {
1932 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1933 		WARN_ON(err == -EMSGSIZE);
1934 		kfree_skb(skb);
1935 		goto errout;
1936 	}
1937 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1938 	return;
1939 errout:
1940 	if (err < 0)
1941 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1942 }
1943 
1944 static size_t inet_get_link_af_size(const struct net_device *dev,
1945 				    u32 ext_filter_mask)
1946 {
1947 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1948 
1949 	if (!in_dev)
1950 		return 0;
1951 
1952 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1953 }
1954 
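/* Fill IFLA_INET_CONF (inside IFLA_AF_SPEC) with a snapshot of the
 * per-device ipv4 devconf array for link dumps.
 */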
1955 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1956 			     u32 ext_filter_mask)
1957 {
1958 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1959 	struct nlattr *nla;
1960 	int i;
1961 
1962 	if (!in_dev)
1963 		return -ENODATA;
1964 
1965 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1966 	if (!nla)
1967 		return -EMSGSIZE;
1968 
1969 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1970 		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
1971 
1972 	return 0;
1973 }
1974 
1975 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1976 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1977 };
1978 
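/* Validate the AF_INET part of IFLA_AF_SPEC before it is applied by
 * inet_set_link_af(): every nested IFLA_INET_CONF attribute must be at
 * least four bytes long and carry a valid devconf index.
 */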
1979 static int inet_validate_link_af(const struct net_device *dev,
1980 				 const struct nlattr *nla,
1981 				 struct netlink_ext_ack *extack)
1982 {
1983 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1984 	int err, rem;
1985 
1986 	if (dev && !__in_dev_get_rtnl(dev))
1987 		return -EAFNOSUPPORT;
1988 
1989 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1990 					  inet_af_policy, extack);
1991 	if (err < 0)
1992 		return err;
1993 
1994 	if (tb[IFLA_INET_CONF]) {
1995 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1996 			int cfgid = nla_type(a);
1997 
1998 			if (nla_len(a) < 4)
1999 				return -EINVAL;
2000 
2001 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2002 				return -EINVAL;
2003 		}
2004 	}
2005 
2006 	return 0;
2007 }
2008 
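/* Apply previously validated IFLA_INET_CONF values to the device's
 * ipv4 devconf.
 */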
2009 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2010 			    struct netlink_ext_ack *extack)
2011 {
2012 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2013 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2014 	int rem;
2015 
2016 	if (!in_dev)
2017 		return -EAFNOSUPPORT;
2018 
2019 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2020 		return -EINVAL;
2021 
2022 	if (tb[IFLA_INET_CONF]) {
2023 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2024 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2025 	}
2026 
2027 	return 0;
2028 }
2029 
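/* Size of an RTM_NEWNETCONF message carrying the requested attribute,
 * or all of them for NETCONFA_ALL.
 */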
2030 static int inet_netconf_msgsize_devconf(int type)
2031 {
2032 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2033 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2034 	bool all = false;
2035 
2036 	if (type == NETCONFA_ALL)
2037 		all = true;
2038 
2039 	if (all || type == NETCONFA_FORWARDING)
2040 		size += nla_total_size(4);
2041 	if (all || type == NETCONFA_RP_FILTER)
2042 		size += nla_total_size(4);
2043 	if (all || type == NETCONFA_MC_FORWARDING)
2044 		size += nla_total_size(4);
2045 	if (all || type == NETCONFA_BC_FORWARDING)
2046 		size += nla_total_size(4);
2047 	if (all || type == NETCONFA_PROXY_NEIGH)
2048 		size += nla_total_size(4);
2049 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2050 		size += nla_total_size(4);
2051 
2052 	return size;
2053 }
2054 
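/* Build one netconf message.  With a NULL @devconf only NETCONFA_IFINDEX
 * is included, which is what RTM_DELNETCONF notifications use.
 */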
2055 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2056 				     const struct ipv4_devconf *devconf,
2057 				     u32 portid, u32 seq, int event,
2058 				     unsigned int flags, int type)
2059 {
2060 	struct nlmsghdr *nlh;
2061 	struct netconfmsg *ncm;
2062 	bool all = false;
2063 
2064 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2065 			flags);
2066 	if (!nlh)
2067 		return -EMSGSIZE;
2068 
2069 	if (type == NETCONFA_ALL)
2070 		all = true;
2071 
2072 	ncm = nlmsg_data(nlh);
2073 	ncm->ncm_family = AF_INET;
2074 
2075 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2076 		goto nla_put_failure;
2077 
2078 	if (!devconf)
2079 		goto out;
2080 
2081 	if ((all || type == NETCONFA_FORWARDING) &&
2082 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2083 			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2084 		goto nla_put_failure;
2085 	if ((all || type == NETCONFA_RP_FILTER) &&
2086 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2087 			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2088 		goto nla_put_failure;
2089 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2090 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2091 			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2092 		goto nla_put_failure;
2093 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2094 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2095 			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2096 		goto nla_put_failure;
2097 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2098 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2099 			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2100 		goto nla_put_failure;
2101 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2102 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2103 			IPV4_DEVCONF_RO(*devconf,
2104 					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2105 		goto nla_put_failure;
2106 
2107 out:
2108 	nlmsg_end(skb, nlh);
2109 	return 0;
2110 
2111 nla_put_failure:
2112 	nlmsg_cancel(skb, nlh);
2113 	return -EMSGSIZE;
2114 }
2115 
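/* Broadcast a devconf change (or removal, for RTM_DELNETCONF) to
 * RTNLGRP_IPV4_NETCONF listeners.
 */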
2116 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2117 				 int ifindex, struct ipv4_devconf *devconf)
2118 {
2119 	struct sk_buff *skb;
2120 	int err = -ENOBUFS;
2121 
2122 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2123 	if (!skb)
2124 		goto errout;
2125 
2126 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2127 					event, 0, type);
2128 	if (err < 0) {
2129 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2130 		WARN_ON(err == -EMSGSIZE);
2131 		kfree_skb(skb);
2132 		goto errout;
2133 	}
2134 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2135 	return;
2136 errout:
2137 	if (err < 0)
2138 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2139 }
2140 
2141 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2142 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2143 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2144 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2145 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2146 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2147 };
2148 
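/* Validate an RTM_GETNETCONF request; under strict checking only
 * NETCONFA_IFINDEX is accepted as an attribute.
 */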
2149 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2150 				      const struct nlmsghdr *nlh,
2151 				      struct nlattr **tb,
2152 				      struct netlink_ext_ack *extack)
2153 {
2154 	int i, err;
2155 
2156 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2157 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2158 		return -EINVAL;
2159 	}
2160 
2161 	if (!netlink_strict_get_check(skb))
2162 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2163 					      tb, NETCONFA_MAX,
2164 					      devconf_ipv4_policy, extack);
2165 
2166 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2167 					    tb, NETCONFA_MAX,
2168 					    devconf_ipv4_policy, extack);
2169 	if (err)
2170 		return err;
2171 
2172 	for (i = 0; i <= NETCONFA_MAX; i++) {
2173 		if (!tb[i])
2174 			continue;
2175 
2176 		switch (i) {
2177 		case NETCONFA_IFINDEX:
2178 			break;
2179 		default:
2180 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2181 			return -EINVAL;
2182 		}
2183 	}
2184 
2185 	return 0;
2186 }
2187 
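/* RTM_GETNETCONF doit handler.  NETCONFA_IFINDEX selects either a real
 * device or the special "all"/"default" entries, and the full config is
 * unicast back to the requester.
 */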
2188 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2189 				    struct nlmsghdr *nlh,
2190 				    struct netlink_ext_ack *extack)
2191 {
2192 	struct net *net = sock_net(in_skb->sk);
2193 	struct nlattr *tb[NETCONFA_MAX + 1];
2194 	const struct ipv4_devconf *devconf;
2195 	struct in_device *in_dev = NULL;
2196 	struct net_device *dev = NULL;
2197 	struct sk_buff *skb;
2198 	int ifindex;
2199 	int err;
2200 
2201 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2202 	if (err)
2203 		return err;
2204 
2205 	if (!tb[NETCONFA_IFINDEX])
2206 		return -EINVAL;
2207 
2208 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2209 	switch (ifindex) {
2210 	case NETCONFA_IFINDEX_ALL:
2211 		devconf = net->ipv4.devconf_all;
2212 		break;
2213 	case NETCONFA_IFINDEX_DEFAULT:
2214 		devconf = net->ipv4.devconf_dflt;
2215 		break;
2216 	default:
2217 		err = -ENODEV;
2218 		dev = dev_get_by_index(net, ifindex);
2219 		if (dev)
2220 			in_dev = in_dev_get(dev);
2221 		if (!in_dev)
2222 			goto errout;
2223 		devconf = &in_dev->cnf;
2224 		break;
2225 	}
2226 
2227 	err = -ENOBUFS;
2228 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2229 	if (!skb)
2230 		goto errout;
2231 
2232 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2233 					NETLINK_CB(in_skb).portid,
2234 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2235 					NETCONFA_ALL);
2236 	if (err < 0) {
2237 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2238 		WARN_ON(err == -EMSGSIZE);
2239 		kfree_skb(skb);
2240 		goto errout;
2241 	}
2242 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2243 errout:
2244 	if (in_dev)
2245 		in_dev_put(in_dev);
2246 	dev_put(dev);
2247 	return err;
2248 }
2249 
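/* RTM_GETNETCONF dump handler.  Emits one entry per device under RCU,
 * followed by the "all" and "default" pseudo entries, tracking progress
 * in the callback context.
 */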
2250 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2251 				     struct netlink_callback *cb)
2252 {
2253 	const struct nlmsghdr *nlh = cb->nlh;
2254 	struct net *net = sock_net(skb->sk);
2255 	struct {
2256 		unsigned long ifindex;
2257 		unsigned int all_default;
2258 	} *ctx = (void *)cb->ctx;
2259 	const struct in_device *in_dev;
2260 	struct net_device *dev;
2261 	int err = 0;
2262 
2263 	if (cb->strict_check) {
2264 		struct netlink_ext_ack *extack = cb->extack;
2265 		struct netconfmsg *ncm;
2266 
2267 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2268 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2269 			return -EINVAL;
2270 		}
2271 
2272 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2273 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2274 			return -EINVAL;
2275 		}
2276 	}
2277 
2278 	rcu_read_lock();
2279 	for_each_netdev_dump(net, dev, ctx->ifindex) {
2280 		in_dev = __in_dev_get_rcu(dev);
2281 		if (!in_dev)
2282 			continue;
2283 		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2284 						&in_dev->cnf,
2285 						NETLINK_CB(cb->skb).portid,
2286 						nlh->nlmsg_seq,
2287 						RTM_NEWNETCONF, NLM_F_MULTI,
2288 						NETCONFA_ALL);
2289 		if (err < 0)
2290 			goto done;
2291 	}
2292 	if (ctx->all_default == 0) {
2293 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2294 						net->ipv4.devconf_all,
2295 						NETLINK_CB(cb->skb).portid,
2296 						nlh->nlmsg_seq,
2297 						RTM_NEWNETCONF, NLM_F_MULTI,
2298 						NETCONFA_ALL);
2299 		if (err < 0)
2300 			goto done;
2301 		ctx->all_default++;
2302 	}
2303 	if (ctx->all_default == 1) {
2304 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2305 						net->ipv4.devconf_dflt,
2306 						NETLINK_CB(cb->skb).portid,
2307 						nlh->nlmsg_seq,
2308 						RTM_NEWNETCONF, NLM_F_MULTI,
2309 						NETCONFA_ALL);
2310 		if (err < 0)
2311 			goto done;
2312 		ctx->all_default++;
2313 	}
2314 done:
2315 	if (err < 0 && likely(skb->len))
2316 		err = skb->len;
2317 	rcu_read_unlock();
2318 	return err;
2319 }
2320 
2321 #ifdef CONFIG_SYSCTL
2322 
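/* Propagate a changed "default" value to every in_device that has not
 * explicitly overridden that entry (tracked in the cnf.state bitmap).
 */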
2323 static void devinet_copy_dflt_conf(struct net *net, int i)
2324 {
2325 	struct net_device *dev;
2326 
2327 	rcu_read_lock();
2328 	for_each_netdev_rcu(net, dev) {
2329 		struct in_device *in_dev;
2330 
2331 		in_dev = __in_dev_get_rcu(dev);
2332 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2333 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2334 	}
2335 	rcu_read_unlock();
2336 }
2337 
2338 /* Called with RTNL held.  Propagate the "all" forwarding value to the
 * default config and to every device, sending netconf notifications and
 * disabling LRO on devices when forwarding is turned on.
 */
2339 static void inet_forward_change(struct net *net)
2340 {
2341 	struct net_device *dev;
2342 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2343 
2344 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2345 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2346 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2347 				    NETCONFA_FORWARDING,
2348 				    NETCONFA_IFINDEX_ALL,
2349 				    net->ipv4.devconf_all);
2350 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2351 				    NETCONFA_FORWARDING,
2352 				    NETCONFA_IFINDEX_DEFAULT,
2353 				    net->ipv4.devconf_dflt);
2354 
2355 	for_each_netdev(net, dev) {
2356 		struct in_device *in_dev;
2357 
2358 		if (on)
2359 			dev_disable_lro(dev);
2360 
2361 		in_dev = __in_dev_get_rtnl(dev);
2362 		if (in_dev) {
2363 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2364 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2365 						    NETCONFA_FORWARDING,
2366 						    dev->ifindex, &in_dev->cnf);
2367 		}
2368 	}
2369 }
2370 
2371 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2372 {
2373 	if (cnf == net->ipv4.devconf_dflt)
2374 		return NETCONFA_IFINDEX_DEFAULT;
2375 	else if (cnf == net->ipv4.devconf_all)
2376 		return NETCONFA_IFINDEX_ALL;
2377 	else {
2378 		struct in_device *idev
2379 			= container_of(cnf, struct in_device, cnf);
2380 		return idev->dev->ifindex;
2381 	}
2382 }
2383 
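/* Generic proc handler for the devinet sysctls: mark the entry as
 * explicitly set, copy new "default" values to devices, flush the route
 * cache and send netconf notifications where the entry requires it.
 */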
2384 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2385 			     void *buffer, size_t *lenp, loff_t *ppos)
2386 {
2387 	int old_value = *(int *)ctl->data;
2388 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2389 	int new_value = *(int *)ctl->data;
2390 
2391 	if (write) {
2392 		struct ipv4_devconf *cnf = ctl->extra1;
2393 		struct net *net = ctl->extra2;
2394 		int i = (int *)ctl->data - cnf->data;
2395 		int ifindex;
2396 
2397 		set_bit(i, cnf->state);
2398 
2399 		if (cnf == net->ipv4.devconf_dflt)
2400 			devinet_copy_dflt_conf(net, i);
2401 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2402 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2403 			if (new_value == 0 && old_value != 0)
2404 				rt_cache_flush(net);
2405 
2406 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2407 		    new_value != old_value)
2408 			rt_cache_flush(net);
2409 
2410 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2411 		    new_value != old_value) {
2412 			ifindex = devinet_conf_ifindex(net, cnf);
2413 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2414 						    NETCONFA_RP_FILTER,
2415 						    ifindex, cnf);
2416 		}
2417 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2418 		    new_value != old_value) {
2419 			ifindex = devinet_conf_ifindex(net, cnf);
2420 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2421 						    NETCONFA_PROXY_NEIGH,
2422 						    ifindex, cnf);
2423 		}
2424 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2425 		    new_value != old_value) {
2426 			ifindex = devinet_conf_ifindex(net, cnf);
2427 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2428 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2429 						    ifindex, cnf);
2430 		}
2431 	}
2432 
2433 	return ret;
2434 }
2435 
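/* Proc handler for the forwarding sysctls.  Writing requires
 * CAP_NET_ADMIN in the owning user namespace; for the "all" and
 * per-device entries the RTNL is taken via trylock, restarting the
 * syscall with the old value restored if the lock is contended.
 */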
2436 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2437 				  void *buffer, size_t *lenp, loff_t *ppos)
2438 {
2439 	int *valp = ctl->data;
2440 	int val = *valp;
2441 	loff_t pos = *ppos;
2442 	struct net *net = ctl->extra2;
2443 	int ret;
2444 
2445 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2446 		return -EPERM;
2447 
2448 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2449 
2450 	if (write && *valp != val) {
2451 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2452 			if (!rtnl_trylock()) {
2453 				/* Restore the original values before restarting */
2454 				*valp = val;
2455 				*ppos = pos;
2456 				return restart_syscall();
2457 			}
2458 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2459 				inet_forward_change(net);
2460 			} else {
2461 				struct ipv4_devconf *cnf = ctl->extra1;
2462 				struct in_device *idev =
2463 					container_of(cnf, struct in_device, cnf);
2464 				if (*valp)
2465 					dev_disable_lro(idev->dev);
2466 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2467 							    NETCONFA_FORWARDING,
2468 							    idev->dev->ifindex,
2469 							    cnf);
2470 			}
2471 			rtnl_unlock();
2472 			rt_cache_flush(net);
2473 		} else {
2474 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2475 						    NETCONFA_FORWARDING,
2476 						    NETCONFA_IFINDEX_DEFAULT,
2477 						    net->ipv4.devconf_dflt);
		}
2478 	}
2479 
2480 	return ret;
2481 }
2482 
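/* Flush the route cache whenever the written value actually changes. */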
2483 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2484 				void *buffer, size_t *lenp, loff_t *ppos)
2485 {
2486 	int *valp = ctl->data;
2487 	int val = *valp;
2488 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2489 	struct net *net = ctl->extra2;
2490 
2491 	if (write && *valp != val)
2492 		rt_cache_flush(net);
2493 
2494 	return ret;
2495 }
2496 
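/* Template sysctl entries.  They initially point into the global
 * ipv4_devconf; __devinet_sysctl_register() re-points each copy at the
 * per-namespace or per-device ipv4_devconf it is registered for.
 */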
2497 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2498 	{ \
2499 		.procname	= name, \
2500 		.data		= ipv4_devconf.data + \
2501 				  IPV4_DEVCONF_ ## attr - 1, \
2502 		.maxlen		= sizeof(int), \
2503 		.mode		= mval, \
2504 		.proc_handler	= proc, \
2505 		.extra1		= &ipv4_devconf, \
2506 	}
2507 
2508 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2509 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2510 
2511 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2512 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2513 
2514 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2515 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2516 
2517 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2518 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2519 
2520 static struct devinet_sysctl_table {
2521 	struct ctl_table_header *sysctl_header;
2522 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2523 } devinet_sysctl = {
2524 	.devinet_vars = {
2525 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2526 					     devinet_sysctl_forward),
2527 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2528 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2529 
2530 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2531 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2532 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2533 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2534 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2535 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2536 					"accept_source_route"),
2537 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2538 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2539 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2540 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2541 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2542 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2543 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2544 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2545 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2546 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2547 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2548 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2549 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2550 					"arp_evict_nocarrier"),
2551 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2552 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2553 					"force_igmp_version"),
2554 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2555 					"igmpv2_unsolicited_report_interval"),
2556 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2557 					"igmpv3_unsolicited_report_interval"),
2558 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2559 					"ignore_routes_with_linkdown"),
2560 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2561 					"drop_gratuitous_arp"),
2562 
2563 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2564 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2565 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2566 					      "promote_secondaries"),
2567 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2568 					      "route_localnet"),
2569 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2570 					      "drop_unicast_in_l2_multicast"),
2571 	},
2572 };
2573 
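/* Duplicate the template table, re-point each entry at @p and @net, and
 * register it under net/ipv4/conf/<dev_name>.
 */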
2574 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2575 				     int ifindex, struct ipv4_devconf *p)
2576 {
2577 	int i;
2578 	struct devinet_sysctl_table *t;
2579 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2580 
2581 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2582 	if (!t)
2583 		goto out;
2584 
2585 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2586 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2587 		t->devinet_vars[i].extra1 = p;
2588 		t->devinet_vars[i].extra2 = net;
2589 	}
2590 
2591 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2592 
2593 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2594 	if (!t->sysctl_header)
2595 		goto free;
2596 
2597 	p->sysctl = t;
2598 
2599 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2600 				    ifindex, p);
2601 	return 0;
2602 
2603 free:
2604 	kfree(t);
2605 out:
2606 	return -ENOMEM;
2607 }
2608 
2609 static void __devinet_sysctl_unregister(struct net *net,
2610 					struct ipv4_devconf *cnf, int ifindex)
2611 {
2612 	struct devinet_sysctl_table *t = cnf->sysctl;
2613 
2614 	if (t) {
2615 		cnf->sysctl = NULL;
2616 		unregister_net_sysctl_table(t->sysctl_header);
2617 		kfree(t);
2618 	}
2619 
2620 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2621 }
2622 
2623 static int devinet_sysctl_register(struct in_device *idev)
2624 {
2625 	int err;
2626 
2627 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2628 		return -EINVAL;
2629 
2630 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2631 	if (err)
2632 		return err;
2633 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2634 					idev->dev->ifindex, &idev->cnf);
2635 	if (err)
2636 		neigh_sysctl_unregister(idev->arp_parms);
2637 	return err;
2638 }
2639 
2640 static void devinet_sysctl_unregister(struct in_device *idev)
2641 {
2642 	struct net *net = dev_net(idev->dev);
2643 
2644 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2645 	neigh_sysctl_unregister(idev->arp_parms);
2646 }
2647 
2648 static struct ctl_table ctl_forward_entry[] = {
2649 	{
2650 		.procname	= "ip_forward",
2651 		.data		= &ipv4_devconf.data[
2652 					IPV4_DEVCONF_FORWARDING - 1],
2653 		.maxlen		= sizeof(int),
2654 		.mode		= 0644,
2655 		.proc_handler	= devinet_sysctl_forward,
2656 		.extra1		= &ipv4_devconf,
2657 		.extra2		= &init_net,
2658 	},
2659 	{ },
2660 };
2661 #endif
2662 
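/* Per-namespace setup: allocate the "all" and "default" devconf copies
 * (inheriting initial values according to net_inherit_devconf()) and
 * register the matching sysctls plus net.ipv4.ip_forward.
 */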
2663 static __net_init int devinet_init_net(struct net *net)
2664 {
2665 	int err;
2666 	struct ipv4_devconf *all, *dflt;
2667 #ifdef CONFIG_SYSCTL
2668 	struct ctl_table *tbl;
2669 	struct ctl_table_header *forw_hdr;
2670 #endif
2671 
2672 	err = -ENOMEM;
2673 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2674 	if (!all)
2675 		goto err_alloc_all;
2676 
2677 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2678 	if (!dflt)
2679 		goto err_alloc_dflt;
2680 
2681 #ifdef CONFIG_SYSCTL
2682 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2683 	if (!tbl)
2684 		goto err_alloc_ctl;
2685 
2686 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2687 	tbl[0].extra1 = all;
2688 	tbl[0].extra2 = net;
2689 #endif
2690 
2691 	if (!net_eq(net, &init_net)) {
2692 		switch (net_inherit_devconf()) {
2693 		case 3:
2694 			/* copy from the current netns */
2695 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2696 			       sizeof(ipv4_devconf));
2697 			memcpy(dflt,
2698 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2699 			       sizeof(ipv4_devconf_dflt));
2700 			break;
2701 		case 0:
2702 		case 1:
2703 			/* copy from init_net */
2704 			memcpy(all, init_net.ipv4.devconf_all,
2705 			       sizeof(ipv4_devconf));
2706 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2707 			       sizeof(ipv4_devconf_dflt));
2708 			break;
2709 		case 2:
2710 			/* use compiled values */
2711 			break;
2712 		}
2713 	}
2714 
2715 #ifdef CONFIG_SYSCTL
2716 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2717 	if (err < 0)
2718 		goto err_reg_all;
2719 
2720 	err = __devinet_sysctl_register(net, "default",
2721 					NETCONFA_IFINDEX_DEFAULT, dflt);
2722 	if (err < 0)
2723 		goto err_reg_dflt;
2724 
2725 	err = -ENOMEM;
2726 	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2727 					  ARRAY_SIZE(ctl_forward_entry));
2728 	if (!forw_hdr)
2729 		goto err_reg_ctl;
2730 	net->ipv4.forw_hdr = forw_hdr;
2731 #endif
2732 
2733 	net->ipv4.devconf_all = all;
2734 	net->ipv4.devconf_dflt = dflt;
2735 	return 0;
2736 
2737 #ifdef CONFIG_SYSCTL
2738 err_reg_ctl:
2739 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2740 err_reg_dflt:
2741 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2742 err_reg_all:
2743 	kfree(tbl);
2744 err_alloc_ctl:
2745 #endif
2746 	kfree(dflt);
2747 err_alloc_dflt:
2748 	kfree(all);
2749 err_alloc_all:
2750 	return err;
2751 }
2752 
2753 static __net_exit void devinet_exit_net(struct net *net)
2754 {
2755 #ifdef CONFIG_SYSCTL
2756 	struct ctl_table *tbl;
2757 
2758 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2759 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2760 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2761 				    NETCONFA_IFINDEX_DEFAULT);
2762 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2763 				    NETCONFA_IFINDEX_ALL);
2764 	kfree(tbl);
2765 #endif
2766 	kfree(net->ipv4.devconf_dflt);
2767 	kfree(net->ipv4.devconf_all);
2768 }
2769 
2770 static __net_initdata struct pernet_operations devinet_ops = {
2771 	.init = devinet_init_net,
2772 	.exit = devinet_exit_net,
2773 };
2774 
2775 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2776 	.family		  = AF_INET,
2777 	.fill_link_af	  = inet_fill_link_af,
2778 	.get_link_af_size = inet_get_link_af_size,
2779 	.validate_link_af = inet_validate_link_af,
2780 	.set_link_af	  = inet_set_link_af,
2781 };
2782 
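/* Subsystem init: set up the inet address hash table, the pernet
 * operations, the netdevice notifier, the address lifetime worker, the
 * AF_INET link-af ops and the rtnetlink handlers for address and
 * netconf messages.
 */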
2783 void __init devinet_init(void)
2784 {
2785 	int i;
2786 
2787 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2788 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2789 
2790 	register_pernet_subsys(&devinet_ops);
2791 	register_netdevice_notifier(&ip_netdev_notifier);
2792 
2793 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2794 
2795 	rtnl_af_register(&inet_af_ops);
2796 
2797 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2798 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2799 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2800 		      RTNL_FLAG_DUMP_UNLOCKED);
2801 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2802 		      inet_netconf_dump_devconf,
2803 		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
2804 }
2805