xref: /linux/net/ipv4/devinet.c (revision c94cd9508b1335b949fd13ebd269313c65492df0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* Address flags that have no meaning for IPv4 addresses.
 * __inet_insert_ifa() masks these out of ifa_flags before an address is
 * linked onto a device.
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
/* Statically initialised IPv4 device configuration block.
 *
 * .data[] is indexed by (IPV4_DEVCONF_<name> - 1); entries not listed here
 * are zero. Unlike ipv4_devconf_dflt, ACCEPT_SOURCE_ROUTE is left at zero.
 *
 * NOTE(review): presumably this backs the "all" settings of the initial
 * namespace -- confirm against the code that consumes it later in the file.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
81 
/* Statically initialised "default" IPv4 device configuration block.
 *
 * Same layout as ipv4_devconf: .data[] is indexed by
 * (IPV4_DEVCONF_<name> - 1) and unlisted entries are zero. The only
 * difference in initial values is ACCEPT_SOURCE_ROUTE, which is 1 here.
 *
 * NOTE(review): presumably this is what net->ipv4.devconf_dflt refers to
 * for the initial namespace (inetdev_init() copies devconf_dflt into each
 * new in_device) -- confirm against the namespace init code.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
	},
};
94 
/* Read attribute @attr from the default devconf block of namespace @net
 * (net->ipv4.devconf_dflt). @net is expanded without parentheses, so pass
 * a plain pointer expression.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
/* Netlink attribute policy for IPv4 address messages. It is handed to
 * nlmsg_parse_deprecated() by inet_rtm_deladdr() and rtm_to_ifaddr() to
 * validate the IFA_* attributes of a request.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	/* label must fit in ifa_label including the terminating NUL */
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
	[IFA_PROTO]		= { .type = NLA_U8 },
};
109 
/* Parameter bundle; not referenced in this part of the file.
 * NOTE(review): presumably consumed by the netlink fill/dump helpers
 * further down (portid/seq/event/flags for the message header, netnsid and
 * ifindex as dump filters) -- confirm at the point of use.
 */
struct inet_fill_args {
	u32 portid;
	u32 seq;
	int event;
	unsigned int flags;
	int netnsid;
	int ifindex;
};
118 
/* Size of the global address hash: 2^IN4_ADDR_HSIZE_SHIFT buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/* Hash table of every in_ifaddr, bucketed by inet_addr_hash() of the
 * owning namespace and ifa_local. Entries are added and removed with the
 * RCU hlist helpers while RTNL is asserted (inet_hash_insert(),
 * inet_hash_remove()) and walked with hlist_for_each_entry_rcu().
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fallback to FIB local table so that communication
166 		 * over loopback subnets work.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
/* Defined later in the file; called from this section with RTM_NEWADDR and
 * RTM_DELADDR events, the affected address, the originating netlink header
 * (may be NULL) and the requester's port id.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN and the in_ifaddr
 * when an address is added, promoted or removed.
 * inetaddr_validator_chain is called with an in_validator_info before a new
 * address is committed, so that listeners can veto it.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL, registration trivially succeeds and
 * unregistration is a no-op.
 */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
220 {
221 	struct in_ifaddr *ifa;
222 
223 	ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
224 	if (!ifa)
225 		return NULL;
226 
227 	in_dev_hold(in_dev);
228 	ifa->ifa_dev = in_dev;
229 
230 	INIT_HLIST_NODE(&ifa->hash);
231 
232 	return ifa;
233 }
234 
235 static void inet_rcu_free_ifa(struct rcu_head *head)
236 {
237 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
238 
239 	in_dev_put(ifa->ifa_dev);
240 	kfree(ifa);
241 }
242 
243 static void inet_free_ifa(struct in_ifaddr *ifa)
244 {
245 	/* Our reference to ifa->ifa_dev must be freed ASAP
246 	 * to release the reference to the netdev the same way.
247 	 * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
248 	 */
249 	call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
250 }
251 
252 static void in_dev_free_rcu(struct rcu_head *head)
253 {
254 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
255 
256 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
257 	kfree(idev);
258 }
259 
260 void in_dev_finish_destroy(struct in_device *idev)
261 {
262 	struct net_device *dev = idev->dev;
263 
264 	WARN_ON(idev->ifa_list);
265 	WARN_ON(idev->mc_list);
266 #ifdef NET_REFCNT_DEBUG
267 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
268 #endif
269 	netdev_put(dev, &idev->dev_tracker);
270 	if (!idev->dead)
271 		pr_err("Freeing alive in_device %p\n", idev);
272 	else
273 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
274 }
275 EXPORT_SYMBOL(in_dev_finish_destroy);
276 
277 static struct in_device *inetdev_init(struct net_device *dev)
278 {
279 	struct in_device *in_dev;
280 	int err = -ENOMEM;
281 
282 	ASSERT_RTNL();
283 
284 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
285 	if (!in_dev)
286 		goto out;
287 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
288 			sizeof(in_dev->cnf));
289 	in_dev->cnf.sysctl = NULL;
290 	in_dev->dev = dev;
291 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
292 	if (!in_dev->arp_parms)
293 		goto out_kfree;
294 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
295 		dev_disable_lro(dev);
296 	/* Reference in_dev->dev */
297 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
298 	/* Account for reference dev->ip_ptr (below) */
299 	refcount_set(&in_dev->refcnt, 1);
300 
301 	err = devinet_sysctl_register(in_dev);
302 	if (err) {
303 		in_dev->dead = 1;
304 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
305 		in_dev_put(in_dev);
306 		in_dev = NULL;
307 		goto out;
308 	}
309 	ip_mc_init_dev(in_dev);
310 	if (dev->flags & IFF_UP)
311 		ip_mc_up(in_dev);
312 
313 	/* we can receive as soon as ip_ptr is set -- do this last */
314 	rcu_assign_pointer(dev->ip_ptr, in_dev);
315 out:
316 	return in_dev ?: ERR_PTR(err);
317 out_kfree:
318 	kfree(in_dev);
319 	in_dev = NULL;
320 	goto out;
321 }
322 
323 static void inetdev_destroy(struct in_device *in_dev)
324 {
325 	struct net_device *dev;
326 	struct in_ifaddr *ifa;
327 
328 	ASSERT_RTNL();
329 
330 	dev = in_dev->dev;
331 
332 	in_dev->dead = 1;
333 
334 	ip_mc_destroy_dev(in_dev);
335 
336 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
337 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
338 		inet_free_ifa(ifa);
339 	}
340 
341 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
342 
343 	devinet_sysctl_unregister(in_dev);
344 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
345 	arp_ifdown(dev);
346 
347 	in_dev_put(in_dev);
348 }
349 
350 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
351 {
352 	const struct in_ifaddr *ifa;
353 
354 	rcu_read_lock();
355 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
356 		if (inet_ifa_match(a, ifa)) {
357 			if (!b || inet_ifa_match(b, ifa)) {
358 				rcu_read_unlock();
359 				return 1;
360 			}
361 		}
362 	}
363 	rcu_read_unlock();
364 	return 0;
365 }
366 
/* Remove the address that *@ifap points to from @in_dev.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link (list head or previous ->ifa_next) holding the address
 * @destroy: when non-zero the removed address is freed here; otherwise the
 *           caller keeps ownership and must free it
 * @nlh:     netlink request that triggered the removal, or NULL
 * @portid:  netlink port id of the requester
 *
 * If the address is a primary and the device is not being torn down, the
 * secondaries of the same subnet are handled first: without
 * promote_secondaries they are all deleted, with it the first one found is
 * promoted to primary after the deleted address has been announced.
 *
 * Must be called with RTNL held.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1;
	struct in_ifaddr __rcu **last_prim;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	ifa1 = rtnl_dereference(*ifap);
	last_prim = ifap;
	/* Device is going away: no point in promoting or chasing secondaries */
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set.
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Track the link after the last primary whose scope
			 * is not narrower than ifa1's: a promoted address is
			 * re-inserted there.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = &ifa->ifa_next;

			/* Skip anything that is not a secondary of ifa1's
			 * subnet, remembering it as the predecessor of a
			 * possible promotion candidate.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary;
				 * ifap1 is left in place so the loop re-reads
				 * the new successor.
				 */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes the new primary */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from the subnet are changing
	 * the primary IP, so we must remove all their routes silently
	 * and later add them back with the new prefsrc. Do this
	 * while all addresses are still on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* Move the promoted address: unlink it from its
			 * current position and re-link it at *last_prim.
			 */
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);

			last_sec = rtnl_dereference(*last_prim);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(*last_prim, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of this
		 * subnet that were dropped before the unlink.
		 */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
479 
480 static void inet_del_ifa(struct in_device *in_dev,
481 			 struct in_ifaddr __rcu **ifap,
482 			 int destroy)
483 {
484 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
485 }
486 
/* Address lifetime scanner; defined further down. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item running check_lifetime(). It is re-queued whenever an
 * address is inserted or has its lifetime updated, and by check_lifetime()
 * itself.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
490 
491 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
492 			     u32 portid, struct netlink_ext_ack *extack)
493 {
494 	struct in_ifaddr __rcu **last_primary, **ifap;
495 	struct in_device *in_dev = ifa->ifa_dev;
496 	struct in_validator_info ivi;
497 	struct in_ifaddr *ifa1;
498 	int ret;
499 
500 	ASSERT_RTNL();
501 
502 	if (!ifa->ifa_local) {
503 		inet_free_ifa(ifa);
504 		return 0;
505 	}
506 
507 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
508 	last_primary = &in_dev->ifa_list;
509 
510 	/* Don't set IPv6 only flags to IPv4 addresses */
511 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
512 
513 	ifap = &in_dev->ifa_list;
514 	ifa1 = rtnl_dereference(*ifap);
515 
516 	while (ifa1) {
517 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
518 		    ifa->ifa_scope <= ifa1->ifa_scope)
519 			last_primary = &ifa1->ifa_next;
520 		if (ifa1->ifa_mask == ifa->ifa_mask &&
521 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
522 			if (ifa1->ifa_local == ifa->ifa_local) {
523 				inet_free_ifa(ifa);
524 				return -EEXIST;
525 			}
526 			if (ifa1->ifa_scope != ifa->ifa_scope) {
527 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
528 				inet_free_ifa(ifa);
529 				return -EINVAL;
530 			}
531 			ifa->ifa_flags |= IFA_F_SECONDARY;
532 		}
533 
534 		ifap = &ifa1->ifa_next;
535 		ifa1 = rtnl_dereference(*ifap);
536 	}
537 
538 	/* Allow any devices that wish to register ifaddr validtors to weigh
539 	 * in now, before changes are committed.  The rntl lock is serializing
540 	 * access here, so the state should not change between a validator call
541 	 * and a final notify on commit.  This isn't invoked on promotion under
542 	 * the assumption that validators are checking the address itself, and
543 	 * not the flags.
544 	 */
545 	ivi.ivi_addr = ifa->ifa_address;
546 	ivi.ivi_dev = ifa->ifa_dev;
547 	ivi.extack = extack;
548 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
549 					   NETDEV_UP, &ivi);
550 	ret = notifier_to_errno(ret);
551 	if (ret) {
552 		inet_free_ifa(ifa);
553 		return ret;
554 	}
555 
556 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
557 		ifap = last_primary;
558 
559 	rcu_assign_pointer(ifa->ifa_next, *ifap);
560 	rcu_assign_pointer(*ifap, ifa);
561 
562 	inet_hash_insert(dev_net(in_dev->dev), ifa);
563 
564 	cancel_delayed_work(&check_lifetime_work);
565 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
566 
567 	/* Send message first, then call notifier.
568 	   Notifier will trigger FIB update, so that
569 	   listeners of netlink will know about new ifaddr */
570 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
571 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
572 
573 	return 0;
574 }
575 
576 static int inet_insert_ifa(struct in_ifaddr *ifa)
577 {
578 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
579 }
580 
581 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
582 {
583 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
584 
585 	ASSERT_RTNL();
586 
587 	ipv4_devconf_setall(in_dev);
588 	neigh_parms_data_state_setall(in_dev->arp_parms);
589 
590 	if (ipv4_is_loopback(ifa->ifa_local))
591 		ifa->ifa_scope = RT_SCOPE_HOST;
592 	return inet_insert_ifa(ifa);
593 }
594 
595 /* Caller must hold RCU or RTNL :
596  * We dont take a reference on found in_device
597  */
598 struct in_device *inetdev_by_index(struct net *net, int ifindex)
599 {
600 	struct net_device *dev;
601 	struct in_device *in_dev = NULL;
602 
603 	rcu_read_lock();
604 	dev = dev_get_by_index_rcu(net, ifindex);
605 	if (dev)
606 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
607 	rcu_read_unlock();
608 	return in_dev;
609 }
610 EXPORT_SYMBOL(inetdev_by_index);
611 
612 /* Called only from RTNL semaphored context. No locks. */
613 
614 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
615 				    __be32 mask)
616 {
617 	struct in_ifaddr *ifa;
618 
619 	ASSERT_RTNL();
620 
621 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
622 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
623 			return ifa;
624 	}
625 	return NULL;
626 }
627 
628 static int ip_mc_autojoin_config(struct net *net, bool join,
629 				 const struct in_ifaddr *ifa)
630 {
631 #if defined(CONFIG_IP_MULTICAST)
632 	struct ip_mreqn mreq = {
633 		.imr_multiaddr.s_addr = ifa->ifa_address,
634 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
635 	};
636 	struct sock *sk = net->ipv4.mc_autojoin_sk;
637 	int ret;
638 
639 	ASSERT_RTNL();
640 
641 	lock_sock(sk);
642 	if (join)
643 		ret = ip_mc_join_group(sk, &mreq);
644 	else
645 		ret = ip_mc_leave_group(sk, &mreq);
646 	release_sock(sk);
647 
648 	return ret;
649 #else
650 	return -EOPNOTSUPP;
651 #endif
652 }
653 
654 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
655 			    struct netlink_ext_ack *extack)
656 {
657 	struct net *net = sock_net(skb->sk);
658 	struct in_ifaddr __rcu **ifap;
659 	struct nlattr *tb[IFA_MAX+1];
660 	struct in_device *in_dev;
661 	struct ifaddrmsg *ifm;
662 	struct in_ifaddr *ifa;
663 	int err;
664 
665 	ASSERT_RTNL();
666 
667 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
668 				     ifa_ipv4_policy, extack);
669 	if (err < 0)
670 		goto errout;
671 
672 	ifm = nlmsg_data(nlh);
673 	in_dev = inetdev_by_index(net, ifm->ifa_index);
674 	if (!in_dev) {
675 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
676 		err = -ENODEV;
677 		goto errout;
678 	}
679 
680 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
681 	     ifap = &ifa->ifa_next) {
682 		if (tb[IFA_LOCAL] &&
683 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
684 			continue;
685 
686 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
687 			continue;
688 
689 		if (tb[IFA_ADDRESS] &&
690 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
691 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
692 			continue;
693 
694 		if (ipv4_is_multicast(ifa->ifa_address))
695 			ip_mc_autojoin_config(net, false, ifa);
696 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
697 		return 0;
698 	}
699 
700 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
701 	err = -EADDRNOTAVAIL;
702 errout:
703 	return err;
704 }
705 
/* Delayed-work handler that ages non-permanent IPv4 addresses.
 *
 * Every hash bucket is scanned twice: first under the RCU read lock only,
 * to find out whether any address needs changing and to work out when the
 * next event is due; then, only if something needs changing, again under
 * RTNL to delete addresses whose valid lifetime has run out and to flag
 * those past their preferred lifetime as IFA_F_DEPRECATED.
 *
 * The work item re-queues itself for the earliest upcoming event, but
 * never sooner than ADDRCONF_TIMER_FUZZ_MAX from now.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	/* Latest acceptable time for the next run; pulled earlier below */
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* Pass 1: read-only scan, no RTNL taken */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age, tstamp;
			u32 preferred_lft;
			u32 valid_lft;
			u32 flags;

			/* Fields may be updated concurrently by
			 * set_ifa_lifetime(), hence the READ_ONCE() snapshots.
			 */
			flags = READ_ONCE(ifa->ifa_flags);
			if (flags & IFA_F_PERMANENT)
				continue;

			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
			tstamp = READ_ONCE(ifa->ifa_tstamp);
			/* We try to batch several events at once. */
			age = (now - tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (valid_lft != INFINITY_LIFE_TIME &&
			    age >= valid_lft) {
				/* expired: must be deleted in pass 2 */
				change_needed = true;
			} else if (preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= preferred_lft) {
				/* past preferred lifetime: the next event for
				 * this address is the end of its valid lifetime
				 */
				if (time_before(tstamp + valid_lft * HZ, next))
					next = tstamp + valid_lft * HZ;

				if (!(flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(tstamp + preferred_lft * HZ,
					       next)) {
				/* next event: end of the preferred lifetime */
				next = tstamp + preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Pass 2: apply the changes under RTNL */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr __rcu **ifap;
				struct in_ifaddr *tmp;

				/* inet_del_ifa() needs the link that points at
				 * the address, so walk the device list to find it.
				 */
				ifap = &ifa->ifa_dev->ifa_list;
				tmp = rtnl_dereference(*ifap);
				while (tmp) {
					if (tmp == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
					ifap = &tmp->ifa_next;
					tmp = rtnl_dereference(*ifap);
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
810 
811 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
812 			     __u32 prefered_lft)
813 {
814 	unsigned long timeout;
815 	u32 flags;
816 
817 	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
818 
819 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
820 	if (addrconf_finite_timeout(timeout))
821 		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
822 	else
823 		flags |= IFA_F_PERMANENT;
824 
825 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
826 	if (addrconf_finite_timeout(timeout)) {
827 		if (timeout == 0)
828 			flags |= IFA_F_DEPRECATED;
829 		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
830 	}
831 	WRITE_ONCE(ifa->ifa_flags, flags);
832 	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
833 	if (!ifa->ifa_cstamp)
834 		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
835 }
836 
837 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
838 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
839 				       struct netlink_ext_ack *extack)
840 {
841 	struct nlattr *tb[IFA_MAX+1];
842 	struct in_ifaddr *ifa;
843 	struct ifaddrmsg *ifm;
844 	struct net_device *dev;
845 	struct in_device *in_dev;
846 	int err;
847 
848 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
849 				     ifa_ipv4_policy, extack);
850 	if (err < 0)
851 		goto errout;
852 
853 	ifm = nlmsg_data(nlh);
854 	err = -EINVAL;
855 
856 	if (ifm->ifa_prefixlen > 32) {
857 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
858 		goto errout;
859 	}
860 
861 	if (!tb[IFA_LOCAL]) {
862 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
863 		goto errout;
864 	}
865 
866 	dev = __dev_get_by_index(net, ifm->ifa_index);
867 	err = -ENODEV;
868 	if (!dev) {
869 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
870 		goto errout;
871 	}
872 
873 	in_dev = __in_dev_get_rtnl(dev);
874 	err = -ENOBUFS;
875 	if (!in_dev)
876 		goto errout;
877 
878 	ifa = inet_alloc_ifa(in_dev);
879 	if (!ifa)
880 		/*
881 		 * A potential indev allocation can be left alive, it stays
882 		 * assigned to its device and is destroy with it.
883 		 */
884 		goto errout;
885 
886 	ipv4_devconf_setall(in_dev);
887 	neigh_parms_data_state_setall(in_dev->arp_parms);
888 
889 	if (!tb[IFA_ADDRESS])
890 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
891 
892 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
893 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
894 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
895 					 ifm->ifa_flags;
896 	ifa->ifa_scope = ifm->ifa_scope;
897 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
898 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
899 
900 	if (tb[IFA_BROADCAST])
901 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
902 
903 	if (tb[IFA_LABEL])
904 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
905 	else
906 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
907 
908 	if (tb[IFA_RT_PRIORITY])
909 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
910 
911 	if (tb[IFA_PROTO])
912 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
913 
914 	if (tb[IFA_CACHEINFO]) {
915 		struct ifa_cacheinfo *ci;
916 
917 		ci = nla_data(tb[IFA_CACHEINFO]);
918 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
919 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
920 			err = -EINVAL;
921 			goto errout_free;
922 		}
923 		*pvalid_lft = ci->ifa_valid;
924 		*pprefered_lft = ci->ifa_prefered;
925 	}
926 
927 	return ifa;
928 
929 errout_free:
930 	inet_free_ifa(ifa);
931 errout:
932 	return ERR_PTR(err);
933 }
934 
935 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
936 {
937 	struct in_device *in_dev = ifa->ifa_dev;
938 	struct in_ifaddr *ifa1;
939 
940 	if (!ifa->ifa_local)
941 		return NULL;
942 
943 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
944 		if (ifa1->ifa_mask == ifa->ifa_mask &&
945 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
946 		    ifa1->ifa_local == ifa->ifa_local)
947 			return ifa1;
948 	}
949 	return NULL;
950 }
951 
952 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
953 			    struct netlink_ext_ack *extack)
954 {
955 	struct net *net = sock_net(skb->sk);
956 	struct in_ifaddr *ifa;
957 	struct in_ifaddr *ifa_existing;
958 	__u32 valid_lft = INFINITY_LIFE_TIME;
959 	__u32 prefered_lft = INFINITY_LIFE_TIME;
960 
961 	ASSERT_RTNL();
962 
963 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
964 	if (IS_ERR(ifa))
965 		return PTR_ERR(ifa);
966 
967 	ifa_existing = find_matching_ifa(ifa);
968 	if (!ifa_existing) {
969 		/* It would be best to check for !NLM_F_CREATE here but
970 		 * userspace already relies on not having to provide this.
971 		 */
972 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
973 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
974 			int ret = ip_mc_autojoin_config(net, true, ifa);
975 
976 			if (ret < 0) {
977 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
978 				inet_free_ifa(ifa);
979 				return ret;
980 			}
981 		}
982 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
983 					 extack);
984 	} else {
985 		u32 new_metric = ifa->ifa_rt_priority;
986 		u8 new_proto = ifa->ifa_proto;
987 
988 		inet_free_ifa(ifa);
989 
990 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
991 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
992 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
993 			return -EEXIST;
994 		}
995 		ifa = ifa_existing;
996 
997 		if (ifa->ifa_rt_priority != new_metric) {
998 			fib_modify_prefix_metric(ifa, new_metric);
999 			ifa->ifa_rt_priority = new_metric;
1000 		}
1001 
1002 		ifa->ifa_proto = new_proto;
1003 
1004 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1005 		cancel_delayed_work(&check_lifetime_work);
1006 		queue_delayed_work(system_power_efficient_wq,
1007 				&check_lifetime_work, 0);
1008 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1009 	}
1010 	return 0;
1011 }
1012 
1013 /*
1014  *	Determine a default network mask, based on the IP address.
1015  */
1016 
1017 static int inet_abc_len(__be32 addr)
1018 {
1019 	int rc = -1;	/* Something else, probably a multicast. */
1020 
1021 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1022 		rc = 0;
1023 	else {
1024 		__u32 haddr = ntohl(addr);
1025 		if (IN_CLASSA(haddr))
1026 			rc = 8;
1027 		else if (IN_CLASSB(haddr))
1028 			rc = 16;
1029 		else if (IN_CLASSC(haddr))
1030 			rc = 24;
1031 		else if (IN_CLASSE(haddr))
1032 			rc = 32;
1033 	}
1034 
1035 	return rc;
1036 }
1037 
1038 
/*
 * devinet_ioctl - service the legacy IPv4 address ioctls for one interface.
 * @net: namespace in which the device named by @ifr is looked up
 * @cmd: SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK}, SIOCSIF{ADDR,BRDADDR,DSTADDR,
 *	 NETMASK} or SIOCSIFFLAGS; any other value yields -EINVAL
 * @ifr: request block, dereferenced directly.  ifr_name holds a device name
 *	 or a "dev:alias" label; ifr_addr is read by the set commands and
 *	 overwritten by the get commands; ifr_flags is read by SIOCSIFFLAGS
 *
 * The set commands and SIOCSIFFLAGS require CAP_NET_ADMIN in @net's user
 * namespace.  Device lookup, address lookup and the update itself all run
 * between rtnl_lock() and rtnl_unlock().
 *
 * Return: 0 on success, otherwise a negative errno (-EPERM, -EINVAL,
 * -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or whatever dev_change_flags() /
 * inet_set_ifa() return).  The initial -EFAULT is overwritten on every path.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
	struct in_ifaddr __rcu **ifap = NULL;
	struct in_device *in_dev;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/* Make sure the name is NUL-terminated before any string helper runs */
	ifr->ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut off a ":alias" suffix so that dev_load() and the
	 * device lookup below see the bare device name.  The ':' is put back
	 * once the device has been found.
	 */
	colon = strchr(ifr->ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr->ifr_name);

	/* First pass over cmd: permission and argument checks only */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Historical note: "these ioctls will not sleep, so that we
		   do not impose a lock.  One day we will be forced to put
		   shlock here (I mean SMP)".  The lookup below nevertheless
		   runs under the RTNL lock like every other command.
		 */
		/* Only try the label+address match when the caller really
		 * passed an AF_INET address; the reply buffer is then reset.
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr->ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label for the ifa_label comparisons below */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */

			/* ifap trails the walk so that it points at the link
			 * referencing ifa; inet_del_ifa() below takes it.
			 */
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR (may create the address) and SIOCSIFFLAGS (may act
	 * on the device itself) can proceed without an existing address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Second pass over cmd: perform the operation */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_local;
		break;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		break;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_address;
		break;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_mask;
		break;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Flags on an alias label: clearing IFF_UP deletes
			 * that address, setting it changes nothing.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr->ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		/* Plain device name: hand the flags to the device layer */
		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		/* A negative inet_abc_len() means the address has no
		 * classful prefix length and is rejected.
		 */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: allocate one */
			ret = -ENOBUFS;
			if (!in_dev)
				break;
			ifa = inet_alloc_ifa(in_dev);
			if (!ifa)
				break;

			if (colon)
				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Existing address: nothing to do if unchanged,
			 * otherwise take it off the list and reuse the same
			 * ifa with broadcast and scope reset.
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Prefix length and mask come from inet_abc_len();
			 * a broadcast address is derived only for prefixes
			 * shorter than /31 on IFF_BROADCAST devices.
			 */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			/* Point-to-point devices get a host (/32) address */
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Remove, modify, re-insert */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
}
1278 
1279 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1280 {
1281 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1282 	const struct in_ifaddr *ifa;
1283 	struct ifreq ifr;
1284 	int done = 0;
1285 
1286 	if (WARN_ON(size > sizeof(struct ifreq)))
1287 		goto out;
1288 
1289 	if (!in_dev)
1290 		goto out;
1291 
1292 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1293 		if (!buf) {
1294 			done += size;
1295 			continue;
1296 		}
1297 		if (len < size)
1298 			break;
1299 		memset(&ifr, 0, sizeof(struct ifreq));
1300 		strcpy(ifr.ifr_name, ifa->ifa_label);
1301 
1302 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1303 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1304 								ifa->ifa_local;
1305 
1306 		if (copy_to_user(buf + done, &ifr, size)) {
1307 			done = -EFAULT;
1308 			break;
1309 		}
1310 		len  -= size;
1311 		done += size;
1312 	}
1313 out:
1314 	return done;
1315 }
1316 
1317 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1318 				 int scope)
1319 {
1320 	const struct in_ifaddr *ifa;
1321 
1322 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1323 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1324 			continue;
1325 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1326 		    ifa->ifa_scope <= scope)
1327 			return ifa->ifa_local;
1328 	}
1329 
1330 	return 0;
1331 }
1332 
1333 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1334 {
1335 	const struct in_ifaddr *ifa;
1336 	__be32 addr = 0;
1337 	unsigned char localnet_scope = RT_SCOPE_HOST;
1338 	struct in_device *in_dev;
1339 	struct net *net = dev_net(dev);
1340 	int master_idx;
1341 
1342 	rcu_read_lock();
1343 	in_dev = __in_dev_get_rcu(dev);
1344 	if (!in_dev)
1345 		goto no_in_dev;
1346 
1347 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1348 		localnet_scope = RT_SCOPE_LINK;
1349 
1350 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1351 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1352 			continue;
1353 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1354 			continue;
1355 		if (!dst || inet_ifa_match(dst, ifa)) {
1356 			addr = ifa->ifa_local;
1357 			break;
1358 		}
1359 		if (!addr)
1360 			addr = ifa->ifa_local;
1361 	}
1362 
1363 	if (addr)
1364 		goto out_unlock;
1365 no_in_dev:
1366 	master_idx = l3mdev_master_ifindex_rcu(dev);
1367 
1368 	/* For VRFs, the VRF device takes the place of the loopback device,
1369 	 * with addresses on it being preferred.  Note in such cases the
1370 	 * loopback device will be among the devices that fail the master_idx
1371 	 * equality check in the loop below.
1372 	 */
1373 	if (master_idx &&
1374 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1375 	    (in_dev = __in_dev_get_rcu(dev))) {
1376 		addr = in_dev_select_addr(in_dev, scope);
1377 		if (addr)
1378 			goto out_unlock;
1379 	}
1380 
1381 	/* Not loopback addresses on loopback should be preferred
1382 	   in this case. It is important that lo is the first interface
1383 	   in dev_base list.
1384 	 */
1385 	for_each_netdev_rcu(net, dev) {
1386 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1387 			continue;
1388 
1389 		in_dev = __in_dev_get_rcu(dev);
1390 		if (!in_dev)
1391 			continue;
1392 
1393 		addr = in_dev_select_addr(in_dev, scope);
1394 		if (addr)
1395 			goto out_unlock;
1396 	}
1397 out_unlock:
1398 	rcu_read_unlock();
1399 	return addr;
1400 }
1401 EXPORT_SYMBOL(inet_select_addr);
1402 
/*
 * confirm_addr_indev - per-device worker for inet_confirm_addr().
 * @in_dev: device whose address list is walked with the RCU iterator
 * @dst:    address that must satisfy inet_ifa_match() on some entry, 0 = any
 * @local:  wanted local address, 0 = pick one
 * @scope:  largest scope value acceptable for the returned address
 *
 * Two pieces of state are built up during a single walk:
 *   addr - first ifa_local that equals @local (or any, if @local is 0) and
 *	    whose effective scope is <= @scope;
 *   same - set once an entry is found for which both @local and @dst pass
 *	    inet_ifa_match() (a zero value counts as a pass).
 *
 * Return: addr if 'same' ended up set, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	/* With route_localnet enabled the effective scope is capped at
	 * RT_SCOPE_LINK instead of RT_SCOPE_HOST (see min() below).
	 */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		/* Pick the candidate address once; stop right away if the
		 * subnet condition has already been met by an earlier entry.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* An explicit local address, or no dst
				 * constraint: the candidate stands as is.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1446 
1447 /*
1448  * Confirm that local IP address exists using wildcards:
1449  * - net: netns to check, cannot be NULL
1450  * - in_dev: only on this interface, NULL=any interface
1451  * - dst: only in the same subnet as dst, 0=any dst
1452  * - local: address, 0=autoselect the local address
1453  * - scope: maximum allowed scope value for the local address
1454  */
1455 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1456 			 __be32 dst, __be32 local, int scope)
1457 {
1458 	__be32 addr = 0;
1459 	struct net_device *dev;
1460 
1461 	if (in_dev)
1462 		return confirm_addr_indev(in_dev, dst, local, scope);
1463 
1464 	rcu_read_lock();
1465 	for_each_netdev_rcu(net, dev) {
1466 		in_dev = __in_dev_get_rcu(dev);
1467 		if (in_dev) {
1468 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1469 			if (addr)
1470 				break;
1471 		}
1472 	}
1473 	rcu_read_unlock();
1474 
1475 	return addr;
1476 }
1477 EXPORT_SYMBOL(inet_confirm_addr);
1478 
1479 /*
1480  *	Device notifier
1481  */
1482 
1483 int register_inetaddr_notifier(struct notifier_block *nb)
1484 {
1485 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1486 }
1487 EXPORT_SYMBOL(register_inetaddr_notifier);
1488 
1489 int unregister_inetaddr_notifier(struct notifier_block *nb)
1490 {
1491 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1492 }
1493 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1494 
1495 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1496 {
1497 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1498 }
1499 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1500 
1501 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1502 {
1503 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1504 	    nb);
1505 }
1506 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1507 
1508 /* Rename ifa_labels for a device name change. Make some effort to preserve
1509  * existing alias numbering and to create unique labels if possible.
1510 */
1511 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1512 {
1513 	struct in_ifaddr *ifa;
1514 	int named = 0;
1515 
1516 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1517 		char old[IFNAMSIZ], *dot;
1518 
1519 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1520 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1521 		if (named++ == 0)
1522 			goto skip;
1523 		dot = strchr(old, ':');
1524 		if (!dot) {
1525 			sprintf(old, ":%d", named);
1526 			dot = old;
1527 		}
1528 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1529 			strcat(ifa->ifa_label, dot);
1530 		else
1531 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1532 skip:
1533 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1534 	}
1535 }
1536 
1537 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1538 					struct in_device *in_dev)
1539 
1540 {
1541 	const struct in_ifaddr *ifa;
1542 
1543 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1544 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1545 			 ifa->ifa_local, dev,
1546 			 ifa->ifa_local, NULL,
1547 			 dev->dev_addr, NULL);
1548 	}
1549 }
1550 
/* Called only under RTNL semaphore */

/*
 * inetdev_event - netdevice notifier callback for the IPv4 device layer.
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the affected net_device is extracted
 *
 * Creates, updates or tears down the in_device attached to the device
 * according to @event.
 *
 * Return: NOTIFY_DONE, except when inetdev_init() fails on NETDEV_REGISTER,
 * in which case the error is passed back through notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only REGISTER and CHANGEMTU can create it */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			/* The result of inetdev_init() is not checked here */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device is
		 * unexpected: log it and clear dev->ip_ptr.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Loopback gets INADDR_LOOPBACK/8 with host scope
			 * configured automatically; allocation failure is
			 * silently tolerated.
			 */
			struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);

			if (ifa) {
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		fallthrough;
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR announce themselves via ARP only when
		 * the arp_notify setting is enabled.
		 */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		fallthrough;
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		fallthrough;
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries after the rename */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1643 
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1647 
1648 static size_t inet_nlmsg_size(void)
1649 {
1650 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1651 	       + nla_total_size(4) /* IFA_ADDRESS */
1652 	       + nla_total_size(4) /* IFA_LOCAL */
1653 	       + nla_total_size(4) /* IFA_BROADCAST */
1654 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1655 	       + nla_total_size(4)  /* IFA_FLAGS */
1656 	       + nla_total_size(1)  /* IFA_PROTO */
1657 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1658 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1659 }
1660 
1661 static inline u32 cstamp_delta(unsigned long cstamp)
1662 {
1663 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1664 }
1665 
1666 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1667 			 unsigned long tstamp, u32 preferred, u32 valid)
1668 {
1669 	struct ifa_cacheinfo ci;
1670 
1671 	ci.cstamp = cstamp_delta(cstamp);
1672 	ci.tstamp = cstamp_delta(tstamp);
1673 	ci.ifa_prefered = preferred;
1674 	ci.ifa_valid = valid;
1675 
1676 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1677 }
1678 
1679 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1680 			    struct inet_fill_args *args)
1681 {
1682 	struct ifaddrmsg *ifm;
1683 	struct nlmsghdr  *nlh;
1684 	unsigned long tstamp;
1685 	u32 preferred, valid;
1686 	u32 flags;
1687 
1688 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1689 			args->flags);
1690 	if (!nlh)
1691 		return -EMSGSIZE;
1692 
1693 	ifm = nlmsg_data(nlh);
1694 	ifm->ifa_family = AF_INET;
1695 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1696 
1697 	flags = READ_ONCE(ifa->ifa_flags);
1698 	/* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
1699 	 * The 32bit value is given in IFA_FLAGS attribute.
1700 	 */
1701 	ifm->ifa_flags = (__u8)flags;
1702 
1703 	ifm->ifa_scope = ifa->ifa_scope;
1704 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1705 
1706 	if (args->netnsid >= 0 &&
1707 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1708 		goto nla_put_failure;
1709 
1710 	tstamp = READ_ONCE(ifa->ifa_tstamp);
1711 	if (!(flags & IFA_F_PERMANENT)) {
1712 		preferred = READ_ONCE(ifa->ifa_preferred_lft);
1713 		valid = READ_ONCE(ifa->ifa_valid_lft);
1714 		if (preferred != INFINITY_LIFE_TIME) {
1715 			long tval = (jiffies - tstamp) / HZ;
1716 
1717 			if (preferred > tval)
1718 				preferred -= tval;
1719 			else
1720 				preferred = 0;
1721 			if (valid != INFINITY_LIFE_TIME) {
1722 				if (valid > tval)
1723 					valid -= tval;
1724 				else
1725 					valid = 0;
1726 			}
1727 		}
1728 	} else {
1729 		preferred = INFINITY_LIFE_TIME;
1730 		valid = INFINITY_LIFE_TIME;
1731 	}
1732 	if ((ifa->ifa_address &&
1733 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1734 	    (ifa->ifa_local &&
1735 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1736 	    (ifa->ifa_broadcast &&
1737 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1738 	    (ifa->ifa_label[0] &&
1739 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1740 	    (ifa->ifa_proto &&
1741 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1742 	    nla_put_u32(skb, IFA_FLAGS, flags) ||
1743 	    (ifa->ifa_rt_priority &&
1744 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1745 	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1746 			  preferred, valid))
1747 		goto nla_put_failure;
1748 
1749 	nlmsg_end(skb, nlh);
1750 	return 0;
1751 
1752 nla_put_failure:
1753 	nlmsg_cancel(skb, nlh);
1754 	return -EMSGSIZE;
1755 }
1756 
1757 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1758 				      struct inet_fill_args *fillargs,
1759 				      struct net **tgt_net, struct sock *sk,
1760 				      struct netlink_callback *cb)
1761 {
1762 	struct netlink_ext_ack *extack = cb->extack;
1763 	struct nlattr *tb[IFA_MAX+1];
1764 	struct ifaddrmsg *ifm;
1765 	int err, i;
1766 
1767 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1768 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1769 		return -EINVAL;
1770 	}
1771 
1772 	ifm = nlmsg_data(nlh);
1773 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1774 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1775 		return -EINVAL;
1776 	}
1777 
1778 	fillargs->ifindex = ifm->ifa_index;
1779 	if (fillargs->ifindex) {
1780 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1781 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1782 	}
1783 
1784 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1785 					    ifa_ipv4_policy, extack);
1786 	if (err < 0)
1787 		return err;
1788 
1789 	for (i = 0; i <= IFA_MAX; ++i) {
1790 		if (!tb[i])
1791 			continue;
1792 
1793 		if (i == IFA_TARGET_NETNSID) {
1794 			struct net *net;
1795 
1796 			fillargs->netnsid = nla_get_s32(tb[i]);
1797 
1798 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1799 			if (IS_ERR(net)) {
1800 				fillargs->netnsid = -1;
1801 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1802 				return PTR_ERR(net);
1803 			}
1804 			*tgt_net = net;
1805 		} else {
1806 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1807 			return -EINVAL;
1808 		}
1809 	}
1810 
1811 	return 0;
1812 }
1813 
1814 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1815 			    struct netlink_callback *cb, int *s_ip_idx,
1816 			    struct inet_fill_args *fillargs)
1817 {
1818 	struct in_ifaddr *ifa;
1819 	int ip_idx = 0;
1820 	int err;
1821 
1822 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1823 		if (ip_idx < *s_ip_idx) {
1824 			ip_idx++;
1825 			continue;
1826 		}
1827 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1828 		if (err < 0)
1829 			goto done;
1830 
1831 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1832 		ip_idx++;
1833 	}
1834 	err = 0;
1835 	ip_idx = 0;
1836 done:
1837 	*s_ip_idx = ip_idx;
1838 
1839 	return err;
1840 }
1841 
1842 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1843  */
1844 static u32 inet_base_seq(const struct net *net)
1845 {
1846 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1847 		  READ_ONCE(net->dev_base_seq);
1848 
1849 	/* Must not return 0 (see nl_dump_check_consistent()).
1850 	 * Chose a value far away from 0.
1851 	 */
1852 	if (!res)
1853 		res = 0x80000000;
1854 	return res;
1855 }
1856 
1857 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1858 {
1859 	const struct nlmsghdr *nlh = cb->nlh;
1860 	struct inet_fill_args fillargs = {
1861 		.portid = NETLINK_CB(cb->skb).portid,
1862 		.seq = nlh->nlmsg_seq,
1863 		.event = RTM_NEWADDR,
1864 		.flags = NLM_F_MULTI,
1865 		.netnsid = -1,
1866 	};
1867 	struct net *net = sock_net(skb->sk);
1868 	struct net *tgt_net = net;
1869 	struct {
1870 		unsigned long ifindex;
1871 		int ip_idx;
1872 	} *ctx = (void *)cb->ctx;
1873 	struct in_device *in_dev;
1874 	struct net_device *dev;
1875 	int err = 0;
1876 
1877 	rcu_read_lock();
1878 	if (cb->strict_check) {
1879 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1880 						 skb->sk, cb);
1881 		if (err < 0)
1882 			goto done;
1883 
1884 		if (fillargs.ifindex) {
1885 			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1886 			if (!dev) {
1887 				err = -ENODEV;
1888 				goto done;
1889 			}
1890 			in_dev = __in_dev_get_rcu(dev);
1891 			if (!in_dev)
1892 				goto done;
1893 			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1894 					       &fillargs);
1895 			goto done;
1896 		}
1897 	}
1898 
1899 	cb->seq = inet_base_seq(tgt_net);
1900 
1901 	for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1902 		in_dev = __in_dev_get_rcu(dev);
1903 		if (!in_dev)
1904 			continue;
1905 		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1906 				       &fillargs);
1907 		if (err < 0)
1908 			goto done;
1909 	}
1910 done:
1911 	if (fillargs.netnsid >= 0)
1912 		put_net(tgt_net);
1913 	rcu_read_unlock();
1914 	return err;
1915 }
1916 
1917 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1918 		      u32 portid)
1919 {
1920 	struct inet_fill_args fillargs = {
1921 		.portid = portid,
1922 		.seq = nlh ? nlh->nlmsg_seq : 0,
1923 		.event = event,
1924 		.flags = 0,
1925 		.netnsid = -1,
1926 	};
1927 	struct sk_buff *skb;
1928 	int err = -ENOBUFS;
1929 	struct net *net;
1930 
1931 	net = dev_net(ifa->ifa_dev->dev);
1932 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1933 	if (!skb)
1934 		goto errout;
1935 
1936 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1937 	if (err < 0) {
1938 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1939 		WARN_ON(err == -EMSGSIZE);
1940 		kfree_skb(skb);
1941 		goto errout;
1942 	}
1943 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1944 	return;
1945 errout:
1946 	rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1947 }
1948 
1949 static size_t inet_get_link_af_size(const struct net_device *dev,
1950 				    u32 ext_filter_mask)
1951 {
1952 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1953 
1954 	if (!in_dev)
1955 		return 0;
1956 
1957 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1958 }
1959 
1960 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1961 			     u32 ext_filter_mask)
1962 {
1963 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1964 	struct nlattr *nla;
1965 	int i;
1966 
1967 	if (!in_dev)
1968 		return -ENODATA;
1969 
1970 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1971 	if (!nla)
1972 		return -EMSGSIZE;
1973 
1974 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1975 		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
1976 
1977 	return 0;
1978 }
1979 
/* Attribute policy for the IFLA_INET_* attributes: IFLA_INET_CONF must be
 * a nested attribute.  Used by inet_validate_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1983 
1984 static int inet_validate_link_af(const struct net_device *dev,
1985 				 const struct nlattr *nla,
1986 				 struct netlink_ext_ack *extack)
1987 {
1988 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1989 	int err, rem;
1990 
1991 	if (dev && !__in_dev_get_rtnl(dev))
1992 		return -EAFNOSUPPORT;
1993 
1994 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1995 					  inet_af_policy, extack);
1996 	if (err < 0)
1997 		return err;
1998 
1999 	if (tb[IFLA_INET_CONF]) {
2000 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2001 			int cfgid = nla_type(a);
2002 
2003 			if (nla_len(a) < 4)
2004 				return -EINVAL;
2005 
2006 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2007 				return -EINVAL;
2008 		}
2009 	}
2010 
2011 	return 0;
2012 }
2013 
2014 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2015 			    struct netlink_ext_ack *extack)
2016 {
2017 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2018 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2019 	int rem;
2020 
2021 	if (!in_dev)
2022 		return -EAFNOSUPPORT;
2023 
2024 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2025 		return -EINVAL;
2026 
2027 	if (tb[IFLA_INET_CONF]) {
2028 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2029 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2030 	}
2031 
2032 	return 0;
2033 }
2034 
2035 static int inet_netconf_msgsize_devconf(int type)
2036 {
2037 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2038 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2039 	bool all = false;
2040 
2041 	if (type == NETCONFA_ALL)
2042 		all = true;
2043 
2044 	if (all || type == NETCONFA_FORWARDING)
2045 		size += nla_total_size(4);
2046 	if (all || type == NETCONFA_RP_FILTER)
2047 		size += nla_total_size(4);
2048 	if (all || type == NETCONFA_MC_FORWARDING)
2049 		size += nla_total_size(4);
2050 	if (all || type == NETCONFA_BC_FORWARDING)
2051 		size += nla_total_size(4);
2052 	if (all || type == NETCONFA_PROXY_NEIGH)
2053 		size += nla_total_size(4);
2054 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2055 		size += nla_total_size(4);
2056 
2057 	return size;
2058 }
2059 
2060 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2061 				     const struct ipv4_devconf *devconf,
2062 				     u32 portid, u32 seq, int event,
2063 				     unsigned int flags, int type)
2064 {
2065 	struct nlmsghdr  *nlh;
2066 	struct netconfmsg *ncm;
2067 	bool all = false;
2068 
2069 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2070 			flags);
2071 	if (!nlh)
2072 		return -EMSGSIZE;
2073 
2074 	if (type == NETCONFA_ALL)
2075 		all = true;
2076 
2077 	ncm = nlmsg_data(nlh);
2078 	ncm->ncm_family = AF_INET;
2079 
2080 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2081 		goto nla_put_failure;
2082 
2083 	if (!devconf)
2084 		goto out;
2085 
2086 	if ((all || type == NETCONFA_FORWARDING) &&
2087 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2088 			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2089 		goto nla_put_failure;
2090 	if ((all || type == NETCONFA_RP_FILTER) &&
2091 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2092 			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2093 		goto nla_put_failure;
2094 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2095 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2096 			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2097 		goto nla_put_failure;
2098 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2099 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2100 			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2101 		goto nla_put_failure;
2102 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2103 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2104 			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2105 		goto nla_put_failure;
2106 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2107 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2108 			IPV4_DEVCONF_RO(*devconf,
2109 					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2110 		goto nla_put_failure;
2111 
2112 out:
2113 	nlmsg_end(skb, nlh);
2114 	return 0;
2115 
2116 nla_put_failure:
2117 	nlmsg_cancel(skb, nlh);
2118 	return -EMSGSIZE;
2119 }
2120 
2121 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2122 				 int ifindex, struct ipv4_devconf *devconf)
2123 {
2124 	struct sk_buff *skb;
2125 	int err = -ENOBUFS;
2126 
2127 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2128 	if (!skb)
2129 		goto errout;
2130 
2131 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2132 					event, 0, type);
2133 	if (err < 0) {
2134 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2135 		WARN_ON(err == -EMSGSIZE);
2136 		kfree_skb(skb);
2137 		goto errout;
2138 	}
2139 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2140 	return;
2141 errout:
2142 	rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2143 }
2144 
/* Attribute policy used when parsing netconf get requests (see
 * inet_netconf_valid_get_req()).  Each listed attribute declares a length
 * of sizeof(int).  NETCONFA_MC_FORWARDING and NETCONFA_BC_FORWARDING have
 * no entry here.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2152 
2153 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2154 				      const struct nlmsghdr *nlh,
2155 				      struct nlattr **tb,
2156 				      struct netlink_ext_ack *extack)
2157 {
2158 	int i, err;
2159 
2160 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2161 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2162 		return -EINVAL;
2163 	}
2164 
2165 	if (!netlink_strict_get_check(skb))
2166 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2167 					      tb, NETCONFA_MAX,
2168 					      devconf_ipv4_policy, extack);
2169 
2170 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2171 					    tb, NETCONFA_MAX,
2172 					    devconf_ipv4_policy, extack);
2173 	if (err)
2174 		return err;
2175 
2176 	for (i = 0; i <= NETCONFA_MAX; i++) {
2177 		if (!tb[i])
2178 			continue;
2179 
2180 		switch (i) {
2181 		case NETCONFA_IFINDEX:
2182 			break;
2183 		default:
2184 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2185 			return -EINVAL;
2186 		}
2187 	}
2188 
2189 	return 0;
2190 }
2191 
2192 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2193 				    struct nlmsghdr *nlh,
2194 				    struct netlink_ext_ack *extack)
2195 {
2196 	struct net *net = sock_net(in_skb->sk);
2197 	struct nlattr *tb[NETCONFA_MAX + 1];
2198 	const struct ipv4_devconf *devconf;
2199 	struct in_device *in_dev = NULL;
2200 	struct net_device *dev = NULL;
2201 	struct sk_buff *skb;
2202 	int ifindex;
2203 	int err;
2204 
2205 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2206 	if (err)
2207 		return err;
2208 
2209 	if (!tb[NETCONFA_IFINDEX])
2210 		return -EINVAL;
2211 
2212 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2213 	switch (ifindex) {
2214 	case NETCONFA_IFINDEX_ALL:
2215 		devconf = net->ipv4.devconf_all;
2216 		break;
2217 	case NETCONFA_IFINDEX_DEFAULT:
2218 		devconf = net->ipv4.devconf_dflt;
2219 		break;
2220 	default:
2221 		err = -ENODEV;
2222 		dev = dev_get_by_index(net, ifindex);
2223 		if (dev)
2224 			in_dev = in_dev_get(dev);
2225 		if (!in_dev)
2226 			goto errout;
2227 		devconf = &in_dev->cnf;
2228 		break;
2229 	}
2230 
2231 	err = -ENOBUFS;
2232 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2233 	if (!skb)
2234 		goto errout;
2235 
2236 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2237 					NETLINK_CB(in_skb).portid,
2238 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2239 					NETCONFA_ALL);
2240 	if (err < 0) {
2241 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2242 		WARN_ON(err == -EMSGSIZE);
2243 		kfree_skb(skb);
2244 		goto errout;
2245 	}
2246 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2247 errout:
2248 	if (in_dev)
2249 		in_dev_put(in_dev);
2250 	dev_put(dev);
2251 	return err;
2252 }
2253 
2254 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2255 				     struct netlink_callback *cb)
2256 {
2257 	const struct nlmsghdr *nlh = cb->nlh;
2258 	struct net *net = sock_net(skb->sk);
2259 	struct {
2260 		unsigned long ifindex;
2261 		unsigned int all_default;
2262 	} *ctx = (void *)cb->ctx;
2263 	const struct in_device *in_dev;
2264 	struct net_device *dev;
2265 	int err = 0;
2266 
2267 	if (cb->strict_check) {
2268 		struct netlink_ext_ack *extack = cb->extack;
2269 		struct netconfmsg *ncm;
2270 
2271 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2272 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2273 			return -EINVAL;
2274 		}
2275 
2276 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2277 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2278 			return -EINVAL;
2279 		}
2280 	}
2281 
2282 	rcu_read_lock();
2283 	for_each_netdev_dump(net, dev, ctx->ifindex) {
2284 		in_dev = __in_dev_get_rcu(dev);
2285 		if (!in_dev)
2286 			continue;
2287 		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2288 						&in_dev->cnf,
2289 						NETLINK_CB(cb->skb).portid,
2290 						nlh->nlmsg_seq,
2291 						RTM_NEWNETCONF, NLM_F_MULTI,
2292 						NETCONFA_ALL);
2293 		if (err < 0)
2294 			goto done;
2295 	}
2296 	if (ctx->all_default == 0) {
2297 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2298 						net->ipv4.devconf_all,
2299 						NETLINK_CB(cb->skb).portid,
2300 						nlh->nlmsg_seq,
2301 						RTM_NEWNETCONF, NLM_F_MULTI,
2302 						NETCONFA_ALL);
2303 		if (err < 0)
2304 			goto done;
2305 		ctx->all_default++;
2306 	}
2307 	if (ctx->all_default == 1) {
2308 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2309 						net->ipv4.devconf_dflt,
2310 						NETLINK_CB(cb->skb).portid,
2311 						nlh->nlmsg_seq,
2312 						RTM_NEWNETCONF, NLM_F_MULTI,
2313 						NETCONFA_ALL);
2314 		if (err < 0)
2315 			goto done;
2316 		ctx->all_default++;
2317 	}
2318 done:
2319 	rcu_read_unlock();
2320 	return err;
2321 }
2322 
2323 #ifdef CONFIG_SYSCTL
2324 
2325 static void devinet_copy_dflt_conf(struct net *net, int i)
2326 {
2327 	struct net_device *dev;
2328 
2329 	rcu_read_lock();
2330 	for_each_netdev_rcu(net, dev) {
2331 		struct in_device *in_dev;
2332 
2333 		in_dev = __in_dev_get_rcu(dev);
2334 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2335 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2336 	}
2337 	rcu_read_unlock();
2338 }
2339 
2340 /* called with RTNL locked */
2341 static void inet_forward_change(struct net *net)
2342 {
2343 	struct net_device *dev;
2344 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2345 
2346 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2347 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2348 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2349 				    NETCONFA_FORWARDING,
2350 				    NETCONFA_IFINDEX_ALL,
2351 				    net->ipv4.devconf_all);
2352 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2353 				    NETCONFA_FORWARDING,
2354 				    NETCONFA_IFINDEX_DEFAULT,
2355 				    net->ipv4.devconf_dflt);
2356 
2357 	for_each_netdev(net, dev) {
2358 		struct in_device *in_dev;
2359 
2360 		if (on)
2361 			dev_disable_lro(dev);
2362 
2363 		in_dev = __in_dev_get_rtnl(dev);
2364 		if (in_dev) {
2365 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2366 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2367 						    NETCONFA_FORWARDING,
2368 						    dev->ifindex, &in_dev->cnf);
2369 		}
2370 	}
2371 }
2372 
2373 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2374 {
2375 	if (cnf == net->ipv4.devconf_dflt)
2376 		return NETCONFA_IFINDEX_DEFAULT;
2377 	else if (cnf == net->ipv4.devconf_all)
2378 		return NETCONFA_IFINDEX_ALL;
2379 	else {
2380 		struct in_device *idev
2381 			= container_of(cnf, struct in_device, cnf);
2382 		return idev->dev->ifindex;
2383 	}
2384 }
2385 
2386 static int devinet_conf_proc(const struct ctl_table *ctl, int write,
2387 			     void *buffer, size_t *lenp, loff_t *ppos)
2388 {
2389 	int old_value = *(int *)ctl->data;
2390 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2391 	int new_value = *(int *)ctl->data;
2392 
2393 	if (write) {
2394 		struct ipv4_devconf *cnf = ctl->extra1;
2395 		struct net *net = ctl->extra2;
2396 		int i = (int *)ctl->data - cnf->data;
2397 		int ifindex;
2398 
2399 		set_bit(i, cnf->state);
2400 
2401 		if (cnf == net->ipv4.devconf_dflt)
2402 			devinet_copy_dflt_conf(net, i);
2403 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2404 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2405 			if ((new_value == 0) && (old_value != 0))
2406 				rt_cache_flush(net);
2407 
2408 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2409 		    new_value != old_value)
2410 			rt_cache_flush(net);
2411 
2412 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2413 		    new_value != old_value) {
2414 			ifindex = devinet_conf_ifindex(net, cnf);
2415 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2416 						    NETCONFA_RP_FILTER,
2417 						    ifindex, cnf);
2418 		}
2419 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2420 		    new_value != old_value) {
2421 			ifindex = devinet_conf_ifindex(net, cnf);
2422 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2423 						    NETCONFA_PROXY_NEIGH,
2424 						    ifindex, cnf);
2425 		}
2426 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2427 		    new_value != old_value) {
2428 			ifindex = devinet_conf_ifindex(net, cnf);
2429 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2430 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2431 						    ifindex, cnf);
2432 		}
2433 	}
2434 
2435 	return ret;
2436 }
2437 
2438 static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
2439 				  void *buffer, size_t *lenp, loff_t *ppos)
2440 {
2441 	int *valp = ctl->data;
2442 	int val = *valp;
2443 	loff_t pos = *ppos;
2444 	struct net *net = ctl->extra2;
2445 	int ret;
2446 
2447 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2448 		return -EPERM;
2449 
2450 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2451 
2452 	if (write && *valp != val) {
2453 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2454 			if (!rtnl_trylock()) {
2455 				/* Restore the original values before restarting */
2456 				*valp = val;
2457 				*ppos = pos;
2458 				return restart_syscall();
2459 			}
2460 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2461 				inet_forward_change(net);
2462 			} else {
2463 				struct ipv4_devconf *cnf = ctl->extra1;
2464 				struct in_device *idev =
2465 					container_of(cnf, struct in_device, cnf);
2466 				if (*valp)
2467 					dev_disable_lro(idev->dev);
2468 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2469 							    NETCONFA_FORWARDING,
2470 							    idev->dev->ifindex,
2471 							    cnf);
2472 			}
2473 			rtnl_unlock();
2474 			rt_cache_flush(net);
2475 		} else
2476 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2477 						    NETCONFA_FORWARDING,
2478 						    NETCONFA_IFINDEX_DEFAULT,
2479 						    net->ipv4.devconf_dflt);
2480 	}
2481 
2482 	return ret;
2483 }
2484 
2485 static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write,
2486 				void *buffer, size_t *lenp, loff_t *ppos)
2487 {
2488 	int *valp = ctl->data;
2489 	int val = *valp;
2490 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2491 	struct net *net = ctl->extra2;
2492 
2493 	if (write && *valp != val)
2494 		rt_cache_flush(net);
2495 
2496 	return ret;
2497 }
2498 
/*
 * Build one struct ctl_table initializer.
 *
 * @attr: IPV4_DEVCONF_ suffix; .data points at the matching slot of the
 *        global ipv4_devconf.data[] (constants are one-based, hence "- 1")
 * @name: procname of the entry
 * @mval: file mode
 * @proc: proc_handler
 *
 * .extra1 is set to &ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2521 
/*
 * Template for the per-configuration-block sysctl table.
 *
 * The entries here point into the global ipv4_devconf;
 * __devinet_sysctl_register() duplicates this structure with kmemdup() and
 * re-targets .data/.extra1/.extra2 of every entry at the block being
 * registered. sysctl_header holds the registration handle of the copy.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		/* entries handled by devinet_conf_proc() */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		/* entries handled by ipv4_doint_and_flush() */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2575 
2576 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2577 				     int ifindex, struct ipv4_devconf *p)
2578 {
2579 	int i;
2580 	struct devinet_sysctl_table *t;
2581 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2582 
2583 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2584 	if (!t)
2585 		goto out;
2586 
2587 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2588 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2589 		t->devinet_vars[i].extra1 = p;
2590 		t->devinet_vars[i].extra2 = net;
2591 	}
2592 
2593 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2594 
2595 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2596 	if (!t->sysctl_header)
2597 		goto free;
2598 
2599 	p->sysctl = t;
2600 
2601 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2602 				    ifindex, p);
2603 	return 0;
2604 
2605 free:
2606 	kfree(t);
2607 out:
2608 	return -ENOMEM;
2609 }
2610 
2611 static void __devinet_sysctl_unregister(struct net *net,
2612 					struct ipv4_devconf *cnf, int ifindex)
2613 {
2614 	struct devinet_sysctl_table *t = cnf->sysctl;
2615 
2616 	if (t) {
2617 		cnf->sysctl = NULL;
2618 		unregister_net_sysctl_table(t->sysctl_header);
2619 		kfree(t);
2620 	}
2621 
2622 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2623 }
2624 
2625 static int devinet_sysctl_register(struct in_device *idev)
2626 {
2627 	int err;
2628 
2629 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2630 		return -EINVAL;
2631 
2632 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2633 	if (err)
2634 		return err;
2635 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2636 					idev->dev->ifindex, &idev->cnf);
2637 	if (err)
2638 		neigh_sysctl_unregister(idev->arp_parms);
2639 	return err;
2640 }
2641 
2642 static void devinet_sysctl_unregister(struct in_device *idev)
2643 {
2644 	struct net *net = dev_net(idev->dev);
2645 
2646 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2647 	neigh_sysctl_unregister(idev->arp_parms);
2648 }
2649 
/*
 * Template for the "ip_forward" sysctl entry. It addresses the FORWARDING
 * slot of the global ipv4_devconf and init_net; devinet_init_net() copies
 * it per namespace and redirects .data/.extra1/.extra2 to that namespace's
 * "all" block.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
};
2662 #endif
2663 
2664 static __net_init int devinet_init_net(struct net *net)
2665 {
2666 	int err;
2667 	struct ipv4_devconf *all, *dflt;
2668 #ifdef CONFIG_SYSCTL
2669 	struct ctl_table *tbl;
2670 	struct ctl_table_header *forw_hdr;
2671 #endif
2672 
2673 	err = -ENOMEM;
2674 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2675 	if (!all)
2676 		goto err_alloc_all;
2677 
2678 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2679 	if (!dflt)
2680 		goto err_alloc_dflt;
2681 
2682 #ifdef CONFIG_SYSCTL
2683 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2684 	if (!tbl)
2685 		goto err_alloc_ctl;
2686 
2687 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2688 	tbl[0].extra1 = all;
2689 	tbl[0].extra2 = net;
2690 #endif
2691 
2692 	if (!net_eq(net, &init_net)) {
2693 		switch (net_inherit_devconf()) {
2694 		case 3:
2695 			/* copy from the current netns */
2696 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2697 			       sizeof(ipv4_devconf));
2698 			memcpy(dflt,
2699 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2700 			       sizeof(ipv4_devconf_dflt));
2701 			break;
2702 		case 0:
2703 		case 1:
2704 			/* copy from init_net */
2705 			memcpy(all, init_net.ipv4.devconf_all,
2706 			       sizeof(ipv4_devconf));
2707 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2708 			       sizeof(ipv4_devconf_dflt));
2709 			break;
2710 		case 2:
2711 			/* use compiled values */
2712 			break;
2713 		}
2714 	}
2715 
2716 #ifdef CONFIG_SYSCTL
2717 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2718 	if (err < 0)
2719 		goto err_reg_all;
2720 
2721 	err = __devinet_sysctl_register(net, "default",
2722 					NETCONFA_IFINDEX_DEFAULT, dflt);
2723 	if (err < 0)
2724 		goto err_reg_dflt;
2725 
2726 	err = -ENOMEM;
2727 	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2728 					  ARRAY_SIZE(ctl_forward_entry));
2729 	if (!forw_hdr)
2730 		goto err_reg_ctl;
2731 	net->ipv4.forw_hdr = forw_hdr;
2732 #endif
2733 
2734 	net->ipv4.devconf_all = all;
2735 	net->ipv4.devconf_dflt = dflt;
2736 	return 0;
2737 
2738 #ifdef CONFIG_SYSCTL
2739 err_reg_ctl:
2740 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2741 err_reg_dflt:
2742 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2743 err_reg_all:
2744 	kfree(tbl);
2745 err_alloc_ctl:
2746 #endif
2747 	kfree(dflt);
2748 err_alloc_dflt:
2749 	kfree(all);
2750 err_alloc_all:
2751 	return err;
2752 }
2753 
2754 static __net_exit void devinet_exit_net(struct net *net)
2755 {
2756 #ifdef CONFIG_SYSCTL
2757 	const struct ctl_table *tbl;
2758 
2759 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2760 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2761 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2762 				    NETCONFA_IFINDEX_DEFAULT);
2763 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2764 				    NETCONFA_IFINDEX_ALL);
2765 	kfree(tbl);
2766 #endif
2767 	kfree(net->ipv4.devconf_dflt);
2768 	kfree(net->ipv4.devconf_all);
2769 }
2770 
/* Per-namespace init/exit hooks, registered by devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2775 
/* AF_INET link-attribute callbacks, registered by devinet_init(). */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2783 
2784 void __init devinet_init(void)
2785 {
2786 	int i;
2787 
2788 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2789 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2790 
2791 	register_pernet_subsys(&devinet_ops);
2792 	register_netdevice_notifier(&ip_netdev_notifier);
2793 
2794 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2795 
2796 	rtnl_af_register(&inet_af_ops);
2797 
2798 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2799 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2800 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2801 		      RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
2802 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2803 		      inet_netconf_dump_devconf,
2804 		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
2805 }
2806