xref: /linux/net/ipv4/devinet.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* Address flags that only make sense for IPv6; __inet_insert_ifa()
 * clears them from every IPv4 address it inserts.
 */
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
/* Statically initialised devconf table.  The value for option
 * IPV4_DEVCONF_x lives in slot (IPV4_DEVCONF_x - 1); slots that are not
 * listed are zero.
 * NOTE(review): the users of this table are outside this chunk.
 */
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
/* Second statically initialised devconf table, laid out like
 * ipv4_devconf but additionally enabling ACCEPT_SOURCE_ROUTE.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into new devices - confirm at the
 * namespace init code, which is outside this chunk.
 */
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
/* Access option @attr in the default devconf of namespace @net
 * (net->ipv4.devconf_dflt).
 */
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
/* Netlink attribute policy for IPv4 address messages; handed to
 * nlmsg_parse_deprecated() by inet_rtm_deladdr() and inet_validate_rtm().
 */
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
/* Parameter bundle for building address netlink messages.
 * NOTE(review): the code that consumes this struct is outside this
 * chunk; field meanings are presumed from their names - confirm there.
 */
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
/* The per-namespace address hash (net->ipv4.inet_addr_lst) has
 * 2^IN4_ADDR_HSIZE_SHIFT buckets; inet_addr_hash() reduces to that many
 * bits and check_lifetime() walks all IN4_ADDR_HSIZE buckets.
 */
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static u32 inet_addr_hash(const struct net *net, __be32 addr)
123 {
124 	u32 val = __ipv4_addr_hash(addr, net_hash_mix(net));
125 
126 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
127 }
128 
129 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
130 {
131 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
132 
133 	ASSERT_RTNL();
134 	hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
135 }
136 
/* Unlink @ifa from the address hash with hlist_del_init_rcu().
 * RTNL must be held (asserted below).
 */
137 static void inet_hash_remove(struct in_ifaddr *ifa)
138 {
139 	ASSERT_RTNL();
140 	hlist_del_init_rcu(&ifa->addr_lst);
141 }
142 
143 /**
144  * __ip_dev_find - find the first device with a given source address.
145  * @net: the net namespace
146  * @addr: the source address
147  * @devref: if true, take a reference on the found device
148  *
149  * If a caller uses devref=false, it should be protected by RCU, or RTNL
150  */
151 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
152 {
153 	struct net_device *result = NULL;
154 	struct in_ifaddr *ifa;
155 
156 	rcu_read_lock();
157 	ifa = inet_lookup_ifaddr_rcu(net, addr);
158 	if (!ifa) {
159 		struct flowi4 fl4 = { .daddr = addr };
160 		struct fib_result res = { 0 };
161 		struct fib_table *local;
162 
163 		/* Fallback to FIB local table so that communication
164 		 * over loopback subnets work.
165 		 */
166 		local = fib_get_table(net, RT_TABLE_LOCAL);
167 		if (local &&
168 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169 		    res.type == RTN_LOCAL)
170 			result = FIB_RES_DEV(res);
171 	} else {
172 		result = ifa->ifa_dev->dev;
173 	}
174 	if (result && devref)
175 		dev_hold(result);
176 	rcu_read_unlock();
177 	return result;
178 }
179 EXPORT_SYMBOL(__ip_dev_find);
180 
181 /* called under RCU lock */
182 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
183 {
184 	u32 hash = inet_addr_hash(net, addr);
185 	struct in_ifaddr *ifa;
186 
187 	hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
188 		if (ifa->ifa_local == addr)
189 			return ifa;
190 
191 	return NULL;
192 }
193 
/* Forward declaration; rtmsg_ifa() is defined further down the file. */
194 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
195 
/* inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when an address is added or removed.
 * inetaddr_validator_chain is consulted by __inet_insert_ifa() before a
 * new address is committed.
 */
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
197 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
/* Forward declaration; inet_del_ifa() wraps __inet_del_ifa() below. */
198 static void inet_del_ifa(struct in_device *in_dev,
199 			 struct in_ifaddr __rcu **ifap,
200 			 int destroy);
/* Per-device sysctl hooks.  With CONFIG_SYSCTL they are only declared
 * here; without it they are no-op stubs and registration always
 * reports success.
 */
201 #ifdef CONFIG_SYSCTL
202 static int devinet_sysctl_register(struct in_device *idev);
203 static void devinet_sysctl_unregister(struct in_device *idev);
204 #else
205 static int devinet_sysctl_register(struct in_device *idev)
206 {
207 	return 0;
208 }
209 static void devinet_sysctl_unregister(struct in_device *idev)
210 {
211 }
212 #endif
213 
214 /* Locks all the inet devices. */
215 
216 static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
217 {
218 	struct in_ifaddr *ifa;
219 
220 	ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
221 	if (!ifa)
222 		return NULL;
223 
224 	in_dev_hold(in_dev);
225 	ifa->ifa_dev = in_dev;
226 
227 	INIT_HLIST_NODE(&ifa->addr_lst);
228 
229 	return ifa;
230 }
231 
/* RCU callback queued by inet_free_ifa(): drop the in_device reference
 * taken in inet_alloc_ifa(), then free the in_ifaddr itself.
 */
232 static void inet_rcu_free_ifa(struct rcu_head *head)
233 {
234 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
235 
236 	in_dev_put(ifa->ifa_dev);
237 	kfree(ifa);
238 }
239 
/* Queue @ifa for freeing; the actual release is done by
 * inet_rcu_free_ifa() from the RCU callback.
 */
240 static void inet_free_ifa(struct in_ifaddr *ifa)
241 {
242 	/* Our reference to ifa->ifa_dev must be freed ASAP
243 	 * to release the reference to the netdev the same way.
244 	 * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
245 	 */
246 	call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
247 }
248 
/* RCU callback queued by in_dev_finish_destroy(): free the in_device
 * together with its mc_hash.
 */
249 static void in_dev_free_rcu(struct rcu_head *head)
250 {
251 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
252 
253 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
254 	kfree(idev);
255 }
256 
/* Final teardown of an in_device.
 * Warns if the address or multicast list is still populated, releases
 * the netdev reference held through idev->dev_tracker, and queues the
 * structure for freeing via RCU.  If @idev was never marked dead, an
 * error is logged and the structure is NOT freed.
 */
257 void in_dev_finish_destroy(struct in_device *idev)
258 {
259 	struct net_device *dev = idev->dev;
260 
261 	WARN_ON(idev->ifa_list);
262 	WARN_ON(idev->mc_list);
263 #ifdef NET_REFCNT_DEBUG
264 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
265 #endif
266 	netdev_put(dev, &idev->dev_tracker);
267 	if (!idev->dead)
268 		pr_err("Freeing alive in_device %p\n", idev);
269 	else
270 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
271 }
272 EXPORT_SYMBOL(in_dev_finish_destroy);
273 
/* Allocate an in_device for @dev and publish it through dev->ip_ptr.
 * RTNL must be held (asserted).
 *
 * The configuration is copied from the namespace's devconf_dflt.  For
 * every device except blackhole_netdev the sysctl entries are registered
 * and multicast state is initialised.
 *
 * Returns the new in_device on success, or an ERR_PTR() on failure
 * (-ENOMEM, or the error from devinet_sysctl_register()).  It never
 * returns NULL, so callers must test the result with IS_ERR().
 */
274 static struct in_device *inetdev_init(struct net_device *dev)
275 {
276 	struct in_device *in_dev;
277 	int err = -ENOMEM;
278 
279 	ASSERT_RTNL();
280 
281 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
282 	if (!in_dev)
283 		goto out;
284 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
285 			sizeof(in_dev->cnf));
286 	in_dev->cnf.sysctl = NULL;
287 	in_dev->dev = dev;
288 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
289 	if (!in_dev->arp_parms)
290 		goto out_kfree;
291 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
292 		dev_disable_lro(dev);
293 	/* Reference in_dev->dev */
294 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
295 	/* Account for reference dev->ip_ptr (below) */
296 	refcount_set(&in_dev->refcnt, 1);
297 
298 	if (dev != blackhole_netdev) {
299 		err = devinet_sysctl_register(in_dev);
300 		if (err) {
			/* Mark dead so the final in_dev_put() really frees it. */
301 			in_dev->dead = 1;
302 			neigh_parms_release(&arp_tbl, in_dev->arp_parms);
303 			in_dev_put(in_dev);
304 			in_dev = NULL;
305 			goto out;
306 		}
307 		ip_mc_init_dev(in_dev);
308 		if (dev->flags & IFF_UP)
309 			ip_mc_up(in_dev);
310 	}
311 
312 	/* we can receive as soon as ip_ptr is set -- do this last */
313 	rcu_assign_pointer(dev->ip_ptr, in_dev);
314 out:
	/* in_dev is NULL here only on a failure path, where err is set. */
315 	return in_dev ?: ERR_PTR(err);
316 out_kfree:
317 	kfree(in_dev);
318 	in_dev = NULL;
319 	goto out;
320 }
321 
/* Detach and tear down @in_dev.  RTNL must be held (asserted).
 * Marks the in_device dead, destroys its multicast state, deletes and
 * frees every address, clears dev->ip_ptr, unregisters the sysctl
 * entries, releases the ARP parameters and finally drops the reference
 * that inetdev_init() accounted for dev->ip_ptr.
 */
322 static void inetdev_destroy(struct in_device *in_dev)
323 {
324 	struct net_device *dev;
325 	struct in_ifaddr *ifa;
326 
327 	ASSERT_RTNL();
328 
329 	dev = in_dev->dev;
330 
331 	in_dev->dead = 1;
332 
333 	ip_mc_destroy_dev(in_dev);
334 
	/* Always delete the list head; destroy == 0, so free it here. */
335 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
336 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
337 		inet_free_ifa(ifa);
338 	}
339 
340 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
341 
342 	devinet_sysctl_unregister(in_dev);
343 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
344 	arp_ifdown(dev);
345 
346 	in_dev_put(in_dev);
347 }
348 
349 static int __init inet_blackhole_dev_init(void)
350 {
351 	int err = 0;
352 
353 	rtnl_lock();
354 	if (!inetdev_init(blackhole_netdev))
355 		err = -ENOMEM;
356 	rtnl_unlock();
357 
358 	return err;
359 }
360 late_initcall(inet_blackhole_dev_init);
361 
362 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
363 {
364 	const struct in_ifaddr *ifa;
365 
366 	rcu_read_lock();
367 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
368 		if (inet_ifa_match(a, ifa)) {
369 			if (!b || inet_ifa_match(b, ifa)) {
370 				rcu_read_unlock();
371 				return 1;
372 			}
373 		}
374 	}
375 	rcu_read_unlock();
376 	return 0;
377 }
378 
/* Remove the address that *@ifap points to from @in_dev.
 * RTNL must be held (asserted).
 *
 * @ifap:    link (list head or previous ifa_next) referencing the address
 * @destroy: when non-zero the removed in_ifaddr is freed here
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * When a primary address goes away, the secondaries of the same subnet
 * are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set,
 * the first of them is promoted to primary and the routes of the
 * remaining ones are re-added.  All of that is skipped when the
 * in_device is already marked dead.
 */
379 static void __inet_del_ifa(struct in_device *in_dev,
380 			   struct in_ifaddr __rcu **ifap,
381 			   int destroy, struct nlmsghdr *nlh, u32 portid)
382 {
383 	struct in_ifaddr *promote = NULL;
384 	struct in_ifaddr *ifa, *ifa1;
385 	struct in_ifaddr __rcu **last_prim;
386 	struct in_ifaddr *prev_prom = NULL;
387 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
388 
389 	ASSERT_RTNL();
390 
391 	ifa1 = rtnl_dereference(*ifap);
392 	last_prim = ifap;
393 	if (in_dev->dead)
394 		goto no_promotions;
395 
396 	/* 1. Deleting primary ifaddr forces deletion of all secondaries
397 	 * unless alias promotion is set
398 	 **/
399 
400 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
401 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
402 
403 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Track the link after the last primary whose scope is
			 * not below ifa1's; a promoted address is spliced in
			 * there.
			 */
404 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
405 			    ifa1->ifa_scope <= ifa->ifa_scope)
406 				last_prim = &ifa->ifa_next;
407 
			/* Skip anything that is not a secondary of ifa1's subnet. */
408 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
409 			    ifa1->ifa_mask != ifa->ifa_mask ||
410 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
411 				ifap1 = &ifa->ifa_next;
412 				prev_prom = ifa;
413 				continue;
414 			}
415 
416 			if (!do_promote) {
417 				inet_hash_remove(ifa);
418 				*ifap1 = ifa->ifa_next;
419 
420 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
421 				blocking_notifier_call_chain(&inetaddr_chain,
422 						NETDEV_DOWN, ifa);
423 				inet_free_ifa(ifa);
424 			} else {
425 				promote = ifa;
426 				break;
427 			}
428 		}
429 	}
430 
431 	/* On promotion all secondaries from subnet are changing
432 	 * the primary IP, we must remove all their routes silently
433 	 * and later to add them back with new prefsrc. Do this
434 	 * while all addresses are on the device list.
435 	 */
436 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
437 		if (ifa1->ifa_mask == ifa->ifa_mask &&
438 		    inet_ifa_match(ifa1->ifa_address, ifa))
439 			fib_del_ifaddr(ifa, ifa1);
440 	}
441 
442 no_promotions:
443 	/* 2. Unlink it */
444 
445 	*ifap = ifa1->ifa_next;
446 	inet_hash_remove(ifa1);
447 
448 	/* 3. Announce address deletion */
449 
450 	/* Send message first, then call notifier.
451 	   At first sight, FIB update triggered by notifier
452 	   will refer to already deleted ifaddr, that could confuse
453 	   netlink listeners. It is not true: look, gated sees
454 	   that route deleted and if it still thinks that ifaddr
455 	   is valid, it will try to restore deleted routes... Grr.
456 	   So that, this order is correct.
457 	 */
458 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
459 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
460 
461 	if (promote) {
462 		struct in_ifaddr *next_sec;
463 
464 		next_sec = rtnl_dereference(promote->ifa_next);
		/* Move the promoted address from its current position to
		 * just after the last primary recorded in last_prim.
		 */
465 		if (prev_prom) {
466 			struct in_ifaddr *last_sec;
467 
468 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
469 
470 			last_sec = rtnl_dereference(*last_prim);
471 			rcu_assign_pointer(promote->ifa_next, last_sec);
472 			rcu_assign_pointer(*last_prim, promote);
473 		}
474 
475 		promote->ifa_flags &= ~IFA_F_SECONDARY;
476 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
477 		blocking_notifier_call_chain(&inetaddr_chain,
478 				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of this subnet. */
479 		for (ifa = next_sec; ifa;
480 		     ifa = rtnl_dereference(ifa->ifa_next)) {
481 			if (ifa1->ifa_mask != ifa->ifa_mask ||
482 			    !inet_ifa_match(ifa1->ifa_address, ifa))
483 					continue;
484 			fib_add_ifaddr(ifa);
485 		}
486 
487 	}
488 	if (destroy)
489 		inet_free_ifa(ifa1);
490 }
491 
/* Convenience wrapper around __inet_del_ifa() for callers without a
 * netlink request: passes a NULL nlmsghdr and portid 0.
 */
492 static void inet_del_ifa(struct in_device *in_dev,
493 			 struct in_ifaddr __rcu **ifap,
494 			 int destroy)
495 {
496 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
497 }
498 
/* Insert @ifa into the address list of ifa->ifa_dev.
 * RTNL must be held (asserted).
 *
 * The address becomes a secondary if the device already has an address
 * in the same subnet (same mask, matching prefix); otherwise it is
 * linked after the last primary whose scope is not below its own.
 *
 * Ownership: on every error path @ifa is freed with inet_free_ifa().
 *
 * Returns 0 on success, -EEXIST if the same local address already
 * exists in that subnet, -EINVAL if the scope differs from an existing
 * address of that subnet, or the error reported by a validator on
 * inetaddr_validator_chain.
 */
499 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
500 			     u32 portid, struct netlink_ext_ack *extack)
501 {
502 	struct in_ifaddr __rcu **last_primary, **ifap;
503 	struct in_device *in_dev = ifa->ifa_dev;
504 	struct net *net = dev_net(in_dev->dev);
505 	struct in_validator_info ivi;
506 	struct in_ifaddr *ifa1;
507 	int ret;
508 
509 	ASSERT_RTNL();
510 
511 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
512 	last_primary = &in_dev->ifa_list;
513 
514 	/* Don't set IPv6 only flags to IPv4 addresses */
515 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
516 
517 	ifap = &in_dev->ifa_list;
518 	ifa1 = rtnl_dereference(*ifap);
519 
	/* Walk the whole list; ifap ends up at the tail link, which is
	 * where a secondary gets appended.
	 */
520 	while (ifa1) {
521 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
522 		    ifa->ifa_scope <= ifa1->ifa_scope)
523 			last_primary = &ifa1->ifa_next;
524 		if (ifa1->ifa_mask == ifa->ifa_mask &&
525 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
526 			if (ifa1->ifa_local == ifa->ifa_local) {
527 				inet_free_ifa(ifa);
528 				return -EEXIST;
529 			}
530 			if (ifa1->ifa_scope != ifa->ifa_scope) {
531 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
532 				inet_free_ifa(ifa);
533 				return -EINVAL;
534 			}
535 			ifa->ifa_flags |= IFA_F_SECONDARY;
536 		}
537 
538 		ifap = &ifa1->ifa_next;
539 		ifa1 = rtnl_dereference(*ifap);
540 	}
541 
542 	/* Allow any devices that wish to register ifaddr validators to weigh
543 	 * in now, before changes are committed.  The rtnl lock is serializing
544 	 * access here, so the state should not change between a validator call
545 	 * and a final notify on commit.  This isn't invoked on promotion under
546 	 * the assumption that validators are checking the address itself, and
547 	 * not the flags.
548 	 */
549 	ivi.ivi_addr = ifa->ifa_address;
550 	ivi.ivi_dev = ifa->ifa_dev;
551 	ivi.extack = extack;
552 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
553 					   NETDEV_UP, &ivi);
554 	ret = notifier_to_errno(ret);
555 	if (ret) {
556 		inet_free_ifa(ifa);
557 		return ret;
558 	}
559 
	/* Primaries go after the last suitable primary, not at the tail. */
560 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
561 		ifap = last_primary;
562 
563 	rcu_assign_pointer(ifa->ifa_next, *ifap);
564 	rcu_assign_pointer(*ifap, ifa);
565 
566 	inet_hash_insert(dev_net(in_dev->dev), ifa);
567 
	/* Re-run the lifetime check right away for the new address. */
568 	cancel_delayed_work(&net->ipv4.addr_chk_work);
569 	queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
570 
571 	/* Send message first, then call notifier.
572 	   Notifier will trigger FIB update, so that
573 	   listeners of netlink will know about new ifaddr */
574 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
575 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
576 
577 	return 0;
578 }
579 
580 static int inet_insert_ifa(struct in_ifaddr *ifa)
581 {
582 	if (!ifa->ifa_local) {
583 		inet_free_ifa(ifa);
584 		return 0;
585 	}
586 
587 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
588 }
589 
/* Insert @ifa on @dev after calling ipv4_devconf_setall() and
 * neigh_parms_data_state_setall() on the device's in_device.
 * Loopback addresses are forced to RT_SCOPE_HOST.
 * Returns the result of inet_insert_ifa().
 * NOTE(review): in_dev is dereferenced without a NULL check - callers
 * are presumably expected to guarantee that @dev has an in_device.
 */
590 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
591 {
592 	struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
593 
594 	ipv4_devconf_setall(in_dev);
595 	neigh_parms_data_state_setall(in_dev->arp_parms);
596 
597 	if (ipv4_is_loopback(ifa->ifa_local))
598 		ifa->ifa_scope = RT_SCOPE_HOST;
599 	return inet_insert_ifa(ifa);
600 }
601 
602 /* Caller must hold RCU or RTNL :
603  * We dont take a reference on found in_device
604  */
605 struct in_device *inetdev_by_index(struct net *net, int ifindex)
606 {
607 	struct net_device *dev;
608 	struct in_device *in_dev = NULL;
609 
610 	rcu_read_lock();
611 	dev = dev_get_by_index_rcu(net, ifindex);
612 	if (dev)
613 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
614 	rcu_read_unlock();
615 	return in_dev;
616 }
617 EXPORT_SYMBOL(inetdev_by_index);
618 
619 /* Called only from RTNL semaphored context. No locks. */
620 
621 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
622 				    __be32 mask)
623 {
624 	struct in_ifaddr *ifa;
625 
626 	ASSERT_RTNL();
627 
628 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
629 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
630 			return ifa;
631 	}
632 	return NULL;
633 }
634 
/* Join (@join true) or leave (@join false) the multicast group given by
 * ifa->ifa_address on the device that owns @ifa, using the namespace's
 * mc_autojoin_sk socket with the socket lock held.
 * Returns the result of ip_mc_join_group() / ip_mc_leave_group(), or
 * -EOPNOTSUPP when CONFIG_IP_MULTICAST is not enabled.
 */
635 static int ip_mc_autojoin_config(struct net *net, bool join,
636 				 const struct in_ifaddr *ifa)
637 {
638 #if defined(CONFIG_IP_MULTICAST)
639 	struct ip_mreqn mreq = {
640 		.imr_multiaddr.s_addr = ifa->ifa_address,
641 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
642 	};
643 	struct sock *sk = net->ipv4.mc_autojoin_sk;
644 	int ret;
645 
646 	ASSERT_RTNL_NET(net);
647 
648 	lock_sock(sk);
649 	if (join)
650 		ret = ip_mc_join_group(sk, &mreq);
651 	else
652 		ret = ip_mc_leave_group(sk, &mreq);
653 	release_sock(sk);
654 
655 	return ret;
656 #else
657 	return -EOPNOTSUPP;
658 #endif
659 }
660 
/* Handle a netlink request to delete an IPv4 address.
 *
 * The first address on device ifm->ifa_index that passes every filter
 * supplied in the request (IFA_LOCAL, IFA_LABEL, and IFA_ADDRESS
 * together with the prefix length) is deleted and freed.  A filter
 * attribute that is absent matches everything.
 *
 * Returns 0 on success, a negative parse error, -ENODEV if the device
 * has no in_device, or -EADDRNOTAVAIL if no address matched.
 */
661 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
662 			    struct netlink_ext_ack *extack)
663 {
664 	struct net *net = sock_net(skb->sk);
665 	struct in_ifaddr __rcu **ifap;
666 	struct nlattr *tb[IFA_MAX+1];
667 	struct in_device *in_dev;
668 	struct ifaddrmsg *ifm;
669 	struct in_ifaddr *ifa;
670 	int err;
671 
672 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
673 				     ifa_ipv4_policy, extack);
674 	if (err < 0)
675 		goto out;
676 
677 	ifm = nlmsg_data(nlh);
678 
679 	rtnl_net_lock(net);
680 
681 	in_dev = inetdev_by_index(net, ifm->ifa_index);
682 	if (!in_dev) {
683 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
684 		err = -ENODEV;
685 		goto unlock;
686 	}
687 
688 	for (ifap = &in_dev->ifa_list;
689 	     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
690 	     ifap = &ifa->ifa_next) {
691 		if (tb[IFA_LOCAL] &&
692 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
693 			continue;
694 
695 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
696 			continue;
697 
698 		if (tb[IFA_ADDRESS] &&
699 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
700 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
701 			continue;
702 
		/* The return value of the group leave is deliberately ignored. */
703 		if (ipv4_is_multicast(ifa->ifa_address))
704 			ip_mc_autojoin_config(net, false, ifa);
705 
		/* err still holds the parse result here, so this path returns
		 * whatever nlmsg_parse_deprecated() returned.
		 */
706 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
707 		goto unlock;
708 	}
709 
710 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
711 	err = -EADDRNOTAVAIL;
712 unlock:
713 	rtnl_net_unlock(net);
714 out:
715 	return err;
716 }
717 
/* Delayed-work handler (net->ipv4.addr_chk_work) that enforces address
 * lifetimes for one network namespace.
 *
 * Each hash bucket is first scanned under RCU only, to decide whether
 * anything has to change and to find the earliest upcoming deadline.
 * Only buckets that need a change are walked again under the
 * per-namespace RTNL lock, where addresses past their valid lifetime
 * are deleted and addresses past their preferred lifetime are flagged
 * IFA_F_DEPRECATED.  Addresses with IFA_F_PERMANENT are skipped.
 * Finally the work re-queues itself for the next deadline.
 */
718 static void check_lifetime(struct work_struct *work)
719 {
720 	unsigned long now, next, next_sec, next_sched;
721 	struct in_ifaddr *ifa;
722 	struct hlist_node *n;
723 	struct net *net;
724 	int i;
725 
726 	net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
727 	now = jiffies;
728 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
729 
730 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
731 		struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
732 		bool change_needed = false;
733 
		/* Pass 1: lockless scan, fields read with READ_ONCE(). */
734 		rcu_read_lock();
735 		hlist_for_each_entry_rcu(ifa, head, addr_lst) {
736 			unsigned long age, tstamp;
737 			u32 preferred_lft;
738 			u32 valid_lft;
739 			u32 flags;
740 
741 			flags = READ_ONCE(ifa->ifa_flags);
742 			if (flags & IFA_F_PERMANENT)
743 				continue;
744 
745 			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
746 			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
747 			tstamp = READ_ONCE(ifa->ifa_tstamp);
748 			/* We try to batch several events at once. */
749 			age = (now - tstamp +
750 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
751 
752 			if (valid_lft != INFINITY_LIFE_TIME &&
753 			    age >= valid_lft) {
754 				change_needed = true;
755 			} else if (preferred_lft ==
756 				   INFINITY_LIFE_TIME) {
757 				continue;
758 			} else if (age >= preferred_lft) {
759 				if (time_before(tstamp + valid_lft * HZ, next))
760 					next = tstamp + valid_lft * HZ;
761 
762 				if (!(flags & IFA_F_DEPRECATED))
763 					change_needed = true;
764 			} else if (time_before(tstamp + preferred_lft * HZ,
765 					       next)) {
766 				next = tstamp + preferred_lft * HZ;
767 			}
768 		}
769 		rcu_read_unlock();
770 		if (!change_needed)
771 			continue;
772 
		/* Pass 2: re-evaluate under the lock and apply the changes. */
773 		rtnl_net_lock(net);
774 		hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
775 			unsigned long age;
776 
777 			if (ifa->ifa_flags & IFA_F_PERMANENT)
778 				continue;
779 
780 			/* We try to batch several events at once. */
781 			age = (now - ifa->ifa_tstamp +
782 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
783 
784 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
785 			    age >= ifa->ifa_valid_lft) {
786 				struct in_ifaddr __rcu **ifap;
787 				struct in_ifaddr *tmp;
788 
				/* inet_del_ifa() needs the link that points at
				 * ifa, so walk the device list to find it.
				 */
789 				ifap = &ifa->ifa_dev->ifa_list;
790 				tmp = rtnl_net_dereference(net, *ifap);
791 				while (tmp) {
792 					if (tmp == ifa) {
793 						inet_del_ifa(ifa->ifa_dev,
794 							     ifap, 1);
795 						break;
796 					}
797 					ifap = &tmp->ifa_next;
798 					tmp = rtnl_net_dereference(net, *ifap);
799 				}
800 			} else if (ifa->ifa_preferred_lft !=
801 				   INFINITY_LIFE_TIME &&
802 				   age >= ifa->ifa_preferred_lft &&
803 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
804 				ifa->ifa_flags |= IFA_F_DEPRECATED;
805 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
806 			}
807 		}
808 		rtnl_net_unlock(net);
809 	}
810 
811 	next_sec = round_jiffies_up(next);
812 	next_sched = next;
813 
814 	/* If rounded timeout is accurate enough, accept it. */
815 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
816 		next_sched = next_sec;
817 
818 	now = jiffies;
819 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
820 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
821 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
822 
823 	queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
824 			   next_sched - now);
825 }
826 
/* Apply the valid and preferred lifetimes to @ifa.
 *
 * IFA_F_PERMANENT and IFA_F_DEPRECATED are recomputed from scratch:
 * PERMANENT is set when the valid lifetime is not finite, DEPRECATED
 * when the preferred lifetime is zero.  A lifetime field is only
 * written when the corresponding timeout is finite.  ifa_tstamp is set
 * to the current jiffies, and ifa_cstamp too if it was still zero.
 * Stores use WRITE_ONCE(); check_lifetime() reads these fields with
 * READ_ONCE() outside the RTNL lock.
 */
827 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
828 			     __u32 prefered_lft)
829 {
830 	unsigned long timeout;
831 	u32 flags;
832 
833 	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
834 
835 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
836 	if (addrconf_finite_timeout(timeout))
837 		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
838 	else
839 		flags |= IFA_F_PERMANENT;
840 
841 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
842 	if (addrconf_finite_timeout(timeout)) {
843 		if (timeout == 0)
844 			flags |= IFA_F_DEPRECATED;
845 		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
846 	}
847 	WRITE_ONCE(ifa->ifa_flags, flags);
848 	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
849 	if (!ifa->ifa_cstamp)
850 		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
851 }
852 
853 static int inet_validate_rtm(struct nlmsghdr *nlh, struct nlattr **tb,
854 			     struct netlink_ext_ack *extack,
855 			     __u32 *valid_lft, __u32 *prefered_lft)
856 {
857 	struct ifaddrmsg *ifm = nlmsg_data(nlh);
858 	int err;
859 
860 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
861 				     ifa_ipv4_policy, extack);
862 	if (err < 0)
863 		return err;
864 
865 	if (ifm->ifa_prefixlen > 32) {
866 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
867 		return -EINVAL;
868 	}
869 
870 	if (!tb[IFA_LOCAL]) {
871 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
872 		return -EINVAL;
873 	}
874 
875 	if (tb[IFA_CACHEINFO]) {
876 		struct ifa_cacheinfo *ci;
877 
878 		ci = nla_data(tb[IFA_CACHEINFO]);
879 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
880 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
881 			return -EINVAL;
882 		}
883 
884 		*valid_lft = ci->ifa_valid;
885 		*prefered_lft = ci->ifa_prefered;
886 	}
887 
888 	return 0;
889 }
890 
/* Build a new in_ifaddr from an already parsed netlink request.
 *
 * @tb must contain IFA_LOCAL (it is read unconditionally); when
 * IFA_ADDRESS is missing, tb[IFA_ADDRESS] is set to tb[IFA_LOCAL].
 * The label defaults to the device name.
 *
 * Returns the new address, or ERR_PTR(-ENODEV) if the interface index
 * is unknown, or ERR_PTR(-ENOBUFS) if the device has no in_device or
 * the allocation fails.
 */
891 static struct in_ifaddr *inet_rtm_to_ifa(struct net *net, struct nlmsghdr *nlh,
892 					 struct nlattr **tb,
893 					 struct netlink_ext_ack *extack)
894 {
895 	struct ifaddrmsg *ifm = nlmsg_data(nlh);
896 	struct in_device *in_dev;
897 	struct net_device *dev;
898 	struct in_ifaddr *ifa;
899 	int err;
900 
901 	dev = __dev_get_by_index(net, ifm->ifa_index);
902 	err = -ENODEV;
903 	if (!dev) {
904 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
905 		goto errout;
906 	}
907 
908 	in_dev = __in_dev_get_rtnl_net(dev);
909 	err = -ENOBUFS;
910 	if (!in_dev)
911 		goto errout;
912 
913 	ifa = inet_alloc_ifa(in_dev);
914 	if (!ifa)
915 		/*
916 		 * A potential indev allocation can be left alive, it stays
917 		 * assigned to its device and is destroyed with it.
918 		 */
919 		goto errout;
920 
921 	ipv4_devconf_setall(in_dev);
922 	neigh_parms_data_state_setall(in_dev->arp_parms);
923 
924 	if (!tb[IFA_ADDRESS])
925 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
926 
927 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
928 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* The IFA_FLAGS attribute, when present, overrides the header flags. */
929 	ifa->ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
930 	ifa->ifa_scope = ifm->ifa_scope;
931 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
932 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
933 
934 	if (tb[IFA_BROADCAST])
935 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
936 
937 	if (tb[IFA_LABEL])
938 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
939 	else
940 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
941 
942 	if (tb[IFA_RT_PRIORITY])
943 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
944 
945 	if (tb[IFA_PROTO])
946 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
947 
948 	return ifa;
949 
950 errout:
951 	return ERR_PTR(err);
952 }
953 
954 static struct in_ifaddr *find_matching_ifa(struct net *net, struct in_ifaddr *ifa)
955 {
956 	struct in_device *in_dev = ifa->ifa_dev;
957 	struct in_ifaddr *ifa1;
958 
959 	in_dev_for_each_ifa_rtnl_net(net, ifa1, in_dev) {
960 		if (ifa1->ifa_mask == ifa->ifa_mask &&
961 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
962 		    ifa1->ifa_local == ifa->ifa_local)
963 			return ifa1;
964 	}
965 
966 	return NULL;
967 }
968 
/* Handle a netlink request to add or replace an IPv4 address.
 *
 * A request whose IFA_LOCAL is 0.0.0.0 is accepted and ignored
 * (returns 0).  If the device has no matching address, a new one is
 * inserted, joining the multicast group first when IFA_F_MCAUTOJOIN is
 * set.  If a matching address exists, the request must carry
 * NLM_F_REPLACE and must not carry NLM_F_EXCL (otherwise -EEXIST); the
 * existing address then gets the new route metric, protocol and
 * lifetimes.
 *
 * Returns 0 on success or a negative errno.
 */
969 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
970 			    struct netlink_ext_ack *extack)
971 {
972 	__u32 prefered_lft = INFINITY_LIFE_TIME;
973 	__u32 valid_lft = INFINITY_LIFE_TIME;
974 	struct net *net = sock_net(skb->sk);
975 	struct in_ifaddr *ifa_existing;
976 	struct nlattr *tb[IFA_MAX + 1];
977 	struct in_ifaddr *ifa;
978 	int ret;
979 
980 	ret = inet_validate_rtm(nlh, tb, extack, &valid_lft, &prefered_lft);
981 	if (ret < 0)
982 		return ret;
983 
984 	if (!nla_get_in_addr(tb[IFA_LOCAL]))
985 		return 0;
986 
987 	rtnl_net_lock(net);
988 
989 	ifa = inet_rtm_to_ifa(net, nlh, tb, extack);
990 	if (IS_ERR(ifa)) {
991 		ret = PTR_ERR(ifa);
992 		goto unlock;
993 	}
994 
995 	ifa_existing = find_matching_ifa(net, ifa);
996 	if (!ifa_existing) {
997 		/* It would be best to check for !NLM_F_CREATE here but
998 		 * userspace already relies on not having to provide this.
999 		 */
1000 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1001 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
1002 			ret = ip_mc_autojoin_config(net, true, ifa);
1003 			if (ret < 0) {
1004 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
1005 				inet_free_ifa(ifa);
1006 				goto unlock;
1007 			}
1008 		}
1009 
		/* __inet_insert_ifa() frees ifa itself on failure. */
1010 		ret = __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack);
1011 	} else {
		/* Keep what is needed from the request, then drop the
		 * temporary ifa; the existing entry is updated in place.
		 */
1012 		u32 new_metric = ifa->ifa_rt_priority;
1013 		u8 new_proto = ifa->ifa_proto;
1014 
1015 		inet_free_ifa(ifa);
1016 
1017 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
1018 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
1019 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
1020 			ret = -EEXIST;
1021 			goto unlock;
1022 		}
1023 		ifa = ifa_existing;
1024 
1025 		if (ifa->ifa_rt_priority != new_metric) {
1026 			fib_modify_prefix_metric(ifa, new_metric);
1027 			ifa->ifa_rt_priority = new_metric;
1028 		}
1029 
1030 		ifa->ifa_proto = new_proto;
1031 
1032 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1033 		cancel_delayed_work(&net->ipv4.addr_chk_work);
1034 		queue_delayed_work(system_power_efficient_wq,
1035 				   &net->ipv4.addr_chk_work, 0);
1036 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1037 	}
1038 
1039 unlock:
1040 	rtnl_net_unlock(net);
1041 
1042 	return ret;
1043 }
1044 
1045 /*
1046  *	Determine a default network mask, based on the IP address.
1047  */
1048 
1049 static int inet_abc_len(__be32 addr)
1050 {
1051 	int rc = -1;	/* Something else, probably a multicast. */
1052 
1053 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1054 		rc = 0;
1055 	else {
1056 		__u32 haddr = ntohl(addr);
1057 		if (IN_CLASSA(haddr))
1058 			rc = 8;
1059 		else if (IN_CLASSB(haddr))
1060 			rc = 16;
1061 		else if (IN_CLASSC(haddr))
1062 			rc = 24;
1063 		else if (IN_CLASSE(haddr))
1064 			rc = 32;
1065 	}
1066 
1067 	return rc;
1068 }
1069 
1070 
1071 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1072 {
1073 	struct sockaddr_in sin_orig;
1074 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1075 	struct in_ifaddr __rcu **ifap = NULL;
1076 	struct in_device *in_dev;
1077 	struct in_ifaddr *ifa = NULL;
1078 	struct net_device *dev;
1079 	char *colon;
1080 	int ret = -EFAULT;
1081 	int tryaddrmatch = 0;
1082 
1083 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1084 
1085 	/* save original address for comparison */
1086 	memcpy(&sin_orig, sin, sizeof(*sin));
1087 
1088 	colon = strchr(ifr->ifr_name, ':');
1089 	if (colon)
1090 		*colon = 0;
1091 
1092 	dev_load(net, ifr->ifr_name);
1093 
1094 	switch (cmd) {
1095 	case SIOCGIFADDR:	/* Get interface address */
1096 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1097 	case SIOCGIFDSTADDR:	/* Get the destination address */
1098 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1099 		/* Note that these ioctls will not sleep,
1100 		   so that we do not impose a lock.
1101 		   One day we will be forced to put shlock here (I mean SMP)
1102 		 */
1103 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1104 		memset(sin, 0, sizeof(*sin));
1105 		sin->sin_family = AF_INET;
1106 		break;
1107 
1108 	case SIOCSIFFLAGS:
1109 		ret = -EPERM;
1110 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1111 			goto out;
1112 		break;
1113 	case SIOCSIFADDR:	/* Set interface address (and family) */
1114 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1115 	case SIOCSIFDSTADDR:	/* Set the destination address */
1116 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1117 		ret = -EPERM;
1118 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1119 			goto out;
1120 		ret = -EINVAL;
1121 		if (sin->sin_family != AF_INET)
1122 			goto out;
1123 		break;
1124 	default:
1125 		ret = -EINVAL;
1126 		goto out;
1127 	}
1128 
1129 	rtnl_net_lock(net);
1130 
1131 	ret = -ENODEV;
1132 	dev = __dev_get_by_name(net, ifr->ifr_name);
1133 	if (!dev)
1134 		goto done;
1135 
1136 	if (colon)
1137 		*colon = ':';
1138 
1139 	in_dev = __in_dev_get_rtnl_net(dev);
1140 	if (in_dev) {
1141 		if (tryaddrmatch) {
1142 			/* Matthias Andree */
1143 			/* compare label and address (4.4BSD style) */
1144 			/* note: we only do this for a limited set of ioctls
1145 			   and only if the original address family was AF_INET.
1146 			   This is checked above. */
1147 
1148 			for (ifap = &in_dev->ifa_list;
1149 			     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1150 			     ifap = &ifa->ifa_next) {
1151 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1152 				    sin_orig.sin_addr.s_addr ==
1153 							ifa->ifa_local) {
1154 					break; /* found */
1155 				}
1156 			}
1157 		}
1158 		/* we didn't get a match, maybe the application is
1159 		   4.3BSD-style and passed in junk so we fall back to
1160 		   comparing just the label */
1161 		if (!ifa) {
1162 			for (ifap = &in_dev->ifa_list;
1163 			     (ifa = rtnl_net_dereference(net, *ifap)) != NULL;
1164 			     ifap = &ifa->ifa_next)
1165 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1166 					break;
1167 		}
1168 	}
1169 
1170 	ret = -EADDRNOTAVAIL;
1171 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1172 		goto done;
1173 
1174 	switch (cmd) {
1175 	case SIOCGIFADDR:	/* Get interface address */
1176 		ret = 0;
1177 		sin->sin_addr.s_addr = ifa->ifa_local;
1178 		break;
1179 
1180 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1181 		ret = 0;
1182 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1183 		break;
1184 
1185 	case SIOCGIFDSTADDR:	/* Get the destination address */
1186 		ret = 0;
1187 		sin->sin_addr.s_addr = ifa->ifa_address;
1188 		break;
1189 
1190 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1191 		ret = 0;
1192 		sin->sin_addr.s_addr = ifa->ifa_mask;
1193 		break;
1194 
1195 	case SIOCSIFFLAGS:
1196 		if (colon) {
1197 			ret = -EADDRNOTAVAIL;
1198 			if (!ifa)
1199 				break;
1200 			ret = 0;
1201 			if (!(ifr->ifr_flags & IFF_UP))
1202 				inet_del_ifa(in_dev, ifap, 1);
1203 			break;
1204 		}
1205 
1206 		/* NETDEV_UP/DOWN/CHANGE could touch a peer dev */
1207 		ASSERT_RTNL();
1208 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1209 		break;
1210 
1211 	case SIOCSIFADDR:	/* Set interface address (and family) */
1212 		ret = -EINVAL;
1213 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1214 			break;
1215 
1216 		if (!ifa) {
1217 			ret = -ENOBUFS;
1218 			if (!in_dev)
1219 				break;
1220 			ifa = inet_alloc_ifa(in_dev);
1221 			if (!ifa)
1222 				break;
1223 
1224 			if (colon)
1225 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1226 			else
1227 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1228 		} else {
1229 			ret = 0;
1230 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1231 				break;
1232 			inet_del_ifa(in_dev, ifap, 0);
1233 			ifa->ifa_broadcast = 0;
1234 			ifa->ifa_scope = 0;
1235 		}
1236 
1237 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1238 
1239 		if (!(dev->flags & IFF_POINTOPOINT)) {
1240 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1241 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1242 			if ((dev->flags & IFF_BROADCAST) &&
1243 			    ifa->ifa_prefixlen < 31)
1244 				ifa->ifa_broadcast = ifa->ifa_address |
1245 						     ~ifa->ifa_mask;
1246 		} else {
1247 			ifa->ifa_prefixlen = 32;
1248 			ifa->ifa_mask = inet_make_mask(32);
1249 		}
1250 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1251 		ret = inet_set_ifa(dev, ifa);
1252 		break;
1253 
1254 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1255 		ret = 0;
1256 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1257 			inet_del_ifa(in_dev, ifap, 0);
1258 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1259 			inet_insert_ifa(ifa);
1260 		}
1261 		break;
1262 
1263 	case SIOCSIFDSTADDR:	/* Set the destination address */
1264 		ret = 0;
1265 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1266 			break;
1267 		ret = -EINVAL;
1268 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1269 			break;
1270 		ret = 0;
1271 		inet_del_ifa(in_dev, ifap, 0);
1272 		ifa->ifa_address = sin->sin_addr.s_addr;
1273 		inet_insert_ifa(ifa);
1274 		break;
1275 
1276 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1277 
1278 		/*
1279 		 *	The mask we set must be legal.
1280 		 */
1281 		ret = -EINVAL;
1282 		if (bad_mask(sin->sin_addr.s_addr, 0))
1283 			break;
1284 		ret = 0;
1285 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1286 			__be32 old_mask = ifa->ifa_mask;
1287 			inet_del_ifa(in_dev, ifap, 0);
1288 			ifa->ifa_mask = sin->sin_addr.s_addr;
1289 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1290 
1291 			/* See if current broadcast address matches
1292 			 * with current netmask, then recalculate
1293 			 * the broadcast address. Otherwise it's a
1294 			 * funny address, so don't touch it since
1295 			 * the user seems to know what (s)he's doing...
1296 			 */
1297 			if ((dev->flags & IFF_BROADCAST) &&
1298 			    (ifa->ifa_prefixlen < 31) &&
1299 			    (ifa->ifa_broadcast ==
1300 			     (ifa->ifa_local|~old_mask))) {
1301 				ifa->ifa_broadcast = (ifa->ifa_local |
1302 						      ~sin->sin_addr.s_addr);
1303 			}
1304 			inet_insert_ifa(ifa);
1305 		}
1306 		break;
1307 	}
1308 done:
1309 	rtnl_net_unlock(net);
1310 out:
1311 	return ret;
1312 }
1313 
1314 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1315 {
1316 	struct in_device *in_dev = __in_dev_get_rtnl_net(dev);
1317 	const struct in_ifaddr *ifa;
1318 	struct ifreq ifr;
1319 	int done = 0;
1320 
1321 	if (WARN_ON(size > sizeof(struct ifreq)))
1322 		goto out;
1323 
1324 	if (!in_dev)
1325 		goto out;
1326 
1327 	in_dev_for_each_ifa_rtnl_net(dev_net(dev), ifa, in_dev) {
1328 		if (!buf) {
1329 			done += size;
1330 			continue;
1331 		}
1332 		if (len < size)
1333 			break;
1334 		memset(&ifr, 0, sizeof(struct ifreq));
1335 		strcpy(ifr.ifr_name, ifa->ifa_label);
1336 
1337 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1338 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1339 								ifa->ifa_local;
1340 
1341 		if (copy_to_user(buf + done, &ifr, size)) {
1342 			done = -EFAULT;
1343 			break;
1344 		}
1345 		len  -= size;
1346 		done += size;
1347 	}
1348 out:
1349 	return done;
1350 }
1351 
1352 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1353 				 int scope)
1354 {
1355 	const struct in_ifaddr *ifa;
1356 
1357 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1358 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1359 			continue;
1360 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1361 		    ifa->ifa_scope <= scope)
1362 			return ifa->ifa_local;
1363 	}
1364 
1365 	return 0;
1366 }
1367 
1368 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1369 {
1370 	const struct in_ifaddr *ifa;
1371 	__be32 addr = 0;
1372 	unsigned char localnet_scope = RT_SCOPE_HOST;
1373 	struct in_device *in_dev;
1374 	struct net *net = dev_net(dev);
1375 	int master_idx;
1376 
1377 	rcu_read_lock();
1378 	in_dev = __in_dev_get_rcu(dev);
1379 	if (!in_dev)
1380 		goto no_in_dev;
1381 
1382 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1383 		localnet_scope = RT_SCOPE_LINK;
1384 
1385 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1386 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1387 			continue;
1388 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1389 			continue;
1390 		if (!dst || inet_ifa_match(dst, ifa)) {
1391 			addr = ifa->ifa_local;
1392 			break;
1393 		}
1394 		if (!addr)
1395 			addr = ifa->ifa_local;
1396 	}
1397 
1398 	if (addr)
1399 		goto out_unlock;
1400 no_in_dev:
1401 	master_idx = l3mdev_master_ifindex_rcu(dev);
1402 
1403 	/* For VRFs, the VRF device takes the place of the loopback device,
1404 	 * with addresses on it being preferred.  Note in such cases the
1405 	 * loopback device will be among the devices that fail the master_idx
1406 	 * equality check in the loop below.
1407 	 */
1408 	if (master_idx &&
1409 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1410 	    (in_dev = __in_dev_get_rcu(dev))) {
1411 		addr = in_dev_select_addr(in_dev, scope);
1412 		if (addr)
1413 			goto out_unlock;
1414 	}
1415 
1416 	/* Not loopback addresses on loopback should be preferred
1417 	   in this case. It is important that lo is the first interface
1418 	   in dev_base list.
1419 	 */
1420 	for_each_netdev_rcu(net, dev) {
1421 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1422 			continue;
1423 
1424 		in_dev = __in_dev_get_rcu(dev);
1425 		if (!in_dev)
1426 			continue;
1427 
1428 		addr = in_dev_select_addr(in_dev, scope);
1429 		if (addr)
1430 			goto out_unlock;
1431 	}
1432 out_unlock:
1433 	rcu_read_unlock();
1434 	return addr;
1435 }
1436 EXPORT_SYMBOL(inet_select_addr);
1437 
1438 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1439 			      __be32 local, int scope)
1440 {
1441 	unsigned char localnet_scope = RT_SCOPE_HOST;
1442 	const struct in_ifaddr *ifa;
1443 	__be32 addr = 0;
1444 	int same = 0;
1445 
1446 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1447 		localnet_scope = RT_SCOPE_LINK;
1448 
1449 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1450 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1451 
1452 		if (!addr &&
1453 		    (local == ifa->ifa_local || !local) &&
1454 		    min_scope <= scope) {
1455 			addr = ifa->ifa_local;
1456 			if (same)
1457 				break;
1458 		}
1459 		if (!same) {
1460 			same = (!local || inet_ifa_match(local, ifa)) &&
1461 				(!dst || inet_ifa_match(dst, ifa));
1462 			if (same && addr) {
1463 				if (local || !dst)
1464 					break;
1465 				/* Is the selected addr into dst subnet? */
1466 				if (inet_ifa_match(addr, ifa))
1467 					break;
1468 				/* No, then can we use new local src? */
1469 				if (min_scope <= scope) {
1470 					addr = ifa->ifa_local;
1471 					break;
1472 				}
1473 				/* search for large dst subnet for addr */
1474 				same = 0;
1475 			}
1476 		}
1477 	}
1478 
1479 	return same ? addr : 0;
1480 }
1481 
1482 /*
1483  * Confirm that local IP address exists using wildcards:
1484  * - net: netns to check, cannot be NULL
1485  * - in_dev: only on this interface, NULL=any interface
1486  * - dst: only in the same subnet as dst, 0=any dst
1487  * - local: address, 0=autoselect the local address
1488  * - scope: maximum allowed scope value for the local address
1489  */
1490 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1491 			 __be32 dst, __be32 local, int scope)
1492 {
1493 	__be32 addr = 0;
1494 	struct net_device *dev;
1495 
1496 	if (in_dev)
1497 		return confirm_addr_indev(in_dev, dst, local, scope);
1498 
1499 	rcu_read_lock();
1500 	for_each_netdev_rcu(net, dev) {
1501 		in_dev = __in_dev_get_rcu(dev);
1502 		if (in_dev) {
1503 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1504 			if (addr)
1505 				break;
1506 		}
1507 	}
1508 	rcu_read_unlock();
1509 
1510 	return addr;
1511 }
1512 EXPORT_SYMBOL(inet_confirm_addr);
1513 
1514 /*
1515  *	Device notifier
1516  */
1517 
1518 int register_inetaddr_notifier(struct notifier_block *nb)
1519 {
1520 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1521 }
1522 EXPORT_SYMBOL(register_inetaddr_notifier);
1523 
1524 int unregister_inetaddr_notifier(struct notifier_block *nb)
1525 {
1526 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1527 }
1528 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1529 
1530 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1531 {
1532 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1533 }
1534 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1535 
1536 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1537 {
1538 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1539 	    nb);
1540 }
1541 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1542 
1543 /* Rename ifa_labels for a device name change. Make some effort to preserve
1544  * existing alias numbering and to create unique labels if possible.
1545 */
1546 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1547 {
1548 	struct in_ifaddr *ifa;
1549 	int named = 0;
1550 
1551 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1552 		char old[IFNAMSIZ], *dot;
1553 
1554 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1555 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1556 		if (named++ == 0)
1557 			goto skip;
1558 		dot = strchr(old, ':');
1559 		if (!dot) {
1560 			sprintf(old, ":%d", named);
1561 			dot = old;
1562 		}
1563 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1564 			strcat(ifa->ifa_label, dot);
1565 		else
1566 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1567 skip:
1568 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1569 	}
1570 }
1571 
1572 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1573 					struct in_device *in_dev)
1574 
1575 {
1576 	const struct in_ifaddr *ifa;
1577 
1578 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1579 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1580 			 ifa->ifa_local, dev,
1581 			 ifa->ifa_local, NULL,
1582 			 dev->dev_addr, NULL);
1583 	}
1584 }
1585 
1586 /* Called only under RTNL semaphore */
1587 
1588 static int inetdev_event(struct notifier_block *this, unsigned long event,
1589 			 void *ptr)
1590 {
1591 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1592 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1593 
1594 	ASSERT_RTNL();
1595 
1596 	if (!in_dev) {
1597 		if (event == NETDEV_REGISTER) {
1598 			in_dev = inetdev_init(dev);
1599 			if (IS_ERR(in_dev))
1600 				return notifier_from_errno(PTR_ERR(in_dev));
1601 			if (dev->flags & IFF_LOOPBACK) {
1602 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1603 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1604 			}
1605 		} else if (event == NETDEV_CHANGEMTU) {
1606 			/* Re-enabling IP */
1607 			if (inetdev_valid_mtu(dev->mtu))
1608 				in_dev = inetdev_init(dev);
1609 		}
1610 		goto out;
1611 	}
1612 
1613 	switch (event) {
1614 	case NETDEV_REGISTER:
1615 		pr_debug("%s: bug\n", __func__);
1616 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1617 		break;
1618 	case NETDEV_UP:
1619 		if (!inetdev_valid_mtu(dev->mtu))
1620 			break;
1621 		if (dev->flags & IFF_LOOPBACK) {
1622 			struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);
1623 
1624 			if (ifa) {
1625 				ifa->ifa_local =
1626 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1627 				ifa->ifa_prefixlen = 8;
1628 				ifa->ifa_mask = inet_make_mask(8);
1629 				ifa->ifa_scope = RT_SCOPE_HOST;
1630 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1631 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1632 						 INFINITY_LIFE_TIME);
1633 				ipv4_devconf_setall(in_dev);
1634 				neigh_parms_data_state_setall(in_dev->arp_parms);
1635 				inet_insert_ifa(ifa);
1636 			}
1637 		}
1638 		ip_mc_up(in_dev);
1639 		fallthrough;
1640 	case NETDEV_CHANGEADDR:
1641 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1642 			break;
1643 		fallthrough;
1644 	case NETDEV_NOTIFY_PEERS:
1645 		/* Send gratuitous ARP to notify of link change */
1646 		inetdev_send_gratuitous_arp(dev, in_dev);
1647 		break;
1648 	case NETDEV_DOWN:
1649 		ip_mc_down(in_dev);
1650 		break;
1651 	case NETDEV_PRE_TYPE_CHANGE:
1652 		ip_mc_unmap(in_dev);
1653 		break;
1654 	case NETDEV_POST_TYPE_CHANGE:
1655 		ip_mc_remap(in_dev);
1656 		break;
1657 	case NETDEV_CHANGEMTU:
1658 		if (inetdev_valid_mtu(dev->mtu))
1659 			break;
1660 		/* disable IP when MTU is not enough */
1661 		fallthrough;
1662 	case NETDEV_UNREGISTER:
1663 		inetdev_destroy(in_dev);
1664 		break;
1665 	case NETDEV_CHANGENAME:
1666 		/* Do not notify about label change, this event is
1667 		 * not interesting to applications using netlink.
1668 		 */
1669 		inetdev_changename(dev, in_dev);
1670 
1671 		devinet_sysctl_unregister(in_dev);
1672 		devinet_sysctl_register(in_dev);
1673 		break;
1674 	}
1675 out:
1676 	return NOTIFY_DONE;
1677 }
1678 
1679 static struct notifier_block ip_netdev_notifier = {
1680 	.notifier_call = inetdev_event,
1681 };
1682 
1683 static size_t inet_nlmsg_size(void)
1684 {
1685 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1686 	       + nla_total_size(4) /* IFA_ADDRESS */
1687 	       + nla_total_size(4) /* IFA_LOCAL */
1688 	       + nla_total_size(4) /* IFA_BROADCAST */
1689 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1690 	       + nla_total_size(4)  /* IFA_FLAGS */
1691 	       + nla_total_size(1)  /* IFA_PROTO */
1692 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1693 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1694 }
1695 
1696 static inline u32 cstamp_delta(unsigned long cstamp)
1697 {
1698 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1699 }
1700 
1701 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1702 			 unsigned long tstamp, u32 preferred, u32 valid)
1703 {
1704 	struct ifa_cacheinfo ci;
1705 
1706 	ci.cstamp = cstamp_delta(cstamp);
1707 	ci.tstamp = cstamp_delta(tstamp);
1708 	ci.ifa_prefered = preferred;
1709 	ci.ifa_valid = valid;
1710 
1711 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1712 }
1713 
1714 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1715 			    struct inet_fill_args *args)
1716 {
1717 	struct ifaddrmsg *ifm;
1718 	struct nlmsghdr  *nlh;
1719 	unsigned long tstamp;
1720 	u32 preferred, valid;
1721 	u32 flags;
1722 
1723 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1724 			args->flags);
1725 	if (!nlh)
1726 		return -EMSGSIZE;
1727 
1728 	ifm = nlmsg_data(nlh);
1729 	ifm->ifa_family = AF_INET;
1730 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1731 
1732 	flags = READ_ONCE(ifa->ifa_flags);
1733 	/* Warning : ifm->ifa_flags is an __u8, it holds only 8 bits.
1734 	 * The 32bit value is given in IFA_FLAGS attribute.
1735 	 */
1736 	ifm->ifa_flags = (__u8)flags;
1737 
1738 	ifm->ifa_scope = ifa->ifa_scope;
1739 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1740 
1741 	if (args->netnsid >= 0 &&
1742 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1743 		goto nla_put_failure;
1744 
1745 	tstamp = READ_ONCE(ifa->ifa_tstamp);
1746 	if (!(flags & IFA_F_PERMANENT)) {
1747 		preferred = READ_ONCE(ifa->ifa_preferred_lft);
1748 		valid = READ_ONCE(ifa->ifa_valid_lft);
1749 		if (preferred != INFINITY_LIFE_TIME) {
1750 			long tval = (jiffies - tstamp) / HZ;
1751 
1752 			if (preferred > tval)
1753 				preferred -= tval;
1754 			else
1755 				preferred = 0;
1756 			if (valid != INFINITY_LIFE_TIME) {
1757 				if (valid > tval)
1758 					valid -= tval;
1759 				else
1760 					valid = 0;
1761 			}
1762 		}
1763 	} else {
1764 		preferred = INFINITY_LIFE_TIME;
1765 		valid = INFINITY_LIFE_TIME;
1766 	}
1767 	if ((ifa->ifa_address &&
1768 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1769 	    (ifa->ifa_local &&
1770 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1771 	    (ifa->ifa_broadcast &&
1772 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1773 	    (ifa->ifa_label[0] &&
1774 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1775 	    (ifa->ifa_proto &&
1776 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1777 	    nla_put_u32(skb, IFA_FLAGS, flags) ||
1778 	    (ifa->ifa_rt_priority &&
1779 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1780 	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1781 			  preferred, valid))
1782 		goto nla_put_failure;
1783 
1784 	nlmsg_end(skb, nlh);
1785 	return 0;
1786 
1787 nla_put_failure:
1788 	nlmsg_cancel(skb, nlh);
1789 	return -EMSGSIZE;
1790 }
1791 
1792 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1793 				      struct inet_fill_args *fillargs,
1794 				      struct net **tgt_net, struct sock *sk,
1795 				      struct netlink_callback *cb)
1796 {
1797 	struct netlink_ext_ack *extack = cb->extack;
1798 	struct nlattr *tb[IFA_MAX+1];
1799 	struct ifaddrmsg *ifm;
1800 	int err, i;
1801 
1802 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1803 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1804 		return -EINVAL;
1805 	}
1806 
1807 	ifm = nlmsg_data(nlh);
1808 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1809 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1810 		return -EINVAL;
1811 	}
1812 
1813 	fillargs->ifindex = ifm->ifa_index;
1814 	if (fillargs->ifindex) {
1815 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1816 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1817 	}
1818 
1819 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1820 					    ifa_ipv4_policy, extack);
1821 	if (err < 0)
1822 		return err;
1823 
1824 	for (i = 0; i <= IFA_MAX; ++i) {
1825 		if (!tb[i])
1826 			continue;
1827 
1828 		if (i == IFA_TARGET_NETNSID) {
1829 			struct net *net;
1830 
1831 			fillargs->netnsid = nla_get_s32(tb[i]);
1832 
1833 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1834 			if (IS_ERR(net)) {
1835 				fillargs->netnsid = -1;
1836 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1837 				return PTR_ERR(net);
1838 			}
1839 			*tgt_net = net;
1840 		} else {
1841 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1842 			return -EINVAL;
1843 		}
1844 	}
1845 
1846 	return 0;
1847 }
1848 
1849 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1850 			    struct netlink_callback *cb, int *s_ip_idx,
1851 			    struct inet_fill_args *fillargs)
1852 {
1853 	struct in_ifaddr *ifa;
1854 	int ip_idx = 0;
1855 	int err;
1856 
1857 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1858 		if (ip_idx < *s_ip_idx) {
1859 			ip_idx++;
1860 			continue;
1861 		}
1862 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1863 		if (err < 0)
1864 			goto done;
1865 
1866 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1867 		ip_idx++;
1868 	}
1869 	err = 0;
1870 	ip_idx = 0;
1871 done:
1872 	*s_ip_idx = ip_idx;
1873 
1874 	return err;
1875 }
1876 
1877 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1878  */
1879 static u32 inet_base_seq(const struct net *net)
1880 {
1881 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1882 		  READ_ONCE(net->dev_base_seq);
1883 
1884 	/* Must not return 0 (see nl_dump_check_consistent()).
1885 	 * Chose a value far away from 0.
1886 	 */
1887 	if (!res)
1888 		res = 0x80000000;
1889 	return res;
1890 }
1891 
1892 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1893 {
1894 	const struct nlmsghdr *nlh = cb->nlh;
1895 	struct inet_fill_args fillargs = {
1896 		.portid = NETLINK_CB(cb->skb).portid,
1897 		.seq = nlh->nlmsg_seq,
1898 		.event = RTM_NEWADDR,
1899 		.flags = NLM_F_MULTI,
1900 		.netnsid = -1,
1901 	};
1902 	struct net *net = sock_net(skb->sk);
1903 	struct net *tgt_net = net;
1904 	struct {
1905 		unsigned long ifindex;
1906 		int ip_idx;
1907 	} *ctx = (void *)cb->ctx;
1908 	struct in_device *in_dev;
1909 	struct net_device *dev;
1910 	int err = 0;
1911 
1912 	rcu_read_lock();
1913 	if (cb->strict_check) {
1914 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1915 						 skb->sk, cb);
1916 		if (err < 0)
1917 			goto done;
1918 
1919 		if (fillargs.ifindex) {
1920 			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1921 			if (!dev) {
1922 				err = -ENODEV;
1923 				goto done;
1924 			}
1925 			in_dev = __in_dev_get_rcu(dev);
1926 			if (!in_dev)
1927 				goto done;
1928 			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1929 					       &fillargs);
1930 			goto done;
1931 		}
1932 	}
1933 
1934 	cb->seq = inet_base_seq(tgt_net);
1935 
1936 	for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1937 		in_dev = __in_dev_get_rcu(dev);
1938 		if (!in_dev)
1939 			continue;
1940 		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1941 				       &fillargs);
1942 		if (err < 0)
1943 			goto done;
1944 	}
1945 done:
1946 	if (fillargs.netnsid >= 0)
1947 		put_net(tgt_net);
1948 	rcu_read_unlock();
1949 	return err;
1950 }
1951 
1952 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1953 		      u32 portid)
1954 {
1955 	struct inet_fill_args fillargs = {
1956 		.portid = portid,
1957 		.seq = nlh ? nlh->nlmsg_seq : 0,
1958 		.event = event,
1959 		.flags = 0,
1960 		.netnsid = -1,
1961 	};
1962 	struct sk_buff *skb;
1963 	int err = -ENOBUFS;
1964 	struct net *net;
1965 
1966 	net = dev_net(ifa->ifa_dev->dev);
1967 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1968 	if (!skb)
1969 		goto errout;
1970 
1971 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1972 	if (err < 0) {
1973 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1974 		WARN_ON(err == -EMSGSIZE);
1975 		kfree_skb(skb);
1976 		goto errout;
1977 	}
1978 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1979 	return;
1980 errout:
1981 	rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1982 }
1983 
1984 static size_t inet_get_link_af_size(const struct net_device *dev,
1985 				    u32 ext_filter_mask)
1986 {
1987 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1988 
1989 	if (!in_dev)
1990 		return 0;
1991 
1992 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1993 }
1994 
1995 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1996 			     u32 ext_filter_mask)
1997 {
1998 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1999 	struct nlattr *nla;
2000 	int i;
2001 
2002 	if (!in_dev)
2003 		return -ENODATA;
2004 
2005 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
2006 	if (!nla)
2007 		return -EMSGSIZE;
2008 
2009 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
2010 		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
2011 
2012 	return 0;
2013 }
2014 
2015 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
2016 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
2017 };
2018 
2019 static int inet_validate_link_af(const struct net_device *dev,
2020 				 const struct nlattr *nla,
2021 				 struct netlink_ext_ack *extack)
2022 {
2023 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2024 	int err, rem;
2025 
2026 	if (dev && !__in_dev_get_rtnl(dev))
2027 		return -EAFNOSUPPORT;
2028 
2029 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2030 					  inet_af_policy, extack);
2031 	if (err < 0)
2032 		return err;
2033 
2034 	if (tb[IFLA_INET_CONF]) {
2035 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2036 			int cfgid = nla_type(a);
2037 
2038 			if (nla_len(a) < 4)
2039 				return -EINVAL;
2040 
2041 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2042 				return -EINVAL;
2043 		}
2044 	}
2045 
2046 	return 0;
2047 }
2048 
2049 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2050 			    struct netlink_ext_ack *extack)
2051 {
2052 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2053 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2054 	int rem;
2055 
2056 	if (!in_dev)
2057 		return -EAFNOSUPPORT;
2058 
2059 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2060 		return -EINVAL;
2061 
2062 	if (tb[IFLA_INET_CONF]) {
2063 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2064 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2065 	}
2066 
2067 	return 0;
2068 }
2069 
2070 static int inet_netconf_msgsize_devconf(int type)
2071 {
2072 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2073 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2074 	bool all = false;
2075 
2076 	if (type == NETCONFA_ALL)
2077 		all = true;
2078 
2079 	if (all || type == NETCONFA_FORWARDING)
2080 		size += nla_total_size(4);
2081 	if (all || type == NETCONFA_RP_FILTER)
2082 		size += nla_total_size(4);
2083 	if (all || type == NETCONFA_MC_FORWARDING)
2084 		size += nla_total_size(4);
2085 	if (all || type == NETCONFA_BC_FORWARDING)
2086 		size += nla_total_size(4);
2087 	if (all || type == NETCONFA_PROXY_NEIGH)
2088 		size += nla_total_size(4);
2089 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2090 		size += nla_total_size(4);
2091 
2092 	return size;
2093 }
2094 
2095 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2096 				     const struct ipv4_devconf *devconf,
2097 				     u32 portid, u32 seq, int event,
2098 				     unsigned int flags, int type)
2099 {
2100 	struct nlmsghdr  *nlh;
2101 	struct netconfmsg *ncm;
2102 	bool all = false;
2103 
2104 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2105 			flags);
2106 	if (!nlh)
2107 		return -EMSGSIZE;
2108 
2109 	if (type == NETCONFA_ALL)
2110 		all = true;
2111 
2112 	ncm = nlmsg_data(nlh);
2113 	ncm->ncm_family = AF_INET;
2114 
2115 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2116 		goto nla_put_failure;
2117 
2118 	if (!devconf)
2119 		goto out;
2120 
2121 	if ((all || type == NETCONFA_FORWARDING) &&
2122 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2123 			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2124 		goto nla_put_failure;
2125 	if ((all || type == NETCONFA_RP_FILTER) &&
2126 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2127 			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2128 		goto nla_put_failure;
2129 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2130 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2131 			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2132 		goto nla_put_failure;
2133 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2134 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2135 			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2136 		goto nla_put_failure;
2137 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2138 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2139 			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2140 		goto nla_put_failure;
2141 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2142 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2143 			IPV4_DEVCONF_RO(*devconf,
2144 					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2145 		goto nla_put_failure;
2146 
2147 out:
2148 	nlmsg_end(skb, nlh);
2149 	return 0;
2150 
2151 nla_put_failure:
2152 	nlmsg_cancel(skb, nlh);
2153 	return -EMSGSIZE;
2154 }
2155 
2156 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2157 				 int ifindex, struct ipv4_devconf *devconf)
2158 {
2159 	struct sk_buff *skb;
2160 	int err = -ENOBUFS;
2161 
2162 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2163 	if (!skb)
2164 		goto errout;
2165 
2166 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2167 					event, 0, type);
2168 	if (err < 0) {
2169 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2170 		WARN_ON(err == -EMSGSIZE);
2171 		kfree_skb(skb);
2172 		goto errout;
2173 	}
2174 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2175 	return;
2176 errout:
2177 	rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2178 }
2179 
2180 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2181 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2182 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2183 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2184 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2185 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2186 };
2187 
2188 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2189 				      const struct nlmsghdr *nlh,
2190 				      struct nlattr **tb,
2191 				      struct netlink_ext_ack *extack)
2192 {
2193 	int i, err;
2194 
2195 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2196 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2197 		return -EINVAL;
2198 	}
2199 
2200 	if (!netlink_strict_get_check(skb))
2201 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2202 					      tb, NETCONFA_MAX,
2203 					      devconf_ipv4_policy, extack);
2204 
2205 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2206 					    tb, NETCONFA_MAX,
2207 					    devconf_ipv4_policy, extack);
2208 	if (err)
2209 		return err;
2210 
2211 	for (i = 0; i <= NETCONFA_MAX; i++) {
2212 		if (!tb[i])
2213 			continue;
2214 
2215 		switch (i) {
2216 		case NETCONFA_IFINDEX:
2217 			break;
2218 		default:
2219 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2220 			return -EINVAL;
2221 		}
2222 	}
2223 
2224 	return 0;
2225 }
2226 
2227 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2228 				    struct nlmsghdr *nlh,
2229 				    struct netlink_ext_ack *extack)
2230 {
2231 	struct net *net = sock_net(in_skb->sk);
2232 	struct nlattr *tb[NETCONFA_MAX + 1];
2233 	const struct ipv4_devconf *devconf;
2234 	struct in_device *in_dev = NULL;
2235 	struct net_device *dev = NULL;
2236 	struct sk_buff *skb;
2237 	int ifindex;
2238 	int err;
2239 
2240 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2241 	if (err)
2242 		return err;
2243 
2244 	if (!tb[NETCONFA_IFINDEX])
2245 		return -EINVAL;
2246 
2247 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2248 	switch (ifindex) {
2249 	case NETCONFA_IFINDEX_ALL:
2250 		devconf = net->ipv4.devconf_all;
2251 		break;
2252 	case NETCONFA_IFINDEX_DEFAULT:
2253 		devconf = net->ipv4.devconf_dflt;
2254 		break;
2255 	default:
2256 		err = -ENODEV;
2257 		dev = dev_get_by_index(net, ifindex);
2258 		if (dev)
2259 			in_dev = in_dev_get(dev);
2260 		if (!in_dev)
2261 			goto errout;
2262 		devconf = &in_dev->cnf;
2263 		break;
2264 	}
2265 
2266 	err = -ENOBUFS;
2267 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2268 	if (!skb)
2269 		goto errout;
2270 
2271 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2272 					NETLINK_CB(in_skb).portid,
2273 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2274 					NETCONFA_ALL);
2275 	if (err < 0) {
2276 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2277 		WARN_ON(err == -EMSGSIZE);
2278 		kfree_skb(skb);
2279 		goto errout;
2280 	}
2281 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2282 errout:
2283 	if (in_dev)
2284 		in_dev_put(in_dev);
2285 	dev_put(dev);
2286 	return err;
2287 }
2288 
2289 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2290 				     struct netlink_callback *cb)
2291 {
2292 	const struct nlmsghdr *nlh = cb->nlh;
2293 	struct net *net = sock_net(skb->sk);
2294 	struct {
2295 		unsigned long ifindex;
2296 		unsigned int all_default;
2297 	} *ctx = (void *)cb->ctx;
2298 	const struct in_device *in_dev;
2299 	struct net_device *dev;
2300 	int err = 0;
2301 
2302 	if (cb->strict_check) {
2303 		struct netlink_ext_ack *extack = cb->extack;
2304 		struct netconfmsg *ncm;
2305 
2306 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2307 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2308 			return -EINVAL;
2309 		}
2310 
2311 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2312 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2313 			return -EINVAL;
2314 		}
2315 	}
2316 
2317 	rcu_read_lock();
2318 	for_each_netdev_dump(net, dev, ctx->ifindex) {
2319 		in_dev = __in_dev_get_rcu(dev);
2320 		if (!in_dev)
2321 			continue;
2322 		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2323 						&in_dev->cnf,
2324 						NETLINK_CB(cb->skb).portid,
2325 						nlh->nlmsg_seq,
2326 						RTM_NEWNETCONF, NLM_F_MULTI,
2327 						NETCONFA_ALL);
2328 		if (err < 0)
2329 			goto done;
2330 	}
2331 	if (ctx->all_default == 0) {
2332 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2333 						net->ipv4.devconf_all,
2334 						NETLINK_CB(cb->skb).portid,
2335 						nlh->nlmsg_seq,
2336 						RTM_NEWNETCONF, NLM_F_MULTI,
2337 						NETCONFA_ALL);
2338 		if (err < 0)
2339 			goto done;
2340 		ctx->all_default++;
2341 	}
2342 	if (ctx->all_default == 1) {
2343 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2344 						net->ipv4.devconf_dflt,
2345 						NETLINK_CB(cb->skb).portid,
2346 						nlh->nlmsg_seq,
2347 						RTM_NEWNETCONF, NLM_F_MULTI,
2348 						NETCONFA_ALL);
2349 		if (err < 0)
2350 			goto done;
2351 		ctx->all_default++;
2352 	}
2353 done:
2354 	rcu_read_unlock();
2355 	return err;
2356 }
2357 
2358 #ifdef CONFIG_SYSCTL
2359 
2360 static void devinet_copy_dflt_conf(struct net *net, int i)
2361 {
2362 	struct net_device *dev;
2363 
2364 	rcu_read_lock();
2365 	for_each_netdev_rcu(net, dev) {
2366 		struct in_device *in_dev;
2367 
2368 		in_dev = __in_dev_get_rcu(dev);
2369 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2370 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2371 	}
2372 	rcu_read_unlock();
2373 }
2374 
2375 /* called with RTNL locked */
2376 static void inet_forward_change(struct net *net)
2377 {
2378 	struct net_device *dev;
2379 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2380 
2381 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2382 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2383 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2384 				    NETCONFA_FORWARDING,
2385 				    NETCONFA_IFINDEX_ALL,
2386 				    net->ipv4.devconf_all);
2387 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2388 				    NETCONFA_FORWARDING,
2389 				    NETCONFA_IFINDEX_DEFAULT,
2390 				    net->ipv4.devconf_dflt);
2391 
2392 	for_each_netdev(net, dev) {
2393 		struct in_device *in_dev;
2394 
2395 		if (on)
2396 			dev_disable_lro(dev);
2397 
2398 		in_dev = __in_dev_get_rtnl_net(dev);
2399 		if (in_dev) {
2400 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2401 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2402 						    NETCONFA_FORWARDING,
2403 						    dev->ifindex, &in_dev->cnf);
2404 		}
2405 	}
2406 }
2407 
2408 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2409 {
2410 	if (cnf == net->ipv4.devconf_dflt)
2411 		return NETCONFA_IFINDEX_DEFAULT;
2412 	else if (cnf == net->ipv4.devconf_all)
2413 		return NETCONFA_IFINDEX_ALL;
2414 	else {
2415 		struct in_device *idev
2416 			= container_of(cnf, struct in_device, cnf);
2417 		return idev->dev->ifindex;
2418 	}
2419 }
2420 
2421 static int devinet_conf_proc(const struct ctl_table *ctl, int write,
2422 			     void *buffer, size_t *lenp, loff_t *ppos)
2423 {
2424 	int old_value = *(int *)ctl->data;
2425 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2426 	int new_value = *(int *)ctl->data;
2427 
2428 	if (write) {
2429 		struct ipv4_devconf *cnf = ctl->extra1;
2430 		struct net *net = ctl->extra2;
2431 		int i = (int *)ctl->data - cnf->data;
2432 		int ifindex;
2433 
2434 		set_bit(i, cnf->state);
2435 
2436 		if (cnf == net->ipv4.devconf_dflt)
2437 			devinet_copy_dflt_conf(net, i);
2438 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2439 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2440 			if ((new_value == 0) && (old_value != 0))
2441 				rt_cache_flush(net);
2442 
2443 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2444 		    new_value != old_value)
2445 			rt_cache_flush(net);
2446 
2447 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2448 		    new_value != old_value) {
2449 			ifindex = devinet_conf_ifindex(net, cnf);
2450 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2451 						    NETCONFA_RP_FILTER,
2452 						    ifindex, cnf);
2453 		}
2454 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2455 		    new_value != old_value) {
2456 			ifindex = devinet_conf_ifindex(net, cnf);
2457 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2458 						    NETCONFA_PROXY_NEIGH,
2459 						    ifindex, cnf);
2460 		}
2461 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2462 		    new_value != old_value) {
2463 			ifindex = devinet_conf_ifindex(net, cnf);
2464 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2465 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2466 						    ifindex, cnf);
2467 		}
2468 	}
2469 
2470 	return ret;
2471 }
2472 
2473 static int devinet_sysctl_forward(const struct ctl_table *ctl, int write,
2474 				  void *buffer, size_t *lenp, loff_t *ppos)
2475 {
2476 	int *valp = ctl->data;
2477 	int val = *valp;
2478 	loff_t pos = *ppos;
2479 	struct net *net = ctl->extra2;
2480 	int ret;
2481 
2482 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2483 		return -EPERM;
2484 
2485 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2486 
2487 	if (write && *valp != val) {
2488 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2489 			if (!rtnl_net_trylock(net)) {
2490 				/* Restore the original values before restarting */
2491 				*valp = val;
2492 				*ppos = pos;
2493 				return restart_syscall();
2494 			}
2495 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2496 				inet_forward_change(net);
2497 			} else {
2498 				struct ipv4_devconf *cnf = ctl->extra1;
2499 				struct in_device *idev =
2500 					container_of(cnf, struct in_device, cnf);
2501 				if (*valp)
2502 					dev_disable_lro(idev->dev);
2503 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2504 							    NETCONFA_FORWARDING,
2505 							    idev->dev->ifindex,
2506 							    cnf);
2507 			}
2508 			rtnl_net_unlock(net);
2509 			rt_cache_flush(net);
2510 		} else
2511 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2512 						    NETCONFA_FORWARDING,
2513 						    NETCONFA_IFINDEX_DEFAULT,
2514 						    net->ipv4.devconf_dflt);
2515 	}
2516 
2517 	return ret;
2518 }
2519 
2520 static int ipv4_doint_and_flush(const struct ctl_table *ctl, int write,
2521 				void *buffer, size_t *lenp, loff_t *ppos)
2522 {
2523 	int *valp = ctl->data;
2524 	int val = *valp;
2525 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2526 	struct net *net = ctl->extra2;
2527 
2528 	if (write && *valp != val)
2529 		rt_cache_flush(net);
2530 
2531 	return ret;
2532 }
2533 
/*
 * Initializer for one ctl_table entry of the devinet template table.
 * .data addresses slot IPV4_DEVCONF_<conf_attr> - 1 of the global
 * ipv4_devconf and .extra1 addresses ipv4_devconf itself;
 * __devinet_sysctl_register() rebases both onto the config it registers.
 */
#define DEVINET_SYSCTL_ENTRY(conf_attr, sysctl_name, perm, handler) \
	{ \
		.procname	= sysctl_name, \
		.data		= &ipv4_devconf.data[ \
					IPV4_DEVCONF_ ## conf_attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(conf_attr, sysctl_name) \
	DEVINET_SYSCTL_ENTRY(conf_attr, sysctl_name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(conf_attr, sysctl_name) \
	DEVINET_SYSCTL_ENTRY(conf_attr, sysctl_name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(conf_attr, sysctl_name, handler) \
	DEVINET_SYSCTL_ENTRY(conf_attr, sysctl_name, 0644, handler)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(conf_attr, sysctl_name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(conf_attr, sysctl_name, \
				     ipv4_doint_and_flush)
2556 
2557 static struct devinet_sysctl_table {
2558 	struct ctl_table_header *sysctl_header;
2559 	struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
2560 } devinet_sysctl = {
2561 	.devinet_vars = {
2562 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2563 					     devinet_sysctl_forward),
2564 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2565 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2566 
2567 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2568 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2569 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2570 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2571 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2572 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2573 					"accept_source_route"),
2574 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2575 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2576 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2577 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2578 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2579 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2580 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2581 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2582 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2583 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2584 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2585 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2586 		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2587 					"arp_evict_nocarrier"),
2588 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2589 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2590 					"force_igmp_version"),
2591 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2592 					"igmpv2_unsolicited_report_interval"),
2593 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2594 					"igmpv3_unsolicited_report_interval"),
2595 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2596 					"ignore_routes_with_linkdown"),
2597 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2598 					"drop_gratuitous_arp"),
2599 
2600 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2601 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2602 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2603 					      "promote_secondaries"),
2604 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2605 					      "route_localnet"),
2606 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2607 					      "drop_unicast_in_l2_multicast"),
2608 	},
2609 };
2610 
2611 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2612 				     int ifindex, struct ipv4_devconf *p)
2613 {
2614 	int i;
2615 	struct devinet_sysctl_table *t;
2616 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2617 
2618 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2619 	if (!t)
2620 		goto out;
2621 
2622 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2623 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2624 		t->devinet_vars[i].extra1 = p;
2625 		t->devinet_vars[i].extra2 = net;
2626 	}
2627 
2628 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2629 
2630 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2631 	if (!t->sysctl_header)
2632 		goto free;
2633 
2634 	p->sysctl = t;
2635 
2636 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2637 				    ifindex, p);
2638 	return 0;
2639 
2640 free:
2641 	kfree(t);
2642 out:
2643 	return -ENOMEM;
2644 }
2645 
2646 static void __devinet_sysctl_unregister(struct net *net,
2647 					struct ipv4_devconf *cnf, int ifindex)
2648 {
2649 	struct devinet_sysctl_table *t = cnf->sysctl;
2650 
2651 	if (t) {
2652 		cnf->sysctl = NULL;
2653 		unregister_net_sysctl_table(t->sysctl_header);
2654 		kfree(t);
2655 	}
2656 
2657 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2658 }
2659 
2660 static int devinet_sysctl_register(struct in_device *idev)
2661 {
2662 	int err;
2663 
2664 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2665 		return -EINVAL;
2666 
2667 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2668 	if (err)
2669 		return err;
2670 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2671 					idev->dev->ifindex, &idev->cnf);
2672 	if (err)
2673 		neigh_sysctl_unregister(idev->arp_parms);
2674 	return err;
2675 }
2676 
2677 static void devinet_sysctl_unregister(struct in_device *idev)
2678 {
2679 	struct net *net = dev_net(idev->dev);
2680 
2681 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2682 	neigh_sysctl_unregister(idev->arp_parms);
2683 }
2684 
2685 static struct ctl_table ctl_forward_entry[] = {
2686 	{
2687 		.procname	= "ip_forward",
2688 		.data		= &ipv4_devconf.data[
2689 					IPV4_DEVCONF_FORWARDING - 1],
2690 		.maxlen		= sizeof(int),
2691 		.mode		= 0644,
2692 		.proc_handler	= devinet_sysctl_forward,
2693 		.extra1		= &ipv4_devconf,
2694 		.extra2		= &init_net,
2695 	},
2696 };
2697 #endif
2698 
2699 static __net_init int devinet_init_net(struct net *net)
2700 {
2701 #ifdef CONFIG_SYSCTL
2702 	struct ctl_table_header *forw_hdr;
2703 	struct ctl_table *tbl;
2704 #endif
2705 	struct ipv4_devconf *all, *dflt;
2706 	int err;
2707 	int i;
2708 
2709 	err = -ENOMEM;
2710 	net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
2711 						sizeof(struct hlist_head),
2712 						GFP_KERNEL);
2713 	if (!net->ipv4.inet_addr_lst)
2714 		goto err_alloc_hash;
2715 
2716 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2717 	if (!all)
2718 		goto err_alloc_all;
2719 
2720 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2721 	if (!dflt)
2722 		goto err_alloc_dflt;
2723 
2724 #ifdef CONFIG_SYSCTL
2725 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2726 	if (!tbl)
2727 		goto err_alloc_ctl;
2728 
2729 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2730 	tbl[0].extra1 = all;
2731 	tbl[0].extra2 = net;
2732 #endif
2733 
2734 	if (!net_eq(net, &init_net)) {
2735 		switch (net_inherit_devconf()) {
2736 		case 3:
2737 			/* copy from the current netns */
2738 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2739 			       sizeof(ipv4_devconf));
2740 			memcpy(dflt,
2741 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2742 			       sizeof(ipv4_devconf_dflt));
2743 			break;
2744 		case 0:
2745 		case 1:
2746 			/* copy from init_net */
2747 			memcpy(all, init_net.ipv4.devconf_all,
2748 			       sizeof(ipv4_devconf));
2749 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2750 			       sizeof(ipv4_devconf_dflt));
2751 			break;
2752 		case 2:
2753 			/* use compiled values */
2754 			break;
2755 		}
2756 	}
2757 
2758 #ifdef CONFIG_SYSCTL
2759 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2760 	if (err < 0)
2761 		goto err_reg_all;
2762 
2763 	err = __devinet_sysctl_register(net, "default",
2764 					NETCONFA_IFINDEX_DEFAULT, dflt);
2765 	if (err < 0)
2766 		goto err_reg_dflt;
2767 
2768 	err = -ENOMEM;
2769 	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2770 					  ARRAY_SIZE(ctl_forward_entry));
2771 	if (!forw_hdr)
2772 		goto err_reg_ctl;
2773 	net->ipv4.forw_hdr = forw_hdr;
2774 #endif
2775 
2776 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2777 		INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);
2778 
2779 	INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);
2780 
2781 	net->ipv4.devconf_all = all;
2782 	net->ipv4.devconf_dflt = dflt;
2783 	return 0;
2784 
2785 #ifdef CONFIG_SYSCTL
2786 err_reg_ctl:
2787 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2788 err_reg_dflt:
2789 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2790 err_reg_all:
2791 	kfree(tbl);
2792 err_alloc_ctl:
2793 #endif
2794 	kfree(dflt);
2795 err_alloc_dflt:
2796 	kfree(all);
2797 err_alloc_all:
2798 	kfree(net->ipv4.inet_addr_lst);
2799 err_alloc_hash:
2800 	return err;
2801 }
2802 
2803 static __net_exit void devinet_exit_net(struct net *net)
2804 {
2805 #ifdef CONFIG_SYSCTL
2806 	const struct ctl_table *tbl;
2807 #endif
2808 
2809 	cancel_delayed_work_sync(&net->ipv4.addr_chk_work);
2810 
2811 #ifdef CONFIG_SYSCTL
2812 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2813 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2814 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2815 				    NETCONFA_IFINDEX_DEFAULT);
2816 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2817 				    NETCONFA_IFINDEX_ALL);
2818 	kfree(tbl);
2819 #endif
2820 	kfree(net->ipv4.devconf_dflt);
2821 	kfree(net->ipv4.devconf_all);
2822 	kfree(net->ipv4.inet_addr_lst);
2823 }
2824 
2825 static __net_initdata struct pernet_operations devinet_ops = {
2826 	.init = devinet_init_net,
2827 	.exit = devinet_exit_net,
2828 };
2829 
2830 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2831 	.family		  = AF_INET,
2832 	.fill_link_af	  = inet_fill_link_af,
2833 	.get_link_af_size = inet_get_link_af_size,
2834 	.validate_link_af = inet_validate_link_af,
2835 	.set_link_af	  = inet_set_link_af,
2836 };
2837 
2838 static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = {
2839 	{.protocol = PF_INET, .msgtype = RTM_NEWADDR, .doit = inet_rtm_newaddr,
2840 	 .flags = RTNL_FLAG_DOIT_PERNET},
2841 	{.protocol = PF_INET, .msgtype = RTM_DELADDR, .doit = inet_rtm_deladdr,
2842 	 .flags = RTNL_FLAG_DOIT_PERNET},
2843 	{.protocol = PF_INET, .msgtype = RTM_GETADDR, .dumpit = inet_dump_ifaddr,
2844 	 .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
2845 	{.protocol = PF_INET, .msgtype = RTM_GETNETCONF,
2846 	 .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf,
2847 	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
2848 };
2849 
2850 void __init devinet_init(void)
2851 {
2852 	register_pernet_subsys(&devinet_ops);
2853 	register_netdevice_notifier(&ip_netdev_notifier);
2854 
2855 	if (rtnl_af_register(&inet_af_ops))
2856 		panic("Unable to register inet_af_ops\n");
2857 
2858 	rtnl_register_many(devinet_rtnl_msg_handlers);
2859 }
2860