xref: /linux/net/ipv4/devinet.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* IFA_F_* address flags that have no meaning for IPv4 addresses;
 * __inet_insert_ifa() masks them out of ifa_flags before inserting.
 */
65 #define IPV6ONLY_FLAGS	\
66 		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67 		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68 		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
/* Static IPv4 device configuration table. Slots are indexed by the
 * IPV4_DEVCONF_* attribute number minus one; slots not listed are zero.
 * Unlike ipv4_devconf_dflt, ACCEPT_SOURCE_ROUTE is not set here.
 * NOTE(review): the users of this table are outside the visible part of
 * the file - confirm how it is wired into struct net.
 */
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
/* Second static IPv4 device configuration table, indexed like
 * ipv4_devconf (IPV4_DEVCONF_* minus one). It additionally enables
 * ACCEPT_SOURCE_ROUTE.
 * NOTE(review): presumably the initial content of net->ipv4.devconf_dflt,
 * which inetdev_init() copies into each new in_device - the assignment is
 * not in the visible part of the file, confirm.
 */
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
/* Access attribute @attr of the per-namespace default devconf table
 * (net->ipv4.devconf_dflt) through the generic IPV4_DEVCONF() accessor.
 */
95 #define IPV4_DEVCONF_DFLT(net, attr) \
96 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97 
/* Netlink attribute policy for IPv4 address messages. It is handed to
 * nlmsg_parse_deprecated() by inet_rtm_deladdr() and rtm_to_ifaddr().
 * Addresses are carried as 32-bit values; the label is a string of at
 * most IFNAMSIZ - 1 bytes.
 */
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
/* Bundle of parameters describing one netlink address message.
 * NOTE(review): the code that fills and consumes this structure is outside
 * the visible part of the file; the per-field notes below are inferred
 * from the names only and should be confirmed there.
 */
110 struct inet_fill_args {
111 	u32 portid;		/* presumably the netlink port id of the receiver */
112 	u32 seq;		/* presumably the netlink sequence number */
113 	int event;		/* presumably the RTM_* message type */
114 	unsigned int flags;	/* presumably NLM_F_* message flags */
115 	int netnsid;		/* presumably a target netns id */
116 	int ifindex;		/* presumably an interface index */
117 };
118 
/* Global hash table of configured IPv4 addresses (struct in_ifaddr),
 * with 1 << IN4_ADDR_HSIZE_SHIFT buckets. The bucket is chosen by
 * inet_addr_hash() from the namespace and the local address. Entries are
 * added/removed under RTNL (inet_hash_insert()/inet_hash_remove()) and
 * looked up under RCU (inet_lookup_ifaddr_rcu()).
 */
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
/* Compute the inet_addr_lst[] bucket index for @addr in namespace @net.
 * The address is XORed with net_hash_mix(net) and the result is reduced
 * to IN4_ADDR_HSIZE_SHIFT bits with hash_32().
 */
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
/* Add @ifa to the global address hash, in the bucket derived from @net
 * and ifa->ifa_local. RTNL must be held (asserted); the entry is published
 * with the RCU list primitive so RCU readers may see it immediately.
 */
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
/* Unlink @ifa from the global address hash. RTNL must be held (asserted). */
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
 *
 * The address hash is consulted first. On a miss, the RT_TABLE_LOCAL FIB
 * table is looked up and its device is used only if the result type is
 * RTN_LOCAL.
 *
 * Return: the matching net_device, or NULL if no device was found.
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fallback to FIB local table so that communication
166 		 * over loopback subnets work.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
	/* The reference is taken before leaving the RCU read-side section. */
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182 
/* Look up the in_ifaddr whose local address equals @addr and whose device
 * belongs to namespace @net. The hash bucket is shared between namespaces,
 * hence the explicit net_eq() check. Returns the entry or NULL; no
 * reference is taken on the result.
 */
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
/* Forward declaration of the netlink address notification helper. */
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198 
/* inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN when an address is
 * added to or removed from a device. inetaddr_validator_chain is called
 * with NETDEV_UP by __inet_insert_ifa() before a new address is committed
 * and may veto it.
 */
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 			 struct in_ifaddr __rcu **ifap,
203 			 int destroy);
/* Without CONFIG_SYSCTL the per-device sysctl hooks are no-op stubs:
 * registration always succeeds and unregistration does nothing.
 */
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210 	return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216 
217 /* Locks all the inet devices. */
218 
/* Allocate a zero-filled in_ifaddr with GFP_KERNEL_ACCOUNT.
 * Returns NULL on allocation failure.
 */
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
/* RCU callback queued by inet_free_ifa(): drop the in_device reference
 * held through ifa->ifa_dev (if one was set) and free the address.
 */
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
/* Release @ifa. The actual free is deferred through call_rcu() to
 * inet_rcu_free_ifa(), so the memory stays valid for RCU readers that may
 * still be traversing the address lists.
 */
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236 
/* RCU callback queued by in_dev_finish_destroy(): free the multicast hash
 * (idev->mc_hash) and then the in_device itself.
 */
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240 
241 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242 	kfree(idev);
243 }
244 
/* Final teardown of an in_device.
 * NOTE(review): presumably invoked when the last reference is dropped
 * (in_dev_put() is not defined in the visible part of the file).
 *
 * Warns if address or multicast lists are still populated, releases the
 * reference on the underlying net_device, and queues the structure for
 * freeing after an RCU grace period. If the device was never marked dead
 * an error is logged and the structure is NOT freed.
 */
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247 	struct net_device *dev = idev->dev;
248 
249 	WARN_ON(idev->ifa_list);
250 	WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254 	netdev_put(dev, &idev->dev_tracker);
255 	if (!idev->dead)
256 		pr_err("Freeing alive in_device %p\n", idev);
257 	else
258 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261 
/* Create the IPv4 state (struct in_device) for @dev and attach it to
 * dev->ip_ptr. RTNL must be held (asserted).
 *
 * The configuration is seeded from the namespace's devconf_dflt table.
 * Returns the new in_device, or an ERR_PTR(): -ENOMEM when the in_device
 * or its ARP parameters cannot be allocated, or the error returned by
 * devinet_sysctl_register().
 */
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264 	struct in_device *in_dev;
265 	int err = -ENOMEM;
266 
267 	ASSERT_RTNL();
268 
269 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270 	if (!in_dev)
271 		goto out;
272 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 			sizeof(in_dev->cnf));
	/* The copied sysctl pointer belongs to the template, not to us. */
274 	in_dev->cnf.sysctl = NULL;
275 	in_dev->dev = dev;
276 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 	if (!in_dev->arp_parms)
278 		goto out_kfree;
279 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 		dev_disable_lro(dev);
281 	/* Reference in_dev->dev */
282 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 	/* Account for reference dev->ip_ptr (below) */
284 	refcount_set(&in_dev->refcnt, 1);
285 
286 	err = devinet_sysctl_register(in_dev);
287 	if (err) {
		/* Mark dead before dropping the only reference:
		 * in_dev_finish_destroy() refuses to free a live in_device.
		 */
288 		in_dev->dead = 1;
289 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290 		in_dev_put(in_dev);
291 		in_dev = NULL;
292 		goto out;
293 	}
294 	ip_mc_init_dev(in_dev);
295 	if (dev->flags & IFF_UP)
296 		ip_mc_up(in_dev);
297 
298 	/* we can receive as soon as ip_ptr is set -- do this last */
299 	rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301 	return in_dev ?: ERR_PTR(err);
302 out_kfree:
	/* No references were taken yet, so a plain kfree() is enough. */
303 	kfree(in_dev);
304 	in_dev = NULL;
305 	goto out;
306 }
307 
/* Tear down the IPv4 state of a device. RTNL must be held (asserted).
 *
 * Marks the in_device dead, destroys its multicast state, deletes and
 * frees every configured address, clears dev->ip_ptr, unregisters sysctl,
 * releases the ARP parameters and finally drops the reference that was
 * accounted for dev->ip_ptr.
 */
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310 	struct net_device *dev;
311 	struct in_ifaddr *ifa;
312 
313 	ASSERT_RTNL();
314 
315 	dev = in_dev->dev;
316 
	/* With dead set, __inet_del_ifa() skips secondary handling/promotion. */
317 	in_dev->dead = 1;
318 
319 	ip_mc_destroy_dev(in_dev);
320 
	/* Always delete the list head; destroy=0, so free explicitly here. */
321 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323 		inet_free_ifa(ifa);
324 	}
325 
326 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 
328 	devinet_sysctl_unregister(in_dev);
329 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330 	arp_ifdown(dev);
331 
332 	in_dev_put(in_dev);
333 }
334 
/* Test whether @a (and, if @b is non-zero, also @b) falls into the subnet
 * of a single address configured on @in_dev, as decided by
 * inet_ifa_match(). Takes the RCU read lock itself.
 * Returns 1 on a match, 0 otherwise.
 */
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337 	const struct in_ifaddr *ifa;
338 
339 	rcu_read_lock();
340 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 		if (inet_ifa_match(a, ifa)) {
342 			if (!b || inet_ifa_match(b, ifa)) {
343 				rcu_read_unlock();
344 				return 1;
345 			}
346 		}
347 	}
348 	rcu_read_unlock();
349 	return 0;
350 }
351 
/* Remove one address from a device. RTNL must be held (asserted).
 *
 * @in_dev:  device the address belongs to
 * @ifap:    pointer to the list link that currently points at the address
 *           to delete (the address itself is *ifap)
 * @destroy: if non-zero, the deleted address is also freed here
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * If the device is not dead and the deleted address is a primary, its
 * secondaries in the same subnet are either deleted as well or, when
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is promoted to
 * primary and moved behind the last primary of suitable scope.
 */
352 static void __inet_del_ifa(struct in_device *in_dev,
353 			   struct in_ifaddr __rcu **ifap,
354 			   int destroy, struct nlmsghdr *nlh, u32 portid)
355 {
356 	struct in_ifaddr *promote = NULL;
357 	struct in_ifaddr *ifa, *ifa1;
358 	struct in_ifaddr __rcu **last_prim;
359 	struct in_ifaddr *prev_prom = NULL;
360 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361 
362 	ASSERT_RTNL();
363 
364 	ifa1 = rtnl_dereference(*ifap);
365 	last_prim = ifap;
366 	if (in_dev->dead)
367 		goto no_promotions;
368 
369 	/* 1. Deleting primary ifaddr forces deletion of all secondaries
370 	 * unless alias promotion is set
371 	 **/
372 
373 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375 
376 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Track the link after the last primary seen so far
			 * whose scope is not below that of ifa1.
			 */
377 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378 			    ifa1->ifa_scope <= ifa->ifa_scope)
379 				last_prim = &ifa->ifa_next;
380 
			/* Skip primaries and addresses of other subnets. */
381 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382 			    ifa1->ifa_mask != ifa->ifa_mask ||
383 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
384 				ifap1 = &ifa->ifa_next;
385 				prev_prom = ifa;
386 				continue;
387 			}
388 
389 			if (!do_promote) {
390 				inet_hash_remove(ifa);
391 				*ifap1 = ifa->ifa_next;
392 
393 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394 				blocking_notifier_call_chain(&inetaddr_chain,
395 						NETDEV_DOWN, ifa);
396 				inet_free_ifa(ifa);
397 			} else {
				/* First matching secondary becomes the new primary. */
398 				promote = ifa;
399 				break;
400 			}
401 		}
402 	}
403 
404 	/* On promotion all secondaries from subnet are changing
405 	 * the primary IP, we must remove all their routes silently
406 	 * and later to add them back with new prefsrc. Do this
407 	 * while all addresses are on the device list.
408 	 */
409 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410 		if (ifa1->ifa_mask == ifa->ifa_mask &&
411 		    inet_ifa_match(ifa1->ifa_address, ifa))
412 			fib_del_ifaddr(ifa, ifa1);
413 	}
414 
415 no_promotions:
416 	/* 2. Unlink it */
417 
418 	*ifap = ifa1->ifa_next;
419 	inet_hash_remove(ifa1);
420 
421 	/* 3. Announce address deletion */
422 
423 	/* Send message first, then call notifier.
424 	   At first sight, FIB update triggered by notifier
425 	   will refer to already deleted ifaddr, that could confuse
426 	   netlink listeners. It is not true: look, gated sees
427 	   that route deleted and if it still thinks that ifaddr
428 	   is valid, it will try to restore deleted routes... Grr.
429 	   So that, this order is correct.
430 	 */
431 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433 
434 	if (promote) {
435 		struct in_ifaddr *next_sec;
436 
437 		next_sec = rtnl_dereference(promote->ifa_next);
		/* If anything sits between ifa1 and the promoted address,
		 * unlink the promoted address and re-insert it at *last_prim.
		 */
438 		if (prev_prom) {
439 			struct in_ifaddr *last_sec;
440 
441 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442 
443 			last_sec = rtnl_dereference(*last_prim);
444 			rcu_assign_pointer(promote->ifa_next, last_sec);
445 			rcu_assign_pointer(*last_prim, promote);
446 		}
447 
448 		promote->ifa_flags &= ~IFA_F_SECONDARY;
449 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450 		blocking_notifier_call_chain(&inetaddr_chain,
451 				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of the subnet. */
452 		for (ifa = next_sec; ifa;
453 		     ifa = rtnl_dereference(ifa->ifa_next)) {
454 			if (ifa1->ifa_mask != ifa->ifa_mask ||
455 			    !inet_ifa_match(ifa1->ifa_address, ifa))
456 					continue;
457 			fib_add_ifaddr(ifa);
458 		}
459 
460 	}
461 	if (destroy)
462 		inet_free_ifa(ifa1);
463 }
464 
/* Delete the address at *@ifap for a kernel-internal caller: same as
 * __inet_del_ifa() with no originating netlink message (nlh NULL,
 * portid 0).
 */
465 static void inet_del_ifa(struct in_device *in_dev,
466 			 struct in_ifaddr __rcu **ifap,
467 			 int destroy)
468 {
469 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471 
/* Delayed work item running check_lifetime(), which expires or deprecates
 * addresses with finite lifetimes. It is kicked with zero delay whenever
 * an address is inserted or its lifetime is updated.
 */
472 static void check_lifetime(struct work_struct *work);
473 
474 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 
/* Insert @ifa into the address list of ifa->ifa_dev. RTNL must be held
 * (asserted).
 *
 * @ifa:    fully initialised address; ownership passes to this function
 * @nlh, @portid: passed through to rtmsg_ifa() for the RTM_NEWADDR message
 * @extack: netlink extended ack used for error messages
 *
 * The address becomes a secondary if another address of the same subnet
 * already exists; otherwise it is a primary and is linked behind the last
 * primary whose scope is not below its own. Secondaries are appended at
 * the end of the list.
 *
 * Returns 0 on success and also when ifa_local is zero (the address is
 * then just freed), -EEXIST if the same local address already exists in
 * that subnet, -EINVAL on a scope mismatch within the subnet, or the error
 * reported by a validator. @ifa is freed on every failure path.
 */
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477 			     u32 portid, struct netlink_ext_ack *extack)
478 {
479 	struct in_ifaddr __rcu **last_primary, **ifap;
480 	struct in_device *in_dev = ifa->ifa_dev;
481 	struct in_validator_info ivi;
482 	struct in_ifaddr *ifa1;
483 	int ret;
484 
485 	ASSERT_RTNL();
486 
487 	if (!ifa->ifa_local) {
488 		inet_free_ifa(ifa);
489 		return 0;
490 	}
491 
	/* Primary/secondary status is decided here, not by the caller. */
492 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493 	last_primary = &in_dev->ifa_list;
494 
495 	/* Don't set IPv6 only flags to IPv4 addresses */
496 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497 
498 	ifap = &in_dev->ifa_list;
499 	ifa1 = rtnl_dereference(*ifap);
500 
	/* Walk the whole list: find the primary insertion point, detect
	 * duplicates, and leave ifap pointing at the terminating link.
	 */
501 	while (ifa1) {
502 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503 		    ifa->ifa_scope <= ifa1->ifa_scope)
504 			last_primary = &ifa1->ifa_next;
505 		if (ifa1->ifa_mask == ifa->ifa_mask &&
506 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507 			if (ifa1->ifa_local == ifa->ifa_local) {
508 				inet_free_ifa(ifa);
509 				return -EEXIST;
510 			}
511 			if (ifa1->ifa_scope != ifa->ifa_scope) {
512 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513 				inet_free_ifa(ifa);
514 				return -EINVAL;
515 			}
516 			ifa->ifa_flags |= IFA_F_SECONDARY;
517 		}
518 
519 		ifap = &ifa1->ifa_next;
520 		ifa1 = rtnl_dereference(*ifap);
521 	}
522 
523 	/* Allow any devices that wish to register ifaddr validators to weigh
524 	 * in now, before changes are committed.  The rtnl lock is serializing
525 	 * access here, so the state should not change between a validator call
526 	 * and a final notify on commit.  This isn't invoked on promotion under
527 	 * the assumption that validators are checking the address itself, and
528 	 * not the flags.
529 	 */
530 	ivi.ivi_addr = ifa->ifa_address;
531 	ivi.ivi_dev = ifa->ifa_dev;
532 	ivi.extack = extack;
533 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534 					   NETDEV_UP, &ivi);
535 	ret = notifier_to_errno(ret);
536 	if (ret) {
537 		inet_free_ifa(ifa);
538 		return ret;
539 	}
540 
541 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542 		ifap = last_primary;
543 
	/* Set the new node's next pointer before publishing it to readers. */
544 	rcu_assign_pointer(ifa->ifa_next, *ifap);
545 	rcu_assign_pointer(*ifap, ifa);
546 
547 	inet_hash_insert(dev_net(in_dev->dev), ifa);
548 
	/* Re-run the lifetime check now so the new address is accounted for. */
549 	cancel_delayed_work(&check_lifetime_work);
550 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551 
552 	/* Send message first, then call notifier.
553 	   Notifier will trigger FIB update, so that
554 	   listeners of netlink will know about new ifaddr */
555 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557 
558 	return 0;
559 }
560 
/* Insert @ifa for a kernel-internal caller: __inet_insert_ifa() without an
 * originating netlink message or extended ack. Same return values.
 */
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
565 
/* Bind @ifa to the in_device of @dev and insert it. RTNL must be held.
 *
 * Returns -ENOBUFS (after freeing @ifa) when @dev has no in_device,
 * otherwise the result of inet_insert_ifa(). If @ifa is not yet bound to
 * this in_device, a reference on it is taken and stored in ifa->ifa_dev.
 * Loopback local addresses are forced to host scope.
 */
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569 
570 	ASSERT_RTNL();
571 
572 	if (!in_dev) {
573 		inet_free_ifa(ifa);
574 		return -ENOBUFS;
575 	}
576 	ipv4_devconf_setall(in_dev);
577 	neigh_parms_data_state_setall(in_dev->arp_parms);
578 	if (ifa->ifa_dev != in_dev) {
		/* An address already bound to another in_device is a bug. */
579 		WARN_ON(ifa->ifa_dev);
580 		in_dev_hold(in_dev);
581 		ifa->ifa_dev = in_dev;
582 	}
583 	if (ipv4_is_loopback(ifa->ifa_local))
584 		ifa->ifa_scope = RT_SCOPE_HOST;
585 	return inet_insert_ifa(ifa);
586 }
587 
588 /* Caller must hold RCU or RTNL :
589  * We don't take a reference on found in_device
 *
 * Returns the in_device of the interface with index @ifindex in @net, or
 * NULL if there is no such interface or it has no IPv4 state. The lookup
 * itself runs in its own RCU read-side section, but the pointer is
 * returned after that section ends, hence the requirement on the caller.
590  */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
592 {
593 	struct net_device *dev;
594 	struct in_device *in_dev = NULL;
595 
596 	rcu_read_lock();
597 	dev = dev_get_by_index_rcu(net, ifindex);
598 	if (dev)
599 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600 	rcu_read_unlock();
601 	return in_dev;
602 }
603 EXPORT_SYMBOL(inetdev_by_index);
604 
605 /* Called only from RTNL semaphored context. No locks. */
606 
/* Return the first address on @in_dev whose netmask equals @mask and whose
 * subnet contains @prefix (per inet_ifa_match()), or NULL if none does.
 */
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608 				    __be32 mask)
609 {
610 	struct in_ifaddr *ifa;
611 
612 	ASSERT_RTNL();
613 
614 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616 			return ifa;
617 	}
618 	return NULL;
619 }
620 
/* Join (@join true) or leave (@join false) the multicast group given by
 * ifa->ifa_address on the interface of @ifa, using the per-namespace
 * socket net->ipv4.mc_autojoin_sk. RTNL must be held (asserted).
 *
 * Returns the result of ip_mc_join_group()/ip_mc_leave_group(), or
 * -EOPNOTSUPP when the kernel is built without CONFIG_IP_MULTICAST.
 */
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622 				 const struct in_ifaddr *ifa)
623 {
624 #if defined(CONFIG_IP_MULTICAST)
625 	struct ip_mreqn mreq = {
626 		.imr_multiaddr.s_addr = ifa->ifa_address,
627 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
628 	};
629 	struct sock *sk = net->ipv4.mc_autojoin_sk;
630 	int ret;
631 
632 	ASSERT_RTNL();
633 
634 	lock_sock(sk);
635 	if (join)
636 		ret = ip_mc_join_group(sk, &mreq);
637 	else
638 		ret = ip_mc_leave_group(sk, &mreq);
639 	release_sock(sk);
640 
641 	return ret;
642 #else
643 	return -EOPNOTSUPP;
644 #endif
645 }
646 
/* Netlink handler deleting one IPv4 address. RTNL must be held (asserted).
 *
 * The first address on the interface ifm->ifa_index that satisfies every
 * supplied selector is removed: IFA_LOCAL (exact local address), IFA_LABEL
 * (exact label) and IFA_ADDRESS (same prefix length and subnet match).
 * Selectors that are absent are not checked.
 *
 * Returns 0 on success, a negative parse error, -ENODEV if the interface
 * has no in_device, or -EADDRNOTAVAIL if no address matched.
 */
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648 			    struct netlink_ext_ack *extack)
649 {
650 	struct net *net = sock_net(skb->sk);
651 	struct in_ifaddr __rcu **ifap;
652 	struct nlattr *tb[IFA_MAX+1];
653 	struct in_device *in_dev;
654 	struct ifaddrmsg *ifm;
655 	struct in_ifaddr *ifa;
656 	int err;
657 
658 	ASSERT_RTNL();
659 
660 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661 				     ifa_ipv4_policy, extack);
662 	if (err < 0)
663 		goto errout;
664 
665 	ifm = nlmsg_data(nlh);
666 	in_dev = inetdev_by_index(net, ifm->ifa_index);
667 	if (!in_dev) {
668 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669 		err = -ENODEV;
670 		goto errout;
671 	}
672 
	/* ifap tracks the link pointing at ifa, as __inet_del_ifa() needs. */
673 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674 	     ifap = &ifa->ifa_next) {
675 		if (tb[IFA_LOCAL] &&
676 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677 			continue;
678 
679 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680 			continue;
681 
682 		if (tb[IFA_ADDRESS] &&
683 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685 			continue;
686 
		/* Leave the auto-joined group; the result is not checked. */
687 		if (ipv4_is_multicast(ifa->ifa_address))
688 			ip_mc_autojoin_config(net, false, ifa);
689 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690 		return 0;
691 	}
692 
693 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694 	err = -EADDRNOTAVAIL;
695 errout:
696 	return err;
697 }
698 
/* Lifetime value (in seconds) meaning "never expires"; check_lifetime()
 * skips expiry/deprecation for lifetimes equal to it.
 */
699 #define INFINITY_LIFE_TIME	0xFFFFFFFF
700 
/* Work handler behind check_lifetime_work: expire addresses whose valid
 * lifetime has elapsed, mark addresses whose preferred lifetime has
 * elapsed as deprecated, and re-arm the work for the next event.
 *
 * Each hash bucket is handled in two passes. The first pass runs under
 * the RCU read lock only; it decides whether anything in the bucket needs
 * changing and tracks in 'next' the earliest jiffies value at which an
 * address will change state. Only if a change is needed is RTNL taken
 * for the second pass, which re-evaluates every entry and applies the
 * changes.
 */
701 static void check_lifetime(struct work_struct *work)
702 {
703 	unsigned long now, next, next_sec, next_sched;
704 	struct in_ifaddr *ifa;
705 	struct hlist_node *n;
706 	int i;
707 
708 	now = jiffies;
709 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
710 
711 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712 		bool change_needed = false;
713 
		/* Pass 1: lockless scan, fields are read with READ_ONCE(). */
714 		rcu_read_lock();
715 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716 			unsigned long age, tstamp;
717 			u32 preferred_lft;
718 			u32 valid_lft;
719 			u32 flags;
720 
721 			flags = READ_ONCE(ifa->ifa_flags);
722 			if (flags & IFA_F_PERMANENT)
723 				continue;
724 
725 			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
726 			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
727 			tstamp = READ_ONCE(ifa->ifa_tstamp);
728 			/* We try to batch several events at once. */
729 			age = (now - tstamp +
730 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
731 
732 			if (valid_lft != INFINITY_LIFE_TIME &&
733 			    age >= valid_lft) {
				/* Expired: must be deleted in pass 2. */
734 				change_needed = true;
735 			} else if (preferred_lft ==
736 				   INFINITY_LIFE_TIME) {
737 				continue;
738 			} else if (age >= preferred_lft) {
				/* Past preferred lifetime: next event is expiry. */
739 				if (time_before(tstamp + valid_lft * HZ, next))
740 					next = tstamp + valid_lft * HZ;
741 
742 				if (!(flags & IFA_F_DEPRECATED))
743 					change_needed = true;
744 			} else if (time_before(tstamp + preferred_lft * HZ,
745 					       next)) {
				/* Still preferred: next event is deprecation. */
746 				next = tstamp + preferred_lft * HZ;
747 			}
748 		}
749 		rcu_read_unlock();
750 		if (!change_needed)
751 			continue;
		/* Pass 2: apply changes under RTNL. The _safe iterator is
		 * used because entries may be unhashed while walking.
		 */
752 		rtnl_lock();
753 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
754 			unsigned long age;
755 
756 			if (ifa->ifa_flags & IFA_F_PERMANENT)
757 				continue;
758 
759 			/* We try to batch several events at once. */
760 			age = (now - ifa->ifa_tstamp +
761 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
762 
763 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
764 			    age >= ifa->ifa_valid_lft) {
765 				struct in_ifaddr __rcu **ifap;
766 				struct in_ifaddr *tmp;
767 
				/* Find the device-list link pointing at ifa;
				 * inet_del_ifa() needs it to unlink.
				 */
768 				ifap = &ifa->ifa_dev->ifa_list;
769 				tmp = rtnl_dereference(*ifap);
770 				while (tmp) {
771 					if (tmp == ifa) {
772 						inet_del_ifa(ifa->ifa_dev,
773 							     ifap, 1);
774 						break;
775 					}
776 					ifap = &tmp->ifa_next;
777 					tmp = rtnl_dereference(*ifap);
778 				}
779 			} else if (ifa->ifa_preferred_lft !=
780 				   INFINITY_LIFE_TIME &&
781 				   age >= ifa->ifa_preferred_lft &&
782 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
783 				ifa->ifa_flags |= IFA_F_DEPRECATED;
784 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
785 			}
786 		}
787 		rtnl_unlock();
788 	}
789 
790 	next_sec = round_jiffies_up(next);
791 	next_sched = next;
792 
793 	/* If rounded timeout is accurate enough, accept it. */
794 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
795 		next_sched = next_sec;
796 
797 	now = jiffies;
798 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
799 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
800 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
801 
802 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
803 			next_sched - now);
804 }
805 
/* Store new lifetimes (in seconds) on @ifa and restart its age.
 *
 * IFA_F_PERMANENT and IFA_F_DEPRECATED are recomputed from scratch: a
 * non-finite valid lifetime makes the address permanent, and a preferred
 * lifetime of zero marks it deprecated right away. A non-finite preferred
 * lifetime leaves ifa_preferred_lft untouched, and likewise a non-finite
 * valid lifetime leaves ifa_valid_lft untouched. ifa_tstamp is set to the
 * current jiffies, and ifa_cstamp too if it was still zero.
 * Fields are written with WRITE_ONCE() because check_lifetime() reads them
 * without holding RTNL.
 */
806 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
807 			     __u32 prefered_lft)
808 {
809 	unsigned long timeout;
810 	u32 flags;
811 
812 	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
813 
814 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
815 	if (addrconf_finite_timeout(timeout))
816 		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
817 	else
818 		flags |= IFA_F_PERMANENT;
819 
820 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
821 	if (addrconf_finite_timeout(timeout)) {
822 		if (timeout == 0)
823 			flags |= IFA_F_DEPRECATED;
824 		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
825 	}
826 	WRITE_ONCE(ifa->ifa_flags, flags);
827 	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
828 	if (!ifa->ifa_cstamp)
829 		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
830 }
831 
/* Build a new in_ifaddr from an RTM_NEWADDR netlink message.
 *
 * @net:           namespace in which ifm->ifa_index is resolved
 * @nlh:           the netlink message
 * @pvalid_lft:    set to the requested valid lifetime, but only when
 *                 IFA_CACHEINFO is present
 * @pprefered_lft: set to the requested preferred lifetime, likewise
 * @extack:        extended ack for error messages
 *
 * On success the returned address holds a reference on the in_device
 * (stored in ifa->ifa_dev) and is not yet linked anywhere. IFA_ADDRESS
 * defaults to IFA_LOCAL and the label defaults to the device name.
 *
 * Returns the address or an ERR_PTR(): a parse error, -EINVAL (prefix
 * length above 32, missing IFA_LOCAL, invalid lifetimes), -ENODEV (no such
 * interface) or -ENOBUFS (no in_device, or allocation failure).
 */
832 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
833 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
834 				       struct netlink_ext_ack *extack)
835 {
836 	struct nlattr *tb[IFA_MAX+1];
837 	struct in_ifaddr *ifa;
838 	struct ifaddrmsg *ifm;
839 	struct net_device *dev;
840 	struct in_device *in_dev;
841 	int err;
842 
843 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
844 				     ifa_ipv4_policy, extack);
845 	if (err < 0)
846 		goto errout;
847 
848 	ifm = nlmsg_data(nlh);
849 	err = -EINVAL;
850 
851 	if (ifm->ifa_prefixlen > 32) {
852 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
853 		goto errout;
854 	}
855 
856 	if (!tb[IFA_LOCAL]) {
857 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
858 		goto errout;
859 	}
860 
861 	dev = __dev_get_by_index(net, ifm->ifa_index);
862 	err = -ENODEV;
863 	if (!dev) {
864 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
865 		goto errout;
866 	}
867 
868 	in_dev = __in_dev_get_rtnl(dev);
869 	err = -ENOBUFS;
870 	if (!in_dev)
871 		goto errout;
872 
873 	ifa = inet_alloc_ifa();
874 	if (!ifa)
875 		/*
876 		 * A potential indev allocation can be left alive, it stays
877 		 * assigned to its device and is destroyed with it.
878 		 */
879 		goto errout;
880 
881 	ipv4_devconf_setall(in_dev);
882 	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* Reference owned by ifa->ifa_dev (assigned below); it is dropped
	 * by inet_rcu_free_ifa() when the address is freed.
	 */
883 	in_dev_hold(in_dev);
884 
885 	if (!tb[IFA_ADDRESS])
886 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
887 
888 	INIT_HLIST_NODE(&ifa->hash);
889 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
890 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* The 32-bit IFA_FLAGS attribute overrides the header's flags field. */
891 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
892 					 ifm->ifa_flags;
893 	ifa->ifa_scope = ifm->ifa_scope;
894 	ifa->ifa_dev = in_dev;
895 
896 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
897 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
898 
899 	if (tb[IFA_BROADCAST])
900 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
901 
902 	if (tb[IFA_LABEL])
903 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
904 	else
905 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
906 
907 	if (tb[IFA_RT_PRIORITY])
908 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
909 
910 	if (tb[IFA_PROTO])
911 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
912 
913 	if (tb[IFA_CACHEINFO]) {
914 		struct ifa_cacheinfo *ci;
915 
916 		ci = nla_data(tb[IFA_CACHEINFO]);
		/* Valid lifetime must be non-zero and >= preferred lifetime. */
917 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
918 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
919 			err = -EINVAL;
920 			goto errout_free;
921 		}
922 		*pvalid_lft = ci->ifa_valid;
923 		*pprefered_lft = ci->ifa_prefered;
924 	}
925 
926 	return ifa;
927 
928 errout_free:
929 	inet_free_ifa(ifa);
930 errout:
931 	return ERR_PTR(err);
932 }
933 
/* Find an address already configured on ifa->ifa_dev that has the same
 * netmask, the same subnet (per inet_ifa_match()) and the same local
 * address as @ifa. Returns it, or NULL if there is none or if @ifa has a
 * zero local address. The list is walked with the RTNL iterator, so the
 * caller is expected to hold RTNL.
 */
934 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
935 {
936 	struct in_device *in_dev = ifa->ifa_dev;
937 	struct in_ifaddr *ifa1;
938 
939 	if (!ifa->ifa_local)
940 		return NULL;
941 
942 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
943 		if (ifa1->ifa_mask == ifa->ifa_mask &&
944 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
945 		    ifa1->ifa_local == ifa->ifa_local)
946 			return ifa1;
947 	}
948 	return NULL;
949 }
950 
/* Netlink handler adding or updating an IPv4 address. RTNL must be held
 * (asserted).
 *
 * If no equal address exists yet, the parsed address is inserted, after
 * joining its multicast group when IFA_F_MCAUTOJOIN is set. If an equal
 * address exists, the request must carry NLM_F_REPLACE and must not carry
 * NLM_F_EXCL; then only the route metric, protocol and lifetimes of the
 * existing address are updated and an RTM_NEWADDR message is sent.
 *
 * Returns 0 on success or a negative errno: errors from rtm_to_ifaddr(),
 * from the multicast join, from __inet_insert_ifa(), or -EEXIST.
 */
951 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
952 			    struct netlink_ext_ack *extack)
953 {
954 	struct net *net = sock_net(skb->sk);
955 	struct in_ifaddr *ifa;
956 	struct in_ifaddr *ifa_existing;
957 	__u32 valid_lft = INFINITY_LIFE_TIME;
958 	__u32 prefered_lft = INFINITY_LIFE_TIME;
959 
960 	ASSERT_RTNL();
961 
962 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
963 	if (IS_ERR(ifa))
964 		return PTR_ERR(ifa);
965 
966 	ifa_existing = find_matching_ifa(ifa);
967 	if (!ifa_existing) {
968 		/* It would be best to check for !NLM_F_CREATE here but
969 		 * userspace already relies on not having to provide this.
970 		 */
971 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
972 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
973 			int ret = ip_mc_autojoin_config(net, true, ifa);
974 
975 			if (ret < 0) {
976 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
977 				inet_free_ifa(ifa);
978 				return ret;
979 			}
980 		}
		/* __inet_insert_ifa() takes ownership of ifa, also on error. */
981 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
982 					 extack);
983 	} else {
		/* Keep the only values taken over from the parsed address,
		 * then release it; from here on we work on the existing one.
		 */
984 		u32 new_metric = ifa->ifa_rt_priority;
985 		u8 new_proto = ifa->ifa_proto;
986 
987 		inet_free_ifa(ifa);
988 
989 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
990 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
991 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
992 			return -EEXIST;
993 		}
994 		ifa = ifa_existing;
995 
996 		if (ifa->ifa_rt_priority != new_metric) {
997 			fib_modify_prefix_metric(ifa, new_metric);
998 			ifa->ifa_rt_priority = new_metric;
999 		}
1000 
1001 		ifa->ifa_proto = new_proto;
1002 
1003 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		/* Re-run the lifetime check right away with the new values. */
1004 		cancel_delayed_work(&check_lifetime_work);
1005 		queue_delayed_work(system_power_efficient_wq,
1006 				&check_lifetime_work, 0);
1007 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1008 	}
1009 	return 0;
1010 }
1011 
1012 /*
1013  *	Determine a default network mask, based on the IP address.
 *
 *	@addr is in network byte order. Returns the prefix length: 0 for
 *	a zeronet or limited-broadcast address, 8/16/24 for class A/B/C,
 *	32 for class E, and -1 for anything else.
1014  */
1015 
1016 static int inet_abc_len(__be32 addr)
1017 {
1018 	int rc = -1;	/* Something else, probably a multicast. */
1019 
1020 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1021 		rc = 0;
1022 	else {
		/* The IN_CLASSx() tests operate on host byte order. */
1023 		__u32 haddr = ntohl(addr);
1024 		if (IN_CLASSA(haddr))
1025 			rc = 8;
1026 		else if (IN_CLASSB(haddr))
1027 			rc = 16;
1028 		else if (IN_CLASSC(haddr))
1029 			rc = 24;
1030 		else if (IN_CLASSE(haddr))
1031 			rc = 32;
1032 	}
1033 
1034 	return rc;
1035 }
1036 
1037 
/*
 * devinet_ioctl - handle the SIOC[GS]IF{ADDR,BRDADDR,DSTADDR,NETMASK} and
 * SIOCSIFFLAGS ioctls for IPv4.
 * @net: namespace in which ifr->ifr_name is looked up
 * @cmd: ioctl command; any value not listed in the first switch yields -EINVAL
 * @ifr: request; ifr_name names the device or an alias label ("dev:N"),
 *	 ifr_addr / ifr_flags carry the value to read back or to set
 *
 * "Get" commands store their answer in @ifr->ifr_addr; no copy to user
 * space happens in this function.  "Set" commands need CAP_NET_ADMIN in
 * net->user_ns and, except for SIOCSIFFLAGS, an AF_INET sin_family.
 * Device lookup and all address-list changes run between rtnl_lock() and
 * rtnl_unlock().
 *
 * Returns 0 on success or a negative errno: -EPERM, -EINVAL, -ENODEV,
 * -EADDRNOTAVAIL, -ENOBUFS, or the result of dev_change_flags() /
 * inet_set_ifa().
 */
1038 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1039 {
1040 	struct sockaddr_in sin_orig;
1041 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1042 	struct in_ifaddr __rcu **ifap = NULL;
1043 	struct in_device *in_dev;
1044 	struct in_ifaddr *ifa = NULL;
1045 	struct net_device *dev;
1046 	char *colon;
1047 	int ret = -EFAULT;
1048 	int tryaddrmatch = 0;
1049 
	/* Force NUL termination before the name is used as a C string. */
1050 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1051 
1052 	/* save original address for comparison */
1053 	memcpy(&sin_orig, sin, sizeof(*sin));
1054 
	/* Temporarily cut off any ":alias" suffix so that dev_load() and
	 * __dev_get_by_name() see the bare device name; the ':' is put back
	 * once the device has been found.
	 */
1055 	colon = strchr(ifr->ifr_name, ':');
1056 	if (colon)
1057 		*colon = 0;
1058 
1059 	dev_load(net, ifr->ifr_name);
1060 
	/* First pass over cmd: permission / family checks only, no locking. */
1061 	switch (cmd) {
1062 	case SIOCGIFADDR:	/* Get interface address */
1063 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1064 	case SIOCGIFDSTADDR:	/* Get the destination address */
1065 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1066 		/* Note that these ioctls will not sleep,
1067 		   so that we do not impose a lock.
1068 		   One day we will be forced to put shlock here (I mean SMP)
1069 		 */
1070 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1071 		memset(sin, 0, sizeof(*sin));
1072 		sin->sin_family = AF_INET;
1073 		break;
1074 
1075 	case SIOCSIFFLAGS:
1076 		ret = -EPERM;
1077 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1078 			goto out;
1079 		break;
1080 	case SIOCSIFADDR:	/* Set interface address (and family) */
1081 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1082 	case SIOCSIFDSTADDR:	/* Set the destination address */
1083 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1084 		ret = -EPERM;
1085 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1086 			goto out;
1087 		ret = -EINVAL;
1088 		if (sin->sin_family != AF_INET)
1089 			goto out;
1090 		break;
1091 	default:
1092 		ret = -EINVAL;
1093 		goto out;
1094 	}
1095 
1096 	rtnl_lock();
1097 
1098 	ret = -ENODEV;
1099 	dev = __dev_get_by_name(net, ifr->ifr_name);
1100 	if (!dev)
1101 		goto done;
1102 
	/* Restore the full label; the strcmp() calls below compare it
	 * against ifa_label.
	 */
1103 	if (colon)
1104 		*colon = ':';
1105 
	/* Locate the address entry; ifap is left pointing at the list link
	 * that references ifa, which is what inet_del_ifa() is handed below.
	 */
1106 	in_dev = __in_dev_get_rtnl(dev);
1107 	if (in_dev) {
1108 		if (tryaddrmatch) {
1109 			/* Matthias Andree */
1110 			/* compare label and address (4.4BSD style) */
1111 			/* note: we only do this for a limited set of ioctls
1112 			   and only if the original address family was AF_INET.
1113 			   This is checked above. */
1114 
1115 			for (ifap = &in_dev->ifa_list;
1116 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1117 			     ifap = &ifa->ifa_next) {
1118 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1119 				    sin_orig.sin_addr.s_addr ==
1120 							ifa->ifa_local) {
1121 					break; /* found */
1122 				}
1123 			}
1124 		}
1125 		/* we didn't get a match, maybe the application is
1126 		   4.3BSD-style and passed in junk so we fall back to
1127 		   comparing just the label */
1128 		if (!ifa) {
1129 			for (ifap = &in_dev->ifa_list;
1130 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1131 			     ifap = &ifa->ifa_next)
1132 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1133 					break;
1134 		}
1135 	}
1136 
	/* Only SIOCSIFADDR (which may allocate a new entry) and SIOCSIFFLAGS
	 * are allowed to continue without a matching address.
	 */
1137 	ret = -EADDRNOTAVAIL;
1138 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1139 		goto done;
1140 
1141 	switch (cmd) {
1142 	case SIOCGIFADDR:	/* Get interface address */
1143 		ret = 0;
1144 		sin->sin_addr.s_addr = ifa->ifa_local;
1145 		break;
1146 
1147 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1148 		ret = 0;
1149 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1150 		break;
1151 
1152 	case SIOCGIFDSTADDR:	/* Get the destination address */
1153 		ret = 0;
1154 		sin->sin_addr.s_addr = ifa->ifa_address;
1155 		break;
1156 
1157 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1158 		ret = 0;
1159 		sin->sin_addr.s_addr = ifa->ifa_mask;
1160 		break;
1161 
1162 	case SIOCSIFFLAGS:
		/* On an alias label the only action taken is deleting the
		 * address when IFF_UP is cleared; the device flags are not
		 * touched.  Without a colon the flags go to the device.
		 */
1163 		if (colon) {
1164 			ret = -EADDRNOTAVAIL;
1165 			if (!ifa)
1166 				break;
1167 			ret = 0;
1168 			if (!(ifr->ifr_flags & IFF_UP))
1169 				inet_del_ifa(in_dev, ifap, 1);
1170 			break;
1171 		}
1172 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1173 		break;
1174 
1175 	case SIOCSIFADDR:	/* Set interface address (and family) */
1176 		ret = -EINVAL;
1177 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1178 			break;
1179 
1180 		if (!ifa) {
1181 			ret = -ENOBUFS;
1182 			ifa = inet_alloc_ifa();
1183 			if (!ifa)
1184 				break;
1185 			INIT_HLIST_NODE(&ifa->hash);
1186 			if (colon)
1187 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1188 			else
1189 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1190 		} else {
			/* Existing entry: nothing to do if the address is
			 * unchanged, otherwise take it off the list and
			 * reuse the same structure for the new address.
			 */
1191 			ret = 0;
1192 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1193 				break;
1194 			inet_del_ifa(in_dev, ifap, 0);
1195 			ifa->ifa_broadcast = 0;
1196 			ifa->ifa_scope = 0;
1197 		}
1198 
1199 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1200 
		/* Non point-to-point devices get a prefix length from
		 * inet_abc_len(); point-to-point devices get a /32.
		 */
1201 		if (!(dev->flags & IFF_POINTOPOINT)) {
1202 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1203 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1204 			if ((dev->flags & IFF_BROADCAST) &&
1205 			    ifa->ifa_prefixlen < 31)
1206 				ifa->ifa_broadcast = ifa->ifa_address |
1207 						     ~ifa->ifa_mask;
1208 		} else {
1209 			ifa->ifa_prefixlen = 32;
1210 			ifa->ifa_mask = inet_make_mask(32);
1211 		}
1212 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1213 		ret = inet_set_ifa(dev, ifa);
1214 		break;
1215 
1216 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1217 		ret = 0;
1218 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1219 			inet_del_ifa(in_dev, ifap, 0);
1220 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1221 			inet_insert_ifa(ifa);
1222 		}
1223 		break;
1224 
1225 	case SIOCSIFDSTADDR:	/* Set the destination address */
1226 		ret = 0;
1227 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1228 			break;
1229 		ret = -EINVAL;
1230 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1231 			break;
1232 		ret = 0;
1233 		inet_del_ifa(in_dev, ifap, 0);
1234 		ifa->ifa_address = sin->sin_addr.s_addr;
1235 		inet_insert_ifa(ifa);
1236 		break;
1237 
1238 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1239 
1240 		/*
1241 		 *	The mask we set must be legal.
1242 		 */
1243 		ret = -EINVAL;
1244 		if (bad_mask(sin->sin_addr.s_addr, 0))
1245 			break;
1246 		ret = 0;
1247 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1248 			__be32 old_mask = ifa->ifa_mask;
1249 			inet_del_ifa(in_dev, ifap, 0);
1250 			ifa->ifa_mask = sin->sin_addr.s_addr;
1251 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1252 
1253 			/* See if current broadcast address matches
1254 			 * with current netmask, then recalculate
1255 			 * the broadcast address. Otherwise it's a
1256 			 * funny address, so don't touch it since
1257 			 * the user seems to know what (s)he's doing...
1258 			 */
1259 			if ((dev->flags & IFF_BROADCAST) &&
1260 			    (ifa->ifa_prefixlen < 31) &&
1261 			    (ifa->ifa_broadcast ==
1262 			     (ifa->ifa_local|~old_mask))) {
1263 				ifa->ifa_broadcast = (ifa->ifa_local |
1264 						      ~sin->sin_addr.s_addr);
1265 			}
1266 			inet_insert_ifa(ifa);
1267 		}
1268 		break;
1269 	}
1270 done:
1271 	rtnl_unlock();
1272 out:
1273 	return ret;
1274 }
1275 
1276 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1277 {
1278 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1279 	const struct in_ifaddr *ifa;
1280 	struct ifreq ifr;
1281 	int done = 0;
1282 
1283 	if (WARN_ON(size > sizeof(struct ifreq)))
1284 		goto out;
1285 
1286 	if (!in_dev)
1287 		goto out;
1288 
1289 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1290 		if (!buf) {
1291 			done += size;
1292 			continue;
1293 		}
1294 		if (len < size)
1295 			break;
1296 		memset(&ifr, 0, sizeof(struct ifreq));
1297 		strcpy(ifr.ifr_name, ifa->ifa_label);
1298 
1299 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1300 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1301 								ifa->ifa_local;
1302 
1303 		if (copy_to_user(buf + done, &ifr, size)) {
1304 			done = -EFAULT;
1305 			break;
1306 		}
1307 		len  -= size;
1308 		done += size;
1309 	}
1310 out:
1311 	return done;
1312 }
1313 
1314 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1315 				 int scope)
1316 {
1317 	const struct in_ifaddr *ifa;
1318 
1319 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1320 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1321 			continue;
1322 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1323 		    ifa->ifa_scope <= scope)
1324 			return ifa->ifa_local;
1325 	}
1326 
1327 	return 0;
1328 }
1329 
/*
 * inet_select_addr - choose a local IPv4 address, preferring @dev.
 * @dev:   device whose addresses are tried first
 * @dst:   destination used for the subnet match, 0 to accept any address
 * @scope: largest ifa_scope value that is acceptable
 *
 * Runs entirely inside rcu_read_lock().  Search order:
 *  1. non-secondary addresses on @dev: an address for which
 *     inet_ifa_match(dst, ifa) holds (or any address when @dst is 0) ends
 *     the scan; otherwise the first address passing the scope test is
 *     kept as a fallback;
 *  2. the l3mdev master of @dev, if it has one;
 *  3. every device in the namespace with the same l3mdev master index.
 *
 * Returns the selected address, or 0 if none was found.
 */
1330 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1331 {
1332 	const struct in_ifaddr *ifa;
1333 	__be32 addr = 0;
1334 	unsigned char localnet_scope = RT_SCOPE_HOST;
1335 	struct in_device *in_dev;
1336 	struct net *net = dev_net(dev);
1337 	int master_idx;
1338 
1339 	rcu_read_lock();
1340 	in_dev = __in_dev_get_rcu(dev);
1341 	if (!in_dev)
1342 		goto no_in_dev;
1343 
	/* With route_localnet set, the scope used in the test below is
	 * capped at RT_SCOPE_LINK instead of RT_SCOPE_HOST.
	 */
1344 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1345 		localnet_scope = RT_SCOPE_LINK;
1346 
1347 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1348 		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1349 			continue;
1350 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1351 			continue;
1352 		if (!dst || inet_ifa_match(dst, ifa)) {
1353 			addr = ifa->ifa_local;
1354 			break;
1355 		}
		/* Remember the first acceptable address as a fallback. */
1356 		if (!addr)
1357 			addr = ifa->ifa_local;
1358 	}
1359 
1360 	if (addr)
1361 		goto out_unlock;
1362 no_in_dev:
1363 	master_idx = l3mdev_master_ifindex_rcu(dev);
1364 
1365 	/* For VRFs, the VRF device takes the place of the loopback device,
1366 	 * with addresses on it being preferred.  Note in such cases the
1367 	 * loopback device will be among the devices that fail the master_idx
1368 	 * equality check in the loop below.
1369 	 */
1370 	if (master_idx &&
1371 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1372 	    (in_dev = __in_dev_get_rcu(dev))) {
1373 		addr = in_dev_select_addr(in_dev, scope);
1374 		if (addr)
1375 			goto out_unlock;
1376 	}
1377 
1378 	/* Not loopback addresses on loopback should be preferred
1379 	   in this case. It is important that lo is the first interface
1380 	   in dev_base list.
1381 	 */
1382 	for_each_netdev_rcu(net, dev) {
1383 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1384 			continue;
1385 
1386 		in_dev = __in_dev_get_rcu(dev);
1387 		if (!in_dev)
1388 			continue;
1389 
1390 		addr = in_dev_select_addr(in_dev, scope);
1391 		if (addr)
1392 			goto out_unlock;
1393 	}
1394 out_unlock:
1395 	rcu_read_unlock();
1396 	return addr;
1397 }
1398 EXPORT_SYMBOL(inet_select_addr);
1399 
/*
 * confirm_addr_indev - per-device worker for inet_confirm_addr().
 * @in_dev: device whose address list is scanned (with the _rcu iterator)
 * @dst:    if non-zero, some address on the device must match it per
 *	    inet_ifa_match()
 * @local:  if non-zero, the address to confirm; 0 lets the function pick
 * @scope:  largest acceptable value of min(ifa_scope, localnet_scope)
 *
 * Two things are tracked while walking the list: "addr", the candidate
 * local address, and "same", whether an entry satisfying the @local/@dst
 * subnet test has been seen.  The candidate is returned only when "same"
 * ended up set; otherwise the result is 0.
 */
1400 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1401 			      __be32 local, int scope)
1402 {
1403 	unsigned char localnet_scope = RT_SCOPE_HOST;
1404 	const struct in_ifaddr *ifa;
1405 	__be32 addr = 0;
1406 	int same = 0;
1407 
1408 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1409 		localnet_scope = RT_SCOPE_LINK;
1410 
1411 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1412 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1413 
		/* First entry that equals @local (or any entry when @local
		 * is 0) and passes the scope test becomes the candidate.
		 */
1414 		if (!addr &&
1415 		    (local == ifa->ifa_local || !local) &&
1416 		    min_scope <= scope) {
1417 			addr = ifa->ifa_local;
1418 			if (same)
1419 				break;
1420 		}
1421 		if (!same) {
1422 			same = (!local || inet_ifa_match(local, ifa)) &&
1423 				(!dst || inet_ifa_match(dst, ifa));
1424 			if (same && addr) {
1425 				if (local || !dst)
1426 					break;
1427 				/* Is the selected addr into dst subnet? */
1428 				if (inet_ifa_match(addr, ifa))
1429 					break;
1430 				/* No, then can we use new local src? */
1431 				if (min_scope <= scope) {
1432 					addr = ifa->ifa_local;
1433 					break;
1434 				}
1435 				/* search for large dst subnet for addr */
1436 				same = 0;
1437 			}
1438 		}
1439 	}
1440 
1441 	return same ? addr : 0;
1442 }
1443 
1444 /*
1445  * Confirm that local IP address exists using wildcards:
1446  * - net: netns to check, cannot be NULL
1447  * - in_dev: only on this interface, NULL=any interface
1448  * - dst: only in the same subnet as dst, 0=any dst
1449  * - local: address, 0=autoselect the local address
1450  * - scope: maximum allowed scope value for the local address
1451  */
1452 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1453 			 __be32 dst, __be32 local, int scope)
1454 {
1455 	__be32 addr = 0;
1456 	struct net_device *dev;
1457 
1458 	if (in_dev)
1459 		return confirm_addr_indev(in_dev, dst, local, scope);
1460 
1461 	rcu_read_lock();
1462 	for_each_netdev_rcu(net, dev) {
1463 		in_dev = __in_dev_get_rcu(dev);
1464 		if (in_dev) {
1465 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1466 			if (addr)
1467 				break;
1468 		}
1469 	}
1470 	rcu_read_unlock();
1471 
1472 	return addr;
1473 }
1474 EXPORT_SYMBOL(inet_confirm_addr);
1475 
1476 /*
1477  *	Device notifier
1478  */
1479 
1480 int register_inetaddr_notifier(struct notifier_block *nb)
1481 {
1482 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1483 }
1484 EXPORT_SYMBOL(register_inetaddr_notifier);
1485 
1486 int unregister_inetaddr_notifier(struct notifier_block *nb)
1487 {
1488 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1489 }
1490 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1491 
1492 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1493 {
1494 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1495 }
1496 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1497 
1498 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1499 {
1500 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1501 	    nb);
1502 }
1503 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1504 
1505 /* Rename ifa_labels for a device name change. Make some effort to preserve
1506  * existing alias numbering and to create unique labels if possible.
1507 */
1508 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1509 {
1510 	struct in_ifaddr *ifa;
1511 	int named = 0;
1512 
1513 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1514 		char old[IFNAMSIZ], *dot;
1515 
1516 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1517 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1518 		if (named++ == 0)
1519 			goto skip;
1520 		dot = strchr(old, ':');
1521 		if (!dot) {
1522 			sprintf(old, ":%d", named);
1523 			dot = old;
1524 		}
1525 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1526 			strcat(ifa->ifa_label, dot);
1527 		else
1528 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1529 skip:
1530 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1531 	}
1532 }
1533 
1534 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1535 					struct in_device *in_dev)
1536 
1537 {
1538 	const struct in_ifaddr *ifa;
1539 
1540 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1541 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1542 			 ifa->ifa_local, dev,
1543 			 ifa->ifa_local, NULL,
1544 			 dev->dev_addr, NULL);
1545 	}
1546 }
1547 
1548 /* Called only under RTNL semaphore */
1549 
/*
 * inetdev_event - netdevice notifier callback for the IPv4 layer.
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * If the device has no in_device yet, one is created on NETDEV_REGISTER
 * (loopback devices additionally get NOXFRM and NOPOLICY set) or on
 * NETDEV_CHANGEMTU when the new MTU is valid; every other event is then
 * ignored.  Otherwise the event is dispatched by the switch below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case the error is returned through
 * notifier_from_errno().
 */
1550 static int inetdev_event(struct notifier_block *this, unsigned long event,
1551 			 void *ptr)
1552 {
1553 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1554 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1555 
1556 	ASSERT_RTNL();
1557 
1558 	if (!in_dev) {
1559 		if (event == NETDEV_REGISTER) {
1560 			in_dev = inetdev_init(dev);
1561 			if (IS_ERR(in_dev))
1562 				return notifier_from_errno(PTR_ERR(in_dev));
1563 			if (dev->flags & IFF_LOOPBACK) {
1564 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1565 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1566 			}
1567 		} else if (event == NETDEV_CHANGEMTU) {
1568 			/* Re-enabling IP */
1569 			if (inetdev_valid_mtu(dev->mtu))
1570 				in_dev = inetdev_init(dev);
1571 		}
1572 		goto out;
1573 	}
1574 
1575 	switch (event) {
1576 	case NETDEV_REGISTER:
		/* A device being registered should not have an in_device
		 * already; log it and clear the pointer.
		 */
1577 		pr_debug("%s: bug\n", __func__);
1578 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1579 		break;
1580 	case NETDEV_UP:
1581 		if (!inetdev_valid_mtu(dev->mtu))
1582 			break;
		/* A loopback device coming up is given INADDR_LOOPBACK/8
		 * with host scope.  Allocation failure is silently
		 * tolerated.
		 */
1583 		if (dev->flags & IFF_LOOPBACK) {
1584 			struct in_ifaddr *ifa = inet_alloc_ifa();
1585 
1586 			if (ifa) {
1587 				INIT_HLIST_NODE(&ifa->hash);
1588 				ifa->ifa_local =
1589 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1590 				ifa->ifa_prefixlen = 8;
1591 				ifa->ifa_mask = inet_make_mask(8);
1592 				in_dev_hold(in_dev);
1593 				ifa->ifa_dev = in_dev;
1594 				ifa->ifa_scope = RT_SCOPE_HOST;
1595 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1596 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1597 						 INFINITY_LIFE_TIME);
1598 				ipv4_devconf_setall(in_dev);
1599 				neigh_parms_data_state_setall(in_dev->arp_parms);
1600 				inet_insert_ifa(ifa);
1601 			}
1602 		}
1603 		ip_mc_up(in_dev);
1604 		fallthrough;
1605 	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce when arp_notify is set;
		 * NOTIFY_PEERS always does.
		 */
1606 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1607 			break;
1608 		fallthrough;
1609 	case NETDEV_NOTIFY_PEERS:
1610 		/* Send gratuitous ARP to notify of link change */
1611 		inetdev_send_gratuitous_arp(dev, in_dev);
1612 		break;
1613 	case NETDEV_DOWN:
1614 		ip_mc_down(in_dev);
1615 		break;
1616 	case NETDEV_PRE_TYPE_CHANGE:
1617 		ip_mc_unmap(in_dev);
1618 		break;
1619 	case NETDEV_POST_TYPE_CHANGE:
1620 		ip_mc_remap(in_dev);
1621 		break;
1622 	case NETDEV_CHANGEMTU:
1623 		if (inetdev_valid_mtu(dev->mtu))
1624 			break;
1625 		/* disable IP when MTU is not enough */
1626 		fallthrough;
1627 	case NETDEV_UNREGISTER:
1628 		inetdev_destroy(in_dev);
1629 		break;
1630 	case NETDEV_CHANGENAME:
1631 		/* Do not notify about label change, this event is
1632 		 * not interesting to applications using netlink.
1633 		 */
1634 		inetdev_changename(dev, in_dev);
1635 
		/* Re-register the per-device sysctl entries for in_dev. */
1636 		devinet_sysctl_unregister(in_dev);
1637 		devinet_sysctl_register(in_dev);
1638 		break;
1639 	}
1640 out:
1641 	return NOTIFY_DONE;
1642 }
1643 
/* Notifier block whose callback is inetdev_event(). */
1644 static struct notifier_block ip_netdev_notifier = {
1645 	.notifier_call = inetdev_event,
1646 };
1647 
1648 static size_t inet_nlmsg_size(void)
1649 {
1650 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1651 	       + nla_total_size(4) /* IFA_ADDRESS */
1652 	       + nla_total_size(4) /* IFA_LOCAL */
1653 	       + nla_total_size(4) /* IFA_BROADCAST */
1654 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1655 	       + nla_total_size(4)  /* IFA_FLAGS */
1656 	       + nla_total_size(1)  /* IFA_PROTO */
1657 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1658 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1659 }
1660 
1661 static inline u32 cstamp_delta(unsigned long cstamp)
1662 {
1663 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1664 }
1665 
1666 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1667 			 unsigned long tstamp, u32 preferred, u32 valid)
1668 {
1669 	struct ifa_cacheinfo ci;
1670 
1671 	ci.cstamp = cstamp_delta(cstamp);
1672 	ci.tstamp = cstamp_delta(tstamp);
1673 	ci.ifa_prefered = preferred;
1674 	ci.ifa_valid = valid;
1675 
1676 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1677 }
1678 
1679 static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1680 			    struct inet_fill_args *args)
1681 {
1682 	struct ifaddrmsg *ifm;
1683 	struct nlmsghdr  *nlh;
1684 	unsigned long tstamp;
1685 	u32 preferred, valid;
1686 
1687 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1688 			args->flags);
1689 	if (!nlh)
1690 		return -EMSGSIZE;
1691 
1692 	ifm = nlmsg_data(nlh);
1693 	ifm->ifa_family = AF_INET;
1694 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1695 	ifm->ifa_flags = READ_ONCE(ifa->ifa_flags);
1696 	ifm->ifa_scope = ifa->ifa_scope;
1697 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1698 
1699 	if (args->netnsid >= 0 &&
1700 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1701 		goto nla_put_failure;
1702 
1703 	tstamp = READ_ONCE(ifa->ifa_tstamp);
1704 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1705 		preferred = READ_ONCE(ifa->ifa_preferred_lft);
1706 		valid = READ_ONCE(ifa->ifa_valid_lft);
1707 		if (preferred != INFINITY_LIFE_TIME) {
1708 			long tval = (jiffies - tstamp) / HZ;
1709 
1710 			if (preferred > tval)
1711 				preferred -= tval;
1712 			else
1713 				preferred = 0;
1714 			if (valid != INFINITY_LIFE_TIME) {
1715 				if (valid > tval)
1716 					valid -= tval;
1717 				else
1718 					valid = 0;
1719 			}
1720 		}
1721 	} else {
1722 		preferred = INFINITY_LIFE_TIME;
1723 		valid = INFINITY_LIFE_TIME;
1724 	}
1725 	if ((ifa->ifa_address &&
1726 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1727 	    (ifa->ifa_local &&
1728 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1729 	    (ifa->ifa_broadcast &&
1730 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1731 	    (ifa->ifa_label[0] &&
1732 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1733 	    (ifa->ifa_proto &&
1734 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1735 	    nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) ||
1736 	    (ifa->ifa_rt_priority &&
1737 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1738 	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1739 			  preferred, valid))
1740 		goto nla_put_failure;
1741 
1742 	nlmsg_end(skb, nlh);
1743 	return 0;
1744 
1745 nla_put_failure:
1746 	nlmsg_cancel(skb, nlh);
1747 	return -EMSGSIZE;
1748 }
1749 
1750 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1751 				      struct inet_fill_args *fillargs,
1752 				      struct net **tgt_net, struct sock *sk,
1753 				      struct netlink_callback *cb)
1754 {
1755 	struct netlink_ext_ack *extack = cb->extack;
1756 	struct nlattr *tb[IFA_MAX+1];
1757 	struct ifaddrmsg *ifm;
1758 	int err, i;
1759 
1760 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1761 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1762 		return -EINVAL;
1763 	}
1764 
1765 	ifm = nlmsg_data(nlh);
1766 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1767 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1768 		return -EINVAL;
1769 	}
1770 
1771 	fillargs->ifindex = ifm->ifa_index;
1772 	if (fillargs->ifindex) {
1773 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1774 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1775 	}
1776 
1777 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1778 					    ifa_ipv4_policy, extack);
1779 	if (err < 0)
1780 		return err;
1781 
1782 	for (i = 0; i <= IFA_MAX; ++i) {
1783 		if (!tb[i])
1784 			continue;
1785 
1786 		if (i == IFA_TARGET_NETNSID) {
1787 			struct net *net;
1788 
1789 			fillargs->netnsid = nla_get_s32(tb[i]);
1790 
1791 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1792 			if (IS_ERR(net)) {
1793 				fillargs->netnsid = -1;
1794 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1795 				return PTR_ERR(net);
1796 			}
1797 			*tgt_net = net;
1798 		} else {
1799 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1800 			return -EINVAL;
1801 		}
1802 	}
1803 
1804 	return 0;
1805 }
1806 
1807 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1808 			    struct netlink_callback *cb, int *s_ip_idx,
1809 			    struct inet_fill_args *fillargs)
1810 {
1811 	struct in_ifaddr *ifa;
1812 	int ip_idx = 0;
1813 	int err;
1814 
1815 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1816 		if (ip_idx < *s_ip_idx) {
1817 			ip_idx++;
1818 			continue;
1819 		}
1820 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1821 		if (err < 0)
1822 			goto done;
1823 
1824 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1825 		ip_idx++;
1826 	}
1827 	err = 0;
1828 	ip_idx = 0;
1829 done:
1830 	*s_ip_idx = ip_idx;
1831 
1832 	return err;
1833 }
1834 
1835 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1836  */
1837 static u32 inet_base_seq(const struct net *net)
1838 {
1839 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1840 		  READ_ONCE(net->dev_base_seq);
1841 
1842 	/* Must not return 0 (see nl_dump_check_consistent()).
1843 	 * Chose a value far away from 0.
1844 	 */
1845 	if (!res)
1846 		res = 0x80000000;
1847 	return res;
1848 }
1849 
1850 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1851 {
1852 	const struct nlmsghdr *nlh = cb->nlh;
1853 	struct inet_fill_args fillargs = {
1854 		.portid = NETLINK_CB(cb->skb).portid,
1855 		.seq = nlh->nlmsg_seq,
1856 		.event = RTM_NEWADDR,
1857 		.flags = NLM_F_MULTI,
1858 		.netnsid = -1,
1859 	};
1860 	struct net *net = sock_net(skb->sk);
1861 	struct net *tgt_net = net;
1862 	struct {
1863 		unsigned long ifindex;
1864 		int ip_idx;
1865 	} *ctx = (void *)cb->ctx;
1866 	struct in_device *in_dev;
1867 	struct net_device *dev;
1868 	int err = 0;
1869 
1870 	rcu_read_lock();
1871 	if (cb->strict_check) {
1872 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1873 						 skb->sk, cb);
1874 		if (err < 0)
1875 			goto done;
1876 
1877 		if (fillargs.ifindex) {
1878 			err = -ENODEV;
1879 			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1880 			if (!dev)
1881 				goto done;
1882 			in_dev = __in_dev_get_rcu(dev);
1883 			if (!in_dev)
1884 				goto done;
1885 			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1886 					       &fillargs);
1887 			goto done;
1888 		}
1889 	}
1890 
1891 	cb->seq = inet_base_seq(tgt_net);
1892 
1893 	for_each_netdev_dump(net, dev, ctx->ifindex) {
1894 		in_dev = __in_dev_get_rcu(dev);
1895 		if (!in_dev)
1896 			continue;
1897 		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1898 				       &fillargs);
1899 		if (err < 0)
1900 			goto done;
1901 	}
1902 done:
1903 	if (fillargs.netnsid >= 0)
1904 		put_net(tgt_net);
1905 	rcu_read_unlock();
1906 	return err;
1907 }
1908 
1909 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1910 		      u32 portid)
1911 {
1912 	struct inet_fill_args fillargs = {
1913 		.portid = portid,
1914 		.seq = nlh ? nlh->nlmsg_seq : 0,
1915 		.event = event,
1916 		.flags = 0,
1917 		.netnsid = -1,
1918 	};
1919 	struct sk_buff *skb;
1920 	int err = -ENOBUFS;
1921 	struct net *net;
1922 
1923 	net = dev_net(ifa->ifa_dev->dev);
1924 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1925 	if (!skb)
1926 		goto errout;
1927 
1928 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1929 	if (err < 0) {
1930 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1931 		WARN_ON(err == -EMSGSIZE);
1932 		kfree_skb(skb);
1933 		goto errout;
1934 	}
1935 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1936 	return;
1937 errout:
1938 	if (err < 0)
1939 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1940 }
1941 
1942 static size_t inet_get_link_af_size(const struct net_device *dev,
1943 				    u32 ext_filter_mask)
1944 {
1945 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1946 
1947 	if (!in_dev)
1948 		return 0;
1949 
1950 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1951 }
1952 
1953 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1954 			     u32 ext_filter_mask)
1955 {
1956 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1957 	struct nlattr *nla;
1958 	int i;
1959 
1960 	if (!in_dev)
1961 		return -ENODATA;
1962 
1963 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1964 	if (!nla)
1965 		return -EMSGSIZE;
1966 
1967 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1968 		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
1969 
1970 	return 0;
1971 }
1972 
/* Attribute policy for the nested AF_INET link attributes: IFLA_INET_CONF
 * must itself be a nested attribute.
 */
1973 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1974 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1975 };
1976 
1977 static int inet_validate_link_af(const struct net_device *dev,
1978 				 const struct nlattr *nla,
1979 				 struct netlink_ext_ack *extack)
1980 {
1981 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1982 	int err, rem;
1983 
1984 	if (dev && !__in_dev_get_rtnl(dev))
1985 		return -EAFNOSUPPORT;
1986 
1987 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1988 					  inet_af_policy, extack);
1989 	if (err < 0)
1990 		return err;
1991 
1992 	if (tb[IFLA_INET_CONF]) {
1993 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1994 			int cfgid = nla_type(a);
1995 
1996 			if (nla_len(a) < 4)
1997 				return -EINVAL;
1998 
1999 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2000 				return -EINVAL;
2001 		}
2002 	}
2003 
2004 	return 0;
2005 }
2006 
2007 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2008 			    struct netlink_ext_ack *extack)
2009 {
2010 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2011 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2012 	int rem;
2013 
2014 	if (!in_dev)
2015 		return -EAFNOSUPPORT;
2016 
2017 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2018 		return -EINVAL;
2019 
2020 	if (tb[IFLA_INET_CONF]) {
2021 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2022 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2023 	}
2024 
2025 	return 0;
2026 }
2027 
2028 static int inet_netconf_msgsize_devconf(int type)
2029 {
2030 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2031 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2032 	bool all = false;
2033 
2034 	if (type == NETCONFA_ALL)
2035 		all = true;
2036 
2037 	if (all || type == NETCONFA_FORWARDING)
2038 		size += nla_total_size(4);
2039 	if (all || type == NETCONFA_RP_FILTER)
2040 		size += nla_total_size(4);
2041 	if (all || type == NETCONFA_MC_FORWARDING)
2042 		size += nla_total_size(4);
2043 	if (all || type == NETCONFA_BC_FORWARDING)
2044 		size += nla_total_size(4);
2045 	if (all || type == NETCONFA_PROXY_NEIGH)
2046 		size += nla_total_size(4);
2047 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2048 		size += nla_total_size(4);
2049 
2050 	return size;
2051 }
2052 
2053 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2054 				     const struct ipv4_devconf *devconf,
2055 				     u32 portid, u32 seq, int event,
2056 				     unsigned int flags, int type)
2057 {
2058 	struct nlmsghdr  *nlh;
2059 	struct netconfmsg *ncm;
2060 	bool all = false;
2061 
2062 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2063 			flags);
2064 	if (!nlh)
2065 		return -EMSGSIZE;
2066 
2067 	if (type == NETCONFA_ALL)
2068 		all = true;
2069 
2070 	ncm = nlmsg_data(nlh);
2071 	ncm->ncm_family = AF_INET;
2072 
2073 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2074 		goto nla_put_failure;
2075 
2076 	if (!devconf)
2077 		goto out;
2078 
2079 	if ((all || type == NETCONFA_FORWARDING) &&
2080 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2081 			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2082 		goto nla_put_failure;
2083 	if ((all || type == NETCONFA_RP_FILTER) &&
2084 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2085 			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2086 		goto nla_put_failure;
2087 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2088 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2089 			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2090 		goto nla_put_failure;
2091 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2092 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2093 			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2094 		goto nla_put_failure;
2095 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2096 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2097 			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2098 		goto nla_put_failure;
2099 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2100 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2101 			IPV4_DEVCONF_RO(*devconf,
2102 					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2103 		goto nla_put_failure;
2104 
2105 out:
2106 	nlmsg_end(skb, nlh);
2107 	return 0;
2108 
2109 nla_put_failure:
2110 	nlmsg_cancel(skb, nlh);
2111 	return -EMSGSIZE;
2112 }
2113 
2114 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2115 				 int ifindex, struct ipv4_devconf *devconf)
2116 {
2117 	struct sk_buff *skb;
2118 	int err = -ENOBUFS;
2119 
2120 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2121 	if (!skb)
2122 		goto errout;
2123 
2124 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2125 					event, 0, type);
2126 	if (err < 0) {
2127 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2128 		WARN_ON(err == -EMSGSIZE);
2129 		kfree_skb(skb);
2130 		goto errout;
2131 	}
2132 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2133 	return;
2134 errout:
2135 	if (err < 0)
2136 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2137 }
2138 
/* Attribute policy used when parsing netconf requests: each listed
 * attribute has its .len set to sizeof(int).  NETCONFA_MC_FORWARDING and
 * NETCONFA_BC_FORWARDING have no entry here.
 */
2139 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2140 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2141 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2142 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2143 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2144 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2145 };
2146 
2147 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2148 				      const struct nlmsghdr *nlh,
2149 				      struct nlattr **tb,
2150 				      struct netlink_ext_ack *extack)
2151 {
2152 	int i, err;
2153 
2154 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2155 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2156 		return -EINVAL;
2157 	}
2158 
2159 	if (!netlink_strict_get_check(skb))
2160 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2161 					      tb, NETCONFA_MAX,
2162 					      devconf_ipv4_policy, extack);
2163 
2164 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2165 					    tb, NETCONFA_MAX,
2166 					    devconf_ipv4_policy, extack);
2167 	if (err)
2168 		return err;
2169 
2170 	for (i = 0; i <= NETCONFA_MAX; i++) {
2171 		if (!tb[i])
2172 			continue;
2173 
2174 		switch (i) {
2175 		case NETCONFA_IFINDEX:
2176 			break;
2177 		default:
2178 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2179 			return -EINVAL;
2180 		}
2181 	}
2182 
2183 	return 0;
2184 }
2185 
2186 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2187 				    struct nlmsghdr *nlh,
2188 				    struct netlink_ext_ack *extack)
2189 {
2190 	struct net *net = sock_net(in_skb->sk);
2191 	struct nlattr *tb[NETCONFA_MAX + 1];
2192 	const struct ipv4_devconf *devconf;
2193 	struct in_device *in_dev = NULL;
2194 	struct net_device *dev = NULL;
2195 	struct sk_buff *skb;
2196 	int ifindex;
2197 	int err;
2198 
2199 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2200 	if (err)
2201 		return err;
2202 
2203 	if (!tb[NETCONFA_IFINDEX])
2204 		return -EINVAL;
2205 
2206 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2207 	switch (ifindex) {
2208 	case NETCONFA_IFINDEX_ALL:
2209 		devconf = net->ipv4.devconf_all;
2210 		break;
2211 	case NETCONFA_IFINDEX_DEFAULT:
2212 		devconf = net->ipv4.devconf_dflt;
2213 		break;
2214 	default:
2215 		err = -ENODEV;
2216 		dev = dev_get_by_index(net, ifindex);
2217 		if (dev)
2218 			in_dev = in_dev_get(dev);
2219 		if (!in_dev)
2220 			goto errout;
2221 		devconf = &in_dev->cnf;
2222 		break;
2223 	}
2224 
2225 	err = -ENOBUFS;
2226 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2227 	if (!skb)
2228 		goto errout;
2229 
2230 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2231 					NETLINK_CB(in_skb).portid,
2232 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2233 					NETCONFA_ALL);
2234 	if (err < 0) {
2235 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2236 		WARN_ON(err == -EMSGSIZE);
2237 		kfree_skb(skb);
2238 		goto errout;
2239 	}
2240 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2241 errout:
2242 	if (in_dev)
2243 		in_dev_put(in_dev);
2244 	dev_put(dev);
2245 	return err;
2246 }
2247 
2248 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2249 				     struct netlink_callback *cb)
2250 {
2251 	const struct nlmsghdr *nlh = cb->nlh;
2252 	struct net *net = sock_net(skb->sk);
2253 	struct {
2254 		unsigned long ifindex;
2255 		unsigned int all_default;
2256 	} *ctx = (void *)cb->ctx;
2257 	const struct in_device *in_dev;
2258 	struct net_device *dev;
2259 	int err = 0;
2260 
2261 	if (cb->strict_check) {
2262 		struct netlink_ext_ack *extack = cb->extack;
2263 		struct netconfmsg *ncm;
2264 
2265 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2266 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2267 			return -EINVAL;
2268 		}
2269 
2270 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2271 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2272 			return -EINVAL;
2273 		}
2274 	}
2275 
2276 	rcu_read_lock();
2277 	for_each_netdev_dump(net, dev, ctx->ifindex) {
2278 		in_dev = __in_dev_get_rcu(dev);
2279 		if (!in_dev)
2280 			continue;
2281 		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2282 						&in_dev->cnf,
2283 						NETLINK_CB(cb->skb).portid,
2284 						nlh->nlmsg_seq,
2285 						RTM_NEWNETCONF, NLM_F_MULTI,
2286 						NETCONFA_ALL);
2287 		if (err < 0)
2288 			goto done;
2289 	}
2290 	if (ctx->all_default == 0) {
2291 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2292 						net->ipv4.devconf_all,
2293 						NETLINK_CB(cb->skb).portid,
2294 						nlh->nlmsg_seq,
2295 						RTM_NEWNETCONF, NLM_F_MULTI,
2296 						NETCONFA_ALL);
2297 		if (err < 0)
2298 			goto done;
2299 		ctx->all_default++;
2300 	}
2301 	if (ctx->all_default == 1) {
2302 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2303 						net->ipv4.devconf_dflt,
2304 						NETLINK_CB(cb->skb).portid,
2305 						nlh->nlmsg_seq,
2306 						RTM_NEWNETCONF, NLM_F_MULTI,
2307 						NETCONFA_ALL);
2308 		if (err < 0)
2309 			goto done;
2310 		ctx->all_default++;
2311 	}
2312 done:
2313 	rcu_read_unlock();
2314 	return err;
2315 }
2316 
2317 #ifdef CONFIG_SYSCTL
2318 
2319 static void devinet_copy_dflt_conf(struct net *net, int i)
2320 {
2321 	struct net_device *dev;
2322 
2323 	rcu_read_lock();
2324 	for_each_netdev_rcu(net, dev) {
2325 		struct in_device *in_dev;
2326 
2327 		in_dev = __in_dev_get_rcu(dev);
2328 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2329 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2330 	}
2331 	rcu_read_unlock();
2332 }
2333 
2334 /* called with RTNL locked */
2335 static void inet_forward_change(struct net *net)
2336 {
2337 	struct net_device *dev;
2338 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2339 
2340 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2341 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2342 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2343 				    NETCONFA_FORWARDING,
2344 				    NETCONFA_IFINDEX_ALL,
2345 				    net->ipv4.devconf_all);
2346 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2347 				    NETCONFA_FORWARDING,
2348 				    NETCONFA_IFINDEX_DEFAULT,
2349 				    net->ipv4.devconf_dflt);
2350 
2351 	for_each_netdev(net, dev) {
2352 		struct in_device *in_dev;
2353 
2354 		if (on)
2355 			dev_disable_lro(dev);
2356 
2357 		in_dev = __in_dev_get_rtnl(dev);
2358 		if (in_dev) {
2359 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2360 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2361 						    NETCONFA_FORWARDING,
2362 						    dev->ifindex, &in_dev->cnf);
2363 		}
2364 	}
2365 }
2366 
2367 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2368 {
2369 	if (cnf == net->ipv4.devconf_dflt)
2370 		return NETCONFA_IFINDEX_DEFAULT;
2371 	else if (cnf == net->ipv4.devconf_all)
2372 		return NETCONFA_IFINDEX_ALL;
2373 	else {
2374 		struct in_device *idev
2375 			= container_of(cnf, struct in_device, cnf);
2376 		return idev->dev->ifindex;
2377 	}
2378 }
2379 
2380 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2381 			     void *buffer, size_t *lenp, loff_t *ppos)
2382 {
2383 	int old_value = *(int *)ctl->data;
2384 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2385 	int new_value = *(int *)ctl->data;
2386 
2387 	if (write) {
2388 		struct ipv4_devconf *cnf = ctl->extra1;
2389 		struct net *net = ctl->extra2;
2390 		int i = (int *)ctl->data - cnf->data;
2391 		int ifindex;
2392 
2393 		set_bit(i, cnf->state);
2394 
2395 		if (cnf == net->ipv4.devconf_dflt)
2396 			devinet_copy_dflt_conf(net, i);
2397 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2398 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2399 			if ((new_value == 0) && (old_value != 0))
2400 				rt_cache_flush(net);
2401 
2402 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2403 		    new_value != old_value)
2404 			rt_cache_flush(net);
2405 
2406 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2407 		    new_value != old_value) {
2408 			ifindex = devinet_conf_ifindex(net, cnf);
2409 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2410 						    NETCONFA_RP_FILTER,
2411 						    ifindex, cnf);
2412 		}
2413 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2414 		    new_value != old_value) {
2415 			ifindex = devinet_conf_ifindex(net, cnf);
2416 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2417 						    NETCONFA_PROXY_NEIGH,
2418 						    ifindex, cnf);
2419 		}
2420 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2421 		    new_value != old_value) {
2422 			ifindex = devinet_conf_ifindex(net, cnf);
2423 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2424 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2425 						    ifindex, cnf);
2426 		}
2427 	}
2428 
2429 	return ret;
2430 }
2431 
/*
 * Proc handler for the "forwarding" / "ip_forward" entries.
 *
 * Writes require CAP_NET_ADMIN in the user namespace of the owning
 * netns (-EPERM otherwise). After proc_dointvec() has stored the value,
 * a changed value is propagated:
 *   - default block: only a NETCONFA_FORWARDING notification is sent;
 *   - "all" block: inet_forward_change() runs under RTNL;
 *   - per-device block: LRO is disabled when forwarding was enabled and
 *     a notification is sent, also under RTNL.
 * In the two RTNL cases the route cache is flushed afterwards.
 *
 * Returns the result of proc_dointvec(), or whatever restart_syscall()
 * returns when RTNL could not be taken.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void *buffer, size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	/* Snapshot of value and file position, needed to undo the write */
	int val = *valp;
	loff_t pos = *ppos;
	struct net *net = ctl->extra2;
	int ret;

	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			/*
			 * rtnl_trylock() is used rather than a blocking
			 * lock; on failure the write is rolled back and
			 * the syscall is restarted.
			 */
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				/* Per-device entry: extra1 is &in_device->cnf */
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			/* Default block: notify only, no RTNL and no flush */
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2478 
2479 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2480 				void *buffer, size_t *lenp, loff_t *ppos)
2481 {
2482 	int *valp = ctl->data;
2483 	int val = *valp;
2484 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2485 	struct net *net = ctl->extra2;
2486 
2487 	if (write && *valp != val)
2488 		rt_cache_flush(net);
2489 
2490 	return ret;
2491 }
2492 
/*
 * Build one ctl_table initializer for the devinet template table.
 *
 * @attr selects the slot: .data points at ipv4_devconf.data[] index
 * IPV4_DEVCONF_<attr> - 1. @name is the procname, @mval the file mode
 * and @proc the handler. .extra1 points at the static ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2515 
/*
 * Template for the per-configuration sysctl tables.
 *
 * __devinet_sysctl_register() duplicates this object and rebases each
 * entry's .data/.extra1 from the static ipv4_devconf onto the target
 * ipv4_devconf, storing the registration handle in sysctl_header.
 *
 * Entries built with DEVINET_SYSCTL_RW_ENTRY/RO_ENTRY go through
 * devinet_conf_proc(), "forwarding" goes through
 * devinet_sysctl_forward(), and the FLUSHING entries go through
 * ipv4_doint_and_flush().
 *
 * NOTE(review): the array has __IPV4_DEVCONF_MAX slots and the register
 * loop skips the last one, presumably an empty terminator; confirm.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		/* Entries below flush the route cache when their value changes */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2569 
2570 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2571 				     int ifindex, struct ipv4_devconf *p)
2572 {
2573 	int i;
2574 	struct devinet_sysctl_table *t;
2575 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2576 
2577 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2578 	if (!t)
2579 		goto out;
2580 
2581 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2582 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2583 		t->devinet_vars[i].extra1 = p;
2584 		t->devinet_vars[i].extra2 = net;
2585 	}
2586 
2587 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2588 
2589 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2590 	if (!t->sysctl_header)
2591 		goto free;
2592 
2593 	p->sysctl = t;
2594 
2595 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2596 				    ifindex, p);
2597 	return 0;
2598 
2599 free:
2600 	kfree(t);
2601 out:
2602 	return -ENOMEM;
2603 }
2604 
2605 static void __devinet_sysctl_unregister(struct net *net,
2606 					struct ipv4_devconf *cnf, int ifindex)
2607 {
2608 	struct devinet_sysctl_table *t = cnf->sysctl;
2609 
2610 	if (t) {
2611 		cnf->sysctl = NULL;
2612 		unregister_net_sysctl_table(t->sysctl_header);
2613 		kfree(t);
2614 	}
2615 
2616 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2617 }
2618 
2619 static int devinet_sysctl_register(struct in_device *idev)
2620 {
2621 	int err;
2622 
2623 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2624 		return -EINVAL;
2625 
2626 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2627 	if (err)
2628 		return err;
2629 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2630 					idev->dev->ifindex, &idev->cnf);
2631 	if (err)
2632 		neigh_sysctl_unregister(idev->arp_parms);
2633 	return err;
2634 }
2635 
2636 static void devinet_sysctl_unregister(struct in_device *idev)
2637 {
2638 	struct net *net = dev_net(idev->dev);
2639 
2640 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2641 	neigh_sysctl_unregister(idev->arp_parms);
2642 }
2643 
/*
 * Template for the "ip_forward" sysctl. devinet_init_net() duplicates
 * it, repoints .data/.extra1 at the netns "all" configuration and
 * .extra2 at the netns, and registers the copy under "net/ipv4".
 * The values here refer to the static ipv4_devconf and init_net.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
2657 #endif
2658 
/*
 * Per-netns init: allocate the "all" and "default" ipv4_devconf blocks
 * for @net, optionally seed them from another namespace, and (with
 * CONFIG_SYSCTL) register the conf/all, conf/default and ip_forward
 * sysctls.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * allocated or registered so far is released.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;
	struct ctl_table_header *forw_hdr;
#endif

	/* Start from copies of the compiled-in defaults. */
	err = -ENOMEM;
	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;

	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
	/* Private copy of the ip_forward entry, bound to this netns. */
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;

	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
#endif

	/* Namespaces other than init_net may inherit their initial values. */
	if (!net_eq(net, &init_net)) {
		switch (net_inherit_devconf()) {
		case 3:
			/* copy from the current netns */
			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt,
			       current->nsproxy->net_ns->ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 0:
		case 1:
			/* copy from init_net */
			memcpy(all, init_net.ipv4.devconf_all,
			       sizeof(ipv4_devconf));
			memcpy(dflt, init_net.ipv4.devconf_dflt,
			       sizeof(ipv4_devconf_dflt));
			break;
		case 2:
			/* use compiled values */
			break;
		}
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	/* register_net_sysctl_sz() reports failure only as NULL. */
	err = -ENOMEM;
	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
					  ARRAY_SIZE(ctl_forward_entry));
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	/* Publish the blocks in the netns only once everything succeeded. */
	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Error unwinding, in reverse order of setup. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	kfree(tbl);
err_alloc_ctl:
#endif
	kfree(dflt);
err_alloc_dflt:
	kfree(all);
err_alloc_all:
	return err;
}
2748 
2749 static __net_exit void devinet_exit_net(struct net *net)
2750 {
2751 #ifdef CONFIG_SYSCTL
2752 	struct ctl_table *tbl;
2753 
2754 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2755 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2756 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2757 				    NETCONFA_IFINDEX_DEFAULT);
2758 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2759 				    NETCONFA_IFINDEX_ALL);
2760 	kfree(tbl);
2761 #endif
2762 	kfree(net->ipv4.devconf_dflt);
2763 	kfree(net->ipv4.devconf_all);
2764 }
2765 
/*
 * Per-netns init/exit hooks for devinet; registered from devinet_init()
 * with register_pernet_subsys().
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2770 
/*
 * AF_INET link-level rtnetlink callbacks; registered from devinet_init()
 * with rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2778 
2779 void __init devinet_init(void)
2780 {
2781 	int i;
2782 
2783 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2784 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2785 
2786 	register_pernet_subsys(&devinet_ops);
2787 	register_netdevice_notifier(&ip_netdev_notifier);
2788 
2789 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2790 
2791 	rtnl_af_register(&inet_af_ops);
2792 
2793 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2794 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2795 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2796 		      RTNL_FLAG_DUMP_UNLOCKED);
2797 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2798 		      inet_netconf_dump_devconf,
2799 		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
2800 }
2801