xref: /linux/net/ipv4/devinet.c (revision 9112fc0109fc0037ac3b8b633a169e78b4e23ca1)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/* Address flags that are IPv6-specific; __inet_insert_ifa() masks them out
 * of every IPv4 address before linking it.
 */
#define IPV6ONLY_FLAGS						\
	(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED |	\
	 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE |			\
	 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79 	},
80 };
81 
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83 	.data = {
84 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91 		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92 	},
93 };
94 
/* Read setting @attr from the per-namespace default devconf of @net.
 * @net is parenthesised so that any pointer-valued expression (not just a
 * plain identifier) can be passed without operator-precedence surprises.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97 
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99 	[IFA_LOCAL]     	= { .type = NLA_U32 },
100 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104 	[IFA_FLAGS]		= { .type = NLA_U32 },
105 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107 	[IFA_PROTO]		= { .type = NLA_U8 },
108 };
109 
110 struct inet_fill_args {
111 	u32 portid;
112 	u32 seq;
113 	int event;
114 	unsigned int flags;
115 	int netnsid;
116 	int ifindex;
117 };
118 
119 #define IN4_ADDR_HSIZE_SHIFT	8
120 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121 
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123 
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126 	u32 val = (__force u32) addr ^ net_hash_mix(net);
127 
128 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130 
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134 
135 	ASSERT_RTNL();
136 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138 
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141 	ASSERT_RTNL();
142 	hlist_del_init_rcu(&ifa->hash);
143 }
144 
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155 	struct net_device *result = NULL;
156 	struct in_ifaddr *ifa;
157 
158 	rcu_read_lock();
159 	ifa = inet_lookup_ifaddr_rcu(net, addr);
160 	if (!ifa) {
161 		struct flowi4 fl4 = { .daddr = addr };
162 		struct fib_result res = { 0 };
163 		struct fib_table *local;
164 
165 		/* Fallback to FIB local table so that communication
166 		 * over loopback subnets work.
167 		 */
168 		local = fib_get_table(net, RT_TABLE_LOCAL);
169 		if (local &&
170 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171 		    res.type == RTN_LOCAL)
172 			result = FIB_RES_DEV(res);
173 	} else {
174 		result = ifa->ifa_dev->dev;
175 	}
176 	if (result && devref)
177 		dev_hold(result);
178 	rcu_read_unlock();
179 	return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182 
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186 	u32 hash = inet_addr_hash(net, addr);
187 	struct in_ifaddr *ifa;
188 
189 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190 		if (ifa->ifa_local == addr &&
191 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192 			return ifa;
193 
194 	return NULL;
195 }
196 
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198 
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202 			 struct in_ifaddr __rcu **ifap,
203 			 int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210 	return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216 
217 /* Locks all the inet devices. */
218 
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223 
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227 	if (ifa->ifa_dev)
228 		in_dev_put(ifa->ifa_dev);
229 	kfree(ifa);
230 }
231 
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236 
237 static void in_dev_free_rcu(struct rcu_head *head)
238 {
239 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
240 
241 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
242 	kfree(idev);
243 }
244 
245 void in_dev_finish_destroy(struct in_device *idev)
246 {
247 	struct net_device *dev = idev->dev;
248 
249 	WARN_ON(idev->ifa_list);
250 	WARN_ON(idev->mc_list);
251 #ifdef NET_REFCNT_DEBUG
252 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
253 #endif
254 	netdev_put(dev, &idev->dev_tracker);
255 	if (!idev->dead)
256 		pr_err("Freeing alive in_device %p\n", idev);
257 	else
258 		call_rcu(&idev->rcu_head, in_dev_free_rcu);
259 }
260 EXPORT_SYMBOL(in_dev_finish_destroy);
261 
262 static struct in_device *inetdev_init(struct net_device *dev)
263 {
264 	struct in_device *in_dev;
265 	int err = -ENOMEM;
266 
267 	ASSERT_RTNL();
268 
269 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
270 	if (!in_dev)
271 		goto out;
272 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
273 			sizeof(in_dev->cnf));
274 	in_dev->cnf.sysctl = NULL;
275 	in_dev->dev = dev;
276 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
277 	if (!in_dev->arp_parms)
278 		goto out_kfree;
279 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
280 		dev_disable_lro(dev);
281 	/* Reference in_dev->dev */
282 	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
283 	/* Account for reference dev->ip_ptr (below) */
284 	refcount_set(&in_dev->refcnt, 1);
285 
286 	err = devinet_sysctl_register(in_dev);
287 	if (err) {
288 		in_dev->dead = 1;
289 		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
290 		in_dev_put(in_dev);
291 		in_dev = NULL;
292 		goto out;
293 	}
294 	ip_mc_init_dev(in_dev);
295 	if (dev->flags & IFF_UP)
296 		ip_mc_up(in_dev);
297 
298 	/* we can receive as soon as ip_ptr is set -- do this last */
299 	rcu_assign_pointer(dev->ip_ptr, in_dev);
300 out:
301 	return in_dev ?: ERR_PTR(err);
302 out_kfree:
303 	kfree(in_dev);
304 	in_dev = NULL;
305 	goto out;
306 }
307 
308 static void inetdev_destroy(struct in_device *in_dev)
309 {
310 	struct net_device *dev;
311 	struct in_ifaddr *ifa;
312 
313 	ASSERT_RTNL();
314 
315 	dev = in_dev->dev;
316 
317 	in_dev->dead = 1;
318 
319 	ip_mc_destroy_dev(in_dev);
320 
321 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
322 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
323 		inet_free_ifa(ifa);
324 	}
325 
326 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
327 
328 	devinet_sysctl_unregister(in_dev);
329 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
330 	arp_ifdown(dev);
331 
332 	in_dev_put(in_dev);
333 }
334 
335 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
336 {
337 	const struct in_ifaddr *ifa;
338 
339 	rcu_read_lock();
340 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
341 		if (inet_ifa_match(a, ifa)) {
342 			if (!b || inet_ifa_match(b, ifa)) {
343 				rcu_read_unlock();
344 				return 1;
345 			}
346 		}
347 	}
348 	rcu_read_unlock();
349 	return 0;
350 }
351 
352 static void __inet_del_ifa(struct in_device *in_dev,
353 			   struct in_ifaddr __rcu **ifap,
354 			   int destroy, struct nlmsghdr *nlh, u32 portid)
355 {
356 	struct in_ifaddr *promote = NULL;
357 	struct in_ifaddr *ifa, *ifa1;
358 	struct in_ifaddr __rcu **last_prim;
359 	struct in_ifaddr *prev_prom = NULL;
360 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
361 
362 	ASSERT_RTNL();
363 
364 	ifa1 = rtnl_dereference(*ifap);
365 	last_prim = ifap;
366 	if (in_dev->dead)
367 		goto no_promotions;
368 
369 	/* 1. Deleting primary ifaddr forces deletion all secondaries
370 	 * unless alias promotion is set
371 	 **/
372 
373 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
374 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
375 
376 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
377 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
378 			    ifa1->ifa_scope <= ifa->ifa_scope)
379 				last_prim = &ifa->ifa_next;
380 
381 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
382 			    ifa1->ifa_mask != ifa->ifa_mask ||
383 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
384 				ifap1 = &ifa->ifa_next;
385 				prev_prom = ifa;
386 				continue;
387 			}
388 
389 			if (!do_promote) {
390 				inet_hash_remove(ifa);
391 				*ifap1 = ifa->ifa_next;
392 
393 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
394 				blocking_notifier_call_chain(&inetaddr_chain,
395 						NETDEV_DOWN, ifa);
396 				inet_free_ifa(ifa);
397 			} else {
398 				promote = ifa;
399 				break;
400 			}
401 		}
402 	}
403 
404 	/* On promotion all secondaries from subnet are changing
405 	 * the primary IP, we must remove all their routes silently
406 	 * and later to add them back with new prefsrc. Do this
407 	 * while all addresses are on the device list.
408 	 */
409 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
410 		if (ifa1->ifa_mask == ifa->ifa_mask &&
411 		    inet_ifa_match(ifa1->ifa_address, ifa))
412 			fib_del_ifaddr(ifa, ifa1);
413 	}
414 
415 no_promotions:
416 	/* 2. Unlink it */
417 
418 	*ifap = ifa1->ifa_next;
419 	inet_hash_remove(ifa1);
420 
421 	/* 3. Announce address deletion */
422 
423 	/* Send message first, then call notifier.
424 	   At first sight, FIB update triggered by notifier
425 	   will refer to already deleted ifaddr, that could confuse
426 	   netlink listeners. It is not true: look, gated sees
427 	   that route deleted and if it still thinks that ifaddr
428 	   is valid, it will try to restore deleted routes... Grr.
429 	   So that, this order is correct.
430 	 */
431 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
432 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
433 
434 	if (promote) {
435 		struct in_ifaddr *next_sec;
436 
437 		next_sec = rtnl_dereference(promote->ifa_next);
438 		if (prev_prom) {
439 			struct in_ifaddr *last_sec;
440 
441 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
442 
443 			last_sec = rtnl_dereference(*last_prim);
444 			rcu_assign_pointer(promote->ifa_next, last_sec);
445 			rcu_assign_pointer(*last_prim, promote);
446 		}
447 
448 		promote->ifa_flags &= ~IFA_F_SECONDARY;
449 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
450 		blocking_notifier_call_chain(&inetaddr_chain,
451 				NETDEV_UP, promote);
452 		for (ifa = next_sec; ifa;
453 		     ifa = rtnl_dereference(ifa->ifa_next)) {
454 			if (ifa1->ifa_mask != ifa->ifa_mask ||
455 			    !inet_ifa_match(ifa1->ifa_address, ifa))
456 					continue;
457 			fib_add_ifaddr(ifa);
458 		}
459 
460 	}
461 	if (destroy)
462 		inet_free_ifa(ifa1);
463 }
464 
465 static void inet_del_ifa(struct in_device *in_dev,
466 			 struct in_ifaddr __rcu **ifap,
467 			 int destroy)
468 {
469 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
470 }
471 
/* Address lifetime checker; the handler is defined further down. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(); it is re-queued whenever an
 * address is inserted or its lifetime is updated.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
475 
476 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
477 			     u32 portid, struct netlink_ext_ack *extack)
478 {
479 	struct in_ifaddr __rcu **last_primary, **ifap;
480 	struct in_device *in_dev = ifa->ifa_dev;
481 	struct in_validator_info ivi;
482 	struct in_ifaddr *ifa1;
483 	int ret;
484 
485 	ASSERT_RTNL();
486 
487 	if (!ifa->ifa_local) {
488 		inet_free_ifa(ifa);
489 		return 0;
490 	}
491 
492 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
493 	last_primary = &in_dev->ifa_list;
494 
495 	/* Don't set IPv6 only flags to IPv4 addresses */
496 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
497 
498 	ifap = &in_dev->ifa_list;
499 	ifa1 = rtnl_dereference(*ifap);
500 
501 	while (ifa1) {
502 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
503 		    ifa->ifa_scope <= ifa1->ifa_scope)
504 			last_primary = &ifa1->ifa_next;
505 		if (ifa1->ifa_mask == ifa->ifa_mask &&
506 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
507 			if (ifa1->ifa_local == ifa->ifa_local) {
508 				inet_free_ifa(ifa);
509 				return -EEXIST;
510 			}
511 			if (ifa1->ifa_scope != ifa->ifa_scope) {
512 				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
513 				inet_free_ifa(ifa);
514 				return -EINVAL;
515 			}
516 			ifa->ifa_flags |= IFA_F_SECONDARY;
517 		}
518 
519 		ifap = &ifa1->ifa_next;
520 		ifa1 = rtnl_dereference(*ifap);
521 	}
522 
523 	/* Allow any devices that wish to register ifaddr validtors to weigh
524 	 * in now, before changes are committed.  The rntl lock is serializing
525 	 * access here, so the state should not change between a validator call
526 	 * and a final notify on commit.  This isn't invoked on promotion under
527 	 * the assumption that validators are checking the address itself, and
528 	 * not the flags.
529 	 */
530 	ivi.ivi_addr = ifa->ifa_address;
531 	ivi.ivi_dev = ifa->ifa_dev;
532 	ivi.extack = extack;
533 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
534 					   NETDEV_UP, &ivi);
535 	ret = notifier_to_errno(ret);
536 	if (ret) {
537 		inet_free_ifa(ifa);
538 		return ret;
539 	}
540 
541 	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
542 		ifap = last_primary;
543 
544 	rcu_assign_pointer(ifa->ifa_next, *ifap);
545 	rcu_assign_pointer(*ifap, ifa);
546 
547 	inet_hash_insert(dev_net(in_dev->dev), ifa);
548 
549 	cancel_delayed_work(&check_lifetime_work);
550 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
551 
552 	/* Send message first, then call notifier.
553 	   Notifier will trigger FIB update, so that
554 	   listeners of netlink will know about new ifaddr */
555 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
556 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
557 
558 	return 0;
559 }
560 
561 static int inet_insert_ifa(struct in_ifaddr *ifa)
562 {
563 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
564 }
565 
566 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
567 {
568 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
569 
570 	ASSERT_RTNL();
571 
572 	if (!in_dev) {
573 		inet_free_ifa(ifa);
574 		return -ENOBUFS;
575 	}
576 	ipv4_devconf_setall(in_dev);
577 	neigh_parms_data_state_setall(in_dev->arp_parms);
578 	if (ifa->ifa_dev != in_dev) {
579 		WARN_ON(ifa->ifa_dev);
580 		in_dev_hold(in_dev);
581 		ifa->ifa_dev = in_dev;
582 	}
583 	if (ipv4_is_loopback(ifa->ifa_local))
584 		ifa->ifa_scope = RT_SCOPE_HOST;
585 	return inet_insert_ifa(ifa);
586 }
587 
588 /* Caller must hold RCU or RTNL :
589  * We dont take a reference on found in_device
590  */
591 struct in_device *inetdev_by_index(struct net *net, int ifindex)
592 {
593 	struct net_device *dev;
594 	struct in_device *in_dev = NULL;
595 
596 	rcu_read_lock();
597 	dev = dev_get_by_index_rcu(net, ifindex);
598 	if (dev)
599 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
600 	rcu_read_unlock();
601 	return in_dev;
602 }
603 EXPORT_SYMBOL(inetdev_by_index);
604 
605 /* Called only from RTNL semaphored context. No locks. */
606 
607 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
608 				    __be32 mask)
609 {
610 	struct in_ifaddr *ifa;
611 
612 	ASSERT_RTNL();
613 
614 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
615 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
616 			return ifa;
617 	}
618 	return NULL;
619 }
620 
621 static int ip_mc_autojoin_config(struct net *net, bool join,
622 				 const struct in_ifaddr *ifa)
623 {
624 #if defined(CONFIG_IP_MULTICAST)
625 	struct ip_mreqn mreq = {
626 		.imr_multiaddr.s_addr = ifa->ifa_address,
627 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
628 	};
629 	struct sock *sk = net->ipv4.mc_autojoin_sk;
630 	int ret;
631 
632 	ASSERT_RTNL();
633 
634 	lock_sock(sk);
635 	if (join)
636 		ret = ip_mc_join_group(sk, &mreq);
637 	else
638 		ret = ip_mc_leave_group(sk, &mreq);
639 	release_sock(sk);
640 
641 	return ret;
642 #else
643 	return -EOPNOTSUPP;
644 #endif
645 }
646 
647 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
648 			    struct netlink_ext_ack *extack)
649 {
650 	struct net *net = sock_net(skb->sk);
651 	struct in_ifaddr __rcu **ifap;
652 	struct nlattr *tb[IFA_MAX+1];
653 	struct in_device *in_dev;
654 	struct ifaddrmsg *ifm;
655 	struct in_ifaddr *ifa;
656 	int err;
657 
658 	ASSERT_RTNL();
659 
660 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
661 				     ifa_ipv4_policy, extack);
662 	if (err < 0)
663 		goto errout;
664 
665 	ifm = nlmsg_data(nlh);
666 	in_dev = inetdev_by_index(net, ifm->ifa_index);
667 	if (!in_dev) {
668 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
669 		err = -ENODEV;
670 		goto errout;
671 	}
672 
673 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
674 	     ifap = &ifa->ifa_next) {
675 		if (tb[IFA_LOCAL] &&
676 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
677 			continue;
678 
679 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
680 			continue;
681 
682 		if (tb[IFA_ADDRESS] &&
683 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
684 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
685 			continue;
686 
687 		if (ipv4_is_multicast(ifa->ifa_address))
688 			ip_mc_autojoin_config(net, false, ifa);
689 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
690 		return 0;
691 	}
692 
693 	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
694 	err = -EADDRNOTAVAIL;
695 errout:
696 	return err;
697 }
698 
/* Lifetime value (in the 32-bit lifetime fields) meaning "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
700 
701 static void check_lifetime(struct work_struct *work)
702 {
703 	unsigned long now, next, next_sec, next_sched;
704 	struct in_ifaddr *ifa;
705 	struct hlist_node *n;
706 	int i;
707 
708 	now = jiffies;
709 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
710 
711 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
712 		bool change_needed = false;
713 
714 		rcu_read_lock();
715 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
716 			unsigned long age;
717 
718 			if (ifa->ifa_flags & IFA_F_PERMANENT)
719 				continue;
720 
721 			/* We try to batch several events at once. */
722 			age = (now - ifa->ifa_tstamp +
723 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
724 
725 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
726 			    age >= ifa->ifa_valid_lft) {
727 				change_needed = true;
728 			} else if (ifa->ifa_preferred_lft ==
729 				   INFINITY_LIFE_TIME) {
730 				continue;
731 			} else if (age >= ifa->ifa_preferred_lft) {
732 				if (time_before(ifa->ifa_tstamp +
733 						ifa->ifa_valid_lft * HZ, next))
734 					next = ifa->ifa_tstamp +
735 					       ifa->ifa_valid_lft * HZ;
736 
737 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
738 					change_needed = true;
739 			} else if (time_before(ifa->ifa_tstamp +
740 					       ifa->ifa_preferred_lft * HZ,
741 					       next)) {
742 				next = ifa->ifa_tstamp +
743 				       ifa->ifa_preferred_lft * HZ;
744 			}
745 		}
746 		rcu_read_unlock();
747 		if (!change_needed)
748 			continue;
749 		rtnl_lock();
750 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
751 			unsigned long age;
752 
753 			if (ifa->ifa_flags & IFA_F_PERMANENT)
754 				continue;
755 
756 			/* We try to batch several events at once. */
757 			age = (now - ifa->ifa_tstamp +
758 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
759 
760 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
761 			    age >= ifa->ifa_valid_lft) {
762 				struct in_ifaddr __rcu **ifap;
763 				struct in_ifaddr *tmp;
764 
765 				ifap = &ifa->ifa_dev->ifa_list;
766 				tmp = rtnl_dereference(*ifap);
767 				while (tmp) {
768 					if (tmp == ifa) {
769 						inet_del_ifa(ifa->ifa_dev,
770 							     ifap, 1);
771 						break;
772 					}
773 					ifap = &tmp->ifa_next;
774 					tmp = rtnl_dereference(*ifap);
775 				}
776 			} else if (ifa->ifa_preferred_lft !=
777 				   INFINITY_LIFE_TIME &&
778 				   age >= ifa->ifa_preferred_lft &&
779 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
780 				ifa->ifa_flags |= IFA_F_DEPRECATED;
781 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
782 			}
783 		}
784 		rtnl_unlock();
785 	}
786 
787 	next_sec = round_jiffies_up(next);
788 	next_sched = next;
789 
790 	/* If rounded timeout is accurate enough, accept it. */
791 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
792 		next_sched = next_sec;
793 
794 	now = jiffies;
795 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
796 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
797 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
798 
799 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
800 			next_sched - now);
801 }
802 
803 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
804 			     __u32 prefered_lft)
805 {
806 	unsigned long timeout;
807 
808 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
809 
810 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
811 	if (addrconf_finite_timeout(timeout))
812 		ifa->ifa_valid_lft = timeout;
813 	else
814 		ifa->ifa_flags |= IFA_F_PERMANENT;
815 
816 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
817 	if (addrconf_finite_timeout(timeout)) {
818 		if (timeout == 0)
819 			ifa->ifa_flags |= IFA_F_DEPRECATED;
820 		ifa->ifa_preferred_lft = timeout;
821 	}
822 	ifa->ifa_tstamp = jiffies;
823 	if (!ifa->ifa_cstamp)
824 		ifa->ifa_cstamp = ifa->ifa_tstamp;
825 }
826 
827 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
828 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
829 				       struct netlink_ext_ack *extack)
830 {
831 	struct nlattr *tb[IFA_MAX+1];
832 	struct in_ifaddr *ifa;
833 	struct ifaddrmsg *ifm;
834 	struct net_device *dev;
835 	struct in_device *in_dev;
836 	int err;
837 
838 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
839 				     ifa_ipv4_policy, extack);
840 	if (err < 0)
841 		goto errout;
842 
843 	ifm = nlmsg_data(nlh);
844 	err = -EINVAL;
845 
846 	if (ifm->ifa_prefixlen > 32) {
847 		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
848 		goto errout;
849 	}
850 
851 	if (!tb[IFA_LOCAL]) {
852 		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
853 		goto errout;
854 	}
855 
856 	dev = __dev_get_by_index(net, ifm->ifa_index);
857 	err = -ENODEV;
858 	if (!dev) {
859 		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
860 		goto errout;
861 	}
862 
863 	in_dev = __in_dev_get_rtnl(dev);
864 	err = -ENOBUFS;
865 	if (!in_dev)
866 		goto errout;
867 
868 	ifa = inet_alloc_ifa();
869 	if (!ifa)
870 		/*
871 		 * A potential indev allocation can be left alive, it stays
872 		 * assigned to its device and is destroy with it.
873 		 */
874 		goto errout;
875 
876 	ipv4_devconf_setall(in_dev);
877 	neigh_parms_data_state_setall(in_dev->arp_parms);
878 	in_dev_hold(in_dev);
879 
880 	if (!tb[IFA_ADDRESS])
881 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
882 
883 	INIT_HLIST_NODE(&ifa->hash);
884 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
885 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
886 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
887 					 ifm->ifa_flags;
888 	ifa->ifa_scope = ifm->ifa_scope;
889 	ifa->ifa_dev = in_dev;
890 
891 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
892 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
893 
894 	if (tb[IFA_BROADCAST])
895 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
896 
897 	if (tb[IFA_LABEL])
898 		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
899 	else
900 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
901 
902 	if (tb[IFA_RT_PRIORITY])
903 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
904 
905 	if (tb[IFA_PROTO])
906 		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
907 
908 	if (tb[IFA_CACHEINFO]) {
909 		struct ifa_cacheinfo *ci;
910 
911 		ci = nla_data(tb[IFA_CACHEINFO]);
912 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
913 			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
914 			err = -EINVAL;
915 			goto errout_free;
916 		}
917 		*pvalid_lft = ci->ifa_valid;
918 		*pprefered_lft = ci->ifa_prefered;
919 	}
920 
921 	return ifa;
922 
923 errout_free:
924 	inet_free_ifa(ifa);
925 errout:
926 	return ERR_PTR(err);
927 }
928 
929 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
930 {
931 	struct in_device *in_dev = ifa->ifa_dev;
932 	struct in_ifaddr *ifa1;
933 
934 	if (!ifa->ifa_local)
935 		return NULL;
936 
937 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
938 		if (ifa1->ifa_mask == ifa->ifa_mask &&
939 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
940 		    ifa1->ifa_local == ifa->ifa_local)
941 			return ifa1;
942 	}
943 	return NULL;
944 }
945 
946 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
947 			    struct netlink_ext_ack *extack)
948 {
949 	struct net *net = sock_net(skb->sk);
950 	struct in_ifaddr *ifa;
951 	struct in_ifaddr *ifa_existing;
952 	__u32 valid_lft = INFINITY_LIFE_TIME;
953 	__u32 prefered_lft = INFINITY_LIFE_TIME;
954 
955 	ASSERT_RTNL();
956 
957 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
958 	if (IS_ERR(ifa))
959 		return PTR_ERR(ifa);
960 
961 	ifa_existing = find_matching_ifa(ifa);
962 	if (!ifa_existing) {
963 		/* It would be best to check for !NLM_F_CREATE here but
964 		 * userspace already relies on not having to provide this.
965 		 */
966 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
967 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
968 			int ret = ip_mc_autojoin_config(net, true, ifa);
969 
970 			if (ret < 0) {
971 				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
972 				inet_free_ifa(ifa);
973 				return ret;
974 			}
975 		}
976 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
977 					 extack);
978 	} else {
979 		u32 new_metric = ifa->ifa_rt_priority;
980 		u8 new_proto = ifa->ifa_proto;
981 
982 		inet_free_ifa(ifa);
983 
984 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
985 		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
986 			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
987 			return -EEXIST;
988 		}
989 		ifa = ifa_existing;
990 
991 		if (ifa->ifa_rt_priority != new_metric) {
992 			fib_modify_prefix_metric(ifa, new_metric);
993 			ifa->ifa_rt_priority = new_metric;
994 		}
995 
996 		ifa->ifa_proto = new_proto;
997 
998 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
999 		cancel_delayed_work(&check_lifetime_work);
1000 		queue_delayed_work(system_power_efficient_wq,
1001 				&check_lifetime_work, 0);
1002 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1003 	}
1004 	return 0;
1005 }
1006 
1007 /*
1008  *	Determine a default network mask, based on the IP address.
1009  */
1010 
1011 static int inet_abc_len(__be32 addr)
1012 {
1013 	int rc = -1;	/* Something else, probably a multicast. */
1014 
1015 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1016 		rc = 0;
1017 	else {
1018 		__u32 haddr = ntohl(addr);
1019 		if (IN_CLASSA(haddr))
1020 			rc = 8;
1021 		else if (IN_CLASSB(haddr))
1022 			rc = 16;
1023 		else if (IN_CLASSC(haddr))
1024 			rc = 24;
1025 		else if (IN_CLASSE(haddr))
1026 			rc = 32;
1027 	}
1028 
1029 	return rc;
1030 }
1031 
1032 
/*
 * devinet_ioctl - handle the IPv4 per-interface address ioctls
 * @net: network namespace in which the device name is looked up
 * @cmd: SIOC{G,S}IFADDR, SIOC{G,S}IFBRDADDR, SIOC{G,S}IFDSTADDR,
 *       SIOC{G,S}IFNETMASK or SIOCSIFFLAGS
 * @ifr: request; ifr_name holds a device name or a "dev:alias" label and
 *       ifr_addr carries the address in or out (ifr_flags for SIOCSIFFLAGS)
 *
 * The "get" commands overwrite ifr->ifr_addr with an AF_INET sockaddr holding
 * the requested field of the matching address.  The "set" commands require
 * CAP_NET_ADMIN in the namespace's user namespace and, except SIOCSIFFLAGS,
 * an AF_INET sockaddr in the request.
 *
 * Returns 0 on success or a negative errno: -EINVAL (unknown command, wrong
 * family, bad address or mask), -EPERM, -ENODEV (no such device),
 * -EADDRNOTAVAIL (no matching address), -ENOBUFS (address allocation failed),
 * or whatever dev_change_flags() / inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
	struct in_ifaddr __rcu **ifap = NULL;
	struct in_device *in_dev;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/* Force NUL termination of the supplied name. */
	ifr->ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut an alias label ("dev:N") down to the bare device
	 * name; the colon is put back below once the device has been found.
	 */
	colon = strchr(ifr->ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr->ifr_name);

	/* First pass: permission and argument checks, done before taking RTNL. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		/* Only try label+address matching when the caller passed an
		 * AF_INET address; the reply buffer is then reset.
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr->ifr_name);
	if (!dev)
		goto done;

	/* Restore the full label so it can be compared with ifa_label. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */

			/* ifap trails the walk as the link pointing at ifa;
			 * it is what inet_del_ifa() is given further down.
			 */
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list;
			     (ifa = rtnl_dereference(*ifap)) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR (which may create the address) and SIOCSIFFLAGS
	 * can proceed without an existing matching address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	/* Second pass: perform the operation under RTNL. */
	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_local;
		break;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		break;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_address;
		break;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		ret = 0;
		sin->sin_addr.s_addr = ifa->ifa_mask;
		break;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Alias label: the flags apply to that address only;
			 * clearing IFF_UP deletes it, anything else is a no-op.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr->ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			/* No address under this label yet: create one. */
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			/* Existing address: nothing to do if unchanged,
			 * otherwise unlink it (third argument 0) and reset
			 * the fields recomputed below before re-adding it.
			 */
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Derive prefix length and mask from the address via
			 * inet_abc_len(); set a broadcast address only on
			 * broadcast-capable devices with a prefix below /31.
			 */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* Unlink, modify, re-insert. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
}
1270 
1271 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1272 {
1273 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1274 	const struct in_ifaddr *ifa;
1275 	struct ifreq ifr;
1276 	int done = 0;
1277 
1278 	if (WARN_ON(size > sizeof(struct ifreq)))
1279 		goto out;
1280 
1281 	if (!in_dev)
1282 		goto out;
1283 
1284 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1285 		if (!buf) {
1286 			done += size;
1287 			continue;
1288 		}
1289 		if (len < size)
1290 			break;
1291 		memset(&ifr, 0, sizeof(struct ifreq));
1292 		strcpy(ifr.ifr_name, ifa->ifa_label);
1293 
1294 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1295 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1296 								ifa->ifa_local;
1297 
1298 		if (copy_to_user(buf + done, &ifr, size)) {
1299 			done = -EFAULT;
1300 			break;
1301 		}
1302 		len  -= size;
1303 		done += size;
1304 	}
1305 out:
1306 	return done;
1307 }
1308 
1309 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1310 				 int scope)
1311 {
1312 	const struct in_ifaddr *ifa;
1313 
1314 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1315 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1316 			continue;
1317 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1318 		    ifa->ifa_scope <= scope)
1319 			return ifa->ifa_local;
1320 	}
1321 
1322 	return 0;
1323 }
1324 
/*
 * inet_select_addr - pick a local IPv4 address for use on @dev
 * @dev: device to look at first
 * @dst: address whose subnet is preferred, or 0 for no preference
 * @scope: largest acceptable scope value for the returned address
 *
 * Search order, all under rcu_read_lock():
 *  1. the primary addresses of @dev itself;
 *  2. the L3 master device of @dev, if it has one;
 *  3. every device in the namespace with the same L3 master index.
 *
 * Returns the selected address, or 0 if none qualifies.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	unsigned char localnet_scope = RT_SCOPE_HOST;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	/* With route_localnet enabled an address's scope is capped at
	 * RT_SCOPE_LINK instead of RT_SCOPE_HOST for the check below.
	 */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;
		if (min(ifa->ifa_scope, localnet_scope) > scope)
			continue;
		/* Best case: no preference, or dst matches this address. */
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Otherwise remember the first acceptable address as a
		 * fallback and keep looking for a match with dst.
		 */
		if (!addr)
			addr = ifa->ifa_local;
	}

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Non-loopback addresses configured on the loopback device should be
	 * preferred in this case. It is important that lo is the first
	 * interface in the dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1394 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Two pieces of state are tracked while walking the addresses of @in_dev:
 *  - addr: the first address that equals @local (any address when @local
 *    is 0) and whose effective scope does not exceed @scope;
 *  - same: set once an address is found that matches both @local (if
 *    non-zero) and @dst (if non-zero) according to inet_ifa_match().
 *
 * Returns addr when the walk ends with "same" set, otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	unsigned char localnet_scope = RT_SCOPE_HOST;
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	int same = 0;

	/* With route_localnet enabled the effective scope is capped at
	 * RT_SCOPE_LINK instead of RT_SCOPE_HOST.
	 */
	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
		localnet_scope = RT_SCOPE_LINK;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);

		/* Pick the candidate address once; stop right away if the
		 * subnet condition was already satisfied earlier.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    min_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* A fixed local address, or no dst
				 * constraint: the candidate stands.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr in the dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (min_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	return same ? addr : 0;
}
1438 
1439 /*
1440  * Confirm that local IP address exists using wildcards:
1441  * - net: netns to check, cannot be NULL
1442  * - in_dev: only on this interface, NULL=any interface
1443  * - dst: only in the same subnet as dst, 0=any dst
1444  * - local: address, 0=autoselect the local address
1445  * - scope: maximum allowed scope value for the local address
1446  */
1447 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1448 			 __be32 dst, __be32 local, int scope)
1449 {
1450 	__be32 addr = 0;
1451 	struct net_device *dev;
1452 
1453 	if (in_dev)
1454 		return confirm_addr_indev(in_dev, dst, local, scope);
1455 
1456 	rcu_read_lock();
1457 	for_each_netdev_rcu(net, dev) {
1458 		in_dev = __in_dev_get_rcu(dev);
1459 		if (in_dev) {
1460 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1461 			if (addr)
1462 				break;
1463 		}
1464 	}
1465 	rcu_read_unlock();
1466 
1467 	return addr;
1468 }
1469 EXPORT_SYMBOL(inet_confirm_addr);
1470 
1471 /*
1472  *	Device notifier
1473  */
1474 
1475 int register_inetaddr_notifier(struct notifier_block *nb)
1476 {
1477 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1478 }
1479 EXPORT_SYMBOL(register_inetaddr_notifier);
1480 
1481 int unregister_inetaddr_notifier(struct notifier_block *nb)
1482 {
1483 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1484 }
1485 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1486 
1487 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1488 {
1489 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1490 }
1491 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1492 
1493 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1494 {
1495 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1496 	    nb);
1497 }
1498 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1499 
1500 /* Rename ifa_labels for a device name change. Make some effort to preserve
1501  * existing alias numbering and to create unique labels if possible.
1502 */
1503 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1504 {
1505 	struct in_ifaddr *ifa;
1506 	int named = 0;
1507 
1508 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1509 		char old[IFNAMSIZ], *dot;
1510 
1511 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1512 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1513 		if (named++ == 0)
1514 			goto skip;
1515 		dot = strchr(old, ':');
1516 		if (!dot) {
1517 			sprintf(old, ":%d", named);
1518 			dot = old;
1519 		}
1520 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1521 			strcat(ifa->ifa_label, dot);
1522 		else
1523 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1524 skip:
1525 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1526 	}
1527 }
1528 
1529 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1530 					struct in_device *in_dev)
1531 
1532 {
1533 	const struct in_ifaddr *ifa;
1534 
1535 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1536 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1537 			 ifa->ifa_local, dev,
1538 			 ifa->ifa_local, NULL,
1539 			 dev->dev_addr, NULL);
1540 	}
1541 }
1542 
/* Called only under RTNL semaphore */

/*
 * inetdev_event - netdevice notifier callback for the IPv4 layer
 * @this: notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr: notifier info from which the net_device is extracted
 *
 * Creates, updates or destroys the device's in_device in response to device
 * events.  Returns NOTIFY_DONE, except when inetdev_init() fails on
 * NETDEV_REGISTER, in which case the error is returned via
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state on this device yet: only two events can create it. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* Registration with an in_device already attached is
		 * unexpected; log it and clear the pointer.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Give a loopback device INADDR_LOOPBACK/8 with host
			 * scope and infinite lifetimes when it comes up.
			 */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		fallthrough;
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR announce themselves only when arp_notify
		 * is enabled for the device.
		 */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		fallthrough;
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		fallthrough;
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() does call
		 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
		 * statement above looks stale - confirm.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries after the rename. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1638 
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1642 
1643 static size_t inet_nlmsg_size(void)
1644 {
1645 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1646 	       + nla_total_size(4) /* IFA_ADDRESS */
1647 	       + nla_total_size(4) /* IFA_LOCAL */
1648 	       + nla_total_size(4) /* IFA_BROADCAST */
1649 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1650 	       + nla_total_size(4)  /* IFA_FLAGS */
1651 	       + nla_total_size(1)  /* IFA_PROTO */
1652 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1653 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1654 }
1655 
1656 static inline u32 cstamp_delta(unsigned long cstamp)
1657 {
1658 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1659 }
1660 
1661 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1662 			 unsigned long tstamp, u32 preferred, u32 valid)
1663 {
1664 	struct ifa_cacheinfo ci;
1665 
1666 	ci.cstamp = cstamp_delta(cstamp);
1667 	ci.tstamp = cstamp_delta(tstamp);
1668 	ci.ifa_prefered = preferred;
1669 	ci.ifa_valid = valid;
1670 
1671 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1672 }
1673 
1674 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1675 			    struct inet_fill_args *args)
1676 {
1677 	struct ifaddrmsg *ifm;
1678 	struct nlmsghdr  *nlh;
1679 	u32 preferred, valid;
1680 
1681 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1682 			args->flags);
1683 	if (!nlh)
1684 		return -EMSGSIZE;
1685 
1686 	ifm = nlmsg_data(nlh);
1687 	ifm->ifa_family = AF_INET;
1688 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1689 	ifm->ifa_flags = ifa->ifa_flags;
1690 	ifm->ifa_scope = ifa->ifa_scope;
1691 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1692 
1693 	if (args->netnsid >= 0 &&
1694 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1695 		goto nla_put_failure;
1696 
1697 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1698 		preferred = ifa->ifa_preferred_lft;
1699 		valid = ifa->ifa_valid_lft;
1700 		if (preferred != INFINITY_LIFE_TIME) {
1701 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1702 
1703 			if (preferred > tval)
1704 				preferred -= tval;
1705 			else
1706 				preferred = 0;
1707 			if (valid != INFINITY_LIFE_TIME) {
1708 				if (valid > tval)
1709 					valid -= tval;
1710 				else
1711 					valid = 0;
1712 			}
1713 		}
1714 	} else {
1715 		preferred = INFINITY_LIFE_TIME;
1716 		valid = INFINITY_LIFE_TIME;
1717 	}
1718 	if ((ifa->ifa_address &&
1719 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1720 	    (ifa->ifa_local &&
1721 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1722 	    (ifa->ifa_broadcast &&
1723 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1724 	    (ifa->ifa_label[0] &&
1725 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1726 	    (ifa->ifa_proto &&
1727 	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1728 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1729 	    (ifa->ifa_rt_priority &&
1730 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1731 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1732 			  preferred, valid))
1733 		goto nla_put_failure;
1734 
1735 	nlmsg_end(skb, nlh);
1736 	return 0;
1737 
1738 nla_put_failure:
1739 	nlmsg_cancel(skb, nlh);
1740 	return -EMSGSIZE;
1741 }
1742 
1743 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1744 				      struct inet_fill_args *fillargs,
1745 				      struct net **tgt_net, struct sock *sk,
1746 				      struct netlink_callback *cb)
1747 {
1748 	struct netlink_ext_ack *extack = cb->extack;
1749 	struct nlattr *tb[IFA_MAX+1];
1750 	struct ifaddrmsg *ifm;
1751 	int err, i;
1752 
1753 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1754 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1755 		return -EINVAL;
1756 	}
1757 
1758 	ifm = nlmsg_data(nlh);
1759 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1760 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1761 		return -EINVAL;
1762 	}
1763 
1764 	fillargs->ifindex = ifm->ifa_index;
1765 	if (fillargs->ifindex) {
1766 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1767 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1768 	}
1769 
1770 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1771 					    ifa_ipv4_policy, extack);
1772 	if (err < 0)
1773 		return err;
1774 
1775 	for (i = 0; i <= IFA_MAX; ++i) {
1776 		if (!tb[i])
1777 			continue;
1778 
1779 		if (i == IFA_TARGET_NETNSID) {
1780 			struct net *net;
1781 
1782 			fillargs->netnsid = nla_get_s32(tb[i]);
1783 
1784 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1785 			if (IS_ERR(net)) {
1786 				fillargs->netnsid = -1;
1787 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1788 				return PTR_ERR(net);
1789 			}
1790 			*tgt_net = net;
1791 		} else {
1792 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1793 			return -EINVAL;
1794 		}
1795 	}
1796 
1797 	return 0;
1798 }
1799 
1800 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1801 			    struct netlink_callback *cb, int s_ip_idx,
1802 			    struct inet_fill_args *fillargs)
1803 {
1804 	struct in_ifaddr *ifa;
1805 	int ip_idx = 0;
1806 	int err;
1807 
1808 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1809 		if (ip_idx < s_ip_idx) {
1810 			ip_idx++;
1811 			continue;
1812 		}
1813 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1814 		if (err < 0)
1815 			goto done;
1816 
1817 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1818 		ip_idx++;
1819 	}
1820 	err = 0;
1821 
1822 done:
1823 	cb->args[2] = ip_idx;
1824 
1825 	return err;
1826 }
1827 
1828 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1829  */
1830 static u32 inet_base_seq(const struct net *net)
1831 {
1832 	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1833 		  net->dev_base_seq;
1834 
1835 	/* Must not return 0 (see nl_dump_check_consistent()).
1836 	 * Chose a value far away from 0.
1837 	 */
1838 	if (!res)
1839 		res = 0x80000000;
1840 	return res;
1841 }
1842 
/*
 * inet_dump_ifaddr - netlink dump callback listing IPv4 addresses
 * @skb: reply buffer being filled
 * @cb: dump state; cb->args[] holds the resume cursor:
 *        args[0] - device index hash bucket,
 *        args[1] - position of the device inside that bucket,
 *        args[2] - position of the address inside that device
 *
 * With strict checking the request is validated first and may select a single
 * device (by ifindex) and/or a target network namespace.
 *
 * Returns skb->len when anything was written, otherwise the error code (0
 * when the dump is complete).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	/* Pick up where the previous call stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		/* Single-device dump: no hash walk, only args[2] is used as
		 * cursor (updated by in_dev_dump_addr()).
		 */
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	/* Full dump: walk every bucket of the device index hash, starting at
	 * the saved bucket; s_idx only applies to that first bucket.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		cb->seq = inet_base_seq(tgt_net);
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Past the device we stopped on: its addresses
			 * are dumped from the beginning.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the cursor for the next call. */
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	/* netnsid >= 0 means inet_valid_dump_ifaddr_req() looked up a target
	 * namespace; release it.
	 */
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1925 
1926 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1927 		      u32 portid)
1928 {
1929 	struct inet_fill_args fillargs = {
1930 		.portid = portid,
1931 		.seq = nlh ? nlh->nlmsg_seq : 0,
1932 		.event = event,
1933 		.flags = 0,
1934 		.netnsid = -1,
1935 	};
1936 	struct sk_buff *skb;
1937 	int err = -ENOBUFS;
1938 	struct net *net;
1939 
1940 	net = dev_net(ifa->ifa_dev->dev);
1941 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1942 	if (!skb)
1943 		goto errout;
1944 
1945 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1946 	if (err < 0) {
1947 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1948 		WARN_ON(err == -EMSGSIZE);
1949 		kfree_skb(skb);
1950 		goto errout;
1951 	}
1952 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1953 	return;
1954 errout:
1955 	if (err < 0)
1956 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1957 }
1958 
1959 static size_t inet_get_link_af_size(const struct net_device *dev,
1960 				    u32 ext_filter_mask)
1961 {
1962 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1963 
1964 	if (!in_dev)
1965 		return 0;
1966 
1967 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1968 }
1969 
1970 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1971 			     u32 ext_filter_mask)
1972 {
1973 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1974 	struct nlattr *nla;
1975 	int i;
1976 
1977 	if (!in_dev)
1978 		return -ENODATA;
1979 
1980 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1981 	if (!nla)
1982 		return -EMSGSIZE;
1983 
1984 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1985 		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
1986 
1987 	return 0;
1988 }
1989 
/* Attribute policy for the IFLA_INET_* space: IFLA_INET_CONF is a nested
 * attribute (its members are checked in inet_validate_link_af()).
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1993 
1994 static int inet_validate_link_af(const struct net_device *dev,
1995 				 const struct nlattr *nla,
1996 				 struct netlink_ext_ack *extack)
1997 {
1998 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1999 	int err, rem;
2000 
2001 	if (dev && !__in_dev_get_rtnl(dev))
2002 		return -EAFNOSUPPORT;
2003 
2004 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2005 					  inet_af_policy, extack);
2006 	if (err < 0)
2007 		return err;
2008 
2009 	if (tb[IFLA_INET_CONF]) {
2010 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2011 			int cfgid = nla_type(a);
2012 
2013 			if (nla_len(a) < 4)
2014 				return -EINVAL;
2015 
2016 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2017 				return -EINVAL;
2018 		}
2019 	}
2020 
2021 	return 0;
2022 }
2023 
2024 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2025 			    struct netlink_ext_ack *extack)
2026 {
2027 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2028 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2029 	int rem;
2030 
2031 	if (!in_dev)
2032 		return -EAFNOSUPPORT;
2033 
2034 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2035 		return -EINVAL;
2036 
2037 	if (tb[IFLA_INET_CONF]) {
2038 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2039 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2040 	}
2041 
2042 	return 0;
2043 }
2044 
2045 static int inet_netconf_msgsize_devconf(int type)
2046 {
2047 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2048 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2049 	bool all = false;
2050 
2051 	if (type == NETCONFA_ALL)
2052 		all = true;
2053 
2054 	if (all || type == NETCONFA_FORWARDING)
2055 		size += nla_total_size(4);
2056 	if (all || type == NETCONFA_RP_FILTER)
2057 		size += nla_total_size(4);
2058 	if (all || type == NETCONFA_MC_FORWARDING)
2059 		size += nla_total_size(4);
2060 	if (all || type == NETCONFA_BC_FORWARDING)
2061 		size += nla_total_size(4);
2062 	if (all || type == NETCONFA_PROXY_NEIGH)
2063 		size += nla_total_size(4);
2064 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2065 		size += nla_total_size(4);
2066 
2067 	return size;
2068 }
2069 
2070 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2071 				     const struct ipv4_devconf *devconf,
2072 				     u32 portid, u32 seq, int event,
2073 				     unsigned int flags, int type)
2074 {
2075 	struct nlmsghdr  *nlh;
2076 	struct netconfmsg *ncm;
2077 	bool all = false;
2078 
2079 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2080 			flags);
2081 	if (!nlh)
2082 		return -EMSGSIZE;
2083 
2084 	if (type == NETCONFA_ALL)
2085 		all = true;
2086 
2087 	ncm = nlmsg_data(nlh);
2088 	ncm->ncm_family = AF_INET;
2089 
2090 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2091 		goto nla_put_failure;
2092 
2093 	if (!devconf)
2094 		goto out;
2095 
2096 	if ((all || type == NETCONFA_FORWARDING) &&
2097 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2098 			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2099 		goto nla_put_failure;
2100 	if ((all || type == NETCONFA_RP_FILTER) &&
2101 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2102 			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2103 		goto nla_put_failure;
2104 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2105 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2106 			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2107 		goto nla_put_failure;
2108 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2109 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2110 			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2111 		goto nla_put_failure;
2112 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2113 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2114 			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2115 		goto nla_put_failure;
2116 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2117 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2118 			IPV4_DEVCONF_RO(*devconf,
2119 					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2120 		goto nla_put_failure;
2121 
2122 out:
2123 	nlmsg_end(skb, nlh);
2124 	return 0;
2125 
2126 nla_put_failure:
2127 	nlmsg_cancel(skb, nlh);
2128 	return -EMSGSIZE;
2129 }
2130 
2131 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2132 				 int ifindex, struct ipv4_devconf *devconf)
2133 {
2134 	struct sk_buff *skb;
2135 	int err = -ENOBUFS;
2136 
2137 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2138 	if (!skb)
2139 		goto errout;
2140 
2141 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2142 					event, 0, type);
2143 	if (err < 0) {
2144 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2145 		WARN_ON(err == -EMSGSIZE);
2146 		kfree_skb(skb);
2147 		goto errout;
2148 	}
2149 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2150 	return;
2151 errout:
2152 	if (err < 0)
2153 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2154 }
2155 
/* Attribute policy for netconf requests: each listed attribute has .len set
 * to sizeof(int).  NETCONFA_MC_FORWARDING and NETCONFA_BC_FORWARDING have no
 * entry here.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2163 
2164 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2165 				      const struct nlmsghdr *nlh,
2166 				      struct nlattr **tb,
2167 				      struct netlink_ext_ack *extack)
2168 {
2169 	int i, err;
2170 
2171 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2172 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2173 		return -EINVAL;
2174 	}
2175 
2176 	if (!netlink_strict_get_check(skb))
2177 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2178 					      tb, NETCONFA_MAX,
2179 					      devconf_ipv4_policy, extack);
2180 
2181 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2182 					    tb, NETCONFA_MAX,
2183 					    devconf_ipv4_policy, extack);
2184 	if (err)
2185 		return err;
2186 
2187 	for (i = 0; i <= NETCONFA_MAX; i++) {
2188 		if (!tb[i])
2189 			continue;
2190 
2191 		switch (i) {
2192 		case NETCONFA_IFINDEX:
2193 			break;
2194 		default:
2195 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2196 			return -EINVAL;
2197 		}
2198 	}
2199 
2200 	return 0;
2201 }
2202 
2203 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2204 				    struct nlmsghdr *nlh,
2205 				    struct netlink_ext_ack *extack)
2206 {
2207 	struct net *net = sock_net(in_skb->sk);
2208 	struct nlattr *tb[NETCONFA_MAX + 1];
2209 	const struct ipv4_devconf *devconf;
2210 	struct in_device *in_dev = NULL;
2211 	struct net_device *dev = NULL;
2212 	struct sk_buff *skb;
2213 	int ifindex;
2214 	int err;
2215 
2216 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2217 	if (err)
2218 		return err;
2219 
2220 	if (!tb[NETCONFA_IFINDEX])
2221 		return -EINVAL;
2222 
2223 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2224 	switch (ifindex) {
2225 	case NETCONFA_IFINDEX_ALL:
2226 		devconf = net->ipv4.devconf_all;
2227 		break;
2228 	case NETCONFA_IFINDEX_DEFAULT:
2229 		devconf = net->ipv4.devconf_dflt;
2230 		break;
2231 	default:
2232 		err = -ENODEV;
2233 		dev = dev_get_by_index(net, ifindex);
2234 		if (dev)
2235 			in_dev = in_dev_get(dev);
2236 		if (!in_dev)
2237 			goto errout;
2238 		devconf = &in_dev->cnf;
2239 		break;
2240 	}
2241 
2242 	err = -ENOBUFS;
2243 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2244 	if (!skb)
2245 		goto errout;
2246 
2247 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2248 					NETLINK_CB(in_skb).portid,
2249 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2250 					NETCONFA_ALL);
2251 	if (err < 0) {
2252 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2253 		WARN_ON(err == -EMSGSIZE);
2254 		kfree_skb(skb);
2255 		goto errout;
2256 	}
2257 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2258 errout:
2259 	if (in_dev)
2260 		in_dev_put(in_dev);
2261 	dev_put(dev);
2262 	return err;
2263 }
2264 
2265 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2266 				     struct netlink_callback *cb)
2267 {
2268 	const struct nlmsghdr *nlh = cb->nlh;
2269 	struct net *net = sock_net(skb->sk);
2270 	struct {
2271 		unsigned long ifindex;
2272 		unsigned int all_default;
2273 	} *ctx = (void *)cb->ctx;
2274 	const struct in_device *in_dev;
2275 	struct net_device *dev;
2276 	int err = 0;
2277 
2278 	if (cb->strict_check) {
2279 		struct netlink_ext_ack *extack = cb->extack;
2280 		struct netconfmsg *ncm;
2281 
2282 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2283 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2284 			return -EINVAL;
2285 		}
2286 
2287 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2288 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2289 			return -EINVAL;
2290 		}
2291 	}
2292 
2293 	rcu_read_lock();
2294 	for_each_netdev_dump(net, dev, ctx->ifindex) {
2295 		in_dev = __in_dev_get_rcu(dev);
2296 		if (!in_dev)
2297 			continue;
2298 		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2299 						&in_dev->cnf,
2300 						NETLINK_CB(cb->skb).portid,
2301 						nlh->nlmsg_seq,
2302 						RTM_NEWNETCONF, NLM_F_MULTI,
2303 						NETCONFA_ALL);
2304 		if (err < 0)
2305 			goto done;
2306 	}
2307 	if (ctx->all_default == 0) {
2308 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2309 						net->ipv4.devconf_all,
2310 						NETLINK_CB(cb->skb).portid,
2311 						nlh->nlmsg_seq,
2312 						RTM_NEWNETCONF, NLM_F_MULTI,
2313 						NETCONFA_ALL);
2314 		if (err < 0)
2315 			goto done;
2316 		ctx->all_default++;
2317 	}
2318 	if (ctx->all_default == 1) {
2319 		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2320 						net->ipv4.devconf_dflt,
2321 						NETLINK_CB(cb->skb).portid,
2322 						nlh->nlmsg_seq,
2323 						RTM_NEWNETCONF, NLM_F_MULTI,
2324 						NETCONFA_ALL);
2325 		if (err < 0)
2326 			goto done;
2327 		ctx->all_default++;
2328 	}
2329 done:
2330 	if (err < 0 && likely(skb->len))
2331 		err = skb->len;
2332 	rcu_read_unlock();
2333 	return err;
2334 }
2335 
2336 #ifdef CONFIG_SYSCTL
2337 
2338 static void devinet_copy_dflt_conf(struct net *net, int i)
2339 {
2340 	struct net_device *dev;
2341 
2342 	rcu_read_lock();
2343 	for_each_netdev_rcu(net, dev) {
2344 		struct in_device *in_dev;
2345 
2346 		in_dev = __in_dev_get_rcu(dev);
2347 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2348 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2349 	}
2350 	rcu_read_unlock();
2351 }
2352 
2353 /* called with RTNL locked */
2354 static void inet_forward_change(struct net *net)
2355 {
2356 	struct net_device *dev;
2357 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2358 
2359 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2360 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2361 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2362 				    NETCONFA_FORWARDING,
2363 				    NETCONFA_IFINDEX_ALL,
2364 				    net->ipv4.devconf_all);
2365 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2366 				    NETCONFA_FORWARDING,
2367 				    NETCONFA_IFINDEX_DEFAULT,
2368 				    net->ipv4.devconf_dflt);
2369 
2370 	for_each_netdev(net, dev) {
2371 		struct in_device *in_dev;
2372 
2373 		if (on)
2374 			dev_disable_lro(dev);
2375 
2376 		in_dev = __in_dev_get_rtnl(dev);
2377 		if (in_dev) {
2378 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2379 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2380 						    NETCONFA_FORWARDING,
2381 						    dev->ifindex, &in_dev->cnf);
2382 		}
2383 	}
2384 }
2385 
2386 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2387 {
2388 	if (cnf == net->ipv4.devconf_dflt)
2389 		return NETCONFA_IFINDEX_DEFAULT;
2390 	else if (cnf == net->ipv4.devconf_all)
2391 		return NETCONFA_IFINDEX_ALL;
2392 	else {
2393 		struct in_device *idev
2394 			= container_of(cnf, struct in_device, cnf);
2395 		return idev->dev->ifindex;
2396 	}
2397 }
2398 
2399 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2400 			     void *buffer, size_t *lenp, loff_t *ppos)
2401 {
2402 	int old_value = *(int *)ctl->data;
2403 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2404 	int new_value = *(int *)ctl->data;
2405 
2406 	if (write) {
2407 		struct ipv4_devconf *cnf = ctl->extra1;
2408 		struct net *net = ctl->extra2;
2409 		int i = (int *)ctl->data - cnf->data;
2410 		int ifindex;
2411 
2412 		set_bit(i, cnf->state);
2413 
2414 		if (cnf == net->ipv4.devconf_dflt)
2415 			devinet_copy_dflt_conf(net, i);
2416 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2417 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2418 			if ((new_value == 0) && (old_value != 0))
2419 				rt_cache_flush(net);
2420 
2421 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2422 		    new_value != old_value)
2423 			rt_cache_flush(net);
2424 
2425 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2426 		    new_value != old_value) {
2427 			ifindex = devinet_conf_ifindex(net, cnf);
2428 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2429 						    NETCONFA_RP_FILTER,
2430 						    ifindex, cnf);
2431 		}
2432 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2433 		    new_value != old_value) {
2434 			ifindex = devinet_conf_ifindex(net, cnf);
2435 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2436 						    NETCONFA_PROXY_NEIGH,
2437 						    ifindex, cnf);
2438 		}
2439 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2440 		    new_value != old_value) {
2441 			ifindex = devinet_conf_ifindex(net, cnf);
2442 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2443 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2444 						    ifindex, cnf);
2445 		}
2446 	}
2447 
2448 	return ret;
2449 }
2450 
2451 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2452 				  void *buffer, size_t *lenp, loff_t *ppos)
2453 {
2454 	int *valp = ctl->data;
2455 	int val = *valp;
2456 	loff_t pos = *ppos;
2457 	struct net *net = ctl->extra2;
2458 	int ret;
2459 
2460 	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2461 		return -EPERM;
2462 
2463 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2464 
2465 	if (write && *valp != val) {
2466 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2467 			if (!rtnl_trylock()) {
2468 				/* Restore the original values before restarting */
2469 				*valp = val;
2470 				*ppos = pos;
2471 				return restart_syscall();
2472 			}
2473 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2474 				inet_forward_change(net);
2475 			} else {
2476 				struct ipv4_devconf *cnf = ctl->extra1;
2477 				struct in_device *idev =
2478 					container_of(cnf, struct in_device, cnf);
2479 				if (*valp)
2480 					dev_disable_lro(idev->dev);
2481 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2482 							    NETCONFA_FORWARDING,
2483 							    idev->dev->ifindex,
2484 							    cnf);
2485 			}
2486 			rtnl_unlock();
2487 			rt_cache_flush(net);
2488 		} else
2489 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2490 						    NETCONFA_FORWARDING,
2491 						    NETCONFA_IFINDEX_DEFAULT,
2492 						    net->ipv4.devconf_dflt);
2493 	}
2494 
2495 	return ret;
2496 }
2497 
2498 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2499 				void *buffer, size_t *lenp, loff_t *ppos)
2500 {
2501 	int *valp = ctl->data;
2502 	int val = *valp;
2503 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2504 	struct net *net = ctl->extra2;
2505 
2506 	if (write && *valp != val)
2507 		rt_cache_flush(net);
2508 
2509 	return ret;
2510 }
2511 
/*
 * Build one ctl_table initializer for devconf attribute IPV4_DEVCONF_<attr>.
 *
 * @attr: attribute suffix; IPV4_DEVCONF_<attr> - 1 is the offset into data[]
 * @name: procname of the entry
 * @mval: file mode
 * @proc: proc_handler
 *
 * .data and .extra1 refer to the global ipv4_devconf here;
 * __devinet_sysctl_register() rebases them onto the instance it registers.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2534 
/*
 * Template for the per-devconf sysctl directory (net/ipv4/conf/<name>).
 *
 * __devinet_sysctl_register() duplicates this whole structure with
 * kmemdup() and then retargets every entry except the last array slot to
 * the devconf instance being registered; sysctl_header holds the handle
 * returned by the registration.
 *
 * "forwarding" uses devinet_sysctl_forward(), the *_FLUSHING_ENTRY rows use
 * ipv4_doint_and_flush(), and all other rows use devinet_conf_proc().
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
					"arp_evict_nocarrier"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2588 
2589 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2590 				     int ifindex, struct ipv4_devconf *p)
2591 {
2592 	int i;
2593 	struct devinet_sysctl_table *t;
2594 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2595 
2596 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2597 	if (!t)
2598 		goto out;
2599 
2600 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2601 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2602 		t->devinet_vars[i].extra1 = p;
2603 		t->devinet_vars[i].extra2 = net;
2604 	}
2605 
2606 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2607 
2608 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2609 	if (!t->sysctl_header)
2610 		goto free;
2611 
2612 	p->sysctl = t;
2613 
2614 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2615 				    ifindex, p);
2616 	return 0;
2617 
2618 free:
2619 	kfree(t);
2620 out:
2621 	return -ENOMEM;
2622 }
2623 
2624 static void __devinet_sysctl_unregister(struct net *net,
2625 					struct ipv4_devconf *cnf, int ifindex)
2626 {
2627 	struct devinet_sysctl_table *t = cnf->sysctl;
2628 
2629 	if (t) {
2630 		cnf->sysctl = NULL;
2631 		unregister_net_sysctl_table(t->sysctl_header);
2632 		kfree(t);
2633 	}
2634 
2635 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2636 }
2637 
2638 static int devinet_sysctl_register(struct in_device *idev)
2639 {
2640 	int err;
2641 
2642 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2643 		return -EINVAL;
2644 
2645 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2646 	if (err)
2647 		return err;
2648 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2649 					idev->dev->ifindex, &idev->cnf);
2650 	if (err)
2651 		neigh_sysctl_unregister(idev->arp_parms);
2652 	return err;
2653 }
2654 
2655 static void devinet_sysctl_unregister(struct in_device *idev)
2656 {
2657 	struct net *net = dev_net(idev->dev);
2658 
2659 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2660 	neigh_sysctl_unregister(idev->arp_parms);
2661 }
2662 
/*
 * Template for the "ip_forward" sysctl, handled by devinet_sysctl_forward().
 * devinet_init_net() duplicates it per namespace, repoints .data/.extra1 at
 * that namespace's "all" devconf and .extra2 at the namespace, and registers
 * the copy under "net/ipv4".
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
2676 #endif
2677 
2678 static __net_init int devinet_init_net(struct net *net)
2679 {
2680 	int err;
2681 	struct ipv4_devconf *all, *dflt;
2682 #ifdef CONFIG_SYSCTL
2683 	struct ctl_table *tbl;
2684 	struct ctl_table_header *forw_hdr;
2685 #endif
2686 
2687 	err = -ENOMEM;
2688 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2689 	if (!all)
2690 		goto err_alloc_all;
2691 
2692 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2693 	if (!dflt)
2694 		goto err_alloc_dflt;
2695 
2696 #ifdef CONFIG_SYSCTL
2697 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2698 	if (!tbl)
2699 		goto err_alloc_ctl;
2700 
2701 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2702 	tbl[0].extra1 = all;
2703 	tbl[0].extra2 = net;
2704 #endif
2705 
2706 	if (!net_eq(net, &init_net)) {
2707 		switch (net_inherit_devconf()) {
2708 		case 3:
2709 			/* copy from the current netns */
2710 			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2711 			       sizeof(ipv4_devconf));
2712 			memcpy(dflt,
2713 			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2714 			       sizeof(ipv4_devconf_dflt));
2715 			break;
2716 		case 0:
2717 		case 1:
2718 			/* copy from init_net */
2719 			memcpy(all, init_net.ipv4.devconf_all,
2720 			       sizeof(ipv4_devconf));
2721 			memcpy(dflt, init_net.ipv4.devconf_dflt,
2722 			       sizeof(ipv4_devconf_dflt));
2723 			break;
2724 		case 2:
2725 			/* use compiled values */
2726 			break;
2727 		}
2728 	}
2729 
2730 #ifdef CONFIG_SYSCTL
2731 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2732 	if (err < 0)
2733 		goto err_reg_all;
2734 
2735 	err = __devinet_sysctl_register(net, "default",
2736 					NETCONFA_IFINDEX_DEFAULT, dflt);
2737 	if (err < 0)
2738 		goto err_reg_dflt;
2739 
2740 	err = -ENOMEM;
2741 	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2742 					  ARRAY_SIZE(ctl_forward_entry));
2743 	if (!forw_hdr)
2744 		goto err_reg_ctl;
2745 	net->ipv4.forw_hdr = forw_hdr;
2746 #endif
2747 
2748 	net->ipv4.devconf_all = all;
2749 	net->ipv4.devconf_dflt = dflt;
2750 	return 0;
2751 
2752 #ifdef CONFIG_SYSCTL
2753 err_reg_ctl:
2754 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2755 err_reg_dflt:
2756 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2757 err_reg_all:
2758 	kfree(tbl);
2759 err_alloc_ctl:
2760 #endif
2761 	kfree(dflt);
2762 err_alloc_dflt:
2763 	kfree(all);
2764 err_alloc_all:
2765 	return err;
2766 }
2767 
2768 static __net_exit void devinet_exit_net(struct net *net)
2769 {
2770 #ifdef CONFIG_SYSCTL
2771 	struct ctl_table *tbl;
2772 
2773 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2774 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2775 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2776 				    NETCONFA_IFINDEX_DEFAULT);
2777 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2778 				    NETCONFA_IFINDEX_ALL);
2779 	kfree(tbl);
2780 #endif
2781 	kfree(net->ipv4.devconf_dflt);
2782 	kfree(net->ipv4.devconf_all);
2783 }
2784 
/*
 * Per-namespace hooks for this file; registered with
 * register_pernet_subsys() from devinet_init().
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2789 
/*
 * AF_INET link-level address-family callbacks; registered with
 * rtnl_af_register() from devinet_init().
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2797 
2798 void __init devinet_init(void)
2799 {
2800 	int i;
2801 
2802 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2803 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2804 
2805 	register_pernet_subsys(&devinet_ops);
2806 	register_netdevice_notifier(&ip_netdev_notifier);
2807 
2808 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2809 
2810 	rtnl_af_register(&inet_af_ops);
2811 
2812 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2813 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2814 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2815 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2816 		      inet_netconf_dump_devconf,
2817 		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
2818 }
2819