xref: /linux/net/ipv4/devinet.c (revision 63d6e721527e175edbbe8513cba8e4a0caf94da5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	NET3	IP device support routines.
4  *
5  *	Derived from the IP parts of dev.c 1.0.19
6  * 		Authors:	Ross Biro
7  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *	Additional Authors:
11  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *	Changes:
15  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16  *					lists.
17  *		Cyrus Durgin:		updated for kmod
18  *		Matthias Andree:	in devinet_ioctl, compare label and
19  *					address (4.4BSD alias style support),
20  *					fall back to comparing just the label
21  *					if no match found.
22  */
23 
24 
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56 
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64 
/*
 * Address flags that are meaningful for IPv6 only.  They are masked off
 * before an IPv4 address is inserted into a device's address list.
 */
#define IPV6ONLY_FLAGS							\
	(IFA_F_TENTATIVE | IFA_F_OPTIMISTIC | IFA_F_DADFAILED |	\
	 IFA_F_NODAD | IFA_F_HOMEADDRESS |				\
	 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 	},
79 };
80 
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 	.data = {
83 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90 	},
91 };
92 
/*
 * Read attribute @attr from the per-namespace default devconf of @net.
 * @net is parenthesised so that any pointer-valued expression (not just a
 * plain identifier) can be passed without precedence surprises.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95 
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 	[IFA_LOCAL]     	= { .type = NLA_U32 },
98 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102 	[IFA_FLAGS]		= { .type = NLA_U32 },
103 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
104 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
105 };
106 
107 struct inet_fill_args {
108 	u32 portid;
109 	u32 seq;
110 	int event;
111 	unsigned int flags;
112 	int netnsid;
113 	int ifindex;
114 };
115 
116 #define IN4_ADDR_HSIZE_SHIFT	8
117 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
118 
119 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
120 
121 static u32 inet_addr_hash(const struct net *net, __be32 addr)
122 {
123 	u32 val = (__force u32) addr ^ net_hash_mix(net);
124 
125 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
126 }
127 
128 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
129 {
130 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
131 
132 	ASSERT_RTNL();
133 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
134 }
135 
136 static void inet_hash_remove(struct in_ifaddr *ifa)
137 {
138 	ASSERT_RTNL();
139 	hlist_del_init_rcu(&ifa->hash);
140 }
141 
142 /**
143  * __ip_dev_find - find the first device with a given source address.
144  * @net: the net namespace
145  * @addr: the source address
146  * @devref: if true, take a reference on the found device
147  *
148  * If a caller uses devref=false, it should be protected by RCU, or RTNL
149  */
150 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
151 {
152 	struct net_device *result = NULL;
153 	struct in_ifaddr *ifa;
154 
155 	rcu_read_lock();
156 	ifa = inet_lookup_ifaddr_rcu(net, addr);
157 	if (!ifa) {
158 		struct flowi4 fl4 = { .daddr = addr };
159 		struct fib_result res = { 0 };
160 		struct fib_table *local;
161 
162 		/* Fallback to FIB local table so that communication
163 		 * over loopback subnets work.
164 		 */
165 		local = fib_get_table(net, RT_TABLE_LOCAL);
166 		if (local &&
167 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
168 		    res.type == RTN_LOCAL)
169 			result = FIB_RES_DEV(res);
170 	} else {
171 		result = ifa->ifa_dev->dev;
172 	}
173 	if (result && devref)
174 		dev_hold(result);
175 	rcu_read_unlock();
176 	return result;
177 }
178 EXPORT_SYMBOL(__ip_dev_find);
179 
180 /* called under RCU lock */
181 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
182 {
183 	u32 hash = inet_addr_hash(net, addr);
184 	struct in_ifaddr *ifa;
185 
186 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
187 		if (ifa->ifa_local == addr &&
188 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
189 			return ifa;
190 
191 	return NULL;
192 }
193 
194 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
195 
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
197 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
198 static void inet_del_ifa(struct in_device *in_dev,
199 			 struct in_ifaddr __rcu **ifap,
200 			 int destroy);
201 #ifdef CONFIG_SYSCTL
202 static int devinet_sysctl_register(struct in_device *idev);
203 static void devinet_sysctl_unregister(struct in_device *idev);
204 #else
205 static int devinet_sysctl_register(struct in_device *idev)
206 {
207 	return 0;
208 }
209 static void devinet_sysctl_unregister(struct in_device *idev)
210 {
211 }
212 #endif
213 
214 /* Locks all the inet devices. */
215 
216 static struct in_ifaddr *inet_alloc_ifa(void)
217 {
218 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
219 }
220 
221 static void inet_rcu_free_ifa(struct rcu_head *head)
222 {
223 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
224 	if (ifa->ifa_dev)
225 		in_dev_put(ifa->ifa_dev);
226 	kfree(ifa);
227 }
228 
229 static void inet_free_ifa(struct in_ifaddr *ifa)
230 {
231 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
232 }
233 
234 void in_dev_finish_destroy(struct in_device *idev)
235 {
236 	struct net_device *dev = idev->dev;
237 
238 	WARN_ON(idev->ifa_list);
239 	WARN_ON(idev->mc_list);
240 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
241 #ifdef NET_REFCNT_DEBUG
242 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
243 #endif
244 	dev_put(dev);
245 	if (!idev->dead)
246 		pr_err("Freeing alive in_device %p\n", idev);
247 	else
248 		kfree(idev);
249 }
250 EXPORT_SYMBOL(in_dev_finish_destroy);
251 
252 static struct in_device *inetdev_init(struct net_device *dev)
253 {
254 	struct in_device *in_dev;
255 	int err = -ENOMEM;
256 
257 	ASSERT_RTNL();
258 
259 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
260 	if (!in_dev)
261 		goto out;
262 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
263 			sizeof(in_dev->cnf));
264 	in_dev->cnf.sysctl = NULL;
265 	in_dev->dev = dev;
266 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
267 	if (!in_dev->arp_parms)
268 		goto out_kfree;
269 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
270 		dev_disable_lro(dev);
271 	/* Reference in_dev->dev */
272 	dev_hold(dev);
273 	/* Account for reference dev->ip_ptr (below) */
274 	refcount_set(&in_dev->refcnt, 1);
275 
276 	err = devinet_sysctl_register(in_dev);
277 	if (err) {
278 		in_dev->dead = 1;
279 		in_dev_put(in_dev);
280 		in_dev = NULL;
281 		goto out;
282 	}
283 	ip_mc_init_dev(in_dev);
284 	if (dev->flags & IFF_UP)
285 		ip_mc_up(in_dev);
286 
287 	/* we can receive as soon as ip_ptr is set -- do this last */
288 	rcu_assign_pointer(dev->ip_ptr, in_dev);
289 out:
290 	return in_dev ?: ERR_PTR(err);
291 out_kfree:
292 	kfree(in_dev);
293 	in_dev = NULL;
294 	goto out;
295 }
296 
297 static void in_dev_rcu_put(struct rcu_head *head)
298 {
299 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
300 	in_dev_put(idev);
301 }
302 
303 static void inetdev_destroy(struct in_device *in_dev)
304 {
305 	struct net_device *dev;
306 	struct in_ifaddr *ifa;
307 
308 	ASSERT_RTNL();
309 
310 	dev = in_dev->dev;
311 
312 	in_dev->dead = 1;
313 
314 	ip_mc_destroy_dev(in_dev);
315 
316 	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
317 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
318 		inet_free_ifa(ifa);
319 	}
320 
321 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
322 
323 	devinet_sysctl_unregister(in_dev);
324 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
325 	arp_ifdown(dev);
326 
327 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
328 }
329 
330 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
331 {
332 	const struct in_ifaddr *ifa;
333 
334 	rcu_read_lock();
335 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
336 		if (inet_ifa_match(a, ifa)) {
337 			if (!b || inet_ifa_match(b, ifa)) {
338 				rcu_read_unlock();
339 				return 1;
340 			}
341 		}
342 	}
343 	rcu_read_unlock();
344 	return 0;
345 }
346 
347 static void __inet_del_ifa(struct in_device *in_dev,
348 			   struct in_ifaddr __rcu **ifap,
349 			   int destroy, struct nlmsghdr *nlh, u32 portid)
350 {
351 	struct in_ifaddr *promote = NULL;
352 	struct in_ifaddr *ifa, *ifa1;
353 	struct in_ifaddr *last_prim;
354 	struct in_ifaddr *prev_prom = NULL;
355 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
356 
357 	ASSERT_RTNL();
358 
359 	ifa1 = rtnl_dereference(*ifap);
360 	last_prim = rtnl_dereference(in_dev->ifa_list);
361 	if (in_dev->dead)
362 		goto no_promotions;
363 
364 	/* 1. Deleting primary ifaddr forces deletion all secondaries
365 	 * unless alias promotion is set
366 	 **/
367 
368 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
369 		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
370 
371 		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
372 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
373 			    ifa1->ifa_scope <= ifa->ifa_scope)
374 				last_prim = ifa;
375 
376 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
377 			    ifa1->ifa_mask != ifa->ifa_mask ||
378 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
379 				ifap1 = &ifa->ifa_next;
380 				prev_prom = ifa;
381 				continue;
382 			}
383 
384 			if (!do_promote) {
385 				inet_hash_remove(ifa);
386 				*ifap1 = ifa->ifa_next;
387 
388 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
389 				blocking_notifier_call_chain(&inetaddr_chain,
390 						NETDEV_DOWN, ifa);
391 				inet_free_ifa(ifa);
392 			} else {
393 				promote = ifa;
394 				break;
395 			}
396 		}
397 	}
398 
399 	/* On promotion all secondaries from subnet are changing
400 	 * the primary IP, we must remove all their routes silently
401 	 * and later to add them back with new prefsrc. Do this
402 	 * while all addresses are on the device list.
403 	 */
404 	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
405 		if (ifa1->ifa_mask == ifa->ifa_mask &&
406 		    inet_ifa_match(ifa1->ifa_address, ifa))
407 			fib_del_ifaddr(ifa, ifa1);
408 	}
409 
410 no_promotions:
411 	/* 2. Unlink it */
412 
413 	*ifap = ifa1->ifa_next;
414 	inet_hash_remove(ifa1);
415 
416 	/* 3. Announce address deletion */
417 
418 	/* Send message first, then call notifier.
419 	   At first sight, FIB update triggered by notifier
420 	   will refer to already deleted ifaddr, that could confuse
421 	   netlink listeners. It is not true: look, gated sees
422 	   that route deleted and if it still thinks that ifaddr
423 	   is valid, it will try to restore deleted routes... Grr.
424 	   So that, this order is correct.
425 	 */
426 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
427 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
428 
429 	if (promote) {
430 		struct in_ifaddr *next_sec;
431 
432 		next_sec = rtnl_dereference(promote->ifa_next);
433 		if (prev_prom) {
434 			struct in_ifaddr *last_sec;
435 
436 			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
437 
438 			last_sec = rtnl_dereference(last_prim->ifa_next);
439 			rcu_assign_pointer(promote->ifa_next, last_sec);
440 			rcu_assign_pointer(last_prim->ifa_next, promote);
441 		}
442 
443 		promote->ifa_flags &= ~IFA_F_SECONDARY;
444 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
445 		blocking_notifier_call_chain(&inetaddr_chain,
446 				NETDEV_UP, promote);
447 		for (ifa = next_sec; ifa;
448 		     ifa = rtnl_dereference(ifa->ifa_next)) {
449 			if (ifa1->ifa_mask != ifa->ifa_mask ||
450 			    !inet_ifa_match(ifa1->ifa_address, ifa))
451 					continue;
452 			fib_add_ifaddr(ifa);
453 		}
454 
455 	}
456 	if (destroy)
457 		inet_free_ifa(ifa1);
458 }
459 
460 static void inet_del_ifa(struct in_device *in_dev,
461 			 struct in_ifaddr __rcu **ifap,
462 			 int destroy)
463 {
464 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
465 }
466 
/* Address lifetime worker; the definition follows further down. */
static void check_lifetime(struct work_struct *work);

/* The one delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
470 
471 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
472 			     u32 portid, struct netlink_ext_ack *extack)
473 {
474 	struct in_ifaddr __rcu **last_primary, **ifap;
475 	struct in_device *in_dev = ifa->ifa_dev;
476 	struct in_validator_info ivi;
477 	struct in_ifaddr *ifa1;
478 	int ret;
479 
480 	ASSERT_RTNL();
481 
482 	if (!ifa->ifa_local) {
483 		inet_free_ifa(ifa);
484 		return 0;
485 	}
486 
487 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
488 	last_primary = &in_dev->ifa_list;
489 
490 	/* Don't set IPv6 only flags to IPv4 addresses */
491 	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
492 
493 	ifap = &in_dev->ifa_list;
494 	ifa1 = rtnl_dereference(*ifap);
495 
496 	while (ifa1) {
497 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
498 		    ifa->ifa_scope <= ifa1->ifa_scope)
499 			last_primary = &ifa1->ifa_next;
500 		if (ifa1->ifa_mask == ifa->ifa_mask &&
501 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
502 			if (ifa1->ifa_local == ifa->ifa_local) {
503 				inet_free_ifa(ifa);
504 				return -EEXIST;
505 			}
506 			if (ifa1->ifa_scope != ifa->ifa_scope) {
507 				inet_free_ifa(ifa);
508 				return -EINVAL;
509 			}
510 			ifa->ifa_flags |= IFA_F_SECONDARY;
511 		}
512 
513 		ifap = &ifa1->ifa_next;
514 		ifa1 = rtnl_dereference(*ifap);
515 	}
516 
517 	/* Allow any devices that wish to register ifaddr validtors to weigh
518 	 * in now, before changes are committed.  The rntl lock is serializing
519 	 * access here, so the state should not change between a validator call
520 	 * and a final notify on commit.  This isn't invoked on promotion under
521 	 * the assumption that validators are checking the address itself, and
522 	 * not the flags.
523 	 */
524 	ivi.ivi_addr = ifa->ifa_address;
525 	ivi.ivi_dev = ifa->ifa_dev;
526 	ivi.extack = extack;
527 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
528 					   NETDEV_UP, &ivi);
529 	ret = notifier_to_errno(ret);
530 	if (ret) {
531 		inet_free_ifa(ifa);
532 		return ret;
533 	}
534 
535 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
536 		prandom_seed((__force u32) ifa->ifa_local);
537 		ifap = last_primary;
538 	}
539 
540 	rcu_assign_pointer(ifa->ifa_next, *ifap);
541 	rcu_assign_pointer(*ifap, ifa);
542 
543 	inet_hash_insert(dev_net(in_dev->dev), ifa);
544 
545 	cancel_delayed_work(&check_lifetime_work);
546 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
547 
548 	/* Send message first, then call notifier.
549 	   Notifier will trigger FIB update, so that
550 	   listeners of netlink will know about new ifaddr */
551 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
552 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
553 
554 	return 0;
555 }
556 
557 static int inet_insert_ifa(struct in_ifaddr *ifa)
558 {
559 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
560 }
561 
562 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
563 {
564 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
565 
566 	ASSERT_RTNL();
567 
568 	if (!in_dev) {
569 		inet_free_ifa(ifa);
570 		return -ENOBUFS;
571 	}
572 	ipv4_devconf_setall(in_dev);
573 	neigh_parms_data_state_setall(in_dev->arp_parms);
574 	if (ifa->ifa_dev != in_dev) {
575 		WARN_ON(ifa->ifa_dev);
576 		in_dev_hold(in_dev);
577 		ifa->ifa_dev = in_dev;
578 	}
579 	if (ipv4_is_loopback(ifa->ifa_local))
580 		ifa->ifa_scope = RT_SCOPE_HOST;
581 	return inet_insert_ifa(ifa);
582 }
583 
584 /* Caller must hold RCU or RTNL :
585  * We dont take a reference on found in_device
586  */
587 struct in_device *inetdev_by_index(struct net *net, int ifindex)
588 {
589 	struct net_device *dev;
590 	struct in_device *in_dev = NULL;
591 
592 	rcu_read_lock();
593 	dev = dev_get_by_index_rcu(net, ifindex);
594 	if (dev)
595 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
596 	rcu_read_unlock();
597 	return in_dev;
598 }
599 EXPORT_SYMBOL(inetdev_by_index);
600 
601 /* Called only from RTNL semaphored context. No locks. */
602 
603 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
604 				    __be32 mask)
605 {
606 	struct in_ifaddr *ifa;
607 
608 	ASSERT_RTNL();
609 
610 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
611 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
612 			return ifa;
613 	}
614 	return NULL;
615 }
616 
617 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
618 {
619 	struct ip_mreqn mreq = {
620 		.imr_multiaddr.s_addr = ifa->ifa_address,
621 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
622 	};
623 	int ret;
624 
625 	ASSERT_RTNL();
626 
627 	lock_sock(sk);
628 	if (join)
629 		ret = ip_mc_join_group(sk, &mreq);
630 	else
631 		ret = ip_mc_leave_group(sk, &mreq);
632 	release_sock(sk);
633 
634 	return ret;
635 }
636 
637 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
638 			    struct netlink_ext_ack *extack)
639 {
640 	struct net *net = sock_net(skb->sk);
641 	struct in_ifaddr __rcu **ifap;
642 	struct nlattr *tb[IFA_MAX+1];
643 	struct in_device *in_dev;
644 	struct ifaddrmsg *ifm;
645 	struct in_ifaddr *ifa;
646 
647 	int err = -EINVAL;
648 
649 	ASSERT_RTNL();
650 
651 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
652 				     ifa_ipv4_policy, extack);
653 	if (err < 0)
654 		goto errout;
655 
656 	ifm = nlmsg_data(nlh);
657 	in_dev = inetdev_by_index(net, ifm->ifa_index);
658 	if (!in_dev) {
659 		err = -ENODEV;
660 		goto errout;
661 	}
662 
663 	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
664 	     ifap = &ifa->ifa_next) {
665 		if (tb[IFA_LOCAL] &&
666 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
667 			continue;
668 
669 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
670 			continue;
671 
672 		if (tb[IFA_ADDRESS] &&
673 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
674 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
675 			continue;
676 
677 		if (ipv4_is_multicast(ifa->ifa_address))
678 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
679 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
680 		return 0;
681 	}
682 
683 	err = -EADDRNOTAVAIL;
684 errout:
685 	return err;
686 }
687 
/* Lifetime value meaning "never expires"; check_lifetime() skips it. */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
689 
690 static void check_lifetime(struct work_struct *work)
691 {
692 	unsigned long now, next, next_sec, next_sched;
693 	struct in_ifaddr *ifa;
694 	struct hlist_node *n;
695 	int i;
696 
697 	now = jiffies;
698 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
699 
700 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
701 		bool change_needed = false;
702 
703 		rcu_read_lock();
704 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
705 			unsigned long age;
706 
707 			if (ifa->ifa_flags & IFA_F_PERMANENT)
708 				continue;
709 
710 			/* We try to batch several events at once. */
711 			age = (now - ifa->ifa_tstamp +
712 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
713 
714 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
715 			    age >= ifa->ifa_valid_lft) {
716 				change_needed = true;
717 			} else if (ifa->ifa_preferred_lft ==
718 				   INFINITY_LIFE_TIME) {
719 				continue;
720 			} else if (age >= ifa->ifa_preferred_lft) {
721 				if (time_before(ifa->ifa_tstamp +
722 						ifa->ifa_valid_lft * HZ, next))
723 					next = ifa->ifa_tstamp +
724 					       ifa->ifa_valid_lft * HZ;
725 
726 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
727 					change_needed = true;
728 			} else if (time_before(ifa->ifa_tstamp +
729 					       ifa->ifa_preferred_lft * HZ,
730 					       next)) {
731 				next = ifa->ifa_tstamp +
732 				       ifa->ifa_preferred_lft * HZ;
733 			}
734 		}
735 		rcu_read_unlock();
736 		if (!change_needed)
737 			continue;
738 		rtnl_lock();
739 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
740 			unsigned long age;
741 
742 			if (ifa->ifa_flags & IFA_F_PERMANENT)
743 				continue;
744 
745 			/* We try to batch several events at once. */
746 			age = (now - ifa->ifa_tstamp +
747 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
748 
749 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
750 			    age >= ifa->ifa_valid_lft) {
751 				struct in_ifaddr __rcu **ifap;
752 				struct in_ifaddr *tmp;
753 
754 				ifap = &ifa->ifa_dev->ifa_list;
755 				tmp = rtnl_dereference(*ifap);
756 				while (tmp) {
757 					if (tmp == ifa) {
758 						inet_del_ifa(ifa->ifa_dev,
759 							     ifap, 1);
760 						break;
761 					}
762 					ifap = &tmp->ifa_next;
763 					tmp = rtnl_dereference(*ifap);
764 				}
765 			} else if (ifa->ifa_preferred_lft !=
766 				   INFINITY_LIFE_TIME &&
767 				   age >= ifa->ifa_preferred_lft &&
768 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
769 				ifa->ifa_flags |= IFA_F_DEPRECATED;
770 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
771 			}
772 		}
773 		rtnl_unlock();
774 	}
775 
776 	next_sec = round_jiffies_up(next);
777 	next_sched = next;
778 
779 	/* If rounded timeout is accurate enough, accept it. */
780 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
781 		next_sched = next_sec;
782 
783 	now = jiffies;
784 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
785 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
786 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
787 
788 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
789 			next_sched - now);
790 }
791 
792 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
793 			     __u32 prefered_lft)
794 {
795 	unsigned long timeout;
796 
797 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
798 
799 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
800 	if (addrconf_finite_timeout(timeout))
801 		ifa->ifa_valid_lft = timeout;
802 	else
803 		ifa->ifa_flags |= IFA_F_PERMANENT;
804 
805 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
806 	if (addrconf_finite_timeout(timeout)) {
807 		if (timeout == 0)
808 			ifa->ifa_flags |= IFA_F_DEPRECATED;
809 		ifa->ifa_preferred_lft = timeout;
810 	}
811 	ifa->ifa_tstamp = jiffies;
812 	if (!ifa->ifa_cstamp)
813 		ifa->ifa_cstamp = ifa->ifa_tstamp;
814 }
815 
816 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
817 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
818 				       struct netlink_ext_ack *extack)
819 {
820 	struct nlattr *tb[IFA_MAX+1];
821 	struct in_ifaddr *ifa;
822 	struct ifaddrmsg *ifm;
823 	struct net_device *dev;
824 	struct in_device *in_dev;
825 	int err;
826 
827 	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
828 				     ifa_ipv4_policy, extack);
829 	if (err < 0)
830 		goto errout;
831 
832 	ifm = nlmsg_data(nlh);
833 	err = -EINVAL;
834 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
835 		goto errout;
836 
837 	dev = __dev_get_by_index(net, ifm->ifa_index);
838 	err = -ENODEV;
839 	if (!dev)
840 		goto errout;
841 
842 	in_dev = __in_dev_get_rtnl(dev);
843 	err = -ENOBUFS;
844 	if (!in_dev)
845 		goto errout;
846 
847 	ifa = inet_alloc_ifa();
848 	if (!ifa)
849 		/*
850 		 * A potential indev allocation can be left alive, it stays
851 		 * assigned to its device and is destroy with it.
852 		 */
853 		goto errout;
854 
855 	ipv4_devconf_setall(in_dev);
856 	neigh_parms_data_state_setall(in_dev->arp_parms);
857 	in_dev_hold(in_dev);
858 
859 	if (!tb[IFA_ADDRESS])
860 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
861 
862 	INIT_HLIST_NODE(&ifa->hash);
863 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
864 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
865 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
866 					 ifm->ifa_flags;
867 	ifa->ifa_scope = ifm->ifa_scope;
868 	ifa->ifa_dev = in_dev;
869 
870 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
871 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
872 
873 	if (tb[IFA_BROADCAST])
874 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
875 
876 	if (tb[IFA_LABEL])
877 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
878 	else
879 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
880 
881 	if (tb[IFA_RT_PRIORITY])
882 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
883 
884 	if (tb[IFA_CACHEINFO]) {
885 		struct ifa_cacheinfo *ci;
886 
887 		ci = nla_data(tb[IFA_CACHEINFO]);
888 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
889 			err = -EINVAL;
890 			goto errout_free;
891 		}
892 		*pvalid_lft = ci->ifa_valid;
893 		*pprefered_lft = ci->ifa_prefered;
894 	}
895 
896 	return ifa;
897 
898 errout_free:
899 	inet_free_ifa(ifa);
900 errout:
901 	return ERR_PTR(err);
902 }
903 
904 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
905 {
906 	struct in_device *in_dev = ifa->ifa_dev;
907 	struct in_ifaddr *ifa1;
908 
909 	if (!ifa->ifa_local)
910 		return NULL;
911 
912 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
913 		if (ifa1->ifa_mask == ifa->ifa_mask &&
914 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
915 		    ifa1->ifa_local == ifa->ifa_local)
916 			return ifa1;
917 	}
918 	return NULL;
919 }
920 
921 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
922 			    struct netlink_ext_ack *extack)
923 {
924 	struct net *net = sock_net(skb->sk);
925 	struct in_ifaddr *ifa;
926 	struct in_ifaddr *ifa_existing;
927 	__u32 valid_lft = INFINITY_LIFE_TIME;
928 	__u32 prefered_lft = INFINITY_LIFE_TIME;
929 
930 	ASSERT_RTNL();
931 
932 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
933 	if (IS_ERR(ifa))
934 		return PTR_ERR(ifa);
935 
936 	ifa_existing = find_matching_ifa(ifa);
937 	if (!ifa_existing) {
938 		/* It would be best to check for !NLM_F_CREATE here but
939 		 * userspace already relies on not having to provide this.
940 		 */
941 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
942 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
943 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
944 					       true, ifa);
945 
946 			if (ret < 0) {
947 				inet_free_ifa(ifa);
948 				return ret;
949 			}
950 		}
951 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
952 					 extack);
953 	} else {
954 		u32 new_metric = ifa->ifa_rt_priority;
955 
956 		inet_free_ifa(ifa);
957 
958 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
959 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
960 			return -EEXIST;
961 		ifa = ifa_existing;
962 
963 		if (ifa->ifa_rt_priority != new_metric) {
964 			fib_modify_prefix_metric(ifa, new_metric);
965 			ifa->ifa_rt_priority = new_metric;
966 		}
967 
968 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
969 		cancel_delayed_work(&check_lifetime_work);
970 		queue_delayed_work(system_power_efficient_wq,
971 				&check_lifetime_work, 0);
972 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
973 	}
974 	return 0;
975 }
976 
977 /*
978  *	Determine a default network mask, based on the IP address.
979  */
980 
981 static int inet_abc_len(__be32 addr)
982 {
983 	int rc = -1;	/* Something else, probably a multicast. */
984 
985 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
986 		rc = 0;
987 	else {
988 		__u32 haddr = ntohl(addr);
989 		if (IN_CLASSA(haddr))
990 			rc = 8;
991 		else if (IN_CLASSB(haddr))
992 			rc = 16;
993 		else if (IN_CLASSC(haddr))
994 			rc = 24;
995 		else if (IN_CLASSE(haddr))
996 			rc = 32;
997 	}
998 
999 	return rc;
1000 }
1001 
1002 
1003 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1004 {
1005 	struct sockaddr_in sin_orig;
1006 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1007 	struct in_ifaddr __rcu **ifap = NULL;
1008 	struct in_device *in_dev;
1009 	struct in_ifaddr *ifa = NULL;
1010 	struct net_device *dev;
1011 	char *colon;
1012 	int ret = -EFAULT;
1013 	int tryaddrmatch = 0;
1014 
1015 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1016 
1017 	/* save original address for comparison */
1018 	memcpy(&sin_orig, sin, sizeof(*sin));
1019 
1020 	colon = strchr(ifr->ifr_name, ':');
1021 	if (colon)
1022 		*colon = 0;
1023 
1024 	dev_load(net, ifr->ifr_name);
1025 
1026 	switch (cmd) {
1027 	case SIOCGIFADDR:	/* Get interface address */
1028 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1029 	case SIOCGIFDSTADDR:	/* Get the destination address */
1030 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1031 		/* Note that these ioctls will not sleep,
1032 		   so that we do not impose a lock.
1033 		   One day we will be forced to put shlock here (I mean SMP)
1034 		 */
1035 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1036 		memset(sin, 0, sizeof(*sin));
1037 		sin->sin_family = AF_INET;
1038 		break;
1039 
1040 	case SIOCSIFFLAGS:
1041 		ret = -EPERM;
1042 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1043 			goto out;
1044 		break;
1045 	case SIOCSIFADDR:	/* Set interface address (and family) */
1046 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1047 	case SIOCSIFDSTADDR:	/* Set the destination address */
1048 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1049 		ret = -EPERM;
1050 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1051 			goto out;
1052 		ret = -EINVAL;
1053 		if (sin->sin_family != AF_INET)
1054 			goto out;
1055 		break;
1056 	default:
1057 		ret = -EINVAL;
1058 		goto out;
1059 	}
1060 
1061 	rtnl_lock();
1062 
1063 	ret = -ENODEV;
1064 	dev = __dev_get_by_name(net, ifr->ifr_name);
1065 	if (!dev)
1066 		goto done;
1067 
1068 	if (colon)
1069 		*colon = ':';
1070 
1071 	in_dev = __in_dev_get_rtnl(dev);
1072 	if (in_dev) {
1073 		if (tryaddrmatch) {
1074 			/* Matthias Andree */
1075 			/* compare label and address (4.4BSD style) */
1076 			/* note: we only do this for a limited set of ioctls
1077 			   and only if the original address family was AF_INET.
1078 			   This is checked above. */
1079 
1080 			for (ifap = &in_dev->ifa_list;
1081 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1082 			     ifap = &ifa->ifa_next) {
1083 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1084 				    sin_orig.sin_addr.s_addr ==
1085 							ifa->ifa_local) {
1086 					break; /* found */
1087 				}
1088 			}
1089 		}
1090 		/* we didn't get a match, maybe the application is
1091 		   4.3BSD-style and passed in junk so we fall back to
1092 		   comparing just the label */
1093 		if (!ifa) {
1094 			for (ifap = &in_dev->ifa_list;
1095 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1096 			     ifap = &ifa->ifa_next)
1097 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1098 					break;
1099 		}
1100 	}
1101 
1102 	ret = -EADDRNOTAVAIL;
1103 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1104 		goto done;
1105 
1106 	switch (cmd) {
1107 	case SIOCGIFADDR:	/* Get interface address */
1108 		ret = 0;
1109 		sin->sin_addr.s_addr = ifa->ifa_local;
1110 		break;
1111 
1112 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1113 		ret = 0;
1114 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1115 		break;
1116 
1117 	case SIOCGIFDSTADDR:	/* Get the destination address */
1118 		ret = 0;
1119 		sin->sin_addr.s_addr = ifa->ifa_address;
1120 		break;
1121 
1122 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1123 		ret = 0;
1124 		sin->sin_addr.s_addr = ifa->ifa_mask;
1125 		break;
1126 
1127 	case SIOCSIFFLAGS:
1128 		if (colon) {
1129 			ret = -EADDRNOTAVAIL;
1130 			if (!ifa)
1131 				break;
1132 			ret = 0;
1133 			if (!(ifr->ifr_flags & IFF_UP))
1134 				inet_del_ifa(in_dev, ifap, 1);
1135 			break;
1136 		}
1137 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1138 		break;
1139 
1140 	case SIOCSIFADDR:	/* Set interface address (and family) */
1141 		ret = -EINVAL;
1142 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1143 			break;
1144 
1145 		if (!ifa) {
1146 			ret = -ENOBUFS;
1147 			ifa = inet_alloc_ifa();
1148 			if (!ifa)
1149 				break;
1150 			INIT_HLIST_NODE(&ifa->hash);
1151 			if (colon)
1152 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1153 			else
1154 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1155 		} else {
1156 			ret = 0;
1157 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1158 				break;
1159 			inet_del_ifa(in_dev, ifap, 0);
1160 			ifa->ifa_broadcast = 0;
1161 			ifa->ifa_scope = 0;
1162 		}
1163 
1164 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1165 
1166 		if (!(dev->flags & IFF_POINTOPOINT)) {
1167 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1168 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1169 			if ((dev->flags & IFF_BROADCAST) &&
1170 			    ifa->ifa_prefixlen < 31)
1171 				ifa->ifa_broadcast = ifa->ifa_address |
1172 						     ~ifa->ifa_mask;
1173 		} else {
1174 			ifa->ifa_prefixlen = 32;
1175 			ifa->ifa_mask = inet_make_mask(32);
1176 		}
1177 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1178 		ret = inet_set_ifa(dev, ifa);
1179 		break;
1180 
1181 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1182 		ret = 0;
1183 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1184 			inet_del_ifa(in_dev, ifap, 0);
1185 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1186 			inet_insert_ifa(ifa);
1187 		}
1188 		break;
1189 
1190 	case SIOCSIFDSTADDR:	/* Set the destination address */
1191 		ret = 0;
1192 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1193 			break;
1194 		ret = -EINVAL;
1195 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1196 			break;
1197 		ret = 0;
1198 		inet_del_ifa(in_dev, ifap, 0);
1199 		ifa->ifa_address = sin->sin_addr.s_addr;
1200 		inet_insert_ifa(ifa);
1201 		break;
1202 
1203 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1204 
1205 		/*
1206 		 *	The mask we set must be legal.
1207 		 */
1208 		ret = -EINVAL;
1209 		if (bad_mask(sin->sin_addr.s_addr, 0))
1210 			break;
1211 		ret = 0;
1212 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1213 			__be32 old_mask = ifa->ifa_mask;
1214 			inet_del_ifa(in_dev, ifap, 0);
1215 			ifa->ifa_mask = sin->sin_addr.s_addr;
1216 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1217 
1218 			/* See if current broadcast address matches
1219 			 * with current netmask, then recalculate
1220 			 * the broadcast address. Otherwise it's a
1221 			 * funny address, so don't touch it since
1222 			 * the user seems to know what (s)he's doing...
1223 			 */
1224 			if ((dev->flags & IFF_BROADCAST) &&
1225 			    (ifa->ifa_prefixlen < 31) &&
1226 			    (ifa->ifa_broadcast ==
1227 			     (ifa->ifa_local|~old_mask))) {
1228 				ifa->ifa_broadcast = (ifa->ifa_local |
1229 						      ~sin->sin_addr.s_addr);
1230 			}
1231 			inet_insert_ifa(ifa);
1232 		}
1233 		break;
1234 	}
1235 done:
1236 	rtnl_unlock();
1237 out:
1238 	return ret;
1239 }
1240 
1241 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1242 {
1243 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1244 	const struct in_ifaddr *ifa;
1245 	struct ifreq ifr;
1246 	int done = 0;
1247 
1248 	if (WARN_ON(size > sizeof(struct ifreq)))
1249 		goto out;
1250 
1251 	if (!in_dev)
1252 		goto out;
1253 
1254 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1255 		if (!buf) {
1256 			done += size;
1257 			continue;
1258 		}
1259 		if (len < size)
1260 			break;
1261 		memset(&ifr, 0, sizeof(struct ifreq));
1262 		strcpy(ifr.ifr_name, ifa->ifa_label);
1263 
1264 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1265 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1266 								ifa->ifa_local;
1267 
1268 		if (copy_to_user(buf + done, &ifr, size)) {
1269 			done = -EFAULT;
1270 			break;
1271 		}
1272 		len  -= size;
1273 		done += size;
1274 	}
1275 out:
1276 	return done;
1277 }
1278 
1279 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1280 				 int scope)
1281 {
1282 	const struct in_ifaddr *ifa;
1283 
1284 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1285 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1286 			continue;
1287 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1288 		    ifa->ifa_scope <= scope)
1289 			return ifa->ifa_local;
1290 	}
1291 
1292 	return 0;
1293 }
1294 
1295 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1296 {
1297 	const struct in_ifaddr *ifa;
1298 	__be32 addr = 0;
1299 	unsigned char localnet_scope = RT_SCOPE_HOST;
1300 	struct in_device *in_dev;
1301 	struct net *net = dev_net(dev);
1302 	int master_idx;
1303 
1304 	rcu_read_lock();
1305 	in_dev = __in_dev_get_rcu(dev);
1306 	if (!in_dev)
1307 		goto no_in_dev;
1308 
1309 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1310 		localnet_scope = RT_SCOPE_LINK;
1311 
1312 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1313 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1314 			continue;
1315 		if (min(ifa->ifa_scope, localnet_scope) > scope)
1316 			continue;
1317 		if (!dst || inet_ifa_match(dst, ifa)) {
1318 			addr = ifa->ifa_local;
1319 			break;
1320 		}
1321 		if (!addr)
1322 			addr = ifa->ifa_local;
1323 	}
1324 
1325 	if (addr)
1326 		goto out_unlock;
1327 no_in_dev:
1328 	master_idx = l3mdev_master_ifindex_rcu(dev);
1329 
1330 	/* For VRFs, the VRF device takes the place of the loopback device,
1331 	 * with addresses on it being preferred.  Note in such cases the
1332 	 * loopback device will be among the devices that fail the master_idx
1333 	 * equality check in the loop below.
1334 	 */
1335 	if (master_idx &&
1336 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1337 	    (in_dev = __in_dev_get_rcu(dev))) {
1338 		addr = in_dev_select_addr(in_dev, scope);
1339 		if (addr)
1340 			goto out_unlock;
1341 	}
1342 
1343 	/* Not loopback addresses on loopback should be preferred
1344 	   in this case. It is important that lo is the first interface
1345 	   in dev_base list.
1346 	 */
1347 	for_each_netdev_rcu(net, dev) {
1348 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1349 			continue;
1350 
1351 		in_dev = __in_dev_get_rcu(dev);
1352 		if (!in_dev)
1353 			continue;
1354 
1355 		addr = in_dev_select_addr(in_dev, scope);
1356 		if (addr)
1357 			goto out_unlock;
1358 	}
1359 out_unlock:
1360 	rcu_read_unlock();
1361 	return addr;
1362 }
1363 EXPORT_SYMBOL(inet_select_addr);
1364 
1365 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1366 			      __be32 local, int scope)
1367 {
1368 	unsigned char localnet_scope = RT_SCOPE_HOST;
1369 	const struct in_ifaddr *ifa;
1370 	__be32 addr = 0;
1371 	int same = 0;
1372 
1373 	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1374 		localnet_scope = RT_SCOPE_LINK;
1375 
1376 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1377 		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1378 
1379 		if (!addr &&
1380 		    (local == ifa->ifa_local || !local) &&
1381 		    min_scope <= scope) {
1382 			addr = ifa->ifa_local;
1383 			if (same)
1384 				break;
1385 		}
1386 		if (!same) {
1387 			same = (!local || inet_ifa_match(local, ifa)) &&
1388 				(!dst || inet_ifa_match(dst, ifa));
1389 			if (same && addr) {
1390 				if (local || !dst)
1391 					break;
1392 				/* Is the selected addr into dst subnet? */
1393 				if (inet_ifa_match(addr, ifa))
1394 					break;
1395 				/* No, then can we use new local src? */
1396 				if (min_scope <= scope) {
1397 					addr = ifa->ifa_local;
1398 					break;
1399 				}
1400 				/* search for large dst subnet for addr */
1401 				same = 0;
1402 			}
1403 		}
1404 	}
1405 
1406 	return same ? addr : 0;
1407 }
1408 
1409 /*
1410  * Confirm that local IP address exists using wildcards:
1411  * - net: netns to check, cannot be NULL
1412  * - in_dev: only on this interface, NULL=any interface
1413  * - dst: only in the same subnet as dst, 0=any dst
1414  * - local: address, 0=autoselect the local address
1415  * - scope: maximum allowed scope value for the local address
1416  */
1417 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1418 			 __be32 dst, __be32 local, int scope)
1419 {
1420 	__be32 addr = 0;
1421 	struct net_device *dev;
1422 
1423 	if (in_dev)
1424 		return confirm_addr_indev(in_dev, dst, local, scope);
1425 
1426 	rcu_read_lock();
1427 	for_each_netdev_rcu(net, dev) {
1428 		in_dev = __in_dev_get_rcu(dev);
1429 		if (in_dev) {
1430 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1431 			if (addr)
1432 				break;
1433 		}
1434 	}
1435 	rcu_read_unlock();
1436 
1437 	return addr;
1438 }
1439 EXPORT_SYMBOL(inet_confirm_addr);
1440 
1441 /*
1442  *	Device notifier
1443  */
1444 
1445 int register_inetaddr_notifier(struct notifier_block *nb)
1446 {
1447 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1448 }
1449 EXPORT_SYMBOL(register_inetaddr_notifier);
1450 
1451 int unregister_inetaddr_notifier(struct notifier_block *nb)
1452 {
1453 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1454 }
1455 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1456 
1457 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1458 {
1459 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1460 }
1461 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1462 
1463 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1464 {
1465 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1466 	    nb);
1467 }
1468 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1469 
1470 /* Rename ifa_labels for a device name change. Make some effort to preserve
1471  * existing alias numbering and to create unique labels if possible.
1472 */
1473 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1474 {
1475 	struct in_ifaddr *ifa;
1476 	int named = 0;
1477 
1478 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1479 		char old[IFNAMSIZ], *dot;
1480 
1481 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1482 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1483 		if (named++ == 0)
1484 			goto skip;
1485 		dot = strchr(old, ':');
1486 		if (!dot) {
1487 			sprintf(old, ":%d", named);
1488 			dot = old;
1489 		}
1490 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1491 			strcat(ifa->ifa_label, dot);
1492 		else
1493 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1494 skip:
1495 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1496 	}
1497 }
1498 
1499 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1500 					struct in_device *in_dev)
1501 
1502 {
1503 	const struct in_ifaddr *ifa;
1504 
1505 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1506 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1507 			 ifa->ifa_local, dev,
1508 			 ifa->ifa_local, NULL,
1509 			 dev->dev_addr, NULL);
1510 	}
1511 }
1512 
1513 /* Called only under RTNL semaphore */
1514 
1515 static int inetdev_event(struct notifier_block *this, unsigned long event,
1516 			 void *ptr)
1517 {
1518 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1519 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1520 
1521 	ASSERT_RTNL();
1522 
1523 	if (!in_dev) {
1524 		if (event == NETDEV_REGISTER) {
1525 			in_dev = inetdev_init(dev);
1526 			if (IS_ERR(in_dev))
1527 				return notifier_from_errno(PTR_ERR(in_dev));
1528 			if (dev->flags & IFF_LOOPBACK) {
1529 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1530 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1531 			}
1532 		} else if (event == NETDEV_CHANGEMTU) {
1533 			/* Re-enabling IP */
1534 			if (inetdev_valid_mtu(dev->mtu))
1535 				in_dev = inetdev_init(dev);
1536 		}
1537 		goto out;
1538 	}
1539 
1540 	switch (event) {
1541 	case NETDEV_REGISTER:
1542 		pr_debug("%s: bug\n", __func__);
1543 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1544 		break;
1545 	case NETDEV_UP:
1546 		if (!inetdev_valid_mtu(dev->mtu))
1547 			break;
1548 		if (dev->flags & IFF_LOOPBACK) {
1549 			struct in_ifaddr *ifa = inet_alloc_ifa();
1550 
1551 			if (ifa) {
1552 				INIT_HLIST_NODE(&ifa->hash);
1553 				ifa->ifa_local =
1554 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1555 				ifa->ifa_prefixlen = 8;
1556 				ifa->ifa_mask = inet_make_mask(8);
1557 				in_dev_hold(in_dev);
1558 				ifa->ifa_dev = in_dev;
1559 				ifa->ifa_scope = RT_SCOPE_HOST;
1560 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1561 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1562 						 INFINITY_LIFE_TIME);
1563 				ipv4_devconf_setall(in_dev);
1564 				neigh_parms_data_state_setall(in_dev->arp_parms);
1565 				inet_insert_ifa(ifa);
1566 			}
1567 		}
1568 		ip_mc_up(in_dev);
1569 		/* fall through */
1570 	case NETDEV_CHANGEADDR:
1571 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1572 			break;
1573 		/* fall through */
1574 	case NETDEV_NOTIFY_PEERS:
1575 		/* Send gratuitous ARP to notify of link change */
1576 		inetdev_send_gratuitous_arp(dev, in_dev);
1577 		break;
1578 	case NETDEV_DOWN:
1579 		ip_mc_down(in_dev);
1580 		break;
1581 	case NETDEV_PRE_TYPE_CHANGE:
1582 		ip_mc_unmap(in_dev);
1583 		break;
1584 	case NETDEV_POST_TYPE_CHANGE:
1585 		ip_mc_remap(in_dev);
1586 		break;
1587 	case NETDEV_CHANGEMTU:
1588 		if (inetdev_valid_mtu(dev->mtu))
1589 			break;
1590 		/* disable IP when MTU is not enough */
1591 		/* fall through */
1592 	case NETDEV_UNREGISTER:
1593 		inetdev_destroy(in_dev);
1594 		break;
1595 	case NETDEV_CHANGENAME:
1596 		/* Do not notify about label change, this event is
1597 		 * not interesting to applications using netlink.
1598 		 */
1599 		inetdev_changename(dev, in_dev);
1600 
1601 		devinet_sysctl_unregister(in_dev);
1602 		devinet_sysctl_register(in_dev);
1603 		break;
1604 	}
1605 out:
1606 	return NOTIFY_DONE;
1607 }
1608 
1609 static struct notifier_block ip_netdev_notifier = {
1610 	.notifier_call = inetdev_event,
1611 };
1612 
1613 static size_t inet_nlmsg_size(void)
1614 {
1615 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1616 	       + nla_total_size(4) /* IFA_ADDRESS */
1617 	       + nla_total_size(4) /* IFA_LOCAL */
1618 	       + nla_total_size(4) /* IFA_BROADCAST */
1619 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1620 	       + nla_total_size(4)  /* IFA_FLAGS */
1621 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1622 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1623 }
1624 
1625 static inline u32 cstamp_delta(unsigned long cstamp)
1626 {
1627 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1628 }
1629 
1630 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1631 			 unsigned long tstamp, u32 preferred, u32 valid)
1632 {
1633 	struct ifa_cacheinfo ci;
1634 
1635 	ci.cstamp = cstamp_delta(cstamp);
1636 	ci.tstamp = cstamp_delta(tstamp);
1637 	ci.ifa_prefered = preferred;
1638 	ci.ifa_valid = valid;
1639 
1640 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1641 }
1642 
1643 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1644 			    struct inet_fill_args *args)
1645 {
1646 	struct ifaddrmsg *ifm;
1647 	struct nlmsghdr  *nlh;
1648 	u32 preferred, valid;
1649 
1650 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1651 			args->flags);
1652 	if (!nlh)
1653 		return -EMSGSIZE;
1654 
1655 	ifm = nlmsg_data(nlh);
1656 	ifm->ifa_family = AF_INET;
1657 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1658 	ifm->ifa_flags = ifa->ifa_flags;
1659 	ifm->ifa_scope = ifa->ifa_scope;
1660 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1661 
1662 	if (args->netnsid >= 0 &&
1663 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1664 		goto nla_put_failure;
1665 
1666 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1667 		preferred = ifa->ifa_preferred_lft;
1668 		valid = ifa->ifa_valid_lft;
1669 		if (preferred != INFINITY_LIFE_TIME) {
1670 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1671 
1672 			if (preferred > tval)
1673 				preferred -= tval;
1674 			else
1675 				preferred = 0;
1676 			if (valid != INFINITY_LIFE_TIME) {
1677 				if (valid > tval)
1678 					valid -= tval;
1679 				else
1680 					valid = 0;
1681 			}
1682 		}
1683 	} else {
1684 		preferred = INFINITY_LIFE_TIME;
1685 		valid = INFINITY_LIFE_TIME;
1686 	}
1687 	if ((ifa->ifa_address &&
1688 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1689 	    (ifa->ifa_local &&
1690 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1691 	    (ifa->ifa_broadcast &&
1692 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1693 	    (ifa->ifa_label[0] &&
1694 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1695 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1696 	    (ifa->ifa_rt_priority &&
1697 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1698 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1699 			  preferred, valid))
1700 		goto nla_put_failure;
1701 
1702 	nlmsg_end(skb, nlh);
1703 	return 0;
1704 
1705 nla_put_failure:
1706 	nlmsg_cancel(skb, nlh);
1707 	return -EMSGSIZE;
1708 }
1709 
1710 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1711 				      struct inet_fill_args *fillargs,
1712 				      struct net **tgt_net, struct sock *sk,
1713 				      struct netlink_callback *cb)
1714 {
1715 	struct netlink_ext_ack *extack = cb->extack;
1716 	struct nlattr *tb[IFA_MAX+1];
1717 	struct ifaddrmsg *ifm;
1718 	int err, i;
1719 
1720 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1721 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1722 		return -EINVAL;
1723 	}
1724 
1725 	ifm = nlmsg_data(nlh);
1726 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1727 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1728 		return -EINVAL;
1729 	}
1730 
1731 	fillargs->ifindex = ifm->ifa_index;
1732 	if (fillargs->ifindex) {
1733 		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1734 		fillargs->flags |= NLM_F_DUMP_FILTERED;
1735 	}
1736 
1737 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1738 					    ifa_ipv4_policy, extack);
1739 	if (err < 0)
1740 		return err;
1741 
1742 	for (i = 0; i <= IFA_MAX; ++i) {
1743 		if (!tb[i])
1744 			continue;
1745 
1746 		if (i == IFA_TARGET_NETNSID) {
1747 			struct net *net;
1748 
1749 			fillargs->netnsid = nla_get_s32(tb[i]);
1750 
1751 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1752 			if (IS_ERR(net)) {
1753 				fillargs->netnsid = -1;
1754 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1755 				return PTR_ERR(net);
1756 			}
1757 			*tgt_net = net;
1758 		} else {
1759 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1760 			return -EINVAL;
1761 		}
1762 	}
1763 
1764 	return 0;
1765 }
1766 
1767 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1768 			    struct netlink_callback *cb, int s_ip_idx,
1769 			    struct inet_fill_args *fillargs)
1770 {
1771 	struct in_ifaddr *ifa;
1772 	int ip_idx = 0;
1773 	int err;
1774 
1775 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1776 		if (ip_idx < s_ip_idx) {
1777 			ip_idx++;
1778 			continue;
1779 		}
1780 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1781 		if (err < 0)
1782 			goto done;
1783 
1784 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1785 		ip_idx++;
1786 	}
1787 	err = 0;
1788 
1789 done:
1790 	cb->args[2] = ip_idx;
1791 
1792 	return err;
1793 }
1794 
1795 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1796 {
1797 	const struct nlmsghdr *nlh = cb->nlh;
1798 	struct inet_fill_args fillargs = {
1799 		.portid = NETLINK_CB(cb->skb).portid,
1800 		.seq = nlh->nlmsg_seq,
1801 		.event = RTM_NEWADDR,
1802 		.flags = NLM_F_MULTI,
1803 		.netnsid = -1,
1804 	};
1805 	struct net *net = sock_net(skb->sk);
1806 	struct net *tgt_net = net;
1807 	int h, s_h;
1808 	int idx, s_idx;
1809 	int s_ip_idx;
1810 	struct net_device *dev;
1811 	struct in_device *in_dev;
1812 	struct hlist_head *head;
1813 	int err = 0;
1814 
1815 	s_h = cb->args[0];
1816 	s_idx = idx = cb->args[1];
1817 	s_ip_idx = cb->args[2];
1818 
1819 	if (cb->strict_check) {
1820 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1821 						 skb->sk, cb);
1822 		if (err < 0)
1823 			goto put_tgt_net;
1824 
1825 		err = 0;
1826 		if (fillargs.ifindex) {
1827 			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1828 			if (!dev) {
1829 				err = -ENODEV;
1830 				goto put_tgt_net;
1831 			}
1832 
1833 			in_dev = __in_dev_get_rtnl(dev);
1834 			if (in_dev) {
1835 				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1836 						       &fillargs);
1837 			}
1838 			goto put_tgt_net;
1839 		}
1840 	}
1841 
1842 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1843 		idx = 0;
1844 		head = &tgt_net->dev_index_head[h];
1845 		rcu_read_lock();
1846 		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1847 			  tgt_net->dev_base_seq;
1848 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1849 			if (idx < s_idx)
1850 				goto cont;
1851 			if (h > s_h || idx > s_idx)
1852 				s_ip_idx = 0;
1853 			in_dev = __in_dev_get_rcu(dev);
1854 			if (!in_dev)
1855 				goto cont;
1856 
1857 			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1858 					       &fillargs);
1859 			if (err < 0) {
1860 				rcu_read_unlock();
1861 				goto done;
1862 			}
1863 cont:
1864 			idx++;
1865 		}
1866 		rcu_read_unlock();
1867 	}
1868 
1869 done:
1870 	cb->args[0] = h;
1871 	cb->args[1] = idx;
1872 put_tgt_net:
1873 	if (fillargs.netnsid >= 0)
1874 		put_net(tgt_net);
1875 
1876 	return skb->len ? : err;
1877 }
1878 
1879 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1880 		      u32 portid)
1881 {
1882 	struct inet_fill_args fillargs = {
1883 		.portid = portid,
1884 		.seq = nlh ? nlh->nlmsg_seq : 0,
1885 		.event = event,
1886 		.flags = 0,
1887 		.netnsid = -1,
1888 	};
1889 	struct sk_buff *skb;
1890 	int err = -ENOBUFS;
1891 	struct net *net;
1892 
1893 	net = dev_net(ifa->ifa_dev->dev);
1894 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1895 	if (!skb)
1896 		goto errout;
1897 
1898 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1899 	if (err < 0) {
1900 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1901 		WARN_ON(err == -EMSGSIZE);
1902 		kfree_skb(skb);
1903 		goto errout;
1904 	}
1905 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1906 	return;
1907 errout:
1908 	if (err < 0)
1909 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1910 }
1911 
1912 static size_t inet_get_link_af_size(const struct net_device *dev,
1913 				    u32 ext_filter_mask)
1914 {
1915 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1916 
1917 	if (!in_dev)
1918 		return 0;
1919 
1920 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1921 }
1922 
1923 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1924 			     u32 ext_filter_mask)
1925 {
1926 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1927 	struct nlattr *nla;
1928 	int i;
1929 
1930 	if (!in_dev)
1931 		return -ENODATA;
1932 
1933 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1934 	if (!nla)
1935 		return -EMSGSIZE;
1936 
1937 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1938 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1939 
1940 	return 0;
1941 }
1942 
1943 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1944 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1945 };
1946 
1947 static int inet_validate_link_af(const struct net_device *dev,
1948 				 const struct nlattr *nla)
1949 {
1950 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1951 	int err, rem;
1952 
1953 	if (dev && !__in_dev_get_rcu(dev))
1954 		return -EAFNOSUPPORT;
1955 
1956 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1957 					  inet_af_policy, NULL);
1958 	if (err < 0)
1959 		return err;
1960 
1961 	if (tb[IFLA_INET_CONF]) {
1962 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1963 			int cfgid = nla_type(a);
1964 
1965 			if (nla_len(a) < 4)
1966 				return -EINVAL;
1967 
1968 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1969 				return -EINVAL;
1970 		}
1971 	}
1972 
1973 	return 0;
1974 }
1975 
1976 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1977 {
1978 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1979 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1980 	int rem;
1981 
1982 	if (!in_dev)
1983 		return -EAFNOSUPPORT;
1984 
1985 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1986 		BUG();
1987 
1988 	if (tb[IFLA_INET_CONF]) {
1989 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1990 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1991 	}
1992 
1993 	return 0;
1994 }
1995 
1996 static int inet_netconf_msgsize_devconf(int type)
1997 {
1998 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1999 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2000 	bool all = false;
2001 
2002 	if (type == NETCONFA_ALL)
2003 		all = true;
2004 
2005 	if (all || type == NETCONFA_FORWARDING)
2006 		size += nla_total_size(4);
2007 	if (all || type == NETCONFA_RP_FILTER)
2008 		size += nla_total_size(4);
2009 	if (all || type == NETCONFA_MC_FORWARDING)
2010 		size += nla_total_size(4);
2011 	if (all || type == NETCONFA_BC_FORWARDING)
2012 		size += nla_total_size(4);
2013 	if (all || type == NETCONFA_PROXY_NEIGH)
2014 		size += nla_total_size(4);
2015 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2016 		size += nla_total_size(4);
2017 
2018 	return size;
2019 }
2020 
2021 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2022 				     struct ipv4_devconf *devconf, u32 portid,
2023 				     u32 seq, int event, unsigned int flags,
2024 				     int type)
2025 {
2026 	struct nlmsghdr  *nlh;
2027 	struct netconfmsg *ncm;
2028 	bool all = false;
2029 
2030 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2031 			flags);
2032 	if (!nlh)
2033 		return -EMSGSIZE;
2034 
2035 	if (type == NETCONFA_ALL)
2036 		all = true;
2037 
2038 	ncm = nlmsg_data(nlh);
2039 	ncm->ncm_family = AF_INET;
2040 
2041 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2042 		goto nla_put_failure;
2043 
2044 	if (!devconf)
2045 		goto out;
2046 
2047 	if ((all || type == NETCONFA_FORWARDING) &&
2048 	    nla_put_s32(skb, NETCONFA_FORWARDING,
2049 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2050 		goto nla_put_failure;
2051 	if ((all || type == NETCONFA_RP_FILTER) &&
2052 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2053 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2054 		goto nla_put_failure;
2055 	if ((all || type == NETCONFA_MC_FORWARDING) &&
2056 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2057 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2058 		goto nla_put_failure;
2059 	if ((all || type == NETCONFA_BC_FORWARDING) &&
2060 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2061 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2062 		goto nla_put_failure;
2063 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2064 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2065 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2066 		goto nla_put_failure;
2067 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2068 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2069 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2070 		goto nla_put_failure;
2071 
2072 out:
2073 	nlmsg_end(skb, nlh);
2074 	return 0;
2075 
2076 nla_put_failure:
2077 	nlmsg_cancel(skb, nlh);
2078 	return -EMSGSIZE;
2079 }
2080 
2081 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2082 				 int ifindex, struct ipv4_devconf *devconf)
2083 {
2084 	struct sk_buff *skb;
2085 	int err = -ENOBUFS;
2086 
2087 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2088 	if (!skb)
2089 		goto errout;
2090 
2091 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2092 					event, 0, type);
2093 	if (err < 0) {
2094 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2095 		WARN_ON(err == -EMSGSIZE);
2096 		kfree_skb(skb);
2097 		goto errout;
2098 	}
2099 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2100 	return;
2101 errout:
2102 	if (err < 0)
2103 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2104 }
2105 
2106 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2107 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2108 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2109 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2110 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2111 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2112 };
2113 
2114 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2115 				      const struct nlmsghdr *nlh,
2116 				      struct nlattr **tb,
2117 				      struct netlink_ext_ack *extack)
2118 {
2119 	int i, err;
2120 
2121 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2122 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2123 		return -EINVAL;
2124 	}
2125 
2126 	if (!netlink_strict_get_check(skb))
2127 		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2128 					      tb, NETCONFA_MAX,
2129 					      devconf_ipv4_policy, extack);
2130 
2131 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2132 					    tb, NETCONFA_MAX,
2133 					    devconf_ipv4_policy, extack);
2134 	if (err)
2135 		return err;
2136 
2137 	for (i = 0; i <= NETCONFA_MAX; i++) {
2138 		if (!tb[i])
2139 			continue;
2140 
2141 		switch (i) {
2142 		case NETCONFA_IFINDEX:
2143 			break;
2144 		default:
2145 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2146 			return -EINVAL;
2147 		}
2148 	}
2149 
2150 	return 0;
2151 }
2152 
2153 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2154 				    struct nlmsghdr *nlh,
2155 				    struct netlink_ext_ack *extack)
2156 {
2157 	struct net *net = sock_net(in_skb->sk);
2158 	struct nlattr *tb[NETCONFA_MAX+1];
2159 	struct sk_buff *skb;
2160 	struct ipv4_devconf *devconf;
2161 	struct in_device *in_dev;
2162 	struct net_device *dev;
2163 	int ifindex;
2164 	int err;
2165 
2166 	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2167 	if (err)
2168 		goto errout;
2169 
2170 	err = -EINVAL;
2171 	if (!tb[NETCONFA_IFINDEX])
2172 		goto errout;
2173 
2174 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2175 	switch (ifindex) {
2176 	case NETCONFA_IFINDEX_ALL:
2177 		devconf = net->ipv4.devconf_all;
2178 		break;
2179 	case NETCONFA_IFINDEX_DEFAULT:
2180 		devconf = net->ipv4.devconf_dflt;
2181 		break;
2182 	default:
2183 		dev = __dev_get_by_index(net, ifindex);
2184 		if (!dev)
2185 			goto errout;
2186 		in_dev = __in_dev_get_rtnl(dev);
2187 		if (!in_dev)
2188 			goto errout;
2189 		devconf = &in_dev->cnf;
2190 		break;
2191 	}
2192 
2193 	err = -ENOBUFS;
2194 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2195 	if (!skb)
2196 		goto errout;
2197 
2198 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2199 					NETLINK_CB(in_skb).portid,
2200 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2201 					NETCONFA_ALL);
2202 	if (err < 0) {
2203 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2204 		WARN_ON(err == -EMSGSIZE);
2205 		kfree_skb(skb);
2206 		goto errout;
2207 	}
2208 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2209 errout:
2210 	return err;
2211 }
2212 
2213 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2214 				     struct netlink_callback *cb)
2215 {
2216 	const struct nlmsghdr *nlh = cb->nlh;
2217 	struct net *net = sock_net(skb->sk);
2218 	int h, s_h;
2219 	int idx, s_idx;
2220 	struct net_device *dev;
2221 	struct in_device *in_dev;
2222 	struct hlist_head *head;
2223 
2224 	if (cb->strict_check) {
2225 		struct netlink_ext_ack *extack = cb->extack;
2226 		struct netconfmsg *ncm;
2227 
2228 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2229 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2230 			return -EINVAL;
2231 		}
2232 
2233 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2234 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2235 			return -EINVAL;
2236 		}
2237 	}
2238 
2239 	s_h = cb->args[0];
2240 	s_idx = idx = cb->args[1];
2241 
2242 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2243 		idx = 0;
2244 		head = &net->dev_index_head[h];
2245 		rcu_read_lock();
2246 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2247 			  net->dev_base_seq;
2248 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2249 			if (idx < s_idx)
2250 				goto cont;
2251 			in_dev = __in_dev_get_rcu(dev);
2252 			if (!in_dev)
2253 				goto cont;
2254 
2255 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2256 						      &in_dev->cnf,
2257 						      NETLINK_CB(cb->skb).portid,
2258 						      nlh->nlmsg_seq,
2259 						      RTM_NEWNETCONF,
2260 						      NLM_F_MULTI,
2261 						      NETCONFA_ALL) < 0) {
2262 				rcu_read_unlock();
2263 				goto done;
2264 			}
2265 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2266 cont:
2267 			idx++;
2268 		}
2269 		rcu_read_unlock();
2270 	}
2271 	if (h == NETDEV_HASHENTRIES) {
2272 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2273 					      net->ipv4.devconf_all,
2274 					      NETLINK_CB(cb->skb).portid,
2275 					      nlh->nlmsg_seq,
2276 					      RTM_NEWNETCONF, NLM_F_MULTI,
2277 					      NETCONFA_ALL) < 0)
2278 			goto done;
2279 		else
2280 			h++;
2281 	}
2282 	if (h == NETDEV_HASHENTRIES + 1) {
2283 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2284 					      net->ipv4.devconf_dflt,
2285 					      NETLINK_CB(cb->skb).portid,
2286 					      nlh->nlmsg_seq,
2287 					      RTM_NEWNETCONF, NLM_F_MULTI,
2288 					      NETCONFA_ALL) < 0)
2289 			goto done;
2290 		else
2291 			h++;
2292 	}
2293 done:
2294 	cb->args[0] = h;
2295 	cb->args[1] = idx;
2296 
2297 	return skb->len;
2298 }
2299 
2300 #ifdef CONFIG_SYSCTL
2301 
2302 static void devinet_copy_dflt_conf(struct net *net, int i)
2303 {
2304 	struct net_device *dev;
2305 
2306 	rcu_read_lock();
2307 	for_each_netdev_rcu(net, dev) {
2308 		struct in_device *in_dev;
2309 
2310 		in_dev = __in_dev_get_rcu(dev);
2311 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2312 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2313 	}
2314 	rcu_read_unlock();
2315 }
2316 
2317 /* called with RTNL locked */
2318 static void inet_forward_change(struct net *net)
2319 {
2320 	struct net_device *dev;
2321 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2322 
2323 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2324 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2325 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2326 				    NETCONFA_FORWARDING,
2327 				    NETCONFA_IFINDEX_ALL,
2328 				    net->ipv4.devconf_all);
2329 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2330 				    NETCONFA_FORWARDING,
2331 				    NETCONFA_IFINDEX_DEFAULT,
2332 				    net->ipv4.devconf_dflt);
2333 
2334 	for_each_netdev(net, dev) {
2335 		struct in_device *in_dev;
2336 
2337 		if (on)
2338 			dev_disable_lro(dev);
2339 
2340 		in_dev = __in_dev_get_rtnl(dev);
2341 		if (in_dev) {
2342 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2343 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2344 						    NETCONFA_FORWARDING,
2345 						    dev->ifindex, &in_dev->cnf);
2346 		}
2347 	}
2348 }
2349 
2350 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2351 {
2352 	if (cnf == net->ipv4.devconf_dflt)
2353 		return NETCONFA_IFINDEX_DEFAULT;
2354 	else if (cnf == net->ipv4.devconf_all)
2355 		return NETCONFA_IFINDEX_ALL;
2356 	else {
2357 		struct in_device *idev
2358 			= container_of(cnf, struct in_device, cnf);
2359 		return idev->dev->ifindex;
2360 	}
2361 }
2362 
2363 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2364 			     void __user *buffer,
2365 			     size_t *lenp, loff_t *ppos)
2366 {
2367 	int old_value = *(int *)ctl->data;
2368 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2369 	int new_value = *(int *)ctl->data;
2370 
2371 	if (write) {
2372 		struct ipv4_devconf *cnf = ctl->extra1;
2373 		struct net *net = ctl->extra2;
2374 		int i = (int *)ctl->data - cnf->data;
2375 		int ifindex;
2376 
2377 		set_bit(i, cnf->state);
2378 
2379 		if (cnf == net->ipv4.devconf_dflt)
2380 			devinet_copy_dflt_conf(net, i);
2381 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2382 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2383 			if ((new_value == 0) && (old_value != 0))
2384 				rt_cache_flush(net);
2385 
2386 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2387 		    new_value != old_value)
2388 			rt_cache_flush(net);
2389 
2390 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2391 		    new_value != old_value) {
2392 			ifindex = devinet_conf_ifindex(net, cnf);
2393 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2394 						    NETCONFA_RP_FILTER,
2395 						    ifindex, cnf);
2396 		}
2397 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2398 		    new_value != old_value) {
2399 			ifindex = devinet_conf_ifindex(net, cnf);
2400 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2401 						    NETCONFA_PROXY_NEIGH,
2402 						    ifindex, cnf);
2403 		}
2404 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2405 		    new_value != old_value) {
2406 			ifindex = devinet_conf_ifindex(net, cnf);
2407 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2408 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2409 						    ifindex, cnf);
2410 		}
2411 	}
2412 
2413 	return ret;
2414 }
2415 
2416 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2417 				  void __user *buffer,
2418 				  size_t *lenp, loff_t *ppos)
2419 {
2420 	int *valp = ctl->data;
2421 	int val = *valp;
2422 	loff_t pos = *ppos;
2423 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2424 
2425 	if (write && *valp != val) {
2426 		struct net *net = ctl->extra2;
2427 
2428 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2429 			if (!rtnl_trylock()) {
2430 				/* Restore the original values before restarting */
2431 				*valp = val;
2432 				*ppos = pos;
2433 				return restart_syscall();
2434 			}
2435 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2436 				inet_forward_change(net);
2437 			} else {
2438 				struct ipv4_devconf *cnf = ctl->extra1;
2439 				struct in_device *idev =
2440 					container_of(cnf, struct in_device, cnf);
2441 				if (*valp)
2442 					dev_disable_lro(idev->dev);
2443 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2444 							    NETCONFA_FORWARDING,
2445 							    idev->dev->ifindex,
2446 							    cnf);
2447 			}
2448 			rtnl_unlock();
2449 			rt_cache_flush(net);
2450 		} else
2451 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2452 						    NETCONFA_FORWARDING,
2453 						    NETCONFA_IFINDEX_DEFAULT,
2454 						    net->ipv4.devconf_dflt);
2455 	}
2456 
2457 	return ret;
2458 }
2459 
2460 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2461 				void __user *buffer,
2462 				size_t *lenp, loff_t *ppos)
2463 {
2464 	int *valp = ctl->data;
2465 	int val = *valp;
2466 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2467 	struct net *net = ctl->extra2;
2468 
2469 	if (write && *valp != val)
2470 		rt_cache_flush(net);
2471 
2472 	return ret;
2473 }
2474 
/*
 * Builders for the entries of the devinet sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY() produces one struct ctl_table initializer whose
 * .data points at slot IPV4_DEVCONF_<conf> - 1 of the global ipv4_devconf
 * and whose .extra1 points at ipv4_devconf itself.  The remaining macros
 * are shorthands that fix the file mode and/or the handler.
 */
#define DEVINET_SYSCTL_ENTRY(conf, fname, fmode, handler) \
	{ \
		.procname	= fname, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## conf - 1], \
		.maxlen		= sizeof(int), \
		.mode		= fmode, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(conf, fname) \
	DEVINET_SYSCTL_ENTRY(conf, fname, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(conf, fname) \
	DEVINET_SYSCTL_ENTRY(conf, fname, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(conf, fname, handler) \
	DEVINET_SYSCTL_ENTRY(conf, fname, 0644, handler)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(conf, fname) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(conf, fname, ipv4_doint_and_flush)
2497 
2498 static struct devinet_sysctl_table {
2499 	struct ctl_table_header *sysctl_header;
2500 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2501 } devinet_sysctl = {
2502 	.devinet_vars = {
2503 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2504 					     devinet_sysctl_forward),
2505 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2506 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2507 
2508 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2509 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2510 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2511 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2512 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2513 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2514 					"accept_source_route"),
2515 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2516 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2517 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2518 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2519 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2520 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2521 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2522 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2523 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2524 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2525 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2526 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2527 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2528 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2529 					"force_igmp_version"),
2530 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2531 					"igmpv2_unsolicited_report_interval"),
2532 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2533 					"igmpv3_unsolicited_report_interval"),
2534 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2535 					"ignore_routes_with_linkdown"),
2536 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2537 					"drop_gratuitous_arp"),
2538 
2539 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2540 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2541 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2542 					      "promote_secondaries"),
2543 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2544 					      "route_localnet"),
2545 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2546 					      "drop_unicast_in_l2_multicast"),
2547 	},
2548 };
2549 
2550 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2551 				     int ifindex, struct ipv4_devconf *p)
2552 {
2553 	int i;
2554 	struct devinet_sysctl_table *t;
2555 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2556 
2557 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2558 	if (!t)
2559 		goto out;
2560 
2561 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2562 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2563 		t->devinet_vars[i].extra1 = p;
2564 		t->devinet_vars[i].extra2 = net;
2565 	}
2566 
2567 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2568 
2569 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2570 	if (!t->sysctl_header)
2571 		goto free;
2572 
2573 	p->sysctl = t;
2574 
2575 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2576 				    ifindex, p);
2577 	return 0;
2578 
2579 free:
2580 	kfree(t);
2581 out:
2582 	return -ENOBUFS;
2583 }
2584 
2585 static void __devinet_sysctl_unregister(struct net *net,
2586 					struct ipv4_devconf *cnf, int ifindex)
2587 {
2588 	struct devinet_sysctl_table *t = cnf->sysctl;
2589 
2590 	if (t) {
2591 		cnf->sysctl = NULL;
2592 		unregister_net_sysctl_table(t->sysctl_header);
2593 		kfree(t);
2594 	}
2595 
2596 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2597 }
2598 
2599 static int devinet_sysctl_register(struct in_device *idev)
2600 {
2601 	int err;
2602 
2603 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2604 		return -EINVAL;
2605 
2606 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2607 	if (err)
2608 		return err;
2609 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2610 					idev->dev->ifindex, &idev->cnf);
2611 	if (err)
2612 		neigh_sysctl_unregister(idev->arp_parms);
2613 	return err;
2614 }
2615 
2616 static void devinet_sysctl_unregister(struct in_device *idev)
2617 {
2618 	struct net *net = dev_net(idev->dev);
2619 
2620 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2621 	neigh_sysctl_unregister(idev->arp_parms);
2622 }
2623 
2624 static struct ctl_table ctl_forward_entry[] = {
2625 	{
2626 		.procname	= "ip_forward",
2627 		.data		= &ipv4_devconf.data[
2628 					IPV4_DEVCONF_FORWARDING - 1],
2629 		.maxlen		= sizeof(int),
2630 		.mode		= 0644,
2631 		.proc_handler	= devinet_sysctl_forward,
2632 		.extra1		= &ipv4_devconf,
2633 		.extra2		= &init_net,
2634 	},
2635 	{ },
2636 };
2637 #endif
2638 
2639 static __net_init int devinet_init_net(struct net *net)
2640 {
2641 	int err;
2642 	struct ipv4_devconf *all, *dflt;
2643 #ifdef CONFIG_SYSCTL
2644 	struct ctl_table *tbl;
2645 	struct ctl_table_header *forw_hdr;
2646 #endif
2647 
2648 	err = -ENOMEM;
2649 	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2650 	if (!all)
2651 		goto err_alloc_all;
2652 
2653 	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2654 	if (!dflt)
2655 		goto err_alloc_dflt;
2656 
2657 #ifdef CONFIG_SYSCTL
2658 	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2659 	if (!tbl)
2660 		goto err_alloc_ctl;
2661 
2662 	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2663 	tbl[0].extra1 = all;
2664 	tbl[0].extra2 = net;
2665 #endif
2666 
2667 	if ((!IS_ENABLED(CONFIG_SYSCTL) ||
2668 	     sysctl_devconf_inherit_init_net != 2) &&
2669 	    !net_eq(net, &init_net)) {
2670 		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
2671 		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
2672 	}
2673 
2674 #ifdef CONFIG_SYSCTL
2675 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2676 	if (err < 0)
2677 		goto err_reg_all;
2678 
2679 	err = __devinet_sysctl_register(net, "default",
2680 					NETCONFA_IFINDEX_DEFAULT, dflt);
2681 	if (err < 0)
2682 		goto err_reg_dflt;
2683 
2684 	err = -ENOMEM;
2685 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2686 	if (!forw_hdr)
2687 		goto err_reg_ctl;
2688 	net->ipv4.forw_hdr = forw_hdr;
2689 #endif
2690 
2691 	net->ipv4.devconf_all = all;
2692 	net->ipv4.devconf_dflt = dflt;
2693 	return 0;
2694 
2695 #ifdef CONFIG_SYSCTL
2696 err_reg_ctl:
2697 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2698 err_reg_dflt:
2699 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2700 err_reg_all:
2701 	kfree(tbl);
2702 err_alloc_ctl:
2703 #endif
2704 	kfree(dflt);
2705 err_alloc_dflt:
2706 	kfree(all);
2707 err_alloc_all:
2708 	return err;
2709 }
2710 
2711 static __net_exit void devinet_exit_net(struct net *net)
2712 {
2713 #ifdef CONFIG_SYSCTL
2714 	struct ctl_table *tbl;
2715 
2716 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2717 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2718 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2719 				    NETCONFA_IFINDEX_DEFAULT);
2720 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2721 				    NETCONFA_IFINDEX_ALL);
2722 	kfree(tbl);
2723 #endif
2724 	kfree(net->ipv4.devconf_dflt);
2725 	kfree(net->ipv4.devconf_all);
2726 }
2727 
2728 static __net_initdata struct pernet_operations devinet_ops = {
2729 	.init = devinet_init_net,
2730 	.exit = devinet_exit_net,
2731 };
2732 
2733 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2734 	.family		  = AF_INET,
2735 	.fill_link_af	  = inet_fill_link_af,
2736 	.get_link_af_size = inet_get_link_af_size,
2737 	.validate_link_af = inet_validate_link_af,
2738 	.set_link_af	  = inet_set_link_af,
2739 };
2740 
2741 void __init devinet_init(void)
2742 {
2743 	int i;
2744 
2745 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2746 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2747 
2748 	register_pernet_subsys(&devinet_ops);
2749 
2750 	register_gifconf(PF_INET, inet_gifconf);
2751 	register_netdevice_notifier(&ip_netdev_notifier);
2752 
2753 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2754 
2755 	rtnl_af_register(&inet_af_ops);
2756 
2757 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2758 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2759 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2760 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2761 		      inet_netconf_dump_devconf, 0);
2762 }
2763