/* xref: /linux/net/ipv4/devinet.c (revision 2638eb8b50cfc16240e0bb080b9afbf541a9b39d) */
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
/* Compile-time defaults for the "all" per-device IPv4 configuration.
 * Indices are IPV4_DEVCONF_* values minus one; unlisted entries are 0.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
79 
/* Compile-time "default" devconf template; inetdev_init() copies the
 * per-netns default (net->ipv4.devconf_dflt) into each new in_device,
 * so this is the seed for newly created devices.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
91 
/* Access an attribute of a netns' default device configuration. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
/* Netlink attribute policy used when parsing RTM_NEWADDR/RTM_DELADDR
 * requests (see inet_rtm_newaddr()/inet_rtm_deladdr() below).
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
};
105 
/* Parameters threaded through the netlink address fill/dump helpers
 * (the helpers themselves are outside this chunk).
 */
struct inet_fill_args {
	u32 portid;		/* netlink port id of the requester */
	u32 seq;		/* netlink sequence number */
	int event;		/* RTM_* message type to emit */
	unsigned int flags;	/* NLM_F_* flags for the reply */
	int netnsid;		/* target netns id -- NOTE(review): confirm sentinel value */
	int ifindex;		/* device filter -- NOTE(review): confirm 0 means "any" */
};
114 
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/* Global hash of all configured IPv4 addresses, keyed by local address.
 * Writers hold RTNL (see inet_hash_insert/remove); readers use RCU.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
119 
120 static u32 inet_addr_hash(const struct net *net, __be32 addr)
121 {
122 	u32 val = (__force u32) addr ^ net_hash_mix(net);
123 
124 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
125 }
126 
127 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
128 {
129 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
130 
131 	ASSERT_RTNL();
132 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
133 }
134 
/* Unlink @ifa from the global address hash (RCU-safe for concurrent
 * readers); caller must hold RTNL.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
140 
/**
 * __ip_dev_find - find the first device with a given source address.
 * @net: the net namespace
 * @addr: the source address
 * @devref: if true, take a reference on the found device
 *
 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 *
 * Return: the matching net_device, or NULL if @addr is not local.
 */
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
	struct net_device *result = NULL;
	struct in_ifaddr *ifa;

	rcu_read_lock();
	/* Fast path: the global address hash. */
	ifa = inet_lookup_ifaddr_rcu(net, addr);
	if (!ifa) {
		struct flowi4 fl4 = { .daddr = addr };
		struct fib_result res = { 0 };
		struct fib_table *local;

		/* Fallback to FIB local table so that communication
		 * over loopback subnets work.
		 */
		local = fib_get_table(net, RT_TABLE_LOCAL);
		if (local &&
		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
		    res.type == RTN_LOCAL)
			result = FIB_RES_DEV(res);
	} else {
		result = ifa->ifa_dev->dev;
	}
	if (result && devref)
		dev_hold(result);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(__ip_dev_find);
178 
179 /* called under RCU lock */
180 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
181 {
182 	u32 hash = inet_addr_hash(net, addr);
183 	struct in_ifaddr *ifa;
184 
185 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
186 		if (ifa->ifa_local == addr &&
187 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
188 			return ifa;
189 
190 	return NULL;
191 }
192 
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chains: inetaddr_chain announces committed address add/delete
 * events; inetaddr_validator_chain lets listeners veto an address before
 * it is installed (see __inet_insert_ifa()).
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* No-op stubs when sysctl support is compiled out. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
212 
/* in_ifaddr allocation and RCU-deferred freeing helpers. */
214 
/* Allocate a zero-initialized in_ifaddr; may sleep (GFP_KERNEL). */
static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}
219 
/* RCU callback: once no reader can still see the address, drop its
 * in_device reference (if bound) and free it.
 */
static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}
227 
/* Defer freeing of @ifa until after an RCU grace period, so lockless
 * readers traversing the hash or ifa_list never see freed memory.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
232 
/* Final teardown of an in_device after its last reference is dropped
 * (NOTE(review): reached via in_dev_put() -- confirm against header).
 * Warns if addresses or multicast state are still attached, and refuses
 * to free a device not yet marked dead.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	/* no readers remain: the "1" satisfies rcu_dereference_protected() */
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
250 
/* Allocate and attach an in_device to @dev; caller must hold RTNL.
 * Configuration is seeded from the netns default devconf.  On success
 * the in_device holds a reference on @dev and a refcount of 1 that
 * accounts for the dev->ip_ptr pointer.  Returns the in_device or an
 * ERR_PTR() on failure.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;
	int err = -ENOMEM;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	/* keep LRO off while forwarding is enabled */
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	refcount_set(&in_dev->refcnt, 1);

	err = devinet_sysctl_register(in_dev);
	if (err) {
		/* mark dead so in_dev_put() may free it */
		in_dev->dead = 1;
		in_dev_put(in_dev);
		in_dev = NULL;
		goto out;
	}
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev ?: ERR_PTR(err);
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}
295 
/* RCU callback: drop the in_device reference once readers are done. */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
301 
/* Detach and tear down @in_dev: mark it dead, delete all addresses and
 * multicast state, clear dev->ip_ptr, then drop the ip_ptr reference
 * after an RCU grace period.  Caller must hold RTNL.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct net_device *dev;
	struct in_ifaddr *ifa;

	ASSERT_RTNL();

	dev = in_dev->dev;

	/* dead suppresses alias promotion in __inet_del_ifa() */
	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
328 
329 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
330 {
331 	const struct in_ifaddr *ifa;
332 
333 	rcu_read_lock();
334 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
335 		if (inet_ifa_match(a, ifa)) {
336 			if (!b || inet_ifa_match(b, ifa)) {
337 				rcu_read_unlock();
338 				return 1;
339 			}
340 		}
341 	}
342 	rcu_read_unlock();
343 	return 0;
344 }
345 
/* Remove the address *@ifap from @in_dev, announce the deletion over
 * netlink (@nlh/@portid attribute the originating request) and through
 * the notifier chain.  Deleting a primary address also deletes its
 * secondaries -- or, if promote_secondaries is enabled, promotes the
 * first matching secondary to primary.  If @destroy, the address is
 * freed (RCU-deferred).  Caller must hold RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1;
	struct in_ifaddr *last_prim;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	ifa1 = rtnl_dereference(*ifap);
	last_prim = rtnl_dereference(in_dev->ifa_list);
	/* device is going away: every address gets deleted anyway */
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* track the last primary of >= scope: promotion
			 * re-links the promoted address after it
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* skip addresses outside ifa1's subnet */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* move the promoted entry behind the last primary */
			last_sec = rtnl_dereference(last_prim->ifa_next);
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(last_prim->ifa_next, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* re-add the routes removed above with the new prefsrc */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
457 
/* Delete *@ifap without attributing the change to a netlink request. */
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
464 
static void check_lifetime(struct work_struct *work);

/* Deferred worker that expires/deprecates addresses with finite lifetimes. */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
468 
/* Insert @ifa into its device's address list (as primary or secondary,
 * depending on existing addresses in the same subnet), hash it, and
 * announce it via netlink and the notifier chain.  Consumes @ifa on
 * every error path.  Caller must hold RTNL.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid, struct netlink_ext_ack *extack)
{
	struct in_ifaddr __rcu **last_primary, **ifap;
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_validator_info ivi;
	struct in_ifaddr *ifa1;
	int ret;

	ASSERT_RTNL();

	/* an all-zero local address is silently dropped */
	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	ifap = &in_dev->ifa_list;
	ifa1 = rtnl_dereference(*ifap);

	/* Walk the list: find the insertion point for a primary address
	 * and decide whether @ifa is secondary to an existing subnet.
	 */
	while (ifa1) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}

		ifap = &ifa1->ifa_next;
		ifa1 = rtnl_dereference(*ifap);
	}

	/* Allow any devices that wish to register ifaddr validators to weigh
	 * in now, before changes are committed.  The rtnl lock is serializing
	 * access here, so the state should not change between a validator call
	 * and a final notify on commit.  This isn't invoked on promotion under
	 * the assumption that validators are checking the address itself, and
	 * not the flags.
	 */
	ivi.ivi_addr = ifa->ifa_address;
	ivi.ivi_dev = ifa->ifa_dev;
	ivi.extack = extack;
	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
					   NETDEV_UP, &ivi);
	ret = notifier_to_errno(ret);
	if (ret) {
		inet_free_ifa(ifa);
		return ret;
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	/* publish: readers may see the new entry from here on */
	rcu_assign_pointer(ifa->ifa_next, *ifap);
	rcu_assign_pointer(*ifap, ifa);

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* re-run the lifetime check so a finite lifetime takes effect */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
551 
/* Insert @ifa without attributing the change to a netlink request. */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
556 
/* Bind @ifa to @dev's in_device (taking a reference if not yet bound)
 * and insert it.  Consumes @ifa on failure.  Caller must hold RTNL.
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	if (ifa->ifa_dev != in_dev) {
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	/* loopback addresses are only meaningful on this host */
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}
578 
/* Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
/* Return the in_device of the interface with @ifindex in @net, or NULL. */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);
595 
/* Called only from RTNL semaphored context. No locks. */

/* Return the first address on @in_dev whose subnet is exactly
 * @prefix/@mask, or NULL if none matches.
 */
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	struct in_ifaddr *ifa;

	ASSERT_RTNL();

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	}
	return NULL;
}
611 
612 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
613 {
614 	struct ip_mreqn mreq = {
615 		.imr_multiaddr.s_addr = ifa->ifa_address,
616 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
617 	};
618 	int ret;
619 
620 	ASSERT_RTNL();
621 
622 	lock_sock(sk);
623 	if (join)
624 		ret = ip_mc_join_group(sk, &mreq);
625 	else
626 		ret = ip_mc_leave_group(sk, &mreq);
627 	release_sock(sk);
628 
629 	return ret;
630 }
631 
/* RTM_DELADDR handler: delete the first address on the given interface
 * matching the request's IFA_LOCAL/IFA_LABEL/IFA_ADDRESS filters.
 * Returns 0 on success, -ENODEV/-EADDRNOTAVAIL/parse errors otherwise.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr __rcu **ifap;
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa;

	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
				     ifa_ipv4_policy, extack);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (!in_dev) {
		err = -ENODEV;
		goto errout;
	}

	/* only attributes present in the request act as filters */
	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
			continue;

		/* leave the group that was auto-joined on add */
		if (ipv4_is_multicast(ifa->ifa_address))
			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}
682 
/* Lifetime value meaning "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
684 
/* Periodic worker: walk all hash buckets and expire addresses whose
 * valid lifetime has passed, or mark as IFA_F_DEPRECATED those past
 * their preferred lifetime.  A first lockless RCU pass decides whether
 * changes are needed; mutation happens in a second pass under RTNL.
 * Reschedules itself for the earliest upcoming lifetime event.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* pass 1: read-only scan under RCU */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* pass 2: re-check and mutate under RTNL */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr __rcu **ifap;
				struct in_ifaddr *tmp;

				/* locate ifa's link in its device list so
				 * inet_del_ifa() can unlink it
				 */
				ifap = &ifa->ifa_dev->ifa_list;
				tmp = rtnl_dereference(*ifap);
				while (tmp) {
					tmp = rtnl_dereference(tmp->ifa_next);
					if (rtnl_dereference(*ifap) == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
					ifap = &tmp->ifa_next;
					tmp = rtnl_dereference(*ifap);
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
787 
/* Apply user-supplied valid/preferred lifetimes (in seconds, with
 * INFINITY_LIFE_TIME meaning forever) to @ifa, updating its flags
 * (IFA_F_PERMANENT for infinite valid, IFA_F_DEPRECATED for a zero
 * preferred lifetime) and its modification/creation timestamps.
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long timeout;

	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout))
		ifa->ifa_valid_lft = timeout;
	else
		ifa->ifa_flags |= IFA_F_PERMANENT;

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
		ifa->ifa_preferred_lft = timeout;
	}
	ifa->ifa_tstamp = jiffies;
	/* first configuration: creation time equals modification time */
	if (!ifa->ifa_cstamp)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}
811 
/* Parse an RTM_NEWADDR request into a freshly allocated in_ifaddr bound
 * to the target device's in_device (reference taken).  Lifetimes from
 * IFA_CACHEINFO are returned through @pvalid_lft/@pprefered_lft rather
 * than applied.  Returns the address or an ERR_PTR() on failure.
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft,
				       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
				     ifa_ipv4_policy, extack);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential in_dev allocation can be left alive, it stays
		 * assigned to its device and is destroyed with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	in_dev_hold(in_dev);

	/* point-to-point style: local address doubles as the peer */
	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* IFA_FLAGS (32-bit) supersedes the 8-bit header flags */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_RT_PRIORITY])
		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* preferred lifetime may not exceed valid lifetime */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
899 
900 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
901 {
902 	struct in_device *in_dev = ifa->ifa_dev;
903 	struct in_ifaddr *ifa1;
904 
905 	if (!ifa->ifa_local)
906 		return NULL;
907 
908 	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
909 		if (ifa1->ifa_mask == ifa->ifa_mask &&
910 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
911 		    ifa1->ifa_local == ifa->ifa_local)
912 			return ifa1;
913 	}
914 	return NULL;
915 }
916 
/* RTM_NEWADDR handler: create a new address, or -- with NLM_F_REPLACE --
 * update the lifetimes and route metric of an existing one.  Returns 0
 * on success or a negative errno.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
					       true, ifa);

			if (ret < 0) {
				inet_free_ifa(ifa);
				return ret;
			}
		}
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
					 extack);
	} else {
		u32 new_metric = ifa->ifa_rt_priority;

		/* the parsed copy is no longer needed; update in place */
		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
		ifa = ifa_existing;

		if (ifa->ifa_rt_priority != new_metric) {
			fib_modify_prefix_metric(ifa, new_metric);
			ifa->ifa_rt_priority = new_metric;
		}

		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		/* re-run the lifetime check with the new lifetimes */
		cancel_delayed_work(&check_lifetime_work);
		queue_delayed_work(system_power_efficient_wq,
				&check_lifetime_work, 0);
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
	}
	return 0;
}
972 
973 /*
974  *	Determine a default network mask, based on the IP address.
975  */
976 
977 static int inet_abc_len(__be32 addr)
978 {
979 	int rc = -1;	/* Something else, probably a multicast. */
980 
981 	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
982 		rc = 0;
983 	else {
984 		__u32 haddr = ntohl(addr);
985 		if (IN_CLASSA(haddr))
986 			rc = 8;
987 		else if (IN_CLASSB(haddr))
988 			rc = 16;
989 		else if (IN_CLASSC(haddr))
990 			rc = 24;
991 		else if (IN_CLASSE(haddr))
992 			rc = 32;
993 	}
994 
995 	return rc;
996 }
997 
998 
999 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1000 {
1001 	struct sockaddr_in sin_orig;
1002 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1003 	struct in_ifaddr __rcu **ifap = NULL;
1004 	struct in_device *in_dev;
1005 	struct in_ifaddr *ifa = NULL;
1006 	struct net_device *dev;
1007 	char *colon;
1008 	int ret = -EFAULT;
1009 	int tryaddrmatch = 0;
1010 
1011 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1012 
1013 	/* save original address for comparison */
1014 	memcpy(&sin_orig, sin, sizeof(*sin));
1015 
1016 	colon = strchr(ifr->ifr_name, ':');
1017 	if (colon)
1018 		*colon = 0;
1019 
1020 	dev_load(net, ifr->ifr_name);
1021 
1022 	switch (cmd) {
1023 	case SIOCGIFADDR:	/* Get interface address */
1024 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1025 	case SIOCGIFDSTADDR:	/* Get the destination address */
1026 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1027 		/* Note that these ioctls will not sleep,
1028 		   so that we do not impose a lock.
1029 		   One day we will be forced to put shlock here (I mean SMP)
1030 		 */
1031 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1032 		memset(sin, 0, sizeof(*sin));
1033 		sin->sin_family = AF_INET;
1034 		break;
1035 
1036 	case SIOCSIFFLAGS:
1037 		ret = -EPERM;
1038 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1039 			goto out;
1040 		break;
1041 	case SIOCSIFADDR:	/* Set interface address (and family) */
1042 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1043 	case SIOCSIFDSTADDR:	/* Set the destination address */
1044 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1045 		ret = -EPERM;
1046 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1047 			goto out;
1048 		ret = -EINVAL;
1049 		if (sin->sin_family != AF_INET)
1050 			goto out;
1051 		break;
1052 	default:
1053 		ret = -EINVAL;
1054 		goto out;
1055 	}
1056 
1057 	rtnl_lock();
1058 
1059 	ret = -ENODEV;
1060 	dev = __dev_get_by_name(net, ifr->ifr_name);
1061 	if (!dev)
1062 		goto done;
1063 
1064 	if (colon)
1065 		*colon = ':';
1066 
1067 	in_dev = __in_dev_get_rtnl(dev);
1068 	if (in_dev) {
1069 		if (tryaddrmatch) {
1070 			/* Matthias Andree */
1071 			/* compare label and address (4.4BSD style) */
1072 			/* note: we only do this for a limited set of ioctls
1073 			   and only if the original address family was AF_INET.
1074 			   This is checked above. */
1075 
1076 			for (ifap = &in_dev->ifa_list;
1077 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1078 			     ifap = &ifa->ifa_next) {
1079 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1080 				    sin_orig.sin_addr.s_addr ==
1081 							ifa->ifa_local) {
1082 					break; /* found */
1083 				}
1084 			}
1085 		}
1086 		/* we didn't get a match, maybe the application is
1087 		   4.3BSD-style and passed in junk so we fall back to
1088 		   comparing just the label */
1089 		if (!ifa) {
1090 			for (ifap = &in_dev->ifa_list;
1091 			     (ifa = rtnl_dereference(*ifap)) != NULL;
1092 			     ifap = &ifa->ifa_next)
1093 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1094 					break;
1095 		}
1096 	}
1097 
1098 	ret = -EADDRNOTAVAIL;
1099 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1100 		goto done;
1101 
1102 	switch (cmd) {
1103 	case SIOCGIFADDR:	/* Get interface address */
1104 		ret = 0;
1105 		sin->sin_addr.s_addr = ifa->ifa_local;
1106 		break;
1107 
1108 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1109 		ret = 0;
1110 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1111 		break;
1112 
1113 	case SIOCGIFDSTADDR:	/* Get the destination address */
1114 		ret = 0;
1115 		sin->sin_addr.s_addr = ifa->ifa_address;
1116 		break;
1117 
1118 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1119 		ret = 0;
1120 		sin->sin_addr.s_addr = ifa->ifa_mask;
1121 		break;
1122 
1123 	case SIOCSIFFLAGS:
1124 		if (colon) {
1125 			ret = -EADDRNOTAVAIL;
1126 			if (!ifa)
1127 				break;
1128 			ret = 0;
1129 			if (!(ifr->ifr_flags & IFF_UP))
1130 				inet_del_ifa(in_dev, ifap, 1);
1131 			break;
1132 		}
1133 		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1134 		break;
1135 
1136 	case SIOCSIFADDR:	/* Set interface address (and family) */
1137 		ret = -EINVAL;
1138 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1139 			break;
1140 
1141 		if (!ifa) {
1142 			ret = -ENOBUFS;
1143 			ifa = inet_alloc_ifa();
1144 			if (!ifa)
1145 				break;
1146 			INIT_HLIST_NODE(&ifa->hash);
1147 			if (colon)
1148 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1149 			else
1150 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1151 		} else {
1152 			ret = 0;
1153 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1154 				break;
1155 			inet_del_ifa(in_dev, ifap, 0);
1156 			ifa->ifa_broadcast = 0;
1157 			ifa->ifa_scope = 0;
1158 		}
1159 
1160 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1161 
1162 		if (!(dev->flags & IFF_POINTOPOINT)) {
1163 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1164 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1165 			if ((dev->flags & IFF_BROADCAST) &&
1166 			    ifa->ifa_prefixlen < 31)
1167 				ifa->ifa_broadcast = ifa->ifa_address |
1168 						     ~ifa->ifa_mask;
1169 		} else {
1170 			ifa->ifa_prefixlen = 32;
1171 			ifa->ifa_mask = inet_make_mask(32);
1172 		}
1173 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1174 		ret = inet_set_ifa(dev, ifa);
1175 		break;
1176 
1177 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1178 		ret = 0;
1179 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1180 			inet_del_ifa(in_dev, ifap, 0);
1181 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1182 			inet_insert_ifa(ifa);
1183 		}
1184 		break;
1185 
1186 	case SIOCSIFDSTADDR:	/* Set the destination address */
1187 		ret = 0;
1188 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1189 			break;
1190 		ret = -EINVAL;
1191 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1192 			break;
1193 		ret = 0;
1194 		inet_del_ifa(in_dev, ifap, 0);
1195 		ifa->ifa_address = sin->sin_addr.s_addr;
1196 		inet_insert_ifa(ifa);
1197 		break;
1198 
1199 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1200 
1201 		/*
1202 		 *	The mask we set must be legal.
1203 		 */
1204 		ret = -EINVAL;
1205 		if (bad_mask(sin->sin_addr.s_addr, 0))
1206 			break;
1207 		ret = 0;
1208 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1209 			__be32 old_mask = ifa->ifa_mask;
1210 			inet_del_ifa(in_dev, ifap, 0);
1211 			ifa->ifa_mask = sin->sin_addr.s_addr;
1212 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1213 
1214 			/* See if current broadcast address matches
1215 			 * with current netmask, then recalculate
1216 			 * the broadcast address. Otherwise it's a
1217 			 * funny address, so don't touch it since
1218 			 * the user seems to know what (s)he's doing...
1219 			 */
1220 			if ((dev->flags & IFF_BROADCAST) &&
1221 			    (ifa->ifa_prefixlen < 31) &&
1222 			    (ifa->ifa_broadcast ==
1223 			     (ifa->ifa_local|~old_mask))) {
1224 				ifa->ifa_broadcast = (ifa->ifa_local |
1225 						      ~sin->sin_addr.s_addr);
1226 			}
1227 			inet_insert_ifa(ifa);
1228 		}
1229 		break;
1230 	}
1231 done:
1232 	rtnl_unlock();
1233 out:
1234 	return ret;
1235 }
1236 
1237 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1238 {
1239 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1240 	const struct in_ifaddr *ifa;
1241 	struct ifreq ifr;
1242 	int done = 0;
1243 
1244 	if (WARN_ON(size > sizeof(struct ifreq)))
1245 		goto out;
1246 
1247 	if (!in_dev)
1248 		goto out;
1249 
1250 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1251 		if (!buf) {
1252 			done += size;
1253 			continue;
1254 		}
1255 		if (len < size)
1256 			break;
1257 		memset(&ifr, 0, sizeof(struct ifreq));
1258 		strcpy(ifr.ifr_name, ifa->ifa_label);
1259 
1260 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1261 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1262 								ifa->ifa_local;
1263 
1264 		if (copy_to_user(buf + done, &ifr, size)) {
1265 			done = -EFAULT;
1266 			break;
1267 		}
1268 		len  -= size;
1269 		done += size;
1270 	}
1271 out:
1272 	return done;
1273 }
1274 
1275 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1276 				 int scope)
1277 {
1278 	const struct in_ifaddr *ifa;
1279 
1280 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1281 		if (ifa->ifa_flags & IFA_F_SECONDARY)
1282 			continue;
1283 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1284 		    ifa->ifa_scope <= scope)
1285 			return ifa->ifa_local;
1286 	}
1287 
1288 	return 0;
1289 }
1290 
/* inet_select_addr - choose a source address on @dev for reaching @dst
 * within the given @scope.
 *
 * Preference order:
 *  1. a primary address on @dev itself, matching @dst's subnet when
 *     possible (the first in-scope primary address is kept as fallback);
 *  2. an address on the device's L3 master (VRF) device, if any;
 *  3. an address on any other device in the same L3 domain.
 *
 * Returns the selected address, or 0 if none is found.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first in-scope address as a fallback in
		 * case no subnet match turns up.
		 */
		if (!addr)
			addr = ifa->ifa_local;
	}

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		/* Stay within the same L3 (VRF) domain as the original dev. */
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1356 
/* Check on one device whether the (@local, @dst) pair can be confirmed:
 * @local (or, when @local is 0, some address within @scope) must exist
 * on @in_dev, and @dst must fall into one of its subnets.  Returns the
 * confirmed (or autoselected) local address, or 0 when the combination
 * cannot be satisfied here.  Caller must hold rcu_read_lock().
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	const struct in_ifaddr *ifa;
	__be32 addr = 0;
	/* set once an ifa satisfying both subnet conditions was seen */
	int same = 0;

	in_dev_for_each_ifa_rcu(ifa, in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	}

	/* Only report an address when the subnet condition was met too. */
	return same ? addr : 0;
}
1394 
1395 /*
1396  * Confirm that local IP address exists using wildcards:
1397  * - net: netns to check, cannot be NULL
1398  * - in_dev: only on this interface, NULL=any interface
1399  * - dst: only in the same subnet as dst, 0=any dst
1400  * - local: address, 0=autoselect the local address
1401  * - scope: maximum allowed scope value for the local address
1402  */
1403 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1404 			 __be32 dst, __be32 local, int scope)
1405 {
1406 	__be32 addr = 0;
1407 	struct net_device *dev;
1408 
1409 	if (in_dev)
1410 		return confirm_addr_indev(in_dev, dst, local, scope);
1411 
1412 	rcu_read_lock();
1413 	for_each_netdev_rcu(net, dev) {
1414 		in_dev = __in_dev_get_rcu(dev);
1415 		if (in_dev) {
1416 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1417 			if (addr)
1418 				break;
1419 		}
1420 	}
1421 	rcu_read_unlock();
1422 
1423 	return addr;
1424 }
1425 EXPORT_SYMBOL(inet_confirm_addr);
1426 
1427 /*
1428  *	Device notifier
1429  */
1430 
/* Register @nb on the (blocking, may-sleep) chain notified when an
 * IPv4 address is added to or removed from any device.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1436 
/* Remove @nb from the IPv4 address change notifier chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1442 
/* Register @nb on the chain consulted to veto an IPv4 address change
 * before it is applied (validators may return notifier_from_errno()).
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1448 
/* Remove @nb from the IPv4 address validator chain. */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
	    nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1455 
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering ("old:N" suffixes) and to create unique labels
 * if possible.  An RTM_NEWADDR is emitted for every relabelled address.
 */
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;	/* number of labels rewritten so far */

	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* Primary address: label is just the new device name. */
		if (named++ == 0)
			goto skip;
		dot = strchr(old, ':');
		if (!dot) {
			/* No alias suffix in the old label: synthesize one
			 * from the rename order to keep labels unique.
			 */
			sprintf(old, ":%d", named);
			dot = old;
		}
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			/* Too long: overwrite the tail of the new name so
			 * the alias suffix survives intact.
			 */
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1484 
1485 static bool inetdev_valid_mtu(unsigned int mtu)
1486 {
1487 	return mtu >= IPV4_MIN_MTU;
1488 }
1489 
1490 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1491 					struct in_device *in_dev)
1492 
1493 {
1494 	const struct in_ifaddr *ifa;
1495 
1496 	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1497 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1498 			 ifa->ifa_local, dev,
1499 			 ifa->ifa_local, NULL,
1500 			 dev->dev_addr, NULL);
1501 	}
1502 }
1503 
/* Called only under RTNL semaphore */

/* Netdevice notifier: keeps the per-device IPv4 state (in_device) in
 * sync with the life cycle of the underlying net_device.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		/* No IPv4 state yet: create it on registration, or when
		 * a previously too-small MTU becomes valid again.
		 */
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* in_dev already present at register time: should never
		 * happen; drop the stale pointer.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Auto-configure 127.0.0.1/8 on loopback. */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1599 
/* Hooks inetdev_event() into netdevice state-change notifications. */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1603 
1604 static size_t inet_nlmsg_size(void)
1605 {
1606 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1607 	       + nla_total_size(4) /* IFA_ADDRESS */
1608 	       + nla_total_size(4) /* IFA_LOCAL */
1609 	       + nla_total_size(4) /* IFA_BROADCAST */
1610 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1611 	       + nla_total_size(4)  /* IFA_FLAGS */
1612 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1613 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1614 }
1615 
/* Convert an address timestamp (jiffies) into the hundredths-of-a-second
 * units exported via IFA_CACHEINFO, relative to INITIAL_JIFFIES.
 */
static inline u32 cstamp_delta(unsigned long cstamp)
{
	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
1620 
1621 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1622 			 unsigned long tstamp, u32 preferred, u32 valid)
1623 {
1624 	struct ifa_cacheinfo ci;
1625 
1626 	ci.cstamp = cstamp_delta(cstamp);
1627 	ci.tstamp = cstamp_delta(tstamp);
1628 	ci.ifa_prefered = preferred;
1629 	ci.ifa_valid = valid;
1630 
1631 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1632 }
1633 
/* Fill one RTM_NEWADDR/RTM_DELADDR message describing @ifa into @skb.
 * Returns 0 on success or -EMSGSIZE when the skb has no room (any
 * partially written message is cancelled).
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    struct inet_fill_args *args)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
			args->flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (args->netnsid >= 0 &&
	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
		goto nla_put_failure;

	/* Report the remaining lifetimes: permanent addresses live forever,
	 * otherwise subtract the seconds elapsed since the last update,
	 * clamping already-expired values to zero.
	 */
	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* Optional attributes are emitted only when set/non-zero. */
	if ((ifa->ifa_address &&
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    (ifa->ifa_rt_priority &&
	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1700 
/* Strict validation of an RTM_GETADDR dump request: reject stray header
 * fields and any attribute other than IFA_TARGET_NETNSID.  On success
 * fills @fillargs (ifindex filter, netnsid) and may replace *tgt_net
 * with the requested target namespace (caller must put_net() it).
 */
static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
				      struct inet_fill_args *fillargs,
				      struct net **tgt_net, struct sock *sk,
				      struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[IFA_MAX+1];
	struct ifaddrmsg *ifm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
		return -EINVAL;
	}

	ifm = nlmsg_data(nlh);
	/* Dumps filter only by ifindex; other header fields must be clear. */
	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
		return -EINVAL;
	}

	fillargs->ifindex = ifm->ifa_index;
	if (fillargs->ifindex) {
		cb->answer_flags |= NLM_F_DUMP_FILTERED;
		fillargs->flags |= NLM_F_DUMP_FILTERED;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
					    ifa_ipv4_policy, extack);
	if (err < 0)
		return err;

	for (i = 0; i <= IFA_MAX; ++i) {
		if (!tb[i])
			continue;

		if (i == IFA_TARGET_NETNSID) {
			struct net *net;

			fillargs->netnsid = nla_get_s32(tb[i]);

			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
			if (IS_ERR(net)) {
				fillargs->netnsid = -1;
				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
				return PTR_ERR(net);
			}
			*tgt_net = net;
		} else {
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	return 0;
}
1757 
1758 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1759 			    struct netlink_callback *cb, int s_ip_idx,
1760 			    struct inet_fill_args *fillargs)
1761 {
1762 	struct in_ifaddr *ifa;
1763 	int ip_idx = 0;
1764 	int err;
1765 
1766 	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1767 		if (ip_idx < s_ip_idx) {
1768 			ip_idx++;
1769 			continue;
1770 		}
1771 		err = inet_fill_ifaddr(skb, ifa, fillargs);
1772 		if (err < 0)
1773 			goto done;
1774 
1775 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1776 		ip_idx++;
1777 	}
1778 	err = 0;
1779 
1780 done:
1781 	cb->args[2] = ip_idx;
1782 
1783 	return err;
1784 }
1785 
/* RTM_GETADDR dump handler.  Walks the device hash of the target netns
 * and emits one RTM_NEWADDR per address, resuming from the position
 * saved in cb->args[] ([0]=hash bucket, [1]=device index, [2]=address
 * index) when the previous skb filled up.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_fill_args fillargs = {
		.portid = NETLINK_CB(cb->skb).portid,
		.seq = nlh->nlmsg_seq,
		.event = RTM_NEWADDR,
		.flags = NLM_F_MULTI,
		.netnsid = -1,
	};
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	int h, s_h;
	int idx, s_idx;
	int s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;
	int err = 0;

	/* Resume state from the previous callback invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = cb->args[2];

	if (cb->strict_check) {
		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
						 skb->sk, cb);
		if (err < 0)
			goto put_tgt_net;

		err = 0;
		/* Single-interface dump: no need to walk the hash table. */
		if (fillargs.ifindex) {
			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
			if (!dev) {
				err = -ENODEV;
				goto put_tgt_net;
			}

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev) {
				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
						       &fillargs);
			}
			goto put_tgt_net;
		}
	}

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &tgt_net->dev_index_head[h];
		rcu_read_lock();
		/* Sequence for nl_dump_check_consistent(): changes when
		 * addresses or devices change under us mid-dump.
		 */
		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
			  tgt_net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Past the resume point: restart addresses at 0. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
					       &fillargs);
			if (err < 0) {
				rcu_read_unlock();
				goto done;
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
put_tgt_net:
	if (fillargs.netnsid >= 0)
		put_net(tgt_net);

	return skb->len ? : err;
}
1869 
1870 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1871 		      u32 portid)
1872 {
1873 	struct inet_fill_args fillargs = {
1874 		.portid = portid,
1875 		.seq = nlh ? nlh->nlmsg_seq : 0,
1876 		.event = event,
1877 		.flags = 0,
1878 		.netnsid = -1,
1879 	};
1880 	struct sk_buff *skb;
1881 	int err = -ENOBUFS;
1882 	struct net *net;
1883 
1884 	net = dev_net(ifa->ifa_dev->dev);
1885 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1886 	if (!skb)
1887 		goto errout;
1888 
1889 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1890 	if (err < 0) {
1891 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1892 		WARN_ON(err == -EMSGSIZE);
1893 		kfree_skb(skb);
1894 		goto errout;
1895 	}
1896 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1897 	return;
1898 errout:
1899 	if (err < 0)
1900 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1901 }
1902 
1903 static size_t inet_get_link_af_size(const struct net_device *dev,
1904 				    u32 ext_filter_mask)
1905 {
1906 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1907 
1908 	if (!in_dev)
1909 		return 0;
1910 
1911 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1912 }
1913 
1914 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1915 			     u32 ext_filter_mask)
1916 {
1917 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1918 	struct nlattr *nla;
1919 	int i;
1920 
1921 	if (!in_dev)
1922 		return -ENODATA;
1923 
1924 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1925 	if (!nla)
1926 		return -EMSGSIZE;
1927 
1928 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1929 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1930 
1931 	return 0;
1932 }
1933 
/* Policy for the AF_INET part of IFLA_AF_SPEC in RTM_SETLINK. */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1937 
1938 static int inet_validate_link_af(const struct net_device *dev,
1939 				 const struct nlattr *nla)
1940 {
1941 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1942 	int err, rem;
1943 
1944 	if (dev && !__in_dev_get_rcu(dev))
1945 		return -EAFNOSUPPORT;
1946 
1947 	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1948 					  inet_af_policy, NULL);
1949 	if (err < 0)
1950 		return err;
1951 
1952 	if (tb[IFLA_INET_CONF]) {
1953 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1954 			int cfgid = nla_type(a);
1955 
1956 			if (nla_len(a) < 4)
1957 				return -EINVAL;
1958 
1959 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1960 				return -EINVAL;
1961 		}
1962 	}
1963 
1964 	return 0;
1965 }
1966 
/* Apply IFLA_INET_CONF settings carried in an RTM_SETLINK request.
 * The attribute has already passed inet_validate_link_af(), hence the
 * BUG() on an unexpected parse failure here.
 */
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
		BUG();

	if (tb[IFLA_INET_CONF]) {
		/* Attribute type doubles as the devconf index to set. */
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}
1986 
1987 static int inet_netconf_msgsize_devconf(int type)
1988 {
1989 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1990 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1991 	bool all = false;
1992 
1993 	if (type == NETCONFA_ALL)
1994 		all = true;
1995 
1996 	if (all || type == NETCONFA_FORWARDING)
1997 		size += nla_total_size(4);
1998 	if (all || type == NETCONFA_RP_FILTER)
1999 		size += nla_total_size(4);
2000 	if (all || type == NETCONFA_MC_FORWARDING)
2001 		size += nla_total_size(4);
2002 	if (all || type == NETCONFA_BC_FORWARDING)
2003 		size += nla_total_size(4);
2004 	if (all || type == NETCONFA_PROXY_NEIGH)
2005 		size += nla_total_size(4);
2006 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2007 		size += nla_total_size(4);
2008 
2009 	return size;
2010 }
2011 
/* Fill one RTM_NEWNETCONF message for @ifindex into @skb, emitting the
 * attribute selected by @type (or all of them when @type is
 * NETCONFA_ALL).  A NULL @devconf produces an ifindex-only message.
 * Returns 0 or -EMSGSIZE (the partial message is cancelled).
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	/* Without devconf, only the ifindex is reported. */
	if (!devconf)
		goto out;

	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_BC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2071 
2072 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2073 				 int ifindex, struct ipv4_devconf *devconf)
2074 {
2075 	struct sk_buff *skb;
2076 	int err = -ENOBUFS;
2077 
2078 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2079 	if (!skb)
2080 		goto errout;
2081 
2082 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2083 					event, 0, type);
2084 	if (err < 0) {
2085 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2086 		WARN_ON(err == -EMSGSIZE);
2087 		kfree_skb(skb);
2088 		goto errout;
2089 	}
2090 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2091 	return;
2092 errout:
2093 	if (err < 0)
2094 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2095 }
2096 
/* Attribute policy for RTM_GETNETCONF requests. */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
2104 
/* Parse and validate an RTM_GETNETCONF request into @tb.  Under strict
 * checking only NETCONFA_IFINDEX is accepted; legacy (non-strict)
 * sockets get a plain deprecated parse for compatibility.
 */
static int inet_netconf_valid_get_req(struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
				      struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
					      tb, NETCONFA_MAX,
					      devconf_ipv4_policy, extack);

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
					    tb, NETCONFA_MAX,
					    devconf_ipv4_policy, extack);
	if (err)
		return err;

	/* Strict mode: reject every attribute except the ifindex. */
	for (i = 0; i <= NETCONFA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETCONFA_IFINDEX:
			break;
		default:
			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
			return -EINVAL;
		}
	}

	return 0;
}
2143 
/* RTM_GETNETCONF handler: look up the devconf selected by
 * NETCONFA_IFINDEX (a real ifindex, NETCONFA_IFINDEX_ALL or
 * NETCONFA_IFINDEX_DEFAULT) and unicast a full RTM_NEWNETCONF reply
 * back to the requester.
 */
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX+1];
	struct sk_buff *skb;
	struct ipv4_devconf *devconf;
	struct in_device *in_dev;
	struct net_device *dev;
	int ifindex;
	int err;

	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
	if (err)
		goto errout;

	err = -EINVAL;
	if (!tb[NETCONFA_IFINDEX])
		goto errout;

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	switch (ifindex) {
	case NETCONFA_IFINDEX_ALL:
		devconf = net->ipv4.devconf_all;
		break;
	case NETCONFA_IFINDEX_DEFAULT:
		devconf = net->ipv4.devconf_dflt;
		break;
	default:
		dev = __dev_get_by_index(net, ifindex);
		if (!dev)
			goto errout;
		in_dev = __in_dev_get_rtnl(dev);
		if (!in_dev)
			goto errout;
		devconf = &in_dev->cnf;
		break;
	}

	err = -ENOBUFS;
	/* Reply always carries the full attribute set. */
	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
2203 
/* RTM_GETNETCONF dump handler: walk every netdev's ipv4 devconf, then emit
 * two extra pseudo-entries ("all" and "default").  Resumable: cb->args[0]
 * holds the hash bucket (plus 1/2 past NETDEV_HASHENTRIES for the two
 * pseudo-entries), cb->args[1] the index within the bucket.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
			return -EINVAL;
		}

		/* A dump request takes no attributes at all. */
		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Seed the consistency sequence so userspace can detect
		 * address/device changes racing with this dump.
		 */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				/* skb full: record resume point and stop. */
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* Past the last bucket: emit the "all" pseudo-entry once. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Then the "default" pseudo-entry. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2290 
2291 #ifdef CONFIG_SYSCTL
2292 
2293 static void devinet_copy_dflt_conf(struct net *net, int i)
2294 {
2295 	struct net_device *dev;
2296 
2297 	rcu_read_lock();
2298 	for_each_netdev_rcu(net, dev) {
2299 		struct in_device *in_dev;
2300 
2301 		in_dev = __in_dev_get_rcu(dev);
2302 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2303 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2304 	}
2305 	rcu_read_unlock();
2306 }
2307 
2308 /* called with RTNL locked */
2309 static void inet_forward_change(struct net *net)
2310 {
2311 	struct net_device *dev;
2312 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2313 
2314 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2315 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2316 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2317 				    NETCONFA_FORWARDING,
2318 				    NETCONFA_IFINDEX_ALL,
2319 				    net->ipv4.devconf_all);
2320 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2321 				    NETCONFA_FORWARDING,
2322 				    NETCONFA_IFINDEX_DEFAULT,
2323 				    net->ipv4.devconf_dflt);
2324 
2325 	for_each_netdev(net, dev) {
2326 		struct in_device *in_dev;
2327 
2328 		if (on)
2329 			dev_disable_lro(dev);
2330 
2331 		in_dev = __in_dev_get_rtnl(dev);
2332 		if (in_dev) {
2333 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2334 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2335 						    NETCONFA_FORWARDING,
2336 						    dev->ifindex, &in_dev->cnf);
2337 		}
2338 	}
2339 }
2340 
2341 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2342 {
2343 	if (cnf == net->ipv4.devconf_dflt)
2344 		return NETCONFA_IFINDEX_DEFAULT;
2345 	else if (cnf == net->ipv4.devconf_all)
2346 		return NETCONFA_IFINDEX_ALL;
2347 	else {
2348 		struct in_device *idev
2349 			= container_of(cnf, struct in_device, cnf);
2350 		return idev->dev->ifindex;
2351 	}
2352 }
2353 
/* Generic proc handler for per-device ipv4 sysctls.  After the write it
 * marks the entry as explicitly set, copies defaults to devices when the
 * "default" table changed, flushes the routing cache for entries that
 * affect route validation, and sends netconf notifications for the
 * attributes userspace can subscribe to.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* Index of this entry within cnf->data[] (see the
		 * IPV4_DEVCONF_* - 1 offsets in the table macros).
		 */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		/* Remember that this entry was set explicitly, so it no
		 * longer tracks the per-net default.
		 */
		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
		    new_value != old_value)
			rt_cache_flush(net);

		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
						    ifindex, cnf);
		}
	}

	return ret;
}
2406 
/* proc handler for the "forwarding" sysctls (per-device, "all", "default"
 * and the legacy /proc/sys/net/ipv4/ip_forward alias).  Changes other than
 * to the "default" table need RTNL; if it cannot be taken without blocking
 * the write is undone and the syscall restarted to avoid a deadlock with
 * RTNL holders waiting on this process.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				/* "all" fans out to every device. */
				inet_forward_change(net);
			} else {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				/* LRO is incompatible with forwarding. */
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			/* "default" table: no RTNL needed, just notify. */
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2450 
2451 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2452 				void __user *buffer,
2453 				size_t *lenp, loff_t *ppos)
2454 {
2455 	int *valp = ctl->data;
2456 	int val = *valp;
2457 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2458 	struct net *net = ctl->extra2;
2459 
2460 	if (write && *valp != val)
2461 		rt_cache_flush(net);
2462 
2463 	return ret;
2464 }
2465 
/* Build one ctl_table entry for devconf attribute @attr.  .data points
 * into the template ipv4_devconf; __devinet_sysctl_register() rebases it
 * (and .extra1/.extra2) onto the per-net/per-device copy.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Writable entry with the generic devconf handler. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry (value maintained by the kernel, e.g. mc_forwarding). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Writable entry with a custom proc handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Writable entry whose change flushes the routing cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2488 
/* Template sysctl table for net/ipv4/conf/<dev>/.  kmemdup'd and rebased
 * per device (and for the "all"/"default" pseudo-devices) at registration
 * time.  Sized __IPV4_DEVCONF_MAX so the last slot stays zeroed as the
 * ctl_table terminator.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2540 
/* Duplicate the template sysctl table, rebase every entry onto devconf @p,
 * and register it under net/ipv4/conf/@dev_name.  On success, stores the
 * table in p->sysctl and sends a full netconf notification for @ifindex.
 * Returns 0 or -ENOBUFS.
 */
static int __devinet_sysctl_register(struct net *net, char *dev_name,
				     int ifindex, struct ipv4_devconf *p)
{
	int i;
	struct devinet_sysctl_table *t;
	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];

	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	/* Rebase all entries except the zeroed terminator slot (- 1):
	 * .data moves from the template ipv4_devconf into *p by offset.
	 */
	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
		t->devinet_vars[i].extra1 = p;
		t->devinet_vars[i].extra2 = net;
	}

	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);

	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl = t;

	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
				    ifindex, p);
	return 0;

free:
	kfree(t);
out:
	return -ENOBUFS;
}
2575 
2576 static void __devinet_sysctl_unregister(struct net *net,
2577 					struct ipv4_devconf *cnf, int ifindex)
2578 {
2579 	struct devinet_sysctl_table *t = cnf->sysctl;
2580 
2581 	if (t) {
2582 		cnf->sysctl = NULL;
2583 		unregister_net_sysctl_table(t->sysctl_header);
2584 		kfree(t);
2585 	}
2586 
2587 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2588 }
2589 
2590 static int devinet_sysctl_register(struct in_device *idev)
2591 {
2592 	int err;
2593 
2594 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2595 		return -EINVAL;
2596 
2597 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2598 	if (err)
2599 		return err;
2600 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2601 					idev->dev->ifindex, &idev->cnf);
2602 	if (err)
2603 		neigh_sysctl_unregister(idev->arp_parms);
2604 	return err;
2605 }
2606 
2607 static void devinet_sysctl_unregister(struct in_device *idev)
2608 {
2609 	struct net *net = dev_net(idev->dev);
2610 
2611 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2612 	neigh_sysctl_unregister(idev->arp_parms);
2613 }
2614 
/* Legacy /proc/sys/net/ipv4/ip_forward alias for the "all" forwarding
 * entry.  Template only: devinet_init_net() kmemdup's it and rebases
 * .data/.extra1/.extra2 onto the per-net devconf_all.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
2628 #endif
2629 
/* Per-netns init: allocate the "all" and "default" devconf tables (copied
 * either from the compile-time templates or from init_net, depending on
 * sysctl_devconf_inherit_init_net), and register their sysctl trees plus
 * the legacy ip_forward entry.  Unwinds fully on any failure.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
	if (!all)
		goto err_alloc_all;

	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
	if (!dflt)
		goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
	if (!tbl)
		goto err_alloc_ctl;

	/* Rebase the ip_forward template onto this netns' "all" table. */
	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
	tbl[0].extra1 = all;
	tbl[0].extra2 = net;
#endif

	/* Unless configured to keep the compile-time defaults (mode 2),
	 * a new non-init netns inherits init_net's current settings.
	 */
	if ((!IS_ENABLED(CONFIG_SYSCTL) ||
	     sysctl_devconf_inherit_init_net != 2) &&
	    !net_eq(net, &init_net)) {
		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	kfree(tbl);
err_alloc_ctl:
#endif
	kfree(dflt);
err_alloc_dflt:
	kfree(all);
err_alloc_all:
	return err;
}
2701 
/* Per-netns teardown: unregister the sysctl trees set up by
 * devinet_init_net() in reverse order, then free the devconf tables.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
				    NETCONFA_IFINDEX_DEFAULT);
	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
				    NETCONFA_IFINDEX_ALL);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
2718 
/* Per-network-namespace lifecycle hooks for ipv4 device configuration. */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2723 
/* AF_INET hooks for IFLA_AF_SPEC handling in rtnetlink link messages. */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2731 
/* Boot-time initialization of the ipv4 device layer: address hash table,
 * pernet ops, gifconf/netdevice hooks, the address-lifetime worker, and
 * the rtnetlink message handlers for addresses and netconf.
 */
void __init devinet_init(void)
{
	int i;

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Kick off periodic expiry of addresses with finite lifetimes. */
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, 0);
}
2754