xref: /linux/net/ipv4/devinet.c (revision b7d3826c2ed6c3e626e7ae796c5df2c0d2551c6a)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
103 	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
104 };
105 
106 struct inet_fill_args {
107 	u32 portid;
108 	u32 seq;
109 	int event;
110 	unsigned int flags;
111 	int netnsid;
112 };
113 
114 #define IN4_ADDR_HSIZE_SHIFT	8
115 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
116 
117 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
118 
119 static u32 inet_addr_hash(const struct net *net, __be32 addr)
120 {
121 	u32 val = (__force u32) addr ^ net_hash_mix(net);
122 
123 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
124 }
125 
126 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
127 {
128 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
129 
130 	ASSERT_RTNL();
131 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
132 }
133 
134 static void inet_hash_remove(struct in_ifaddr *ifa)
135 {
136 	ASSERT_RTNL();
137 	hlist_del_init_rcu(&ifa->hash);
138 }
139 
140 /**
141  * __ip_dev_find - find the first device with a given source address.
142  * @net: the net namespace
143  * @addr: the source address
144  * @devref: if true, take a reference on the found device
145  *
146  * If a caller uses devref=false, it should be protected by RCU, or RTNL
147  */
148 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
149 {
150 	struct net_device *result = NULL;
151 	struct in_ifaddr *ifa;
152 
153 	rcu_read_lock();
154 	ifa = inet_lookup_ifaddr_rcu(net, addr);
155 	if (!ifa) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fall back to the FIB local table so that communication
161 		 * over loopback subnets works.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	} else {
169 		result = ifa->ifa_dev->dev;
170 	}
171 	if (result && devref)
172 		dev_hold(result);
173 	rcu_read_unlock();
174 	return result;
175 }
176 EXPORT_SYMBOL(__ip_dev_find);
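/* Illustrative usage sketch (not part of the original file; names are
 * hypothetical): a caller that needs the device beyond an RCU section
 * passes devref=true and must drop the reference itself:
 *
 *	struct net_device *dev = __ip_dev_find(net, addr, true);
 *
 *	if (dev) {
 *		...
 *		dev_put(dev);
 *	}
 */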
177 
178 /* called under RCU lock */
179 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
180 {
181 	u32 hash = inet_addr_hash(net, addr);
182 	struct in_ifaddr *ifa;
183 
184 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
185 		if (ifa->ifa_local == addr &&
186 		    net_eq(dev_net(ifa->ifa_dev->dev), net))
187 			return ifa;
188 
189 	return NULL;
190 }
191 
192 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
193 
194 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
195 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
196 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
197 			 int destroy);
198 #ifdef CONFIG_SYSCTL
199 static int devinet_sysctl_register(struct in_device *idev);
200 static void devinet_sysctl_unregister(struct in_device *idev);
201 #else
202 static int devinet_sysctl_register(struct in_device *idev)
203 {
204 	return 0;
205 }
206 static void devinet_sysctl_unregister(struct in_device *idev)
207 {
208 }
209 #endif
210 
211 /* Locks all the inet devices. */
212 
213 static struct in_ifaddr *inet_alloc_ifa(void)
214 {
215 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
216 }
217 
218 static void inet_rcu_free_ifa(struct rcu_head *head)
219 {
220 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
221 	if (ifa->ifa_dev)
222 		in_dev_put(ifa->ifa_dev);
223 	kfree(ifa);
224 }
225 
226 static void inet_free_ifa(struct in_ifaddr *ifa)
227 {
228 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
229 }
230 
231 void in_dev_finish_destroy(struct in_device *idev)
232 {
233 	struct net_device *dev = idev->dev;
234 
235 	WARN_ON(idev->ifa_list);
236 	WARN_ON(idev->mc_list);
237 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
238 #ifdef NET_REFCNT_DEBUG
239 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
240 #endif
241 	dev_put(dev);
242 	if (!idev->dead)
243 		pr_err("Freeing alive in_device %p\n", idev);
244 	else
245 		kfree(idev);
246 }
247 EXPORT_SYMBOL(in_dev_finish_destroy);
248 
249 static struct in_device *inetdev_init(struct net_device *dev)
250 {
251 	struct in_device *in_dev;
252 	int err = -ENOMEM;
253 
254 	ASSERT_RTNL();
255 
256 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
257 	if (!in_dev)
258 		goto out;
259 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
260 			sizeof(in_dev->cnf));
261 	in_dev->cnf.sysctl = NULL;
262 	in_dev->dev = dev;
263 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
264 	if (!in_dev->arp_parms)
265 		goto out_kfree;
266 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
267 		dev_disable_lro(dev);
268 	/* Reference in_dev->dev */
269 	dev_hold(dev);
270 	/* Account for reference dev->ip_ptr (below) */
271 	refcount_set(&in_dev->refcnt, 1);
272 
273 	err = devinet_sysctl_register(in_dev);
274 	if (err) {
275 		in_dev->dead = 1;
276 		in_dev_put(in_dev);
277 		in_dev = NULL;
278 		goto out;
279 	}
280 	ip_mc_init_dev(in_dev);
281 	if (dev->flags & IFF_UP)
282 		ip_mc_up(in_dev);
283 
284 	/* we can receive as soon as ip_ptr is set -- do this last */
285 	rcu_assign_pointer(dev->ip_ptr, in_dev);
286 out:
287 	return in_dev ?: ERR_PTR(err);
288 out_kfree:
289 	kfree(in_dev);
290 	in_dev = NULL;
291 	goto out;
292 }
293 
294 static void in_dev_rcu_put(struct rcu_head *head)
295 {
296 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
297 	in_dev_put(idev);
298 }
299 
300 static void inetdev_destroy(struct in_device *in_dev)
301 {
302 	struct in_ifaddr *ifa;
303 	struct net_device *dev;
304 
305 	ASSERT_RTNL();
306 
307 	dev = in_dev->dev;
308 
309 	in_dev->dead = 1;
310 
311 	ip_mc_destroy_dev(in_dev);
312 
313 	while ((ifa = in_dev->ifa_list) != NULL) {
314 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
315 		inet_free_ifa(ifa);
316 	}
317 
318 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
319 
320 	devinet_sysctl_unregister(in_dev);
321 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
322 	arp_ifdown(dev);
323 
324 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
325 }
326 
327 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
328 {
329 	rcu_read_lock();
330 	for_primary_ifa(in_dev) {
331 		if (inet_ifa_match(a, ifa)) {
332 			if (!b || inet_ifa_match(b, ifa)) {
333 				rcu_read_unlock();
334 				return 1;
335 			}
336 		}
337 	} endfor_ifa(in_dev);
338 	rcu_read_unlock();
339 	return 0;
340 }
341 
342 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
343 			 int destroy, struct nlmsghdr *nlh, u32 portid)
344 {
345 	struct in_ifaddr *promote = NULL;
346 	struct in_ifaddr *ifa, *ifa1 = *ifap;
347 	struct in_ifaddr *last_prim = in_dev->ifa_list;
348 	struct in_ifaddr *prev_prom = NULL;
349 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
350 
351 	ASSERT_RTNL();
352 
353 	if (in_dev->dead)
354 		goto no_promotions;
355 
356 	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
357 	 * unless alias promotion is set.
358 	 */
359 
360 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
361 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
362 
363 		while ((ifa = *ifap1) != NULL) {
364 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
365 			    ifa1->ifa_scope <= ifa->ifa_scope)
366 				last_prim = ifa;
367 
368 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
369 			    ifa1->ifa_mask != ifa->ifa_mask ||
370 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
371 				ifap1 = &ifa->ifa_next;
372 				prev_prom = ifa;
373 				continue;
374 			}
375 
376 			if (!do_promote) {
377 				inet_hash_remove(ifa);
378 				*ifap1 = ifa->ifa_next;
379 
380 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
381 				blocking_notifier_call_chain(&inetaddr_chain,
382 						NETDEV_DOWN, ifa);
383 				inet_free_ifa(ifa);
384 			} else {
385 				promote = ifa;
386 				break;
387 			}
388 		}
389 	}
390 
391 	/* On promotion all secondaries from the subnet are changing
392 	 * the primary IP, so we must remove all their routes silently
393 	 * and later add them back with the new prefsrc. Do this
394 	 * while all addresses are still on the device list.
395 	 */
396 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
397 		if (ifa1->ifa_mask == ifa->ifa_mask &&
398 		    inet_ifa_match(ifa1->ifa_address, ifa))
399 			fib_del_ifaddr(ifa, ifa1);
400 	}
401 
402 no_promotions:
403 	/* 2. Unlink it */
404 
405 	*ifap = ifa1->ifa_next;
406 	inet_hash_remove(ifa1);
407 
408 	/* 3. Announce address deletion */
409 
410 	/* Send the message first, then call the notifier.
411 	   At first sight, the FIB update triggered by the notifier
412 	   will refer to an already deleted ifaddr, which could confuse
413 	   netlink listeners. It is not true: look, gated sees
414 	   that the route was deleted and, if it still thinks the ifaddr
415 	   is valid, it will try to restore the deleted routes... Grr.
416 	   So this order is correct.
417 	 */
418 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
419 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
420 
421 	if (promote) {
422 		struct in_ifaddr *next_sec = promote->ifa_next;
423 
424 		if (prev_prom) {
425 			prev_prom->ifa_next = promote->ifa_next;
426 			promote->ifa_next = last_prim->ifa_next;
427 			last_prim->ifa_next = promote;
428 		}
429 
430 		promote->ifa_flags &= ~IFA_F_SECONDARY;
431 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
432 		blocking_notifier_call_chain(&inetaddr_chain,
433 				NETDEV_UP, promote);
434 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
435 			if (ifa1->ifa_mask != ifa->ifa_mask ||
436 			    !inet_ifa_match(ifa1->ifa_address, ifa))
437 					continue;
438 			fib_add_ifaddr(ifa);
439 		}
440 
441 	}
442 	if (destroy)
443 		inet_free_ifa(ifa1);
444 }
445 
446 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
447 			 int destroy)
448 {
449 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
450 }
451 
452 static void check_lifetime(struct work_struct *work);
453 
454 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
455 
456 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
457 			     u32 portid, struct netlink_ext_ack *extack)
458 {
459 	struct in_device *in_dev = ifa->ifa_dev;
460 	struct in_ifaddr *ifa1, **ifap, **last_primary;
461 	struct in_validator_info ivi;
462 	int ret;
463 
464 	ASSERT_RTNL();
465 
466 	if (!ifa->ifa_local) {
467 		inet_free_ifa(ifa);
468 		return 0;
469 	}
470 
471 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
472 	last_primary = &in_dev->ifa_list;
473 
474 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
475 	     ifap = &ifa1->ifa_next) {
476 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
477 		    ifa->ifa_scope <= ifa1->ifa_scope)
478 			last_primary = &ifa1->ifa_next;
479 		if (ifa1->ifa_mask == ifa->ifa_mask &&
480 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
481 			if (ifa1->ifa_local == ifa->ifa_local) {
482 				inet_free_ifa(ifa);
483 				return -EEXIST;
484 			}
485 			if (ifa1->ifa_scope != ifa->ifa_scope) {
486 				inet_free_ifa(ifa);
487 				return -EINVAL;
488 			}
489 			ifa->ifa_flags |= IFA_F_SECONDARY;
490 		}
491 	}
492 
493 	/* Allow any devices that wish to register ifaddr validators to weigh
494 	 * in now, before changes are committed.  The rtnl lock serializes
495 	 * access here, so the state should not change between a validator call
496 	 * and a final notify on commit.  This isn't invoked on promotion, under
497 	 * the assumption that validators check the address itself, and
498 	 * not the flags.
499 	 */
500 	ivi.ivi_addr = ifa->ifa_address;
501 	ivi.ivi_dev = ifa->ifa_dev;
502 	ivi.extack = extack;
503 	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
504 					   NETDEV_UP, &ivi);
505 	ret = notifier_to_errno(ret);
506 	if (ret) {
507 		inet_free_ifa(ifa);
508 		return ret;
509 	}
510 
511 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
512 		prandom_seed((__force u32) ifa->ifa_local);
513 		ifap = last_primary;
514 	}
515 
516 	ifa->ifa_next = *ifap;
517 	*ifap = ifa;
518 
519 	inet_hash_insert(dev_net(in_dev->dev), ifa);
520 
521 	cancel_delayed_work(&check_lifetime_work);
522 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
523 
524 	/* Send the message first, then call the notifier.
525 	   The notifier will trigger a FIB update, so that
526 	   netlink listeners will know about the new ifaddr. */
527 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
528 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
529 
530 	return 0;
531 }
532 
533 static int inet_insert_ifa(struct in_ifaddr *ifa)
534 {
535 	return __inet_insert_ifa(ifa, NULL, 0, NULL);
536 }
537 
538 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
539 {
540 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
541 
542 	ASSERT_RTNL();
543 
544 	if (!in_dev) {
545 		inet_free_ifa(ifa);
546 		return -ENOBUFS;
547 	}
548 	ipv4_devconf_setall(in_dev);
549 	neigh_parms_data_state_setall(in_dev->arp_parms);
550 	if (ifa->ifa_dev != in_dev) {
551 		WARN_ON(ifa->ifa_dev);
552 		in_dev_hold(in_dev);
553 		ifa->ifa_dev = in_dev;
554 	}
555 	if (ipv4_is_loopback(ifa->ifa_local))
556 		ifa->ifa_scope = RT_SCOPE_HOST;
557 	return inet_insert_ifa(ifa);
558 }
559 
560 /* Caller must hold RCU or RTNL:
561  * We don't take a reference on the found in_device.
562  */
563 struct in_device *inetdev_by_index(struct net *net, int ifindex)
564 {
565 	struct net_device *dev;
566 	struct in_device *in_dev = NULL;
567 
568 	rcu_read_lock();
569 	dev = dev_get_by_index_rcu(net, ifindex);
570 	if (dev)
571 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
572 	rcu_read_unlock();
573 	return in_dev;
574 }
575 EXPORT_SYMBOL(inetdev_by_index);
576 
577 /* Called only while holding the RTNL semaphore. No locks. */
578 
579 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
580 				    __be32 mask)
581 {
582 	ASSERT_RTNL();
583 
584 	for_primary_ifa(in_dev) {
585 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
586 			return ifa;
587 	} endfor_ifa(in_dev);
588 	return NULL;
589 }
590 
591 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
592 {
593 	struct ip_mreqn mreq = {
594 		.imr_multiaddr.s_addr = ifa->ifa_address,
595 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
596 	};
597 	int ret;
598 
599 	ASSERT_RTNL();
600 
601 	lock_sock(sk);
602 	if (join)
603 		ret = ip_mc_join_group(sk, &mreq);
604 	else
605 		ret = ip_mc_leave_group(sk, &mreq);
606 	release_sock(sk);
607 
608 	return ret;
609 }
610 
611 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
612 			    struct netlink_ext_ack *extack)
613 {
614 	struct net *net = sock_net(skb->sk);
615 	struct nlattr *tb[IFA_MAX+1];
616 	struct in_device *in_dev;
617 	struct ifaddrmsg *ifm;
618 	struct in_ifaddr *ifa, **ifap;
619 	int err = -EINVAL;
620 
621 	ASSERT_RTNL();
622 
623 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
624 			  extack);
625 	if (err < 0)
626 		goto errout;
627 
628 	ifm = nlmsg_data(nlh);
629 	in_dev = inetdev_by_index(net, ifm->ifa_index);
630 	if (!in_dev) {
631 		err = -ENODEV;
632 		goto errout;
633 	}
634 
635 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
636 	     ifap = &ifa->ifa_next) {
637 		if (tb[IFA_LOCAL] &&
638 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
639 			continue;
640 
641 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
642 			continue;
643 
644 		if (tb[IFA_ADDRESS] &&
645 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
646 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
647 			continue;
648 
649 		if (ipv4_is_multicast(ifa->ifa_address))
650 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
651 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
652 		return 0;
653 	}
654 
655 	err = -EADDRNOTAVAIL;
656 errout:
657 	return err;
658 }
659 
660 #define INFINITY_LIFE_TIME	0xFFFFFFFF
661 
662 static void check_lifetime(struct work_struct *work)
663 {
664 	unsigned long now, next, next_sec, next_sched;
665 	struct in_ifaddr *ifa;
666 	struct hlist_node *n;
667 	int i;
668 
669 	now = jiffies;
670 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
671 
672 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
673 		bool change_needed = false;
674 
675 		rcu_read_lock();
676 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
677 			unsigned long age;
678 
679 			if (ifa->ifa_flags & IFA_F_PERMANENT)
680 				continue;
681 
682 			/* We try to batch several events at once. */
683 			age = (now - ifa->ifa_tstamp +
684 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
685 
686 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
687 			    age >= ifa->ifa_valid_lft) {
688 				change_needed = true;
689 			} else if (ifa->ifa_preferred_lft ==
690 				   INFINITY_LIFE_TIME) {
691 				continue;
692 			} else if (age >= ifa->ifa_preferred_lft) {
693 				if (time_before(ifa->ifa_tstamp +
694 						ifa->ifa_valid_lft * HZ, next))
695 					next = ifa->ifa_tstamp +
696 					       ifa->ifa_valid_lft * HZ;
697 
698 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
699 					change_needed = true;
700 			} else if (time_before(ifa->ifa_tstamp +
701 					       ifa->ifa_preferred_lft * HZ,
702 					       next)) {
703 				next = ifa->ifa_tstamp +
704 				       ifa->ifa_preferred_lft * HZ;
705 			}
706 		}
707 		rcu_read_unlock();
708 		if (!change_needed)
709 			continue;
710 		rtnl_lock();
711 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
712 			unsigned long age;
713 
714 			if (ifa->ifa_flags & IFA_F_PERMANENT)
715 				continue;
716 
717 			/* We try to batch several events at once. */
718 			age = (now - ifa->ifa_tstamp +
719 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
720 
721 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
722 			    age >= ifa->ifa_valid_lft) {
723 				struct in_ifaddr **ifap;
724 
725 				for (ifap = &ifa->ifa_dev->ifa_list;
726 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
727 					if (*ifap == ifa) {
728 						inet_del_ifa(ifa->ifa_dev,
729 							     ifap, 1);
730 						break;
731 					}
732 				}
733 			} else if (ifa->ifa_preferred_lft !=
734 				   INFINITY_LIFE_TIME &&
735 				   age >= ifa->ifa_preferred_lft &&
736 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
737 				ifa->ifa_flags |= IFA_F_DEPRECATED;
738 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
739 			}
740 		}
741 		rtnl_unlock();
742 	}
743 
744 	next_sec = round_jiffies_up(next);
745 	next_sched = next;
746 
747 	/* If rounded timeout is accurate enough, accept it. */
748 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
749 		next_sched = next_sec;
750 
751 	now = jiffies;
752 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
753 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
754 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
755 
756 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
757 			next_sched - now);
758 }
759 
760 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
761 			     __u32 prefered_lft)
762 {
763 	unsigned long timeout;
764 
765 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
766 
767 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
768 	if (addrconf_finite_timeout(timeout))
769 		ifa->ifa_valid_lft = timeout;
770 	else
771 		ifa->ifa_flags |= IFA_F_PERMANENT;
772 
773 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
774 	if (addrconf_finite_timeout(timeout)) {
775 		if (timeout == 0)
776 			ifa->ifa_flags |= IFA_F_DEPRECATED;
777 		ifa->ifa_preferred_lft = timeout;
778 	}
779 	ifa->ifa_tstamp = jiffies;
780 	if (!ifa->ifa_cstamp)
781 		ifa->ifa_cstamp = ifa->ifa_tstamp;
782 }
783 
784 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
785 				       __u32 *pvalid_lft, __u32 *pprefered_lft,
786 				       struct netlink_ext_ack *extack)
787 {
788 	struct nlattr *tb[IFA_MAX+1];
789 	struct in_ifaddr *ifa;
790 	struct ifaddrmsg *ifm;
791 	struct net_device *dev;
792 	struct in_device *in_dev;
793 	int err;
794 
795 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
796 			  extack);
797 	if (err < 0)
798 		goto errout;
799 
800 	ifm = nlmsg_data(nlh);
801 	err = -EINVAL;
802 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
803 		goto errout;
804 
805 	dev = __dev_get_by_index(net, ifm->ifa_index);
806 	err = -ENODEV;
807 	if (!dev)
808 		goto errout;
809 
810 	in_dev = __in_dev_get_rtnl(dev);
811 	err = -ENOBUFS;
812 	if (!in_dev)
813 		goto errout;
814 
815 	ifa = inet_alloc_ifa();
816 	if (!ifa)
817 		/*
818 		 * A potential in_dev allocation can be left alive; it stays
819 		 * assigned to its device and is destroyed with it.
820 		 */
821 		goto errout;
822 
823 	ipv4_devconf_setall(in_dev);
824 	neigh_parms_data_state_setall(in_dev->arp_parms);
825 	in_dev_hold(in_dev);
826 
827 	if (!tb[IFA_ADDRESS])
828 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
829 
830 	INIT_HLIST_NODE(&ifa->hash);
831 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
832 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
833 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
834 					 ifm->ifa_flags;
835 	ifa->ifa_scope = ifm->ifa_scope;
836 	ifa->ifa_dev = in_dev;
837 
838 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
839 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
840 
841 	if (tb[IFA_BROADCAST])
842 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
843 
844 	if (tb[IFA_LABEL])
845 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
846 	else
847 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
848 
849 	if (tb[IFA_RT_PRIORITY])
850 		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
851 
852 	if (tb[IFA_CACHEINFO]) {
853 		struct ifa_cacheinfo *ci;
854 
855 		ci = nla_data(tb[IFA_CACHEINFO]);
856 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
857 			err = -EINVAL;
858 			goto errout_free;
859 		}
860 		*pvalid_lft = ci->ifa_valid;
861 		*pprefered_lft = ci->ifa_prefered;
862 	}
863 
864 	return ifa;
865 
866 errout_free:
867 	inet_free_ifa(ifa);
868 errout:
869 	return ERR_PTR(err);
870 }
871 
872 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
873 {
874 	struct in_device *in_dev = ifa->ifa_dev;
875 	struct in_ifaddr *ifa1, **ifap;
876 
877 	if (!ifa->ifa_local)
878 		return NULL;
879 
880 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
881 	     ifap = &ifa1->ifa_next) {
882 		if (ifa1->ifa_mask == ifa->ifa_mask &&
883 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
884 		    ifa1->ifa_local == ifa->ifa_local)
885 			return ifa1;
886 	}
887 	return NULL;
888 }
889 
890 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
891 			    struct netlink_ext_ack *extack)
892 {
893 	struct net *net = sock_net(skb->sk);
894 	struct in_ifaddr *ifa;
895 	struct in_ifaddr *ifa_existing;
896 	__u32 valid_lft = INFINITY_LIFE_TIME;
897 	__u32 prefered_lft = INFINITY_LIFE_TIME;
898 
899 	ASSERT_RTNL();
900 
901 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
902 	if (IS_ERR(ifa))
903 		return PTR_ERR(ifa);
904 
905 	ifa_existing = find_matching_ifa(ifa);
906 	if (!ifa_existing) {
907 		/* It would be best to check for !NLM_F_CREATE here but
908 		 * userspace already relies on not having to provide this.
909 		 */
910 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
911 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
912 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
913 					       true, ifa);
914 
915 			if (ret < 0) {
916 				inet_free_ifa(ifa);
917 				return ret;
918 			}
919 		}
920 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
921 					 extack);
922 	} else {
923 		u32 new_metric = ifa->ifa_rt_priority;
924 
925 		inet_free_ifa(ifa);
926 
927 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
928 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
929 			return -EEXIST;
930 		ifa = ifa_existing;
931 
932 		if (ifa->ifa_rt_priority != new_metric) {
933 			fib_modify_prefix_metric(ifa, new_metric);
934 			ifa->ifa_rt_priority = new_metric;
935 		}
936 
937 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
938 		cancel_delayed_work(&check_lifetime_work);
939 		queue_delayed_work(system_power_efficient_wq,
940 				&check_lifetime_work, 0);
941 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
942 	}
943 	return 0;
944 }
945 
946 /*
947  *	Determine a default network mask, based on the IP address.
948  */
949 
950 static int inet_abc_len(__be32 addr)
951 {
952 	int rc = -1;	/* Something else, probably a multicast. */
953 
954 	if (ipv4_is_zeronet(addr))
955 		rc = 0;
956 	else {
957 		__u32 haddr = ntohl(addr);
958 
959 		if (IN_CLASSA(haddr))
960 			rc = 8;
961 		else if (IN_CLASSB(haddr))
962 			rc = 16;
963 		else if (IN_CLASSC(haddr))
964 			rc = 24;
965 	}
966 
967 	return rc;
968 }
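/* For example (illustrative): inet_abc_len() returns 8 for 10.1.2.3
 * (class A), 16 for 172.16.0.1 (class B), 24 for 192.0.2.1 (class C),
 * 0 for 0.0.0.0, and -1 for 224.0.0.1 (multicast, no classful mask).
 */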
969 
970 
971 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
972 {
973 	struct sockaddr_in sin_orig;
974 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
975 	struct in_device *in_dev;
976 	struct in_ifaddr **ifap = NULL;
977 	struct in_ifaddr *ifa = NULL;
978 	struct net_device *dev;
979 	char *colon;
980 	int ret = -EFAULT;
981 	int tryaddrmatch = 0;
982 
983 	ifr->ifr_name[IFNAMSIZ - 1] = 0;
984 
985 	/* save original address for comparison */
986 	memcpy(&sin_orig, sin, sizeof(*sin));
987 
988 	colon = strchr(ifr->ifr_name, ':');
989 	if (colon)
990 		*colon = 0;
991 
992 	dev_load(net, ifr->ifr_name);
993 
994 	switch (cmd) {
995 	case SIOCGIFADDR:	/* Get interface address */
996 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
997 	case SIOCGIFDSTADDR:	/* Get the destination address */
998 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
999 		/* Note that these ioctls will not sleep,
1000 		   so we do not impose a lock.
1001 		   One day we will be forced to put a shared lock here (SMP).
1002 		 */
1003 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1004 		memset(sin, 0, sizeof(*sin));
1005 		sin->sin_family = AF_INET;
1006 		break;
1007 
1008 	case SIOCSIFFLAGS:
1009 		ret = -EPERM;
1010 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1011 			goto out;
1012 		break;
1013 	case SIOCSIFADDR:	/* Set interface address (and family) */
1014 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1015 	case SIOCSIFDSTADDR:	/* Set the destination address */
1016 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1017 		ret = -EPERM;
1018 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1019 			goto out;
1020 		ret = -EINVAL;
1021 		if (sin->sin_family != AF_INET)
1022 			goto out;
1023 		break;
1024 	default:
1025 		ret = -EINVAL;
1026 		goto out;
1027 	}
1028 
1029 	rtnl_lock();
1030 
1031 	ret = -ENODEV;
1032 	dev = __dev_get_by_name(net, ifr->ifr_name);
1033 	if (!dev)
1034 		goto done;
1035 
1036 	if (colon)
1037 		*colon = ':';
1038 
1039 	in_dev = __in_dev_get_rtnl(dev);
1040 	if (in_dev) {
1041 		if (tryaddrmatch) {
1042 			/* Matthias Andree */
1043 			/* compare label and address (4.4BSD style) */
1044 			/* note: we only do this for a limited set of ioctls
1045 			   and only if the original address family was AF_INET.
1046 			   This is checked above. */
1047 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1048 			     ifap = &ifa->ifa_next) {
1049 				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1050 				    sin_orig.sin_addr.s_addr ==
1051 							ifa->ifa_local) {
1052 					break; /* found */
1053 				}
1054 			}
1055 		}
1056 		/* we didn't get a match; maybe the application is
1057 		   4.3BSD-style and passed in junk, so we fall back to
1058 		   comparing just the label */
1059 		if (!ifa) {
1060 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1061 			     ifap = &ifa->ifa_next)
1062 				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1063 					break;
1064 		}
1065 	}
1066 
1067 	ret = -EADDRNOTAVAIL;
1068 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1069 		goto done;
1070 
1071 	switch (cmd) {
1072 	case SIOCGIFADDR:	/* Get interface address */
1073 		ret = 0;
1074 		sin->sin_addr.s_addr = ifa->ifa_local;
1075 		break;
1076 
1077 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1078 		ret = 0;
1079 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1080 		break;
1081 
1082 	case SIOCGIFDSTADDR:	/* Get the destination address */
1083 		ret = 0;
1084 		sin->sin_addr.s_addr = ifa->ifa_address;
1085 		break;
1086 
1087 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1088 		ret = 0;
1089 		sin->sin_addr.s_addr = ifa->ifa_mask;
1090 		break;
1091 
1092 	case SIOCSIFFLAGS:
1093 		if (colon) {
1094 			ret = -EADDRNOTAVAIL;
1095 			if (!ifa)
1096 				break;
1097 			ret = 0;
1098 			if (!(ifr->ifr_flags & IFF_UP))
1099 				inet_del_ifa(in_dev, ifap, 1);
1100 			break;
1101 		}
1102 		ret = dev_change_flags(dev, ifr->ifr_flags);
1103 		break;
1104 
1105 	case SIOCSIFADDR:	/* Set interface address (and family) */
1106 		ret = -EINVAL;
1107 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1108 			break;
1109 
1110 		if (!ifa) {
1111 			ret = -ENOBUFS;
1112 			ifa = inet_alloc_ifa();
1113 			if (!ifa)
1114 				break;
1115 			INIT_HLIST_NODE(&ifa->hash);
1116 			if (colon)
1117 				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1118 			else
1119 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1120 		} else {
1121 			ret = 0;
1122 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1123 				break;
1124 			inet_del_ifa(in_dev, ifap, 0);
1125 			ifa->ifa_broadcast = 0;
1126 			ifa->ifa_scope = 0;
1127 		}
1128 
1129 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1130 
1131 		if (!(dev->flags & IFF_POINTOPOINT)) {
1132 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1133 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1134 			if ((dev->flags & IFF_BROADCAST) &&
1135 			    ifa->ifa_prefixlen < 31)
1136 				ifa->ifa_broadcast = ifa->ifa_address |
1137 						     ~ifa->ifa_mask;
1138 		} else {
1139 			ifa->ifa_prefixlen = 32;
1140 			ifa->ifa_mask = inet_make_mask(32);
1141 		}
1142 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1143 		ret = inet_set_ifa(dev, ifa);
1144 		break;
1145 
1146 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1147 		ret = 0;
1148 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1149 			inet_del_ifa(in_dev, ifap, 0);
1150 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1151 			inet_insert_ifa(ifa);
1152 		}
1153 		break;
1154 
1155 	case SIOCSIFDSTADDR:	/* Set the destination address */
1156 		ret = 0;
1157 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1158 			break;
1159 		ret = -EINVAL;
1160 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1161 			break;
1162 		ret = 0;
1163 		inet_del_ifa(in_dev, ifap, 0);
1164 		ifa->ifa_address = sin->sin_addr.s_addr;
1165 		inet_insert_ifa(ifa);
1166 		break;
1167 
1168 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1169 
1170 		/*
1171 		 *	The mask we set must be legal.
1172 		 */
1173 		ret = -EINVAL;
1174 		if (bad_mask(sin->sin_addr.s_addr, 0))
1175 			break;
1176 		ret = 0;
1177 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1178 			__be32 old_mask = ifa->ifa_mask;
1179 			inet_del_ifa(in_dev, ifap, 0);
1180 			ifa->ifa_mask = sin->sin_addr.s_addr;
1181 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1182 
1183 			/* See if the current broadcast address matches
1184 			 * the current netmask; if so, recalculate
1185 			 * the broadcast address. Otherwise it's a
1186 			 * funny address, so don't touch it since
1187 			 * the user seems to know what (s)he's doing...
1188 			 */
1189 			if ((dev->flags & IFF_BROADCAST) &&
1190 			    (ifa->ifa_prefixlen < 31) &&
1191 			    (ifa->ifa_broadcast ==
1192 			     (ifa->ifa_local|~old_mask))) {
1193 				ifa->ifa_broadcast = (ifa->ifa_local |
1194 						      ~sin->sin_addr.s_addr);
1195 			}
1196 			inet_insert_ifa(ifa);
1197 		}
1198 		break;
1199 	}
1200 done:
1201 	rtnl_unlock();
1202 out:
1203 	return ret;
1204 }
1205 
1206 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1207 {
1208 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1209 	struct in_ifaddr *ifa;
1210 	struct ifreq ifr;
1211 	int done = 0;
1212 
1213 	if (WARN_ON(size > sizeof(struct ifreq)))
1214 		goto out;
1215 
1216 	if (!in_dev)
1217 		goto out;
1218 
1219 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1220 		if (!buf) {
1221 			done += size;
1222 			continue;
1223 		}
1224 		if (len < size)
1225 			break;
1226 		memset(&ifr, 0, sizeof(struct ifreq));
1227 		strcpy(ifr.ifr_name, ifa->ifa_label);
1228 
1229 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1230 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1231 								ifa->ifa_local;
1232 
1233 		if (copy_to_user(buf + done, &ifr, size)) {
1234 			done = -EFAULT;
1235 			break;
1236 		}
1237 		len  -= size;
1238 		done += size;
1239 	}
1240 out:
1241 	return done;
1242 }
1243 
1244 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1245 				 int scope)
1246 {
1247 	for_primary_ifa(in_dev) {
1248 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1249 		    ifa->ifa_scope <= scope)
1250 			return ifa->ifa_local;
1251 	} endfor_ifa(in_dev);
1252 
1253 	return 0;
1254 }
1255 
1256 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1257 {
1258 	__be32 addr = 0;
1259 	struct in_device *in_dev;
1260 	struct net *net = dev_net(dev);
1261 	int master_idx;
1262 
1263 	rcu_read_lock();
1264 	in_dev = __in_dev_get_rcu(dev);
1265 	if (!in_dev)
1266 		goto no_in_dev;
1267 
1268 	for_primary_ifa(in_dev) {
1269 		if (ifa->ifa_scope > scope)
1270 			continue;
1271 		if (!dst || inet_ifa_match(dst, ifa)) {
1272 			addr = ifa->ifa_local;
1273 			break;
1274 		}
1275 		if (!addr)
1276 			addr = ifa->ifa_local;
1277 	} endfor_ifa(in_dev);
1278 
1279 	if (addr)
1280 		goto out_unlock;
1281 no_in_dev:
1282 	master_idx = l3mdev_master_ifindex_rcu(dev);
1283 
1284 	/* For VRFs, the VRF device takes the place of the loopback device,
1285 	 * with addresses on it being preferred.  Note in such cases the
1286 	 * loopback device will be among the devices that fail the master_idx
1287 	 * equality check in the loop below.
1288 	 */
1289 	if (master_idx &&
1290 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1291 	    (in_dev = __in_dev_get_rcu(dev))) {
1292 		addr = in_dev_select_addr(in_dev, scope);
1293 		if (addr)
1294 			goto out_unlock;
1295 	}
1296 
1297 	/* Non-loopback addresses on the loopback device should be preferred
1298 	   in this case. It is important that lo is the first interface
1299 	   in the dev_base list.
1300 	 */
1301 	for_each_netdev_rcu(net, dev) {
1302 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1303 			continue;
1304 
1305 		in_dev = __in_dev_get_rcu(dev);
1306 		if (!in_dev)
1307 			continue;
1308 
1309 		addr = in_dev_select_addr(in_dev, scope);
1310 		if (addr)
1311 			goto out_unlock;
1312 	}
1313 out_unlock:
1314 	rcu_read_unlock();
1315 	return addr;
1316 }
1317 EXPORT_SYMBOL(inet_select_addr);
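/* Illustrative usage sketch (hypothetical caller): pick a source address
 * on 'dev' suitable for replying to 'daddr', limited to link scope:
 *
 *	__be32 saddr = inet_select_addr(dev, daddr, RT_SCOPE_LINK);
 *
 * A zero return means no suitable address was found.
 */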
1318 
1319 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1320 			      __be32 local, int scope)
1321 {
1322 	int same = 0;
1323 	__be32 addr = 0;
1324 
1325 	for_ifa(in_dev) {
1326 		if (!addr &&
1327 		    (local == ifa->ifa_local || !local) &&
1328 		    ifa->ifa_scope <= scope) {
1329 			addr = ifa->ifa_local;
1330 			if (same)
1331 				break;
1332 		}
1333 		if (!same) {
1334 			same = (!local || inet_ifa_match(local, ifa)) &&
1335 				(!dst || inet_ifa_match(dst, ifa));
1336 			if (same && addr) {
1337 				if (local || !dst)
1338 					break;
1339 				/* Is the selected addr in the dst subnet? */
1340 				if (inet_ifa_match(addr, ifa))
1341 					break;
1342 				/* No, then can we use new local src? */
1343 				if (ifa->ifa_scope <= scope) {
1344 					addr = ifa->ifa_local;
1345 					break;
1346 				}
1347 				/* search for large dst subnet for addr */
1348 				same = 0;
1349 			}
1350 		}
1351 	} endfor_ifa(in_dev);
1352 
1353 	return same ? addr : 0;
1354 }
1355 
1356 /*
1357  * Confirm that local IP address exists using wildcards:
1358  * - net: netns to check, cannot be NULL
1359  * - in_dev: only on this interface, NULL=any interface
1360  * - dst: only in the same subnet as dst, 0=any dst
1361  * - local: address, 0=autoselect the local address
1362  * - scope: maximum allowed scope value for the local address
1363  */
1364 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1365 			 __be32 dst, __be32 local, int scope)
1366 {
1367 	__be32 addr = 0;
1368 	struct net_device *dev;
1369 
1370 	if (in_dev)
1371 		return confirm_addr_indev(in_dev, dst, local, scope);
1372 
1373 	rcu_read_lock();
1374 	for_each_netdev_rcu(net, dev) {
1375 		in_dev = __in_dev_get_rcu(dev);
1376 		if (in_dev) {
1377 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1378 			if (addr)
1379 				break;
1380 		}
1381 	}
1382 	rcu_read_unlock();
1383 
1384 	return addr;
1385 }
1386 EXPORT_SYMBOL(inet_confirm_addr);
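/* Illustrative usage sketch (hypothetical values): check whether 'tip'
 * is configured locally, restricted to the same subnet as 'sip':
 *
 *	if (inet_confirm_addr(net, NULL, sip, tip, RT_SCOPE_HOST))
 *		...
 *
 * With in_dev == NULL every interface in 'net' is considered, per the
 * comment above.
 */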
1387 
1388 /*
1389  *	Device notifier
1390  */
1391 
1392 int register_inetaddr_notifier(struct notifier_block *nb)
1393 {
1394 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1395 }
1396 EXPORT_SYMBOL(register_inetaddr_notifier);
1397 
1398 int unregister_inetaddr_notifier(struct notifier_block *nb)
1399 {
1400 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1401 }
1402 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1403 
1404 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1405 {
1406 	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1407 }
1408 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1409 
1410 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1411 {
1412 	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1413 	    nb);
1414 }
1415 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1416 
1417 /* Rename ifa_labels for a device name change. Make some effort to preserve
1418  * existing alias numbering and to create unique labels if possible.
1419 */
1420 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1421 {
1422 	struct in_ifaddr *ifa;
1423 	int named = 0;
1424 
1425 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1426 		char old[IFNAMSIZ], *dot;
1427 
1428 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1429 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1430 		if (named++ == 0)
1431 			goto skip;
1432 		dot = strchr(old, ':');
1433 		if (!dot) {
1434 			sprintf(old, ":%d", named);
1435 			dot = old;
1436 		}
1437 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1438 			strcat(ifa->ifa_label, dot);
1439 		else
1440 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1441 skip:
1442 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1443 	}
1444 }
1445 
1446 static bool inetdev_valid_mtu(unsigned int mtu)
1447 {
1448 	return mtu >= IPV4_MIN_MTU;
1449 }
1450 
1451 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1452 					struct in_device *in_dev)
1453 
1454 {
1455 	struct in_ifaddr *ifa;
1456 
1457 	for (ifa = in_dev->ifa_list; ifa;
1458 	     ifa = ifa->ifa_next) {
1459 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1460 			 ifa->ifa_local, dev,
1461 			 ifa->ifa_local, NULL,
1462 			 dev->dev_addr, NULL);
1463 	}
1464 }
1465 
1466 /* Called only under RTNL semaphore */
1467 
1468 static int inetdev_event(struct notifier_block *this, unsigned long event,
1469 			 void *ptr)
1470 {
1471 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1472 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1473 
1474 	ASSERT_RTNL();
1475 
1476 	if (!in_dev) {
1477 		if (event == NETDEV_REGISTER) {
1478 			in_dev = inetdev_init(dev);
1479 			if (IS_ERR(in_dev))
1480 				return notifier_from_errno(PTR_ERR(in_dev));
1481 			if (dev->flags & IFF_LOOPBACK) {
1482 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1483 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1484 			}
1485 		} else if (event == NETDEV_CHANGEMTU) {
1486 			/* Re-enabling IP */
1487 			if (inetdev_valid_mtu(dev->mtu))
1488 				in_dev = inetdev_init(dev);
1489 		}
1490 		goto out;
1491 	}
1492 
1493 	switch (event) {
1494 	case NETDEV_REGISTER:
1495 		pr_debug("%s: bug\n", __func__);
1496 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1497 		break;
1498 	case NETDEV_UP:
1499 		if (!inetdev_valid_mtu(dev->mtu))
1500 			break;
1501 		if (dev->flags & IFF_LOOPBACK) {
1502 			struct in_ifaddr *ifa = inet_alloc_ifa();
1503 
1504 			if (ifa) {
1505 				INIT_HLIST_NODE(&ifa->hash);
1506 				ifa->ifa_local =
1507 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1508 				ifa->ifa_prefixlen = 8;
1509 				ifa->ifa_mask = inet_make_mask(8);
1510 				in_dev_hold(in_dev);
1511 				ifa->ifa_dev = in_dev;
1512 				ifa->ifa_scope = RT_SCOPE_HOST;
1513 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1514 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1515 						 INFINITY_LIFE_TIME);
1516 				ipv4_devconf_setall(in_dev);
1517 				neigh_parms_data_state_setall(in_dev->arp_parms);
1518 				inet_insert_ifa(ifa);
1519 			}
1520 		}
1521 		ip_mc_up(in_dev);
1522 		/* fall through */
1523 	case NETDEV_CHANGEADDR:
1524 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1525 			break;
1526 		/* fall through */
1527 	case NETDEV_NOTIFY_PEERS:
1528 		/* Send gratuitous ARP to notify of link change */
1529 		inetdev_send_gratuitous_arp(dev, in_dev);
1530 		break;
1531 	case NETDEV_DOWN:
1532 		ip_mc_down(in_dev);
1533 		break;
1534 	case NETDEV_PRE_TYPE_CHANGE:
1535 		ip_mc_unmap(in_dev);
1536 		break;
1537 	case NETDEV_POST_TYPE_CHANGE:
1538 		ip_mc_remap(in_dev);
1539 		break;
1540 	case NETDEV_CHANGEMTU:
1541 		if (inetdev_valid_mtu(dev->mtu))
1542 			break;
1543 		/* disable IP when the MTU is too small */
1544 		/* fall through */
1545 	case NETDEV_UNREGISTER:
1546 		inetdev_destroy(in_dev);
1547 		break;
1548 	case NETDEV_CHANGENAME:
1549 		/* Do not notify about the label change; this event is
1550 		 * not interesting to applications using netlink.
1551 		 */
1552 		inetdev_changename(dev, in_dev);
1553 
1554 		devinet_sysctl_unregister(in_dev);
1555 		devinet_sysctl_register(in_dev);
1556 		break;
1557 	}
1558 out:
1559 	return NOTIFY_DONE;
1560 }
1561 
1562 static struct notifier_block ip_netdev_notifier = {
1563 	.notifier_call = inetdev_event,
1564 };
1565 
1566 static size_t inet_nlmsg_size(void)
1567 {
1568 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1569 	       + nla_total_size(4) /* IFA_ADDRESS */
1570 	       + nla_total_size(4) /* IFA_LOCAL */
1571 	       + nla_total_size(4) /* IFA_BROADCAST */
1572 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1573 	       + nla_total_size(4)  /* IFA_FLAGS */
1574 	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1575 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1576 }
1577 
1578 static inline u32 cstamp_delta(unsigned long cstamp)
1579 {
1580 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1581 }
1582 
1583 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1584 			 unsigned long tstamp, u32 preferred, u32 valid)
1585 {
1586 	struct ifa_cacheinfo ci;
1587 
1588 	ci.cstamp = cstamp_delta(cstamp);
1589 	ci.tstamp = cstamp_delta(tstamp);
1590 	ci.ifa_prefered = preferred;
1591 	ci.ifa_valid = valid;
1592 
1593 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1594 }
1595 
1596 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1597 			    struct inet_fill_args *args)
1598 {
1599 	struct ifaddrmsg *ifm;
1600 	struct nlmsghdr  *nlh;
1601 	u32 preferred, valid;
1602 
1603 	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1604 			args->flags);
1605 	if (!nlh)
1606 		return -EMSGSIZE;
1607 
1608 	ifm = nlmsg_data(nlh);
1609 	ifm->ifa_family = AF_INET;
1610 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1611 	ifm->ifa_flags = ifa->ifa_flags;
1612 	ifm->ifa_scope = ifa->ifa_scope;
1613 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1614 
1615 	if (args->netnsid >= 0 &&
1616 	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1617 		goto nla_put_failure;
1618 
1619 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1620 		preferred = ifa->ifa_preferred_lft;
1621 		valid = ifa->ifa_valid_lft;
1622 		if (preferred != INFINITY_LIFE_TIME) {
1623 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1624 
1625 			if (preferred > tval)
1626 				preferred -= tval;
1627 			else
1628 				preferred = 0;
1629 			if (valid != INFINITY_LIFE_TIME) {
1630 				if (valid > tval)
1631 					valid -= tval;
1632 				else
1633 					valid = 0;
1634 			}
1635 		}
1636 	} else {
1637 		preferred = INFINITY_LIFE_TIME;
1638 		valid = INFINITY_LIFE_TIME;
1639 	}
1640 	if ((ifa->ifa_address &&
1641 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1642 	    (ifa->ifa_local &&
1643 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1644 	    (ifa->ifa_broadcast &&
1645 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1646 	    (ifa->ifa_label[0] &&
1647 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1648 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1649 	    (ifa->ifa_rt_priority &&
1650 	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1651 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1652 			  preferred, valid))
1653 		goto nla_put_failure;
1654 
1655 	nlmsg_end(skb, nlh);
1656 	return 0;
1657 
1658 nla_put_failure:
1659 	nlmsg_cancel(skb, nlh);
1660 	return -EMSGSIZE;
1661 }
1662 
1663 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1664 				      struct inet_fill_args *fillargs,
1665 				      struct net **tgt_net, struct sock *sk,
1666 				      struct netlink_ext_ack *extack)
1667 {
1668 	struct nlattr *tb[IFA_MAX+1];
1669 	struct ifaddrmsg *ifm;
1670 	int err, i;
1671 
1672 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1673 		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1674 		return -EINVAL;
1675 	}
1676 
1677 	ifm = nlmsg_data(nlh);
1678 	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1679 		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1680 		return -EINVAL;
1681 	}
1682 	if (ifm->ifa_index) {
1683 		NL_SET_ERR_MSG(extack, "ipv4: Filter by device index not supported for address dump");
1684 		return -EINVAL;
1685 	}
1686 
1687 	err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1688 				 ifa_ipv4_policy, extack);
1689 	if (err < 0)
1690 		return err;
1691 
1692 	for (i = 0; i <= IFA_MAX; ++i) {
1693 		if (!tb[i])
1694 			continue;
1695 
1696 		if (i == IFA_TARGET_NETNSID) {
1697 			struct net *net;
1698 
1699 			fillargs->netnsid = nla_get_s32(tb[i]);
1700 
1701 			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1702 			if (IS_ERR(net)) {
1703 				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1704 				return PTR_ERR(net);
1705 			}
1706 			*tgt_net = net;
1707 		} else {
1708 			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1709 			return -EINVAL;
1710 		}
1711 	}
1712 
1713 	return 0;
1714 }
1715 
1716 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1717 {
1718 	const struct nlmsghdr *nlh = cb->nlh;
1719 	struct inet_fill_args fillargs = {
1720 		.portid = NETLINK_CB(cb->skb).portid,
1721 		.seq = nlh->nlmsg_seq,
1722 		.event = RTM_NEWADDR,
1723 		.flags = NLM_F_MULTI,
1724 		.netnsid = -1,
1725 	};
1726 	struct net *net = sock_net(skb->sk);
1727 	struct net *tgt_net = net;
1728 	int h, s_h;
1729 	int idx, s_idx;
1730 	int ip_idx, s_ip_idx;
1731 	struct net_device *dev;
1732 	struct in_device *in_dev;
1733 	struct in_ifaddr *ifa;
1734 	struct hlist_head *head;
1735 
1736 	s_h = cb->args[0];
1737 	s_idx = idx = cb->args[1];
1738 	s_ip_idx = ip_idx = cb->args[2];
1739 
1740 	if (cb->strict_check) {
1741 		int err;
1742 
1743 		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1744 						 skb->sk, cb->extack);
1745 		if (err < 0)
1746 			return err;
1747 	}
1748 
1749 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1750 		idx = 0;
1751 		head = &tgt_net->dev_index_head[h];
1752 		rcu_read_lock();
1753 		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1754 			  tgt_net->dev_base_seq;
1755 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1756 			if (idx < s_idx)
1757 				goto cont;
1758 			if (h > s_h || idx > s_idx)
1759 				s_ip_idx = 0;
1760 			in_dev = __in_dev_get_rcu(dev);
1761 			if (!in_dev)
1762 				goto cont;
1763 
1764 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1765 			     ifa = ifa->ifa_next, ip_idx++) {
1766 				if (ip_idx < s_ip_idx)
1767 					continue;
1768 				if (inet_fill_ifaddr(skb, ifa, &fillargs) < 0) {
1769 					rcu_read_unlock();
1770 					goto done;
1771 				}
1772 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1773 			}
1774 cont:
1775 			idx++;
1776 		}
1777 		rcu_read_unlock();
1778 	}
1779 
1780 done:
1781 	cb->args[0] = h;
1782 	cb->args[1] = idx;
1783 	cb->args[2] = ip_idx;
1784 	if (fillargs.netnsid >= 0)
1785 		put_net(tgt_net);
1786 
1787 	return skb->len;
1788 }
1789 
1790 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1791 		      u32 portid)
1792 {
1793 	struct inet_fill_args fillargs = {
1794 		.portid = portid,
1795 		.seq = nlh ? nlh->nlmsg_seq : 0,
1796 		.event = event,
1797 		.flags = 0,
1798 		.netnsid = -1,
1799 	};
1800 	struct sk_buff *skb;
1801 	int err = -ENOBUFS;
1802 	struct net *net;
1803 
1804 	net = dev_net(ifa->ifa_dev->dev);
1805 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1806 	if (!skb)
1807 		goto errout;
1808 
1809 	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1810 	if (err < 0) {
1811 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1812 		WARN_ON(err == -EMSGSIZE);
1813 		kfree_skb(skb);
1814 		goto errout;
1815 	}
1816 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1817 	return;
1818 errout:
1819 	if (err < 0)
1820 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1821 }
1822 
1823 static size_t inet_get_link_af_size(const struct net_device *dev,
1824 				    u32 ext_filter_mask)
1825 {
1826 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1827 
1828 	if (!in_dev)
1829 		return 0;
1830 
1831 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1832 }
1833 
1834 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1835 			     u32 ext_filter_mask)
1836 {
1837 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1838 	struct nlattr *nla;
1839 	int i;
1840 
1841 	if (!in_dev)
1842 		return -ENODATA;
1843 
1844 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1845 	if (!nla)
1846 		return -EMSGSIZE;
1847 
1848 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1849 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1850 
1851 	return 0;
1852 }
1853 
1854 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1855 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1856 };
1857 
1858 static int inet_validate_link_af(const struct net_device *dev,
1859 				 const struct nlattr *nla)
1860 {
1861 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1862 	int err, rem;
1863 
1864 	if (dev && !__in_dev_get_rcu(dev))
1865 		return -EAFNOSUPPORT;
1866 
1867 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1868 	if (err < 0)
1869 		return err;
1870 
1871 	if (tb[IFLA_INET_CONF]) {
1872 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1873 			int cfgid = nla_type(a);
1874 
1875 			if (nla_len(a) < 4)
1876 				return -EINVAL;
1877 
1878 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1879 				return -EINVAL;
1880 		}
1881 	}
1882 
1883 	return 0;
1884 }
1885 
1886 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1887 {
1888 	struct in_device *in_dev = __in_dev_get_rcu(dev);
1889 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1890 	int rem;
1891 
1892 	if (!in_dev)
1893 		return -EAFNOSUPPORT;
1894 
1895 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1896 		BUG();
1897 
1898 	if (tb[IFLA_INET_CONF]) {
1899 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1900 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1901 	}
1902 
1903 	return 0;
1904 }
1905 
1906 static int inet_netconf_msgsize_devconf(int type)
1907 {
1908 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1909 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1910 	bool all = false;
1911 
1912 	if (type == NETCONFA_ALL)
1913 		all = true;
1914 
1915 	if (all || type == NETCONFA_FORWARDING)
1916 		size += nla_total_size(4);
1917 	if (all || type == NETCONFA_RP_FILTER)
1918 		size += nla_total_size(4);
1919 	if (all || type == NETCONFA_MC_FORWARDING)
1920 		size += nla_total_size(4);
1921 	if (all || type == NETCONFA_BC_FORWARDING)
1922 		size += nla_total_size(4);
1923 	if (all || type == NETCONFA_PROXY_NEIGH)
1924 		size += nla_total_size(4);
1925 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1926 		size += nla_total_size(4);
1927 
1928 	return size;
1929 }
1930 
1931 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1932 				     struct ipv4_devconf *devconf, u32 portid,
1933 				     u32 seq, int event, unsigned int flags,
1934 				     int type)
1935 {
1936 	struct nlmsghdr  *nlh;
1937 	struct netconfmsg *ncm;
1938 	bool all = false;
1939 
1940 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1941 			flags);
1942 	if (!nlh)
1943 		return -EMSGSIZE;
1944 
1945 	if (type == NETCONFA_ALL)
1946 		all = true;
1947 
1948 	ncm = nlmsg_data(nlh);
1949 	ncm->ncm_family = AF_INET;
1950 
1951 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1952 		goto nla_put_failure;
1953 
1954 	if (!devconf)
1955 		goto out;
1956 
1957 	if ((all || type == NETCONFA_FORWARDING) &&
1958 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1959 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1960 		goto nla_put_failure;
1961 	if ((all || type == NETCONFA_RP_FILTER) &&
1962 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1963 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1964 		goto nla_put_failure;
1965 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1966 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1967 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1968 		goto nla_put_failure;
1969 	if ((all || type == NETCONFA_BC_FORWARDING) &&
1970 	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
1971 			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
1972 		goto nla_put_failure;
1973 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1974 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1975 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1976 		goto nla_put_failure;
1977 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1978 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1979 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1980 		goto nla_put_failure;
1981 
1982 out:
1983 	nlmsg_end(skb, nlh);
1984 	return 0;
1985 
1986 nla_put_failure:
1987 	nlmsg_cancel(skb, nlh);
1988 	return -EMSGSIZE;
1989 }
1990 
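/* Multicast an RTM_NEWNETCONF (or RTM_DELNETCONF) message to
 * RTNLGRP_IPV4_NETCONF listeners when a netconf value changes or a
 * device's configuration goes away.
 */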
1991 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1992 				 int ifindex, struct ipv4_devconf *devconf)
1993 {
1994 	struct sk_buff *skb;
1995 	int err = -ENOBUFS;
1996 
1997 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1998 	if (!skb)
1999 		goto errout;
2000 
2001 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2002 					event, 0, type);
2003 	if (err < 0) {
2004 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2005 		WARN_ON(err == -EMSGSIZE);
2006 		kfree_skb(skb);
2007 		goto errout;
2008 	}
2009 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2010 	return;
2011 errout:
2012 	if (err < 0)
2013 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2014 }
2015 
2016 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2017 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2018 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2019 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2020 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2021 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2022 };
2023 
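/* RTM_GETNETCONF handler: map NETCONFA_IFINDEX (a real ifindex,
 * NETCONFA_IFINDEX_ALL or NETCONFA_IFINDEX_DEFAULT) to the matching
 * devconf block and unicast it back with all attributes filled in.
 */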
2024 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2025 				    struct nlmsghdr *nlh,
2026 				    struct netlink_ext_ack *extack)
2027 {
2028 	struct net *net = sock_net(in_skb->sk);
2029 	struct nlattr *tb[NETCONFA_MAX+1];
2030 	struct netconfmsg *ncm;
2031 	struct sk_buff *skb;
2032 	struct ipv4_devconf *devconf;
2033 	struct in_device *in_dev;
2034 	struct net_device *dev;
2035 	int ifindex;
2036 	int err;
2037 
2038 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2039 			  devconf_ipv4_policy, extack);
2040 	if (err < 0)
2041 		goto errout;
2042 
2043 	err = -EINVAL;
2044 	if (!tb[NETCONFA_IFINDEX])
2045 		goto errout;
2046 
2047 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2048 	switch (ifindex) {
2049 	case NETCONFA_IFINDEX_ALL:
2050 		devconf = net->ipv4.devconf_all;
2051 		break;
2052 	case NETCONFA_IFINDEX_DEFAULT:
2053 		devconf = net->ipv4.devconf_dflt;
2054 		break;
2055 	default:
2056 		dev = __dev_get_by_index(net, ifindex);
2057 		if (!dev)
2058 			goto errout;
2059 		in_dev = __in_dev_get_rtnl(dev);
2060 		if (!in_dev)
2061 			goto errout;
2062 		devconf = &in_dev->cnf;
2063 		break;
2064 	}
2065 
2066 	err = -ENOBUFS;
2067 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2068 	if (!skb)
2069 		goto errout;
2070 
2071 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2072 					NETLINK_CB(in_skb).portid,
2073 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2074 					NETCONFA_ALL);
2075 	if (err < 0) {
2076 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2077 		WARN_ON(err == -EMSGSIZE);
2078 		kfree_skb(skb);
2079 		goto errout;
2080 	}
2081 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2082 errout:
2083 	return err;
2084 }
2085 
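/* RTM_GETNETCONF dump handler: walk the device hash table under RCU,
 * emit one RTM_NEWNETCONF per device and then append the "all" and
 * "default" pseudo entries.  cb->args[0]/[1] record the hash bucket
 * and index so an interrupted dump can be resumed.
 */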
2086 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2087 				     struct netlink_callback *cb)
2088 {
2089 	const struct nlmsghdr *nlh = cb->nlh;
2090 	struct net *net = sock_net(skb->sk);
2091 	int h, s_h;
2092 	int idx, s_idx;
2093 	struct net_device *dev;
2094 	struct in_device *in_dev;
2095 	struct hlist_head *head;
2096 
2097 	if (cb->strict_check) {
2098 		struct netlink_ext_ack *extack = cb->extack;
2099 		struct netconfmsg *ncm;
2100 
2101 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2102 			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2103 			return -EINVAL;
2104 		}
2105 
2106 		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2107 			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2108 			return -EINVAL;
2109 		}
2110 	}
2111 
2112 	s_h = cb->args[0];
2113 	s_idx = idx = cb->args[1];
2114 
2115 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2116 		idx = 0;
2117 		head = &net->dev_index_head[h];
2118 		rcu_read_lock();
2119 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2120 			  net->dev_base_seq;
2121 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2122 			if (idx < s_idx)
2123 				goto cont;
2124 			in_dev = __in_dev_get_rcu(dev);
2125 			if (!in_dev)
2126 				goto cont;
2127 
2128 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2129 						      &in_dev->cnf,
2130 						      NETLINK_CB(cb->skb).portid,
2131 						      nlh->nlmsg_seq,
2132 						      RTM_NEWNETCONF,
2133 						      NLM_F_MULTI,
2134 						      NETCONFA_ALL) < 0) {
2135 				rcu_read_unlock();
2136 				goto done;
2137 			}
2138 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2139 cont:
2140 			idx++;
2141 		}
2142 		rcu_read_unlock();
2143 	}
2144 	if (h == NETDEV_HASHENTRIES) {
2145 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2146 					      net->ipv4.devconf_all,
2147 					      NETLINK_CB(cb->skb).portid,
2148 					      nlh->nlmsg_seq,
2149 					      RTM_NEWNETCONF, NLM_F_MULTI,
2150 					      NETCONFA_ALL) < 0)
2151 			goto done;
2152 		else
2153 			h++;
2154 	}
2155 	if (h == NETDEV_HASHENTRIES + 1) {
2156 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2157 					      net->ipv4.devconf_dflt,
2158 					      NETLINK_CB(cb->skb).portid,
2159 					      nlh->nlmsg_seq,
2160 					      RTM_NEWNETCONF, NLM_F_MULTI,
2161 					      NETCONFA_ALL) < 0)
2162 			goto done;
2163 		else
2164 			h++;
2165 	}
2166 done:
2167 	cb->args[0] = h;
2168 	cb->args[1] = idx;
2169 
2170 	return skb->len;
2171 }
2172 
2173 #ifdef CONFIG_SYSCTL
2174 
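/* A write to a "default" entry is propagated to every device that has
 * not explicitly overridden that entry itself; overrides are tracked
 * in the cnf.state bitmap.
 */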
2175 static void devinet_copy_dflt_conf(struct net *net, int i)
2176 {
2177 	struct net_device *dev;
2178 
2179 	rcu_read_lock();
2180 	for_each_netdev_rcu(net, dev) {
2181 		struct in_device *in_dev;
2182 
2183 		in_dev = __in_dev_get_rcu(dev);
2184 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2185 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2186 	}
2187 	rcu_read_unlock();
2188 }
2189 
2190 /* Called with the RTNL lock held: propagate the "all" forwarding value
 * to the "default" entry and to every device (disabling LRO when
 * forwarding is being enabled), and notify netconf listeners.
 */
2191 static void inet_forward_change(struct net *net)
2192 {
2193 	struct net_device *dev;
2194 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2195 
2196 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2197 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2198 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2199 				    NETCONFA_FORWARDING,
2200 				    NETCONFA_IFINDEX_ALL,
2201 				    net->ipv4.devconf_all);
2202 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2203 				    NETCONFA_FORWARDING,
2204 				    NETCONFA_IFINDEX_DEFAULT,
2205 				    net->ipv4.devconf_dflt);
2206 
2207 	for_each_netdev(net, dev) {
2208 		struct in_device *in_dev;
2209 
2210 		if (on)
2211 			dev_disable_lro(dev);
2212 
2213 		in_dev = __in_dev_get_rtnl(dev);
2214 		if (in_dev) {
2215 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2216 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2217 						    NETCONFA_FORWARDING,
2218 						    dev->ifindex, &in_dev->cnf);
2219 		}
2220 	}
2221 }
2222 
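/* Translate a devconf block back into the ifindex used in netconf
 * notifications: the pseudo indices for "all" and "default", or the
 * owning device's real ifindex.
 */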
2223 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2224 {
2225 	if (cnf == net->ipv4.devconf_dflt)
2226 		return NETCONFA_IFINDEX_DEFAULT;
2227 	else if (cnf == net->ipv4.devconf_all)
2228 		return NETCONFA_IFINDEX_ALL;
2229 	else {
2230 		struct in_device *idev
2231 			= container_of(cnf, struct in_device, cnf);
2232 		return idev->dev->ifindex;
2233 	}
2234 }
2235 
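/* Generic sysctl handler for the per-interface entries.  On a write it
 * marks the entry as explicitly set, propagates "default" writes to the
 * devices, flushes the route cache for options whose change invalidates
 * cached routes, and sends netconf notifications for rp_filter,
 * proxy_arp and ignore_routes_with_linkdown changes.
 */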
2236 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2237 			     void __user *buffer,
2238 			     size_t *lenp, loff_t *ppos)
2239 {
2240 	int old_value = *(int *)ctl->data;
2241 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2242 	int new_value = *(int *)ctl->data;
2243 
2244 	if (write) {
2245 		struct ipv4_devconf *cnf = ctl->extra1;
2246 		struct net *net = ctl->extra2;
2247 		int i = (int *)ctl->data - cnf->data;
2248 		int ifindex;
2249 
2250 		set_bit(i, cnf->state);
2251 
2252 		if (cnf == net->ipv4.devconf_dflt)
2253 			devinet_copy_dflt_conf(net, i);
2254 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2255 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2256 			if ((new_value == 0) && (old_value != 0))
2257 				rt_cache_flush(net);
2258 
2259 		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2260 		    new_value != old_value)
2261 			rt_cache_flush(net);
2262 
2263 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2264 		    new_value != old_value) {
2265 			ifindex = devinet_conf_ifindex(net, cnf);
2266 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2267 						    NETCONFA_RP_FILTER,
2268 						    ifindex, cnf);
2269 		}
2270 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2271 		    new_value != old_value) {
2272 			ifindex = devinet_conf_ifindex(net, cnf);
2273 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2274 						    NETCONFA_PROXY_NEIGH,
2275 						    ifindex, cnf);
2276 		}
2277 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2278 		    new_value != old_value) {
2279 			ifindex = devinet_conf_ifindex(net, cnf);
2280 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2281 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2282 						    ifindex, cnf);
2283 		}
2284 	}
2285 
2286 	return ret;
2287 }
2288 
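/* sysctl handler for "forwarding".  Writes to anything other than the
 * "default" entry need the RTNL lock; if it cannot be taken the old
 * value is restored and the syscall is restarted.  The "all" entry fans
 * out through inet_forward_change(), a single device just updates its
 * own flag; both paths flush the route cache and notify netconf
 * listeners.
 */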
2289 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2290 				  void __user *buffer,
2291 				  size_t *lenp, loff_t *ppos)
2292 {
2293 	int *valp = ctl->data;
2294 	int val = *valp;
2295 	loff_t pos = *ppos;
2296 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2297 
2298 	if (write && *valp != val) {
2299 		struct net *net = ctl->extra2;
2300 
2301 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2302 			if (!rtnl_trylock()) {
2303 				/* Restore the original values before restarting */
2304 				*valp = val;
2305 				*ppos = pos;
2306 				return restart_syscall();
2307 			}
2308 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2309 				inet_forward_change(net);
2310 			} else {
2311 				struct ipv4_devconf *cnf = ctl->extra1;
2312 				struct in_device *idev =
2313 					container_of(cnf, struct in_device, cnf);
2314 				if (*valp)
2315 					dev_disable_lro(idev->dev);
2316 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2317 							    NETCONFA_FORWARDING,
2318 							    idev->dev->ifindex,
2319 							    cnf);
2320 			}
2321 			rtnl_unlock();
2322 			rt_cache_flush(net);
2323 		} else
2324 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2325 						    NETCONFA_FORWARDING,
2326 						    NETCONFA_IFINDEX_DEFAULT,
2327 						    net->ipv4.devconf_dflt);
2328 	}
2329 
2330 	return ret;
2331 }
2332 
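/* Plain integer handler that also flushes the route cache whenever the
 * value actually changes.
 */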
2333 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2334 				void __user *buffer,
2335 				size_t *lenp, loff_t *ppos)
2336 {
2337 	int *valp = ctl->data;
2338 	int val = *valp;
2339 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2340 	struct net *net = ctl->extra2;
2341 
2342 	if (write && *valp != val)
2343 		rt_cache_flush(net);
2344 
2345 	return ret;
2346 }
2347 
2348 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2349 	{ \
2350 		.procname	= name, \
2351 		.data		= ipv4_devconf.data + \
2352 				  IPV4_DEVCONF_ ## attr - 1, \
2353 		.maxlen		= sizeof(int), \
2354 		.mode		= mval, \
2355 		.proc_handler	= proc, \
2356 		.extra1		= &ipv4_devconf, \
2357 	}
2358 
2359 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2360 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2361 
2362 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2363 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2364 
2365 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2366 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2367 
2368 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2369 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2370 
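/* Template sysctl table; __devinet_sysctl_register() clones it and
 * rebinds each entry to the devconf block being registered.
 */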
2371 static struct devinet_sysctl_table {
2372 	struct ctl_table_header *sysctl_header;
2373 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2374 } devinet_sysctl = {
2375 	.devinet_vars = {
2376 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2377 					     devinet_sysctl_forward),
2378 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2379 		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2380 
2381 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2382 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2383 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2384 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2385 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2386 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2387 					"accept_source_route"),
2388 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2389 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2390 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2391 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2392 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2393 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2394 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2395 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2396 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2397 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2398 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2399 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2400 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2401 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2402 					"force_igmp_version"),
2403 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2404 					"igmpv2_unsolicited_report_interval"),
2405 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2406 					"igmpv3_unsolicited_report_interval"),
2407 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2408 					"ignore_routes_with_linkdown"),
2409 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2410 					"drop_gratuitous_arp"),
2411 
2412 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2413 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2414 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2415 					      "promote_secondaries"),
2416 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2417 					      "route_localnet"),
2418 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2419 					      "drop_unicast_in_l2_multicast"),
2420 	},
2421 };
2422 
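/* Clone the template table, point every entry at the given devconf
 * block (and namespace) and register it under net/ipv4/conf/<dev_name>.
 * A successful registration is announced with a NETCONFA_ALL netconf
 * notification.
 */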
2423 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2424 				     int ifindex, struct ipv4_devconf *p)
2425 {
2426 	int i;
2427 	struct devinet_sysctl_table *t;
2428 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2429 
2430 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2431 	if (!t)
2432 		goto out;
2433 
2434 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2435 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2436 		t->devinet_vars[i].extra1 = p;
2437 		t->devinet_vars[i].extra2 = net;
2438 	}
2439 
2440 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2441 
2442 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2443 	if (!t->sysctl_header)
2444 		goto free;
2445 
2446 	p->sysctl = t;
2447 
2448 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2449 				    ifindex, p);
2450 	return 0;
2451 
2452 free:
2453 	kfree(t);
2454 out:
2455 	return -ENOBUFS;
2456 }
2457 
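/* Drop the devconf sysctl tree, if one was registered, and send
 * RTM_DELNETCONF so listeners know the configuration is gone.
 */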
2458 static void __devinet_sysctl_unregister(struct net *net,
2459 					struct ipv4_devconf *cnf, int ifindex)
2460 {
2461 	struct devinet_sysctl_table *t = cnf->sysctl;
2462 
2463 	if (t) {
2464 		cnf->sysctl = NULL;
2465 		unregister_net_sysctl_table(t->sysctl_header);
2466 		kfree(t);
2467 	}
2468 
2469 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2470 }
2471 
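/* Register both the neighbour (ARP) sysctls and the per-device
 * net/ipv4/conf/<dev> tree; the ARP sysctls are rolled back if the
 * devconf registration fails.
 */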
2472 static int devinet_sysctl_register(struct in_device *idev)
2473 {
2474 	int err;
2475 
2476 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2477 		return -EINVAL;
2478 
2479 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2480 	if (err)
2481 		return err;
2482 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2483 					idev->dev->ifindex, &idev->cnf);
2484 	if (err)
2485 		neigh_sysctl_unregister(idev->arp_parms);
2486 	return err;
2487 }
2488 
2489 static void devinet_sysctl_unregister(struct in_device *idev)
2490 {
2491 	struct net *net = dev_net(idev->dev);
2492 
2493 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2494 	neigh_sysctl_unregister(idev->arp_parms);
2495 }
2496 
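/* net/ipv4/ip_forward is a shorthand for the "all" forwarding entry;
 * devinet_init_net() rebinds the pointers for non-init namespaces.
 */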
2497 static struct ctl_table ctl_forward_entry[] = {
2498 	{
2499 		.procname	= "ip_forward",
2500 		.data		= &ipv4_devconf.data[
2501 					IPV4_DEVCONF_FORWARDING - 1],
2502 		.maxlen		= sizeof(int),
2503 		.mode		= 0644,
2504 		.proc_handler	= devinet_sysctl_forward,
2505 		.extra1		= &ipv4_devconf,
2506 		.extra2		= &init_net,
2507 	},
2508 	{ },
2509 };
2510 #endif
2511 
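/* Per-namespace setup.  init_net keeps using the static templates;
 * every other namespace gets its own kmemdup'd copies of the "all" and
 * "default" devconf blocks (and, under CONFIG_SYSCTL, of the ip_forward
 * table) before the sysctl hierarchies are registered.
 */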
2512 static __net_init int devinet_init_net(struct net *net)
2513 {
2514 	int err;
2515 	struct ipv4_devconf *all, *dflt;
2516 #ifdef CONFIG_SYSCTL
2517 	struct ctl_table *tbl = ctl_forward_entry;
2518 	struct ctl_table_header *forw_hdr;
2519 #endif
2520 
2521 	err = -ENOMEM;
2522 	all = &ipv4_devconf;
2523 	dflt = &ipv4_devconf_dflt;
2524 
2525 	if (!net_eq(net, &init_net)) {
2526 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2527 		if (!all)
2528 			goto err_alloc_all;
2529 
2530 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2531 		if (!dflt)
2532 			goto err_alloc_dflt;
2533 
2534 #ifdef CONFIG_SYSCTL
2535 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2536 		if (!tbl)
2537 			goto err_alloc_ctl;
2538 
2539 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2540 		tbl[0].extra1 = all;
2541 		tbl[0].extra2 = net;
2542 #endif
2543 	}
2544 
2545 #ifdef CONFIG_SYSCTL
2546 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2547 	if (err < 0)
2548 		goto err_reg_all;
2549 
2550 	err = __devinet_sysctl_register(net, "default",
2551 					NETCONFA_IFINDEX_DEFAULT, dflt);
2552 	if (err < 0)
2553 		goto err_reg_dflt;
2554 
2555 	err = -ENOMEM;
2556 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2557 	if (!forw_hdr)
2558 		goto err_reg_ctl;
2559 	net->ipv4.forw_hdr = forw_hdr;
2560 #endif
2561 
2562 	net->ipv4.devconf_all = all;
2563 	net->ipv4.devconf_dflt = dflt;
2564 	return 0;
2565 
2566 #ifdef CONFIG_SYSCTL
2567 err_reg_ctl:
2568 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2569 err_reg_dflt:
2570 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2571 err_reg_all:
2572 	if (tbl != ctl_forward_entry)
2573 		kfree(tbl);
2574 err_alloc_ctl:
2575 #endif
2576 	if (dflt != &ipv4_devconf_dflt)
2577 		kfree(dflt);
2578 err_alloc_dflt:
2579 	if (all != &ipv4_devconf)
2580 		kfree(all);
2581 err_alloc_all:
2582 	return err;
2583 }
2584 
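/* Per-namespace teardown, mirroring devinet_init_net(): unregister the
 * ip_forward table and both devconf sysctl trees, then free the
 * namespace's devconf copies.
 */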
2585 static __net_exit void devinet_exit_net(struct net *net)
2586 {
2587 #ifdef CONFIG_SYSCTL
2588 	struct ctl_table *tbl;
2589 
2590 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2591 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2592 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2593 				    NETCONFA_IFINDEX_DEFAULT);
2594 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2595 				    NETCONFA_IFINDEX_ALL);
2596 	kfree(tbl);
2597 #endif
2598 	kfree(net->ipv4.devconf_dflt);
2599 	kfree(net->ipv4.devconf_all);
2600 }
2601 
2602 static __net_initdata struct pernet_operations devinet_ops = {
2603 	.init = devinet_init_net,
2604 	.exit = devinet_exit_net,
2605 };
2606 
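/* Hooks rtnetlink uses to build and parse the AF_INET portion of
 * IFLA_AF_SPEC in link messages.
 */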
2607 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2608 	.family		  = AF_INET,
2609 	.fill_link_af	  = inet_fill_link_af,
2610 	.get_link_af_size = inet_get_link_af_size,
2611 	.validate_link_af = inet_validate_link_af,
2612 	.set_link_af	  = inet_set_link_af,
2613 };
2614 
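/* Boot-time initialization: the address hash table, pernet operations,
 * the SIOCGIFCONF handler, the netdevice notifier, the address-lifetime
 * worker and the rtnetlink handlers for addresses and netconf.
 */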
2615 void __init devinet_init(void)
2616 {
2617 	int i;
2618 
2619 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2620 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2621 
2622 	register_pernet_subsys(&devinet_ops);
2623 
2624 	register_gifconf(PF_INET, inet_gifconf);
2625 	register_netdevice_notifier(&ip_netdev_notifier);
2626 
2627 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2628 
2629 	rtnl_af_register(&inet_af_ops);
2630 
2631 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2632 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2633 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2634 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2635 		      inet_netconf_dump_devconf, 0);
2636 }
2637