xref: /linux/net/ipv4/devinet.c (revision c73690ca16b1dd17b5e45d23fb2d76d083fb03bf)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
/* Built-in per-interface IPv4 configuration defaults.  Entries are offset
 * by -1 because the IPV4_DEVCONF_* enum values are 1-based while the
 * .data[] array is 0-based.  NOTE(review): presumably backs the
 * net.ipv4.conf.all sysctl tree -- confirm against devinet sysctl setup.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
79 
/* Template copied into each new in_device by inetdev_init(); like
 * ipv4_devconf above but additionally accepts source-routed packets.
 * NOTE(review): presumably backs net.ipv4.conf.default -- confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
91 
/* Access a field of the per-namespace default devconf. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)

/* Netlink attribute validation policy for RTM_NEWADDR/RTM_DELADDR. */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
};
103 
/* Global hash of all IPv4 addresses, 2^8 = 256 buckets, keyed on
 * (netns, ifa_local) by inet_addr_hash().  Written under RTNL, read
 * under RCU (see __ip_dev_find() and check_lifetime()).
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 	u32 val = (__force u32) addr ^ net_hash_mix(net);
112 
113 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115 
/* Link @ifa into the global address hash, keyed by its local address.
 * Caller must hold RTNL; readers traverse the chain under RCU.
 */
static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
	u32 hash = inet_addr_hash(net, ifa->ifa_local);

	ASSERT_RTNL();
	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
}
123 
/* Unlink @ifa from the global address hash (RCU-safe for concurrent
 * readers).  Caller must hold RTNL.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	ASSERT_RTNL();
	hlist_del_init_rcu(&ifa->hash);
}
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	u32 hash = inet_addr_hash(net, addr);
141 	struct net_device *result = NULL;
142 	struct in_ifaddr *ifa;
143 
144 	rcu_read_lock();
145 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146 		if (ifa->ifa_local == addr) {
147 			struct net_device *dev = ifa->ifa_dev->dev;
148 
149 			if (!net_eq(dev_net(dev), net))
150 				continue;
151 			result = dev;
152 			break;
153 		}
154 	}
155 	if (!result) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fallback to FIB local table so that communication
161 		 * over loopback subnets work.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	}
169 	if (result && devref)
170 		dev_hold(result);
171 	rcu_read_unlock();
172 	return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175 
/* Emits RTM_NEWADDR/RTM_DELADDR netlink notifications; defined later. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Subscribers are told about address changes via NETDEV_UP/NETDEV_DOWN. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* No-op stubs when sysctl support is compiled out. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
193 
194 /* Locks all the inet devices. */
195 
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200 
/* RCU callback: after the grace period, drop the reference the ifa
 * holds on its in_device (if it was ever bound) and free the ifa.
 */
static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}
208 
/* Schedule @ifa for freeing once all RCU readers are done with it. */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
213 
/* Final teardown of an in_device when its last reference is dropped:
 * verify no addresses or multicast memberships remain, release the
 * multicast hash and the reference on the underlying netdev, then free
 * the structure.  Freeing is refused (with a log) if ->dead was never
 * set, which would indicate a refcounting bug.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
231 
/* Create and attach the IPv4 state (in_device) for @dev: seed its config
 * from the namespace defaults, allocate ARP parameters, register sysctls
 * and initialize multicast state.  Publishing dev->ip_ptr is deliberately
 * the last step, since packets may be delivered as soon as it is visible.
 * Returns the new in_device or an ERR_PTR().  Caller must hold RTNL.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;
	int err = -ENOMEM;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	in_dev_hold(in_dev);

	err = devinet_sysctl_register(in_dev);
	if (err) {
		/* mark dead so in_dev_put() may actually free it */
		in_dev->dead = 1;
		in_dev_put(in_dev);
		in_dev = NULL;
		goto out;
	}
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev ?: ERR_PTR(err);
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}
276 
/* RCU callback: drop a reference on the in_device after the grace
 * period following inetdev_destroy().
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
282 
/* Tear down the IPv4 state of a device: mark the in_device dead, delete
 * every remaining address, unpublish dev->ip_ptr, unregister sysctls and
 * ARP parameters, and drop the final reference after an RCU grace period.
 * Caller must hold RTNL.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	/* ->dead suppresses secondary promotion in __inet_del_ifa() */
	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
309 
/* Return 1 if @a -- and @b too, when @b is non-zero -- lies within the
 * subnet of some primary address configured on @in_dev, else 0.
 */
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
	rcu_read_lock();
	for_primary_ifa(in_dev) {
		if (inet_ifa_match(a, ifa)) {
			if (!b || inet_ifa_match(b, ifa)) {
				rcu_read_unlock();
				return 1;
			}
		}
	} endfor_ifa(in_dev);
	rcu_read_unlock();
	return 0;
}
324 
/* Remove the address *@ifap from @in_dev and announce the deletion via
 * netlink and the inetaddr notifier chain.  Deleting a primary address
 * also removes all its same-subnet secondaries, unless the device has
 * promote_secondaries enabled, in which case the first such secondary
 * is promoted to primary and the rest are re-added behind it.  With
 * @destroy set, ifa1 itself is freed (RCU-deferred).  Caller holds RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* whole device going away -- no point promoting anything */
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* skip entries that are not secondaries of
			 * ifa1's subnet */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* relocate promote behind the last primary; if prev_prom
		 * is NULL it already sits at ifa1's former position
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* re-add routes for the remaining secondaries with the
		 * promoted address as the new prefsrc
		 */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
428 
/* Delete *@ifap without a netlink request context (ioctl/teardown paths). */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}

static void check_lifetime(struct work_struct *work);

/* Deferred worker that expires addresses with finite lifetimes. */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
438 
/* Insert @ifa into its device's address list, which keeps primaries
 * (ordered by scope) ahead of secondaries.  If the subnet already has a
 * primary with the same mask, the new address becomes IFA_F_SECONDARY.
 * A duplicate local address yields -EEXIST, a scope mismatch within the
 * subnet -EINVAL; in both cases @ifa is consumed.  On success the
 * address is hashed, lifetime checking is rescheduled and RTM_NEWADDR /
 * NETDEV_UP are announced.  Caller must hold RTNL.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	/* an all-zero local address is silently dropped */
	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		prandom_seed((__force u32) ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* run the lifetime worker right away so the new entry is seen */
	cancel_delayed_work(&check_lifetime_work);
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
495 
/* Insert @ifa without a netlink request context (ioctl paths). */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
500 
/* Bind @ifa to @dev's in_device and insert it into the address list.
 * @ifa is consumed on failure.  Loopback addresses are forced to host
 * scope.  Caller must hold RTNL.
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	if (ifa->ifa_dev != in_dev) {
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}
522 
/* Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	/* only valid while the caller's RCU/RTNL protection lasts */
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);
539 
/* Called only from RTNL semaphored context. No locks. */

/* Find the first primary address on @in_dev whose mask is @mask and
 * whose subnet contains @prefix; NULL if none.
 */
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
				    __be32 mask)
{
	ASSERT_RTNL();

	for_primary_ifa(in_dev) {
		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
			return ifa;
	} endfor_ifa(in_dev);
	return NULL;
}
553 
/* Join (@join true) or leave the multicast group given by
 * ifa->ifa_address on the ifa's interface, using socket @sk (the
 * per-netns autojoin socket).  Returns the group-membership result.
 */
static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
{
	struct ip_mreqn mreq = {
		.imr_multiaddr.s_addr = ifa->ifa_address,
		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
	};
	int ret;

	ASSERT_RTNL();

	lock_sock(sk);
	if (join)
		ret = ip_mc_join_group(sk, &mreq);
	else
		ret = ip_mc_leave_group(sk, &mreq);
	release_sock(sk);

	return ret;
}
573 
/* RTM_DELADDR handler: delete the first address on the requested
 * interface that matches every attribute the caller supplied
 * (IFA_LOCAL, IFA_LABEL, IFA_ADDRESS + prefix length); attributes that
 * are absent match anything.  Multicast addresses are also left via
 * the autojoin socket.  Returns -EADDRNOTAVAIL when nothing matches.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa, **ifap;
	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
			  NULL);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (!in_dev) {
		err = -ENODEV;
		goto errout;
	}

	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
			continue;

		if (ipv4_is_multicast(ifa->ifa_address))
			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}
621 
622 #define INFINITY_LIFE_TIME	0xFFFFFFFF
623 
/* Periodic worker expiring addresses with finite lifetimes.  For each
 * hash bucket a first pass under RCU only decides whether any change is
 * needed; if so, a second pass re-scans the bucket under RTNL and
 * actually deletes valid-lifetime-expired addresses or flags
 * preferred-lifetime-expired ones IFA_F_DEPRECATED.  Finally the next
 * wakeup is computed (rounded, but clamped to a minimum interval) and
 * the work re-queues itself.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* second pass: mutate under RTNL (ages may have moved on) */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* locate the list slot so it can be unlinked */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
			next_sched - now);
}
721 
/* Apply valid/preferred lifetimes (in seconds) to @ifa.  An infinite
 * valid lifetime marks the address IFA_F_PERMANENT; a zero preferred
 * lifetime marks it IFA_F_DEPRECATED immediately.  The modification
 * timestamp is refreshed; the creation timestamp only on first use.
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long timeout;

	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout))
		ifa->ifa_valid_lft = timeout;
	else
		ifa->ifa_flags |= IFA_F_PERMANENT;

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		if (timeout == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
		ifa->ifa_preferred_lft = timeout;
	}
	ifa->ifa_tstamp = jiffies;
	if (!ifa->ifa_cstamp)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}
745 
/* Parse an RTM_NEWADDR request into a newly allocated in_ifaddr bound to
 * the target device.  IFA_LOCAL is mandatory; IFA_ADDRESS defaults to
 * the local address, and the label defaults to the device name.
 * Lifetimes from IFA_CACHEINFO are returned through @pvalid_lft and
 * @pprefered_lft.  Returns the ifa or an ERR_PTR().  Caller holds RTNL.
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
			  NULL);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (!dev)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (!in_dev)
		goto errout;

	ifa = inet_alloc_ifa();
	if (!ifa)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroy with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* reference transferred to ifa->ifa_dev below */
	in_dev_hold(in_dev);

	if (!tb[IFA_ADDRESS])
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* the IFA_FLAGS attribute, when present, overrides the 8-bit
	 * header flags */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* preferred lifetime may not exceed the valid lifetime */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
829 
830 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
831 {
832 	struct in_device *in_dev = ifa->ifa_dev;
833 	struct in_ifaddr *ifa1, **ifap;
834 
835 	if (!ifa->ifa_local)
836 		return NULL;
837 
838 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
839 	     ifap = &ifa1->ifa_next) {
840 		if (ifa1->ifa_mask == ifa->ifa_mask &&
841 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
842 		    ifa1->ifa_local == ifa->ifa_local)
843 			return ifa1;
844 	}
845 	return NULL;
846 }
847 
/* RTM_NEWADDR handler: create the requested address, or -- when an
 * identical one already exists and NLM_F_REPLACE is given (without
 * NLM_F_EXCL) -- refresh its lifetimes.  IFA_F_MCAUTOJOIN addresses
 * additionally join the multicast group before insertion.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
					       true, ifa);

			if (ret < 0) {
				inet_free_ifa(ifa);
				return ret;
			}
		}
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
	} else {
		/* the parsed copy is no longer needed */
		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
		ifa = ifa_existing;
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		cancel_delayed_work(&check_lifetime_work);
		queue_delayed_work(system_power_efficient_wq,
				&check_lifetime_work, 0);
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
	}
	return 0;
}
893 
894 /*
895  *	Determine a default network mask, based on the IP address.
896  */
897 
898 static int inet_abc_len(__be32 addr)
899 {
900 	int rc = -1;	/* Something else, probably a multicast. */
901 
902 	if (ipv4_is_zeronet(addr))
903 		rc = 0;
904 	else {
905 		__u32 haddr = ntohl(addr);
906 
907 		if (IN_CLASSA(haddr))
908 			rc = 8;
909 		else if (IN_CLASSB(haddr))
910 			rc = 16;
911 		else if (IN_CLASSC(haddr))
912 			rc = 24;
913 	}
914 
915 	return rc;
916 }
917 
918 
/* Handle the legacy SIOC[GS]IF* address ioctls.  The interface is looked
 * up by name from the ifreq; a "dev:label" alias name selects a specific
 * address by label (4.4BSD style -- preferably matching the supplied
 * address too, falling back to label-only).  Get-ioctls copy the result
 * back to userspace; set-ioctls require CAP_NET_ADMIN and an AF_INET
 * address, and run under RTNL.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* temporarily strip an alias suffix ("eth0:1") for the lookup */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* restore the alias suffix for label comparisons below */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* SIOCSIFADDR may create the address; SIOCSIFFLAGS tolerates
	 * a missing one for non-alias requests */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* downing an alias just deletes its address */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* changing the address: remove, rewrite, re-add */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* derive a classful mask and broadcast address */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* get-ioctl success: copy the filled-in ifreq back to userspace */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1160 
/* SIOCGIFCONF helper: copy one struct ifreq per IPv4 address on @dev
 * into the user buffer @buf, writing at most @len bytes.  A NULL @buf
 * is a size query: only count the bytes that would be needed.
 * Returns bytes written (or needed), or -EFAULT on copy failure.
 */
static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (!in_dev)
		goto out;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		if (!buf) {
			/* Size query: count only, nothing to copy. */
			done += sizeof(ifr);
			continue;
		}
		if (len < (int) sizeof(ifr))
			break;	/* user buffer exhausted; return partial */
		memset(&ifr, 0, sizeof(struct ifreq));
		strcpy(ifr.ifr_name, ifa->ifa_label);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
			done = -EFAULT;
			break;
		}
		buf  += sizeof(struct ifreq);
		len  -= sizeof(struct ifreq);
		done += sizeof(struct ifreq);
	}
out:
	return done;
}
1196 
/* Return the first primary address on @in_dev whose scope is within
 * @scope, skipping link-scoped addresses; 0 if none.  Callers run under
 * rcu_read_lock() (see inet_select_addr()).
 */
static __be32 in_dev_select_addr(const struct in_device *in_dev,
				 int scope)
{
	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope != RT_SCOPE_LINK &&
		    ifa->ifa_scope <= scope)
			return ifa->ifa_local;
	} endfor_ifa(in_dev);

	return 0;
}
1208 
/* Select a source address on @dev for talking to @dst: prefer a primary
 * address within @scope that shares @dst's subnet, then the first
 * in-scope primary address, then (for VRF slaves) the master device's
 * addresses, then any device in the namespace.  Returns 0 if nothing
 * suitable exists.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);
	int master_idx;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first in-scope address as a fallback. */
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:
	master_idx = l3mdev_master_ifindex_rcu(dev);

	/* For VRFs, the VRF device takes the place of the loopback device,
	 * with addresses on it being preferred.  Note in such cases the
	 * loopback device will be among the devices that fail the master_idx
	 * equality check in the loop below.
	 */
	if (master_idx &&
	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
	    (in_dev = __in_dev_get_rcu(dev))) {
		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}

	/* Not loopback addresses on loopback should be preferred
	   in this case. It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
			continue;

		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		addr = in_dev_select_addr(in_dev, scope);
		if (addr)
			goto out_unlock;
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1271 
/* Scan every address on @in_dev trying to (a) pick a local address
 * within @scope (matching @local when it is non-zero) and (b) confirm
 * that some address on the device matches the requested @local/@dst
 * subnets.  Only when both succeed is the picked address returned;
 * otherwise 0.  Helper for inet_confirm_addr().
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1308 
1309 /*
1310  * Confirm that local IP address exists using wildcards:
1311  * - net: netns to check, cannot be NULL
1312  * - in_dev: only on this interface, NULL=any interface
1313  * - dst: only in the same subnet as dst, 0=any dst
1314  * - local: address, 0=autoselect the local address
1315  * - scope: maximum allowed scope value for the local address
1316  */
1317 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1318 			 __be32 dst, __be32 local, int scope)
1319 {
1320 	__be32 addr = 0;
1321 	struct net_device *dev;
1322 
1323 	if (in_dev)
1324 		return confirm_addr_indev(in_dev, dst, local, scope);
1325 
1326 	rcu_read_lock();
1327 	for_each_netdev_rcu(net, dev) {
1328 		in_dev = __in_dev_get_rcu(dev);
1329 		if (in_dev) {
1330 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1331 			if (addr)
1332 				break;
1333 		}
1334 	}
1335 	rcu_read_unlock();
1336 
1337 	return addr;
1338 }
1339 EXPORT_SYMBOL(inet_confirm_addr);
1340 
1341 /*
1342  *	Device notifier
1343  */
1344 
/* Subscribe @nb to IPv4 address change notifications (inetaddr_chain). */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1350 
/* Remove @nb from the IPv4 address notification chain (inetaddr_chain). */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1356 
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
*/
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* The first address keeps the bare device name as label. */
		if (named++ == 0)
			goto skip;
		/* Reuse the old ":alias" suffix, or invent ":<n>" if the
		 * old label had none.
		 */
		dot = strchr(old, ':');
		if (!dot) {
			sprintf(old, ":%d", named);
			dot = old;
		}
		/* Append the suffix; if the combined label would overflow
		 * IFNAMSIZ, overwrite the tail of the name instead.
		 */
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1385 
/* An interface can carry IPv4 only if its MTU is at least 68 bytes,
 * the minimum datagram size every host must accept (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	return !(mtu < 68);
}
1390 
1391 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1392 					struct in_device *in_dev)
1393 
1394 {
1395 	struct in_ifaddr *ifa;
1396 
1397 	for (ifa = in_dev->ifa_list; ifa;
1398 	     ifa = ifa->ifa_next) {
1399 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1400 			 ifa->ifa_local, dev,
1401 			 ifa->ifa_local, NULL,
1402 			 dev->dev_addr, NULL);
1403 	}
1404 }
1405 
/* Called only under RTNL semaphore */

/* netdev notifier: keep per-device IPv4 state (in_dev) in sync with
 * device lifecycle events.  Creates in_dev on NETDEV_REGISTER (or when
 * a too-small MTU grows back), destroys it on NETDEV_UNREGISTER or when
 * the MTU drops below the IPv4 minimum, and handles loopback
 * auto-configuration, multicast state and gratuitous ARP.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (IS_ERR(in_dev))
				return notifier_from_errno(PTR_ERR(in_dev));
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* in_dev already present at REGISTER: should never happen;
		 * drop the stale pointer.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Auto-configure 127.0.0.1/8 on loopback devices. */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				ipv4_devconf_setall(in_dev);
				neigh_parms_data_state_setall(in_dev->arp_parms);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1500 
/* Hooks IPv4 device state management into netdev lifecycle events. */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1504 
1505 static size_t inet_nlmsg_size(void)
1506 {
1507 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1508 	       + nla_total_size(4) /* IFA_ADDRESS */
1509 	       + nla_total_size(4) /* IFA_LOCAL */
1510 	       + nla_total_size(4) /* IFA_BROADCAST */
1511 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1512 	       + nla_total_size(4)  /* IFA_FLAGS */
1513 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1514 }
1515 
/* Convert a timestamp in jiffies to hundredths of a second since boot
 * (removing the INITIAL_JIFFIES bias), the unit userspace expects in
 * struct ifa_cacheinfo.
 */
static inline u32 cstamp_delta(unsigned long cstamp)
{
	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
1520 
1521 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1522 			 unsigned long tstamp, u32 preferred, u32 valid)
1523 {
1524 	struct ifa_cacheinfo ci;
1525 
1526 	ci.cstamp = cstamp_delta(cstamp);
1527 	ci.tstamp = cstamp_delta(tstamp);
1528 	ci.ifa_prefered = preferred;
1529 	ci.ifa_valid = valid;
1530 
1531 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1532 }
1533 
/* Encode @ifa as one address netlink message (@event, e.g. RTM_NEWADDR)
 * into @skb.  Returns 0 on success or -EMSGSIZE (partial message
 * cancelled) if the skb is full.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	/* For non-permanent addresses report the *remaining* lifetimes,
	 * clamped at zero once expired, rather than the configured ones.
	 */
	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* Zero addresses and empty labels are simply omitted. */
	if ((ifa->ifa_address &&
	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1593 
/* Netlink dump callback for RTM_GETADDR: walk the device hash table and
 * emit one RTM_NEWADDR message per address.  Resume state across dump
 * invocations lives in cb->args[]: [0]=hash bucket, [1]=device index
 * within bucket, [2]=address index within device.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Generation cookie so userspace can detect a changed
		 * address set mid-dump (NLM_F_DUMP_INTR).
		 */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
					/* skb full: save position, resume
					 * in the next dump call.
					 */
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1650 
/* Broadcast an address change (@event, e.g. RTM_NEWADDR) for @ifa to
 * RTNLGRP_IPV4_IFADDR listeners.  On allocation or fill failure the
 * error is reported on the group so listeners can detect the lost
 * notification.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 portid)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
1677 
1678 static size_t inet_get_link_af_size(const struct net_device *dev,
1679 				    u32 ext_filter_mask)
1680 {
1681 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1682 
1683 	if (!in_dev)
1684 		return 0;
1685 
1686 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1687 }
1688 
1689 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1690 			     u32 ext_filter_mask)
1691 {
1692 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1693 	struct nlattr *nla;
1694 	int i;
1695 
1696 	if (!in_dev)
1697 		return -ENODATA;
1698 
1699 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1700 	if (!nla)
1701 		return -EMSGSIZE;
1702 
1703 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1704 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1705 
1706 	return 0;
1707 }
1708 
/* Policy for the AF_INET part of IFLA_AF_SPEC: only a nested
 * IFLA_INET_CONF container is accepted.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1712 
1713 static int inet_validate_link_af(const struct net_device *dev,
1714 				 const struct nlattr *nla)
1715 {
1716 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1717 	int err, rem;
1718 
1719 	if (dev && !__in_dev_get_rtnl(dev))
1720 		return -EAFNOSUPPORT;
1721 
1722 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1723 	if (err < 0)
1724 		return err;
1725 
1726 	if (tb[IFLA_INET_CONF]) {
1727 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1728 			int cfgid = nla_type(a);
1729 
1730 			if (nla_len(a) < 4)
1731 				return -EINVAL;
1732 
1733 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1734 				return -EINVAL;
1735 		}
1736 	}
1737 
1738 	return 0;
1739 }
1740 
1741 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1742 {
1743 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1744 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1745 	int rem;
1746 
1747 	if (!in_dev)
1748 		return -EAFNOSUPPORT;
1749 
1750 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1751 		BUG();
1752 
1753 	if (tb[IFLA_INET_CONF]) {
1754 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1755 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1756 	}
1757 
1758 	return 0;
1759 }
1760 
1761 static int inet_netconf_msgsize_devconf(int type)
1762 {
1763 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1764 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1765 	bool all = false;
1766 
1767 	if (type == NETCONFA_ALL)
1768 		all = true;
1769 
1770 	if (all || type == NETCONFA_FORWARDING)
1771 		size += nla_total_size(4);
1772 	if (all || type == NETCONFA_RP_FILTER)
1773 		size += nla_total_size(4);
1774 	if (all || type == NETCONFA_MC_FORWARDING)
1775 		size += nla_total_size(4);
1776 	if (all || type == NETCONFA_PROXY_NEIGH)
1777 		size += nla_total_size(4);
1778 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1779 		size += nla_total_size(4);
1780 
1781 	return size;
1782 }
1783 
/* Encode an RTM_NEWNETCONF message for @devconf into @skb.  @type names
 * a single NETCONFA_* attribute to include, or NETCONFA_ALL for all of
 * them; a NULL @devconf emits only the ifindex.  Returns 0 on success
 * or -EMSGSIZE (message cancelled).
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	if (!devconf)
		goto out;

	if ((all || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	/* NETCONFA_PROXY_NEIGH maps to the PROXY_ARP devconf entry. */
	if ((all || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;
	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1839 
1840 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1841 				 int ifindex, struct ipv4_devconf *devconf)
1842 {
1843 	struct sk_buff *skb;
1844 	int err = -ENOBUFS;
1845 
1846 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1847 	if (!skb)
1848 		goto errout;
1849 
1850 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1851 					event, 0, type);
1852 	if (err < 0) {
1853 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1854 		WARN_ON(err == -EMSGSIZE);
1855 		kfree_skb(skb);
1856 		goto errout;
1857 	}
1858 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1859 	return;
1860 errout:
1861 	if (err < 0)
1862 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1863 }
1864 
/* Attribute policy for RTM_*NETCONF requests.
 * NOTE(review): NETCONFA_MC_FORWARDING has no entry here even though it
 * is included in replies -- it appears to be report-only; confirm.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
1872 
/* RTM_GETNETCONF handler: reply with the full netconf state for the
 * requested ifindex, including the NETCONFA_IFINDEX_ALL/_DEFAULT pseudo
 * interfaces.  Returns 0 on success or a negative errno.
 */
static int inet_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX+1];
	struct netconfmsg *ncm;
	struct sk_buff *skb;
	struct ipv4_devconf *devconf;
	struct in_device *in_dev;
	struct net_device *dev;
	int ifindex;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
			  devconf_ipv4_policy, NULL);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	if (!tb[NETCONFA_IFINDEX])
		goto errout;

	/* Map the requested ifindex to the right devconf: the "all" and
	 * "default" pseudo entries, or a real device's configuration.
	 */
	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
	switch (ifindex) {
	case NETCONFA_IFINDEX_ALL:
		devconf = net->ipv4.devconf_all;
		break;
	case NETCONFA_IFINDEX_DEFAULT:
		devconf = net->ipv4.devconf_dflt;
		break;
	default:
		dev = __dev_get_by_index(net, ifindex);
		if (!dev)
			goto errout;
		in_dev = __in_dev_get_rtnl(dev);
		if (!in_dev)
			goto errout;
		devconf = &in_dev->cnf;
		break;
	}

	err = -ENOBUFS;
	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
1933 
/* Netlink dump for RTM_GETNETCONF: emit one message per device, then
 * the "all" and "default" pseudo entries.  cb->args[0] holds the hash
 * bucket (with two extra virtual buckets for the pseudo entries) and
 * cb->args[1] the device index within the bucket.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Generation cookie for NLM_F_DUMP_INTR detection. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      NETCONFA_ALL) < 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* Virtual bucket NETDEV_HASHENTRIES: the "all" pseudo entry. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
	/* Virtual bucket NETDEV_HASHENTRIES + 1: the "default" entry. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      NETCONFA_ALL) < 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
2004 
2005 #ifdef CONFIG_SYSCTL
2006 
2007 static void devinet_copy_dflt_conf(struct net *net, int i)
2008 {
2009 	struct net_device *dev;
2010 
2011 	rcu_read_lock();
2012 	for_each_netdev_rcu(net, dev) {
2013 		struct in_device *in_dev;
2014 
2015 		in_dev = __in_dev_get_rcu(dev);
2016 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2017 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2018 	}
2019 	rcu_read_unlock();
2020 }
2021 
/* called with RTNL locked */

/* Apply a change of the "all" forwarding sysctl: mirror it into the
 * default entry and every device, disable LRO when enabling forwarding,
 * and send netconf notifications for each updated entry.
 */
static void inet_forward_change(struct net *net)
{
	struct net_device *dev;
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);

	/* Routers must not accept redirects. */
	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_ALL,
				    net->ipv4.devconf_all);
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
				    NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_DEFAULT,
				    net->ipv4.devconf_dflt);

	for_each_netdev(net, dev) {
		struct in_device *in_dev;

		if (on)
			dev_disable_lro(dev);

		in_dev = __in_dev_get_rtnl(dev);
		if (in_dev) {
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    dev->ifindex, &in_dev->cnf);
		}
	}
}
2054 
2055 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2056 {
2057 	if (cnf == net->ipv4.devconf_dflt)
2058 		return NETCONFA_IFINDEX_DEFAULT;
2059 	else if (cnf == net->ipv4.devconf_all)
2060 		return NETCONFA_IFINDEX_ALL;
2061 	else {
2062 		struct in_device *idev
2063 			= container_of(cnf, struct in_device, cnf);
2064 		return idev->dev->ifindex;
2065 	}
2066 }
2067 
/* proc_handler shared by most devconf sysctls: store the new integer,
 * remember that this entry was explicitly set (so default propagation
 * skips it), copy default changes to devices, flush the route cache
 * where the value affects routing decisions, and send netconf
 * notifications for attributes userspace tracks.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* Recover the devconf array index from the data pointer. */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
						    ifindex, cnf);
		}
	}

	return ret;
}
2116 
/* proc_handler for the "forwarding" sysctls.  Propagating the change
 * needs RTNL; rtnl_trylock() + restart_syscall() avoids deadlocking
 * against concurrent RTNL holders, restoring the written value and file
 * position first so the retried write behaves identically.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		/* The "default" entry needs no RTNL work, only a
		 * notification (the "else" branch below).
		 */
		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				inet_forward_change(net);
			} else {
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2160 
2161 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2162 				void __user *buffer,
2163 				size_t *lenp, loff_t *ppos)
2164 {
2165 	int *valp = ctl->data;
2166 	int val = *valp;
2167 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2168 	struct net *net = ctl->extra2;
2169 
2170 	if (write && *valp != val)
2171 		rt_cache_flush(net);
2172 
2173 	return ret;
2174 }
2175 
/* Build one ctl_table entry for devconf attribute @attr, exposed as
 * sysctl @name with mode @mval and handler @proc.  .data and .extra1
 * initially point into the global ipv4_devconf template.
 * NOTE(review): registration presumably rebinds .data/.extra1/.extra2
 * per net/device -- confirm against the sysctl registration code.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Writable entry using the standard devconf handler. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry (e.g. state reported by the kernel, not settable). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Writable entry with a custom handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Writable entry that flushes the route cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2198 
/*
 * Template sysctl table instantiated (via kmemdup) once per devconf:
 * for "all", "default" and each network device.  The array is sized
 * __IPV4_DEVCONF_MAX so the trailing slot stays zero-initialized and
 * acts as the table terminator.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2249 
2250 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2251 				     int ifindex, struct ipv4_devconf *p)
2252 {
2253 	int i;
2254 	struct devinet_sysctl_table *t;
2255 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2256 
2257 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2258 	if (!t)
2259 		goto out;
2260 
2261 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2262 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2263 		t->devinet_vars[i].extra1 = p;
2264 		t->devinet_vars[i].extra2 = net;
2265 	}
2266 
2267 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2268 
2269 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2270 	if (!t->sysctl_header)
2271 		goto free;
2272 
2273 	p->sysctl = t;
2274 
2275 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2276 				    ifindex, p);
2277 	return 0;
2278 
2279 free:
2280 	kfree(t);
2281 out:
2282 	return -ENOBUFS;
2283 }
2284 
2285 static void __devinet_sysctl_unregister(struct net *net,
2286 					struct ipv4_devconf *cnf, int ifindex)
2287 {
2288 	struct devinet_sysctl_table *t = cnf->sysctl;
2289 
2290 	if (t) {
2291 		cnf->sysctl = NULL;
2292 		unregister_net_sysctl_table(t->sysctl_header);
2293 		kfree(t);
2294 	}
2295 
2296 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2297 }
2298 
2299 static int devinet_sysctl_register(struct in_device *idev)
2300 {
2301 	int err;
2302 
2303 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2304 		return -EINVAL;
2305 
2306 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2307 	if (err)
2308 		return err;
2309 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2310 					idev->dev->ifindex, &idev->cnf);
2311 	if (err)
2312 		neigh_sysctl_unregister(idev->arp_parms);
2313 	return err;
2314 }
2315 
2316 static void devinet_sysctl_unregister(struct in_device *idev)
2317 {
2318 	struct net *net = dev_net(idev->dev);
2319 
2320 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2321 	neigh_sysctl_unregister(idev->arp_parms);
2322 }
2323 
/*
 * /proc/sys/net/ipv4/ip_forward: a legacy alias that writes through to
 * the "all" FORWARDING knob of init_net.  Per-netns copies get .data,
 * .extra1 and .extra2 rebased in devinet_init_net().
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },	/* sentinel */
};
2337 #endif
2338 
/*
 * Per-netns init: set up the "all" and "default" devconf instances and
 * their sysctls, plus the ip_forward alias table.
 *
 * init_net uses the static ipv4_devconf/ipv4_devconf_dflt/ctl_forward_entry
 * objects directly; every other netns gets kmemdup'd private copies.  The
 * error path unwinds in strict reverse order and only frees objects that
 * were actually duplicated (never the static originals).
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Non-initial netns: clone the templates so each netns has
		 * independent settings.
		 */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (!all)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (!dflt)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (!tbl)
			goto err_alloc_ctl;

		/* Point the cloned ip_forward entry at this netns' "all". */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default",
					NETCONFA_IFINDEX_DEFAULT, dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (!forw_hdr)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
2411 
2412 static __net_exit void devinet_exit_net(struct net *net)
2413 {
2414 #ifdef CONFIG_SYSCTL
2415 	struct ctl_table *tbl;
2416 
2417 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2418 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2419 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2420 				    NETCONFA_IFINDEX_DEFAULT);
2421 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2422 				    NETCONFA_IFINDEX_ALL);
2423 	kfree(tbl);
2424 #endif
2425 	kfree(net->ipv4.devconf_dflt);
2426 	kfree(net->ipv4.devconf_all);
2427 }
2428 
/* Per-netns lifecycle hooks for IPv4 device configuration. */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2433 
/* rtnetlink per-address-family hooks for IFLA_AF_SPEC handling (AF_INET). */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2441 
/*
 * Boot-time initialization of the IPv4 device layer: set up the address
 * hash table, the per-netns subsystem, the netdevice/gifconf hooks, the
 * periodic address-lifetime work, and the rtnetlink handlers for
 * address and netconf messages.
 */
void __init devinet_init(void)
{
	int i;

	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Kick off the periodic scan that expires temporary addresses. */
	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, NULL);
}
2464