xref: /linux/net/ipv4/devinet.c (revision 4f139972b489f8bc2c821aa25ac65018d92af3f7)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68 
69 static struct ipv4_devconf ipv4_devconf = {
70 	.data = {
71 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77 	},
78 };
79 
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 	.data = {
82 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89 	},
90 };
91 
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
94 
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 	[IFA_LOCAL]     	= { .type = NLA_U32 },
97 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
98 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
99 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
101 	[IFA_FLAGS]		= { .type = NLA_U32 },
102 };
103 
104 #define IN4_ADDR_HSIZE_SHIFT	8
105 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
106 
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108 
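/* Hash a local IPv4 address, mixed with its netns, into one of the
 * IN4_ADDR_HSIZE buckets of inet_addr_lst so the owning ifaddr can
 * later be found by address.
 */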
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111 	u32 val = (__force u32) addr ^ net_hash_mix(net);
112 
113 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115 
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
119 
120 	ASSERT_RTNL();
121 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123 
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126 	ASSERT_RTNL();
127 	hlist_del_init_rcu(&ifa->hash);
128 }
129 
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it must be protected by RCU or hold the RTNL.
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140 	u32 hash = inet_addr_hash(net, addr);
141 	struct net_device *result = NULL;
142 	struct in_ifaddr *ifa;
143 
144 	rcu_read_lock();
145 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146 		if (ifa->ifa_local == addr) {
147 			struct net_device *dev = ifa->ifa_dev->dev;
148 
149 			if (!net_eq(dev_net(dev), net))
150 				continue;
151 			result = dev;
152 			break;
153 		}
154 	}
155 	if (!result) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fallback to FIB local table so that communication
161 		 * over loopback subnets works.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	}
169 	if (result && devref)
170 		dev_hold(result);
171 	rcu_read_unlock();
172 	return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175 
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180 			 int destroy);
181 #ifdef CONFIG_SYSCTL
182 static int devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static int devinet_sysctl_register(struct in_device *idev)
186 {
187 	return 0;
188 }
189 static void devinet_sysctl_unregister(struct in_device *idev)
190 {
191 }
192 #endif
193 
194 /* Allocation and RCU-deferred freeing of in_ifaddr structures. */
195 
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200 
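/* RCU callback: drop the reference on the owning in_device (if any)
 * and free the in_ifaddr once all readers are done with it.
 */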
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 	if (ifa->ifa_dev)
205 		in_dev_put(ifa->ifa_dev);
206 	kfree(ifa);
207 }
208 
209 static void inet_free_ifa(struct in_ifaddr *ifa)
210 {
211 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
212 }
213 
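/* Final teardown once the last reference on an in_device is dropped:
 * warn if addresses or multicast state are still attached, release the
 * reference on the underlying net_device and free the structure.
 */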
214 void in_dev_finish_destroy(struct in_device *idev)
215 {
216 	struct net_device *dev = idev->dev;
217 
218 	WARN_ON(idev->ifa_list);
219 	WARN_ON(idev->mc_list);
220 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
221 #ifdef NET_REFCNT_DEBUG
222 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 #endif
224 	dev_put(dev);
225 	if (!idev->dead)
226 		pr_err("Freeing alive in_device %p\n", idev);
227 	else
228 		kfree(idev);
229 }
230 EXPORT_SYMBOL(in_dev_finish_destroy);
231 
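/* Allocate and attach the IPv4 state for a net_device: copy the
 * per-namespace default devconf, set up ARP parameters, sysctls and
 * multicast state, and publish the in_device via dev->ip_ptr last so
 * the receive path only ever sees a fully initialised structure.
 */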
232 static struct in_device *inetdev_init(struct net_device *dev)
233 {
234 	struct in_device *in_dev;
235 	int err = -ENOMEM;
236 
237 	ASSERT_RTNL();
238 
239 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
240 	if (!in_dev)
241 		goto out;
242 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
243 			sizeof(in_dev->cnf));
244 	in_dev->cnf.sysctl = NULL;
245 	in_dev->dev = dev;
246 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
247 	if (!in_dev->arp_parms)
248 		goto out_kfree;
249 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
250 		dev_disable_lro(dev);
251 	/* Reference in_dev->dev */
252 	dev_hold(dev);
253 	/* Account for reference dev->ip_ptr (below) */
254 	in_dev_hold(in_dev);
255 
256 	err = devinet_sysctl_register(in_dev);
257 	if (err) {
258 		in_dev->dead = 1;
259 		in_dev_put(in_dev);
260 		in_dev = NULL;
261 		goto out;
262 	}
263 	ip_mc_init_dev(in_dev);
264 	if (dev->flags & IFF_UP)
265 		ip_mc_up(in_dev);
266 
267 	/* we can receive as soon as ip_ptr is set -- do this last */
268 	rcu_assign_pointer(dev->ip_ptr, in_dev);
269 out:
270 	return in_dev ?: ERR_PTR(err);
271 out_kfree:
272 	kfree(in_dev);
273 	in_dev = NULL;
274 	goto out;
275 }
276 
277 static void in_dev_rcu_put(struct rcu_head *head)
278 {
279 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
280 	in_dev_put(idev);
281 }
282 
283 static void inetdev_destroy(struct in_device *in_dev)
284 {
285 	struct in_ifaddr *ifa;
286 	struct net_device *dev;
287 
288 	ASSERT_RTNL();
289 
290 	dev = in_dev->dev;
291 
292 	in_dev->dead = 1;
293 
294 	ip_mc_destroy_dev(in_dev);
295 
296 	while ((ifa = in_dev->ifa_list) != NULL) {
297 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
298 		inet_free_ifa(ifa);
299 	}
300 
301 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
302 
303 	devinet_sysctl_unregister(in_dev);
304 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
305 	arp_ifdown(dev);
306 
307 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
308 }
309 
310 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
311 {
312 	rcu_read_lock();
313 	for_primary_ifa(in_dev) {
314 		if (inet_ifa_match(a, ifa)) {
315 			if (!b || inet_ifa_match(b, ifa)) {
316 				rcu_read_unlock();
317 				return 1;
318 			}
319 		}
320 	} endfor_ifa(in_dev);
321 	rcu_read_unlock();
322 	return 0;
323 }
324 
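/* Remove one address from its in_device.  Deleting a primary address
 * also deletes its secondaries, unless promote_secondaries is enabled,
 * in which case the first matching secondary is promoted to primary
 * and the remaining ones are rewired behind it.
 */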
325 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
326 			 int destroy, struct nlmsghdr *nlh, u32 portid)
327 {
328 	struct in_ifaddr *promote = NULL;
329 	struct in_ifaddr *ifa, *ifa1 = *ifap;
330 	struct in_ifaddr *last_prim = in_dev->ifa_list;
331 	struct in_ifaddr *prev_prom = NULL;
332 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
333 
334 	ASSERT_RTNL();
335 
336 	if (in_dev->dead)
337 		goto no_promotions;
338 
339 	/* 1. Deleting a primary ifaddr forces deletion of all its secondaries
340 	 * unless alias promotion is enabled.
341 	 */
342 
343 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
344 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
345 
346 		while ((ifa = *ifap1) != NULL) {
347 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
348 			    ifa1->ifa_scope <= ifa->ifa_scope)
349 				last_prim = ifa;
350 
351 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
352 			    ifa1->ifa_mask != ifa->ifa_mask ||
353 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
354 				ifap1 = &ifa->ifa_next;
355 				prev_prom = ifa;
356 				continue;
357 			}
358 
359 			if (!do_promote) {
360 				inet_hash_remove(ifa);
361 				*ifap1 = ifa->ifa_next;
362 
363 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
364 				blocking_notifier_call_chain(&inetaddr_chain,
365 						NETDEV_DOWN, ifa);
366 				inet_free_ifa(ifa);
367 			} else {
368 				promote = ifa;
369 				break;
370 			}
371 		}
372 	}
373 
374 	/* On promotion all secondaries from the subnet change their
375 	 * primary IP, so we must silently remove all their routes and
376 	 * later add them back with the new prefsrc. Do this while all
377 	 * addresses are still on the device list.
378 	 */
379 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
380 		if (ifa1->ifa_mask == ifa->ifa_mask &&
381 		    inet_ifa_match(ifa1->ifa_address, ifa))
382 			fib_del_ifaddr(ifa, ifa1);
383 	}
384 
385 no_promotions:
386 	/* 2. Unlink it */
387 
388 	*ifap = ifa1->ifa_next;
389 	inet_hash_remove(ifa1);
390 
391 	/* 3. Announce address deletion */
392 
393 	/* Send the netlink message first, then call the notifier chain.
394 	   At first sight the FIB update triggered by the notifier will
395 	   refer to an already deleted ifaddr, which could confuse
396 	   netlink listeners. It does not: if gated saw the route deleted
397 	   while still believing the ifaddr was valid, it would try to
398 	   restore the deleted routes... Grr.
399 	   So this order is correct.
400 	 */
401 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
402 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
403 
404 	if (promote) {
405 		struct in_ifaddr *next_sec = promote->ifa_next;
406 
407 		if (prev_prom) {
408 			prev_prom->ifa_next = promote->ifa_next;
409 			promote->ifa_next = last_prim->ifa_next;
410 			last_prim->ifa_next = promote;
411 		}
412 
413 		promote->ifa_flags &= ~IFA_F_SECONDARY;
414 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
415 		blocking_notifier_call_chain(&inetaddr_chain,
416 				NETDEV_UP, promote);
417 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
418 			if (ifa1->ifa_mask != ifa->ifa_mask ||
419 			    !inet_ifa_match(ifa1->ifa_address, ifa))
420 					continue;
421 			fib_add_ifaddr(ifa);
422 		}
423 
424 	}
425 	if (destroy)
426 		inet_free_ifa(ifa1);
427 }
428 
429 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
430 			 int destroy)
431 {
432 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
433 }
434 
435 static void check_lifetime(struct work_struct *work);
436 
437 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
438 
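/* Link a new address into its in_device's list, keeping primaries
 * ordered by scope and marking the address as secondary when another
 * primary already covers the same subnet.  The address is then hashed,
 * announced via netlink and passed to the inetaddr notifier chain.
 */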
439 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
440 			     u32 portid)
441 {
442 	struct in_device *in_dev = ifa->ifa_dev;
443 	struct in_ifaddr *ifa1, **ifap, **last_primary;
444 
445 	ASSERT_RTNL();
446 
447 	if (!ifa->ifa_local) {
448 		inet_free_ifa(ifa);
449 		return 0;
450 	}
451 
452 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
453 	last_primary = &in_dev->ifa_list;
454 
455 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
456 	     ifap = &ifa1->ifa_next) {
457 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
458 		    ifa->ifa_scope <= ifa1->ifa_scope)
459 			last_primary = &ifa1->ifa_next;
460 		if (ifa1->ifa_mask == ifa->ifa_mask &&
461 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
462 			if (ifa1->ifa_local == ifa->ifa_local) {
463 				inet_free_ifa(ifa);
464 				return -EEXIST;
465 			}
466 			if (ifa1->ifa_scope != ifa->ifa_scope) {
467 				inet_free_ifa(ifa);
468 				return -EINVAL;
469 			}
470 			ifa->ifa_flags |= IFA_F_SECONDARY;
471 		}
472 	}
473 
474 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
475 		prandom_seed((__force u32) ifa->ifa_local);
476 		ifap = last_primary;
477 	}
478 
479 	ifa->ifa_next = *ifap;
480 	*ifap = ifa;
481 
482 	inet_hash_insert(dev_net(in_dev->dev), ifa);
483 
484 	cancel_delayed_work(&check_lifetime_work);
485 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
486 
487 	/* Send the netlink message first, then call the notifier.
488 	   The notifier will trigger the FIB update, so that netlink
489 	   listeners learn about the new ifaddr. */
490 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
491 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
492 
493 	return 0;
494 }
495 
496 static int inet_insert_ifa(struct in_ifaddr *ifa)
497 {
498 	return __inet_insert_ifa(ifa, NULL, 0);
499 }
500 
501 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
502 {
503 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
504 
505 	ASSERT_RTNL();
506 
507 	if (!in_dev) {
508 		inet_free_ifa(ifa);
509 		return -ENOBUFS;
510 	}
511 	ipv4_devconf_setall(in_dev);
512 	neigh_parms_data_state_setall(in_dev->arp_parms);
513 	if (ifa->ifa_dev != in_dev) {
514 		WARN_ON(ifa->ifa_dev);
515 		in_dev_hold(in_dev);
516 		ifa->ifa_dev = in_dev;
517 	}
518 	if (ipv4_is_loopback(ifa->ifa_local))
519 		ifa->ifa_scope = RT_SCOPE_HOST;
520 	return inet_insert_ifa(ifa);
521 }
522 
523 /* Caller must hold RCU or RTNL:
524  * we don't take a reference on the found in_device.
525  */
526 struct in_device *inetdev_by_index(struct net *net, int ifindex)
527 {
528 	struct net_device *dev;
529 	struct in_device *in_dev = NULL;
530 
531 	rcu_read_lock();
532 	dev = dev_get_by_index_rcu(net, ifindex);
533 	if (dev)
534 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
535 	rcu_read_unlock();
536 	return in_dev;
537 }
538 EXPORT_SYMBOL(inetdev_by_index);
539 
540 /* Called only under the RTNL semaphore. No extra locks taken. */
541 
542 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
543 				    __be32 mask)
544 {
545 	ASSERT_RTNL();
546 
547 	for_primary_ifa(in_dev) {
548 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
549 			return ifa;
550 	} endfor_ifa(in_dev);
551 	return NULL;
552 }
553 
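/* Join or leave the multicast group given by ifa->ifa_address on the
 * per-namespace autojoin socket; used for addresses carrying the
 * IFA_F_MCAUTOJOIN flag.
 */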
554 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
555 {
556 	struct ip_mreqn mreq = {
557 		.imr_multiaddr.s_addr = ifa->ifa_address,
558 		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
559 	};
560 	int ret;
561 
562 	ASSERT_RTNL();
563 
564 	lock_sock(sk);
565 	if (join)
566 		ret = ip_mc_join_group(sk, &mreq);
567 	else
568 		ret = ip_mc_leave_group(sk, &mreq);
569 	release_sock(sk);
570 
571 	return ret;
572 }
573 
574 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
575 {
576 	struct net *net = sock_net(skb->sk);
577 	struct nlattr *tb[IFA_MAX+1];
578 	struct in_device *in_dev;
579 	struct ifaddrmsg *ifm;
580 	struct in_ifaddr *ifa, **ifap;
581 	int err = -EINVAL;
582 
583 	ASSERT_RTNL();
584 
585 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
586 	if (err < 0)
587 		goto errout;
588 
589 	ifm = nlmsg_data(nlh);
590 	in_dev = inetdev_by_index(net, ifm->ifa_index);
591 	if (!in_dev) {
592 		err = -ENODEV;
593 		goto errout;
594 	}
595 
596 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
597 	     ifap = &ifa->ifa_next) {
598 		if (tb[IFA_LOCAL] &&
599 		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
600 			continue;
601 
602 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
603 			continue;
604 
605 		if (tb[IFA_ADDRESS] &&
606 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
607 		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
608 			continue;
609 
610 		if (ipv4_is_multicast(ifa->ifa_address))
611 			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
612 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
613 		return 0;
614 	}
615 
616 	err = -EADDRNOTAVAIL;
617 errout:
618 	return err;
619 }
620 
621 #define INFINITY_LIFE_TIME	0xFFFFFFFF
622 
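/* Periodic worker: scan the address hash table, delete addresses whose
 * valid lifetime has expired, mark those past their preferred lifetime
 * as IFA_F_DEPRECATED, and reschedule itself for the next relevant
 * timeout.
 */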
623 static void check_lifetime(struct work_struct *work)
624 {
625 	unsigned long now, next, next_sec, next_sched;
626 	struct in_ifaddr *ifa;
627 	struct hlist_node *n;
628 	int i;
629 
630 	now = jiffies;
631 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
632 
633 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
634 		bool change_needed = false;
635 
636 		rcu_read_lock();
637 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
638 			unsigned long age;
639 
640 			if (ifa->ifa_flags & IFA_F_PERMANENT)
641 				continue;
642 
643 			/* We try to batch several events at once. */
644 			age = (now - ifa->ifa_tstamp +
645 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
646 
647 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
648 			    age >= ifa->ifa_valid_lft) {
649 				change_needed = true;
650 			} else if (ifa->ifa_preferred_lft ==
651 				   INFINITY_LIFE_TIME) {
652 				continue;
653 			} else if (age >= ifa->ifa_preferred_lft) {
654 				if (time_before(ifa->ifa_tstamp +
655 						ifa->ifa_valid_lft * HZ, next))
656 					next = ifa->ifa_tstamp +
657 					       ifa->ifa_valid_lft * HZ;
658 
659 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
660 					change_needed = true;
661 			} else if (time_before(ifa->ifa_tstamp +
662 					       ifa->ifa_preferred_lft * HZ,
663 					       next)) {
664 				next = ifa->ifa_tstamp +
665 				       ifa->ifa_preferred_lft * HZ;
666 			}
667 		}
668 		rcu_read_unlock();
669 		if (!change_needed)
670 			continue;
671 		rtnl_lock();
672 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
673 			unsigned long age;
674 
675 			if (ifa->ifa_flags & IFA_F_PERMANENT)
676 				continue;
677 
678 			/* We try to batch several events at once. */
679 			age = (now - ifa->ifa_tstamp +
680 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
681 
682 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
683 			    age >= ifa->ifa_valid_lft) {
684 				struct in_ifaddr **ifap;
685 
686 				for (ifap = &ifa->ifa_dev->ifa_list;
687 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
688 					if (*ifap == ifa) {
689 						inet_del_ifa(ifa->ifa_dev,
690 							     ifap, 1);
691 						break;
692 					}
693 				}
694 			} else if (ifa->ifa_preferred_lft !=
695 				   INFINITY_LIFE_TIME &&
696 				   age >= ifa->ifa_preferred_lft &&
697 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
698 				ifa->ifa_flags |= IFA_F_DEPRECATED;
699 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
700 			}
701 		}
702 		rtnl_unlock();
703 	}
704 
705 	next_sec = round_jiffies_up(next);
706 	next_sched = next;
707 
708 	/* If rounded timeout is accurate enough, accept it. */
709 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
710 		next_sched = next_sec;
711 
712 	now = jiffies;
713 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
714 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
715 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
716 
717 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
718 			next_sched - now);
719 }
720 
721 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
722 			     __u32 prefered_lft)
723 {
724 	unsigned long timeout;
725 
726 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
727 
728 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
729 	if (addrconf_finite_timeout(timeout))
730 		ifa->ifa_valid_lft = timeout;
731 	else
732 		ifa->ifa_flags |= IFA_F_PERMANENT;
733 
734 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
735 	if (addrconf_finite_timeout(timeout)) {
736 		if (timeout == 0)
737 			ifa->ifa_flags |= IFA_F_DEPRECATED;
738 		ifa->ifa_preferred_lft = timeout;
739 	}
740 	ifa->ifa_tstamp = jiffies;
741 	if (!ifa->ifa_cstamp)
742 		ifa->ifa_cstamp = ifa->ifa_tstamp;
743 }
744 
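/* Translate an RTM_NEWADDR request into a freshly allocated in_ifaddr.
 * Lifetimes from IFA_CACHEINFO are returned separately so the caller
 * can apply them with set_ifa_lifetime().
 */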
745 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
746 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
747 {
748 	struct nlattr *tb[IFA_MAX+1];
749 	struct in_ifaddr *ifa;
750 	struct ifaddrmsg *ifm;
751 	struct net_device *dev;
752 	struct in_device *in_dev;
753 	int err;
754 
755 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
756 	if (err < 0)
757 		goto errout;
758 
759 	ifm = nlmsg_data(nlh);
760 	err = -EINVAL;
761 	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
762 		goto errout;
763 
764 	dev = __dev_get_by_index(net, ifm->ifa_index);
765 	err = -ENODEV;
766 	if (!dev)
767 		goto errout;
768 
769 	in_dev = __in_dev_get_rtnl(dev);
770 	err = -ENOBUFS;
771 	if (!in_dev)
772 		goto errout;
773 
774 	ifa = inet_alloc_ifa();
775 	if (!ifa)
776 		/*
777 		 * A potential in_dev allocation can be left alive; it stays
778 		 * assigned to its device and is destroyed with it.
779 		 */
780 		goto errout;
781 
782 	ipv4_devconf_setall(in_dev);
783 	neigh_parms_data_state_setall(in_dev->arp_parms);
784 	in_dev_hold(in_dev);
785 
786 	if (!tb[IFA_ADDRESS])
787 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
788 
789 	INIT_HLIST_NODE(&ifa->hash);
790 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
791 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
792 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
793 					 ifm->ifa_flags;
794 	ifa->ifa_scope = ifm->ifa_scope;
795 	ifa->ifa_dev = in_dev;
796 
797 	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
798 	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
799 
800 	if (tb[IFA_BROADCAST])
801 		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
802 
803 	if (tb[IFA_LABEL])
804 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
805 	else
806 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
807 
808 	if (tb[IFA_CACHEINFO]) {
809 		struct ifa_cacheinfo *ci;
810 
811 		ci = nla_data(tb[IFA_CACHEINFO]);
812 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
813 			err = -EINVAL;
814 			goto errout_free;
815 		}
816 		*pvalid_lft = ci->ifa_valid;
817 		*pprefered_lft = ci->ifa_prefered;
818 	}
819 
820 	return ifa;
821 
822 errout_free:
823 	inet_free_ifa(ifa);
824 errout:
825 	return ERR_PTR(err);
826 }
827 
828 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
829 {
830 	struct in_device *in_dev = ifa->ifa_dev;
831 	struct in_ifaddr *ifa1, **ifap;
832 
833 	if (!ifa->ifa_local)
834 		return NULL;
835 
836 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
837 	     ifap = &ifa1->ifa_next) {
838 		if (ifa1->ifa_mask == ifa->ifa_mask &&
839 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
840 		    ifa1->ifa_local == ifa->ifa_local)
841 			return ifa1;
842 	}
843 	return NULL;
844 }
845 
846 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
847 {
848 	struct net *net = sock_net(skb->sk);
849 	struct in_ifaddr *ifa;
850 	struct in_ifaddr *ifa_existing;
851 	__u32 valid_lft = INFINITY_LIFE_TIME;
852 	__u32 prefered_lft = INFINITY_LIFE_TIME;
853 
854 	ASSERT_RTNL();
855 
856 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
857 	if (IS_ERR(ifa))
858 		return PTR_ERR(ifa);
859 
860 	ifa_existing = find_matching_ifa(ifa);
861 	if (!ifa_existing) {
862 		/* It would be best to check for !NLM_F_CREATE here but
863 		 * userspace already relies on not having to provide this.
864 		 */
865 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
866 		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
867 			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
868 					       true, ifa);
869 
870 			if (ret < 0) {
871 				inet_free_ifa(ifa);
872 				return ret;
873 			}
874 		}
875 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
876 	} else {
877 		inet_free_ifa(ifa);
878 
879 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
880 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
881 			return -EEXIST;
882 		ifa = ifa_existing;
883 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
884 		cancel_delayed_work(&check_lifetime_work);
885 		queue_delayed_work(system_power_efficient_wq,
886 				&check_lifetime_work, 0);
887 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
888 	}
889 	return 0;
890 }
891 
892 /*
893  *	Determine a default network mask, based on the IP address.
894  */
895 
896 static int inet_abc_len(__be32 addr)
897 {
898 	int rc = -1;	/* Something else, probably a multicast. */
899 
900 	if (ipv4_is_zeronet(addr))
901 		rc = 0;
902 	else {
903 		__u32 haddr = ntohl(addr);
904 
905 		if (IN_CLASSA(haddr))
906 			rc = 8;
907 		else if (IN_CLASSB(haddr))
908 			rc = 16;
909 		else if (IN_CLASSC(haddr))
910 			rc = 24;
911 	}
912 
913 	return rc;
914 }
915 
916 
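/* Legacy SIOC[GS]IF* ioctl handler for IPv4 addresses.  The address is
 * looked up by label (and, for the get ioctls, by label plus address
 * in 4.4BSD alias style), then the requested address, broadcast,
 * destination or netmask operation is performed under RTNL.
 */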
917 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
918 {
919 	struct ifreq ifr;
920 	struct sockaddr_in sin_orig;
921 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
922 	struct in_device *in_dev;
923 	struct in_ifaddr **ifap = NULL;
924 	struct in_ifaddr *ifa = NULL;
925 	struct net_device *dev;
926 	char *colon;
927 	int ret = -EFAULT;
928 	int tryaddrmatch = 0;
929 
930 	/*
931 	 *	Fetch the caller's info block into kernel space
932 	 */
933 
934 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
935 		goto out;
936 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
937 
938 	/* save original address for comparison */
939 	memcpy(&sin_orig, sin, sizeof(*sin));
940 
941 	colon = strchr(ifr.ifr_name, ':');
942 	if (colon)
943 		*colon = 0;
944 
945 	dev_load(net, ifr.ifr_name);
946 
947 	switch (cmd) {
948 	case SIOCGIFADDR:	/* Get interface address */
949 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
950 	case SIOCGIFDSTADDR:	/* Get the destination address */
951 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
952 		/* Note that these ioctls will not sleep,
953 		   so we do not impose a lock.
954 		   One day (with SMP in mind) we may be forced to add a shared lock here.
955 		 */
956 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
957 		memset(sin, 0, sizeof(*sin));
958 		sin->sin_family = AF_INET;
959 		break;
960 
961 	case SIOCSIFFLAGS:
962 		ret = -EPERM;
963 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
964 			goto out;
965 		break;
966 	case SIOCSIFADDR:	/* Set interface address (and family) */
967 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
968 	case SIOCSIFDSTADDR:	/* Set the destination address */
969 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
970 		ret = -EPERM;
971 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
972 			goto out;
973 		ret = -EINVAL;
974 		if (sin->sin_family != AF_INET)
975 			goto out;
976 		break;
977 	default:
978 		ret = -EINVAL;
979 		goto out;
980 	}
981 
982 	rtnl_lock();
983 
984 	ret = -ENODEV;
985 	dev = __dev_get_by_name(net, ifr.ifr_name);
986 	if (!dev)
987 		goto done;
988 
989 	if (colon)
990 		*colon = ':';
991 
992 	in_dev = __in_dev_get_rtnl(dev);
993 	if (in_dev) {
994 		if (tryaddrmatch) {
995 			/* Matthias Andree */
996 			/* compare label and address (4.4BSD style) */
997 			/* note: we only do this for a limited set of ioctls
998 			   and only if the original address family was AF_INET.
999 			   This is checked above. */
1000 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1001 			     ifap = &ifa->ifa_next) {
1002 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1003 				    sin_orig.sin_addr.s_addr ==
1004 							ifa->ifa_local) {
1005 					break; /* found */
1006 				}
1007 			}
1008 		}
1009 		/* we didn't get a match, maybe the application is
1010 		   4.3BSD-style and passed in junk so we fall back to
1011 		   comparing just the label */
1012 		if (!ifa) {
1013 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1014 			     ifap = &ifa->ifa_next)
1015 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1016 					break;
1017 		}
1018 	}
1019 
1020 	ret = -EADDRNOTAVAIL;
1021 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1022 		goto done;
1023 
1024 	switch (cmd) {
1025 	case SIOCGIFADDR:	/* Get interface address */
1026 		sin->sin_addr.s_addr = ifa->ifa_local;
1027 		goto rarok;
1028 
1029 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1030 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1031 		goto rarok;
1032 
1033 	case SIOCGIFDSTADDR:	/* Get the destination address */
1034 		sin->sin_addr.s_addr = ifa->ifa_address;
1035 		goto rarok;
1036 
1037 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1038 		sin->sin_addr.s_addr = ifa->ifa_mask;
1039 		goto rarok;
1040 
1041 	case SIOCSIFFLAGS:
1042 		if (colon) {
1043 			ret = -EADDRNOTAVAIL;
1044 			if (!ifa)
1045 				break;
1046 			ret = 0;
1047 			if (!(ifr.ifr_flags & IFF_UP))
1048 				inet_del_ifa(in_dev, ifap, 1);
1049 			break;
1050 		}
1051 		ret = dev_change_flags(dev, ifr.ifr_flags);
1052 		break;
1053 
1054 	case SIOCSIFADDR:	/* Set interface address (and family) */
1055 		ret = -EINVAL;
1056 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1057 			break;
1058 
1059 		if (!ifa) {
1060 			ret = -ENOBUFS;
1061 			ifa = inet_alloc_ifa();
1062 			if (!ifa)
1063 				break;
1064 			INIT_HLIST_NODE(&ifa->hash);
1065 			if (colon)
1066 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1067 			else
1068 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1069 		} else {
1070 			ret = 0;
1071 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1072 				break;
1073 			inet_del_ifa(in_dev, ifap, 0);
1074 			ifa->ifa_broadcast = 0;
1075 			ifa->ifa_scope = 0;
1076 		}
1077 
1078 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1079 
1080 		if (!(dev->flags & IFF_POINTOPOINT)) {
1081 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1082 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1083 			if ((dev->flags & IFF_BROADCAST) &&
1084 			    ifa->ifa_prefixlen < 31)
1085 				ifa->ifa_broadcast = ifa->ifa_address |
1086 						     ~ifa->ifa_mask;
1087 		} else {
1088 			ifa->ifa_prefixlen = 32;
1089 			ifa->ifa_mask = inet_make_mask(32);
1090 		}
1091 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1092 		ret = inet_set_ifa(dev, ifa);
1093 		break;
1094 
1095 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1096 		ret = 0;
1097 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1098 			inet_del_ifa(in_dev, ifap, 0);
1099 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1100 			inet_insert_ifa(ifa);
1101 		}
1102 		break;
1103 
1104 	case SIOCSIFDSTADDR:	/* Set the destination address */
1105 		ret = 0;
1106 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1107 			break;
1108 		ret = -EINVAL;
1109 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1110 			break;
1111 		ret = 0;
1112 		inet_del_ifa(in_dev, ifap, 0);
1113 		ifa->ifa_address = sin->sin_addr.s_addr;
1114 		inet_insert_ifa(ifa);
1115 		break;
1116 
1117 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1118 
1119 		/*
1120 		 *	The mask we set must be legal.
1121 		 */
1122 		ret = -EINVAL;
1123 		if (bad_mask(sin->sin_addr.s_addr, 0))
1124 			break;
1125 		ret = 0;
1126 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1127 			__be32 old_mask = ifa->ifa_mask;
1128 			inet_del_ifa(in_dev, ifap, 0);
1129 			ifa->ifa_mask = sin->sin_addr.s_addr;
1130 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1131 
1132 			/* See if the current broadcast address was derived
1133 			 * from the old netmask; if so, recalculate it for
1134 			 * the new mask. Otherwise it's a funny address, so
1135 			 * don't touch it since the user seems to know what
1136 			 * (s)he's doing...
1137 			 */
1138 			if ((dev->flags & IFF_BROADCAST) &&
1139 			    (ifa->ifa_prefixlen < 31) &&
1140 			    (ifa->ifa_broadcast ==
1141 			     (ifa->ifa_local|~old_mask))) {
1142 				ifa->ifa_broadcast = (ifa->ifa_local |
1143 						      ~sin->sin_addr.s_addr);
1144 			}
1145 			inet_insert_ifa(ifa);
1146 		}
1147 		break;
1148 	}
1149 done:
1150 	rtnl_unlock();
1151 out:
1152 	return ret;
1153 rarok:
1154 	rtnl_unlock();
1155 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1156 	goto out;
1157 }
1158 
1159 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1160 {
1161 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1162 	struct in_ifaddr *ifa;
1163 	struct ifreq ifr;
1164 	int done = 0;
1165 
1166 	if (!in_dev)
1167 		goto out;
1168 
1169 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1170 		if (!buf) {
1171 			done += sizeof(ifr);
1172 			continue;
1173 		}
1174 		if (len < (int) sizeof(ifr))
1175 			break;
1176 		memset(&ifr, 0, sizeof(struct ifreq));
1177 		strcpy(ifr.ifr_name, ifa->ifa_label);
1178 
1179 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1180 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1181 								ifa->ifa_local;
1182 
1183 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1184 			done = -EFAULT;
1185 			break;
1186 		}
1187 		buf  += sizeof(struct ifreq);
1188 		len  -= sizeof(struct ifreq);
1189 		done += sizeof(struct ifreq);
1190 	}
1191 out:
1192 	return done;
1193 }
1194 
1195 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1196 				 int scope)
1197 {
1198 	for_primary_ifa(in_dev) {
1199 		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1200 		    ifa->ifa_scope <= scope)
1201 			return ifa->ifa_local;
1202 	} endfor_ifa(in_dev);
1203 
1204 	return 0;
1205 }
1206 
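/* Select a source address on @dev suitable for talking to @dst within
 * @scope.  Primary addresses on the device are preferred; failing
 * that, the search falls back to the L3 master device (VRF) and then
 * to the other devices in the same L3 domain.
 */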
1207 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1208 {
1209 	__be32 addr = 0;
1210 	struct in_device *in_dev;
1211 	struct net *net = dev_net(dev);
1212 	int master_idx;
1213 
1214 	rcu_read_lock();
1215 	in_dev = __in_dev_get_rcu(dev);
1216 	if (!in_dev)
1217 		goto no_in_dev;
1218 
1219 	for_primary_ifa(in_dev) {
1220 		if (ifa->ifa_scope > scope)
1221 			continue;
1222 		if (!dst || inet_ifa_match(dst, ifa)) {
1223 			addr = ifa->ifa_local;
1224 			break;
1225 		}
1226 		if (!addr)
1227 			addr = ifa->ifa_local;
1228 	} endfor_ifa(in_dev);
1229 
1230 	if (addr)
1231 		goto out_unlock;
1232 no_in_dev:
1233 	master_idx = l3mdev_master_ifindex_rcu(dev);
1234 
1235 	/* For VRFs, the VRF device takes the place of the loopback device,
1236 	 * with addresses on it being preferred.  Note in such cases the
1237 	 * loopback device will be among the devices that fail the master_idx
1238 	 * equality check in the loop below.
1239 	 */
1240 	if (master_idx &&
1241 	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1242 	    (in_dev = __in_dev_get_rcu(dev))) {
1243 		addr = in_dev_select_addr(in_dev, scope);
1244 		if (addr)
1245 			goto out_unlock;
1246 	}
1247 
1248 	/* Non-loopback addresses configured on the loopback device should
1249 	   be preferred in this case. It is important that lo is the first
1250 	   interface in the dev_base list.
1251 	 */
1252 	for_each_netdev_rcu(net, dev) {
1253 		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1254 			continue;
1255 
1256 		in_dev = __in_dev_get_rcu(dev);
1257 		if (!in_dev)
1258 			continue;
1259 
1260 		addr = in_dev_select_addr(in_dev, scope);
1261 		if (addr)
1262 			goto out_unlock;
1263 	}
1264 out_unlock:
1265 	rcu_read_unlock();
1266 	return addr;
1267 }
1268 EXPORT_SYMBOL(inet_select_addr);
1269 
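/* Helper for inet_confirm_addr(): check a single in_device for @local
 * (or, when @local is zero, for any address within @scope), optionally
 * restricted to the subnet of @dst.  Returns the confirmed address or
 * 0 if nothing matches.
 */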
1270 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1271 			      __be32 local, int scope)
1272 {
1273 	int same = 0;
1274 	__be32 addr = 0;
1275 
1276 	for_ifa(in_dev) {
1277 		if (!addr &&
1278 		    (local == ifa->ifa_local || !local) &&
1279 		    ifa->ifa_scope <= scope) {
1280 			addr = ifa->ifa_local;
1281 			if (same)
1282 				break;
1283 		}
1284 		if (!same) {
1285 			same = (!local || inet_ifa_match(local, ifa)) &&
1286 				(!dst || inet_ifa_match(dst, ifa));
1287 			if (same && addr) {
1288 				if (local || !dst)
1289 					break;
1290 				/* Is the selected addr in the dst subnet? */
1291 				if (inet_ifa_match(addr, ifa))
1292 					break;
1293 				/* No, then can we use new local src? */
1294 				if (ifa->ifa_scope <= scope) {
1295 					addr = ifa->ifa_local;
1296 					break;
1297 				}
1298 				/* keep searching for a dst subnet that also covers addr */
1299 				same = 0;
1300 			}
1301 		}
1302 	} endfor_ifa(in_dev);
1303 
1304 	return same ? addr : 0;
1305 }
1306 
1307 /*
1308  * Confirm that local IP address exists using wildcards:
1309  * - net: netns to check, cannot be NULL
1310  * - in_dev: only on this interface, NULL=any interface
1311  * - dst: only in the same subnet as dst, 0=any dst
1312  * - local: address, 0=autoselect the local address
1313  * - scope: maximum allowed scope value for the local address
1314  */
1315 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1316 			 __be32 dst, __be32 local, int scope)
1317 {
1318 	__be32 addr = 0;
1319 	struct net_device *dev;
1320 
1321 	if (in_dev)
1322 		return confirm_addr_indev(in_dev, dst, local, scope);
1323 
1324 	rcu_read_lock();
1325 	for_each_netdev_rcu(net, dev) {
1326 		in_dev = __in_dev_get_rcu(dev);
1327 		if (in_dev) {
1328 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1329 			if (addr)
1330 				break;
1331 		}
1332 	}
1333 	rcu_read_unlock();
1334 
1335 	return addr;
1336 }
1337 EXPORT_SYMBOL(inet_confirm_addr);
1338 
1339 /*
1340  *	Device notifier
1341  */
1342 
1343 int register_inetaddr_notifier(struct notifier_block *nb)
1344 {
1345 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1346 }
1347 EXPORT_SYMBOL(register_inetaddr_notifier);
1348 
1349 int unregister_inetaddr_notifier(struct notifier_block *nb)
1350 {
1351 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1352 }
1353 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1354 
1355 /* Rename ifa_labels for a device name change. Make some effort to preserve
1356  * existing alias numbering and to create unique labels if possible.
1357 */
1358 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1359 {
1360 	struct in_ifaddr *ifa;
1361 	int named = 0;
1362 
1363 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1364 		char old[IFNAMSIZ], *dot;
1365 
1366 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1367 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1368 		if (named++ == 0)
1369 			goto skip;
1370 		dot = strchr(old, ':');
1371 		if (!dot) {
1372 			sprintf(old, ":%d", named);
1373 			dot = old;
1374 		}
1375 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1376 			strcat(ifa->ifa_label, dot);
1377 		else
1378 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1379 skip:
1380 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1381 	}
1382 }
1383 
1384 static bool inetdev_valid_mtu(unsigned int mtu)
1385 {
1386 	return mtu >= 68;
1387 }
1388 
1389 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1390 					struct in_device *in_dev)
1391 
1392 {
1393 	struct in_ifaddr *ifa;
1394 
1395 	for (ifa = in_dev->ifa_list; ifa;
1396 	     ifa = ifa->ifa_next) {
1397 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1398 			 ifa->ifa_local, dev,
1399 			 ifa->ifa_local, NULL,
1400 			 dev->dev_addr, NULL);
1401 	}
1402 }
1403 
1404 /* Called only under RTNL semaphore */
1405 
1406 static int inetdev_event(struct notifier_block *this, unsigned long event,
1407 			 void *ptr)
1408 {
1409 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1410 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1411 
1412 	ASSERT_RTNL();
1413 
1414 	if (!in_dev) {
1415 		if (event == NETDEV_REGISTER) {
1416 			in_dev = inetdev_init(dev);
1417 			if (IS_ERR(in_dev))
1418 				return notifier_from_errno(PTR_ERR(in_dev));
1419 			if (dev->flags & IFF_LOOPBACK) {
1420 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1421 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1422 			}
1423 		} else if (event == NETDEV_CHANGEMTU) {
1424 			/* Re-enabling IP */
1425 			if (inetdev_valid_mtu(dev->mtu))
1426 				in_dev = inetdev_init(dev);
1427 		}
1428 		goto out;
1429 	}
1430 
1431 	switch (event) {
1432 	case NETDEV_REGISTER:
1433 		pr_debug("%s: bug\n", __func__);
1434 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1435 		break;
1436 	case NETDEV_UP:
1437 		if (!inetdev_valid_mtu(dev->mtu))
1438 			break;
1439 		if (dev->flags & IFF_LOOPBACK) {
1440 			struct in_ifaddr *ifa = inet_alloc_ifa();
1441 
1442 			if (ifa) {
1443 				INIT_HLIST_NODE(&ifa->hash);
1444 				ifa->ifa_local =
1445 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1446 				ifa->ifa_prefixlen = 8;
1447 				ifa->ifa_mask = inet_make_mask(8);
1448 				in_dev_hold(in_dev);
1449 				ifa->ifa_dev = in_dev;
1450 				ifa->ifa_scope = RT_SCOPE_HOST;
1451 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1452 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1453 						 INFINITY_LIFE_TIME);
1454 				ipv4_devconf_setall(in_dev);
1455 				neigh_parms_data_state_setall(in_dev->arp_parms);
1456 				inet_insert_ifa(ifa);
1457 			}
1458 		}
1459 		ip_mc_up(in_dev);
1460 		/* fall through */
1461 	case NETDEV_CHANGEADDR:
1462 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1463 			break;
1464 		/* fall through */
1465 	case NETDEV_NOTIFY_PEERS:
1466 		/* Send gratuitous ARP to notify of link change */
1467 		inetdev_send_gratuitous_arp(dev, in_dev);
1468 		break;
1469 	case NETDEV_DOWN:
1470 		ip_mc_down(in_dev);
1471 		break;
1472 	case NETDEV_PRE_TYPE_CHANGE:
1473 		ip_mc_unmap(in_dev);
1474 		break;
1475 	case NETDEV_POST_TYPE_CHANGE:
1476 		ip_mc_remap(in_dev);
1477 		break;
1478 	case NETDEV_CHANGEMTU:
1479 		if (inetdev_valid_mtu(dev->mtu))
1480 			break;
1481 		/* disable IP when MTU is not enough */
1482 	case NETDEV_UNREGISTER:
1483 		inetdev_destroy(in_dev);
1484 		break;
1485 	case NETDEV_CHANGENAME:
1486 		/* Do not notify about label change, this event is
1487 		 * not interesting to applications using netlink.
1488 		 */
1489 		inetdev_changename(dev, in_dev);
1490 
1491 		devinet_sysctl_unregister(in_dev);
1492 		devinet_sysctl_register(in_dev);
1493 		break;
1494 	}
1495 out:
1496 	return NOTIFY_DONE;
1497 }
1498 
1499 static struct notifier_block ip_netdev_notifier = {
1500 	.notifier_call = inetdev_event,
1501 };
1502 
1503 static size_t inet_nlmsg_size(void)
1504 {
1505 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1506 	       + nla_total_size(4) /* IFA_ADDRESS */
1507 	       + nla_total_size(4) /* IFA_LOCAL */
1508 	       + nla_total_size(4) /* IFA_BROADCAST */
1509 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1510 	       + nla_total_size(4)  /* IFA_FLAGS */
1511 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1512 }
1513 
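/* Convert a jiffies timestamp into hundredths of a second since boot,
 * the unit used by struct ifa_cacheinfo.
 */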
1514 static inline u32 cstamp_delta(unsigned long cstamp)
1515 {
1516 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1517 }
1518 
1519 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1520 			 unsigned long tstamp, u32 preferred, u32 valid)
1521 {
1522 	struct ifa_cacheinfo ci;
1523 
1524 	ci.cstamp = cstamp_delta(cstamp);
1525 	ci.tstamp = cstamp_delta(tstamp);
1526 	ci.ifa_prefered = preferred;
1527 	ci.ifa_valid = valid;
1528 
1529 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1530 }
1531 
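/* Fill one RTM_NEWADDR/RTM_DELADDR message for @ifa, including the
 * remaining preferred and valid lifetimes for non-permanent addresses.
 */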
1532 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1533 			    u32 portid, u32 seq, int event, unsigned int flags)
1534 {
1535 	struct ifaddrmsg *ifm;
1536 	struct nlmsghdr  *nlh;
1537 	u32 preferred, valid;
1538 
1539 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1540 	if (!nlh)
1541 		return -EMSGSIZE;
1542 
1543 	ifm = nlmsg_data(nlh);
1544 	ifm->ifa_family = AF_INET;
1545 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1546 	ifm->ifa_flags = ifa->ifa_flags;
1547 	ifm->ifa_scope = ifa->ifa_scope;
1548 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1549 
1550 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1551 		preferred = ifa->ifa_preferred_lft;
1552 		valid = ifa->ifa_valid_lft;
1553 		if (preferred != INFINITY_LIFE_TIME) {
1554 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1555 
1556 			if (preferred > tval)
1557 				preferred -= tval;
1558 			else
1559 				preferred = 0;
1560 			if (valid != INFINITY_LIFE_TIME) {
1561 				if (valid > tval)
1562 					valid -= tval;
1563 				else
1564 					valid = 0;
1565 			}
1566 		}
1567 	} else {
1568 		preferred = INFINITY_LIFE_TIME;
1569 		valid = INFINITY_LIFE_TIME;
1570 	}
1571 	if ((ifa->ifa_address &&
1572 	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1573 	    (ifa->ifa_local &&
1574 	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1575 	    (ifa->ifa_broadcast &&
1576 	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1577 	    (ifa->ifa_label[0] &&
1578 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1579 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1580 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1581 			  preferred, valid))
1582 		goto nla_put_failure;
1583 
1584 	nlmsg_end(skb, nlh);
1585 	return 0;
1586 
1587 nla_put_failure:
1588 	nlmsg_cancel(skb, nlh);
1589 	return -EMSGSIZE;
1590 }
1591 
1592 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1593 {
1594 	struct net *net = sock_net(skb->sk);
1595 	int h, s_h;
1596 	int idx, s_idx;
1597 	int ip_idx, s_ip_idx;
1598 	struct net_device *dev;
1599 	struct in_device *in_dev;
1600 	struct in_ifaddr *ifa;
1601 	struct hlist_head *head;
1602 
1603 	s_h = cb->args[0];
1604 	s_idx = idx = cb->args[1];
1605 	s_ip_idx = ip_idx = cb->args[2];
1606 
1607 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1608 		idx = 0;
1609 		head = &net->dev_index_head[h];
1610 		rcu_read_lock();
1611 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1612 			  net->dev_base_seq;
1613 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1614 			if (idx < s_idx)
1615 				goto cont;
1616 			if (h > s_h || idx > s_idx)
1617 				s_ip_idx = 0;
1618 			in_dev = __in_dev_get_rcu(dev);
1619 			if (!in_dev)
1620 				goto cont;
1621 
1622 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1623 			     ifa = ifa->ifa_next, ip_idx++) {
1624 				if (ip_idx < s_ip_idx)
1625 					continue;
1626 				if (inet_fill_ifaddr(skb, ifa,
1627 					     NETLINK_CB(cb->skb).portid,
1628 					     cb->nlh->nlmsg_seq,
1629 					     RTM_NEWADDR, NLM_F_MULTI) < 0) {
1630 					rcu_read_unlock();
1631 					goto done;
1632 				}
1633 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1634 			}
1635 cont:
1636 			idx++;
1637 		}
1638 		rcu_read_unlock();
1639 	}
1640 
1641 done:
1642 	cb->args[0] = h;
1643 	cb->args[1] = idx;
1644 	cb->args[2] = ip_idx;
1645 
1646 	return skb->len;
1647 }
1648 
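/* Build an ifaddr message for @ifa and notify RTNLGRP_IPV4_IFADDR
 * listeners about the change.
 */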
1649 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1650 		      u32 portid)
1651 {
1652 	struct sk_buff *skb;
1653 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1654 	int err = -ENOBUFS;
1655 	struct net *net;
1656 
1657 	net = dev_net(ifa->ifa_dev->dev);
1658 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1659 	if (!skb)
1660 		goto errout;
1661 
1662 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1663 	if (err < 0) {
1664 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1665 		WARN_ON(err == -EMSGSIZE);
1666 		kfree_skb(skb);
1667 		goto errout;
1668 	}
1669 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1670 	return;
1671 errout:
1672 	if (err < 0)
1673 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1674 }
1675 
1676 static size_t inet_get_link_af_size(const struct net_device *dev,
1677 				    u32 ext_filter_mask)
1678 {
1679 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1680 
1681 	if (!in_dev)
1682 		return 0;
1683 
1684 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1685 }
1686 
1687 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1688 			     u32 ext_filter_mask)
1689 {
1690 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1691 	struct nlattr *nla;
1692 	int i;
1693 
1694 	if (!in_dev)
1695 		return -ENODATA;
1696 
1697 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1698 	if (!nla)
1699 		return -EMSGSIZE;
1700 
1701 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1702 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1703 
1704 	return 0;
1705 }
1706 
1707 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1708 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1709 };
1710 
1711 static int inet_validate_link_af(const struct net_device *dev,
1712 				 const struct nlattr *nla)
1713 {
1714 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1715 	int err, rem;
1716 
1717 	if (dev && !__in_dev_get_rtnl(dev))
1718 		return -EAFNOSUPPORT;
1719 
1720 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1721 	if (err < 0)
1722 		return err;
1723 
1724 	if (tb[IFLA_INET_CONF]) {
1725 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1726 			int cfgid = nla_type(a);
1727 
1728 			if (nla_len(a) < 4)
1729 				return -EINVAL;
1730 
1731 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1732 				return -EINVAL;
1733 		}
1734 	}
1735 
1736 	return 0;
1737 }
1738 
1739 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1740 {
1741 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1742 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1743 	int rem;
1744 
1745 	if (!in_dev)
1746 		return -EAFNOSUPPORT;
1747 
1748 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1749 		BUG();
1750 
1751 	if (tb[IFLA_INET_CONF]) {
1752 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1753 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1754 	}
1755 
1756 	return 0;
1757 }
1758 
1759 static int inet_netconf_msgsize_devconf(int type)
1760 {
1761 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1762 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1763 	bool all = false;
1764 
1765 	if (type == NETCONFA_ALL)
1766 		all = true;
1767 
1768 	if (all || type == NETCONFA_FORWARDING)
1769 		size += nla_total_size(4);
1770 	if (all || type == NETCONFA_RP_FILTER)
1771 		size += nla_total_size(4);
1772 	if (all || type == NETCONFA_MC_FORWARDING)
1773 		size += nla_total_size(4);
1774 	if (all || type == NETCONFA_PROXY_NEIGH)
1775 		size += nla_total_size(4);
1776 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1777 		size += nla_total_size(4);
1778 
1779 	return size;
1780 }
1781 
1782 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1783 				     struct ipv4_devconf *devconf, u32 portid,
1784 				     u32 seq, int event, unsigned int flags,
1785 				     int type)
1786 {
1787 	struct nlmsghdr  *nlh;
1788 	struct netconfmsg *ncm;
1789 	bool all = false;
1790 
1791 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1792 			flags);
1793 	if (!nlh)
1794 		return -EMSGSIZE;
1795 
1796 	if (type == NETCONFA_ALL)
1797 		all = true;
1798 
1799 	ncm = nlmsg_data(nlh);
1800 	ncm->ncm_family = AF_INET;
1801 
1802 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1803 		goto nla_put_failure;
1804 
1805 	if (!devconf)
1806 		goto out;
1807 
1808 	if ((all || type == NETCONFA_FORWARDING) &&
1809 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1810 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1811 		goto nla_put_failure;
1812 	if ((all || type == NETCONFA_RP_FILTER) &&
1813 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1814 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1815 		goto nla_put_failure;
1816 	if ((all || type == NETCONFA_MC_FORWARDING) &&
1817 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1818 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1819 		goto nla_put_failure;
1820 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
1821 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1822 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1823 		goto nla_put_failure;
1824 	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1825 	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1826 			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1827 		goto nla_put_failure;
1828 
1829 out:
1830 	nlmsg_end(skb, nlh);
1831 	return 0;
1832 
1833 nla_put_failure:
1834 	nlmsg_cancel(skb, nlh);
1835 	return -EMSGSIZE;
1836 }
1837 
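/* Broadcast a netconf notification (@event) for one devconf attribute,
 * or for all of them with NETCONFA_ALL, to RTNLGRP_IPV4_NETCONF
 * listeners.
 */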
1838 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1839 				 int ifindex, struct ipv4_devconf *devconf)
1840 {
1841 	struct sk_buff *skb;
1842 	int err = -ENOBUFS;
1843 
1844 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1845 	if (!skb)
1846 		goto errout;
1847 
1848 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1849 					event, 0, type);
1850 	if (err < 0) {
1851 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1852 		WARN_ON(err == -EMSGSIZE);
1853 		kfree_skb(skb);
1854 		goto errout;
1855 	}
1856 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1857 	return;
1858 errout:
1859 	if (err < 0)
1860 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1861 }
1862 
1863 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1864 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1865 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1866 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1867 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1868 	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
1869 };
1870 
1871 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1872 				    struct nlmsghdr *nlh)
1873 {
1874 	struct net *net = sock_net(in_skb->sk);
1875 	struct nlattr *tb[NETCONFA_MAX+1];
1876 	struct netconfmsg *ncm;
1877 	struct sk_buff *skb;
1878 	struct ipv4_devconf *devconf;
1879 	struct in_device *in_dev;
1880 	struct net_device *dev;
1881 	int ifindex;
1882 	int err;
1883 
1884 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1885 			  devconf_ipv4_policy);
1886 	if (err < 0)
1887 		goto errout;
1888 
1889 	err = -EINVAL;
1890 	if (!tb[NETCONFA_IFINDEX])
1891 		goto errout;
1892 
1893 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1894 	switch (ifindex) {
1895 	case NETCONFA_IFINDEX_ALL:
1896 		devconf = net->ipv4.devconf_all;
1897 		break;
1898 	case NETCONFA_IFINDEX_DEFAULT:
1899 		devconf = net->ipv4.devconf_dflt;
1900 		break;
1901 	default:
1902 		dev = __dev_get_by_index(net, ifindex);
1903 		if (!dev)
1904 			goto errout;
1905 		in_dev = __in_dev_get_rtnl(dev);
1906 		if (!in_dev)
1907 			goto errout;
1908 		devconf = &in_dev->cnf;
1909 		break;
1910 	}
1911 
1912 	err = -ENOBUFS;
1913 	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1914 	if (!skb)
1915 		goto errout;
1916 
1917 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1918 					NETLINK_CB(in_skb).portid,
1919 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1920 					NETCONFA_ALL);
1921 	if (err < 0) {
1922 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1923 		WARN_ON(err == -EMSGSIZE);
1924 		kfree_skb(skb);
1925 		goto errout;
1926 	}
1927 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1928 errout:
1929 	return err;
1930 }
1931 
1932 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1933 				     struct netlink_callback *cb)
1934 {
1935 	struct net *net = sock_net(skb->sk);
1936 	int h, s_h;
1937 	int idx, s_idx;
1938 	struct net_device *dev;
1939 	struct in_device *in_dev;
1940 	struct hlist_head *head;
1941 
1942 	s_h = cb->args[0];
1943 	s_idx = idx = cb->args[1];
1944 
1945 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1946 		idx = 0;
1947 		head = &net->dev_index_head[h];
1948 		rcu_read_lock();
1949 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1950 			  net->dev_base_seq;
1951 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1952 			if (idx < s_idx)
1953 				goto cont;
1954 			in_dev = __in_dev_get_rcu(dev);
1955 			if (!in_dev)
1956 				goto cont;
1957 
1958 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1959 						      &in_dev->cnf,
1960 						      NETLINK_CB(cb->skb).portid,
1961 						      cb->nlh->nlmsg_seq,
1962 						      RTM_NEWNETCONF,
1963 						      NLM_F_MULTI,
1964 						      NETCONFA_ALL) < 0) {
1965 				rcu_read_unlock();
1966 				goto done;
1967 			}
1968 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1969 cont:
1970 			idx++;
1971 		}
1972 		rcu_read_unlock();
1973 	}
1974 	if (h == NETDEV_HASHENTRIES) {
1975 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1976 					      net->ipv4.devconf_all,
1977 					      NETLINK_CB(cb->skb).portid,
1978 					      cb->nlh->nlmsg_seq,
1979 					      RTM_NEWNETCONF, NLM_F_MULTI,
1980 					      NETCONFA_ALL) < 0)
1981 			goto done;
1982 		else
1983 			h++;
1984 	}
1985 	if (h == NETDEV_HASHENTRIES + 1) {
1986 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1987 					      net->ipv4.devconf_dflt,
1988 					      NETLINK_CB(cb->skb).portid,
1989 					      cb->nlh->nlmsg_seq,
1990 					      RTM_NEWNETCONF, NLM_F_MULTI,
1991 					      NETCONFA_ALL) < 0)
1992 			goto done;
1993 		else
1994 			h++;
1995 	}
1996 done:
1997 	cb->args[0] = h;
1998 	cb->args[1] = idx;
1999 
2000 	return skb->len;
2001 }
2002 
2003 #ifdef CONFIG_SYSCTL
2004 
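/* Propagate the "default" value of option @i to every in_device in @net
 * that has not had that option set explicitly (state bit @i still clear).
 */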
2005 static void devinet_copy_dflt_conf(struct net *net, int i)
2006 {
2007 	struct net_device *dev;
2008 
2009 	rcu_read_lock();
2010 	for_each_netdev_rcu(net, dev) {
2011 		struct in_device *in_dev;
2012 
2013 		in_dev = __in_dev_get_rcu(dev);
2014 		if (in_dev && !test_bit(i, in_dev->cnf.state))
2015 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2016 	}
2017 	rcu_read_unlock();
2018 }
2019 
2020 /* called with RTNL locked */
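/* Fan the "all" forwarding value out to "default" and to every device,
 * set accept_redirects for "all" to the opposite of forwarding, disable
 * LRO when forwarding is switched on, and send a netconf notification for
 * each changed block.
 */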
2021 static void inet_forward_change(struct net *net)
2022 {
2023 	struct net_device *dev;
2024 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2025 
2026 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2027 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2028 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2029 				    NETCONFA_FORWARDING,
2030 				    NETCONFA_IFINDEX_ALL,
2031 				    net->ipv4.devconf_all);
2032 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2033 				    NETCONFA_FORWARDING,
2034 				    NETCONFA_IFINDEX_DEFAULT,
2035 				    net->ipv4.devconf_dflt);
2036 
2037 	for_each_netdev(net, dev) {
2038 		struct in_device *in_dev;
2039 
2040 		if (on)
2041 			dev_disable_lro(dev);
2042 
2043 		in_dev = __in_dev_get_rtnl(dev);
2044 		if (in_dev) {
2045 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2046 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2047 						    NETCONFA_FORWARDING,
2048 						    dev->ifindex, &in_dev->cnf);
2049 		}
2050 	}
2051 }
2052 
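/* Map a devconf block back to the ifindex reported in netconf
 * notifications: the "all"/"default" sentinels, or the ifindex of the
 * owning device.
 */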
2053 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2054 {
2055 	if (cnf == net->ipv4.devconf_dflt)
2056 		return NETCONFA_IFINDEX_DEFAULT;
2057 	else if (cnf == net->ipv4.devconf_all)
2058 		return NETCONFA_IFINDEX_ALL;
2059 	else {
2060 		struct in_device *idev
2061 			= container_of(cnf, struct in_device, cnf);
2062 		return idev->dev->ifindex;
2063 	}
2064 }
2065 
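/* Generic proc handler for the per-interface sysctls: mark the option as
 * explicitly set, propagate "default" writes to unconfigured devices,
 * flush the route cache when accept_local/route_localnet are cleared, and
 * notify netconf listeners about rp_filter, proxy_arp and
 * ignore_routes_with_linkdown changes.
 */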
2066 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2067 			     void __user *buffer,
2068 			     size_t *lenp, loff_t *ppos)
2069 {
2070 	int old_value = *(int *)ctl->data;
2071 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2072 	int new_value = *(int *)ctl->data;
2073 
2074 	if (write) {
2075 		struct ipv4_devconf *cnf = ctl->extra1;
2076 		struct net *net = ctl->extra2;
2077 		int i = (int *)ctl->data - cnf->data;
2078 		int ifindex;
2079 
2080 		set_bit(i, cnf->state);
2081 
2082 		if (cnf == net->ipv4.devconf_dflt)
2083 			devinet_copy_dflt_conf(net, i);
2084 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2085 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2086 			if ((new_value == 0) && (old_value != 0))
2087 				rt_cache_flush(net);
2088 
2089 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2090 		    new_value != old_value) {
2091 			ifindex = devinet_conf_ifindex(net, cnf);
2092 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2093 						    NETCONFA_RP_FILTER,
2094 						    ifindex, cnf);
2095 		}
2096 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2097 		    new_value != old_value) {
2098 			ifindex = devinet_conf_ifindex(net, cnf);
2099 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2100 						    NETCONFA_PROXY_NEIGH,
2101 						    ifindex, cnf);
2102 		}
2103 		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2104 		    new_value != old_value) {
2105 			ifindex = devinet_conf_ifindex(net, cnf);
2106 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2107 						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2108 						    ifindex, cnf);
2109 		}
2110 	}
2111 
2112 	return ret;
2113 }
2114 
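/* proc handler for the "forwarding" sysctls.  Changing forwarding needs
 * the RTNL lock; if it cannot be taken, the old value and file position
 * are restored and the syscall is restarted.  Writes to "all" fan out via
 * inet_forward_change(), per-device writes disable LRO and notify that
 * device only, and "default" merely sends a notification.
 */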
2115 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2116 				  void __user *buffer,
2117 				  size_t *lenp, loff_t *ppos)
2118 {
2119 	int *valp = ctl->data;
2120 	int val = *valp;
2121 	loff_t pos = *ppos;
2122 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2123 
2124 	if (write && *valp != val) {
2125 		struct net *net = ctl->extra2;
2126 
2127 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2128 			if (!rtnl_trylock()) {
2129 				/* Restore the original values before restarting */
2130 				*valp = val;
2131 				*ppos = pos;
2132 				return restart_syscall();
2133 			}
2134 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2135 				inet_forward_change(net);
2136 			} else {
2137 				struct ipv4_devconf *cnf = ctl->extra1;
2138 				struct in_device *idev =
2139 					container_of(cnf, struct in_device, cnf);
2140 				if (*valp)
2141 					dev_disable_lro(idev->dev);
2142 				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2143 							    NETCONFA_FORWARDING,
2144 							    idev->dev->ifindex,
2145 							    cnf);
2146 			}
2147 			rtnl_unlock();
2148 			rt_cache_flush(net);
2149 		} else
2150 			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2151 						    NETCONFA_FORWARDING,
2152 						    NETCONFA_IFINDEX_DEFAULT,
2153 						    net->ipv4.devconf_dflt);
2154 	}
2155 
2156 	return ret;
2157 }
2158 
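/* proc_dointvec wrapper that flushes the IPv4 routing cache whenever the
 * written value actually differs from the old one.
 */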
2159 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2160 				void __user *buffer,
2161 				size_t *lenp, loff_t *ppos)
2162 {
2163 	int *valp = ctl->data;
2164 	int val = *valp;
2165 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2166 	struct net *net = ctl->extra2;
2167 
2168 	if (write && *valp != val)
2169 		rt_cache_flush(net);
2170 
2171 	return ret;
2172 }
2173 
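/* The template entries below point .data at slots of the static
 * ipv4_devconf; __devinet_sysctl_register() rebases .data and .extra1 onto
 * the per-device or per-namespace copy when the table is duplicated.
 */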
2174 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2175 	{ \
2176 		.procname	= name, \
2177 		.data		= ipv4_devconf.data + \
2178 				  IPV4_DEVCONF_ ## attr - 1, \
2179 		.maxlen		= sizeof(int), \
2180 		.mode		= mval, \
2181 		.proc_handler	= proc, \
2182 		.extra1		= &ipv4_devconf, \
2183 	}
2184 
2185 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2186 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2187 
2188 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2189 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2190 
2191 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2192 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2193 
2194 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2195 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2196 
2197 static struct devinet_sysctl_table {
2198 	struct ctl_table_header *sysctl_header;
2199 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2200 } devinet_sysctl = {
2201 	.devinet_vars = {
2202 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2203 					     devinet_sysctl_forward),
2204 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2205 
2206 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2207 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2208 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2209 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2210 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2211 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2212 					"accept_source_route"),
2213 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2214 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2215 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2216 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2217 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2218 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2219 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2220 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2221 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2222 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2223 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2224 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2225 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2226 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2227 					"force_igmp_version"),
2228 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2229 					"igmpv2_unsolicited_report_interval"),
2230 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2231 					"igmpv3_unsolicited_report_interval"),
2232 		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2233 					"ignore_routes_with_linkdown"),
2234 		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2235 					"drop_gratuitous_arp"),
2236 
2237 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2238 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2239 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2240 					      "promote_secondaries"),
2241 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2242 					      "route_localnet"),
2243 		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2244 					      "drop_unicast_in_l2_multicast"),
2245 	},
2246 };
2247 
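/* Duplicate the template table, rebase every entry onto @p, register it
 * under net/ipv4/conf/<dev_name> and announce the new config block with a
 * NETCONFA_ALL notification.
 */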
2248 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2249 				     int ifindex, struct ipv4_devconf *p)
2250 {
2251 	int i;
2252 	struct devinet_sysctl_table *t;
2253 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2254 
2255 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2256 	if (!t)
2257 		goto out;
2258 
2259 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2260 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2261 		t->devinet_vars[i].extra1 = p;
2262 		t->devinet_vars[i].extra2 = net;
2263 	}
2264 
2265 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2266 
2267 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2268 	if (!t->sysctl_header)
2269 		goto free;
2270 
2271 	p->sysctl = t;
2272 
2273 	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2274 				    ifindex, p);
2275 	return 0;
2276 
2277 free:
2278 	kfree(t);
2279 out:
2280 	return -ENOBUFS;
2281 }
2282 
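/* Tear down the sysctl table attached to @cnf (if any) and broadcast
 * RTM_DELNETCONF for @ifindex.
 */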
2283 static void __devinet_sysctl_unregister(struct net *net,
2284 					struct ipv4_devconf *cnf, int ifindex)
2285 {
2286 	struct devinet_sysctl_table *t = cnf->sysctl;
2287 
2288 	if (t) {
2289 		cnf->sysctl = NULL;
2290 		unregister_net_sysctl_table(t->sysctl_header);
2291 		kfree(t);
2292 	}
2293 
2294 	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2295 }
2296 
2297 static int devinet_sysctl_register(struct in_device *idev)
2298 {
2299 	int err;
2300 
2301 	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2302 		return -EINVAL;
2303 
2304 	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2305 	if (err)
2306 		return err;
2307 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2308 					idev->dev->ifindex, &idev->cnf);
2309 	if (err)
2310 		neigh_sysctl_unregister(idev->arp_parms);
2311 	return err;
2312 }
2313 
2314 static void devinet_sysctl_unregister(struct in_device *idev)
2315 {
2316 	struct net *net = dev_net(idev->dev);
2317 
2318 	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2319 	neigh_sysctl_unregister(idev->arp_parms);
2320 }
2321 
2322 static struct ctl_table ctl_forward_entry[] = {
2323 	{
2324 		.procname	= "ip_forward",
2325 		.data		= &ipv4_devconf.data[
2326 					IPV4_DEVCONF_FORWARDING - 1],
2327 		.maxlen		= sizeof(int),
2328 		.mode		= 0644,
2329 		.proc_handler	= devinet_sysctl_forward,
2330 		.extra1		= &ipv4_devconf,
2331 		.extra2		= &init_net,
2332 	},
2333 	{ },
2334 };
2335 #endif
2336 
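/* Per-namespace setup: init_net keeps the static ipv4_devconf and
 * ipv4_devconf_dflt templates, other namespaces get kmemdup'd copies.
 * Registers the "all" and "default" sysctl trees plus the legacy
 * net/ipv4/ip_forward entry, unwinding in reverse order on failure.
 */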
2337 static __net_init int devinet_init_net(struct net *net)
2338 {
2339 	int err;
2340 	struct ipv4_devconf *all, *dflt;
2341 #ifdef CONFIG_SYSCTL
2342 	struct ctl_table *tbl = ctl_forward_entry;
2343 	struct ctl_table_header *forw_hdr;
2344 #endif
2345 
2346 	err = -ENOMEM;
2347 	all = &ipv4_devconf;
2348 	dflt = &ipv4_devconf_dflt;
2349 
2350 	if (!net_eq(net, &init_net)) {
2351 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2352 		if (!all)
2353 			goto err_alloc_all;
2354 
2355 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2356 		if (!dflt)
2357 			goto err_alloc_dflt;
2358 
2359 #ifdef CONFIG_SYSCTL
2360 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2361 		if (!tbl)
2362 			goto err_alloc_ctl;
2363 
2364 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2365 		tbl[0].extra1 = all;
2366 		tbl[0].extra2 = net;
2367 #endif
2368 	}
2369 
2370 #ifdef CONFIG_SYSCTL
2371 	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2372 	if (err < 0)
2373 		goto err_reg_all;
2374 
2375 	err = __devinet_sysctl_register(net, "default",
2376 					NETCONFA_IFINDEX_DEFAULT, dflt);
2377 	if (err < 0)
2378 		goto err_reg_dflt;
2379 
2380 	err = -ENOMEM;
2381 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2382 	if (!forw_hdr)
2383 		goto err_reg_ctl;
2384 	net->ipv4.forw_hdr = forw_hdr;
2385 #endif
2386 
2387 	net->ipv4.devconf_all = all;
2388 	net->ipv4.devconf_dflt = dflt;
2389 	return 0;
2390 
2391 #ifdef CONFIG_SYSCTL
2392 err_reg_ctl:
2393 	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2394 err_reg_dflt:
2395 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2396 err_reg_all:
2397 	if (tbl != ctl_forward_entry)
2398 		kfree(tbl);
2399 err_alloc_ctl:
2400 #endif
2401 	if (dflt != &ipv4_devconf_dflt)
2402 		kfree(dflt);
2403 err_alloc_dflt:
2404 	if (all != &ipv4_devconf)
2405 		kfree(all);
2406 err_alloc_all:
2407 	return err;
2408 }
2409 
2410 static __net_exit void devinet_exit_net(struct net *net)
2411 {
2412 #ifdef CONFIG_SYSCTL
2413 	struct ctl_table *tbl;
2414 
2415 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2416 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2417 	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2418 				    NETCONFA_IFINDEX_DEFAULT);
2419 	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2420 				    NETCONFA_IFINDEX_ALL);
2421 	kfree(tbl);
2422 #endif
2423 	kfree(net->ipv4.devconf_dflt);
2424 	kfree(net->ipv4.devconf_all);
2425 }
2426 
2427 static __net_initdata struct pernet_operations devinet_ops = {
2428 	.init = devinet_init_net,
2429 	.exit = devinet_exit_net,
2430 };
2431 
2432 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2433 	.family		  = AF_INET,
2434 	.fill_link_af	  = inet_fill_link_af,
2435 	.get_link_af_size = inet_get_link_af_size,
2436 	.validate_link_af = inet_validate_link_af,
2437 	.set_link_af	  = inet_set_link_af,
2438 };
2439 
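/* Boot-time initialisation: seed the inet_addr_lst hash, register the
 * pernet operations, gifconf handler and netdevice notifier, kick off the
 * address-lifetime expiry work, and register the AF_INET link-af ops and
 * the RTM_{NEW,DEL,GET}ADDR / RTM_GETNETCONF rtnetlink handlers.
 */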
2440 void __init devinet_init(void)
2441 {
2442 	int i;
2443 
2444 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2445 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2446 
2447 	register_pernet_subsys(&devinet_ops);
2448 
2449 	register_gifconf(PF_INET, inet_gifconf);
2450 	register_netdevice_notifier(&ip_netdev_notifier);
2451 
2452 	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2453 
2454 	rtnl_af_register(&inet_af_ops);
2455 
2456 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2457 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2458 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2459 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2460 		      inet_netconf_dump_devconf, NULL);
2461 }
2462