xref: /linux/net/ipv4/devinet.c (revision a81ab36bf52d0ca3a32251a923be1dbced726141)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78 	},
79 };
80 
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 	.data = {
83 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90 	},
91 };
92 
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95 
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 	[IFA_LOCAL]     	= { .type = NLA_U32 },
98 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
99 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
100 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
102 	[IFA_FLAGS]		= { .type = NLA_U32 },
103 };
104 
105 #define IN4_ADDR_HSIZE_SHIFT	8
106 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
107 
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 static DEFINE_SPINLOCK(inet_addr_hash_lock);
110 
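/*
 * inet_addr_lst hashes every configured IPv4 address by its ifa_local
 * value (mixed with the netns) so that __ip_dev_find() can map an
 * address back to its device without walking all interfaces.
 */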
111 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 {
113 	u32 val = (__force u32) addr ^ net_hash_mix(net);
114 
115 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 }
117 
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 {
120 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
121 
122 	spin_lock(&inet_addr_hash_lock);
123 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124 	spin_unlock(&inet_addr_hash_lock);
125 }
126 
127 static void inet_hash_remove(struct in_ifaddr *ifa)
128 {
129 	spin_lock(&inet_addr_hash_lock);
130 	hlist_del_init_rcu(&ifa->hash);
131 	spin_unlock(&inet_addr_hash_lock);
132 }
133 
134 /**
135  * __ip_dev_find - find the first device with a given source address.
136  * @net: the net namespace
137  * @addr: the source address
138  * @devref: if true, take a reference on the found device
139  *
140  * If a caller uses devref=false, it should be protected by RCU, or RTNL
141  */
142 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
143 {
144 	u32 hash = inet_addr_hash(net, addr);
145 	struct net_device *result = NULL;
146 	struct in_ifaddr *ifa;
147 
148 	rcu_read_lock();
149 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
150 		if (ifa->ifa_local == addr) {
151 			struct net_device *dev = ifa->ifa_dev->dev;
152 
153 			if (!net_eq(dev_net(dev), net))
154 				continue;
155 			result = dev;
156 			break;
157 		}
158 	}
159 	if (!result) {
160 		struct flowi4 fl4 = { .daddr = addr };
161 		struct fib_result res = { 0 };
162 		struct fib_table *local;
163 
164 		/* Fallback to FIB local table so that communication
165 		 * over loopback subnets work.
166 		 */
167 		local = fib_get_table(net, RT_TABLE_LOCAL);
168 		if (local &&
169 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170 		    res.type == RTN_LOCAL)
171 			result = FIB_RES_DEV(res);
172 	}
173 	if (result && devref)
174 		dev_hold(result);
175 	rcu_read_unlock();
176 	return result;
177 }
178 EXPORT_SYMBOL(__ip_dev_find);
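/*
 * Example (illustrative only): resolving a locally configured address
 * from process context; with devref=true the caller must drop the
 * reference it receives:
 *
 *	struct net_device *dev;
 *
 *	dev = __ip_dev_find(net, htonl(INADDR_LOOPBACK), true);
 *	if (dev)
 *		dev_put(dev);
 */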
179 
180 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
181 
182 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
183 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
184 			 int destroy);
185 #ifdef CONFIG_SYSCTL
186 static void devinet_sysctl_register(struct in_device *idev);
187 static void devinet_sysctl_unregister(struct in_device *idev);
188 #else
189 static void devinet_sysctl_register(struct in_device *idev)
190 {
191 }
192 static void devinet_sysctl_unregister(struct in_device *idev)
193 {
194 }
195 #endif
196 
197 /* Locks all the inet devices. */
198 
199 static struct in_ifaddr *inet_alloc_ifa(void)
200 {
201 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
202 }
203 
204 static void inet_rcu_free_ifa(struct rcu_head *head)
205 {
206 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
207 	if (ifa->ifa_dev)
208 		in_dev_put(ifa->ifa_dev);
209 	kfree(ifa);
210 }
211 
212 static void inet_free_ifa(struct in_ifaddr *ifa)
213 {
214 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
215 }
216 
217 void in_dev_finish_destroy(struct in_device *idev)
218 {
219 	struct net_device *dev = idev->dev;
220 
221 	WARN_ON(idev->ifa_list);
222 	WARN_ON(idev->mc_list);
223 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
224 #ifdef NET_REFCNT_DEBUG
225 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
226 #endif
227 	dev_put(dev);
228 	if (!idev->dead)
229 		pr_err("Freeing alive in_device %p\n", idev);
230 	else
231 		kfree(idev);
232 }
233 EXPORT_SYMBOL(in_dev_finish_destroy);
234 
235 static struct in_device *inetdev_init(struct net_device *dev)
236 {
237 	struct in_device *in_dev;
238 
239 	ASSERT_RTNL();
240 
241 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
242 	if (!in_dev)
243 		goto out;
244 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245 			sizeof(in_dev->cnf));
246 	in_dev->cnf.sysctl = NULL;
247 	in_dev->dev = dev;
248 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249 	if (!in_dev->arp_parms)
250 		goto out_kfree;
251 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252 		dev_disable_lro(dev);
253 	/* Reference in_dev->dev */
254 	dev_hold(dev);
255 	/* Account for reference dev->ip_ptr (below) */
256 	in_dev_hold(in_dev);
257 
258 	devinet_sysctl_register(in_dev);
259 	ip_mc_init_dev(in_dev);
260 	if (dev->flags & IFF_UP)
261 		ip_mc_up(in_dev);
262 
263 	/* we can receive as soon as ip_ptr is set -- do this last */
264 	rcu_assign_pointer(dev->ip_ptr, in_dev);
265 out:
266 	return in_dev;
267 out_kfree:
268 	kfree(in_dev);
269 	in_dev = NULL;
270 	goto out;
271 }
272 
273 static void in_dev_rcu_put(struct rcu_head *head)
274 {
275 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
276 	in_dev_put(idev);
277 }
278 
279 static void inetdev_destroy(struct in_device *in_dev)
280 {
281 	struct in_ifaddr *ifa;
282 	struct net_device *dev;
283 
284 	ASSERT_RTNL();
285 
286 	dev = in_dev->dev;
287 
288 	in_dev->dead = 1;
289 
290 	ip_mc_destroy_dev(in_dev);
291 
292 	while ((ifa = in_dev->ifa_list) != NULL) {
293 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
294 		inet_free_ifa(ifa);
295 	}
296 
297 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
298 
299 	devinet_sysctl_unregister(in_dev);
300 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
301 	arp_ifdown(dev);
302 
303 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
304 }
305 
306 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
307 {
308 	rcu_read_lock();
309 	for_primary_ifa(in_dev) {
310 		if (inet_ifa_match(a, ifa)) {
311 			if (!b || inet_ifa_match(b, ifa)) {
312 				rcu_read_unlock();
313 				return 1;
314 			}
315 		}
316 	} endfor_ifa(in_dev);
317 	rcu_read_unlock();
318 	return 0;
319 }
320 
321 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322 			 int destroy, struct nlmsghdr *nlh, u32 portid)
323 {
324 	struct in_ifaddr *promote = NULL;
325 	struct in_ifaddr *ifa, *ifa1 = *ifap;
326 	struct in_ifaddr *last_prim = in_dev->ifa_list;
327 	struct in_ifaddr *prev_prom = NULL;
328 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
329 
330 	ASSERT_RTNL();
331 
332 	/* 1. Deleting a primary ifaddr forces deletion of all its secondaries
333 	 * unless alias promotion is set.
334 	 */
335 
336 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
337 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
338 
339 		while ((ifa = *ifap1) != NULL) {
340 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
341 			    ifa1->ifa_scope <= ifa->ifa_scope)
342 				last_prim = ifa;
343 
344 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
345 			    ifa1->ifa_mask != ifa->ifa_mask ||
346 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
347 				ifap1 = &ifa->ifa_next;
348 				prev_prom = ifa;
349 				continue;
350 			}
351 
352 			if (!do_promote) {
353 				inet_hash_remove(ifa);
354 				*ifap1 = ifa->ifa_next;
355 
356 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
357 				blocking_notifier_call_chain(&inetaddr_chain,
358 						NETDEV_DOWN, ifa);
359 				inet_free_ifa(ifa);
360 			} else {
361 				promote = ifa;
362 				break;
363 			}
364 		}
365 	}
366 
367 	/* On promotion all secondaries from the subnet are changing
368 	 * their primary IP; we must remove all their routes silently
369 	 * and later add them back with the new prefsrc. Do this
370 	 * while all addresses are still on the device list.
371 	 */
372 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
373 		if (ifa1->ifa_mask == ifa->ifa_mask &&
374 		    inet_ifa_match(ifa1->ifa_address, ifa))
375 			fib_del_ifaddr(ifa, ifa1);
376 	}
377 
378 	/* 2. Unlink it */
379 
380 	*ifap = ifa1->ifa_next;
381 	inet_hash_remove(ifa1);
382 
383 	/* 3. Announce address deletion */
384 
385 	/* Send the message first, then call the notifier.
386 	   At first sight the FIB update triggered by the notifier
387 	   will refer to an already deleted ifaddr, which could confuse
388 	   netlink listeners. It does not: if the routes went away first,
389 	   gated would see the route deleted while still thinking the
390 	   ifaddr is valid, and would try to restore the deleted
391 	   routes... Grr. So this order is correct.
392 	 */
393 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
394 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
395 
396 	if (promote) {
397 		struct in_ifaddr *next_sec = promote->ifa_next;
398 
399 		if (prev_prom) {
400 			prev_prom->ifa_next = promote->ifa_next;
401 			promote->ifa_next = last_prim->ifa_next;
402 			last_prim->ifa_next = promote;
403 		}
404 
405 		promote->ifa_flags &= ~IFA_F_SECONDARY;
406 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
407 		blocking_notifier_call_chain(&inetaddr_chain,
408 				NETDEV_UP, promote);
409 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
410 			if (ifa1->ifa_mask != ifa->ifa_mask ||
411 			    !inet_ifa_match(ifa1->ifa_address, ifa))
412 					continue;
413 			fib_add_ifaddr(ifa);
414 		}
415 
416 	}
417 	if (destroy)
418 		inet_free_ifa(ifa1);
419 }
420 
421 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
422 			 int destroy)
423 {
424 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
425 }
426 
427 static void check_lifetime(struct work_struct *work);
428 
429 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
430 
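/*
 * Link a new ifaddr into its device's list and announce it.  An address
 * whose subnet already has a primary becomes IFA_F_SECONDARY, an exact
 * duplicate returns -EEXIST, and a scope conflict on that subnet returns
 * -EINVAL.  The lifetime worker is kicked so the new entry is aged.
 */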
431 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
432 			     u32 portid)
433 {
434 	struct in_device *in_dev = ifa->ifa_dev;
435 	struct in_ifaddr *ifa1, **ifap, **last_primary;
436 
437 	ASSERT_RTNL();
438 
439 	if (!ifa->ifa_local) {
440 		inet_free_ifa(ifa);
441 		return 0;
442 	}
443 
444 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
445 	last_primary = &in_dev->ifa_list;
446 
447 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
448 	     ifap = &ifa1->ifa_next) {
449 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
450 		    ifa->ifa_scope <= ifa1->ifa_scope)
451 			last_primary = &ifa1->ifa_next;
452 		if (ifa1->ifa_mask == ifa->ifa_mask &&
453 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
454 			if (ifa1->ifa_local == ifa->ifa_local) {
455 				inet_free_ifa(ifa);
456 				return -EEXIST;
457 			}
458 			if (ifa1->ifa_scope != ifa->ifa_scope) {
459 				inet_free_ifa(ifa);
460 				return -EINVAL;
461 			}
462 			ifa->ifa_flags |= IFA_F_SECONDARY;
463 		}
464 	}
465 
466 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
467 		prandom_seed((__force u32) ifa->ifa_local);
468 		ifap = last_primary;
469 	}
470 
471 	ifa->ifa_next = *ifap;
472 	*ifap = ifa;
473 
474 	inet_hash_insert(dev_net(in_dev->dev), ifa);
475 
476 	cancel_delayed_work(&check_lifetime_work);
477 	schedule_delayed_work(&check_lifetime_work, 0);
478 
479 	/* Send the message first, then call the notifier.
480 	   The notifier will trigger the FIB update, so that
481 	   netlink listeners will know about the new ifaddr. */
482 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
483 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
484 
485 	return 0;
486 }
487 
488 static int inet_insert_ifa(struct in_ifaddr *ifa)
489 {
490 	return __inet_insert_ifa(ifa, NULL, 0);
491 }
492 
493 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
494 {
495 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
496 
497 	ASSERT_RTNL();
498 
499 	if (!in_dev) {
500 		inet_free_ifa(ifa);
501 		return -ENOBUFS;
502 	}
503 	ipv4_devconf_setall(in_dev);
504 	neigh_parms_data_state_setall(in_dev->arp_parms);
505 	if (ifa->ifa_dev != in_dev) {
506 		WARN_ON(ifa->ifa_dev);
507 		in_dev_hold(in_dev);
508 		ifa->ifa_dev = in_dev;
509 	}
510 	if (ipv4_is_loopback(ifa->ifa_local))
511 		ifa->ifa_scope = RT_SCOPE_HOST;
512 	return inet_insert_ifa(ifa);
513 }
514 
515 /* Caller must hold RCU or RTNL:
516  * we don't take a reference on the found in_device.
517  */
518 struct in_device *inetdev_by_index(struct net *net, int ifindex)
519 {
520 	struct net_device *dev;
521 	struct in_device *in_dev = NULL;
522 
523 	rcu_read_lock();
524 	dev = dev_get_by_index_rcu(net, ifindex);
525 	if (dev)
526 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
527 	rcu_read_unlock();
528 	return in_dev;
529 }
530 EXPORT_SYMBOL(inetdev_by_index);
531 
532 /* Called only from RTNL-locked context. No other locks are taken. */
533 
534 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
535 				    __be32 mask)
536 {
537 	ASSERT_RTNL();
538 
539 	for_primary_ifa(in_dev) {
540 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
541 			return ifa;
542 	} endfor_ifa(in_dev);
543 	return NULL;
544 }
545 
546 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
547 {
548 	struct net *net = sock_net(skb->sk);
549 	struct nlattr *tb[IFA_MAX+1];
550 	struct in_device *in_dev;
551 	struct ifaddrmsg *ifm;
552 	struct in_ifaddr *ifa, **ifap;
553 	int err = -EINVAL;
554 
555 	ASSERT_RTNL();
556 
557 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
558 	if (err < 0)
559 		goto errout;
560 
561 	ifm = nlmsg_data(nlh);
562 	in_dev = inetdev_by_index(net, ifm->ifa_index);
563 	if (in_dev == NULL) {
564 		err = -ENODEV;
565 		goto errout;
566 	}
567 
568 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
569 	     ifap = &ifa->ifa_next) {
570 		if (tb[IFA_LOCAL] &&
571 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
572 			continue;
573 
574 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
575 			continue;
576 
577 		if (tb[IFA_ADDRESS] &&
578 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
579 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
580 			continue;
581 
582 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
583 		return 0;
584 	}
585 
586 	err = -EADDRNOTAVAIL;
587 errout:
588 	return err;
589 }
590 
591 #define INFINITY_LIFE_TIME	0xFFFFFFFF
592 
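/*
 * Periodic worker: walk the address hash and expire addresses whose
 * valid_lft has run out, mark those past their preferred_lft as
 * IFA_F_DEPRECATED, and reschedule itself for the next earliest event.
 * A lockless RCU pass first decides whether any change is needed;
 * only then is the RTNL taken for the bucket that must be modified.
 */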
593 static void check_lifetime(struct work_struct *work)
594 {
595 	unsigned long now, next, next_sec, next_sched;
596 	struct in_ifaddr *ifa;
597 	struct hlist_node *n;
598 	int i;
599 
600 	now = jiffies;
601 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
602 
603 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
604 		bool change_needed = false;
605 
606 		rcu_read_lock();
607 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
608 			unsigned long age;
609 
610 			if (ifa->ifa_flags & IFA_F_PERMANENT)
611 				continue;
612 
613 			/* We try to batch several events at once. */
614 			age = (now - ifa->ifa_tstamp +
615 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
616 
617 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
618 			    age >= ifa->ifa_valid_lft) {
619 				change_needed = true;
620 			} else if (ifa->ifa_preferred_lft ==
621 				   INFINITY_LIFE_TIME) {
622 				continue;
623 			} else if (age >= ifa->ifa_preferred_lft) {
624 				if (time_before(ifa->ifa_tstamp +
625 						ifa->ifa_valid_lft * HZ, next))
626 					next = ifa->ifa_tstamp +
627 					       ifa->ifa_valid_lft * HZ;
628 
629 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
630 					change_needed = true;
631 			} else if (time_before(ifa->ifa_tstamp +
632 					       ifa->ifa_preferred_lft * HZ,
633 					       next)) {
634 				next = ifa->ifa_tstamp +
635 				       ifa->ifa_preferred_lft * HZ;
636 			}
637 		}
638 		rcu_read_unlock();
639 		if (!change_needed)
640 			continue;
641 		rtnl_lock();
642 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
643 			unsigned long age;
644 
645 			if (ifa->ifa_flags & IFA_F_PERMANENT)
646 				continue;
647 
648 			/* We try to batch several events at once. */
649 			age = (now - ifa->ifa_tstamp +
650 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
651 
652 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
653 			    age >= ifa->ifa_valid_lft) {
654 				struct in_ifaddr **ifap;
655 
656 				for (ifap = &ifa->ifa_dev->ifa_list;
657 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
658 					if (*ifap == ifa) {
659 						inet_del_ifa(ifa->ifa_dev,
660 							     ifap, 1);
661 						break;
662 					}
663 				}
664 			} else if (ifa->ifa_preferred_lft !=
665 				   INFINITY_LIFE_TIME &&
666 				   age >= ifa->ifa_preferred_lft &&
667 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
668 				ifa->ifa_flags |= IFA_F_DEPRECATED;
669 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
670 			}
671 		}
672 		rtnl_unlock();
673 	}
674 
675 	next_sec = round_jiffies_up(next);
676 	next_sched = next;
677 
678 	/* If rounded timeout is accurate enough, accept it. */
679 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
680 		next_sched = next_sec;
681 
682 	now = jiffies;
683 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
684 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
685 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
686 
687 	schedule_delayed_work(&check_lifetime_work, next_sched - now);
688 }
689 
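/*
 * Apply the netlink-supplied valid/preferred lifetimes to an ifaddr:
 * an infinite valid lifetime makes the address IFA_F_PERMANENT, a zero
 * preferred lifetime marks it IFA_F_DEPRECATED, and the timestamps are
 * refreshed so check_lifetime() can age the address correctly.
 */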
690 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
691 			     __u32 prefered_lft)
692 {
693 	unsigned long timeout;
694 
695 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
696 
697 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
698 	if (addrconf_finite_timeout(timeout))
699 		ifa->ifa_valid_lft = timeout;
700 	else
701 		ifa->ifa_flags |= IFA_F_PERMANENT;
702 
703 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
704 	if (addrconf_finite_timeout(timeout)) {
705 		if (timeout == 0)
706 			ifa->ifa_flags |= IFA_F_DEPRECATED;
707 		ifa->ifa_preferred_lft = timeout;
708 	}
709 	ifa->ifa_tstamp = jiffies;
710 	if (!ifa->ifa_cstamp)
711 		ifa->ifa_cstamp = ifa->ifa_tstamp;
712 }
713 
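/*
 * Parse an RTM_NEWADDR request into a freshly allocated in_ifaddr bound
 * to the target in_device.  Lifetimes from IFA_CACHEINFO are returned
 * separately so the caller can apply them once the address has been
 * inserted (or found to already exist).
 */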
714 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
715 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
716 {
717 	struct nlattr *tb[IFA_MAX+1];
718 	struct in_ifaddr *ifa;
719 	struct ifaddrmsg *ifm;
720 	struct net_device *dev;
721 	struct in_device *in_dev;
722 	int err;
723 
724 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
725 	if (err < 0)
726 		goto errout;
727 
728 	ifm = nlmsg_data(nlh);
729 	err = -EINVAL;
730 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
731 		goto errout;
732 
733 	dev = __dev_get_by_index(net, ifm->ifa_index);
734 	err = -ENODEV;
735 	if (dev == NULL)
736 		goto errout;
737 
738 	in_dev = __in_dev_get_rtnl(dev);
739 	err = -ENOBUFS;
740 	if (in_dev == NULL)
741 		goto errout;
742 
743 	ifa = inet_alloc_ifa();
744 	if (ifa == NULL)
745 		/*
746 		 * A potential in_dev allocation can be left alive; it stays
747 		 * assigned to its device and is destroyed with it.
748 		 */
749 		goto errout;
750 
751 	ipv4_devconf_setall(in_dev);
752 	neigh_parms_data_state_setall(in_dev->arp_parms);
753 	in_dev_hold(in_dev);
754 
755 	if (tb[IFA_ADDRESS] == NULL)
756 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
757 
758 	INIT_HLIST_NODE(&ifa->hash);
759 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
760 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
761 	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
762 					 ifm->ifa_flags;
763 	ifa->ifa_scope = ifm->ifa_scope;
764 	ifa->ifa_dev = in_dev;
765 
766 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
767 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
768 
769 	if (tb[IFA_BROADCAST])
770 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
771 
772 	if (tb[IFA_LABEL])
773 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
774 	else
775 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
776 
777 	if (tb[IFA_CACHEINFO]) {
778 		struct ifa_cacheinfo *ci;
779 
780 		ci = nla_data(tb[IFA_CACHEINFO]);
781 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
782 			err = -EINVAL;
783 			goto errout_free;
784 		}
785 		*pvalid_lft = ci->ifa_valid;
786 		*pprefered_lft = ci->ifa_prefered;
787 	}
788 
789 	return ifa;
790 
791 errout_free:
792 	inet_free_ifa(ifa);
793 errout:
794 	return ERR_PTR(err);
795 }
796 
797 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
798 {
799 	struct in_device *in_dev = ifa->ifa_dev;
800 	struct in_ifaddr *ifa1, **ifap;
801 
802 	if (!ifa->ifa_local)
803 		return NULL;
804 
805 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
806 	     ifap = &ifa1->ifa_next) {
807 		if (ifa1->ifa_mask == ifa->ifa_mask &&
808 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
809 		    ifa1->ifa_local == ifa->ifa_local)
810 			return ifa1;
811 	}
812 	return NULL;
813 }
814 
815 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
816 {
817 	struct net *net = sock_net(skb->sk);
818 	struct in_ifaddr *ifa;
819 	struct in_ifaddr *ifa_existing;
820 	__u32 valid_lft = INFINITY_LIFE_TIME;
821 	__u32 prefered_lft = INFINITY_LIFE_TIME;
822 
823 	ASSERT_RTNL();
824 
825 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
826 	if (IS_ERR(ifa))
827 		return PTR_ERR(ifa);
828 
829 	ifa_existing = find_matching_ifa(ifa);
830 	if (!ifa_existing) {
831 		/* It would be best to check for !NLM_F_CREATE here but
832 		 * userspace already relies on not having to provide this.
833 		 */
834 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
835 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
836 	} else {
837 		inet_free_ifa(ifa);
838 
839 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
840 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
841 			return -EEXIST;
842 		ifa = ifa_existing;
843 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
844 		cancel_delayed_work(&check_lifetime_work);
845 		schedule_delayed_work(&check_lifetime_work, 0);
846 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
847 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
848 	}
849 	return 0;
850 }
851 
852 /*
853  *	Determine the default (classful) prefix length for an IP address.
854  */
855 
856 static int inet_abc_len(__be32 addr)
857 {
858 	int rc = -1;	/* Something else, probably a multicast. */
859 
860 	if (ipv4_is_zeronet(addr))
861 		rc = 0;
862 	else {
863 		__u32 haddr = ntohl(addr);
864 
865 		if (IN_CLASSA(haddr))
866 			rc = 8;
867 		else if (IN_CLASSB(haddr))
868 			rc = 16;
869 		else if (IN_CLASSC(haddr))
870 			rc = 24;
871 	}
872 
873 	return rc;
874 }
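/*
 * Examples (classful defaults): 10.1.2.3 -> 8, 172.16.1.2 -> 16,
 * 192.168.1.1 -> 24, 224.0.0.1 -> -1 (multicast has no sensible default).
 */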
875 
876 
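/*
 * Handler for the classic SIOC[GS]IF* address ioctls.  Look up the
 * interface (and, for "dev:label" aliases, the matching ifaddr) under
 * the RTNL, then get or set the requested address attribute.
 */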
877 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
878 {
879 	struct ifreq ifr;
880 	struct sockaddr_in sin_orig;
881 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
882 	struct in_device *in_dev;
883 	struct in_ifaddr **ifap = NULL;
884 	struct in_ifaddr *ifa = NULL;
885 	struct net_device *dev;
886 	char *colon;
887 	int ret = -EFAULT;
888 	int tryaddrmatch = 0;
889 
890 	/*
891 	 *	Fetch the caller's info block into kernel space
892 	 */
893 
894 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
895 		goto out;
896 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
897 
898 	/* save original address for comparison */
899 	memcpy(&sin_orig, sin, sizeof(*sin));
900 
901 	colon = strchr(ifr.ifr_name, ':');
902 	if (colon)
903 		*colon = 0;
904 
905 	dev_load(net, ifr.ifr_name);
906 
907 	switch (cmd) {
908 	case SIOCGIFADDR:	/* Get interface address */
909 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
910 	case SIOCGIFDSTADDR:	/* Get the destination address */
911 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
912 		/* Note that these ioctls will not sleep,
913 		   so we do not impose a lock.
914 		   One day we will be forced to put a shared lock here (I mean SMP).
915 		 */
916 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
917 		memset(sin, 0, sizeof(*sin));
918 		sin->sin_family = AF_INET;
919 		break;
920 
921 	case SIOCSIFFLAGS:
922 		ret = -EPERM;
923 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
924 			goto out;
925 		break;
926 	case SIOCSIFADDR:	/* Set interface address (and family) */
927 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
928 	case SIOCSIFDSTADDR:	/* Set the destination address */
929 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
930 		ret = -EPERM;
931 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
932 			goto out;
933 		ret = -EINVAL;
934 		if (sin->sin_family != AF_INET)
935 			goto out;
936 		break;
937 	default:
938 		ret = -EINVAL;
939 		goto out;
940 	}
941 
942 	rtnl_lock();
943 
944 	ret = -ENODEV;
945 	dev = __dev_get_by_name(net, ifr.ifr_name);
946 	if (!dev)
947 		goto done;
948 
949 	if (colon)
950 		*colon = ':';
951 
952 	in_dev = __in_dev_get_rtnl(dev);
953 	if (in_dev) {
954 		if (tryaddrmatch) {
955 			/* Matthias Andree */
956 			/* compare label and address (4.4BSD style) */
957 			/* note: we only do this for a limited set of ioctls
958 			   and only if the original address family was AF_INET.
959 			   This is checked above. */
960 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
961 			     ifap = &ifa->ifa_next) {
962 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
963 				    sin_orig.sin_addr.s_addr ==
964 							ifa->ifa_local) {
965 					break; /* found */
966 				}
967 			}
968 		}
969 		/* we didn't get a match, maybe the application is
970 		   4.3BSD-style and passed in junk so we fall back to
971 		   comparing just the label */
972 		if (!ifa) {
973 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
974 			     ifap = &ifa->ifa_next)
975 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
976 					break;
977 		}
978 	}
979 
980 	ret = -EADDRNOTAVAIL;
981 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
982 		goto done;
983 
984 	switch (cmd) {
985 	case SIOCGIFADDR:	/* Get interface address */
986 		sin->sin_addr.s_addr = ifa->ifa_local;
987 		goto rarok;
988 
989 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
990 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
991 		goto rarok;
992 
993 	case SIOCGIFDSTADDR:	/* Get the destination address */
994 		sin->sin_addr.s_addr = ifa->ifa_address;
995 		goto rarok;
996 
997 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
998 		sin->sin_addr.s_addr = ifa->ifa_mask;
999 		goto rarok;
1000 
1001 	case SIOCSIFFLAGS:
1002 		if (colon) {
1003 			ret = -EADDRNOTAVAIL;
1004 			if (!ifa)
1005 				break;
1006 			ret = 0;
1007 			if (!(ifr.ifr_flags & IFF_UP))
1008 				inet_del_ifa(in_dev, ifap, 1);
1009 			break;
1010 		}
1011 		ret = dev_change_flags(dev, ifr.ifr_flags);
1012 		break;
1013 
1014 	case SIOCSIFADDR:	/* Set interface address (and family) */
1015 		ret = -EINVAL;
1016 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1017 			break;
1018 
1019 		if (!ifa) {
1020 			ret = -ENOBUFS;
1021 			ifa = inet_alloc_ifa();
1022 			if (!ifa)
1023 				break;
1024 			INIT_HLIST_NODE(&ifa->hash);
1025 			if (colon)
1026 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1027 			else
1028 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1029 		} else {
1030 			ret = 0;
1031 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1032 				break;
1033 			inet_del_ifa(in_dev, ifap, 0);
1034 			ifa->ifa_broadcast = 0;
1035 			ifa->ifa_scope = 0;
1036 		}
1037 
1038 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1039 
1040 		if (!(dev->flags & IFF_POINTOPOINT)) {
1041 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1042 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1043 			if ((dev->flags & IFF_BROADCAST) &&
1044 			    ifa->ifa_prefixlen < 31)
1045 				ifa->ifa_broadcast = ifa->ifa_address |
1046 						     ~ifa->ifa_mask;
1047 		} else {
1048 			ifa->ifa_prefixlen = 32;
1049 			ifa->ifa_mask = inet_make_mask(32);
1050 		}
1051 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1052 		ret = inet_set_ifa(dev, ifa);
1053 		break;
1054 
1055 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1056 		ret = 0;
1057 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1058 			inet_del_ifa(in_dev, ifap, 0);
1059 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1060 			inet_insert_ifa(ifa);
1061 		}
1062 		break;
1063 
1064 	case SIOCSIFDSTADDR:	/* Set the destination address */
1065 		ret = 0;
1066 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1067 			break;
1068 		ret = -EINVAL;
1069 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1070 			break;
1071 		ret = 0;
1072 		inet_del_ifa(in_dev, ifap, 0);
1073 		ifa->ifa_address = sin->sin_addr.s_addr;
1074 		inet_insert_ifa(ifa);
1075 		break;
1076 
1077 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1078 
1079 		/*
1080 		 *	The mask we set must be legal.
1081 		 */
1082 		ret = -EINVAL;
1083 		if (bad_mask(sin->sin_addr.s_addr, 0))
1084 			break;
1085 		ret = 0;
1086 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1087 			__be32 old_mask = ifa->ifa_mask;
1088 			inet_del_ifa(in_dev, ifap, 0);
1089 			ifa->ifa_mask = sin->sin_addr.s_addr;
1090 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1091 
1092 			/* If the current broadcast address was derived
1093 			 * from the old netmask, recalculate it for the
1094 			 * new one. Otherwise it's a funny address, so
1095 			 * don't touch it since the user seems to know
1096 			 * what (s)he's doing...
1097 			 */
1098 			if ((dev->flags & IFF_BROADCAST) &&
1099 			    (ifa->ifa_prefixlen < 31) &&
1100 			    (ifa->ifa_broadcast ==
1101 			     (ifa->ifa_local|~old_mask))) {
1102 				ifa->ifa_broadcast = (ifa->ifa_local |
1103 						      ~sin->sin_addr.s_addr);
1104 			}
1105 			inet_insert_ifa(ifa);
1106 		}
1107 		break;
1108 	}
1109 done:
1110 	rtnl_unlock();
1111 out:
1112 	return ret;
1113 rarok:
1114 	rtnl_unlock();
1115 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1116 	goto out;
1117 }
1118 
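/*
 * SIOCGIFCONF helper: copy one struct ifreq per address into the user
 * buffer, or, if buf is NULL, just report how many bytes would be needed.
 */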
1119 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1120 {
1121 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1122 	struct in_ifaddr *ifa;
1123 	struct ifreq ifr;
1124 	int done = 0;
1125 
1126 	if (!in_dev)
1127 		goto out;
1128 
1129 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1130 		if (!buf) {
1131 			done += sizeof(ifr);
1132 			continue;
1133 		}
1134 		if (len < (int) sizeof(ifr))
1135 			break;
1136 		memset(&ifr, 0, sizeof(struct ifreq));
1137 		strcpy(ifr.ifr_name, ifa->ifa_label);
1138 
1139 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1140 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1141 								ifa->ifa_local;
1142 
1143 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1144 			done = -EFAULT;
1145 			break;
1146 		}
1147 		buf  += sizeof(struct ifreq);
1148 		len  -= sizeof(struct ifreq);
1149 		done += sizeof(struct ifreq);
1150 	}
1151 out:
1152 	return done;
1153 }
1154 
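/*
 * Pick an IPv4 source address on @dev with scope no wider than @scope,
 * preferring a primary address on the same subnet as @dst.  If the
 * device has none, fall back to scanning the other devices in the
 * namespace for a suitable non-link-local primary address.
 */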
1155 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1156 {
1157 	__be32 addr = 0;
1158 	struct in_device *in_dev;
1159 	struct net *net = dev_net(dev);
1160 
1161 	rcu_read_lock();
1162 	in_dev = __in_dev_get_rcu(dev);
1163 	if (!in_dev)
1164 		goto no_in_dev;
1165 
1166 	for_primary_ifa(in_dev) {
1167 		if (ifa->ifa_scope > scope)
1168 			continue;
1169 		if (!dst || inet_ifa_match(dst, ifa)) {
1170 			addr = ifa->ifa_local;
1171 			break;
1172 		}
1173 		if (!addr)
1174 			addr = ifa->ifa_local;
1175 	} endfor_ifa(in_dev);
1176 
1177 	if (addr)
1178 		goto out_unlock;
1179 no_in_dev:
1180 
1181 	/* Non-loopback addresses on loopback devices should be preferred
1182 	   in this case. It is important that lo is the first interface
1183 	   in the dev_base list.
1184 	 */
1185 	for_each_netdev_rcu(net, dev) {
1186 		in_dev = __in_dev_get_rcu(dev);
1187 		if (!in_dev)
1188 			continue;
1189 
1190 		for_primary_ifa(in_dev) {
1191 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1192 			    ifa->ifa_scope <= scope) {
1193 				addr = ifa->ifa_local;
1194 				goto out_unlock;
1195 			}
1196 		} endfor_ifa(in_dev);
1197 	}
1198 out_unlock:
1199 	rcu_read_unlock();
1200 	return addr;
1201 }
1202 EXPORT_SYMBOL(inet_select_addr);
1203 
1204 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1205 			      __be32 local, int scope)
1206 {
1207 	int same = 0;
1208 	__be32 addr = 0;
1209 
1210 	for_ifa(in_dev) {
1211 		if (!addr &&
1212 		    (local == ifa->ifa_local || !local) &&
1213 		    ifa->ifa_scope <= scope) {
1214 			addr = ifa->ifa_local;
1215 			if (same)
1216 				break;
1217 		}
1218 		if (!same) {
1219 			same = (!local || inet_ifa_match(local, ifa)) &&
1220 				(!dst || inet_ifa_match(dst, ifa));
1221 			if (same && addr) {
1222 				if (local || !dst)
1223 					break;
1224 				/* Is the selected addr in the dst subnet? */
1225 				if (inet_ifa_match(addr, ifa))
1226 					break;
1227 				/* No, then can we use new local src? */
1228 				if (ifa->ifa_scope <= scope) {
1229 					addr = ifa->ifa_local;
1230 					break;
1231 				}
1232 				/* search for large dst subnet for addr */
1233 				same = 0;
1234 			}
1235 		}
1236 	} endfor_ifa(in_dev);
1237 
1238 	return same ? addr : 0;
1239 }
1240 
1241 /*
1242  * Confirm that local IP address exists using wildcards:
1243  * - net: netns to check, cannot be NULL
1244  * - in_dev: only on this interface, NULL=any interface
1245  * - dst: only in the same subnet as dst, 0=any dst
1246  * - local: address, 0=autoselect the local address
1247  * - scope: maximum allowed scope value for the local address
1248  */
1249 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1250 			 __be32 dst, __be32 local, int scope)
1251 {
1252 	__be32 addr = 0;
1253 	struct net_device *dev;
1254 
1255 	if (in_dev != NULL)
1256 		return confirm_addr_indev(in_dev, dst, local, scope);
1257 
1258 	rcu_read_lock();
1259 	for_each_netdev_rcu(net, dev) {
1260 		in_dev = __in_dev_get_rcu(dev);
1261 		if (in_dev) {
1262 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1263 			if (addr)
1264 				break;
1265 		}
1266 	}
1267 	rcu_read_unlock();
1268 
1269 	return addr;
1270 }
1271 EXPORT_SYMBOL(inet_confirm_addr);
1272 
1273 /*
1274  *	Device notifier
1275  */
1276 
1277 int register_inetaddr_notifier(struct notifier_block *nb)
1278 {
1279 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1280 }
1281 EXPORT_SYMBOL(register_inetaddr_notifier);
1282 
1283 int unregister_inetaddr_notifier(struct notifier_block *nb)
1284 {
1285 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1286 }
1287 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1288 
1289 /* Rename ifa_labels for a device name change. Make some effort to preserve
1290  * existing alias numbering and to create unique labels if possible.
1291 */
1292 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1293 {
1294 	struct in_ifaddr *ifa;
1295 	int named = 0;
1296 
1297 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1298 		char old[IFNAMSIZ], *dot;
1299 
1300 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1301 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1302 		if (named++ == 0)
1303 			goto skip;
1304 		dot = strchr(old, ':');
1305 		if (dot == NULL) {
1306 			sprintf(old, ":%d", named);
1307 			dot = old;
1308 		}
1309 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1310 			strcat(ifa->ifa_label, dot);
1311 		else
1312 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1313 skip:
1314 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1315 	}
1316 }
1317 
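/*
 * 68 is the minimum IPv4 datagram size every host must handle without
 * fragmentation (RFC 791: a 60-byte maximum header plus an 8-byte
 * minimum fragment), so a smaller MTU cannot carry IPv4 at all.
 */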
1318 static bool inetdev_valid_mtu(unsigned int mtu)
1319 {
1320 	return mtu >= 68;
1321 }
1322 
1323 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1324 					struct in_device *in_dev)
1325 
1326 {
1327 	struct in_ifaddr *ifa;
1328 
1329 	for (ifa = in_dev->ifa_list; ifa;
1330 	     ifa = ifa->ifa_next) {
1331 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1332 			 ifa->ifa_local, dev,
1333 			 ifa->ifa_local, NULL,
1334 			 dev->dev_addr, NULL);
1335 	}
1336 }
1337 
1338 /* Called only under RTNL semaphore */
1339 
1340 static int inetdev_event(struct notifier_block *this, unsigned long event,
1341 			 void *ptr)
1342 {
1343 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1344 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1345 
1346 	ASSERT_RTNL();
1347 
1348 	if (!in_dev) {
1349 		if (event == NETDEV_REGISTER) {
1350 			in_dev = inetdev_init(dev);
1351 			if (!in_dev)
1352 				return notifier_from_errno(-ENOMEM);
1353 			if (dev->flags & IFF_LOOPBACK) {
1354 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1355 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1356 			}
1357 		} else if (event == NETDEV_CHANGEMTU) {
1358 			/* Re-enabling IP */
1359 			if (inetdev_valid_mtu(dev->mtu))
1360 				in_dev = inetdev_init(dev);
1361 		}
1362 		goto out;
1363 	}
1364 
1365 	switch (event) {
1366 	case NETDEV_REGISTER:
1367 		pr_debug("%s: bug\n", __func__);
1368 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1369 		break;
1370 	case NETDEV_UP:
1371 		if (!inetdev_valid_mtu(dev->mtu))
1372 			break;
1373 		if (dev->flags & IFF_LOOPBACK) {
1374 			struct in_ifaddr *ifa = inet_alloc_ifa();
1375 
1376 			if (ifa) {
1377 				INIT_HLIST_NODE(&ifa->hash);
1378 				ifa->ifa_local =
1379 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1380 				ifa->ifa_prefixlen = 8;
1381 				ifa->ifa_mask = inet_make_mask(8);
1382 				in_dev_hold(in_dev);
1383 				ifa->ifa_dev = in_dev;
1384 				ifa->ifa_scope = RT_SCOPE_HOST;
1385 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1386 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1387 						 INFINITY_LIFE_TIME);
1388 				ipv4_devconf_setall(in_dev);
1389 				neigh_parms_data_state_setall(in_dev->arp_parms);
1390 				inet_insert_ifa(ifa);
1391 			}
1392 		}
1393 		ip_mc_up(in_dev);
1394 		/* fall through */
1395 	case NETDEV_CHANGEADDR:
1396 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1397 			break;
1398 		/* fall through */
1399 	case NETDEV_NOTIFY_PEERS:
1400 		/* Send gratuitous ARP to notify of link change */
1401 		inetdev_send_gratuitous_arp(dev, in_dev);
1402 		break;
1403 	case NETDEV_DOWN:
1404 		ip_mc_down(in_dev);
1405 		break;
1406 	case NETDEV_PRE_TYPE_CHANGE:
1407 		ip_mc_unmap(in_dev);
1408 		break;
1409 	case NETDEV_POST_TYPE_CHANGE:
1410 		ip_mc_remap(in_dev);
1411 		break;
1412 	case NETDEV_CHANGEMTU:
1413 		if (inetdev_valid_mtu(dev->mtu))
1414 			break;
1415 		/* disable IP when the MTU is too small -- fall through */
1416 	case NETDEV_UNREGISTER:
1417 		inetdev_destroy(in_dev);
1418 		break;
1419 	case NETDEV_CHANGENAME:
1420 		/* Do not notify about the label change; this event is
1421 		 * not interesting to applications using netlink.
1422 		 */
1423 		inetdev_changename(dev, in_dev);
1424 
1425 		devinet_sysctl_unregister(in_dev);
1426 		devinet_sysctl_register(in_dev);
1427 		break;
1428 	}
1429 out:
1430 	return NOTIFY_DONE;
1431 }
1432 
1433 static struct notifier_block ip_netdev_notifier = {
1434 	.notifier_call = inetdev_event,
1435 };
1436 
1437 static size_t inet_nlmsg_size(void)
1438 {
1439 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1440 	       + nla_total_size(4) /* IFA_ADDRESS */
1441 	       + nla_total_size(4) /* IFA_LOCAL */
1442 	       + nla_total_size(4) /* IFA_BROADCAST */
1443 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1444 	       + nla_total_size(4);  /* IFA_FLAGS */
1445 }
1446 
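/* Convert a jiffies timestamp into hundredths of a second since boot,
 * the unit userspace expects in struct ifa_cacheinfo.
 */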
1447 static inline u32 cstamp_delta(unsigned long cstamp)
1448 {
1449 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1450 }
1451 
1452 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1453 			 unsigned long tstamp, u32 preferred, u32 valid)
1454 {
1455 	struct ifa_cacheinfo ci;
1456 
1457 	ci.cstamp = cstamp_delta(cstamp);
1458 	ci.tstamp = cstamp_delta(tstamp);
1459 	ci.ifa_prefered = preferred;
1460 	ci.ifa_valid = valid;
1461 
1462 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1463 }
1464 
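/*
 * Fill one RTM_NEWADDR/RTM_DELADDR message for @ifa, converting the
 * stored lifetimes into the time remaining from now for IFA_CACHEINFO.
 */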
1465 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1466 			    u32 portid, u32 seq, int event, unsigned int flags)
1467 {
1468 	struct ifaddrmsg *ifm;
1469 	struct nlmsghdr  *nlh;
1470 	u32 preferred, valid;
1471 
1472 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1473 	if (nlh == NULL)
1474 		return -EMSGSIZE;
1475 
1476 	ifm = nlmsg_data(nlh);
1477 	ifm->ifa_family = AF_INET;
1478 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1479 	ifm->ifa_flags = ifa->ifa_flags;
1480 	ifm->ifa_scope = ifa->ifa_scope;
1481 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1482 
1483 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1484 		preferred = ifa->ifa_preferred_lft;
1485 		valid = ifa->ifa_valid_lft;
1486 		if (preferred != INFINITY_LIFE_TIME) {
1487 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1488 
1489 			if (preferred > tval)
1490 				preferred -= tval;
1491 			else
1492 				preferred = 0;
1493 			if (valid != INFINITY_LIFE_TIME) {
1494 				if (valid > tval)
1495 					valid -= tval;
1496 				else
1497 					valid = 0;
1498 			}
1499 		}
1500 	} else {
1501 		preferred = INFINITY_LIFE_TIME;
1502 		valid = INFINITY_LIFE_TIME;
1503 	}
1504 	if ((ifa->ifa_address &&
1505 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1506 	    (ifa->ifa_local &&
1507 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1508 	    (ifa->ifa_broadcast &&
1509 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1510 	    (ifa->ifa_label[0] &&
1511 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1512 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1513 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1514 			  preferred, valid))
1515 		goto nla_put_failure;
1516 
1517 	return nlmsg_end(skb, nlh);
1518 
1519 nla_put_failure:
1520 	nlmsg_cancel(skb, nlh);
1521 	return -EMSGSIZE;
1522 }
1523 
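/*
 * RTM_GETADDR dump callback: walk the per-namespace device hash and
 * emit one message per address, resuming from the hash bucket, device
 * and address indices saved in cb->args[] on the previous call.
 */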
1524 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1525 {
1526 	struct net *net = sock_net(skb->sk);
1527 	int h, s_h;
1528 	int idx, s_idx;
1529 	int ip_idx, s_ip_idx;
1530 	struct net_device *dev;
1531 	struct in_device *in_dev;
1532 	struct in_ifaddr *ifa;
1533 	struct hlist_head *head;
1534 
1535 	s_h = cb->args[0];
1536 	s_idx = idx = cb->args[1];
1537 	s_ip_idx = ip_idx = cb->args[2];
1538 
1539 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1540 		idx = 0;
1541 		head = &net->dev_index_head[h];
1542 		rcu_read_lock();
1543 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1544 			  net->dev_base_seq;
1545 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1546 			if (idx < s_idx)
1547 				goto cont;
1548 			if (h > s_h || idx > s_idx)
1549 				s_ip_idx = 0;
1550 			in_dev = __in_dev_get_rcu(dev);
1551 			if (!in_dev)
1552 				goto cont;
1553 
1554 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1555 			     ifa = ifa->ifa_next, ip_idx++) {
1556 				if (ip_idx < s_ip_idx)
1557 					continue;
1558 				if (inet_fill_ifaddr(skb, ifa,
1559 					     NETLINK_CB(cb->skb).portid,
1560 					     cb->nlh->nlmsg_seq,
1561 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1562 					rcu_read_unlock();
1563 					goto done;
1564 				}
1565 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1566 			}
1567 cont:
1568 			idx++;
1569 		}
1570 		rcu_read_unlock();
1571 	}
1572 
1573 done:
1574 	cb->args[0] = h;
1575 	cb->args[1] = idx;
1576 	cb->args[2] = ip_idx;
1577 
1578 	return skb->len;
1579 }
1580 
1581 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1582 		      u32 portid)
1583 {
1584 	struct sk_buff *skb;
1585 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1586 	int err = -ENOBUFS;
1587 	struct net *net;
1588 
1589 	net = dev_net(ifa->ifa_dev->dev);
1590 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1591 	if (skb == NULL)
1592 		goto errout;
1593 
1594 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1595 	if (err < 0) {
1596 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1597 		WARN_ON(err == -EMSGSIZE);
1598 		kfree_skb(skb);
1599 		goto errout;
1600 	}
1601 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1602 	return;
1603 errout:
1604 	if (err < 0)
1605 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1606 }
1607 
1608 static size_t inet_get_link_af_size(const struct net_device *dev)
1609 {
1610 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1611 
1612 	if (!in_dev)
1613 		return 0;
1614 
1615 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1616 }
1617 
1618 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1619 {
1620 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1621 	struct nlattr *nla;
1622 	int i;
1623 
1624 	if (!in_dev)
1625 		return -ENODATA;
1626 
1627 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1628 	if (nla == NULL)
1629 		return -EMSGSIZE;
1630 
1631 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1632 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1633 
1634 	return 0;
1635 }
1636 
1637 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1638 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1639 };
1640 
1641 static int inet_validate_link_af(const struct net_device *dev,
1642 				 const struct nlattr *nla)
1643 {
1644 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1645 	int err, rem;
1646 
1647 	if (dev && !__in_dev_get_rtnl(dev))
1648 		return -EAFNOSUPPORT;
1649 
1650 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1651 	if (err < 0)
1652 		return err;
1653 
1654 	if (tb[IFLA_INET_CONF]) {
1655 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1656 			int cfgid = nla_type(a);
1657 
1658 			if (nla_len(a) < 4)
1659 				return -EINVAL;
1660 
1661 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1662 				return -EINVAL;
1663 		}
1664 	}
1665 
1666 	return 0;
1667 }
1668 
1669 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1670 {
1671 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1672 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1673 	int rem;
1674 
1675 	if (!in_dev)
1676 		return -EAFNOSUPPORT;
1677 
1678 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1679 		BUG();
1680 
1681 	if (tb[IFLA_INET_CONF]) {
1682 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1683 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1684 	}
1685 
1686 	return 0;
1687 }
1688 
1689 static int inet_netconf_msgsize_devconf(int type)
1690 {
1691 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1692 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1693 
1694 	/* type -1 is used for ALL */
1695 	if (type == -1 || type == NETCONFA_FORWARDING)
1696 		size += nla_total_size(4);
1697 	if (type == -1 || type == NETCONFA_RP_FILTER)
1698 		size += nla_total_size(4);
1699 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1700 		size += nla_total_size(4);
1701 	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1702 		size += nla_total_size(4);
1703 
1704 	return size;
1705 }
1706 
1707 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1708 				     struct ipv4_devconf *devconf, u32 portid,
1709 				     u32 seq, int event, unsigned int flags,
1710 				     int type)
1711 {
1712 	struct nlmsghdr  *nlh;
1713 	struct netconfmsg *ncm;
1714 
1715 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1716 			flags);
1717 	if (nlh == NULL)
1718 		return -EMSGSIZE;
1719 
1720 	ncm = nlmsg_data(nlh);
1721 	ncm->ncm_family = AF_INET;
1722 
1723 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1724 		goto nla_put_failure;
1725 
1726 	/* type -1 is used for ALL */
1727 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1728 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1729 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1730 		goto nla_put_failure;
1731 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1732 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1733 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1734 		goto nla_put_failure;
1735 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1736 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1737 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1738 		goto nla_put_failure;
1739 	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1740 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1741 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1742 		goto nla_put_failure;
1743 
1744 	return nlmsg_end(skb, nlh);
1745 
1746 nla_put_failure:
1747 	nlmsg_cancel(skb, nlh);
1748 	return -EMSGSIZE;
1749 }
1750 
1751 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1752 				 struct ipv4_devconf *devconf)
1753 {
1754 	struct sk_buff *skb;
1755 	int err = -ENOBUFS;
1756 
1757 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1758 	if (skb == NULL)
1759 		goto errout;
1760 
1761 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1762 					RTM_NEWNETCONF, 0, type);
1763 	if (err < 0) {
1764 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1765 		WARN_ON(err == -EMSGSIZE);
1766 		kfree_skb(skb);
1767 		goto errout;
1768 	}
1769 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1770 	return;
1771 errout:
1772 	if (err < 0)
1773 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1774 }
1775 
1776 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1777 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1778 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1779 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1780 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
1781 };
1782 
1783 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1784 				    struct nlmsghdr *nlh)
1785 {
1786 	struct net *net = sock_net(in_skb->sk);
1787 	struct nlattr *tb[NETCONFA_MAX+1];
1788 	struct netconfmsg *ncm;
1789 	struct sk_buff *skb;
1790 	struct ipv4_devconf *devconf;
1791 	struct in_device *in_dev;
1792 	struct net_device *dev;
1793 	int ifindex;
1794 	int err;
1795 
1796 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1797 			  devconf_ipv4_policy);
1798 	if (err < 0)
1799 		goto errout;
1800 
1801 	err = -EINVAL;
1802 	if (!tb[NETCONFA_IFINDEX])
1803 		goto errout;
1804 
1805 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1806 	switch (ifindex) {
1807 	case NETCONFA_IFINDEX_ALL:
1808 		devconf = net->ipv4.devconf_all;
1809 		break;
1810 	case NETCONFA_IFINDEX_DEFAULT:
1811 		devconf = net->ipv4.devconf_dflt;
1812 		break;
1813 	default:
1814 		dev = __dev_get_by_index(net, ifindex);
1815 		if (dev == NULL)
1816 			goto errout;
1817 		in_dev = __in_dev_get_rtnl(dev);
1818 		if (in_dev == NULL)
1819 			goto errout;
1820 		devconf = &in_dev->cnf;
1821 		break;
1822 	}
1823 
1824 	err = -ENOBUFS;
1825 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1826 	if (skb == NULL)
1827 		goto errout;
1828 
1829 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1830 					NETLINK_CB(in_skb).portid,
1831 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1832 					-1);
1833 	if (err < 0) {
1834 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1835 		WARN_ON(err == -EMSGSIZE);
1836 		kfree_skb(skb);
1837 		goto errout;
1838 	}
1839 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1840 errout:
1841 	return err;
1842 }
1843 
1844 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1845 				     struct netlink_callback *cb)
1846 {
1847 	struct net *net = sock_net(skb->sk);
1848 	int h, s_h;
1849 	int idx, s_idx;
1850 	struct net_device *dev;
1851 	struct in_device *in_dev;
1852 	struct hlist_head *head;
1853 
1854 	s_h = cb->args[0];
1855 	s_idx = idx = cb->args[1];
1856 
1857 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1858 		idx = 0;
1859 		head = &net->dev_index_head[h];
1860 		rcu_read_lock();
1861 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1862 			  net->dev_base_seq;
1863 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1864 			if (idx < s_idx)
1865 				goto cont;
1866 			in_dev = __in_dev_get_rcu(dev);
1867 			if (!in_dev)
1868 				goto cont;
1869 
1870 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1871 						      &in_dev->cnf,
1872 						      NETLINK_CB(cb->skb).portid,
1873 						      cb->nlh->nlmsg_seq,
1874 						      RTM_NEWNETCONF,
1875 						      NLM_F_MULTI,
1876 						      -1) <= 0) {
1877 				rcu_read_unlock();
1878 				goto done;
1879 			}
1880 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1881 cont:
1882 			idx++;
1883 		}
1884 		rcu_read_unlock();
1885 	}
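	/* After walking all real devices, append pseudo entries for the
	 * "all" and "default" devconf, using two extra values of h so a
	 * partial dump can resume at the right place.
	 */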
1886 	if (h == NETDEV_HASHENTRIES) {
1887 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1888 					      net->ipv4.devconf_all,
1889 					      NETLINK_CB(cb->skb).portid,
1890 					      cb->nlh->nlmsg_seq,
1891 					      RTM_NEWNETCONF, NLM_F_MULTI,
1892 					      -1) <= 0)
1893 			goto done;
1894 		else
1895 			h++;
1896 	}
1897 	if (h == NETDEV_HASHENTRIES + 1) {
1898 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1899 					      net->ipv4.devconf_dflt,
1900 					      NETLINK_CB(cb->skb).portid,
1901 					      cb->nlh->nlmsg_seq,
1902 					      RTM_NEWNETCONF, NLM_F_MULTI,
1903 					      -1) <= 0)
1904 			goto done;
1905 		else
1906 			h++;
1907 	}
1908 done:
1909 	cb->args[0] = h;
1910 	cb->args[1] = idx;
1911 
1912 	return skb->len;
1913 }
1914 
1915 #ifdef CONFIG_SYSCTL
1916 
1917 static void devinet_copy_dflt_conf(struct net *net, int i)
1918 {
1919 	struct net_device *dev;
1920 
1921 	rcu_read_lock();
1922 	for_each_netdev_rcu(net, dev) {
1923 		struct in_device *in_dev;
1924 
1925 		in_dev = __in_dev_get_rcu(dev);
1926 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1927 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1928 	}
1929 	rcu_read_unlock();
1930 }
1931 
1932 /* called with RTNL locked */
1933 static void inet_forward_change(struct net *net)
1934 {
1935 	struct net_device *dev;
1936 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1937 
1938 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1939 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1940 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941 				    NETCONFA_IFINDEX_ALL,
1942 				    net->ipv4.devconf_all);
1943 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944 				    NETCONFA_IFINDEX_DEFAULT,
1945 				    net->ipv4.devconf_dflt);
1946 
1947 	for_each_netdev(net, dev) {
1948 		struct in_device *in_dev;
1949 		if (on)
1950 			dev_disable_lro(dev);
1951 		rcu_read_lock();
1952 		in_dev = __in_dev_get_rcu(dev);
1953 		if (in_dev) {
1954 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1955 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1956 						    dev->ifindex, &in_dev->cnf);
1957 		}
1958 		rcu_read_unlock();
1959 	}
1960 }
1961 
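/*
 * Map a devconf block back to the ifindex used in netconf notifications:
 * the shared "default" and "all" blocks use their reserved pseudo-indexes,
 * anything else is embedded in an in_device and reports its device.
 */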
1962 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1963 {
1964 	if (cnf == net->ipv4.devconf_dflt)
1965 		return NETCONFA_IFINDEX_DEFAULT;
1966 	else if (cnf == net->ipv4.devconf_all)
1967 		return NETCONFA_IFINDEX_ALL;
1968 	else {
1969 		struct in_device *idev
1970 			= container_of(cnf, struct in_device, cnf);
1971 		return idev->dev->ifindex;
1972 	}
1973 }
1974 
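/*
 * Generic handler for the per-interface sysctls: mark the entry as
 * explicitly set, mirror writes to "default" into unmodified devices,
 * flush the route cache when accept_local or route_localnet is turned
 * off, and notify userspace of rp_filter and proxy_arp changes.
 */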
1975 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1976 			     void __user *buffer,
1977 			     size_t *lenp, loff_t *ppos)
1978 {
1979 	int old_value = *(int *)ctl->data;
1980 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1981 	int new_value = *(int *)ctl->data;
1982 
1983 	if (write) {
1984 		struct ipv4_devconf *cnf = ctl->extra1;
1985 		struct net *net = ctl->extra2;
1986 		int i = (int *)ctl->data - cnf->data;
1987 		int ifindex;
1988 
1989 		set_bit(i, cnf->state);
1990 
1991 		if (cnf == net->ipv4.devconf_dflt)
1992 			devinet_copy_dflt_conf(net, i);
1993 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1994 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1995 			if ((new_value == 0) && (old_value != 0))
1996 				rt_cache_flush(net);
1997 
1998 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1999 		    new_value != old_value) {
2000 			ifindex = devinet_conf_ifindex(net, cnf);
2001 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2002 						    ifindex, cnf);
2003 		}
2004 		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2005 		    new_value != old_value) {
2006 			ifindex = devinet_conf_ifindex(net, cnf);
2007 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2008 						    ifindex, cnf);
2009 		}
2010 	}
2011 
2012 	return ret;
2013 }
2014 
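/*
 * Handler for the "forwarding" sysctls, e.g. a write to
 * /proc/sys/net/ipv4/conf/eth0/forwarding ends up here.  Everything except
 * the "default" entry needs RTNL: if the lock cannot be taken the old
 * value and file position are restored and the syscall is restarted.
 * Writes to "all" fan out through inet_forward_change(); a per-device
 * write disables LRO when enabling forwarding and sends its own
 * NETCONFA_FORWARDING notification.
 */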
2015 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2016 				  void __user *buffer,
2017 				  size_t *lenp, loff_t *ppos)
2018 {
2019 	int *valp = ctl->data;
2020 	int val = *valp;
2021 	loff_t pos = *ppos;
2022 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2023 
2024 	if (write && *valp != val) {
2025 		struct net *net = ctl->extra2;
2026 
2027 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2028 			if (!rtnl_trylock()) {
2029 				/* Restore the original values before restarting */
2030 				*valp = val;
2031 				*ppos = pos;
2032 				return restart_syscall();
2033 			}
2034 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2035 				inet_forward_change(net);
2036 			} else {
2037 				struct ipv4_devconf *cnf = ctl->extra1;
2038 				struct in_device *idev =
2039 					container_of(cnf, struct in_device, cnf);
2040 				if (*valp)
2041 					dev_disable_lro(idev->dev);
2042 				inet_netconf_notify_devconf(net,
2043 							    NETCONFA_FORWARDING,
2044 							    idev->dev->ifindex,
2045 							    cnf);
2046 			}
2047 			rtnl_unlock();
2048 			rt_cache_flush(net);
2049 		} else
2050 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2051 						    NETCONFA_IFINDEX_DEFAULT,
2052 						    net->ipv4.devconf_dflt);
2053 	}
2054 
2055 	return ret;
2056 }
2057 
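/*
 * Plain integer handler that flushes the routing cache whenever the
 * written value differs from the old one (disable_xfrm, disable_policy,
 * promote_secondaries, route_localnet).
 */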
2058 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2059 				void __user *buffer,
2060 				size_t *lenp, loff_t *ppos)
2061 {
2062 	int *valp = ctl->data;
2063 	int val = *valp;
2064 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2065 	struct net *net = ctl->extra2;
2066 
2067 	if (write && *valp != val)
2068 		rt_cache_flush(net);
2069 
2070 	return ret;
2071 }
2072 
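/*
 * Template entries for the per-interface sysctl table.  .data and .extra1
 * initially point into the global ipv4_devconf; __devinet_sysctl_register()
 * re-targets each copy at the devconf block actually being registered.
 */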
2073 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2074 	{ \
2075 		.procname	= name, \
2076 		.data		= ipv4_devconf.data + \
2077 				  IPV4_DEVCONF_ ## attr - 1, \
2078 		.maxlen		= sizeof(int), \
2079 		.mode		= mval, \
2080 		.proc_handler	= proc, \
2081 		.extra1		= &ipv4_devconf, \
2082 	}
2083 
2084 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2085 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2086 
2087 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2088 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2089 
2090 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2091 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2092 
2093 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2094 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2095 
2096 static struct devinet_sysctl_table {
2097 	struct ctl_table_header *sysctl_header;
2098 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2099 } devinet_sysctl = {
2100 	.devinet_vars = {
2101 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2102 					     devinet_sysctl_forward),
2103 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2104 
2105 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2106 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2107 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2108 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2109 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2110 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2111 					"accept_source_route"),
2112 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2113 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2114 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2115 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2116 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2117 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2118 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2119 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2120 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2121 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2122 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2123 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2124 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2125 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2126 					"force_igmp_version"),
2127 		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2128 					"igmpv2_unsolicited_report_interval"),
2129 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2130 					"igmpv3_unsolicited_report_interval"),
2131 
2132 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2133 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2134 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2135 					      "promote_secondaries"),
2136 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2137 					      "route_localnet"),
2138 	},
2139 };
2140 
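/*
 * Duplicate the template table, point every entry except the zero
 * terminator at @p and @net, and register it under
 * "net/ipv4/conf/<dev_name>" (e.g. net/ipv4/conf/eth0, .../all, .../default).
 */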
2141 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2142 					struct ipv4_devconf *p)
2143 {
2144 	int i;
2145 	struct devinet_sysctl_table *t;
2146 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2147 
2148 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2149 	if (!t)
2150 		goto out;
2151 
2152 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2153 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2154 		t->devinet_vars[i].extra1 = p;
2155 		t->devinet_vars[i].extra2 = net;
2156 	}
2157 
2158 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2159 
2160 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2161 	if (!t->sysctl_header)
2162 		goto free;
2163 
2164 	p->sysctl = t;
2165 	return 0;
2166 
2167 free:
2168 	kfree(t);
2169 out:
2170 	return -ENOBUFS;
2171 }
2172 
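/* Undo __devinet_sysctl_register() for a devconf block, if it succeeded. */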
2173 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2174 {
2175 	struct devinet_sysctl_table *t = cnf->sysctl;
2176 
2177 	if (t == NULL)
2178 		return;
2179 
2180 	cnf->sysctl = NULL;
2181 	unregister_net_sysctl_table(t->sysctl_header);
2182 	kfree(t);
2183 }
2184 
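/*
 * Register the neighbour (ARP) and devinet sysctls for a new in_device.
 * A failure of __devinet_sysctl_register() is not propagated here; the
 * device simply comes up without its net/ipv4/conf/<dev> directory.
 */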
2185 static void devinet_sysctl_register(struct in_device *idev)
2186 {
2187 	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2188 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2189 					&idev->cnf);
2190 }
2191 
2192 static void devinet_sysctl_unregister(struct in_device *idev)
2193 {
2194 	__devinet_sysctl_unregister(&idev->cnf);
2195 	neigh_sysctl_unregister(idev->arp_parms);
2196 }
2197 
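/*
 * The legacy /proc/sys/net/ipv4/ip_forward knob, aliased onto the
 * FORWARDING entry of the "all" configuration.
 */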
2198 static struct ctl_table ctl_forward_entry[] = {
2199 	{
2200 		.procname	= "ip_forward",
2201 		.data		= &ipv4_devconf.data[
2202 					IPV4_DEVCONF_FORWARDING - 1],
2203 		.maxlen		= sizeof(int),
2204 		.mode		= 0644,
2205 		.proc_handler	= devinet_sysctl_forward,
2206 		.extra1		= &ipv4_devconf,
2207 		.extra2		= &init_net,
2208 	},
2209 	{ },
2210 };
2211 #endif
2212 
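/*
 * Per-namespace setup: the initial namespace uses the static devconf
 * blocks and forwarding table directly, while other namespaces get
 * kmemdup()ed copies so their settings stay independent.
 */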
2213 static __net_init int devinet_init_net(struct net *net)
2214 {
2215 	int err;
2216 	struct ipv4_devconf *all, *dflt;
2217 #ifdef CONFIG_SYSCTL
2218 	struct ctl_table *tbl = ctl_forward_entry;
2219 	struct ctl_table_header *forw_hdr;
2220 #endif
2221 
2222 	err = -ENOMEM;
2223 	all = &ipv4_devconf;
2224 	dflt = &ipv4_devconf_dflt;
2225 
2226 	if (!net_eq(net, &init_net)) {
2227 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2228 		if (all == NULL)
2229 			goto err_alloc_all;
2230 
2231 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2232 		if (dflt == NULL)
2233 			goto err_alloc_dflt;
2234 
2235 #ifdef CONFIG_SYSCTL
2236 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2237 		if (tbl == NULL)
2238 			goto err_alloc_ctl;
2239 
2240 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2241 		tbl[0].extra1 = all;
2242 		tbl[0].extra2 = net;
2243 #endif
2244 	}
2245 
2246 #ifdef CONFIG_SYSCTL
2247 	err = __devinet_sysctl_register(net, "all", all);
2248 	if (err < 0)
2249 		goto err_reg_all;
2250 
2251 	err = __devinet_sysctl_register(net, "default", dflt);
2252 	if (err < 0)
2253 		goto err_reg_dflt;
2254 
2255 	err = -ENOMEM;
2256 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2257 	if (forw_hdr == NULL)
2258 		goto err_reg_ctl;
2259 	net->ipv4.forw_hdr = forw_hdr;
2260 #endif
2261 
2262 	net->ipv4.devconf_all = all;
2263 	net->ipv4.devconf_dflt = dflt;
2264 	return 0;
2265 
2266 #ifdef CONFIG_SYSCTL
2267 err_reg_ctl:
2268 	__devinet_sysctl_unregister(dflt);
2269 err_reg_dflt:
2270 	__devinet_sysctl_unregister(all);
2271 err_reg_all:
2272 	if (tbl != ctl_forward_entry)
2273 		kfree(tbl);
2274 err_alloc_ctl:
2275 #endif
2276 	if (dflt != &ipv4_devconf_dflt)
2277 		kfree(dflt);
2278 err_alloc_dflt:
2279 	if (all != &ipv4_devconf)
2280 		kfree(all);
2281 err_alloc_all:
2282 	return err;
2283 }
2284 
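/*
 * Per-namespace teardown: unregister the forwarding and devconf sysctls
 * and free the namespace's devconf copies.
 */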
2285 static __net_exit void devinet_exit_net(struct net *net)
2286 {
2287 #ifdef CONFIG_SYSCTL
2288 	struct ctl_table *tbl;
2289 
2290 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2291 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2292 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2293 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2294 	kfree(tbl);
2295 #endif
2296 	kfree(net->ipv4.devconf_dflt);
2297 	kfree(net->ipv4.devconf_all);
2298 }
2299 
2300 static __net_initdata struct pernet_operations devinet_ops = {
2301 	.init = devinet_init_net,
2302 	.exit = devinet_exit_net,
2303 };
2304 
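/*
 * rtnetlink address-family hooks for AF_INET, used to carry per-link
 * devconf settings in the IFLA_AF_SPEC attribute of link messages.
 */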
2305 static struct rtnl_af_ops inet_af_ops = {
2306 	.family		  = AF_INET,
2307 	.fill_link_af	  = inet_fill_link_af,
2308 	.get_link_af_size = inet_get_link_af_size,
2309 	.validate_link_af = inet_validate_link_af,
2310 	.set_link_af	  = inet_set_link_af,
2311 };
2312 
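/*
 * Boot-time initialisation: set up the inet address hash, register the
 * pernet operations, the SIOCGIFCONF helper and the netdevice notifier,
 * kick off the address-lifetime worker and hook up the rtnetlink
 * address/netconf message handlers.
 */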
2313 void __init devinet_init(void)
2314 {
2315 	int i;
2316 
2317 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2318 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2319 
2320 	register_pernet_subsys(&devinet_ops);
2321 
2322 	register_gifconf(PF_INET, inet_gifconf);
2323 	register_netdevice_notifier(&ip_netdev_notifier);
2324 
2325 	schedule_delayed_work(&check_lifetime_work, 0);
2326 
2327 	rtnl_af_register(&inet_af_ops);
2328 
2329 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2330 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2331 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2332 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2333 		      inet_netconf_dump_devconf, NULL);
2334 }
2335 
2336