xref: /linux/net/ipv4/devinet.c (revision 2d87650a3bf1b80f7d0d150ee1af3f8a89e5b7aa)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
/* Baseline per-device IPv4 configuration (presumably cloned for the
 * "all" devconf at namespace init -- the setup code is outside this view).
 * UAPI enum values are 1-based, hence the "- 1" when indexing .data.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
80 
/* Template for newly created in_devices (see inetdev_init(), which
 * copies net->ipv4.devconf_dflt). Differs from ipv4_devconf above by
 * additionally enabling ACCEPT_SOURCE_ROUTE.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
92 
/* Shorthand accessor for the per-namespace "default" devconf values. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)

/* Netlink attribute validation policy for RTM_{NEW,DEL}ADDR requests. */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
};
104 
/* Global hash of configured local addresses (all namespaces share it;
 * the hash is perturbed per namespace, see inet_addr_hash()).
 * Readers traverse chains under RCU; writers serialize on
 * inet_addr_hash_lock.
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
110 
111 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 {
113 	u32 val = (__force u32) addr ^ net_hash_mix(net);
114 
115 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 }
117 
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 {
120 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
121 
122 	spin_lock(&inet_addr_hash_lock);
123 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124 	spin_unlock(&inet_addr_hash_lock);
125 }
126 
/* Unlink @ifa from the global address hash. RCU readers may still see
 * the entry until a grace period elapses; actual freeing is deferred
 * via inet_free_ifa().
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
133 
134 /**
135  * __ip_dev_find - find the first device with a given source address.
136  * @net: the net namespace
137  * @addr: the source address
138  * @devref: if true, take a reference on the found device
139  *
140  * If a caller uses devref=false, it should be protected by RCU, or RTNL
141  */
142 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
143 {
144 	u32 hash = inet_addr_hash(net, addr);
145 	struct net_device *result = NULL;
146 	struct in_ifaddr *ifa;
147 
148 	rcu_read_lock();
149 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
150 		if (ifa->ifa_local == addr) {
151 			struct net_device *dev = ifa->ifa_dev->dev;
152 
153 			if (!net_eq(dev_net(dev), net))
154 				continue;
155 			result = dev;
156 			break;
157 		}
158 	}
159 	if (!result) {
160 		struct flowi4 fl4 = { .daddr = addr };
161 		struct fib_result res = { 0 };
162 		struct fib_table *local;
163 
164 		/* Fallback to FIB local table so that communication
165 		 * over loopback subnets work.
166 		 */
167 		local = fib_get_table(net, RT_TABLE_LOCAL);
168 		if (local &&
169 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170 		    res.type == RTN_LOCAL)
171 			result = FIB_RES_DEV(res);
172 	}
173 	if (result && devref)
174 		dev_hold(result);
175 	rcu_read_unlock();
176 	return result;
177 }
178 EXPORT_SYMBOL(__ip_dev_find);
179 
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain fired with NETDEV_UP/NETDEV_DOWN on address changes. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the sysctl register/unregister hooks are no-ops. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
196 
197 /* Locks all the inet devices. */
198 
/* Allocate a zeroed address entry; released via inet_free_ifa() (RCU). */
static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}
203 
/* RCU callback: drop the in_device reference (if the address was ever
 * attached to one) and free the entry itself.
 */
static void inet_rcu_free_ifa(struct rcu_head *head)
{
	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
	if (ifa->ifa_dev)
		in_dev_put(ifa->ifa_dev);
	kfree(ifa);
}
211 
/* Defer freeing @ifa until after an RCU grace period, so lockless
 * readers of the address hash can finish with it safely.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
216 
/* Final teardown, invoked when the last reference to @idev is dropped.
 * By now all addresses and multicast state must already be gone.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	WARN_ON(idev->ifa_list);
	WARN_ON(idev->mc_list);
	kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
	/* Release the device reference taken in inetdev_init(). */
	dev_put(dev);
	if (!idev->dead)
		pr_err("Freeing alive in_device %p\n", idev);
	else
		kfree(idev);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
234 
/* Create and attach the IPv4 state (in_device) for @dev, seeded from the
 * namespace's default devconf. Returns the new in_device or NULL on
 * allocation failure. Caller holds RTNL.
 */
static struct in_device *inetdev_init(struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
	if (!in_dev)
		goto out;
	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
			sizeof(in_dev->cnf));
	/* The memcpy above copied the template's sysctl pointer too;
	 * this in_device gets its own registration below. */
	in_dev->cnf.sysctl = NULL;
	in_dev->dev = dev;
	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
	if (!in_dev->arp_parms)
		goto out_kfree;
	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
		dev_disable_lro(dev);
	/* Reference in_dev->dev */
	dev_hold(dev);
	/* Account for reference dev->ip_ptr (below) */
	in_dev_hold(in_dev);

	devinet_sysctl_register(in_dev);
	ip_mc_init_dev(in_dev);
	if (dev->flags & IFF_UP)
		ip_mc_up(in_dev);

	/* we can receive as soon as ip_ptr is set -- do this last */
	rcu_assign_pointer(dev->ip_ptr, in_dev);
out:
	return in_dev;
out_kfree:
	kfree(in_dev);
	in_dev = NULL;
	goto out;
}
272 
/* RCU callback dropping the reference formerly held by dev->ip_ptr. */
static void in_dev_rcu_put(struct rcu_head *head)
{
	struct in_device *idev = container_of(head, struct in_device, rcu_head);
	in_dev_put(idev);
}
278 
/* Tear down @in_dev: delete every address, unpublish dev->ip_ptr, drop
 * sysctl/neighbour state and schedule the final put after a grace
 * period. Caller holds RTNL.
 */
static void inetdev_destroy(struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	struct net_device *dev;

	ASSERT_RTNL();

	dev = in_dev->dev;

	/* Marks the in_device so in_dev_finish_destroy() accepts it. */
	in_dev->dead = 1;

	ip_mc_destroy_dev(in_dev);

	while ((ifa = in_dev->ifa_list) != NULL) {
		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
		inet_free_ifa(ifa);
	}

	RCU_INIT_POINTER(dev->ip_ptr, NULL);

	devinet_sysctl_unregister(in_dev);
	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
	arp_ifdown(dev);

	/* Defer the final in_dev_put() past concurrent RCU readers. */
	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
305 
306 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
307 {
308 	rcu_read_lock();
309 	for_primary_ifa(in_dev) {
310 		if (inet_ifa_match(a, ifa)) {
311 			if (!b || inet_ifa_match(b, ifa)) {
312 				rcu_read_unlock();
313 				return 1;
314 			}
315 		}
316 	} endfor_ifa(in_dev);
317 	rcu_read_unlock();
318 	return 0;
319 }
320 
/* Delete *@ifap from @in_dev's list. Deleting a primary either removes
 * its secondaries too or, when promote_secondaries is enabled, promotes
 * the first matching secondary to primary and rebuilds its routes.
 * @destroy: also free ifa1; @nlh/@portid tag the RTM_DELADDR message.
 * Caller holds RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary of suitable scope;
			 * a promoted secondary is re-linked after it. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip addresses outside ifa1's subnet. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: drop this secondary too. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted entry up behind the last primary. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for remaining secondaries with the
		 * promoted address as their new prefsrc. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
420 
/* Delete an address without a netlink requester (no nlh/portid). */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
426 
static void check_lifetime(struct work_struct *work);

/* Deferred worker that expires/deprecates addresses with finite lifetimes. */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
430 
/* Insert @ifa into its device's list, classifying it as primary or
 * secondary, then announce it via netlink and the notifier chain.
 * Returns 0, -EEXIST for an exact duplicate, -EINVAL on scope mismatch;
 * consumes @ifa in every failure path. Caller holds RTNL.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		/* Address 0.0.0.0: nothing to insert. */
		inet_free_ifa(ifa);
		return 0;
	}

	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		/* Primaries stay grouped at the front of the list. */
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			/* Subnet already has a primary: this one is an alias. */
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Re-arm the lifetime worker so the new address is accounted for. */
	cancel_delayed_work(&check_lifetime_work);
	schedule_delayed_work(&check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
487 
/* Insert an address without a netlink requester (no nlh/portid). */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
492 
/* Attach @ifa to @dev's in_device (taking a reference the first time)
 * and insert it. Consumes @ifa on failure. Caller holds RTNL.
 */
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		inet_free_ifa(ifa);
		return -ENOBUFS;
	}
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	if (ifa->ifa_dev != in_dev) {
		WARN_ON(ifa->ifa_dev);
		in_dev_hold(in_dev);
		ifa->ifa_dev = in_dev;
	}
	/* Loopback addresses never leave the host. */
	if (ipv4_is_loopback(ifa->ifa_local))
		ifa->ifa_scope = RT_SCOPE_HOST;
	return inet_insert_ifa(ifa);
}
514 
/* Caller must hold RCU or RTNL :
 * We dont take a reference on found in_device
 */
struct in_device *inetdev_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct in_device *in_dev = NULL;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
	rcu_read_unlock();
	/* NULL when the ifindex is unknown or the device has no IPv4 state. */
	return in_dev;
}
EXPORT_SYMBOL(inetdev_by_index);
531 
532 /* Called only from RTNL semaphored context. No locks. */
533 
534 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
535 				    __be32 mask)
536 {
537 	ASSERT_RTNL();
538 
539 	for_primary_ifa(in_dev) {
540 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
541 			return ifa;
542 	} endfor_ifa(in_dev);
543 	return NULL;
544 }
545 
/* RTM_DELADDR handler: find the address described by the request on the
 * given interface and delete it. Every attribute present (IFA_LOCAL,
 * IFA_LABEL, IFA_ADDRESS+prefixlen) narrows the match.
 */
static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFA_MAX+1];
	struct in_device *in_dev;
	struct ifaddrmsg *ifm;
	struct in_ifaddr *ifa, **ifap;
	int err = -EINVAL;

	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	in_dev = inetdev_by_index(net, ifm->ifa_index);
	if (in_dev == NULL) {
		err = -ENODEV;
		goto errout;
	}

	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
	     ifap = &ifa->ifa_next) {
		if (tb[IFA_LOCAL] &&
		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
			continue;

		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
			continue;

		if (tb[IFA_ADDRESS] &&
		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
			continue;

		/* First match wins; destroy it and report success. */
		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
		return 0;
	}

	err = -EADDRNOTAVAIL;
errout:
	return err;
}
590 
/* Sentinel: address lifetime never expires. */
#define INFINITY_LIFE_TIME	0xFFFFFFFF

/* Delayed work: scan the whole address hash, delete addresses whose
 * valid lifetime has passed, deprecate those past their preferred
 * lifetime, then reschedule itself for the earliest upcoming deadline.
 */
static void check_lifetime(struct work_struct *work)
{
	unsigned long now, next, next_sec, next_sched;
	struct in_ifaddr *ifa;
	struct hlist_node *n;
	int i;

	now = jiffies;
	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
		bool change_needed = false;

		/* First pass under RCU: cheap read-only scan to decide
		 * whether this chain needs modification at all. */
		rcu_read_lock();
		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				change_needed = true;
			} else if (ifa->ifa_preferred_lft ==
				   INFINITY_LIFE_TIME) {
				continue;
			} else if (age >= ifa->ifa_preferred_lft) {
				if (time_before(ifa->ifa_tstamp +
						ifa->ifa_valid_lft * HZ, next))
					next = ifa->ifa_tstamp +
					       ifa->ifa_valid_lft * HZ;

				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
					change_needed = true;
			} else if (time_before(ifa->ifa_tstamp +
					       ifa->ifa_preferred_lft * HZ,
					       next)) {
				next = ifa->ifa_tstamp +
				       ifa->ifa_preferred_lft * HZ;
			}
		}
		rcu_read_unlock();
		if (!change_needed)
			continue;
		/* Second pass under RTNL performs the actual changes.
		 * NOTE(review): 'age' below still uses 'now' sampled
		 * before rtnl_lock(); a long lock wait could understate
		 * ages slightly -- confirm this is acceptable. */
		rtnl_lock();
		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
			unsigned long age;

			if (ifa->ifa_flags & IFA_F_PERMANENT)
				continue;

			/* We try to batch several events at once. */
			age = (now - ifa->ifa_tstamp +
			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
			    age >= ifa->ifa_valid_lft) {
				struct in_ifaddr **ifap;

				/* Valid lifetime over: unlink and delete. */
				for (ifap = &ifa->ifa_dev->ifa_list;
				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
					if (*ifap == ifa) {
						inet_del_ifa(ifa->ifa_dev,
							     ifap, 1);
						break;
					}
				}
			} else if (ifa->ifa_preferred_lft !=
				   INFINITY_LIFE_TIME &&
				   age >= ifa->ifa_preferred_lft &&
				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
				/* Preferred lifetime over: mark deprecated
				 * and notify listeners. */
				ifa->ifa_flags |= IFA_F_DEPRECATED;
				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
			}
		}
		rtnl_unlock();
	}

	next_sec = round_jiffies_up(next);
	next_sched = next;

	/* If rounded timeout is accurate enough, accept it. */
	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
		next_sched = next_sec;

	now = jiffies;
	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

	schedule_delayed_work(&check_lifetime_work, next_sched - now);
}
689 
/* Apply valid/preferred lifetimes (seconds; INFINITY_LIFE_TIME means
 * permanent) to @ifa and update its timestamps.
 */
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
			     __u32 prefered_lft)
{
	unsigned long timeout;

	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);

	timeout = addrconf_timeout_fixup(valid_lft, HZ);
	if (addrconf_finite_timeout(timeout))
		ifa->ifa_valid_lft = timeout;
	else
		ifa->ifa_flags |= IFA_F_PERMANENT;

	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
	if (addrconf_finite_timeout(timeout)) {
		/* A preferred lifetime of 0 deprecates immediately. */
		if (timeout == 0)
			ifa->ifa_flags |= IFA_F_DEPRECATED;
		ifa->ifa_preferred_lft = timeout;
	}
	ifa->ifa_tstamp = jiffies;
	/* Creation stamp is set only once, on first configuration. */
	if (!ifa->ifa_cstamp)
		ifa->ifa_cstamp = ifa->ifa_tstamp;
}
713 
/* Translate an RTM_NEWADDR message into a freshly allocated in_ifaddr
 * bound to its device (holding a reference on the in_device). Requested
 * lifetimes are returned via @pvalid_lft/@pprefered_lft. Returns the
 * address or an ERR_PTR on failure.
 */
static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
				       __u32 *pvalid_lft, __u32 *pprefered_lft)
{
	struct nlattr *tb[IFA_MAX+1];
	struct in_ifaddr *ifa;
	struct ifaddrmsg *ifm;
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
	if (err < 0)
		goto errout;

	ifm = nlmsg_data(nlh);
	err = -EINVAL;
	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
		goto errout;

	dev = __dev_get_by_index(net, ifm->ifa_index);
	err = -ENODEV;
	if (dev == NULL)
		goto errout;

	in_dev = __in_dev_get_rtnl(dev);
	err = -ENOBUFS;
	if (in_dev == NULL)
		goto errout;

	ifa = inet_alloc_ifa();
	if (ifa == NULL)
		/*
		 * A potential indev allocation can be left alive, it stays
		 * assigned to its device and is destroy with it.
		 */
		goto errout;

	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	/* Reference for ifa->ifa_dev below; dropped by inet_rcu_free_ifa(). */
	in_dev_hold(in_dev);

	/* IFA_ADDRESS defaults to IFA_LOCAL (non-pointopoint case). */
	if (tb[IFA_ADDRESS] == NULL)
		tb[IFA_ADDRESS] = tb[IFA_LOCAL];

	INIT_HLIST_NODE(&ifa->hash);
	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
	/* IFA_FLAGS (32-bit) supersedes the 8-bit header flags when given. */
	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
					 ifm->ifa_flags;
	ifa->ifa_scope = ifm->ifa_scope;
	ifa->ifa_dev = in_dev;

	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);

	if (tb[IFA_BROADCAST])
		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);

	if (tb[IFA_LABEL])
		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
	else
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);

	if (tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci;

		ci = nla_data(tb[IFA_CACHEINFO]);
		/* Preferred lifetime must not exceed valid lifetime. */
		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
			err = -EINVAL;
			goto errout_free;
		}
		*pvalid_lft = ci->ifa_valid;
		*pprefered_lft = ci->ifa_prefered;
	}

	return ifa;

errout_free:
	inet_free_ifa(ifa);
errout:
	return ERR_PTR(err);
}
796 
797 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
798 {
799 	struct in_device *in_dev = ifa->ifa_dev;
800 	struct in_ifaddr *ifa1, **ifap;
801 
802 	if (!ifa->ifa_local)
803 		return NULL;
804 
805 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
806 	     ifap = &ifa1->ifa_next) {
807 		if (ifa1->ifa_mask == ifa->ifa_mask &&
808 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
809 		    ifa1->ifa_local == ifa->ifa_local)
810 			return ifa1;
811 	}
812 	return NULL;
813 }
814 
/* RTM_NEWADDR handler: insert a new address, or - with NLM_F_REPLACE -
 * refresh the lifetimes of an existing identical one.
 */
static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct in_ifaddr *ifa;
	struct in_ifaddr *ifa_existing;
	__u32 valid_lft = INFINITY_LIFE_TIME;
	__u32 prefered_lft = INFINITY_LIFE_TIME;

	ASSERT_RTNL();

	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
	if (IS_ERR(ifa))
		return PTR_ERR(ifa);

	ifa_existing = find_matching_ifa(ifa);
	if (!ifa_existing) {
		/* It would be best to check for !NLM_F_CREATE here but
		 * userspace already relies on not having to provide this.
		 */
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
	} else {
		/* Duplicate: only lifetimes may change, and only when
		 * NLM_F_REPLACE (without NLM_F_EXCL) was requested. */
		inet_free_ifa(ifa);

		if (nlh->nlmsg_flags & NLM_F_EXCL ||
		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
			return -EEXIST;
		ifa = ifa_existing;
		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
		cancel_delayed_work(&check_lifetime_work);
		schedule_delayed_work(&check_lifetime_work, 0);
		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
	}
	return 0;
}
851 
852 /*
853  *	Determine a default network mask, based on the IP address.
854  */
855 
856 static int inet_abc_len(__be32 addr)
857 {
858 	int rc = -1;	/* Something else, probably a multicast. */
859 
860 	if (ipv4_is_zeronet(addr))
861 		rc = 0;
862 	else {
863 		__u32 haddr = ntohl(addr);
864 
865 		if (IN_CLASSA(haddr))
866 			rc = 8;
867 		else if (IN_CLASSB(haddr))
868 			rc = 16;
869 		else if (IN_CLASSC(haddr))
870 			rc = 24;
871 	}
872 
873 	return rc;
874 }
875 
876 
/* Handle the classic SIOC[GS]IF* address ioctls: copy the ifreq in,
 * resolve the (possibly aliased, "eth0:1") interface and address, then
 * dispatch per command. Get-commands copy the result back to userspace;
 * set-commands require CAP_NET_ADMIN and go through inet_del/insert_ifa.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* "dev:alias" -- strip the alias suffix for the device lookup. */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* Restore the alias suffix so label comparisons below see it. */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* SIOCSIFADDR may create a new address; SIOCSIFFLAGS without an
	 * alias acts on the device itself. All other commands need @ifa. */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Aliased name: downing the alias deletes its
			 * address rather than touching device flags. */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Re-insert below with the new address. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* Derive classful prefix/mask/broadcast. */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1118 
/*
 * inet_gifconf - fill a SIOCGIFCONF buffer with one ifreq per IPv4 address
 * on @dev.  A NULL @buf is a size probe: only count the bytes that would
 * be written.  Returns the number of bytes consumed, or -EFAULT if the
 * copy to userspace fails.  Caller holds RTNL (__in_dev_get_rtnl).
 */
static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct in_ifaddr *ifa;
	struct ifreq ifr;
	int done = 0;

	if (!in_dev)
		goto out;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		if (!buf) {
			/* Size probe: account for the entry, write nothing. */
			done += sizeof(ifr);
			continue;
		}
		/* Stop silently when the user buffer is exhausted. */
		if (len < (int) sizeof(ifr))
			break;
		memset(&ifr, 0, sizeof(struct ifreq));
		strcpy(ifr.ifr_name, ifa->ifa_label);

		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
								ifa->ifa_local;

		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
			done = -EFAULT;
			break;
		}
		buf  += sizeof(struct ifreq);
		len  -= sizeof(struct ifreq);
		done += sizeof(struct ifreq);
	}
out:
	return done;
}
1154 
/*
 * inet_select_addr - choose a source address for reaching @dst.
 * @dev: preferred output device
 * @dst: destination address, 0 = any
 * @scope: maximum acceptable address scope
 *
 * Prefer a primary address on @dev in @dst's subnet, falling back to the
 * first in-scope primary address on @dev.  If @dev yields nothing, scan
 * every device in the netns for a non-link-scope primary address within
 * scope.  Returns 0 when no suitable address exists.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
	__be32 addr = 0;
	struct in_device *in_dev;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		goto no_in_dev;

	for_primary_ifa(in_dev) {
		if (ifa->ifa_scope > scope)
			continue;
		if (!dst || inet_ifa_match(dst, ifa)) {
			addr = ifa->ifa_local;
			break;
		}
		/* Remember the first in-scope address as a fallback. */
		if (!addr)
			addr = ifa->ifa_local;
	} endfor_ifa(in_dev);

	if (addr)
		goto out_unlock;
no_in_dev:

	/* Non-loopback addresses on loopback should be preferred
	   in this case.  It is important that lo is the first interface
	   in dev_base list.
	 */
	for_each_netdev_rcu(net, dev) {
		in_dev = __in_dev_get_rcu(dev);
		if (!in_dev)
			continue;

		for_primary_ifa(in_dev) {
			if (ifa->ifa_scope != RT_SCOPE_LINK &&
			    ifa->ifa_scope <= scope) {
				addr = ifa->ifa_local;
				goto out_unlock;
			}
		} endfor_ifa(in_dev);
	}
out_unlock:
	rcu_read_unlock();
	return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1203 
/*
 * confirm_addr_indev - confirm @local exists on @in_dev, with wildcards.
 * @dst: if non-zero, the confirmed address must also match @dst's subnet
 * @local: address to confirm; 0 = autoselect an address within @scope
 * @scope: maximum allowed scope for an autoselected address
 *
 * Walks all addresses (primary and secondary) tracking two things:
 * @addr, the candidate local address, and @same, whether an address
 * satisfying both the @local and @dst constraints has been seen.
 * Returns the candidate only when both conditions were met, else 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1240 
1241 /*
1242  * Confirm that local IP address exists using wildcards:
1243  * - net: netns to check, cannot be NULL
1244  * - in_dev: only on this interface, NULL=any interface
1245  * - dst: only in the same subnet as dst, 0=any dst
1246  * - local: address, 0=autoselect the local address
1247  * - scope: maximum allowed scope value for the local address
1248  */
1249 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1250 			 __be32 dst, __be32 local, int scope)
1251 {
1252 	__be32 addr = 0;
1253 	struct net_device *dev;
1254 
1255 	if (in_dev != NULL)
1256 		return confirm_addr_indev(in_dev, dst, local, scope);
1257 
1258 	rcu_read_lock();
1259 	for_each_netdev_rcu(net, dev) {
1260 		in_dev = __in_dev_get_rcu(dev);
1261 		if (in_dev) {
1262 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1263 			if (addr)
1264 				break;
1265 		}
1266 	}
1267 	rcu_read_unlock();
1268 
1269 	return addr;
1270 }
1271 EXPORT_SYMBOL(inet_confirm_addr);
1272 
1273 /*
1274  *	Device notifier
1275  */
1276 
/* Subscribe @nb to IPv4 address add/delete events on inetaddr_chain. */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1282 
/* Remove @nb from the IPv4 address notification chain. */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1288 
/* Rename ifa_labels for a device name change. Make some effort to preserve
 * existing alias numbering and to create unique labels if possible.
*/
static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
{
	struct in_ifaddr *ifa;
	int named = 0;

	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
		char old[IFNAMSIZ], *dot;

		memcpy(old, ifa->ifa_label, IFNAMSIZ);
		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		/* First address keeps the bare device name. */
		if (named++ == 0)
			goto skip;
		/* Reuse the old ":N" alias suffix if present, otherwise
		 * synthesize one from the running count.
		 */
		dot = strchr(old, ':');
		if (dot == NULL) {
			sprintf(old, ":%d", named);
			dot = old;
		}
		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
			strcat(ifa->ifa_label, dot);
		else
			/* Label would overflow: overwrite the tail so the
			 * ":N" suffix still fits within IFNAMSIZ.
			 */
			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
skip:
		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
	}
}
1317 
/* An interface can carry IPv4 only if its MTU meets the protocol
 * minimum of 68 bytes (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	return !(mtu < 68);
}
1322 
1323 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1324 					struct in_device *in_dev)
1325 
1326 {
1327 	struct in_ifaddr *ifa;
1328 
1329 	for (ifa = in_dev->ifa_list; ifa;
1330 	     ifa = ifa->ifa_next) {
1331 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1332 			 ifa->ifa_local, dev,
1333 			 ifa->ifa_local, NULL,
1334 			 dev->dev_addr, NULL);
1335 	}
1336 }
1337 
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier: keep the per-device IPv4 state (in_device) in sync
 * with device lifetime and configuration events.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback traffic needs no xfrm policy. */
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* in_dev already present at REGISTER: should not happen. */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			/* Auto-configure 127.0.0.1/8 on loopback bring-up. */
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* deliberate fallthrough into the UNREGISTER teardown */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register sysctl entries under the new device name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1430 
/* Hooks inetdev_event() into the netdevice notifier chain. */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1434 
/*
 * Worst-case netlink message size for one RTM_NEWADDR/RTM_DELADDR
 * notification built by inet_fill_ifaddr().  Must cover every attribute
 * that function can emit, or rtmsg_ifa() would hit -EMSGSIZE.
 */
static size_t inet_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
	       + nla_total_size(4) /* IFA_ADDRESS */
	       + nla_total_size(4) /* IFA_LOCAL */
	       + nla_total_size(4) /* IFA_BROADCAST */
	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
	       + nla_total_size(4);  /* IFA_FLAGS */
}
1444 
/*
 * Convert a jiffies timestamp to hundredths of a second since boot, the
 * unit userspace expects in struct ifa_cacheinfo.
 */
static inline u32 cstamp_delta(unsigned long cstamp)
{
	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
}
1449 
1450 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1451 			 unsigned long tstamp, u32 preferred, u32 valid)
1452 {
1453 	struct ifa_cacheinfo ci;
1454 
1455 	ci.cstamp = cstamp_delta(cstamp);
1456 	ci.tstamp = cstamp_delta(tstamp);
1457 	ci.ifa_prefered = preferred;
1458 	ci.ifa_valid = valid;
1459 
1460 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1461 }
1462 
/*
 * Serialize one in_ifaddr into a netlink message: ifaddrmsg header plus
 * IFA_* attributes.  Returns the message length on success, -EMSGSIZE if
 * @skb has no room (message is cancelled first).
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct ifaddrmsg *ifm;
	struct nlmsghdr  *nlh;
	u32 preferred, valid;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ifm = nlmsg_data(nlh);
	ifm->ifa_family = AF_INET;
	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
	ifm->ifa_flags = ifa->ifa_flags;
	ifm->ifa_scope = ifa->ifa_scope;
	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
		/* Report the remaining (not the configured) lifetimes:
		 * subtract the time elapsed since the last update.
		 */
		preferred = ifa->ifa_preferred_lft;
		valid = ifa->ifa_valid_lft;
		if (preferred != INFINITY_LIFE_TIME) {
			long tval = (jiffies - ifa->ifa_tstamp) / HZ;

			if (preferred > tval)
				preferred -= tval;
			else
				preferred = 0;
			if (valid != INFINITY_LIFE_TIME) {
				if (valid > tval)
					valid -= tval;
				else
					valid = 0;
			}
		}
	} else {
		preferred = INFINITY_LIFE_TIME;
		valid = INFINITY_LIFE_TIME;
	}
	/* Optional attributes are emitted only when non-zero/non-empty. */
	if ((ifa->ifa_address &&
	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
	    (ifa->ifa_local &&
	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
	    (ifa->ifa_broadcast &&
	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
	    (ifa->ifa_label[0] &&
	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
			  preferred, valid))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1521 
/*
 * RTM_GETADDR dump handler: walk every device hash bucket, every device,
 * every address, emitting one RTM_NEWADDR message per address.  Resume
 * state across dump invocations lives in cb->args[0..2] as
 * (hash bucket, device index, address index).
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Generation cookie lets userspace detect a changed
		 * address set mid-dump (NLM_F_DUMP_INTR).
		 */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			/* Only the device we stopped at resumes mid-list. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1578 
/*
 * Broadcast an address change (@event is RTM_NEWADDR/RTM_DELADDR) to the
 * RTNLGRP_IPV4_IFADDR multicast group.  @nlh/@portid echo the triggering
 * request, or NULL/0 for kernel-originated changes.  On failure the error
 * is recorded on the netns rtnl socket instead of being returned.
 */
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
		      u32 portid)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;
	struct net *net;

	net = dev_net(ifa->ifa_dev->dev);
	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
1605 
1606 static size_t inet_get_link_af_size(const struct net_device *dev)
1607 {
1608 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1609 
1610 	if (!in_dev)
1611 		return 0;
1612 
1613 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1614 }
1615 
1616 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1617 {
1618 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1619 	struct nlattr *nla;
1620 	int i;
1621 
1622 	if (!in_dev)
1623 		return -ENODATA;
1624 
1625 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1626 	if (nla == NULL)
1627 		return -EMSGSIZE;
1628 
1629 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1630 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1631 
1632 	return 0;
1633 }
1634 
/* Policy for the IFLA_AF_SPEC/AF_INET nest parsed by
 * inet_validate_link_af() and inet_set_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1638 
/*
 * Validate the AF_INET portion of an IFLA_AF_SPEC request before
 * inet_set_link_af() applies it: the device must have IPv4 state and
 * every nested devconf entry must be a valid, in-range config id.
 */
static int inet_validate_link_af(const struct net_device *dev,
				 const struct nlattr *nla)
{
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int err, rem;

	if (dev && !__in_dev_get_rtnl(dev))
		return -EAFNOSUPPORT;

	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
	if (err < 0)
		return err;

	if (tb[IFLA_INET_CONF]) {
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
			/* Nested type is the devconf index (1-based),
			 * payload is its 32-bit value.
			 */
			int cfgid = nla_type(a);

			if (nla_len(a) < 4)
				return -EINVAL;

			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
				return -EINVAL;
		}
	}

	return 0;
}
1666 
/*
 * Apply the AF_INET portion of an IFLA_AF_SPEC request.  Runs after
 * inet_validate_link_af() has accepted the attributes, hence the BUG()
 * if re-parsing fails here.
 */
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);
	struct nlattr *a, *tb[IFLA_INET_MAX+1];
	int rem;

	if (!in_dev)
		return -EAFNOSUPPORT;

	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
		BUG();

	if (tb[IFLA_INET_CONF]) {
		/* Attribute type encodes the devconf index to set. */
		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
	}

	return 0;
}
1686 
/*
 * Worst-case message size for one RTM_NEWNETCONF notification carrying
 * attribute @type; must stay in sync with inet_netconf_fill_devconf().
 */
static int inet_netconf_msgsize_devconf(int type)
{
	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
		   + nla_total_size(4);	/* NETCONFA_IFINDEX */

	/* type -1 is used for ALL */
	if (type == -1 || type == NETCONFA_FORWARDING)
		size += nla_total_size(4);
	if (type == -1 || type == NETCONFA_RP_FILTER)
		size += nla_total_size(4);
	if (type == -1 || type == NETCONFA_MC_FORWARDING)
		size += nla_total_size(4);
	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
		size += nla_total_size(4);

	return size;
}
1704 
/*
 * Build an RTM_NEWNETCONF message for @devconf/@ifindex.  @type selects a
 * single NETCONFA_* attribute, or -1 for all of them.  Returns the
 * message length, or -EMSGSIZE with the message cancelled.
 */
static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
				     struct ipv4_devconf *devconf, u32 portid,
				     u32 seq, int event, unsigned int flags,
				     int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_INET;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
		goto nla_put_failure;

	/* type -1 is used for ALL */
	if ((type == -1 || type == NETCONFA_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_FORWARDING,
			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
		goto nla_put_failure;
	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
	    nla_put_s32(skb, NETCONFA_RP_FILTER,
			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
		goto nla_put_failure;
	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
		goto nla_put_failure;
	/* NETCONFA_PROXY_NEIGH deliberately reports the PROXY_ARP knob. */
	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1748 
/*
 * Broadcast a devconf change for @ifindex (NETCONFA_* @type) to the
 * RTNLGRP_IPV4_NETCONF group.  Failures are recorded on the netns rtnl
 * socket rather than returned; allocation is GFP_ATOMIC since callers
 * may hold spinlocks.
 */
void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
				 struct ipv4_devconf *devconf)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
					RTM_NEWNETCONF, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
1773 
/* Policy for RTM_GETNETCONF requests parsed in inet_netconf_get_devconf(). */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
};
1780 
1781 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1782 				    struct nlmsghdr *nlh)
1783 {
1784 	struct net *net = sock_net(in_skb->sk);
1785 	struct nlattr *tb[NETCONFA_MAX+1];
1786 	struct netconfmsg *ncm;
1787 	struct sk_buff *skb;
1788 	struct ipv4_devconf *devconf;
1789 	struct in_device *in_dev;
1790 	struct net_device *dev;
1791 	int ifindex;
1792 	int err;
1793 
1794 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1795 			  devconf_ipv4_policy);
1796 	if (err < 0)
1797 		goto errout;
1798 
1799 	err = EINVAL;
1800 	if (!tb[NETCONFA_IFINDEX])
1801 		goto errout;
1802 
1803 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1804 	switch (ifindex) {
1805 	case NETCONFA_IFINDEX_ALL:
1806 		devconf = net->ipv4.devconf_all;
1807 		break;
1808 	case NETCONFA_IFINDEX_DEFAULT:
1809 		devconf = net->ipv4.devconf_dflt;
1810 		break;
1811 	default:
1812 		dev = __dev_get_by_index(net, ifindex);
1813 		if (dev == NULL)
1814 			goto errout;
1815 		in_dev = __in_dev_get_rtnl(dev);
1816 		if (in_dev == NULL)
1817 			goto errout;
1818 		devconf = &in_dev->cnf;
1819 		break;
1820 	}
1821 
1822 	err = -ENOBUFS;
1823 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1824 	if (skb == NULL)
1825 		goto errout;
1826 
1827 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1828 					NETLINK_CB(in_skb).portid,
1829 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1830 					-1);
1831 	if (err < 0) {
1832 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1833 		WARN_ON(err == -EMSGSIZE);
1834 		kfree_skb(skb);
1835 		goto errout;
1836 	}
1837 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1838 errout:
1839 	return err;
1840 }
1841 
/*
 * RTM_GETNETCONF dump handler: one RTM_NEWNETCONF message per device,
 * then one for the "all" block and one for the "default" block (encoded
 * as pseudo hash buckets h == NETDEV_HASHENTRIES and +1 so a partial
 * dump resumes correctly).  Resume state is in cb->args[0..1].
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct hlist_head *head;

	s_h = cb->args[0];
	s_idx = idx = cb->args[1];

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		/* Generation cookie for NLM_F_DUMP_INTR detection. */
		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
			  net->dev_base_seq;
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			if (inet_netconf_fill_devconf(skb, dev->ifindex,
						      &in_dev->cnf,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      RTM_NEWNETCONF,
						      NLM_F_MULTI,
						      -1) <= 0) {
				rcu_read_unlock();
				goto done;
			}
			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
			idx++;
		}
		rcu_read_unlock();
	}
	/* Pseudo-bucket for the per-netns "all" devconf block. */
	if (h == NETDEV_HASHENTRIES) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
					      net->ipv4.devconf_all,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) <= 0)
			goto done;
		else
			h++;
	}
	/* Pseudo-bucket for the per-netns "default" devconf block. */
	if (h == NETDEV_HASHENTRIES + 1) {
		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
					      net->ipv4.devconf_dflt,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNETCONF, NLM_F_MULTI,
					      -1) <= 0)
			goto done;
		else
			h++;
	}
done:
	cb->args[0] = h;
	cb->args[1] = idx;

	return skb->len;
}
1912 
1913 #ifdef CONFIG_SYSCTL
1914 
/*
 * Propagate devconf entry @i from the netns "default" block to every
 * device that has not explicitly overridden it (its cnf.state bit is
 * clear).  Called when the default value changes via sysctl.
 */
static void devinet_copy_dflt_conf(struct net *net, int i)
{
	struct net_device *dev;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct in_device *in_dev;

		in_dev = __in_dev_get_rcu(dev);
		if (in_dev && !test_bit(i, in_dev->cnf.state))
			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
	}
	rcu_read_unlock();
}
1929 
/* called with RTNL locked */
/*
 * Toggle forwarding netns-wide: mirror the "all" setting into the
 * default block and every device, disabling LRO (incompatible with
 * forwarding) and emitting a netconf notification for each change.
 */
static void inet_forward_change(struct net *net)
{
	struct net_device *dev;
	int on = IPV4_DEVCONF_ALL(net, FORWARDING);

	/* Routers must not honour redirects. */
	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_ALL,
				    net->ipv4.devconf_all);
	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
				    NETCONFA_IFINDEX_DEFAULT,
				    net->ipv4.devconf_dflt);

	for_each_netdev(net, dev) {
		struct in_device *in_dev;
		if (on)
			dev_disable_lro(dev);
		rcu_read_lock();
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev) {
			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    dev->ifindex, &in_dev->cnf);
		}
		rcu_read_unlock();
	}
}
1959 
1960 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1961 {
1962 	if (cnf == net->ipv4.devconf_dflt)
1963 		return NETCONFA_IFINDEX_DEFAULT;
1964 	else if (cnf == net->ipv4.devconf_all)
1965 		return NETCONFA_IFINDEX_ALL;
1966 	else {
1967 		struct in_device *idev
1968 			= container_of(cnf, struct in_device, cnf);
1969 		return idev->dev->ifindex;
1970 	}
1971 }
1972 
/*
 * Generic sysctl handler for devconf integers.  On write: mark the entry
 * as explicitly set, propagate default-block changes, flush the route
 * cache for knobs that affect local delivery, and emit netconf
 * notifications for rp_filter and proxy_arp changes.
 */
static int devinet_conf_proc(struct ctl_table *ctl, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	int old_value = *(int *)ctl->data;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	int new_value = *(int *)ctl->data;

	if (write) {
		struct ipv4_devconf *cnf = ctl->extra1;
		struct net *net = ctl->extra2;
		/* Index of this entry within cnf->data[]. */
		int i = (int *)ctl->data - cnf->data;
		int ifindex;

		/* Record the explicit override so default-propagation
		 * (devinet_copy_dflt_conf) skips this device.
		 */
		set_bit(i, cnf->state);

		if (cnf == net->ipv4.devconf_dflt)
			devinet_copy_dflt_conf(net, i);
		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
			if ((new_value == 0) && (old_value != 0))
				rt_cache_flush(net);

		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
						    ifindex, cnf);
		}
		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
		    new_value != old_value) {
			ifindex = devinet_conf_ifindex(net, cnf);
			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
						    ifindex, cnf);
		}
	}

	return ret;
}
2012 
/*
 * sysctl handler for the "forwarding" knobs.  Changing anything but the
 * per-netns default requires RTNL; if the lock is contended the write is
 * undone and the syscall restarted rather than sleeping here.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int *valp = ctl->data;
	int val = *valp;
	loff_t pos = *ppos;
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	if (write && *valp != val) {
		struct net *net = ctl->extra2;

		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
			if (!rtnl_trylock()) {
				/* Restore the original values before restarting */
				*valp = val;
				*ppos = pos;
				return restart_syscall();
			}
			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
				/* "all": fan out to every device. */
				inet_forward_change(net);
			} else {
				/* Per-device knob. */
				struct ipv4_devconf *cnf = ctl->extra1;
				struct in_device *idev =
					container_of(cnf, struct in_device, cnf);
				if (*valp)
					dev_disable_lro(idev->dev);
				inet_netconf_notify_devconf(net,
							    NETCONFA_FORWARDING,
							    idev->dev->ifindex,
							    cnf);
			}
			rtnl_unlock();
			rt_cache_flush(net);
		} else
			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
						    NETCONFA_IFINDEX_DEFAULT,
						    net->ipv4.devconf_dflt);
	}

	return ret;
}
2055 
2056 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2057 				void __user *buffer,
2058 				size_t *lenp, loff_t *ppos)
2059 {
2060 	int *valp = ctl->data;
2061 	int val = *valp;
2062 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2063 	struct net *net = ctl->extra2;
2064 
2065 	if (write && *valp != val)
2066 		rt_cache_flush(net);
2067 
2068 	return ret;
2069 }
2070 
/*
 * Helpers to build the devinet_sysctl template table below.  Each entry
 * initially points into the global ipv4_devconf.data[] array;
 * __devinet_sysctl_register() later rebases .data/.extra1/.extra2 onto
 * the actual per-device or per-netns devconf block.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Entry with a custom handler (e.g. forwarding). */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Entry whose writes additionally flush the route cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2093 
/*
 * Template for the net/ipv4/conf/<dev>/* sysctl tree.  It is kmemdup'd
 * and rebased for every registered devconf by __devinet_sysctl_register();
 * the array is sized __IPV4_DEVCONF_MAX to leave a zero terminator entry.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),

		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
2138 
/*
 * Duplicate the devinet_sysctl template, rebase every entry onto the
 * devconf block @p, and register it under net/ipv4/conf/<dev_name>.
 * Returns 0 or -ENOBUFS.
 */
static int __devinet_sysctl_register(struct net *net, char *dev_name,
					struct ipv4_devconf *p)
{
	int i;
	struct devinet_sysctl_table *t;
	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];

	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	/* "- 1" skips the zero terminator entry at the end of the table. */
	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
		/* Redirect .data from the global template into @p. */
		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
		t->devinet_vars[i].extra1 = p;
		t->devinet_vars[i].extra2 = net;
	}

	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);

	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl = t;
	return 0;

free:
	kfree(t);
out:
	return -ENOBUFS;
}
2170 
2171 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2172 {
2173 	struct devinet_sysctl_table *t = cnf->sysctl;
2174 
2175 	if (t == NULL)
2176 		return;
2177 
2178 	cnf->sysctl = NULL;
2179 	unregister_net_sysctl_table(t->sysctl_header);
2180 	kfree(t);
2181 }
2182 
/* Register both sysctl trees belonging to a device: the neighbour/ARP
 * parameters and the per-device IPv4 devconf entries under
 * net/ipv4/conf/<dev>.  The return value of __devinet_sysctl_register()
 * is ignored here; on failure the device is simply left without its
 * conf sysctl directory.
 */
static void devinet_sysctl_register(struct in_device *idev)
{
	neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
					&idev->cnf);
}
2189 
/* Tear down what devinet_sysctl_register() set up, in reverse order:
 * devconf sysctls first, then the neighbour/ARP sysctls.
 */
static void devinet_sysctl_unregister(struct in_device *idev)
{
	__devinet_sysctl_unregister(&idev->cnf);
	neigh_sysctl_unregister(idev->arp_parms);
}
2195 
/* The global net/ipv4/ip_forward sysctl.  Its .data aliases the
 * FORWARDING slot of the "all" devconf; this static table points at the
 * init_net instance, and devinet_init_net() patches .data/.extra1/.extra2
 * in a kmemdup'd copy for every other netns.  Writes are funnelled
 * through devinet_sysctl_forward().
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },	/* sentinel */
};
#endif
2210 
/* Per-netns init: set up the "all" and "default" devconf instances and
 * (under CONFIG_SYSCTL) their sysctl trees plus the ip_forward sysctl.
 *
 * init_net uses the global ipv4_devconf/ipv4_devconf_dflt/ctl_forward_entry
 * objects directly; every other netns gets kmemdup'd private copies.
 * Errors unwind in strict reverse order of the registrations, freeing
 * only what was actually allocated (the != &global checks keep init_net's
 * static objects from being kfree'd).
 *
 * Returns 0 on success or a negative errno.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Non-init netns: clone the global conf objects so each
		 * namespace tunes its own settings independently.
		 */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Re-point the cloned ip_forward entry at this netns'
		 * "all" conf instead of the global one.
		 */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	/* Stash the header so devinet_exit_net() can unregister it. */
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Error unwind: reverse order of the steps above; free only
	 * objects that were allocated for this (non-init) netns.
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
2282 
/* Per-netns teardown: mirror of devinet_init_net(), in reverse order.
 * Sysctl tables must be unregistered before the conf objects they point
 * into are freed.  For init_net the kfree() calls receive the addresses
 * of the static global conf objects; NOTE(review): that relies on
 * devinet_exit_net never running for init_net — pernet exit is only
 * invoked for namespaces being torn down.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
	__devinet_sysctl_unregister(net->ipv4.devconf_all);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
2297 
/* Pernet hooks: run devinet_init_net()/devinet_exit_net() for every
 * network namespace as it is created/destroyed.
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2302 
/* rtnetlink per-address-family operations for AF_INET, used to fill,
 * size, validate and apply the IPv4 portion of link-level netlink
 * attributes (IFLA_AF_SPEC).
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2310 
/* Boot-time initialization of the IPv4 device layer: prepare the
 * inet address hash table, hook into pernet/netdevice/rtnetlink
 * infrastructure, and start the periodic address-lifetime worker.
 * Registration return values are ignored here — this runs once at
 * boot and the registrations are not expected to fail.
 */
void __init devinet_init(void)
{
	int i;

	/* Empty every bucket of the IPv4 address hash table. */
	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Kick off the address-lifetime expiry worker immediately. */
	schedule_delayed_work(&check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	/* Netlink handlers for IPv4 address add/del/dump and netconf. */
	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      inet_netconf_dump_devconf, NULL);
}
2333 
2334