xref: /linux/net/ipv4/devinet.c (revision c4c11dd160a8cc98f402c4e12f94b1572e822ffd)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 	},
77 };
78 
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80 	.data = {
81 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86 	},
87 };
88 
89 #define IPV4_DEVCONF_DFLT(net, attr) \
90 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
91 
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 	[IFA_LOCAL]     	= { .type = NLA_U32 },
94 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
95 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
96 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
98 };
99 
100 #define IN4_ADDR_HSIZE_SHIFT	8
101 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
102 
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
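/* inet_addr_lst hashes every configured IPv4 address, keyed on the local
 * address and the netns, so that __ip_dev_find() can look up the owning
 * device in O(1) under RCU.
 */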
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ net_hash_mix(net);
109 
110 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124 	spin_lock(&inet_addr_hash_lock);
125 	hlist_del_init_rcu(&ifa->hash);
126 	spin_unlock(&inet_addr_hash_lock);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU or RTNL.
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	u32 hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 		if (ifa->ifa_local == addr) {
146 			struct net_device *dev = ifa->ifa_dev->dev;
147 
148 			if (!net_eq(dev_net(dev), net))
149 				continue;
150 			result = dev;
151 			break;
152 		}
153 	}
154 	if (!result) {
155 		struct flowi4 fl4 = { .daddr = addr };
156 		struct fib_result res = { 0 };
157 		struct fib_table *local;
158 
159 		/* Fall back to the FIB local table so that communication
160 		 * over loopback subnets works.
161 		 */
162 		local = fib_get_table(net, RT_TABLE_LOCAL);
163 		if (local &&
164 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 		    res.type == RTN_LOCAL)
166 			result = FIB_RES_DEV(res);
167 	}
168 	if (result && devref)
169 		dev_hold(result);
170 	rcu_read_unlock();
171 	return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174 
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176 
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179 			 int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191 
192 /* Locks all the inet devices. */
193 
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198 
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202 	if (ifa->ifa_dev)
203 		in_dev_put(ifa->ifa_dev);
204 	kfree(ifa);
205 }
206 
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211 
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214 	struct net_device *dev = idev->dev;
215 
216 	WARN_ON(idev->ifa_list);
217 	WARN_ON(idev->mc_list);
218 	kfree(rcu_dereference_protected(idev->mc_hash, 1));
219 #ifdef NET_REFCNT_DEBUG
220 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222 	dev_put(dev);
223 	if (!idev->dead)
224 		pr_err("Freeing alive in_device %p\n", idev);
225 	else
226 		kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229 
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232 	struct in_device *in_dev;
233 
234 	ASSERT_RTNL();
235 
236 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237 	if (!in_dev)
238 		goto out;
239 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240 			sizeof(in_dev->cnf));
241 	in_dev->cnf.sysctl = NULL;
242 	in_dev->dev = dev;
243 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244 	if (!in_dev->arp_parms)
245 		goto out_kfree;
246 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247 		dev_disable_lro(dev);
248 	/* Reference in_dev->dev */
249 	dev_hold(dev);
250 	/* Account for reference dev->ip_ptr (below) */
251 	in_dev_hold(in_dev);
252 
253 	devinet_sysctl_register(in_dev);
254 	ip_mc_init_dev(in_dev);
255 	if (dev->flags & IFF_UP)
256 		ip_mc_up(in_dev);
257 
258 	/* we can receive as soon as ip_ptr is set -- do this last */
259 	rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261 	return in_dev;
262 out_kfree:
263 	kfree(in_dev);
264 	in_dev = NULL;
265 	goto out;
266 }
267 
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
271 	in_dev_put(idev);
272 }
273 
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276 	struct in_ifaddr *ifa;
277 	struct net_device *dev;
278 
279 	ASSERT_RTNL();
280 
281 	dev = in_dev->dev;
282 
283 	in_dev->dead = 1;
284 
285 	ip_mc_destroy_dev(in_dev);
286 
287 	while ((ifa = in_dev->ifa_list) != NULL) {
288 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289 		inet_free_ifa(ifa);
290 	}
291 
292 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
293 
294 	devinet_sysctl_unregister(in_dev);
295 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296 	arp_ifdown(dev);
297 
298 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300 
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303 	rcu_read_lock();
304 	for_primary_ifa(in_dev) {
305 		if (inet_ifa_match(a, ifa)) {
306 			if (!b || inet_ifa_match(b, ifa)) {
307 				rcu_read_unlock();
308 				return 1;
309 			}
310 		}
311 	} endfor_ifa(in_dev);
312 	rcu_read_unlock();
313 	return 0;
314 }
315 
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317 			 int destroy, struct nlmsghdr *nlh, u32 portid)
318 {
319 	struct in_ifaddr *promote = NULL;
320 	struct in_ifaddr *ifa, *ifa1 = *ifap;
321 	struct in_ifaddr *last_prim = in_dev->ifa_list;
322 	struct in_ifaddr *prev_prom = NULL;
323 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
324 
325 	ASSERT_RTNL();
326 
327 	/* 1. Deleting a primary ifaddr forces deletion of all secondaries,
328 	 * unless alias promotion is set.
329 	 */
330 
331 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333 
334 		while ((ifa = *ifap1) != NULL) {
335 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336 			    ifa1->ifa_scope <= ifa->ifa_scope)
337 				last_prim = ifa;
338 
339 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340 			    ifa1->ifa_mask != ifa->ifa_mask ||
341 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
342 				ifap1 = &ifa->ifa_next;
343 				prev_prom = ifa;
344 				continue;
345 			}
346 
347 			if (!do_promote) {
348 				inet_hash_remove(ifa);
349 				*ifap1 = ifa->ifa_next;
350 
351 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352 				blocking_notifier_call_chain(&inetaddr_chain,
353 						NETDEV_DOWN, ifa);
354 				inet_free_ifa(ifa);
355 			} else {
356 				promote = ifa;
357 				break;
358 			}
359 		}
360 	}
361 
362 	/* On promotion all secondaries from the subnet are changing
363 	 * their primary IP, so we must remove all their routes silently
364 	 * and later add them back with the new prefsrc. Do this
365 	 * while all addresses are still on the device list.
366 	 */
367 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368 		if (ifa1->ifa_mask == ifa->ifa_mask &&
369 		    inet_ifa_match(ifa1->ifa_address, ifa))
370 			fib_del_ifaddr(ifa, ifa1);
371 	}
372 
373 	/* 2. Unlink it */
374 
375 	*ifap = ifa1->ifa_next;
376 	inet_hash_remove(ifa1);
377 
378 	/* 3. Announce address deletion */
379 
380 	/* Send the message first, then call the notifier.
381 	   At first sight, the FIB update triggered by the notifier
382 	   will refer to an already deleted ifaddr, which could confuse
383 	   netlink listeners. It does not: if gated sees
384 	   the route deleted while it still thinks the ifaddr
385 	   is valid, it will try to restore the deleted routes... Grr.
386 	   So this order is correct.
387 	 */
388 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
390 
391 	if (promote) {
392 		struct in_ifaddr *next_sec = promote->ifa_next;
393 
394 		if (prev_prom) {
395 			prev_prom->ifa_next = promote->ifa_next;
396 			promote->ifa_next = last_prim->ifa_next;
397 			last_prim->ifa_next = promote;
398 		}
399 
400 		promote->ifa_flags &= ~IFA_F_SECONDARY;
401 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402 		blocking_notifier_call_chain(&inetaddr_chain,
403 				NETDEV_UP, promote);
404 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405 			if (ifa1->ifa_mask != ifa->ifa_mask ||
406 			    !inet_ifa_match(ifa1->ifa_address, ifa))
407 					continue;
408 			fib_add_ifaddr(ifa);
409 		}
410 
411 	}
412 	if (destroy)
413 		inet_free_ifa(ifa1);
414 }
415 
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417 			 int destroy)
418 {
419 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421 
422 static void check_lifetime(struct work_struct *work);
423 
424 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425 
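/* Link a new ifaddr into in_dev->ifa_list: a new primary is inserted after
 * the existing primaries of greater or equal scope, a secondary is appended
 * at the tail; an exact duplicate returns -EEXIST and a scope mismatch
 * within the same subnet returns -EINVAL.
 */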
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
427 			     u32 portid)
428 {
429 	struct in_device *in_dev = ifa->ifa_dev;
430 	struct in_ifaddr *ifa1, **ifap, **last_primary;
431 
432 	ASSERT_RTNL();
433 
434 	if (!ifa->ifa_local) {
435 		inet_free_ifa(ifa);
436 		return 0;
437 	}
438 
439 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
440 	last_primary = &in_dev->ifa_list;
441 
442 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443 	     ifap = &ifa1->ifa_next) {
444 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445 		    ifa->ifa_scope <= ifa1->ifa_scope)
446 			last_primary = &ifa1->ifa_next;
447 		if (ifa1->ifa_mask == ifa->ifa_mask &&
448 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
449 			if (ifa1->ifa_local == ifa->ifa_local) {
450 				inet_free_ifa(ifa);
451 				return -EEXIST;
452 			}
453 			if (ifa1->ifa_scope != ifa->ifa_scope) {
454 				inet_free_ifa(ifa);
455 				return -EINVAL;
456 			}
457 			ifa->ifa_flags |= IFA_F_SECONDARY;
458 		}
459 	}
460 
461 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462 		net_srandom(ifa->ifa_local);
463 		ifap = last_primary;
464 	}
465 
466 	ifa->ifa_next = *ifap;
467 	*ifap = ifa;
468 
469 	inet_hash_insert(dev_net(in_dev->dev), ifa);
470 
471 	cancel_delayed_work(&check_lifetime_work);
472 	schedule_delayed_work(&check_lifetime_work, 0);
473 
474 	/* Send the message first, then call the notifier.
475 	   The notifier will trigger the FIB update, so that
476 	   netlink listeners will know about the new ifaddr. */
477 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
479 
480 	return 0;
481 }
482 
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 {
485 	return __inet_insert_ifa(ifa, NULL, 0);
486 }
487 
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 {
490 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
491 
492 	ASSERT_RTNL();
493 
494 	if (!in_dev) {
495 		inet_free_ifa(ifa);
496 		return -ENOBUFS;
497 	}
498 	ipv4_devconf_setall(in_dev);
499 	if (ifa->ifa_dev != in_dev) {
500 		WARN_ON(ifa->ifa_dev);
501 		in_dev_hold(in_dev);
502 		ifa->ifa_dev = in_dev;
503 	}
504 	if (ipv4_is_loopback(ifa->ifa_local))
505 		ifa->ifa_scope = RT_SCOPE_HOST;
506 	return inet_insert_ifa(ifa);
507 }
508 
509 /* Caller must hold RCU or RTNL:
510  * we don't take a reference on the found in_device.
511  */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 {
514 	struct net_device *dev;
515 	struct in_device *in_dev = NULL;
516 
517 	rcu_read_lock();
518 	dev = dev_get_by_index_rcu(net, ifindex);
519 	if (dev)
520 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
521 	rcu_read_unlock();
522 	return in_dev;
523 }
524 EXPORT_SYMBOL(inetdev_by_index);
525 
526 /* Called only from RTNL semaphored context. No locks. */
527 
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
529 				    __be32 mask)
530 {
531 	ASSERT_RTNL();
532 
533 	for_primary_ifa(in_dev) {
534 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535 			return ifa;
536 	} endfor_ifa(in_dev);
537 	return NULL;
538 }
539 
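/* RTM_DELADDR handler: locate the matching ifaddr on the interface (by
 * IFA_LOCAL, IFA_LABEL and/or IFA_ADDRESS with the prefix length) and
 * delete it.
 */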
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
541 {
542 	struct net *net = sock_net(skb->sk);
543 	struct nlattr *tb[IFA_MAX+1];
544 	struct in_device *in_dev;
545 	struct ifaddrmsg *ifm;
546 	struct in_ifaddr *ifa, **ifap;
547 	int err = -EINVAL;
548 
549 	ASSERT_RTNL();
550 
551 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
552 	if (err < 0)
553 		goto errout;
554 
555 	ifm = nlmsg_data(nlh);
556 	in_dev = inetdev_by_index(net, ifm->ifa_index);
557 	if (in_dev == NULL) {
558 		err = -ENODEV;
559 		goto errout;
560 	}
561 
562 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563 	     ifap = &ifa->ifa_next) {
564 		if (tb[IFA_LOCAL] &&
565 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
566 			continue;
567 
568 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
569 			continue;
570 
571 		if (tb[IFA_ADDRESS] &&
572 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
574 			continue;
575 
576 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
577 		return 0;
578 	}
579 
580 	err = -EADDRNOTAVAIL;
581 errout:
582 	return err;
583 }
584 
585 #define INFINITY_LIFE_TIME	0xFFFFFFFF
586 
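/* Delayed work: scan the address hash, delete addresses whose valid lifetime
 * has expired, mark those past their preferred lifetime IFA_F_DEPRECATED,
 * and reschedule itself for the next relevant timeout.
 */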
587 static void check_lifetime(struct work_struct *work)
588 {
589 	unsigned long now, next, next_sec, next_sched;
590 	struct in_ifaddr *ifa;
591 	struct hlist_node *n;
592 	int i;
593 
594 	now = jiffies;
595 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596 
597 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
598 		bool change_needed = false;
599 
600 		rcu_read_lock();
601 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
602 			unsigned long age;
603 
604 			if (ifa->ifa_flags & IFA_F_PERMANENT)
605 				continue;
606 
607 			/* We try to batch several events at once. */
608 			age = (now - ifa->ifa_tstamp +
609 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
610 
611 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
612 			    age >= ifa->ifa_valid_lft) {
613 				change_needed = true;
614 			} else if (ifa->ifa_preferred_lft ==
615 				   INFINITY_LIFE_TIME) {
616 				continue;
617 			} else if (age >= ifa->ifa_preferred_lft) {
618 				if (time_before(ifa->ifa_tstamp +
619 						ifa->ifa_valid_lft * HZ, next))
620 					next = ifa->ifa_tstamp +
621 					       ifa->ifa_valid_lft * HZ;
622 
623 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
624 					change_needed = true;
625 			} else if (time_before(ifa->ifa_tstamp +
626 					       ifa->ifa_preferred_lft * HZ,
627 					       next)) {
628 				next = ifa->ifa_tstamp +
629 				       ifa->ifa_preferred_lft * HZ;
630 			}
631 		}
632 		rcu_read_unlock();
633 		if (!change_needed)
634 			continue;
635 		rtnl_lock();
636 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
637 			unsigned long age;
638 
639 			if (ifa->ifa_flags & IFA_F_PERMANENT)
640 				continue;
641 
642 			/* We try to batch several events at once. */
643 			age = (now - ifa->ifa_tstamp +
644 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
645 
646 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647 			    age >= ifa->ifa_valid_lft) {
648 				struct in_ifaddr **ifap;
649 
650 				for (ifap = &ifa->ifa_dev->ifa_list;
651 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
652 					if (*ifap == ifa) {
653 						inet_del_ifa(ifa->ifa_dev,
654 							     ifap, 1);
655 						break;
656 					}
657 				}
658 			} else if (ifa->ifa_preferred_lft !=
659 				   INFINITY_LIFE_TIME &&
660 				   age >= ifa->ifa_preferred_lft &&
661 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
662 				ifa->ifa_flags |= IFA_F_DEPRECATED;
663 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
664 			}
665 		}
666 		rtnl_unlock();
667 	}
668 
669 	next_sec = round_jiffies_up(next);
670 	next_sched = next;
671 
672 	/* If rounded timeout is accurate enough, accept it. */
673 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
674 		next_sched = next_sec;
675 
676 	now = jiffies;
677 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
678 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
679 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
680 
681 	schedule_delayed_work(&check_lifetime_work, next_sched - now);
682 }
683 
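/* Translate netlink valid/preferred lifetimes into ifa state: an infinite
 * valid lifetime becomes IFA_F_PERMANENT, a zero preferred lifetime marks
 * the address IFA_F_DEPRECATED immediately; timestamps are refreshed here.
 */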
684 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
685 			     __u32 prefered_lft)
686 {
687 	unsigned long timeout;
688 
689 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
690 
691 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
692 	if (addrconf_finite_timeout(timeout))
693 		ifa->ifa_valid_lft = timeout;
694 	else
695 		ifa->ifa_flags |= IFA_F_PERMANENT;
696 
697 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
698 	if (addrconf_finite_timeout(timeout)) {
699 		if (timeout == 0)
700 			ifa->ifa_flags |= IFA_F_DEPRECATED;
701 		ifa->ifa_preferred_lft = timeout;
702 	}
703 	ifa->ifa_tstamp = jiffies;
704 	if (!ifa->ifa_cstamp)
705 		ifa->ifa_cstamp = ifa->ifa_tstamp;
706 }
707 
708 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
709 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
710 {
711 	struct nlattr *tb[IFA_MAX+1];
712 	struct in_ifaddr *ifa;
713 	struct ifaddrmsg *ifm;
714 	struct net_device *dev;
715 	struct in_device *in_dev;
716 	int err;
717 
718 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
719 	if (err < 0)
720 		goto errout;
721 
722 	ifm = nlmsg_data(nlh);
723 	err = -EINVAL;
724 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
725 		goto errout;
726 
727 	dev = __dev_get_by_index(net, ifm->ifa_index);
728 	err = -ENODEV;
729 	if (dev == NULL)
730 		goto errout;
731 
732 	in_dev = __in_dev_get_rtnl(dev);
733 	err = -ENOBUFS;
734 	if (in_dev == NULL)
735 		goto errout;
736 
737 	ifa = inet_alloc_ifa();
738 	if (ifa == NULL)
739 		/*
740 		 * A potential in_dev allocation can be left alive; it stays
741 		 * assigned to its device and is destroyed with it.
742 		 */
743 		goto errout;
744 
745 	ipv4_devconf_setall(in_dev);
746 	in_dev_hold(in_dev);
747 
748 	if (tb[IFA_ADDRESS] == NULL)
749 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
750 
751 	INIT_HLIST_NODE(&ifa->hash);
752 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
753 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
754 	ifa->ifa_flags = ifm->ifa_flags;
755 	ifa->ifa_scope = ifm->ifa_scope;
756 	ifa->ifa_dev = in_dev;
757 
758 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
759 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
760 
761 	if (tb[IFA_BROADCAST])
762 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
763 
764 	if (tb[IFA_LABEL])
765 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
766 	else
767 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
768 
769 	if (tb[IFA_CACHEINFO]) {
770 		struct ifa_cacheinfo *ci;
771 
772 		ci = nla_data(tb[IFA_CACHEINFO]);
773 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
774 			err = -EINVAL;
775 			goto errout;
776 		}
777 		*pvalid_lft = ci->ifa_valid;
778 		*pprefered_lft = ci->ifa_prefered;
779 	}
780 
781 	return ifa;
782 
783 errout:
784 	return ERR_PTR(err);
785 }
786 
787 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
788 {
789 	struct in_device *in_dev = ifa->ifa_dev;
790 	struct in_ifaddr *ifa1, **ifap;
791 
792 	if (!ifa->ifa_local)
793 		return NULL;
794 
795 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
796 	     ifap = &ifa1->ifa_next) {
797 		if (ifa1->ifa_mask == ifa->ifa_mask &&
798 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
799 		    ifa1->ifa_local == ifa->ifa_local)
800 			return ifa1;
801 	}
802 	return NULL;
803 }
804 
805 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
806 {
807 	struct net *net = sock_net(skb->sk);
808 	struct in_ifaddr *ifa;
809 	struct in_ifaddr *ifa_existing;
810 	__u32 valid_lft = INFINITY_LIFE_TIME;
811 	__u32 prefered_lft = INFINITY_LIFE_TIME;
812 
813 	ASSERT_RTNL();
814 
815 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
816 	if (IS_ERR(ifa))
817 		return PTR_ERR(ifa);
818 
819 	ifa_existing = find_matching_ifa(ifa);
820 	if (!ifa_existing) {
821 		/* It would be best to check for !NLM_F_CREATE here but
822 		 * userspace already relies on not having to provide this.
823 		 */
824 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
825 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
826 	} else {
827 		inet_free_ifa(ifa);
828 
829 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
830 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
831 			return -EEXIST;
832 		ifa = ifa_existing;
833 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
834 		cancel_delayed_work(&check_lifetime_work);
835 		schedule_delayed_work(&check_lifetime_work, 0);
836 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
837 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
838 	}
839 	return 0;
840 }
841 
842 /*
843  *	Determine a default network mask, based on the IP address.
844  */
845 
846 static int inet_abc_len(__be32 addr)
847 {
848 	int rc = -1;	/* Something else, probably a multicast. */
849 
850 	if (ipv4_is_zeronet(addr))
851 		rc = 0;
852 	else {
853 		__u32 haddr = ntohl(addr);
854 
855 		if (IN_CLASSA(haddr))
856 			rc = 8;
857 		else if (IN_CLASSB(haddr))
858 			rc = 16;
859 		else if (IN_CLASSC(haddr))
860 			rc = 24;
861 	}
862 
863 	return rc;
864 }
865 
866 
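/* Legacy ioctl entry point (SIOCGIFADDR, SIOCSIFADDR, ...): resolves the
 * interface name, including ':'-style alias labels, to an ifaddr and gets
 * or sets its address, broadcast, destination, netmask or flags under RTNL.
 */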
867 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
868 {
869 	struct ifreq ifr;
870 	struct sockaddr_in sin_orig;
871 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
872 	struct in_device *in_dev;
873 	struct in_ifaddr **ifap = NULL;
874 	struct in_ifaddr *ifa = NULL;
875 	struct net_device *dev;
876 	char *colon;
877 	int ret = -EFAULT;
878 	int tryaddrmatch = 0;
879 
880 	/*
881 	 *	Fetch the caller's info block into kernel space
882 	 */
883 
884 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
885 		goto out;
886 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
887 
888 	/* save original address for comparison */
889 	memcpy(&sin_orig, sin, sizeof(*sin));
890 
891 	colon = strchr(ifr.ifr_name, ':');
892 	if (colon)
893 		*colon = 0;
894 
895 	dev_load(net, ifr.ifr_name);
896 
897 	switch (cmd) {
898 	case SIOCGIFADDR:	/* Get interface address */
899 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
900 	case SIOCGIFDSTADDR:	/* Get the destination address */
901 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
902 		/* Note that these ioctls will not sleep,
903 		   so we do not impose a lock.
904 		   One day we will be forced to put a shlock here (I mean SMP).
905 		 */
906 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
907 		memset(sin, 0, sizeof(*sin));
908 		sin->sin_family = AF_INET;
909 		break;
910 
911 	case SIOCSIFFLAGS:
912 		ret = -EPERM;
913 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
914 			goto out;
915 		break;
916 	case SIOCSIFADDR:	/* Set interface address (and family) */
917 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
918 	case SIOCSIFDSTADDR:	/* Set the destination address */
919 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
920 		ret = -EPERM;
921 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
922 			goto out;
923 		ret = -EINVAL;
924 		if (sin->sin_family != AF_INET)
925 			goto out;
926 		break;
927 	default:
928 		ret = -EINVAL;
929 		goto out;
930 	}
931 
932 	rtnl_lock();
933 
934 	ret = -ENODEV;
935 	dev = __dev_get_by_name(net, ifr.ifr_name);
936 	if (!dev)
937 		goto done;
938 
939 	if (colon)
940 		*colon = ':';
941 
942 	in_dev = __in_dev_get_rtnl(dev);
943 	if (in_dev) {
944 		if (tryaddrmatch) {
945 			/* Matthias Andree */
946 			/* compare label and address (4.4BSD style) */
947 			/* note: we only do this for a limited set of ioctls
948 			   and only if the original address family was AF_INET.
949 			   This is checked above. */
950 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
951 			     ifap = &ifa->ifa_next) {
952 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
953 				    sin_orig.sin_addr.s_addr ==
954 							ifa->ifa_local) {
955 					break; /* found */
956 				}
957 			}
958 		}
959 		/* we didn't get a match, maybe the application is
960 		   4.3BSD-style and passed in junk so we fall back to
961 		   comparing just the label */
962 		if (!ifa) {
963 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
964 			     ifap = &ifa->ifa_next)
965 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
966 					break;
967 		}
968 	}
969 
970 	ret = -EADDRNOTAVAIL;
971 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
972 		goto done;
973 
974 	switch (cmd) {
975 	case SIOCGIFADDR:	/* Get interface address */
976 		sin->sin_addr.s_addr = ifa->ifa_local;
977 		goto rarok;
978 
979 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
980 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
981 		goto rarok;
982 
983 	case SIOCGIFDSTADDR:	/* Get the destination address */
984 		sin->sin_addr.s_addr = ifa->ifa_address;
985 		goto rarok;
986 
987 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
988 		sin->sin_addr.s_addr = ifa->ifa_mask;
989 		goto rarok;
990 
991 	case SIOCSIFFLAGS:
992 		if (colon) {
993 			ret = -EADDRNOTAVAIL;
994 			if (!ifa)
995 				break;
996 			ret = 0;
997 			if (!(ifr.ifr_flags & IFF_UP))
998 				inet_del_ifa(in_dev, ifap, 1);
999 			break;
1000 		}
1001 		ret = dev_change_flags(dev, ifr.ifr_flags);
1002 		break;
1003 
1004 	case SIOCSIFADDR:	/* Set interface address (and family) */
1005 		ret = -EINVAL;
1006 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1007 			break;
1008 
1009 		if (!ifa) {
1010 			ret = -ENOBUFS;
1011 			ifa = inet_alloc_ifa();
1012 			if (!ifa)
1013 				break;
1014 			INIT_HLIST_NODE(&ifa->hash);
1015 			if (colon)
1016 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1017 			else
1018 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1019 		} else {
1020 			ret = 0;
1021 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1022 				break;
1023 			inet_del_ifa(in_dev, ifap, 0);
1024 			ifa->ifa_broadcast = 0;
1025 			ifa->ifa_scope = 0;
1026 		}
1027 
1028 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1029 
1030 		if (!(dev->flags & IFF_POINTOPOINT)) {
1031 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1032 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1033 			if ((dev->flags & IFF_BROADCAST) &&
1034 			    ifa->ifa_prefixlen < 31)
1035 				ifa->ifa_broadcast = ifa->ifa_address |
1036 						     ~ifa->ifa_mask;
1037 		} else {
1038 			ifa->ifa_prefixlen = 32;
1039 			ifa->ifa_mask = inet_make_mask(32);
1040 		}
1041 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1042 		ret = inet_set_ifa(dev, ifa);
1043 		break;
1044 
1045 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1046 		ret = 0;
1047 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1048 			inet_del_ifa(in_dev, ifap, 0);
1049 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1050 			inet_insert_ifa(ifa);
1051 		}
1052 		break;
1053 
1054 	case SIOCSIFDSTADDR:	/* Set the destination address */
1055 		ret = 0;
1056 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1057 			break;
1058 		ret = -EINVAL;
1059 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1060 			break;
1061 		ret = 0;
1062 		inet_del_ifa(in_dev, ifap, 0);
1063 		ifa->ifa_address = sin->sin_addr.s_addr;
1064 		inet_insert_ifa(ifa);
1065 		break;
1066 
1067 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1068 
1069 		/*
1070 		 *	The mask we set must be legal.
1071 		 */
1072 		ret = -EINVAL;
1073 		if (bad_mask(sin->sin_addr.s_addr, 0))
1074 			break;
1075 		ret = 0;
1076 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1077 			__be32 old_mask = ifa->ifa_mask;
1078 			inet_del_ifa(in_dev, ifap, 0);
1079 			ifa->ifa_mask = sin->sin_addr.s_addr;
1080 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1081 
1082 			/* See if the current broadcast address matches
1083 			 * the old netmask; if so, recalculate
1084 			 * the broadcast address. Otherwise it's a
1085 			 * funny address, so don't touch it since
1086 			 * the user seems to know what (s)he's doing...
1087 			 */
1088 			if ((dev->flags & IFF_BROADCAST) &&
1089 			    (ifa->ifa_prefixlen < 31) &&
1090 			    (ifa->ifa_broadcast ==
1091 			     (ifa->ifa_local|~old_mask))) {
1092 				ifa->ifa_broadcast = (ifa->ifa_local |
1093 						      ~sin->sin_addr.s_addr);
1094 			}
1095 			inet_insert_ifa(ifa);
1096 		}
1097 		break;
1098 	}
1099 done:
1100 	rtnl_unlock();
1101 out:
1102 	return ret;
1103 rarok:
1104 	rtnl_unlock();
1105 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1106 	goto out;
1107 }
1108 
1109 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1110 {
1111 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1112 	struct in_ifaddr *ifa;
1113 	struct ifreq ifr;
1114 	int done = 0;
1115 
1116 	if (!in_dev)
1117 		goto out;
1118 
1119 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1120 		if (!buf) {
1121 			done += sizeof(ifr);
1122 			continue;
1123 		}
1124 		if (len < (int) sizeof(ifr))
1125 			break;
1126 		memset(&ifr, 0, sizeof(struct ifreq));
1127 		if (ifa->ifa_label)
1128 			strcpy(ifr.ifr_name, ifa->ifa_label);
1129 		else
1130 			strcpy(ifr.ifr_name, dev->name);
1131 
1132 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1133 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1134 								ifa->ifa_local;
1135 
1136 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1137 			done = -EFAULT;
1138 			break;
1139 		}
1140 		buf  += sizeof(struct ifreq);
1141 		len  -= sizeof(struct ifreq);
1142 		done += sizeof(struct ifreq);
1143 	}
1144 out:
1145 	return done;
1146 }
1147 
1148 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1149 {
1150 	__be32 addr = 0;
1151 	struct in_device *in_dev;
1152 	struct net *net = dev_net(dev);
1153 
1154 	rcu_read_lock();
1155 	in_dev = __in_dev_get_rcu(dev);
1156 	if (!in_dev)
1157 		goto no_in_dev;
1158 
1159 	for_primary_ifa(in_dev) {
1160 		if (ifa->ifa_scope > scope)
1161 			continue;
1162 		if (!dst || inet_ifa_match(dst, ifa)) {
1163 			addr = ifa->ifa_local;
1164 			break;
1165 		}
1166 		if (!addr)
1167 			addr = ifa->ifa_local;
1168 	} endfor_ifa(in_dev);
1169 
1170 	if (addr)
1171 		goto out_unlock;
1172 no_in_dev:
1173 
1174 	/* Non-loopback addresses on the loopback device should be preferred
1175 	   in this case. It is important that lo is the first interface
1176 	   in the dev_base list.
1177 	 */
1178 	for_each_netdev_rcu(net, dev) {
1179 		in_dev = __in_dev_get_rcu(dev);
1180 		if (!in_dev)
1181 			continue;
1182 
1183 		for_primary_ifa(in_dev) {
1184 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1185 			    ifa->ifa_scope <= scope) {
1186 				addr = ifa->ifa_local;
1187 				goto out_unlock;
1188 			}
1189 		} endfor_ifa(in_dev);
1190 	}
1191 out_unlock:
1192 	rcu_read_unlock();
1193 	return addr;
1194 }
1195 EXPORT_SYMBOL(inet_select_addr);
1196 
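/* Scan a single in_device for a local address that satisfies the dst/local/
 * scope wildcards (see the comment above inet_confirm_addr()); returns 0
 * if no suitable address is found.
 */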
1197 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1198 			      __be32 local, int scope)
1199 {
1200 	int same = 0;
1201 	__be32 addr = 0;
1202 
1203 	for_ifa(in_dev) {
1204 		if (!addr &&
1205 		    (local == ifa->ifa_local || !local) &&
1206 		    ifa->ifa_scope <= scope) {
1207 			addr = ifa->ifa_local;
1208 			if (same)
1209 				break;
1210 		}
1211 		if (!same) {
1212 			same = (!local || inet_ifa_match(local, ifa)) &&
1213 				(!dst || inet_ifa_match(dst, ifa));
1214 			if (same && addr) {
1215 				if (local || !dst)
1216 					break;
1217 				/* Is the selected addr in the dst subnet? */
1218 				if (inet_ifa_match(addr, ifa))
1219 					break;
1220 				/* No, then can we use new local src? */
1221 				if (ifa->ifa_scope <= scope) {
1222 					addr = ifa->ifa_local;
1223 					break;
1224 				}
1225 				/* search for large dst subnet for addr */
1226 				same = 0;
1227 			}
1228 		}
1229 	} endfor_ifa(in_dev);
1230 
1231 	return same ? addr : 0;
1232 }
1233 
1234 /*
1235  * Confirm that local IP address exists using wildcards:
1236  * - in_dev: only on this interface, 0=any interface
1237  * - dst: only in the same subnet as dst, 0=any dst
1238  * - local: address, 0=autoselect the local address
1239  * - scope: maximum allowed scope value for the local address
1240  */
1241 __be32 inet_confirm_addr(struct in_device *in_dev,
1242 			 __be32 dst, __be32 local, int scope)
1243 {
1244 	__be32 addr = 0;
1245 	struct net_device *dev;
1246 	struct net *net;
1247 
1248 	if (scope != RT_SCOPE_LINK)
1249 		return confirm_addr_indev(in_dev, dst, local, scope);
1250 
1251 	net = dev_net(in_dev->dev);
1252 	rcu_read_lock();
1253 	for_each_netdev_rcu(net, dev) {
1254 		in_dev = __in_dev_get_rcu(dev);
1255 		if (in_dev) {
1256 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1257 			if (addr)
1258 				break;
1259 		}
1260 	}
1261 	rcu_read_unlock();
1262 
1263 	return addr;
1264 }
1265 EXPORT_SYMBOL(inet_confirm_addr);
1266 
1267 /*
1268  *	Device notifier
1269  */
1270 
1271 int register_inetaddr_notifier(struct notifier_block *nb)
1272 {
1273 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1274 }
1275 EXPORT_SYMBOL(register_inetaddr_notifier);
1276 
1277 int unregister_inetaddr_notifier(struct notifier_block *nb)
1278 {
1279 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1280 }
1281 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1282 
1283 /* Rename ifa_labels for a device name change. Make some effort to preserve
1284  * existing alias numbering and to create unique labels if possible.
1285 */
1286 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1287 {
1288 	struct in_ifaddr *ifa;
1289 	int named = 0;
1290 
1291 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1292 		char old[IFNAMSIZ], *dot;
1293 
1294 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1295 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1296 		if (named++ == 0)
1297 			goto skip;
1298 		dot = strchr(old, ':');
1299 		if (dot == NULL) {
1300 			sprintf(old, ":%d", named);
1301 			dot = old;
1302 		}
1303 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1304 			strcat(ifa->ifa_label, dot);
1305 		else
1306 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1307 skip:
1308 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1309 	}
1310 }
1311 
1312 static bool inetdev_valid_mtu(unsigned int mtu)
1313 {
1314 	return mtu >= 68;
1315 }
1316 
1317 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1318 					struct in_device *in_dev)
1319 
1320 {
1321 	struct in_ifaddr *ifa;
1322 
1323 	for (ifa = in_dev->ifa_list; ifa;
1324 	     ifa = ifa->ifa_next) {
1325 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1326 			 ifa->ifa_local, dev,
1327 			 ifa->ifa_local, NULL,
1328 			 dev->dev_addr, NULL);
1329 	}
1330 }
1331 
1332 /* Called only under RTNL semaphore */
1333 
1334 static int inetdev_event(struct notifier_block *this, unsigned long event,
1335 			 void *ptr)
1336 {
1337 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1338 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1339 
1340 	ASSERT_RTNL();
1341 
1342 	if (!in_dev) {
1343 		if (event == NETDEV_REGISTER) {
1344 			in_dev = inetdev_init(dev);
1345 			if (!in_dev)
1346 				return notifier_from_errno(-ENOMEM);
1347 			if (dev->flags & IFF_LOOPBACK) {
1348 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1349 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1350 			}
1351 		} else if (event == NETDEV_CHANGEMTU) {
1352 			/* Re-enabling IP */
1353 			if (inetdev_valid_mtu(dev->mtu))
1354 				in_dev = inetdev_init(dev);
1355 		}
1356 		goto out;
1357 	}
1358 
1359 	switch (event) {
1360 	case NETDEV_REGISTER:
1361 		pr_debug("%s: bug\n", __func__);
1362 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1363 		break;
1364 	case NETDEV_UP:
1365 		if (!inetdev_valid_mtu(dev->mtu))
1366 			break;
1367 		if (dev->flags & IFF_LOOPBACK) {
1368 			struct in_ifaddr *ifa = inet_alloc_ifa();
1369 
1370 			if (ifa) {
1371 				INIT_HLIST_NODE(&ifa->hash);
1372 				ifa->ifa_local =
1373 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1374 				ifa->ifa_prefixlen = 8;
1375 				ifa->ifa_mask = inet_make_mask(8);
1376 				in_dev_hold(in_dev);
1377 				ifa->ifa_dev = in_dev;
1378 				ifa->ifa_scope = RT_SCOPE_HOST;
1379 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1380 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1381 						 INFINITY_LIFE_TIME);
1382 				inet_insert_ifa(ifa);
1383 			}
1384 		}
1385 		ip_mc_up(in_dev);
1386 		/* fall through */
1387 	case NETDEV_CHANGEADDR:
1388 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1389 			break;
1390 		/* fall through */
1391 	case NETDEV_NOTIFY_PEERS:
1392 		/* Send gratuitous ARP to notify of link change */
1393 		inetdev_send_gratuitous_arp(dev, in_dev);
1394 		break;
1395 	case NETDEV_DOWN:
1396 		ip_mc_down(in_dev);
1397 		break;
1398 	case NETDEV_PRE_TYPE_CHANGE:
1399 		ip_mc_unmap(in_dev);
1400 		break;
1401 	case NETDEV_POST_TYPE_CHANGE:
1402 		ip_mc_remap(in_dev);
1403 		break;
1404 	case NETDEV_CHANGEMTU:
1405 		if (inetdev_valid_mtu(dev->mtu))
1406 			break;
1407 		/* disable IP when MTU is not enough */
1408 	case NETDEV_UNREGISTER:
1409 		inetdev_destroy(in_dev);
1410 		break;
1411 	case NETDEV_CHANGENAME:
1412 		/* Do not notify about label change, this event is
1413 		 * not interesting to applications using netlink.
1414 		 */
1415 		inetdev_changename(dev, in_dev);
1416 
1417 		devinet_sysctl_unregister(in_dev);
1418 		devinet_sysctl_register(in_dev);
1419 		break;
1420 	}
1421 out:
1422 	return NOTIFY_DONE;
1423 }
1424 
1425 static struct notifier_block ip_netdev_notifier = {
1426 	.notifier_call = inetdev_event,
1427 };
1428 
1429 static size_t inet_nlmsg_size(void)
1430 {
1431 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1432 	       + nla_total_size(4) /* IFA_ADDRESS */
1433 	       + nla_total_size(4) /* IFA_LOCAL */
1434 	       + nla_total_size(4) /* IFA_BROADCAST */
1435 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1436 }
1437 
1438 static inline u32 cstamp_delta(unsigned long cstamp)
1439 {
1440 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1441 }
1442 
1443 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1444 			 unsigned long tstamp, u32 preferred, u32 valid)
1445 {
1446 	struct ifa_cacheinfo ci;
1447 
1448 	ci.cstamp = cstamp_delta(cstamp);
1449 	ci.tstamp = cstamp_delta(tstamp);
1450 	ci.ifa_prefered = preferred;
1451 	ci.ifa_valid = valid;
1452 
1453 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1454 }
1455 
1456 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1457 			    u32 portid, u32 seq, int event, unsigned int flags)
1458 {
1459 	struct ifaddrmsg *ifm;
1460 	struct nlmsghdr  *nlh;
1461 	u32 preferred, valid;
1462 
1463 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1464 	if (nlh == NULL)
1465 		return -EMSGSIZE;
1466 
1467 	ifm = nlmsg_data(nlh);
1468 	ifm->ifa_family = AF_INET;
1469 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1470 	ifm->ifa_flags = ifa->ifa_flags;
1471 	ifm->ifa_scope = ifa->ifa_scope;
1472 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1473 
1474 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1475 		preferred = ifa->ifa_preferred_lft;
1476 		valid = ifa->ifa_valid_lft;
1477 		if (preferred != INFINITY_LIFE_TIME) {
1478 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1479 
1480 			if (preferred > tval)
1481 				preferred -= tval;
1482 			else
1483 				preferred = 0;
1484 			if (valid != INFINITY_LIFE_TIME) {
1485 				if (valid > tval)
1486 					valid -= tval;
1487 				else
1488 					valid = 0;
1489 			}
1490 		}
1491 	} else {
1492 		preferred = INFINITY_LIFE_TIME;
1493 		valid = INFINITY_LIFE_TIME;
1494 	}
1495 	if ((ifa->ifa_address &&
1496 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1497 	    (ifa->ifa_local &&
1498 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1499 	    (ifa->ifa_broadcast &&
1500 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1501 	    (ifa->ifa_label[0] &&
1502 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1503 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1504 			  preferred, valid))
1505 		goto nla_put_failure;
1506 
1507 	return nlmsg_end(skb, nlh);
1508 
1509 nla_put_failure:
1510 	nlmsg_cancel(skb, nlh);
1511 	return -EMSGSIZE;
1512 }
1513 
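/* RTM_GETADDR dump callback: walk the per-net device index hash under RCU,
 * using cb->args[] (hash bucket, device index, address index) to resume
 * across multiple skbs.
 */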
1514 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1515 {
1516 	struct net *net = sock_net(skb->sk);
1517 	int h, s_h;
1518 	int idx, s_idx;
1519 	int ip_idx, s_ip_idx;
1520 	struct net_device *dev;
1521 	struct in_device *in_dev;
1522 	struct in_ifaddr *ifa;
1523 	struct hlist_head *head;
1524 
1525 	s_h = cb->args[0];
1526 	s_idx = idx = cb->args[1];
1527 	s_ip_idx = ip_idx = cb->args[2];
1528 
1529 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1530 		idx = 0;
1531 		head = &net->dev_index_head[h];
1532 		rcu_read_lock();
1533 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1534 			  net->dev_base_seq;
1535 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1536 			if (idx < s_idx)
1537 				goto cont;
1538 			if (h > s_h || idx > s_idx)
1539 				s_ip_idx = 0;
1540 			in_dev = __in_dev_get_rcu(dev);
1541 			if (!in_dev)
1542 				goto cont;
1543 
1544 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1545 			     ifa = ifa->ifa_next, ip_idx++) {
1546 				if (ip_idx < s_ip_idx)
1547 					continue;
1548 				if (inet_fill_ifaddr(skb, ifa,
1549 					     NETLINK_CB(cb->skb).portid,
1550 					     cb->nlh->nlmsg_seq,
1551 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1552 					rcu_read_unlock();
1553 					goto done;
1554 				}
1555 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1556 			}
1557 cont:
1558 			idx++;
1559 		}
1560 		rcu_read_unlock();
1561 	}
1562 
1563 done:
1564 	cb->args[0] = h;
1565 	cb->args[1] = idx;
1566 	cb->args[2] = ip_idx;
1567 
1568 	return skb->len;
1569 }
1570 
1571 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1572 		      u32 portid)
1573 {
1574 	struct sk_buff *skb;
1575 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1576 	int err = -ENOBUFS;
1577 	struct net *net;
1578 
1579 	net = dev_net(ifa->ifa_dev->dev);
1580 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1581 	if (skb == NULL)
1582 		goto errout;
1583 
1584 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1585 	if (err < 0) {
1586 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1587 		WARN_ON(err == -EMSGSIZE);
1588 		kfree_skb(skb);
1589 		goto errout;
1590 	}
1591 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1592 	return;
1593 errout:
1594 	if (err < 0)
1595 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1596 }
1597 
1598 static size_t inet_get_link_af_size(const struct net_device *dev)
1599 {
1600 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1601 
1602 	if (!in_dev)
1603 		return 0;
1604 
1605 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1606 }
1607 
1608 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1609 {
1610 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1611 	struct nlattr *nla;
1612 	int i;
1613 
1614 	if (!in_dev)
1615 		return -ENODATA;
1616 
1617 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1618 	if (nla == NULL)
1619 		return -EMSGSIZE;
1620 
1621 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1622 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1623 
1624 	return 0;
1625 }
1626 
1627 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1628 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1629 };
1630 
1631 static int inet_validate_link_af(const struct net_device *dev,
1632 				 const struct nlattr *nla)
1633 {
1634 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1635 	int err, rem;
1636 
1637 	if (dev && !__in_dev_get_rtnl(dev))
1638 		return -EAFNOSUPPORT;
1639 
1640 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1641 	if (err < 0)
1642 		return err;
1643 
1644 	if (tb[IFLA_INET_CONF]) {
1645 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1646 			int cfgid = nla_type(a);
1647 
1648 			if (nla_len(a) < 4)
1649 				return -EINVAL;
1650 
1651 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1652 				return -EINVAL;
1653 		}
1654 	}
1655 
1656 	return 0;
1657 }
1658 
1659 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1660 {
1661 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1662 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1663 	int rem;
1664 
1665 	if (!in_dev)
1666 		return -EAFNOSUPPORT;
1667 
1668 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1669 		BUG();
1670 
1671 	if (tb[IFLA_INET_CONF]) {
1672 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1673 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1674 	}
1675 
1676 	return 0;
1677 }
1678 
1679 static int inet_netconf_msgsize_devconf(int type)
1680 {
1681 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1682 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1683 
1684 	/* type -1 is used for ALL */
1685 	if (type == -1 || type == NETCONFA_FORWARDING)
1686 		size += nla_total_size(4);
1687 	if (type == -1 || type == NETCONFA_RP_FILTER)
1688 		size += nla_total_size(4);
1689 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1690 		size += nla_total_size(4);
1691 
1692 	return size;
1693 }
1694 
1695 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1696 				     struct ipv4_devconf *devconf, u32 portid,
1697 				     u32 seq, int event, unsigned int flags,
1698 				     int type)
1699 {
1700 	struct nlmsghdr  *nlh;
1701 	struct netconfmsg *ncm;
1702 
1703 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1704 			flags);
1705 	if (nlh == NULL)
1706 		return -EMSGSIZE;
1707 
1708 	ncm = nlmsg_data(nlh);
1709 	ncm->ncm_family = AF_INET;
1710 
1711 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1712 		goto nla_put_failure;
1713 
1714 	/* type -1 is used for ALL */
1715 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1716 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1717 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1718 		goto nla_put_failure;
1719 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1720 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1721 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1722 		goto nla_put_failure;
1723 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1724 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1725 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1726 		goto nla_put_failure;
1727 
1728 	return nlmsg_end(skb, nlh);
1729 
1730 nla_put_failure:
1731 	nlmsg_cancel(skb, nlh);
1732 	return -EMSGSIZE;
1733 }
1734 
1735 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1736 				 struct ipv4_devconf *devconf)
1737 {
1738 	struct sk_buff *skb;
1739 	int err = -ENOBUFS;
1740 
1741 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1742 	if (skb == NULL)
1743 		goto errout;
1744 
1745 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1746 					RTM_NEWNETCONF, 0, type);
1747 	if (err < 0) {
1748 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1749 		WARN_ON(err == -EMSGSIZE);
1750 		kfree_skb(skb);
1751 		goto errout;
1752 	}
1753 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1754 	return;
1755 errout:
1756 	if (err < 0)
1757 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1758 }
1759 
1760 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1761 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1762 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1763 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1764 };
1765 
1766 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1767 				    struct nlmsghdr *nlh)
1768 {
1769 	struct net *net = sock_net(in_skb->sk);
1770 	struct nlattr *tb[NETCONFA_MAX+1];
1771 	struct netconfmsg *ncm;
1772 	struct sk_buff *skb;
1773 	struct ipv4_devconf *devconf;
1774 	struct in_device *in_dev;
1775 	struct net_device *dev;
1776 	int ifindex;
1777 	int err;
1778 
1779 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1780 			  devconf_ipv4_policy);
1781 	if (err < 0)
1782 		goto errout;
1783 
1784 	err = EINVAL;
1785 	if (!tb[NETCONFA_IFINDEX])
1786 		goto errout;
1787 
1788 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1789 	switch (ifindex) {
1790 	case NETCONFA_IFINDEX_ALL:
1791 		devconf = net->ipv4.devconf_all;
1792 		break;
1793 	case NETCONFA_IFINDEX_DEFAULT:
1794 		devconf = net->ipv4.devconf_dflt;
1795 		break;
1796 	default:
1797 		dev = __dev_get_by_index(net, ifindex);
1798 		if (dev == NULL)
1799 			goto errout;
1800 		in_dev = __in_dev_get_rtnl(dev);
1801 		if (in_dev == NULL)
1802 			goto errout;
1803 		devconf = &in_dev->cnf;
1804 		break;
1805 	}
1806 
1807 	err = -ENOBUFS;
1808 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1809 	if (skb == NULL)
1810 		goto errout;
1811 
1812 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1813 					NETLINK_CB(in_skb).portid,
1814 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1815 					-1);
1816 	if (err < 0) {
1817 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1818 		WARN_ON(err == -EMSGSIZE);
1819 		kfree_skb(skb);
1820 		goto errout;
1821 	}
1822 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1823 errout:
1824 	return err;
1825 }
1826 
1827 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1828 				     struct netlink_callback *cb)
1829 {
1830 	struct net *net = sock_net(skb->sk);
1831 	int h, s_h;
1832 	int idx, s_idx;
1833 	struct net_device *dev;
1834 	struct in_device *in_dev;
1835 	struct hlist_head *head;
1836 
1837 	s_h = cb->args[0];
1838 	s_idx = idx = cb->args[1];
1839 
1840 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1841 		idx = 0;
1842 		head = &net->dev_index_head[h];
1843 		rcu_read_lock();
1844 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1845 			  net->dev_base_seq;
1846 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1847 			if (idx < s_idx)
1848 				goto cont;
1849 			in_dev = __in_dev_get_rcu(dev);
1850 			if (!in_dev)
1851 				goto cont;
1852 
1853 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1854 						      &in_dev->cnf,
1855 						      NETLINK_CB(cb->skb).portid,
1856 						      cb->nlh->nlmsg_seq,
1857 						      RTM_NEWNETCONF,
1858 						      NLM_F_MULTI,
1859 						      -1) <= 0) {
1860 				rcu_read_unlock();
1861 				goto done;
1862 			}
1863 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1864 cont:
1865 			idx++;
1866 		}
1867 		rcu_read_unlock();
1868 	}
1869 	if (h == NETDEV_HASHENTRIES) {
1870 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1871 					      net->ipv4.devconf_all,
1872 					      NETLINK_CB(cb->skb).portid,
1873 					      cb->nlh->nlmsg_seq,
1874 					      RTM_NEWNETCONF, NLM_F_MULTI,
1875 					      -1) <= 0)
1876 			goto done;
1877 		else
1878 			h++;
1879 	}
1880 	if (h == NETDEV_HASHENTRIES + 1) {
1881 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1882 					      net->ipv4.devconf_dflt,
1883 					      NETLINK_CB(cb->skb).portid,
1884 					      cb->nlh->nlmsg_seq,
1885 					      RTM_NEWNETCONF, NLM_F_MULTI,
1886 					      -1) <= 0)
1887 			goto done;
1888 		else
1889 			h++;
1890 	}
1891 done:
1892 	cb->args[0] = h;
1893 	cb->args[1] = idx;
1894 
1895 	return skb->len;
1896 }
1897 
1898 #ifdef CONFIG_SYSCTL
1899 
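/* Push a changed "default" devconf value out to every in_device that has
 * not explicitly overridden it (i.e. whose per-device state bit is clear).
 */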
1900 static void devinet_copy_dflt_conf(struct net *net, int i)
1901 {
1902 	struct net_device *dev;
1903 
1904 	rcu_read_lock();
1905 	for_each_netdev_rcu(net, dev) {
1906 		struct in_device *in_dev;
1907 
1908 		in_dev = __in_dev_get_rcu(dev);
1909 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1910 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1911 	}
1912 	rcu_read_unlock();
1913 }
1914 
1915 /* called with RTNL locked */
1916 static void inet_forward_change(struct net *net)
1917 {
1918 	struct net_device *dev;
1919 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1920 
1921 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1922 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1923 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1924 				    NETCONFA_IFINDEX_ALL,
1925 				    net->ipv4.devconf_all);
1926 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1927 				    NETCONFA_IFINDEX_DEFAULT,
1928 				    net->ipv4.devconf_dflt);
1929 
1930 	for_each_netdev(net, dev) {
1931 		struct in_device *in_dev;
1932 		if (on)
1933 			dev_disable_lro(dev);
1934 		rcu_read_lock();
1935 		in_dev = __in_dev_get_rcu(dev);
1936 		if (in_dev) {
1937 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1938 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1939 						    dev->ifindex, &in_dev->cnf);
1940 		}
1941 		rcu_read_unlock();
1942 	}
1943 }
1944 
1945 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1946 			     void __user *buffer,
1947 			     size_t *lenp, loff_t *ppos)
1948 {
1949 	int old_value = *(int *)ctl->data;
1950 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1951 	int new_value = *(int *)ctl->data;
1952 
1953 	if (write) {
1954 		struct ipv4_devconf *cnf = ctl->extra1;
1955 		struct net *net = ctl->extra2;
1956 		int i = (int *)ctl->data - cnf->data;
1957 
1958 		set_bit(i, cnf->state);
1959 
1960 		if (cnf == net->ipv4.devconf_dflt)
1961 			devinet_copy_dflt_conf(net, i);
1962 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1963 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1964 			if ((new_value == 0) && (old_value != 0))
1965 				rt_cache_flush(net);
1966 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1967 		    new_value != old_value) {
1968 			int ifindex;
1969 
1970 			if (cnf == net->ipv4.devconf_dflt)
1971 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1972 			else if (cnf == net->ipv4.devconf_all)
1973 				ifindex = NETCONFA_IFINDEX_ALL;
1974 			else {
1975 				struct in_device *idev =
1976 					container_of(cnf, struct in_device,
1977 						     cnf);
1978 				ifindex = idev->dev->ifindex;
1979 			}
1980 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1981 						    ifindex, cnf);
1982 		}
1983 	}
1984 
1985 	return ret;
1986 }
1987 
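/*
 * Handler shared by the "forwarding" entries and net.ipv4.ip_forward.
 * Applying a change needs the RTNL lock; rtnl_trylock() together with
 * restart_syscall() avoids deadlocking against code that holds RTNL while
 * (un)registering these tables, and the written value and file position
 * are rolled back first so the restarted write starts from a clean slate.
 * A write to "all" goes through inet_forward_change(), a per-device write
 * disables LRO (when enabling) and notifies that ifindex, and a write to
 * "default" only sends the RTM_NEWNETCONF notification.
 *
 * For illustration, a userspace write such as
 *
 *	sysctl -w net.ipv4.conf.all.forwarding=1
 *
 * lands here with valp == &IPV4_DEVCONF_ALL(net, FORWARDING).
 */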
1988 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1989 				  void __user *buffer,
1990 				  size_t *lenp, loff_t *ppos)
1991 {
1992 	int *valp = ctl->data;
1993 	int val = *valp;
1994 	loff_t pos = *ppos;
1995 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1996 
1997 	if (write && *valp != val) {
1998 		struct net *net = ctl->extra2;
1999 
2000 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2001 			if (!rtnl_trylock()) {
2002 				/* Restore the original values before restarting */
2003 				*valp = val;
2004 				*ppos = pos;
2005 				return restart_syscall();
2006 			}
2007 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2008 				inet_forward_change(net);
2009 			} else {
2010 				struct ipv4_devconf *cnf = ctl->extra1;
2011 				struct in_device *idev =
2012 					container_of(cnf, struct in_device, cnf);
2013 				if (*valp)
2014 					dev_disable_lro(idev->dev);
2015 				inet_netconf_notify_devconf(net,
2016 							    NETCONFA_FORWARDING,
2017 							    idev->dev->ifindex,
2018 							    cnf);
2019 			}
2020 			rtnl_unlock();
2021 			rt_cache_flush(net);
2022 		} else
2023 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2024 						    NETCONFA_IFINDEX_DEFAULT,
2025 						    net->ipv4.devconf_dflt);
2026 	}
2027 
2028 	return ret;
2029 }
2030 
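/*
 * proc_dointvec() wrapper that flushes the IPv4 routing cache whenever
 * the value actually changes; used by the FLUSHING entries below
 * (disable_xfrm, disable_policy, force_igmp_version, promote_secondaries,
 * route_localnet).
 */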
2031 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2032 				void __user *buffer,
2033 				size_t *lenp, loff_t *ppos)
2034 {
2035 	int *valp = ctl->data;
2036 	int val = *valp;
2037 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2038 	struct net *net = ctl->extra2;
2039 
2040 	if (write && *valp != val)
2041 		rt_cache_flush(net);
2042 
2043 	return ret;
2044 }
2045 
2046 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2047 	{ \
2048 		.procname	= name, \
2049 		.data		= ipv4_devconf.data + \
2050 				  IPV4_DEVCONF_ ## attr - 1, \
2051 		.maxlen		= sizeof(int), \
2052 		.mode		= mval, \
2053 		.proc_handler	= proc, \
2054 		.extra1		= &ipv4_devconf, \
2055 	}
2056 
2057 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2058 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2059 
2060 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2061 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2062 
2063 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2064 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2065 
2066 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2067 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
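/*
 * The helpers above build ctl_table entries that point into the static
 * ipv4_devconf template; __devinet_sysctl_register() below rebases .data
 * and .extra1/.extra2 onto the per-namespace or per-device copy.  RW and
 * RO entries differ only in mode, COMPLEX installs a custom handler and
 * FLUSHING uses ipv4_doint_and_flush.  As a rough sketch of the
 * expansion, DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter") becomes:
 *
 *	{
 *		.procname	= "rp_filter",
 *		.data		= ipv4_devconf.data + IPV4_DEVCONF_RP_FILTER - 1,
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= devinet_conf_proc,
 *		.extra1		= &ipv4_devconf,
 *	}
 */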
2068 
2069 static struct devinet_sysctl_table {
2070 	struct ctl_table_header *sysctl_header;
2071 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2072 } devinet_sysctl = {
2073 	.devinet_vars = {
2074 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2075 					     devinet_sysctl_forward),
2076 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2077 
2078 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2079 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2080 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2081 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2082 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2083 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2084 					"accept_source_route"),
2085 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2086 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2087 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2088 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2089 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2090 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2091 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2092 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2093 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2094 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2095 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2096 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2097 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2098 
2099 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2100 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2101 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2102 					      "force_igmp_version"),
2103 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2104 					      "promote_secondaries"),
2105 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2106 					      "route_localnet"),
2107 	},
2108 };
2109 
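/*
 * Duplicate the template table above for one ipv4_devconf instance:
 * every .data pointer is shifted by the offset between @p and the static
 * ipv4_devconf, .extra1/.extra2 are pointed at @p and @net, and the copy
 * is registered under "net/ipv4/conf/<dev_name>".  The resulting paths
 * look like /proc/sys/net/ipv4/conf/all/rp_filter or, for a device named
 * eth0 (example name), /proc/sys/net/ipv4/conf/eth0/rp_filter.
 */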
2110 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2111 					struct ipv4_devconf *p)
2112 {
2113 	int i;
2114 	struct devinet_sysctl_table *t;
2115 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2116 
2117 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2118 	if (!t)
2119 		goto out;
2120 
2121 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2122 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2123 		t->devinet_vars[i].extra1 = p;
2124 		t->devinet_vars[i].extra2 = net;
2125 	}
2126 
2127 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2128 
2129 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2130 	if (!t->sysctl_header)
2131 		goto free;
2132 
2133 	p->sysctl = t;
2134 	return 0;
2135 
2136 free:
2137 	kfree(t);
2138 out:
2139 	return -ENOBUFS;
2140 }
2141 
2142 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2143 {
2144 	struct devinet_sysctl_table *t = cnf->sysctl;
2145 
2146 	if (t == NULL)
2147 		return;
2148 
2149 	cnf->sysctl = NULL;
2150 	unregister_net_sysctl_table(t->sysctl_header);
2151 	kfree(t);
2152 }
2153 
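/*
 * Per-device sysctl registration: the neighbour (ARP) parameters for the
 * device plus its net/ipv4/conf/<devname> tree; the unregister variant
 * tears both down in reverse order.
 */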
2154 static void devinet_sysctl_register(struct in_device *idev)
2155 {
2156 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2157 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2158 					&idev->cnf);
2159 }
2160 
2161 static void devinet_sysctl_unregister(struct in_device *idev)
2162 {
2163 	__devinet_sysctl_unregister(&idev->cnf);
2164 	neigh_sysctl_unregister(idev->arp_parms);
2165 }
2166 
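/*
 * The global net.ipv4.ip_forward knob is just another view of the "all"
 * FORWARDING value, handled by devinet_sysctl_forward().  The static
 * entry below is wired to &init_net; devinet_init_net() duplicates it for
 * other namespaces and repoints .data/.extra1/.extra2 at that namespace's
 * devconf_all.  For illustration, writing net.ipv4.ip_forward=1 has the
 * same effect as writing net.ipv4.conf.all.forwarding=1.
 */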
2167 static struct ctl_table ctl_forward_entry[] = {
2168 	{
2169 		.procname	= "ip_forward",
2170 		.data		= &ipv4_devconf.data[
2171 					IPV4_DEVCONF_FORWARDING - 1],
2172 		.maxlen		= sizeof(int),
2173 		.mode		= 0644,
2174 		.proc_handler	= devinet_sysctl_forward,
2175 		.extra1		= &ipv4_devconf,
2176 		.extra2		= &init_net,
2177 	},
2178 	{ },
2179 };
2180 #endif
2181 
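/*
 * Per-namespace setup: init_net keeps the static ipv4_devconf,
 * ipv4_devconf_dflt and ctl_forward_entry; any other namespace gets
 * kmemdup()ed copies, with the ip_forward entry repointed at its own
 * devconf_all.  The conf/all and conf/default trees and the net/ipv4
 * ip_forward entry are then registered and the copies published in
 * net->ipv4.  The error path unwinds in reverse order and frees only
 * what was actually allocated for this namespace.
 */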
2182 static __net_init int devinet_init_net(struct net *net)
2183 {
2184 	int err;
2185 	struct ipv4_devconf *all, *dflt;
2186 #ifdef CONFIG_SYSCTL
2187 	struct ctl_table *tbl = ctl_forward_entry;
2188 	struct ctl_table_header *forw_hdr;
2189 #endif
2190 
2191 	err = -ENOMEM;
2192 	all = &ipv4_devconf;
2193 	dflt = &ipv4_devconf_dflt;
2194 
2195 	if (!net_eq(net, &init_net)) {
2196 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2197 		if (all == NULL)
2198 			goto err_alloc_all;
2199 
2200 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2201 		if (dflt == NULL)
2202 			goto err_alloc_dflt;
2203 
2204 #ifdef CONFIG_SYSCTL
2205 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2206 		if (tbl == NULL)
2207 			goto err_alloc_ctl;
2208 
2209 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2210 		tbl[0].extra1 = all;
2211 		tbl[0].extra2 = net;
2212 #endif
2213 	}
2214 
2215 #ifdef CONFIG_SYSCTL
2216 	err = __devinet_sysctl_register(net, "all", all);
2217 	if (err < 0)
2218 		goto err_reg_all;
2219 
2220 	err = __devinet_sysctl_register(net, "default", dflt);
2221 	if (err < 0)
2222 		goto err_reg_dflt;
2223 
2224 	err = -ENOMEM;
2225 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2226 	if (forw_hdr == NULL)
2227 		goto err_reg_ctl;
2228 	net->ipv4.forw_hdr = forw_hdr;
2229 #endif
2230 
2231 	net->ipv4.devconf_all = all;
2232 	net->ipv4.devconf_dflt = dflt;
2233 	return 0;
2234 
2235 #ifdef CONFIG_SYSCTL
2236 err_reg_ctl:
2237 	__devinet_sysctl_unregister(dflt);
2238 err_reg_dflt:
2239 	__devinet_sysctl_unregister(all);
2240 err_reg_all:
2241 	if (tbl != ctl_forward_entry)
2242 		kfree(tbl);
2243 err_alloc_ctl:
2244 #endif
2245 	if (dflt != &ipv4_devconf_dflt)
2246 		kfree(dflt);
2247 err_alloc_dflt:
2248 	if (all != &ipv4_devconf)
2249 		kfree(all);
2250 err_alloc_all:
2251 	return err;
2252 }
2253 
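/*
 * Namespace teardown: unregister the ip_forward table and the all/default
 * trees, then free the per-namespace devconf copies.  init_net itself is
 * not torn down at run time, so the static templates are not freed
 * through this path.
 */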
2254 static __net_exit void devinet_exit_net(struct net *net)
2255 {
2256 #ifdef CONFIG_SYSCTL
2257 	struct ctl_table *tbl;
2258 
2259 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2260 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2261 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2262 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2263 	kfree(tbl);
2264 #endif
2265 	kfree(net->ipv4.devconf_dflt);
2266 	kfree(net->ipv4.devconf_all);
2267 }
2268 
2269 static __net_initdata struct pernet_operations devinet_ops = {
2270 	.init = devinet_init_net,
2271 	.exit = devinet_exit_net,
2272 };
2273 
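/*
 * Address-family hooks used by the rtnetlink core to fill, size, validate
 * and apply the IPv4 (IFLA_AF_SPEC) portion of link messages.
 */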
2274 static struct rtnl_af_ops inet_af_ops = {
2275 	.family		  = AF_INET,
2276 	.fill_link_af	  = inet_fill_link_af,
2277 	.get_link_af_size = inet_get_link_af_size,
2278 	.validate_link_af = inet_validate_link_af,
2279 	.set_link_af	  = inet_set_link_af,
2280 };
2281 
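/*
 * Boot-time initialisation: set up the inet_addr_lst hash, register the
 * pernet operations above, the SIOCGIFCONF helper and the netdevice
 * notifier, kick the address-lifetime worker, then hook AF_INET into
 * rtnetlink and register the RTM_{NEW,DEL,GET}ADDR and RTM_GETNETCONF
 * message handlers.
 */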
2282 void __init devinet_init(void)
2283 {
2284 	int i;
2285 
2286 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2287 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2288 
2289 	register_pernet_subsys(&devinet_ops);
2290 
2291 	register_gifconf(PF_INET, inet_gifconf);
2292 	register_netdevice_notifier(&ip_netdev_notifier);
2293 
2294 	schedule_delayed_work(&check_lifetime_work, 0);
2295 
2296 	rtnl_af_register(&inet_af_ops);
2297 
2298 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2299 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2300 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2301 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2302 		      inet_netconf_dump_devconf, NULL);
2303 }
2304 
2305