xref: /linux/net/ipv4/devinet.c (revision c75c5ab575af7db707689cdbb5a5c458e9a034bb)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 	},
77 };
78 
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80 	.data = {
81 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86 	},
87 };
88 
/*
 * Read configuration attribute @attr from the default devconf block of
 * network namespace @net.  @net is parenthesized so that any pointer
 * expression (not just a plain identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91 
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 	[IFA_LOCAL]     	= { .type = NLA_U32 },
94 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
95 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
96 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
98 };
99 
100 #define IN4_ADDR_HSIZE_SHIFT	8
101 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
102 
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ net_hash_mix(net);
109 
110 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124 	spin_lock(&inet_addr_hash_lock);
125 	hlist_del_init_rcu(&ifa->hash);
126 	spin_unlock(&inet_addr_hash_lock);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	u32 hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 		if (ifa->ifa_local == addr) {
146 			struct net_device *dev = ifa->ifa_dev->dev;
147 
148 			if (!net_eq(dev_net(dev), net))
149 				continue;
150 			result = dev;
151 			break;
152 		}
153 	}
154 	if (!result) {
155 		struct flowi4 fl4 = { .daddr = addr };
156 		struct fib_result res = { 0 };
157 		struct fib_table *local;
158 
159 		/* Fallback to FIB local table so that communication
160 		 * over loopback subnets work.
161 		 */
162 		local = fib_get_table(net, RT_TABLE_LOCAL);
163 		if (local &&
164 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 		    res.type == RTN_LOCAL)
166 			result = FIB_RES_DEV(res);
167 	}
168 	if (result && devref)
169 		dev_hold(result);
170 	rcu_read_unlock();
171 	return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174 
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176 
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179 			 int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191 
192 /* Locks all the inet devices. */
193 
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198 
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202 	if (ifa->ifa_dev)
203 		in_dev_put(ifa->ifa_dev);
204 	kfree(ifa);
205 }
206 
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211 
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214 	struct net_device *dev = idev->dev;
215 
216 	WARN_ON(idev->ifa_list);
217 	WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220 #endif
221 	dev_put(dev);
222 	if (!idev->dead)
223 		pr_err("Freeing alive in_device %p\n", idev);
224 	else
225 		kfree(idev);
226 }
227 EXPORT_SYMBOL(in_dev_finish_destroy);
228 
229 static struct in_device *inetdev_init(struct net_device *dev)
230 {
231 	struct in_device *in_dev;
232 
233 	ASSERT_RTNL();
234 
235 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236 	if (!in_dev)
237 		goto out;
238 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239 			sizeof(in_dev->cnf));
240 	in_dev->cnf.sysctl = NULL;
241 	in_dev->dev = dev;
242 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243 	if (!in_dev->arp_parms)
244 		goto out_kfree;
245 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246 		dev_disable_lro(dev);
247 	/* Reference in_dev->dev */
248 	dev_hold(dev);
249 	/* Account for reference dev->ip_ptr (below) */
250 	in_dev_hold(in_dev);
251 
252 	devinet_sysctl_register(in_dev);
253 	ip_mc_init_dev(in_dev);
254 	if (dev->flags & IFF_UP)
255 		ip_mc_up(in_dev);
256 
257 	/* we can receive as soon as ip_ptr is set -- do this last */
258 	rcu_assign_pointer(dev->ip_ptr, in_dev);
259 out:
260 	return in_dev;
261 out_kfree:
262 	kfree(in_dev);
263 	in_dev = NULL;
264 	goto out;
265 }
266 
267 static void in_dev_rcu_put(struct rcu_head *head)
268 {
269 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
270 	in_dev_put(idev);
271 }
272 
273 static void inetdev_destroy(struct in_device *in_dev)
274 {
275 	struct in_ifaddr *ifa;
276 	struct net_device *dev;
277 
278 	ASSERT_RTNL();
279 
280 	dev = in_dev->dev;
281 
282 	in_dev->dead = 1;
283 
284 	ip_mc_destroy_dev(in_dev);
285 
286 	while ((ifa = in_dev->ifa_list) != NULL) {
287 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288 		inet_free_ifa(ifa);
289 	}
290 
291 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
292 
293 	devinet_sysctl_unregister(in_dev);
294 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295 	arp_ifdown(dev);
296 
297 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 }
299 
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301 {
302 	rcu_read_lock();
303 	for_primary_ifa(in_dev) {
304 		if (inet_ifa_match(a, ifa)) {
305 			if (!b || inet_ifa_match(b, ifa)) {
306 				rcu_read_unlock();
307 				return 1;
308 			}
309 		}
310 	} endfor_ifa(in_dev);
311 	rcu_read_unlock();
312 	return 0;
313 }
314 
/*
 * __inet_del_ifa - remove the address *@ifap from @in_dev's address list.
 * @in_dev:  device state owning the list
 * @ifap:    link (pointer to the list slot) that currently holds the address
 * @destroy: non-zero to also free the removed address via inet_free_ifa()
 * @nlh:     netlink request passed through to rtmsg_ifa(); may be NULL
 * @portid:  netlink port id passed through to rtmsg_ifa()
 *
 * Must be called under RTNL.  When the removed address is a primary, the
 * secondaries that share its mask and match its subnet are either deleted
 * as well (promotion disabled) or the first of them is promoted to primary.
 * Each removal is announced with RTM_DELADDR and a NETDEV_DOWN notifier
 * call; a promotion is announced with RTM_NEWADDR and NETDEV_UP.
 */
315 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
316 			 int destroy, struct nlmsghdr *nlh, u32 portid)
317 {
318 	struct in_ifaddr *promote = NULL;
319 	struct in_ifaddr *ifa, *ifa1 = *ifap;
320 	struct in_ifaddr *last_prim = in_dev->ifa_list;
321 	struct in_ifaddr *prev_prom = NULL;
322 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
323 
324 	ASSERT_RTNL();
325 
326 	/* 1. Deleting primary ifaddr forces deletion all secondaries
327 	 * unless alias promotion is set
328 	 **/
329 
330 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
331 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
332 
333 		while ((ifa = *ifap1) != NULL) {
	/* Track the last primary with scope >= ifa1's scope:
	 * a promoted address is re-linked right after it.
	 */
334 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
335 			    ifa1->ifa_scope <= ifa->ifa_scope)
336 				last_prim = ifa;
337 
	/* Skip entries that are not secondaries of ifa1's subnet. */
338 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
339 			    ifa1->ifa_mask != ifa->ifa_mask ||
340 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
341 				ifap1 = &ifa->ifa_next;
342 				prev_prom = ifa;
343 				continue;
344 			}
345 
346 			if (!do_promote) {
347 				inet_hash_remove(ifa);
348 				*ifap1 = ifa->ifa_next;
349 
350 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
351 				blocking_notifier_call_chain(&inetaddr_chain,
352 						NETDEV_DOWN, ifa);
353 				inet_free_ifa(ifa);
354 			} else {
	/* First matching secondary becomes the promotion candidate. */
355 				promote = ifa;
356 				break;
357 			}
358 		}
359 	}
360 
361 	/* On promotion all secondaries from subnet are changing
362 	 * the primary IP, we must remove all their routes silently
363 	 * and later to add them back with new prefsrc. Do this
364 	 * while all addresses are on the device list.
365 	 */
366 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
367 		if (ifa1->ifa_mask == ifa->ifa_mask &&
368 		    inet_ifa_match(ifa1->ifa_address, ifa))
369 			fib_del_ifaddr(ifa, ifa1);
370 	}
371 
372 	/* 2. Unlink it */
373 
374 	*ifap = ifa1->ifa_next;
375 	inet_hash_remove(ifa1);
376 
377 	/* 3. Announce address deletion */
378 
379 	/* Send message first, then call notifier.
380 	   At first sight, FIB update triggered by notifier
381 	   will refer to already deleted ifaddr, that could confuse
382 	   netlink listeners. It is not true: look, gated sees
383 	   that route deleted and if it still thinks that ifaddr
384 	   is valid, it will try to restore deleted routes... Grr.
385 	   So that, this order is correct.
386 	 */
387 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
388 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
389 
390 	if (promote) {
	/* Remember the follower before promote is re-linked below. */
391 		struct in_ifaddr *next_sec = promote->ifa_next;
392 
	/* Move the promoted entry behind last_prim; only done when
	 * some non-matching entry (prev_prom) precedes it.
	 */
393 		if (prev_prom) {
394 			prev_prom->ifa_next = promote->ifa_next;
395 			promote->ifa_next = last_prim->ifa_next;
396 			last_prim->ifa_next = promote;
397 		}
398 
399 		promote->ifa_flags &= ~IFA_F_SECONDARY;
400 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
401 		blocking_notifier_call_chain(&inetaddr_chain,
402 				NETDEV_UP, promote);
	/* Re-add FIB entries of the remaining secondaries of the subnet. */
403 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
404 			if (ifa1->ifa_mask != ifa->ifa_mask ||
405 			    !inet_ifa_match(ifa1->ifa_address, ifa))
406 					continue;
407 			fib_add_ifaddr(ifa);
408 		}
409 
410 	}
411 	if (destroy)
412 		inet_free_ifa(ifa1);
413 }
414 
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
416 			 int destroy)
417 {
418 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
419 }
420 
/* Delayed work item that runs check_lifetime() to expire addresses. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424 
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426 			     u32 portid)
427 {
428 	struct in_device *in_dev = ifa->ifa_dev;
429 	struct in_ifaddr *ifa1, **ifap, **last_primary;
430 
431 	ASSERT_RTNL();
432 
433 	if (!ifa->ifa_local) {
434 		inet_free_ifa(ifa);
435 		return 0;
436 	}
437 
438 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
439 	last_primary = &in_dev->ifa_list;
440 
441 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442 	     ifap = &ifa1->ifa_next) {
443 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444 		    ifa->ifa_scope <= ifa1->ifa_scope)
445 			last_primary = &ifa1->ifa_next;
446 		if (ifa1->ifa_mask == ifa->ifa_mask &&
447 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
448 			if (ifa1->ifa_local == ifa->ifa_local) {
449 				inet_free_ifa(ifa);
450 				return -EEXIST;
451 			}
452 			if (ifa1->ifa_scope != ifa->ifa_scope) {
453 				inet_free_ifa(ifa);
454 				return -EINVAL;
455 			}
456 			ifa->ifa_flags |= IFA_F_SECONDARY;
457 		}
458 	}
459 
460 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461 		net_srandom(ifa->ifa_local);
462 		ifap = last_primary;
463 	}
464 
465 	ifa->ifa_next = *ifap;
466 	*ifap = ifa;
467 
468 	inet_hash_insert(dev_net(in_dev->dev), ifa);
469 
470 	cancel_delayed_work(&check_lifetime_work);
471 	schedule_delayed_work(&check_lifetime_work, 0);
472 
473 	/* Send message first, then call notifier.
474 	   Notifier will trigger FIB update, so that
475 	   listeners of netlink will know about new ifaddr */
476 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478 
479 	return 0;
480 }
481 
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
483 {
484 	return __inet_insert_ifa(ifa, NULL, 0);
485 }
486 
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
490 
491 	ASSERT_RTNL();
492 
493 	if (!in_dev) {
494 		inet_free_ifa(ifa);
495 		return -ENOBUFS;
496 	}
497 	ipv4_devconf_setall(in_dev);
498 	if (ifa->ifa_dev != in_dev) {
499 		WARN_ON(ifa->ifa_dev);
500 		in_dev_hold(in_dev);
501 		ifa->ifa_dev = in_dev;
502 	}
503 	if (ipv4_is_loopback(ifa->ifa_local))
504 		ifa->ifa_scope = RT_SCOPE_HOST;
505 	return inet_insert_ifa(ifa);
506 }
507 
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513 	struct net_device *dev;
514 	struct in_device *in_dev = NULL;
515 
516 	rcu_read_lock();
517 	dev = dev_get_by_index_rcu(net, ifindex);
518 	if (dev)
519 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520 	rcu_read_unlock();
521 	return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524 
525 /* Called only from RTNL semaphored context. No locks. */
526 
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528 				    __be32 mask)
529 {
530 	ASSERT_RTNL();
531 
532 	for_primary_ifa(in_dev) {
533 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534 			return ifa;
535 	} endfor_ifa(in_dev);
536 	return NULL;
537 }
538 
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
540 {
541 	struct net *net = sock_net(skb->sk);
542 	struct nlattr *tb[IFA_MAX+1];
543 	struct in_device *in_dev;
544 	struct ifaddrmsg *ifm;
545 	struct in_ifaddr *ifa, **ifap;
546 	int err = -EINVAL;
547 
548 	ASSERT_RTNL();
549 
550 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551 	if (err < 0)
552 		goto errout;
553 
554 	ifm = nlmsg_data(nlh);
555 	in_dev = inetdev_by_index(net, ifm->ifa_index);
556 	if (in_dev == NULL) {
557 		err = -ENODEV;
558 		goto errout;
559 	}
560 
561 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562 	     ifap = &ifa->ifa_next) {
563 		if (tb[IFA_LOCAL] &&
564 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565 			continue;
566 
567 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568 			continue;
569 
570 		if (tb[IFA_ADDRESS] &&
571 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573 			continue;
574 
575 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576 		return 0;
577 	}
578 
579 	err = -EADDRNOTAVAIL;
580 errout:
581 	return err;
582 }
583 
/* Lifetime value meaning "never expires" for valid/preferred lifetimes. */
584 #define INFINITY_LIFE_TIME	0xFFFFFFFF
585 
/*
 * check_lifetime - delayed-work handler that expires and deprecates
 * addresses.
 *
 * Every bucket of the address hash is scanned twice: a first pass under
 * rcu_read_lock() only decides whether anything has to change and computes
 * the earliest time the next check is needed; only if a change is needed
 * is the bucket rescanned under rtnl_lock(), where addresses past their
 * valid lifetime are deleted and addresses past their preferred lifetime
 * are flagged IFA_F_DEPRECATED (announced with RTM_NEWADDR).  Addresses
 * flagged IFA_F_PERMANENT are ignored.  The work item re-arms itself at
 * the end.
 */
586 static void check_lifetime(struct work_struct *work)
587 {
588 	unsigned long now, next, next_sec, next_sched;
589 	struct in_ifaddr *ifa;
590 	struct hlist_node *n;
591 	int i;
592 
593 	now = jiffies;
594 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
595 
596 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597 		bool change_needed = false;
598 
	/* Pass 1: read-only scan under RCU. */
599 		rcu_read_lock();
600 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
601 			unsigned long age;
602 
603 			if (ifa->ifa_flags & IFA_F_PERMANENT)
604 				continue;
605 
606 			/* We try to batch several events at once. */
	/* age is in seconds since ifa_tstamp (jiffies delta / HZ). */
607 			age = (now - ifa->ifa_tstamp +
608 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
609 
610 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
611 			    age >= ifa->ifa_valid_lft) {
612 				change_needed = true;
613 			} else if (ifa->ifa_preferred_lft ==
614 				   INFINITY_LIFE_TIME) {
615 				continue;
616 			} else if (age >= ifa->ifa_preferred_lft) {
	/* Past preferred lifetime: next event is the valid expiry. */
617 				if (time_before(ifa->ifa_tstamp +
618 						ifa->ifa_valid_lft * HZ, next))
619 					next = ifa->ifa_tstamp +
620 					       ifa->ifa_valid_lft * HZ;
621 
622 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
623 					change_needed = true;
624 			} else if (time_before(ifa->ifa_tstamp +
625 					       ifa->ifa_preferred_lft * HZ,
626 					       next)) {
627 				next = ifa->ifa_tstamp +
628 				       ifa->ifa_preferred_lft * HZ;
629 			}
630 		}
631 		rcu_read_unlock();
632 		if (!change_needed)
633 			continue;
	/* Pass 2: apply the changes with RTNL held. */
634 		rtnl_lock();
635 		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
636 			unsigned long age;
637 
638 			if (ifa->ifa_flags & IFA_F_PERMANENT)
639 				continue;
640 
641 			/* We try to batch several events at once. */
642 			age = (now - ifa->ifa_tstamp +
643 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
644 
645 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
646 			    age >= ifa->ifa_valid_lft) {
647 				struct in_ifaddr **ifap;
648 
	/* Find the list slot holding ifa; inet_del_ifa() needs it. */
649 				for (ifap = &ifa->ifa_dev->ifa_list;
650 				     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
651 					if (*ifap == ifa) {
652 						inet_del_ifa(ifa->ifa_dev,
653 							     ifap, 1);
654 						break;
655 					}
656 				}
657 			} else if (ifa->ifa_preferred_lft !=
658 				   INFINITY_LIFE_TIME &&
659 				   age >= ifa->ifa_preferred_lft &&
660 				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
661 				ifa->ifa_flags |= IFA_F_DEPRECATED;
662 				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
663 			}
664 		}
665 		rtnl_unlock();
666 	}
667 
668 	next_sec = round_jiffies_up(next);
669 	next_sched = next;
670 
671 	/* If rounded timeout is accurate enough, accept it. */
672 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
673 		next_sched = next_sec;
674 
675 	now = jiffies;
676 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
677 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
678 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
679 
680 	schedule_delayed_work(&check_lifetime_work, next_sched - now);
681 }
682 
683 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
684 			     __u32 prefered_lft)
685 {
686 	unsigned long timeout;
687 
688 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
689 
690 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
691 	if (addrconf_finite_timeout(timeout))
692 		ifa->ifa_valid_lft = timeout;
693 	else
694 		ifa->ifa_flags |= IFA_F_PERMANENT;
695 
696 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
697 	if (addrconf_finite_timeout(timeout)) {
698 		if (timeout == 0)
699 			ifa->ifa_flags |= IFA_F_DEPRECATED;
700 		ifa->ifa_preferred_lft = timeout;
701 	}
702 	ifa->ifa_tstamp = jiffies;
703 	if (!ifa->ifa_cstamp)
704 		ifa->ifa_cstamp = ifa->ifa_tstamp;
705 }
706 
707 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
708 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
709 {
710 	struct nlattr *tb[IFA_MAX+1];
711 	struct in_ifaddr *ifa;
712 	struct ifaddrmsg *ifm;
713 	struct net_device *dev;
714 	struct in_device *in_dev;
715 	int err;
716 
717 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
718 	if (err < 0)
719 		goto errout;
720 
721 	ifm = nlmsg_data(nlh);
722 	err = -EINVAL;
723 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
724 		goto errout;
725 
726 	dev = __dev_get_by_index(net, ifm->ifa_index);
727 	err = -ENODEV;
728 	if (dev == NULL)
729 		goto errout;
730 
731 	in_dev = __in_dev_get_rtnl(dev);
732 	err = -ENOBUFS;
733 	if (in_dev == NULL)
734 		goto errout;
735 
736 	ifa = inet_alloc_ifa();
737 	if (ifa == NULL)
738 		/*
739 		 * A potential indev allocation can be left alive, it stays
740 		 * assigned to its device and is destroy with it.
741 		 */
742 		goto errout;
743 
744 	ipv4_devconf_setall(in_dev);
745 	in_dev_hold(in_dev);
746 
747 	if (tb[IFA_ADDRESS] == NULL)
748 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
749 
750 	INIT_HLIST_NODE(&ifa->hash);
751 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
752 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
753 	ifa->ifa_flags = ifm->ifa_flags;
754 	ifa->ifa_scope = ifm->ifa_scope;
755 	ifa->ifa_dev = in_dev;
756 
757 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
758 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
759 
760 	if (tb[IFA_BROADCAST])
761 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
762 
763 	if (tb[IFA_LABEL])
764 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
765 	else
766 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
767 
768 	if (tb[IFA_CACHEINFO]) {
769 		struct ifa_cacheinfo *ci;
770 
771 		ci = nla_data(tb[IFA_CACHEINFO]);
772 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
773 			err = -EINVAL;
774 			goto errout;
775 		}
776 		*pvalid_lft = ci->ifa_valid;
777 		*pprefered_lft = ci->ifa_prefered;
778 	}
779 
780 	return ifa;
781 
782 errout:
783 	return ERR_PTR(err);
784 }
785 
786 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
787 {
788 	struct in_device *in_dev = ifa->ifa_dev;
789 	struct in_ifaddr *ifa1, **ifap;
790 
791 	if (!ifa->ifa_local)
792 		return NULL;
793 
794 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
795 	     ifap = &ifa1->ifa_next) {
796 		if (ifa1->ifa_mask == ifa->ifa_mask &&
797 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
798 		    ifa1->ifa_local == ifa->ifa_local)
799 			return ifa1;
800 	}
801 	return NULL;
802 }
803 
804 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
805 {
806 	struct net *net = sock_net(skb->sk);
807 	struct in_ifaddr *ifa;
808 	struct in_ifaddr *ifa_existing;
809 	__u32 valid_lft = INFINITY_LIFE_TIME;
810 	__u32 prefered_lft = INFINITY_LIFE_TIME;
811 
812 	ASSERT_RTNL();
813 
814 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
815 	if (IS_ERR(ifa))
816 		return PTR_ERR(ifa);
817 
818 	ifa_existing = find_matching_ifa(ifa);
819 	if (!ifa_existing) {
820 		/* It would be best to check for !NLM_F_CREATE here but
821 		 * userspace alreay relies on not having to provide this.
822 		 */
823 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
824 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
825 	} else {
826 		inet_free_ifa(ifa);
827 
828 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
829 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
830 			return -EEXIST;
831 		ifa = ifa_existing;
832 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833 		cancel_delayed_work(&check_lifetime_work);
834 		schedule_delayed_work(&check_lifetime_work, 0);
835 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
836 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
837 	}
838 	return 0;
839 }
840 
841 /*
842  *	Determine a default network mask, based on the IP address.
843  */
844 
845 static int inet_abc_len(__be32 addr)
846 {
847 	int rc = -1;	/* Something else, probably a multicast. */
848 
849 	if (ipv4_is_zeronet(addr))
850 		rc = 0;
851 	else {
852 		__u32 haddr = ntohl(addr);
853 
854 		if (IN_CLASSA(haddr))
855 			rc = 8;
856 		else if (IN_CLASSB(haddr))
857 			rc = 16;
858 		else if (IN_CLASSC(haddr))
859 			rc = 24;
860 	}
861 
862 	return rc;
863 }
864 
865 
/*
 * devinet_ioctl - handle the IPv4 address ioctls of a network interface.
 * @net: network namespace in which the interface name is resolved
 * @cmd: SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK}, SIOCSIF{ADDR,BRDADDR,
 *       DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @arg: user pointer to a struct ifreq; read on entry and, for the "get"
 *       commands, written back with the result
 *
 * The interface name may carry an ":alias" label.  The base device is
 * looked up with the suffix stripped, while the address on that device is
 * selected by the full label (and, for the "get" commands with an AF_INET
 * address supplied, first by label plus address).
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM (a "set" command without CAP_NET_ADMIN), -EINVAL (unknown
 * command, non-AF_INET family on a "set" command, unacceptable address or
 * netmask), -ENODEV (no such device), -EADDRNOTAVAIL (no matching
 * address), -ENOBUFS (allocation failure or no in_device), or whatever
 * dev_change_flags() / inet_set_ifa() return.
 */
866 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
867 {
868 	struct ifreq ifr;
869 	struct sockaddr_in sin_orig;
870 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
871 	struct in_device *in_dev;
872 	struct in_ifaddr **ifap = NULL;
873 	struct in_ifaddr *ifa = NULL;
874 	struct net_device *dev;
875 	char *colon;
876 	int ret = -EFAULT;
877 	int tryaddrmatch = 0;
878 
879 	/*
880 	 *	Fetch the caller's info block into kernel space
881 	 */
882 
883 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
884 		goto out;
	/* Force NUL termination of the user-supplied name. */
885 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
886 
887 	/* save original address for comparison */
888 	memcpy(&sin_orig, sin, sizeof(*sin));
889 
	/* Temporarily cut off any ":alias" suffix so that the base
	 * device name is used for dev_load() and the device lookup;
	 * the colon is put back once the device has been found.
	 */
890 	colon = strchr(ifr.ifr_name, ':');
891 	if (colon)
892 		*colon = 0;
893 
894 	dev_load(net, ifr.ifr_name);
895 
	/* First switch: per-command validation before RTNL is taken. */
896 	switch (cmd) {
897 	case SIOCGIFADDR:	/* Get interface address */
898 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
899 	case SIOCGIFDSTADDR:	/* Get the destination address */
900 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
901 		/* Note that these ioctls will not sleep,
902 		   so that we do not impose a lock.
903 		   One day we will be forced to put shlock here (I mean SMP)
904 		 */
905 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
906 		memset(sin, 0, sizeof(*sin));
907 		sin->sin_family = AF_INET;
908 		break;
909 
910 	case SIOCSIFFLAGS:
911 		ret = -EPERM;
912 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
913 			goto out;
914 		break;
915 	case SIOCSIFADDR:	/* Set interface address (and family) */
916 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
917 	case SIOCSIFDSTADDR:	/* Set the destination address */
918 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
919 		ret = -EPERM;
920 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
921 			goto out;
922 		ret = -EINVAL;
923 		if (sin->sin_family != AF_INET)
924 			goto out;
925 		break;
926 	default:
927 		ret = -EINVAL;
928 		goto out;
929 	}
930 
931 	rtnl_lock();
932 
933 	ret = -ENODEV;
934 	dev = __dev_get_by_name(net, ifr.ifr_name);
935 	if (!dev)
936 		goto done;
937 
	/* Restore the full label for the address lookup below. */
938 	if (colon)
939 		*colon = ':';
940 
941 	in_dev = __in_dev_get_rtnl(dev);
942 	if (in_dev) {
943 		if (tryaddrmatch) {
944 			/* Matthias Andree */
945 			/* compare label and address (4.4BSD style) */
946 			/* note: we only do this for a limited set of ioctls
947 			   and only if the original address family was AF_INET.
948 			   This is checked above. */
949 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
950 			     ifap = &ifa->ifa_next) {
951 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
952 				    sin_orig.sin_addr.s_addr ==
953 							ifa->ifa_local) {
954 					break; /* found */
955 				}
956 			}
957 		}
958 		/* we didn't get a match, maybe the application is
959 		   4.3BSD-style and passed in junk so we fall back to
960 		   comparing just the label */
961 		if (!ifa) {
962 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
963 			     ifap = &ifa->ifa_next)
964 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
965 					break;
966 		}
967 	}
968 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an
	 * existing address; everything else fails here.
	 */
969 	ret = -EADDRNOTAVAIL;
970 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
971 		goto done;
972 
	/* Second switch: perform the operation under RTNL. */
973 	switch (cmd) {
974 	case SIOCGIFADDR:	/* Get interface address */
975 		sin->sin_addr.s_addr = ifa->ifa_local;
976 		goto rarok;
977 
978 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
979 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
980 		goto rarok;
981 
982 	case SIOCGIFDSTADDR:	/* Get the destination address */
983 		sin->sin_addr.s_addr = ifa->ifa_address;
984 		goto rarok;
985 
986 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
987 		sin->sin_addr.s_addr = ifa->ifa_mask;
988 		goto rarok;
989 
990 	case SIOCSIFFLAGS:
	/* On an alias label, clearing IFF_UP deletes that address;
	 * without an alias the flags go to the device itself.
	 */
991 		if (colon) {
992 			ret = -EADDRNOTAVAIL;
993 			if (!ifa)
994 				break;
995 			ret = 0;
996 			if (!(ifr.ifr_flags & IFF_UP))
997 				inet_del_ifa(in_dev, ifap, 1);
998 			break;
999 		}
1000 		ret = dev_change_flags(dev, ifr.ifr_flags);
1001 		break;
1002 
1003 	case SIOCSIFADDR:	/* Set interface address (and family) */
1004 		ret = -EINVAL;
1005 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1006 			break;
1007 
1008 		if (!ifa) {
1009 			ret = -ENOBUFS;
1010 			ifa = inet_alloc_ifa();
1011 			if (!ifa)
1012 				break;
1013 			INIT_HLIST_NODE(&ifa->hash);
1014 			if (colon)
1015 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1016 			else
1017 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1018 		} else {
1019 			ret = 0;
1020 			if (ifa->ifa_local == sin->sin_addr.s_addr)
1021 				break;
	/* Unlink without freeing: the same object is re-inserted below. */
1022 			inet_del_ifa(in_dev, ifap, 0);
1023 			ifa->ifa_broadcast = 0;
1024 			ifa->ifa_scope = 0;
1025 		}
1026 
1027 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1028 
	/* Non point-to-point devices get the classful default mask;
	 * point-to-point devices get a /32.
	 */
1029 		if (!(dev->flags & IFF_POINTOPOINT)) {
1030 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1031 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1032 			if ((dev->flags & IFF_BROADCAST) &&
1033 			    ifa->ifa_prefixlen < 31)
1034 				ifa->ifa_broadcast = ifa->ifa_address |
1035 						     ~ifa->ifa_mask;
1036 		} else {
1037 			ifa->ifa_prefixlen = 32;
1038 			ifa->ifa_mask = inet_make_mask(32);
1039 		}
1040 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1041 		ret = inet_set_ifa(dev, ifa);
1042 		break;
1043 
1044 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1045 		ret = 0;
1046 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1047 			inet_del_ifa(in_dev, ifap, 0);
1048 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1049 			inet_insert_ifa(ifa);
1050 		}
1051 		break;
1052 
1053 	case SIOCSIFDSTADDR:	/* Set the destination address */
1054 		ret = 0;
1055 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1056 			break;
1057 		ret = -EINVAL;
1058 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1059 			break;
1060 		ret = 0;
1061 		inet_del_ifa(in_dev, ifap, 0);
1062 		ifa->ifa_address = sin->sin_addr.s_addr;
1063 		inet_insert_ifa(ifa);
1064 		break;
1065 
1066 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1067 
1068 		/*
1069 		 *	The mask we set must be legal.
1070 		 */
1071 		ret = -EINVAL;
1072 		if (bad_mask(sin->sin_addr.s_addr, 0))
1073 			break;
1074 		ret = 0;
1075 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1076 			__be32 old_mask = ifa->ifa_mask;
1077 			inet_del_ifa(in_dev, ifap, 0);
1078 			ifa->ifa_mask = sin->sin_addr.s_addr;
1079 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1080 
1081 			/* See if current broadcast address matches
1082 			 * with current netmask, then recalculate
1083 			 * the broadcast address. Otherwise it's a
1084 			 * funny address, so don't touch it since
1085 			 * the user seems to know what (s)he's doing...
1086 			 */
1087 			if ((dev->flags & IFF_BROADCAST) &&
1088 			    (ifa->ifa_prefixlen < 31) &&
1089 			    (ifa->ifa_broadcast ==
1090 			     (ifa->ifa_local|~old_mask))) {
1091 				ifa->ifa_broadcast = (ifa->ifa_local |
1092 						      ~sin->sin_addr.s_addr);
1093 			}
1094 			inet_insert_ifa(ifa);
1095 		}
1096 		break;
1097 	}
1098 done:
1099 	rtnl_unlock();
1100 out:
1101 	return ret;
	/* "get" commands: drop RTNL, then copy the filled ifreq back. */
1102 rarok:
1103 	rtnl_unlock();
1104 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1105 	goto out;
1106 }
1107 
1108 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1109 {
1110 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1111 	struct in_ifaddr *ifa;
1112 	struct ifreq ifr;
1113 	int done = 0;
1114 
1115 	if (!in_dev)
1116 		goto out;
1117 
1118 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1119 		if (!buf) {
1120 			done += sizeof(ifr);
1121 			continue;
1122 		}
1123 		if (len < (int) sizeof(ifr))
1124 			break;
1125 		memset(&ifr, 0, sizeof(struct ifreq));
1126 		if (ifa->ifa_label)
1127 			strcpy(ifr.ifr_name, ifa->ifa_label);
1128 		else
1129 			strcpy(ifr.ifr_name, dev->name);
1130 
1131 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1132 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1133 								ifa->ifa_local;
1134 
1135 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1136 			done = -EFAULT;
1137 			break;
1138 		}
1139 		buf  += sizeof(struct ifreq);
1140 		len  -= sizeof(struct ifreq);
1141 		done += sizeof(struct ifreq);
1142 	}
1143 out:
1144 	return done;
1145 }
1146 
1147 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1148 {
1149 	__be32 addr = 0;
1150 	struct in_device *in_dev;
1151 	struct net *net = dev_net(dev);
1152 
1153 	rcu_read_lock();
1154 	in_dev = __in_dev_get_rcu(dev);
1155 	if (!in_dev)
1156 		goto no_in_dev;
1157 
1158 	for_primary_ifa(in_dev) {
1159 		if (ifa->ifa_scope > scope)
1160 			continue;
1161 		if (!dst || inet_ifa_match(dst, ifa)) {
1162 			addr = ifa->ifa_local;
1163 			break;
1164 		}
1165 		if (!addr)
1166 			addr = ifa->ifa_local;
1167 	} endfor_ifa(in_dev);
1168 
1169 	if (addr)
1170 		goto out_unlock;
1171 no_in_dev:
1172 
1173 	/* Not loopback addresses on loopback should be preferred
1174 	   in this case. It is importnat that lo is the first interface
1175 	   in dev_base list.
1176 	 */
1177 	for_each_netdev_rcu(net, dev) {
1178 		in_dev = __in_dev_get_rcu(dev);
1179 		if (!in_dev)
1180 			continue;
1181 
1182 		for_primary_ifa(in_dev) {
1183 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1184 			    ifa->ifa_scope <= scope) {
1185 				addr = ifa->ifa_local;
1186 				goto out_unlock;
1187 			}
1188 		} endfor_ifa(in_dev);
1189 	}
1190 out_unlock:
1191 	rcu_read_unlock();
1192 	return addr;
1193 }
1194 EXPORT_SYMBOL(inet_select_addr);
1195 
/*
 * Search the addresses of one in_device for a usable local address.
 *
 * - dst:   when non-zero, some address entry must match it through
 *          inet_ifa_match()
 * - local: when non-zero, the address that has to be present; zero lets
 *          any address of acceptable scope be picked
 * - scope: largest ifa_scope value accepted for the returned address
 *
 * Returns addr if an entry satisfying the local/dst match was seen
 * (same != 0), otherwise 0.  addr may itself still be 0 when no candidate
 * was picked.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;		/* an entry matched both local and dst */
	__be32 addr = 0;	/* candidate address to hand back */

	for_ifa(in_dev) {
		/* Take the first entry equal to local (or any entry when
		 * local is 0) whose scope is acceptable.  If the subnet
		 * match was already established we are done.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			/* Does this entry match local and dst (each one
			 * only checked when non-zero)?
			 */
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Explicit local given, or no dst constraint:
				 * the candidate already found is good enough.
				 */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1232 
1233 /*
1234  * Confirm that local IP address exists using wildcards:
1235  * - in_dev: only on this interface, 0=any interface
1236  * - dst: only in the same subnet as dst, 0=any dst
1237  * - local: address, 0=autoselect the local address
1238  * - scope: maximum allowed scope value for the local address
1239  */
1240 __be32 inet_confirm_addr(struct in_device *in_dev,
1241 			 __be32 dst, __be32 local, int scope)
1242 {
1243 	__be32 addr = 0;
1244 	struct net_device *dev;
1245 	struct net *net;
1246 
1247 	if (scope != RT_SCOPE_LINK)
1248 		return confirm_addr_indev(in_dev, dst, local, scope);
1249 
1250 	net = dev_net(in_dev->dev);
1251 	rcu_read_lock();
1252 	for_each_netdev_rcu(net, dev) {
1253 		in_dev = __in_dev_get_rcu(dev);
1254 		if (in_dev) {
1255 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1256 			if (addr)
1257 				break;
1258 		}
1259 	}
1260 	rcu_read_unlock();
1261 
1262 	return addr;
1263 }
1264 EXPORT_SYMBOL(inet_confirm_addr);
1265 
1266 /*
1267  *	Device notifier
1268  */
1269 
1270 int register_inetaddr_notifier(struct notifier_block *nb)
1271 {
1272 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1273 }
1274 EXPORT_SYMBOL(register_inetaddr_notifier);
1275 
1276 int unregister_inetaddr_notifier(struct notifier_block *nb)
1277 {
1278 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1279 }
1280 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1281 
1282 /* Rename ifa_labels for a device name change. Make some effort to preserve
1283  * existing alias numbering and to create unique labels if possible.
1284 */
1285 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1286 {
1287 	struct in_ifaddr *ifa;
1288 	int named = 0;
1289 
1290 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1291 		char old[IFNAMSIZ], *dot;
1292 
1293 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1294 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1295 		if (named++ == 0)
1296 			goto skip;
1297 		dot = strchr(old, ':');
1298 		if (dot == NULL) {
1299 			sprintf(old, ":%d", named);
1300 			dot = old;
1301 		}
1302 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1303 			strcat(ifa->ifa_label, dot);
1304 		else
1305 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1306 skip:
1307 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1308 	}
1309 }
1310 
/* An MTU is usable for IPv4 only if it is at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1315 
1316 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1317 					struct in_device *in_dev)
1318 
1319 {
1320 	struct in_ifaddr *ifa;
1321 
1322 	for (ifa = in_dev->ifa_list; ifa;
1323 	     ifa = ifa->ifa_next) {
1324 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1325 			 ifa->ifa_local, dev,
1326 			 ifa->ifa_local, NULL,
1327 			 dev->dev_addr, NULL);
1328 	}
1329 }
1330 
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback: keeps the per-device IPv4 state (in_device)
 * in step with device lifecycle events.  @ptr is the affected net_device.
 *
 * Returns NOTIFY_DONE, except when creating the in_device on
 * NETDEV_REGISTER fails, in which case -ENOMEM is reported through
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* Device has no IPv4 state yet: only REGISTER and a CHANGEMTU to a
	 * valid MTU create it; every other event is ignored.
	 */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device is
		 * unexpected; log it and clear dev->ip_ptr.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* A loopback device coming up gets INADDR_LOOPBACK/8 with
		 * host scope and infinite lifetimes.  Allocation failure is
		 * silently tolerated.
		 */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
						 INFINITY_LIFE_TIME);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce themselves via ARP when
		 * the arp_notify setting is enabled for the device.
		 */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Relabel the addresses (inetdev_changename() sends an
		 * RTM_NEWADDR for each one) and re-register the sysctl
		 * entries so they appear under the new device name.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1423 
/* Notifier block that routes netdevice events to inetdev_event();
 * registered from devinet_init().
 */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1427 
1428 static size_t inet_nlmsg_size(void)
1429 {
1430 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1431 	       + nla_total_size(4) /* IFA_ADDRESS */
1432 	       + nla_total_size(4) /* IFA_LOCAL */
1433 	       + nla_total_size(4) /* IFA_BROADCAST */
1434 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1435 }
1436 
1437 static inline u32 cstamp_delta(unsigned long cstamp)
1438 {
1439 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1440 }
1441 
1442 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1443 			 unsigned long tstamp, u32 preferred, u32 valid)
1444 {
1445 	struct ifa_cacheinfo ci;
1446 
1447 	ci.cstamp = cstamp_delta(cstamp);
1448 	ci.tstamp = cstamp_delta(tstamp);
1449 	ci.ifa_prefered = preferred;
1450 	ci.ifa_valid = valid;
1451 
1452 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1453 }
1454 
1455 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1456 			    u32 portid, u32 seq, int event, unsigned int flags)
1457 {
1458 	struct ifaddrmsg *ifm;
1459 	struct nlmsghdr  *nlh;
1460 	u32 preferred, valid;
1461 
1462 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1463 	if (nlh == NULL)
1464 		return -EMSGSIZE;
1465 
1466 	ifm = nlmsg_data(nlh);
1467 	ifm->ifa_family = AF_INET;
1468 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1469 	ifm->ifa_flags = ifa->ifa_flags;
1470 	ifm->ifa_scope = ifa->ifa_scope;
1471 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1472 
1473 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1474 		preferred = ifa->ifa_preferred_lft;
1475 		valid = ifa->ifa_valid_lft;
1476 		if (preferred != INFINITY_LIFE_TIME) {
1477 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1478 
1479 			if (preferred > tval)
1480 				preferred -= tval;
1481 			else
1482 				preferred = 0;
1483 			if (valid != INFINITY_LIFE_TIME) {
1484 				if (valid > tval)
1485 					valid -= tval;
1486 				else
1487 					valid = 0;
1488 			}
1489 		}
1490 	} else {
1491 		preferred = INFINITY_LIFE_TIME;
1492 		valid = INFINITY_LIFE_TIME;
1493 	}
1494 	if ((ifa->ifa_address &&
1495 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1496 	    (ifa->ifa_local &&
1497 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1498 	    (ifa->ifa_broadcast &&
1499 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1500 	    (ifa->ifa_label[0] &&
1501 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1502 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1503 			  preferred, valid))
1504 		goto nla_put_failure;
1505 
1506 	return nlmsg_end(skb, nlh);
1507 
1508 nla_put_failure:
1509 	nlmsg_cancel(skb, nlh);
1510 	return -EMSGSIZE;
1511 }
1512 
/*
 * Netlink dump callback for RTM_GETADDR: write one RTM_NEWADDR message per
 * IPv4 address into @skb.
 *
 * The position is kept in cb->args[] so the walk can be resumed on the
 * next call:
 *   args[0] - bucket index into net->dev_index_head
 *   args[1] - device index within that bucket
 *   args[2] - address index within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;

	/* Resume point saved by the previous invocation */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped in this bucket */
			if (idx < s_idx)
				goto cont;
			/* Past the resume device: start at its first address */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* Stop and record the position as soon as
				 * a message cannot be added.
				 */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the cursor for the next call */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1566 
1567 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1568 		      u32 portid)
1569 {
1570 	struct sk_buff *skb;
1571 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1572 	int err = -ENOBUFS;
1573 	struct net *net;
1574 
1575 	net = dev_net(ifa->ifa_dev->dev);
1576 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1577 	if (skb == NULL)
1578 		goto errout;
1579 
1580 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1581 	if (err < 0) {
1582 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1583 		WARN_ON(err == -EMSGSIZE);
1584 		kfree_skb(skb);
1585 		goto errout;
1586 	}
1587 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1588 	return;
1589 errout:
1590 	if (err < 0)
1591 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1592 }
1593 
1594 static size_t inet_get_link_af_size(const struct net_device *dev)
1595 {
1596 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1597 
1598 	if (!in_dev)
1599 		return 0;
1600 
1601 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1602 }
1603 
1604 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1605 {
1606 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1607 	struct nlattr *nla;
1608 	int i;
1609 
1610 	if (!in_dev)
1611 		return -ENODATA;
1612 
1613 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1614 	if (nla == NULL)
1615 		return -EMSGSIZE;
1616 
1617 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1618 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1619 
1620 	return 0;
1621 }
1622 
/* Parse policy for the AF_INET link attributes: IFLA_INET_CONF must be a
 * nested attribute.  Used by inet_validate_link_af().
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1626 
1627 static int inet_validate_link_af(const struct net_device *dev,
1628 				 const struct nlattr *nla)
1629 {
1630 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1631 	int err, rem;
1632 
1633 	if (dev && !__in_dev_get_rtnl(dev))
1634 		return -EAFNOSUPPORT;
1635 
1636 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1637 	if (err < 0)
1638 		return err;
1639 
1640 	if (tb[IFLA_INET_CONF]) {
1641 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1642 			int cfgid = nla_type(a);
1643 
1644 			if (nla_len(a) < 4)
1645 				return -EINVAL;
1646 
1647 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1648 				return -EINVAL;
1649 		}
1650 	}
1651 
1652 	return 0;
1653 }
1654 
1655 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1656 {
1657 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1658 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1659 	int rem;
1660 
1661 	if (!in_dev)
1662 		return -EAFNOSUPPORT;
1663 
1664 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1665 		BUG();
1666 
1667 	if (tb[IFLA_INET_CONF]) {
1668 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1669 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1670 	}
1671 
1672 	return 0;
1673 }
1674 
1675 static int inet_netconf_msgsize_devconf(int type)
1676 {
1677 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1678 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1679 
1680 	/* type -1 is used for ALL */
1681 	if (type == -1 || type == NETCONFA_FORWARDING)
1682 		size += nla_total_size(4);
1683 	if (type == -1 || type == NETCONFA_RP_FILTER)
1684 		size += nla_total_size(4);
1685 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1686 		size += nla_total_size(4);
1687 
1688 	return size;
1689 }
1690 
1691 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1692 				     struct ipv4_devconf *devconf, u32 portid,
1693 				     u32 seq, int event, unsigned int flags,
1694 				     int type)
1695 {
1696 	struct nlmsghdr  *nlh;
1697 	struct netconfmsg *ncm;
1698 
1699 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1700 			flags);
1701 	if (nlh == NULL)
1702 		return -EMSGSIZE;
1703 
1704 	ncm = nlmsg_data(nlh);
1705 	ncm->ncm_family = AF_INET;
1706 
1707 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1708 		goto nla_put_failure;
1709 
1710 	/* type -1 is used for ALL */
1711 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1712 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1713 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1714 		goto nla_put_failure;
1715 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1716 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1717 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1718 		goto nla_put_failure;
1719 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1720 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1721 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1722 		goto nla_put_failure;
1723 
1724 	return nlmsg_end(skb, nlh);
1725 
1726 nla_put_failure:
1727 	nlmsg_cancel(skb, nlh);
1728 	return -EMSGSIZE;
1729 }
1730 
1731 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1732 				 struct ipv4_devconf *devconf)
1733 {
1734 	struct sk_buff *skb;
1735 	int err = -ENOBUFS;
1736 
1737 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1738 	if (skb == NULL)
1739 		goto errout;
1740 
1741 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1742 					RTM_NEWNETCONF, 0, type);
1743 	if (err < 0) {
1744 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1745 		WARN_ON(err == -EMSGSIZE);
1746 		kfree_skb(skb);
1747 		goto errout;
1748 	}
1749 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1750 	return;
1751 errout:
1752 	if (err < 0)
1753 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1754 }
1755 
/* Parse policy for RTM_GETNETCONF requests (see
 * inet_netconf_get_devconf()): each listed attribute is given a length of
 * sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
};
1761 
1762 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1763 				    struct nlmsghdr *nlh,
1764 				    void *arg)
1765 {
1766 	struct net *net = sock_net(in_skb->sk);
1767 	struct nlattr *tb[NETCONFA_MAX+1];
1768 	struct netconfmsg *ncm;
1769 	struct sk_buff *skb;
1770 	struct ipv4_devconf *devconf;
1771 	struct in_device *in_dev;
1772 	struct net_device *dev;
1773 	int ifindex;
1774 	int err;
1775 
1776 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1777 			  devconf_ipv4_policy);
1778 	if (err < 0)
1779 		goto errout;
1780 
1781 	err = EINVAL;
1782 	if (!tb[NETCONFA_IFINDEX])
1783 		goto errout;
1784 
1785 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1786 	switch (ifindex) {
1787 	case NETCONFA_IFINDEX_ALL:
1788 		devconf = net->ipv4.devconf_all;
1789 		break;
1790 	case NETCONFA_IFINDEX_DEFAULT:
1791 		devconf = net->ipv4.devconf_dflt;
1792 		break;
1793 	default:
1794 		dev = __dev_get_by_index(net, ifindex);
1795 		if (dev == NULL)
1796 			goto errout;
1797 		in_dev = __in_dev_get_rtnl(dev);
1798 		if (in_dev == NULL)
1799 			goto errout;
1800 		devconf = &in_dev->cnf;
1801 		break;
1802 	}
1803 
1804 	err = -ENOBUFS;
1805 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1806 	if (skb == NULL)
1807 		goto errout;
1808 
1809 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1810 					NETLINK_CB(in_skb).portid,
1811 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1812 					-1);
1813 	if (err < 0) {
1814 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1815 		WARN_ON(err == -EMSGSIZE);
1816 		kfree_skb(skb);
1817 		goto errout;
1818 	}
1819 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1820 errout:
1821 	return err;
1822 }
1823 
1824 #ifdef CONFIG_SYSCTL
1825 
1826 static void devinet_copy_dflt_conf(struct net *net, int i)
1827 {
1828 	struct net_device *dev;
1829 
1830 	rcu_read_lock();
1831 	for_each_netdev_rcu(net, dev) {
1832 		struct in_device *in_dev;
1833 
1834 		in_dev = __in_dev_get_rcu(dev);
1835 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1836 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1837 	}
1838 	rcu_read_unlock();
1839 }
1840 
1841 /* called with RTNL locked */
1842 static void inet_forward_change(struct net *net)
1843 {
1844 	struct net_device *dev;
1845 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1846 
1847 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1848 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1849 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1850 				    NETCONFA_IFINDEX_ALL,
1851 				    net->ipv4.devconf_all);
1852 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1853 				    NETCONFA_IFINDEX_DEFAULT,
1854 				    net->ipv4.devconf_dflt);
1855 
1856 	for_each_netdev(net, dev) {
1857 		struct in_device *in_dev;
1858 		if (on)
1859 			dev_disable_lro(dev);
1860 		rcu_read_lock();
1861 		in_dev = __in_dev_get_rcu(dev);
1862 		if (in_dev) {
1863 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1864 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1865 						    dev->ifindex, &in_dev->cnf);
1866 		}
1867 		rcu_read_unlock();
1868 	}
1869 }
1870 
1871 static int devinet_conf_proc(ctl_table *ctl, int write,
1872 			     void __user *buffer,
1873 			     size_t *lenp, loff_t *ppos)
1874 {
1875 	int old_value = *(int *)ctl->data;
1876 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1877 	int new_value = *(int *)ctl->data;
1878 
1879 	if (write) {
1880 		struct ipv4_devconf *cnf = ctl->extra1;
1881 		struct net *net = ctl->extra2;
1882 		int i = (int *)ctl->data - cnf->data;
1883 
1884 		set_bit(i, cnf->state);
1885 
1886 		if (cnf == net->ipv4.devconf_dflt)
1887 			devinet_copy_dflt_conf(net, i);
1888 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1889 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1890 			if ((new_value == 0) && (old_value != 0))
1891 				rt_cache_flush(net);
1892 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1893 		    new_value != old_value) {
1894 			int ifindex;
1895 
1896 			if (cnf == net->ipv4.devconf_dflt)
1897 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1898 			else if (cnf == net->ipv4.devconf_all)
1899 				ifindex = NETCONFA_IFINDEX_ALL;
1900 			else {
1901 				struct in_device *idev =
1902 					container_of(cnf, struct in_device,
1903 						     cnf);
1904 				ifindex = idev->dev->ifindex;
1905 			}
1906 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1907 						    ifindex, cnf);
1908 		}
1909 	}
1910 
1911 	return ret;
1912 }
1913 
1914 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1915 				  void __user *buffer,
1916 				  size_t *lenp, loff_t *ppos)
1917 {
1918 	int *valp = ctl->data;
1919 	int val = *valp;
1920 	loff_t pos = *ppos;
1921 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1922 
1923 	if (write && *valp != val) {
1924 		struct net *net = ctl->extra2;
1925 
1926 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1927 			if (!rtnl_trylock()) {
1928 				/* Restore the original values before restarting */
1929 				*valp = val;
1930 				*ppos = pos;
1931 				return restart_syscall();
1932 			}
1933 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1934 				inet_forward_change(net);
1935 			} else {
1936 				struct ipv4_devconf *cnf = ctl->extra1;
1937 				struct in_device *idev =
1938 					container_of(cnf, struct in_device, cnf);
1939 				if (*valp)
1940 					dev_disable_lro(idev->dev);
1941 				inet_netconf_notify_devconf(net,
1942 							    NETCONFA_FORWARDING,
1943 							    idev->dev->ifindex,
1944 							    cnf);
1945 			}
1946 			rtnl_unlock();
1947 			rt_cache_flush(net);
1948 		} else
1949 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1950 						    NETCONFA_IFINDEX_DEFAULT,
1951 						    net->ipv4.devconf_dflt);
1952 	}
1953 
1954 	return ret;
1955 }
1956 
1957 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1958 				void __user *buffer,
1959 				size_t *lenp, loff_t *ppos)
1960 {
1961 	int *valp = ctl->data;
1962 	int val = *valp;
1963 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1964 	struct net *net = ctl->extra2;
1965 
1966 	if (write && *valp != val)
1967 		rt_cache_flush(net);
1968 
1969 	return ret;
1970 }
1971 
/*
 * Build one ctl_table initializer for config attribute 'attr'.
 * .data points at the attribute's slot in the global ipv4_devconf
 * (index IPV4_DEVCONF_<attr> - 1) and .extra1 at ipv4_devconf itself;
 * __devinet_sysctl_register() rebases both for the actual config.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied proc handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush() */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1994 
/*
 * Template for the net/ipv4/conf/<name>/ sysctl directory.
 * __devinet_sysctl_register() duplicates this structure for every config
 * it registers and repoints the entries at that config; sysctl_header
 * holds the registration handle of the copy.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		/* read-only (mode 0444) */
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* plain read-write integers */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* a changed value also flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
2035 
2036 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2037 					struct ipv4_devconf *p)
2038 {
2039 	int i;
2040 	struct devinet_sysctl_table *t;
2041 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2042 
2043 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2044 	if (!t)
2045 		goto out;
2046 
2047 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2048 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2049 		t->devinet_vars[i].extra1 = p;
2050 		t->devinet_vars[i].extra2 = net;
2051 	}
2052 
2053 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2054 
2055 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2056 	if (!t->sysctl_header)
2057 		goto free;
2058 
2059 	p->sysctl = t;
2060 	return 0;
2061 
2062 free:
2063 	kfree(t);
2064 out:
2065 	return -ENOBUFS;
2066 }
2067 
2068 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2069 {
2070 	struct devinet_sysctl_table *t = cnf->sysctl;
2071 
2072 	if (t == NULL)
2073 		return;
2074 
2075 	cnf->sysctl = NULL;
2076 	unregister_net_sysctl_table(t->sysctl_header);
2077 	kfree(t);
2078 }
2079 
2080 static void devinet_sysctl_register(struct in_device *idev)
2081 {
2082 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2083 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2084 					&idev->cnf);
2085 }
2086 
2087 static void devinet_sysctl_unregister(struct in_device *idev)
2088 {
2089 	__devinet_sysctl_unregister(&idev->cnf);
2090 	neigh_sysctl_unregister(idev->arp_parms);
2091 }
2092 
/*
 * The "ip_forward" sysctl, registered under net/ipv4 by
 * devinet_init_net().  It aliases the forwarding slot of the "all"
 * config.  This static instance points at the init_net data; for other
 * namespaces devinet_init_net() duplicates the table and repoints
 * data/extra1/extra2.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },	/* terminating empty entry */
};
2106 #endif
2107 
/*
 * Per-namespace init: set up the "all" and "default" IPv4 device configs
 * and, with CONFIG_SYSCTL, their sysctl directories plus
 * net/ipv4/ip_forward.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt /
 * ctl_forward_entry objects directly; every other namespace gets
 * kmemdup()ed copies.
 *
 * Returns 0 on success or a negative errno, with everything allocated or
 * registered so far undone.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Private copies, seeded from the current static values */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point ip_forward at this namespace's "all" config */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Unwind in reverse order; the static objects are never freed */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
2179 
2180 static __net_exit void devinet_exit_net(struct net *net)
2181 {
2182 #ifdef CONFIG_SYSCTL
2183 	struct ctl_table *tbl;
2184 
2185 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2186 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2187 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2188 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2189 	kfree(tbl);
2190 #endif
2191 	kfree(net->ipv4.devconf_dflt);
2192 	kfree(net->ipv4.devconf_all);
2193 }
2194 
/* Per-namespace init/exit hooks, registered from devinet_init() */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
2199 
/* AF_INET link attribute callbacks, registered with rtnl_af_register()
 * in devinet_init().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
2207 
/*
 * Boot-time initialisation of IPv4 device support: set up the address
 * hash table, then register the per-namespace operations, the gifconf
 * handler, the netdevice notifier, the AF_INET link ops and the
 * rtnetlink address / netconf message handlers.
 */
void __init devinet_init(void)
{
	int i;

	/* Empty every bucket of the address hash table */
	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	/* Queue check_lifetime_work with zero delay */
	schedule_delayed_work(&check_lifetime_work, 0);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      NULL, NULL);
}
2230 
2231