xref: /linux/net/ipv4/devinet.c (revision 055d752f85f18abb2ad7e2193f61afefe36fd452)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
/*
 * Statically initialised IPv4 device configuration.  Slots in .data are
 * indexed by the IPV4_DEVCONF_* identifier minus one; redirect acceptance,
 * redirect sending, secure redirects and shared media are switched on,
 * every slot not listed stays zero.
 * NOTE(review): presumably the template for the namespace-wide "all"
 * settings -- its consumers are outside this excerpt, confirm.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
78 
/*
 * Statically initialised "default" IPv4 device configuration.  Same
 * settings as ipv4_devconf above plus ACCEPT_SOURCE_ROUTE enabled.
 * NOTE(review): presumably what net->ipv4.devconf_dflt is seeded from
 * (inetdev_init() copies that into every new in_device) -- the wiring is
 * outside this excerpt, confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
88 
/*
 * Read option @attr from the default device configuration of namespace
 * @net.  @net is expanded unparenthesised, so pass a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
91 
/*
 * Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR and
 * RTM_DELADDR handlers in this file: the three address attributes are
 * 32-bit values, the label is a string of at most IFNAMSIZ - 1 bytes and
 * IFA_CACHEINFO is sized after struct ifa_cacheinfo.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
};
99 
/* The address hash table has 2^IN4_ADDR_HSIZE_SHIFT (256) buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of struct in_ifaddr, bucketed by inet_addr_hash() of the
 * local address and the owning namespace.  Insertions and removals are
 * done with the _rcu list primitives while holding inet_addr_hash_lock;
 * lookups in this file walk the buckets under rcu_read_lock().
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ net_hash_mix(net);
109 
110 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
/*
 * Unlink @ifa from the address hash table under inet_addr_hash_lock.
 * The _init variant re-initialises the node, so the ifa can be hashed
 * again later (the ioctl paths delete and re-insert the same ifa).
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	u32 hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 		if (ifa->ifa_local == addr) {
146 			struct net_device *dev = ifa->ifa_dev->dev;
147 
148 			if (!net_eq(dev_net(dev), net))
149 				continue;
150 			result = dev;
151 			break;
152 		}
153 	}
154 	if (!result) {
155 		struct flowi4 fl4 = { .daddr = addr };
156 		struct fib_result res = { 0 };
157 		struct fib_table *local;
158 
159 		/* Fallback to FIB local table so that communication
160 		 * over loopback subnets work.
161 		 */
162 		local = fib_get_table(net, RT_TABLE_LOCAL);
163 		if (local &&
164 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 		    res.type == RTN_LOCAL)
166 			result = FIB_RES_DEV(res);
167 	}
168 	if (result && devref)
169 		dev_hold(result);
170 	rcu_read_unlock();
171 	return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174 
/* Forward declaration; the definition is not within this part of the file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain invoked from this file with NETDEV_UP / NETDEV_DOWN and
 * the affected struct in_ifaddr whenever an address is added or removed.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
/*
 * Per-device sysctl registration hooks.  Without CONFIG_SYSCTL they are
 * empty stubs so that callers need no #ifdef of their own.
 */
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
191 
/*
 * Allocate a zero-filled struct in_ifaddr with GFP_KERNEL (so the caller
 * must be able to sleep).  Returns NULL when the allocation fails.
 */

static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}
198 
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202 	if (ifa->ifa_dev)
203 		in_dev_put(ifa->ifa_dev);
204 	kfree(ifa);
205 }
206 
/*
 * Release @ifa.  The actual free (and the in_device reference drop) is
 * deferred to inet_rcu_free_ifa() via call_rcu().
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
211 
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214 	struct net_device *dev = idev->dev;
215 
216 	WARN_ON(idev->ifa_list);
217 	WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220 #endif
221 	dev_put(dev);
222 	if (!idev->dead)
223 		pr_err("Freeing alive in_device %p\n", idev);
224 	else
225 		kfree(idev);
226 }
227 EXPORT_SYMBOL(in_dev_finish_destroy);
228 
229 static struct in_device *inetdev_init(struct net_device *dev)
230 {
231 	struct in_device *in_dev;
232 
233 	ASSERT_RTNL();
234 
235 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236 	if (!in_dev)
237 		goto out;
238 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239 			sizeof(in_dev->cnf));
240 	in_dev->cnf.sysctl = NULL;
241 	in_dev->dev = dev;
242 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243 	if (!in_dev->arp_parms)
244 		goto out_kfree;
245 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246 		dev_disable_lro(dev);
247 	/* Reference in_dev->dev */
248 	dev_hold(dev);
249 	/* Account for reference dev->ip_ptr (below) */
250 	in_dev_hold(in_dev);
251 
252 	devinet_sysctl_register(in_dev);
253 	ip_mc_init_dev(in_dev);
254 	if (dev->flags & IFF_UP)
255 		ip_mc_up(in_dev);
256 
257 	/* we can receive as soon as ip_ptr is set -- do this last */
258 	rcu_assign_pointer(dev->ip_ptr, in_dev);
259 out:
260 	return in_dev;
261 out_kfree:
262 	kfree(in_dev);
263 	in_dev = NULL;
264 	goto out;
265 }
266 
267 static void in_dev_rcu_put(struct rcu_head *head)
268 {
269 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
270 	in_dev_put(idev);
271 }
272 
273 static void inetdev_destroy(struct in_device *in_dev)
274 {
275 	struct in_ifaddr *ifa;
276 	struct net_device *dev;
277 
278 	ASSERT_RTNL();
279 
280 	dev = in_dev->dev;
281 
282 	in_dev->dead = 1;
283 
284 	ip_mc_destroy_dev(in_dev);
285 
286 	while ((ifa = in_dev->ifa_list) != NULL) {
287 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288 		inet_free_ifa(ifa);
289 	}
290 
291 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
292 
293 	devinet_sysctl_unregister(in_dev);
294 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295 	arp_ifdown(dev);
296 
297 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 }
299 
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301 {
302 	rcu_read_lock();
303 	for_primary_ifa(in_dev) {
304 		if (inet_ifa_match(a, ifa)) {
305 			if (!b || inet_ifa_match(b, ifa)) {
306 				rcu_read_unlock();
307 				return 1;
308 			}
309 		}
310 	} endfor_ifa(in_dev);
311 	rcu_read_unlock();
312 	return 0;
313 }
314 
/*
 * Remove the address that *@ifap points at from @in_dev's address list.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (list head or predecessor's ifa_next) holding the victim
 * @destroy: non-zero to also free the victim through inet_free_ifa()
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * If the victim is a primary address, every secondary of the same subnet
 * is either deleted too (promotion disabled) or the first such secondary
 * is promoted to primary and re-linked behind the last primary recorded
 * in last_prim.  RTM_DELADDR / NETDEV_DOWN are emitted for each removed
 * address and RTM_NEWADDR / NETDEV_UP for a promoted one.
 * Must be called under RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary whose scope is not narrower
			 * than the victim's; a promoted address goes after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the successor before promote is re-linked. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		if (prev_prom) {
			/* Move promote from its old slot to just behind
			 * last_prim.
			 */
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of the
		 * subnet that were dropped above.
		 */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
414 
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * originating netlink request (NULL header, port id 0).
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
420 
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime() to age address lifetimes. */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424 
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @ifa:  fully initialised address; ownership passes to this function
 *        (it is freed here on every early-return path below)
 * @nlh, @portid: passed through to rtmsg_ifa() for the notification
 *
 * Returns 0 on success and also when ifa_local is zero (the ifa is then
 * silently freed), -EEXIST if the same local address already exists in
 * the same subnet, -EINVAL if the subnet exists with a different scope.
 * Must be called under RTNL.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 portid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Start as primary; downgraded below if the subnet is present. */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		/* Remember the slot after the last primary whose scope is
		 * not narrower than the new address's scope.
		 */
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/* A primary goes behind last_primary; a secondary is appended at
	 * the tail, which is where ifap points after the loop.
	 */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	inet_hash_insert(dev_net(in_dev->dev), ifa);

	/* Run the lifetime check right away so it accounts for this ifa. */
	cancel_delayed_work(&check_lifetime_work);
	schedule_delayed_work(&check_lifetime_work, 0);

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
481 
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have
 * no originating netlink request (NULL header, port id 0).
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
486 
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
490 
491 	ASSERT_RTNL();
492 
493 	if (!in_dev) {
494 		inet_free_ifa(ifa);
495 		return -ENOBUFS;
496 	}
497 	ipv4_devconf_setall(in_dev);
498 	if (ifa->ifa_dev != in_dev) {
499 		WARN_ON(ifa->ifa_dev);
500 		in_dev_hold(in_dev);
501 		ifa->ifa_dev = in_dev;
502 	}
503 	if (ipv4_is_loopback(ifa->ifa_local))
504 		ifa->ifa_scope = RT_SCOPE_HOST;
505 	return inet_insert_ifa(ifa);
506 }
507 
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513 	struct net_device *dev;
514 	struct in_device *in_dev = NULL;
515 
516 	rcu_read_lock();
517 	dev = dev_get_by_index_rcu(net, ifindex);
518 	if (dev)
519 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520 	rcu_read_unlock();
521 	return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524 
525 /* Called only from RTNL semaphored context. No locks. */
526 
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528 				    __be32 mask)
529 {
530 	ASSERT_RTNL();
531 
532 	for_primary_ifa(in_dev) {
533 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534 			return ifa;
535 	} endfor_ifa(in_dev);
536 	return NULL;
537 }
538 
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
540 {
541 	struct net *net = sock_net(skb->sk);
542 	struct nlattr *tb[IFA_MAX+1];
543 	struct in_device *in_dev;
544 	struct ifaddrmsg *ifm;
545 	struct in_ifaddr *ifa, **ifap;
546 	int err = -EINVAL;
547 
548 	ASSERT_RTNL();
549 
550 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551 	if (err < 0)
552 		goto errout;
553 
554 	ifm = nlmsg_data(nlh);
555 	in_dev = inetdev_by_index(net, ifm->ifa_index);
556 	if (in_dev == NULL) {
557 		err = -ENODEV;
558 		goto errout;
559 	}
560 
561 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562 	     ifap = &ifa->ifa_next) {
563 		if (tb[IFA_LOCAL] &&
564 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565 			continue;
566 
567 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568 			continue;
569 
570 		if (tb[IFA_ADDRESS] &&
571 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573 			continue;
574 
575 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576 		return 0;
577 	}
578 
579 	err = -EADDRNOTAVAIL;
580 errout:
581 	return err;
582 }
583 
/* Lifetime value (in seconds) meaning "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
585 
586 static void check_lifetime(struct work_struct *work)
587 {
588 	unsigned long now, next, next_sec, next_sched;
589 	struct in_ifaddr *ifa;
590 	int i;
591 
592 	now = jiffies;
593 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
594 
595 	rcu_read_lock();
596 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
598 			unsigned long age;
599 
600 			if (ifa->ifa_flags & IFA_F_PERMANENT)
601 				continue;
602 
603 			/* We try to batch several events at once. */
604 			age = (now - ifa->ifa_tstamp +
605 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
606 
607 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608 			    age >= ifa->ifa_valid_lft) {
609 				struct in_ifaddr **ifap ;
610 
611 				rtnl_lock();
612 				for (ifap = &ifa->ifa_dev->ifa_list;
613 				     *ifap != NULL; ifap = &ifa->ifa_next) {
614 					if (*ifap == ifa)
615 						inet_del_ifa(ifa->ifa_dev,
616 							     ifap, 1);
617 				}
618 				rtnl_unlock();
619 			} else if (ifa->ifa_preferred_lft ==
620 				   INFINITY_LIFE_TIME) {
621 				continue;
622 			} else if (age >= ifa->ifa_preferred_lft) {
623 				if (time_before(ifa->ifa_tstamp +
624 						ifa->ifa_valid_lft * HZ, next))
625 					next = ifa->ifa_tstamp +
626 					       ifa->ifa_valid_lft * HZ;
627 
628 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
629 					ifa->ifa_flags |= IFA_F_DEPRECATED;
630 					rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
631 				}
632 			} else if (time_before(ifa->ifa_tstamp +
633 					       ifa->ifa_preferred_lft * HZ,
634 					       next)) {
635 				next = ifa->ifa_tstamp +
636 				       ifa->ifa_preferred_lft * HZ;
637 			}
638 		}
639 	}
640 	rcu_read_unlock();
641 
642 	next_sec = round_jiffies_up(next);
643 	next_sched = next;
644 
645 	/* If rounded timeout is accurate enough, accept it. */
646 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
647 		next_sched = next_sec;
648 
649 	now = jiffies;
650 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
651 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
652 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
653 
654 	schedule_delayed_work(&check_lifetime_work, next_sched - now);
655 }
656 
657 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
658 			     __u32 prefered_lft)
659 {
660 	unsigned long timeout;
661 
662 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
663 
664 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
665 	if (addrconf_finite_timeout(timeout))
666 		ifa->ifa_valid_lft = timeout;
667 	else
668 		ifa->ifa_flags |= IFA_F_PERMANENT;
669 
670 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
671 	if (addrconf_finite_timeout(timeout)) {
672 		if (timeout == 0)
673 			ifa->ifa_flags |= IFA_F_DEPRECATED;
674 		ifa->ifa_preferred_lft = timeout;
675 	}
676 	ifa->ifa_tstamp = jiffies;
677 	if (!ifa->ifa_cstamp)
678 		ifa->ifa_cstamp = ifa->ifa_tstamp;
679 }
680 
681 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
682 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
683 {
684 	struct nlattr *tb[IFA_MAX+1];
685 	struct in_ifaddr *ifa;
686 	struct ifaddrmsg *ifm;
687 	struct net_device *dev;
688 	struct in_device *in_dev;
689 	int err;
690 
691 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
692 	if (err < 0)
693 		goto errout;
694 
695 	ifm = nlmsg_data(nlh);
696 	err = -EINVAL;
697 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
698 		goto errout;
699 
700 	dev = __dev_get_by_index(net, ifm->ifa_index);
701 	err = -ENODEV;
702 	if (dev == NULL)
703 		goto errout;
704 
705 	in_dev = __in_dev_get_rtnl(dev);
706 	err = -ENOBUFS;
707 	if (in_dev == NULL)
708 		goto errout;
709 
710 	ifa = inet_alloc_ifa();
711 	if (ifa == NULL)
712 		/*
713 		 * A potential indev allocation can be left alive, it stays
714 		 * assigned to its device and is destroy with it.
715 		 */
716 		goto errout;
717 
718 	ipv4_devconf_setall(in_dev);
719 	in_dev_hold(in_dev);
720 
721 	if (tb[IFA_ADDRESS] == NULL)
722 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
723 
724 	INIT_HLIST_NODE(&ifa->hash);
725 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
726 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
727 	ifa->ifa_flags = ifm->ifa_flags;
728 	ifa->ifa_scope = ifm->ifa_scope;
729 	ifa->ifa_dev = in_dev;
730 
731 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
732 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
733 
734 	if (tb[IFA_BROADCAST])
735 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
736 
737 	if (tb[IFA_LABEL])
738 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
739 	else
740 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
741 
742 	if (tb[IFA_CACHEINFO]) {
743 		struct ifa_cacheinfo *ci;
744 
745 		ci = nla_data(tb[IFA_CACHEINFO]);
746 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
747 			err = -EINVAL;
748 			goto errout;
749 		}
750 		*pvalid_lft = ci->ifa_valid;
751 		*pprefered_lft = ci->ifa_prefered;
752 	}
753 
754 	return ifa;
755 
756 errout:
757 	return ERR_PTR(err);
758 }
759 
760 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
761 {
762 	struct in_device *in_dev = ifa->ifa_dev;
763 	struct in_ifaddr *ifa1, **ifap;
764 
765 	if (!ifa->ifa_local)
766 		return NULL;
767 
768 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
769 	     ifap = &ifa1->ifa_next) {
770 		if (ifa1->ifa_mask == ifa->ifa_mask &&
771 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
772 		    ifa1->ifa_local == ifa->ifa_local)
773 			return ifa1;
774 	}
775 	return NULL;
776 }
777 
778 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
779 {
780 	struct net *net = sock_net(skb->sk);
781 	struct in_ifaddr *ifa;
782 	struct in_ifaddr *ifa_existing;
783 	__u32 valid_lft = INFINITY_LIFE_TIME;
784 	__u32 prefered_lft = INFINITY_LIFE_TIME;
785 
786 	ASSERT_RTNL();
787 
788 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
789 	if (IS_ERR(ifa))
790 		return PTR_ERR(ifa);
791 
792 	ifa_existing = find_matching_ifa(ifa);
793 	if (!ifa_existing) {
794 		/* It would be best to check for !NLM_F_CREATE here but
795 		 * userspace alreay relies on not having to provide this.
796 		 */
797 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
798 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
799 	} else {
800 		inet_free_ifa(ifa);
801 
802 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
803 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
804 			return -EEXIST;
805 		ifa = ifa_existing;
806 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
807 		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
808 		blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
809 	}
810 	return 0;
811 }
812 
813 /*
814  *	Determine a default network mask, based on the IP address.
815  */
816 
817 static int inet_abc_len(__be32 addr)
818 {
819 	int rc = -1;	/* Something else, probably a multicast. */
820 
821 	if (ipv4_is_zeronet(addr))
822 		rc = 0;
823 	else {
824 		__u32 haddr = ntohl(addr);
825 
826 		if (IN_CLASSA(haddr))
827 			rc = 8;
828 		else if (IN_CLASSB(haddr))
829 			rc = 16;
830 		else if (IN_CLASSC(haddr))
831 			rc = 24;
832 	}
833 
834 	return rc;
835 }
836 
837 
/*
 * devinet_ioctl - handle the classic per-interface IPv4 address ioctls.
 * @net: network namespace of the caller
 * @cmd: one of SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK},
 *       SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @arg: user pointer to a struct ifreq; ifr_name selects the interface,
 *       or an alias label when it contains a ':'
 *
 * The "get" commands copy the requested field of the selected address
 * back to user space.  The "set" commands require CAP_NET_ADMIN in the
 * namespace's user namespace; they modify an address by unlinking it,
 * changing it and inserting it again.
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM (missing capability), -EINVAL (unknown command, family other
 * than AF_INET on a set, unusable address or netmask), -ENODEV (no such
 * device), -EADDRNOTAVAIL (no address selected), -ENOBUFS (allocation
 * failure or no in_device on SIOCSIFADDR), or whatever
 * dev_change_flags() / inet_set_ifa() return.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* Never trust user space to have terminated the name. */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut an alias label down to the device name; the
	 * colon is restored once the device has been looked up.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_local) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR (which may create an address) and SIOCSIFFLAGS
	 * can proceed without a selected address.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		/* On an alias label only "down" is acted on: it deletes
		 * that address.  Without a colon the flags are applied to
		 * the device itself.
		 */
		if (colon) {
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			INIT_HLIST_NODE(&ifa->hash);
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* Unlink without freeing; the ifa is reused. */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		/* Classful default mask, or a host mask on point-to-point
		 * devices.
		 */
		if (!(dev->flags & IFF_POINTOPOINT)) {
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "Get" commands: drop RTNL before touching user memory. */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
1079 
1080 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1081 {
1082 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1083 	struct in_ifaddr *ifa;
1084 	struct ifreq ifr;
1085 	int done = 0;
1086 
1087 	if (!in_dev)
1088 		goto out;
1089 
1090 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1091 		if (!buf) {
1092 			done += sizeof(ifr);
1093 			continue;
1094 		}
1095 		if (len < (int) sizeof(ifr))
1096 			break;
1097 		memset(&ifr, 0, sizeof(struct ifreq));
1098 		if (ifa->ifa_label)
1099 			strcpy(ifr.ifr_name, ifa->ifa_label);
1100 		else
1101 			strcpy(ifr.ifr_name, dev->name);
1102 
1103 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1104 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1105 								ifa->ifa_local;
1106 
1107 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1108 			done = -EFAULT;
1109 			break;
1110 		}
1111 		buf  += sizeof(struct ifreq);
1112 		len  -= sizeof(struct ifreq);
1113 		done += sizeof(struct ifreq);
1114 	}
1115 out:
1116 	return done;
1117 }
1118 
1119 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1120 {
1121 	__be32 addr = 0;
1122 	struct in_device *in_dev;
1123 	struct net *net = dev_net(dev);
1124 
1125 	rcu_read_lock();
1126 	in_dev = __in_dev_get_rcu(dev);
1127 	if (!in_dev)
1128 		goto no_in_dev;
1129 
1130 	for_primary_ifa(in_dev) {
1131 		if (ifa->ifa_scope > scope)
1132 			continue;
1133 		if (!dst || inet_ifa_match(dst, ifa)) {
1134 			addr = ifa->ifa_local;
1135 			break;
1136 		}
1137 		if (!addr)
1138 			addr = ifa->ifa_local;
1139 	} endfor_ifa(in_dev);
1140 
1141 	if (addr)
1142 		goto out_unlock;
1143 no_in_dev:
1144 
1145 	/* Not loopback addresses on loopback should be preferred
1146 	   in this case. It is importnat that lo is the first interface
1147 	   in dev_base list.
1148 	 */
1149 	for_each_netdev_rcu(net, dev) {
1150 		in_dev = __in_dev_get_rcu(dev);
1151 		if (!in_dev)
1152 			continue;
1153 
1154 		for_primary_ifa(in_dev) {
1155 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1156 			    ifa->ifa_scope <= scope) {
1157 				addr = ifa->ifa_local;
1158 				goto out_unlock;
1159 			}
1160 		} endfor_ifa(in_dev);
1161 	}
1162 out_unlock:
1163 	rcu_read_unlock();
1164 	return addr;
1165 }
1166 EXPORT_SYMBOL(inet_select_addr);
1167 
/*
 * Look for a local address on one in_device.
 *
 * @in_dev: device whose address list is scanned
 * @dst:    address that must share a subnet with the match, 0 = wildcard
 * @local:  wanted local address, 0 = pick one automatically
 * @scope:  largest ifa_scope value accepted for the returned address
 *
 * Two things are tracked while walking the list: 'addr', the candidate
 * local address, and 'same', which records that some ifaddr's subnet
 * matched both @local and @dst (each treated as a wildcard when 0).
 * Returns 'addr' only if 'same' ended up set, otherwise 0.
 */
1168 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1169 			      __be32 local, int scope)
1170 {
1171 	int same = 0;
1172 	__be32 addr = 0;
1173 
1174 	for_ifa(in_dev) {
		/* First address equal to @local (or any, if @local is 0)
		 * with acceptable scope becomes the candidate.
		 */
1175 		if (!addr &&
1176 		    (local == ifa->ifa_local || !local) &&
1177 		    ifa->ifa_scope <= scope) {
1178 			addr = ifa->ifa_local;
			/* A subnet match was already seen: nothing left to do. */
1179 			if (same)
1180 				break;
1181 		}
1182 		if (!same) {
			/* Does this ifaddr's subnet cover both @local and @dst? */
1183 			same = (!local || inet_ifa_match(local, ifa)) &&
1184 				(!dst || inet_ifa_match(dst, ifa));
1185 			if (same && addr) {
				/* Only the autoselect-with-dst case needs the
				 * extra subnet check below.
				 */
1186 				if (local || !dst)
1187 					break;
1188 				/* Is the selected addr into dst subnet? */
1189 				if (inet_ifa_match(addr, ifa))
1190 					break;
1191 				/* No, then can we use new local src? */
1192 				if (ifa->ifa_scope <= scope) {
1193 					addr = ifa->ifa_local;
1194 					break;
1195 				}
1196 				/* search for large dst subnet for addr */
1197 				same = 0;
1198 			}
1199 		}
1200 	} endfor_ifa(in_dev);
1201 
1202 	return same ? addr : 0;
1203 }
1204 
1205 /*
1206  * Confirm that local IP address exists using wildcards:
1207  * - in_dev: only on this interface, 0=any interface
1208  * - dst: only in the same subnet as dst, 0=any dst
1209  * - local: address, 0=autoselect the local address
1210  * - scope: maximum allowed scope value for the local address
1211  */
1212 __be32 inet_confirm_addr(struct in_device *in_dev,
1213 			 __be32 dst, __be32 local, int scope)
1214 {
1215 	__be32 addr = 0;
1216 	struct net_device *dev;
1217 	struct net *net;
1218 
1219 	if (scope != RT_SCOPE_LINK)
1220 		return confirm_addr_indev(in_dev, dst, local, scope);
1221 
1222 	net = dev_net(in_dev->dev);
1223 	rcu_read_lock();
1224 	for_each_netdev_rcu(net, dev) {
1225 		in_dev = __in_dev_get_rcu(dev);
1226 		if (in_dev) {
1227 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1228 			if (addr)
1229 				break;
1230 		}
1231 	}
1232 	rcu_read_unlock();
1233 
1234 	return addr;
1235 }
1236 EXPORT_SYMBOL(inet_confirm_addr);
1237 
1238 /*
1239  *	Device notifier
1240  */
1241 
1242 int register_inetaddr_notifier(struct notifier_block *nb)
1243 {
1244 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1245 }
1246 EXPORT_SYMBOL(register_inetaddr_notifier);
1247 
1248 int unregister_inetaddr_notifier(struct notifier_block *nb)
1249 {
1250 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1251 }
1252 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1253 
1254 /* Rename ifa_labels for a device name change. Make some effort to preserve
1255  * existing alias numbering and to create unique labels if possible.
1256 */
1257 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1258 {
1259 	struct in_ifaddr *ifa;
1260 	int named = 0;
1261 
1262 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1263 		char old[IFNAMSIZ], *dot;
1264 
1265 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1266 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1267 		if (named++ == 0)
1268 			goto skip;
1269 		dot = strchr(old, ':');
1270 		if (dot == NULL) {
1271 			sprintf(old, ":%d", named);
1272 			dot = old;
1273 		}
1274 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1275 			strcat(ifa->ifa_label, dot);
1276 		else
1277 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1278 skip:
1279 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1280 	}
1281 }
1282 
/* An MTU is usable for IPv4 here only when it is at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1287 
1288 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1289 					struct in_device *in_dev)
1290 
1291 {
1292 	struct in_ifaddr *ifa;
1293 
1294 	for (ifa = in_dev->ifa_list; ifa;
1295 	     ifa = ifa->ifa_next) {
1296 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1297 			 ifa->ifa_local, dev,
1298 			 ifa->ifa_local, NULL,
1299 			 dev->dev_addr, NULL);
1300 	}
1301 }
1302 
/*
 * Netdevice notifier callback for the IPv4 layer.
 *
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Creates, updates or destroys the device's in_device depending on the
 * event.  Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case -ENOMEM is reported through
 * notifier_from_errno().
 */
1303 /* Called only under RTNL semaphore */
1304 
1305 static int inetdev_event(struct notifier_block *this, unsigned long event,
1306 			 void *ptr)
1307 {
1308 	struct net_device *dev = ptr;
1309 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1310 
1311 	ASSERT_RTNL();
1312 
	/* No in_device yet: only REGISTER and CHANGEMTU can create one. */
1313 	if (!in_dev) {
1314 		if (event == NETDEV_REGISTER) {
1315 			in_dev = inetdev_init(dev);
1316 			if (!in_dev)
1317 				return notifier_from_errno(-ENOMEM);
1318 			if (dev->flags & IFF_LOOPBACK) {
1319 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1320 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1321 			}
1322 		} else if (event == NETDEV_CHANGEMTU) {
1323 			/* Re-enabling IP */
1324 			if (inetdev_valid_mtu(dev->mtu))
1325 				in_dev = inetdev_init(dev);
1326 		}
1327 		goto out;
1328 	}
1329 
1330 	switch (event) {
1331 	case NETDEV_REGISTER:
		/* Registering a device that already has an in_device is unexpected. */
1332 		pr_debug("%s: bug\n", __func__);
1333 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1334 		break;
1335 	case NETDEV_UP:
1336 		if (!inetdev_valid_mtu(dev->mtu))
1337 			break;
		/* Loopback gets 127.0.0.1/8 with host scope configured automatically. */
1338 		if (dev->flags & IFF_LOOPBACK) {
1339 			struct in_ifaddr *ifa = inet_alloc_ifa();
1340 
1341 			if (ifa) {
1342 				INIT_HLIST_NODE(&ifa->hash);
1343 				ifa->ifa_local =
1344 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1345 				ifa->ifa_prefixlen = 8;
1346 				ifa->ifa_mask = inet_make_mask(8);
1347 				in_dev_hold(in_dev);
1348 				ifa->ifa_dev = in_dev;
1349 				ifa->ifa_scope = RT_SCOPE_HOST;
1350 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1351 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1352 						 INFINITY_LIFE_TIME);
1353 				inet_insert_ifa(ifa);
1354 			}
1355 		}
1356 		ip_mc_up(in_dev);
1357 		/* fall through */
1358 	case NETDEV_CHANGEADDR:
		/* Announce via gratuitous ARP only if arp_notify is enabled. */
1359 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1360 			break;
1361 		/* fall through */
1362 	case NETDEV_NOTIFY_PEERS:
1363 		/* Send gratuitous ARP to notify of link change */
1364 		inetdev_send_gratuitous_arp(dev, in_dev);
1365 		break;
1366 	case NETDEV_DOWN:
1367 		ip_mc_down(in_dev);
1368 		break;
1369 	case NETDEV_PRE_TYPE_CHANGE:
1370 		ip_mc_unmap(in_dev);
1371 		break;
1372 	case NETDEV_POST_TYPE_CHANGE:
1373 		ip_mc_remap(in_dev);
1374 		break;
1375 	case NETDEV_CHANGEMTU:
1376 		if (inetdev_valid_mtu(dev->mtu))
1377 			break;
1378 		/* disable IP when MTU is not enough; fall through */
1379 	case NETDEV_UNREGISTER:
1380 		inetdev_destroy(in_dev);
1381 		break;
1382 	case NETDEV_CHANGENAME:
1383 		/* Do not notify about label change, this event is
1384 		 * not interesting to applications using netlink.
1385 		 */
		/* NOTE(review): inetdev_changename() does call
		 * rtmsg_ifa(RTM_NEWADDR, ...) per address, which seems to
		 * contradict the comment above -- confirm intent.
		 */
1386 		inetdev_changename(dev, in_dev);
1387 
		/* Re-register so the sysctl directory carries the new name. */
1388 		devinet_sysctl_unregister(in_dev);
1389 		devinet_sysctl_register(in_dev);
1390 		break;
1391 	}
1392 out:
1393 	return NOTIFY_DONE;
1394 }
1395 
/* Notifier block that routes netdevice events to inetdev_event(). */
1396 static struct notifier_block ip_netdev_notifier = {
1397 	.notifier_call = inetdev_event,
1398 };
1399 
1400 static size_t inet_nlmsg_size(void)
1401 {
1402 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1403 	       + nla_total_size(4) /* IFA_ADDRESS */
1404 	       + nla_total_size(4) /* IFA_LOCAL */
1405 	       + nla_total_size(4) /* IFA_BROADCAST */
1406 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1407 }
1408 
1409 static inline u32 cstamp_delta(unsigned long cstamp)
1410 {
1411 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1412 }
1413 
1414 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1415 			 unsigned long tstamp, u32 preferred, u32 valid)
1416 {
1417 	struct ifa_cacheinfo ci;
1418 
1419 	ci.cstamp = cstamp_delta(cstamp);
1420 	ci.tstamp = cstamp_delta(tstamp);
1421 	ci.ifa_prefered = preferred;
1422 	ci.ifa_valid = valid;
1423 
1424 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1425 }
1426 
1427 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1428 			    u32 portid, u32 seq, int event, unsigned int flags)
1429 {
1430 	struct ifaddrmsg *ifm;
1431 	struct nlmsghdr  *nlh;
1432 	u32 preferred, valid;
1433 
1434 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1435 	if (nlh == NULL)
1436 		return -EMSGSIZE;
1437 
1438 	ifm = nlmsg_data(nlh);
1439 	ifm->ifa_family = AF_INET;
1440 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1441 	ifm->ifa_flags = ifa->ifa_flags;
1442 	ifm->ifa_scope = ifa->ifa_scope;
1443 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1444 
1445 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1446 		preferred = ifa->ifa_preferred_lft;
1447 		valid = ifa->ifa_valid_lft;
1448 		if (preferred != INFINITY_LIFE_TIME) {
1449 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1450 
1451 			if (preferred > tval)
1452 				preferred -= tval;
1453 			else
1454 				preferred = 0;
1455 			if (valid != INFINITY_LIFE_TIME) {
1456 				if (valid > tval)
1457 					valid -= tval;
1458 				else
1459 					valid = 0;
1460 			}
1461 		}
1462 	} else {
1463 		preferred = INFINITY_LIFE_TIME;
1464 		valid = INFINITY_LIFE_TIME;
1465 	}
1466 	if ((ifa->ifa_address &&
1467 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1468 	    (ifa->ifa_local &&
1469 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1470 	    (ifa->ifa_broadcast &&
1471 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1472 	    (ifa->ifa_label[0] &&
1473 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1474 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1475 			  preferred, valid))
1476 		goto nla_put_failure;
1477 
1478 	return nlmsg_end(skb, nlh);
1479 
1480 nla_put_failure:
1481 	nlmsg_cancel(skb, nlh);
1482 	return -EMSGSIZE;
1483 }
1484 
/*
 * Netlink dump callback for RTM_GETADDR: emit one RTM_NEWADDR message per
 * IPv4 address of every device in the socket's namespace.
 *
 * The dump is resumable.  cb->args[] carries the position reached:
 *   args[0] - bucket of net->dev_index_head being walked
 *   args[1] - index of the device inside that bucket
 *   args[2] - index of the address inside that device's list
 * Walking stops as soon as inet_fill_ifaddr() reports that no more data
 * fits; the position is saved and skb->len is returned.
 */
1485 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1486 {
1487 	struct net *net = sock_net(skb->sk);
1488 	int h, s_h;
1489 	int idx, s_idx;
1490 	int ip_idx, s_ip_idx;
1491 	struct net_device *dev;
1492 	struct in_device *in_dev;
1493 	struct in_ifaddr *ifa;
1494 	struct hlist_head *head;
1495 
	/* Restore the position saved by the previous invocation. */
1496 	s_h = cb->args[0];
1497 	s_idx = idx = cb->args[1];
1498 	s_ip_idx = ip_idx = cb->args[2];
1499 
	/* s_idx only applies to the first bucket visited; reset it afterwards. */
1500 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1501 		idx = 0;
1502 		head = &net->dev_index_head[h];
1503 		rcu_read_lock();
1504 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
			/* Skip devices already dumped in an earlier pass. */
1505 			if (idx < s_idx)
1506 				goto cont;
			/* Past the resume device: start from its first address. */
1507 			if (h > s_h || idx > s_idx)
1508 				s_ip_idx = 0;
1509 			in_dev = __in_dev_get_rcu(dev);
1510 			if (!in_dev)
1511 				goto cont;
1512 
1513 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1514 			     ifa = ifa->ifa_next, ip_idx++) {
1515 				if (ip_idx < s_ip_idx)
1516 					continue;
1517 				if (inet_fill_ifaddr(skb, ifa,
1518 					     NETLINK_CB(cb->skb).portid,
1519 					     cb->nlh->nlmsg_seq,
1520 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					/* Out of room: leave with h/idx/ip_idx at this address. */
1521 					rcu_read_unlock();
1522 					goto done;
1523 				}
1524 			}
1525 cont:
1526 			idx++;
1527 		}
1528 		rcu_read_unlock();
1529 	}
1530 
1531 done:
	/* Save where to continue on the next call. */
1532 	cb->args[0] = h;
1533 	cb->args[1] = idx;
1534 	cb->args[2] = ip_idx;
1535 
1536 	return skb->len;
1537 }
1538 
1539 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1540 		      u32 portid)
1541 {
1542 	struct sk_buff *skb;
1543 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1544 	int err = -ENOBUFS;
1545 	struct net *net;
1546 
1547 	net = dev_net(ifa->ifa_dev->dev);
1548 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1549 	if (skb == NULL)
1550 		goto errout;
1551 
1552 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1553 	if (err < 0) {
1554 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1555 		WARN_ON(err == -EMSGSIZE);
1556 		kfree_skb(skb);
1557 		goto errout;
1558 	}
1559 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1560 	return;
1561 errout:
1562 	if (err < 0)
1563 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1564 }
1565 
1566 static size_t inet_get_link_af_size(const struct net_device *dev)
1567 {
1568 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1569 
1570 	if (!in_dev)
1571 		return 0;
1572 
1573 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1574 }
1575 
1576 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1577 {
1578 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1579 	struct nlattr *nla;
1580 	int i;
1581 
1582 	if (!in_dev)
1583 		return -ENODATA;
1584 
1585 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1586 	if (nla == NULL)
1587 		return -EMSGSIZE;
1588 
1589 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1590 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1591 
1592 	return 0;
1593 }
1594 
/* Attribute policy for the AF_INET link attributes: IFLA_INET_CONF is a nest. */
1595 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1596 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1597 };
1598 
1599 static int inet_validate_link_af(const struct net_device *dev,
1600 				 const struct nlattr *nla)
1601 {
1602 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1603 	int err, rem;
1604 
1605 	if (dev && !__in_dev_get_rtnl(dev))
1606 		return -EAFNOSUPPORT;
1607 
1608 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1609 	if (err < 0)
1610 		return err;
1611 
1612 	if (tb[IFLA_INET_CONF]) {
1613 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1614 			int cfgid = nla_type(a);
1615 
1616 			if (nla_len(a) < 4)
1617 				return -EINVAL;
1618 
1619 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1620 				return -EINVAL;
1621 		}
1622 	}
1623 
1624 	return 0;
1625 }
1626 
1627 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1628 {
1629 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1630 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1631 	int rem;
1632 
1633 	if (!in_dev)
1634 		return -EAFNOSUPPORT;
1635 
1636 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1637 		BUG();
1638 
1639 	if (tb[IFLA_INET_CONF]) {
1640 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1641 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1642 	}
1643 
1644 	return 0;
1645 }
1646 
1647 static int inet_netconf_msgsize_devconf(int type)
1648 {
1649 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1650 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1651 
1652 	/* type -1 is used for ALL */
1653 	if (type == -1 || type == NETCONFA_FORWARDING)
1654 		size += nla_total_size(4);
1655 	if (type == -1 || type == NETCONFA_RP_FILTER)
1656 		size += nla_total_size(4);
1657 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1658 		size += nla_total_size(4);
1659 
1660 	return size;
1661 }
1662 
1663 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1664 				     struct ipv4_devconf *devconf, u32 portid,
1665 				     u32 seq, int event, unsigned int flags,
1666 				     int type)
1667 {
1668 	struct nlmsghdr  *nlh;
1669 	struct netconfmsg *ncm;
1670 
1671 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1672 			flags);
1673 	if (nlh == NULL)
1674 		return -EMSGSIZE;
1675 
1676 	ncm = nlmsg_data(nlh);
1677 	ncm->ncm_family = AF_INET;
1678 
1679 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1680 		goto nla_put_failure;
1681 
1682 	/* type -1 is used for ALL */
1683 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1684 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1685 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1686 		goto nla_put_failure;
1687 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1688 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1689 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1690 		goto nla_put_failure;
1691 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1692 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1693 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1694 		goto nla_put_failure;
1695 
1696 	return nlmsg_end(skb, nlh);
1697 
1698 nla_put_failure:
1699 	nlmsg_cancel(skb, nlh);
1700 	return -EMSGSIZE;
1701 }
1702 
1703 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1704 				 struct ipv4_devconf *devconf)
1705 {
1706 	struct sk_buff *skb;
1707 	int err = -ENOBUFS;
1708 
1709 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1710 	if (skb == NULL)
1711 		goto errout;
1712 
1713 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1714 					RTM_NEWNETCONF, 0, type);
1715 	if (err < 0) {
1716 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1717 		WARN_ON(err == -EMSGSIZE);
1718 		kfree_skb(skb);
1719 		goto errout;
1720 	}
1721 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1722 	return;
1723 errout:
1724 	if (err < 0)
1725 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1726 }
1727 
/*
 * Attribute policy used when parsing RTM_GETNETCONF requests; each listed
 * attribute has its .len set to sizeof(int).
 */
1728 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1729 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1730 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1731 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1732 };
1733 
1734 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1735 				    struct nlmsghdr *nlh,
1736 				    void *arg)
1737 {
1738 	struct net *net = sock_net(in_skb->sk);
1739 	struct nlattr *tb[NETCONFA_MAX+1];
1740 	struct netconfmsg *ncm;
1741 	struct sk_buff *skb;
1742 	struct ipv4_devconf *devconf;
1743 	struct in_device *in_dev;
1744 	struct net_device *dev;
1745 	int ifindex;
1746 	int err;
1747 
1748 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1749 			  devconf_ipv4_policy);
1750 	if (err < 0)
1751 		goto errout;
1752 
1753 	err = EINVAL;
1754 	if (!tb[NETCONFA_IFINDEX])
1755 		goto errout;
1756 
1757 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1758 	switch (ifindex) {
1759 	case NETCONFA_IFINDEX_ALL:
1760 		devconf = net->ipv4.devconf_all;
1761 		break;
1762 	case NETCONFA_IFINDEX_DEFAULT:
1763 		devconf = net->ipv4.devconf_dflt;
1764 		break;
1765 	default:
1766 		dev = __dev_get_by_index(net, ifindex);
1767 		if (dev == NULL)
1768 			goto errout;
1769 		in_dev = __in_dev_get_rtnl(dev);
1770 		if (in_dev == NULL)
1771 			goto errout;
1772 		devconf = &in_dev->cnf;
1773 		break;
1774 	}
1775 
1776 	err = -ENOBUFS;
1777 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1778 	if (skb == NULL)
1779 		goto errout;
1780 
1781 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1782 					NETLINK_CB(in_skb).portid,
1783 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1784 					-1);
1785 	if (err < 0) {
1786 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1787 		WARN_ON(err == -EMSGSIZE);
1788 		kfree_skb(skb);
1789 		goto errout;
1790 	}
1791 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1792 errout:
1793 	return err;
1794 }
1795 
1796 #ifdef CONFIG_SYSCTL
1797 
1798 static void devinet_copy_dflt_conf(struct net *net, int i)
1799 {
1800 	struct net_device *dev;
1801 
1802 	rcu_read_lock();
1803 	for_each_netdev_rcu(net, dev) {
1804 		struct in_device *in_dev;
1805 
1806 		in_dev = __in_dev_get_rcu(dev);
1807 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1808 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1809 	}
1810 	rcu_read_unlock();
1811 }
1812 
1813 /* called with RTNL locked */
1814 static void inet_forward_change(struct net *net)
1815 {
1816 	struct net_device *dev;
1817 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1818 
1819 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1820 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1821 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1822 				    NETCONFA_IFINDEX_ALL,
1823 				    net->ipv4.devconf_all);
1824 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1825 				    NETCONFA_IFINDEX_DEFAULT,
1826 				    net->ipv4.devconf_dflt);
1827 
1828 	for_each_netdev(net, dev) {
1829 		struct in_device *in_dev;
1830 		if (on)
1831 			dev_disable_lro(dev);
1832 		rcu_read_lock();
1833 		in_dev = __in_dev_get_rcu(dev);
1834 		if (in_dev) {
1835 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1836 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1837 						    dev->ifindex, &in_dev->cnf);
1838 		}
1839 		rcu_read_unlock();
1840 	}
1841 }
1842 
1843 static int devinet_conf_proc(ctl_table *ctl, int write,
1844 			     void __user *buffer,
1845 			     size_t *lenp, loff_t *ppos)
1846 {
1847 	int old_value = *(int *)ctl->data;
1848 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1849 	int new_value = *(int *)ctl->data;
1850 
1851 	if (write) {
1852 		struct ipv4_devconf *cnf = ctl->extra1;
1853 		struct net *net = ctl->extra2;
1854 		int i = (int *)ctl->data - cnf->data;
1855 
1856 		set_bit(i, cnf->state);
1857 
1858 		if (cnf == net->ipv4.devconf_dflt)
1859 			devinet_copy_dflt_conf(net, i);
1860 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1861 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1862 			if ((new_value == 0) && (old_value != 0))
1863 				rt_cache_flush(net);
1864 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1865 		    new_value != old_value) {
1866 			int ifindex;
1867 
1868 			if (cnf == net->ipv4.devconf_dflt)
1869 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1870 			else if (cnf == net->ipv4.devconf_all)
1871 				ifindex = NETCONFA_IFINDEX_ALL;
1872 			else {
1873 				struct in_device *idev =
1874 					container_of(cnf, struct in_device,
1875 						     cnf);
1876 				ifindex = idev->dev->ifindex;
1877 			}
1878 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1879 						    ifindex, cnf);
1880 		}
1881 	}
1882 
1883 	return ret;
1884 }
1885 
1886 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1887 				  void __user *buffer,
1888 				  size_t *lenp, loff_t *ppos)
1889 {
1890 	int *valp = ctl->data;
1891 	int val = *valp;
1892 	loff_t pos = *ppos;
1893 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1894 
1895 	if (write && *valp != val) {
1896 		struct net *net = ctl->extra2;
1897 
1898 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1899 			if (!rtnl_trylock()) {
1900 				/* Restore the original values before restarting */
1901 				*valp = val;
1902 				*ppos = pos;
1903 				return restart_syscall();
1904 			}
1905 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1906 				inet_forward_change(net);
1907 			} else {
1908 				struct ipv4_devconf *cnf = ctl->extra1;
1909 				struct in_device *idev =
1910 					container_of(cnf, struct in_device, cnf);
1911 				if (*valp)
1912 					dev_disable_lro(idev->dev);
1913 				inet_netconf_notify_devconf(net,
1914 							    NETCONFA_FORWARDING,
1915 							    idev->dev->ifindex,
1916 							    cnf);
1917 			}
1918 			rtnl_unlock();
1919 			rt_cache_flush(net);
1920 		} else
1921 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1922 						    NETCONFA_IFINDEX_DEFAULT,
1923 						    net->ipv4.devconf_dflt);
1924 	}
1925 
1926 	return ret;
1927 }
1928 
1929 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1930 				void __user *buffer,
1931 				size_t *lenp, loff_t *ppos)
1932 {
1933 	int *valp = ctl->data;
1934 	int val = *valp;
1935 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1936 	struct net *net = ctl->extra2;
1937 
1938 	if (write && *valp != val)
1939 		rt_cache_flush(net);
1940 
1941 	return ret;
1942 }
1943 
/*
 * Helpers for building devinet sysctl table entries.
 *
 * DEVINET_SYSCTL_ENTRY emits one ctl_table initializer whose .data points
 * at slot (IPV4_DEVCONF_<attr> - 1) of the global ipv4_devconf and whose
 * .extra1 points at ipv4_devconf itself.  The wrappers below only fix the
 * mode and/or handler:
 *   RW       - 0644, devinet_conf_proc
 *   RO       - 0444, devinet_conf_proc
 *   COMPLEX  - 0644, caller-supplied handler
 *   FLUSHING - 0644, ipv4_doint_and_flush
 */
1944 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1945 	{ \
1946 		.procname	= name, \
1947 		.data		= ipv4_devconf.data + \
1948 				  IPV4_DEVCONF_ ## attr - 1, \
1949 		.maxlen		= sizeof(int), \
1950 		.mode		= mval, \
1951 		.proc_handler	= proc, \
1952 		.extra1		= &ipv4_devconf, \
1953 	}
1954 
1955 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1956 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1957 
1958 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1959 	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1960 
1961 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1962 	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1963 
1964 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1965 	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1966 
/*
 * Template for the net/ipv4/conf/<name>/ sysctl directory.
 *
 * Every entry points into the global ipv4_devconf.
 * __devinet_sysctl_register() duplicates this whole structure and shifts
 * the .data pointers (and sets .extra1/.extra2) so the copy refers to the
 * devconf block being registered.  sysctl_header holds the registration
 * handle of such a copy.
 */
1967 static struct devinet_sysctl_table {
1968 	struct ctl_table_header *sysctl_header;
1969 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1970 } devinet_sysctl = {
1971 	.devinet_vars = {
1972 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1973 					     devinet_sysctl_forward),
1974 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1975 
1976 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1977 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1978 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1979 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1980 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1981 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1982 					"accept_source_route"),
1983 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1984 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1985 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1986 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1987 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1988 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1989 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1990 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1991 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1992 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1993 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1994 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1995 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1996 
		/* Entries below flush the route cache when their value changes. */
1997 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1998 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1999 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2000 					      "force_igmp_version"),
2001 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2002 					      "promote_secondaries"),
2003 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2004 					      "route_localnet"),
2005 	},
2006 };
2007 
2008 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2009 					struct ipv4_devconf *p)
2010 {
2011 	int i;
2012 	struct devinet_sysctl_table *t;
2013 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2014 
2015 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2016 	if (!t)
2017 		goto out;
2018 
2019 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2020 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2021 		t->devinet_vars[i].extra1 = p;
2022 		t->devinet_vars[i].extra2 = net;
2023 	}
2024 
2025 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2026 
2027 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2028 	if (!t->sysctl_header)
2029 		goto free;
2030 
2031 	p->sysctl = t;
2032 	return 0;
2033 
2034 free:
2035 	kfree(t);
2036 out:
2037 	return -ENOBUFS;
2038 }
2039 
2040 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2041 {
2042 	struct devinet_sysctl_table *t = cnf->sysctl;
2043 
2044 	if (t == NULL)
2045 		return;
2046 
2047 	cnf->sysctl = NULL;
2048 	unregister_net_sysctl_table(t->sysctl_header);
2049 	kfree(t);
2050 }
2051 
2052 static void devinet_sysctl_register(struct in_device *idev)
2053 {
2054 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2055 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2056 					&idev->cnf);
2057 }
2058 
2059 static void devinet_sysctl_unregister(struct in_device *idev)
2060 {
2061 	__devinet_sysctl_unregister(&idev->cnf);
2062 	neigh_sysctl_unregister(idev->arp_parms);
2063 }
2064 
/*
 * The "ip_forward" sysctl; devinet_init_net() registers it under
 * net/ipv4.
 *
 * As written it refers to the forwarding slot of the global ipv4_devconf
 * and to init_net.  For other namespaces devinet_init_net() duplicates
 * the table and re-points .data, .extra1 and .extra2.
 */
2065 static struct ctl_table ctl_forward_entry[] = {
2066 	{
2067 		.procname	= "ip_forward",
2068 		.data		= &ipv4_devconf.data[
2069 					IPV4_DEVCONF_FORWARDING - 1],
2070 		.maxlen		= sizeof(int),
2071 		.mode		= 0644,
2072 		.proc_handler	= devinet_sysctl_forward,
2073 		.extra1		= &ipv4_devconf,
2074 		.extra2		= &init_net,
2075 	},
2076 	{ },
2077 };
2078 #endif
2079 
/*
 * Per-namespace init: set up the "all" and "default" devconf blocks and,
 * with CONFIG_SYSCTL, their sysctl directories plus net/ipv4/ip_forward.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt and
 * ctl_forward_entry directly; every other namespace gets kmemdup()ed
 * copies.  Returns 0 on success or a negative errno, releasing whatever
 * had been set up so far.
 */
2080 static __net_init int devinet_init_net(struct net *net)
2081 {
2082 	int err;
2083 	struct ipv4_devconf *all, *dflt;
2084 #ifdef CONFIG_SYSCTL
2085 	struct ctl_table *tbl = ctl_forward_entry;
2086 	struct ctl_table_header *forw_hdr;
2087 #endif
2088 
2089 	err = -ENOMEM;
2090 	all = &ipv4_devconf;
2091 	dflt = &ipv4_devconf_dflt;
2092 
	/* Non-initial namespaces start from private copies of the globals. */
2093 	if (!net_eq(net, &init_net)) {
2094 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2095 		if (all == NULL)
2096 			goto err_alloc_all;
2097 
2098 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2099 		if (dflt == NULL)
2100 			goto err_alloc_dflt;
2101 
2102 #ifdef CONFIG_SYSCTL
2103 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2104 		if (tbl == NULL)
2105 			goto err_alloc_ctl;
2106 
		/* Point the copied ip_forward entry at this namespace's data. */
2107 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2108 		tbl[0].extra1 = all;
2109 		tbl[0].extra2 = net;
2110 #endif
2111 	}
2112 
2113 #ifdef CONFIG_SYSCTL
2114 	err = __devinet_sysctl_register(net, "all", all);
2115 	if (err < 0)
2116 		goto err_reg_all;
2117 
2118 	err = __devinet_sysctl_register(net, "default", dflt);
2119 	if (err < 0)
2120 		goto err_reg_dflt;
2121 
2122 	err = -ENOMEM;
2123 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2124 	if (forw_hdr == NULL)
2125 		goto err_reg_ctl;
2126 	net->ipv4.forw_hdr = forw_hdr;
2127 #endif
2128 
2129 	net->ipv4.devconf_all = all;
2130 	net->ipv4.devconf_dflt = dflt;
2131 	return 0;
2132 
	/* Error unwinding: each label undoes the steps completed before it. */
2133 #ifdef CONFIG_SYSCTL
2134 err_reg_ctl:
2135 	__devinet_sysctl_unregister(dflt);
2136 err_reg_dflt:
2137 	__devinet_sysctl_unregister(all);
2138 err_reg_all:
	/* Only free what was duplicated, never the static originals. */
2139 	if (tbl != ctl_forward_entry)
2140 		kfree(tbl);
2141 err_alloc_ctl:
2142 #endif
2143 	if (dflt != &ipv4_devconf_dflt)
2144 		kfree(dflt);
2145 err_alloc_dflt:
2146 	if (all != &ipv4_devconf)
2147 		kfree(all);
2148 err_alloc_all:
2149 	return err;
2150 }
2151 
2152 static __net_exit void devinet_exit_net(struct net *net)
2153 {
2154 #ifdef CONFIG_SYSCTL
2155 	struct ctl_table *tbl;
2156 
2157 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2158 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2159 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2160 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2161 	kfree(tbl);
2162 #endif
2163 	kfree(net->ipv4.devconf_dflt);
2164 	kfree(net->ipv4.devconf_all);
2165 }
2166 
/* Per-namespace hooks; registered by devinet_init() via register_pernet_subsys(). */
2167 static __net_initdata struct pernet_operations devinet_ops = {
2168 	.init = devinet_init_net,
2169 	.exit = devinet_exit_net,
2170 };
2171 
/*
 * AF_INET handlers for the per-address-family part of link messages;
 * registered by devinet_init() via rtnl_af_register().
 */
2172 static struct rtnl_af_ops inet_af_ops = {
2173 	.family		  = AF_INET,
2174 	.fill_link_af	  = inet_fill_link_af,
2175 	.get_link_af_size = inet_get_link_af_size,
2176 	.validate_link_af = inet_validate_link_af,
2177 	.set_link_af	  = inet_set_link_af,
2178 };
2179 
2180 void __init devinet_init(void)
2181 {
2182 	int i;
2183 
2184 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2185 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2186 
2187 	register_pernet_subsys(&devinet_ops);
2188 
2189 	register_gifconf(PF_INET, inet_gifconf);
2190 	register_netdevice_notifier(&ip_netdev_notifier);
2191 
2192 	schedule_delayed_work(&check_lifetime_work, 0);
2193 
2194 	rtnl_af_register(&inet_af_ops);
2195 
2196 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2197 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2198 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2199 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2200 		      NULL, NULL);
2201 }
2202 
2203