xref: /linux/net/ipv4/devinet.c (revision e5c5d22e8dcf7c2d430336cbf8e180bd38e8daf1)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67 
68 #include "fib_lookup.h"
69 
70 static struct ipv4_devconf ipv4_devconf = {
71 	.data = {
72 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 	},
77 };
78 
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80 	.data = {
81 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86 	},
87 };
88 
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesized so that any pointer-valued expression can be
 * passed, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91 
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93 	[IFA_LOCAL]     	= { .type = NLA_U32 },
94 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
95 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
96 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
98 };
99 
100 #define IN4_ADDR_HSIZE_SHIFT	8
101 #define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
102 
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108 	u32 val = (__force u32) addr ^ net_hash_mix(net);
109 
110 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124 	spin_lock(&inet_addr_hash_lock);
125 	hlist_del_init_rcu(&ifa->hash);
126 	spin_unlock(&inet_addr_hash_lock);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	u32 hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 
143 	rcu_read_lock();
144 	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145 		if (ifa->ifa_local == addr) {
146 			struct net_device *dev = ifa->ifa_dev->dev;
147 
148 			if (!net_eq(dev_net(dev), net))
149 				continue;
150 			result = dev;
151 			break;
152 		}
153 	}
154 	if (!result) {
155 		struct flowi4 fl4 = { .daddr = addr };
156 		struct fib_result res = { 0 };
157 		struct fib_table *local;
158 
159 		/* Fallback to FIB local table so that communication
160 		 * over loopback subnets work.
161 		 */
162 		local = fib_get_table(net, RT_TABLE_LOCAL);
163 		if (local &&
164 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165 		    res.type == RTN_LOCAL)
166 			result = FIB_RES_DEV(res);
167 	}
168 	if (result && devref)
169 		dev_hold(result);
170 	rcu_read_unlock();
171 	return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174 
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176 
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179 			 int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191 
192 /* Locks all the inet devices. */
193 
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198 
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202 	if (ifa->ifa_dev)
203 		in_dev_put(ifa->ifa_dev);
204 	kfree(ifa);
205 }
206 
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211 
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214 	struct net_device *dev = idev->dev;
215 
216 	WARN_ON(idev->ifa_list);
217 	WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220 #endif
221 	dev_put(dev);
222 	if (!idev->dead)
223 		pr_err("Freeing alive in_device %p\n", idev);
224 	else
225 		kfree(idev);
226 }
227 EXPORT_SYMBOL(in_dev_finish_destroy);
228 
229 static struct in_device *inetdev_init(struct net_device *dev)
230 {
231 	struct in_device *in_dev;
232 
233 	ASSERT_RTNL();
234 
235 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236 	if (!in_dev)
237 		goto out;
238 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239 			sizeof(in_dev->cnf));
240 	in_dev->cnf.sysctl = NULL;
241 	in_dev->dev = dev;
242 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243 	if (!in_dev->arp_parms)
244 		goto out_kfree;
245 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246 		dev_disable_lro(dev);
247 	/* Reference in_dev->dev */
248 	dev_hold(dev);
249 	/* Account for reference dev->ip_ptr (below) */
250 	in_dev_hold(in_dev);
251 
252 	devinet_sysctl_register(in_dev);
253 	ip_mc_init_dev(in_dev);
254 	if (dev->flags & IFF_UP)
255 		ip_mc_up(in_dev);
256 
257 	/* we can receive as soon as ip_ptr is set -- do this last */
258 	rcu_assign_pointer(dev->ip_ptr, in_dev);
259 out:
260 	return in_dev;
261 out_kfree:
262 	kfree(in_dev);
263 	in_dev = NULL;
264 	goto out;
265 }
266 
267 static void in_dev_rcu_put(struct rcu_head *head)
268 {
269 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
270 	in_dev_put(idev);
271 }
272 
273 static void inetdev_destroy(struct in_device *in_dev)
274 {
275 	struct in_ifaddr *ifa;
276 	struct net_device *dev;
277 
278 	ASSERT_RTNL();
279 
280 	dev = in_dev->dev;
281 
282 	in_dev->dead = 1;
283 
284 	ip_mc_destroy_dev(in_dev);
285 
286 	while ((ifa = in_dev->ifa_list) != NULL) {
287 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288 		inet_free_ifa(ifa);
289 	}
290 
291 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
292 
293 	devinet_sysctl_unregister(in_dev);
294 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295 	arp_ifdown(dev);
296 
297 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 }
299 
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301 {
302 	rcu_read_lock();
303 	for_primary_ifa(in_dev) {
304 		if (inet_ifa_match(a, ifa)) {
305 			if (!b || inet_ifa_match(b, ifa)) {
306 				rcu_read_unlock();
307 				return 1;
308 			}
309 		}
310 	} endfor_ifa(in_dev);
311 	rcu_read_unlock();
312 	return 0;
313 }
314 
315 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
316 			 int destroy, struct nlmsghdr *nlh, u32 portid)
317 {
318 	struct in_ifaddr *promote = NULL;
319 	struct in_ifaddr *ifa, *ifa1 = *ifap;
320 	struct in_ifaddr *last_prim = in_dev->ifa_list;
321 	struct in_ifaddr *prev_prom = NULL;
322 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
323 
324 	ASSERT_RTNL();
325 
326 	/* 1. Deleting primary ifaddr forces deletion all secondaries
327 	 * unless alias promotion is set
328 	 **/
329 
330 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
331 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
332 
333 		while ((ifa = *ifap1) != NULL) {
334 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
335 			    ifa1->ifa_scope <= ifa->ifa_scope)
336 				last_prim = ifa;
337 
338 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
339 			    ifa1->ifa_mask != ifa->ifa_mask ||
340 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
341 				ifap1 = &ifa->ifa_next;
342 				prev_prom = ifa;
343 				continue;
344 			}
345 
346 			if (!do_promote) {
347 				inet_hash_remove(ifa);
348 				*ifap1 = ifa->ifa_next;
349 
350 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
351 				blocking_notifier_call_chain(&inetaddr_chain,
352 						NETDEV_DOWN, ifa);
353 				inet_free_ifa(ifa);
354 			} else {
355 				promote = ifa;
356 				break;
357 			}
358 		}
359 	}
360 
361 	/* On promotion all secondaries from subnet are changing
362 	 * the primary IP, we must remove all their routes silently
363 	 * and later to add them back with new prefsrc. Do this
364 	 * while all addresses are on the device list.
365 	 */
366 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
367 		if (ifa1->ifa_mask == ifa->ifa_mask &&
368 		    inet_ifa_match(ifa1->ifa_address, ifa))
369 			fib_del_ifaddr(ifa, ifa1);
370 	}
371 
372 	/* 2. Unlink it */
373 
374 	*ifap = ifa1->ifa_next;
375 	inet_hash_remove(ifa1);
376 
377 	/* 3. Announce address deletion */
378 
379 	/* Send message first, then call notifier.
380 	   At first sight, FIB update triggered by notifier
381 	   will refer to already deleted ifaddr, that could confuse
382 	   netlink listeners. It is not true: look, gated sees
383 	   that route deleted and if it still thinks that ifaddr
384 	   is valid, it will try to restore deleted routes... Grr.
385 	   So that, this order is correct.
386 	 */
387 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
388 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
389 
390 	if (promote) {
391 		struct in_ifaddr *next_sec = promote->ifa_next;
392 
393 		if (prev_prom) {
394 			prev_prom->ifa_next = promote->ifa_next;
395 			promote->ifa_next = last_prim->ifa_next;
396 			last_prim->ifa_next = promote;
397 		}
398 
399 		promote->ifa_flags &= ~IFA_F_SECONDARY;
400 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
401 		blocking_notifier_call_chain(&inetaddr_chain,
402 				NETDEV_UP, promote);
403 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
404 			if (ifa1->ifa_mask != ifa->ifa_mask ||
405 			    !inet_ifa_match(ifa1->ifa_address, ifa))
406 					continue;
407 			fib_add_ifaddr(ifa);
408 		}
409 
410 	}
411 	if (destroy)
412 		inet_free_ifa(ifa1);
413 }
414 
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
416 			 int destroy)
417 {
418 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
419 }
420 
/* Delayed work item whose handler is check_lifetime(), defined below. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424 
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426 			     u32 portid)
427 {
428 	struct in_device *in_dev = ifa->ifa_dev;
429 	struct in_ifaddr *ifa1, **ifap, **last_primary;
430 
431 	ASSERT_RTNL();
432 
433 	if (!ifa->ifa_local) {
434 		inet_free_ifa(ifa);
435 		return 0;
436 	}
437 
438 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
439 	last_primary = &in_dev->ifa_list;
440 
441 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442 	     ifap = &ifa1->ifa_next) {
443 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444 		    ifa->ifa_scope <= ifa1->ifa_scope)
445 			last_primary = &ifa1->ifa_next;
446 		if (ifa1->ifa_mask == ifa->ifa_mask &&
447 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
448 			if (ifa1->ifa_local == ifa->ifa_local) {
449 				inet_free_ifa(ifa);
450 				return -EEXIST;
451 			}
452 			if (ifa1->ifa_scope != ifa->ifa_scope) {
453 				inet_free_ifa(ifa);
454 				return -EINVAL;
455 			}
456 			ifa->ifa_flags |= IFA_F_SECONDARY;
457 		}
458 	}
459 
460 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461 		net_srandom(ifa->ifa_local);
462 		ifap = last_primary;
463 	}
464 
465 	ifa->ifa_next = *ifap;
466 	*ifap = ifa;
467 
468 	inet_hash_insert(dev_net(in_dev->dev), ifa);
469 
470 	cancel_delayed_work(&check_lifetime_work);
471 	schedule_delayed_work(&check_lifetime_work, 0);
472 
473 	/* Send message first, then call notifier.
474 	   Notifier will trigger FIB update, so that
475 	   listeners of netlink will know about new ifaddr */
476 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478 
479 	return 0;
480 }
481 
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
483 {
484 	return __inet_insert_ifa(ifa, NULL, 0);
485 }
486 
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
490 
491 	ASSERT_RTNL();
492 
493 	if (!in_dev) {
494 		inet_free_ifa(ifa);
495 		return -ENOBUFS;
496 	}
497 	ipv4_devconf_setall(in_dev);
498 	if (ifa->ifa_dev != in_dev) {
499 		WARN_ON(ifa->ifa_dev);
500 		in_dev_hold(in_dev);
501 		ifa->ifa_dev = in_dev;
502 	}
503 	if (ipv4_is_loopback(ifa->ifa_local))
504 		ifa->ifa_scope = RT_SCOPE_HOST;
505 	return inet_insert_ifa(ifa);
506 }
507 
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513 	struct net_device *dev;
514 	struct in_device *in_dev = NULL;
515 
516 	rcu_read_lock();
517 	dev = dev_get_by_index_rcu(net, ifindex);
518 	if (dev)
519 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520 	rcu_read_unlock();
521 	return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524 
525 /* Called only from RTNL semaphored context. No locks. */
526 
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528 				    __be32 mask)
529 {
530 	ASSERT_RTNL();
531 
532 	for_primary_ifa(in_dev) {
533 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534 			return ifa;
535 	} endfor_ifa(in_dev);
536 	return NULL;
537 }
538 
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
540 {
541 	struct net *net = sock_net(skb->sk);
542 	struct nlattr *tb[IFA_MAX+1];
543 	struct in_device *in_dev;
544 	struct ifaddrmsg *ifm;
545 	struct in_ifaddr *ifa, **ifap;
546 	int err = -EINVAL;
547 
548 	ASSERT_RTNL();
549 
550 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551 	if (err < 0)
552 		goto errout;
553 
554 	ifm = nlmsg_data(nlh);
555 	in_dev = inetdev_by_index(net, ifm->ifa_index);
556 	if (in_dev == NULL) {
557 		err = -ENODEV;
558 		goto errout;
559 	}
560 
561 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562 	     ifap = &ifa->ifa_next) {
563 		if (tb[IFA_LOCAL] &&
564 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565 			continue;
566 
567 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568 			continue;
569 
570 		if (tb[IFA_ADDRESS] &&
571 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573 			continue;
574 
575 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576 		return 0;
577 	}
578 
579 	err = -EADDRNOTAVAIL;
580 errout:
581 	return err;
582 }
583 
/* Lifetime value that the lifetime checks in this file treat as "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
585 
586 static void check_lifetime(struct work_struct *work)
587 {
588 	unsigned long now, next, next_sec, next_sched;
589 	struct in_ifaddr *ifa;
590 	int i;
591 
592 	now = jiffies;
593 	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
594 
595 	rcu_read_lock();
596 	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597 		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
598 			unsigned long age;
599 
600 			if (ifa->ifa_flags & IFA_F_PERMANENT)
601 				continue;
602 
603 			/* We try to batch several events at once. */
604 			age = (now - ifa->ifa_tstamp +
605 			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
606 
607 			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608 			    age >= ifa->ifa_valid_lft) {
609 				struct in_ifaddr **ifap ;
610 
611 				rtnl_lock();
612 				for (ifap = &ifa->ifa_dev->ifa_list;
613 				     *ifap != NULL; ifap = &ifa->ifa_next) {
614 					if (*ifap == ifa)
615 						inet_del_ifa(ifa->ifa_dev,
616 							     ifap, 1);
617 				}
618 				rtnl_unlock();
619 			} else if (ifa->ifa_preferred_lft ==
620 				   INFINITY_LIFE_TIME) {
621 				continue;
622 			} else if (age >= ifa->ifa_preferred_lft) {
623 				if (time_before(ifa->ifa_tstamp +
624 						ifa->ifa_valid_lft * HZ, next))
625 					next = ifa->ifa_tstamp +
626 					       ifa->ifa_valid_lft * HZ;
627 
628 				if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
629 					ifa->ifa_flags |= IFA_F_DEPRECATED;
630 					rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
631 				}
632 			} else if (time_before(ifa->ifa_tstamp +
633 					       ifa->ifa_preferred_lft * HZ,
634 					       next)) {
635 				next = ifa->ifa_tstamp +
636 				       ifa->ifa_preferred_lft * HZ;
637 			}
638 		}
639 	}
640 	rcu_read_unlock();
641 
642 	next_sec = round_jiffies_up(next);
643 	next_sched = next;
644 
645 	/* If rounded timeout is accurate enough, accept it. */
646 	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
647 		next_sched = next_sec;
648 
649 	now = jiffies;
650 	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
651 	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
652 		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
653 
654 	schedule_delayed_work(&check_lifetime_work, next_sched - now);
655 }
656 
657 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
658 			     __u32 prefered_lft)
659 {
660 	unsigned long timeout;
661 
662 	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
663 
664 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
665 	if (addrconf_finite_timeout(timeout))
666 		ifa->ifa_valid_lft = timeout;
667 	else
668 		ifa->ifa_flags |= IFA_F_PERMANENT;
669 
670 	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
671 	if (addrconf_finite_timeout(timeout)) {
672 		if (timeout == 0)
673 			ifa->ifa_flags |= IFA_F_DEPRECATED;
674 		ifa->ifa_preferred_lft = timeout;
675 	}
676 	ifa->ifa_tstamp = jiffies;
677 	if (!ifa->ifa_cstamp)
678 		ifa->ifa_cstamp = ifa->ifa_tstamp;
679 }
680 
681 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
682 				       __u32 *pvalid_lft, __u32 *pprefered_lft)
683 {
684 	struct nlattr *tb[IFA_MAX+1];
685 	struct in_ifaddr *ifa;
686 	struct ifaddrmsg *ifm;
687 	struct net_device *dev;
688 	struct in_device *in_dev;
689 	int err;
690 
691 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
692 	if (err < 0)
693 		goto errout;
694 
695 	ifm = nlmsg_data(nlh);
696 	err = -EINVAL;
697 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
698 		goto errout;
699 
700 	dev = __dev_get_by_index(net, ifm->ifa_index);
701 	err = -ENODEV;
702 	if (dev == NULL)
703 		goto errout;
704 
705 	in_dev = __in_dev_get_rtnl(dev);
706 	err = -ENOBUFS;
707 	if (in_dev == NULL)
708 		goto errout;
709 
710 	ifa = inet_alloc_ifa();
711 	if (ifa == NULL)
712 		/*
713 		 * A potential indev allocation can be left alive, it stays
714 		 * assigned to its device and is destroy with it.
715 		 */
716 		goto errout;
717 
718 	ipv4_devconf_setall(in_dev);
719 	in_dev_hold(in_dev);
720 
721 	if (tb[IFA_ADDRESS] == NULL)
722 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
723 
724 	INIT_HLIST_NODE(&ifa->hash);
725 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
726 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
727 	ifa->ifa_flags = ifm->ifa_flags;
728 	ifa->ifa_scope = ifm->ifa_scope;
729 	ifa->ifa_dev = in_dev;
730 
731 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
732 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
733 
734 	if (tb[IFA_BROADCAST])
735 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
736 
737 	if (tb[IFA_LABEL])
738 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
739 	else
740 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
741 
742 	if (tb[IFA_CACHEINFO]) {
743 		struct ifa_cacheinfo *ci;
744 
745 		ci = nla_data(tb[IFA_CACHEINFO]);
746 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
747 			err = -EINVAL;
748 			goto errout;
749 		}
750 		*pvalid_lft = ci->ifa_valid;
751 		*pprefered_lft = ci->ifa_prefered;
752 	}
753 
754 	return ifa;
755 
756 errout:
757 	return ERR_PTR(err);
758 }
759 
760 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
761 {
762 	struct in_device *in_dev = ifa->ifa_dev;
763 	struct in_ifaddr *ifa1, **ifap;
764 
765 	if (!ifa->ifa_local)
766 		return NULL;
767 
768 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
769 	     ifap = &ifa1->ifa_next) {
770 		if (ifa1->ifa_mask == ifa->ifa_mask &&
771 		    inet_ifa_match(ifa1->ifa_address, ifa) &&
772 		    ifa1->ifa_local == ifa->ifa_local)
773 			return ifa1;
774 	}
775 	return NULL;
776 }
777 
778 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
779 {
780 	struct net *net = sock_net(skb->sk);
781 	struct in_ifaddr *ifa;
782 	struct in_ifaddr *ifa_existing;
783 	__u32 valid_lft = INFINITY_LIFE_TIME;
784 	__u32 prefered_lft = INFINITY_LIFE_TIME;
785 
786 	ASSERT_RTNL();
787 
788 	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
789 	if (IS_ERR(ifa))
790 		return PTR_ERR(ifa);
791 
792 	ifa_existing = find_matching_ifa(ifa);
793 	if (!ifa_existing) {
794 		/* It would be best to check for !NLM_F_CREATE here but
795 		 * userspace alreay relies on not having to provide this.
796 		 */
797 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
798 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
799 	} else {
800 		inet_free_ifa(ifa);
801 
802 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
803 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
804 			return -EEXIST;
805 
806 		set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
807 	}
808 	return 0;
809 }
810 
811 /*
812  *	Determine a default network mask, based on the IP address.
813  */
814 
815 static int inet_abc_len(__be32 addr)
816 {
817 	int rc = -1;	/* Something else, probably a multicast. */
818 
819 	if (ipv4_is_zeronet(addr))
820 		rc = 0;
821 	else {
822 		__u32 haddr = ntohl(addr);
823 
824 		if (IN_CLASSA(haddr))
825 			rc = 8;
826 		else if (IN_CLASSB(haddr))
827 			rc = 16;
828 		else if (IN_CLASSC(haddr))
829 			rc = 24;
830 	}
831 
832 	return rc;
833 }
834 
835 
836 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
837 {
838 	struct ifreq ifr;
839 	struct sockaddr_in sin_orig;
840 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
841 	struct in_device *in_dev;
842 	struct in_ifaddr **ifap = NULL;
843 	struct in_ifaddr *ifa = NULL;
844 	struct net_device *dev;
845 	char *colon;
846 	int ret = -EFAULT;
847 	int tryaddrmatch = 0;
848 
849 	/*
850 	 *	Fetch the caller's info block into kernel space
851 	 */
852 
853 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
854 		goto out;
855 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
856 
857 	/* save original address for comparison */
858 	memcpy(&sin_orig, sin, sizeof(*sin));
859 
860 	colon = strchr(ifr.ifr_name, ':');
861 	if (colon)
862 		*colon = 0;
863 
864 	dev_load(net, ifr.ifr_name);
865 
866 	switch (cmd) {
867 	case SIOCGIFADDR:	/* Get interface address */
868 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
869 	case SIOCGIFDSTADDR:	/* Get the destination address */
870 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
871 		/* Note that these ioctls will not sleep,
872 		   so that we do not impose a lock.
873 		   One day we will be forced to put shlock here (I mean SMP)
874 		 */
875 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
876 		memset(sin, 0, sizeof(*sin));
877 		sin->sin_family = AF_INET;
878 		break;
879 
880 	case SIOCSIFFLAGS:
881 		ret = -EPERM;
882 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
883 			goto out;
884 		break;
885 	case SIOCSIFADDR:	/* Set interface address (and family) */
886 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
887 	case SIOCSIFDSTADDR:	/* Set the destination address */
888 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
889 		ret = -EPERM;
890 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
891 			goto out;
892 		ret = -EINVAL;
893 		if (sin->sin_family != AF_INET)
894 			goto out;
895 		break;
896 	default:
897 		ret = -EINVAL;
898 		goto out;
899 	}
900 
901 	rtnl_lock();
902 
903 	ret = -ENODEV;
904 	dev = __dev_get_by_name(net, ifr.ifr_name);
905 	if (!dev)
906 		goto done;
907 
908 	if (colon)
909 		*colon = ':';
910 
911 	in_dev = __in_dev_get_rtnl(dev);
912 	if (in_dev) {
913 		if (tryaddrmatch) {
914 			/* Matthias Andree */
915 			/* compare label and address (4.4BSD style) */
916 			/* note: we only do this for a limited set of ioctls
917 			   and only if the original address family was AF_INET.
918 			   This is checked above. */
919 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
920 			     ifap = &ifa->ifa_next) {
921 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
922 				    sin_orig.sin_addr.s_addr ==
923 							ifa->ifa_local) {
924 					break; /* found */
925 				}
926 			}
927 		}
928 		/* we didn't get a match, maybe the application is
929 		   4.3BSD-style and passed in junk so we fall back to
930 		   comparing just the label */
931 		if (!ifa) {
932 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
933 			     ifap = &ifa->ifa_next)
934 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
935 					break;
936 		}
937 	}
938 
939 	ret = -EADDRNOTAVAIL;
940 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
941 		goto done;
942 
943 	switch (cmd) {
944 	case SIOCGIFADDR:	/* Get interface address */
945 		sin->sin_addr.s_addr = ifa->ifa_local;
946 		goto rarok;
947 
948 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
949 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
950 		goto rarok;
951 
952 	case SIOCGIFDSTADDR:	/* Get the destination address */
953 		sin->sin_addr.s_addr = ifa->ifa_address;
954 		goto rarok;
955 
956 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
957 		sin->sin_addr.s_addr = ifa->ifa_mask;
958 		goto rarok;
959 
960 	case SIOCSIFFLAGS:
961 		if (colon) {
962 			ret = -EADDRNOTAVAIL;
963 			if (!ifa)
964 				break;
965 			ret = 0;
966 			if (!(ifr.ifr_flags & IFF_UP))
967 				inet_del_ifa(in_dev, ifap, 1);
968 			break;
969 		}
970 		ret = dev_change_flags(dev, ifr.ifr_flags);
971 		break;
972 
973 	case SIOCSIFADDR:	/* Set interface address (and family) */
974 		ret = -EINVAL;
975 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
976 			break;
977 
978 		if (!ifa) {
979 			ret = -ENOBUFS;
980 			ifa = inet_alloc_ifa();
981 			if (!ifa)
982 				break;
983 			INIT_HLIST_NODE(&ifa->hash);
984 			if (colon)
985 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
986 			else
987 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
988 		} else {
989 			ret = 0;
990 			if (ifa->ifa_local == sin->sin_addr.s_addr)
991 				break;
992 			inet_del_ifa(in_dev, ifap, 0);
993 			ifa->ifa_broadcast = 0;
994 			ifa->ifa_scope = 0;
995 		}
996 
997 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
998 
999 		if (!(dev->flags & IFF_POINTOPOINT)) {
1000 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1001 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1002 			if ((dev->flags & IFF_BROADCAST) &&
1003 			    ifa->ifa_prefixlen < 31)
1004 				ifa->ifa_broadcast = ifa->ifa_address |
1005 						     ~ifa->ifa_mask;
1006 		} else {
1007 			ifa->ifa_prefixlen = 32;
1008 			ifa->ifa_mask = inet_make_mask(32);
1009 		}
1010 		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1011 		ret = inet_set_ifa(dev, ifa);
1012 		break;
1013 
1014 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1015 		ret = 0;
1016 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1017 			inet_del_ifa(in_dev, ifap, 0);
1018 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1019 			inet_insert_ifa(ifa);
1020 		}
1021 		break;
1022 
1023 	case SIOCSIFDSTADDR:	/* Set the destination address */
1024 		ret = 0;
1025 		if (ifa->ifa_address == sin->sin_addr.s_addr)
1026 			break;
1027 		ret = -EINVAL;
1028 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1029 			break;
1030 		ret = 0;
1031 		inet_del_ifa(in_dev, ifap, 0);
1032 		ifa->ifa_address = sin->sin_addr.s_addr;
1033 		inet_insert_ifa(ifa);
1034 		break;
1035 
1036 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1037 
1038 		/*
1039 		 *	The mask we set must be legal.
1040 		 */
1041 		ret = -EINVAL;
1042 		if (bad_mask(sin->sin_addr.s_addr, 0))
1043 			break;
1044 		ret = 0;
1045 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1046 			__be32 old_mask = ifa->ifa_mask;
1047 			inet_del_ifa(in_dev, ifap, 0);
1048 			ifa->ifa_mask = sin->sin_addr.s_addr;
1049 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1050 
1051 			/* See if current broadcast address matches
1052 			 * with current netmask, then recalculate
1053 			 * the broadcast address. Otherwise it's a
1054 			 * funny address, so don't touch it since
1055 			 * the user seems to know what (s)he's doing...
1056 			 */
1057 			if ((dev->flags & IFF_BROADCAST) &&
1058 			    (ifa->ifa_prefixlen < 31) &&
1059 			    (ifa->ifa_broadcast ==
1060 			     (ifa->ifa_local|~old_mask))) {
1061 				ifa->ifa_broadcast = (ifa->ifa_local |
1062 						      ~sin->sin_addr.s_addr);
1063 			}
1064 			inet_insert_ifa(ifa);
1065 		}
1066 		break;
1067 	}
1068 done:
1069 	rtnl_unlock();
1070 out:
1071 	return ret;
1072 rarok:
1073 	rtnl_unlock();
1074 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1075 	goto out;
1076 }
1077 
1078 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1079 {
1080 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1081 	struct in_ifaddr *ifa;
1082 	struct ifreq ifr;
1083 	int done = 0;
1084 
1085 	if (!in_dev)
1086 		goto out;
1087 
1088 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1089 		if (!buf) {
1090 			done += sizeof(ifr);
1091 			continue;
1092 		}
1093 		if (len < (int) sizeof(ifr))
1094 			break;
1095 		memset(&ifr, 0, sizeof(struct ifreq));
1096 		if (ifa->ifa_label)
1097 			strcpy(ifr.ifr_name, ifa->ifa_label);
1098 		else
1099 			strcpy(ifr.ifr_name, dev->name);
1100 
1101 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1102 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1103 								ifa->ifa_local;
1104 
1105 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1106 			done = -EFAULT;
1107 			break;
1108 		}
1109 		buf  += sizeof(struct ifreq);
1110 		len  -= sizeof(struct ifreq);
1111 		done += sizeof(struct ifreq);
1112 	}
1113 out:
1114 	return done;
1115 }
1116 
1117 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1118 {
1119 	__be32 addr = 0;
1120 	struct in_device *in_dev;
1121 	struct net *net = dev_net(dev);
1122 
1123 	rcu_read_lock();
1124 	in_dev = __in_dev_get_rcu(dev);
1125 	if (!in_dev)
1126 		goto no_in_dev;
1127 
1128 	for_primary_ifa(in_dev) {
1129 		if (ifa->ifa_scope > scope)
1130 			continue;
1131 		if (!dst || inet_ifa_match(dst, ifa)) {
1132 			addr = ifa->ifa_local;
1133 			break;
1134 		}
1135 		if (!addr)
1136 			addr = ifa->ifa_local;
1137 	} endfor_ifa(in_dev);
1138 
1139 	if (addr)
1140 		goto out_unlock;
1141 no_in_dev:
1142 
1143 	/* Not loopback addresses on loopback should be preferred
1144 	   in this case. It is importnat that lo is the first interface
1145 	   in dev_base list.
1146 	 */
1147 	for_each_netdev_rcu(net, dev) {
1148 		in_dev = __in_dev_get_rcu(dev);
1149 		if (!in_dev)
1150 			continue;
1151 
1152 		for_primary_ifa(in_dev) {
1153 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1154 			    ifa->ifa_scope <= scope) {
1155 				addr = ifa->ifa_local;
1156 				goto out_unlock;
1157 			}
1158 		} endfor_ifa(in_dev);
1159 	}
1160 out_unlock:
1161 	rcu_read_unlock();
1162 	return addr;
1163 }
1164 EXPORT_SYMBOL(inet_select_addr);
1165 
1166 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1167 			      __be32 local, int scope)
1168 {
1169 	int same = 0;
1170 	__be32 addr = 0;
1171 
1172 	for_ifa(in_dev) {
1173 		if (!addr &&
1174 		    (local == ifa->ifa_local || !local) &&
1175 		    ifa->ifa_scope <= scope) {
1176 			addr = ifa->ifa_local;
1177 			if (same)
1178 				break;
1179 		}
1180 		if (!same) {
1181 			same = (!local || inet_ifa_match(local, ifa)) &&
1182 				(!dst || inet_ifa_match(dst, ifa));
1183 			if (same && addr) {
1184 				if (local || !dst)
1185 					break;
1186 				/* Is the selected addr into dst subnet? */
1187 				if (inet_ifa_match(addr, ifa))
1188 					break;
1189 				/* No, then can we use new local src? */
1190 				if (ifa->ifa_scope <= scope) {
1191 					addr = ifa->ifa_local;
1192 					break;
1193 				}
1194 				/* search for large dst subnet for addr */
1195 				same = 0;
1196 			}
1197 		}
1198 	} endfor_ifa(in_dev);
1199 
1200 	return same ? addr : 0;
1201 }
1202 
1203 /*
1204  * Confirm that local IP address exists using wildcards:
1205  * - in_dev: only on this interface, 0=any interface
1206  * - dst: only in the same subnet as dst, 0=any dst
1207  * - local: address, 0=autoselect the local address
1208  * - scope: maximum allowed scope value for the local address
1209  */
1210 __be32 inet_confirm_addr(struct in_device *in_dev,
1211 			 __be32 dst, __be32 local, int scope)
1212 {
1213 	__be32 addr = 0;
1214 	struct net_device *dev;
1215 	struct net *net;
1216 
1217 	if (scope != RT_SCOPE_LINK)
1218 		return confirm_addr_indev(in_dev, dst, local, scope);
1219 
1220 	net = dev_net(in_dev->dev);
1221 	rcu_read_lock();
1222 	for_each_netdev_rcu(net, dev) {
1223 		in_dev = __in_dev_get_rcu(dev);
1224 		if (in_dev) {
1225 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1226 			if (addr)
1227 				break;
1228 		}
1229 	}
1230 	rcu_read_unlock();
1231 
1232 	return addr;
1233 }
1234 EXPORT_SYMBOL(inet_confirm_addr);
1235 
1236 /*
1237  *	Device notifier
1238  */
1239 
1240 int register_inetaddr_notifier(struct notifier_block *nb)
1241 {
1242 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1243 }
1244 EXPORT_SYMBOL(register_inetaddr_notifier);
1245 
1246 int unregister_inetaddr_notifier(struct notifier_block *nb)
1247 {
1248 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1249 }
1250 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1251 
1252 /* Rename ifa_labels for a device name change. Make some effort to preserve
1253  * existing alias numbering and to create unique labels if possible.
1254 */
1255 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1256 {
1257 	struct in_ifaddr *ifa;
1258 	int named = 0;
1259 
1260 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1261 		char old[IFNAMSIZ], *dot;
1262 
1263 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1264 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1265 		if (named++ == 0)
1266 			goto skip;
1267 		dot = strchr(old, ':');
1268 		if (dot == NULL) {
1269 			sprintf(old, ":%d", named);
1270 			dot = old;
1271 		}
1272 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1273 			strcat(ifa->ifa_label, dot);
1274 		else
1275 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1276 skip:
1277 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1278 	}
1279 }
1280 
/* An MTU is usable for IPv4 only if it is at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1285 
1286 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1287 					struct in_device *in_dev)
1288 
1289 {
1290 	struct in_ifaddr *ifa;
1291 
1292 	for (ifa = in_dev->ifa_list; ifa;
1293 	     ifa = ifa->ifa_next) {
1294 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1295 			 ifa->ifa_local, dev,
1296 			 ifa->ifa_local, NULL,
1297 			 dev->dev_addr, NULL);
1298 	}
1299 }
1300 
1301 /* Called only under RTNL semaphore */
1302 
1303 static int inetdev_event(struct notifier_block *this, unsigned long event,
1304 			 void *ptr)
1305 {
1306 	struct net_device *dev = ptr;
1307 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1308 
1309 	ASSERT_RTNL();
1310 
1311 	if (!in_dev) {
1312 		if (event == NETDEV_REGISTER) {
1313 			in_dev = inetdev_init(dev);
1314 			if (!in_dev)
1315 				return notifier_from_errno(-ENOMEM);
1316 			if (dev->flags & IFF_LOOPBACK) {
1317 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1318 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1319 			}
1320 		} else if (event == NETDEV_CHANGEMTU) {
1321 			/* Re-enabling IP */
1322 			if (inetdev_valid_mtu(dev->mtu))
1323 				in_dev = inetdev_init(dev);
1324 		}
1325 		goto out;
1326 	}
1327 
1328 	switch (event) {
1329 	case NETDEV_REGISTER:
1330 		pr_debug("%s: bug\n", __func__);
1331 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1332 		break;
1333 	case NETDEV_UP:
1334 		if (!inetdev_valid_mtu(dev->mtu))
1335 			break;
1336 		if (dev->flags & IFF_LOOPBACK) {
1337 			struct in_ifaddr *ifa = inet_alloc_ifa();
1338 
1339 			if (ifa) {
1340 				INIT_HLIST_NODE(&ifa->hash);
1341 				ifa->ifa_local =
1342 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1343 				ifa->ifa_prefixlen = 8;
1344 				ifa->ifa_mask = inet_make_mask(8);
1345 				in_dev_hold(in_dev);
1346 				ifa->ifa_dev = in_dev;
1347 				ifa->ifa_scope = RT_SCOPE_HOST;
1348 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1349 				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1350 						 INFINITY_LIFE_TIME);
1351 				inet_insert_ifa(ifa);
1352 			}
1353 		}
1354 		ip_mc_up(in_dev);
1355 		/* fall through */
1356 	case NETDEV_CHANGEADDR:
1357 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1358 			break;
1359 		/* fall through */
1360 	case NETDEV_NOTIFY_PEERS:
1361 		/* Send gratuitous ARP to notify of link change */
1362 		inetdev_send_gratuitous_arp(dev, in_dev);
1363 		break;
1364 	case NETDEV_DOWN:
1365 		ip_mc_down(in_dev);
1366 		break;
1367 	case NETDEV_PRE_TYPE_CHANGE:
1368 		ip_mc_unmap(in_dev);
1369 		break;
1370 	case NETDEV_POST_TYPE_CHANGE:
1371 		ip_mc_remap(in_dev);
1372 		break;
1373 	case NETDEV_CHANGEMTU:
1374 		if (inetdev_valid_mtu(dev->mtu))
1375 			break;
1376 		/* disable IP when MTU is not enough */
1377 	case NETDEV_UNREGISTER:
1378 		inetdev_destroy(in_dev);
1379 		break;
1380 	case NETDEV_CHANGENAME:
1381 		/* Do not notify about label change, this event is
1382 		 * not interesting to applications using netlink.
1383 		 */
1384 		inetdev_changename(dev, in_dev);
1385 
1386 		devinet_sysctl_unregister(in_dev);
1387 		devinet_sysctl_register(in_dev);
1388 		break;
1389 	}
1390 out:
1391 	return NOTIFY_DONE;
1392 }
1393 
1394 static struct notifier_block ip_netdev_notifier = {
1395 	.notifier_call = inetdev_event,
1396 };
1397 
1398 static size_t inet_nlmsg_size(void)
1399 {
1400 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1401 	       + nla_total_size(4) /* IFA_ADDRESS */
1402 	       + nla_total_size(4) /* IFA_LOCAL */
1403 	       + nla_total_size(4) /* IFA_BROADCAST */
1404 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1405 }
1406 
1407 static inline u32 cstamp_delta(unsigned long cstamp)
1408 {
1409 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1410 }
1411 
1412 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1413 			 unsigned long tstamp, u32 preferred, u32 valid)
1414 {
1415 	struct ifa_cacheinfo ci;
1416 
1417 	ci.cstamp = cstamp_delta(cstamp);
1418 	ci.tstamp = cstamp_delta(tstamp);
1419 	ci.ifa_prefered = preferred;
1420 	ci.ifa_valid = valid;
1421 
1422 	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1423 }
1424 
1425 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1426 			    u32 portid, u32 seq, int event, unsigned int flags)
1427 {
1428 	struct ifaddrmsg *ifm;
1429 	struct nlmsghdr  *nlh;
1430 	u32 preferred, valid;
1431 
1432 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1433 	if (nlh == NULL)
1434 		return -EMSGSIZE;
1435 
1436 	ifm = nlmsg_data(nlh);
1437 	ifm->ifa_family = AF_INET;
1438 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1439 	ifm->ifa_flags = ifa->ifa_flags;
1440 	ifm->ifa_scope = ifa->ifa_scope;
1441 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1442 
1443 	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1444 		preferred = ifa->ifa_preferred_lft;
1445 		valid = ifa->ifa_valid_lft;
1446 		if (preferred != INFINITY_LIFE_TIME) {
1447 			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1448 
1449 			if (preferred > tval)
1450 				preferred -= tval;
1451 			else
1452 				preferred = 0;
1453 			if (valid != INFINITY_LIFE_TIME) {
1454 				if (valid > tval)
1455 					valid -= tval;
1456 				else
1457 					valid = 0;
1458 			}
1459 		}
1460 	} else {
1461 		preferred = INFINITY_LIFE_TIME;
1462 		valid = INFINITY_LIFE_TIME;
1463 	}
1464 	if ((ifa->ifa_address &&
1465 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1466 	    (ifa->ifa_local &&
1467 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1468 	    (ifa->ifa_broadcast &&
1469 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1470 	    (ifa->ifa_label[0] &&
1471 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1472 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1473 			  preferred, valid))
1474 		goto nla_put_failure;
1475 
1476 	return nlmsg_end(skb, nlh);
1477 
1478 nla_put_failure:
1479 	nlmsg_cancel(skb, nlh);
1480 	return -EMSGSIZE;
1481 }
1482 
1483 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1484 {
1485 	struct net *net = sock_net(skb->sk);
1486 	int h, s_h;
1487 	int idx, s_idx;
1488 	int ip_idx, s_ip_idx;
1489 	struct net_device *dev;
1490 	struct in_device *in_dev;
1491 	struct in_ifaddr *ifa;
1492 	struct hlist_head *head;
1493 
1494 	s_h = cb->args[0];
1495 	s_idx = idx = cb->args[1];
1496 	s_ip_idx = ip_idx = cb->args[2];
1497 
1498 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1499 		idx = 0;
1500 		head = &net->dev_index_head[h];
1501 		rcu_read_lock();
1502 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1503 			  net->dev_base_seq;
1504 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1505 			if (idx < s_idx)
1506 				goto cont;
1507 			if (h > s_h || idx > s_idx)
1508 				s_ip_idx = 0;
1509 			in_dev = __in_dev_get_rcu(dev);
1510 			if (!in_dev)
1511 				goto cont;
1512 
1513 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1514 			     ifa = ifa->ifa_next, ip_idx++) {
1515 				if (ip_idx < s_ip_idx)
1516 					continue;
1517 				if (inet_fill_ifaddr(skb, ifa,
1518 					     NETLINK_CB(cb->skb).portid,
1519 					     cb->nlh->nlmsg_seq,
1520 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1521 					rcu_read_unlock();
1522 					goto done;
1523 				}
1524 				nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1525 			}
1526 cont:
1527 			idx++;
1528 		}
1529 		rcu_read_unlock();
1530 	}
1531 
1532 done:
1533 	cb->args[0] = h;
1534 	cb->args[1] = idx;
1535 	cb->args[2] = ip_idx;
1536 
1537 	return skb->len;
1538 }
1539 
1540 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1541 		      u32 portid)
1542 {
1543 	struct sk_buff *skb;
1544 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1545 	int err = -ENOBUFS;
1546 	struct net *net;
1547 
1548 	net = dev_net(ifa->ifa_dev->dev);
1549 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1550 	if (skb == NULL)
1551 		goto errout;
1552 
1553 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1554 	if (err < 0) {
1555 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1556 		WARN_ON(err == -EMSGSIZE);
1557 		kfree_skb(skb);
1558 		goto errout;
1559 	}
1560 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1561 	return;
1562 errout:
1563 	if (err < 0)
1564 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1565 }
1566 
1567 static size_t inet_get_link_af_size(const struct net_device *dev)
1568 {
1569 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1570 
1571 	if (!in_dev)
1572 		return 0;
1573 
1574 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1575 }
1576 
1577 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1578 {
1579 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1580 	struct nlattr *nla;
1581 	int i;
1582 
1583 	if (!in_dev)
1584 		return -ENODATA;
1585 
1586 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1587 	if (nla == NULL)
1588 		return -EMSGSIZE;
1589 
1590 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1591 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1592 
1593 	return 0;
1594 }
1595 
1596 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1597 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1598 };
1599 
1600 static int inet_validate_link_af(const struct net_device *dev,
1601 				 const struct nlattr *nla)
1602 {
1603 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1604 	int err, rem;
1605 
1606 	if (dev && !__in_dev_get_rtnl(dev))
1607 		return -EAFNOSUPPORT;
1608 
1609 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1610 	if (err < 0)
1611 		return err;
1612 
1613 	if (tb[IFLA_INET_CONF]) {
1614 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1615 			int cfgid = nla_type(a);
1616 
1617 			if (nla_len(a) < 4)
1618 				return -EINVAL;
1619 
1620 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1621 				return -EINVAL;
1622 		}
1623 	}
1624 
1625 	return 0;
1626 }
1627 
1628 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1629 {
1630 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1631 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1632 	int rem;
1633 
1634 	if (!in_dev)
1635 		return -EAFNOSUPPORT;
1636 
1637 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1638 		BUG();
1639 
1640 	if (tb[IFLA_INET_CONF]) {
1641 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1642 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1643 	}
1644 
1645 	return 0;
1646 }
1647 
1648 static int inet_netconf_msgsize_devconf(int type)
1649 {
1650 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1651 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1652 
1653 	/* type -1 is used for ALL */
1654 	if (type == -1 || type == NETCONFA_FORWARDING)
1655 		size += nla_total_size(4);
1656 	if (type == -1 || type == NETCONFA_RP_FILTER)
1657 		size += nla_total_size(4);
1658 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1659 		size += nla_total_size(4);
1660 
1661 	return size;
1662 }
1663 
1664 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1665 				     struct ipv4_devconf *devconf, u32 portid,
1666 				     u32 seq, int event, unsigned int flags,
1667 				     int type)
1668 {
1669 	struct nlmsghdr  *nlh;
1670 	struct netconfmsg *ncm;
1671 
1672 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1673 			flags);
1674 	if (nlh == NULL)
1675 		return -EMSGSIZE;
1676 
1677 	ncm = nlmsg_data(nlh);
1678 	ncm->ncm_family = AF_INET;
1679 
1680 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1681 		goto nla_put_failure;
1682 
1683 	/* type -1 is used for ALL */
1684 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1685 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1686 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1687 		goto nla_put_failure;
1688 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1689 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1690 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1691 		goto nla_put_failure;
1692 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1693 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1694 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1695 		goto nla_put_failure;
1696 
1697 	return nlmsg_end(skb, nlh);
1698 
1699 nla_put_failure:
1700 	nlmsg_cancel(skb, nlh);
1701 	return -EMSGSIZE;
1702 }
1703 
1704 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1705 				 struct ipv4_devconf *devconf)
1706 {
1707 	struct sk_buff *skb;
1708 	int err = -ENOBUFS;
1709 
1710 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1711 	if (skb == NULL)
1712 		goto errout;
1713 
1714 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1715 					RTM_NEWNETCONF, 0, type);
1716 	if (err < 0) {
1717 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1718 		WARN_ON(err == -EMSGSIZE);
1719 		kfree_skb(skb);
1720 		goto errout;
1721 	}
1722 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1723 	return;
1724 errout:
1725 	if (err < 0)
1726 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1727 }
1728 
1729 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1730 	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
1731 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
1732 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
1733 };
1734 
1735 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1736 				    struct nlmsghdr *nlh)
1737 {
1738 	struct net *net = sock_net(in_skb->sk);
1739 	struct nlattr *tb[NETCONFA_MAX+1];
1740 	struct netconfmsg *ncm;
1741 	struct sk_buff *skb;
1742 	struct ipv4_devconf *devconf;
1743 	struct in_device *in_dev;
1744 	struct net_device *dev;
1745 	int ifindex;
1746 	int err;
1747 
1748 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1749 			  devconf_ipv4_policy);
1750 	if (err < 0)
1751 		goto errout;
1752 
1753 	err = EINVAL;
1754 	if (!tb[NETCONFA_IFINDEX])
1755 		goto errout;
1756 
1757 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1758 	switch (ifindex) {
1759 	case NETCONFA_IFINDEX_ALL:
1760 		devconf = net->ipv4.devconf_all;
1761 		break;
1762 	case NETCONFA_IFINDEX_DEFAULT:
1763 		devconf = net->ipv4.devconf_dflt;
1764 		break;
1765 	default:
1766 		dev = __dev_get_by_index(net, ifindex);
1767 		if (dev == NULL)
1768 			goto errout;
1769 		in_dev = __in_dev_get_rtnl(dev);
1770 		if (in_dev == NULL)
1771 			goto errout;
1772 		devconf = &in_dev->cnf;
1773 		break;
1774 	}
1775 
1776 	err = -ENOBUFS;
1777 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1778 	if (skb == NULL)
1779 		goto errout;
1780 
1781 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1782 					NETLINK_CB(in_skb).portid,
1783 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1784 					-1);
1785 	if (err < 0) {
1786 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1787 		WARN_ON(err == -EMSGSIZE);
1788 		kfree_skb(skb);
1789 		goto errout;
1790 	}
1791 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1792 errout:
1793 	return err;
1794 }
1795 
1796 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1797 				     struct netlink_callback *cb)
1798 {
1799 	struct net *net = sock_net(skb->sk);
1800 	int h, s_h;
1801 	int idx, s_idx;
1802 	struct net_device *dev;
1803 	struct in_device *in_dev;
1804 	struct hlist_head *head;
1805 
1806 	s_h = cb->args[0];
1807 	s_idx = idx = cb->args[1];
1808 
1809 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1810 		idx = 0;
1811 		head = &net->dev_index_head[h];
1812 		rcu_read_lock();
1813 		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1814 			  net->dev_base_seq;
1815 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1816 			if (idx < s_idx)
1817 				goto cont;
1818 			in_dev = __in_dev_get_rcu(dev);
1819 			if (!in_dev)
1820 				goto cont;
1821 
1822 			if (inet_netconf_fill_devconf(skb, dev->ifindex,
1823 						      &in_dev->cnf,
1824 						      NETLINK_CB(cb->skb).portid,
1825 						      cb->nlh->nlmsg_seq,
1826 						      RTM_NEWNETCONF,
1827 						      NLM_F_MULTI,
1828 						      -1) <= 0) {
1829 				rcu_read_unlock();
1830 				goto done;
1831 			}
1832 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1833 cont:
1834 			idx++;
1835 		}
1836 		rcu_read_unlock();
1837 	}
1838 	if (h == NETDEV_HASHENTRIES) {
1839 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1840 					      net->ipv4.devconf_all,
1841 					      NETLINK_CB(cb->skb).portid,
1842 					      cb->nlh->nlmsg_seq,
1843 					      RTM_NEWNETCONF, NLM_F_MULTI,
1844 					      -1) <= 0)
1845 			goto done;
1846 		else
1847 			h++;
1848 	}
1849 	if (h == NETDEV_HASHENTRIES + 1) {
1850 		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1851 					      net->ipv4.devconf_dflt,
1852 					      NETLINK_CB(cb->skb).portid,
1853 					      cb->nlh->nlmsg_seq,
1854 					      RTM_NEWNETCONF, NLM_F_MULTI,
1855 					      -1) <= 0)
1856 			goto done;
1857 		else
1858 			h++;
1859 	}
1860 done:
1861 	cb->args[0] = h;
1862 	cb->args[1] = idx;
1863 
1864 	return skb->len;
1865 }
1866 
1867 #ifdef CONFIG_SYSCTL
1868 
1869 static void devinet_copy_dflt_conf(struct net *net, int i)
1870 {
1871 	struct net_device *dev;
1872 
1873 	rcu_read_lock();
1874 	for_each_netdev_rcu(net, dev) {
1875 		struct in_device *in_dev;
1876 
1877 		in_dev = __in_dev_get_rcu(dev);
1878 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1879 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1880 	}
1881 	rcu_read_unlock();
1882 }
1883 
1884 /* called with RTNL locked */
1885 static void inet_forward_change(struct net *net)
1886 {
1887 	struct net_device *dev;
1888 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1889 
1890 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1891 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1892 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1893 				    NETCONFA_IFINDEX_ALL,
1894 				    net->ipv4.devconf_all);
1895 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1896 				    NETCONFA_IFINDEX_DEFAULT,
1897 				    net->ipv4.devconf_dflt);
1898 
1899 	for_each_netdev(net, dev) {
1900 		struct in_device *in_dev;
1901 		if (on)
1902 			dev_disable_lro(dev);
1903 		rcu_read_lock();
1904 		in_dev = __in_dev_get_rcu(dev);
1905 		if (in_dev) {
1906 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1907 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1908 						    dev->ifindex, &in_dev->cnf);
1909 		}
1910 		rcu_read_unlock();
1911 	}
1912 }
1913 
1914 static int devinet_conf_proc(ctl_table *ctl, int write,
1915 			     void __user *buffer,
1916 			     size_t *lenp, loff_t *ppos)
1917 {
1918 	int old_value = *(int *)ctl->data;
1919 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1920 	int new_value = *(int *)ctl->data;
1921 
1922 	if (write) {
1923 		struct ipv4_devconf *cnf = ctl->extra1;
1924 		struct net *net = ctl->extra2;
1925 		int i = (int *)ctl->data - cnf->data;
1926 
1927 		set_bit(i, cnf->state);
1928 
1929 		if (cnf == net->ipv4.devconf_dflt)
1930 			devinet_copy_dflt_conf(net, i);
1931 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1932 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1933 			if ((new_value == 0) && (old_value != 0))
1934 				rt_cache_flush(net);
1935 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1936 		    new_value != old_value) {
1937 			int ifindex;
1938 
1939 			if (cnf == net->ipv4.devconf_dflt)
1940 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1941 			else if (cnf == net->ipv4.devconf_all)
1942 				ifindex = NETCONFA_IFINDEX_ALL;
1943 			else {
1944 				struct in_device *idev =
1945 					container_of(cnf, struct in_device,
1946 						     cnf);
1947 				ifindex = idev->dev->ifindex;
1948 			}
1949 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1950 						    ifindex, cnf);
1951 		}
1952 	}
1953 
1954 	return ret;
1955 }
1956 
1957 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1958 				  void __user *buffer,
1959 				  size_t *lenp, loff_t *ppos)
1960 {
1961 	int *valp = ctl->data;
1962 	int val = *valp;
1963 	loff_t pos = *ppos;
1964 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1965 
1966 	if (write && *valp != val) {
1967 		struct net *net = ctl->extra2;
1968 
1969 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1970 			if (!rtnl_trylock()) {
1971 				/* Restore the original values before restarting */
1972 				*valp = val;
1973 				*ppos = pos;
1974 				return restart_syscall();
1975 			}
1976 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1977 				inet_forward_change(net);
1978 			} else {
1979 				struct ipv4_devconf *cnf = ctl->extra1;
1980 				struct in_device *idev =
1981 					container_of(cnf, struct in_device, cnf);
1982 				if (*valp)
1983 					dev_disable_lro(idev->dev);
1984 				inet_netconf_notify_devconf(net,
1985 							    NETCONFA_FORWARDING,
1986 							    idev->dev->ifindex,
1987 							    cnf);
1988 			}
1989 			rtnl_unlock();
1990 			rt_cache_flush(net);
1991 		} else
1992 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1993 						    NETCONFA_IFINDEX_DEFAULT,
1994 						    net->ipv4.devconf_dflt);
1995 	}
1996 
1997 	return ret;
1998 }
1999 
2000 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2001 				void __user *buffer,
2002 				size_t *lenp, loff_t *ppos)
2003 {
2004 	int *valp = ctl->data;
2005 	int val = *valp;
2006 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2007 	struct net *net = ctl->extra2;
2008 
2009 	if (write && *valp != val)
2010 		rt_cache_flush(net);
2011 
2012 	return ret;
2013 }
2014 
/*
 * Template for one ctl_table entry bound to the global ipv4_devconf.
 * IPV4_DEVCONF_<attr> is 1-based, hence the "- 1" when indexing data[].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2037 
2038 static struct devinet_sysctl_table {
2039 	struct ctl_table_header *sysctl_header;
2040 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2041 } devinet_sysctl = {
2042 	.devinet_vars = {
2043 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2044 					     devinet_sysctl_forward),
2045 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2046 
2047 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2048 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2049 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2050 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2051 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2052 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2053 					"accept_source_route"),
2054 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2055 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2056 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2057 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2058 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2059 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2060 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2061 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2062 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2063 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2064 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2065 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2066 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2067 
2068 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2069 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2070 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2071 					      "force_igmp_version"),
2072 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2073 					      "promote_secondaries"),
2074 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2075 					      "route_localnet"),
2076 	},
2077 };
2078 
2079 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2080 					struct ipv4_devconf *p)
2081 {
2082 	int i;
2083 	struct devinet_sysctl_table *t;
2084 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2085 
2086 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2087 	if (!t)
2088 		goto out;
2089 
2090 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2091 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2092 		t->devinet_vars[i].extra1 = p;
2093 		t->devinet_vars[i].extra2 = net;
2094 	}
2095 
2096 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2097 
2098 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2099 	if (!t->sysctl_header)
2100 		goto free;
2101 
2102 	p->sysctl = t;
2103 	return 0;
2104 
2105 free:
2106 	kfree(t);
2107 out:
2108 	return -ENOBUFS;
2109 }
2110 
2111 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2112 {
2113 	struct devinet_sysctl_table *t = cnf->sysctl;
2114 
2115 	if (t == NULL)
2116 		return;
2117 
2118 	cnf->sysctl = NULL;
2119 	unregister_net_sysctl_table(t->sysctl_header);
2120 	kfree(t);
2121 }
2122 
2123 static void devinet_sysctl_register(struct in_device *idev)
2124 {
2125 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2126 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2127 					&idev->cnf);
2128 }
2129 
2130 static void devinet_sysctl_unregister(struct in_device *idev)
2131 {
2132 	__devinet_sysctl_unregister(&idev->cnf);
2133 	neigh_sysctl_unregister(idev->arp_parms);
2134 }
2135 
2136 static struct ctl_table ctl_forward_entry[] = {
2137 	{
2138 		.procname	= "ip_forward",
2139 		.data		= &ipv4_devconf.data[
2140 					IPV4_DEVCONF_FORWARDING - 1],
2141 		.maxlen		= sizeof(int),
2142 		.mode		= 0644,
2143 		.proc_handler	= devinet_sysctl_forward,
2144 		.extra1		= &ipv4_devconf,
2145 		.extra2		= &init_net,
2146 	},
2147 	{ },
2148 };
2149 #endif
2150 
2151 static __net_init int devinet_init_net(struct net *net)
2152 {
2153 	int err;
2154 	struct ipv4_devconf *all, *dflt;
2155 #ifdef CONFIG_SYSCTL
2156 	struct ctl_table *tbl = ctl_forward_entry;
2157 	struct ctl_table_header *forw_hdr;
2158 #endif
2159 
2160 	err = -ENOMEM;
2161 	all = &ipv4_devconf;
2162 	dflt = &ipv4_devconf_dflt;
2163 
2164 	if (!net_eq(net, &init_net)) {
2165 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2166 		if (all == NULL)
2167 			goto err_alloc_all;
2168 
2169 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2170 		if (dflt == NULL)
2171 			goto err_alloc_dflt;
2172 
2173 #ifdef CONFIG_SYSCTL
2174 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2175 		if (tbl == NULL)
2176 			goto err_alloc_ctl;
2177 
2178 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2179 		tbl[0].extra1 = all;
2180 		tbl[0].extra2 = net;
2181 #endif
2182 	}
2183 
2184 #ifdef CONFIG_SYSCTL
2185 	err = __devinet_sysctl_register(net, "all", all);
2186 	if (err < 0)
2187 		goto err_reg_all;
2188 
2189 	err = __devinet_sysctl_register(net, "default", dflt);
2190 	if (err < 0)
2191 		goto err_reg_dflt;
2192 
2193 	err = -ENOMEM;
2194 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2195 	if (forw_hdr == NULL)
2196 		goto err_reg_ctl;
2197 	net->ipv4.forw_hdr = forw_hdr;
2198 #endif
2199 
2200 	net->ipv4.devconf_all = all;
2201 	net->ipv4.devconf_dflt = dflt;
2202 	return 0;
2203 
2204 #ifdef CONFIG_SYSCTL
2205 err_reg_ctl:
2206 	__devinet_sysctl_unregister(dflt);
2207 err_reg_dflt:
2208 	__devinet_sysctl_unregister(all);
2209 err_reg_all:
2210 	if (tbl != ctl_forward_entry)
2211 		kfree(tbl);
2212 err_alloc_ctl:
2213 #endif
2214 	if (dflt != &ipv4_devconf_dflt)
2215 		kfree(dflt);
2216 err_alloc_dflt:
2217 	if (all != &ipv4_devconf)
2218 		kfree(all);
2219 err_alloc_all:
2220 	return err;
2221 }
2222 
2223 static __net_exit void devinet_exit_net(struct net *net)
2224 {
2225 #ifdef CONFIG_SYSCTL
2226 	struct ctl_table *tbl;
2227 
2228 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2229 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2230 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2231 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
2232 	kfree(tbl);
2233 #endif
2234 	kfree(net->ipv4.devconf_dflt);
2235 	kfree(net->ipv4.devconf_all);
2236 }
2237 
2238 static __net_initdata struct pernet_operations devinet_ops = {
2239 	.init = devinet_init_net,
2240 	.exit = devinet_exit_net,
2241 };
2242 
2243 static struct rtnl_af_ops inet_af_ops = {
2244 	.family		  = AF_INET,
2245 	.fill_link_af	  = inet_fill_link_af,
2246 	.get_link_af_size = inet_get_link_af_size,
2247 	.validate_link_af = inet_validate_link_af,
2248 	.set_link_af	  = inet_set_link_af,
2249 };
2250 
2251 void __init devinet_init(void)
2252 {
2253 	int i;
2254 
2255 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2256 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2257 
2258 	register_pernet_subsys(&devinet_ops);
2259 
2260 	register_gifconf(PF_INET, inet_gifconf);
2261 	register_netdevice_notifier(&ip_netdev_notifier);
2262 
2263 	schedule_delayed_work(&check_lifetime_work, 0);
2264 
2265 	rtnl_af_register(&inet_af_ops);
2266 
2267 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2268 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2269 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2270 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2271 		      inet_netconf_dump_devconf, NULL);
2272 }
2273 
2274