xref: /linux/net/ipv4/devinet.c (revision c54ea4918c2b7722d7242ea53271356501988a9b)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 
67 static struct ipv4_devconf ipv4_devconf = {
68 	.data = {
69 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
70 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
71 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
73 	},
74 };
75 
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77 	.data = {
78 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
79 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
80 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
81 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
82 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83 	},
84 };
85 
/*
 * Read attribute @attr from the devconf_dflt table hanging off @net.
 * @net is parenthesized so that any pointer expression (e.g. "&ns" or
 * "base + 1") expands correctly, not only a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88 
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 	[IFA_LOCAL]     	= { .type = NLA_U32 },
91 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
92 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
93 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94 };
95 
96 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
97  * value.  So if you change this define, make appropriate changes to
98  * inet_addr_hash as well.
99  */
100 #define IN4_ADDR_HSIZE	256
101 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
102 static DEFINE_SPINLOCK(inet_addr_hash_lock);
103 
104 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
105 {
106 	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
107 
108 	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
109 		(IN4_ADDR_HSIZE - 1));
110 }
111 
112 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
113 {
114 	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
115 
116 	spin_lock(&inet_addr_hash_lock);
117 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
118 	spin_unlock(&inet_addr_hash_lock);
119 }
120 
121 static void inet_hash_remove(struct in_ifaddr *ifa)
122 {
123 	spin_lock(&inet_addr_hash_lock);
124 	hlist_del_init_rcu(&ifa->hash);
125 	spin_unlock(&inet_addr_hash_lock);
126 }
127 
128 /**
129  * __ip_dev_find - find the first device with a given source address.
130  * @net: the net namespace
131  * @addr: the source address
132  * @devref: if true, take a reference on the found device
133  *
134  * If a caller uses devref=false, it should be protected by RCU, or RTNL
135  */
136 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
137 {
138 	unsigned int hash = inet_addr_hash(net, addr);
139 	struct net_device *result = NULL;
140 	struct in_ifaddr *ifa;
141 	struct hlist_node *node;
142 
143 	rcu_read_lock();
144 	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
145 		struct net_device *dev = ifa->ifa_dev->dev;
146 
147 		if (!net_eq(dev_net(dev), net))
148 			continue;
149 		if (ifa->ifa_local == addr) {
150 			result = dev;
151 			break;
152 		}
153 	}
154 	if (result && devref)
155 		dev_hold(result);
156 	rcu_read_unlock();
157 	return result;
158 }
159 EXPORT_SYMBOL(__ip_dev_find);
160 
161 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
162 
163 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
164 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
165 			 int destroy);
166 #ifdef CONFIG_SYSCTL
167 static void devinet_sysctl_register(struct in_device *idev);
168 static void devinet_sysctl_unregister(struct in_device *idev);
169 #else
170 static inline void devinet_sysctl_register(struct in_device *idev)
171 {
172 }
173 static inline void devinet_sysctl_unregister(struct in_device *idev)
174 {
175 }
176 #endif
177 
178 /* Locks all the inet devices. */
179 
180 static struct in_ifaddr *inet_alloc_ifa(void)
181 {
182 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
183 }
184 
185 static void inet_rcu_free_ifa(struct rcu_head *head)
186 {
187 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
188 	if (ifa->ifa_dev)
189 		in_dev_put(ifa->ifa_dev);
190 	kfree(ifa);
191 }
192 
193 static inline void inet_free_ifa(struct in_ifaddr *ifa)
194 {
195 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
196 }
197 
198 void in_dev_finish_destroy(struct in_device *idev)
199 {
200 	struct net_device *dev = idev->dev;
201 
202 	WARN_ON(idev->ifa_list);
203 	WARN_ON(idev->mc_list);
204 #ifdef NET_REFCNT_DEBUG
205 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
206 	       idev, dev ? dev->name : "NIL");
207 #endif
208 	dev_put(dev);
209 	if (!idev->dead)
210 		pr_err("Freeing alive in_device %p\n", idev);
211 	else
212 		kfree(idev);
213 }
214 EXPORT_SYMBOL(in_dev_finish_destroy);
215 
216 static struct in_device *inetdev_init(struct net_device *dev)
217 {
218 	struct in_device *in_dev;
219 
220 	ASSERT_RTNL();
221 
222 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
223 	if (!in_dev)
224 		goto out;
225 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
226 			sizeof(in_dev->cnf));
227 	in_dev->cnf.sysctl = NULL;
228 	in_dev->dev = dev;
229 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
230 	if (!in_dev->arp_parms)
231 		goto out_kfree;
232 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
233 		dev_disable_lro(dev);
234 	/* Reference in_dev->dev */
235 	dev_hold(dev);
236 	/* Account for reference dev->ip_ptr (below) */
237 	in_dev_hold(in_dev);
238 
239 	devinet_sysctl_register(in_dev);
240 	ip_mc_init_dev(in_dev);
241 	if (dev->flags & IFF_UP)
242 		ip_mc_up(in_dev);
243 
244 	/* we can receive as soon as ip_ptr is set -- do this last */
245 	rcu_assign_pointer(dev->ip_ptr, in_dev);
246 out:
247 	return in_dev;
248 out_kfree:
249 	kfree(in_dev);
250 	in_dev = NULL;
251 	goto out;
252 }
253 
254 static void in_dev_rcu_put(struct rcu_head *head)
255 {
256 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
257 	in_dev_put(idev);
258 }
259 
260 static void inetdev_destroy(struct in_device *in_dev)
261 {
262 	struct in_ifaddr *ifa;
263 	struct net_device *dev;
264 
265 	ASSERT_RTNL();
266 
267 	dev = in_dev->dev;
268 
269 	in_dev->dead = 1;
270 
271 	ip_mc_destroy_dev(in_dev);
272 
273 	while ((ifa = in_dev->ifa_list) != NULL) {
274 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
275 		inet_free_ifa(ifa);
276 	}
277 
278 	rcu_assign_pointer(dev->ip_ptr, NULL);
279 
280 	devinet_sysctl_unregister(in_dev);
281 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
282 	arp_ifdown(dev);
283 
284 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
285 }
286 
287 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
288 {
289 	rcu_read_lock();
290 	for_primary_ifa(in_dev) {
291 		if (inet_ifa_match(a, ifa)) {
292 			if (!b || inet_ifa_match(b, ifa)) {
293 				rcu_read_unlock();
294 				return 1;
295 			}
296 		}
297 	} endfor_ifa(in_dev);
298 	rcu_read_unlock();
299 	return 0;
300 }
301 
302 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
303 			 int destroy, struct nlmsghdr *nlh, u32 pid)
304 {
305 	struct in_ifaddr *promote = NULL;
306 	struct in_ifaddr *ifa, *ifa1 = *ifap;
307 	struct in_ifaddr *last_prim = in_dev->ifa_list;
308 	struct in_ifaddr *prev_prom = NULL;
309 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
310 
311 	ASSERT_RTNL();
312 
313 	/* 1. Deleting primary ifaddr forces deletion all secondaries
314 	 * unless alias promotion is set
315 	 **/
316 
317 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
318 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
319 
320 		while ((ifa = *ifap1) != NULL) {
321 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
322 			    ifa1->ifa_scope <= ifa->ifa_scope)
323 				last_prim = ifa;
324 
325 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
326 			    ifa1->ifa_mask != ifa->ifa_mask ||
327 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
328 				ifap1 = &ifa->ifa_next;
329 				prev_prom = ifa;
330 				continue;
331 			}
332 
333 			if (!do_promote) {
334 				inet_hash_remove(ifa);
335 				*ifap1 = ifa->ifa_next;
336 
337 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
338 				blocking_notifier_call_chain(&inetaddr_chain,
339 						NETDEV_DOWN, ifa);
340 				inet_free_ifa(ifa);
341 			} else {
342 				promote = ifa;
343 				break;
344 			}
345 		}
346 	}
347 
348 	/* 2. Unlink it */
349 
350 	*ifap = ifa1->ifa_next;
351 	inet_hash_remove(ifa1);
352 
353 	/* 3. Announce address deletion */
354 
355 	/* Send message first, then call notifier.
356 	   At first sight, FIB update triggered by notifier
357 	   will refer to already deleted ifaddr, that could confuse
358 	   netlink listeners. It is not true: look, gated sees
359 	   that route deleted and if it still thinks that ifaddr
360 	   is valid, it will try to restore deleted routes... Grr.
361 	   So that, this order is correct.
362 	 */
363 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
364 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
365 
366 	if (promote) {
367 
368 		if (prev_prom) {
369 			prev_prom->ifa_next = promote->ifa_next;
370 			promote->ifa_next = last_prim->ifa_next;
371 			last_prim->ifa_next = promote;
372 		}
373 
374 		promote->ifa_flags &= ~IFA_F_SECONDARY;
375 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
376 		blocking_notifier_call_chain(&inetaddr_chain,
377 				NETDEV_UP, promote);
378 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
379 			if (ifa1->ifa_mask != ifa->ifa_mask ||
380 			    !inet_ifa_match(ifa1->ifa_address, ifa))
381 					continue;
382 			fib_add_ifaddr(ifa);
383 		}
384 
385 	}
386 	if (destroy)
387 		inet_free_ifa(ifa1);
388 }
389 
390 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
391 			 int destroy)
392 {
393 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
394 }
395 
396 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
397 			     u32 pid)
398 {
399 	struct in_device *in_dev = ifa->ifa_dev;
400 	struct in_ifaddr *ifa1, **ifap, **last_primary;
401 
402 	ASSERT_RTNL();
403 
404 	if (!ifa->ifa_local) {
405 		inet_free_ifa(ifa);
406 		return 0;
407 	}
408 
409 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
410 	last_primary = &in_dev->ifa_list;
411 
412 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
413 	     ifap = &ifa1->ifa_next) {
414 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
415 		    ifa->ifa_scope <= ifa1->ifa_scope)
416 			last_primary = &ifa1->ifa_next;
417 		if (ifa1->ifa_mask == ifa->ifa_mask &&
418 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
419 			if (ifa1->ifa_local == ifa->ifa_local) {
420 				inet_free_ifa(ifa);
421 				return -EEXIST;
422 			}
423 			if (ifa1->ifa_scope != ifa->ifa_scope) {
424 				inet_free_ifa(ifa);
425 				return -EINVAL;
426 			}
427 			ifa->ifa_flags |= IFA_F_SECONDARY;
428 		}
429 	}
430 
431 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
432 		net_srandom(ifa->ifa_local);
433 		ifap = last_primary;
434 	}
435 
436 	ifa->ifa_next = *ifap;
437 	*ifap = ifa;
438 
439 	inet_hash_insert(dev_net(in_dev->dev), ifa);
440 
441 	/* Send message first, then call notifier.
442 	   Notifier will trigger FIB update, so that
443 	   listeners of netlink will know about new ifaddr */
444 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
445 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
446 
447 	return 0;
448 }
449 
450 static int inet_insert_ifa(struct in_ifaddr *ifa)
451 {
452 	return __inet_insert_ifa(ifa, NULL, 0);
453 }
454 
455 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
456 {
457 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
458 
459 	ASSERT_RTNL();
460 
461 	if (!in_dev) {
462 		inet_free_ifa(ifa);
463 		return -ENOBUFS;
464 	}
465 	ipv4_devconf_setall(in_dev);
466 	if (ifa->ifa_dev != in_dev) {
467 		WARN_ON(ifa->ifa_dev);
468 		in_dev_hold(in_dev);
469 		ifa->ifa_dev = in_dev;
470 	}
471 	if (ipv4_is_loopback(ifa->ifa_local))
472 		ifa->ifa_scope = RT_SCOPE_HOST;
473 	return inet_insert_ifa(ifa);
474 }
475 
476 /* Caller must hold RCU or RTNL :
477  * We dont take a reference on found in_device
478  */
479 struct in_device *inetdev_by_index(struct net *net, int ifindex)
480 {
481 	struct net_device *dev;
482 	struct in_device *in_dev = NULL;
483 
484 	rcu_read_lock();
485 	dev = dev_get_by_index_rcu(net, ifindex);
486 	if (dev)
487 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
488 	rcu_read_unlock();
489 	return in_dev;
490 }
491 EXPORT_SYMBOL(inetdev_by_index);
492 
493 /* Called only from RTNL semaphored context. No locks. */
494 
495 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
496 				    __be32 mask)
497 {
498 	ASSERT_RTNL();
499 
500 	for_primary_ifa(in_dev) {
501 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
502 			return ifa;
503 	} endfor_ifa(in_dev);
504 	return NULL;
505 }
506 
507 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
508 {
509 	struct net *net = sock_net(skb->sk);
510 	struct nlattr *tb[IFA_MAX+1];
511 	struct in_device *in_dev;
512 	struct ifaddrmsg *ifm;
513 	struct in_ifaddr *ifa, **ifap;
514 	int err = -EINVAL;
515 
516 	ASSERT_RTNL();
517 
518 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
519 	if (err < 0)
520 		goto errout;
521 
522 	ifm = nlmsg_data(nlh);
523 	in_dev = inetdev_by_index(net, ifm->ifa_index);
524 	if (in_dev == NULL) {
525 		err = -ENODEV;
526 		goto errout;
527 	}
528 
529 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
530 	     ifap = &ifa->ifa_next) {
531 		if (tb[IFA_LOCAL] &&
532 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
533 			continue;
534 
535 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
536 			continue;
537 
538 		if (tb[IFA_ADDRESS] &&
539 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
540 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
541 			continue;
542 
543 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
544 		return 0;
545 	}
546 
547 	err = -EADDRNOTAVAIL;
548 errout:
549 	return err;
550 }
551 
552 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
553 {
554 	struct nlattr *tb[IFA_MAX+1];
555 	struct in_ifaddr *ifa;
556 	struct ifaddrmsg *ifm;
557 	struct net_device *dev;
558 	struct in_device *in_dev;
559 	int err;
560 
561 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
562 	if (err < 0)
563 		goto errout;
564 
565 	ifm = nlmsg_data(nlh);
566 	err = -EINVAL;
567 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
568 		goto errout;
569 
570 	dev = __dev_get_by_index(net, ifm->ifa_index);
571 	err = -ENODEV;
572 	if (dev == NULL)
573 		goto errout;
574 
575 	in_dev = __in_dev_get_rtnl(dev);
576 	err = -ENOBUFS;
577 	if (in_dev == NULL)
578 		goto errout;
579 
580 	ifa = inet_alloc_ifa();
581 	if (ifa == NULL)
582 		/*
583 		 * A potential indev allocation can be left alive, it stays
584 		 * assigned to its device and is destroy with it.
585 		 */
586 		goto errout;
587 
588 	ipv4_devconf_setall(in_dev);
589 	in_dev_hold(in_dev);
590 
591 	if (tb[IFA_ADDRESS] == NULL)
592 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
593 
594 	INIT_HLIST_NODE(&ifa->hash);
595 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
596 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
597 	ifa->ifa_flags = ifm->ifa_flags;
598 	ifa->ifa_scope = ifm->ifa_scope;
599 	ifa->ifa_dev = in_dev;
600 
601 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
602 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
603 
604 	if (tb[IFA_BROADCAST])
605 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
606 
607 	if (tb[IFA_LABEL])
608 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
609 	else
610 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
611 
612 	return ifa;
613 
614 errout:
615 	return ERR_PTR(err);
616 }
617 
618 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
619 {
620 	struct net *net = sock_net(skb->sk);
621 	struct in_ifaddr *ifa;
622 
623 	ASSERT_RTNL();
624 
625 	ifa = rtm_to_ifaddr(net, nlh);
626 	if (IS_ERR(ifa))
627 		return PTR_ERR(ifa);
628 
629 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
630 }
631 
632 /*
633  *	Determine a default network mask, based on the IP address.
634  */
635 
636 static inline int inet_abc_len(__be32 addr)
637 {
638 	int rc = -1;	/* Something else, probably a multicast. */
639 
640 	if (ipv4_is_zeronet(addr))
641 		rc = 0;
642 	else {
643 		__u32 haddr = ntohl(addr);
644 
645 		if (IN_CLASSA(haddr))
646 			rc = 8;
647 		else if (IN_CLASSB(haddr))
648 			rc = 16;
649 		else if (IN_CLASSC(haddr))
650 			rc = 24;
651 	}
652 
653 	return rc;
654 }
655 
656 
657 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
658 {
659 	struct ifreq ifr;
660 	struct sockaddr_in sin_orig;
661 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
662 	struct in_device *in_dev;
663 	struct in_ifaddr **ifap = NULL;
664 	struct in_ifaddr *ifa = NULL;
665 	struct net_device *dev;
666 	char *colon;
667 	int ret = -EFAULT;
668 	int tryaddrmatch = 0;
669 
670 	/*
671 	 *	Fetch the caller's info block into kernel space
672 	 */
673 
674 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
675 		goto out;
676 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
677 
678 	/* save original address for comparison */
679 	memcpy(&sin_orig, sin, sizeof(*sin));
680 
681 	colon = strchr(ifr.ifr_name, ':');
682 	if (colon)
683 		*colon = 0;
684 
685 	dev_load(net, ifr.ifr_name);
686 
687 	switch (cmd) {
688 	case SIOCGIFADDR:	/* Get interface address */
689 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
690 	case SIOCGIFDSTADDR:	/* Get the destination address */
691 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
692 		/* Note that these ioctls will not sleep,
693 		   so that we do not impose a lock.
694 		   One day we will be forced to put shlock here (I mean SMP)
695 		 */
696 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
697 		memset(sin, 0, sizeof(*sin));
698 		sin->sin_family = AF_INET;
699 		break;
700 
701 	case SIOCSIFFLAGS:
702 		ret = -EACCES;
703 		if (!capable(CAP_NET_ADMIN))
704 			goto out;
705 		break;
706 	case SIOCSIFADDR:	/* Set interface address (and family) */
707 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
708 	case SIOCSIFDSTADDR:	/* Set the destination address */
709 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
710 		ret = -EACCES;
711 		if (!capable(CAP_NET_ADMIN))
712 			goto out;
713 		ret = -EINVAL;
714 		if (sin->sin_family != AF_INET)
715 			goto out;
716 		break;
717 	default:
718 		ret = -EINVAL;
719 		goto out;
720 	}
721 
722 	rtnl_lock();
723 
724 	ret = -ENODEV;
725 	dev = __dev_get_by_name(net, ifr.ifr_name);
726 	if (!dev)
727 		goto done;
728 
729 	if (colon)
730 		*colon = ':';
731 
732 	in_dev = __in_dev_get_rtnl(dev);
733 	if (in_dev) {
734 		if (tryaddrmatch) {
735 			/* Matthias Andree */
736 			/* compare label and address (4.4BSD style) */
737 			/* note: we only do this for a limited set of ioctls
738 			   and only if the original address family was AF_INET.
739 			   This is checked above. */
740 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
741 			     ifap = &ifa->ifa_next) {
742 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
743 				    sin_orig.sin_addr.s_addr ==
744 							ifa->ifa_local) {
745 					break; /* found */
746 				}
747 			}
748 		}
749 		/* we didn't get a match, maybe the application is
750 		   4.3BSD-style and passed in junk so we fall back to
751 		   comparing just the label */
752 		if (!ifa) {
753 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
754 			     ifap = &ifa->ifa_next)
755 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
756 					break;
757 		}
758 	}
759 
760 	ret = -EADDRNOTAVAIL;
761 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
762 		goto done;
763 
764 	switch (cmd) {
765 	case SIOCGIFADDR:	/* Get interface address */
766 		sin->sin_addr.s_addr = ifa->ifa_local;
767 		goto rarok;
768 
769 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
770 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
771 		goto rarok;
772 
773 	case SIOCGIFDSTADDR:	/* Get the destination address */
774 		sin->sin_addr.s_addr = ifa->ifa_address;
775 		goto rarok;
776 
777 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
778 		sin->sin_addr.s_addr = ifa->ifa_mask;
779 		goto rarok;
780 
781 	case SIOCSIFFLAGS:
782 		if (colon) {
783 			ret = -EADDRNOTAVAIL;
784 			if (!ifa)
785 				break;
786 			ret = 0;
787 			if (!(ifr.ifr_flags & IFF_UP))
788 				inet_del_ifa(in_dev, ifap, 1);
789 			break;
790 		}
791 		ret = dev_change_flags(dev, ifr.ifr_flags);
792 		break;
793 
794 	case SIOCSIFADDR:	/* Set interface address (and family) */
795 		ret = -EINVAL;
796 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
797 			break;
798 
799 		if (!ifa) {
800 			ret = -ENOBUFS;
801 			ifa = inet_alloc_ifa();
802 			INIT_HLIST_NODE(&ifa->hash);
803 			if (!ifa)
804 				break;
805 			if (colon)
806 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
807 			else
808 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
809 		} else {
810 			ret = 0;
811 			if (ifa->ifa_local == sin->sin_addr.s_addr)
812 				break;
813 			inet_del_ifa(in_dev, ifap, 0);
814 			ifa->ifa_broadcast = 0;
815 			ifa->ifa_scope = 0;
816 		}
817 
818 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
819 
820 		if (!(dev->flags & IFF_POINTOPOINT)) {
821 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
822 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
823 			if ((dev->flags & IFF_BROADCAST) &&
824 			    ifa->ifa_prefixlen < 31)
825 				ifa->ifa_broadcast = ifa->ifa_address |
826 						     ~ifa->ifa_mask;
827 		} else {
828 			ifa->ifa_prefixlen = 32;
829 			ifa->ifa_mask = inet_make_mask(32);
830 		}
831 		ret = inet_set_ifa(dev, ifa);
832 		break;
833 
834 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
835 		ret = 0;
836 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
837 			inet_del_ifa(in_dev, ifap, 0);
838 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
839 			inet_insert_ifa(ifa);
840 		}
841 		break;
842 
843 	case SIOCSIFDSTADDR:	/* Set the destination address */
844 		ret = 0;
845 		if (ifa->ifa_address == sin->sin_addr.s_addr)
846 			break;
847 		ret = -EINVAL;
848 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
849 			break;
850 		ret = 0;
851 		inet_del_ifa(in_dev, ifap, 0);
852 		ifa->ifa_address = sin->sin_addr.s_addr;
853 		inet_insert_ifa(ifa);
854 		break;
855 
856 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
857 
858 		/*
859 		 *	The mask we set must be legal.
860 		 */
861 		ret = -EINVAL;
862 		if (bad_mask(sin->sin_addr.s_addr, 0))
863 			break;
864 		ret = 0;
865 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
866 			__be32 old_mask = ifa->ifa_mask;
867 			inet_del_ifa(in_dev, ifap, 0);
868 			ifa->ifa_mask = sin->sin_addr.s_addr;
869 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
870 
871 			/* See if current broadcast address matches
872 			 * with current netmask, then recalculate
873 			 * the broadcast address. Otherwise it's a
874 			 * funny address, so don't touch it since
875 			 * the user seems to know what (s)he's doing...
876 			 */
877 			if ((dev->flags & IFF_BROADCAST) &&
878 			    (ifa->ifa_prefixlen < 31) &&
879 			    (ifa->ifa_broadcast ==
880 			     (ifa->ifa_local|~old_mask))) {
881 				ifa->ifa_broadcast = (ifa->ifa_local |
882 						      ~sin->sin_addr.s_addr);
883 			}
884 			inet_insert_ifa(ifa);
885 		}
886 		break;
887 	}
888 done:
889 	rtnl_unlock();
890 out:
891 	return ret;
892 rarok:
893 	rtnl_unlock();
894 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
895 	goto out;
896 }
897 
898 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
899 {
900 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
901 	struct in_ifaddr *ifa;
902 	struct ifreq ifr;
903 	int done = 0;
904 
905 	if (!in_dev)
906 		goto out;
907 
908 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
909 		if (!buf) {
910 			done += sizeof(ifr);
911 			continue;
912 		}
913 		if (len < (int) sizeof(ifr))
914 			break;
915 		memset(&ifr, 0, sizeof(struct ifreq));
916 		if (ifa->ifa_label)
917 			strcpy(ifr.ifr_name, ifa->ifa_label);
918 		else
919 			strcpy(ifr.ifr_name, dev->name);
920 
921 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
922 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
923 								ifa->ifa_local;
924 
925 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
926 			done = -EFAULT;
927 			break;
928 		}
929 		buf  += sizeof(struct ifreq);
930 		len  -= sizeof(struct ifreq);
931 		done += sizeof(struct ifreq);
932 	}
933 out:
934 	return done;
935 }
936 
937 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
938 {
939 	__be32 addr = 0;
940 	struct in_device *in_dev;
941 	struct net *net = dev_net(dev);
942 
943 	rcu_read_lock();
944 	in_dev = __in_dev_get_rcu(dev);
945 	if (!in_dev)
946 		goto no_in_dev;
947 
948 	for_primary_ifa(in_dev) {
949 		if (ifa->ifa_scope > scope)
950 			continue;
951 		if (!dst || inet_ifa_match(dst, ifa)) {
952 			addr = ifa->ifa_local;
953 			break;
954 		}
955 		if (!addr)
956 			addr = ifa->ifa_local;
957 	} endfor_ifa(in_dev);
958 
959 	if (addr)
960 		goto out_unlock;
961 no_in_dev:
962 
963 	/* Not loopback addresses on loopback should be preferred
964 	   in this case. It is importnat that lo is the first interface
965 	   in dev_base list.
966 	 */
967 	for_each_netdev_rcu(net, dev) {
968 		in_dev = __in_dev_get_rcu(dev);
969 		if (!in_dev)
970 			continue;
971 
972 		for_primary_ifa(in_dev) {
973 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
974 			    ifa->ifa_scope <= scope) {
975 				addr = ifa->ifa_local;
976 				goto out_unlock;
977 			}
978 		} endfor_ifa(in_dev);
979 	}
980 out_unlock:
981 	rcu_read_unlock();
982 	return addr;
983 }
984 EXPORT_SYMBOL(inet_select_addr);
985 
986 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
987 			      __be32 local, int scope)
988 {
989 	int same = 0;
990 	__be32 addr = 0;
991 
992 	for_ifa(in_dev) {
993 		if (!addr &&
994 		    (local == ifa->ifa_local || !local) &&
995 		    ifa->ifa_scope <= scope) {
996 			addr = ifa->ifa_local;
997 			if (same)
998 				break;
999 		}
1000 		if (!same) {
1001 			same = (!local || inet_ifa_match(local, ifa)) &&
1002 				(!dst || inet_ifa_match(dst, ifa));
1003 			if (same && addr) {
1004 				if (local || !dst)
1005 					break;
1006 				/* Is the selected addr into dst subnet? */
1007 				if (inet_ifa_match(addr, ifa))
1008 					break;
1009 				/* No, then can we use new local src? */
1010 				if (ifa->ifa_scope <= scope) {
1011 					addr = ifa->ifa_local;
1012 					break;
1013 				}
1014 				/* search for large dst subnet for addr */
1015 				same = 0;
1016 			}
1017 		}
1018 	} endfor_ifa(in_dev);
1019 
1020 	return same ? addr : 0;
1021 }
1022 
1023 /*
1024  * Confirm that local IP address exists using wildcards:
1025  * - in_dev: only on this interface, 0=any interface
1026  * - dst: only in the same subnet as dst, 0=any dst
1027  * - local: address, 0=autoselect the local address
1028  * - scope: maximum allowed scope value for the local address
1029  */
1030 __be32 inet_confirm_addr(struct in_device *in_dev,
1031 			 __be32 dst, __be32 local, int scope)
1032 {
1033 	__be32 addr = 0;
1034 	struct net_device *dev;
1035 	struct net *net;
1036 
1037 	if (scope != RT_SCOPE_LINK)
1038 		return confirm_addr_indev(in_dev, dst, local, scope);
1039 
1040 	net = dev_net(in_dev->dev);
1041 	rcu_read_lock();
1042 	for_each_netdev_rcu(net, dev) {
1043 		in_dev = __in_dev_get_rcu(dev);
1044 		if (in_dev) {
1045 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1046 			if (addr)
1047 				break;
1048 		}
1049 	}
1050 	rcu_read_unlock();
1051 
1052 	return addr;
1053 }
1054 
1055 /*
1056  *	Device notifier
1057  */
1058 
1059 int register_inetaddr_notifier(struct notifier_block *nb)
1060 {
1061 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1062 }
1063 EXPORT_SYMBOL(register_inetaddr_notifier);
1064 
1065 int unregister_inetaddr_notifier(struct notifier_block *nb)
1066 {
1067 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1068 }
1069 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1070 
1071 /* Rename ifa_labels for a device name change. Make some effort to preserve
1072  * existing alias numbering and to create unique labels if possible.
1073 */
1074 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1075 {
1076 	struct in_ifaddr *ifa;
1077 	int named = 0;
1078 
1079 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1080 		char old[IFNAMSIZ], *dot;
1081 
1082 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1083 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1084 		if (named++ == 0)
1085 			goto skip;
1086 		dot = strchr(old, ':');
1087 		if (dot == NULL) {
1088 			sprintf(old, ":%d", named);
1089 			dot = old;
1090 		}
1091 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1092 			strcat(ifa->ifa_label, dot);
1093 		else
1094 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1095 skip:
1096 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1097 	}
1098 }
1099 
/*
 * Tell whether @mtu is large enough for IPv4 on the device: true for
 * 68 and above.  (68 is presumably the RFC 791 minimum -- confirm.)
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1104 
1105 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1106 					struct in_device *in_dev)
1107 
1108 {
1109 	struct in_ifaddr *ifa = in_dev->ifa_list;
1110 
1111 	if (!ifa)
1112 		return;
1113 
1114 	arp_send(ARPOP_REQUEST, ETH_P_ARP,
1115 		 ifa->ifa_local, dev,
1116 		 ifa->ifa_local, NULL,
1117 		 dev->dev_addr, NULL);
1118 }
1119 
1120 /* Called only under RTNL semaphore */
1121 
1122 static int inetdev_event(struct notifier_block *this, unsigned long event,
1123 			 void *ptr)
1124 {
1125 	struct net_device *dev = ptr;
1126 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1127 
1128 	ASSERT_RTNL();
1129 
1130 	if (!in_dev) {
1131 		if (event == NETDEV_REGISTER) {
1132 			in_dev = inetdev_init(dev);
1133 			if (!in_dev)
1134 				return notifier_from_errno(-ENOMEM);
1135 			if (dev->flags & IFF_LOOPBACK) {
1136 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1137 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1138 			}
1139 		} else if (event == NETDEV_CHANGEMTU) {
1140 			/* Re-enabling IP */
1141 			if (inetdev_valid_mtu(dev->mtu))
1142 				in_dev = inetdev_init(dev);
1143 		}
1144 		goto out;
1145 	}
1146 
1147 	switch (event) {
1148 	case NETDEV_REGISTER:
1149 		printk(KERN_DEBUG "inetdev_event: bug\n");
1150 		rcu_assign_pointer(dev->ip_ptr, NULL);
1151 		break;
1152 	case NETDEV_UP:
1153 		if (!inetdev_valid_mtu(dev->mtu))
1154 			break;
1155 		if (dev->flags & IFF_LOOPBACK) {
1156 			struct in_ifaddr *ifa = inet_alloc_ifa();
1157 
1158 			if (ifa) {
1159 				INIT_HLIST_NODE(&ifa->hash);
1160 				ifa->ifa_local =
1161 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1162 				ifa->ifa_prefixlen = 8;
1163 				ifa->ifa_mask = inet_make_mask(8);
1164 				in_dev_hold(in_dev);
1165 				ifa->ifa_dev = in_dev;
1166 				ifa->ifa_scope = RT_SCOPE_HOST;
1167 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1168 				inet_insert_ifa(ifa);
1169 			}
1170 		}
1171 		ip_mc_up(in_dev);
1172 		/* fall through */
1173 	case NETDEV_CHANGEADDR:
1174 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1175 			break;
1176 		/* fall through */
1177 	case NETDEV_NOTIFY_PEERS:
1178 		/* Send gratuitous ARP to notify of link change */
1179 		inetdev_send_gratuitous_arp(dev, in_dev);
1180 		break;
1181 	case NETDEV_DOWN:
1182 		ip_mc_down(in_dev);
1183 		break;
1184 	case NETDEV_PRE_TYPE_CHANGE:
1185 		ip_mc_unmap(in_dev);
1186 		break;
1187 	case NETDEV_POST_TYPE_CHANGE:
1188 		ip_mc_remap(in_dev);
1189 		break;
1190 	case NETDEV_CHANGEMTU:
1191 		if (inetdev_valid_mtu(dev->mtu))
1192 			break;
1193 		/* disable IP when MTU is not enough */
1194 	case NETDEV_UNREGISTER:
1195 		inetdev_destroy(in_dev);
1196 		break;
1197 	case NETDEV_CHANGENAME:
1198 		/* Do not notify about label change, this event is
1199 		 * not interesting to applications using netlink.
1200 		 */
1201 		inetdev_changename(dev, in_dev);
1202 
1203 		devinet_sysctl_unregister(in_dev);
1204 		devinet_sysctl_register(in_dev);
1205 		break;
1206 	}
1207 out:
1208 	return NOTIFY_DONE;
1209 }
1210 
1211 static struct notifier_block ip_netdev_notifier = {
1212 	.notifier_call = inetdev_event,
1213 };
1214 
1215 static inline size_t inet_nlmsg_size(void)
1216 {
1217 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1218 	       + nla_total_size(4) /* IFA_ADDRESS */
1219 	       + nla_total_size(4) /* IFA_LOCAL */
1220 	       + nla_total_size(4) /* IFA_BROADCAST */
1221 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1222 }
1223 
1224 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1225 			    u32 pid, u32 seq, int event, unsigned int flags)
1226 {
1227 	struct ifaddrmsg *ifm;
1228 	struct nlmsghdr  *nlh;
1229 
1230 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1231 	if (nlh == NULL)
1232 		return -EMSGSIZE;
1233 
1234 	ifm = nlmsg_data(nlh);
1235 	ifm->ifa_family = AF_INET;
1236 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1237 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1238 	ifm->ifa_scope = ifa->ifa_scope;
1239 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1240 
1241 	if (ifa->ifa_address)
1242 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1243 
1244 	if (ifa->ifa_local)
1245 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1246 
1247 	if (ifa->ifa_broadcast)
1248 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1249 
1250 	if (ifa->ifa_label[0])
1251 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1252 
1253 	return nlmsg_end(skb, nlh);
1254 
1255 nla_put_failure:
1256 	nlmsg_cancel(skb, nlh);
1257 	return -EMSGSIZE;
1258 }
1259 
1260 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1261 {
1262 	struct net *net = sock_net(skb->sk);
1263 	int h, s_h;
1264 	int idx, s_idx;
1265 	int ip_idx, s_ip_idx;
1266 	struct net_device *dev;
1267 	struct in_device *in_dev;
1268 	struct in_ifaddr *ifa;
1269 	struct hlist_head *head;
1270 	struct hlist_node *node;
1271 
1272 	s_h = cb->args[0];
1273 	s_idx = idx = cb->args[1];
1274 	s_ip_idx = ip_idx = cb->args[2];
1275 
1276 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1277 		idx = 0;
1278 		head = &net->dev_index_head[h];
1279 		rcu_read_lock();
1280 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1281 			if (idx < s_idx)
1282 				goto cont;
1283 			if (h > s_h || idx > s_idx)
1284 				s_ip_idx = 0;
1285 			in_dev = __in_dev_get_rcu(dev);
1286 			if (!in_dev)
1287 				goto cont;
1288 
1289 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1290 			     ifa = ifa->ifa_next, ip_idx++) {
1291 				if (ip_idx < s_ip_idx)
1292 					continue;
1293 				if (inet_fill_ifaddr(skb, ifa,
1294 					     NETLINK_CB(cb->skb).pid,
1295 					     cb->nlh->nlmsg_seq,
1296 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1297 					rcu_read_unlock();
1298 					goto done;
1299 				}
1300 			}
1301 cont:
1302 			idx++;
1303 		}
1304 		rcu_read_unlock();
1305 	}
1306 
1307 done:
1308 	cb->args[0] = h;
1309 	cb->args[1] = idx;
1310 	cb->args[2] = ip_idx;
1311 
1312 	return skb->len;
1313 }
1314 
1315 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1316 		      u32 pid)
1317 {
1318 	struct sk_buff *skb;
1319 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1320 	int err = -ENOBUFS;
1321 	struct net *net;
1322 
1323 	net = dev_net(ifa->ifa_dev->dev);
1324 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1325 	if (skb == NULL)
1326 		goto errout;
1327 
1328 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1329 	if (err < 0) {
1330 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1331 		WARN_ON(err == -EMSGSIZE);
1332 		kfree_skb(skb);
1333 		goto errout;
1334 	}
1335 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1336 	return;
1337 errout:
1338 	if (err < 0)
1339 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1340 }
1341 
1342 static size_t inet_get_link_af_size(const struct net_device *dev)
1343 {
1344 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1345 
1346 	if (!in_dev)
1347 		return 0;
1348 
1349 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1350 }
1351 
1352 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1353 {
1354 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1355 	struct nlattr *nla;
1356 	int i;
1357 
1358 	if (!in_dev)
1359 		return -ENODATA;
1360 
1361 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1362 	if (nla == NULL)
1363 		return -EMSGSIZE;
1364 
1365 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1366 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1367 
1368 	return 0;
1369 }
1370 
1371 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1372 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1373 };
1374 
1375 static int inet_validate_link_af(const struct net_device *dev,
1376 				 const struct nlattr *nla)
1377 {
1378 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1379 	int err, rem;
1380 
1381 	if (dev && !__in_dev_get_rtnl(dev))
1382 		return -EAFNOSUPPORT;
1383 
1384 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1385 	if (err < 0)
1386 		return err;
1387 
1388 	if (tb[IFLA_INET_CONF]) {
1389 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1390 			int cfgid = nla_type(a);
1391 
1392 			if (nla_len(a) < 4)
1393 				return -EINVAL;
1394 
1395 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1396 				return -EINVAL;
1397 		}
1398 	}
1399 
1400 	return 0;
1401 }
1402 
1403 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1404 {
1405 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1406 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407 	int rem;
1408 
1409 	if (!in_dev)
1410 		return -EAFNOSUPPORT;
1411 
1412 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1413 		BUG();
1414 
1415 	if (tb[IFLA_INET_CONF]) {
1416 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1417 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1418 	}
1419 
1420 	return 0;
1421 }
1422 
1423 #ifdef CONFIG_SYSCTL
1424 
1425 static void devinet_copy_dflt_conf(struct net *net, int i)
1426 {
1427 	struct net_device *dev;
1428 
1429 	rcu_read_lock();
1430 	for_each_netdev_rcu(net, dev) {
1431 		struct in_device *in_dev;
1432 
1433 		in_dev = __in_dev_get_rcu(dev);
1434 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1435 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1436 	}
1437 	rcu_read_unlock();
1438 }
1439 
1440 /* called with RTNL locked */
1441 static void inet_forward_change(struct net *net)
1442 {
1443 	struct net_device *dev;
1444 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1445 
1446 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1447 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1448 
1449 	for_each_netdev(net, dev) {
1450 		struct in_device *in_dev;
1451 		if (on)
1452 			dev_disable_lro(dev);
1453 		rcu_read_lock();
1454 		in_dev = __in_dev_get_rcu(dev);
1455 		if (in_dev)
1456 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1457 		rcu_read_unlock();
1458 	}
1459 }
1460 
1461 static int devinet_conf_proc(ctl_table *ctl, int write,
1462 			     void __user *buffer,
1463 			     size_t *lenp, loff_t *ppos)
1464 {
1465 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1466 
1467 	if (write) {
1468 		struct ipv4_devconf *cnf = ctl->extra1;
1469 		struct net *net = ctl->extra2;
1470 		int i = (int *)ctl->data - cnf->data;
1471 
1472 		set_bit(i, cnf->state);
1473 
1474 		if (cnf == net->ipv4.devconf_dflt)
1475 			devinet_copy_dflt_conf(net, i);
1476 	}
1477 
1478 	return ret;
1479 }
1480 
1481 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1482 				  void __user *buffer,
1483 				  size_t *lenp, loff_t *ppos)
1484 {
1485 	int *valp = ctl->data;
1486 	int val = *valp;
1487 	loff_t pos = *ppos;
1488 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489 
1490 	if (write && *valp != val) {
1491 		struct net *net = ctl->extra2;
1492 
1493 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1494 			if (!rtnl_trylock()) {
1495 				/* Restore the original values before restarting */
1496 				*valp = val;
1497 				*ppos = pos;
1498 				return restart_syscall();
1499 			}
1500 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1501 				inet_forward_change(net);
1502 			} else if (*valp) {
1503 				struct ipv4_devconf *cnf = ctl->extra1;
1504 				struct in_device *idev =
1505 					container_of(cnf, struct in_device, cnf);
1506 				dev_disable_lro(idev->dev);
1507 			}
1508 			rtnl_unlock();
1509 			rt_cache_flush(net, 0);
1510 		}
1511 	}
1512 
1513 	return ret;
1514 }
1515 
1516 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1517 				void __user *buffer,
1518 				size_t *lenp, loff_t *ppos)
1519 {
1520 	int *valp = ctl->data;
1521 	int val = *valp;
1522 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1523 	struct net *net = ctl->extra2;
1524 
1525 	if (write && *valp != val)
1526 		rt_cache_flush(net, 0);
1527 
1528 	return ret;
1529 }
1530 
/*
 * Build one ctl_table initializer for devconf attribute @attr.  The entry
 * is named @name, has mode @mval and handler @proc, and points at the
 * matching slot of the global ipv4_devconf (slot index is the
 * IPV4_DEVCONF_<attr> id minus one).
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1553 
1554 static struct devinet_sysctl_table {
1555 	struct ctl_table_header *sysctl_header;
1556 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1557 	char *dev_name;
1558 } devinet_sysctl = {
1559 	.devinet_vars = {
1560 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1561 					     devinet_sysctl_forward),
1562 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1563 
1564 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1565 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1566 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1567 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1568 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1569 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1570 					"accept_source_route"),
1571 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1572 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1573 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1574 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1575 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1576 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1577 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1578 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1579 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1580 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1581 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1582 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1583 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1584 
1585 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1586 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1587 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1588 					      "force_igmp_version"),
1589 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1590 					      "promote_secondaries"),
1591 	},
1592 };
1593 
1594 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1595 					struct ipv4_devconf *p)
1596 {
1597 	int i;
1598 	struct devinet_sysctl_table *t;
1599 
1600 #define DEVINET_CTL_PATH_DEV	3
1601 
1602 	struct ctl_path devinet_ctl_path[] = {
1603 		{ .procname = "net",  },
1604 		{ .procname = "ipv4", },
1605 		{ .procname = "conf", },
1606 		{ /* to be set */ },
1607 		{ },
1608 	};
1609 
1610 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1611 	if (!t)
1612 		goto out;
1613 
1614 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1615 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1616 		t->devinet_vars[i].extra1 = p;
1617 		t->devinet_vars[i].extra2 = net;
1618 	}
1619 
1620 	/*
1621 	 * Make a copy of dev_name, because '.procname' is regarded as const
1622 	 * by sysctl and we wouldn't want anyone to change it under our feet
1623 	 * (see SIOCSIFNAME).
1624 	 */
1625 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1626 	if (!t->dev_name)
1627 		goto free;
1628 
1629 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1630 
1631 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1632 			t->devinet_vars);
1633 	if (!t->sysctl_header)
1634 		goto free_procname;
1635 
1636 	p->sysctl = t;
1637 	return 0;
1638 
1639 free_procname:
1640 	kfree(t->dev_name);
1641 free:
1642 	kfree(t);
1643 out:
1644 	return -ENOBUFS;
1645 }
1646 
1647 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1648 {
1649 	struct devinet_sysctl_table *t = cnf->sysctl;
1650 
1651 	if (t == NULL)
1652 		return;
1653 
1654 	cnf->sysctl = NULL;
1655 	unregister_sysctl_table(t->sysctl_header);
1656 	kfree(t->dev_name);
1657 	kfree(t);
1658 }
1659 
1660 static void devinet_sysctl_register(struct in_device *idev)
1661 {
1662 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1663 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1664 					&idev->cnf);
1665 }
1666 
1667 static void devinet_sysctl_unregister(struct in_device *idev)
1668 {
1669 	__devinet_sysctl_unregister(&idev->cnf);
1670 	neigh_sysctl_unregister(idev->arp_parms);
1671 }
1672 
1673 static struct ctl_table ctl_forward_entry[] = {
1674 	{
1675 		.procname	= "ip_forward",
1676 		.data		= &ipv4_devconf.data[
1677 					IPV4_DEVCONF_FORWARDING - 1],
1678 		.maxlen		= sizeof(int),
1679 		.mode		= 0644,
1680 		.proc_handler	= devinet_sysctl_forward,
1681 		.extra1		= &ipv4_devconf,
1682 		.extra2		= &init_net,
1683 	},
1684 	{ },
1685 };
1686 
1687 static __net_initdata struct ctl_path net_ipv4_path[] = {
1688 	{ .procname = "net", },
1689 	{ .procname = "ipv4", },
1690 	{ },
1691 };
1692 #endif
1693 
1694 static __net_init int devinet_init_net(struct net *net)
1695 {
1696 	int err;
1697 	struct ipv4_devconf *all, *dflt;
1698 #ifdef CONFIG_SYSCTL
1699 	struct ctl_table *tbl = ctl_forward_entry;
1700 	struct ctl_table_header *forw_hdr;
1701 #endif
1702 
1703 	err = -ENOMEM;
1704 	all = &ipv4_devconf;
1705 	dflt = &ipv4_devconf_dflt;
1706 
1707 	if (!net_eq(net, &init_net)) {
1708 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1709 		if (all == NULL)
1710 			goto err_alloc_all;
1711 
1712 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1713 		if (dflt == NULL)
1714 			goto err_alloc_dflt;
1715 
1716 #ifdef CONFIG_SYSCTL
1717 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1718 		if (tbl == NULL)
1719 			goto err_alloc_ctl;
1720 
1721 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1722 		tbl[0].extra1 = all;
1723 		tbl[0].extra2 = net;
1724 #endif
1725 	}
1726 
1727 #ifdef CONFIG_SYSCTL
1728 	err = __devinet_sysctl_register(net, "all", all);
1729 	if (err < 0)
1730 		goto err_reg_all;
1731 
1732 	err = __devinet_sysctl_register(net, "default", dflt);
1733 	if (err < 0)
1734 		goto err_reg_dflt;
1735 
1736 	err = -ENOMEM;
1737 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1738 	if (forw_hdr == NULL)
1739 		goto err_reg_ctl;
1740 	net->ipv4.forw_hdr = forw_hdr;
1741 #endif
1742 
1743 	net->ipv4.devconf_all = all;
1744 	net->ipv4.devconf_dflt = dflt;
1745 	return 0;
1746 
1747 #ifdef CONFIG_SYSCTL
1748 err_reg_ctl:
1749 	__devinet_sysctl_unregister(dflt);
1750 err_reg_dflt:
1751 	__devinet_sysctl_unregister(all);
1752 err_reg_all:
1753 	if (tbl != ctl_forward_entry)
1754 		kfree(tbl);
1755 err_alloc_ctl:
1756 #endif
1757 	if (dflt != &ipv4_devconf_dflt)
1758 		kfree(dflt);
1759 err_alloc_dflt:
1760 	if (all != &ipv4_devconf)
1761 		kfree(all);
1762 err_alloc_all:
1763 	return err;
1764 }
1765 
1766 static __net_exit void devinet_exit_net(struct net *net)
1767 {
1768 #ifdef CONFIG_SYSCTL
1769 	struct ctl_table *tbl;
1770 
1771 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1772 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1773 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1774 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1775 	kfree(tbl);
1776 #endif
1777 	kfree(net->ipv4.devconf_dflt);
1778 	kfree(net->ipv4.devconf_all);
1779 }
1780 
1781 static __net_initdata struct pernet_operations devinet_ops = {
1782 	.init = devinet_init_net,
1783 	.exit = devinet_exit_net,
1784 };
1785 
1786 static struct rtnl_af_ops inet_af_ops = {
1787 	.family		  = AF_INET,
1788 	.fill_link_af	  = inet_fill_link_af,
1789 	.get_link_af_size = inet_get_link_af_size,
1790 	.validate_link_af = inet_validate_link_af,
1791 	.set_link_af	  = inet_set_link_af,
1792 };
1793 
1794 void __init devinet_init(void)
1795 {
1796 	int i;
1797 
1798 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1799 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1800 
1801 	register_pernet_subsys(&devinet_ops);
1802 
1803 	register_gifconf(PF_INET, inet_gifconf);
1804 	register_netdevice_notifier(&ip_netdev_notifier);
1805 
1806 	rtnl_af_register(&inet_af_ops);
1807 
1808 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1809 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1810 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1811 }
1812 
1813