xref: /linux/net/ipv4/devinet.c (revision 9ffc93f203c18a70623f21950f1dd473c9ec48cd)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65 
66 #include "fib_lookup.h"
67 
/*
 * File-local ipv4_devconf instance.  The .data array is indexed by
 * IPV4_DEVCONF_* minus one; the three redirect knobs and SHARED_MEDIA
 * start out as 1, every slot not listed here is implicitly 0.
 * NOTE(review): the users of this object are outside this view.
 */
68 static struct ipv4_devconf ipv4_devconf = {
69 	.data = {
70 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74 	},
75 };
76 
/*
 * Second file-local ipv4_devconf instance.  Same initial values as
 * ipv4_devconf, plus ACCEPT_SOURCE_ROUTE set to 1.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt
 * (which inetdev_init() copies into each new in_device) -- the code
 * that wires it up is outside this view, confirm there.
 */
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78 	.data = {
79 		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80 		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81 		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
84 	},
85 };
86 
/* Read devconf attribute @attr from the configuration block that
 * @net->ipv4.devconf_dflt points to.
 */
87 #define IPV4_DEVCONF_DFLT(net, attr) \
88 	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
89 
/* Netlink attribute policy handed to nlmsg_parse() by the RTM address
 * handlers in this file: local/peer/broadcast addresses are 32-bit
 * values, the label is a string limited to IFNAMSIZ - 1 bytes.
 */
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91 	[IFA_LOCAL]     	= { .type = NLA_U32 },
92 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
93 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
94 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96 
/* Hash table of in_ifaddr entries, linked through their ->hash member.
 * Insert and remove (inet_hash_insert()/inet_hash_remove()) take
 * inet_addr_hash_lock and use the _rcu hlist helpers; __ip_dev_find()
 * walks a bucket under rcu_read_lock().
 */
97 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
98  * value.  So if you change this define, make appropriate changes to
99  * inet_addr_hash as well.
100  */
101 #define IN4_ADDR_HSIZE	256
102 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
103 static DEFINE_SPINLOCK(inet_addr_hash_lock);
104 
105 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
106 {
107 	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
108 
109 	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
110 		(IN4_ADDR_HSIZE - 1));
111 }
112 
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115 	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
116 
117 	spin_lock(&inet_addr_hash_lock);
118 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 	spin_unlock(&inet_addr_hash_lock);
120 }
121 
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124 	spin_lock(&inet_addr_hash_lock);
125 	hlist_del_init_rcu(&ifa->hash);
126 	spin_unlock(&inet_addr_hash_lock);
127 }
128 
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139 	unsigned int hash = inet_addr_hash(net, addr);
140 	struct net_device *result = NULL;
141 	struct in_ifaddr *ifa;
142 	struct hlist_node *node;
143 
144 	rcu_read_lock();
145 	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
146 		struct net_device *dev = ifa->ifa_dev->dev;
147 
148 		if (!net_eq(dev_net(dev), net))
149 			continue;
150 		if (ifa->ifa_local == addr) {
151 			result = dev;
152 			break;
153 		}
154 	}
155 	if (!result) {
156 		struct flowi4 fl4 = { .daddr = addr };
157 		struct fib_result res = { 0 };
158 		struct fib_table *local;
159 
160 		/* Fallback to FIB local table so that communication
161 		 * over loopback subnets work.
162 		 */
163 		local = fib_get_table(net, RT_TABLE_LOCAL);
164 		if (local &&
165 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 		    res.type == RTN_LOCAL)
167 			result = FIB_RES_DEV(res);
168 	}
169 	if (result && devref)
170 		dev_hold(result);
171 	rcu_read_unlock();
172 	return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175 
/* Forward declaration; the definition is outside this view.  Callers in
 * this file pass RTM_NEWADDR or RTM_DELADDR as the event.
 */
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177 
/* Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when addresses are inserted or deleted; subscribers use
 * register_inetaddr_notifier()/unregister_inetaddr_notifier().
 */
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180 			 int destroy);
/* Per-device sysctl hooks: real implementations (outside this view)
 * when CONFIG_SYSCTL is set, empty inline stubs otherwise.
 */
181 #ifdef CONFIG_SYSCTL
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static inline void devinet_sysctl_register(struct in_device *idev)
186 {
187 }
188 static inline void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192 
193 /* Locks all the inet devices. */
194 
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199 
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 	if (ifa->ifa_dev)
204 		in_dev_put(ifa->ifa_dev);
205 	kfree(ifa);
206 }
207 
208 static inline void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212 
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215 	struct net_device *dev = idev->dev;
216 
217 	WARN_ON(idev->ifa_list);
218 	WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
221 	       idev, dev ? dev->name : "NIL");
222 #endif
223 	dev_put(dev);
224 	if (!idev->dead)
225 		pr_err("Freeing alive in_device %p\n", idev);
226 	else
227 		kfree(idev);
228 }
229 EXPORT_SYMBOL(in_dev_finish_destroy);
230 
231 static struct in_device *inetdev_init(struct net_device *dev)
232 {
233 	struct in_device *in_dev;
234 
235 	ASSERT_RTNL();
236 
237 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
238 	if (!in_dev)
239 		goto out;
240 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
241 			sizeof(in_dev->cnf));
242 	in_dev->cnf.sysctl = NULL;
243 	in_dev->dev = dev;
244 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
245 	if (!in_dev->arp_parms)
246 		goto out_kfree;
247 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
248 		dev_disable_lro(dev);
249 	/* Reference in_dev->dev */
250 	dev_hold(dev);
251 	/* Account for reference dev->ip_ptr (below) */
252 	in_dev_hold(in_dev);
253 
254 	devinet_sysctl_register(in_dev);
255 	ip_mc_init_dev(in_dev);
256 	if (dev->flags & IFF_UP)
257 		ip_mc_up(in_dev);
258 
259 	/* we can receive as soon as ip_ptr is set -- do this last */
260 	rcu_assign_pointer(dev->ip_ptr, in_dev);
261 out:
262 	return in_dev;
263 out_kfree:
264 	kfree(in_dev);
265 	in_dev = NULL;
266 	goto out;
267 }
268 
269 static void in_dev_rcu_put(struct rcu_head *head)
270 {
271 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
272 	in_dev_put(idev);
273 }
274 
275 static void inetdev_destroy(struct in_device *in_dev)
276 {
277 	struct in_ifaddr *ifa;
278 	struct net_device *dev;
279 
280 	ASSERT_RTNL();
281 
282 	dev = in_dev->dev;
283 
284 	in_dev->dead = 1;
285 
286 	ip_mc_destroy_dev(in_dev);
287 
288 	while ((ifa = in_dev->ifa_list) != NULL) {
289 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
290 		inet_free_ifa(ifa);
291 	}
292 
293 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
294 
295 	devinet_sysctl_unregister(in_dev);
296 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
297 	arp_ifdown(dev);
298 
299 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
300 }
301 
302 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
303 {
304 	rcu_read_lock();
305 	for_primary_ifa(in_dev) {
306 		if (inet_ifa_match(a, ifa)) {
307 			if (!b || inet_ifa_match(b, ifa)) {
308 				rcu_read_unlock();
309 				return 1;
310 			}
311 		}
312 	} endfor_ifa(in_dev);
313 	rcu_read_unlock();
314 	return 0;
315 }
316 
/*
 * Remove the address *@ifap from @in_dev.  Must run under RTNL.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link (list head or a predecessor's ifa_next) pointing at the
 *           address to delete
 * @destroy: when non-zero the deleted address is also freed with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh:     netlink request header forwarded to rtmsg_ifa(), may be NULL
 * @pid:     netlink pid forwarded to rtmsg_ifa()
 *
 * If the address is a primary one, the secondaries that follow it and
 * share its mask and subnet are either deleted as well (each announced
 * with RTM_DELADDR + NETDEV_DOWN) or, when IN_DEV_PROMOTE_SECONDARIES is
 * set, the first of them is promoted to primary after the deletion has
 * been announced.
 */
317 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
318 			 int destroy, struct nlmsghdr *nlh, u32 pid)
319 {
320 	struct in_ifaddr *promote = NULL;
321 	struct in_ifaddr *ifa, *ifa1 = *ifap;
322 	struct in_ifaddr *last_prim = in_dev->ifa_list;
323 	struct in_ifaddr *prev_prom = NULL;
324 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
325 
326 	ASSERT_RTNL();
327 
328 	/* 1. Deleting primary ifaddr forces deletion all secondaries
329 	 * unless alias promotion is set
330 	 **/
331 
332 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
333 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
334 
335 		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary whose scope is not
			 * below ifa1's; a promoted address is re-linked
			 * right after it.
			 */
336 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
337 			    ifa1->ifa_scope <= ifa->ifa_scope)
338 				last_prim = ifa;
339 
			/* Skip entries that are not secondaries of ifa1's
			 * subnet; prev_prom tracks the last entry skipped.
			 */
340 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
341 			    ifa1->ifa_mask != ifa->ifa_mask ||
342 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
343 				ifap1 = &ifa->ifa_next;
344 				prev_prom = ifa;
345 				continue;
346 			}
347 
348 			if (!do_promote) {
349 				inet_hash_remove(ifa);
350 				*ifap1 = ifa->ifa_next;
351 
352 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
353 				blocking_notifier_call_chain(&inetaddr_chain,
354 						NETDEV_DOWN, ifa);
355 				inet_free_ifa(ifa);
356 			} else {
				/* First matching secondary gets promoted. */
357 				promote = ifa;
358 				break;
359 			}
360 		}
361 	}
362 
363 	/* On promotion all secondaries from subnet are changing
364 	 * the primary IP, we must remove all their routes silently
365 	 * and later to add them back with new prefsrc. Do this
366 	 * while all addresses are on the device list.
367 	 */
368 	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
369 		if (ifa1->ifa_mask == ifa->ifa_mask &&
370 		    inet_ifa_match(ifa1->ifa_address, ifa))
371 			fib_del_ifaddr(ifa, ifa1);
372 	}
373 
374 	/* 2. Unlink it */
375 
376 	*ifap = ifa1->ifa_next;
377 	inet_hash_remove(ifa1);
378 
379 	/* 3. Announce address deletion */
380 
381 	/* Send message first, then call notifier.
382 	   At first sight, FIB update triggered by notifier
383 	   will refer to already deleted ifaddr, that could confuse
384 	   netlink listeners. It is not true: look, gated sees
385 	   that route deleted and if it still thinks that ifaddr
386 	   is valid, it will try to restore deleted routes... Grr.
387 	   So that, this order is correct.
388 	 */
389 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
390 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
391 
392 	if (promote) {
		/* Saved before the re-link below may change promote->ifa_next. */
393 		struct in_ifaddr *next_sec = promote->ifa_next;
394 
		/* prev_prom is NULL when the promoted address directly
		 * followed ifa1; it is then left where it is in the list.
		 */
395 		if (prev_prom) {
396 			prev_prom->ifa_next = promote->ifa_next;
397 			promote->ifa_next = last_prim->ifa_next;
398 			last_prim->ifa_next = promote;
399 		}
400 
401 		promote->ifa_flags &= ~IFA_F_SECONDARY;
402 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
403 		blocking_notifier_call_chain(&inetaddr_chain,
404 				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of the subnet. */
405 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
406 			if (ifa1->ifa_mask != ifa->ifa_mask ||
407 			    !inet_ifa_match(ifa1->ifa_address, ifa))
408 					continue;
409 			fib_add_ifaddr(ifa);
410 		}
411 
412 	}
413 	if (destroy)
414 		inet_free_ifa(ifa1);
415 }
416 
417 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
418 			 int destroy)
419 {
420 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
421 }
422 
423 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
424 			     u32 pid)
425 {
426 	struct in_device *in_dev = ifa->ifa_dev;
427 	struct in_ifaddr *ifa1, **ifap, **last_primary;
428 
429 	ASSERT_RTNL();
430 
431 	if (!ifa->ifa_local) {
432 		inet_free_ifa(ifa);
433 		return 0;
434 	}
435 
436 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
437 	last_primary = &in_dev->ifa_list;
438 
439 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
440 	     ifap = &ifa1->ifa_next) {
441 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
442 		    ifa->ifa_scope <= ifa1->ifa_scope)
443 			last_primary = &ifa1->ifa_next;
444 		if (ifa1->ifa_mask == ifa->ifa_mask &&
445 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
446 			if (ifa1->ifa_local == ifa->ifa_local) {
447 				inet_free_ifa(ifa);
448 				return -EEXIST;
449 			}
450 			if (ifa1->ifa_scope != ifa->ifa_scope) {
451 				inet_free_ifa(ifa);
452 				return -EINVAL;
453 			}
454 			ifa->ifa_flags |= IFA_F_SECONDARY;
455 		}
456 	}
457 
458 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
459 		net_srandom(ifa->ifa_local);
460 		ifap = last_primary;
461 	}
462 
463 	ifa->ifa_next = *ifap;
464 	*ifap = ifa;
465 
466 	inet_hash_insert(dev_net(in_dev->dev), ifa);
467 
468 	/* Send message first, then call notifier.
469 	   Notifier will trigger FIB update, so that
470 	   listeners of netlink will know about new ifaddr */
471 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
472 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
473 
474 	return 0;
475 }
476 
477 static int inet_insert_ifa(struct in_ifaddr *ifa)
478 {
479 	return __inet_insert_ifa(ifa, NULL, 0);
480 }
481 
482 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
483 {
484 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
485 
486 	ASSERT_RTNL();
487 
488 	if (!in_dev) {
489 		inet_free_ifa(ifa);
490 		return -ENOBUFS;
491 	}
492 	ipv4_devconf_setall(in_dev);
493 	if (ifa->ifa_dev != in_dev) {
494 		WARN_ON(ifa->ifa_dev);
495 		in_dev_hold(in_dev);
496 		ifa->ifa_dev = in_dev;
497 	}
498 	if (ipv4_is_loopback(ifa->ifa_local))
499 		ifa->ifa_scope = RT_SCOPE_HOST;
500 	return inet_insert_ifa(ifa);
501 }
502 
503 /* Caller must hold RCU or RTNL :
504  * We dont take a reference on found in_device
505  */
506 struct in_device *inetdev_by_index(struct net *net, int ifindex)
507 {
508 	struct net_device *dev;
509 	struct in_device *in_dev = NULL;
510 
511 	rcu_read_lock();
512 	dev = dev_get_by_index_rcu(net, ifindex);
513 	if (dev)
514 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
515 	rcu_read_unlock();
516 	return in_dev;
517 }
518 EXPORT_SYMBOL(inetdev_by_index);
519 
520 /* Called only from RTNL semaphored context. No locks. */
521 
522 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
523 				    __be32 mask)
524 {
525 	ASSERT_RTNL();
526 
527 	for_primary_ifa(in_dev) {
528 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
529 			return ifa;
530 	} endfor_ifa(in_dev);
531 	return NULL;
532 }
533 
534 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
535 {
536 	struct net *net = sock_net(skb->sk);
537 	struct nlattr *tb[IFA_MAX+1];
538 	struct in_device *in_dev;
539 	struct ifaddrmsg *ifm;
540 	struct in_ifaddr *ifa, **ifap;
541 	int err = -EINVAL;
542 
543 	ASSERT_RTNL();
544 
545 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
546 	if (err < 0)
547 		goto errout;
548 
549 	ifm = nlmsg_data(nlh);
550 	in_dev = inetdev_by_index(net, ifm->ifa_index);
551 	if (in_dev == NULL) {
552 		err = -ENODEV;
553 		goto errout;
554 	}
555 
556 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
557 	     ifap = &ifa->ifa_next) {
558 		if (tb[IFA_LOCAL] &&
559 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
560 			continue;
561 
562 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
563 			continue;
564 
565 		if (tb[IFA_ADDRESS] &&
566 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
567 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
568 			continue;
569 
570 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
571 		return 0;
572 	}
573 
574 	err = -EADDRNOTAVAIL;
575 errout:
576 	return err;
577 }
578 
579 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
580 {
581 	struct nlattr *tb[IFA_MAX+1];
582 	struct in_ifaddr *ifa;
583 	struct ifaddrmsg *ifm;
584 	struct net_device *dev;
585 	struct in_device *in_dev;
586 	int err;
587 
588 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
589 	if (err < 0)
590 		goto errout;
591 
592 	ifm = nlmsg_data(nlh);
593 	err = -EINVAL;
594 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
595 		goto errout;
596 
597 	dev = __dev_get_by_index(net, ifm->ifa_index);
598 	err = -ENODEV;
599 	if (dev == NULL)
600 		goto errout;
601 
602 	in_dev = __in_dev_get_rtnl(dev);
603 	err = -ENOBUFS;
604 	if (in_dev == NULL)
605 		goto errout;
606 
607 	ifa = inet_alloc_ifa();
608 	if (ifa == NULL)
609 		/*
610 		 * A potential indev allocation can be left alive, it stays
611 		 * assigned to its device and is destroy with it.
612 		 */
613 		goto errout;
614 
615 	ipv4_devconf_setall(in_dev);
616 	in_dev_hold(in_dev);
617 
618 	if (tb[IFA_ADDRESS] == NULL)
619 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
620 
621 	INIT_HLIST_NODE(&ifa->hash);
622 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
623 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
624 	ifa->ifa_flags = ifm->ifa_flags;
625 	ifa->ifa_scope = ifm->ifa_scope;
626 	ifa->ifa_dev = in_dev;
627 
628 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
629 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
630 
631 	if (tb[IFA_BROADCAST])
632 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
633 
634 	if (tb[IFA_LABEL])
635 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
636 	else
637 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
638 
639 	return ifa;
640 
641 errout:
642 	return ERR_PTR(err);
643 }
644 
645 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
646 {
647 	struct net *net = sock_net(skb->sk);
648 	struct in_ifaddr *ifa;
649 
650 	ASSERT_RTNL();
651 
652 	ifa = rtm_to_ifaddr(net, nlh);
653 	if (IS_ERR(ifa))
654 		return PTR_ERR(ifa);
655 
656 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
657 }
658 
659 /*
660  *	Determine a default network mask, based on the IP address.
661  */
662 
663 static inline int inet_abc_len(__be32 addr)
664 {
665 	int rc = -1;	/* Something else, probably a multicast. */
666 
667 	if (ipv4_is_zeronet(addr))
668 		rc = 0;
669 	else {
670 		__u32 haddr = ntohl(addr);
671 
672 		if (IN_CLASSA(haddr))
673 			rc = 8;
674 		else if (IN_CLASSB(haddr))
675 			rc = 16;
676 		else if (IN_CLASSC(haddr))
677 			rc = 24;
678 	}
679 
680 	return rc;
681 }
682 
683 
684 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
685 {
686 	struct ifreq ifr;
687 	struct sockaddr_in sin_orig;
688 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
689 	struct in_device *in_dev;
690 	struct in_ifaddr **ifap = NULL;
691 	struct in_ifaddr *ifa = NULL;
692 	struct net_device *dev;
693 	char *colon;
694 	int ret = -EFAULT;
695 	int tryaddrmatch = 0;
696 
697 	/*
698 	 *	Fetch the caller's info block into kernel space
699 	 */
700 
701 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
702 		goto out;
703 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
704 
705 	/* save original address for comparison */
706 	memcpy(&sin_orig, sin, sizeof(*sin));
707 
708 	colon = strchr(ifr.ifr_name, ':');
709 	if (colon)
710 		*colon = 0;
711 
712 	dev_load(net, ifr.ifr_name);
713 
714 	switch (cmd) {
715 	case SIOCGIFADDR:	/* Get interface address */
716 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
717 	case SIOCGIFDSTADDR:	/* Get the destination address */
718 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
719 		/* Note that these ioctls will not sleep,
720 		   so that we do not impose a lock.
721 		   One day we will be forced to put shlock here (I mean SMP)
722 		 */
723 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
724 		memset(sin, 0, sizeof(*sin));
725 		sin->sin_family = AF_INET;
726 		break;
727 
728 	case SIOCSIFFLAGS:
729 		ret = -EACCES;
730 		if (!capable(CAP_NET_ADMIN))
731 			goto out;
732 		break;
733 	case SIOCSIFADDR:	/* Set interface address (and family) */
734 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
735 	case SIOCSIFDSTADDR:	/* Set the destination address */
736 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
737 		ret = -EACCES;
738 		if (!capable(CAP_NET_ADMIN))
739 			goto out;
740 		ret = -EINVAL;
741 		if (sin->sin_family != AF_INET)
742 			goto out;
743 		break;
744 	default:
745 		ret = -EINVAL;
746 		goto out;
747 	}
748 
749 	rtnl_lock();
750 
751 	ret = -ENODEV;
752 	dev = __dev_get_by_name(net, ifr.ifr_name);
753 	if (!dev)
754 		goto done;
755 
756 	if (colon)
757 		*colon = ':';
758 
759 	in_dev = __in_dev_get_rtnl(dev);
760 	if (in_dev) {
761 		if (tryaddrmatch) {
762 			/* Matthias Andree */
763 			/* compare label and address (4.4BSD style) */
764 			/* note: we only do this for a limited set of ioctls
765 			   and only if the original address family was AF_INET.
766 			   This is checked above. */
767 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
768 			     ifap = &ifa->ifa_next) {
769 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
770 				    sin_orig.sin_addr.s_addr ==
771 							ifa->ifa_local) {
772 					break; /* found */
773 				}
774 			}
775 		}
776 		/* we didn't get a match, maybe the application is
777 		   4.3BSD-style and passed in junk so we fall back to
778 		   comparing just the label */
779 		if (!ifa) {
780 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
781 			     ifap = &ifa->ifa_next)
782 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
783 					break;
784 		}
785 	}
786 
787 	ret = -EADDRNOTAVAIL;
788 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
789 		goto done;
790 
791 	switch (cmd) {
792 	case SIOCGIFADDR:	/* Get interface address */
793 		sin->sin_addr.s_addr = ifa->ifa_local;
794 		goto rarok;
795 
796 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
797 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
798 		goto rarok;
799 
800 	case SIOCGIFDSTADDR:	/* Get the destination address */
801 		sin->sin_addr.s_addr = ifa->ifa_address;
802 		goto rarok;
803 
804 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
805 		sin->sin_addr.s_addr = ifa->ifa_mask;
806 		goto rarok;
807 
808 	case SIOCSIFFLAGS:
809 		if (colon) {
810 			ret = -EADDRNOTAVAIL;
811 			if (!ifa)
812 				break;
813 			ret = 0;
814 			if (!(ifr.ifr_flags & IFF_UP))
815 				inet_del_ifa(in_dev, ifap, 1);
816 			break;
817 		}
818 		ret = dev_change_flags(dev, ifr.ifr_flags);
819 		break;
820 
821 	case SIOCSIFADDR:	/* Set interface address (and family) */
822 		ret = -EINVAL;
823 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
824 			break;
825 
826 		if (!ifa) {
827 			ret = -ENOBUFS;
828 			ifa = inet_alloc_ifa();
829 			INIT_HLIST_NODE(&ifa->hash);
830 			if (!ifa)
831 				break;
832 			if (colon)
833 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
834 			else
835 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
836 		} else {
837 			ret = 0;
838 			if (ifa->ifa_local == sin->sin_addr.s_addr)
839 				break;
840 			inet_del_ifa(in_dev, ifap, 0);
841 			ifa->ifa_broadcast = 0;
842 			ifa->ifa_scope = 0;
843 		}
844 
845 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
846 
847 		if (!(dev->flags & IFF_POINTOPOINT)) {
848 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
849 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
850 			if ((dev->flags & IFF_BROADCAST) &&
851 			    ifa->ifa_prefixlen < 31)
852 				ifa->ifa_broadcast = ifa->ifa_address |
853 						     ~ifa->ifa_mask;
854 		} else {
855 			ifa->ifa_prefixlen = 32;
856 			ifa->ifa_mask = inet_make_mask(32);
857 		}
858 		ret = inet_set_ifa(dev, ifa);
859 		break;
860 
861 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
862 		ret = 0;
863 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
864 			inet_del_ifa(in_dev, ifap, 0);
865 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
866 			inet_insert_ifa(ifa);
867 		}
868 		break;
869 
870 	case SIOCSIFDSTADDR:	/* Set the destination address */
871 		ret = 0;
872 		if (ifa->ifa_address == sin->sin_addr.s_addr)
873 			break;
874 		ret = -EINVAL;
875 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
876 			break;
877 		ret = 0;
878 		inet_del_ifa(in_dev, ifap, 0);
879 		ifa->ifa_address = sin->sin_addr.s_addr;
880 		inet_insert_ifa(ifa);
881 		break;
882 
883 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
884 
885 		/*
886 		 *	The mask we set must be legal.
887 		 */
888 		ret = -EINVAL;
889 		if (bad_mask(sin->sin_addr.s_addr, 0))
890 			break;
891 		ret = 0;
892 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
893 			__be32 old_mask = ifa->ifa_mask;
894 			inet_del_ifa(in_dev, ifap, 0);
895 			ifa->ifa_mask = sin->sin_addr.s_addr;
896 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
897 
898 			/* See if current broadcast address matches
899 			 * with current netmask, then recalculate
900 			 * the broadcast address. Otherwise it's a
901 			 * funny address, so don't touch it since
902 			 * the user seems to know what (s)he's doing...
903 			 */
904 			if ((dev->flags & IFF_BROADCAST) &&
905 			    (ifa->ifa_prefixlen < 31) &&
906 			    (ifa->ifa_broadcast ==
907 			     (ifa->ifa_local|~old_mask))) {
908 				ifa->ifa_broadcast = (ifa->ifa_local |
909 						      ~sin->sin_addr.s_addr);
910 			}
911 			inet_insert_ifa(ifa);
912 		}
913 		break;
914 	}
915 done:
916 	rtnl_unlock();
917 out:
918 	return ret;
919 rarok:
920 	rtnl_unlock();
921 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
922 	goto out;
923 }
924 
925 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
926 {
927 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
928 	struct in_ifaddr *ifa;
929 	struct ifreq ifr;
930 	int done = 0;
931 
932 	if (!in_dev)
933 		goto out;
934 
935 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
936 		if (!buf) {
937 			done += sizeof(ifr);
938 			continue;
939 		}
940 		if (len < (int) sizeof(ifr))
941 			break;
942 		memset(&ifr, 0, sizeof(struct ifreq));
943 		if (ifa->ifa_label)
944 			strcpy(ifr.ifr_name, ifa->ifa_label);
945 		else
946 			strcpy(ifr.ifr_name, dev->name);
947 
948 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
949 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
950 								ifa->ifa_local;
951 
952 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
953 			done = -EFAULT;
954 			break;
955 		}
956 		buf  += sizeof(struct ifreq);
957 		len  -= sizeof(struct ifreq);
958 		done += sizeof(struct ifreq);
959 	}
960 out:
961 	return done;
962 }
963 
964 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
965 {
966 	__be32 addr = 0;
967 	struct in_device *in_dev;
968 	struct net *net = dev_net(dev);
969 
970 	rcu_read_lock();
971 	in_dev = __in_dev_get_rcu(dev);
972 	if (!in_dev)
973 		goto no_in_dev;
974 
975 	for_primary_ifa(in_dev) {
976 		if (ifa->ifa_scope > scope)
977 			continue;
978 		if (!dst || inet_ifa_match(dst, ifa)) {
979 			addr = ifa->ifa_local;
980 			break;
981 		}
982 		if (!addr)
983 			addr = ifa->ifa_local;
984 	} endfor_ifa(in_dev);
985 
986 	if (addr)
987 		goto out_unlock;
988 no_in_dev:
989 
990 	/* Not loopback addresses on loopback should be preferred
991 	   in this case. It is importnat that lo is the first interface
992 	   in dev_base list.
993 	 */
994 	for_each_netdev_rcu(net, dev) {
995 		in_dev = __in_dev_get_rcu(dev);
996 		if (!in_dev)
997 			continue;
998 
999 		for_primary_ifa(in_dev) {
1000 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1001 			    ifa->ifa_scope <= scope) {
1002 				addr = ifa->ifa_local;
1003 				goto out_unlock;
1004 			}
1005 		} endfor_ifa(in_dev);
1006 	}
1007 out_unlock:
1008 	rcu_read_unlock();
1009 	return addr;
1010 }
1011 EXPORT_SYMBOL(inet_select_addr);
1012 
/*
 * Search all addresses of @in_dev for a local address satisfying the
 * wildcard triple (@dst, @local, @scope); a zero @dst or @local matches
 * anything.
 *
 * Two things are tracked while walking the list:
 *  - addr: the first address equal to @local (or any address when
 *    @local is 0) whose scope is not above @scope;
 *  - same: whether some address' subnet contains @local (if given) and
 *    @dst (if given).
 *
 * Returns the selected address when both were established, 0 otherwise.
 */
1013 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1014 			      __be32 local, int scope)
1015 {
1016 	int same = 0;
1017 	__be32 addr = 0;
1018 
1019 	for_ifa(in_dev) {
		/* Candidate source address; done if the subnet test has
		 * already succeeded on an earlier entry.
		 */
1020 		if (!addr &&
1021 		    (local == ifa->ifa_local || !local) &&
1022 		    ifa->ifa_scope <= scope) {
1023 			addr = ifa->ifa_local;
1024 			if (same)
1025 				break;
1026 		}
1027 		if (!same) {
1028 			same = (!local || inet_ifa_match(local, ifa)) &&
1029 				(!dst || inet_ifa_match(dst, ifa));
1030 			if (same && addr) {
				/* Only the auto-select case with a given
				 * dst (local == 0, dst != 0) needs the
				 * extra checks below.
				 */
1031 				if (local || !dst)
1032 					break;
1033 				/* Is the selected addr into dst subnet? */
1034 				if (inet_ifa_match(addr, ifa))
1035 					break;
1036 				/* No, then can we use new local src? */
1037 				if (ifa->ifa_scope <= scope) {
1038 					addr = ifa->ifa_local;
1039 					break;
1040 				}
1041 				/* search for large dst subnet for addr */
1042 				same = 0;
1043 			}
1044 		}
1045 	} endfor_ifa(in_dev);
1046 
1047 	return same ? addr : 0;
1048 }
1049 
1050 /*
1051  * Confirm that local IP address exists using wildcards:
1052  * - in_dev: only on this interface, 0=any interface
1053  * - dst: only in the same subnet as dst, 0=any dst
1054  * - local: address, 0=autoselect the local address
1055  * - scope: maximum allowed scope value for the local address
1056  */
1057 __be32 inet_confirm_addr(struct in_device *in_dev,
1058 			 __be32 dst, __be32 local, int scope)
1059 {
1060 	__be32 addr = 0;
1061 	struct net_device *dev;
1062 	struct net *net;
1063 
1064 	if (scope != RT_SCOPE_LINK)
1065 		return confirm_addr_indev(in_dev, dst, local, scope);
1066 
1067 	net = dev_net(in_dev->dev);
1068 	rcu_read_lock();
1069 	for_each_netdev_rcu(net, dev) {
1070 		in_dev = __in_dev_get_rcu(dev);
1071 		if (in_dev) {
1072 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1073 			if (addr)
1074 				break;
1075 		}
1076 	}
1077 	rcu_read_unlock();
1078 
1079 	return addr;
1080 }
1081 
1082 /*
1083  *	Device notifier
1084  */
1085 
1086 int register_inetaddr_notifier(struct notifier_block *nb)
1087 {
1088 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1089 }
1090 EXPORT_SYMBOL(register_inetaddr_notifier);
1091 
1092 int unregister_inetaddr_notifier(struct notifier_block *nb)
1093 {
1094 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1095 }
1096 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1097 
1098 /* Rename ifa_labels for a device name change. Make some effort to preserve
1099  * existing alias numbering and to create unique labels if possible.
1100 */
1101 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1102 {
1103 	struct in_ifaddr *ifa;
1104 	int named = 0;
1105 
1106 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1107 		char old[IFNAMSIZ], *dot;
1108 
1109 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1110 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1111 		if (named++ == 0)
1112 			goto skip;
1113 		dot = strchr(old, ':');
1114 		if (dot == NULL) {
1115 			sprintf(old, ":%d", named);
1116 			dot = old;
1117 		}
1118 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1119 			strcat(ifa->ifa_label, dot);
1120 		else
1121 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1122 skip:
1123 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1124 	}
1125 }
1126 
/* Returns true when @mtu is at least 68, false otherwise. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return !(mtu < 68);
}
1131 
1132 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1133 					struct in_device *in_dev)
1134 
1135 {
1136 	struct in_ifaddr *ifa;
1137 
1138 	for (ifa = in_dev->ifa_list; ifa;
1139 	     ifa = ifa->ifa_next) {
1140 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1141 			 ifa->ifa_local, dev,
1142 			 ifa->ifa_local, NULL,
1143 			 dev->dev_addr, NULL);
1144 	}
1145 }
1146 
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback.  @ptr is the net_device the @event is about.
 * Depending on the event this creates, updates or destroys the device's
 * in_device state.  Returns NOTIFY_DONE, except when inetdev_init() fails
 * during NETDEV_REGISTER, in which case notifier_from_errno(-ENOMEM) is
 * returned.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No in_device yet: only REGISTER and CHANGEMTU can create one. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			/* Loopback devices start with NOXFRM and NOPOLICY set. */
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device. */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* Loopback gets INADDR_LOOPBACK/8 with host scope when it comes up. */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR only announce when arp_notify is enabled. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 *
		 * NOTE(review): inetdev_changename() calls
		 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
		 * statement above looks stale - confirm.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register the sysctl entries after the rename. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1237 
/* Notifier block that delivers netdevice events to inetdev_event();
 * registered from devinet_init().
 */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1241 
1242 static inline size_t inet_nlmsg_size(void)
1243 {
1244 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1245 	       + nla_total_size(4) /* IFA_ADDRESS */
1246 	       + nla_total_size(4) /* IFA_LOCAL */
1247 	       + nla_total_size(4) /* IFA_BROADCAST */
1248 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1249 }
1250 
1251 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1252 			    u32 pid, u32 seq, int event, unsigned int flags)
1253 {
1254 	struct ifaddrmsg *ifm;
1255 	struct nlmsghdr  *nlh;
1256 
1257 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1258 	if (nlh == NULL)
1259 		return -EMSGSIZE;
1260 
1261 	ifm = nlmsg_data(nlh);
1262 	ifm->ifa_family = AF_INET;
1263 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1264 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1265 	ifm->ifa_scope = ifa->ifa_scope;
1266 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1267 
1268 	if (ifa->ifa_address)
1269 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1270 
1271 	if (ifa->ifa_local)
1272 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1273 
1274 	if (ifa->ifa_broadcast)
1275 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1276 
1277 	if (ifa->ifa_label[0])
1278 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1279 
1280 	return nlmsg_end(skb, nlh);
1281 
1282 nla_put_failure:
1283 	nlmsg_cancel(skb, nlh);
1284 	return -EMSGSIZE;
1285 }
1286 
/*
 * Netlink dump callback: emits one RTM_NEWADDR (NLM_F_MULTI) message per
 * address of every device found in the namespace's dev_index_head table.
 *
 * The position is kept in cb->args so a later call can resume:
 *   args[0] - hash bucket, args[1] - device index within the bucket,
 *   args[2] - address index within the device.
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;

	/* Pick up the cursor saved by the previous invocation. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited; reset afterwards. */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			/* Skip devices already dumped in this bucket. */
			if (idx < s_idx)
				goto cont;
			/* Past the resume device: start from its first address. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* Stop and save the cursor when the fill fails. */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the cursor for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1341 
1342 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1343 		      u32 pid)
1344 {
1345 	struct sk_buff *skb;
1346 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1347 	int err = -ENOBUFS;
1348 	struct net *net;
1349 
1350 	net = dev_net(ifa->ifa_dev->dev);
1351 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1352 	if (skb == NULL)
1353 		goto errout;
1354 
1355 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1356 	if (err < 0) {
1357 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1358 		WARN_ON(err == -EMSGSIZE);
1359 		kfree_skb(skb);
1360 		goto errout;
1361 	}
1362 	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1363 	return;
1364 errout:
1365 	if (err < 0)
1366 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1367 }
1368 
1369 static size_t inet_get_link_af_size(const struct net_device *dev)
1370 {
1371 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1372 
1373 	if (!in_dev)
1374 		return 0;
1375 
1376 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1377 }
1378 
1379 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1380 {
1381 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1382 	struct nlattr *nla;
1383 	int i;
1384 
1385 	if (!in_dev)
1386 		return -ENODATA;
1387 
1388 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1389 	if (nla == NULL)
1390 		return -EMSGSIZE;
1391 
1392 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1393 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1394 
1395 	return 0;
1396 }
1397 
/* Parse policy used by inet_validate_link_af(): IFLA_INET_CONF must be a
 * nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1401 
1402 static int inet_validate_link_af(const struct net_device *dev,
1403 				 const struct nlattr *nla)
1404 {
1405 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1406 	int err, rem;
1407 
1408 	if (dev && !__in_dev_get_rtnl(dev))
1409 		return -EAFNOSUPPORT;
1410 
1411 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1412 	if (err < 0)
1413 		return err;
1414 
1415 	if (tb[IFLA_INET_CONF]) {
1416 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1417 			int cfgid = nla_type(a);
1418 
1419 			if (nla_len(a) < 4)
1420 				return -EINVAL;
1421 
1422 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1423 				return -EINVAL;
1424 		}
1425 	}
1426 
1427 	return 0;
1428 }
1429 
1430 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1431 {
1432 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1433 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1434 	int rem;
1435 
1436 	if (!in_dev)
1437 		return -EAFNOSUPPORT;
1438 
1439 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1440 		BUG();
1441 
1442 	if (tb[IFLA_INET_CONF]) {
1443 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1444 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1445 	}
1446 
1447 	return 0;
1448 }
1449 
1450 #ifdef CONFIG_SYSCTL
1451 
1452 static void devinet_copy_dflt_conf(struct net *net, int i)
1453 {
1454 	struct net_device *dev;
1455 
1456 	rcu_read_lock();
1457 	for_each_netdev_rcu(net, dev) {
1458 		struct in_device *in_dev;
1459 
1460 		in_dev = __in_dev_get_rcu(dev);
1461 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1462 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1463 	}
1464 	rcu_read_unlock();
1465 }
1466 
1467 /* called with RTNL locked */
1468 static void inet_forward_change(struct net *net)
1469 {
1470 	struct net_device *dev;
1471 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1472 
1473 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1474 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1475 
1476 	for_each_netdev(net, dev) {
1477 		struct in_device *in_dev;
1478 		if (on)
1479 			dev_disable_lro(dev);
1480 		rcu_read_lock();
1481 		in_dev = __in_dev_get_rcu(dev);
1482 		if (in_dev)
1483 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1484 		rcu_read_unlock();
1485 	}
1486 }
1487 
1488 static int devinet_conf_proc(ctl_table *ctl, int write,
1489 			     void __user *buffer,
1490 			     size_t *lenp, loff_t *ppos)
1491 {
1492 	int old_value = *(int *)ctl->data;
1493 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1494 	int new_value = *(int *)ctl->data;
1495 
1496 	if (write) {
1497 		struct ipv4_devconf *cnf = ctl->extra1;
1498 		struct net *net = ctl->extra2;
1499 		int i = (int *)ctl->data - cnf->data;
1500 
1501 		set_bit(i, cnf->state);
1502 
1503 		if (cnf == net->ipv4.devconf_dflt)
1504 			devinet_copy_dflt_conf(net, i);
1505 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1506 			if ((new_value == 0) && (old_value != 0))
1507 				rt_cache_flush(net, 0);
1508 	}
1509 
1510 	return ret;
1511 }
1512 
1513 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1514 				  void __user *buffer,
1515 				  size_t *lenp, loff_t *ppos)
1516 {
1517 	int *valp = ctl->data;
1518 	int val = *valp;
1519 	loff_t pos = *ppos;
1520 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1521 
1522 	if (write && *valp != val) {
1523 		struct net *net = ctl->extra2;
1524 
1525 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1526 			if (!rtnl_trylock()) {
1527 				/* Restore the original values before restarting */
1528 				*valp = val;
1529 				*ppos = pos;
1530 				return restart_syscall();
1531 			}
1532 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1533 				inet_forward_change(net);
1534 			} else if (*valp) {
1535 				struct ipv4_devconf *cnf = ctl->extra1;
1536 				struct in_device *idev =
1537 					container_of(cnf, struct in_device, cnf);
1538 				dev_disable_lro(idev->dev);
1539 			}
1540 			rtnl_unlock();
1541 			rt_cache_flush(net, 0);
1542 		}
1543 	}
1544 
1545 	return ret;
1546 }
1547 
1548 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1549 				void __user *buffer,
1550 				size_t *lenp, loff_t *ppos)
1551 {
1552 	int *valp = ctl->data;
1553 	int val = *valp;
1554 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1555 	struct net *net = ctl->extra2;
1556 
1557 	if (write && *valp != val)
1558 		rt_cache_flush(net, 0);
1559 
1560 	return ret;
1561 }
1562 
/*
 * Build one ctl_table initializer for devconf attribute @attr:
 * @name is the procname, @mval the file mode and @proc the handler.
 * .data and .extra1 point into the global ipv4_devconf; the entry's slot
 * is data[IPV4_DEVCONF_<attr> - 1].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler @proc. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1585 
/*
 * Template sysctl table for one devconf instance.
 * __devinet_sysctl_register() duplicates it with kmemdup() and re-points
 * each entry at the devconf being registered; dev_name and sysctl_header
 * are filled in on the copy.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	char *dev_name;
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler; mc_forwarding is read-only */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* plain read-write integers */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* entries whose change flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1625 
1626 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1627 					struct ipv4_devconf *p)
1628 {
1629 	int i;
1630 	struct devinet_sysctl_table *t;
1631 
1632 #define DEVINET_CTL_PATH_DEV	3
1633 
1634 	struct ctl_path devinet_ctl_path[] = {
1635 		{ .procname = "net",  },
1636 		{ .procname = "ipv4", },
1637 		{ .procname = "conf", },
1638 		{ /* to be set */ },
1639 		{ },
1640 	};
1641 
1642 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1643 	if (!t)
1644 		goto out;
1645 
1646 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1647 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1648 		t->devinet_vars[i].extra1 = p;
1649 		t->devinet_vars[i].extra2 = net;
1650 	}
1651 
1652 	/*
1653 	 * Make a copy of dev_name, because '.procname' is regarded as const
1654 	 * by sysctl and we wouldn't want anyone to change it under our feet
1655 	 * (see SIOCSIFNAME).
1656 	 */
1657 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1658 	if (!t->dev_name)
1659 		goto free;
1660 
1661 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1662 
1663 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1664 			t->devinet_vars);
1665 	if (!t->sysctl_header)
1666 		goto free_procname;
1667 
1668 	p->sysctl = t;
1669 	return 0;
1670 
1671 free_procname:
1672 	kfree(t->dev_name);
1673 free:
1674 	kfree(t);
1675 out:
1676 	return -ENOBUFS;
1677 }
1678 
1679 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1680 {
1681 	struct devinet_sysctl_table *t = cnf->sysctl;
1682 
1683 	if (t == NULL)
1684 		return;
1685 
1686 	cnf->sysctl = NULL;
1687 	unregister_net_sysctl_table(t->sysctl_header);
1688 	kfree(t->dev_name);
1689 	kfree(t);
1690 }
1691 
1692 static void devinet_sysctl_register(struct in_device *idev)
1693 {
1694 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1695 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1696 					&idev->cnf);
1697 }
1698 
1699 static void devinet_sysctl_unregister(struct in_device *idev)
1700 {
1701 	__devinet_sysctl_unregister(&idev->cnf);
1702 	neigh_sysctl_unregister(idev->arp_parms);
1703 }
1704 
/*
 * Table for the "ip_forward" sysctl, bound to the global ipv4_devconf and
 * init_net.  devinet_init_net() uses it directly for init_net and
 * duplicates and re-points it for every other namespace.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1718 
/* sysctl path "net/ipv4" under which devinet_init_net() registers the
 * ip_forward table.
 */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
1724 #endif
1725 
/*
 * Per-namespace setup: establish the "all" and "default" devconf tables
 * and, with CONFIG_SYSCTL, register their sysctl directories and the
 * ip_forward entry.
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry objects; every other namespace gets kmemdup() copies.
 * Returns 0 on success or a negative errno, undoing whatever was set up.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		/* Non-initial namespace: work on private copies. */
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied ip_forward entry at this namespace's data. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Unwind in reverse order; the static objects are never freed. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1797 
1798 static __net_exit void devinet_exit_net(struct net *net)
1799 {
1800 #ifdef CONFIG_SYSCTL
1801 	struct ctl_table *tbl;
1802 
1803 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1804 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1805 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1806 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1807 	kfree(tbl);
1808 #endif
1809 	kfree(net->ipv4.devconf_dflt);
1810 	kfree(net->ipv4.devconf_all);
1811 }
1812 
/* Per-namespace init/exit hooks; registered from devinet_init() with
 * register_pernet_subsys().
 */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1817 
/* AF_INET link attribute callbacks; registered from devinet_init() with
 * rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
1825 
1826 void __init devinet_init(void)
1827 {
1828 	int i;
1829 
1830 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1831 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1832 
1833 	register_pernet_subsys(&devinet_ops);
1834 
1835 	register_gifconf(PF_INET, inet_gifconf);
1836 	register_netdevice_notifier(&ip_netdev_notifier);
1837 
1838 	rtnl_af_register(&inet_af_ops);
1839 
1840 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1841 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1842 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1843 }
1844 
1845