xref: /linux/net/ipv4/devinet.c (revision 08ec212c0f92cbf30e3ecc7349f18151714041d6)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65 
66 #include "fib_lookup.h"
67 
/*
 * Statically initialised IPv4 device configuration block.
 *
 * The .data array is indexed by (IPV4_DEVCONF_<option> - 1).  The four
 * options named below start out as 1; every entry that is not listed is
 * implicitly zero.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
76 
/*
 * Second statically initialised configuration block.  It carries the same
 * settings as ipv4_devconf and additionally enables ACCEPT_SOURCE_ROUTE.
 *
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into every new in_device -- confirm against
 * the namespace init code, which is not visible here.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
86 
/*
 * Apply IPV4_DEVCONF() to the default devconf block of namespace @net.
 * @net is parenthesised so that any pointer expression (not just a plain
 * identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
89 
/*
 * Netlink attribute policy for IPv4 address messages; handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr().
 * The three address attributes are 32-bit values, the label is a string
 * of at most IFNAMSIZ - 1 bytes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
96 
/* The local-address hash table has 2^IN4_ADDR_HSIZE_SHIFT (256) buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of in_ifaddr entries keyed by ifa_local (see inet_addr_hash()).
 * Insertions and removals take inet_addr_hash_lock; __ip_dev_find() walks
 * the buckets under rcu_read_lock() only.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
102 
103 static u32 inet_addr_hash(struct net *net, __be32 addr)
104 {
105 	u32 val = (__force u32) addr ^ net_hash_mix(net);
106 
107 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
108 }
109 
110 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
111 {
112 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
113 
114 	spin_lock(&inet_addr_hash_lock);
115 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
116 	spin_unlock(&inet_addr_hash_lock);
117 }
118 
119 static void inet_hash_remove(struct in_ifaddr *ifa)
120 {
121 	spin_lock(&inet_addr_hash_lock);
122 	hlist_del_init_rcu(&ifa->hash);
123 	spin_unlock(&inet_addr_hash_lock);
124 }
125 
126 /**
127  * __ip_dev_find - find the first device with a given source address.
128  * @net: the net namespace
129  * @addr: the source address
130  * @devref: if true, take a reference on the found device
131  *
132  * If a caller uses devref=false, it should be protected by RCU, or RTNL
133  */
134 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
135 {
136 	u32 hash = inet_addr_hash(net, addr);
137 	struct net_device *result = NULL;
138 	struct in_ifaddr *ifa;
139 	struct hlist_node *node;
140 
141 	rcu_read_lock();
142 	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
143 		if (ifa->ifa_local == addr) {
144 			struct net_device *dev = ifa->ifa_dev->dev;
145 
146 			if (!net_eq(dev_net(dev), net))
147 				continue;
148 			result = dev;
149 			break;
150 		}
151 	}
152 	if (!result) {
153 		struct flowi4 fl4 = { .daddr = addr };
154 		struct fib_result res = { 0 };
155 		struct fib_table *local;
156 
157 		/* Fallback to FIB local table so that communication
158 		 * over loopback subnets work.
159 		 */
160 		local = fib_get_table(net, RT_TABLE_LOCAL);
161 		if (local &&
162 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
163 		    res.type == RTN_LOCAL)
164 			result = FIB_RES_DEV(res);
165 	}
166 	if (result && devref)
167 		dev_hold(result);
168 	rcu_read_unlock();
169 	return result;
170 }
171 EXPORT_SYMBOL(__ip_dev_find);
172 
/* Forward declaration: used by the add/delete paths below. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is inserted or removed.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: inetdev_destroy() needs it before the definition. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks are empty stubs. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
189 
190 /* Locks all the inet devices. */
191 
192 static struct in_ifaddr *inet_alloc_ifa(void)
193 {
194 	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
195 }
196 
197 static void inet_rcu_free_ifa(struct rcu_head *head)
198 {
199 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
200 	if (ifa->ifa_dev)
201 		in_dev_put(ifa->ifa_dev);
202 	kfree(ifa);
203 }
204 
205 static void inet_free_ifa(struct in_ifaddr *ifa)
206 {
207 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
208 }
209 
210 void in_dev_finish_destroy(struct in_device *idev)
211 {
212 	struct net_device *dev = idev->dev;
213 
214 	WARN_ON(idev->ifa_list);
215 	WARN_ON(idev->mc_list);
216 #ifdef NET_REFCNT_DEBUG
217 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
218 #endif
219 	dev_put(dev);
220 	if (!idev->dead)
221 		pr_err("Freeing alive in_device %p\n", idev);
222 	else
223 		kfree(idev);
224 }
225 EXPORT_SYMBOL(in_dev_finish_destroy);
226 
227 static struct in_device *inetdev_init(struct net_device *dev)
228 {
229 	struct in_device *in_dev;
230 
231 	ASSERT_RTNL();
232 
233 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
234 	if (!in_dev)
235 		goto out;
236 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
237 			sizeof(in_dev->cnf));
238 	in_dev->cnf.sysctl = NULL;
239 	in_dev->dev = dev;
240 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
241 	if (!in_dev->arp_parms)
242 		goto out_kfree;
243 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
244 		dev_disable_lro(dev);
245 	/* Reference in_dev->dev */
246 	dev_hold(dev);
247 	/* Account for reference dev->ip_ptr (below) */
248 	in_dev_hold(in_dev);
249 
250 	devinet_sysctl_register(in_dev);
251 	ip_mc_init_dev(in_dev);
252 	if (dev->flags & IFF_UP)
253 		ip_mc_up(in_dev);
254 
255 	/* we can receive as soon as ip_ptr is set -- do this last */
256 	rcu_assign_pointer(dev->ip_ptr, in_dev);
257 out:
258 	return in_dev;
259 out_kfree:
260 	kfree(in_dev);
261 	in_dev = NULL;
262 	goto out;
263 }
264 
265 static void in_dev_rcu_put(struct rcu_head *head)
266 {
267 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
268 	in_dev_put(idev);
269 }
270 
271 static void inetdev_destroy(struct in_device *in_dev)
272 {
273 	struct in_ifaddr *ifa;
274 	struct net_device *dev;
275 
276 	ASSERT_RTNL();
277 
278 	dev = in_dev->dev;
279 
280 	in_dev->dead = 1;
281 
282 	ip_mc_destroy_dev(in_dev);
283 
284 	while ((ifa = in_dev->ifa_list) != NULL) {
285 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
286 		inet_free_ifa(ifa);
287 	}
288 
289 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
290 
291 	devinet_sysctl_unregister(in_dev);
292 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
293 	arp_ifdown(dev);
294 
295 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
296 }
297 
298 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
299 {
300 	rcu_read_lock();
301 	for_primary_ifa(in_dev) {
302 		if (inet_ifa_match(a, ifa)) {
303 			if (!b || inet_ifa_match(b, ifa)) {
304 				rcu_read_unlock();
305 				return 1;
306 			}
307 		}
308 	} endfor_ifa(in_dev);
309 	rcu_read_unlock();
310 	return 0;
311 }
312 
/*
 * Remove the address *@ifap from @in_dev.  Must be called with RTNL held.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link (pointer to the ->ifa_next or list-head slot) that
 *           currently points at the address to delete
 * @destroy: when non-zero the deleted address is released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * When the deleted address is a primary one, its secondaries (same mask,
 * same subnet) are either deleted too or -- if secondary promotion is
 * enabled for the device -- the first of them is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary whose scope value is not
			 * below that of ifa1; a promoted address is
			 * re-linked right after it.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* No promotion: drop the secondary now. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the follower before promote is re-linked. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* If other entries precede it, move the promoted address
		 * so that it sits directly after last_prim.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
412 
413 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
414 			 int destroy)
415 {
416 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
417 }
418 
419 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
420 			     u32 portid)
421 {
422 	struct in_device *in_dev = ifa->ifa_dev;
423 	struct in_ifaddr *ifa1, **ifap, **last_primary;
424 
425 	ASSERT_RTNL();
426 
427 	if (!ifa->ifa_local) {
428 		inet_free_ifa(ifa);
429 		return 0;
430 	}
431 
432 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
433 	last_primary = &in_dev->ifa_list;
434 
435 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
436 	     ifap = &ifa1->ifa_next) {
437 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
438 		    ifa->ifa_scope <= ifa1->ifa_scope)
439 			last_primary = &ifa1->ifa_next;
440 		if (ifa1->ifa_mask == ifa->ifa_mask &&
441 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
442 			if (ifa1->ifa_local == ifa->ifa_local) {
443 				inet_free_ifa(ifa);
444 				return -EEXIST;
445 			}
446 			if (ifa1->ifa_scope != ifa->ifa_scope) {
447 				inet_free_ifa(ifa);
448 				return -EINVAL;
449 			}
450 			ifa->ifa_flags |= IFA_F_SECONDARY;
451 		}
452 	}
453 
454 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
455 		net_srandom(ifa->ifa_local);
456 		ifap = last_primary;
457 	}
458 
459 	ifa->ifa_next = *ifap;
460 	*ifap = ifa;
461 
462 	inet_hash_insert(dev_net(in_dev->dev), ifa);
463 
464 	/* Send message first, then call notifier.
465 	   Notifier will trigger FIB update, so that
466 	   listeners of netlink will know about new ifaddr */
467 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
468 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
469 
470 	return 0;
471 }
472 
473 static int inet_insert_ifa(struct in_ifaddr *ifa)
474 {
475 	return __inet_insert_ifa(ifa, NULL, 0);
476 }
477 
478 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
479 {
480 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
481 
482 	ASSERT_RTNL();
483 
484 	if (!in_dev) {
485 		inet_free_ifa(ifa);
486 		return -ENOBUFS;
487 	}
488 	ipv4_devconf_setall(in_dev);
489 	if (ifa->ifa_dev != in_dev) {
490 		WARN_ON(ifa->ifa_dev);
491 		in_dev_hold(in_dev);
492 		ifa->ifa_dev = in_dev;
493 	}
494 	if (ipv4_is_loopback(ifa->ifa_local))
495 		ifa->ifa_scope = RT_SCOPE_HOST;
496 	return inet_insert_ifa(ifa);
497 }
498 
499 /* Caller must hold RCU or RTNL :
500  * We dont take a reference on found in_device
501  */
502 struct in_device *inetdev_by_index(struct net *net, int ifindex)
503 {
504 	struct net_device *dev;
505 	struct in_device *in_dev = NULL;
506 
507 	rcu_read_lock();
508 	dev = dev_get_by_index_rcu(net, ifindex);
509 	if (dev)
510 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
511 	rcu_read_unlock();
512 	return in_dev;
513 }
514 EXPORT_SYMBOL(inetdev_by_index);
515 
516 /* Called only from RTNL semaphored context. No locks. */
517 
518 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
519 				    __be32 mask)
520 {
521 	ASSERT_RTNL();
522 
523 	for_primary_ifa(in_dev) {
524 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
525 			return ifa;
526 	} endfor_ifa(in_dev);
527 	return NULL;
528 }
529 
530 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
531 {
532 	struct net *net = sock_net(skb->sk);
533 	struct nlattr *tb[IFA_MAX+1];
534 	struct in_device *in_dev;
535 	struct ifaddrmsg *ifm;
536 	struct in_ifaddr *ifa, **ifap;
537 	int err = -EINVAL;
538 
539 	ASSERT_RTNL();
540 
541 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
542 	if (err < 0)
543 		goto errout;
544 
545 	ifm = nlmsg_data(nlh);
546 	in_dev = inetdev_by_index(net, ifm->ifa_index);
547 	if (in_dev == NULL) {
548 		err = -ENODEV;
549 		goto errout;
550 	}
551 
552 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
553 	     ifap = &ifa->ifa_next) {
554 		if (tb[IFA_LOCAL] &&
555 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
556 			continue;
557 
558 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
559 			continue;
560 
561 		if (tb[IFA_ADDRESS] &&
562 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
563 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
564 			continue;
565 
566 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
567 		return 0;
568 	}
569 
570 	err = -EADDRNOTAVAIL;
571 errout:
572 	return err;
573 }
574 
575 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
576 {
577 	struct nlattr *tb[IFA_MAX+1];
578 	struct in_ifaddr *ifa;
579 	struct ifaddrmsg *ifm;
580 	struct net_device *dev;
581 	struct in_device *in_dev;
582 	int err;
583 
584 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
585 	if (err < 0)
586 		goto errout;
587 
588 	ifm = nlmsg_data(nlh);
589 	err = -EINVAL;
590 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
591 		goto errout;
592 
593 	dev = __dev_get_by_index(net, ifm->ifa_index);
594 	err = -ENODEV;
595 	if (dev == NULL)
596 		goto errout;
597 
598 	in_dev = __in_dev_get_rtnl(dev);
599 	err = -ENOBUFS;
600 	if (in_dev == NULL)
601 		goto errout;
602 
603 	ifa = inet_alloc_ifa();
604 	if (ifa == NULL)
605 		/*
606 		 * A potential indev allocation can be left alive, it stays
607 		 * assigned to its device and is destroy with it.
608 		 */
609 		goto errout;
610 
611 	ipv4_devconf_setall(in_dev);
612 	in_dev_hold(in_dev);
613 
614 	if (tb[IFA_ADDRESS] == NULL)
615 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
616 
617 	INIT_HLIST_NODE(&ifa->hash);
618 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
619 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
620 	ifa->ifa_flags = ifm->ifa_flags;
621 	ifa->ifa_scope = ifm->ifa_scope;
622 	ifa->ifa_dev = in_dev;
623 
624 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
625 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
626 
627 	if (tb[IFA_BROADCAST])
628 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
629 
630 	if (tb[IFA_LABEL])
631 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
632 	else
633 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
634 
635 	return ifa;
636 
637 errout:
638 	return ERR_PTR(err);
639 }
640 
641 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
642 {
643 	struct net *net = sock_net(skb->sk);
644 	struct in_ifaddr *ifa;
645 
646 	ASSERT_RTNL();
647 
648 	ifa = rtm_to_ifaddr(net, nlh);
649 	if (IS_ERR(ifa))
650 		return PTR_ERR(ifa);
651 
652 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
653 }
654 
655 /*
656  *	Determine a default network mask, based on the IP address.
657  */
658 
659 static int inet_abc_len(__be32 addr)
660 {
661 	int rc = -1;	/* Something else, probably a multicast. */
662 
663 	if (ipv4_is_zeronet(addr))
664 		rc = 0;
665 	else {
666 		__u32 haddr = ntohl(addr);
667 
668 		if (IN_CLASSA(haddr))
669 			rc = 8;
670 		else if (IN_CLASSB(haddr))
671 			rc = 16;
672 		else if (IN_CLASSC(haddr))
673 			rc = 24;
674 	}
675 
676 	return rc;
677 }
678 
679 
680 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
681 {
682 	struct ifreq ifr;
683 	struct sockaddr_in sin_orig;
684 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
685 	struct in_device *in_dev;
686 	struct in_ifaddr **ifap = NULL;
687 	struct in_ifaddr *ifa = NULL;
688 	struct net_device *dev;
689 	char *colon;
690 	int ret = -EFAULT;
691 	int tryaddrmatch = 0;
692 
693 	/*
694 	 *	Fetch the caller's info block into kernel space
695 	 */
696 
697 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
698 		goto out;
699 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
700 
701 	/* save original address for comparison */
702 	memcpy(&sin_orig, sin, sizeof(*sin));
703 
704 	colon = strchr(ifr.ifr_name, ':');
705 	if (colon)
706 		*colon = 0;
707 
708 	dev_load(net, ifr.ifr_name);
709 
710 	switch (cmd) {
711 	case SIOCGIFADDR:	/* Get interface address */
712 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
713 	case SIOCGIFDSTADDR:	/* Get the destination address */
714 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
715 		/* Note that these ioctls will not sleep,
716 		   so that we do not impose a lock.
717 		   One day we will be forced to put shlock here (I mean SMP)
718 		 */
719 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
720 		memset(sin, 0, sizeof(*sin));
721 		sin->sin_family = AF_INET;
722 		break;
723 
724 	case SIOCSIFFLAGS:
725 		ret = -EPERM;
726 		if (!capable(CAP_NET_ADMIN))
727 			goto out;
728 		break;
729 	case SIOCSIFADDR:	/* Set interface address (and family) */
730 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
731 	case SIOCSIFDSTADDR:	/* Set the destination address */
732 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
733 		ret = -EPERM;
734 		if (!capable(CAP_NET_ADMIN))
735 			goto out;
736 		ret = -EINVAL;
737 		if (sin->sin_family != AF_INET)
738 			goto out;
739 		break;
740 	default:
741 		ret = -EINVAL;
742 		goto out;
743 	}
744 
745 	rtnl_lock();
746 
747 	ret = -ENODEV;
748 	dev = __dev_get_by_name(net, ifr.ifr_name);
749 	if (!dev)
750 		goto done;
751 
752 	if (colon)
753 		*colon = ':';
754 
755 	in_dev = __in_dev_get_rtnl(dev);
756 	if (in_dev) {
757 		if (tryaddrmatch) {
758 			/* Matthias Andree */
759 			/* compare label and address (4.4BSD style) */
760 			/* note: we only do this for a limited set of ioctls
761 			   and only if the original address family was AF_INET.
762 			   This is checked above. */
763 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
764 			     ifap = &ifa->ifa_next) {
765 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
766 				    sin_orig.sin_addr.s_addr ==
767 							ifa->ifa_local) {
768 					break; /* found */
769 				}
770 			}
771 		}
772 		/* we didn't get a match, maybe the application is
773 		   4.3BSD-style and passed in junk so we fall back to
774 		   comparing just the label */
775 		if (!ifa) {
776 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
777 			     ifap = &ifa->ifa_next)
778 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
779 					break;
780 		}
781 	}
782 
783 	ret = -EADDRNOTAVAIL;
784 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
785 		goto done;
786 
787 	switch (cmd) {
788 	case SIOCGIFADDR:	/* Get interface address */
789 		sin->sin_addr.s_addr = ifa->ifa_local;
790 		goto rarok;
791 
792 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
793 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
794 		goto rarok;
795 
796 	case SIOCGIFDSTADDR:	/* Get the destination address */
797 		sin->sin_addr.s_addr = ifa->ifa_address;
798 		goto rarok;
799 
800 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
801 		sin->sin_addr.s_addr = ifa->ifa_mask;
802 		goto rarok;
803 
804 	case SIOCSIFFLAGS:
805 		if (colon) {
806 			ret = -EADDRNOTAVAIL;
807 			if (!ifa)
808 				break;
809 			ret = 0;
810 			if (!(ifr.ifr_flags & IFF_UP))
811 				inet_del_ifa(in_dev, ifap, 1);
812 			break;
813 		}
814 		ret = dev_change_flags(dev, ifr.ifr_flags);
815 		break;
816 
817 	case SIOCSIFADDR:	/* Set interface address (and family) */
818 		ret = -EINVAL;
819 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
820 			break;
821 
822 		if (!ifa) {
823 			ret = -ENOBUFS;
824 			ifa = inet_alloc_ifa();
825 			INIT_HLIST_NODE(&ifa->hash);
826 			if (!ifa)
827 				break;
828 			if (colon)
829 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
830 			else
831 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
832 		} else {
833 			ret = 0;
834 			if (ifa->ifa_local == sin->sin_addr.s_addr)
835 				break;
836 			inet_del_ifa(in_dev, ifap, 0);
837 			ifa->ifa_broadcast = 0;
838 			ifa->ifa_scope = 0;
839 		}
840 
841 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
842 
843 		if (!(dev->flags & IFF_POINTOPOINT)) {
844 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
845 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
846 			if ((dev->flags & IFF_BROADCAST) &&
847 			    ifa->ifa_prefixlen < 31)
848 				ifa->ifa_broadcast = ifa->ifa_address |
849 						     ~ifa->ifa_mask;
850 		} else {
851 			ifa->ifa_prefixlen = 32;
852 			ifa->ifa_mask = inet_make_mask(32);
853 		}
854 		ret = inet_set_ifa(dev, ifa);
855 		break;
856 
857 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
858 		ret = 0;
859 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
860 			inet_del_ifa(in_dev, ifap, 0);
861 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
862 			inet_insert_ifa(ifa);
863 		}
864 		break;
865 
866 	case SIOCSIFDSTADDR:	/* Set the destination address */
867 		ret = 0;
868 		if (ifa->ifa_address == sin->sin_addr.s_addr)
869 			break;
870 		ret = -EINVAL;
871 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
872 			break;
873 		ret = 0;
874 		inet_del_ifa(in_dev, ifap, 0);
875 		ifa->ifa_address = sin->sin_addr.s_addr;
876 		inet_insert_ifa(ifa);
877 		break;
878 
879 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
880 
881 		/*
882 		 *	The mask we set must be legal.
883 		 */
884 		ret = -EINVAL;
885 		if (bad_mask(sin->sin_addr.s_addr, 0))
886 			break;
887 		ret = 0;
888 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
889 			__be32 old_mask = ifa->ifa_mask;
890 			inet_del_ifa(in_dev, ifap, 0);
891 			ifa->ifa_mask = sin->sin_addr.s_addr;
892 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
893 
894 			/* See if current broadcast address matches
895 			 * with current netmask, then recalculate
896 			 * the broadcast address. Otherwise it's a
897 			 * funny address, so don't touch it since
898 			 * the user seems to know what (s)he's doing...
899 			 */
900 			if ((dev->flags & IFF_BROADCAST) &&
901 			    (ifa->ifa_prefixlen < 31) &&
902 			    (ifa->ifa_broadcast ==
903 			     (ifa->ifa_local|~old_mask))) {
904 				ifa->ifa_broadcast = (ifa->ifa_local |
905 						      ~sin->sin_addr.s_addr);
906 			}
907 			inet_insert_ifa(ifa);
908 		}
909 		break;
910 	}
911 done:
912 	rtnl_unlock();
913 out:
914 	return ret;
915 rarok:
916 	rtnl_unlock();
917 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
918 	goto out;
919 }
920 
921 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
922 {
923 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
924 	struct in_ifaddr *ifa;
925 	struct ifreq ifr;
926 	int done = 0;
927 
928 	if (!in_dev)
929 		goto out;
930 
931 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
932 		if (!buf) {
933 			done += sizeof(ifr);
934 			continue;
935 		}
936 		if (len < (int) sizeof(ifr))
937 			break;
938 		memset(&ifr, 0, sizeof(struct ifreq));
939 		if (ifa->ifa_label)
940 			strcpy(ifr.ifr_name, ifa->ifa_label);
941 		else
942 			strcpy(ifr.ifr_name, dev->name);
943 
944 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
945 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
946 								ifa->ifa_local;
947 
948 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
949 			done = -EFAULT;
950 			break;
951 		}
952 		buf  += sizeof(struct ifreq);
953 		len  -= sizeof(struct ifreq);
954 		done += sizeof(struct ifreq);
955 	}
956 out:
957 	return done;
958 }
959 
960 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
961 {
962 	__be32 addr = 0;
963 	struct in_device *in_dev;
964 	struct net *net = dev_net(dev);
965 
966 	rcu_read_lock();
967 	in_dev = __in_dev_get_rcu(dev);
968 	if (!in_dev)
969 		goto no_in_dev;
970 
971 	for_primary_ifa(in_dev) {
972 		if (ifa->ifa_scope > scope)
973 			continue;
974 		if (!dst || inet_ifa_match(dst, ifa)) {
975 			addr = ifa->ifa_local;
976 			break;
977 		}
978 		if (!addr)
979 			addr = ifa->ifa_local;
980 	} endfor_ifa(in_dev);
981 
982 	if (addr)
983 		goto out_unlock;
984 no_in_dev:
985 
986 	/* Not loopback addresses on loopback should be preferred
987 	   in this case. It is importnat that lo is the first interface
988 	   in dev_base list.
989 	 */
990 	for_each_netdev_rcu(net, dev) {
991 		in_dev = __in_dev_get_rcu(dev);
992 		if (!in_dev)
993 			continue;
994 
995 		for_primary_ifa(in_dev) {
996 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
997 			    ifa->ifa_scope <= scope) {
998 				addr = ifa->ifa_local;
999 				goto out_unlock;
1000 			}
1001 		} endfor_ifa(in_dev);
1002 	}
1003 out_unlock:
1004 	rcu_read_unlock();
1005 	return addr;
1006 }
1007 EXPORT_SYMBOL(inet_select_addr);
1008 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks every address of @in_dev looking for two things:
 *  - a candidate address "addr": the first ifa_local with scope value
 *    <= @scope that equals @local (any address when @local is 0);
 *  - a "same" hit: an address whose subnet contains @local and @dst,
 *    each of which is treated as a wildcard when 0.
 *
 * Returns the candidate address if a "same" hit was found, 0 otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Pick up the first acceptable candidate address. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet already confirmed earlier: finished. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Explicit local or wildcard dst: accept. */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1045 
1046 /*
1047  * Confirm that local IP address exists using wildcards:
1048  * - in_dev: only on this interface, 0=any interface
1049  * - dst: only in the same subnet as dst, 0=any dst
1050  * - local: address, 0=autoselect the local address
1051  * - scope: maximum allowed scope value for the local address
1052  */
1053 __be32 inet_confirm_addr(struct in_device *in_dev,
1054 			 __be32 dst, __be32 local, int scope)
1055 {
1056 	__be32 addr = 0;
1057 	struct net_device *dev;
1058 	struct net *net;
1059 
1060 	if (scope != RT_SCOPE_LINK)
1061 		return confirm_addr_indev(in_dev, dst, local, scope);
1062 
1063 	net = dev_net(in_dev->dev);
1064 	rcu_read_lock();
1065 	for_each_netdev_rcu(net, dev) {
1066 		in_dev = __in_dev_get_rcu(dev);
1067 		if (in_dev) {
1068 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1069 			if (addr)
1070 				break;
1071 		}
1072 	}
1073 	rcu_read_unlock();
1074 
1075 	return addr;
1076 }
1077 EXPORT_SYMBOL(inet_confirm_addr);
1078 
1079 /*
1080  *	Device notifier
1081  */
1082 
1083 int register_inetaddr_notifier(struct notifier_block *nb)
1084 {
1085 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1086 }
1087 EXPORT_SYMBOL(register_inetaddr_notifier);
1088 
1089 int unregister_inetaddr_notifier(struct notifier_block *nb)
1090 {
1091 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1092 }
1093 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1094 
1095 /* Rename ifa_labels for a device name change. Make some effort to preserve
1096  * existing alias numbering and to create unique labels if possible.
1097 */
1098 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1099 {
1100 	struct in_ifaddr *ifa;
1101 	int named = 0;
1102 
1103 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1104 		char old[IFNAMSIZ], *dot;
1105 
1106 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1107 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1108 		if (named++ == 0)
1109 			goto skip;
1110 		dot = strchr(old, ':');
1111 		if (dot == NULL) {
1112 			sprintf(old, ":%d", named);
1113 			dot = old;
1114 		}
1115 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1116 			strcat(ifa->ifa_label, dot);
1117 		else
1118 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1119 skip:
1120 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1121 	}
1122 }
1123 
/*
 * Tell whether @mtu is large enough for IPv4 to be enabled on a device:
 * true for 68 and above, false below.
 * NOTE(review): 68 is presumably the RFC 791 minimum IPv4 MTU -- confirm.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1128 
1129 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1130 					struct in_device *in_dev)
1131 
1132 {
1133 	struct in_ifaddr *ifa;
1134 
1135 	for (ifa = in_dev->ifa_list; ifa;
1136 	     ifa = ifa->ifa_next) {
1137 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1138 			 ifa->ifa_local, dev,
1139 			 ifa->ifa_local, NULL,
1140 			 dev->dev_addr, NULL);
1141 	}
1142 }
1143 
1144 /* Called only under RTNL semaphore */
1145 
1146 static int inetdev_event(struct notifier_block *this, unsigned long event,
1147 			 void *ptr)
1148 {
1149 	struct net_device *dev = ptr;
1150 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1151 
1152 	ASSERT_RTNL();
1153 
1154 	if (!in_dev) {
1155 		if (event == NETDEV_REGISTER) {
1156 			in_dev = inetdev_init(dev);
1157 			if (!in_dev)
1158 				return notifier_from_errno(-ENOMEM);
1159 			if (dev->flags & IFF_LOOPBACK) {
1160 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1161 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1162 			}
1163 		} else if (event == NETDEV_CHANGEMTU) {
1164 			/* Re-enabling IP */
1165 			if (inetdev_valid_mtu(dev->mtu))
1166 				in_dev = inetdev_init(dev);
1167 		}
1168 		goto out;
1169 	}
1170 
1171 	switch (event) {
1172 	case NETDEV_REGISTER:
1173 		pr_debug("%s: bug\n", __func__);
1174 		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1175 		break;
1176 	case NETDEV_UP:
1177 		if (!inetdev_valid_mtu(dev->mtu))
1178 			break;
1179 		if (dev->flags & IFF_LOOPBACK) {
1180 			struct in_ifaddr *ifa = inet_alloc_ifa();
1181 
1182 			if (ifa) {
1183 				INIT_HLIST_NODE(&ifa->hash);
1184 				ifa->ifa_local =
1185 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1186 				ifa->ifa_prefixlen = 8;
1187 				ifa->ifa_mask = inet_make_mask(8);
1188 				in_dev_hold(in_dev);
1189 				ifa->ifa_dev = in_dev;
1190 				ifa->ifa_scope = RT_SCOPE_HOST;
1191 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1192 				inet_insert_ifa(ifa);
1193 			}
1194 		}
1195 		ip_mc_up(in_dev);
1196 		/* fall through */
1197 	case NETDEV_CHANGEADDR:
1198 		if (!IN_DEV_ARP_NOTIFY(in_dev))
1199 			break;
1200 		/* fall through */
1201 	case NETDEV_NOTIFY_PEERS:
1202 		/* Send gratuitous ARP to notify of link change */
1203 		inetdev_send_gratuitous_arp(dev, in_dev);
1204 		break;
1205 	case NETDEV_DOWN:
1206 		ip_mc_down(in_dev);
1207 		break;
1208 	case NETDEV_PRE_TYPE_CHANGE:
1209 		ip_mc_unmap(in_dev);
1210 		break;
1211 	case NETDEV_POST_TYPE_CHANGE:
1212 		ip_mc_remap(in_dev);
1213 		break;
1214 	case NETDEV_CHANGEMTU:
1215 		if (inetdev_valid_mtu(dev->mtu))
1216 			break;
1217 		/* disable IP when MTU is not enough */
1218 	case NETDEV_UNREGISTER:
1219 		inetdev_destroy(in_dev);
1220 		break;
1221 	case NETDEV_CHANGENAME:
1222 		/* Do not notify about label change, this event is
1223 		 * not interesting to applications using netlink.
1224 		 */
1225 		inetdev_changename(dev, in_dev);
1226 
1227 		devinet_sysctl_unregister(in_dev);
1228 		devinet_sysctl_register(in_dev);
1229 		break;
1230 	}
1231 out:
1232 	return NOTIFY_DONE;
1233 }
1234 
1235 static struct notifier_block ip_netdev_notifier = {
1236 	.notifier_call = inetdev_event,
1237 };
1238 
1239 static size_t inet_nlmsg_size(void)
1240 {
1241 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1242 	       + nla_total_size(4) /* IFA_ADDRESS */
1243 	       + nla_total_size(4) /* IFA_LOCAL */
1244 	       + nla_total_size(4) /* IFA_BROADCAST */
1245 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1246 }
1247 
1248 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1249 			    u32 portid, u32 seq, int event, unsigned int flags)
1250 {
1251 	struct ifaddrmsg *ifm;
1252 	struct nlmsghdr  *nlh;
1253 
1254 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1255 	if (nlh == NULL)
1256 		return -EMSGSIZE;
1257 
1258 	ifm = nlmsg_data(nlh);
1259 	ifm->ifa_family = AF_INET;
1260 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1261 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1262 	ifm->ifa_scope = ifa->ifa_scope;
1263 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1264 
1265 	if ((ifa->ifa_address &&
1266 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1267 	    (ifa->ifa_local &&
1268 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1269 	    (ifa->ifa_broadcast &&
1270 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1271 	    (ifa->ifa_label[0] &&
1272 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1273 		goto nla_put_failure;
1274 
1275 	return nlmsg_end(skb, nlh);
1276 
1277 nla_put_failure:
1278 	nlmsg_cancel(skb, nlh);
1279 	return -EMSGSIZE;
1280 }
1281 
1282 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1283 {
1284 	struct net *net = sock_net(skb->sk);
1285 	int h, s_h;
1286 	int idx, s_idx;
1287 	int ip_idx, s_ip_idx;
1288 	struct net_device *dev;
1289 	struct in_device *in_dev;
1290 	struct in_ifaddr *ifa;
1291 	struct hlist_head *head;
1292 	struct hlist_node *node;
1293 
1294 	s_h = cb->args[0];
1295 	s_idx = idx = cb->args[1];
1296 	s_ip_idx = ip_idx = cb->args[2];
1297 
1298 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1299 		idx = 0;
1300 		head = &net->dev_index_head[h];
1301 		rcu_read_lock();
1302 		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1303 			if (idx < s_idx)
1304 				goto cont;
1305 			if (h > s_h || idx > s_idx)
1306 				s_ip_idx = 0;
1307 			in_dev = __in_dev_get_rcu(dev);
1308 			if (!in_dev)
1309 				goto cont;
1310 
1311 			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1312 			     ifa = ifa->ifa_next, ip_idx++) {
1313 				if (ip_idx < s_ip_idx)
1314 					continue;
1315 				if (inet_fill_ifaddr(skb, ifa,
1316 					     NETLINK_CB(cb->skb).portid,
1317 					     cb->nlh->nlmsg_seq,
1318 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1319 					rcu_read_unlock();
1320 					goto done;
1321 				}
1322 			}
1323 cont:
1324 			idx++;
1325 		}
1326 		rcu_read_unlock();
1327 	}
1328 
1329 done:
1330 	cb->args[0] = h;
1331 	cb->args[1] = idx;
1332 	cb->args[2] = ip_idx;
1333 
1334 	return skb->len;
1335 }
1336 
1337 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1338 		      u32 portid)
1339 {
1340 	struct sk_buff *skb;
1341 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1342 	int err = -ENOBUFS;
1343 	struct net *net;
1344 
1345 	net = dev_net(ifa->ifa_dev->dev);
1346 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1347 	if (skb == NULL)
1348 		goto errout;
1349 
1350 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1351 	if (err < 0) {
1352 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1353 		WARN_ON(err == -EMSGSIZE);
1354 		kfree_skb(skb);
1355 		goto errout;
1356 	}
1357 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1358 	return;
1359 errout:
1360 	if (err < 0)
1361 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1362 }
1363 
1364 static size_t inet_get_link_af_size(const struct net_device *dev)
1365 {
1366 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1367 
1368 	if (!in_dev)
1369 		return 0;
1370 
1371 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1372 }
1373 
1374 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1375 {
1376 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1377 	struct nlattr *nla;
1378 	int i;
1379 
1380 	if (!in_dev)
1381 		return -ENODATA;
1382 
1383 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1384 	if (nla == NULL)
1385 		return -EMSGSIZE;
1386 
1387 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1388 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1389 
1390 	return 0;
1391 }
1392 
1393 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1394 	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1395 };
1396 
1397 static int inet_validate_link_af(const struct net_device *dev,
1398 				 const struct nlattr *nla)
1399 {
1400 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1401 	int err, rem;
1402 
1403 	if (dev && !__in_dev_get_rtnl(dev))
1404 		return -EAFNOSUPPORT;
1405 
1406 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1407 	if (err < 0)
1408 		return err;
1409 
1410 	if (tb[IFLA_INET_CONF]) {
1411 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1412 			int cfgid = nla_type(a);
1413 
1414 			if (nla_len(a) < 4)
1415 				return -EINVAL;
1416 
1417 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1418 				return -EINVAL;
1419 		}
1420 	}
1421 
1422 	return 0;
1423 }
1424 
1425 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1426 {
1427 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1428 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1429 	int rem;
1430 
1431 	if (!in_dev)
1432 		return -EAFNOSUPPORT;
1433 
1434 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1435 		BUG();
1436 
1437 	if (tb[IFLA_INET_CONF]) {
1438 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1439 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1440 	}
1441 
1442 	return 0;
1443 }
1444 
1445 #ifdef CONFIG_SYSCTL
1446 
1447 static void devinet_copy_dflt_conf(struct net *net, int i)
1448 {
1449 	struct net_device *dev;
1450 
1451 	rcu_read_lock();
1452 	for_each_netdev_rcu(net, dev) {
1453 		struct in_device *in_dev;
1454 
1455 		in_dev = __in_dev_get_rcu(dev);
1456 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1457 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1458 	}
1459 	rcu_read_unlock();
1460 }
1461 
1462 /* called with RTNL locked */
1463 static void inet_forward_change(struct net *net)
1464 {
1465 	struct net_device *dev;
1466 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1467 
1468 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1470 
1471 	for_each_netdev(net, dev) {
1472 		struct in_device *in_dev;
1473 		if (on)
1474 			dev_disable_lro(dev);
1475 		rcu_read_lock();
1476 		in_dev = __in_dev_get_rcu(dev);
1477 		if (in_dev)
1478 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1479 		rcu_read_unlock();
1480 	}
1481 }
1482 
1483 static int devinet_conf_proc(ctl_table *ctl, int write,
1484 			     void __user *buffer,
1485 			     size_t *lenp, loff_t *ppos)
1486 {
1487 	int old_value = *(int *)ctl->data;
1488 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489 	int new_value = *(int *)ctl->data;
1490 
1491 	if (write) {
1492 		struct ipv4_devconf *cnf = ctl->extra1;
1493 		struct net *net = ctl->extra2;
1494 		int i = (int *)ctl->data - cnf->data;
1495 
1496 		set_bit(i, cnf->state);
1497 
1498 		if (cnf == net->ipv4.devconf_dflt)
1499 			devinet_copy_dflt_conf(net, i);
1500 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1501 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502 			if ((new_value == 0) && (old_value != 0))
1503 				rt_cache_flush(net);
1504 	}
1505 
1506 	return ret;
1507 }
1508 
1509 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510 				  void __user *buffer,
1511 				  size_t *lenp, loff_t *ppos)
1512 {
1513 	int *valp = ctl->data;
1514 	int val = *valp;
1515 	loff_t pos = *ppos;
1516 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1517 
1518 	if (write && *valp != val) {
1519 		struct net *net = ctl->extra2;
1520 
1521 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522 			if (!rtnl_trylock()) {
1523 				/* Restore the original values before restarting */
1524 				*valp = val;
1525 				*ppos = pos;
1526 				return restart_syscall();
1527 			}
1528 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529 				inet_forward_change(net);
1530 			} else if (*valp) {
1531 				struct ipv4_devconf *cnf = ctl->extra1;
1532 				struct in_device *idev =
1533 					container_of(cnf, struct in_device, cnf);
1534 				dev_disable_lro(idev->dev);
1535 			}
1536 			rtnl_unlock();
1537 			rt_cache_flush(net);
1538 		}
1539 	}
1540 
1541 	return ret;
1542 }
1543 
1544 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545 				void __user *buffer,
1546 				size_t *lenp, loff_t *ppos)
1547 {
1548 	int *valp = ctl->data;
1549 	int val = *valp;
1550 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551 	struct net *net = ctl->extra2;
1552 
1553 	if (write && *valp != val)
1554 		rt_cache_flush(net);
1555 
1556 	return ret;
1557 }
1558 
/*
 * Initialiser for one ctl_table entry of the devconf template.
 *
 * @attr: suffix of the IPV4_DEVCONF_* index; the entry points at slot
 *        IPV4_DEVCONF_<attr> - 1 of ipv4_devconf.data[]
 * @name: procname of the entry
 * @mval: file mode
 * @proc: proc_handler
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-chosen handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1581 
1582 static struct devinet_sysctl_table {
1583 	struct ctl_table_header *sysctl_header;
1584 	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1585 } devinet_sysctl = {
1586 	.devinet_vars = {
1587 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1588 					     devinet_sysctl_forward),
1589 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1590 
1591 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1592 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1593 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1594 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1595 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1596 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1597 					"accept_source_route"),
1598 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1599 		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1600 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1601 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1602 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1603 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1604 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1605 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1606 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1607 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1608 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1609 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1610 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1611 
1612 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1613 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1614 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1615 					      "force_igmp_version"),
1616 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1617 					      "promote_secondaries"),
1618 		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1619 					      "route_localnet"),
1620 	},
1621 };
1622 
1623 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1624 					struct ipv4_devconf *p)
1625 {
1626 	int i;
1627 	struct devinet_sysctl_table *t;
1628 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1629 
1630 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1631 	if (!t)
1632 		goto out;
1633 
1634 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1635 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1636 		t->devinet_vars[i].extra1 = p;
1637 		t->devinet_vars[i].extra2 = net;
1638 	}
1639 
1640 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1641 
1642 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1643 	if (!t->sysctl_header)
1644 		goto free;
1645 
1646 	p->sysctl = t;
1647 	return 0;
1648 
1649 free:
1650 	kfree(t);
1651 out:
1652 	return -ENOBUFS;
1653 }
1654 
1655 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1656 {
1657 	struct devinet_sysctl_table *t = cnf->sysctl;
1658 
1659 	if (t == NULL)
1660 		return;
1661 
1662 	cnf->sysctl = NULL;
1663 	unregister_net_sysctl_table(t->sysctl_header);
1664 	kfree(t);
1665 }
1666 
1667 static void devinet_sysctl_register(struct in_device *idev)
1668 {
1669 	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1670 	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1671 					&idev->cnf);
1672 }
1673 
1674 static void devinet_sysctl_unregister(struct in_device *idev)
1675 {
1676 	__devinet_sysctl_unregister(&idev->cnf);
1677 	neigh_sysctl_unregister(idev->arp_parms);
1678 }
1679 
1680 static struct ctl_table ctl_forward_entry[] = {
1681 	{
1682 		.procname	= "ip_forward",
1683 		.data		= &ipv4_devconf.data[
1684 					IPV4_DEVCONF_FORWARDING - 1],
1685 		.maxlen		= sizeof(int),
1686 		.mode		= 0644,
1687 		.proc_handler	= devinet_sysctl_forward,
1688 		.extra1		= &ipv4_devconf,
1689 		.extra2		= &init_net,
1690 	},
1691 	{ },
1692 };
1693 #endif
1694 
1695 static __net_init int devinet_init_net(struct net *net)
1696 {
1697 	int err;
1698 	struct ipv4_devconf *all, *dflt;
1699 #ifdef CONFIG_SYSCTL
1700 	struct ctl_table *tbl = ctl_forward_entry;
1701 	struct ctl_table_header *forw_hdr;
1702 #endif
1703 
1704 	err = -ENOMEM;
1705 	all = &ipv4_devconf;
1706 	dflt = &ipv4_devconf_dflt;
1707 
1708 	if (!net_eq(net, &init_net)) {
1709 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1710 		if (all == NULL)
1711 			goto err_alloc_all;
1712 
1713 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1714 		if (dflt == NULL)
1715 			goto err_alloc_dflt;
1716 
1717 #ifdef CONFIG_SYSCTL
1718 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1719 		if (tbl == NULL)
1720 			goto err_alloc_ctl;
1721 
1722 		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1723 		tbl[0].extra1 = all;
1724 		tbl[0].extra2 = net;
1725 #endif
1726 	}
1727 
1728 #ifdef CONFIG_SYSCTL
1729 	err = __devinet_sysctl_register(net, "all", all);
1730 	if (err < 0)
1731 		goto err_reg_all;
1732 
1733 	err = __devinet_sysctl_register(net, "default", dflt);
1734 	if (err < 0)
1735 		goto err_reg_dflt;
1736 
1737 	err = -ENOMEM;
1738 	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1739 	if (forw_hdr == NULL)
1740 		goto err_reg_ctl;
1741 	net->ipv4.forw_hdr = forw_hdr;
1742 #endif
1743 
1744 	net->ipv4.devconf_all = all;
1745 	net->ipv4.devconf_dflt = dflt;
1746 	return 0;
1747 
1748 #ifdef CONFIG_SYSCTL
1749 err_reg_ctl:
1750 	__devinet_sysctl_unregister(dflt);
1751 err_reg_dflt:
1752 	__devinet_sysctl_unregister(all);
1753 err_reg_all:
1754 	if (tbl != ctl_forward_entry)
1755 		kfree(tbl);
1756 err_alloc_ctl:
1757 #endif
1758 	if (dflt != &ipv4_devconf_dflt)
1759 		kfree(dflt);
1760 err_alloc_dflt:
1761 	if (all != &ipv4_devconf)
1762 		kfree(all);
1763 err_alloc_all:
1764 	return err;
1765 }
1766 
1767 static __net_exit void devinet_exit_net(struct net *net)
1768 {
1769 #ifdef CONFIG_SYSCTL
1770 	struct ctl_table *tbl;
1771 
1772 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1773 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1774 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1775 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1776 	kfree(tbl);
1777 #endif
1778 	kfree(net->ipv4.devconf_dflt);
1779 	kfree(net->ipv4.devconf_all);
1780 }
1781 
1782 static __net_initdata struct pernet_operations devinet_ops = {
1783 	.init = devinet_init_net,
1784 	.exit = devinet_exit_net,
1785 };
1786 
1787 static struct rtnl_af_ops inet_af_ops = {
1788 	.family		  = AF_INET,
1789 	.fill_link_af	  = inet_fill_link_af,
1790 	.get_link_af_size = inet_get_link_af_size,
1791 	.validate_link_af = inet_validate_link_af,
1792 	.set_link_af	  = inet_set_link_af,
1793 };
1794 
1795 void __init devinet_init(void)
1796 {
1797 	int i;
1798 
1799 	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1800 		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1801 
1802 	register_pernet_subsys(&devinet_ops);
1803 
1804 	register_gifconf(PF_INET, inet_gifconf);
1805 	register_netdevice_notifier(&ip_netdev_notifier);
1806 
1807 	rtnl_af_register(&inet_af_ops);
1808 
1809 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1812 }
1813 
1814