xref: /linux/net/ipv4/devinet.c (revision b889fcf63cb62e7fdb7816565e28f44dbe4a76a5)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *	Changes:
19  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
20  *					lists.
21  *		Cyrus Durgin:		updated for kmod
22  *		Matthias Andree:	in devinet_ioctl, compare label and
23  *					address (4.4BSD alias style support),
24  *					fall back to comparing just the label
25  *					if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 
67 #include "fib_lookup.h"
68 
/*
 * Built-in initial values for an ipv4_devconf instance: the redirect
 * knobs and SHARED_MEDIA start out enabled, every other entry is zero.
 * The IPV4_DEVCONF_* constants are used with "- 1" to index ->data,
 * i.e. the constants appear to be 1-based.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
77 
/*
 * Built-in initial values for the "default" ipv4_devconf.  Same as
 * ipv4_devconf above, except that ACCEPT_SOURCE_ROUTE is also enabled.
 * inetdev_init() copies the per-namespace devconf_dflt into every new
 * in_device; presumably that per-namespace copy is seeded from this
 * template (the seeding code is outside this part of the file).
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
87 
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesized so that any pointer expression (for example
 * "nets + 1") can be passed without operator-precedence surprises.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90 
/*
 * Netlink attribute policy handed to nlmsg_parse() by the RTM_NEWADDR /
 * RTM_DELADDR handlers below: the three address attributes are 32-bit
 * values and the label is a string of at most IFNAMSIZ - 1 characters.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
97 
/* The address hash table has 2^IN4_ADDR_HSIZE_SHIFT buckets. */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash of all in_ifaddr entries keyed by local address (see
 * inet_addr_hash()).  Insertions and removals take inet_addr_hash_lock;
 * __ip_dev_find() walks the buckets under rcu_read_lock() only.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
103 
104 static u32 inet_addr_hash(struct net *net, __be32 addr)
105 {
106 	u32 val = (__force u32) addr ^ net_hash_mix(net);
107 
108 	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
109 }
110 
111 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
112 {
113 	u32 hash = inet_addr_hash(net, ifa->ifa_local);
114 
115 	spin_lock(&inet_addr_hash_lock);
116 	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
117 	spin_unlock(&inet_addr_hash_lock);
118 }
119 
/*
 * Unlink @ifa from the address hash table under inet_addr_hash_lock.
 * The RCU-aware "del_init" variant is used, so the node is left in an
 * initialized state after removal.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
	spin_lock(&inet_addr_hash_lock);
	hlist_del_init_rcu(&ifa->hash);
	spin_unlock(&inet_addr_hash_lock);
}
126 
127 /**
128  * __ip_dev_find - find the first device with a given source address.
129  * @net: the net namespace
130  * @addr: the source address
131  * @devref: if true, take a reference on the found device
132  *
133  * If a caller uses devref=false, it should be protected by RCU, or RTNL
134  */
135 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
136 {
137 	u32 hash = inet_addr_hash(net, addr);
138 	struct net_device *result = NULL;
139 	struct in_ifaddr *ifa;
140 	struct hlist_node *node;
141 
142 	rcu_read_lock();
143 	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
144 		if (ifa->ifa_local == addr) {
145 			struct net_device *dev = ifa->ifa_dev->dev;
146 
147 			if (!net_eq(dev_net(dev), net))
148 				continue;
149 			result = dev;
150 			break;
151 		}
152 	}
153 	if (!result) {
154 		struct flowi4 fl4 = { .daddr = addr };
155 		struct fib_result res = { 0 };
156 		struct fib_table *local;
157 
158 		/* Fallback to FIB local table so that communication
159 		 * over loopback subnets work.
160 		 */
161 		local = fib_get_table(net, RT_TABLE_LOCAL);
162 		if (local &&
163 		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
164 		    res.type == RTN_LOCAL)
165 			result = FIB_RES_DEV(res);
166 	}
167 	if (result && devref)
168 		dev_hold(result);
169 	rcu_read_unlock();
170 	return result;
171 }
172 EXPORT_SYMBOL(__ip_dev_find);
173 
/* Defined later in the file; used by the add/delete paths below. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected ifaddr. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
/*
 * Per-device sysctl registration.  Without CONFIG_SYSCTL these collapse
 * to empty stubs so callers need no #ifdef of their own.
 */
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
190 
191 /* Locks all the inet devices. */
192 
/*
 * Allocate a zero-filled in_ifaddr with GFP_KERNEL.
 * Returns NULL on allocation failure; callers must check the result
 * before touching any field (including ->hash).
 */
static struct in_ifaddr *inet_alloc_ifa(void)
{
	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}
197 
198 static void inet_rcu_free_ifa(struct rcu_head *head)
199 {
200 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
201 	if (ifa->ifa_dev)
202 		in_dev_put(ifa->ifa_dev);
203 	kfree(ifa);
204 }
205 
/*
 * Schedule @ifa to be freed by inet_rcu_free_ifa() via call_rcu(), so
 * the memory is not released while RCU readers may still reference it.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
210 
211 void in_dev_finish_destroy(struct in_device *idev)
212 {
213 	struct net_device *dev = idev->dev;
214 
215 	WARN_ON(idev->ifa_list);
216 	WARN_ON(idev->mc_list);
217 #ifdef NET_REFCNT_DEBUG
218 	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
219 #endif
220 	dev_put(dev);
221 	if (!idev->dead)
222 		pr_err("Freeing alive in_device %p\n", idev);
223 	else
224 		kfree(idev);
225 }
226 EXPORT_SYMBOL(in_dev_finish_destroy);
227 
228 static struct in_device *inetdev_init(struct net_device *dev)
229 {
230 	struct in_device *in_dev;
231 
232 	ASSERT_RTNL();
233 
234 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
235 	if (!in_dev)
236 		goto out;
237 	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
238 			sizeof(in_dev->cnf));
239 	in_dev->cnf.sysctl = NULL;
240 	in_dev->dev = dev;
241 	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
242 	if (!in_dev->arp_parms)
243 		goto out_kfree;
244 	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
245 		dev_disable_lro(dev);
246 	/* Reference in_dev->dev */
247 	dev_hold(dev);
248 	/* Account for reference dev->ip_ptr (below) */
249 	in_dev_hold(in_dev);
250 
251 	devinet_sysctl_register(in_dev);
252 	ip_mc_init_dev(in_dev);
253 	if (dev->flags & IFF_UP)
254 		ip_mc_up(in_dev);
255 
256 	/* we can receive as soon as ip_ptr is set -- do this last */
257 	rcu_assign_pointer(dev->ip_ptr, in_dev);
258 out:
259 	return in_dev;
260 out_kfree:
261 	kfree(in_dev);
262 	in_dev = NULL;
263 	goto out;
264 }
265 
266 static void in_dev_rcu_put(struct rcu_head *head)
267 {
268 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
269 	in_dev_put(idev);
270 }
271 
272 static void inetdev_destroy(struct in_device *in_dev)
273 {
274 	struct in_ifaddr *ifa;
275 	struct net_device *dev;
276 
277 	ASSERT_RTNL();
278 
279 	dev = in_dev->dev;
280 
281 	in_dev->dead = 1;
282 
283 	ip_mc_destroy_dev(in_dev);
284 
285 	while ((ifa = in_dev->ifa_list) != NULL) {
286 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
287 		inet_free_ifa(ifa);
288 	}
289 
290 	RCU_INIT_POINTER(dev->ip_ptr, NULL);
291 
292 	devinet_sysctl_unregister(in_dev);
293 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
294 	arp_ifdown(dev);
295 
296 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
297 }
298 
299 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
300 {
301 	rcu_read_lock();
302 	for_primary_ifa(in_dev) {
303 		if (inet_ifa_match(a, ifa)) {
304 			if (!b || inet_ifa_match(b, ifa)) {
305 				rcu_read_unlock();
306 				return 1;
307 			}
308 		}
309 	} endfor_ifa(in_dev);
310 	rcu_read_unlock();
311 	return 0;
312 }
313 
/*
 * Remove the address that *@ifap points at from @in_dev's address list.
 * Must be called with RTNL held.
 *
 * @in_dev:  device the address belongs to
 * @ifap:    pointer to the list link that currently points at the
 *           address to delete (either &in_dev->ifa_list or the
 *           ->ifa_next of its predecessor)
 * @destroy: if non-zero, the removed address is also freed (via RCU);
 *           otherwise the caller keeps ownership of it
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * When a primary address is deleted, its matching secondaries are
 * either deleted too or, if IN_DEV_PROMOTE_SECONDARIES is set, the
 * first of them is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set.
	 */

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Track the last primary after which a promoted
			 * address may be re-inserted (scope ordering).
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary becomes primary. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the successor before the list is re-linked. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move the promoted address right after last_prim. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
413 
/*
 * Convenience wrapper around __inet_del_ifa() for callers that are not
 * handling a netlink request: no nlmsghdr and a port id of 0 are passed.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
419 
420 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
421 			     u32 portid)
422 {
423 	struct in_device *in_dev = ifa->ifa_dev;
424 	struct in_ifaddr *ifa1, **ifap, **last_primary;
425 
426 	ASSERT_RTNL();
427 
428 	if (!ifa->ifa_local) {
429 		inet_free_ifa(ifa);
430 		return 0;
431 	}
432 
433 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
434 	last_primary = &in_dev->ifa_list;
435 
436 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
437 	     ifap = &ifa1->ifa_next) {
438 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
439 		    ifa->ifa_scope <= ifa1->ifa_scope)
440 			last_primary = &ifa1->ifa_next;
441 		if (ifa1->ifa_mask == ifa->ifa_mask &&
442 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
443 			if (ifa1->ifa_local == ifa->ifa_local) {
444 				inet_free_ifa(ifa);
445 				return -EEXIST;
446 			}
447 			if (ifa1->ifa_scope != ifa->ifa_scope) {
448 				inet_free_ifa(ifa);
449 				return -EINVAL;
450 			}
451 			ifa->ifa_flags |= IFA_F_SECONDARY;
452 		}
453 	}
454 
455 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
456 		net_srandom(ifa->ifa_local);
457 		ifap = last_primary;
458 	}
459 
460 	ifa->ifa_next = *ifap;
461 	*ifap = ifa;
462 
463 	inet_hash_insert(dev_net(in_dev->dev), ifa);
464 
465 	/* Send message first, then call notifier.
466 	   Notifier will trigger FIB update, so that
467 	   listeners of netlink will know about new ifaddr */
468 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
469 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
470 
471 	return 0;
472 }
473 
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that are
 * not handling a netlink request.  Returns whatever __inet_insert_ifa()
 * returns; note that @ifa is freed by the callee on failure.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
478 
479 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
480 {
481 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
482 
483 	ASSERT_RTNL();
484 
485 	if (!in_dev) {
486 		inet_free_ifa(ifa);
487 		return -ENOBUFS;
488 	}
489 	ipv4_devconf_setall(in_dev);
490 	if (ifa->ifa_dev != in_dev) {
491 		WARN_ON(ifa->ifa_dev);
492 		in_dev_hold(in_dev);
493 		ifa->ifa_dev = in_dev;
494 	}
495 	if (ipv4_is_loopback(ifa->ifa_local))
496 		ifa->ifa_scope = RT_SCOPE_HOST;
497 	return inet_insert_ifa(ifa);
498 }
499 
500 /* Caller must hold RCU or RTNL :
501  * We dont take a reference on found in_device
502  */
503 struct in_device *inetdev_by_index(struct net *net, int ifindex)
504 {
505 	struct net_device *dev;
506 	struct in_device *in_dev = NULL;
507 
508 	rcu_read_lock();
509 	dev = dev_get_by_index_rcu(net, ifindex);
510 	if (dev)
511 		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
512 	rcu_read_unlock();
513 	return in_dev;
514 }
515 EXPORT_SYMBOL(inetdev_by_index);
516 
517 /* Called only from RTNL semaphored context. No locks. */
518 
519 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
520 				    __be32 mask)
521 {
522 	ASSERT_RTNL();
523 
524 	for_primary_ifa(in_dev) {
525 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
526 			return ifa;
527 	} endfor_ifa(in_dev);
528 	return NULL;
529 }
530 
531 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
532 {
533 	struct net *net = sock_net(skb->sk);
534 	struct nlattr *tb[IFA_MAX+1];
535 	struct in_device *in_dev;
536 	struct ifaddrmsg *ifm;
537 	struct in_ifaddr *ifa, **ifap;
538 	int err = -EINVAL;
539 
540 	ASSERT_RTNL();
541 
542 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
543 	if (err < 0)
544 		goto errout;
545 
546 	ifm = nlmsg_data(nlh);
547 	in_dev = inetdev_by_index(net, ifm->ifa_index);
548 	if (in_dev == NULL) {
549 		err = -ENODEV;
550 		goto errout;
551 	}
552 
553 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
554 	     ifap = &ifa->ifa_next) {
555 		if (tb[IFA_LOCAL] &&
556 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
557 			continue;
558 
559 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
560 			continue;
561 
562 		if (tb[IFA_ADDRESS] &&
563 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
564 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
565 			continue;
566 
567 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
568 		return 0;
569 	}
570 
571 	err = -EADDRNOTAVAIL;
572 errout:
573 	return err;
574 }
575 
576 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
577 {
578 	struct nlattr *tb[IFA_MAX+1];
579 	struct in_ifaddr *ifa;
580 	struct ifaddrmsg *ifm;
581 	struct net_device *dev;
582 	struct in_device *in_dev;
583 	int err;
584 
585 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
586 	if (err < 0)
587 		goto errout;
588 
589 	ifm = nlmsg_data(nlh);
590 	err = -EINVAL;
591 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
592 		goto errout;
593 
594 	dev = __dev_get_by_index(net, ifm->ifa_index);
595 	err = -ENODEV;
596 	if (dev == NULL)
597 		goto errout;
598 
599 	in_dev = __in_dev_get_rtnl(dev);
600 	err = -ENOBUFS;
601 	if (in_dev == NULL)
602 		goto errout;
603 
604 	ifa = inet_alloc_ifa();
605 	if (ifa == NULL)
606 		/*
607 		 * A potential indev allocation can be left alive, it stays
608 		 * assigned to its device and is destroy with it.
609 		 */
610 		goto errout;
611 
612 	ipv4_devconf_setall(in_dev);
613 	in_dev_hold(in_dev);
614 
615 	if (tb[IFA_ADDRESS] == NULL)
616 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
617 
618 	INIT_HLIST_NODE(&ifa->hash);
619 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
620 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
621 	ifa->ifa_flags = ifm->ifa_flags;
622 	ifa->ifa_scope = ifm->ifa_scope;
623 	ifa->ifa_dev = in_dev;
624 
625 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
626 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
627 
628 	if (tb[IFA_BROADCAST])
629 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
630 
631 	if (tb[IFA_LABEL])
632 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
633 	else
634 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
635 
636 	return ifa;
637 
638 errout:
639 	return ERR_PTR(err);
640 }
641 
642 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
643 {
644 	struct net *net = sock_net(skb->sk);
645 	struct in_ifaddr *ifa;
646 
647 	ASSERT_RTNL();
648 
649 	ifa = rtm_to_ifaddr(net, nlh);
650 	if (IS_ERR(ifa))
651 		return PTR_ERR(ifa);
652 
653 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
654 }
655 
656 /*
657  *	Determine a default network mask, based on the IP address.
658  */
659 
660 static int inet_abc_len(__be32 addr)
661 {
662 	int rc = -1;	/* Something else, probably a multicast. */
663 
664 	if (ipv4_is_zeronet(addr))
665 		rc = 0;
666 	else {
667 		__u32 haddr = ntohl(addr);
668 
669 		if (IN_CLASSA(haddr))
670 			rc = 8;
671 		else if (IN_CLASSB(haddr))
672 			rc = 16;
673 		else if (IN_CLASSC(haddr))
674 			rc = 24;
675 	}
676 
677 	return rc;
678 }
679 
680 
681 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
682 {
683 	struct ifreq ifr;
684 	struct sockaddr_in sin_orig;
685 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
686 	struct in_device *in_dev;
687 	struct in_ifaddr **ifap = NULL;
688 	struct in_ifaddr *ifa = NULL;
689 	struct net_device *dev;
690 	char *colon;
691 	int ret = -EFAULT;
692 	int tryaddrmatch = 0;
693 
694 	/*
695 	 *	Fetch the caller's info block into kernel space
696 	 */
697 
698 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
699 		goto out;
700 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
701 
702 	/* save original address for comparison */
703 	memcpy(&sin_orig, sin, sizeof(*sin));
704 
705 	colon = strchr(ifr.ifr_name, ':');
706 	if (colon)
707 		*colon = 0;
708 
709 	dev_load(net, ifr.ifr_name);
710 
711 	switch (cmd) {
712 	case SIOCGIFADDR:	/* Get interface address */
713 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
714 	case SIOCGIFDSTADDR:	/* Get the destination address */
715 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
716 		/* Note that these ioctls will not sleep,
717 		   so that we do not impose a lock.
718 		   One day we will be forced to put shlock here (I mean SMP)
719 		 */
720 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
721 		memset(sin, 0, sizeof(*sin));
722 		sin->sin_family = AF_INET;
723 		break;
724 
725 	case SIOCSIFFLAGS:
726 		ret = -EPERM;
727 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
728 			goto out;
729 		break;
730 	case SIOCSIFADDR:	/* Set interface address (and family) */
731 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
732 	case SIOCSIFDSTADDR:	/* Set the destination address */
733 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
734 		ret = -EPERM;
735 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
736 			goto out;
737 		ret = -EINVAL;
738 		if (sin->sin_family != AF_INET)
739 			goto out;
740 		break;
741 	default:
742 		ret = -EINVAL;
743 		goto out;
744 	}
745 
746 	rtnl_lock();
747 
748 	ret = -ENODEV;
749 	dev = __dev_get_by_name(net, ifr.ifr_name);
750 	if (!dev)
751 		goto done;
752 
753 	if (colon)
754 		*colon = ':';
755 
756 	in_dev = __in_dev_get_rtnl(dev);
757 	if (in_dev) {
758 		if (tryaddrmatch) {
759 			/* Matthias Andree */
760 			/* compare label and address (4.4BSD style) */
761 			/* note: we only do this for a limited set of ioctls
762 			   and only if the original address family was AF_INET.
763 			   This is checked above. */
764 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
765 			     ifap = &ifa->ifa_next) {
766 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
767 				    sin_orig.sin_addr.s_addr ==
768 							ifa->ifa_local) {
769 					break; /* found */
770 				}
771 			}
772 		}
773 		/* we didn't get a match, maybe the application is
774 		   4.3BSD-style and passed in junk so we fall back to
775 		   comparing just the label */
776 		if (!ifa) {
777 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
778 			     ifap = &ifa->ifa_next)
779 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
780 					break;
781 		}
782 	}
783 
784 	ret = -EADDRNOTAVAIL;
785 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
786 		goto done;
787 
788 	switch (cmd) {
789 	case SIOCGIFADDR:	/* Get interface address */
790 		sin->sin_addr.s_addr = ifa->ifa_local;
791 		goto rarok;
792 
793 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
794 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
795 		goto rarok;
796 
797 	case SIOCGIFDSTADDR:	/* Get the destination address */
798 		sin->sin_addr.s_addr = ifa->ifa_address;
799 		goto rarok;
800 
801 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
802 		sin->sin_addr.s_addr = ifa->ifa_mask;
803 		goto rarok;
804 
805 	case SIOCSIFFLAGS:
806 		if (colon) {
807 			ret = -EADDRNOTAVAIL;
808 			if (!ifa)
809 				break;
810 			ret = 0;
811 			if (!(ifr.ifr_flags & IFF_UP))
812 				inet_del_ifa(in_dev, ifap, 1);
813 			break;
814 		}
815 		ret = dev_change_flags(dev, ifr.ifr_flags);
816 		break;
817 
818 	case SIOCSIFADDR:	/* Set interface address (and family) */
819 		ret = -EINVAL;
820 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
821 			break;
822 
823 		if (!ifa) {
824 			ret = -ENOBUFS;
825 			ifa = inet_alloc_ifa();
826 			INIT_HLIST_NODE(&ifa->hash);
827 			if (!ifa)
828 				break;
829 			if (colon)
830 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
831 			else
832 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
833 		} else {
834 			ret = 0;
835 			if (ifa->ifa_local == sin->sin_addr.s_addr)
836 				break;
837 			inet_del_ifa(in_dev, ifap, 0);
838 			ifa->ifa_broadcast = 0;
839 			ifa->ifa_scope = 0;
840 		}
841 
842 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
843 
844 		if (!(dev->flags & IFF_POINTOPOINT)) {
845 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
846 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
847 			if ((dev->flags & IFF_BROADCAST) &&
848 			    ifa->ifa_prefixlen < 31)
849 				ifa->ifa_broadcast = ifa->ifa_address |
850 						     ~ifa->ifa_mask;
851 		} else {
852 			ifa->ifa_prefixlen = 32;
853 			ifa->ifa_mask = inet_make_mask(32);
854 		}
855 		ret = inet_set_ifa(dev, ifa);
856 		break;
857 
858 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
859 		ret = 0;
860 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
861 			inet_del_ifa(in_dev, ifap, 0);
862 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
863 			inet_insert_ifa(ifa);
864 		}
865 		break;
866 
867 	case SIOCSIFDSTADDR:	/* Set the destination address */
868 		ret = 0;
869 		if (ifa->ifa_address == sin->sin_addr.s_addr)
870 			break;
871 		ret = -EINVAL;
872 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
873 			break;
874 		ret = 0;
875 		inet_del_ifa(in_dev, ifap, 0);
876 		ifa->ifa_address = sin->sin_addr.s_addr;
877 		inet_insert_ifa(ifa);
878 		break;
879 
880 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
881 
882 		/*
883 		 *	The mask we set must be legal.
884 		 */
885 		ret = -EINVAL;
886 		if (bad_mask(sin->sin_addr.s_addr, 0))
887 			break;
888 		ret = 0;
889 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
890 			__be32 old_mask = ifa->ifa_mask;
891 			inet_del_ifa(in_dev, ifap, 0);
892 			ifa->ifa_mask = sin->sin_addr.s_addr;
893 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
894 
895 			/* See if current broadcast address matches
896 			 * with current netmask, then recalculate
897 			 * the broadcast address. Otherwise it's a
898 			 * funny address, so don't touch it since
899 			 * the user seems to know what (s)he's doing...
900 			 */
901 			if ((dev->flags & IFF_BROADCAST) &&
902 			    (ifa->ifa_prefixlen < 31) &&
903 			    (ifa->ifa_broadcast ==
904 			     (ifa->ifa_local|~old_mask))) {
905 				ifa->ifa_broadcast = (ifa->ifa_local |
906 						      ~sin->sin_addr.s_addr);
907 			}
908 			inet_insert_ifa(ifa);
909 		}
910 		break;
911 	}
912 done:
913 	rtnl_unlock();
914 out:
915 	return ret;
916 rarok:
917 	rtnl_unlock();
918 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
919 	goto out;
920 }
921 
922 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
923 {
924 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
925 	struct in_ifaddr *ifa;
926 	struct ifreq ifr;
927 	int done = 0;
928 
929 	if (!in_dev)
930 		goto out;
931 
932 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
933 		if (!buf) {
934 			done += sizeof(ifr);
935 			continue;
936 		}
937 		if (len < (int) sizeof(ifr))
938 			break;
939 		memset(&ifr, 0, sizeof(struct ifreq));
940 		if (ifa->ifa_label)
941 			strcpy(ifr.ifr_name, ifa->ifa_label);
942 		else
943 			strcpy(ifr.ifr_name, dev->name);
944 
945 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
946 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
947 								ifa->ifa_local;
948 
949 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
950 			done = -EFAULT;
951 			break;
952 		}
953 		buf  += sizeof(struct ifreq);
954 		len  -= sizeof(struct ifreq);
955 		done += sizeof(struct ifreq);
956 	}
957 out:
958 	return done;
959 }
960 
961 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
962 {
963 	__be32 addr = 0;
964 	struct in_device *in_dev;
965 	struct net *net = dev_net(dev);
966 
967 	rcu_read_lock();
968 	in_dev = __in_dev_get_rcu(dev);
969 	if (!in_dev)
970 		goto no_in_dev;
971 
972 	for_primary_ifa(in_dev) {
973 		if (ifa->ifa_scope > scope)
974 			continue;
975 		if (!dst || inet_ifa_match(dst, ifa)) {
976 			addr = ifa->ifa_local;
977 			break;
978 		}
979 		if (!addr)
980 			addr = ifa->ifa_local;
981 	} endfor_ifa(in_dev);
982 
983 	if (addr)
984 		goto out_unlock;
985 no_in_dev:
986 
987 	/* Not loopback addresses on loopback should be preferred
988 	   in this case. It is importnat that lo is the first interface
989 	   in dev_base list.
990 	 */
991 	for_each_netdev_rcu(net, dev) {
992 		in_dev = __in_dev_get_rcu(dev);
993 		if (!in_dev)
994 			continue;
995 
996 		for_primary_ifa(in_dev) {
997 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
998 			    ifa->ifa_scope <= scope) {
999 				addr = ifa->ifa_local;
1000 				goto out_unlock;
1001 			}
1002 		} endfor_ifa(in_dev);
1003 	}
1004 out_unlock:
1005 	rcu_read_unlock();
1006 	return addr;
1007 }
1008 EXPORT_SYMBOL(inet_select_addr);
1009 
/*
 * Per-device worker for inet_confirm_addr(): scan all addresses of
 * @in_dev for a local address satisfying the @dst / @local / @scope
 * constraints (0 acts as a wildcard for @dst and @local).
 *
 * Two things are tracked while scanning: "addr", a candidate local
 * address within @scope, and "same", whether some address on the
 * device shares a subnet with @local and @dst.  The candidate is
 * returned only if "same" ended up set; otherwise 0 is returned.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Pick the first address that equals @local (or any address
		 * when @local is 0) and is within the allowed scope.
		 */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1046 
1047 /*
1048  * Confirm that local IP address exists using wildcards:
1049  * - in_dev: only on this interface, 0=any interface
1050  * - dst: only in the same subnet as dst, 0=any dst
1051  * - local: address, 0=autoselect the local address
1052  * - scope: maximum allowed scope value for the local address
1053  */
1054 __be32 inet_confirm_addr(struct in_device *in_dev,
1055 			 __be32 dst, __be32 local, int scope)
1056 {
1057 	__be32 addr = 0;
1058 	struct net_device *dev;
1059 	struct net *net;
1060 
1061 	if (scope != RT_SCOPE_LINK)
1062 		return confirm_addr_indev(in_dev, dst, local, scope);
1063 
1064 	net = dev_net(in_dev->dev);
1065 	rcu_read_lock();
1066 	for_each_netdev_rcu(net, dev) {
1067 		in_dev = __in_dev_get_rcu(dev);
1068 		if (in_dev) {
1069 			addr = confirm_addr_indev(in_dev, dst, local, scope);
1070 			if (addr)
1071 				break;
1072 		}
1073 	}
1074 	rcu_read_unlock();
1075 
1076 	return addr;
1077 }
1078 EXPORT_SYMBOL(inet_confirm_addr);
1079 
/*
 *	Device notifier
 */

/*
 * Add @nb to the IPv4 address notifier chain.  Subscribers are called
 * with NETDEV_UP / NETDEV_DOWN and the affected in_ifaddr when an
 * address is inserted or deleted.  Returns the result of the chain
 * registration.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1089 
/*
 * Remove @nb from the IPv4 address notifier chain.  Returns the result
 * of the chain unregistration.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1095 
1096 /* Rename ifa_labels for a device name change. Make some effort to preserve
1097  * existing alias numbering and to create unique labels if possible.
1098 */
1099 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1100 {
1101 	struct in_ifaddr *ifa;
1102 	int named = 0;
1103 
1104 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1105 		char old[IFNAMSIZ], *dot;
1106 
1107 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1108 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1109 		if (named++ == 0)
1110 			goto skip;
1111 		dot = strchr(old, ':');
1112 		if (dot == NULL) {
1113 			sprintf(old, ":%d", named);
1114 			dot = old;
1115 		}
1116 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1117 			strcat(ifa->ifa_label, dot);
1118 		else
1119 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1120 skip:
1121 		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1122 	}
1123 }
1124 
/*
 * Tell whether @mtu is large enough for IPv4 to be enabled on a device.
 * The threshold is 68 bytes (presumably the minimum datagram size every
 * IPv4 module must handle per RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1129 
1130 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1131 					struct in_device *in_dev)
1132 
1133 {
1134 	struct in_ifaddr *ifa;
1135 
1136 	for (ifa = in_dev->ifa_list; ifa;
1137 	     ifa = ifa->ifa_next) {
1138 		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1139 			 ifa->ifa_local, dev,
1140 			 ifa->ifa_local, NULL,
1141 			 dev->dev_addr, NULL);
1142 	}
1143 }
1144 
/*
 * Netdevice notifier callback that keeps the per-device IPv4 state
 * (struct in_device) in step with device events.
 *
 * Called only under RTNL semaphore (checked by ASSERT_RTNL()).
 *
 * @this:  the notifier block (not used here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event is about
 *
 * Returns NOTIFY_DONE, except that a failed inetdev_init() on
 * NETDEV_REGISTER is reported as notifier_from_errno(-ENOMEM).
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* The device has no IPv4 state yet: only two events can create it. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			/* Loopback devices start with NOXFRM and NOPOLICY set. */
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			/* The result of inetdev_init() is not checked on this path. */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device is unexpected:
		 * log it and clear the device's ip_ptr.
		 */
		pr_debug("%s: bug\n", __func__);
		RCU_INIT_POINTER(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			/* Give the loopback device INADDR_LOOPBACK/8 with host
			 * scope; an allocation failure is silently skipped.
			 */
			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* For UP and CHANGEADDR, announce only when ARP_NOTIFY is set. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through: tear down exactly as for UNREGISTER */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() does call
		 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
		 * comment above looks stale - confirm.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl entries follow the new device name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1235 
/* Notifier block that routes netdevice events to inetdev_event();
 * registered from devinet_init().
 */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1239 
1240 static size_t inet_nlmsg_size(void)
1241 {
1242 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1243 	       + nla_total_size(4) /* IFA_ADDRESS */
1244 	       + nla_total_size(4) /* IFA_LOCAL */
1245 	       + nla_total_size(4) /* IFA_BROADCAST */
1246 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1247 }
1248 
1249 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1250 			    u32 portid, u32 seq, int event, unsigned int flags)
1251 {
1252 	struct ifaddrmsg *ifm;
1253 	struct nlmsghdr  *nlh;
1254 
1255 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1256 	if (nlh == NULL)
1257 		return -EMSGSIZE;
1258 
1259 	ifm = nlmsg_data(nlh);
1260 	ifm->ifa_family = AF_INET;
1261 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1262 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1263 	ifm->ifa_scope = ifa->ifa_scope;
1264 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1265 
1266 	if ((ifa->ifa_address &&
1267 	     nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1268 	    (ifa->ifa_local &&
1269 	     nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1270 	    (ifa->ifa_broadcast &&
1271 	     nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1272 	    (ifa->ifa_label[0] &&
1273 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1274 		goto nla_put_failure;
1275 
1276 	return nlmsg_end(skb, nlh);
1277 
1278 nla_put_failure:
1279 	nlmsg_cancel(skb, nlh);
1280 	return -EMSGSIZE;
1281 }
1282 
/*
 * Netlink dump callback registered for RTM_GETADDR: emit one RTM_NEWADDR
 * message (NLM_F_MULTI) per IPv4 address of every device in the
 * requester's network namespace.
 *
 * The dump is resumable.  The position reached is kept in @cb->args:
 *   args[0] - bucket index in net->dev_index_head
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device's ifa_list
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;

	/* Pick up where the previous call stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited; later buckets
	 * start from their first device.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			/* Skip devices already dumped by an earlier call. */
			if (idx < s_idx)
				goto cont;
			/* Past the device we resumed on: start from its
			 * first address.
			 */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* A result <= 0 means the message could not be
				 * added: stop here and save the position.
				 */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Record the position for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1337 
1338 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1339 		      u32 portid)
1340 {
1341 	struct sk_buff *skb;
1342 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1343 	int err = -ENOBUFS;
1344 	struct net *net;
1345 
1346 	net = dev_net(ifa->ifa_dev->dev);
1347 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1348 	if (skb == NULL)
1349 		goto errout;
1350 
1351 	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1352 	if (err < 0) {
1353 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1354 		WARN_ON(err == -EMSGSIZE);
1355 		kfree_skb(skb);
1356 		goto errout;
1357 	}
1358 	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1359 	return;
1360 errout:
1361 	if (err < 0)
1362 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1363 }
1364 
1365 static size_t inet_get_link_af_size(const struct net_device *dev)
1366 {
1367 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1368 
1369 	if (!in_dev)
1370 		return 0;
1371 
1372 	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1373 }
1374 
1375 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1376 {
1377 	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1378 	struct nlattr *nla;
1379 	int i;
1380 
1381 	if (!in_dev)
1382 		return -ENODATA;
1383 
1384 	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1385 	if (nla == NULL)
1386 		return -EMSGSIZE;
1387 
1388 	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1389 		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1390 
1391 	return 0;
1392 }
1393 
/* Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF must be
 * a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1397 
1398 static int inet_validate_link_af(const struct net_device *dev,
1399 				 const struct nlattr *nla)
1400 {
1401 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1402 	int err, rem;
1403 
1404 	if (dev && !__in_dev_get_rtnl(dev))
1405 		return -EAFNOSUPPORT;
1406 
1407 	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1408 	if (err < 0)
1409 		return err;
1410 
1411 	if (tb[IFLA_INET_CONF]) {
1412 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1413 			int cfgid = nla_type(a);
1414 
1415 			if (nla_len(a) < 4)
1416 				return -EINVAL;
1417 
1418 			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1419 				return -EINVAL;
1420 		}
1421 	}
1422 
1423 	return 0;
1424 }
1425 
1426 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1427 {
1428 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1429 	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1430 	int rem;
1431 
1432 	if (!in_dev)
1433 		return -EAFNOSUPPORT;
1434 
1435 	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1436 		BUG();
1437 
1438 	if (tb[IFLA_INET_CONF]) {
1439 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1440 			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1441 	}
1442 
1443 	return 0;
1444 }
1445 
1446 static int inet_netconf_msgsize_devconf(int type)
1447 {
1448 	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1449 		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
1450 
1451 	/* type -1 is used for ALL */
1452 	if (type == -1 || type == NETCONFA_FORWARDING)
1453 		size += nla_total_size(4);
1454 	if (type == -1 || type == NETCONFA_RP_FILTER)
1455 		size += nla_total_size(4);
1456 	if (type == -1 || type == NETCONFA_MC_FORWARDING)
1457 		size += nla_total_size(4);
1458 
1459 	return size;
1460 }
1461 
1462 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1463 				     struct ipv4_devconf *devconf, u32 portid,
1464 				     u32 seq, int event, unsigned int flags,
1465 				     int type)
1466 {
1467 	struct nlmsghdr  *nlh;
1468 	struct netconfmsg *ncm;
1469 
1470 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1471 			flags);
1472 	if (nlh == NULL)
1473 		return -EMSGSIZE;
1474 
1475 	ncm = nlmsg_data(nlh);
1476 	ncm->ncm_family = AF_INET;
1477 
1478 	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1479 		goto nla_put_failure;
1480 
1481 	/* type -1 is used for ALL */
1482 	if ((type == -1 || type == NETCONFA_FORWARDING) &&
1483 	    nla_put_s32(skb, NETCONFA_FORWARDING,
1484 			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1485 		goto nla_put_failure;
1486 	if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1487 	    nla_put_s32(skb, NETCONFA_RP_FILTER,
1488 			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1489 		goto nla_put_failure;
1490 	if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1491 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1492 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1493 		goto nla_put_failure;
1494 
1495 	return nlmsg_end(skb, nlh);
1496 
1497 nla_put_failure:
1498 	nlmsg_cancel(skb, nlh);
1499 	return -EMSGSIZE;
1500 }
1501 
1502 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1503 				 struct ipv4_devconf *devconf)
1504 {
1505 	struct sk_buff *skb;
1506 	int err = -ENOBUFS;
1507 
1508 	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1509 	if (skb == NULL)
1510 		goto errout;
1511 
1512 	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1513 					RTM_NEWNETCONF, 0, type);
1514 	if (err < 0) {
1515 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1516 		WARN_ON(err == -EMSGSIZE);
1517 		kfree_skb(skb);
1518 		goto errout;
1519 	}
1520 	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1521 	return;
1522 errout:
1523 	if (err < 0)
1524 		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1525 }
1526 
/* Attribute policy used by inet_netconf_get_devconf() when parsing a
 * request; each listed attribute is given a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
};
1532 
1533 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1534 				    struct nlmsghdr *nlh,
1535 				    void *arg)
1536 {
1537 	struct net *net = sock_net(in_skb->sk);
1538 	struct nlattr *tb[NETCONFA_MAX+1];
1539 	struct netconfmsg *ncm;
1540 	struct sk_buff *skb;
1541 	struct ipv4_devconf *devconf;
1542 	struct in_device *in_dev;
1543 	struct net_device *dev;
1544 	int ifindex;
1545 	int err;
1546 
1547 	err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1548 			  devconf_ipv4_policy);
1549 	if (err < 0)
1550 		goto errout;
1551 
1552 	err = EINVAL;
1553 	if (!tb[NETCONFA_IFINDEX])
1554 		goto errout;
1555 
1556 	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1557 	switch (ifindex) {
1558 	case NETCONFA_IFINDEX_ALL:
1559 		devconf = net->ipv4.devconf_all;
1560 		break;
1561 	case NETCONFA_IFINDEX_DEFAULT:
1562 		devconf = net->ipv4.devconf_dflt;
1563 		break;
1564 	default:
1565 		dev = __dev_get_by_index(net, ifindex);
1566 		if (dev == NULL)
1567 			goto errout;
1568 		in_dev = __in_dev_get_rtnl(dev);
1569 		if (in_dev == NULL)
1570 			goto errout;
1571 		devconf = &in_dev->cnf;
1572 		break;
1573 	}
1574 
1575 	err = -ENOBUFS;
1576 	skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1577 	if (skb == NULL)
1578 		goto errout;
1579 
1580 	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1581 					NETLINK_CB(in_skb).portid,
1582 					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1583 					-1);
1584 	if (err < 0) {
1585 		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1586 		WARN_ON(err == -EMSGSIZE);
1587 		kfree_skb(skb);
1588 		goto errout;
1589 	}
1590 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1591 errout:
1592 	return err;
1593 }
1594 
1595 #ifdef CONFIG_SYSCTL
1596 
1597 static void devinet_copy_dflt_conf(struct net *net, int i)
1598 {
1599 	struct net_device *dev;
1600 
1601 	rcu_read_lock();
1602 	for_each_netdev_rcu(net, dev) {
1603 		struct in_device *in_dev;
1604 
1605 		in_dev = __in_dev_get_rcu(dev);
1606 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1607 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1608 	}
1609 	rcu_read_unlock();
1610 }
1611 
1612 /* called with RTNL locked */
1613 static void inet_forward_change(struct net *net)
1614 {
1615 	struct net_device *dev;
1616 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1617 
1618 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1619 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1620 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1621 				    NETCONFA_IFINDEX_ALL,
1622 				    net->ipv4.devconf_all);
1623 	inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1624 				    NETCONFA_IFINDEX_DEFAULT,
1625 				    net->ipv4.devconf_dflt);
1626 
1627 	for_each_netdev(net, dev) {
1628 		struct in_device *in_dev;
1629 		if (on)
1630 			dev_disable_lro(dev);
1631 		rcu_read_lock();
1632 		in_dev = __in_dev_get_rcu(dev);
1633 		if (in_dev) {
1634 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1635 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1636 						    dev->ifindex, &in_dev->cnf);
1637 		}
1638 		rcu_read_unlock();
1639 	}
1640 }
1641 
1642 static int devinet_conf_proc(ctl_table *ctl, int write,
1643 			     void __user *buffer,
1644 			     size_t *lenp, loff_t *ppos)
1645 {
1646 	int old_value = *(int *)ctl->data;
1647 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1648 	int new_value = *(int *)ctl->data;
1649 
1650 	if (write) {
1651 		struct ipv4_devconf *cnf = ctl->extra1;
1652 		struct net *net = ctl->extra2;
1653 		int i = (int *)ctl->data - cnf->data;
1654 
1655 		set_bit(i, cnf->state);
1656 
1657 		if (cnf == net->ipv4.devconf_dflt)
1658 			devinet_copy_dflt_conf(net, i);
1659 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1660 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1661 			if ((new_value == 0) && (old_value != 0))
1662 				rt_cache_flush(net);
1663 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1664 		    new_value != old_value) {
1665 			int ifindex;
1666 
1667 			if (cnf == net->ipv4.devconf_dflt)
1668 				ifindex = NETCONFA_IFINDEX_DEFAULT;
1669 			else if (cnf == net->ipv4.devconf_all)
1670 				ifindex = NETCONFA_IFINDEX_ALL;
1671 			else {
1672 				struct in_device *idev =
1673 					container_of(cnf, struct in_device,
1674 						     cnf);
1675 				ifindex = idev->dev->ifindex;
1676 			}
1677 			inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1678 						    ifindex, cnf);
1679 		}
1680 	}
1681 
1682 	return ret;
1683 }
1684 
1685 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1686 				  void __user *buffer,
1687 				  size_t *lenp, loff_t *ppos)
1688 {
1689 	int *valp = ctl->data;
1690 	int val = *valp;
1691 	loff_t pos = *ppos;
1692 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1693 
1694 	if (write && *valp != val) {
1695 		struct net *net = ctl->extra2;
1696 
1697 		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1698 			if (!rtnl_trylock()) {
1699 				/* Restore the original values before restarting */
1700 				*valp = val;
1701 				*ppos = pos;
1702 				return restart_syscall();
1703 			}
1704 			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1705 				inet_forward_change(net);
1706 			} else {
1707 				struct ipv4_devconf *cnf = ctl->extra1;
1708 				struct in_device *idev =
1709 					container_of(cnf, struct in_device, cnf);
1710 				if (*valp)
1711 					dev_disable_lro(idev->dev);
1712 				inet_netconf_notify_devconf(net,
1713 							    NETCONFA_FORWARDING,
1714 							    idev->dev->ifindex,
1715 							    cnf);
1716 			}
1717 			rtnl_unlock();
1718 			rt_cache_flush(net);
1719 		} else
1720 			inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1721 						    NETCONFA_IFINDEX_DEFAULT,
1722 						    net->ipv4.devconf_dflt);
1723 	}
1724 
1725 	return ret;
1726 }
1727 
1728 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1729 				void __user *buffer,
1730 				size_t *lenp, loff_t *ppos)
1731 {
1732 	int *valp = ctl->data;
1733 	int val = *valp;
1734 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1735 	struct net *net = ctl->extra2;
1736 
1737 	if (write && *valp != val)
1738 		rt_cache_flush(net);
1739 
1740 	return ret;
1741 }
1742 
/*
 * Helpers that build the ctl_table entries of devinet_sysctl.
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) creates the entry for
 * IPV4_DEVCONF_<attr>: file name @name, mode @mval, handler @proc.  .data
 * points at slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf.data[]
 * and .extra1 at ipv4_devconf itself; __devinet_sysctl_register() rebases
 * both onto the configuration actually being registered.
 *
 * The remaining macros fix the mode and/or handler:
 *   RW_ENTRY       - 0644, devinet_conf_proc
 *   RO_ENTRY       - 0444, devinet_conf_proc
 *   COMPLEX_ENTRY  - 0644, caller-supplied handler
 *   FLUSHING_ENTRY - 0644, ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1765 
/*
 * Template for the per-configuration sysctl directory.
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() for
 * every registered ipv4_devconf and stores the registration handle in
 * sysctl_header.  devinet_vars[] has __IPV4_DEVCONF_MAX slots; the
 * registration code fixes up all but the last one.
 * NOTE(review): the last slot is presumably left zeroed as the table
 * terminator - confirm.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		/* read-only (mode 0444) */
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* plain read/write entries handled by devinet_conf_proc */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* entries whose change flushes the route cache */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
	},
};
1806 
1807 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1808 					struct ipv4_devconf *p)
1809 {
1810 	int i;
1811 	struct devinet_sysctl_table *t;
1812 	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1813 
1814 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1815 	if (!t)
1816 		goto out;
1817 
1818 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1819 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1820 		t->devinet_vars[i].extra1 = p;
1821 		t->devinet_vars[i].extra2 = net;
1822 	}
1823 
1824 	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1825 
1826 	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1827 	if (!t->sysctl_header)
1828 		goto free;
1829 
1830 	p->sysctl = t;
1831 	return 0;
1832 
1833 free:
1834 	kfree(t);
1835 out:
1836 	return -ENOBUFS;
1837 }
1838 
1839 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1840 {
1841 	struct devinet_sysctl_table *t = cnf->sysctl;
1842 
1843 	if (t == NULL)
1844 		return;
1845 
1846 	cnf->sysctl = NULL;
1847 	unregister_net_sysctl_table(t->sysctl_header);
1848 	kfree(t);
1849 }
1850 
/*
 * Register the sysctl entries of one in_device: the neighbour parameters
 * (idev->arp_parms, under "ipv4") and the net/ipv4/conf/<device name>
 * directory for idev->cnf.
 *
 * NOTE(review): the return values of both registrations are ignored, so a
 * failure is not reported to the caller.
 */
static void devinet_sysctl_register(struct in_device *idev)
{
	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
					&idev->cnf);
}
1857 
/*
 * Remove the sysctl entries of one in_device, in the reverse order of
 * devinet_sysctl_register(): first the conf directory for idev->cnf, then
 * the neighbour parameters.
 */
static void devinet_sysctl_unregister(struct in_device *idev)
{
	__devinet_sysctl_unregister(&idev->cnf);
	neigh_sysctl_unregister(idev->arp_parms);
}
1863 
/*
 * sysctl table holding the single "ip_forward" entry, registered under
 * net/ipv4 by devinet_init_net().  It exposes the FORWARDING slot of the
 * global ipv4_devconf through devinet_sysctl_forward().  This static table
 * is wired to init_net; other namespaces use a kmemdup()ed copy whose
 * .data/.extra1/.extra2 are re-pointed in devinet_init_net().
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1877 #endif
1878 
/*
 * Per-namespace setup of the "all" and "default" IPv4 device configuration.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table) directly; every
 * other namespace gets kmemdup()ed copies of them.  With CONFIG_SYSCTL the
 * function also registers net/ipv4/conf/all, net/ipv4/conf/default and the
 * net/ipv4 table containing ip_forward.
 *
 * Returns 0 on success.  On failure the steps already completed are undone
 * in reverse order and a negative errno is returned: -ENOMEM for a failed
 * allocation or a failed ip_forward registration, or the error from
 * __devinet_sysctl_register().
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	/* Namespaces other than init_net work on private copies. */
	if (!net_eq(net, &init_net)) {
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Point the copied ip_forward entry at this namespace's data. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Error unwinding; the pointer comparisons keep the static init_net
	 * objects from being passed to kfree().
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1950 
/*
 * Per-namespace teardown, the counterpart of devinet_init_net(): unregister
 * the ip_forward table and the "default" and "all" sysctl directories (with
 * CONFIG_SYSCTL), then free the table and both configuration objects.
 *
 * NOTE(review): everything is freed unconditionally, which is only correct
 * for namespaces whose objects devinet_init_net() allocated with kmemdup()
 * (i.e. not init_net).  Presumably this hook never runs for init_net -
 * confirm.
 */
static __net_exit void devinet_exit_net(struct net *net)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl;

	/* Fetch the table pointer before the header is unregistered. */
	tbl = net->ipv4.forw_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.forw_hdr);
	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
	__devinet_sysctl_unregister(net->ipv4.devconf_all);
	kfree(tbl);
#endif
	kfree(net->ipv4.devconf_dflt);
	kfree(net->ipv4.devconf_all);
}
1965 
/* Per-namespace init/exit hooks, registered from devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1970 
/* AF_INET link-attribute callbacks (size, fill, validate, set), registered
 * from devinet_init() with rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
1978 
/*
 * Boot-time initialisation of the IPv4 device layer: set up the address
 * hash table, then register the per-namespace operations, the gifconf
 * handler for PF_INET, the netdevice notifier, the AF_INET link-attribute
 * ops and the rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR, RTM_GETADDR
 * (dump only) and RTM_GETNETCONF.
 *
 * NOTE(review): none of the registration return values are checked here.
 */
void __init devinet_init(void)
{
	int i;

	/* Start with every bucket of the address hash table empty. */
	for (i = 0; i < IN4_ADDR_HSIZE; i++)
		INIT_HLIST_HEAD(&inet_addr_lst[i]);

	register_pernet_subsys(&devinet_ops);

	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);

	rtnl_af_register(&inet_af_ops);

	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
		      NULL, NULL);
}
1999 
2000