xref: /linux/net/ipv4/devinet.c (revision a1e58bbdc969c3fe60addca7f2729779d22a83c1)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 
67 static struct ipv4_devconf ipv4_devconf = {
68 	.data = {
69 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73 	},
74 };
75 
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77 	.data = {
78 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83 	},
84 };
85 
/*
 * Apply IPV4_DEVCONF() to the configuration that net->ipv4.devconf_dflt
 * points at.  The @net argument is parenthesised so that an arbitrary
 * pointer expression (e.g. "base + 1") expands with the intended
 * precedence instead of binding "->" to only its last operand.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88 
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90 	[IFA_LOCAL]     	= { .type = NLA_U32 },
91 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
92 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
93 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
94 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96 
97 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98 
99 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101 			 int destroy);
/*
 * Per-device sysctl hooks.  Without CONFIG_SYSCTL they collapse into
 * empty inline stubs so that callers need no #ifdef of their own.
 */
#ifndef CONFIG_SYSCTL
static inline void devinet_sysctl_register(struct in_device *idev)
{
}

static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#else
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#endif
113 
114 /* Locks all the inet devices. */
115 
116 static struct in_ifaddr *inet_alloc_ifa(void)
117 {
118 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119 
120 	if (ifa) {
121 		INIT_RCU_HEAD(&ifa->rcu_head);
122 	}
123 
124 	return ifa;
125 }
126 
127 static void inet_rcu_free_ifa(struct rcu_head *head)
128 {
129 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130 	if (ifa->ifa_dev)
131 		in_dev_put(ifa->ifa_dev);
132 	kfree(ifa);
133 }
134 
135 static inline void inet_free_ifa(struct in_ifaddr *ifa)
136 {
137 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
138 }
139 
140 void in_dev_finish_destroy(struct in_device *idev)
141 {
142 	struct net_device *dev = idev->dev;
143 
144 	BUG_TRAP(!idev->ifa_list);
145 	BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148 	       idev, dev ? dev->name : "NIL");
149 #endif
150 	dev_put(dev);
151 	if (!idev->dead)
152 		printk("Freeing alive in_device %p\n", idev);
153 	else {
154 		kfree(idev);
155 	}
156 }
157 
158 static struct in_device *inetdev_init(struct net_device *dev)
159 {
160 	struct in_device *in_dev;
161 
162 	ASSERT_RTNL();
163 
164 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165 	if (!in_dev)
166 		goto out;
167 	INIT_RCU_HEAD(&in_dev->rcu_head);
168 	memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169 			sizeof(in_dev->cnf));
170 	in_dev->cnf.sysctl = NULL;
171 	in_dev->dev = dev;
172 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173 		goto out_kfree;
174 	/* Reference in_dev->dev */
175 	dev_hold(dev);
176 	/* Account for reference dev->ip_ptr (below) */
177 	in_dev_hold(in_dev);
178 
179 	devinet_sysctl_register(in_dev);
180 	ip_mc_init_dev(in_dev);
181 	if (dev->flags & IFF_UP)
182 		ip_mc_up(in_dev);
183 
184 	/* we can receive as soon as ip_ptr is set -- do this last */
185 	rcu_assign_pointer(dev->ip_ptr, in_dev);
186 out:
187 	return in_dev;
188 out_kfree:
189 	kfree(in_dev);
190 	in_dev = NULL;
191 	goto out;
192 }
193 
194 static void in_dev_rcu_put(struct rcu_head *head)
195 {
196 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
197 	in_dev_put(idev);
198 }
199 
200 static void inetdev_destroy(struct in_device *in_dev)
201 {
202 	struct in_ifaddr *ifa;
203 	struct net_device *dev;
204 
205 	ASSERT_RTNL();
206 
207 	dev = in_dev->dev;
208 
209 	in_dev->dead = 1;
210 
211 	ip_mc_destroy_dev(in_dev);
212 
213 	while ((ifa = in_dev->ifa_list) != NULL) {
214 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215 		inet_free_ifa(ifa);
216 	}
217 
218 	dev->ip_ptr = NULL;
219 
220 	devinet_sysctl_unregister(in_dev);
221 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222 	arp_ifdown(dev);
223 
224 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225 }
226 
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 {
229 	rcu_read_lock();
230 	for_primary_ifa(in_dev) {
231 		if (inet_ifa_match(a, ifa)) {
232 			if (!b || inet_ifa_match(b, ifa)) {
233 				rcu_read_unlock();
234 				return 1;
235 			}
236 		}
237 	} endfor_ifa(in_dev);
238 	rcu_read_unlock();
239 	return 0;
240 }
241 
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243 			 int destroy, struct nlmsghdr *nlh, u32 pid)
244 {
245 	struct in_ifaddr *promote = NULL;
246 	struct in_ifaddr *ifa, *ifa1 = *ifap;
247 	struct in_ifaddr *last_prim = in_dev->ifa_list;
248 	struct in_ifaddr *prev_prom = NULL;
249 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250 
251 	ASSERT_RTNL();
252 
253 	/* 1. Deleting primary ifaddr forces deletion all secondaries
254 	 * unless alias promotion is set
255 	 **/
256 
257 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
259 
260 		while ((ifa = *ifap1) != NULL) {
261 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262 			    ifa1->ifa_scope <= ifa->ifa_scope)
263 				last_prim = ifa;
264 
265 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266 			    ifa1->ifa_mask != ifa->ifa_mask ||
267 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
268 				ifap1 = &ifa->ifa_next;
269 				prev_prom = ifa;
270 				continue;
271 			}
272 
273 			if (!do_promote) {
274 				*ifap1 = ifa->ifa_next;
275 
276 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277 				blocking_notifier_call_chain(&inetaddr_chain,
278 						NETDEV_DOWN, ifa);
279 				inet_free_ifa(ifa);
280 			} else {
281 				promote = ifa;
282 				break;
283 			}
284 		}
285 	}
286 
287 	/* 2. Unlink it */
288 
289 	*ifap = ifa1->ifa_next;
290 
291 	/* 3. Announce address deletion */
292 
293 	/* Send message first, then call notifier.
294 	   At first sight, FIB update triggered by notifier
295 	   will refer to already deleted ifaddr, that could confuse
296 	   netlink listeners. It is not true: look, gated sees
297 	   that route deleted and if it still thinks that ifaddr
298 	   is valid, it will try to restore deleted routes... Grr.
299 	   So that, this order is correct.
300 	 */
301 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
303 
304 	if (promote) {
305 
306 		if (prev_prom) {
307 			prev_prom->ifa_next = promote->ifa_next;
308 			promote->ifa_next = last_prim->ifa_next;
309 			last_prim->ifa_next = promote;
310 		}
311 
312 		promote->ifa_flags &= ~IFA_F_SECONDARY;
313 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314 		blocking_notifier_call_chain(&inetaddr_chain,
315 				NETDEV_UP, promote);
316 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317 			if (ifa1->ifa_mask != ifa->ifa_mask ||
318 			    !inet_ifa_match(ifa1->ifa_address, ifa))
319 					continue;
320 			fib_add_ifaddr(ifa);
321 		}
322 
323 	}
324 	if (destroy)
325 		inet_free_ifa(ifa1);
326 }
327 
328 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329 			 int destroy)
330 {
331 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
332 }
333 
334 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335 			     u32 pid)
336 {
337 	struct in_device *in_dev = ifa->ifa_dev;
338 	struct in_ifaddr *ifa1, **ifap, **last_primary;
339 
340 	ASSERT_RTNL();
341 
342 	if (!ifa->ifa_local) {
343 		inet_free_ifa(ifa);
344 		return 0;
345 	}
346 
347 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
348 	last_primary = &in_dev->ifa_list;
349 
350 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351 	     ifap = &ifa1->ifa_next) {
352 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353 		    ifa->ifa_scope <= ifa1->ifa_scope)
354 			last_primary = &ifa1->ifa_next;
355 		if (ifa1->ifa_mask == ifa->ifa_mask &&
356 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
357 			if (ifa1->ifa_local == ifa->ifa_local) {
358 				inet_free_ifa(ifa);
359 				return -EEXIST;
360 			}
361 			if (ifa1->ifa_scope != ifa->ifa_scope) {
362 				inet_free_ifa(ifa);
363 				return -EINVAL;
364 			}
365 			ifa->ifa_flags |= IFA_F_SECONDARY;
366 		}
367 	}
368 
369 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370 		net_srandom(ifa->ifa_local);
371 		ifap = last_primary;
372 	}
373 
374 	ifa->ifa_next = *ifap;
375 	*ifap = ifa;
376 
377 	/* Send message first, then call notifier.
378 	   Notifier will trigger FIB update, so that
379 	   listeners of netlink will know about new ifaddr */
380 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
382 
383 	return 0;
384 }
385 
386 static int inet_insert_ifa(struct in_ifaddr *ifa)
387 {
388 	return __inet_insert_ifa(ifa, NULL, 0);
389 }
390 
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392 {
393 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
394 
395 	ASSERT_RTNL();
396 
397 	if (!in_dev) {
398 		inet_free_ifa(ifa);
399 		return -ENOBUFS;
400 	}
401 	ipv4_devconf_setall(in_dev);
402 	if (ifa->ifa_dev != in_dev) {
403 		BUG_TRAP(!ifa->ifa_dev);
404 		in_dev_hold(in_dev);
405 		ifa->ifa_dev = in_dev;
406 	}
407 	if (ipv4_is_loopback(ifa->ifa_local))
408 		ifa->ifa_scope = RT_SCOPE_HOST;
409 	return inet_insert_ifa(ifa);
410 }
411 
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
413 {
414 	struct net_device *dev;
415 	struct in_device *in_dev = NULL;
416 	read_lock(&dev_base_lock);
417 	dev = __dev_get_by_index(net, ifindex);
418 	if (dev)
419 		in_dev = in_dev_get(dev);
420 	read_unlock(&dev_base_lock);
421 	return in_dev;
422 }
423 
424 /* Called only from RTNL semaphored context. No locks. */
425 
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427 				    __be32 mask)
428 {
429 	ASSERT_RTNL();
430 
431 	for_primary_ifa(in_dev) {
432 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433 			return ifa;
434 	} endfor_ifa(in_dev);
435 	return NULL;
436 }
437 
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439 {
440 	struct net *net = skb->sk->sk_net;
441 	struct nlattr *tb[IFA_MAX+1];
442 	struct in_device *in_dev;
443 	struct ifaddrmsg *ifm;
444 	struct in_ifaddr *ifa, **ifap;
445 	int err = -EINVAL;
446 
447 	ASSERT_RTNL();
448 
449 	if (net != &init_net)
450 		return -EINVAL;
451 
452 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
453 	if (err < 0)
454 		goto errout;
455 
456 	ifm = nlmsg_data(nlh);
457 	in_dev = inetdev_by_index(net, ifm->ifa_index);
458 	if (in_dev == NULL) {
459 		err = -ENODEV;
460 		goto errout;
461 	}
462 
463 	__in_dev_put(in_dev);
464 
465 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
466 	     ifap = &ifa->ifa_next) {
467 		if (tb[IFA_LOCAL] &&
468 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
469 			continue;
470 
471 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
472 			continue;
473 
474 		if (tb[IFA_ADDRESS] &&
475 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
476 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
477 			continue;
478 
479 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
480 		return 0;
481 	}
482 
483 	err = -EADDRNOTAVAIL;
484 errout:
485 	return err;
486 }
487 
488 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
489 {
490 	struct nlattr *tb[IFA_MAX+1];
491 	struct in_ifaddr *ifa;
492 	struct ifaddrmsg *ifm;
493 	struct net_device *dev;
494 	struct in_device *in_dev;
495 	int err;
496 
497 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
498 	if (err < 0)
499 		goto errout;
500 
501 	ifm = nlmsg_data(nlh);
502 	err = -EINVAL;
503 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
504 		goto errout;
505 
506 	dev = __dev_get_by_index(net, ifm->ifa_index);
507 	err = -ENODEV;
508 	if (dev == NULL)
509 		goto errout;
510 
511 	in_dev = __in_dev_get_rtnl(dev);
512 	err = -ENOBUFS;
513 	if (in_dev == NULL)
514 		goto errout;
515 
516 	ifa = inet_alloc_ifa();
517 	if (ifa == NULL)
518 		/*
519 		 * A potential indev allocation can be left alive, it stays
520 		 * assigned to its device and is destroy with it.
521 		 */
522 		goto errout;
523 
524 	ipv4_devconf_setall(in_dev);
525 	in_dev_hold(in_dev);
526 
527 	if (tb[IFA_ADDRESS] == NULL)
528 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
529 
530 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
531 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
532 	ifa->ifa_flags = ifm->ifa_flags;
533 	ifa->ifa_scope = ifm->ifa_scope;
534 	ifa->ifa_dev = in_dev;
535 
536 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
537 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
538 
539 	if (tb[IFA_BROADCAST])
540 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
541 
542 	if (tb[IFA_ANYCAST])
543 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
544 
545 	if (tb[IFA_LABEL])
546 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
547 	else
548 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
549 
550 	return ifa;
551 
552 errout:
553 	return ERR_PTR(err);
554 }
555 
556 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
557 {
558 	struct net *net = skb->sk->sk_net;
559 	struct in_ifaddr *ifa;
560 
561 	ASSERT_RTNL();
562 
563 	if (net != &init_net)
564 		return -EINVAL;
565 
566 	ifa = rtm_to_ifaddr(net, nlh);
567 	if (IS_ERR(ifa))
568 		return PTR_ERR(ifa);
569 
570 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
571 }
572 
573 /*
574  *	Determine a default network mask, based on the IP address.
575  */
576 
577 static __inline__ int inet_abc_len(__be32 addr)
578 {
579 	int rc = -1;	/* Something else, probably a multicast. */
580 
581 	if (ipv4_is_zeronet(addr))
582 		rc = 0;
583 	else {
584 		__u32 haddr = ntohl(addr);
585 
586 		if (IN_CLASSA(haddr))
587 			rc = 8;
588 		else if (IN_CLASSB(haddr))
589 			rc = 16;
590 		else if (IN_CLASSC(haddr))
591 			rc = 24;
592 	}
593 
594 	return rc;
595 }
596 
597 
598 int devinet_ioctl(unsigned int cmd, void __user *arg)
599 {
600 	struct ifreq ifr;
601 	struct sockaddr_in sin_orig;
602 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
603 	struct in_device *in_dev;
604 	struct in_ifaddr **ifap = NULL;
605 	struct in_ifaddr *ifa = NULL;
606 	struct net_device *dev;
607 	char *colon;
608 	int ret = -EFAULT;
609 	int tryaddrmatch = 0;
610 
611 	/*
612 	 *	Fetch the caller's info block into kernel space
613 	 */
614 
615 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
616 		goto out;
617 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
618 
619 	/* save original address for comparison */
620 	memcpy(&sin_orig, sin, sizeof(*sin));
621 
622 	colon = strchr(ifr.ifr_name, ':');
623 	if (colon)
624 		*colon = 0;
625 
626 #ifdef CONFIG_KMOD
627 	dev_load(&init_net, ifr.ifr_name);
628 #endif
629 
630 	switch (cmd) {
631 	case SIOCGIFADDR:	/* Get interface address */
632 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
633 	case SIOCGIFDSTADDR:	/* Get the destination address */
634 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
635 		/* Note that these ioctls will not sleep,
636 		   so that we do not impose a lock.
637 		   One day we will be forced to put shlock here (I mean SMP)
638 		 */
639 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
640 		memset(sin, 0, sizeof(*sin));
641 		sin->sin_family = AF_INET;
642 		break;
643 
644 	case SIOCSIFFLAGS:
645 		ret = -EACCES;
646 		if (!capable(CAP_NET_ADMIN))
647 			goto out;
648 		break;
649 	case SIOCSIFADDR:	/* Set interface address (and family) */
650 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
651 	case SIOCSIFDSTADDR:	/* Set the destination address */
652 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
653 		ret = -EACCES;
654 		if (!capable(CAP_NET_ADMIN))
655 			goto out;
656 		ret = -EINVAL;
657 		if (sin->sin_family != AF_INET)
658 			goto out;
659 		break;
660 	default:
661 		ret = -EINVAL;
662 		goto out;
663 	}
664 
665 	rtnl_lock();
666 
667 	ret = -ENODEV;
668 	if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
669 		goto done;
670 
671 	if (colon)
672 		*colon = ':';
673 
674 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
675 		if (tryaddrmatch) {
676 			/* Matthias Andree */
677 			/* compare label and address (4.4BSD style) */
678 			/* note: we only do this for a limited set of ioctls
679 			   and only if the original address family was AF_INET.
680 			   This is checked above. */
681 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
682 			     ifap = &ifa->ifa_next) {
683 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
684 				    sin_orig.sin_addr.s_addr ==
685 							ifa->ifa_address) {
686 					break; /* found */
687 				}
688 			}
689 		}
690 		/* we didn't get a match, maybe the application is
691 		   4.3BSD-style and passed in junk so we fall back to
692 		   comparing just the label */
693 		if (!ifa) {
694 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
695 			     ifap = &ifa->ifa_next)
696 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
697 					break;
698 		}
699 	}
700 
701 	ret = -EADDRNOTAVAIL;
702 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
703 		goto done;
704 
705 	switch (cmd) {
706 	case SIOCGIFADDR:	/* Get interface address */
707 		sin->sin_addr.s_addr = ifa->ifa_local;
708 		goto rarok;
709 
710 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
711 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
712 		goto rarok;
713 
714 	case SIOCGIFDSTADDR:	/* Get the destination address */
715 		sin->sin_addr.s_addr = ifa->ifa_address;
716 		goto rarok;
717 
718 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
719 		sin->sin_addr.s_addr = ifa->ifa_mask;
720 		goto rarok;
721 
722 	case SIOCSIFFLAGS:
723 		if (colon) {
724 			ret = -EADDRNOTAVAIL;
725 			if (!ifa)
726 				break;
727 			ret = 0;
728 			if (!(ifr.ifr_flags & IFF_UP))
729 				inet_del_ifa(in_dev, ifap, 1);
730 			break;
731 		}
732 		ret = dev_change_flags(dev, ifr.ifr_flags);
733 		break;
734 
735 	case SIOCSIFADDR:	/* Set interface address (and family) */
736 		ret = -EINVAL;
737 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
738 			break;
739 
740 		if (!ifa) {
741 			ret = -ENOBUFS;
742 			if ((ifa = inet_alloc_ifa()) == NULL)
743 				break;
744 			if (colon)
745 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
746 			else
747 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
748 		} else {
749 			ret = 0;
750 			if (ifa->ifa_local == sin->sin_addr.s_addr)
751 				break;
752 			inet_del_ifa(in_dev, ifap, 0);
753 			ifa->ifa_broadcast = 0;
754 			ifa->ifa_anycast = 0;
755 			ifa->ifa_scope = 0;
756 		}
757 
758 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
759 
760 		if (!(dev->flags & IFF_POINTOPOINT)) {
761 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
762 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
763 			if ((dev->flags & IFF_BROADCAST) &&
764 			    ifa->ifa_prefixlen < 31)
765 				ifa->ifa_broadcast = ifa->ifa_address |
766 						     ~ifa->ifa_mask;
767 		} else {
768 			ifa->ifa_prefixlen = 32;
769 			ifa->ifa_mask = inet_make_mask(32);
770 		}
771 		ret = inet_set_ifa(dev, ifa);
772 		break;
773 
774 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
775 		ret = 0;
776 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
777 			inet_del_ifa(in_dev, ifap, 0);
778 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
779 			inet_insert_ifa(ifa);
780 		}
781 		break;
782 
783 	case SIOCSIFDSTADDR:	/* Set the destination address */
784 		ret = 0;
785 		if (ifa->ifa_address == sin->sin_addr.s_addr)
786 			break;
787 		ret = -EINVAL;
788 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
789 			break;
790 		ret = 0;
791 		inet_del_ifa(in_dev, ifap, 0);
792 		ifa->ifa_address = sin->sin_addr.s_addr;
793 		inet_insert_ifa(ifa);
794 		break;
795 
796 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
797 
798 		/*
799 		 *	The mask we set must be legal.
800 		 */
801 		ret = -EINVAL;
802 		if (bad_mask(sin->sin_addr.s_addr, 0))
803 			break;
804 		ret = 0;
805 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
806 			__be32 old_mask = ifa->ifa_mask;
807 			inet_del_ifa(in_dev, ifap, 0);
808 			ifa->ifa_mask = sin->sin_addr.s_addr;
809 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
810 
811 			/* See if current broadcast address matches
812 			 * with current netmask, then recalculate
813 			 * the broadcast address. Otherwise it's a
814 			 * funny address, so don't touch it since
815 			 * the user seems to know what (s)he's doing...
816 			 */
817 			if ((dev->flags & IFF_BROADCAST) &&
818 			    (ifa->ifa_prefixlen < 31) &&
819 			    (ifa->ifa_broadcast ==
820 			     (ifa->ifa_local|~old_mask))) {
821 				ifa->ifa_broadcast = (ifa->ifa_local |
822 						      ~sin->sin_addr.s_addr);
823 			}
824 			inet_insert_ifa(ifa);
825 		}
826 		break;
827 	}
828 done:
829 	rtnl_unlock();
830 out:
831 	return ret;
832 rarok:
833 	rtnl_unlock();
834 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
835 	goto out;
836 }
837 
838 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
839 {
840 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
841 	struct in_ifaddr *ifa;
842 	struct ifreq ifr;
843 	int done = 0;
844 
845 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
846 		goto out;
847 
848 	for (; ifa; ifa = ifa->ifa_next) {
849 		if (!buf) {
850 			done += sizeof(ifr);
851 			continue;
852 		}
853 		if (len < (int) sizeof(ifr))
854 			break;
855 		memset(&ifr, 0, sizeof(struct ifreq));
856 		if (ifa->ifa_label)
857 			strcpy(ifr.ifr_name, ifa->ifa_label);
858 		else
859 			strcpy(ifr.ifr_name, dev->name);
860 
861 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
862 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
863 								ifa->ifa_local;
864 
865 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
866 			done = -EFAULT;
867 			break;
868 		}
869 		buf  += sizeof(struct ifreq);
870 		len  -= sizeof(struct ifreq);
871 		done += sizeof(struct ifreq);
872 	}
873 out:
874 	return done;
875 }
876 
877 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
878 {
879 	__be32 addr = 0;
880 	struct in_device *in_dev;
881 
882 	rcu_read_lock();
883 	in_dev = __in_dev_get_rcu(dev);
884 	if (!in_dev)
885 		goto no_in_dev;
886 
887 	for_primary_ifa(in_dev) {
888 		if (ifa->ifa_scope > scope)
889 			continue;
890 		if (!dst || inet_ifa_match(dst, ifa)) {
891 			addr = ifa->ifa_local;
892 			break;
893 		}
894 		if (!addr)
895 			addr = ifa->ifa_local;
896 	} endfor_ifa(in_dev);
897 no_in_dev:
898 	rcu_read_unlock();
899 
900 	if (addr)
901 		goto out;
902 
903 	/* Not loopback addresses on loopback should be preferred
904 	   in this case. It is importnat that lo is the first interface
905 	   in dev_base list.
906 	 */
907 	read_lock(&dev_base_lock);
908 	rcu_read_lock();
909 	for_each_netdev(&init_net, dev) {
910 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
911 			continue;
912 
913 		for_primary_ifa(in_dev) {
914 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
915 			    ifa->ifa_scope <= scope) {
916 				addr = ifa->ifa_local;
917 				goto out_unlock_both;
918 			}
919 		} endfor_ifa(in_dev);
920 	}
921 out_unlock_both:
922 	read_unlock(&dev_base_lock);
923 	rcu_read_unlock();
924 out:
925 	return addr;
926 }
927 
928 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
929 			      __be32 local, int scope)
930 {
931 	int same = 0;
932 	__be32 addr = 0;
933 
934 	for_ifa(in_dev) {
935 		if (!addr &&
936 		    (local == ifa->ifa_local || !local) &&
937 		    ifa->ifa_scope <= scope) {
938 			addr = ifa->ifa_local;
939 			if (same)
940 				break;
941 		}
942 		if (!same) {
943 			same = (!local || inet_ifa_match(local, ifa)) &&
944 				(!dst || inet_ifa_match(dst, ifa));
945 			if (same && addr) {
946 				if (local || !dst)
947 					break;
948 				/* Is the selected addr into dst subnet? */
949 				if (inet_ifa_match(addr, ifa))
950 					break;
951 				/* No, then can we use new local src? */
952 				if (ifa->ifa_scope <= scope) {
953 					addr = ifa->ifa_local;
954 					break;
955 				}
956 				/* search for large dst subnet for addr */
957 				same = 0;
958 			}
959 		}
960 	} endfor_ifa(in_dev);
961 
962 	return same? addr : 0;
963 }
964 
965 /*
966  * Confirm that local IP address exists using wildcards:
967  * - in_dev: only on this interface, 0=any interface
968  * - dst: only in the same subnet as dst, 0=any dst
969  * - local: address, 0=autoselect the local address
970  * - scope: maximum allowed scope value for the local address
971  */
972 __be32 inet_confirm_addr(struct in_device *in_dev,
973 			 __be32 dst, __be32 local, int scope)
974 {
975 	__be32 addr = 0;
976 	struct net_device *dev;
977 	struct net *net;
978 
979 	if (scope != RT_SCOPE_LINK)
980 		return confirm_addr_indev(in_dev, dst, local, scope);
981 
982 	net = in_dev->dev->nd_net;
983 	read_lock(&dev_base_lock);
984 	rcu_read_lock();
985 	for_each_netdev(net, dev) {
986 		if ((in_dev = __in_dev_get_rcu(dev))) {
987 			addr = confirm_addr_indev(in_dev, dst, local, scope);
988 			if (addr)
989 				break;
990 		}
991 	}
992 	rcu_read_unlock();
993 	read_unlock(&dev_base_lock);
994 
995 	return addr;
996 }
997 
998 /*
999  *	Device notifier
1000  */
1001 
1002 int register_inetaddr_notifier(struct notifier_block *nb)
1003 {
1004 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1005 }
1006 
1007 int unregister_inetaddr_notifier(struct notifier_block *nb)
1008 {
1009 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1010 }
1011 
1012 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1013  * alias numbering and to create unique labels if possible.
1014 */
1015 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1016 {
1017 	struct in_ifaddr *ifa;
1018 	int named = 0;
1019 
1020 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1021 		char old[IFNAMSIZ], *dot;
1022 
1023 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1024 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1025 		if (named++ == 0)
1026 			continue;
1027 		dot = strchr(old, ':');
1028 		if (dot == NULL) {
1029 			sprintf(old, ":%d", named);
1030 			dot = old;
1031 		}
1032 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1033 			strcat(ifa->ifa_label, dot);
1034 		} else {
1035 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1036 		}
1037 	}
1038 }
1039 
1040 /* Called only under RTNL semaphore */
1041 
1042 static int inetdev_event(struct notifier_block *this, unsigned long event,
1043 			 void *ptr)
1044 {
1045 	struct net_device *dev = ptr;
1046 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1047 
1048 	if (dev->nd_net != &init_net)
1049 		return NOTIFY_DONE;
1050 
1051 	ASSERT_RTNL();
1052 
1053 	if (!in_dev) {
1054 		if (event == NETDEV_REGISTER) {
1055 			in_dev = inetdev_init(dev);
1056 			if (!in_dev)
1057 				return notifier_from_errno(-ENOMEM);
1058 			if (dev->flags & IFF_LOOPBACK) {
1059 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1060 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1061 			}
1062 		}
1063 		goto out;
1064 	}
1065 
1066 	switch (event) {
1067 	case NETDEV_REGISTER:
1068 		printk(KERN_DEBUG "inetdev_event: bug\n");
1069 		dev->ip_ptr = NULL;
1070 		break;
1071 	case NETDEV_UP:
1072 		if (dev->mtu < 68)
1073 			break;
1074 		if (dev->flags & IFF_LOOPBACK) {
1075 			struct in_ifaddr *ifa;
1076 			if ((ifa = inet_alloc_ifa()) != NULL) {
1077 				ifa->ifa_local =
1078 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1079 				ifa->ifa_prefixlen = 8;
1080 				ifa->ifa_mask = inet_make_mask(8);
1081 				in_dev_hold(in_dev);
1082 				ifa->ifa_dev = in_dev;
1083 				ifa->ifa_scope = RT_SCOPE_HOST;
1084 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1085 				inet_insert_ifa(ifa);
1086 			}
1087 		}
1088 		ip_mc_up(in_dev);
1089 		break;
1090 	case NETDEV_DOWN:
1091 		ip_mc_down(in_dev);
1092 		break;
1093 	case NETDEV_CHANGEMTU:
1094 		if (dev->mtu >= 68)
1095 			break;
1096 		/* MTU falled under 68, disable IP */
1097 	case NETDEV_UNREGISTER:
1098 		inetdev_destroy(in_dev);
1099 		break;
1100 	case NETDEV_CHANGENAME:
1101 		/* Do not notify about label change, this event is
1102 		 * not interesting to applications using netlink.
1103 		 */
1104 		inetdev_changename(dev, in_dev);
1105 
1106 		devinet_sysctl_unregister(in_dev);
1107 		devinet_sysctl_register(in_dev);
1108 		break;
1109 	}
1110 out:
1111 	return NOTIFY_DONE;
1112 }
1113 
1114 static struct notifier_block ip_netdev_notifier = {
1115 	.notifier_call =inetdev_event,
1116 };
1117 
1118 static inline size_t inet_nlmsg_size(void)
1119 {
1120 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1121 	       + nla_total_size(4) /* IFA_ADDRESS */
1122 	       + nla_total_size(4) /* IFA_LOCAL */
1123 	       + nla_total_size(4) /* IFA_BROADCAST */
1124 	       + nla_total_size(4) /* IFA_ANYCAST */
1125 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1126 }
1127 
1128 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1129 			    u32 pid, u32 seq, int event, unsigned int flags)
1130 {
1131 	struct ifaddrmsg *ifm;
1132 	struct nlmsghdr  *nlh;
1133 
1134 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1135 	if (nlh == NULL)
1136 		return -EMSGSIZE;
1137 
1138 	ifm = nlmsg_data(nlh);
1139 	ifm->ifa_family = AF_INET;
1140 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1141 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1142 	ifm->ifa_scope = ifa->ifa_scope;
1143 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1144 
1145 	if (ifa->ifa_address)
1146 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1147 
1148 	if (ifa->ifa_local)
1149 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1150 
1151 	if (ifa->ifa_broadcast)
1152 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1153 
1154 	if (ifa->ifa_anycast)
1155 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1156 
1157 	if (ifa->ifa_label[0])
1158 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1159 
1160 	return nlmsg_end(skb, nlh);
1161 
1162 nla_put_failure:
1163 	nlmsg_cancel(skb, nlh);
1164 	return -EMSGSIZE;
1165 }
1166 
1167 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1168 {
1169 	struct net *net = skb->sk->sk_net;
1170 	int idx, ip_idx;
1171 	struct net_device *dev;
1172 	struct in_device *in_dev;
1173 	struct in_ifaddr *ifa;
1174 	int s_ip_idx, s_idx = cb->args[0];
1175 
1176 	if (net != &init_net)
1177 		return 0;
1178 
1179 	s_ip_idx = ip_idx = cb->args[1];
1180 	idx = 0;
1181 	for_each_netdev(net, dev) {
1182 		if (idx < s_idx)
1183 			goto cont;
1184 		if (idx > s_idx)
1185 			s_ip_idx = 0;
1186 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1187 			goto cont;
1188 
1189 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1190 		     ifa = ifa->ifa_next, ip_idx++) {
1191 			if (ip_idx < s_ip_idx)
1192 				continue;
1193 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1194 					     cb->nlh->nlmsg_seq,
1195 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1196 				goto done;
1197 		}
1198 cont:
1199 		idx++;
1200 	}
1201 
1202 done:
1203 	cb->args[0] = idx;
1204 	cb->args[1] = ip_idx;
1205 
1206 	return skb->len;
1207 }
1208 
1209 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1210 		      u32 pid)
1211 {
1212 	struct sk_buff *skb;
1213 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1214 	int err = -ENOBUFS;
1215 	struct net *net;
1216 
1217 	net = ifa->ifa_dev->dev->nd_net;
1218 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1219 	if (skb == NULL)
1220 		goto errout;
1221 
1222 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1223 	if (err < 0) {
1224 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1225 		WARN_ON(err == -EMSGSIZE);
1226 		kfree_skb(skb);
1227 		goto errout;
1228 	}
1229 	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1230 errout:
1231 	if (err < 0)
1232 		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1233 }
1234 
1235 #ifdef CONFIG_SYSCTL
1236 
1237 static void devinet_copy_dflt_conf(struct net *net, int i)
1238 {
1239 	struct net_device *dev;
1240 
1241 	read_lock(&dev_base_lock);
1242 	for_each_netdev(net, dev) {
1243 		struct in_device *in_dev;
1244 		rcu_read_lock();
1245 		in_dev = __in_dev_get_rcu(dev);
1246 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1247 			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1248 		rcu_read_unlock();
1249 	}
1250 	read_unlock(&dev_base_lock);
1251 }
1252 
1253 static void inet_forward_change(struct net *net)
1254 {
1255 	struct net_device *dev;
1256 	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1257 
1258 	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1259 	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1260 
1261 	read_lock(&dev_base_lock);
1262 	for_each_netdev(net, dev) {
1263 		struct in_device *in_dev;
1264 		rcu_read_lock();
1265 		in_dev = __in_dev_get_rcu(dev);
1266 		if (in_dev)
1267 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1268 		rcu_read_unlock();
1269 	}
1270 	read_unlock(&dev_base_lock);
1271 
1272 	rt_cache_flush(0);
1273 }
1274 
1275 static int devinet_conf_proc(ctl_table *ctl, int write,
1276 			     struct file* filp, void __user *buffer,
1277 			     size_t *lenp, loff_t *ppos)
1278 {
1279 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1280 
1281 	if (write) {
1282 		struct ipv4_devconf *cnf = ctl->extra1;
1283 		struct net *net = ctl->extra2;
1284 		int i = (int *)ctl->data - cnf->data;
1285 
1286 		set_bit(i, cnf->state);
1287 
1288 		if (cnf == net->ipv4.devconf_dflt)
1289 			devinet_copy_dflt_conf(net, i);
1290 	}
1291 
1292 	return ret;
1293 }
1294 
1295 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1296 			       void __user *oldval, size_t __user *oldlenp,
1297 			       void __user *newval, size_t newlen)
1298 {
1299 	struct ipv4_devconf *cnf;
1300 	struct net *net;
1301 	int *valp = table->data;
1302 	int new;
1303 	int i;
1304 
1305 	if (!newval || !newlen)
1306 		return 0;
1307 
1308 	if (newlen != sizeof(int))
1309 		return -EINVAL;
1310 
1311 	if (get_user(new, (int __user *)newval))
1312 		return -EFAULT;
1313 
1314 	if (new == *valp)
1315 		return 0;
1316 
1317 	if (oldval && oldlenp) {
1318 		size_t len;
1319 
1320 		if (get_user(len, oldlenp))
1321 			return -EFAULT;
1322 
1323 		if (len) {
1324 			if (len > table->maxlen)
1325 				len = table->maxlen;
1326 			if (copy_to_user(oldval, valp, len))
1327 				return -EFAULT;
1328 			if (put_user(len, oldlenp))
1329 				return -EFAULT;
1330 		}
1331 	}
1332 
1333 	*valp = new;
1334 
1335 	cnf = table->extra1;
1336 	net = table->extra2;
1337 	i = (int *)table->data - cnf->data;
1338 
1339 	set_bit(i, cnf->state);
1340 
1341 	if (cnf == net->ipv4.devconf_dflt)
1342 		devinet_copy_dflt_conf(net, i);
1343 
1344 	return 1;
1345 }
1346 
1347 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1348 				  struct file* filp, void __user *buffer,
1349 				  size_t *lenp, loff_t *ppos)
1350 {
1351 	int *valp = ctl->data;
1352 	int val = *valp;
1353 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1354 
1355 	if (write && *valp != val) {
1356 		struct net *net = ctl->extra2;
1357 
1358 		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1359 			inet_forward_change(net);
1360 		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1361 			rt_cache_flush(0);
1362 	}
1363 
1364 	return ret;
1365 }
1366 
1367 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1368 			 struct file* filp, void __user *buffer,
1369 			 size_t *lenp, loff_t *ppos)
1370 {
1371 	int *valp = ctl->data;
1372 	int val = *valp;
1373 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1374 
1375 	if (write && *valp != val)
1376 		rt_cache_flush(0);
1377 
1378 	return ret;
1379 }
1380 
1381 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1382 				  void __user *oldval, size_t __user *oldlenp,
1383 				  void __user *newval, size_t newlen)
1384 {
1385 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1386 				      newval, newlen);
1387 
1388 	if (ret == 1)
1389 		rt_cache_flush(0);
1390 
1391 	return ret;
1392 }
1393 
1394 
/*
 * Build one ctl_table initializer for configuration attribute @attr.
 * The entry points at slot (NET_IPV4_CONF_<attr> - 1) of the global
 * ipv4_devconf's data[] and records &ipv4_devconf in extra1.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl)		\
	{								\
		.ctl_name	= NET_IPV4_CONF_ ## attr,		\
		.procname	= name,					\
		.data		= &ipv4_devconf.data[			\
					NET_IPV4_CONF_ ## attr - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.strategy	= sysctl,				\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry using the generic devinet handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644,				\
			     devinet_conf_proc, devinet_conf_sysctl)

/* Mode 0444 entry using the generic devinet handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444,				\
			     devinet_conf_proc, devinet_conf_sysctl)

/* Mode 0644 entry with caller-supplied proc handler and strategy. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl)		\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Mode 0644 entry whose handlers also flush the routing cache. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name,			\
				     ipv4_doint_and_flush,		\
				     ipv4_doint_and_flush_strategy)
1422 
1423 static struct devinet_sysctl_table {
1424 	struct ctl_table_header *sysctl_header;
1425 	struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1426 	char *dev_name;
1427 } devinet_sysctl = {
1428 	.devinet_vars = {
1429 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1430 					     devinet_sysctl_forward,
1431 					     devinet_conf_sysctl),
1432 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1433 
1434 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1435 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1436 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1437 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1438 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1439 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1440 					"accept_source_route"),
1441 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1442 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1443 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1444 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1445 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1446 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1447 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1448 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1449 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1450 
1451 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1452 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1453 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1454 					      "force_igmp_version"),
1455 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1456 					      "promote_secondaries"),
1457 	},
1458 };
1459 
1460 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1461 		int ctl_name, struct ipv4_devconf *p)
1462 {
1463 	int i;
1464 	struct devinet_sysctl_table *t;
1465 
1466 #define DEVINET_CTL_PATH_DEV	3
1467 
1468 	struct ctl_path devinet_ctl_path[] = {
1469 		{ .procname = "net", .ctl_name = CTL_NET, },
1470 		{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1471 		{ .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1472 		{ /* to be set */ },
1473 		{ },
1474 	};
1475 
1476 	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1477 	if (!t)
1478 		goto out;
1479 
1480 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1481 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1482 		t->devinet_vars[i].extra1 = p;
1483 		t->devinet_vars[i].extra2 = net;
1484 	}
1485 
1486 	/*
1487 	 * Make a copy of dev_name, because '.procname' is regarded as const
1488 	 * by sysctl and we wouldn't want anyone to change it under our feet
1489 	 * (see SIOCSIFNAME).
1490 	 */
1491 	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1492 	if (!t->dev_name)
1493 		goto free;
1494 
1495 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1496 	devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1497 
1498 	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1499 			t->devinet_vars);
1500 	if (!t->sysctl_header)
1501 		goto free_procname;
1502 
1503 	p->sysctl = t;
1504 	return 0;
1505 
1506 free_procname:
1507 	kfree(t->dev_name);
1508 free:
1509 	kfree(t);
1510 out:
1511 	return -ENOBUFS;
1512 }
1513 
1514 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1515 {
1516 	struct devinet_sysctl_table *t = cnf->sysctl;
1517 
1518 	if (t == NULL)
1519 		return;
1520 
1521 	cnf->sysctl = NULL;
1522 	unregister_sysctl_table(t->sysctl_header);
1523 	kfree(t->dev_name);
1524 	kfree(t);
1525 }
1526 
1527 static void devinet_sysctl_register(struct in_device *idev)
1528 {
1529 	neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1530 			NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1531 	__devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1532 			idev->dev->ifindex, &idev->cnf);
1533 }
1534 
1535 static void devinet_sysctl_unregister(struct in_device *idev)
1536 {
1537 	__devinet_sysctl_unregister(&idev->cnf);
1538 	neigh_sysctl_unregister(idev->arp_parms);
1539 }
1540 
1541 static struct ctl_table ctl_forward_entry[] = {
1542 	{
1543 		.ctl_name	= NET_IPV4_FORWARD,
1544 		.procname	= "ip_forward",
1545 		.data		= &ipv4_devconf.data[
1546 					NET_IPV4_CONF_FORWARDING - 1],
1547 		.maxlen		= sizeof(int),
1548 		.mode		= 0644,
1549 		.proc_handler	= devinet_sysctl_forward,
1550 		.strategy	= devinet_conf_sysctl,
1551 		.extra1		= &ipv4_devconf,
1552 		.extra2		= &init_net,
1553 	},
1554 	{ },
1555 };
1556 
1557 static __net_initdata struct ctl_path net_ipv4_path[] = {
1558 	{ .procname = "net", .ctl_name = CTL_NET, },
1559 	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
1560 	{ },
1561 };
1562 #endif
1563 
1564 static __net_init int devinet_init_net(struct net *net)
1565 {
1566 	int err;
1567 	struct ipv4_devconf *all, *dflt;
1568 #ifdef CONFIG_SYSCTL
1569 	struct ctl_table *tbl = ctl_forward_entry;
1570 	struct ctl_table_header *forw_hdr;
1571 #endif
1572 
1573 	err = -ENOMEM;
1574 	all = &ipv4_devconf;
1575 	dflt = &ipv4_devconf_dflt;
1576 
1577 	if (net != &init_net) {
1578 		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1579 		if (all == NULL)
1580 			goto err_alloc_all;
1581 
1582 		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1583 		if (dflt == NULL)
1584 			goto err_alloc_dflt;
1585 
1586 #ifdef CONFIG_SYSCTL
1587 		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1588 		if (tbl == NULL)
1589 			goto err_alloc_ctl;
1590 
1591 		tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1592 		tbl[0].extra1 = all;
1593 		tbl[0].extra2 = net;
1594 #endif
1595 	}
1596 
1597 #ifdef CONFIG_SYSCTL
1598 	err = __devinet_sysctl_register(net, "all",
1599 			NET_PROTO_CONF_ALL, all);
1600 	if (err < 0)
1601 		goto err_reg_all;
1602 
1603 	err = __devinet_sysctl_register(net, "default",
1604 			NET_PROTO_CONF_DEFAULT, dflt);
1605 	if (err < 0)
1606 		goto err_reg_dflt;
1607 
1608 	err = -ENOMEM;
1609 	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1610 	if (forw_hdr == NULL)
1611 		goto err_reg_ctl;
1612 	net->ipv4.forw_hdr = forw_hdr;
1613 #endif
1614 
1615 	net->ipv4.devconf_all = all;
1616 	net->ipv4.devconf_dflt = dflt;
1617 	return 0;
1618 
1619 #ifdef CONFIG_SYSCTL
1620 err_reg_ctl:
1621 	__devinet_sysctl_unregister(dflt);
1622 err_reg_dflt:
1623 	__devinet_sysctl_unregister(all);
1624 err_reg_all:
1625 	if (tbl != ctl_forward_entry)
1626 		kfree(tbl);
1627 err_alloc_ctl:
1628 #endif
1629 	if (dflt != &ipv4_devconf_dflt)
1630 		kfree(dflt);
1631 err_alloc_dflt:
1632 	if (all != &ipv4_devconf)
1633 		kfree(all);
1634 err_alloc_all:
1635 	return err;
1636 }
1637 
1638 static __net_exit void devinet_exit_net(struct net *net)
1639 {
1640 #ifdef CONFIG_SYSCTL
1641 	struct ctl_table *tbl;
1642 
1643 	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1644 	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1645 	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1646 	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1647 	kfree(tbl);
1648 #endif
1649 	kfree(net->ipv4.devconf_dflt);
1650 	kfree(net->ipv4.devconf_all);
1651 }
1652 
1653 static __net_initdata struct pernet_operations devinet_ops = {
1654 	.init = devinet_init_net,
1655 	.exit = devinet_exit_net,
1656 };
1657 
1658 void __init devinet_init(void)
1659 {
1660 	register_pernet_subsys(&devinet_ops);
1661 
1662 	register_gifconf(PF_INET, inet_gifconf);
1663 	register_netdevice_notifier(&ip_netdev_notifier);
1664 
1665 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1666 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1667 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1668 }
1669 
1670 EXPORT_SYMBOL(in_dev_finish_destroy);
1671 EXPORT_SYMBOL(inet_select_addr);
1672 EXPORT_SYMBOL(inetdev_by_index);
1673 EXPORT_SYMBOL(register_inetaddr_notifier);
1674 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1675