xref: /linux/net/ipv4/devinet.c (revision bf74b964775009071cf12f9d59d4dd5e388fbe0b)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 
66 struct ipv4_devconf ipv4_devconf = {
67 	.data = {
68 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
69 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
70 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
71 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
72 	},
73 };
74 
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76 	.data = {
77 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
78 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
79 		[NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
80 		[NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
81 		[NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82 	},
83 };
84 
85 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
86 
87 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
88 	[IFA_LOCAL]     	= { .type = NLA_U32 },
89 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
90 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
91 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
92 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94 
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96 
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99 			 int destroy);
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *in_dev,
102 				    struct ipv4_devconf *p);
103 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
104 #endif
105 
106 /* Locks all the inet devices. */
107 
108 static struct in_ifaddr *inet_alloc_ifa(void)
109 {
110 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
111 
112 	if (ifa) {
113 		INIT_RCU_HEAD(&ifa->rcu_head);
114 	}
115 
116 	return ifa;
117 }
118 
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122 	if (ifa->ifa_dev)
123 		in_dev_put(ifa->ifa_dev);
124 	kfree(ifa);
125 }
126 
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131 
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134 	struct net_device *dev = idev->dev;
135 
136 	BUG_TRAP(!idev->ifa_list);
137 	BUG_TRAP(!idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140 	       idev, dev ? dev->name : "NIL");
141 #endif
142 	dev_put(dev);
143 	if (!idev->dead)
144 		printk("Freeing alive in_device %p\n", idev);
145 	else {
146 		kfree(idev);
147 	}
148 }
149 
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152 	struct in_device *in_dev;
153 
154 	ASSERT_RTNL();
155 
156 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157 	if (!in_dev)
158 		goto out;
159 	INIT_RCU_HEAD(&in_dev->rcu_head);
160 	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
161 	in_dev->cnf.sysctl = NULL;
162 	in_dev->dev = dev;
163 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
164 		goto out_kfree;
165 	/* Reference in_dev->dev */
166 	dev_hold(dev);
167 #ifdef CONFIG_SYSCTL
168 	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
169 			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
170 #endif
171 
172 	/* Account for reference dev->ip_ptr (below) */
173 	in_dev_hold(in_dev);
174 
175 #ifdef CONFIG_SYSCTL
176 	devinet_sysctl_register(in_dev, &in_dev->cnf);
177 #endif
178 	ip_mc_init_dev(in_dev);
179 	if (dev->flags & IFF_UP)
180 		ip_mc_up(in_dev);
181 
182 	/* we can receive as soon as ip_ptr is set -- do this last */
183 	rcu_assign_pointer(dev->ip_ptr, in_dev);
184 out:
185 	return in_dev;
186 out_kfree:
187 	kfree(in_dev);
188 	in_dev = NULL;
189 	goto out;
190 }
191 
192 static void in_dev_rcu_put(struct rcu_head *head)
193 {
194 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
195 	in_dev_put(idev);
196 }
197 
198 static void inetdev_destroy(struct in_device *in_dev)
199 {
200 	struct in_ifaddr *ifa;
201 	struct net_device *dev;
202 
203 	ASSERT_RTNL();
204 
205 	dev = in_dev->dev;
206 	if (dev == &loopback_dev)
207 		return;
208 
209 	in_dev->dead = 1;
210 
211 	ip_mc_destroy_dev(in_dev);
212 
213 	while ((ifa = in_dev->ifa_list) != NULL) {
214 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215 		inet_free_ifa(ifa);
216 	}
217 
218 #ifdef CONFIG_SYSCTL
219 	devinet_sysctl_unregister(&in_dev->cnf);
220 #endif
221 
222 	dev->ip_ptr = NULL;
223 
224 #ifdef CONFIG_SYSCTL
225 	neigh_sysctl_unregister(in_dev->arp_parms);
226 #endif
227 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
228 	arp_ifdown(dev);
229 
230 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
231 }
232 
233 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
234 {
235 	rcu_read_lock();
236 	for_primary_ifa(in_dev) {
237 		if (inet_ifa_match(a, ifa)) {
238 			if (!b || inet_ifa_match(b, ifa)) {
239 				rcu_read_unlock();
240 				return 1;
241 			}
242 		}
243 	} endfor_ifa(in_dev);
244 	rcu_read_unlock();
245 	return 0;
246 }
247 
248 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
249 			 int destroy, struct nlmsghdr *nlh, u32 pid)
250 {
251 	struct in_ifaddr *promote = NULL;
252 	struct in_ifaddr *ifa, *ifa1 = *ifap;
253 	struct in_ifaddr *last_prim = in_dev->ifa_list;
254 	struct in_ifaddr *prev_prom = NULL;
255 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
256 
257 	ASSERT_RTNL();
258 
259 	/* 1. Deleting primary ifaddr forces deletion all secondaries
260 	 * unless alias promotion is set
261 	 **/
262 
263 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
264 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
265 
266 		while ((ifa = *ifap1) != NULL) {
267 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
268 			    ifa1->ifa_scope <= ifa->ifa_scope)
269 				last_prim = ifa;
270 
271 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
272 			    ifa1->ifa_mask != ifa->ifa_mask ||
273 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
274 				ifap1 = &ifa->ifa_next;
275 				prev_prom = ifa;
276 				continue;
277 			}
278 
279 			if (!do_promote) {
280 				*ifap1 = ifa->ifa_next;
281 
282 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
283 				blocking_notifier_call_chain(&inetaddr_chain,
284 						NETDEV_DOWN, ifa);
285 				inet_free_ifa(ifa);
286 			} else {
287 				promote = ifa;
288 				break;
289 			}
290 		}
291 	}
292 
293 	/* 2. Unlink it */
294 
295 	*ifap = ifa1->ifa_next;
296 
297 	/* 3. Announce address deletion */
298 
299 	/* Send message first, then call notifier.
300 	   At first sight, FIB update triggered by notifier
301 	   will refer to already deleted ifaddr, that could confuse
302 	   netlink listeners. It is not true: look, gated sees
303 	   that route deleted and if it still thinks that ifaddr
304 	   is valid, it will try to restore deleted routes... Grr.
305 	   So that, this order is correct.
306 	 */
307 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
308 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
309 
310 	if (promote) {
311 
312 		if (prev_prom) {
313 			prev_prom->ifa_next = promote->ifa_next;
314 			promote->ifa_next = last_prim->ifa_next;
315 			last_prim->ifa_next = promote;
316 		}
317 
318 		promote->ifa_flags &= ~IFA_F_SECONDARY;
319 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
320 		blocking_notifier_call_chain(&inetaddr_chain,
321 				NETDEV_UP, promote);
322 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
323 			if (ifa1->ifa_mask != ifa->ifa_mask ||
324 			    !inet_ifa_match(ifa1->ifa_address, ifa))
325 					continue;
326 			fib_add_ifaddr(ifa);
327 		}
328 
329 	}
330 	if (destroy)
331 		inet_free_ifa(ifa1);
332 }
333 
334 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
335 			 int destroy)
336 {
337 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
338 }
339 
340 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
341 			     u32 pid)
342 {
343 	struct in_device *in_dev = ifa->ifa_dev;
344 	struct in_ifaddr *ifa1, **ifap, **last_primary;
345 
346 	ASSERT_RTNL();
347 
348 	if (!ifa->ifa_local) {
349 		inet_free_ifa(ifa);
350 		return 0;
351 	}
352 
353 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
354 	last_primary = &in_dev->ifa_list;
355 
356 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
357 	     ifap = &ifa1->ifa_next) {
358 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
359 		    ifa->ifa_scope <= ifa1->ifa_scope)
360 			last_primary = &ifa1->ifa_next;
361 		if (ifa1->ifa_mask == ifa->ifa_mask &&
362 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
363 			if (ifa1->ifa_local == ifa->ifa_local) {
364 				inet_free_ifa(ifa);
365 				return -EEXIST;
366 			}
367 			if (ifa1->ifa_scope != ifa->ifa_scope) {
368 				inet_free_ifa(ifa);
369 				return -EINVAL;
370 			}
371 			ifa->ifa_flags |= IFA_F_SECONDARY;
372 		}
373 	}
374 
375 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
376 		net_srandom(ifa->ifa_local);
377 		ifap = last_primary;
378 	}
379 
380 	ifa->ifa_next = *ifap;
381 	*ifap = ifa;
382 
383 	/* Send message first, then call notifier.
384 	   Notifier will trigger FIB update, so that
385 	   listeners of netlink will know about new ifaddr */
386 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
387 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
388 
389 	return 0;
390 }
391 
392 static int inet_insert_ifa(struct in_ifaddr *ifa)
393 {
394 	return __inet_insert_ifa(ifa, NULL, 0);
395 }
396 
397 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
398 {
399 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
400 
401 	ASSERT_RTNL();
402 
403 	if (!in_dev) {
404 		inet_free_ifa(ifa);
405 		return -ENOBUFS;
406 	}
407 	ipv4_devconf_setall(in_dev);
408 	if (ifa->ifa_dev != in_dev) {
409 		BUG_TRAP(!ifa->ifa_dev);
410 		in_dev_hold(in_dev);
411 		ifa->ifa_dev = in_dev;
412 	}
413 	if (LOOPBACK(ifa->ifa_local))
414 		ifa->ifa_scope = RT_SCOPE_HOST;
415 	return inet_insert_ifa(ifa);
416 }
417 
418 struct in_device *inetdev_by_index(int ifindex)
419 {
420 	struct net_device *dev;
421 	struct in_device *in_dev = NULL;
422 	read_lock(&dev_base_lock);
423 	dev = __dev_get_by_index(ifindex);
424 	if (dev)
425 		in_dev = in_dev_get(dev);
426 	read_unlock(&dev_base_lock);
427 	return in_dev;
428 }
429 
430 /* Called only from RTNL semaphored context. No locks. */
431 
432 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
433 				    __be32 mask)
434 {
435 	ASSERT_RTNL();
436 
437 	for_primary_ifa(in_dev) {
438 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
439 			return ifa;
440 	} endfor_ifa(in_dev);
441 	return NULL;
442 }
443 
444 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
445 {
446 	struct nlattr *tb[IFA_MAX+1];
447 	struct in_device *in_dev;
448 	struct ifaddrmsg *ifm;
449 	struct in_ifaddr *ifa, **ifap;
450 	int err = -EINVAL;
451 
452 	ASSERT_RTNL();
453 
454 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
455 	if (err < 0)
456 		goto errout;
457 
458 	ifm = nlmsg_data(nlh);
459 	in_dev = inetdev_by_index(ifm->ifa_index);
460 	if (in_dev == NULL) {
461 		err = -ENODEV;
462 		goto errout;
463 	}
464 
465 	__in_dev_put(in_dev);
466 
467 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
468 	     ifap = &ifa->ifa_next) {
469 		if (tb[IFA_LOCAL] &&
470 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
471 			continue;
472 
473 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
474 			continue;
475 
476 		if (tb[IFA_ADDRESS] &&
477 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
478 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
479 			continue;
480 
481 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
482 		return 0;
483 	}
484 
485 	err = -EADDRNOTAVAIL;
486 errout:
487 	return err;
488 }
489 
490 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
491 {
492 	struct nlattr *tb[IFA_MAX+1];
493 	struct in_ifaddr *ifa;
494 	struct ifaddrmsg *ifm;
495 	struct net_device *dev;
496 	struct in_device *in_dev;
497 	int err = -EINVAL;
498 
499 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
500 	if (err < 0)
501 		goto errout;
502 
503 	ifm = nlmsg_data(nlh);
504 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
505 		err = -EINVAL;
506 		goto errout;
507 	}
508 
509 	dev = __dev_get_by_index(ifm->ifa_index);
510 	if (dev == NULL) {
511 		err = -ENODEV;
512 		goto errout;
513 	}
514 
515 	in_dev = __in_dev_get_rtnl(dev);
516 	if (in_dev == NULL) {
517 		err = -ENOBUFS;
518 		goto errout;
519 	}
520 
521 	ipv4_devconf_setall(in_dev);
522 
523 	ifa = inet_alloc_ifa();
524 	if (ifa == NULL) {
525 		/*
526 		 * A potential indev allocation can be left alive, it stays
527 		 * assigned to its device and is destroy with it.
528 		 */
529 		err = -ENOBUFS;
530 		goto errout;
531 	}
532 
533 	in_dev_hold(in_dev);
534 
535 	if (tb[IFA_ADDRESS] == NULL)
536 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
537 
538 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
539 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
540 	ifa->ifa_flags = ifm->ifa_flags;
541 	ifa->ifa_scope = ifm->ifa_scope;
542 	ifa->ifa_dev = in_dev;
543 
544 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
545 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
546 
547 	if (tb[IFA_BROADCAST])
548 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
549 
550 	if (tb[IFA_ANYCAST])
551 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
552 
553 	if (tb[IFA_LABEL])
554 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
555 	else
556 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
557 
558 	return ifa;
559 
560 errout:
561 	return ERR_PTR(err);
562 }
563 
564 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
565 {
566 	struct in_ifaddr *ifa;
567 
568 	ASSERT_RTNL();
569 
570 	ifa = rtm_to_ifaddr(nlh);
571 	if (IS_ERR(ifa))
572 		return PTR_ERR(ifa);
573 
574 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
575 }
576 
577 /*
578  *	Determine a default network mask, based on the IP address.
579  */
580 
581 static __inline__ int inet_abc_len(__be32 addr)
582 {
583 	int rc = -1;	/* Something else, probably a multicast. */
584 
585 	if (ZERONET(addr))
586 		rc = 0;
587 	else {
588 		__u32 haddr = ntohl(addr);
589 
590 		if (IN_CLASSA(haddr))
591 			rc = 8;
592 		else if (IN_CLASSB(haddr))
593 			rc = 16;
594 		else if (IN_CLASSC(haddr))
595 			rc = 24;
596 	}
597 
598 	return rc;
599 }
600 
601 
602 int devinet_ioctl(unsigned int cmd, void __user *arg)
603 {
604 	struct ifreq ifr;
605 	struct sockaddr_in sin_orig;
606 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
607 	struct in_device *in_dev;
608 	struct in_ifaddr **ifap = NULL;
609 	struct in_ifaddr *ifa = NULL;
610 	struct net_device *dev;
611 	char *colon;
612 	int ret = -EFAULT;
613 	int tryaddrmatch = 0;
614 
615 	/*
616 	 *	Fetch the caller's info block into kernel space
617 	 */
618 
619 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
620 		goto out;
621 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
622 
623 	/* save original address for comparison */
624 	memcpy(&sin_orig, sin, sizeof(*sin));
625 
626 	colon = strchr(ifr.ifr_name, ':');
627 	if (colon)
628 		*colon = 0;
629 
630 #ifdef CONFIG_KMOD
631 	dev_load(ifr.ifr_name);
632 #endif
633 
634 	switch (cmd) {
635 	case SIOCGIFADDR:	/* Get interface address */
636 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
637 	case SIOCGIFDSTADDR:	/* Get the destination address */
638 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
639 		/* Note that these ioctls will not sleep,
640 		   so that we do not impose a lock.
641 		   One day we will be forced to put shlock here (I mean SMP)
642 		 */
643 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
644 		memset(sin, 0, sizeof(*sin));
645 		sin->sin_family = AF_INET;
646 		break;
647 
648 	case SIOCSIFFLAGS:
649 		ret = -EACCES;
650 		if (!capable(CAP_NET_ADMIN))
651 			goto out;
652 		break;
653 	case SIOCSIFADDR:	/* Set interface address (and family) */
654 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
655 	case SIOCSIFDSTADDR:	/* Set the destination address */
656 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
657 		ret = -EACCES;
658 		if (!capable(CAP_NET_ADMIN))
659 			goto out;
660 		ret = -EINVAL;
661 		if (sin->sin_family != AF_INET)
662 			goto out;
663 		break;
664 	default:
665 		ret = -EINVAL;
666 		goto out;
667 	}
668 
669 	rtnl_lock();
670 
671 	ret = -ENODEV;
672 	if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
673 		goto done;
674 
675 	if (colon)
676 		*colon = ':';
677 
678 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
679 		if (tryaddrmatch) {
680 			/* Matthias Andree */
681 			/* compare label and address (4.4BSD style) */
682 			/* note: we only do this for a limited set of ioctls
683 			   and only if the original address family was AF_INET.
684 			   This is checked above. */
685 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
686 			     ifap = &ifa->ifa_next) {
687 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
688 				    sin_orig.sin_addr.s_addr ==
689 							ifa->ifa_address) {
690 					break; /* found */
691 				}
692 			}
693 		}
694 		/* we didn't get a match, maybe the application is
695 		   4.3BSD-style and passed in junk so we fall back to
696 		   comparing just the label */
697 		if (!ifa) {
698 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
699 			     ifap = &ifa->ifa_next)
700 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
701 					break;
702 		}
703 	}
704 
705 	ret = -EADDRNOTAVAIL;
706 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
707 		goto done;
708 
709 	switch (cmd) {
710 	case SIOCGIFADDR:	/* Get interface address */
711 		sin->sin_addr.s_addr = ifa->ifa_local;
712 		goto rarok;
713 
714 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
715 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
716 		goto rarok;
717 
718 	case SIOCGIFDSTADDR:	/* Get the destination address */
719 		sin->sin_addr.s_addr = ifa->ifa_address;
720 		goto rarok;
721 
722 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
723 		sin->sin_addr.s_addr = ifa->ifa_mask;
724 		goto rarok;
725 
726 	case SIOCSIFFLAGS:
727 		if (colon) {
728 			ret = -EADDRNOTAVAIL;
729 			if (!ifa)
730 				break;
731 			ret = 0;
732 			if (!(ifr.ifr_flags & IFF_UP))
733 				inet_del_ifa(in_dev, ifap, 1);
734 			break;
735 		}
736 		ret = dev_change_flags(dev, ifr.ifr_flags);
737 		break;
738 
739 	case SIOCSIFADDR:	/* Set interface address (and family) */
740 		ret = -EINVAL;
741 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
742 			break;
743 
744 		if (!ifa) {
745 			ret = -ENOBUFS;
746 			if ((ifa = inet_alloc_ifa()) == NULL)
747 				break;
748 			if (colon)
749 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
750 			else
751 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
752 		} else {
753 			ret = 0;
754 			if (ifa->ifa_local == sin->sin_addr.s_addr)
755 				break;
756 			inet_del_ifa(in_dev, ifap, 0);
757 			ifa->ifa_broadcast = 0;
758 			ifa->ifa_anycast = 0;
759 		}
760 
761 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
762 
763 		if (!(dev->flags & IFF_POINTOPOINT)) {
764 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
765 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
766 			if ((dev->flags & IFF_BROADCAST) &&
767 			    ifa->ifa_prefixlen < 31)
768 				ifa->ifa_broadcast = ifa->ifa_address |
769 						     ~ifa->ifa_mask;
770 		} else {
771 			ifa->ifa_prefixlen = 32;
772 			ifa->ifa_mask = inet_make_mask(32);
773 		}
774 		ret = inet_set_ifa(dev, ifa);
775 		break;
776 
777 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
778 		ret = 0;
779 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
780 			inet_del_ifa(in_dev, ifap, 0);
781 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
782 			inet_insert_ifa(ifa);
783 		}
784 		break;
785 
786 	case SIOCSIFDSTADDR:	/* Set the destination address */
787 		ret = 0;
788 		if (ifa->ifa_address == sin->sin_addr.s_addr)
789 			break;
790 		ret = -EINVAL;
791 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
792 			break;
793 		ret = 0;
794 		inet_del_ifa(in_dev, ifap, 0);
795 		ifa->ifa_address = sin->sin_addr.s_addr;
796 		inet_insert_ifa(ifa);
797 		break;
798 
799 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
800 
801 		/*
802 		 *	The mask we set must be legal.
803 		 */
804 		ret = -EINVAL;
805 		if (bad_mask(sin->sin_addr.s_addr, 0))
806 			break;
807 		ret = 0;
808 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
809 			__be32 old_mask = ifa->ifa_mask;
810 			inet_del_ifa(in_dev, ifap, 0);
811 			ifa->ifa_mask = sin->sin_addr.s_addr;
812 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
813 
814 			/* See if current broadcast address matches
815 			 * with current netmask, then recalculate
816 			 * the broadcast address. Otherwise it's a
817 			 * funny address, so don't touch it since
818 			 * the user seems to know what (s)he's doing...
819 			 */
820 			if ((dev->flags & IFF_BROADCAST) &&
821 			    (ifa->ifa_prefixlen < 31) &&
822 			    (ifa->ifa_broadcast ==
823 			     (ifa->ifa_local|~old_mask))) {
824 				ifa->ifa_broadcast = (ifa->ifa_local |
825 						      ~sin->sin_addr.s_addr);
826 			}
827 			inet_insert_ifa(ifa);
828 		}
829 		break;
830 	}
831 done:
832 	rtnl_unlock();
833 out:
834 	return ret;
835 rarok:
836 	rtnl_unlock();
837 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
838 	goto out;
839 }
840 
841 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
842 {
843 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
844 	struct in_ifaddr *ifa;
845 	struct ifreq ifr;
846 	int done = 0;
847 
848 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
849 		goto out;
850 
851 	for (; ifa; ifa = ifa->ifa_next) {
852 		if (!buf) {
853 			done += sizeof(ifr);
854 			continue;
855 		}
856 		if (len < (int) sizeof(ifr))
857 			break;
858 		memset(&ifr, 0, sizeof(struct ifreq));
859 		if (ifa->ifa_label)
860 			strcpy(ifr.ifr_name, ifa->ifa_label);
861 		else
862 			strcpy(ifr.ifr_name, dev->name);
863 
864 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
865 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
866 								ifa->ifa_local;
867 
868 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
869 			done = -EFAULT;
870 			break;
871 		}
872 		buf  += sizeof(struct ifreq);
873 		len  -= sizeof(struct ifreq);
874 		done += sizeof(struct ifreq);
875 	}
876 out:
877 	return done;
878 }
879 
880 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
881 {
882 	__be32 addr = 0;
883 	struct in_device *in_dev;
884 
885 	rcu_read_lock();
886 	in_dev = __in_dev_get_rcu(dev);
887 	if (!in_dev)
888 		goto no_in_dev;
889 
890 	for_primary_ifa(in_dev) {
891 		if (ifa->ifa_scope > scope)
892 			continue;
893 		if (!dst || inet_ifa_match(dst, ifa)) {
894 			addr = ifa->ifa_local;
895 			break;
896 		}
897 		if (!addr)
898 			addr = ifa->ifa_local;
899 	} endfor_ifa(in_dev);
900 no_in_dev:
901 	rcu_read_unlock();
902 
903 	if (addr)
904 		goto out;
905 
906 	/* Not loopback addresses on loopback should be preferred
907 	   in this case. It is importnat that lo is the first interface
908 	   in dev_base list.
909 	 */
910 	read_lock(&dev_base_lock);
911 	rcu_read_lock();
912 	for_each_netdev(dev) {
913 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
914 			continue;
915 
916 		for_primary_ifa(in_dev) {
917 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
918 			    ifa->ifa_scope <= scope) {
919 				addr = ifa->ifa_local;
920 				goto out_unlock_both;
921 			}
922 		} endfor_ifa(in_dev);
923 	}
924 out_unlock_both:
925 	read_unlock(&dev_base_lock);
926 	rcu_read_unlock();
927 out:
928 	return addr;
929 }
930 
931 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
932 			      __be32 local, int scope)
933 {
934 	int same = 0;
935 	__be32 addr = 0;
936 
937 	for_ifa(in_dev) {
938 		if (!addr &&
939 		    (local == ifa->ifa_local || !local) &&
940 		    ifa->ifa_scope <= scope) {
941 			addr = ifa->ifa_local;
942 			if (same)
943 				break;
944 		}
945 		if (!same) {
946 			same = (!local || inet_ifa_match(local, ifa)) &&
947 				(!dst || inet_ifa_match(dst, ifa));
948 			if (same && addr) {
949 				if (local || !dst)
950 					break;
951 				/* Is the selected addr into dst subnet? */
952 				if (inet_ifa_match(addr, ifa))
953 					break;
954 				/* No, then can we use new local src? */
955 				if (ifa->ifa_scope <= scope) {
956 					addr = ifa->ifa_local;
957 					break;
958 				}
959 				/* search for large dst subnet for addr */
960 				same = 0;
961 			}
962 		}
963 	} endfor_ifa(in_dev);
964 
965 	return same? addr : 0;
966 }
967 
968 /*
969  * Confirm that local IP address exists using wildcards:
970  * - dev: only on this interface, 0=any interface
971  * - dst: only in the same subnet as dst, 0=any dst
972  * - local: address, 0=autoselect the local address
973  * - scope: maximum allowed scope value for the local address
974  */
975 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
976 {
977 	__be32 addr = 0;
978 	struct in_device *in_dev;
979 
980 	if (dev) {
981 		rcu_read_lock();
982 		if ((in_dev = __in_dev_get_rcu(dev)))
983 			addr = confirm_addr_indev(in_dev, dst, local, scope);
984 		rcu_read_unlock();
985 
986 		return addr;
987 	}
988 
989 	read_lock(&dev_base_lock);
990 	rcu_read_lock();
991 	for_each_netdev(dev) {
992 		if ((in_dev = __in_dev_get_rcu(dev))) {
993 			addr = confirm_addr_indev(in_dev, dst, local, scope);
994 			if (addr)
995 				break;
996 		}
997 	}
998 	rcu_read_unlock();
999 	read_unlock(&dev_base_lock);
1000 
1001 	return addr;
1002 }
1003 
1004 /*
1005  *	Device notifier
1006  */
1007 
1008 int register_inetaddr_notifier(struct notifier_block *nb)
1009 {
1010 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1011 }
1012 
1013 int unregister_inetaddr_notifier(struct notifier_block *nb)
1014 {
1015 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1016 }
1017 
1018 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1019  * alias numbering and to create unique labels if possible.
1020 */
1021 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1022 {
1023 	struct in_ifaddr *ifa;
1024 	int named = 0;
1025 
1026 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1027 		char old[IFNAMSIZ], *dot;
1028 
1029 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1030 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1031 		if (named++ == 0)
1032 			continue;
1033 		dot = strchr(ifa->ifa_label, ':');
1034 		if (dot == NULL) {
1035 			sprintf(old, ":%d", named);
1036 			dot = old;
1037 		}
1038 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1039 			strcat(ifa->ifa_label, dot);
1040 		} else {
1041 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1042 		}
1043 	}
1044 }
1045 
1046 /* Called only under RTNL semaphore */
1047 
1048 static int inetdev_event(struct notifier_block *this, unsigned long event,
1049 			 void *ptr)
1050 {
1051 	struct net_device *dev = ptr;
1052 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1053 
1054 	ASSERT_RTNL();
1055 
1056 	if (!in_dev) {
1057 		if (event == NETDEV_REGISTER) {
1058 			in_dev = inetdev_init(dev);
1059 			if (dev == &loopback_dev) {
1060 				if (!in_dev)
1061 					panic("devinet: "
1062 					      "Failed to create loopback\n");
1063 				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1064 				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1065 			}
1066 		}
1067 		goto out;
1068 	}
1069 
1070 	switch (event) {
1071 	case NETDEV_REGISTER:
1072 		printk(KERN_DEBUG "inetdev_event: bug\n");
1073 		dev->ip_ptr = NULL;
1074 		break;
1075 	case NETDEV_UP:
1076 		if (dev->mtu < 68)
1077 			break;
1078 		if (dev == &loopback_dev) {
1079 			struct in_ifaddr *ifa;
1080 			if ((ifa = inet_alloc_ifa()) != NULL) {
1081 				ifa->ifa_local =
1082 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1083 				ifa->ifa_prefixlen = 8;
1084 				ifa->ifa_mask = inet_make_mask(8);
1085 				in_dev_hold(in_dev);
1086 				ifa->ifa_dev = in_dev;
1087 				ifa->ifa_scope = RT_SCOPE_HOST;
1088 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1089 				inet_insert_ifa(ifa);
1090 			}
1091 		}
1092 		ip_mc_up(in_dev);
1093 		break;
1094 	case NETDEV_DOWN:
1095 		ip_mc_down(in_dev);
1096 		break;
1097 	case NETDEV_CHANGEMTU:
1098 		if (dev->mtu >= 68)
1099 			break;
1100 		/* MTU falled under 68, disable IP */
1101 	case NETDEV_UNREGISTER:
1102 		inetdev_destroy(in_dev);
1103 		break;
1104 	case NETDEV_CHANGENAME:
1105 		/* Do not notify about label change, this event is
1106 		 * not interesting to applications using netlink.
1107 		 */
1108 		inetdev_changename(dev, in_dev);
1109 
1110 #ifdef CONFIG_SYSCTL
1111 		devinet_sysctl_unregister(&in_dev->cnf);
1112 		neigh_sysctl_unregister(in_dev->arp_parms);
1113 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1114 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1115 		devinet_sysctl_register(in_dev, &in_dev->cnf);
1116 #endif
1117 		break;
1118 	}
1119 out:
1120 	return NOTIFY_DONE;
1121 }
1122 
1123 static struct notifier_block ip_netdev_notifier = {
1124 	.notifier_call =inetdev_event,
1125 };
1126 
1127 static inline size_t inet_nlmsg_size(void)
1128 {
1129 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1130 	       + nla_total_size(4) /* IFA_ADDRESS */
1131 	       + nla_total_size(4) /* IFA_LOCAL */
1132 	       + nla_total_size(4) /* IFA_BROADCAST */
1133 	       + nla_total_size(4) /* IFA_ANYCAST */
1134 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1135 }
1136 
1137 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1138 			    u32 pid, u32 seq, int event, unsigned int flags)
1139 {
1140 	struct ifaddrmsg *ifm;
1141 	struct nlmsghdr  *nlh;
1142 
1143 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1144 	if (nlh == NULL)
1145 		return -EMSGSIZE;
1146 
1147 	ifm = nlmsg_data(nlh);
1148 	ifm->ifa_family = AF_INET;
1149 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1150 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1151 	ifm->ifa_scope = ifa->ifa_scope;
1152 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1153 
1154 	if (ifa->ifa_address)
1155 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1156 
1157 	if (ifa->ifa_local)
1158 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1159 
1160 	if (ifa->ifa_broadcast)
1161 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1162 
1163 	if (ifa->ifa_anycast)
1164 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1165 
1166 	if (ifa->ifa_label[0])
1167 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1168 
1169 	return nlmsg_end(skb, nlh);
1170 
1171 nla_put_failure:
1172 	nlmsg_cancel(skb, nlh);
1173 	return -EMSGSIZE;
1174 }
1175 
1176 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1177 {
1178 	int idx, ip_idx;
1179 	struct net_device *dev;
1180 	struct in_device *in_dev;
1181 	struct in_ifaddr *ifa;
1182 	int s_ip_idx, s_idx = cb->args[0];
1183 
1184 	s_ip_idx = ip_idx = cb->args[1];
1185 	idx = 0;
1186 	for_each_netdev(dev) {
1187 		if (idx < s_idx)
1188 			goto cont;
1189 		if (idx > s_idx)
1190 			s_ip_idx = 0;
1191 		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1192 			goto cont;
1193 
1194 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1195 		     ifa = ifa->ifa_next, ip_idx++) {
1196 			if (ip_idx < s_ip_idx)
1197 				goto cont;
1198 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1199 					     cb->nlh->nlmsg_seq,
1200 					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
1201 				goto done;
1202 		}
1203 cont:
1204 		idx++;
1205 	}
1206 
1207 done:
1208 	cb->args[0] = idx;
1209 	cb->args[1] = ip_idx;
1210 
1211 	return skb->len;
1212 }
1213 
1214 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1215 		      u32 pid)
1216 {
1217 	struct sk_buff *skb;
1218 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1219 	int err = -ENOBUFS;
1220 
1221 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1222 	if (skb == NULL)
1223 		goto errout;
1224 
1225 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1226 	if (err < 0) {
1227 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1228 		WARN_ON(err == -EMSGSIZE);
1229 		kfree_skb(skb);
1230 		goto errout;
1231 	}
1232 	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1233 errout:
1234 	if (err < 0)
1235 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1236 }
1237 
1238 #ifdef CONFIG_SYSCTL
1239 
1240 static void devinet_copy_dflt_conf(int i)
1241 {
1242 	struct net_device *dev;
1243 
1244 	read_lock(&dev_base_lock);
1245 	for_each_netdev(dev) {
1246 		struct in_device *in_dev;
1247 		rcu_read_lock();
1248 		in_dev = __in_dev_get_rcu(dev);
1249 		if (in_dev && !test_bit(i, in_dev->cnf.state))
1250 			in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1251 		rcu_read_unlock();
1252 	}
1253 	read_unlock(&dev_base_lock);
1254 }
1255 
1256 static int devinet_conf_proc(ctl_table *ctl, int write,
1257 			     struct file* filp, void __user *buffer,
1258 			     size_t *lenp, loff_t *ppos)
1259 {
1260 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1261 
1262 	if (write) {
1263 		struct ipv4_devconf *cnf = ctl->extra1;
1264 		int i = (int *)ctl->data - cnf->data;
1265 
1266 		set_bit(i, cnf->state);
1267 
1268 		if (cnf == &ipv4_devconf_dflt)
1269 			devinet_copy_dflt_conf(i);
1270 	}
1271 
1272 	return ret;
1273 }
1274 
1275 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1276 			       void __user *oldval, size_t __user *oldlenp,
1277 			       void __user *newval, size_t newlen)
1278 {
1279 	struct ipv4_devconf *cnf;
1280 	int *valp = table->data;
1281 	int new;
1282 	int i;
1283 
1284 	if (!newval || !newlen)
1285 		return 0;
1286 
1287 	if (newlen != sizeof(int))
1288 		return -EINVAL;
1289 
1290 	if (get_user(new, (int __user *)newval))
1291 		return -EFAULT;
1292 
1293 	if (new == *valp)
1294 		return 0;
1295 
1296 	if (oldval && oldlenp) {
1297 		size_t len;
1298 
1299 		if (get_user(len, oldlenp))
1300 			return -EFAULT;
1301 
1302 		if (len) {
1303 			if (len > table->maxlen)
1304 				len = table->maxlen;
1305 			if (copy_to_user(oldval, valp, len))
1306 				return -EFAULT;
1307 			if (put_user(len, oldlenp))
1308 				return -EFAULT;
1309 		}
1310 	}
1311 
1312 	*valp = new;
1313 
1314 	cnf = table->extra1;
1315 	i = (int *)table->data - cnf->data;
1316 
1317 	set_bit(i, cnf->state);
1318 
1319 	if (cnf == &ipv4_devconf_dflt)
1320 		devinet_copy_dflt_conf(i);
1321 
1322 	return 1;
1323 }
1324 
1325 void inet_forward_change(void)
1326 {
1327 	struct net_device *dev;
1328 	int on = IPV4_DEVCONF_ALL(FORWARDING);
1329 
1330 	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1331 	IPV4_DEVCONF_DFLT(FORWARDING) = on;
1332 
1333 	read_lock(&dev_base_lock);
1334 	for_each_netdev(dev) {
1335 		struct in_device *in_dev;
1336 		rcu_read_lock();
1337 		in_dev = __in_dev_get_rcu(dev);
1338 		if (in_dev)
1339 			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1340 		rcu_read_unlock();
1341 	}
1342 	read_unlock(&dev_base_lock);
1343 
1344 	rt_cache_flush(0);
1345 }
1346 
1347 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1348 				  struct file* filp, void __user *buffer,
1349 				  size_t *lenp, loff_t *ppos)
1350 {
1351 	int *valp = ctl->data;
1352 	int val = *valp;
1353 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1354 
1355 	if (write && *valp != val) {
1356 		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1357 			inet_forward_change();
1358 		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
1359 			rt_cache_flush(0);
1360 	}
1361 
1362 	return ret;
1363 }
1364 
1365 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1366 			 struct file* filp, void __user *buffer,
1367 			 size_t *lenp, loff_t *ppos)
1368 {
1369 	int *valp = ctl->data;
1370 	int val = *valp;
1371 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1372 
1373 	if (write && *valp != val)
1374 		rt_cache_flush(0);
1375 
1376 	return ret;
1377 }
1378 
1379 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1380 				  void __user *oldval, size_t __user *oldlenp,
1381 				  void __user *newval, size_t newlen)
1382 {
1383 	int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1384 				      newval, newlen);
1385 
1386 	if (ret == 1)
1387 		rt_cache_flush(0);
1388 
1389 	return ret;
1390 }
1391 
1392 
/*
 * Initializer for one ctl_table entry of the devconf template table.
 *
 *   attr    - suffix of the NET_IPV4_CONF_* constant; selects both the
 *             ctl_name and the slot (constant - 1) in ipv4_devconf.data
 *   pname   - file name of the entry
 *   perm    - access mode bits
 *   handler - proc handler
 *   strat   - sysctl(2) strategy routine
 *
 * .data and .extra1 refer to the global ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, pname, perm, handler, strat) \
	{ \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.procname	= pname, \
		.mode		= perm, \
		.maxlen		= sizeof(int), \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= handler, \
		.strategy	= strat, \
	}

/* Read-write (0644) entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Read-only (0444) entry using the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, pname) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Read-write (0644) entry with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, handler, strat) \
	DEVINET_SYSCTL_ENTRY(attr, pname, 0644, handler, strat)

/* Read-write entry whose handlers flush the routing cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, pname) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1420 
1421 static struct devinet_sysctl_table {
1422 	struct ctl_table_header *sysctl_header;
1423 	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
1424 	ctl_table		devinet_dev[2];
1425 	ctl_table		devinet_conf_dir[2];
1426 	ctl_table		devinet_proto_dir[2];
1427 	ctl_table		devinet_root_dir[2];
1428 } devinet_sysctl = {
1429 	.devinet_vars = {
1430 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1431 					     devinet_sysctl_forward,
1432 					     devinet_conf_sysctl),
1433 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1434 
1435 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1436 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1437 		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1438 		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1439 		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1440 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1441 					"accept_source_route"),
1442 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1443 		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1444 		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1445 		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1446 		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1447 		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1448 		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1449 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1450 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1451 
1452 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1453 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1454 		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1455 					      "force_igmp_version"),
1456 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1457 					      "promote_secondaries"),
1458 	},
1459 	.devinet_dev = {
1460 		{
1461 			.ctl_name	= NET_PROTO_CONF_ALL,
1462 			.procname	= "all",
1463 			.mode		= 0555,
1464 			.child		= devinet_sysctl.devinet_vars,
1465 		},
1466 	},
1467 	.devinet_conf_dir = {
1468 		{
1469 			.ctl_name	= NET_IPV4_CONF,
1470 			.procname	= "conf",
1471 			.mode		= 0555,
1472 			.child		= devinet_sysctl.devinet_dev,
1473 		},
1474 	},
1475 	.devinet_proto_dir = {
1476 		{
1477 			.ctl_name	= NET_IPV4,
1478 			.procname	= "ipv4",
1479 			.mode		= 0555,
1480 			.child 		= devinet_sysctl.devinet_conf_dir,
1481 		},
1482 	},
1483 	.devinet_root_dir = {
1484 		{
1485 			.ctl_name	= CTL_NET,
1486 			.procname 	= "net",
1487 			.mode		= 0555,
1488 			.child		= devinet_sysctl.devinet_proto_dir,
1489 		},
1490 	},
1491 };
1492 
1493 static void devinet_sysctl_register(struct in_device *in_dev,
1494 				    struct ipv4_devconf *p)
1495 {
1496 	int i;
1497 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
1498 	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1499 						 GFP_KERNEL);
1500 	char *dev_name = NULL;
1501 
1502 	if (!t)
1503 		return;
1504 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1505 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1506 		t->devinet_vars[i].extra1 = p;
1507 	}
1508 
1509 	if (dev) {
1510 		dev_name = dev->name;
1511 		t->devinet_dev[0].ctl_name = dev->ifindex;
1512 	} else {
1513 		dev_name = "default";
1514 		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1515 	}
1516 
1517 	/*
1518 	 * Make a copy of dev_name, because '.procname' is regarded as const
1519 	 * by sysctl and we wouldn't want anyone to change it under our feet
1520 	 * (see SIOCSIFNAME).
1521 	 */
1522 	dev_name = kstrdup(dev_name, GFP_KERNEL);
1523 	if (!dev_name)
1524 	    goto free;
1525 
1526 	t->devinet_dev[0].procname    = dev_name;
1527 	t->devinet_dev[0].child	      = t->devinet_vars;
1528 	t->devinet_conf_dir[0].child  = t->devinet_dev;
1529 	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1530 	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1531 
1532 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1533 	if (!t->sysctl_header)
1534 	    goto free_procname;
1535 
1536 	p->sysctl = t;
1537 	return;
1538 
1539 	/* error path */
1540  free_procname:
1541 	kfree(dev_name);
1542  free:
1543 	kfree(t);
1544 	return;
1545 }
1546 
1547 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1548 {
1549 	if (p->sysctl) {
1550 		struct devinet_sysctl_table *t = p->sysctl;
1551 		p->sysctl = NULL;
1552 		unregister_sysctl_table(t->sysctl_header);
1553 		kfree(t->devinet_dev[0].procname);
1554 		kfree(t);
1555 	}
1556 }
1557 #endif
1558 
1559 void __init devinet_init(void)
1560 {
1561 	register_gifconf(PF_INET, inet_gifconf);
1562 	register_netdevice_notifier(&ip_netdev_notifier);
1563 
1564 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1565 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1566 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1567 #ifdef CONFIG_SYSCTL
1568 	devinet_sysctl.sysctl_header =
1569 		register_sysctl_table(devinet_sysctl.devinet_root_dir);
1570 	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1571 #endif
1572 }
1573 
1574 EXPORT_SYMBOL(in_dev_finish_destroy);
1575 EXPORT_SYMBOL(inet_select_addr);
1576 EXPORT_SYMBOL(inetdev_by_index);
1577 EXPORT_SYMBOL(register_inetaddr_notifier);
1578 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1579