xref: /linux/net/ipv4/devinet.c (revision cd354f1ae75e6466a7e31b727faede57a1f89ca5)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/rtnetlink.h>
52 #include <linux/init.h>
53 #include <linux/notifier.h>
54 #include <linux/inetdevice.h>
55 #include <linux/igmp.h>
56 #ifdef CONFIG_SYSCTL
57 #include <linux/sysctl.h>
58 #endif
59 #include <linux/kmod.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/netlink.h>
66 
67 struct ipv4_devconf ipv4_devconf = {
68 	.accept_redirects = 1,
69 	.send_redirects =  1,
70 	.secure_redirects = 1,
71 	.shared_media =	  1,
72 };
73 
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75 	.accept_redirects =  1,
76 	.send_redirects =    1,
77 	.secure_redirects =  1,
78 	.shared_media =	     1,
79 	.accept_source_route = 1,
80 };
81 
82 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
83 	[IFA_LOCAL]     	= { .type = NLA_U32 },
84 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
85 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
86 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
87 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
88 };
89 
90 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
91 
92 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
93 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
94 			 int destroy);
95 #ifdef CONFIG_SYSCTL
96 static void devinet_sysctl_register(struct in_device *in_dev,
97 				    struct ipv4_devconf *p);
98 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
99 #endif
100 
101 /* Locks all the inet devices. */
102 
103 static struct in_ifaddr *inet_alloc_ifa(void)
104 {
105 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
106 
107 	if (ifa) {
108 		INIT_RCU_HEAD(&ifa->rcu_head);
109 	}
110 
111 	return ifa;
112 }
113 
114 static void inet_rcu_free_ifa(struct rcu_head *head)
115 {
116 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
117 	if (ifa->ifa_dev)
118 		in_dev_put(ifa->ifa_dev);
119 	kfree(ifa);
120 }
121 
122 static inline void inet_free_ifa(struct in_ifaddr *ifa)
123 {
124 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
125 }
126 
127 void in_dev_finish_destroy(struct in_device *idev)
128 {
129 	struct net_device *dev = idev->dev;
130 
131 	BUG_TRAP(!idev->ifa_list);
132 	BUG_TRAP(!idev->mc_list);
133 #ifdef NET_REFCNT_DEBUG
134 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
135 	       idev, dev ? dev->name : "NIL");
136 #endif
137 	dev_put(dev);
138 	if (!idev->dead)
139 		printk("Freeing alive in_device %p\n", idev);
140 	else {
141 		kfree(idev);
142 	}
143 }
144 
145 struct in_device *inetdev_init(struct net_device *dev)
146 {
147 	struct in_device *in_dev;
148 
149 	ASSERT_RTNL();
150 
151 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
152 	if (!in_dev)
153 		goto out;
154 	INIT_RCU_HEAD(&in_dev->rcu_head);
155 	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
156 	in_dev->cnf.sysctl = NULL;
157 	in_dev->dev = dev;
158 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
159 		goto out_kfree;
160 	/* Reference in_dev->dev */
161 	dev_hold(dev);
162 #ifdef CONFIG_SYSCTL
163 	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
164 			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
165 #endif
166 
167 	/* Account for reference dev->ip_ptr (below) */
168 	in_dev_hold(in_dev);
169 
170 #ifdef CONFIG_SYSCTL
171 	devinet_sysctl_register(in_dev, &in_dev->cnf);
172 #endif
173 	ip_mc_init_dev(in_dev);
174 	if (dev->flags & IFF_UP)
175 		ip_mc_up(in_dev);
176 
177 	/* we can receive as soon as ip_ptr is set -- do this last */
178 	rcu_assign_pointer(dev->ip_ptr, in_dev);
179 out:
180 	return in_dev;
181 out_kfree:
182 	kfree(in_dev);
183 	in_dev = NULL;
184 	goto out;
185 }
186 
187 static void in_dev_rcu_put(struct rcu_head *head)
188 {
189 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
190 	in_dev_put(idev);
191 }
192 
193 static void inetdev_destroy(struct in_device *in_dev)
194 {
195 	struct in_ifaddr *ifa;
196 	struct net_device *dev;
197 
198 	ASSERT_RTNL();
199 
200 	dev = in_dev->dev;
201 	if (dev == &loopback_dev)
202 		return;
203 
204 	in_dev->dead = 1;
205 
206 	ip_mc_destroy_dev(in_dev);
207 
208 	while ((ifa = in_dev->ifa_list) != NULL) {
209 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
210 		inet_free_ifa(ifa);
211 	}
212 
213 #ifdef CONFIG_SYSCTL
214 	devinet_sysctl_unregister(&in_dev->cnf);
215 #endif
216 
217 	dev->ip_ptr = NULL;
218 
219 #ifdef CONFIG_SYSCTL
220 	neigh_sysctl_unregister(in_dev->arp_parms);
221 #endif
222 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
223 	arp_ifdown(dev);
224 
225 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
226 }
227 
228 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
229 {
230 	rcu_read_lock();
231 	for_primary_ifa(in_dev) {
232 		if (inet_ifa_match(a, ifa)) {
233 			if (!b || inet_ifa_match(b, ifa)) {
234 				rcu_read_unlock();
235 				return 1;
236 			}
237 		}
238 	} endfor_ifa(in_dev);
239 	rcu_read_unlock();
240 	return 0;
241 }
242 
243 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
244 			 int destroy, struct nlmsghdr *nlh, u32 pid)
245 {
246 	struct in_ifaddr *promote = NULL;
247 	struct in_ifaddr *ifa, *ifa1 = *ifap;
248 	struct in_ifaddr *last_prim = in_dev->ifa_list;
249 	struct in_ifaddr *prev_prom = NULL;
250 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
251 
252 	ASSERT_RTNL();
253 
254 	/* 1. Deleting primary ifaddr forces deletion all secondaries
255 	 * unless alias promotion is set
256 	 **/
257 
258 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
259 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
260 
261 		while ((ifa = *ifap1) != NULL) {
262 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
263 			    ifa1->ifa_scope <= ifa->ifa_scope)
264 				last_prim = ifa;
265 
266 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
267 			    ifa1->ifa_mask != ifa->ifa_mask ||
268 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
269 				ifap1 = &ifa->ifa_next;
270 				prev_prom = ifa;
271 				continue;
272 			}
273 
274 			if (!do_promote) {
275 				*ifap1 = ifa->ifa_next;
276 
277 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
278 				blocking_notifier_call_chain(&inetaddr_chain,
279 						NETDEV_DOWN, ifa);
280 				inet_free_ifa(ifa);
281 			} else {
282 				promote = ifa;
283 				break;
284 			}
285 		}
286 	}
287 
288 	/* 2. Unlink it */
289 
290 	*ifap = ifa1->ifa_next;
291 
292 	/* 3. Announce address deletion */
293 
294 	/* Send message first, then call notifier.
295 	   At first sight, FIB update triggered by notifier
296 	   will refer to already deleted ifaddr, that could confuse
297 	   netlink listeners. It is not true: look, gated sees
298 	   that route deleted and if it still thinks that ifaddr
299 	   is valid, it will try to restore deleted routes... Grr.
300 	   So that, this order is correct.
301 	 */
302 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
303 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
304 
305 	if (promote) {
306 
307 		if (prev_prom) {
308 			prev_prom->ifa_next = promote->ifa_next;
309 			promote->ifa_next = last_prim->ifa_next;
310 			last_prim->ifa_next = promote;
311 		}
312 
313 		promote->ifa_flags &= ~IFA_F_SECONDARY;
314 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
315 		blocking_notifier_call_chain(&inetaddr_chain,
316 				NETDEV_UP, promote);
317 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
318 			if (ifa1->ifa_mask != ifa->ifa_mask ||
319 			    !inet_ifa_match(ifa1->ifa_address, ifa))
320 					continue;
321 			fib_add_ifaddr(ifa);
322 		}
323 
324 	}
325 	if (destroy) {
326 		inet_free_ifa(ifa1);
327 
328 		if (!in_dev->ifa_list)
329 			inetdev_destroy(in_dev);
330 	}
331 }
332 
333 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
334 			 int destroy)
335 {
336 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
337 }
338 
339 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
340 			     u32 pid)
341 {
342 	struct in_device *in_dev = ifa->ifa_dev;
343 	struct in_ifaddr *ifa1, **ifap, **last_primary;
344 
345 	ASSERT_RTNL();
346 
347 	if (!ifa->ifa_local) {
348 		inet_free_ifa(ifa);
349 		return 0;
350 	}
351 
352 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
353 	last_primary = &in_dev->ifa_list;
354 
355 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
356 	     ifap = &ifa1->ifa_next) {
357 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
358 		    ifa->ifa_scope <= ifa1->ifa_scope)
359 			last_primary = &ifa1->ifa_next;
360 		if (ifa1->ifa_mask == ifa->ifa_mask &&
361 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
362 			if (ifa1->ifa_local == ifa->ifa_local) {
363 				inet_free_ifa(ifa);
364 				return -EEXIST;
365 			}
366 			if (ifa1->ifa_scope != ifa->ifa_scope) {
367 				inet_free_ifa(ifa);
368 				return -EINVAL;
369 			}
370 			ifa->ifa_flags |= IFA_F_SECONDARY;
371 		}
372 	}
373 
374 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
375 		net_srandom(ifa->ifa_local);
376 		ifap = last_primary;
377 	}
378 
379 	ifa->ifa_next = *ifap;
380 	*ifap = ifa;
381 
382 	/* Send message first, then call notifier.
383 	   Notifier will trigger FIB update, so that
384 	   listeners of netlink will know about new ifaddr */
385 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
386 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
387 
388 	return 0;
389 }
390 
391 static int inet_insert_ifa(struct in_ifaddr *ifa)
392 {
393 	return __inet_insert_ifa(ifa, NULL, 0);
394 }
395 
396 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
397 {
398 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
399 
400 	ASSERT_RTNL();
401 
402 	if (!in_dev) {
403 		in_dev = inetdev_init(dev);
404 		if (!in_dev) {
405 			inet_free_ifa(ifa);
406 			return -ENOBUFS;
407 		}
408 	}
409 	if (ifa->ifa_dev != in_dev) {
410 		BUG_TRAP(!ifa->ifa_dev);
411 		in_dev_hold(in_dev);
412 		ifa->ifa_dev = in_dev;
413 	}
414 	if (LOOPBACK(ifa->ifa_local))
415 		ifa->ifa_scope = RT_SCOPE_HOST;
416 	return inet_insert_ifa(ifa);
417 }
418 
419 struct in_device *inetdev_by_index(int ifindex)
420 {
421 	struct net_device *dev;
422 	struct in_device *in_dev = NULL;
423 	read_lock(&dev_base_lock);
424 	dev = __dev_get_by_index(ifindex);
425 	if (dev)
426 		in_dev = in_dev_get(dev);
427 	read_unlock(&dev_base_lock);
428 	return in_dev;
429 }
430 
431 /* Called only from RTNL semaphored context. No locks. */
432 
433 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
434 				    __be32 mask)
435 {
436 	ASSERT_RTNL();
437 
438 	for_primary_ifa(in_dev) {
439 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
440 			return ifa;
441 	} endfor_ifa(in_dev);
442 	return NULL;
443 }
444 
445 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
446 {
447 	struct nlattr *tb[IFA_MAX+1];
448 	struct in_device *in_dev;
449 	struct ifaddrmsg *ifm;
450 	struct in_ifaddr *ifa, **ifap;
451 	int err = -EINVAL;
452 
453 	ASSERT_RTNL();
454 
455 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
456 	if (err < 0)
457 		goto errout;
458 
459 	ifm = nlmsg_data(nlh);
460 	in_dev = inetdev_by_index(ifm->ifa_index);
461 	if (in_dev == NULL) {
462 		err = -ENODEV;
463 		goto errout;
464 	}
465 
466 	__in_dev_put(in_dev);
467 
468 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
469 	     ifap = &ifa->ifa_next) {
470 		if (tb[IFA_LOCAL] &&
471 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
472 			continue;
473 
474 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
475 			continue;
476 
477 		if (tb[IFA_ADDRESS] &&
478 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
479 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
480 			continue;
481 
482 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
483 		return 0;
484 	}
485 
486 	err = -EADDRNOTAVAIL;
487 errout:
488 	return err;
489 }
490 
491 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
492 {
493 	struct nlattr *tb[IFA_MAX+1];
494 	struct in_ifaddr *ifa;
495 	struct ifaddrmsg *ifm;
496 	struct net_device *dev;
497 	struct in_device *in_dev;
498 	int err = -EINVAL;
499 
500 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
501 	if (err < 0)
502 		goto errout;
503 
504 	ifm = nlmsg_data(nlh);
505 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
506 		goto errout;
507 
508 	dev = __dev_get_by_index(ifm->ifa_index);
509 	if (dev == NULL) {
510 		err = -ENODEV;
511 		goto errout;
512 	}
513 
514 	in_dev = __in_dev_get_rtnl(dev);
515 	if (in_dev == NULL) {
516 		in_dev = inetdev_init(dev);
517 		if (in_dev == NULL) {
518 			err = -ENOBUFS;
519 			goto errout;
520 		}
521 	}
522 
523 	ifa = inet_alloc_ifa();
524 	if (ifa == NULL) {
525 		/*
526 		 * A potential indev allocation can be left alive, it stays
527 		 * assigned to its device and is destroy with it.
528 		 */
529 		err = -ENOBUFS;
530 		goto errout;
531 	}
532 
533 	in_dev_hold(in_dev);
534 
535 	if (tb[IFA_ADDRESS] == NULL)
536 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
537 
538 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
539 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
540 	ifa->ifa_flags = ifm->ifa_flags;
541 	ifa->ifa_scope = ifm->ifa_scope;
542 	ifa->ifa_dev = in_dev;
543 
544 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
545 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
546 
547 	if (tb[IFA_BROADCAST])
548 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
549 
550 	if (tb[IFA_ANYCAST])
551 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
552 
553 	if (tb[IFA_LABEL])
554 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
555 	else
556 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
557 
558 	return ifa;
559 
560 errout:
561 	return ERR_PTR(err);
562 }
563 
564 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
565 {
566 	struct in_ifaddr *ifa;
567 
568 	ASSERT_RTNL();
569 
570 	ifa = rtm_to_ifaddr(nlh);
571 	if (IS_ERR(ifa))
572 		return PTR_ERR(ifa);
573 
574 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
575 }
576 
577 /*
578  *	Determine a default network mask, based on the IP address.
579  */
580 
581 static __inline__ int inet_abc_len(__be32 addr)
582 {
583 	int rc = -1;	/* Something else, probably a multicast. */
584 
585 	if (ZERONET(addr))
586 		rc = 0;
587 	else {
588 		__u32 haddr = ntohl(addr);
589 
590 		if (IN_CLASSA(haddr))
591 			rc = 8;
592 		else if (IN_CLASSB(haddr))
593 			rc = 16;
594 		else if (IN_CLASSC(haddr))
595 			rc = 24;
596 	}
597 
598 	return rc;
599 }
600 
601 
602 int devinet_ioctl(unsigned int cmd, void __user *arg)
603 {
604 	struct ifreq ifr;
605 	struct sockaddr_in sin_orig;
606 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
607 	struct in_device *in_dev;
608 	struct in_ifaddr **ifap = NULL;
609 	struct in_ifaddr *ifa = NULL;
610 	struct net_device *dev;
611 	char *colon;
612 	int ret = -EFAULT;
613 	int tryaddrmatch = 0;
614 
615 	/*
616 	 *	Fetch the caller's info block into kernel space
617 	 */
618 
619 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
620 		goto out;
621 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
622 
623 	/* save original address for comparison */
624 	memcpy(&sin_orig, sin, sizeof(*sin));
625 
626 	colon = strchr(ifr.ifr_name, ':');
627 	if (colon)
628 		*colon = 0;
629 
630 #ifdef CONFIG_KMOD
631 	dev_load(ifr.ifr_name);
632 #endif
633 
634 	switch(cmd) {
635 	case SIOCGIFADDR:	/* Get interface address */
636 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
637 	case SIOCGIFDSTADDR:	/* Get the destination address */
638 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
639 		/* Note that these ioctls will not sleep,
640 		   so that we do not impose a lock.
641 		   One day we will be forced to put shlock here (I mean SMP)
642 		 */
643 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
644 		memset(sin, 0, sizeof(*sin));
645 		sin->sin_family = AF_INET;
646 		break;
647 
648 	case SIOCSIFFLAGS:
649 		ret = -EACCES;
650 		if (!capable(CAP_NET_ADMIN))
651 			goto out;
652 		break;
653 	case SIOCSIFADDR:	/* Set interface address (and family) */
654 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
655 	case SIOCSIFDSTADDR:	/* Set the destination address */
656 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
657 		ret = -EACCES;
658 		if (!capable(CAP_NET_ADMIN))
659 			goto out;
660 		ret = -EINVAL;
661 		if (sin->sin_family != AF_INET)
662 			goto out;
663 		break;
664 	default:
665 		ret = -EINVAL;
666 		goto out;
667 	}
668 
669 	rtnl_lock();
670 
671 	ret = -ENODEV;
672 	if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
673 		goto done;
674 
675 	if (colon)
676 		*colon = ':';
677 
678 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
679 		if (tryaddrmatch) {
680 			/* Matthias Andree */
681 			/* compare label and address (4.4BSD style) */
682 			/* note: we only do this for a limited set of ioctls
683 			   and only if the original address family was AF_INET.
684 			   This is checked above. */
685 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
686 			     ifap = &ifa->ifa_next) {
687 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
688 				    sin_orig.sin_addr.s_addr ==
689 							ifa->ifa_address) {
690 					break; /* found */
691 				}
692 			}
693 		}
694 		/* we didn't get a match, maybe the application is
695 		   4.3BSD-style and passed in junk so we fall back to
696 		   comparing just the label */
697 		if (!ifa) {
698 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
699 			     ifap = &ifa->ifa_next)
700 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
701 					break;
702 		}
703 	}
704 
705 	ret = -EADDRNOTAVAIL;
706 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
707 		goto done;
708 
709 	switch(cmd) {
710 	case SIOCGIFADDR:	/* Get interface address */
711 		sin->sin_addr.s_addr = ifa->ifa_local;
712 		goto rarok;
713 
714 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
715 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
716 		goto rarok;
717 
718 	case SIOCGIFDSTADDR:	/* Get the destination address */
719 		sin->sin_addr.s_addr = ifa->ifa_address;
720 		goto rarok;
721 
722 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
723 		sin->sin_addr.s_addr = ifa->ifa_mask;
724 		goto rarok;
725 
726 	case SIOCSIFFLAGS:
727 		if (colon) {
728 			ret = -EADDRNOTAVAIL;
729 			if (!ifa)
730 				break;
731 			ret = 0;
732 			if (!(ifr.ifr_flags & IFF_UP))
733 				inet_del_ifa(in_dev, ifap, 1);
734 			break;
735 		}
736 		ret = dev_change_flags(dev, ifr.ifr_flags);
737 		break;
738 
739 	case SIOCSIFADDR:	/* Set interface address (and family) */
740 		ret = -EINVAL;
741 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
742 			break;
743 
744 		if (!ifa) {
745 			ret = -ENOBUFS;
746 			if ((ifa = inet_alloc_ifa()) == NULL)
747 				break;
748 			if (colon)
749 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
750 			else
751 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
752 		} else {
753 			ret = 0;
754 			if (ifa->ifa_local == sin->sin_addr.s_addr)
755 				break;
756 			inet_del_ifa(in_dev, ifap, 0);
757 			ifa->ifa_broadcast = 0;
758 			ifa->ifa_anycast = 0;
759 		}
760 
761 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
762 
763 		if (!(dev->flags & IFF_POINTOPOINT)) {
764 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
765 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
766 			if ((dev->flags & IFF_BROADCAST) &&
767 			    ifa->ifa_prefixlen < 31)
768 				ifa->ifa_broadcast = ifa->ifa_address |
769 						     ~ifa->ifa_mask;
770 		} else {
771 			ifa->ifa_prefixlen = 32;
772 			ifa->ifa_mask = inet_make_mask(32);
773 		}
774 		ret = inet_set_ifa(dev, ifa);
775 		break;
776 
777 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
778 		ret = 0;
779 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
780 			inet_del_ifa(in_dev, ifap, 0);
781 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
782 			inet_insert_ifa(ifa);
783 		}
784 		break;
785 
786 	case SIOCSIFDSTADDR:	/* Set the destination address */
787 		ret = 0;
788 		if (ifa->ifa_address == sin->sin_addr.s_addr)
789 			break;
790 		ret = -EINVAL;
791 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
792 			break;
793 		ret = 0;
794 		inet_del_ifa(in_dev, ifap, 0);
795 		ifa->ifa_address = sin->sin_addr.s_addr;
796 		inet_insert_ifa(ifa);
797 		break;
798 
799 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
800 
801 		/*
802 		 *	The mask we set must be legal.
803 		 */
804 		ret = -EINVAL;
805 		if (bad_mask(sin->sin_addr.s_addr, 0))
806 			break;
807 		ret = 0;
808 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
809 			__be32 old_mask = ifa->ifa_mask;
810 			inet_del_ifa(in_dev, ifap, 0);
811 			ifa->ifa_mask = sin->sin_addr.s_addr;
812 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
813 
814 			/* See if current broadcast address matches
815 			 * with current netmask, then recalculate
816 			 * the broadcast address. Otherwise it's a
817 			 * funny address, so don't touch it since
818 			 * the user seems to know what (s)he's doing...
819 			 */
820 			if ((dev->flags & IFF_BROADCAST) &&
821 			    (ifa->ifa_prefixlen < 31) &&
822 			    (ifa->ifa_broadcast ==
823 			     (ifa->ifa_local|~old_mask))) {
824 				ifa->ifa_broadcast = (ifa->ifa_local |
825 						      ~sin->sin_addr.s_addr);
826 			}
827 			inet_insert_ifa(ifa);
828 		}
829 		break;
830 	}
831 done:
832 	rtnl_unlock();
833 out:
834 	return ret;
835 rarok:
836 	rtnl_unlock();
837 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
838 	goto out;
839 }
840 
841 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
842 {
843 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
844 	struct in_ifaddr *ifa;
845 	struct ifreq ifr;
846 	int done = 0;
847 
848 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
849 		goto out;
850 
851 	for (; ifa; ifa = ifa->ifa_next) {
852 		if (!buf) {
853 			done += sizeof(ifr);
854 			continue;
855 		}
856 		if (len < (int) sizeof(ifr))
857 			break;
858 		memset(&ifr, 0, sizeof(struct ifreq));
859 		if (ifa->ifa_label)
860 			strcpy(ifr.ifr_name, ifa->ifa_label);
861 		else
862 			strcpy(ifr.ifr_name, dev->name);
863 
864 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
865 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
866 								ifa->ifa_local;
867 
868 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
869 			done = -EFAULT;
870 			break;
871 		}
872 		buf  += sizeof(struct ifreq);
873 		len  -= sizeof(struct ifreq);
874 		done += sizeof(struct ifreq);
875 	}
876 out:
877 	return done;
878 }
879 
880 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
881 {
882 	__be32 addr = 0;
883 	struct in_device *in_dev;
884 
885 	rcu_read_lock();
886 	in_dev = __in_dev_get_rcu(dev);
887 	if (!in_dev)
888 		goto no_in_dev;
889 
890 	for_primary_ifa(in_dev) {
891 		if (ifa->ifa_scope > scope)
892 			continue;
893 		if (!dst || inet_ifa_match(dst, ifa)) {
894 			addr = ifa->ifa_local;
895 			break;
896 		}
897 		if (!addr)
898 			addr = ifa->ifa_local;
899 	} endfor_ifa(in_dev);
900 no_in_dev:
901 	rcu_read_unlock();
902 
903 	if (addr)
904 		goto out;
905 
906 	/* Not loopback addresses on loopback should be preferred
907 	   in this case. It is importnat that lo is the first interface
908 	   in dev_base list.
909 	 */
910 	read_lock(&dev_base_lock);
911 	rcu_read_lock();
912 	for (dev = dev_base; dev; dev = dev->next) {
913 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
914 			continue;
915 
916 		for_primary_ifa(in_dev) {
917 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
918 			    ifa->ifa_scope <= scope) {
919 				addr = ifa->ifa_local;
920 				goto out_unlock_both;
921 			}
922 		} endfor_ifa(in_dev);
923 	}
924 out_unlock_both:
925 	read_unlock(&dev_base_lock);
926 	rcu_read_unlock();
927 out:
928 	return addr;
929 }
930 
931 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
932 			      __be32 local, int scope)
933 {
934 	int same = 0;
935 	__be32 addr = 0;
936 
937 	for_ifa(in_dev) {
938 		if (!addr &&
939 		    (local == ifa->ifa_local || !local) &&
940 		    ifa->ifa_scope <= scope) {
941 			addr = ifa->ifa_local;
942 			if (same)
943 				break;
944 		}
945 		if (!same) {
946 			same = (!local || inet_ifa_match(local, ifa)) &&
947 				(!dst || inet_ifa_match(dst, ifa));
948 			if (same && addr) {
949 				if (local || !dst)
950 					break;
951 				/* Is the selected addr into dst subnet? */
952 				if (inet_ifa_match(addr, ifa))
953 					break;
954 				/* No, then can we use new local src? */
955 				if (ifa->ifa_scope <= scope) {
956 					addr = ifa->ifa_local;
957 					break;
958 				}
959 				/* search for large dst subnet for addr */
960 				same = 0;
961 			}
962 		}
963 	} endfor_ifa(in_dev);
964 
965 	return same? addr : 0;
966 }
967 
968 /*
969  * Confirm that local IP address exists using wildcards:
970  * - dev: only on this interface, 0=any interface
971  * - dst: only in the same subnet as dst, 0=any dst
972  * - local: address, 0=autoselect the local address
973  * - scope: maximum allowed scope value for the local address
974  */
975 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
976 {
977 	__be32 addr = 0;
978 	struct in_device *in_dev;
979 
980 	if (dev) {
981 		rcu_read_lock();
982 		if ((in_dev = __in_dev_get_rcu(dev)))
983 			addr = confirm_addr_indev(in_dev, dst, local, scope);
984 		rcu_read_unlock();
985 
986 		return addr;
987 	}
988 
989 	read_lock(&dev_base_lock);
990 	rcu_read_lock();
991 	for (dev = dev_base; dev; dev = dev->next) {
992 		if ((in_dev = __in_dev_get_rcu(dev))) {
993 			addr = confirm_addr_indev(in_dev, dst, local, scope);
994 			if (addr)
995 				break;
996 		}
997 	}
998 	rcu_read_unlock();
999 	read_unlock(&dev_base_lock);
1000 
1001 	return addr;
1002 }
1003 
1004 /*
1005  *	Device notifier
1006  */
1007 
1008 int register_inetaddr_notifier(struct notifier_block *nb)
1009 {
1010 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1011 }
1012 
1013 int unregister_inetaddr_notifier(struct notifier_block *nb)
1014 {
1015 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1016 }
1017 
1018 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1019  * alias numbering and to create unique labels if possible.
1020 */
1021 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1022 {
1023 	struct in_ifaddr *ifa;
1024 	int named = 0;
1025 
1026 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1027 		char old[IFNAMSIZ], *dot;
1028 
1029 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1030 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1031 		if (named++ == 0)
1032 			continue;
1033 		dot = strchr(ifa->ifa_label, ':');
1034 		if (dot == NULL) {
1035 			sprintf(old, ":%d", named);
1036 			dot = old;
1037 		}
1038 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1039 			strcat(ifa->ifa_label, dot);
1040 		} else {
1041 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1042 		}
1043 	}
1044 }
1045 
1046 /* Called only under RTNL semaphore */
1047 
1048 static int inetdev_event(struct notifier_block *this, unsigned long event,
1049 			 void *ptr)
1050 {
1051 	struct net_device *dev = ptr;
1052 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1053 
1054 	ASSERT_RTNL();
1055 
1056 	if (!in_dev) {
1057 		if (event == NETDEV_REGISTER && dev == &loopback_dev) {
1058 			in_dev = inetdev_init(dev);
1059 			if (!in_dev)
1060 				panic("devinet: Failed to create loopback\n");
1061 			in_dev->cnf.no_xfrm = 1;
1062 			in_dev->cnf.no_policy = 1;
1063 		}
1064 		goto out;
1065 	}
1066 
1067 	switch (event) {
1068 	case NETDEV_REGISTER:
1069 		printk(KERN_DEBUG "inetdev_event: bug\n");
1070 		dev->ip_ptr = NULL;
1071 		break;
1072 	case NETDEV_UP:
1073 		if (dev->mtu < 68)
1074 			break;
1075 		if (dev == &loopback_dev) {
1076 			struct in_ifaddr *ifa;
1077 			if ((ifa = inet_alloc_ifa()) != NULL) {
1078 				ifa->ifa_local =
1079 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1080 				ifa->ifa_prefixlen = 8;
1081 				ifa->ifa_mask = inet_make_mask(8);
1082 				in_dev_hold(in_dev);
1083 				ifa->ifa_dev = in_dev;
1084 				ifa->ifa_scope = RT_SCOPE_HOST;
1085 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1086 				inet_insert_ifa(ifa);
1087 			}
1088 		}
1089 		ip_mc_up(in_dev);
1090 		break;
1091 	case NETDEV_DOWN:
1092 		ip_mc_down(in_dev);
1093 		break;
1094 	case NETDEV_CHANGEMTU:
1095 		if (dev->mtu >= 68)
1096 			break;
1097 		/* MTU falled under 68, disable IP */
1098 	case NETDEV_UNREGISTER:
1099 		inetdev_destroy(in_dev);
1100 		break;
1101 	case NETDEV_CHANGENAME:
1102 		/* Do not notify about label change, this event is
1103 		 * not interesting to applications using netlink.
1104 		 */
1105 		inetdev_changename(dev, in_dev);
1106 
1107 #ifdef CONFIG_SYSCTL
1108 		devinet_sysctl_unregister(&in_dev->cnf);
1109 		neigh_sysctl_unregister(in_dev->arp_parms);
1110 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1111 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1112 		devinet_sysctl_register(in_dev, &in_dev->cnf);
1113 #endif
1114 		break;
1115 	}
1116 out:
1117 	return NOTIFY_DONE;
1118 }
1119 
1120 static struct notifier_block ip_netdev_notifier = {
1121 	.notifier_call =inetdev_event,
1122 };
1123 
1124 static inline size_t inet_nlmsg_size(void)
1125 {
1126 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1127 	       + nla_total_size(4) /* IFA_ADDRESS */
1128 	       + nla_total_size(4) /* IFA_LOCAL */
1129 	       + nla_total_size(4) /* IFA_BROADCAST */
1130 	       + nla_total_size(4) /* IFA_ANYCAST */
1131 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1132 }
1133 
1134 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1135 			    u32 pid, u32 seq, int event, unsigned int flags)
1136 {
1137 	struct ifaddrmsg *ifm;
1138 	struct nlmsghdr  *nlh;
1139 
1140 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1141 	if (nlh == NULL)
1142 		return -EMSGSIZE;
1143 
1144 	ifm = nlmsg_data(nlh);
1145 	ifm->ifa_family = AF_INET;
1146 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1147 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1148 	ifm->ifa_scope = ifa->ifa_scope;
1149 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1150 
1151 	if (ifa->ifa_address)
1152 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1153 
1154 	if (ifa->ifa_local)
1155 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1156 
1157 	if (ifa->ifa_broadcast)
1158 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1159 
1160 	if (ifa->ifa_anycast)
1161 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1162 
1163 	if (ifa->ifa_label[0])
1164 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1165 
1166 	return nlmsg_end(skb, nlh);
1167 
1168 nla_put_failure:
1169 	nlmsg_cancel(skb, nlh);
1170 	return -EMSGSIZE;
1171 }
1172 
1173 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1174 {
1175 	int idx, ip_idx;
1176 	struct net_device *dev;
1177 	struct in_device *in_dev;
1178 	struct in_ifaddr *ifa;
1179 	int s_ip_idx, s_idx = cb->args[0];
1180 
1181 	s_ip_idx = ip_idx = cb->args[1];
1182 	read_lock(&dev_base_lock);
1183 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1184 		if (idx < s_idx)
1185 			continue;
1186 		if (idx > s_idx)
1187 			s_ip_idx = 0;
1188 		rcu_read_lock();
1189 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1190 			rcu_read_unlock();
1191 			continue;
1192 		}
1193 
1194 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1195 		     ifa = ifa->ifa_next, ip_idx++) {
1196 			if (ip_idx < s_ip_idx)
1197 				continue;
1198 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1199 					     cb->nlh->nlmsg_seq,
1200 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1201 				rcu_read_unlock();
1202 				goto done;
1203 			}
1204 		}
1205 		rcu_read_unlock();
1206 	}
1207 
1208 done:
1209 	read_unlock(&dev_base_lock);
1210 	cb->args[0] = idx;
1211 	cb->args[1] = ip_idx;
1212 
1213 	return skb->len;
1214 }
1215 
1216 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1217 		      u32 pid)
1218 {
1219 	struct sk_buff *skb;
1220 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1221 	int err = -ENOBUFS;
1222 
1223 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1224 	if (skb == NULL)
1225 		goto errout;
1226 
1227 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1228 	if (err < 0) {
1229 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1230 		WARN_ON(err == -EMSGSIZE);
1231 		kfree_skb(skb);
1232 		goto errout;
1233 	}
1234 	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1235 errout:
1236 	if (err < 0)
1237 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1238 }
1239 
1240 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1241 	[RTM_NEWADDR  - RTM_BASE] = { .doit	= inet_rtm_newaddr,	},
1242 	[RTM_DELADDR  - RTM_BASE] = { .doit	= inet_rtm_deladdr,	},
1243 	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= inet_dump_ifaddr,	},
1244 	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet_rtm_newroute,	},
1245 	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet_rtm_delroute,	},
1246 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
1247 				      .dumpit	= inet_dump_fib,	},
1248 #ifdef CONFIG_IP_MULTIPLE_TABLES
1249 	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= fib4_rules_dump,	},
1250 #endif
1251 };
1252 
1253 #ifdef CONFIG_SYSCTL
1254 
1255 void inet_forward_change(void)
1256 {
1257 	struct net_device *dev;
1258 	int on = ipv4_devconf.forwarding;
1259 
1260 	ipv4_devconf.accept_redirects = !on;
1261 	ipv4_devconf_dflt.forwarding = on;
1262 
1263 	read_lock(&dev_base_lock);
1264 	for (dev = dev_base; dev; dev = dev->next) {
1265 		struct in_device *in_dev;
1266 		rcu_read_lock();
1267 		in_dev = __in_dev_get_rcu(dev);
1268 		if (in_dev)
1269 			in_dev->cnf.forwarding = on;
1270 		rcu_read_unlock();
1271 	}
1272 	read_unlock(&dev_base_lock);
1273 
1274 	rt_cache_flush(0);
1275 }
1276 
1277 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1278 				  struct file* filp, void __user *buffer,
1279 				  size_t *lenp, loff_t *ppos)
1280 {
1281 	int *valp = ctl->data;
1282 	int val = *valp;
1283 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1284 
1285 	if (write && *valp != val) {
1286 		if (valp == &ipv4_devconf.forwarding)
1287 			inet_forward_change();
1288 		else if (valp != &ipv4_devconf_dflt.forwarding)
1289 			rt_cache_flush(0);
1290 	}
1291 
1292 	return ret;
1293 }
1294 
1295 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1296 			 struct file* filp, void __user *buffer,
1297 			 size_t *lenp, loff_t *ppos)
1298 {
1299 	int *valp = ctl->data;
1300 	int val = *valp;
1301 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1302 
1303 	if (write && *valp != val)
1304 		rt_cache_flush(0);
1305 
1306 	return ret;
1307 }
1308 
1309 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1310 				  void __user *oldval, size_t __user *oldlenp,
1311 				  void __user *newval, size_t newlen)
1312 {
1313 	int *valp = table->data;
1314 	int new;
1315 
1316 	if (!newval || !newlen)
1317 		return 0;
1318 
1319 	if (newlen != sizeof(int))
1320 		return -EINVAL;
1321 
1322 	if (get_user(new, (int __user *)newval))
1323 		return -EFAULT;
1324 
1325 	if (new == *valp)
1326 		return 0;
1327 
1328 	if (oldval && oldlenp) {
1329 		size_t len;
1330 
1331 		if (get_user(len, oldlenp))
1332 			return -EFAULT;
1333 
1334 		if (len) {
1335 			if (len > table->maxlen)
1336 				len = table->maxlen;
1337 			if (copy_to_user(oldval, valp, len))
1338 				return -EFAULT;
1339 			if (put_user(len, oldlenp))
1340 				return -EFAULT;
1341 		}
1342 	}
1343 
1344 	*valp = new;
1345 	rt_cache_flush(0);
1346 	return 1;
1347 }
1348 
1349 
1350 static struct devinet_sysctl_table {
1351 	struct ctl_table_header *sysctl_header;
1352 	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
1353 	ctl_table		devinet_dev[2];
1354 	ctl_table		devinet_conf_dir[2];
1355 	ctl_table		devinet_proto_dir[2];
1356 	ctl_table		devinet_root_dir[2];
1357 } devinet_sysctl = {
1358 	.devinet_vars = {
1359 		{
1360 			.ctl_name	= NET_IPV4_CONF_FORWARDING,
1361 			.procname	= "forwarding",
1362 			.data		= &ipv4_devconf.forwarding,
1363 			.maxlen		= sizeof(int),
1364 			.mode		= 0644,
1365 			.proc_handler	= &devinet_sysctl_forward,
1366 		},
1367 		{
1368 			.ctl_name	= NET_IPV4_CONF_MC_FORWARDING,
1369 			.procname	= "mc_forwarding",
1370 			.data		= &ipv4_devconf.mc_forwarding,
1371 			.maxlen		= sizeof(int),
1372 			.mode		= 0444,
1373 			.proc_handler	= &proc_dointvec,
1374 		},
1375 		{
1376 			.ctl_name	= NET_IPV4_CONF_ACCEPT_REDIRECTS,
1377 			.procname	= "accept_redirects",
1378 			.data		= &ipv4_devconf.accept_redirects,
1379 			.maxlen		= sizeof(int),
1380 			.mode		= 0644,
1381 			.proc_handler	= &proc_dointvec,
1382 		},
1383 		{
1384 			.ctl_name	= NET_IPV4_CONF_SECURE_REDIRECTS,
1385 			.procname	= "secure_redirects",
1386 			.data		= &ipv4_devconf.secure_redirects,
1387 			.maxlen		= sizeof(int),
1388 			.mode		= 0644,
1389 			.proc_handler	= &proc_dointvec,
1390 		},
1391 		{
1392 			.ctl_name	= NET_IPV4_CONF_SHARED_MEDIA,
1393 			.procname	= "shared_media",
1394 			.data		= &ipv4_devconf.shared_media,
1395 			.maxlen		= sizeof(int),
1396 			.mode		= 0644,
1397 			.proc_handler	= &proc_dointvec,
1398 		},
1399 		{
1400 			.ctl_name	= NET_IPV4_CONF_RP_FILTER,
1401 			.procname	= "rp_filter",
1402 			.data		= &ipv4_devconf.rp_filter,
1403 			.maxlen		= sizeof(int),
1404 			.mode		= 0644,
1405 			.proc_handler	= &proc_dointvec,
1406 		},
1407 		{
1408 			.ctl_name	= NET_IPV4_CONF_SEND_REDIRECTS,
1409 			.procname	= "send_redirects",
1410 			.data		= &ipv4_devconf.send_redirects,
1411 			.maxlen		= sizeof(int),
1412 			.mode		= 0644,
1413 			.proc_handler	= &proc_dointvec,
1414 		},
1415 		{
1416 			.ctl_name	= NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1417 			.procname	= "accept_source_route",
1418 			.data		= &ipv4_devconf.accept_source_route,
1419 			.maxlen		= sizeof(int),
1420 			.mode		= 0644,
1421 			.proc_handler	= &proc_dointvec,
1422 		},
1423 		{
1424 			.ctl_name	= NET_IPV4_CONF_PROXY_ARP,
1425 			.procname	= "proxy_arp",
1426 			.data		= &ipv4_devconf.proxy_arp,
1427 			.maxlen		= sizeof(int),
1428 			.mode		= 0644,
1429 			.proc_handler	= &proc_dointvec,
1430 		},
1431 		{
1432 			.ctl_name	= NET_IPV4_CONF_MEDIUM_ID,
1433 			.procname	= "medium_id",
1434 			.data		= &ipv4_devconf.medium_id,
1435 			.maxlen		= sizeof(int),
1436 			.mode		= 0644,
1437 			.proc_handler	= &proc_dointvec,
1438 		},
1439 		{
1440 			.ctl_name	= NET_IPV4_CONF_BOOTP_RELAY,
1441 			.procname	= "bootp_relay",
1442 			.data		= &ipv4_devconf.bootp_relay,
1443 			.maxlen		= sizeof(int),
1444 			.mode		= 0644,
1445 			.proc_handler	= &proc_dointvec,
1446 		},
1447 		{
1448 			.ctl_name	= NET_IPV4_CONF_LOG_MARTIANS,
1449 			.procname	= "log_martians",
1450 			.data		= &ipv4_devconf.log_martians,
1451 			.maxlen		= sizeof(int),
1452 			.mode		= 0644,
1453 			.proc_handler	= &proc_dointvec,
1454 		},
1455 		{
1456 			.ctl_name	= NET_IPV4_CONF_TAG,
1457 			.procname	= "tag",
1458 			.data		= &ipv4_devconf.tag,
1459 			.maxlen		= sizeof(int),
1460 			.mode		= 0644,
1461 			.proc_handler	= &proc_dointvec,
1462 		},
1463 		{
1464 			.ctl_name	= NET_IPV4_CONF_ARPFILTER,
1465 			.procname	= "arp_filter",
1466 			.data		= &ipv4_devconf.arp_filter,
1467 			.maxlen		= sizeof(int),
1468 			.mode		= 0644,
1469 			.proc_handler	= &proc_dointvec,
1470 		},
1471 		{
1472 			.ctl_name	= NET_IPV4_CONF_ARP_ANNOUNCE,
1473 			.procname	= "arp_announce",
1474 			.data		= &ipv4_devconf.arp_announce,
1475 			.maxlen		= sizeof(int),
1476 			.mode		= 0644,
1477 			.proc_handler	= &proc_dointvec,
1478 		},
1479 		{
1480 			.ctl_name	= NET_IPV4_CONF_ARP_IGNORE,
1481 			.procname	= "arp_ignore",
1482 			.data		= &ipv4_devconf.arp_ignore,
1483 			.maxlen		= sizeof(int),
1484 			.mode		= 0644,
1485 			.proc_handler	= &proc_dointvec,
1486 		},
1487 		{
1488 			.ctl_name	= NET_IPV4_CONF_ARP_ACCEPT,
1489 			.procname	= "arp_accept",
1490 			.data		= &ipv4_devconf.arp_accept,
1491 			.maxlen		= sizeof(int),
1492 			.mode		= 0644,
1493 			.proc_handler	= &proc_dointvec,
1494 		},
1495 		{
1496 			.ctl_name	= NET_IPV4_CONF_NOXFRM,
1497 			.procname	= "disable_xfrm",
1498 			.data		= &ipv4_devconf.no_xfrm,
1499 			.maxlen		= sizeof(int),
1500 			.mode		= 0644,
1501 			.proc_handler	= &ipv4_doint_and_flush,
1502 			.strategy	= &ipv4_doint_and_flush_strategy,
1503 		},
1504 		{
1505 			.ctl_name	= NET_IPV4_CONF_NOPOLICY,
1506 			.procname	= "disable_policy",
1507 			.data		= &ipv4_devconf.no_policy,
1508 			.maxlen		= sizeof(int),
1509 			.mode		= 0644,
1510 			.proc_handler	= &ipv4_doint_and_flush,
1511 			.strategy	= &ipv4_doint_and_flush_strategy,
1512 		},
1513 		{
1514 			.ctl_name	= NET_IPV4_CONF_FORCE_IGMP_VERSION,
1515 			.procname	= "force_igmp_version",
1516 			.data		= &ipv4_devconf.force_igmp_version,
1517 			.maxlen		= sizeof(int),
1518 			.mode		= 0644,
1519 			.proc_handler	= &ipv4_doint_and_flush,
1520 			.strategy	= &ipv4_doint_and_flush_strategy,
1521 		},
1522 		{
1523 			.ctl_name	= NET_IPV4_CONF_PROMOTE_SECONDARIES,
1524 			.procname	= "promote_secondaries",
1525 			.data		= &ipv4_devconf.promote_secondaries,
1526 			.maxlen		= sizeof(int),
1527 			.mode		= 0644,
1528 			.proc_handler	= &ipv4_doint_and_flush,
1529 			.strategy	= &ipv4_doint_and_flush_strategy,
1530 		},
1531 	},
1532 	.devinet_dev = {
1533 		{
1534 			.ctl_name	= NET_PROTO_CONF_ALL,
1535 			.procname	= "all",
1536 			.mode		= 0555,
1537 			.child		= devinet_sysctl.devinet_vars,
1538 		},
1539 	},
1540 	.devinet_conf_dir = {
1541 		{
1542 			.ctl_name	= NET_IPV4_CONF,
1543 			.procname	= "conf",
1544 			.mode		= 0555,
1545 			.child		= devinet_sysctl.devinet_dev,
1546 		},
1547 	},
1548 	.devinet_proto_dir = {
1549 		{
1550 			.ctl_name	= NET_IPV4,
1551 			.procname	= "ipv4",
1552 			.mode		= 0555,
1553 			.child 		= devinet_sysctl.devinet_conf_dir,
1554 		},
1555 	},
1556 	.devinet_root_dir = {
1557 		{
1558 			.ctl_name	= CTL_NET,
1559 			.procname 	= "net",
1560 			.mode		= 0555,
1561 			.child		= devinet_sysctl.devinet_proto_dir,
1562 		},
1563 	},
1564 };
1565 
1566 static void devinet_sysctl_register(struct in_device *in_dev,
1567 				    struct ipv4_devconf *p)
1568 {
1569 	int i;
1570 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
1571 	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1572 						 GFP_KERNEL);
1573 	char *dev_name = NULL;
1574 
1575 	if (!t)
1576 		return;
1577 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1578 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1579 		t->devinet_vars[i].de = NULL;
1580 	}
1581 
1582 	if (dev) {
1583 		dev_name = dev->name;
1584 		t->devinet_dev[0].ctl_name = dev->ifindex;
1585 	} else {
1586 		dev_name = "default";
1587 		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1588 	}
1589 
1590 	/*
1591 	 * Make a copy of dev_name, because '.procname' is regarded as const
1592 	 * by sysctl and we wouldn't want anyone to change it under our feet
1593 	 * (see SIOCSIFNAME).
1594 	 */
1595 	dev_name = kstrdup(dev_name, GFP_KERNEL);
1596 	if (!dev_name)
1597 	    goto free;
1598 
1599 	t->devinet_dev[0].procname    = dev_name;
1600 	t->devinet_dev[0].child	      = t->devinet_vars;
1601 	t->devinet_dev[0].de	      = NULL;
1602 	t->devinet_conf_dir[0].child  = t->devinet_dev;
1603 	t->devinet_conf_dir[0].de     = NULL;
1604 	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1605 	t->devinet_proto_dir[0].de    = NULL;
1606 	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1607 	t->devinet_root_dir[0].de     = NULL;
1608 
1609 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir, 0);
1610 	if (!t->sysctl_header)
1611 	    goto free_procname;
1612 
1613 	p->sysctl = t;
1614 	return;
1615 
1616 	/* error path */
1617  free_procname:
1618 	kfree(dev_name);
1619  free:
1620 	kfree(t);
1621 	return;
1622 }
1623 
1624 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1625 {
1626 	if (p->sysctl) {
1627 		struct devinet_sysctl_table *t = p->sysctl;
1628 		p->sysctl = NULL;
1629 		unregister_sysctl_table(t->sysctl_header);
1630 		kfree(t->devinet_dev[0].procname);
1631 		kfree(t);
1632 	}
1633 }
1634 #endif
1635 
/*
 * Initialisation of the IPv4 device layer: registers the PF_INET gifconf
 * handler, the net-device notifier and the PF_INET rtnetlink table.  With
 * CONFIG_SYSCTL it also registers the template sysctl tree itself and a
 * second tree for ipv4_devconf_dflt (passing a NULL in_device makes
 * devinet_sysctl_register() name it "default").
 */
void __init devinet_init(void)
{
	register_gifconf(PF_INET, inet_gifconf);
	register_netdevice_notifier(&ip_netdev_notifier);
	rtnetlink_links[PF_INET] = inet_rtnetlink_table;
#ifdef CONFIG_SYSCTL
	/* NOTE(review): the result of this registration is stored but not checked. */
	devinet_sysctl.sysctl_header =
		register_sysctl_table(devinet_sysctl.devinet_root_dir, 0);
	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
#endif
}
1647 
/* Symbols from this file made available to other kernel code via EXPORT_SYMBOL. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1653