xref: /linux/net/ipv4/devinet.c (revision c537b994505099b7197e7d3125b942ecbcc51eb6)
1 /*
2  *	NET3	IP device support routines.
3  *
4  *	Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  *	Derived from the IP parts of dev.c 1.0.19
12  * 		Authors:	Ross Biro
13  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *	Additional Authors:
17  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *	Changes:
21  *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
22  *					lists.
23  *		Cyrus Durgin:		updated for kmod
24  *		Matthias Andree:	in devinet_ioctl, compare label and
25  *					address (4.4BSD alias style support),
26  *					fall back to comparing just the label
27  *					if no match found.
28  */
29 
30 
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/rtnetlink.h>
52 #include <linux/init.h>
53 #include <linux/notifier.h>
54 #include <linux/inetdevice.h>
55 #include <linux/igmp.h>
56 #ifdef CONFIG_SYSCTL
57 #include <linux/sysctl.h>
58 #endif
59 #include <linux/kmod.h>
60 
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/netlink.h>
66 
67 struct ipv4_devconf ipv4_devconf = {
68 	.accept_redirects = 1,
69 	.send_redirects =  1,
70 	.secure_redirects = 1,
71 	.shared_media =	  1,
72 };
73 
74 static struct ipv4_devconf ipv4_devconf_dflt = {
75 	.accept_redirects =  1,
76 	.send_redirects =    1,
77 	.secure_redirects =  1,
78 	.shared_media =	     1,
79 	.accept_source_route = 1,
80 };
81 
82 static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = {
83 	[IFA_LOCAL]     	= { .type = NLA_U32 },
84 	[IFA_ADDRESS]   	= { .type = NLA_U32 },
85 	[IFA_BROADCAST] 	= { .type = NLA_U32 },
86 	[IFA_ANYCAST]   	= { .type = NLA_U32 },
87 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
88 };
89 
90 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
91 
92 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
93 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
94 			 int destroy);
95 #ifdef CONFIG_SYSCTL
96 static void devinet_sysctl_register(struct in_device *in_dev,
97 				    struct ipv4_devconf *p);
98 static void devinet_sysctl_unregister(struct ipv4_devconf *p);
99 #endif
100 
101 /* Locks all the inet devices. */
102 
103 static struct in_ifaddr *inet_alloc_ifa(void)
104 {
105 	struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
106 
107 	if (ifa) {
108 		INIT_RCU_HEAD(&ifa->rcu_head);
109 	}
110 
111 	return ifa;
112 }
113 
114 static void inet_rcu_free_ifa(struct rcu_head *head)
115 {
116 	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
117 	if (ifa->ifa_dev)
118 		in_dev_put(ifa->ifa_dev);
119 	kfree(ifa);
120 }
121 
122 static inline void inet_free_ifa(struct in_ifaddr *ifa)
123 {
124 	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
125 }
126 
127 void in_dev_finish_destroy(struct in_device *idev)
128 {
129 	struct net_device *dev = idev->dev;
130 
131 	BUG_TRAP(!idev->ifa_list);
132 	BUG_TRAP(!idev->mc_list);
133 #ifdef NET_REFCNT_DEBUG
134 	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
135 	       idev, dev ? dev->name : "NIL");
136 #endif
137 	dev_put(dev);
138 	if (!idev->dead)
139 		printk("Freeing alive in_device %p\n", idev);
140 	else {
141 		kfree(idev);
142 	}
143 }
144 
145 struct in_device *inetdev_init(struct net_device *dev)
146 {
147 	struct in_device *in_dev;
148 
149 	ASSERT_RTNL();
150 
151 	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
152 	if (!in_dev)
153 		goto out;
154 	INIT_RCU_HEAD(&in_dev->rcu_head);
155 	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
156 	in_dev->cnf.sysctl = NULL;
157 	in_dev->dev = dev;
158 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
159 		goto out_kfree;
160 	/* Reference in_dev->dev */
161 	dev_hold(dev);
162 #ifdef CONFIG_SYSCTL
163 	neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
164 			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
165 #endif
166 
167 	/* Account for reference dev->ip_ptr (below) */
168 	in_dev_hold(in_dev);
169 
170 #ifdef CONFIG_SYSCTL
171 	devinet_sysctl_register(in_dev, &in_dev->cnf);
172 #endif
173 	ip_mc_init_dev(in_dev);
174 	if (dev->flags & IFF_UP)
175 		ip_mc_up(in_dev);
176 
177 	/* we can receive as soon as ip_ptr is set -- do this last */
178 	rcu_assign_pointer(dev->ip_ptr, in_dev);
179 out:
180 	return in_dev;
181 out_kfree:
182 	kfree(in_dev);
183 	in_dev = NULL;
184 	goto out;
185 }
186 
187 static void in_dev_rcu_put(struct rcu_head *head)
188 {
189 	struct in_device *idev = container_of(head, struct in_device, rcu_head);
190 	in_dev_put(idev);
191 }
192 
193 static void inetdev_destroy(struct in_device *in_dev)
194 {
195 	struct in_ifaddr *ifa;
196 	struct net_device *dev;
197 
198 	ASSERT_RTNL();
199 
200 	dev = in_dev->dev;
201 	if (dev == &loopback_dev)
202 		return;
203 
204 	in_dev->dead = 1;
205 
206 	ip_mc_destroy_dev(in_dev);
207 
208 	while ((ifa = in_dev->ifa_list) != NULL) {
209 		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
210 		inet_free_ifa(ifa);
211 	}
212 
213 #ifdef CONFIG_SYSCTL
214 	devinet_sysctl_unregister(&in_dev->cnf);
215 #endif
216 
217 	dev->ip_ptr = NULL;
218 
219 #ifdef CONFIG_SYSCTL
220 	neigh_sysctl_unregister(in_dev->arp_parms);
221 #endif
222 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
223 	arp_ifdown(dev);
224 
225 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
226 }
227 
228 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
229 {
230 	rcu_read_lock();
231 	for_primary_ifa(in_dev) {
232 		if (inet_ifa_match(a, ifa)) {
233 			if (!b || inet_ifa_match(b, ifa)) {
234 				rcu_read_unlock();
235 				return 1;
236 			}
237 		}
238 	} endfor_ifa(in_dev);
239 	rcu_read_unlock();
240 	return 0;
241 }
242 
/*
 * Unlink the address that *ifap points at from in_dev's address list and
 * announce the removal (netlink message first, then the inetaddr notifier
 * chain).  Must be called under RTNL.
 *
 * in_dev:   device whose list contains the address
 * ifap:     the link (list head or some ->ifa_next slot) pointing at it
 * destroy:  non-zero to also free the address and, if the list becomes
 *           empty, destroy in_dev itself
 * nlh, pid: passed through unchanged to rtmsg_ifa()
 *
 * When the removed address is primary, every following secondary with the
 * same mask and subnet is either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first such secondary is promoted
 * to primary instead.
 */
243 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
244 			 int destroy, struct nlmsghdr *nlh, u32 pid)
245 {
246 	struct in_ifaddr *promote = NULL;
247 	struct in_ifaddr *ifa, *ifa1 = *ifap;
248 	struct in_ifaddr *last_prim = in_dev->ifa_list;
249 	struct in_ifaddr *prev_prom = NULL;
250 	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
251 
252 	ASSERT_RTNL();
253 
254 	/* 1. Deleting primary ifaddr forces deletion all secondaries
255 	 * unless alias promotion is set
256 	 **/
257 
258 	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
259 		struct in_ifaddr **ifap1 = &ifa1->ifa_next;
260 
261 		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary whose scope is >= ifa1's. */
262 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
263 			    ifa1->ifa_scope <= ifa->ifa_scope)
264 				last_prim = ifa;
265 
			/* Skip primaries and entries outside ifa1's subnet. */
266 			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
267 			    ifa1->ifa_mask != ifa->ifa_mask ||
268 			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
269 				ifap1 = &ifa->ifa_next;
270 				prev_prom = ifa;
271 				continue;
272 			}
273 
274 			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
275 				*ifap1 = ifa->ifa_next;
276 
277 				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
278 				blocking_notifier_call_chain(&inetaddr_chain,
279 						NETDEV_DOWN, ifa);
280 				inet_free_ifa(ifa);
281 			} else {
				/* First matching secondary is the one to promote. */
282 				promote = ifa;
283 				break;
284 			}
285 		}
286 	}
287 
288 	/* 2. Unlink it */
289 
290 	*ifap = ifa1->ifa_next;
291 
292 	/* 3. Announce address deletion */
293 
294 	/* Send message first, then call notifier.
295 	   At first sight, FIB update triggered by notifier
296 	   will refer to already deleted ifaddr, that could confuse
297 	   netlink listeners. It is not true: look, gated sees
298 	   that route deleted and if it still thinks that ifaddr
299 	   is valid, it will try to restore deleted routes... Grr.
300 	   So that, this order is correct.
301 	 */
302 	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
303 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
304 
305 	if (promote) {
306 
		/*
		 * prev_prom is only set when at least one entry was skipped
		 * before the promoted one; in that case move the promoted
		 * entry so that it directly follows last_prim.
		 */
307 		if (prev_prom) {
308 			prev_prom->ifa_next = promote->ifa_next;
309 			promote->ifa_next = last_prim->ifa_next;
310 			last_prim->ifa_next = promote;
311 		}
312 
313 		promote->ifa_flags &= ~IFA_F_SECONDARY;
314 		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
315 		blocking_notifier_call_chain(&inetaddr_chain,
316 				NETDEV_UP, promote);
		/*
		 * Call fib_add_ifaddr() for every entry after the promoted
		 * one that lies in the removed primary's subnet.
		 */
317 		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
318 			if (ifa1->ifa_mask != ifa->ifa_mask ||
319 			    !inet_ifa_match(ifa1->ifa_address, ifa))
320 					continue;
321 			fib_add_ifaddr(ifa);
322 		}
323 
324 	}
325 	if (destroy) {
326 		inet_free_ifa(ifa1);
327 
		/* Last address gone: tear down the in_device as well. */
328 		if (!in_dev->ifa_list)
329 			inetdev_destroy(in_dev);
330 	}
331 }
332 
333 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
334 			 int destroy)
335 {
336 	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
337 }
338 
339 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
340 			     u32 pid)
341 {
342 	struct in_device *in_dev = ifa->ifa_dev;
343 	struct in_ifaddr *ifa1, **ifap, **last_primary;
344 
345 	ASSERT_RTNL();
346 
347 	if (!ifa->ifa_local) {
348 		inet_free_ifa(ifa);
349 		return 0;
350 	}
351 
352 	ifa->ifa_flags &= ~IFA_F_SECONDARY;
353 	last_primary = &in_dev->ifa_list;
354 
355 	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
356 	     ifap = &ifa1->ifa_next) {
357 		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
358 		    ifa->ifa_scope <= ifa1->ifa_scope)
359 			last_primary = &ifa1->ifa_next;
360 		if (ifa1->ifa_mask == ifa->ifa_mask &&
361 		    inet_ifa_match(ifa1->ifa_address, ifa)) {
362 			if (ifa1->ifa_local == ifa->ifa_local) {
363 				inet_free_ifa(ifa);
364 				return -EEXIST;
365 			}
366 			if (ifa1->ifa_scope != ifa->ifa_scope) {
367 				inet_free_ifa(ifa);
368 				return -EINVAL;
369 			}
370 			ifa->ifa_flags |= IFA_F_SECONDARY;
371 		}
372 	}
373 
374 	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
375 		net_srandom(ifa->ifa_local);
376 		ifap = last_primary;
377 	}
378 
379 	ifa->ifa_next = *ifap;
380 	*ifap = ifa;
381 
382 	/* Send message first, then call notifier.
383 	   Notifier will trigger FIB update, so that
384 	   listeners of netlink will know about new ifaddr */
385 	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
386 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
387 
388 	return 0;
389 }
390 
391 static int inet_insert_ifa(struct in_ifaddr *ifa)
392 {
393 	return __inet_insert_ifa(ifa, NULL, 0);
394 }
395 
396 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
397 {
398 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
399 
400 	ASSERT_RTNL();
401 
402 	if (!in_dev) {
403 		in_dev = inetdev_init(dev);
404 		if (!in_dev) {
405 			inet_free_ifa(ifa);
406 			return -ENOBUFS;
407 		}
408 	}
409 	if (ifa->ifa_dev != in_dev) {
410 		BUG_TRAP(!ifa->ifa_dev);
411 		in_dev_hold(in_dev);
412 		ifa->ifa_dev = in_dev;
413 	}
414 	if (LOOPBACK(ifa->ifa_local))
415 		ifa->ifa_scope = RT_SCOPE_HOST;
416 	return inet_insert_ifa(ifa);
417 }
418 
419 struct in_device *inetdev_by_index(int ifindex)
420 {
421 	struct net_device *dev;
422 	struct in_device *in_dev = NULL;
423 	read_lock(&dev_base_lock);
424 	dev = __dev_get_by_index(ifindex);
425 	if (dev)
426 		in_dev = in_dev_get(dev);
427 	read_unlock(&dev_base_lock);
428 	return in_dev;
429 }
430 
431 /* Called only from RTNL semaphored context. No locks. */
432 
433 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
434 				    __be32 mask)
435 {
436 	ASSERT_RTNL();
437 
438 	for_primary_ifa(in_dev) {
439 		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
440 			return ifa;
441 	} endfor_ifa(in_dev);
442 	return NULL;
443 }
444 
445 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
446 {
447 	struct nlattr *tb[IFA_MAX+1];
448 	struct in_device *in_dev;
449 	struct ifaddrmsg *ifm;
450 	struct in_ifaddr *ifa, **ifap;
451 	int err = -EINVAL;
452 
453 	ASSERT_RTNL();
454 
455 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
456 	if (err < 0)
457 		goto errout;
458 
459 	ifm = nlmsg_data(nlh);
460 	in_dev = inetdev_by_index(ifm->ifa_index);
461 	if (in_dev == NULL) {
462 		err = -ENODEV;
463 		goto errout;
464 	}
465 
466 	__in_dev_put(in_dev);
467 
468 	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
469 	     ifap = &ifa->ifa_next) {
470 		if (tb[IFA_LOCAL] &&
471 		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
472 			continue;
473 
474 		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
475 			continue;
476 
477 		if (tb[IFA_ADDRESS] &&
478 		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
479 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
480 			continue;
481 
482 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
483 		return 0;
484 	}
485 
486 	err = -EADDRNOTAVAIL;
487 errout:
488 	return err;
489 }
490 
491 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
492 {
493 	struct nlattr *tb[IFA_MAX+1];
494 	struct in_ifaddr *ifa;
495 	struct ifaddrmsg *ifm;
496 	struct net_device *dev;
497 	struct in_device *in_dev;
498 	int err = -EINVAL;
499 
500 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
501 	if (err < 0)
502 		goto errout;
503 
504 	ifm = nlmsg_data(nlh);
505 	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
506 		err = -EINVAL;
507 		goto errout;
508 	}
509 
510 	dev = __dev_get_by_index(ifm->ifa_index);
511 	if (dev == NULL) {
512 		err = -ENODEV;
513 		goto errout;
514 	}
515 
516 	in_dev = __in_dev_get_rtnl(dev);
517 	if (in_dev == NULL) {
518 		in_dev = inetdev_init(dev);
519 		if (in_dev == NULL) {
520 			err = -ENOBUFS;
521 			goto errout;
522 		}
523 	}
524 
525 	ifa = inet_alloc_ifa();
526 	if (ifa == NULL) {
527 		/*
528 		 * A potential indev allocation can be left alive, it stays
529 		 * assigned to its device and is destroy with it.
530 		 */
531 		err = -ENOBUFS;
532 		goto errout;
533 	}
534 
535 	in_dev_hold(in_dev);
536 
537 	if (tb[IFA_ADDRESS] == NULL)
538 		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
539 
540 	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
541 	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
542 	ifa->ifa_flags = ifm->ifa_flags;
543 	ifa->ifa_scope = ifm->ifa_scope;
544 	ifa->ifa_dev = in_dev;
545 
546 	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
547 	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
548 
549 	if (tb[IFA_BROADCAST])
550 		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
551 
552 	if (tb[IFA_ANYCAST])
553 		ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
554 
555 	if (tb[IFA_LABEL])
556 		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
557 	else
558 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
559 
560 	return ifa;
561 
562 errout:
563 	return ERR_PTR(err);
564 }
565 
566 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
567 {
568 	struct in_ifaddr *ifa;
569 
570 	ASSERT_RTNL();
571 
572 	ifa = rtm_to_ifaddr(nlh);
573 	if (IS_ERR(ifa))
574 		return PTR_ERR(ifa);
575 
576 	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
577 }
578 
579 /*
580  *	Determine a default network mask, based on the IP address.
581  */
582 
583 static __inline__ int inet_abc_len(__be32 addr)
584 {
585 	int rc = -1;	/* Something else, probably a multicast. */
586 
587 	if (ZERONET(addr))
588 		rc = 0;
589 	else {
590 		__u32 haddr = ntohl(addr);
591 
592 		if (IN_CLASSA(haddr))
593 			rc = 8;
594 		else if (IN_CLASSB(haddr))
595 			rc = 16;
596 		else if (IN_CLASSC(haddr))
597 			rc = 24;
598 	}
599 
600 	return rc;
601 }
602 
603 
/*
 * Handle the IPv4 interface ioctls: get/set of address, broadcast address,
 * destination address and netmask, plus SIOCSIFFLAGS.
 *
 * cmd: ioctl number; any command not listed in the first switch yields
 *      -EINVAL
 * arg: user pointer to a struct ifreq.  It is copied in, and for the four
 *      "get" commands it is copied back with the requested address filled in.
 *
 * Returns 0 on success or a negative errno: -EFAULT if a user copy fails,
 * -EACCES if a "set" command is issued without CAP_NET_ADMIN, -EINVAL for
 * an unsupported command, wrong address family or unusable address/mask,
 * -ENODEV if the named device does not exist, -EADDRNOTAVAIL if no
 * matching address is configured, -ENOBUFS if an allocation fails.
 *
 * The function takes and releases rtnl_lock() itself.
 */
604 int devinet_ioctl(unsigned int cmd, void __user *arg)
605 {
606 	struct ifreq ifr;
607 	struct sockaddr_in sin_orig;
608 	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
609 	struct in_device *in_dev;
610 	struct in_ifaddr **ifap = NULL;
611 	struct in_ifaddr *ifa = NULL;
612 	struct net_device *dev;
613 	char *colon;
614 	int ret = -EFAULT;
615 	int tryaddrmatch = 0;
616 
617 	/*
618 	 *	Fetch the caller's info block into kernel space
619 	 */
620 
621 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
622 		goto out;
	/* Force termination of the user-supplied name. */
623 	ifr.ifr_name[IFNAMSIZ - 1] = 0;
624 
625 	/* save original address for comparison */
626 	memcpy(&sin_orig, sin, sizeof(*sin));
627 
	/*
	 * Cut the name at the first ':' so that the device lookups below
	 * see only the part before it; the ':' is put back once the device
	 * has been found.
	 */
628 	colon = strchr(ifr.ifr_name, ':');
629 	if (colon)
630 		*colon = 0;
631 
632 #ifdef CONFIG_KMOD
633 	dev_load(ifr.ifr_name);
634 #endif
635 
	/* First pass: permission and argument checks, before taking RTNL. */
636 	switch(cmd) {
637 	case SIOCGIFADDR:	/* Get interface address */
638 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
639 	case SIOCGIFDSTADDR:	/* Get the destination address */
640 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
641 		/* Note that these ioctls will not sleep,
642 		   so that we do not impose a lock.
643 		   One day we will be forced to put shlock here (I mean SMP)
644 		 */
645 		tryaddrmatch = (sin_orig.sin_family == AF_INET);
646 		memset(sin, 0, sizeof(*sin));
647 		sin->sin_family = AF_INET;
648 		break;
649 
650 	case SIOCSIFFLAGS:
651 		ret = -EACCES;
652 		if (!capable(CAP_NET_ADMIN))
653 			goto out;
654 		break;
655 	case SIOCSIFADDR:	/* Set interface address (and family) */
656 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
657 	case SIOCSIFDSTADDR:	/* Set the destination address */
658 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
659 		ret = -EACCES;
660 		if (!capable(CAP_NET_ADMIN))
661 			goto out;
662 		ret = -EINVAL;
663 		if (sin->sin_family != AF_INET)
664 			goto out;
665 		break;
666 	default:
667 		ret = -EINVAL;
668 		goto out;
669 	}
670 
671 	rtnl_lock();
672 
673 	ret = -ENODEV;
674 	if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
675 		goto done;
676 
	/* Restore the full label for the label comparisons below. */
677 	if (colon)
678 		*colon = ':';
679 
680 	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
681 		if (tryaddrmatch) {
682 			/* Matthias Andree */
683 			/* compare label and address (4.4BSD style) */
684 			/* note: we only do this for a limited set of ioctls
685 			   and only if the original address family was AF_INET.
686 			   This is checked above. */
687 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
688 			     ifap = &ifa->ifa_next) {
689 				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
690 				    sin_orig.sin_addr.s_addr ==
691 							ifa->ifa_address) {
692 					break; /* found */
693 				}
694 			}
695 		}
696 		/* we didn't get a match, maybe the application is
697 		   4.3BSD-style and passed in junk so we fall back to
698 		   comparing just the label */
699 		if (!ifa) {
700 			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
701 			     ifap = &ifa->ifa_next)
702 				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
703 					break;
704 		}
705 	}
706 
	/* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without a match. */
707 	ret = -EADDRNOTAVAIL;
708 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
709 		goto done;
710 
711 	switch(cmd) {
712 	case SIOCGIFADDR:	/* Get interface address */
713 		sin->sin_addr.s_addr = ifa->ifa_local;
714 		goto rarok;
715 
716 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
717 		sin->sin_addr.s_addr = ifa->ifa_broadcast;
718 		goto rarok;
719 
720 	case SIOCGIFDSTADDR:	/* Get the destination address */
721 		sin->sin_addr.s_addr = ifa->ifa_address;
722 		goto rarok;
723 
724 	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
725 		sin->sin_addr.s_addr = ifa->ifa_mask;
726 		goto rarok;
727 
728 	case SIOCSIFFLAGS:
		/*
		 * With a ':' in the name the request refers to one labelled
		 * address: clearing IFF_UP deletes that address.  Without
		 * it the flags are applied to the device.
		 */
729 		if (colon) {
730 			ret = -EADDRNOTAVAIL;
731 			if (!ifa)
732 				break;
733 			ret = 0;
734 			if (!(ifr.ifr_flags & IFF_UP))
735 				inet_del_ifa(in_dev, ifap, 1);
736 			break;
737 		}
738 		ret = dev_change_flags(dev, ifr.ifr_flags);
739 		break;
740 
741 	case SIOCSIFADDR:	/* Set interface address (and family) */
742 		ret = -EINVAL;
743 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
744 			break;
745 
746 		if (!ifa) {
747 			ret = -ENOBUFS;
748 			if ((ifa = inet_alloc_ifa()) == NULL)
749 				break;
750 			if (colon)
751 				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
752 			else
753 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
754 		} else {
755 			ret = 0;
756 			if (ifa->ifa_local == sin->sin_addr.s_addr)
757 				break;
			/* Unlink without freeing; the entry is re-inserted below. */
758 			inet_del_ifa(in_dev, ifap, 0);
759 			ifa->ifa_broadcast = 0;
760 			ifa->ifa_anycast = 0;
761 		}
762 
763 		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
764 
765 		if (!(dev->flags & IFF_POINTOPOINT)) {
766 			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
767 			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
768 			if ((dev->flags & IFF_BROADCAST) &&
769 			    ifa->ifa_prefixlen < 31)
770 				ifa->ifa_broadcast = ifa->ifa_address |
771 						     ~ifa->ifa_mask;
772 		} else {
773 			ifa->ifa_prefixlen = 32;
774 			ifa->ifa_mask = inet_make_mask(32);
775 		}
776 		ret = inet_set_ifa(dev, ifa);
777 		break;
778 
779 	case SIOCSIFBRDADDR:	/* Set the broadcast address */
780 		ret = 0;
781 		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
782 			inet_del_ifa(in_dev, ifap, 0);
783 			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			/* NOTE(review): the inet_insert_ifa() result is ignored here. */
784 			inet_insert_ifa(ifa);
785 		}
786 		break;
787 
788 	case SIOCSIFDSTADDR:	/* Set the destination address */
789 		ret = 0;
790 		if (ifa->ifa_address == sin->sin_addr.s_addr)
791 			break;
792 		ret = -EINVAL;
793 		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
794 			break;
795 		ret = 0;
796 		inet_del_ifa(in_dev, ifap, 0);
797 		ifa->ifa_address = sin->sin_addr.s_addr;
798 		inet_insert_ifa(ifa);
799 		break;
800 
801 	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
802 
803 		/*
804 		 *	The mask we set must be legal.
805 		 */
806 		ret = -EINVAL;
807 		if (bad_mask(sin->sin_addr.s_addr, 0))
808 			break;
809 		ret = 0;
810 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
811 			__be32 old_mask = ifa->ifa_mask;
812 			inet_del_ifa(in_dev, ifap, 0);
813 			ifa->ifa_mask = sin->sin_addr.s_addr;
814 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
815 
816 			/* See if current broadcast address matches
817 			 * with current netmask, then recalculate
818 			 * the broadcast address. Otherwise it's a
819 			 * funny address, so don't touch it since
820 			 * the user seems to know what (s)he's doing...
821 			 */
822 			if ((dev->flags & IFF_BROADCAST) &&
823 			    (ifa->ifa_prefixlen < 31) &&
824 			    (ifa->ifa_broadcast ==
825 			     (ifa->ifa_local|~old_mask))) {
826 				ifa->ifa_broadcast = (ifa->ifa_local |
827 						      ~sin->sin_addr.s_addr);
828 			}
829 			inet_insert_ifa(ifa);
830 		}
831 		break;
832 	}
833 done:
834 	rtnl_unlock();
835 out:
836 	return ret;
	/* "Get" commands: drop RTNL first, then copy the result to user space. */
837 rarok:
838 	rtnl_unlock();
839 	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
840 	goto out;
841 }
842 
843 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
844 {
845 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
846 	struct in_ifaddr *ifa;
847 	struct ifreq ifr;
848 	int done = 0;
849 
850 	if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
851 		goto out;
852 
853 	for (; ifa; ifa = ifa->ifa_next) {
854 		if (!buf) {
855 			done += sizeof(ifr);
856 			continue;
857 		}
858 		if (len < (int) sizeof(ifr))
859 			break;
860 		memset(&ifr, 0, sizeof(struct ifreq));
861 		if (ifa->ifa_label)
862 			strcpy(ifr.ifr_name, ifa->ifa_label);
863 		else
864 			strcpy(ifr.ifr_name, dev->name);
865 
866 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
867 		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
868 								ifa->ifa_local;
869 
870 		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
871 			done = -EFAULT;
872 			break;
873 		}
874 		buf  += sizeof(struct ifreq);
875 		len  -= sizeof(struct ifreq);
876 		done += sizeof(struct ifreq);
877 	}
878 out:
879 	return done;
880 }
881 
882 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
883 {
884 	__be32 addr = 0;
885 	struct in_device *in_dev;
886 
887 	rcu_read_lock();
888 	in_dev = __in_dev_get_rcu(dev);
889 	if (!in_dev)
890 		goto no_in_dev;
891 
892 	for_primary_ifa(in_dev) {
893 		if (ifa->ifa_scope > scope)
894 			continue;
895 		if (!dst || inet_ifa_match(dst, ifa)) {
896 			addr = ifa->ifa_local;
897 			break;
898 		}
899 		if (!addr)
900 			addr = ifa->ifa_local;
901 	} endfor_ifa(in_dev);
902 no_in_dev:
903 	rcu_read_unlock();
904 
905 	if (addr)
906 		goto out;
907 
908 	/* Not loopback addresses on loopback should be preferred
909 	   in this case. It is importnat that lo is the first interface
910 	   in dev_base list.
911 	 */
912 	read_lock(&dev_base_lock);
913 	rcu_read_lock();
914 	for (dev = dev_base; dev; dev = dev->next) {
915 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
916 			continue;
917 
918 		for_primary_ifa(in_dev) {
919 			if (ifa->ifa_scope != RT_SCOPE_LINK &&
920 			    ifa->ifa_scope <= scope) {
921 				addr = ifa->ifa_local;
922 				goto out_unlock_both;
923 			}
924 		} endfor_ifa(in_dev);
925 	}
926 out_unlock_both:
927 	read_unlock(&dev_base_lock);
928 	rcu_read_unlock();
929 out:
930 	return addr;
931 }
932 
/*
 * Per-device worker for inet_confirm_addr(): scan all addresses of in_dev
 * (primary and secondary).
 *
 * in_dev: device to scan
 * dst:    destination the address should share a subnet with, 0 = any
 * local:  local address to confirm, 0 = choose one
 * scope:  largest ifa_scope value an address may have to be chosen
 *
 * "addr" is the candidate local address, "same" records whether a subnet
 * containing both local and dst (where given) has been seen.
 * Returns the candidate only if such a subnet was found, otherwise 0.
 */
933 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
934 			      __be32 local, int scope)
935 {
936 	int same = 0;
937 	__be32 addr = 0;
938 
939 	for_ifa(in_dev) {
		/* Take the first address that equals local (or any, if local is 0). */
940 		if (!addr &&
941 		    (local == ifa->ifa_local || !local) &&
942 		    ifa->ifa_scope <= scope) {
943 			addr = ifa->ifa_local;
944 			if (same)
945 				break;
946 		}
947 		if (!same) {
			/* Does this entry's subnet contain local and dst? */
948 			same = (!local || inet_ifa_match(local, ifa)) &&
949 				(!dst || inet_ifa_match(dst, ifa));
950 			if (same && addr) {
951 				if (local || !dst)
952 					break;
953 				/* Is the selected addr into dst subnet? */
954 				if (inet_ifa_match(addr, ifa))
955 					break;
956 				/* No, then can we use new local src? */
957 				if (ifa->ifa_scope <= scope) {
958 					addr = ifa->ifa_local;
959 					break;
960 				}
961 				/* search for large dst subnet for addr */
962 				same = 0;
963 			}
964 		}
965 	} endfor_ifa(in_dev);
966 
967 	return same? addr : 0;
968 }
969 
970 /*
971  * Confirm that local IP address exists using wildcards:
972  * - dev: only on this interface, 0=any interface
973  * - dst: only in the same subnet as dst, 0=any dst
974  * - local: address, 0=autoselect the local address
975  * - scope: maximum allowed scope value for the local address
976  */
977 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
978 {
979 	__be32 addr = 0;
980 	struct in_device *in_dev;
981 
982 	if (dev) {
983 		rcu_read_lock();
984 		if ((in_dev = __in_dev_get_rcu(dev)))
985 			addr = confirm_addr_indev(in_dev, dst, local, scope);
986 		rcu_read_unlock();
987 
988 		return addr;
989 	}
990 
991 	read_lock(&dev_base_lock);
992 	rcu_read_lock();
993 	for (dev = dev_base; dev; dev = dev->next) {
994 		if ((in_dev = __in_dev_get_rcu(dev))) {
995 			addr = confirm_addr_indev(in_dev, dst, local, scope);
996 			if (addr)
997 				break;
998 		}
999 	}
1000 	rcu_read_unlock();
1001 	read_unlock(&dev_base_lock);
1002 
1003 	return addr;
1004 }
1005 
1006 /*
1007  *	Device notifier
1008  */
1009 
1010 int register_inetaddr_notifier(struct notifier_block *nb)
1011 {
1012 	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1013 }
1014 
1015 int unregister_inetaddr_notifier(struct notifier_block *nb)
1016 {
1017 	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1018 }
1019 
1020 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1021  * alias numbering and to create unique labels if possible.
1022 */
1023 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1024 {
1025 	struct in_ifaddr *ifa;
1026 	int named = 0;
1027 
1028 	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1029 		char old[IFNAMSIZ], *dot;
1030 
1031 		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1032 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1033 		if (named++ == 0)
1034 			continue;
1035 		dot = strchr(ifa->ifa_label, ':');
1036 		if (dot == NULL) {
1037 			sprintf(old, ":%d", named);
1038 			dot = old;
1039 		}
1040 		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1041 			strcat(ifa->ifa_label, dot);
1042 		} else {
1043 			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1044 		}
1045 	}
1046 }
1047 
1048 /* Called only under RTNL semaphore */
1049 
1050 static int inetdev_event(struct notifier_block *this, unsigned long event,
1051 			 void *ptr)
1052 {
1053 	struct net_device *dev = ptr;
1054 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1055 
1056 	ASSERT_RTNL();
1057 
1058 	if (!in_dev) {
1059 		if (event == NETDEV_REGISTER) {
1060 			in_dev = inetdev_init(dev);
1061 			if (!in_dev)
1062 				panic("devinet: Failed to create loopback\n");
1063 			if (dev == &loopback_dev) {
1064 				in_dev->cnf.no_xfrm = 1;
1065 				in_dev->cnf.no_policy = 1;
1066 			}
1067 		}
1068 		goto out;
1069 	}
1070 
1071 	switch (event) {
1072 	case NETDEV_REGISTER:
1073 		printk(KERN_DEBUG "inetdev_event: bug\n");
1074 		dev->ip_ptr = NULL;
1075 		break;
1076 	case NETDEV_UP:
1077 		if (dev->mtu < 68)
1078 			break;
1079 		if (dev == &loopback_dev) {
1080 			struct in_ifaddr *ifa;
1081 			if ((ifa = inet_alloc_ifa()) != NULL) {
1082 				ifa->ifa_local =
1083 				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1084 				ifa->ifa_prefixlen = 8;
1085 				ifa->ifa_mask = inet_make_mask(8);
1086 				in_dev_hold(in_dev);
1087 				ifa->ifa_dev = in_dev;
1088 				ifa->ifa_scope = RT_SCOPE_HOST;
1089 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1090 				inet_insert_ifa(ifa);
1091 			}
1092 		}
1093 		ip_mc_up(in_dev);
1094 		break;
1095 	case NETDEV_DOWN:
1096 		ip_mc_down(in_dev);
1097 		break;
1098 	case NETDEV_CHANGEMTU:
1099 		if (dev->mtu >= 68)
1100 			break;
1101 		/* MTU falled under 68, disable IP */
1102 	case NETDEV_UNREGISTER:
1103 		inetdev_destroy(in_dev);
1104 		break;
1105 	case NETDEV_CHANGENAME:
1106 		/* Do not notify about label change, this event is
1107 		 * not interesting to applications using netlink.
1108 		 */
1109 		inetdev_changename(dev, in_dev);
1110 
1111 #ifdef CONFIG_SYSCTL
1112 		devinet_sysctl_unregister(&in_dev->cnf);
1113 		neigh_sysctl_unregister(in_dev->arp_parms);
1114 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
1115 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1116 		devinet_sysctl_register(in_dev, &in_dev->cnf);
1117 #endif
1118 		break;
1119 	}
1120 out:
1121 	return NOTIFY_DONE;
1122 }
1123 
1124 static struct notifier_block ip_netdev_notifier = {
1125 	.notifier_call =inetdev_event,
1126 };
1127 
1128 static inline size_t inet_nlmsg_size(void)
1129 {
1130 	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1131 	       + nla_total_size(4) /* IFA_ADDRESS */
1132 	       + nla_total_size(4) /* IFA_LOCAL */
1133 	       + nla_total_size(4) /* IFA_BROADCAST */
1134 	       + nla_total_size(4) /* IFA_ANYCAST */
1135 	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1136 }
1137 
1138 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1139 			    u32 pid, u32 seq, int event, unsigned int flags)
1140 {
1141 	struct ifaddrmsg *ifm;
1142 	struct nlmsghdr  *nlh;
1143 
1144 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1145 	if (nlh == NULL)
1146 		return -EMSGSIZE;
1147 
1148 	ifm = nlmsg_data(nlh);
1149 	ifm->ifa_family = AF_INET;
1150 	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1151 	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1152 	ifm->ifa_scope = ifa->ifa_scope;
1153 	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1154 
1155 	if (ifa->ifa_address)
1156 		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1157 
1158 	if (ifa->ifa_local)
1159 		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1160 
1161 	if (ifa->ifa_broadcast)
1162 		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1163 
1164 	if (ifa->ifa_anycast)
1165 		NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1166 
1167 	if (ifa->ifa_label[0])
1168 		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1169 
1170 	return nlmsg_end(skb, nlh);
1171 
1172 nla_put_failure:
1173 	nlmsg_cancel(skb, nlh);
1174 	return -EMSGSIZE;
1175 }
1176 
1177 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1178 {
1179 	int idx, ip_idx;
1180 	struct net_device *dev;
1181 	struct in_device *in_dev;
1182 	struct in_ifaddr *ifa;
1183 	int s_ip_idx, s_idx = cb->args[0];
1184 
1185 	s_ip_idx = ip_idx = cb->args[1];
1186 	read_lock(&dev_base_lock);
1187 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
1188 		if (idx < s_idx)
1189 			continue;
1190 		if (idx > s_idx)
1191 			s_ip_idx = 0;
1192 		rcu_read_lock();
1193 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
1194 			rcu_read_unlock();
1195 			continue;
1196 		}
1197 
1198 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1199 		     ifa = ifa->ifa_next, ip_idx++) {
1200 			if (ip_idx < s_ip_idx)
1201 				continue;
1202 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1203 					     cb->nlh->nlmsg_seq,
1204 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1205 				rcu_read_unlock();
1206 				goto done;
1207 			}
1208 		}
1209 		rcu_read_unlock();
1210 	}
1211 
1212 done:
1213 	read_unlock(&dev_base_lock);
1214 	cb->args[0] = idx;
1215 	cb->args[1] = ip_idx;
1216 
1217 	return skb->len;
1218 }
1219 
1220 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1221 		      u32 pid)
1222 {
1223 	struct sk_buff *skb;
1224 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1225 	int err = -ENOBUFS;
1226 
1227 	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1228 	if (skb == NULL)
1229 		goto errout;
1230 
1231 	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1232 	if (err < 0) {
1233 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1234 		WARN_ON(err == -EMSGSIZE);
1235 		kfree_skb(skb);
1236 		goto errout;
1237 	}
1238 	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1239 errout:
1240 	if (err < 0)
1241 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
1242 }
1243 
1244 static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
1245 	[RTM_NEWADDR  - RTM_BASE] = { .doit	= inet_rtm_newaddr,	},
1246 	[RTM_DELADDR  - RTM_BASE] = { .doit	= inet_rtm_deladdr,	},
1247 	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= inet_dump_ifaddr,	},
1248 	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet_rtm_newroute,	},
1249 	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet_rtm_delroute,	},
1250 	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
1251 				      .dumpit	= inet_dump_fib,	},
1252 #ifdef CONFIG_IP_MULTIPLE_TABLES
1253 	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= fib4_rules_dump,	},
1254 #endif
1255 };
1256 
1257 #ifdef CONFIG_SYSCTL
1258 
1259 void inet_forward_change(void)
1260 {
1261 	struct net_device *dev;
1262 	int on = ipv4_devconf.forwarding;
1263 
1264 	ipv4_devconf.accept_redirects = !on;
1265 	ipv4_devconf_dflt.forwarding = on;
1266 
1267 	read_lock(&dev_base_lock);
1268 	for (dev = dev_base; dev; dev = dev->next) {
1269 		struct in_device *in_dev;
1270 		rcu_read_lock();
1271 		in_dev = __in_dev_get_rcu(dev);
1272 		if (in_dev)
1273 			in_dev->cnf.forwarding = on;
1274 		rcu_read_unlock();
1275 	}
1276 	read_unlock(&dev_base_lock);
1277 
1278 	rt_cache_flush(0);
1279 }
1280 
1281 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1282 				  struct file* filp, void __user *buffer,
1283 				  size_t *lenp, loff_t *ppos)
1284 {
1285 	int *valp = ctl->data;
1286 	int val = *valp;
1287 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1288 
1289 	if (write && *valp != val) {
1290 		if (valp == &ipv4_devconf.forwarding)
1291 			inet_forward_change();
1292 		else if (valp != &ipv4_devconf_dflt.forwarding)
1293 			rt_cache_flush(0);
1294 	}
1295 
1296 	return ret;
1297 }
1298 
1299 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1300 			 struct file* filp, void __user *buffer,
1301 			 size_t *lenp, loff_t *ppos)
1302 {
1303 	int *valp = ctl->data;
1304 	int val = *valp;
1305 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1306 
1307 	if (write && *valp != val)
1308 		rt_cache_flush(0);
1309 
1310 	return ret;
1311 }
1312 
1313 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1314 				  void __user *oldval, size_t __user *oldlenp,
1315 				  void __user *newval, size_t newlen)
1316 {
1317 	int *valp = table->data;
1318 	int new;
1319 
1320 	if (!newval || !newlen)
1321 		return 0;
1322 
1323 	if (newlen != sizeof(int))
1324 		return -EINVAL;
1325 
1326 	if (get_user(new, (int __user *)newval))
1327 		return -EFAULT;
1328 
1329 	if (new == *valp)
1330 		return 0;
1331 
1332 	if (oldval && oldlenp) {
1333 		size_t len;
1334 
1335 		if (get_user(len, oldlenp))
1336 			return -EFAULT;
1337 
1338 		if (len) {
1339 			if (len > table->maxlen)
1340 				len = table->maxlen;
1341 			if (copy_to_user(oldval, valp, len))
1342 				return -EFAULT;
1343 			if (put_user(len, oldlenp))
1344 				return -EFAULT;
1345 		}
1346 	}
1347 
1348 	*valp = new;
1349 	rt_cache_flush(0);
1350 	return 1;
1351 }
1352 
1353 
1354 static struct devinet_sysctl_table {
1355 	struct ctl_table_header *sysctl_header;
1356 	ctl_table		devinet_vars[__NET_IPV4_CONF_MAX];
1357 	ctl_table		devinet_dev[2];
1358 	ctl_table		devinet_conf_dir[2];
1359 	ctl_table		devinet_proto_dir[2];
1360 	ctl_table		devinet_root_dir[2];
1361 } devinet_sysctl = {
1362 	.devinet_vars = {
1363 		{
1364 			.ctl_name	= NET_IPV4_CONF_FORWARDING,
1365 			.procname	= "forwarding",
1366 			.data		= &ipv4_devconf.forwarding,
1367 			.maxlen		= sizeof(int),
1368 			.mode		= 0644,
1369 			.proc_handler	= &devinet_sysctl_forward,
1370 		},
1371 		{
1372 			.ctl_name	= NET_IPV4_CONF_MC_FORWARDING,
1373 			.procname	= "mc_forwarding",
1374 			.data		= &ipv4_devconf.mc_forwarding,
1375 			.maxlen		= sizeof(int),
1376 			.mode		= 0444,
1377 			.proc_handler	= &proc_dointvec,
1378 		},
1379 		{
1380 			.ctl_name	= NET_IPV4_CONF_ACCEPT_REDIRECTS,
1381 			.procname	= "accept_redirects",
1382 			.data		= &ipv4_devconf.accept_redirects,
1383 			.maxlen		= sizeof(int),
1384 			.mode		= 0644,
1385 			.proc_handler	= &proc_dointvec,
1386 		},
1387 		{
1388 			.ctl_name	= NET_IPV4_CONF_SECURE_REDIRECTS,
1389 			.procname	= "secure_redirects",
1390 			.data		= &ipv4_devconf.secure_redirects,
1391 			.maxlen		= sizeof(int),
1392 			.mode		= 0644,
1393 			.proc_handler	= &proc_dointvec,
1394 		},
1395 		{
1396 			.ctl_name	= NET_IPV4_CONF_SHARED_MEDIA,
1397 			.procname	= "shared_media",
1398 			.data		= &ipv4_devconf.shared_media,
1399 			.maxlen		= sizeof(int),
1400 			.mode		= 0644,
1401 			.proc_handler	= &proc_dointvec,
1402 		},
1403 		{
1404 			.ctl_name	= NET_IPV4_CONF_RP_FILTER,
1405 			.procname	= "rp_filter",
1406 			.data		= &ipv4_devconf.rp_filter,
1407 			.maxlen		= sizeof(int),
1408 			.mode		= 0644,
1409 			.proc_handler	= &proc_dointvec,
1410 		},
1411 		{
1412 			.ctl_name	= NET_IPV4_CONF_SEND_REDIRECTS,
1413 			.procname	= "send_redirects",
1414 			.data		= &ipv4_devconf.send_redirects,
1415 			.maxlen		= sizeof(int),
1416 			.mode		= 0644,
1417 			.proc_handler	= &proc_dointvec,
1418 		},
1419 		{
1420 			.ctl_name	= NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE,
1421 			.procname	= "accept_source_route",
1422 			.data		= &ipv4_devconf.accept_source_route,
1423 			.maxlen		= sizeof(int),
1424 			.mode		= 0644,
1425 			.proc_handler	= &proc_dointvec,
1426 		},
1427 		{
1428 			.ctl_name	= NET_IPV4_CONF_PROXY_ARP,
1429 			.procname	= "proxy_arp",
1430 			.data		= &ipv4_devconf.proxy_arp,
1431 			.maxlen		= sizeof(int),
1432 			.mode		= 0644,
1433 			.proc_handler	= &proc_dointvec,
1434 		},
1435 		{
1436 			.ctl_name	= NET_IPV4_CONF_MEDIUM_ID,
1437 			.procname	= "medium_id",
1438 			.data		= &ipv4_devconf.medium_id,
1439 			.maxlen		= sizeof(int),
1440 			.mode		= 0644,
1441 			.proc_handler	= &proc_dointvec,
1442 		},
1443 		{
1444 			.ctl_name	= NET_IPV4_CONF_BOOTP_RELAY,
1445 			.procname	= "bootp_relay",
1446 			.data		= &ipv4_devconf.bootp_relay,
1447 			.maxlen		= sizeof(int),
1448 			.mode		= 0644,
1449 			.proc_handler	= &proc_dointvec,
1450 		},
1451 		{
1452 			.ctl_name	= NET_IPV4_CONF_LOG_MARTIANS,
1453 			.procname	= "log_martians",
1454 			.data		= &ipv4_devconf.log_martians,
1455 			.maxlen		= sizeof(int),
1456 			.mode		= 0644,
1457 			.proc_handler	= &proc_dointvec,
1458 		},
1459 		{
1460 			.ctl_name	= NET_IPV4_CONF_TAG,
1461 			.procname	= "tag",
1462 			.data		= &ipv4_devconf.tag,
1463 			.maxlen		= sizeof(int),
1464 			.mode		= 0644,
1465 			.proc_handler	= &proc_dointvec,
1466 		},
1467 		{
1468 			.ctl_name	= NET_IPV4_CONF_ARPFILTER,
1469 			.procname	= "arp_filter",
1470 			.data		= &ipv4_devconf.arp_filter,
1471 			.maxlen		= sizeof(int),
1472 			.mode		= 0644,
1473 			.proc_handler	= &proc_dointvec,
1474 		},
1475 		{
1476 			.ctl_name	= NET_IPV4_CONF_ARP_ANNOUNCE,
1477 			.procname	= "arp_announce",
1478 			.data		= &ipv4_devconf.arp_announce,
1479 			.maxlen		= sizeof(int),
1480 			.mode		= 0644,
1481 			.proc_handler	= &proc_dointvec,
1482 		},
1483 		{
1484 			.ctl_name	= NET_IPV4_CONF_ARP_IGNORE,
1485 			.procname	= "arp_ignore",
1486 			.data		= &ipv4_devconf.arp_ignore,
1487 			.maxlen		= sizeof(int),
1488 			.mode		= 0644,
1489 			.proc_handler	= &proc_dointvec,
1490 		},
1491 		{
1492 			.ctl_name	= NET_IPV4_CONF_ARP_ACCEPT,
1493 			.procname	= "arp_accept",
1494 			.data		= &ipv4_devconf.arp_accept,
1495 			.maxlen		= sizeof(int),
1496 			.mode		= 0644,
1497 			.proc_handler	= &proc_dointvec,
1498 		},
1499 		{
1500 			.ctl_name	= NET_IPV4_CONF_NOXFRM,
1501 			.procname	= "disable_xfrm",
1502 			.data		= &ipv4_devconf.no_xfrm,
1503 			.maxlen		= sizeof(int),
1504 			.mode		= 0644,
1505 			.proc_handler	= &ipv4_doint_and_flush,
1506 			.strategy	= &ipv4_doint_and_flush_strategy,
1507 		},
1508 		{
1509 			.ctl_name	= NET_IPV4_CONF_NOPOLICY,
1510 			.procname	= "disable_policy",
1511 			.data		= &ipv4_devconf.no_policy,
1512 			.maxlen		= sizeof(int),
1513 			.mode		= 0644,
1514 			.proc_handler	= &ipv4_doint_and_flush,
1515 			.strategy	= &ipv4_doint_and_flush_strategy,
1516 		},
1517 		{
1518 			.ctl_name	= NET_IPV4_CONF_FORCE_IGMP_VERSION,
1519 			.procname	= "force_igmp_version",
1520 			.data		= &ipv4_devconf.force_igmp_version,
1521 			.maxlen		= sizeof(int),
1522 			.mode		= 0644,
1523 			.proc_handler	= &ipv4_doint_and_flush,
1524 			.strategy	= &ipv4_doint_and_flush_strategy,
1525 		},
1526 		{
1527 			.ctl_name	= NET_IPV4_CONF_PROMOTE_SECONDARIES,
1528 			.procname	= "promote_secondaries",
1529 			.data		= &ipv4_devconf.promote_secondaries,
1530 			.maxlen		= sizeof(int),
1531 			.mode		= 0644,
1532 			.proc_handler	= &ipv4_doint_and_flush,
1533 			.strategy	= &ipv4_doint_and_flush_strategy,
1534 		},
1535 	},
1536 	.devinet_dev = {
1537 		{
1538 			.ctl_name	= NET_PROTO_CONF_ALL,
1539 			.procname	= "all",
1540 			.mode		= 0555,
1541 			.child		= devinet_sysctl.devinet_vars,
1542 		},
1543 	},
1544 	.devinet_conf_dir = {
1545 		{
1546 			.ctl_name	= NET_IPV4_CONF,
1547 			.procname	= "conf",
1548 			.mode		= 0555,
1549 			.child		= devinet_sysctl.devinet_dev,
1550 		},
1551 	},
1552 	.devinet_proto_dir = {
1553 		{
1554 			.ctl_name	= NET_IPV4,
1555 			.procname	= "ipv4",
1556 			.mode		= 0555,
1557 			.child 		= devinet_sysctl.devinet_conf_dir,
1558 		},
1559 	},
1560 	.devinet_root_dir = {
1561 		{
1562 			.ctl_name	= CTL_NET,
1563 			.procname 	= "net",
1564 			.mode		= 0555,
1565 			.child		= devinet_sysctl.devinet_proto_dir,
1566 		},
1567 	},
1568 };
1569 
1570 static void devinet_sysctl_register(struct in_device *in_dev,
1571 				    struct ipv4_devconf *p)
1572 {
1573 	int i;
1574 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
1575 	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
1576 						 GFP_KERNEL);
1577 	char *dev_name = NULL;
1578 
1579 	if (!t)
1580 		return;
1581 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1582 		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1583 	}
1584 
1585 	if (dev) {
1586 		dev_name = dev->name;
1587 		t->devinet_dev[0].ctl_name = dev->ifindex;
1588 	} else {
1589 		dev_name = "default";
1590 		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
1591 	}
1592 
1593 	/*
1594 	 * Make a copy of dev_name, because '.procname' is regarded as const
1595 	 * by sysctl and we wouldn't want anyone to change it under our feet
1596 	 * (see SIOCSIFNAME).
1597 	 */
1598 	dev_name = kstrdup(dev_name, GFP_KERNEL);
1599 	if (!dev_name)
1600 	    goto free;
1601 
1602 	t->devinet_dev[0].procname    = dev_name;
1603 	t->devinet_dev[0].child	      = t->devinet_vars;
1604 	t->devinet_conf_dir[0].child  = t->devinet_dev;
1605 	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
1606 	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
1607 
1608 	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
1609 	if (!t->sysctl_header)
1610 	    goto free_procname;
1611 
1612 	p->sysctl = t;
1613 	return;
1614 
1615 	/* error path */
1616  free_procname:
1617 	kfree(dev_name);
1618  free:
1619 	kfree(t);
1620 	return;
1621 }
1622 
1623 static void devinet_sysctl_unregister(struct ipv4_devconf *p)
1624 {
1625 	if (p->sysctl) {
1626 		struct devinet_sysctl_table *t = p->sysctl;
1627 		p->sysctl = NULL;
1628 		unregister_sysctl_table(t->sysctl_header);
1629 		kfree(t->devinet_dev[0].procname);
1630 		kfree(t);
1631 	}
1632 }
1633 #endif
1634 
1635 void __init devinet_init(void)
1636 {
1637 	register_gifconf(PF_INET, inet_gifconf);
1638 	register_netdevice_notifier(&ip_netdev_notifier);
1639 	rtnetlink_links[PF_INET] = inet_rtnetlink_table;
1640 #ifdef CONFIG_SYSCTL
1641 	devinet_sysctl.sysctl_header =
1642 		register_sysctl_table(devinet_sysctl.devinet_root_dir);
1643 	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
1644 #endif
1645 }
1646 
/* Symbols exported from this file. */
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);

EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(inet_select_addr);
1652