xref: /linux/net/core/dev.c (revision 20d0021394c1b070bf04b22c5bc8fdb437edd4c5)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/config.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/string.h>
84 #include <linux/mm.h>
85 #include <linux/socket.h>
86 #include <linux/sockios.h>
87 #include <linux/errno.h>
88 #include <linux/interrupt.h>
89 #include <linux/if_ether.h>
90 #include <linux/netdevice.h>
91 #include <linux/etherdevice.h>
92 #include <linux/notifier.h>
93 #include <linux/skbuff.h>
94 #include <net/sock.h>
95 #include <linux/rtnetlink.h>
96 #include <linux/proc_fs.h>
97 #include <linux/seq_file.h>
98 #include <linux/stat.h>
99 #include <linux/if_bridge.h>
100 #include <linux/divert.h>
101 #include <net/dst.h>
102 #include <net/pkt_sched.h>
103 #include <net/checksum.h>
104 #include <linux/highmem.h>
105 #include <linux/init.h>
106 #include <linux/kmod.h>
107 #include <linux/module.h>
108 #include <linux/kallsyms.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #ifdef CONFIG_NET_RADIO
113 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
114 #include <net/iw_handler.h>
115 #endif	/* CONFIG_NET_RADIO */
116 #include <asm/current.h>
117 
118 /*
119  *	The list of packet types we will receive (as opposed to discard)
120  *	and the routines to invoke.
121  *
122  *	Why 16. Because with 16 the only overlap we get on a hash of the
123  *	low nibble of the protocol value is RARP/SNAP/X.25.
124  *
125  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
126  *             sure which should go first, but I bet it won't make much
127  *             difference if we are running VLANs.  The good news is that
128  *             this protocol won't be in the list unless compiled in, so
129  *             the average user (w/out VLANs) will not be adversely affected.
130  *             --BLG
131  *
132  *		0800	IP
133  *		8100    802.1Q VLAN
134  *		0001	802.3
135  *		0002	AX.25
136  *		0004	802.2
137  *		8035	RARP
138  *		0005	SNAP
139  *		0805	X.25
140  *		0806	ARP
141  *		8137	IPX
142  *		0009	Localtalk
143  *		86DD	IPv6
144  */
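/*
 * Illustration (derived from the table above): dev_add_pack() below hashes
 * on the low nibble of the protocol value, ntohs(type) & 15, so e.g.
 * IP (0800) lands in bucket 0 while RARP (8035), SNAP (0005) and
 * X.25 (0805) all collide in bucket 5, the overlap noted above.
 */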
145 
146 static DEFINE_SPINLOCK(ptype_lock);
147 static struct list_head ptype_base[16];	/* 16 way hashed list */
148 static struct list_head ptype_all;		/* Taps */
149 
150 /*
151  * The @dev_base list is protected by @dev_base_lock and the rtnl
152  * semaphore.
153  *
154  * Pure readers hold dev_base_lock for reading.
155  *
156  * Writers must hold the rtnl semaphore while they loop through the
157  * dev_base list, and hold dev_base_lock for writing when they do the
158  * actual updates.  This allows pure readers to access the list even
159  * while a writer is preparing to update it.
160  *
161  * To put it another way, dev_base_lock is held for writing only to
162  * protect against pure readers; the rtnl semaphore provides the
163  * protection against other writers.
164  *
165  * For example usages, see register_netdevice() and
166  * unregister_netdevice(), which must be called with the rtnl
167  * semaphore held.
168  */
169 struct net_device *dev_base;
170 static struct net_device **dev_tail = &dev_base;
171 DEFINE_RWLOCK(dev_base_lock);
172 
173 EXPORT_SYMBOL(dev_base);
174 EXPORT_SYMBOL(dev_base_lock);
175 
176 #define NETDEV_HASHBITS	8
177 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
178 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
179 
180 static inline struct hlist_head *dev_name_hash(const char *name)
181 {
182 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
183 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
184 }
185 
186 static inline struct hlist_head *dev_index_hash(int ifindex)
187 {
188 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
189 }
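/*
 * Both hashes above fold into 1<<NETDEV_HASHBITS == 256 buckets; for
 * instance, ifindex 3 and ifindex 259 share a bucket (259 & 255 == 3).
 */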
190 
191 /*
192  *	Our notifier list
193  */
194 
195 static struct notifier_block *netdev_chain;
196 
197 /*
198  *	Device drivers call our routines to queue packets here. We empty the
199  *	queue in the local softnet handler.
200  */
201 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
202 
203 #ifdef CONFIG_SYSFS
204 extern int netdev_sysfs_init(void);
205 extern int netdev_register_sysfs(struct net_device *);
206 extern void netdev_unregister_sysfs(struct net_device *);
207 #else
208 #define netdev_sysfs_init()	 	(0)
209 #define netdev_register_sysfs(dev)	(0)
210 #define	netdev_unregister_sysfs(dev)	do { } while(0)
211 #endif
212 
213 
214 /*******************************************************************************
215 
216 		Protocol management and registration routines
217 
218 *******************************************************************************/
219 
220 /*
221  *	For efficiency
222  */
223 
224 int netdev_nit;
225 
226 /*
227  *	Add a protocol ID to the list. Now that the input handler is
228  *	smarter we can dispense with all the messy stuff that used to be
229  *	here.
230  *
231  *	BEWARE!!! Protocol handlers that mangle input packets
232  *	MUST BE last in the hash buckets, and checking of protocol
233  *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
234  *	This holds today; do not change it.
235  *	Explanation: if a protocol handler that mangles packets
236  *	were first in the list, it could not sense that the packet
237  *	is cloned and should be copied-on-write, so it would change
238  *	it and subsequent readers would get a broken packet.
239  *							--ANK (980803)
240  */
241 
242 /**
243  *	dev_add_pack - add packet handler
244  *	@pt: packet type declaration
245  *
246  *	Add a protocol handler to the networking stack. The passed &packet_type
247  *	is linked into kernel lists and may not be freed until it has been
248  *	removed from the kernel lists.
249  *
250  *	This call does not sleep, so it cannot guarantee that
251  *	CPUs in the middle of receiving packets will see the new
252  *	packet type (until the next received packet).
253  */
254 
255 void dev_add_pack(struct packet_type *pt)
256 {
257 	int hash;
258 
259 	spin_lock_bh(&ptype_lock);
260 	if (pt->type == htons(ETH_P_ALL)) {
261 		netdev_nit++;
262 		list_add_rcu(&pt->list, &ptype_all);
263 	} else {
264 		hash = ntohs(pt->type) & 15;
265 		list_add_rcu(&pt->list, &ptype_base[hash]);
266 	}
267 	spin_unlock_bh(&ptype_lock);
268 }
269 
270 extern void linkwatch_run_queue(void);
271 
272 
273 
274 /**
275  *	__dev_remove_pack	 - remove packet handler
276  *	@pt: packet type declaration
277  *
278  *	Remove a protocol handler that was previously added to the kernel
279  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
280  *	from the kernel lists and can be freed or reused once this function
281  *	returns.
282  *
283  *      The packet type might still be in use by receivers
284  *	and must not be freed until after all the CPUs have gone
285  *	through a quiescent state.
286  */
287 void __dev_remove_pack(struct packet_type *pt)
288 {
289 	struct list_head *head;
290 	struct packet_type *pt1;
291 
292 	spin_lock_bh(&ptype_lock);
293 
294 	if (pt->type == htons(ETH_P_ALL)) {
295 		netdev_nit--;
296 		head = &ptype_all;
297 	} else
298 		head = &ptype_base[ntohs(pt->type) & 15];
299 
300 	list_for_each_entry(pt1, head, list) {
301 		if (pt == pt1) {
302 			list_del_rcu(&pt->list);
303 			goto out;
304 		}
305 	}
306 
307 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
308 out:
309 	spin_unlock_bh(&ptype_lock);
310 }
311 /**
312  *	dev_remove_pack	 - remove packet handler
313  *	@pt: packet type declaration
314  *
315  *	Remove a protocol handler that was previously added to the kernel
316  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
317  *	from the kernel lists and can be freed or reused once this function
318  *	returns.
319  *
320  *	This call sleeps to guarantee that no CPU is looking at the packet
321  *	type after return.
322  */
323 void dev_remove_pack(struct packet_type *pt)
324 {
325 	__dev_remove_pack(pt);
326 
327 	synchronize_net();
328 }
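/*
 * Usage sketch (illustrative only; my_proto_rcv and my_packet_type are
 * hypothetical names, not part of this file).  A protocol module pairs
 * dev_add_pack() at init time with dev_remove_pack() at exit time:
 *
 *	static int my_proto_rcv(struct sk_buff *skb, struct net_device *dev,
 *				struct packet_type *pt)
 *	{
 *		kfree_skb(skb);		(consume the packet)
 *		return 0;
 *	}
 *
 *	static struct packet_type my_packet_type = {
 *		.type	= __constant_htons(ETH_P_ARP),
 *		.func	= my_proto_rcv,
 *	};
 *
 *	dev_add_pack(&my_packet_type);		at module init
 *	dev_remove_pack(&my_packet_type);	at module exit; may sleep
 */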
329 
330 /*******************************************************************************
331 
332 		      Device Boot-time Settings Routines
333 
334 *******************************************************************************/
335 
336 /* Boot time configuration table */
337 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
338 
339 /**
340  *	netdev_boot_setup_add	- add new setup entry
341  *	@name: name of the device
342  *	@map: configured settings for the device
343  *
344  *	Adds a new setup entry to the dev_boot_setup list.  The function
345  *	returns 0 on error and 1 on success.  This is a generic routine
346  *	for all netdevices.
347  */
348 static int netdev_boot_setup_add(char *name, struct ifmap *map)
349 {
350 	struct netdev_boot_setup *s;
351 	int i;
352 
353 	s = dev_boot_setup;
354 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
355 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
356 			memset(s[i].name, 0, sizeof(s[i].name));
357 			strcpy(s[i].name, name);
358 			memcpy(&s[i].map, map, sizeof(s[i].map));
359 			break;
360 		}
361 	}
362 
363 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
364 }
365 
366 /**
367  *	netdev_boot_setup_check	- check boot time settings
368  *	@dev: the netdevice
369  *
370  * 	Check boot time settings for the device.
371  *	Any settings found are applied to the device so they can be
372  *	used later during device probing.
373  *	Returns 1 if settings are found, 0 otherwise.
374  */
375 int netdev_boot_setup_check(struct net_device *dev)
376 {
377 	struct netdev_boot_setup *s = dev_boot_setup;
378 	int i;
379 
380 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
381 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
382 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
383 			dev->irq 	= s[i].map.irq;
384 			dev->base_addr 	= s[i].map.base_addr;
385 			dev->mem_start 	= s[i].map.mem_start;
386 			dev->mem_end 	= s[i].map.mem_end;
387 			return 1;
388 		}
389 	}
390 	return 0;
391 }
392 
393 
394 /**
395  *	netdev_boot_base	- get address from boot time settings
396  *	@prefix: prefix for network device
397  *	@unit: id for network device
398  *
399  * 	Check boot time settings for the base address of a device.
400  *	Returns the configured base address, 1 if the device is already
401  *	registered (so it should not be probed again), or 0 if no
402  *	settings are found.
403  */
404 unsigned long netdev_boot_base(const char *prefix, int unit)
405 {
406 	const struct netdev_boot_setup *s = dev_boot_setup;
407 	char name[IFNAMSIZ];
408 	int i;
409 
410 	sprintf(name, "%s%d", prefix, unit);
411 
412 	/*
413 	 * If the device is already registered, return a base of 1
414 	 * to indicate not to probe for this interface
415 	 */
416 	if (__dev_get_by_name(name))
417 		return 1;
418 
419 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
420 		if (!strcmp(name, s[i].name))
421 			return s[i].map.base_addr;
422 	return 0;
423 }
424 
425 /*
426  * Saves at boot time configured settings for any netdevice.
427  */
428 int __init netdev_boot_setup(char *str)
429 {
430 	int ints[5];
431 	struct ifmap map;
432 
433 	str = get_options(str, ARRAY_SIZE(ints), ints);
434 	if (!str || !*str)
435 		return 0;
436 
437 	/* Save settings */
438 	memset(&map, 0, sizeof(map));
439 	if (ints[0] > 0)
440 		map.irq = ints[1];
441 	if (ints[0] > 1)
442 		map.base_addr = ints[2];
443 	if (ints[0] > 2)
444 		map.mem_start = ints[3];
445 	if (ints[0] > 3)
446 		map.mem_end = ints[4];
447 
448 	/* Add new entry to the list */
449 	return netdev_boot_setup_add(str, &map);
450 }
451 
452 __setup("netdev=", netdev_boot_setup);
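/*
 * Example (derived from the parsing above): a boot line such as
 *
 *	netdev=5,0x300,0,0,eth0
 *
 * records irq 5 and base_addr 0x300 for "eth0" via netdev_boot_setup_add(),
 * to be picked up later by netdev_boot_setup_check().
 */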
453 
454 /*******************************************************************************
455 
456 			    Device Interface Subroutines
457 
458 *******************************************************************************/
459 
460 /**
461  *	__dev_get_by_name	- find a device by its name
462  *	@name: name to find
463  *
464  *	Find an interface by name. Must be called under RTNL semaphore
465  *	or @dev_base_lock. If the name is found a pointer to the device
466  *	is returned. If the name is not found then %NULL is returned. The
467  *	reference counters are not incremented so the caller must be
468  *	careful with locks.
469  */
470 
471 struct net_device *__dev_get_by_name(const char *name)
472 {
473 	struct hlist_node *p;
474 
475 	hlist_for_each(p, dev_name_hash(name)) {
476 		struct net_device *dev
477 			= hlist_entry(p, struct net_device, name_hlist);
478 		if (!strncmp(dev->name, name, IFNAMSIZ))
479 			return dev;
480 	}
481 	return NULL;
482 }
483 
484 /**
485  *	dev_get_by_name		- find a device by its name
486  *	@name: name to find
487  *
488  *	Find an interface by name. This can be called from any
489  *	context and does its own locking. The returned handle has
490  *	the usage count incremented and the caller must use dev_put() to
491  *	release it when it is no longer needed. %NULL is returned if no
492  *	matching device is found.
493  */
494 
495 struct net_device *dev_get_by_name(const char *name)
496 {
497 	struct net_device *dev;
498 
499 	read_lock(&dev_base_lock);
500 	dev = __dev_get_by_name(name);
501 	if (dev)
502 		dev_hold(dev);
503 	read_unlock(&dev_base_lock);
504 	return dev;
505 }
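/*
 * Caller-side sketch (hypothetical): the reference taken by
 * dev_get_by_name() must be balanced with dev_put():
 *
 *	struct net_device *dev = dev_get_by_name("eth0");
 *	if (dev) {
 *		...
 *		dev_put(dev);
 *	}
 */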
506 
507 /**
508  *	__dev_get_by_index - find a device by its ifindex
509  *	@ifindex: index of device
510  *
511  *	Search for an interface by index. Returns a pointer to the
512  *	device, or %NULL if it is not found. The device has not
513  *	had its reference counter increased so the caller must be careful
514  *	about locking. The caller must hold either the RTNL semaphore
515  *	or @dev_base_lock.
516  */
517 
518 struct net_device *__dev_get_by_index(int ifindex)
519 {
520 	struct hlist_node *p;
521 
522 	hlist_for_each(p, dev_index_hash(ifindex)) {
523 		struct net_device *dev
524 			= hlist_entry(p, struct net_device, index_hlist);
525 		if (dev->ifindex == ifindex)
526 			return dev;
527 	}
528 	return NULL;
529 }
530 
531 
532 /**
533  *	dev_get_by_index - find a device by its ifindex
534  *	@ifindex: index of device
535  *
536  *	Search for an interface by index. Returns a pointer to the
537  *	device, or NULL if it is not found. The device returned has
538  *	had a reference added and the pointer is safe until the user calls
539  *	dev_put to indicate they have finished with it.
540  */
541 
542 struct net_device *dev_get_by_index(int ifindex)
543 {
544 	struct net_device *dev;
545 
546 	read_lock(&dev_base_lock);
547 	dev = __dev_get_by_index(ifindex);
548 	if (dev)
549 		dev_hold(dev);
550 	read_unlock(&dev_base_lock);
551 	return dev;
552 }
553 
554 /**
555  *	dev_getbyhwaddr - find a device by its hardware address
556  *	@type: media type of device
557  *	@ha: hardware address
558  *
559  *	Search for an interface by MAC address. Returns a pointer to the
560  *	device, or NULL if it is not found. The caller must hold the
561  *	rtnl semaphore. The returned device has not had its ref count increased
562  *	and the caller must therefore be careful about locking
563  *
564  *	BUGS:
565  *	If the API was consistent this would be __dev_get_by_hwaddr
566  */
567 
568 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
569 {
570 	struct net_device *dev;
571 
572 	ASSERT_RTNL();
573 
574 	for (dev = dev_base; dev; dev = dev->next)
575 		if (dev->type == type &&
576 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
577 			break;
578 	return dev;
579 }
580 
581 struct net_device *dev_getfirstbyhwtype(unsigned short type)
582 {
583 	struct net_device *dev;
584 
585 	rtnl_lock();
586 	for (dev = dev_base; dev; dev = dev->next) {
587 		if (dev->type == type) {
588 			dev_hold(dev);
589 			break;
590 		}
591 	}
592 	rtnl_unlock();
593 	return dev;
594 }
595 
596 EXPORT_SYMBOL(dev_getfirstbyhwtype);
597 
598 /**
599  *	dev_get_by_flags - find any device with given flags
600  *	@if_flags: IFF_* values
601  *	@mask: bitmask of bits in if_flags to check
602  *
603  *	Search for any interface with the given flags. Returns a pointer to
604  *	the device, or NULL if none is found. The device returned has
605  *	had a reference added and the pointer is safe until the user calls
606  *	dev_put to indicate they have finished with it.
607  */
608 
609 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
610 {
611 	struct net_device *dev;
612 
613 	read_lock(&dev_base_lock);
614 	for (dev = dev_base; dev != NULL; dev = dev->next) {
615 		if (((dev->flags ^ if_flags) & mask) == 0) {
616 			dev_hold(dev);
617 			break;
618 		}
619 	}
620 	read_unlock(&dev_base_lock);
621 	return dev;
622 }
623 
624 /**
625  *	dev_valid_name - check if name is okay for network device
626  *	@name: name string
627  *
628  *	Network device names need to be valid file names
629  *	to allow sysfs to work.
630  */
631 static int dev_valid_name(const char *name)
632 {
633 	return !(*name == '\0'
634 		 || !strcmp(name, ".")
635 		 || !strcmp(name, "..")
636 		 || strchr(name, '/'));
637 }
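/*
 * Examples: "eth0" is accepted; "", ".", ".." and anything containing
 * a '/' (e.g. "eth/0") are rejected.
 */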
638 
639 /**
640  *	dev_alloc_name - allocate a name for a device
641  *	@dev: device
642  *	@name: name format string
643  *
644  *	Passed a format string, e.g. "lt%d", it will try to find a suitable
645  *	id. Not efficient for many devices, and not called often. The caller
646  *	must hold the dev_base or rtnl lock while allocating the name and
647  *	adding the device in order to avoid duplicates. Returns the number
648  *	of the unit assigned or a negative errno code.
649  */
650 
651 int dev_alloc_name(struct net_device *dev, const char *name)
652 {
653 	int i = 0;
654 	char buf[IFNAMSIZ];
655 	const char *p;
656 	const int max_netdevices = 8*PAGE_SIZE;
657 	long *inuse;
658 	struct net_device *d;
659 
660 	p = strnchr(name, IFNAMSIZ-1, '%');
661 	if (p) {
662 		/*
663 		 * Verify the string as this thing may have come from
664 		 * the user.  There must be either one "%d" and no other "%"
665 		 * characters.
666 		 */
667 		if (p[1] != 'd' || strchr(p + 2, '%'))
668 			return -EINVAL;
669 
670 		/* Use one page as a bit array of possible slots */
671 		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
672 		if (!inuse)
673 			return -ENOMEM;
674 
675 		for (d = dev_base; d; d = d->next) {
676 			if (!sscanf(d->name, name, &i))
677 				continue;
678 			if (i < 0 || i >= max_netdevices)
679 				continue;
680 
681 			/*  avoid cases where sscanf is not exact inverse of printf */
682 			snprintf(buf, sizeof(buf), name, i);
683 			if (!strncmp(buf, d->name, IFNAMSIZ))
684 				set_bit(i, inuse);
685 		}
686 
687 		i = find_first_zero_bit(inuse, max_netdevices);
688 		free_page((unsigned long) inuse);
689 	}
690 
691 	snprintf(buf, sizeof(buf), name, i);
692 	if (!__dev_get_by_name(buf)) {
693 		strlcpy(dev->name, buf, IFNAMSIZ);
694 		return i;
695 	}
696 
697 	/* It is possible to run out of possible slots
698 	 * when the name is long and there isn't enough space left
699 	 * for the digits, or if all bits are used.
700 	 */
701 	return -ENFILE;
702 }
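/*
 * Example (illustrative): with eth0 and eth1 already registered,
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *
 * writes "eth2" into dev->name and returns 2.
 */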
703 
704 
705 /**
706  *	dev_change_name - change name of a device
707  *	@dev: device
708  *	@newname: name (or format string) must be at least IFNAMSIZ
709  *
710  *	Change the name of a device. A format string such as "eth%d"
711  *	may be passed for wildcarding.
712  */
713 int dev_change_name(struct net_device *dev, char *newname)
714 {
715 	int err = 0;
716 
717 	ASSERT_RTNL();
718 
719 	if (dev->flags & IFF_UP)
720 		return -EBUSY;
721 
722 	if (!dev_valid_name(newname))
723 		return -EINVAL;
724 
725 	if (strchr(newname, '%')) {
726 		err = dev_alloc_name(dev, newname);
727 		if (err < 0)
728 			return err;
729 		strcpy(newname, dev->name);
730 	}
731 	else if (__dev_get_by_name(newname))
732 		return -EEXIST;
733 	else
734 		strlcpy(dev->name, newname, IFNAMSIZ);
735 
736 	err = class_device_rename(&dev->class_dev, dev->name);
737 	if (!err) {
738 		hlist_del(&dev->name_hlist);
739 		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
740 		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
741 	}
742 
743 	return err;
744 }
745 
746 /**
747  *	netdev_features_change - device changes features
748  *	@dev: device to cause notification
749  *
750  *	Called to indicate a device has changed features.
751  */
752 void netdev_features_change(struct net_device *dev)
753 {
754 	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
755 }
756 EXPORT_SYMBOL(netdev_features_change);
757 
758 /**
759  *	netdev_state_change - device changes state
760  *	@dev: device to cause notification
761  *
762  *	Called to indicate a device has changed state. This function calls
763  *	the notifier chains for netdev_chain and sends a NEWLINK message
764  *	to the routing socket.
765  */
766 void netdev_state_change(struct net_device *dev)
767 {
768 	if (dev->flags & IFF_UP) {
769 		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
770 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
771 	}
772 }
773 
774 /**
775  *	dev_load 	- load a network module
776  *	@name: name of interface
777  *
778  *	If a network interface is not present and the process has suitable
779  *	privileges, this function loads the module. If module loading is not
780  *	available in this kernel then it becomes a nop.
781  */
782 
783 void dev_load(const char *name)
784 {
785 	struct net_device *dev;
786 
787 	read_lock(&dev_base_lock);
788 	dev = __dev_get_by_name(name);
789 	read_unlock(&dev_base_lock);
790 
791 	if (!dev && capable(CAP_SYS_MODULE))
792 		request_module("%s", name);
793 }
794 
795 static int default_rebuild_header(struct sk_buff *skb)
796 {
797 	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
798 	       skb->dev ? skb->dev->name : "NULL!!!");
799 	kfree_skb(skb);
800 	return 1;
801 }
802 
803 
804 /**
805  *	dev_open	- prepare an interface for use.
806  *	@dev:	device to open
807  *
808  *	Takes a device from down to up state. The device's private open
809  *	function is invoked and then the multicast lists are loaded. Finally
810  *	the device is moved into the up state and a %NETDEV_UP message is
811  *	sent to the netdev notifier chain.
812  *
813  *	Calling this function on an active interface is a nop. On a failure
814  *	a negative errno code is returned.
815  */
816 int dev_open(struct net_device *dev)
817 {
818 	int ret = 0;
819 
820 	/*
821 	 *	Is it already up?
822 	 */
823 
824 	if (dev->flags & IFF_UP)
825 		return 0;
826 
827 	/*
828 	 *	Is it even present?
829 	 */
830 	if (!netif_device_present(dev))
831 		return -ENODEV;
832 
833 	/*
834 	 *	Call device private open method
835 	 */
836 	set_bit(__LINK_STATE_START, &dev->state);
837 	if (dev->open) {
838 		ret = dev->open(dev);
839 		if (ret)
840 			clear_bit(__LINK_STATE_START, &dev->state);
841 	}
842 
843  	/*
844 	 *	If it went open OK then:
845 	 */
846 
847 	if (!ret) {
848 		/*
849 		 *	Set the flags.
850 		 */
851 		dev->flags |= IFF_UP;
852 
853 		/*
854 		 *	Initialize multicasting status
855 		 */
856 		dev_mc_upload(dev);
857 
858 		/*
859 		 *	Wakeup transmit queue engine
860 		 */
861 		dev_activate(dev);
862 
863 		/*
864 		 *	... and announce new interface.
865 		 */
866 		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
867 	}
868 	return ret;
869 }
870 
871 /**
872  *	dev_close - shutdown an interface.
873  *	@dev: device to shutdown
874  *
875  *	This function moves an active device into down state. A
876  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
877  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
878  *	chain.
879  */
880 int dev_close(struct net_device *dev)
881 {
882 	if (!(dev->flags & IFF_UP))
883 		return 0;
884 
885 	/*
886 	 *	Tell people we are going down, so that they can
887 	 *	prepare for it while the device is still operating.
888 	 */
889 	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
890 
891 	dev_deactivate(dev);
892 
893 	clear_bit(__LINK_STATE_START, &dev->state);
894 
895 	/* Synchronize with any scheduled poll. We cannot touch the poll
896 	 * list; it may even be on a different cpu. So just clear
897 	 * netif_running() and wait until the poll really happens. Actually,
898 	 * the best place for this is inside dev->stop() after the device has
899 	 * stopped its irq engine, but this requires more changes in drivers. */
900 
901 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
902 	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
903 		/* No hurry. */
904 		current->state = TASK_INTERRUPTIBLE;
905 		schedule_timeout(1);
906 	}
907 
908 	/*
909 	 *	Call the device specific close. This cannot fail.
910 	 *	Only if device is UP
911 	 *
912 	 *	We allow it to be called even after a DETACH hot-plug
913 	 *	event.
914 	 */
915 	if (dev->stop)
916 		dev->stop(dev);
917 
918 	/*
919 	 *	Device is now down.
920 	 */
921 
922 	dev->flags &= ~IFF_UP;
923 
924 	/*
925 	 * Tell people we are down
926 	 */
927 	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
928 
929 	return 0;
930 }
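/*
 * Sketch of administrative up/down (context hypothetical): dev_open()
 * and dev_close() are normally invoked under the rtnl semaphore, as
 * dev_change_flags() does when toggling IFF_UP:
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	...
 *	dev_close(dev);
 *	rtnl_unlock();
 */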
931 
932 
933 /*
934  *	Device change register/unregister. These are not inline or static
935  *	as we export them to the world.
936  */
937 
938 /**
939  *	register_netdevice_notifier - register a network notifier block
940  *	@nb: notifier
941  *
942  *	Register a notifier to be called when network device events occur.
943  *	The notifier passed is linked into the kernel structures and must
944  *	not be reused until it has been unregistered. A negative errno code
945  *	is returned on a failure.
946  *
947  * 	When registered, all registration and up events are replayed
948  *	to the new notifier to give it a race-free view of the
949  *	network device list.
950  */
951 
952 int register_netdevice_notifier(struct notifier_block *nb)
953 {
954 	struct net_device *dev;
955 	int err;
956 
957 	rtnl_lock();
958 	err = notifier_chain_register(&netdev_chain, nb);
959 	if (!err) {
960 		for (dev = dev_base; dev; dev = dev->next) {
961 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
962 
963 			if (dev->flags & IFF_UP)
964 				nb->notifier_call(nb, NETDEV_UP, dev);
965 		}
966 	}
967 	rtnl_unlock();
968 	return err;
969 }
970 
971 /**
972  *	unregister_netdevice_notifier - unregister a network notifier block
973  *	@nb: notifier
974  *
975  *	Unregister a notifier previously registered by
976  *	register_netdevice_notifier(). The notifier is unlinked from the
977  *	kernel structures and may then be reused. A negative errno code
978  *	is returned on a failure.
979  */
980 
981 int unregister_netdevice_notifier(struct notifier_block *nb)
982 {
983 	return notifier_chain_unregister(&netdev_chain, nb);
984 }
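/*
 * Usage sketch (my_netdev_event and my_notifier are hypothetical names):
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_notifier = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_notifier);
 *
 * Existing devices are replayed as NETDEV_REGISTER (and NETDEV_UP when
 * already up), per register_netdevice_notifier() above.
 */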
985 
986 /**
987  *	call_netdevice_notifiers - call all network notifier blocks
988  *      @val: value passed unmodified to notifier function
989  *      @v:   pointer passed unmodified to notifier function
990  *
991  *	Call all network notifier blocks.  Parameters and return value
992  *	are as for notifier_call_chain().
993  */
994 
995 int call_netdevice_notifiers(unsigned long val, void *v)
996 {
997 	return notifier_call_chain(&netdev_chain, val, v);
998 }
999 
1000 /* When > 0 there are consumers of rx skb time stamps */
1001 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1002 
1003 void net_enable_timestamp(void)
1004 {
1005 	atomic_inc(&netstamp_needed);
1006 }
1007 
1008 void net_disable_timestamp(void)
1009 {
1010 	atomic_dec(&netstamp_needed);
1011 }
1012 
1013 static inline void net_timestamp(struct timeval *stamp)
1014 {
1015 	if (atomic_read(&netstamp_needed))
1016 		do_gettimeofday(stamp);
1017 	else {
1018 		stamp->tv_sec = 0;
1019 		stamp->tv_usec = 0;
1020 	}
1021 }
1022 
1023 /*
1024  *	Support routine. Sends outgoing frames to any network
1025  *	taps currently in use.
1026  */
1027 
1028 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1029 {
1030 	struct packet_type *ptype;
1031 	net_timestamp(&skb->stamp);
1032 
1033 	rcu_read_lock();
1034 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1035 		/* Never send packets back to the socket
1036 		 * they originated from - MvS (miquels@drinkel.ow.org)
1037 		 */
1038 		if ((ptype->dev == dev || !ptype->dev) &&
1039 		    (ptype->af_packet_priv == NULL ||
1040 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1041 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1042 			if (!skb2)
1043 				break;
1044 
1045 			/* skb->nh should be correctly
1046 			   set by the sender, so the check below is
1047 			   just protection against buggy protocols.
1048 			 */
1049 			skb2->mac.raw = skb2->data;
1050 
1051 			if (skb2->nh.raw < skb2->data ||
1052 			    skb2->nh.raw > skb2->tail) {
1053 				if (net_ratelimit())
1054 					printk(KERN_CRIT "protocol %04x is "
1055 					       "buggy, dev %s\n",
1056 					       skb2->protocol, dev->name);
1057 				skb2->nh.raw = skb2->data;
1058 			}
1059 
1060 			skb2->h.raw = skb2->nh.raw;
1061 			skb2->pkt_type = PACKET_OUTGOING;
1062 			ptype->func(skb2, skb->dev, ptype);
1063 		}
1064 	}
1065 	rcu_read_unlock();
1066 }
1067 
1068 /*
1069  * Invalidate hardware checksum when packet is to be mangled, and
1070  * complete checksum manually on outgoing path.
1071  */
1072 int skb_checksum_help(struct sk_buff *skb, int inward)
1073 {
1074 	unsigned int csum;
1075 	int ret = 0, offset = skb->h.raw - skb->data;
1076 
1077 	if (inward) {
1078 		skb->ip_summed = CHECKSUM_NONE;
1079 		goto out;
1080 	}
1081 
1082 	if (skb_cloned(skb)) {
1083 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1084 		if (ret)
1085 			goto out;
1086 	}
1087 
1088 	if (offset > (int)skb->len)
1089 		BUG();
1090 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1091 
1092 	offset = skb->tail - skb->h.raw;
1093 	if (offset <= 0)
1094 		BUG();
1095 	if (skb->csum + 2 > offset)
1096 		BUG();
1097 
1098 	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1099 	skb->ip_summed = CHECKSUM_NONE;
1100 out:
1101 	return ret;
1102 }
1103 
1104 #ifdef CONFIG_HIGHMEM
1105 /* Actually, we should eliminate this check as soon as we know that:
1106  * 1. an IOMMU is present and can map all of the memory, or
1107  * 2. no high memory really exists on this machine.
1108  */
1109 
1110 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1111 {
1112 	int i;
1113 
1114 	if (dev->features & NETIF_F_HIGHDMA)
1115 		return 0;
1116 
1117 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1118 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1119 			return 1;
1120 
1121 	return 0;
1122 }
1123 #else
1124 #define illegal_highdma(dev, skb)	(0)
1125 #endif
1126 
1127 extern void skb_release_data(struct sk_buff *);
1128 
1129 /* Keep head the same: replace data */
1130 int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
1131 {
1132 	unsigned int size;
1133 	u8 *data;
1134 	long offset;
1135 	struct skb_shared_info *ninfo;
1136 	int headerlen = skb->data - skb->head;
1137 	int expand = (skb->tail + skb->data_len) - skb->end;
1138 
1139 	if (skb_shared(skb))
1140 		BUG();
1141 
1142 	if (expand <= 0)
1143 		expand = 0;
1144 
1145 	size = skb->end - skb->head + expand;
1146 	size = SKB_DATA_ALIGN(size);
1147 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1148 	if (!data)
1149 		return -ENOMEM;
1150 
1151 	/* Copy entire thing */
1152 	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1153 		BUG();
1154 
1155 	/* Set up shinfo */
1156 	ninfo = (struct skb_shared_info*)(data + size);
1157 	atomic_set(&ninfo->dataref, 1);
1158 	ninfo->tso_size = skb_shinfo(skb)->tso_size;
1159 	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1160 	ninfo->nr_frags = 0;
1161 	ninfo->frag_list = NULL;
1162 
1163 	/* Offset between the two in bytes */
1164 	offset = data - skb->head;
1165 
1166 	/* Free old data. */
1167 	skb_release_data(skb);
1168 
1169 	skb->head = data;
1170 	skb->end  = data + size;
1171 
1172 	/* Set up new pointers */
1173 	skb->h.raw   += offset;
1174 	skb->nh.raw  += offset;
1175 	skb->mac.raw += offset;
1176 	skb->tail    += offset;
1177 	skb->data    += offset;
1178 
1179 	/* We are no longer a clone, even if we were. */
1180 	skb->cloned    = 0;
1181 
1182 	skb->tail     += skb->data_len;
1183 	skb->data_len  = 0;
1184 	return 0;
1185 }
1186 
1187 #define HARD_TX_LOCK(dev, cpu) {			\
1188 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1189 		spin_lock(&dev->xmit_lock);		\
1190 		dev->xmit_lock_owner = cpu;		\
1191 	}						\
1192 }
1193 
1194 #define HARD_TX_UNLOCK(dev) {				\
1195 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1196 		dev->xmit_lock_owner = -1;		\
1197 		spin_unlock(&dev->xmit_lock);		\
1198 	}						\
1199 }
1200 
1201 /**
1202  *	dev_queue_xmit - transmit a buffer
1203  *	@skb: buffer to transmit
1204  *
1205  *	Queue a buffer for transmission to a network device. The caller must
1206  *	have set the device and priority and built the buffer before calling
1207  *	this function. The function can be called from an interrupt.
1208  *
1209  *	A negative errno code is returned on a failure. A success does not
1210  *	guarantee the frame will be transmitted as it may be dropped due
1211  *	to congestion or traffic shaping.
1212  *
1213  * -----------------------------------------------------------------------------------
1214  *      I notice this method can also return errors from the queue disciplines,
1215  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1216  *      be positive.
1217  *
1218  *      Regardless of the return value, the skb is consumed, so it is currently
1219  *      difficult to retry a send to this method.  (You can bump the ref count
1220  *      before sending to hold a reference for retry if you are careful.)
1221  *
1222  *      When calling this method, interrupts MUST be enabled.  This is because
1223  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1224  *          --BLG
1225  */
1226 
1227 int dev_queue_xmit(struct sk_buff *skb)
1228 {
1229 	struct net_device *dev = skb->dev;
1230 	struct Qdisc *q;
1231 	int rc = -ENOMEM;
1232 
1233 	if (skb_shinfo(skb)->frag_list &&
1234 	    !(dev->features & NETIF_F_FRAGLIST) &&
1235 	    __skb_linearize(skb, GFP_ATOMIC))
1236 		goto out_kfree_skb;
1237 
1238 	/* Fragmented skb is linearized if device does not support SG,
1239 	 * or if at least one of fragments is in highmem and device
1240 	 * does not support DMA from it.
1241 	 */
1242 	if (skb_shinfo(skb)->nr_frags &&
1243 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1244 	    __skb_linearize(skb, GFP_ATOMIC))
1245 		goto out_kfree_skb;
1246 
1247 	/* If packet is not checksummed and device does not support
1248 	 * checksumming for this protocol, complete checksumming here.
1249 	 */
1250 	if (skb->ip_summed == CHECKSUM_HW &&
1251 	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1252 	     (!(dev->features & NETIF_F_IP_CSUM) ||
1253 	      skb->protocol != htons(ETH_P_IP))))
1254 	      	if (skb_checksum_help(skb, 0))
1255 	      		goto out_kfree_skb;
1256 
1257 	/* Disable soft irqs for various locks below. Also
1258 	 * stops preemption for RCU.
1259 	 */
1260 	local_bh_disable();
1261 
1262 	/* Updates of qdisc are serialized by queue_lock.
1263 	 * The struct Qdisc which is pointed to by qdisc is now a
1264 	 * rcu structure - it may be accessed without acquiring
1265 	 * a lock (but the structure may be stale.) The freeing of the
1266 	 * qdisc will be deferred until it's known that there are no
1267 	 * more references to it.
1268 	 *
1269 	 * If the qdisc has an enqueue function, we still need to
1270 	 * hold the queue_lock before calling it, since queue_lock
1271 	 * also serializes access to the device queue.
1272 	 */
1273 
1274 	q = rcu_dereference(dev->qdisc);
1275 #ifdef CONFIG_NET_CLS_ACT
1276 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1277 #endif
1278 	if (q->enqueue) {
1279 		/* Grab device queue */
1280 		spin_lock(&dev->queue_lock);
1281 
1282 		rc = q->enqueue(skb, q);
1283 
1284 		qdisc_run(dev);
1285 
1286 		spin_unlock(&dev->queue_lock);
1287 		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1288 		goto out;
1289 	}
1290 
1291 	/* The device has no queue. Common case for software devices:
1292 	   loopback, all sorts of tunnels...
1293 
1294 	   Really, it is unlikely that xmit_lock protection is necessary here.
1295 	   (f.e. loopback and IP tunnels are clean, ignoring the statistics
1296 	   counters.)
1297 	   However, it is possible that they rely on the protection
1298 	   made by us here.
1299 
1300 	   Check this and shoot the lock. It is not prone to deadlocks.
1301 	   Or shoot the noqueue qdisc, which is even simpler 8)
1302 	 */
1303 	if (dev->flags & IFF_UP) {
1304 		int cpu = smp_processor_id(); /* ok because BHs are off */
1305 
1306 		if (dev->xmit_lock_owner != cpu) {
1307 
1308 			HARD_TX_LOCK(dev, cpu);
1309 
1310 			if (!netif_queue_stopped(dev)) {
1311 				if (netdev_nit)
1312 					dev_queue_xmit_nit(skb, dev);
1313 
1314 				rc = 0;
1315 				if (!dev->hard_start_xmit(skb, dev)) {
1316 					HARD_TX_UNLOCK(dev);
1317 					goto out;
1318 				}
1319 			}
1320 			HARD_TX_UNLOCK(dev);
1321 			if (net_ratelimit())
1322 				printk(KERN_CRIT "Virtual device %s asks to "
1323 				       "queue packet!\n", dev->name);
1324 		} else {
1325 			/* Recursion is detected! It is possible,
1326 			 * unfortunately */
1327 			if (net_ratelimit())
1328 				printk(KERN_CRIT "Dead loop on virtual device "
1329 				       "%s, fix it urgently!\n", dev->name);
1330 		}
1331 	}
1332 
1333 	rc = -ENETDOWN;
1334 	local_bh_enable();
1335 
1336 out_kfree_skb:
1337 	kfree_skb(skb);
1338 	return rc;
1339 out:
1340 	local_bh_enable();
1341 	return rc;
1342 }
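/*
 * Caller-side sketch (hypothetical): a protocol builds the frame, sets
 * the device and priority, then hands the skb off.  The skb is consumed
 * regardless of the return value, and positive NET_XMIT_* codes are
 * possible, as noted above:
 *
 *	skb->dev = dev;
 *	skb->priority = sk->sk_priority;
 *	err = dev_queue_xmit(skb);
 */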
1343 
1344 
1345 /*=======================================================================
1346 			Receiver routines
1347   =======================================================================*/
1348 
1349 int netdev_max_backlog = 1000;
1350 int netdev_budget = 300;
1351 int weight_p = 64;            /* old backlog weight */
1352 
1353 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1354 
1355 
1356 /**
1357  *	netif_rx	-	post buffer to the network code
1358  *	@skb: buffer to post
1359  *
1360  *	This function receives a packet from a device driver and queues it for
1361  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1362  *	may be dropped during processing for congestion control or by the
1363  *	protocol layers.
1364  *
1365  *	return values:
1366  *	NET_RX_SUCCESS	(no congestion)
1367  *	NET_RX_CN_LOW   (low congestion)
1368  *	NET_RX_CN_MOD   (moderate congestion)
1369  *	NET_RX_CN_HIGH  (high congestion)
1370  *	NET_RX_DROP     (packet was dropped)
1371  *
1372  */
1373 
1374 int netif_rx(struct sk_buff *skb)
1375 {
1376 	struct softnet_data *queue;
1377 	unsigned long flags;
1378 
1379 	/* if netpoll wants it, pretend we never saw it */
1380 	if (netpoll_rx(skb))
1381 		return NET_RX_DROP;
1382 
1383 	if (!skb->stamp.tv_sec)
1384 		net_timestamp(&skb->stamp);
1385 
1386 	/*
1387 	 * The code is rearranged so that the path is shortest
1388 	 * when the CPU is congested but still operating.
1389 	 */
1390 	local_irq_save(flags);
1391 	queue = &__get_cpu_var(softnet_data);
1392 
1393 	__get_cpu_var(netdev_rx_stat).total++;
1394 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1395 		if (queue->input_pkt_queue.qlen) {
1396 enqueue:
1397 			dev_hold(skb->dev);
1398 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1399 			local_irq_restore(flags);
1400 			return NET_RX_SUCCESS;
1401 		}
1402 
1403 		netif_rx_schedule(&queue->backlog_dev);
1404 		goto enqueue;
1405 	}
1406 
1407 	__get_cpu_var(netdev_rx_stat).dropped++;
1408 	local_irq_restore(flags);
1409 
1410 	kfree_skb(skb);
1411 	return NET_RX_DROP;
1412 }
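/*
 * Driver-side sketch (hypothetical rx interrupt handler): the driver
 * fills in the metadata and queues the packet:
 *
 *	skb->dev = dev;
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);			from irq context
 *
 * Process-context callers should use netif_rx_ni() below instead.
 */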
1413 
1414 int netif_rx_ni(struct sk_buff *skb)
1415 {
1416 	int err;
1417 
1418 	preempt_disable();
1419 	err = netif_rx(skb);
1420 	if (local_softirq_pending())
1421 		do_softirq();
1422 	preempt_enable();
1423 
1424 	return err;
1425 }
1426 
1427 EXPORT_SYMBOL(netif_rx_ni);
1428 
1429 static __inline__ void skb_bond(struct sk_buff *skb)
1430 {
1431 	struct net_device *dev = skb->dev;
1432 
1433 	if (dev->master) {
1434 		skb->real_dev = skb->dev;
1435 		skb->dev = dev->master;
1436 	}
1437 }
1438 
1439 static void net_tx_action(struct softirq_action *h)
1440 {
1441 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1442 
1443 	if (sd->completion_queue) {
1444 		struct sk_buff *clist;
1445 
1446 		local_irq_disable();
1447 		clist = sd->completion_queue;
1448 		sd->completion_queue = NULL;
1449 		local_irq_enable();
1450 
1451 		while (clist) {
1452 			struct sk_buff *skb = clist;
1453 			clist = clist->next;
1454 
1455 			BUG_TRAP(!atomic_read(&skb->users));
1456 			__kfree_skb(skb);
1457 		}
1458 	}
1459 
1460 	if (sd->output_queue) {
1461 		struct net_device *head;
1462 
1463 		local_irq_disable();
1464 		head = sd->output_queue;
1465 		sd->output_queue = NULL;
1466 		local_irq_enable();
1467 
1468 		while (head) {
1469 			struct net_device *dev = head;
1470 			head = head->next_sched;
1471 
1472 			smp_mb__before_clear_bit();
1473 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1474 
1475 			if (spin_trylock(&dev->queue_lock)) {
1476 				qdisc_run(dev);
1477 				spin_unlock(&dev->queue_lock);
1478 			} else {
1479 				netif_schedule(dev);
1480 			}
1481 		}
1482 	}
1483 }
1484 
1485 static __inline__ int deliver_skb(struct sk_buff *skb,
1486 				  struct packet_type *pt_prev)
1487 {
1488 	atomic_inc(&skb->users);
1489 	return pt_prev->func(skb, skb->dev, pt_prev);
1490 }
1491 
1492 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1493 int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
1494 struct net_bridge;
1495 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1496 						unsigned char *addr);
1497 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
1498 
1499 static __inline__ int handle_bridge(struct sk_buff **pskb,
1500 				    struct packet_type **pt_prev, int *ret)
1501 {
1502 	struct net_bridge_port *port;
1503 
1504 	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
1505 	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
1506 		return 0;
1507 
1508 	if (*pt_prev) {
1509 		*ret = deliver_skb(*pskb, *pt_prev);
1510 		*pt_prev = NULL;
1511 	}
1512 
1513 	return br_handle_frame_hook(port, pskb);
1514 }
1515 #else
1516 #define handle_bridge(skb, pt_prev, ret)	(0)
1517 #endif
1518 
1519 #ifdef CONFIG_NET_CLS_ACT
1520 /* TODO: Maybe we should just force sch_ingress to be compiled in
1521  * when CONFIG_NET_CLS_ACT is? Otherwise we execute some useless
1522  * instructions (a compare and 2 extra stores) right now when it is
1523  * not enabled but CONFIG_NET_CLS_ACT is.
1524  * NOTE: This doesn't remove any functionality; if you don't have
1525  * the ingress scheduler, you just can't add policies on ingress.
1526  *
1527  */
1528 static int ing_filter(struct sk_buff *skb)
1529 {
1530 	struct Qdisc *q;
1531 	struct net_device *dev = skb->dev;
1532 	int result = TC_ACT_OK;
1533 
1534 	if (dev->qdisc_ingress) {
1535 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1536 		if (MAX_RED_LOOP < ttl++) {
1537 			printk("Redir loop detected, dropping packet (%s->%s)\n",
1538 				skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
1539 			return TC_ACT_SHOT;
1540 		}
1541 
1542 		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1543 
1544 		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1545 		if (NULL == skb->input_dev) {
1546 			skb->input_dev = skb->dev;
1547 			printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
1548 		}
1549 		spin_lock(&dev->ingress_lock);
1550 		if ((q = dev->qdisc_ingress) != NULL)
1551 			result = q->enqueue(skb, q);
1552 		spin_unlock(&dev->ingress_lock);
1553 
1554 	}
1555 
1556 	return result;
1557 }
1558 #endif
1559 
1560 int netif_receive_skb(struct sk_buff *skb)
1561 {
1562 	struct packet_type *ptype, *pt_prev;
1563 	int ret = NET_RX_DROP;
1564 	unsigned short type;
1565 
1566 	/* if we've gotten here through NAPI, check netpoll */
1567 	if (skb->dev->poll && netpoll_rx(skb))
1568 		return NET_RX_DROP;
1569 
1570 	if (!skb->stamp.tv_sec)
1571 		net_timestamp(&skb->stamp);
1572 
1573 	skb_bond(skb);
1574 
1575 	__get_cpu_var(netdev_rx_stat).total++;
1576 
1577 	skb->h.raw = skb->nh.raw = skb->data;
1578 	skb->mac_len = skb->nh.raw - skb->mac.raw;
1579 
1580 	pt_prev = NULL;
1581 
1582 	rcu_read_lock();
1583 
1584 #ifdef CONFIG_NET_CLS_ACT
1585 	if (skb->tc_verd & TC_NCLS) {
1586 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1587 		goto ncls;
1588 	}
1589 #endif
1590 
1591 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1592 		if (!ptype->dev || ptype->dev == skb->dev) {
1593 			if (pt_prev)
1594 				ret = deliver_skb(skb, pt_prev);
1595 			pt_prev = ptype;
1596 		}
1597 	}
1598 
1599 #ifdef CONFIG_NET_CLS_ACT
1600 	if (pt_prev) {
1601 		ret = deliver_skb(skb, pt_prev);
1602 		pt_prev = NULL; /* no one else should process this after us */
1603 	} else {
1604 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1605 	}
1606 
1607 	ret = ing_filter(skb);
1608 
1609 	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1610 		kfree_skb(skb);
1611 		goto out;
1612 	}
1613 
1614 	skb->tc_verd = 0;
1615 ncls:
1616 #endif
1617 
1618 	handle_diverter(skb);
1619 
1620 	if (handle_bridge(&skb, &pt_prev, &ret))
1621 		goto out;
1622 
1623 	type = skb->protocol;
1624 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1625 		if (ptype->type == type &&
1626 		    (!ptype->dev || ptype->dev == skb->dev)) {
1627 			if (pt_prev)
1628 				ret = deliver_skb(skb, pt_prev);
1629 			pt_prev = ptype;
1630 		}
1631 	}
1632 
1633 	if (pt_prev) {
1634 		ret = pt_prev->func(skb, skb->dev, pt_prev);
1635 	} else {
1636 		kfree_skb(skb);
1637 		/* Jamal, now you will not be able to escape explaining
1638 		 * to me how you were going to use this. :-)
1639 		 */
1640 		ret = NET_RX_DROP;
1641 	}
1642 
1643 out:
1644 	rcu_read_unlock();
1645 	return ret;
1646 }
1647 
1648 static int process_backlog(struct net_device *backlog_dev, int *budget)
1649 {
1650 	int work = 0;
1651 	int quota = min(backlog_dev->quota, *budget);
1652 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1653 	unsigned long start_time = jiffies;
1654 
1655 	backlog_dev->weight = weight_p;
1656 	for (;;) {
1657 		struct sk_buff *skb;
1658 		struct net_device *dev;
1659 
1660 		local_irq_disable();
1661 		skb = __skb_dequeue(&queue->input_pkt_queue);
1662 		if (!skb)
1663 			goto job_done;
1664 		local_irq_enable();
1665 
1666 		dev = skb->dev;
1667 
1668 		netif_receive_skb(skb);
1669 
1670 		dev_put(dev);
1671 
1672 		work++;
1673 
1674 		if (work >= quota || jiffies - start_time > 1)
1675 			break;
1676 
1677 	}
1678 
1679 	backlog_dev->quota -= work;
1680 	*budget -= work;
1681 	return -1;
1682 
1683 job_done:
1684 	backlog_dev->quota -= work;
1685 	*budget -= work;
1686 
1687 	list_del(&backlog_dev->poll_list);
1688 	smp_mb__before_clear_bit();
1689 	netif_poll_enable(backlog_dev);
1690 
1691 	local_irq_enable();
1692 	return 0;
1693 }
1694 
1695 static void net_rx_action(struct softirq_action *h)
1696 {
1697 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1698 	unsigned long start_time = jiffies;
1699 	int budget = netdev_budget;
1700 
1701 	local_irq_disable();
1702 
1703 	while (!list_empty(&queue->poll_list)) {
1704 		struct net_device *dev;
1705 
1706 		if (budget <= 0 || jiffies - start_time > 1)
1707 			goto softnet_break;
1708 
1709 		local_irq_enable();
1710 
1711 		dev = list_entry(queue->poll_list.next,
1712 				 struct net_device, poll_list);
1713 		netpoll_poll_lock(dev);
1714 
1715 		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1716 			netpoll_poll_unlock(dev);
1717 			local_irq_disable();
1718 			list_del(&dev->poll_list);
1719 			list_add_tail(&dev->poll_list, &queue->poll_list);
1720 			if (dev->quota < 0)
1721 				dev->quota += dev->weight;
1722 			else
1723 				dev->quota = dev->weight;
1724 		} else {
1725 			netpoll_poll_unlock(dev);
1726 			dev_put(dev);
1727 			local_irq_disable();
1728 		}
1729 	}
1730 out:
1731 	local_irq_enable();
1732 	return;
1733 
1734 softnet_break:
1735 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
1736 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1737 	goto out;
1738 }
1739 
1740 static gifconf_func_t * gifconf_list [NPROTO];
1741 
1742 /**
1743  *	register_gifconf	-	register a SIOCGIF handler
1744  *	@family: Address family
1745  *	@gifconf: Function handler
1746  *
1747  *	Register protocol dependent address dumping routines. The handler
1748  *	that is passed must not be freed or reused until it has been replaced
1749  *	by another handler.
1750  */
1751 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1752 {
1753 	if (family >= NPROTO)
1754 		return -EINVAL;
1755 	gifconf_list[family] = gifconf;
1756 	return 0;
1757 }
1758 
1759 
1760 /*
1761  *	Map an interface index to its name (SIOCGIFNAME)
1762  */
1763 
1764 /*
1765  *	We need this ioctl for efficient implementation of the
1766  *	if_indextoname() function required by the IPv6 API.  Without
1767  *	it, we would have to search all the interfaces to find a
1768  *	match.  --pb
1769  */
1770 
1771 static int dev_ifname(struct ifreq __user *arg)
1772 {
1773 	struct net_device *dev;
1774 	struct ifreq ifr;
1775 
1776 	/*
1777 	 *	Fetch the caller's info block.
1778 	 */
1779 
1780 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1781 		return -EFAULT;
1782 
1783 	read_lock(&dev_base_lock);
1784 	dev = __dev_get_by_index(ifr.ifr_ifindex);
1785 	if (!dev) {
1786 		read_unlock(&dev_base_lock);
1787 		return -ENODEV;
1788 	}
1789 
1790 	strcpy(ifr.ifr_name, dev->name);
1791 	read_unlock(&dev_base_lock);
1792 
1793 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1794 		return -EFAULT;
1795 	return 0;
1796 }
1797 
1798 /*
1799  *	Perform a SIOCGIFCONF call. This structure will change
1800  *	size eventually, and there is nothing I can do about it.
1801  *	Thus we will need a 'compatibility mode'.
1802  */
1803 
1804 static int dev_ifconf(char __user *arg)
1805 {
1806 	struct ifconf ifc;
1807 	struct net_device *dev;
1808 	char __user *pos;
1809 	int len;
1810 	int total;
1811 	int i;
1812 
1813 	/*
1814 	 *	Fetch the caller's info block.
1815 	 */
1816 
1817 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1818 		return -EFAULT;
1819 
1820 	pos = ifc.ifc_buf;
1821 	len = ifc.ifc_len;
1822 
1823 	/*
1824 	 *	Loop over the interfaces, and write an info block for each.
1825 	 */
1826 
1827 	total = 0;
1828 	for (dev = dev_base; dev; dev = dev->next) {
1829 		for (i = 0; i < NPROTO; i++) {
1830 			if (gifconf_list[i]) {
1831 				int done;
1832 				if (!pos)
1833 					done = gifconf_list[i](dev, NULL, 0);
1834 				else
1835 					done = gifconf_list[i](dev, pos + total,
1836 							       len - total);
1837 				if (done < 0)
1838 					return -EFAULT;
1839 				total += done;
1840 			}
1841 		}
1842   	}
1843 
1844 	/*
1845 	 *	All done.  Write the updated control block back to the caller.
1846 	 */
1847 	ifc.ifc_len = total;
1848 
1849 	/*
1850 	 * 	Both BSD and Solaris return 0 here, so we do too.
1851 	 */
1852 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1853 }
1854 
1855 #ifdef CONFIG_PROC_FS
1856 /*
1857  *	This is invoked by the /proc filesystem handler to display a device
1858  *	in detail.
1859  */
1860 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1861 {
1862 	struct net_device *dev;
1863 	loff_t i;
1864 
1865 	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1866 
1867 	return i == pos ? dev : NULL;
1868 }
1869 
1870 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1871 {
1872 	read_lock(&dev_base_lock);
1873 	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1874 }
1875 
1876 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1877 {
1878 	++*pos;
1879 	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1880 }
1881 
1882 void dev_seq_stop(struct seq_file *seq, void *v)
1883 {
1884 	read_unlock(&dev_base_lock);
1885 }
1886 
1887 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1888 {
1889 	if (dev->get_stats) {
1890 		struct net_device_stats *stats = dev->get_stats(dev);
1891 
1892 		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1893 				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1894 			   dev->name, stats->rx_bytes, stats->rx_packets,
1895 			   stats->rx_errors,
1896 			   stats->rx_dropped + stats->rx_missed_errors,
1897 			   stats->rx_fifo_errors,
1898 			   stats->rx_length_errors + stats->rx_over_errors +
1899 			     stats->rx_crc_errors + stats->rx_frame_errors,
1900 			   stats->rx_compressed, stats->multicast,
1901 			   stats->tx_bytes, stats->tx_packets,
1902 			   stats->tx_errors, stats->tx_dropped,
1903 			   stats->tx_fifo_errors, stats->collisions,
1904 			   stats->tx_carrier_errors +
1905 			     stats->tx_aborted_errors +
1906 			     stats->tx_window_errors +
1907 			     stats->tx_heartbeat_errors,
1908 			   stats->tx_compressed);
1909 	} else
1910 		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1911 }
1912 
1913 /*
1914  *	Called from the PROCfs module. This now uses the new arbitrary sized
1915  *	/proc/net interface to create /proc/net/dev
1916  */
1917 static int dev_seq_show(struct seq_file *seq, void *v)
1918 {
1919 	if (v == SEQ_START_TOKEN)
1920 		seq_puts(seq, "Inter-|   Receive                            "
1921 			      "                    |  Transmit\n"
1922 			      " face |bytes    packets errs drop fifo frame "
1923 			      "compressed multicast|bytes    packets errs "
1924 			      "drop fifo colls carrier compressed\n");
1925 	else
1926 		dev_seq_printf_stats(seq, v);
1927 	return 0;
1928 }
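
/*
 * For reference, the seq_printf format above renders a header plus one
 * line per device, roughly as follows (the numbers here are made up
 * purely for illustration):
 *
 *	Inter-|   Receive                         |  Transmit
 *	 face |bytes    packets errs drop ...     |bytes    packets errs ...
 *	    lo:   10432     104    0    0 ...        10432     104    0 ...
 */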
1929 
1930 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
1931 {
1932 	struct netif_rx_stats *rc = NULL;
1933 
1934 	while (*pos < NR_CPUS)
1935 	       	if (cpu_online(*pos)) {
1936 			rc = &per_cpu(netdev_rx_stat, *pos);
1937 			break;
1938 		} else
1939 			++*pos;
1940 	return rc;
1941 }
1942 
1943 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
1944 {
1945 	return softnet_get_online(pos);
1946 }
1947 
1948 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1949 {
1950 	++*pos;
1951 	return softnet_get_online(pos);
1952 }
1953 
1954 static void softnet_seq_stop(struct seq_file *seq, void *v)
1955 {
1956 }
1957 
1958 static int softnet_seq_show(struct seq_file *seq, void *v)
1959 {
1960 	struct netif_rx_stats *s = v;
1961 
1962 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
1963 		   s->total, s->dropped, s->time_squeeze, 0,
1964 		   0, 0, 0, 0, /* was fastroute */
1965 		   s->cpu_collision);
1966 	return 0;
1967 }
1968 
1969 static struct seq_operations dev_seq_ops = {
1970 	.start = dev_seq_start,
1971 	.next  = dev_seq_next,
1972 	.stop  = dev_seq_stop,
1973 	.show  = dev_seq_show,
1974 };
1975 
1976 static int dev_seq_open(struct inode *inode, struct file *file)
1977 {
1978 	return seq_open(file, &dev_seq_ops);
1979 }
1980 
1981 static struct file_operations dev_seq_fops = {
1982 	.owner	 = THIS_MODULE,
1983 	.open    = dev_seq_open,
1984 	.read    = seq_read,
1985 	.llseek  = seq_lseek,
1986 	.release = seq_release,
1987 };
1988 
1989 static struct seq_operations softnet_seq_ops = {
1990 	.start = softnet_seq_start,
1991 	.next  = softnet_seq_next,
1992 	.stop  = softnet_seq_stop,
1993 	.show  = softnet_seq_show,
1994 };
1995 
1996 static int softnet_seq_open(struct inode *inode, struct file *file)
1997 {
1998 	return seq_open(file, &softnet_seq_ops);
1999 }
2000 
2001 static struct file_operations softnet_seq_fops = {
2002 	.owner	 = THIS_MODULE,
2003 	.open    = softnet_seq_open,
2004 	.read    = seq_read,
2005 	.llseek  = seq_lseek,
2006 	.release = seq_release,
2007 };
2008 
2009 #ifdef WIRELESS_EXT
2010 extern int wireless_proc_init(void);
2011 #else
2012 #define wireless_proc_init() 0
2013 #endif
2014 
2015 static int __init dev_proc_init(void)
2016 {
2017 	int rc = -ENOMEM;
2018 
2019 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2020 		goto out;
2021 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2022 		goto out_dev;
2023 	if (wireless_proc_init())
2024 		goto out_softnet;
2025 	rc = 0;
2026 out:
2027 	return rc;
2028 out_softnet:
2029 	proc_net_remove("softnet_stat");
2030 out_dev:
2031 	proc_net_remove("dev");
2032 	goto out;
2033 }
2034 #else
2035 #define dev_proc_init() 0
2036 #endif	/* CONFIG_PROC_FS */
2037 
2038 
2039 /**
2040  *	netdev_set_master	-	set up master/slave pair
2041  *	@slave: slave device
2042  *	@master: new master device
2043  *
2044  *	Changes the master device of the slave. Pass %NULL to break the
2045  *	bonding. The caller must hold the RTNL semaphore. On a failure
2046  *	a negative errno code is returned. On success the reference counts
2047  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2048  *	function returns zero.
2049  */
2050 int netdev_set_master(struct net_device *slave, struct net_device *master)
2051 {
2052 	struct net_device *old = slave->master;
2053 
2054 	ASSERT_RTNL();
2055 
2056 	if (master) {
2057 		if (old)
2058 			return -EBUSY;
2059 		dev_hold(master);
2060 	}
2061 
2062 	slave->master = master;
2063 
2064 	synchronize_net();
2065 
2066 	if (old)
2067 		dev_put(old);
2068 
2069 	if (master)
2070 		slave->flags |= IFF_SLAVE;
2071 	else
2072 		slave->flags &= ~IFF_SLAVE;
2073 
2074 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2075 	return 0;
2076 }
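
/*
 * Illustrative sketch (not from this file): how a bonding-style driver
 * might use this under the RTNL semaphore; bond_dev and slave_dev are
 * hypothetical.
 *
 *	rtnl_lock();
 *	err = netdev_set_master(slave_dev, bond_dev);	// enslave
 *	...
 *	netdev_set_master(slave_dev, NULL);		// break the pairing
 *	rtnl_unlock();
 */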
2077 
2078 /**
2079  *	dev_set_promiscuity	- update promiscuity count on a device
2080  *	@dev: device
2081  *	@inc: modifier
2082  *
2083  *	Add or remove promiscuity from a device. While the count in the device
2084  *	remains above zero the interface remains promiscuous. Once it hits zero
2085  *	the device reverts to normal filtering operation. A negative @inc
2086  *	value is used to drop promiscuity on the device.
2087  */
2088 void dev_set_promiscuity(struct net_device *dev, int inc)
2089 {
2090 	unsigned short old_flags = dev->flags;
2091 
2092 	if ((dev->promiscuity += inc) == 0)
2093 		dev->flags &= ~IFF_PROMISC;
2094 	else
2095 		dev->flags |= IFF_PROMISC;
2096 	if (dev->flags != old_flags) {
2097 		dev_mc_upload(dev);
2098 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2099 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2100 		       					       "left");
2101 	}
2102 }
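
/*
 * Illustrative sketch (not from this file): a packet-capture style
 * caller bumps the count while capturing and drops it afterwards, so
 * several such users can share one device:
 *
 *	dev_set_promiscuity(dev, 1);	// count 0 -> 1: enter promiscuous
 *	...
 *	dev_set_promiscuity(dev, -1);	// count 1 -> 0: leave promiscuous
 */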
2103 
2104 /**
2105  *	dev_set_allmulti	- update allmulti count on a device
2106  *	@dev: device
2107  *	@inc: modifier
2108  *
2109  *	Add or remove reception of all multicast frames to a device. While the
2110  *	count in the device remains above zero the interface remains listening
2111  *	to all multicast frames. Once it hits zero the device reverts to normal
2112  *	filtering operation. A negative @inc value is used to drop the counter
2113  *	when releasing a resource needing all multicasts.
2114  */
2115 
2116 void dev_set_allmulti(struct net_device *dev, int inc)
2117 {
2118 	unsigned short old_flags = dev->flags;
2119 
2120 	dev->flags |= IFF_ALLMULTI;
2121 	if ((dev->allmulti += inc) == 0)
2122 		dev->flags &= ~IFF_ALLMULTI;
2123 	if (dev->flags ^ old_flags)
2124 		dev_mc_upload(dev);
2125 }
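
/*
 * Illustrative sketch (not from this file): mirrors the promiscuity
 * example above; a multicast router holds the count for as long as it
 * needs every multicast frame:
 *
 *	dev_set_allmulti(dev, 1);	// start receiving all multicasts
 *	...
 *	dev_set_allmulti(dev, -1);	// release when done
 */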
2126 
2127 unsigned dev_get_flags(const struct net_device *dev)
2128 {
2129 	unsigned flags;
2130 
2131 	flags = (dev->flags & ~(IFF_PROMISC |
2132 				IFF_ALLMULTI |
2133 				IFF_RUNNING)) |
2134 		(dev->gflags & (IFF_PROMISC |
2135 				IFF_ALLMULTI));
2136 
2137 	if (netif_running(dev) && netif_carrier_ok(dev))
2138 		flags |= IFF_RUNNING;
2139 
2140 	return flags;
2141 }
2142 
2143 int dev_change_flags(struct net_device *dev, unsigned flags)
2144 {
2145 	int ret;
2146 	int old_flags = dev->flags;
2147 
2148 	/*
2149 	 *	Set the flags on our device.
2150 	 */
2151 
2152 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2153 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2154 			       IFF_AUTOMEDIA)) |
2155 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2156 				    IFF_ALLMULTI));
2157 
2158 	/*
2159 	 *	Load in the correct multicast list now the flags have changed.
2160 	 */
2161 
2162 	dev_mc_upload(dev);
2163 
2164 	/*
2165 	 *	Have we downed the interface? We handle IFF_UP ourselves
2166 	 *	according to user attempts to set it, rather than blindly
2167 	 *	setting it.
2168 	 */
2169 
2170 	ret = 0;
2171 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different? */
2172 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2173 
2174 		if (!ret)
2175 			dev_mc_upload(dev);
2176 	}
2177 
2178 	if (dev->flags & IFF_UP &&
2179 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2180 					  IFF_VOLATILE)))
2181 		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2182 
2183 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2184 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2185 		dev->gflags ^= IFF_PROMISC;
2186 		dev_set_promiscuity(dev, inc);
2187 	}
2188 
2189 	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2190 	   is important. Some (broken) drivers set IFF_PROMISC when
2191 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
2192 	 */
2193 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2194 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2195 		dev->gflags ^= IFF_ALLMULTI;
2196 		dev_set_allmulti(dev, inc);
2197 	}
2198 
2199 	if (old_flags ^ dev->flags)
2200 		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2201 
2202 	return ret;
2203 }
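
/*
 * Illustrative sketch (not from this file): toggling IFF_UP through
 * dev_change_flags(), which routes the transition through dev_open()
 * or dev_close() as implemented above. The caller holds RTNL, as
 * dev_ioctl() below does for SIOCSIFFLAGS.
 *
 *	unsigned flags = dev_get_flags(dev);
 *
 *	err = dev_change_flags(dev, flags | IFF_UP);	// bring it up
 *	...
 *	err = dev_change_flags(dev, flags & ~IFF_UP);	// take it down
 */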
2204 
2205 int dev_set_mtu(struct net_device *dev, int new_mtu)
2206 {
2207 	int err;
2208 
2209 	if (new_mtu == dev->mtu)
2210 		return 0;
2211 
2212 	/*	MTU must not be negative.	 */
2213 	if (new_mtu < 0)
2214 		return -EINVAL;
2215 
2216 	if (!netif_device_present(dev))
2217 		return -ENODEV;
2218 
2219 	err = 0;
2220 	if (dev->change_mtu)
2221 		err = dev->change_mtu(dev, new_mtu);
2222 	else
2223 		dev->mtu = new_mtu;
2224 	if (!err && dev->flags & IFF_UP)
2225 		notifier_call_chain(&netdev_chain,
2226 				    NETDEV_CHANGEMTU, dev);
2227 	return err;
2228 }
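
/*
 * Illustrative sketch (not from this file): a VLAN-style caller capping
 * its MTU below that of a lower device; when the driver provides no
 * change_mtu hook, the plain assignment above is all that happens.
 * vlan_dev and real_dev are hypothetical.
 *
 *	err = dev_set_mtu(vlan_dev, real_dev->mtu - 4);
 *	if (err)
 *		printk(KERN_WARNING "MTU change rejected: %d\n", err);
 */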
2229 
2230 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2231 {
2232 	int err;
2233 
2234 	if (!dev->set_mac_address)
2235 		return -EOPNOTSUPP;
2236 	if (sa->sa_family != dev->type)
2237 		return -EINVAL;
2238 	if (!netif_device_present(dev))
2239 		return -ENODEV;
2240 	err = dev->set_mac_address(dev, sa);
2241 	if (!err)
2242 		notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2243 	return err;
2244 }
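
/*
 * Illustrative sketch (not from this file): building the sockaddr this
 * function expects. sa_family must carry the device type (e.g.
 * ARPHRD_ETHER), not an address family; new_mac is a hypothetical
 * buffer of dev->addr_len bytes.
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, dev->addr_len);
 *	err = dev_set_mac_address(dev, &sa);
 */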
2245 
2246 /*
2247  *	Perform the SIOCxIFxxx calls.
2248  */
2249 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2250 {
2251 	int err;
2252 	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2253 
2254 	if (!dev)
2255 		return -ENODEV;
2256 
2257 	switch (cmd) {
2258 		case SIOCGIFFLAGS:	/* Get interface flags */
2259 			ifr->ifr_flags = dev_get_flags(dev);
2260 			return 0;
2261 
2262 		case SIOCSIFFLAGS:	/* Set interface flags */
2263 			return dev_change_flags(dev, ifr->ifr_flags);
2264 
2265 		case SIOCGIFMETRIC:	/* Get the metric on the interface
2266 					   (currently unused) */
2267 			ifr->ifr_metric = 0;
2268 			return 0;
2269 
2270 		case SIOCSIFMETRIC:	/* Set the metric on the interface
2271 					   (currently unused) */
2272 			return -EOPNOTSUPP;
2273 
2274 		case SIOCGIFMTU:	/* Get the MTU of a device */
2275 			ifr->ifr_mtu = dev->mtu;
2276 			return 0;
2277 
2278 		case SIOCSIFMTU:	/* Set the MTU of a device */
2279 			return dev_set_mtu(dev, ifr->ifr_mtu);
2280 
2281 		case SIOCGIFHWADDR:
2282 			if (!dev->addr_len)
2283 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2284 			else
2285 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2286 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2287 			ifr->ifr_hwaddr.sa_family = dev->type;
2288 			return 0;
2289 
2290 		case SIOCSIFHWADDR:
2291 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2292 
2293 		case SIOCSIFHWBROADCAST:
2294 			if (ifr->ifr_hwaddr.sa_family != dev->type)
2295 				return -EINVAL;
2296 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2297 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2298 			notifier_call_chain(&netdev_chain,
2299 					    NETDEV_CHANGEADDR, dev);
2300 			return 0;
2301 
2302 		case SIOCGIFMAP:
2303 			ifr->ifr_map.mem_start = dev->mem_start;
2304 			ifr->ifr_map.mem_end   = dev->mem_end;
2305 			ifr->ifr_map.base_addr = dev->base_addr;
2306 			ifr->ifr_map.irq       = dev->irq;
2307 			ifr->ifr_map.dma       = dev->dma;
2308 			ifr->ifr_map.port      = dev->if_port;
2309 			return 0;
2310 
2311 		case SIOCSIFMAP:
2312 			if (dev->set_config) {
2313 				if (!netif_device_present(dev))
2314 					return -ENODEV;
2315 				return dev->set_config(dev, &ifr->ifr_map);
2316 			}
2317 			return -EOPNOTSUPP;
2318 
2319 		case SIOCADDMULTI:
2320 			if (!dev->set_multicast_list ||
2321 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2322 				return -EINVAL;
2323 			if (!netif_device_present(dev))
2324 				return -ENODEV;
2325 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2326 					  dev->addr_len, 1);
2327 
2328 		case SIOCDELMULTI:
2329 			if (!dev->set_multicast_list ||
2330 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2331 				return -EINVAL;
2332 			if (!netif_device_present(dev))
2333 				return -ENODEV;
2334 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2335 					     dev->addr_len, 1);
2336 
2337 		case SIOCGIFINDEX:
2338 			ifr->ifr_ifindex = dev->ifindex;
2339 			return 0;
2340 
2341 		case SIOCGIFTXQLEN:
2342 			ifr->ifr_qlen = dev->tx_queue_len;
2343 			return 0;
2344 
2345 		case SIOCSIFTXQLEN:
2346 			if (ifr->ifr_qlen < 0)
2347 				return -EINVAL;
2348 			dev->tx_queue_len = ifr->ifr_qlen;
2349 			return 0;
2350 
2351 		case SIOCSIFNAME:
2352 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2353 			return dev_change_name(dev, ifr->ifr_newname);
2354 
2355 		/*
2356 		 *	Unknown or private ioctl
2357 		 */
2358 
2359 		default:
2360 			if ((cmd >= SIOCDEVPRIVATE &&
2361 			    cmd <= SIOCDEVPRIVATE + 15) ||
2362 			    cmd == SIOCBONDENSLAVE ||
2363 			    cmd == SIOCBONDRELEASE ||
2364 			    cmd == SIOCBONDSETHWADDR ||
2365 			    cmd == SIOCBONDSLAVEINFOQUERY ||
2366 			    cmd == SIOCBONDINFOQUERY ||
2367 			    cmd == SIOCBONDCHANGEACTIVE ||
2368 			    cmd == SIOCGMIIPHY ||
2369 			    cmd == SIOCGMIIREG ||
2370 			    cmd == SIOCSMIIREG ||
2371 			    cmd == SIOCBRADDIF ||
2372 			    cmd == SIOCBRDELIF ||
2373 			    cmd == SIOCWANDEV) {
2374 				err = -EOPNOTSUPP;
2375 				if (dev->do_ioctl) {
2376 					if (netif_device_present(dev))
2377 						err = dev->do_ioctl(dev, ifr,
2378 								    cmd);
2379 					else
2380 						err = -ENODEV;
2381 				}
2382 			} else
2383 				err = -EINVAL;
2384 
2385 	}
2386 	return err;
2387 }
2388 
2389 /*
2390  *	This function handles all "interface"-type I/O control requests. The actual
2391  *	'doing' part of this is dev_ifsioc above.
2392  */
2393 
2394 /**
2395  *	dev_ioctl	-	network device ioctl
2396  *	@cmd: command to issue
2397  *	@arg: pointer to a struct ifreq in user space
2398  *
2399  *	Issue ioctl functions to devices. This is normally called by the
2400  *	user space syscall interfaces but can sometimes be useful for
2401  *	other purposes. The return value is zero or a positive value on
2402  *	success, or a negative errno code on error.
2403  */
2404 
2405 int dev_ioctl(unsigned int cmd, void __user *arg)
2406 {
2407 	struct ifreq ifr;
2408 	int ret;
2409 	char *colon;
2410 
2411 	/* One special case: SIOCGIFCONF takes ifconf argument
2412 	   and requires shared lock, because it sleeps writing
2413 	   to user space.
2414 	 */
2415 
2416 	if (cmd == SIOCGIFCONF) {
2417 		rtnl_shlock();
2418 		ret = dev_ifconf((char __user *) arg);
2419 		rtnl_shunlock();
2420 		return ret;
2421 	}
2422 	if (cmd == SIOCGIFNAME)
2423 		return dev_ifname((struct ifreq __user *)arg);
2424 
2425 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2426 		return -EFAULT;
2427 
2428 	ifr.ifr_name[IFNAMSIZ-1] = 0;
2429 
2430 	colon = strchr(ifr.ifr_name, ':');
2431 	if (colon)
2432 		*colon = 0;
2433 
2434 	/*
2435 	 *	See which interface the caller is talking about.
2436 	 */
2437 
2438 	switch (cmd) {
2439 		/*
2440 		 *	These ioctl calls:
2441 		 *	- can be done by all.
2442 		 *	- are atomic and do not require locking.
2443 		 *	- return a value
2444 		 */
2445 		case SIOCGIFFLAGS:
2446 		case SIOCGIFMETRIC:
2447 		case SIOCGIFMTU:
2448 		case SIOCGIFHWADDR:
2449 		case SIOCGIFSLAVE:
2450 		case SIOCGIFMAP:
2451 		case SIOCGIFINDEX:
2452 		case SIOCGIFTXQLEN:
2453 			dev_load(ifr.ifr_name);
2454 			read_lock(&dev_base_lock);
2455 			ret = dev_ifsioc(&ifr, cmd);
2456 			read_unlock(&dev_base_lock);
2457 			if (!ret) {
2458 				if (colon)
2459 					*colon = ':';
2460 				if (copy_to_user(arg, &ifr,
2461 						 sizeof(struct ifreq)))
2462 					ret = -EFAULT;
2463 			}
2464 			return ret;
2465 
2466 		case SIOCETHTOOL:
2467 			dev_load(ifr.ifr_name);
2468 			rtnl_lock();
2469 			ret = dev_ethtool(&ifr);
2470 			rtnl_unlock();
2471 			if (!ret) {
2472 				if (colon)
2473 					*colon = ':';
2474 				if (copy_to_user(arg, &ifr,
2475 						 sizeof(struct ifreq)))
2476 					ret = -EFAULT;
2477 			}
2478 			return ret;
2479 
2480 		/*
2481 		 *	These ioctl calls:
2482 		 *	- require superuser power.
2483 		 *	- require strict serialization.
2484 		 *	- return a value
2485 		 */
2486 		case SIOCGMIIPHY:
2487 		case SIOCGMIIREG:
2488 		case SIOCSIFNAME:
2489 			if (!capable(CAP_NET_ADMIN))
2490 				return -EPERM;
2491 			dev_load(ifr.ifr_name);
2492 			rtnl_lock();
2493 			ret = dev_ifsioc(&ifr, cmd);
2494 			rtnl_unlock();
2495 			if (!ret) {
2496 				if (colon)
2497 					*colon = ':';
2498 				if (copy_to_user(arg, &ifr,
2499 						 sizeof(struct ifreq)))
2500 					ret = -EFAULT;
2501 			}
2502 			return ret;
2503 
2504 		/*
2505 		 *	These ioctl calls:
2506 		 *	- require superuser power.
2507 		 *	- require strict serialization.
2508 		 *	- do not return a value
2509 		 */
2510 		case SIOCSIFFLAGS:
2511 		case SIOCSIFMETRIC:
2512 		case SIOCSIFMTU:
2513 		case SIOCSIFMAP:
2514 		case SIOCSIFHWADDR:
2515 		case SIOCSIFSLAVE:
2516 		case SIOCADDMULTI:
2517 		case SIOCDELMULTI:
2518 		case SIOCSIFHWBROADCAST:
2519 		case SIOCSIFTXQLEN:
2520 		case SIOCSMIIREG:
2521 		case SIOCBONDENSLAVE:
2522 		case SIOCBONDRELEASE:
2523 		case SIOCBONDSETHWADDR:
2524 		case SIOCBONDSLAVEINFOQUERY:
2525 		case SIOCBONDINFOQUERY:
2526 		case SIOCBONDCHANGEACTIVE:
2527 		case SIOCBRADDIF:
2528 		case SIOCBRDELIF:
2529 			if (!capable(CAP_NET_ADMIN))
2530 				return -EPERM;
2531 			dev_load(ifr.ifr_name);
2532 			rtnl_lock();
2533 			ret = dev_ifsioc(&ifr, cmd);
2534 			rtnl_unlock();
2535 			return ret;
2536 
2537 		case SIOCGIFMEM:
2538 			/* Get the per device memory space. We can add this but
2539 			 * currently do not support it */
2540 		case SIOCSIFMEM:
2541 			/* Set the per device memory buffer space.
2542 			 * Not applicable in our case */
2543 		case SIOCSIFLINK:
2544 			return -EINVAL;
2545 
2546 		/*
2547 		 *	Unknown or private ioctl.
2548 		 */
2549 		default:
2550 			if (cmd == SIOCWANDEV ||
2551 			    (cmd >= SIOCDEVPRIVATE &&
2552 			     cmd <= SIOCDEVPRIVATE + 15)) {
2553 				dev_load(ifr.ifr_name);
2554 				rtnl_lock();
2555 				ret = dev_ifsioc(&ifr, cmd);
2556 				rtnl_unlock();
2557 				if (!ret && copy_to_user(arg, &ifr,
2558 							 sizeof(struct ifreq)))
2559 					ret = -EFAULT;
2560 				return ret;
2561 			}
2562 #ifdef WIRELESS_EXT
2563 			/* Take care of Wireless Extensions */
2564 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2565 				/* If command is `set a parameter', or
2566 				 * `get the encoding parameters', check if
2567 				 * the user has the right to do it */
2568 				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2569 					if (!capable(CAP_NET_ADMIN))
2570 						return -EPERM;
2571 				}
2572 				dev_load(ifr.ifr_name);
2573 				rtnl_lock();
2574 				/* Follow me in net/core/wireless.c */
2575 				ret = wireless_process_ioctl(&ifr, cmd);
2576 				rtnl_unlock();
2577 				if (IW_IS_GET(cmd) &&
2578 				    copy_to_user(arg, &ifr,
2579 					    	 sizeof(struct ifreq)))
2580 					ret = -EFAULT;
2581 				return ret;
2582 			}
2583 #endif	/* WIRELESS_EXT */
2584 			return -EINVAL;
2585 	}
2586 }
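
/*
 * Illustrative sketch (not part of this file): the user space view of
 * the dispatch above, reading and then setting interface flags on
 * "eth0":
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ioctl(fd, SIOCGIFFLAGS, &ifr);	// "can be done by all"
 *	ifr.ifr_flags |= IFF_UP;
 *	ioctl(fd, SIOCSIFFLAGS, &ifr);	// requires CAP_NET_ADMIN
 */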
2587 
2588 
2589 /**
2590  *	dev_new_index	-	allocate an ifindex
2591  *
2592  *	Returns a suitable unique value for a new device interface
2593  *	number.  The caller must hold the rtnl semaphore or the
2594  *	dev_base_lock to be sure it remains unique.
2595  */
2596 static int dev_new_index(void)
2597 {
2598 	static int ifindex;
2599 	for (;;) {
2600 		if (++ifindex <= 0)
2601 			ifindex = 1;
2602 		if (!__dev_get_by_index(ifindex))
2603 			return ifindex;
2604 	}
2605 }
2606 
2607 static int dev_boot_phase = 1;
2608 
2609 /* Delayed registration/unregistration */
2610 static DEFINE_SPINLOCK(net_todo_list_lock);
2611 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2612 
2613 static inline void net_set_todo(struct net_device *dev)
2614 {
2615 	spin_lock(&net_todo_list_lock);
2616 	list_add_tail(&dev->todo_list, &net_todo_list);
2617 	spin_unlock(&net_todo_list_lock);
2618 }
2619 
2620 /**
2621  *	register_netdevice	- register a network device
2622  *	@dev: device to register
2623  *
2624  *	Take a completed network device structure and add it to the kernel
2625  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2626  *	chain. 0 is returned on success. A negative errno code is returned
2627  *	on a failure to set up the device, or if the name is a duplicate.
2628  *
2629  *	Callers must hold the rtnl semaphore. You may want
2630  *	register_netdev() instead of this.
2631  *
2632  *	BUGS:
2633  *	The locking appears insufficient to guarantee two parallel registers
2634  *	will not get the same name.
2635  */
2636 
2637 int register_netdevice(struct net_device *dev)
2638 {
2639 	struct hlist_head *head;
2640 	struct hlist_node *p;
2641 	int ret;
2642 
2643 	BUG_ON(dev_boot_phase);
2644 	ASSERT_RTNL();
2645 
2646 	/* When net_devices are persistent, this will be fatal. */
2647 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2648 
2649 	spin_lock_init(&dev->queue_lock);
2650 	spin_lock_init(&dev->xmit_lock);
2651 	dev->xmit_lock_owner = -1;
2652 #ifdef CONFIG_NET_CLS_ACT
2653 	spin_lock_init(&dev->ingress_lock);
2654 #endif
2655 
2656 	ret = alloc_divert_blk(dev);
2657 	if (ret)
2658 		goto out;
2659 
2660 	dev->iflink = -1;
2661 
2662 	/* Init, if this function is available */
2663 	if (dev->init) {
2664 		ret = dev->init(dev);
2665 		if (ret) {
2666 			if (ret > 0)
2667 				ret = -EIO;
2668 			goto out_err;
2669 		}
2670 	}
2671 
2672 	if (!dev_valid_name(dev->name)) {
2673 		ret = -EINVAL;
2674 		goto out_err;
2675 	}
2676 
2677 	dev->ifindex = dev_new_index();
2678 	if (dev->iflink == -1)
2679 		dev->iflink = dev->ifindex;
2680 
2681 	/* Check for existence of name */
2682 	head = dev_name_hash(dev->name);
2683 	hlist_for_each(p, head) {
2684 		struct net_device *d
2685 			= hlist_entry(p, struct net_device, name_hlist);
2686 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2687 			ret = -EEXIST;
2688 			goto out_err;
2689 		}
2690 	}
2691 
2692 	/* Fix illegal SG+CSUM combinations. */
2693 	if ((dev->features & NETIF_F_SG) &&
2694 	    !(dev->features & (NETIF_F_IP_CSUM |
2695 			       NETIF_F_NO_CSUM |
2696 			       NETIF_F_HW_CSUM))) {
2697 		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2698 		       dev->name);
2699 		dev->features &= ~NETIF_F_SG;
2700 	}
2701 
2702 	/* TSO requires that SG is present as well. */
2703 	if ((dev->features & NETIF_F_TSO) &&
2704 	    !(dev->features & NETIF_F_SG)) {
2705 		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2706 		       dev->name);
2707 		dev->features &= ~NETIF_F_TSO;
2708 	}
2709 
2710 	/*
2711 	 *	Install a nil rebuild_header routine; it should never be
2712 	 *	called and serves only as a bug trap.
2713 	 */
2714 
2715 	if (!dev->rebuild_header)
2716 		dev->rebuild_header = default_rebuild_header;
2717 
2718 	/*
2719 	 *	Default initial state at registration is that the
2720 	 *	device is present.
2721 	 */
2722 
2723 	set_bit(__LINK_STATE_PRESENT, &dev->state);
2724 
2725 	dev->next = NULL;
2726 	dev_init_scheduler(dev);
2727 	write_lock_bh(&dev_base_lock);
2728 	*dev_tail = dev;
2729 	dev_tail = &dev->next;
2730 	hlist_add_head(&dev->name_hlist, head);
2731 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2732 	dev_hold(dev);
2733 	dev->reg_state = NETREG_REGISTERING;
2734 	write_unlock_bh(&dev_base_lock);
2735 
2736 	/* Notify protocols, that a new device appeared. */
2737 	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2738 
2739 	/* Finish registration after unlock */
2740 	net_set_todo(dev);
2741 	ret = 0;
2742 
2743 out:
2744 	return ret;
2745 out_err:
2746 	free_divert_blk(dev);
2747 	goto out;
2748 }
2749 
2750 /**
2751  *	register_netdev	- register a network device
2752  *	@dev: device to register
2753  *
2754  *	Take a completed network device structure and add it to the kernel
2755  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2756  *	chain. 0 is returned on success. A negative errno code is returned
2757  *	on a failure to set up the device, or if the name is a duplicate.
2758  *
2759  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
2760  *	and expands the device name if you passed a format string to
2761  *	alloc_netdev.
2762  */
2763 int register_netdev(struct net_device *dev)
2764 {
2765 	int err;
2766 
2767 	rtnl_lock();
2768 
2769 	/*
2770 	 * If the name is a format string the caller wants us to do a
2771 	 * name allocation.
2772 	 */
2773 	if (strchr(dev->name, '%')) {
2774 		err = dev_alloc_name(dev, dev->name);
2775 		if (err < 0)
2776 			goto out;
2777 	}
2778 
2779 	/*
2780 	 * Back compatibility hook. Kill this one in 2.5
2781 	 */
2782 	if (dev->name[0] == 0 || dev->name[0] == ' ') {
2783 		err = dev_alloc_name(dev, "eth%d");
2784 		if (err < 0)
2785 			goto out;
2786 	}
2787 
2788 	err = register_netdevice(dev);
2789 out:
2790 	rtnl_unlock();
2791 	return err;
2792 }
2793 EXPORT_SYMBOL(register_netdev);
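
/*
 * Illustrative sketch (not from this file): passing a format string so
 * that register_netdev() picks the first free name ("dummy0",
 * "dummy1", ...) via dev_alloc_name(). my_setup is hypothetical.
 *
 *	dev = alloc_netdev(0, "dummy%d", my_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);	// "dummy%d" expanded in here
 */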
2794 
2795 /*
2796  * netdev_wait_allrefs - wait until all references are gone.
2797  *
2798  * This is called when unregistering network devices.
2799  *
2800  * Any protocol or device that holds a reference should register
2801  * for netdevice notification, and clean up and put back the
2802  * reference if they receive an UNREGISTER event.
2803  * We can get stuck here if buggy protocols don't correctly
2804  * call dev_put.
2805  */
2806 static void netdev_wait_allrefs(struct net_device *dev)
2807 {
2808 	unsigned long rebroadcast_time, warning_time;
2809 
2810 	rebroadcast_time = warning_time = jiffies;
2811 	while (atomic_read(&dev->refcnt) != 0) {
2812 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2813 			rtnl_shlock();
2814 
2815 			/* Rebroadcast unregister notification */
2816 			notifier_call_chain(&netdev_chain,
2817 					    NETDEV_UNREGISTER, dev);
2818 
2819 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2820 				     &dev->state)) {
2821 				/* We must not have linkwatch events
2822 				 * pending on unregister. If this
2823 				 * happens, we simply run the queue
2824 				 * unscheduled, resulting in a noop
2825 				 * for this device.
2826 				 */
2827 				linkwatch_run_queue();
2828 			}
2829 
2830 			rtnl_shunlock();
2831 
2832 			rebroadcast_time = jiffies;
2833 		}
2834 
2835 		msleep(250);
2836 
2837 		if (time_after(jiffies, warning_time + 10 * HZ)) {
2838 			printk(KERN_EMERG "unregister_netdevice: "
2839 			       "waiting for %s to become free. Usage "
2840 			       "count = %d\n",
2841 			       dev->name, atomic_read(&dev->refcnt));
2842 			warning_time = jiffies;
2843 		}
2844 	}
2845 }
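
/*
 * Illustrative sketch (not from this file): the notifier protocol the
 * comment above describes. A subsystem caching a reference drops it on
 * NETDEV_UNREGISTER so the refcount can reach zero; my_event and
 * my_cached_dev are hypothetical.
 *
 *	static int my_event(struct notifier_block *nb,
 *			    unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER && dev == my_cached_dev) {
 *			my_cached_dev = NULL;
 *			dev_put(dev);	// give our reference back
 *		}
 *		return NOTIFY_DONE;
 *	}
 */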
2846 
2847 /* The sequence is:
2848  *
2849  *	rtnl_lock();
2850  *	...
2851  *	register_netdevice(x1);
2852  *	register_netdevice(x2);
2853  *	...
2854  *	unregister_netdevice(y1);
2855  *	unregister_netdevice(y2);
2856  *      ...
2857  *	rtnl_unlock();
2858  *	free_netdev(y1);
2859  *	free_netdev(y2);
2860  *
2861  * We are invoked by rtnl_unlock() after it drops the semaphore.
2862  * This allows us to deal with problems:
2863  * 1) We can create/delete sysfs objects which invoke hotplug
2864  *    without deadlocking with linkwatch via keventd.
2865  * 2) Since we run with the RTNL semaphore not held, we can sleep
2866  *    safely in order to wait for the netdev refcnt to drop to zero.
2867  */
2868 static DECLARE_MUTEX(net_todo_run_mutex);
2869 void netdev_run_todo(void)
2870 {
2871 	struct list_head list = LIST_HEAD_INIT(list);
2872 	int err;
2873 
2874 
2875 	/* Need to guard against multiple CPUs getting out of order. */
2876 	down(&net_todo_run_mutex);
2877 
2878 	/* Not safe to do outside the semaphore.  We must not return
2879 	 * until all unregister events invoked by the local processor
2880 	 * have been completed (either by this todo run, or one on
2881 	 * another cpu).
2882 	 */
2883 	if (list_empty(&net_todo_list))
2884 		goto out;
2885 
2886 	/* Snapshot list, allow later requests */
2887 	spin_lock(&net_todo_list_lock);
2888 	list_splice_init(&net_todo_list, &list);
2889 	spin_unlock(&net_todo_list_lock);
2890 
2891 	while (!list_empty(&list)) {
2892 		struct net_device *dev
2893 			= list_entry(list.next, struct net_device, todo_list);
2894 		list_del(&dev->todo_list);
2895 
2896 		switch(dev->reg_state) {
2897 		case NETREG_REGISTERING:
2898 			err = netdev_register_sysfs(dev);
2899 			if (err)
2900 				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
2901 				       dev->name, err);
2902 			dev->reg_state = NETREG_REGISTERED;
2903 			break;
2904 
2905 		case NETREG_UNREGISTERING:
2906 			netdev_unregister_sysfs(dev);
2907 			dev->reg_state = NETREG_UNREGISTERED;
2908 
2909 			netdev_wait_allrefs(dev);
2910 
2911 			/* paranoia */
2912 			BUG_ON(atomic_read(&dev->refcnt));
2913 			BUG_TRAP(!dev->ip_ptr);
2914 			BUG_TRAP(!dev->ip6_ptr);
2915 			BUG_TRAP(!dev->dn_ptr);
2916 
2917 
2918 			/* It must be the very last action,
2919 			 * after this 'dev' may point to freed up memory.
2920 			 */
2921 			if (dev->destructor)
2922 				dev->destructor(dev);
2923 			break;
2924 
2925 		default:
2926 			printk(KERN_ERR "network todo '%s' but state %d\n",
2927 			       dev->name, dev->reg_state);
2928 			break;
2929 		}
2930 	}
2931 
2932 out:
2933 	up(&net_todo_run_mutex);
2934 }
2935 
2936 /**
2937  *	alloc_netdev - allocate network device
2938  *	@sizeof_priv:	size of private data to allocate space for
2939  *	@name:		device name format string
2940  *	@setup:		callback to initialize device
2941  *
2942  *	Allocates a struct net_device with private data area for driver use
2943  *	and performs basic initialization.
2944  */
2945 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
2946 		void (*setup)(struct net_device *))
2947 {
2948 	void *p;
2949 	struct net_device *dev;
2950 	int alloc_size;
2951 
2952 	/* ensure 32-byte alignment of both the device and private area */
2953 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
2954 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
2955 
2956 	p = kmalloc(alloc_size, GFP_KERNEL);
2957 	if (!p) {
2958 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
2959 		return NULL;
2960 	}
2961 	memset(p, 0, alloc_size);
2962 
2963 	dev = (struct net_device *)
2964 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
2965 	dev->padded = (char *)dev - (char *)p;
2966 
2967 	if (sizeof_priv)
2968 		dev->priv = netdev_priv(dev);
2969 
2970 	setup(dev);
2971 	strcpy(dev->name, name);
2972 	return dev;
2973 }
2974 EXPORT_SYMBOL(alloc_netdev);
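
/*
 * Illustrative sketch (not from this file): the private area reserved
 * by @sizeof_priv is reached through netdev_priv(); struct my_priv and
 * my_setup are hypothetical.
 *
 *	struct my_priv {
 *		spinlock_t lock;
 *		unsigned long rx_count;
 *	};
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "myif%d", my_setup);
 *	if (dev) {
 *		struct my_priv *priv = netdev_priv(dev);
 *		spin_lock_init(&priv->lock);
 *	}
 */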
2975 
2976 /**
2977  *	free_netdev - free network device
2978  *	@dev: device
2979  *
2980  *	This function does the last stage of destroying an allocated device
2981  * 	interface. The reference to the device object is released.
2982  *	If this is the last reference then it will be freed.
2983  */
2984 void free_netdev(struct net_device *dev)
2985 {
2986 #ifdef CONFIG_SYSFS
2987 	/* Compatibility with error handling in drivers */
2988 	if (dev->reg_state == NETREG_UNINITIALIZED) {
2989 		kfree((char *)dev - dev->padded);
2990 		return;
2991 	}
2992 
2993 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
2994 	dev->reg_state = NETREG_RELEASED;
2995 
2996 	/* will free via class release */
2997 	class_device_put(&dev->class_dev);
2998 #else
2999 	kfree((char *)dev - dev->padded);
3000 #endif
3001 }
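
/*
 * Illustrative sketch (not from this file): the NETREG_UNINITIALIZED
 * branch above serves driver error paths where the device never made
 * it through register_netdevice():
 *
 *	err = register_netdevice(dev);
 *	if (err) {
 *		free_netdev(dev);	// reg_state still NETREG_UNINITIALIZED
 *		return err;
 *	}
 */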
3002 
3003 /* Synchronize with packet receive processing. */
3004 void synchronize_net(void)
3005 {
3006 	might_sleep();
3007 	synchronize_rcu();
3008 }
3009 
3010 /**
3011  *	unregister_netdevice - remove device from the kernel
3012  *	@dev: device
3013  *
3014  *	This function shuts down a device interface and removes it
3015  *	from the kernel tables. On success 0 is returned, on a failure
3016  *	a negative errno code is returned.
3017  *
3018  *	Callers must hold the rtnl semaphore.  You may want
3019  *	unregister_netdev() instead of this.
3020  */
3021 
3022 int unregister_netdevice(struct net_device *dev)
3023 {
3024 	struct net_device *d, **dp;
3025 
3026 	BUG_ON(dev_boot_phase);
3027 	ASSERT_RTNL();
3028 
3029 	/* Some devices call without registering for initialization unwind. */
3030 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3031 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3032 				  "was registered\n", dev->name, dev);
3033 		return -ENODEV;
3034 	}
3035 
3036 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3037 
3038 	/* If device is running, close it first. */
3039 	if (dev->flags & IFF_UP)
3040 		dev_close(dev);
3041 
3042 	/* And unlink it from device chain. */
3043 	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3044 		if (d == dev) {
3045 			write_lock_bh(&dev_base_lock);
3046 			hlist_del(&dev->name_hlist);
3047 			hlist_del(&dev->index_hlist);
3048 			if (dev_tail == &dev->next)
3049 				dev_tail = dp;
3050 			*dp = d->next;
3051 			write_unlock_bh(&dev_base_lock);
3052 			break;
3053 		}
3054 	}
3055 	if (!d) {
3056 		printk(KERN_ERR "unregister net_device: '%s' not found\n",
3057 		       dev->name);
3058 		return -ENODEV;
3059 	}
3060 
3061 	dev->reg_state = NETREG_UNREGISTERING;
3062 
3063 	synchronize_net();
3064 
3065 	/* Shutdown queueing discipline. */
3066 	dev_shutdown(dev);
3067 
3068 
3069 	/* Notify protocols, that we are about to destroy
3070 	   this device. They should clean all the things.
3071 	*/
3072 	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3073 
3074 	/*
3075 	 *	Flush the multicast chain
3076 	 */
3077 	dev_mc_discard(dev);
3078 
3079 	if (dev->uninit)
3080 		dev->uninit(dev);
3081 
3082 	/* Notifier chain MUST detach us from master device. */
3083 	BUG_TRAP(!dev->master);
3084 
3085 	free_divert_blk(dev);
3086 
3087 	/* Finish processing unregister after unlock */
3088 	net_set_todo(dev);
3089 
3090 	synchronize_net();
3091 
3092 	dev_put(dev);
3093 	return 0;
3094 }
3095 
3096 /**
3097  *	unregister_netdev - remove device from the kernel
3098  *	@dev: device
3099  *
3100  *	This function shuts down a device interface and removes it
3101  *	from the kernel tables. On success 0 is returned, on a failure
3102  *	a negative errno code is returned.
3103  *
3104  *	This is just a wrapper for unregister_netdevice that takes
3105  *	the rtnl semaphore.  In general you want to use this and not
3106  *	unregister_netdevice.
3107  */
3108 void unregister_netdev(struct net_device *dev)
3109 {
3110 	rtnl_lock();
3111 	unregister_netdevice(dev);
3112 	rtnl_unlock();
3113 }
3114 
3115 EXPORT_SYMBOL(unregister_netdev);
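
/*
 * Illustrative sketch (not from this file): typical module teardown
 * pairing a register_netdev() in module init; my_dev and my_exit are
 * hypothetical.
 *
 *	static void __exit my_exit(void)
 *	{
 *		unregister_netdev(my_dev);	// takes and drops RTNL
 *		free_netdev(my_dev);
 *	}
 */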
3116 
3117 #ifdef CONFIG_HOTPLUG_CPU
3118 static int dev_cpu_callback(struct notifier_block *nfb,
3119 			    unsigned long action,
3120 			    void *ocpu)
3121 {
3122 	struct sk_buff **list_skb;
3123 	struct net_device **list_net;
3124 	struct sk_buff *skb;
3125 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
3126 	struct softnet_data *sd, *oldsd;
3127 
3128 	if (action != CPU_DEAD)
3129 		return NOTIFY_OK;
3130 
3131 	local_irq_disable();
3132 	cpu = smp_processor_id();
3133 	sd = &per_cpu(softnet_data, cpu);
3134 	oldsd = &per_cpu(softnet_data, oldcpu);
3135 
3136 	/* Find end of our completion_queue. */
3137 	list_skb = &sd->completion_queue;
3138 	while (*list_skb)
3139 		list_skb = &(*list_skb)->next;
3140 	/* Append completion queue from offline CPU. */
3141 	*list_skb = oldsd->completion_queue;
3142 	oldsd->completion_queue = NULL;
3143 
3144 	/* Find end of our output_queue. */
3145 	list_net = &sd->output_queue;
3146 	while (*list_net)
3147 		list_net = &(*list_net)->next_sched;
3148 	/* Append output queue from offline CPU. */
3149 	*list_net = oldsd->output_queue;
3150 	oldsd->output_queue = NULL;
3151 
3152 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
3153 	local_irq_enable();
3154 
3155 	/* Process offline CPU's input_pkt_queue */
3156 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3157 		netif_rx(skb);
3158 
3159 	return NOTIFY_OK;
3160 }
3161 #endif /* CONFIG_HOTPLUG_CPU */
3162 
3163 
3164 /*
3165  *	Initialize the DEV module. At boot time this walks the device list and
3166  *	unhooks any devices that fail to initialize (normally hardware not
3167  *	present) and leaves us with a valid list of present and active devices.
3168  *
3169  */
3170 
3171 /*
3172  *       This is called single threaded during boot, so no need
3173  *       to take the rtnl semaphore.
3174  */
3175 static int __init net_dev_init(void)
3176 {
3177 	int i, rc = -ENOMEM;
3178 
3179 	BUG_ON(!dev_boot_phase);
3180 
3181 	net_random_init();
3182 
3183 	if (dev_proc_init())
3184 		goto out;
3185 
3186 	if (netdev_sysfs_init())
3187 		goto out;
3188 
3189 	INIT_LIST_HEAD(&ptype_all);
3190 	for (i = 0; i < 16; i++)
3191 		INIT_LIST_HEAD(&ptype_base[i]);
3192 
3193 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3194 		INIT_HLIST_HEAD(&dev_name_head[i]);
3195 
3196 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3197 		INIT_HLIST_HEAD(&dev_index_head[i]);
3198 
3199 	/*
3200 	 *	Initialise the packet receive queues.
3201 	 */
3202 
3203 	for (i = 0; i < NR_CPUS; i++) {
3204 		struct softnet_data *queue;
3205 
3206 		queue = &per_cpu(softnet_data, i);
3207 		skb_queue_head_init(&queue->input_pkt_queue);
3208 		queue->completion_queue = NULL;
3209 		INIT_LIST_HEAD(&queue->poll_list);
3210 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3211 		queue->backlog_dev.weight = weight_p;
3212 		queue->backlog_dev.poll = process_backlog;
3213 		atomic_set(&queue->backlog_dev.refcnt, 1);
3214 	}
3215 
3216 	dev_boot_phase = 0;
3217 
3218 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3219 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3220 
3221 	hotcpu_notifier(dev_cpu_callback, 0);
3222 	dst_init();
3223 	dev_mcast_init();
3224 	rc = 0;
3225 out:
3226 	return rc;
3227 }
3228 
3229 subsys_initcall(net_dev_init);
3230 
3231 EXPORT_SYMBOL(__dev_get_by_index);
3232 EXPORT_SYMBOL(__dev_get_by_name);
3233 EXPORT_SYMBOL(__dev_remove_pack);
3234 EXPORT_SYMBOL(__skb_linearize);
3235 EXPORT_SYMBOL(dev_add_pack);
3236 EXPORT_SYMBOL(dev_alloc_name);
3237 EXPORT_SYMBOL(dev_close);
3238 EXPORT_SYMBOL(dev_get_by_flags);
3239 EXPORT_SYMBOL(dev_get_by_index);
3240 EXPORT_SYMBOL(dev_get_by_name);
3241 EXPORT_SYMBOL(dev_ioctl);
3242 EXPORT_SYMBOL(dev_open);
3243 EXPORT_SYMBOL(dev_queue_xmit);
3244 EXPORT_SYMBOL(dev_remove_pack);
3245 EXPORT_SYMBOL(dev_set_allmulti);
3246 EXPORT_SYMBOL(dev_set_promiscuity);
3247 EXPORT_SYMBOL(dev_change_flags);
3248 EXPORT_SYMBOL(dev_set_mtu);
3249 EXPORT_SYMBOL(dev_set_mac_address);
3250 EXPORT_SYMBOL(free_netdev);
3251 EXPORT_SYMBOL(netdev_boot_setup_check);
3252 EXPORT_SYMBOL(netdev_set_master);
3253 EXPORT_SYMBOL(netdev_state_change);
3254 EXPORT_SYMBOL(netif_receive_skb);
3255 EXPORT_SYMBOL(netif_rx);
3256 EXPORT_SYMBOL(register_gifconf);
3257 EXPORT_SYMBOL(register_netdevice);
3258 EXPORT_SYMBOL(register_netdevice_notifier);
3259 EXPORT_SYMBOL(skb_checksum_help);
3260 EXPORT_SYMBOL(synchronize_net);
3261 EXPORT_SYMBOL(unregister_netdevice);
3262 EXPORT_SYMBOL(unregister_netdevice_notifier);
3263 EXPORT_SYMBOL(net_enable_timestamp);
3264 EXPORT_SYMBOL(net_disable_timestamp);
3265 EXPORT_SYMBOL(dev_get_flags);
3266 
3267 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3268 EXPORT_SYMBOL(br_handle_frame_hook);
3269 EXPORT_SYMBOL(br_fdb_get_hook);
3270 EXPORT_SYMBOL(br_fdb_put_hook);
3271 #endif
3272 
3273 #ifdef CONFIG_KMOD
3274 EXPORT_SYMBOL(dev_load);
3275 #endif
3276 
3277 EXPORT_PER_CPU_SYMBOL(softnet_data);
3278