xref: /linux/net/core/dev.c (revision 6e8331ac6973435b1e7604c30f2ad394035b46e1)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/sock.h>
96 #include <linux/rtnetlink.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/stat.h>
100 #include <linux/if_bridge.h>
101 #include <linux/divert.h>
102 #include <net/dst.h>
103 #include <net/pkt_sched.h>
104 #include <net/checksum.h>
105 #include <linux/highmem.h>
106 #include <linux/init.h>
107 #include <linux/kmod.h>
108 #include <linux/module.h>
109 #include <linux/kallsyms.h>
110 #include <linux/netpoll.h>
111 #include <linux/rcupdate.h>
112 #include <linux/delay.h>
113 #include <linux/wireless.h>
114 #include <net/iw_handler.h>
115 #include <asm/current.h>
116 #include <linux/audit.h>
117 #include <linux/dmaengine.h>
118 #include <linux/err.h>
119 
120 /*
121  *	The list of packet types we will receive (as opposed to discard)
122  *	and the routines to invoke.
123  *
124  *	Why 16. Because with 16 the only overlap we get on a hash of the
125  *	low nibble of the protocol value is RARP/SNAP/X.25.
126  *
127  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
128  *             sure which should go first, but I bet it won't make much
129  *             difference if we are running VLANs.  The good news is that
130  *             this protocol won't be in the list unless compiled in, so
131  *             the average user (w/out VLANs) will not be adversely affected.
132  *             --BLG
133  *
134  *		0800	IP
135  *		8100    802.1Q VLAN
136  *		0001	802.3
137  *		0002	AX.25
138  *		0004	802.2
139  *		8035	RARP
140  *		0005	SNAP
141  *		0805	X.25
142  *		0806	ARP
143  *		8137	IPX
144  *		0009	Localtalk
145  *		86DD	IPv6
146  */
147 
148 static DEFINE_SPINLOCK(ptype_lock);
149 static struct list_head ptype_base[16];	/* 16 way hashed list */
150 static struct list_head ptype_all;		/* Taps */
151 
152 #ifdef CONFIG_NET_DMA
153 static struct dma_client *net_dma_client;
154 static unsigned int net_dma_count;
155 static spinlock_t net_dma_event_lock;
156 #endif
157 
158 /*
159  * The @dev_base list is protected by @dev_base_lock and the rtnl
160  * semaphore.
161  *
162  * Pure readers hold dev_base_lock for reading.
163  *
164  * Writers must hold the rtnl semaphore while they loop through the
165  * dev_base list, and hold dev_base_lock for writing when they do the
166  * actual updates.  This allows pure readers to access the list even
167  * while a writer is preparing to update it.
168  *
169  * To put it another way, dev_base_lock is held for writing only to
170  * protect against pure readers; the rtnl semaphore provides the
171  * protection against other writers.
172  *
173  * See, for example usages, register_netdevice() and
174  * unregister_netdevice(), which must be called with the rtnl
175  * semaphore held.
176  */
177 struct net_device *dev_base;
178 static struct net_device **dev_tail = &dev_base;
179 DEFINE_RWLOCK(dev_base_lock);
180 
181 EXPORT_SYMBOL(dev_base);
182 EXPORT_SYMBOL(dev_base_lock);
183 
184 #define NETDEV_HASHBITS	8
185 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
186 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
187 
188 static inline struct hlist_head *dev_name_hash(const char *name)
189 {
190 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
191 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
192 }
193 
194 static inline struct hlist_head *dev_index_hash(int ifindex)
195 {
196 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
197 }
198 
199 /*
200  *	Our notifier list
201  */
202 
203 static RAW_NOTIFIER_HEAD(netdev_chain);
204 
205 /*
206  *	Device drivers call our routines to queue packets here. We empty the
207  *	queue in the local softnet handler.
208  */
209 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
210 
211 #ifdef CONFIG_SYSFS
212 extern int netdev_sysfs_init(void);
213 extern int netdev_register_sysfs(struct net_device *);
214 extern void netdev_unregister_sysfs(struct net_device *);
215 #else
216 #define netdev_sysfs_init()	 	(0)
217 #define netdev_register_sysfs(dev)	(0)
218 #define	netdev_unregister_sysfs(dev)	do { } while(0)
219 #endif
220 
221 
222 /*******************************************************************************
223 
224 		Protocol management and registration routines
225 
226 *******************************************************************************/
227 
228 /*
229  *	For efficiency
230  */
231 
232 static int netdev_nit;
233 
234 /*
235  *	Add a protocol ID to the list. Now that the input handler is
236  *	smarter we can dispense with all the messy stuff that used to be
237  *	here.
238  *
239  *	BEWARE!!! Protocol handlers, mangling input packets,
240  *	MUST BE last in hash buckets and checking protocol handlers
241  *	MUST start from promiscuous ptype_all chain in net_bh.
242  *	It is true now, do not change it.
243  *	Explanation follows: if protocol handler, mangling packet, will
244  *	be the first on list, it is not able to sense, that packet
245  *	is cloned and should be copied-on-write, so that it will
246  *	change it and subsequent readers will get broken packet.
247  *							--ANK (980803)
248  */
249 
250 /**
251  *	dev_add_pack - add packet handler
252  *	@pt: packet type declaration
253  *
254  *	Add a protocol handler to the networking stack. The passed &packet_type
255  *	is linked into kernel lists and may not be freed until it has been
256  *	removed from the kernel lists.
257  *
258  *	This call does not sleep therefore it can not
259  *	guarantee all CPU's that are in middle of receiving packets
260  *	will see the new packet type (until the next received packet).
261  */
262 
263 void dev_add_pack(struct packet_type *pt)
264 {
265 	int hash;
266 
267 	spin_lock_bh(&ptype_lock);
268 	if (pt->type == htons(ETH_P_ALL)) {
269 		netdev_nit++;
270 		list_add_rcu(&pt->list, &ptype_all);
271 	} else {
272 		hash = ntohs(pt->type) & 15;
273 		list_add_rcu(&pt->list, &ptype_base[hash]);
274 	}
275 	spin_unlock_bh(&ptype_lock);
276 }
277 
278 /**
279  *	__dev_remove_pack	 - remove packet handler
280  *	@pt: packet type declaration
281  *
282  *	Remove a protocol handler that was previously added to the kernel
283  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
284  *	from the kernel lists and can be freed or reused once this function
285  *	returns.
286  *
287  *      The packet type might still be in use by receivers
288  *	and must not be freed until after all the CPU's have gone
289  *	through a quiescent state.
290  */
291 void __dev_remove_pack(struct packet_type *pt)
292 {
293 	struct list_head *head;
294 	struct packet_type *pt1;
295 
296 	spin_lock_bh(&ptype_lock);
297 
298 	if (pt->type == htons(ETH_P_ALL)) {
299 		netdev_nit--;
300 		head = &ptype_all;
301 	} else
302 		head = &ptype_base[ntohs(pt->type) & 15];
303 
304 	list_for_each_entry(pt1, head, list) {
305 		if (pt == pt1) {
306 			list_del_rcu(&pt->list);
307 			goto out;
308 		}
309 	}
310 
311 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
312 out:
313 	spin_unlock_bh(&ptype_lock);
314 }
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler that was registered with dev_add_pack()
 *	via __dev_remove_pack(), then call synchronize_net().  Once this
 *	function returns the &packet_type can be freed or reused.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);
	synchronize_net();
}
333 
334 /******************************************************************************
335 
336 		      Device Boot-time Settings Routines
337 
338 *******************************************************************************/
339 
340 /* Boot time configuration table */
341 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
342 
343 /**
344  *	netdev_boot_setup_add	- add new setup entry
345  *	@name: name of the device
346  *	@map: configured settings for the device
347  *
348  *	Adds new setup entry to the dev_boot_setup list.  The function
349  *	returns 0 on error and 1 on success.  This is a generic routine to
350  *	all netdevices.
351  */
352 static int netdev_boot_setup_add(char *name, struct ifmap *map)
353 {
354 	struct netdev_boot_setup *s;
355 	int i;
356 
357 	s = dev_boot_setup;
358 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
359 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
360 			memset(s[i].name, 0, sizeof(s[i].name));
361 			strcpy(s[i].name, name);
362 			memcpy(&s[i].map, map, sizeof(s[i].map));
363 			break;
364 		}
365 	}
366 
367 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
368 }
369 
370 /**
371  *	netdev_boot_setup_check	- check boot time settings
372  *	@dev: the netdevice
373  *
374  * 	Check boot time settings for the device.
375  *	The found settings are set for the device to be used
376  *	later in the device probing.
377  *	Returns 0 if no settings found, 1 if they are.
378  */
379 int netdev_boot_setup_check(struct net_device *dev)
380 {
381 	struct netdev_boot_setup *s = dev_boot_setup;
382 	int i;
383 
384 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
385 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
386 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
387 			dev->irq 	= s[i].map.irq;
388 			dev->base_addr 	= s[i].map.base_addr;
389 			dev->mem_start 	= s[i].map.mem_start;
390 			dev->mem_end 	= s[i].map.mem_end;
391 			return 1;
392 		}
393 	}
394 	return 0;
395 }
396 
397 
398 /**
399  *	netdev_boot_base	- get address from boot time settings
400  *	@prefix: prefix for network device
401  *	@unit: id for network device
402  *
403  * 	Check boot time settings for the base address of device.
404  *	The found settings are set for the device to be used
405  *	later in the device probing.
406  *	Returns 0 if no settings found.
407  */
408 unsigned long netdev_boot_base(const char *prefix, int unit)
409 {
410 	const struct netdev_boot_setup *s = dev_boot_setup;
411 	char name[IFNAMSIZ];
412 	int i;
413 
414 	sprintf(name, "%s%d", prefix, unit);
415 
416 	/*
417 	 * If device already registered then return base of 1
418 	 * to indicate not to probe for this interface
419 	 */
420 	if (__dev_get_by_name(name))
421 		return 1;
422 
423 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
424 		if (!strcmp(name, s[i].name))
425 			return s[i].map.base_addr;
426 	return 0;
427 }
428 
429 /*
430  * Saves at boot time configured settings for any netdevice.
431  */
432 int __init netdev_boot_setup(char *str)
433 {
434 	int ints[5];
435 	struct ifmap map;
436 
437 	str = get_options(str, ARRAY_SIZE(ints), ints);
438 	if (!str || !*str)
439 		return 0;
440 
441 	/* Save settings */
442 	memset(&map, 0, sizeof(map));
443 	if (ints[0] > 0)
444 		map.irq = ints[1];
445 	if (ints[0] > 1)
446 		map.base_addr = ints[2];
447 	if (ints[0] > 2)
448 		map.mem_start = ints[3];
449 	if (ints[0] > 3)
450 		map.mem_end = ints[4];
451 
452 	/* Add new entry to the list */
453 	return netdev_boot_setup_add(str, &map);
454 }
455 
456 __setup("netdev=", netdev_boot_setup);
457 
458 /*******************************************************************************
459 
460 			    Device Interface Subroutines
461 
462 *******************************************************************************/
463 
464 /**
465  *	__dev_get_by_name	- find a device by its name
466  *	@name: name to find
467  *
468  *	Find an interface by name. Must be called under RTNL semaphore
469  *	or @dev_base_lock. If the name is found a pointer to the device
470  *	is returned. If the name is not found then %NULL is returned. The
471  *	reference counters are not incremented so the caller must be
472  *	careful with locks.
473  */
474 
475 struct net_device *__dev_get_by_name(const char *name)
476 {
477 	struct hlist_node *p;
478 
479 	hlist_for_each(p, dev_name_hash(name)) {
480 		struct net_device *dev
481 			= hlist_entry(p, struct net_device, name_hlist);
482 		if (!strncmp(dev->name, name, IFNAMSIZ))
483 			return dev;
484 	}
485 	return NULL;
486 }
487 
488 /**
489  *	dev_get_by_name		- find a device by its name
490  *	@name: name to find
491  *
492  *	Find an interface by name. This can be called from any
493  *	context and does its own locking. The returned handle has
494  *	the usage count incremented and the caller must use dev_put() to
495  *	release it when it is no longer needed. %NULL is returned if no
496  *	matching device is found.
497  */
498 
499 struct net_device *dev_get_by_name(const char *name)
500 {
501 	struct net_device *dev;
502 
503 	read_lock(&dev_base_lock);
504 	dev = __dev_get_by_name(name);
505 	if (dev)
506 		dev_hold(dev);
507 	read_unlock(&dev_base_lock);
508 	return dev;
509 }
510 
511 /**
512  *	__dev_get_by_index - find a device by its ifindex
513  *	@ifindex: index of device
514  *
515  *	Search for an interface by index. Returns %NULL if the device
516  *	is not found or a pointer to the device. The device has not
517  *	had its reference counter increased so the caller must be careful
518  *	about locking. The caller must hold either the RTNL semaphore
519  *	or @dev_base_lock.
520  */
521 
522 struct net_device *__dev_get_by_index(int ifindex)
523 {
524 	struct hlist_node *p;
525 
526 	hlist_for_each(p, dev_index_hash(ifindex)) {
527 		struct net_device *dev
528 			= hlist_entry(p, struct net_device, index_hlist);
529 		if (dev->ifindex == ifindex)
530 			return dev;
531 	}
532 	return NULL;
533 }
534 
535 
536 /**
537  *	dev_get_by_index - find a device by its ifindex
538  *	@ifindex: index of device
539  *
540  *	Search for an interface by index. Returns NULL if the device
541  *	is not found or a pointer to the device. The device returned has
542  *	had a reference added and the pointer is safe until the user calls
543  *	dev_put to indicate they have finished with it.
544  */
545 
546 struct net_device *dev_get_by_index(int ifindex)
547 {
548 	struct net_device *dev;
549 
550 	read_lock(&dev_base_lock);
551 	dev = __dev_get_by_index(ifindex);
552 	if (dev)
553 		dev_hold(dev);
554 	read_unlock(&dev_base_lock);
555 	return dev;
556 }
557 
558 /**
559  *	dev_getbyhwaddr - find a device by its hardware address
560  *	@type: media type of device
561  *	@ha: hardware address
562  *
563  *	Search for an interface by MAC address. Returns NULL if the device
564  *	is not found or a pointer to the device. The caller must hold the
565  *	rtnl semaphore. The returned device has not had its ref count increased
566  *	and the caller must therefore be careful about locking
567  *
568  *	BUGS:
569  *	If the API was consistent this would be __dev_get_by_hwaddr
570  */
571 
572 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
573 {
574 	struct net_device *dev;
575 
576 	ASSERT_RTNL();
577 
578 	for (dev = dev_base; dev; dev = dev->next)
579 		if (dev->type == type &&
580 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
581 			break;
582 	return dev;
583 }
584 
585 EXPORT_SYMBOL(dev_getbyhwaddr);
586 
587 struct net_device *dev_getfirstbyhwtype(unsigned short type)
588 {
589 	struct net_device *dev;
590 
591 	rtnl_lock();
592 	for (dev = dev_base; dev; dev = dev->next) {
593 		if (dev->type == type) {
594 			dev_hold(dev);
595 			break;
596 		}
597 	}
598 	rtnl_unlock();
599 	return dev;
600 }
601 
602 EXPORT_SYMBOL(dev_getfirstbyhwtype);
603 
604 /**
605  *	dev_get_by_flags - find any device with given flags
606  *	@if_flags: IFF_* values
607  *	@mask: bitmask of bits in if_flags to check
608  *
609  *	Search for any interface with the given flags. Returns NULL if a device
610  *	is not found or a pointer to the device. The device returned has
611  *	had a reference added and the pointer is safe until the user calls
612  *	dev_put to indicate they have finished with it.
613  */
614 
615 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
616 {
617 	struct net_device *dev;
618 
619 	read_lock(&dev_base_lock);
620 	for (dev = dev_base; dev != NULL; dev = dev->next) {
621 		if (((dev->flags ^ if_flags) & mask) == 0) {
622 			dev_hold(dev);
623 			break;
624 		}
625 	}
626 	read_unlock(&dev_base_lock);
627 	return dev;
628 }
629 
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to allow sysfs
 *	to work.  A name is rejected when it is empty, is "." or "..",
 *	or contains a '/'.  Returns 1 for an acceptable name, 0 otherwise.
 */
int dev_valid_name(const char *name)
{
	if (name[0] == '\0')
		return 0;
	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;
	if (strchr(name, '/') != NULL)
		return 0;
	return 1;
}
644 
645 /**
646  *	dev_alloc_name - allocate a name for a device
647  *	@dev: device
648  *	@name: name format string
649  *
650  *	Passed a format string - eg "lt%d" it will try and find a suitable
651  *	id. It scans list of devices to build up a free map, then chooses
652  *	the first empty slot. The caller must hold the dev_base or rtnl lock
653  *	while allocating the name and adding the device in order to avoid
654  *	duplicates.
655  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
656  *	Returns the number of the unit assigned or a negative errno code.
657  */
658 
659 int dev_alloc_name(struct net_device *dev, const char *name)
660 {
661 	int i = 0;
662 	char buf[IFNAMSIZ];
663 	const char *p;
664 	const int max_netdevices = 8*PAGE_SIZE;
665 	long *inuse;
666 	struct net_device *d;
667 
668 	p = strnchr(name, IFNAMSIZ-1, '%');
669 	if (p) {
670 		/*
671 		 * Verify the string as this thing may have come from
672 		 * the user.  There must be either one "%d" and no other "%"
673 		 * characters.
674 		 */
675 		if (p[1] != 'd' || strchr(p + 2, '%'))
676 			return -EINVAL;
677 
678 		/* Use one page as a bit array of possible slots */
679 		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
680 		if (!inuse)
681 			return -ENOMEM;
682 
683 		for (d = dev_base; d; d = d->next) {
684 			if (!sscanf(d->name, name, &i))
685 				continue;
686 			if (i < 0 || i >= max_netdevices)
687 				continue;
688 
689 			/*  avoid cases where sscanf is not exact inverse of printf */
690 			snprintf(buf, sizeof(buf), name, i);
691 			if (!strncmp(buf, d->name, IFNAMSIZ))
692 				set_bit(i, inuse);
693 		}
694 
695 		i = find_first_zero_bit(inuse, max_netdevices);
696 		free_page((unsigned long) inuse);
697 	}
698 
699 	snprintf(buf, sizeof(buf), name, i);
700 	if (!__dev_get_by_name(buf)) {
701 		strlcpy(dev->name, buf, IFNAMSIZ);
702 		return i;
703 	}
704 
705 	/* It is possible to run out of possible slots
706 	 * when the name is long and there isn't enough space left
707 	 * for the digits, or if all bits are used.
708 	 */
709 	return -ENFILE;
710 }
711 
712 
713 /**
714  *	dev_change_name - change name of a device
715  *	@dev: device
716  *	@newname: name (or format string) must be at least IFNAMSIZ
717  *
718  *	Change name of a device, can pass format strings "eth%d".
719  *	for wildcarding.
720  */
721 int dev_change_name(struct net_device *dev, char *newname)
722 {
723 	int err = 0;
724 
725 	ASSERT_RTNL();
726 
727 	if (dev->flags & IFF_UP)
728 		return -EBUSY;
729 
730 	if (!dev_valid_name(newname))
731 		return -EINVAL;
732 
733 	if (strchr(newname, '%')) {
734 		err = dev_alloc_name(dev, newname);
735 		if (err < 0)
736 			return err;
737 		strcpy(newname, dev->name);
738 	}
739 	else if (__dev_get_by_name(newname))
740 		return -EEXIST;
741 	else
742 		strlcpy(dev->name, newname, IFNAMSIZ);
743 
744 	err = class_device_rename(&dev->class_dev, dev->name);
745 	if (!err) {
746 		hlist_del(&dev->name_hlist);
747 		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
748 		raw_notifier_call_chain(&netdev_chain,
749 				NETDEV_CHANGENAME, dev);
750 	}
751 
752 	return err;
753 }
754 
755 /**
756  *	netdev_features_change - device changes features
757  *	@dev: device to cause notification
758  *
759  *	Called to indicate a device has changed features.
760  */
761 void netdev_features_change(struct net_device *dev)
762 {
763 	raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
764 }
765 EXPORT_SYMBOL(netdev_features_change);
766 
767 /**
768  *	netdev_state_change - device changes state
769  *	@dev: device to cause notification
770  *
771  *	Called to indicate a device has changed state. This function calls
772  *	the notifier chains for netdev_chain and sends a NEWLINK message
773  *	to the routing socket.
774  */
775 void netdev_state_change(struct net_device *dev)
776 {
777 	if (dev->flags & IFF_UP) {
778 		raw_notifier_call_chain(&netdev_chain,
779 				NETDEV_CHANGE, dev);
780 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
781 	}
782 }
783 
784 /**
785  *	dev_load 	- load a network module
786  *	@name: name of interface
787  *
788  *	If a network interface is not present and the process has suitable
789  *	privileges this function loads the module. If module loading is not
790  *	available in this kernel then it becomes a nop.
791  */
792 
793 void dev_load(const char *name)
794 {
795 	struct net_device *dev;
796 
797 	read_lock(&dev_base_lock);
798 	dev = __dev_get_by_name(name);
799 	read_unlock(&dev_base_lock);
800 
801 	if (!dev && capable(CAP_SYS_MODULE))
802 		request_module("%s", name);
803 }
804 
805 static int default_rebuild_header(struct sk_buff *skb)
806 {
807 	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
808 	       skb->dev ? skb->dev->name : "NULL!!!");
809 	kfree_skb(skb);
810 	return 1;
811 }
812 
813 
814 /**
815  *	dev_open	- prepare an interface for use.
816  *	@dev:	device to open
817  *
818  *	Takes a device from down to up state. The device's private open
819  *	function is invoked and then the multicast lists are loaded. Finally
820  *	the device is moved into the up state and a %NETDEV_UP message is
821  *	sent to the netdev notifier chain.
822  *
823  *	Calling this function on an active interface is a nop. On a failure
824  *	a negative errno code is returned.
825  */
826 int dev_open(struct net_device *dev)
827 {
828 	int ret = 0;
829 
830 	/*
831 	 *	Is it already up?
832 	 */
833 
834 	if (dev->flags & IFF_UP)
835 		return 0;
836 
837 	/*
838 	 *	Is it even present?
839 	 */
840 	if (!netif_device_present(dev))
841 		return -ENODEV;
842 
843 	/*
844 	 *	Call device private open method
845 	 */
846 	set_bit(__LINK_STATE_START, &dev->state);
847 	if (dev->open) {
848 		ret = dev->open(dev);
849 		if (ret)
850 			clear_bit(__LINK_STATE_START, &dev->state);
851 	}
852 
853  	/*
854 	 *	If it went open OK then:
855 	 */
856 
857 	if (!ret) {
858 		/*
859 		 *	Set the flags.
860 		 */
861 		dev->flags |= IFF_UP;
862 
863 		/*
864 		 *	Initialize multicasting status
865 		 */
866 		dev_mc_upload(dev);
867 
868 		/*
869 		 *	Wakeup transmit queue engine
870 		 */
871 		dev_activate(dev);
872 
873 		/*
874 		 *	... and announce new interface.
875 		 */
876 		raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
877 	}
878 	return ret;
879 }
880 
881 /**
882  *	dev_close - shutdown an interface.
883  *	@dev: device to shutdown
884  *
885  *	This function moves an active device into down state. A
886  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
887  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
888  *	chain.
889  */
890 int dev_close(struct net_device *dev)
891 {
892 	if (!(dev->flags & IFF_UP))
893 		return 0;
894 
895 	/*
896 	 *	Tell people we are going down, so that they can
897 	 *	prepare to death, when device is still operating.
898 	 */
899 	raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
900 
901 	dev_deactivate(dev);
902 
903 	clear_bit(__LINK_STATE_START, &dev->state);
904 
905 	/* Synchronize to scheduled poll. We cannot touch poll list,
906 	 * it can be even on different cpu. So just clear netif_running(),
907 	 * and wait when poll really will happen. Actually, the best place
908 	 * for this is inside dev->stop() after device stopped its irq
909 	 * engine, but this requires more changes in devices. */
910 
911 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
912 	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
913 		/* No hurry. */
914 		msleep(1);
915 	}
916 
917 	/*
918 	 *	Call the device specific close. This cannot fail.
919 	 *	Only if device is UP
920 	 *
921 	 *	We allow it to be called even after a DETACH hot-plug
922 	 *	event.
923 	 */
924 	if (dev->stop)
925 		dev->stop(dev);
926 
927 	/*
928 	 *	Device is now down.
929 	 */
930 
931 	dev->flags &= ~IFF_UP;
932 
933 	/*
934 	 * Tell people we are down
935 	 */
936 	raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
937 
938 	return 0;
939 }
940 
941 
942 /*
943  *	Device change register/unregister. These are not inline or static
944  *	as we export them to the world.
945  */
946 
947 /**
948  *	register_netdevice_notifier - register a network notifier block
949  *	@nb: notifier
950  *
951  *	Register a notifier to be called when network device events occur.
952  *	The notifier passed is linked into the kernel structures and must
953  *	not be reused until it has been unregistered. A negative errno code
954  *	is returned on a failure.
955  *
956  * 	When registered all registration and up events are replayed
957  *	to the new notifier to allow device to have a race free
958  *	view of the network device list.
959  */
960 
961 int register_netdevice_notifier(struct notifier_block *nb)
962 {
963 	struct net_device *dev;
964 	int err;
965 
966 	rtnl_lock();
967 	err = raw_notifier_chain_register(&netdev_chain, nb);
968 	if (!err) {
969 		for (dev = dev_base; dev; dev = dev->next) {
970 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
971 
972 			if (dev->flags & IFF_UP)
973 				nb->notifier_call(nb, NETDEV_UP, dev);
974 		}
975 	}
976 	rtnl_unlock();
977 	return err;
978 }
979 
980 /**
981  *	unregister_netdevice_notifier - unregister a network notifier block
982  *	@nb: notifier
983  *
984  *	Unregister a notifier previously registered by
985  *	register_netdevice_notifier(). The notifier is unlinked into the
986  *	kernel structures and may then be reused. A negative errno code
987  *	is returned on a failure.
988  */
989 
990 int unregister_netdevice_notifier(struct notifier_block *nb)
991 {
992 	int err;
993 
994 	rtnl_lock();
995 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
996 	rtnl_unlock();
997 	return err;
998 }
999 
1000 /**
1001  *	call_netdevice_notifiers - call all network notifier blocks
1002  *      @val: value passed unmodified to notifier function
1003  *      @v:   pointer passed unmodified to notifier function
1004  *
1005  *	Call all network notifier blocks.  Parameters and return value
1006  *	are as for raw_notifier_call_chain().
1007  */
1008 
1009 int call_netdevice_notifiers(unsigned long val, void *v)
1010 {
1011 	return raw_notifier_call_chain(&netdev_chain, val, v);
1012 }
1013 
1014 /* When > 0 there are consumers of rx skb time stamps */
1015 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1016 
1017 void net_enable_timestamp(void)
1018 {
1019 	atomic_inc(&netstamp_needed);
1020 }
1021 
1022 void net_disable_timestamp(void)
1023 {
1024 	atomic_dec(&netstamp_needed);
1025 }
1026 
1027 void __net_timestamp(struct sk_buff *skb)
1028 {
1029 	struct timeval tv;
1030 
1031 	do_gettimeofday(&tv);
1032 	skb_set_timestamp(skb, &tv);
1033 }
1034 EXPORT_SYMBOL(__net_timestamp);
1035 
1036 static inline void net_timestamp(struct sk_buff *skb)
1037 {
1038 	if (atomic_read(&netstamp_needed))
1039 		__net_timestamp(skb);
1040 	else {
1041 		skb->tstamp.off_sec = 0;
1042 		skb->tstamp.off_usec = 0;
1043 	}
1044 }
1045 
1046 /*
1047  *	Support routine. Sends outgoing frames to any network
1048  *	taps currently in use.
1049  */
1050 
1051 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1052 {
1053 	struct packet_type *ptype;
1054 
1055 	net_timestamp(skb);
1056 
1057 	rcu_read_lock();
1058 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1059 		/* Never send packets back to the socket
1060 		 * they originated from - MvS (miquels@drinkel.ow.org)
1061 		 */
1062 		if ((ptype->dev == dev || !ptype->dev) &&
1063 		    (ptype->af_packet_priv == NULL ||
1064 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1065 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1066 			if (!skb2)
1067 				break;
1068 
1069 			/* skb->nh should be correctly
1070 			   set by sender, so that the second statement is
1071 			   just protection against buggy protocols.
1072 			 */
1073 			skb2->mac.raw = skb2->data;
1074 
1075 			if (skb2->nh.raw < skb2->data ||
1076 			    skb2->nh.raw > skb2->tail) {
1077 				if (net_ratelimit())
1078 					printk(KERN_CRIT "protocol %04x is "
1079 					       "buggy, dev %s\n",
1080 					       skb2->protocol, dev->name);
1081 				skb2->nh.raw = skb2->data;
1082 			}
1083 
1084 			skb2->h.raw = skb2->nh.raw;
1085 			skb2->pkt_type = PACKET_OUTGOING;
1086 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1087 		}
1088 	}
1089 	rcu_read_unlock();
1090 }
1091 
1092 
1093 void __netif_schedule(struct net_device *dev)
1094 {
1095 	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1096 		unsigned long flags;
1097 		struct softnet_data *sd;
1098 
1099 		local_irq_save(flags);
1100 		sd = &__get_cpu_var(softnet_data);
1101 		dev->next_sched = sd->output_queue;
1102 		sd->output_queue = dev;
1103 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1104 		local_irq_restore(flags);
1105 	}
1106 }
1107 EXPORT_SYMBOL(__netif_schedule);
1108 
1109 void __netif_rx_schedule(struct net_device *dev)
1110 {
1111 	unsigned long flags;
1112 
1113 	local_irq_save(flags);
1114 	dev_hold(dev);
1115 	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1116 	if (dev->quota < 0)
1117 		dev->quota += dev->weight;
1118 	else
1119 		dev->quota = dev->weight;
1120 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1121 	local_irq_restore(flags);
1122 }
1123 EXPORT_SYMBOL(__netif_rx_schedule);
1124 
/*
 * Free @skb from any context: use the irq-safe variant when in hard
 * interrupt context or with interrupts disabled, the plain one otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled()) {
		dev_kfree_skb(skb);
		return;
	}

	dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1133 
1134 
1135 /* Hot-plugging. */
1136 void netif_device_detach(struct net_device *dev)
1137 {
1138 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1139 	    netif_running(dev)) {
1140 		netif_stop_queue(dev);
1141 	}
1142 }
1143 EXPORT_SYMBOL(netif_device_detach);
1144 
1145 void netif_device_attach(struct net_device *dev)
1146 {
1147 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1148 	    netif_running(dev)) {
1149 		netif_wake_queue(dev);
1150  		__netdev_watchdog_up(dev);
1151 	}
1152 }
1153 EXPORT_SYMBOL(netif_device_attach);
1154 
1155 
1156 /*
1157  * Invalidate hardware checksum when packet is to be mangled, and
1158  * complete checksum manually on outgoing path.
1159  */
1160 int skb_checksum_help(struct sk_buff *skb, int inward)
1161 {
1162 	unsigned int csum;
1163 	int ret = 0, offset = skb->h.raw - skb->data;
1164 
1165 	if (inward)
1166 		goto out_set_summed;
1167 
1168 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1169 		/* Let GSO fix up the checksum. */
1170 		goto out_set_summed;
1171 	}
1172 
1173 	if (skb_cloned(skb)) {
1174 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1175 		if (ret)
1176 			goto out;
1177 	}
1178 
1179 	BUG_ON(offset > (int)skb->len);
1180 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1181 
1182 	offset = skb->tail - skb->h.raw;
1183 	BUG_ON(offset <= 0);
1184 	BUG_ON(skb->csum + 2 > offset);
1185 
1186 	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1187 
1188 out_set_summed:
1189 	skb->ip_summed = CHECKSUM_NONE;
1190 out:
1191 	return ret;
1192 }
1193 
/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 *
 *	On failure an ERR_PTR() value is returned; -EPROTONOSUPPORT if no
 *	registered packet type offers a gso_segment handler for
 *	skb->protocol.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	int type = skb->protocol;
	int err;

	BUG_ON(skb_shinfo(skb)->frag_list);

	/* Record the link-level header and step over it. */
	skb->mac.raw = skb->data;
	skb->mac_len = skb->nh.raw - skb->data;
	__skb_pull(skb, skb->mac_len);

	/*
	 * Not CHECKSUM_HW: gso_send_check() is going to be called below,
	 * so get a private copy of the header first if it is shared.
	 */
	if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	/* Same bucket selection as the receive path: ntohs(type) & 15. */
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
				err = ptype->gso_send_check(skb);
				/*
				 * err == 0 makes this NULL, which is the
				 * "no segmentation required" return when
				 * the output path can take the skb as is.
				 */
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				/* Rewind data to skb->nh.raw before segmenting. */
				__skb_push(skb, skb->data - skb->nh.raw);
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	/* Restore data to the link-level header recorded above. */
	__skb_push(skb, skb->data - skb->mac.raw);

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);
1245 
/*
 * Report a hardware receive checksum failure: rate-limited log line
 * naming the device (or "<unknown>" when @dev is NULL) plus a stack dump.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	const char *name;

	if (!net_ratelimit())
		return;

	name = dev ? dev->name : "<unknown>";
	printk(KERN_ERR "%s: hw csum failure.\n", name);
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1258 
/*
 * Returns 1 if @skb carries a page fragment in high memory while @dev
 * does not advertise NETIF_F_HIGHDMA, 0 otherwise (always 0 without
 * CONFIG_HIGHMEM).
 *
 * This check could go away once we know that:
 * 1. an IOMMU is present and can map all of memory, or
 * 2. no high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int idx;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (idx = 0; idx < skb_shinfo(skb)->nr_frags; idx++) {
			if (PageHighMem(skb_shinfo(skb)->frags[idx].page))
				return 1;
		}
	}

#endif
	return 0;
}
1279 
/*
 * Private data kept in skb->cb while a software-segmented skb is in
 * flight: the skb's original destructor, which dev_gso_segment()
 * replaces with dev_gso_skb_destructor().
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1285 
1286 static void dev_gso_skb_destructor(struct sk_buff *skb)
1287 {
1288 	struct dev_gso_cb *cb;
1289 
1290 	do {
1291 		struct sk_buff *nskb = skb->next;
1292 
1293 		skb->next = nskb->next;
1294 		nskb->next = NULL;
1295 		kfree_skb(nskb);
1296 	} while (skb->next);
1297 
1298 	cb = DEV_GSO_CB(skb);
1299 	if (cb->destructor)
1300 		cb->destructor(skb);
1301 }
1302 
1303 /**
1304  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1305  *	@skb: buffer to segment
1306  *
1307  *	This function segments the given skb and stores the list of segments
1308  *	in skb->next.
1309  */
1310 static int dev_gso_segment(struct sk_buff *skb)
1311 {
1312 	struct net_device *dev = skb->dev;
1313 	struct sk_buff *segs;
1314 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1315 					 NETIF_F_SG : 0);
1316 
1317 	segs = skb_gso_segment(skb, features);
1318 
1319 	/* Verifying header integrity only. */
1320 	if (!segs)
1321 		return 0;
1322 
1323 	if (unlikely(IS_ERR(segs)))
1324 		return PTR_ERR(segs);
1325 
1326 	skb->next = segs;
1327 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1328 	skb->destructor = dev_gso_skb_destructor;
1329 
1330 	return 0;
1331 }
1332 
/*
 * Hand @skb to the driver's hard_start_xmit method.
 *
 * An skb without a segment chain is first shown to the taps (when
 * netdev_nit is set) and, if netif_needs_gso() says so, segmented in
 * software.  An skb that has segments on skb->next, either freshly
 * produced or left over from an earlier attempt, has them sent one at
 * a time.
 *
 * Returns the driver's return code for a plain skb.  For a segmented
 * skb it returns 0 once all segments are sent (the parent skb is then
 * freed), the driver's non-zero code if a segment was refused (that
 * segment is put back at the head of the chain), or NETDEV_TX_BUSY if
 * the queue stopped with segments still pending.  A segmentation
 * failure frees the skb and returns 0.
 */
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (likely(!skb->next)) {
		if (netdev_nit)
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			/* No chain means nothing had to be segmented. */
			if (skb->next)
				goto gso;
		}

		return dev->hard_start_xmit(skb, dev);
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		/* Detach the first segment before giving it to the driver. */
		skb->next = nskb->next;
		nskb->next = NULL;
		rc = dev->hard_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			/* Refused: relink it at the head of the chain. */
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		if (unlikely(netif_queue_stopped(dev) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	/* Chain fully sent: put the original destructor back. */
	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}
1372 
/*
 * Take / release the device transmit lock unless the driver declares
 * lockless transmit (NETIF_F_LLTX).
 *
 * Both macros expand to a single statement (do { } while (0)) so they
 * can be used safely in unbraced if/else bodies, and the @dev argument
 * is parenthesized so any pointer expression may be passed.  The @cpu
 * argument of HARD_TX_LOCK is accepted for interface compatibility and
 * is not used.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		netif_tx_lock(dev);				\
	}							\
} while (0)

#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		netif_tx_unlock(dev);				\
	}							\
} while (0)
1384 
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      Note that this function can also return the verdict of the queue
 *      discipline's enqueue method, including NET_XMIT_DROP, which is a
 *      positive value.  So, errors can also be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */

int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	/* Linearize a frag_list skb if the device cannot take one. */
	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If the packet still needs its checksum filled in (CHECKSUM_HW)
	 * and the device does not support checksumming for this protocol,
	 * complete the checksum here.
	 */
	if (skb->ip_summed == CHECKSUM_HW &&
	    (!(dev->features & NETIF_F_GEN_CSUM) &&
	     (!(dev->features & NETIF_F_IP_CSUM) ||
	      skb->protocol != htons(ETH_P_IP))))
	      	if (skb_checksum_help(skb, 0))
	      		goto out_kfree_skb;

gso:
	spin_lock_prefetch(&dev->queue_lock);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);

		rc = q->enqueue(skb, q);

		qdisc_run(dev);

		spin_unlock(&dev->queue_lock);
		/* NET_XMIT_BYPASS is reported to the caller as success. */
		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible that they rely on the protection
	   provided by us here.

	   So check for that and take the lock; it is not prone to
	   deadlocks.  Alternatively, drop the noqueue qdisc, which
	   would be even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		/* Owner == this CPU means we re-entered via this device. */
		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	/* Device down, transmit refused, or recursion: drop the packet. */
	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
1531 
1532 
1533 /*=======================================================================
1534 			Receiver routines
1535   =======================================================================*/
1536 
/* Input queue length above which netif_rx() drops incoming packets. */
int netdev_max_backlog = 1000;
/* Starting budget of one net_rx_action() run. */
int netdev_budget = 300;
int weight_p = 64;            /* old backlog weight */

/* Per-CPU receive counters (total, dropped, time_squeeze, ...). */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1542 
1543 
1544 /**
1545  *	netif_rx	-	post buffer to the network code
1546  *	@skb: buffer to post
1547  *
1548  *	This function receives a packet from a device driver and queues it for
1549  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1550  *	may be dropped during processing for congestion control or by the
1551  *	protocol layers.
1552  *
1553  *	return values:
1554  *	NET_RX_SUCCESS	(no congestion)
1555  *	NET_RX_CN_LOW   (low congestion)
1556  *	NET_RX_CN_MOD   (moderate congestion)
1557  *	NET_RX_CN_HIGH  (high congestion)
1558  *	NET_RX_DROP     (packet was dropped)
1559  *
1560  */
1561 
1562 int netif_rx(struct sk_buff *skb)
1563 {
1564 	struct softnet_data *queue;
1565 	unsigned long flags;
1566 
1567 	/* if netpoll wants it, pretend we never saw it */
1568 	if (netpoll_rx(skb))
1569 		return NET_RX_DROP;
1570 
1571 	if (!skb->tstamp.off_sec)
1572 		net_timestamp(skb);
1573 
1574 	/*
1575 	 * The code is rearranged so that the path is the most
1576 	 * short when CPU is congested, but is still operating.
1577 	 */
1578 	local_irq_save(flags);
1579 	queue = &__get_cpu_var(softnet_data);
1580 
1581 	__get_cpu_var(netdev_rx_stat).total++;
1582 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1583 		if (queue->input_pkt_queue.qlen) {
1584 enqueue:
1585 			dev_hold(skb->dev);
1586 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1587 			local_irq_restore(flags);
1588 			return NET_RX_SUCCESS;
1589 		}
1590 
1591 		netif_rx_schedule(&queue->backlog_dev);
1592 		goto enqueue;
1593 	}
1594 
1595 	__get_cpu_var(netdev_rx_stat).dropped++;
1596 	local_irq_restore(flags);
1597 
1598 	kfree_skb(skb);
1599 	return NET_RX_DROP;
1600 }
1601 
/*
 * netif_rx() wrapper that runs with preemption disabled and then
 * executes any pending softirqs itself.  Returns netif_rx()'s result.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int rc;

	preempt_disable();

	rc = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return rc;
}

EXPORT_SYMBOL(netif_rx_ni);
1616 
1617 static inline struct net_device *skb_bond(struct sk_buff *skb)
1618 {
1619 	struct net_device *dev = skb->dev;
1620 
1621 	if (dev->master) {
1622 		/*
1623 		 * On bonding slaves other than the currently active
1624 		 * slave, suppress duplicates except for 802.3ad
1625 		 * ETH_P_SLOW and alb non-mcast/bcast.
1626 		 */
1627 		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
1628 			if (dev->master->priv_flags & IFF_MASTER_ALB) {
1629 				if (skb->pkt_type != PACKET_BROADCAST &&
1630 				    skb->pkt_type != PACKET_MULTICAST)
1631 					goto keep;
1632 			}
1633 
1634 			if (dev->master->priv_flags & IFF_MASTER_8023AD &&
1635 			    skb->protocol == __constant_htons(ETH_P_SLOW))
1636 				goto keep;
1637 
1638 			kfree_skb(skb);
1639 			return NULL;
1640 		}
1641 keep:
1642 		skb->dev = dev->master;
1643 	}
1644 
1645 	return dev;
1646 }
1647 
1648 static void net_tx_action(struct softirq_action *h)
1649 {
1650 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1651 
1652 	if (sd->completion_queue) {
1653 		struct sk_buff *clist;
1654 
1655 		local_irq_disable();
1656 		clist = sd->completion_queue;
1657 		sd->completion_queue = NULL;
1658 		local_irq_enable();
1659 
1660 		while (clist) {
1661 			struct sk_buff *skb = clist;
1662 			clist = clist->next;
1663 
1664 			BUG_TRAP(!atomic_read(&skb->users));
1665 			__kfree_skb(skb);
1666 		}
1667 	}
1668 
1669 	if (sd->output_queue) {
1670 		struct net_device *head;
1671 
1672 		local_irq_disable();
1673 		head = sd->output_queue;
1674 		sd->output_queue = NULL;
1675 		local_irq_enable();
1676 
1677 		while (head) {
1678 			struct net_device *dev = head;
1679 			head = head->next_sched;
1680 
1681 			smp_mb__before_clear_bit();
1682 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1683 
1684 			if (spin_trylock(&dev->queue_lock)) {
1685 				qdisc_run(dev);
1686 				spin_unlock(&dev->queue_lock);
1687 			} else {
1688 				netif_schedule(dev);
1689 			}
1690 		}
1691 	}
1692 }
1693 
1694 static __inline__ int deliver_skb(struct sk_buff *skb,
1695 				  struct packet_type *pt_prev,
1696 				  struct net_device *orig_dev)
1697 {
1698 	atomic_inc(&skb->users);
1699 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1700 }
1701 
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* Hook pointers for the bridge code; they are not assigned in this section. */
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 * Give the bridge a look at *@pskb if the receiving device is a bridge
 * port and the packet is not a loopback one.  A handler still pending
 * in *@pt_prev is delivered first.  Returns 0 when the bridge is not
 * involved, otherwise the result of br_handle_frame_hook().
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK)
		return 0;

	port = rcu_dereference((*pskb)->dev->br_port);
	if (port == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
#endif
1729 
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe sch_ingress should simply be forced to be compiled in
 * whenever CONFIG_NET_CLS_ACT is; otherwise, with CONFIG_NET_CLS_ACT
 * on and sch_ingress off, we pay for a compare and two stores that
 * do nothing useful.
 * NOTE: This does not remove any functionality; without the ingress
 * scheduler you simply cannot add policies on ingress.
 *
 * Runs @skb through the device's ingress qdisc, if there is one, and
 * returns its verdict.  Returns TC_ACT_OK when there is no ingress
 * qdisc and TC_ACT_SHOT when the redirect loop counter is exceeded.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int result = TC_ACT_OK;
	__u32 ttl;

	if (!dev->qdisc_ingress)
		return result;

	ttl = (__u32) G_TC_RTTL(skb->tc_verd);
	if (ttl > MAX_RED_LOOP) {
		printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
			skb->input_dev->name, skb->dev->name);
		return TC_ACT_SHOT;
	}
	ttl++;

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

	/* Re-read the qdisc under the lock before using it. */
	spin_lock(&dev->ingress_lock);
	q = dev->qdisc_ingress;
	if (q != NULL)
		result = q->enqueue(skb, q);
	spin_unlock(&dev->ingress_lock);

	return result;
}
#endif
1767 
/*
 * Main receive path: deliver @skb to the taps on ptype_all, run the
 * ingress filter (CONFIG_NET_CLS_ACT), the diverter and the bridge
 * hook, then deliver to the protocol handlers registered for
 * skb->protocol.
 *
 * Delivery is deferred by one step through pt_prev: each matching
 * handler but the last is called via deliver_skb(), which takes an
 * extra skb reference; the last one is called directly without an
 * extra reference.  If nobody wants the packet it is freed.
 *
 * Returns NET_RX_DROP when the packet was dropped before delivery or
 * had no handler; otherwise the value left in ret by the last step
 * that set it (a handler's return value, or the ing_filter() verdict
 * on the filtered-out path).
 */
int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	int ret = NET_RX_DROP;
	unsigned short type;

	/* if we've gotten here through NAPI, check netpoll */
	if (skb->dev->poll && netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.off_sec)
		net_timestamp(skb);

	if (!skb->input_dev)
		skb->input_dev = skb->dev;

	/* May re-point skb->dev at the bonding master, or free the skb. */
	orig_dev = skb_bond(skb);

	if (!orig_dev)
		return NET_RX_DROP;

	__get_cpu_var(netdev_rx_stat).total++;

	skb->h.raw = skb->nh.raw = skb->data;
	skb->mac_len = skb->nh.raw - skb->mac.raw;

	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	/* TC_NCLS set: clear it and skip the taps and ingress filter. */
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* Taps: handlers bound to this device or to no device. */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	if (pt_prev) {
		ret = deliver_skb(skb, pt_prev, orig_dev);
		pt_prev = NULL; /* no one else should process this after */
	} else {
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	ret = ing_filter(skb);

	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
		kfree_skb(skb);
		goto out;
	}

	skb->tc_verd = 0;
ncls:
#endif

	handle_diverter(skb);

	/* Non-zero: the bridge hook took the packet; stop here. */
	if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
		goto out;

	/* Protocol handlers, hashed by the low 4 bits of the ethertype. */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* Last handler: called without an extra reference. */
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
1862 
1863 static int process_backlog(struct net_device *backlog_dev, int *budget)
1864 {
1865 	int work = 0;
1866 	int quota = min(backlog_dev->quota, *budget);
1867 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1868 	unsigned long start_time = jiffies;
1869 
1870 	backlog_dev->weight = weight_p;
1871 	for (;;) {
1872 		struct sk_buff *skb;
1873 		struct net_device *dev;
1874 
1875 		local_irq_disable();
1876 		skb = __skb_dequeue(&queue->input_pkt_queue);
1877 		if (!skb)
1878 			goto job_done;
1879 		local_irq_enable();
1880 
1881 		dev = skb->dev;
1882 
1883 		netif_receive_skb(skb);
1884 
1885 		dev_put(dev);
1886 
1887 		work++;
1888 
1889 		if (work >= quota || jiffies - start_time > 1)
1890 			break;
1891 
1892 	}
1893 
1894 	backlog_dev->quota -= work;
1895 	*budget -= work;
1896 	return -1;
1897 
1898 job_done:
1899 	backlog_dev->quota -= work;
1900 	*budget -= work;
1901 
1902 	list_del(&backlog_dev->poll_list);
1903 	smp_mb__before_clear_bit();
1904 	netif_poll_enable(backlog_dev);
1905 
1906 	local_irq_enable();
1907 	return 0;
1908 }
1909 
/*
 * NET_RX softirq handler.  Polls the devices on this CPU's poll list
 * until the list is empty, the budget (netdev_budget) is used up, or
 * more than one jiffy has passed.  In the latter two cases the
 * time_squeeze counter is bumped and the softirq is raised again.
 *
 * The poll list is examined with interrupts disabled; interrupts are
 * enabled around each device's poll call.
 */
static void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(&queue->poll_list)) {
		struct net_device *dev;

		if (budget <= 0 || jiffies - start_time > 1)
			goto softnet_break;

		local_irq_enable();

		dev = list_entry(queue->poll_list.next,
				 struct net_device, poll_list);
		have = netpoll_poll_lock(dev);

		/*
		 * Out of quota, or poll() reported more work: move the
		 * device to the tail of the list and refill its quota.
		 */
		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
			netpoll_poll_unlock(have);
			local_irq_disable();
			list_move_tail(&dev->poll_list, &queue->poll_list);
			if (dev->quota < 0)
				dev->quota += dev->weight;
			else
				dev->quota = dev->weight;
		} else {
			/*
			 * poll() returned 0: drop the device reference.
			 * Nothing is removed from the list here; the loop
			 * relies on poll() having done that (as
			 * process_backlog() does).
			 */
			netpoll_poll_unlock(have);
			dev_put(dev);
			local_irq_disable();
		}
	}
out:
#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	if (net_dma_client) {
		struct dma_chan *chan;
		rcu_read_lock();
		list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
			dma_async_memcpy_issue_pending(chan);
		rcu_read_unlock();
	}
#endif
	local_irq_enable();
	return;

softnet_break:
	/* Ran out of budget or time: count it and ask to be run again. */
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}
1967 
1968 static gifconf_func_t * gifconf_list [NPROTO];
1969 
1970 /**
1971  *	register_gifconf	-	register a SIOCGIF handler
1972  *	@family: Address family
1973  *	@gifconf: Function handler
1974  *
1975  *	Register protocol dependent address dumping routines. The handler
1976  *	that is passed must not be freed or reused until it has been replaced
1977  *	by another handler.
1978  */
1979 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1980 {
1981 	if (family >= NPROTO)
1982 		return -EINVAL;
1983 	gifconf_list[family] = gifconf;
1984 	return 0;
1985 }
1986 
1987 
1988 /*
1989  *	Map an interface index to its name (SIOCGIFNAME)
1990  */
1991 
1992 /*
1993  *	We need this ioctl for efficient implementation of the
1994  *	if_indextoname() function required by the IPv6 API.  Without
1995  *	it, we would have to search all the interfaces to find a
1996  *	match.  --pb
1997  */
1998 
1999 static int dev_ifname(struct ifreq __user *arg)
2000 {
2001 	struct net_device *dev;
2002 	struct ifreq ifr;
2003 
2004 	/*
2005 	 *	Fetch the caller's info block.
2006 	 */
2007 
2008 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2009 		return -EFAULT;
2010 
2011 	read_lock(&dev_base_lock);
2012 	dev = __dev_get_by_index(ifr.ifr_ifindex);
2013 	if (!dev) {
2014 		read_unlock(&dev_base_lock);
2015 		return -ENODEV;
2016 	}
2017 
2018 	strcpy(ifr.ifr_name, dev->name);
2019 	read_unlock(&dev_base_lock);
2020 
2021 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2022 		return -EFAULT;
2023 	return 0;
2024 }
2025 
2026 /*
2027  *	Perform a SIOCGIFCONF call. This structure will change
2028  *	size eventually, and there is nothing I can do about it.
2029  *	Thus we will need a 'compatibility mode'.
2030  */
2031 
2032 static int dev_ifconf(char __user *arg)
2033 {
2034 	struct ifconf ifc;
2035 	struct net_device *dev;
2036 	char __user *pos;
2037 	int len;
2038 	int total;
2039 	int i;
2040 
2041 	/*
2042 	 *	Fetch the caller's info block.
2043 	 */
2044 
2045 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2046 		return -EFAULT;
2047 
2048 	pos = ifc.ifc_buf;
2049 	len = ifc.ifc_len;
2050 
2051 	/*
2052 	 *	Loop over the interfaces, and write an info block for each.
2053 	 */
2054 
2055 	total = 0;
2056 	for (dev = dev_base; dev; dev = dev->next) {
2057 		for (i = 0; i < NPROTO; i++) {
2058 			if (gifconf_list[i]) {
2059 				int done;
2060 				if (!pos)
2061 					done = gifconf_list[i](dev, NULL, 0);
2062 				else
2063 					done = gifconf_list[i](dev, pos + total,
2064 							       len - total);
2065 				if (done < 0)
2066 					return -EFAULT;
2067 				total += done;
2068 			}
2069 		}
2070   	}
2071 
2072 	/*
2073 	 *	All done.  Write the updated control block back to the caller.
2074 	 */
2075 	ifc.ifc_len = total;
2076 
2077 	/*
2078 	 * 	Both BSD and Solaris return 0 here, so we do too.
2079 	 */
2080 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2081 }
2082 
2083 #ifdef CONFIG_PROC_FS
2084 /*
2085  *	This is invoked by the /proc filesystem handler to display a device
2086  *	in detail.
2087  */
2088 static __inline__ struct net_device *dev_get_idx(loff_t pos)
2089 {
2090 	struct net_device *dev;
2091 	loff_t i;
2092 
2093 	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2094 
2095 	return i == pos ? dev : NULL;
2096 }
2097 
2098 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2099 {
2100 	read_lock(&dev_base_lock);
2101 	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2102 }
2103 
2104 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2105 {
2106 	++*pos;
2107 	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2108 }
2109 
2110 void dev_seq_stop(struct seq_file *seq, void *v)
2111 {
2112 	read_unlock(&dev_base_lock);
2113 }
2114 
2115 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2116 {
2117 	if (dev->get_stats) {
2118 		struct net_device_stats *stats = dev->get_stats(dev);
2119 
2120 		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2121 				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2122 			   dev->name, stats->rx_bytes, stats->rx_packets,
2123 			   stats->rx_errors,
2124 			   stats->rx_dropped + stats->rx_missed_errors,
2125 			   stats->rx_fifo_errors,
2126 			   stats->rx_length_errors + stats->rx_over_errors +
2127 			     stats->rx_crc_errors + stats->rx_frame_errors,
2128 			   stats->rx_compressed, stats->multicast,
2129 			   stats->tx_bytes, stats->tx_packets,
2130 			   stats->tx_errors, stats->tx_dropped,
2131 			   stats->tx_fifo_errors, stats->collisions,
2132 			   stats->tx_carrier_errors +
2133 			     stats->tx_aborted_errors +
2134 			     stats->tx_window_errors +
2135 			     stats->tx_heartbeat_errors,
2136 			   stats->tx_compressed);
2137 	} else
2138 		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2139 }
2140 
2141 /*
2142  *	Called from the PROCfs module. This now uses the new arbitrary sized
2143  *	/proc/net interface to create /proc/net/dev
2144  */
2145 static int dev_seq_show(struct seq_file *seq, void *v)
2146 {
2147 	if (v == SEQ_START_TOKEN)
2148 		seq_puts(seq, "Inter-|   Receive                            "
2149 			      "                    |  Transmit\n"
2150 			      " face |bytes    packets errs drop fifo frame "
2151 			      "compressed multicast|bytes    packets errs "
2152 			      "drop fifo colls carrier compressed\n");
2153 	else
2154 		dev_seq_printf_stats(seq, v);
2155 	return 0;
2156 }
2157 
2158 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2159 {
2160 	struct netif_rx_stats *rc = NULL;
2161 
2162 	while (*pos < NR_CPUS)
2163 	       	if (cpu_online(*pos)) {
2164 			rc = &per_cpu(netdev_rx_stat, *pos);
2165 			break;
2166 		} else
2167 			++*pos;
2168 	return rc;
2169 }
2170 
2171 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2172 {
2173 	return softnet_get_online(pos);
2174 }
2175 
2176 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2177 {
2178 	++*pos;
2179 	return softnet_get_online(pos);
2180 }
2181 
/*
 *	seq_file ->stop(): intentionally empty.  softnet_seq_start() only
 *	calls softnet_get_online(), which takes no lock, so there is
 *	nothing to undo here.
 */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2185 
2186 static int softnet_seq_show(struct seq_file *seq, void *v)
2187 {
2188 	struct netif_rx_stats *s = v;
2189 
2190 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2191 		   s->total, s->dropped, s->time_squeeze, 0,
2192 		   0, 0, 0, 0, /* was fastroute */
2193 		   s->cpu_collision );
2194 	return 0;
2195 }
2196 
2197 static struct seq_operations dev_seq_ops = {
2198 	.start = dev_seq_start,
2199 	.next  = dev_seq_next,
2200 	.stop  = dev_seq_stop,
2201 	.show  = dev_seq_show,
2202 };
2203 
2204 static int dev_seq_open(struct inode *inode, struct file *file)
2205 {
2206 	return seq_open(file, &dev_seq_ops);
2207 }
2208 
2209 static struct file_operations dev_seq_fops = {
2210 	.owner	 = THIS_MODULE,
2211 	.open    = dev_seq_open,
2212 	.read    = seq_read,
2213 	.llseek  = seq_lseek,
2214 	.release = seq_release,
2215 };
2216 
2217 static struct seq_operations softnet_seq_ops = {
2218 	.start = softnet_seq_start,
2219 	.next  = softnet_seq_next,
2220 	.stop  = softnet_seq_stop,
2221 	.show  = softnet_seq_show,
2222 };
2223 
2224 static int softnet_seq_open(struct inode *inode, struct file *file)
2225 {
2226 	return seq_open(file, &softnet_seq_ops);
2227 }
2228 
2229 static struct file_operations softnet_seq_fops = {
2230 	.owner	 = THIS_MODULE,
2231 	.open    = softnet_seq_open,
2232 	.read    = seq_read,
2233 	.llseek  = seq_lseek,
2234 	.release = seq_release,
2235 };
2236 
2237 #ifdef CONFIG_WIRELESS_EXT
2238 extern int wireless_proc_init(void);
2239 #else
2240 #define wireless_proc_init() 0
2241 #endif
2242 
2243 static int __init dev_proc_init(void)
2244 {
2245 	int rc = -ENOMEM;
2246 
2247 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2248 		goto out;
2249 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2250 		goto out_dev;
2251 	if (wireless_proc_init())
2252 		goto out_softnet;
2253 	rc = 0;
2254 out:
2255 	return rc;
2256 out_softnet:
2257 	proc_net_remove("softnet_stat");
2258 out_dev:
2259 	proc_net_remove("dev");
2260 	goto out;
2261 }
2262 #else
2263 #define dev_proc_init() 0
2264 #endif	/* CONFIG_PROC_FS */
2265 
2266 
2267 /**
2268  *	netdev_set_master	-	set up master/slave pair
2269  *	@slave: slave device
2270  *	@master: new master device
2271  *
2272  *	Changes the master device of the slave. Pass %NULL to break the
2273  *	bonding. The caller must hold the RTNL semaphore. On a failure
2274  *	a negative errno code is returned. On success the reference counts
2275  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2276  *	function returns zero.
2277  */
2278 int netdev_set_master(struct net_device *slave, struct net_device *master)
2279 {
2280 	struct net_device *old = slave->master;
2281 
2282 	ASSERT_RTNL();
2283 
2284 	if (master) {
2285 		if (old)
2286 			return -EBUSY;
2287 		dev_hold(master);
2288 	}
2289 
2290 	slave->master = master;
2291 
2292 	synchronize_net();
2293 
2294 	if (old)
2295 		dev_put(old);
2296 
2297 	if (master)
2298 		slave->flags |= IFF_SLAVE;
2299 	else
2300 		slave->flags &= ~IFF_SLAVE;
2301 
2302 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2303 	return 0;
2304 }
2305 
2306 /**
2307  *	dev_set_promiscuity	- update promiscuity count on a device
2308  *	@dev: device
2309  *	@inc: modifier
2310  *
2311  *	Add or remove promiscuity from a device. While the count in the device
2312  *	remains above zero the interface remains promiscuous. Once it hits zero
2313  *	the device reverts back to normal filtering operation. A negative inc
2314  *	value is used to drop promiscuity on the device.
2315  */
2316 void dev_set_promiscuity(struct net_device *dev, int inc)
2317 {
2318 	unsigned short old_flags = dev->flags;
2319 
2320 	if ((dev->promiscuity += inc) == 0)
2321 		dev->flags &= ~IFF_PROMISC;
2322 	else
2323 		dev->flags |= IFF_PROMISC;
2324 	if (dev->flags != old_flags) {
2325 		dev_mc_upload(dev);
2326 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2327 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2328 		       					       "left");
2329 		audit_log(current->audit_context, GFP_ATOMIC,
2330 			AUDIT_ANOM_PROMISCUOUS,
2331 			"dev=%s prom=%d old_prom=%d auid=%u",
2332 			dev->name, (dev->flags & IFF_PROMISC),
2333 			(old_flags & IFF_PROMISC),
2334 			audit_get_loginuid(current->audit_context));
2335 	}
2336 }
2337 
2338 /**
2339  *	dev_set_allmulti	- update allmulti count on a device
2340  *	@dev: device
2341  *	@inc: modifier
2342  *
2343  *	Add or remove reception of all multicast frames to a device. While the
2344  *	count in the device remains above zero the interface remains listening
2345  *	to all interfaces. Once it hits zero the device reverts back to normal
2346  *	filtering operation. A negative @inc value is used to drop the counter
2347  *	when releasing a resource needing all multicasts.
2348  */
2349 
2350 void dev_set_allmulti(struct net_device *dev, int inc)
2351 {
2352 	unsigned short old_flags = dev->flags;
2353 
2354 	dev->flags |= IFF_ALLMULTI;
2355 	if ((dev->allmulti += inc) == 0)
2356 		dev->flags &= ~IFF_ALLMULTI;
2357 	if (dev->flags ^ old_flags)
2358 		dev_mc_upload(dev);
2359 }
2360 
2361 unsigned dev_get_flags(const struct net_device *dev)
2362 {
2363 	unsigned flags;
2364 
2365 	flags = (dev->flags & ~(IFF_PROMISC |
2366 				IFF_ALLMULTI |
2367 				IFF_RUNNING |
2368 				IFF_LOWER_UP |
2369 				IFF_DORMANT)) |
2370 		(dev->gflags & (IFF_PROMISC |
2371 				IFF_ALLMULTI));
2372 
2373 	if (netif_running(dev)) {
2374 		if (netif_oper_up(dev))
2375 			flags |= IFF_RUNNING;
2376 		if (netif_carrier_ok(dev))
2377 			flags |= IFF_LOWER_UP;
2378 		if (netif_dormant(dev))
2379 			flags |= IFF_DORMANT;
2380 	}
2381 
2382 	return flags;
2383 }
2384 
/*
 *	Apply a requested flag word to @dev.
 *
 *	The bits in the first mask below are copied straight from @flags.
 *	IFF_UP, IFF_VOLATILE, IFF_PROMISC and IFF_ALLMULTI are carried over
 *	from the current dev->flags and handled separately: a change of
 *	IFF_UP calls dev_open() or dev_close(), while IFF_PROMISC and
 *	IFF_ALLMULTI go through dev_set_promiscuity()/dev_set_allmulti(),
 *	with dev->gflags recording what was last requested through here.
 *
 *	Returns 0, or the error from dev_open()/dev_close() when toggling
 *	IFF_UP failed; the remaining steps still run in that case.
 */
int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret;
	int old_flags = dev->flags;

	/*
	 *	Set the flags on our device.
	 */

	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	dev_mc_upload(dev);

	/*
	 *	Have we downed the interface. We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (!ret)
			dev_mc_upload(dev);
	}

	/*
	 *	Raise NETDEV_CHANGE only while the device is up and some flag
	 *	other than UP/PROMISC/ALLMULTI/VOLATILE differs from before.
	 */
	if (dev->flags & IFF_UP &&
	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
					  IFF_VOLATILE)))
		raw_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGE, dev);

	/* Request differs from the recorded one: flip gflags and adjust the count by one. */
	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC, when
	   IFF_ALLMULTI is requested not asking us and not reporting.
	 */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	/* Send RTM_NEWLINK carrying the mask of flags that ended up changed. */
	if (old_flags ^ dev->flags)
		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);

	return ret;
}
2447 
2448 int dev_set_mtu(struct net_device *dev, int new_mtu)
2449 {
2450 	int err;
2451 
2452 	if (new_mtu == dev->mtu)
2453 		return 0;
2454 
2455 	/*	MTU must be positive.	 */
2456 	if (new_mtu < 0)
2457 		return -EINVAL;
2458 
2459 	if (!netif_device_present(dev))
2460 		return -ENODEV;
2461 
2462 	err = 0;
2463 	if (dev->change_mtu)
2464 		err = dev->change_mtu(dev, new_mtu);
2465 	else
2466 		dev->mtu = new_mtu;
2467 	if (!err && dev->flags & IFF_UP)
2468 		raw_notifier_call_chain(&netdev_chain,
2469 				NETDEV_CHANGEMTU, dev);
2470 	return err;
2471 }
2472 
2473 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2474 {
2475 	int err;
2476 
2477 	if (!dev->set_mac_address)
2478 		return -EOPNOTSUPP;
2479 	if (sa->sa_family != dev->type)
2480 		return -EINVAL;
2481 	if (!netif_device_present(dev))
2482 		return -ENODEV;
2483 	err = dev->set_mac_address(dev, sa);
2484 	if (!err)
2485 		raw_notifier_call_chain(&netdev_chain,
2486 				NETDEV_CHANGEADDR, dev);
2487 	return err;
2488 }
2489 
/*
 *	Perform the SIOCxIFxxx calls.
 *
 *	Looks the device up by ifr->ifr_name with __dev_get_by_name() and
 *	carries out @cmd on it, taking arguments from and storing results
 *	in @ifr (a kernel-space copy).  No locking or capability check is
 *	done here; dev_ioctl() does both before calling in.
 *
 *	Returns -ENODEV when no device has that name, otherwise 0 or a
 *	negative errno from the individual case.  Commands in the private,
 *	bonding, MII, bridge and WAN set are passed to the driver's
 *	do_ioctl() and return whatever it returns; anything else unknown
 *	yields -EINVAL.
 */
static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
		case SIOCGIFFLAGS:	/* Get interface flags */
			ifr->ifr_flags = dev_get_flags(dev);
			return 0;

		case SIOCSIFFLAGS:	/* Set interface flags */
			return dev_change_flags(dev, ifr->ifr_flags);

		case SIOCGIFMETRIC:	/* Get the metric on the interface
					   (currently unused) */
			ifr->ifr_metric = 0;
			return 0;

		case SIOCSIFMETRIC:	/* Set the metric on the interface
					   (currently unused) */
			return -EOPNOTSUPP;

		case SIOCGIFMTU:	/* Get the MTU of a device */
			ifr->ifr_mtu = dev->mtu;
			return 0;

		case SIOCSIFMTU:	/* Set the MTU of a device */
			return dev_set_mtu(dev, ifr->ifr_mtu);

		case SIOCGIFHWADDR:
			/* Copy out at most sizeof(sa_data) bytes of the address. */
			if (!dev->addr_len)
				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
			else
				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			ifr->ifr_hwaddr.sa_family = dev->type;
			return 0;

		case SIOCSIFHWADDR:
			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

		case SIOCSIFHWBROADCAST:
			if (ifr->ifr_hwaddr.sa_family != dev->type)
				return -EINVAL;
			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			raw_notifier_call_chain(&netdev_chain,
					    NETDEV_CHANGEADDR, dev);
			return 0;

		case SIOCGIFMAP:
			ifr->ifr_map.mem_start = dev->mem_start;
			ifr->ifr_map.mem_end   = dev->mem_end;
			ifr->ifr_map.base_addr = dev->base_addr;
			ifr->ifr_map.irq       = dev->irq;
			ifr->ifr_map.dma       = dev->dma;
			ifr->ifr_map.port      = dev->if_port;
			return 0;

		case SIOCSIFMAP:
			if (dev->set_config) {
				if (!netif_device_present(dev))
					return -ENODEV;
				return dev->set_config(dev, &ifr->ifr_map);
			}
			return -EOPNOTSUPP;

		case SIOCADDMULTI:
			if (!dev->set_multicast_list ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
					  dev->addr_len, 1);

		case SIOCDELMULTI:
			if (!dev->set_multicast_list ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
					     dev->addr_len, 1);

		case SIOCGIFINDEX:
			ifr->ifr_ifindex = dev->ifindex;
			return 0;

		case SIOCGIFTXQLEN:
			ifr->ifr_qlen = dev->tx_queue_len;
			return 0;

		case SIOCSIFTXQLEN:
			if (ifr->ifr_qlen < 0)
				return -EINVAL;
			dev->tx_queue_len = ifr->ifr_qlen;
			return 0;

		case SIOCSIFNAME:
			/* Force termination before the new name is used. */
			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
			return dev_change_name(dev, ifr->ifr_newname);

		/*
		 *	Unknown or private ioctl
		 */

		default:
			if ((cmd >= SIOCDEVPRIVATE &&
			    cmd <= SIOCDEVPRIVATE + 15) ||
			    cmd == SIOCBONDENSLAVE ||
			    cmd == SIOCBONDRELEASE ||
			    cmd == SIOCBONDSETHWADDR ||
			    cmd == SIOCBONDSLAVEINFOQUERY ||
			    cmd == SIOCBONDINFOQUERY ||
			    cmd == SIOCBONDCHANGEACTIVE ||
			    cmd == SIOCGMIIPHY ||
			    cmd == SIOCGMIIREG ||
			    cmd == SIOCSMIIREG ||
			    cmd == SIOCBRADDIF ||
			    cmd == SIOCBRDELIF ||
			    cmd == SIOCWANDEV) {
				/* Driver-handled: -EOPNOTSUPP without a hook, -ENODEV if absent. */
				err = -EOPNOTSUPP;
				if (dev->do_ioctl) {
					if (netif_device_present(dev))
						err = dev->do_ioctl(dev, ifr,
								    cmd);
					else
						err = -ENODEV;
				}
			} else
				err = -EINVAL;

	}
	return err;
}
2632 
/*
 *	This function handles all "interface"-type I/O control requests. The actual
 *	'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 *
 *	The ifreq is copied in, the command is classified to pick the
 *	required capability and lock (dev_base_lock for the read-only
 *	queries, the RTNL for everything else), and for commands that
 *	return data the ifreq is copied back out on success.
 */

int dev_ioctl(unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf((char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	/* SIOCGIFNAME is handed the user pointer directly, before any copy-in. */
	if (cmd == SIOCGIFNAME)
		return dev_ifname((struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	/* Never trust user space to have terminated the name. */
	ifr.ifr_name[IFNAMSIZ-1] = 0;

	/*
	 * Cut the name at the first ':' for the device lookup.  The
	 * branches below that restore it do so just before copying the
	 * ifreq back; the private/WAN and wireless branches do not.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
		/*
		 *	These ioctl calls:
		 *	- can be done by all.
		 *	- atomic and do not require locking.
		 *	- return a value
		 */
		case SIOCGIFFLAGS:
		case SIOCGIFMETRIC:
		case SIOCGIFMTU:
		case SIOCGIFHWADDR:
		case SIOCGIFSLAVE:
		case SIOCGIFMAP:
		case SIOCGIFINDEX:
		case SIOCGIFTXQLEN:
			dev_load(ifr.ifr_name);
			read_lock(&dev_base_lock);
			ret = dev_ifsioc(&ifr, cmd);
			read_unlock(&dev_base_lock);
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		case SIOCETHTOOL:
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ethtool(&ifr);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- return a value
		 */
		case SIOCGMIIPHY:
		case SIOCGMIIREG:
		case SIOCSIFNAME:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- do not return a value
		 */
		case SIOCSIFFLAGS:
		case SIOCSIFMETRIC:
		case SIOCSIFMTU:
		case SIOCSIFMAP:
		case SIOCSIFHWADDR:
		case SIOCSIFSLAVE:
		case SIOCADDMULTI:
		case SIOCDELMULTI:
		case SIOCSIFHWBROADCAST:
		case SIOCSIFTXQLEN:
		case SIOCSMIIREG:
		case SIOCBONDENSLAVE:
		case SIOCBONDRELEASE:
		case SIOCBONDSETHWADDR:
		case SIOCBONDCHANGEACTIVE:
		case SIOCBRADDIF:
		case SIOCBRDELIF:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			/* fall through */
		/* The two bonding queries share the path above but skip the capability check. */
		case SIOCBONDSLAVEINFOQUERY:
		case SIOCBONDINFOQUERY:
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			return ret;

		case SIOCGIFMEM:
			/* Get the per device memory space. We can add this but
			 * currently do not support it */
		case SIOCSIFMEM:
			/* Set the per device memory buffer space.
			 * Not applicable in our case */
		case SIOCSIFLINK:
			return -EINVAL;

		/*
		 *	Unknown or private ioctl.
		 */
		default:
			if (cmd == SIOCWANDEV ||
			    (cmd >= SIOCDEVPRIVATE &&
			     cmd <= SIOCDEVPRIVATE + 15)) {
				dev_load(ifr.ifr_name);
				rtnl_lock();
				ret = dev_ifsioc(&ifr, cmd);
				rtnl_unlock();
				if (!ret && copy_to_user(arg, &ifr,
							 sizeof(struct ifreq)))
					ret = -EFAULT;
				return ret;
			}
#ifdef CONFIG_WIRELESS_EXT
			/* Take care of Wireless Extensions */
			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
				/* If command is `set a parameter', or
				 * `get the encoding parameters', check if
				 * the user has the right to do it */
				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
				    || cmd == SIOCGIWENCODEEXT) {
					if (!capable(CAP_NET_ADMIN))
						return -EPERM;
				}
				dev_load(ifr.ifr_name);
				rtnl_lock();
				/* Follow me in net/core/wireless.c */
				ret = wireless_process_ioctl(&ifr, cmd);
				rtnl_unlock();
				/* For "get" commands the copy-out happens whatever ret holds. */
				if (IW_IS_GET(cmd) &&
				    copy_to_user(arg, &ifr,
					    	 sizeof(struct ifreq)))
					ret = -EFAULT;
				return ret;
			}
#endif	/* CONFIG_WIRELESS_EXT */
			return -EINVAL;
	}
}
2833 
2834 
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Advances a function-local counter, restarting at 1 whenever it
 *	would become zero or negative, until it lands on a value that
 *	__dev_get_by_index() does not know, and returns that value.
 *	The caller must hold the rtnl semaphore or the dev_base_lock
 *	to be sure the result remains unique.
 */
static int dev_new_index(void)
{
	static int ifindex;

	do {
		if (++ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(ifindex));

	return ifindex;
}
2852 
/*
 * Starts out as 1.  register_netdevice() and unregister_netdevice()
 * BUG() for as long as it is non-zero.
 */
static int dev_boot_phase = 1;
2854 
/*
 * Delayed registration/unregistration.
 *
 * Devices queued on net_todo_list are processed by netdev_run_todo();
 * net_todo_list_lock protects the list itself.
 */
static DEFINE_SPINLOCK(net_todo_list_lock);
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);

/* Append @dev to the tail of the todo list under net_todo_list_lock. */
static inline void net_set_todo(struct net_device *dev)
{
	spin_lock(&net_todo_list_lock);
	list_add_tail(&dev->todo_list, &net_todo_list);
	spin_unlock(&net_todo_list_lock);
}
2865 
2866 /**
2867  *	register_netdevice	- register a network device
2868  *	@dev: device to register
2869  *
2870  *	Take a completed network device structure and add it to the kernel
2871  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2872  *	chain. 0 is returned on success. A negative errno code is returned
2873  *	on a failure to set up the device, or if the name is a duplicate.
2874  *
2875  *	Callers must hold the rtnl semaphore. You may want
2876  *	register_netdev() instead of this.
2877  *
2878  *	BUGS:
2879  *	The locking appears insufficient to guarantee two parallel registers
2880  *	will not get the same name.
2881  */
2882 
2883 int register_netdevice(struct net_device *dev)
2884 {
2885 	struct hlist_head *head;
2886 	struct hlist_node *p;
2887 	int ret;
2888 
2889 	BUG_ON(dev_boot_phase);
2890 	ASSERT_RTNL();
2891 
2892 	might_sleep();
2893 
2894 	/* When net_device's are persistent, this will be fatal. */
2895 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2896 
2897 	spin_lock_init(&dev->queue_lock);
2898 	spin_lock_init(&dev->_xmit_lock);
2899 	dev->xmit_lock_owner = -1;
2900 #ifdef CONFIG_NET_CLS_ACT
2901 	spin_lock_init(&dev->ingress_lock);
2902 #endif
2903 
2904 	ret = alloc_divert_blk(dev);
2905 	if (ret)
2906 		goto out;
2907 
2908 	dev->iflink = -1;
2909 
2910 	/* Init, if this function is available */
2911 	if (dev->init) {
2912 		ret = dev->init(dev);
2913 		if (ret) {
2914 			if (ret > 0)
2915 				ret = -EIO;
2916 			goto out_err;
2917 		}
2918 	}
2919 
2920 	if (!dev_valid_name(dev->name)) {
2921 		ret = -EINVAL;
2922 		goto out_err;
2923 	}
2924 
2925 	dev->ifindex = dev_new_index();
2926 	if (dev->iflink == -1)
2927 		dev->iflink = dev->ifindex;
2928 
2929 	/* Check for existence of name */
2930 	head = dev_name_hash(dev->name);
2931 	hlist_for_each(p, head) {
2932 		struct net_device *d
2933 			= hlist_entry(p, struct net_device, name_hlist);
2934 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2935 			ret = -EEXIST;
2936  			goto out_err;
2937 		}
2938  	}
2939 
2940 	/* Fix illegal SG+CSUM combinations. */
2941 	if ((dev->features & NETIF_F_SG) &&
2942 	    !(dev->features & NETIF_F_ALL_CSUM)) {
2943 		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
2944 		       dev->name);
2945 		dev->features &= ~NETIF_F_SG;
2946 	}
2947 
2948 	/* TSO requires that SG is present as well. */
2949 	if ((dev->features & NETIF_F_TSO) &&
2950 	    !(dev->features & NETIF_F_SG)) {
2951 		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
2952 		       dev->name);
2953 		dev->features &= ~NETIF_F_TSO;
2954 	}
2955 	if (dev->features & NETIF_F_UFO) {
2956 		if (!(dev->features & NETIF_F_HW_CSUM)) {
2957 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2958 					"NETIF_F_HW_CSUM feature.\n",
2959 							dev->name);
2960 			dev->features &= ~NETIF_F_UFO;
2961 		}
2962 		if (!(dev->features & NETIF_F_SG)) {
2963 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2964 					"NETIF_F_SG feature.\n",
2965 					dev->name);
2966 			dev->features &= ~NETIF_F_UFO;
2967 		}
2968 	}
2969 
2970 	/*
2971 	 *	nil rebuild_header routine,
2972 	 *	that should be never called and used as just bug trap.
2973 	 */
2974 
2975 	if (!dev->rebuild_header)
2976 		dev->rebuild_header = default_rebuild_header;
2977 
2978 	ret = netdev_register_sysfs(dev);
2979 	if (ret)
2980 		goto out_err;
2981 	dev->reg_state = NETREG_REGISTERED;
2982 
2983 	/*
2984 	 *	Default initial state at registry is that the
2985 	 *	device is present.
2986 	 */
2987 
2988 	set_bit(__LINK_STATE_PRESENT, &dev->state);
2989 
2990 	dev->next = NULL;
2991 	dev_init_scheduler(dev);
2992 	write_lock_bh(&dev_base_lock);
2993 	*dev_tail = dev;
2994 	dev_tail = &dev->next;
2995 	hlist_add_head(&dev->name_hlist, head);
2996 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2997 	dev_hold(dev);
2998 	write_unlock_bh(&dev_base_lock);
2999 
3000 	/* Notify protocols, that a new device appeared. */
3001 	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3002 
3003 	ret = 0;
3004 
3005 out:
3006 	return ret;
3007 out_err:
3008 	free_divert_blk(dev);
3009 	goto out;
3010 }
3011 
3012 /**
3013  *	register_netdev	- register a network device
3014  *	@dev: device to register
3015  *
3016  *	Take a completed network device structure and add it to the kernel
3017  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3018  *	chain. 0 is returned on success. A negative errno code is returned
3019  *	on a failure to set up the device, or if the name is a duplicate.
3020  *
3021  *	This is a wrapper around register_netdev that takes the rtnl semaphore
3022  *	and expands the device name if you passed a format string to
3023  *	alloc_netdev.
3024  */
3025 int register_netdev(struct net_device *dev)
3026 {
3027 	int err;
3028 
3029 	rtnl_lock();
3030 
3031 	/*
3032 	 * If the name is a format string the caller wants us to do a
3033 	 * name allocation.
3034 	 */
3035 	if (strchr(dev->name, '%')) {
3036 		err = dev_alloc_name(dev, dev->name);
3037 		if (err < 0)
3038 			goto out;
3039 	}
3040 
3041 	/*
3042 	 * Back compatibility hook. Kill this one in 2.5
3043 	 */
3044 	if (dev->name[0] == 0 || dev->name[0] == ' ') {
3045 		err = dev_alloc_name(dev, "eth%d");
3046 		if (err < 0)
3047 			goto out;
3048 	}
3049 
3050 	err = register_netdevice(dev);
3051 out:
3052 	rtnl_unlock();
3053 	return err;
3054 }
3055 EXPORT_SYMBOL(register_netdev);
3056 
3057 /*
3058  * netdev_wait_allrefs - wait until all references are gone.
3059  *
3060  * This is called when unregistering network devices.
3061  *
3062  * Any protocol or device that holds a reference should register
3063  * for netdevice notification, and cleanup and put back the
3064  * reference if they receive an UNREGISTER event.
3065  * We can get stuck here if buggy protocols don't correctly
3066  * call dev_put.
3067  */
3068 static void netdev_wait_allrefs(struct net_device *dev)
3069 {
3070 	unsigned long rebroadcast_time, warning_time;
3071 
3072 	rebroadcast_time = warning_time = jiffies;
3073 	while (atomic_read(&dev->refcnt) != 0) {
3074 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3075 			rtnl_lock();
3076 
3077 			/* Rebroadcast unregister notification */
3078 			raw_notifier_call_chain(&netdev_chain,
3079 					    NETDEV_UNREGISTER, dev);
3080 
3081 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3082 				     &dev->state)) {
3083 				/* We must not have linkwatch events
3084 				 * pending on unregister. If this
3085 				 * happens, we simply run the queue
3086 				 * unscheduled, resulting in a noop
3087 				 * for this device.
3088 				 */
3089 				linkwatch_run_queue();
3090 			}
3091 
3092 			__rtnl_unlock();
3093 
3094 			rebroadcast_time = jiffies;
3095 		}
3096 
3097 		msleep(250);
3098 
3099 		if (time_after(jiffies, warning_time + 10 * HZ)) {
3100 			printk(KERN_EMERG "unregister_netdevice: "
3101 			       "waiting for %s to become free. Usage "
3102 			       "count = %d\n",
3103 			       dev->name, atomic_read(&dev->refcnt));
3104 			warning_time = jiffies;
3105 		}
3106 	}
3107 }
3108 
/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *      ...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock() after it drops the semaphore.
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
/* Serializes whole todo runs against each other. */
static DEFINE_MUTEX(net_todo_run_mutex);
/*
 * Finish unregistering every device queued by net_set_todo(): remove its
 * sysfs entry, wait for its reference count to reach zero and call its
 * destructor, if it has one.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Need to guard against multiple cpu's getting out of order. */
	mutex_lock(&net_todo_run_mutex);

	/* Not safe to do outside the semaphore.  We must not return
	 * until all unregister events invoked by the local processor
	 * have been completed (either by this todo run, or one on
	 * another cpu).
	 */
	if (list_empty(&net_todo_list))
		goto out;

	/* Snapshot list, allow later requests */
	spin_lock(&net_todo_list_lock);
	list_replace_init(&net_todo_list, &list);
	spin_unlock(&net_todo_list_lock);

	/* The snapshot is private to this run, so no lock is held while draining it. */
	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		/* Only devices that went through unregister_netdevice() belong here. */
		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		netdev_unregister_sysfs(dev);
		dev->reg_state = NETREG_UNREGISTERED;

		/* May sleep for a long time; see netdev_wait_allrefs(). */
		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		BUG_TRAP(!dev->ip_ptr);
		BUG_TRAP(!dev->ip6_ptr);
		BUG_TRAP(!dev->dn_ptr);

		/* It must be the very last action,
		 * after this 'dev' may point to freed up memory.
		 */
		if (dev->destructor)
			dev->destructor(dev);
	}

out:
	mutex_unlock(&net_todo_run_mutex);
}
3184 
3185 /**
3186  *	alloc_netdev - allocate network device
3187  *	@sizeof_priv:	size of private data to allocate space for
3188  *	@name:		device name format string
3189  *	@setup:		callback to initialize device
3190  *
3191  *	Allocates a struct net_device with private data area for driver use
3192  *	and performs basic initialization.
3193  */
3194 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3195 		void (*setup)(struct net_device *))
3196 {
3197 	void *p;
3198 	struct net_device *dev;
3199 	int alloc_size;
3200 
3201 	/* ensure 32-byte alignment of both the device and private area */
3202 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3203 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3204 
3205 	p = kzalloc(alloc_size, GFP_KERNEL);
3206 	if (!p) {
3207 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3208 		return NULL;
3209 	}
3210 
3211 	dev = (struct net_device *)
3212 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3213 	dev->padded = (char *)dev - (char *)p;
3214 
3215 	if (sizeof_priv)
3216 		dev->priv = netdev_priv(dev);
3217 
3218 	setup(dev);
3219 	strcpy(dev->name, name);
3220 	return dev;
3221 }
3222 EXPORT_SYMBOL(alloc_netdev);
3223 
3224 /**
3225  *	free_netdev - free network device
3226  *	@dev: device
3227  *
3228  *	This function does the last stage of destroying an allocated device
3229  * 	interface. The reference to the device object is released.
3230  *	If this is the last reference then it will be freed.
3231  */
3232 void free_netdev(struct net_device *dev)
3233 {
3234 #ifdef CONFIG_SYSFS
3235 	/*  Compatibility with error handling in drivers */
3236 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3237 		kfree((char *)dev - dev->padded);
3238 		return;
3239 	}
3240 
3241 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3242 	dev->reg_state = NETREG_RELEASED;
3243 
3244 	/* will free via class release */
3245 	class_device_put(&dev->class_dev);
3246 #else
3247 	kfree((char *)dev - dev->padded);
3248 #endif
3249 }
3250 
/*
 * Synchronize with packet receive processing.
 *
 * Implemented as synchronize_rcu(); annotated with might_sleep() because
 * the caller has to be in a context that is allowed to sleep.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
3257 
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables. On success 0 is returned, on a failure
 *	a negative errno code is returned.
 *
 *	-ENODEV is returned for a device that was never registered or is
 *	not found on the device list.  The final teardown is deferred:
 *	the device is queued with net_set_todo() for netdev_run_todo().
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */

int unregister_netdevice(struct net_device *dev)
{
	struct net_device *d, **dp;

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);
		return -ENODEV;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/* If device is running, close it first. */
	if (dev->flags & IFF_UP)
		dev_close(dev);

	/* And unlink it from device chain. */
	/* The walk itself is unlocked; dev_base_lock is taken only around the unlink. */
	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
		if (d == dev) {
			write_lock_bh(&dev_base_lock);
			hlist_del(&dev->name_hlist);
			hlist_del(&dev->index_hlist);
			/* Removing the last element: pull the tail pointer back. */
			if (dev_tail == &dev->next)
				dev_tail = dp;
			*dp = d->next;
			write_unlock_bh(&dev_base_lock);
			break;
		}
	}
	/* d is NULL only if the loop ran off the end without a match. */
	if (!d) {
		printk(KERN_ERR "unregister net_device: '%s' not found\n",
		       dev->name);
		return -ENODEV;
	}

	dev->reg_state = NETREG_UNREGISTERING;

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);


	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the multicast chain
	 */
	dev_mc_discard(dev);

	if (dev->uninit)
		dev->uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	BUG_TRAP(!dev->master);

	free_divert_blk(dev);

	/* Finish processing unregister after unlock */
	net_set_todo(dev);

	synchronize_net();

	/* Drop the reference register_netdevice() took when linking the device in. */
	dev_put(dev);
	return 0;
}
3343 
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 *
 *	Nothing is returned: the status reported by unregister_netdevice
 *	is discarded.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3364 
#ifdef CONFIG_HOTPLUG_CPU
/*
 * CPU notifier callback.  Only CPU_DEAD is acted upon: the offline CPU's
 * completion queue and output queue are appended to those of the CPU
 * running this callback, NET_TX_SOFTIRQ is raised, and every skb left on
 * the offline CPU's input_pkt_queue is fed back through netif_rx().
 * @ocpu carries the number of the offline CPU.  Always returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	unsigned int dead_cpu = (unsigned long)ocpu;
	struct softnet_data *here, *gone;
	struct sk_buff **skb_end;
	struct net_device **txq_end;
	struct sk_buff *skb;

	if (action != CPU_DEAD)
		return NOTIFY_OK;

	local_irq_disable();
	here = &per_cpu(softnet_data, smp_processor_id());
	gone = &per_cpu(softnet_data, dead_cpu);

	/* Walk to the end of our completion_queue and splice theirs on. */
	for (skb_end = &here->completion_queue; *skb_end;
	     skb_end = &(*skb_end)->next)
		;
	*skb_end = gone->completion_queue;
	gone->completion_queue = NULL;

	/* Same again for the output_queue. */
	for (txq_end = &here->output_queue; *txq_end;
	     txq_end = &(*txq_end)->next_sched)
		;
	*txq_end = gone->output_queue;
	gone->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	for (;;) {
		skb = __skb_dequeue(&gone->input_pkt_queue);
		if (!skb)
			break;
		netif_rx(skb);
	}

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
3410 
3411 #ifdef CONFIG_NET_DMA
3412 /**
3413  * net_dma_rebalance -
3414  * This is called when the number of channels allocated to the net_dma_client
3415  * changes.  The net_dma_client tries to have one DMA channel per CPU.
3416  */
3417 static void net_dma_rebalance(void)
3418 {
3419 	unsigned int cpu, i, n;
3420 	struct dma_chan *chan;
3421 
3422 	if (net_dma_count == 0) {
3423 		for_each_online_cpu(cpu)
3424 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3425 		return;
3426 	}
3427 
3428 	i = 0;
3429 	cpu = first_cpu(cpu_online_map);
3430 
3431 	rcu_read_lock();
3432 	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3433 		n = ((num_online_cpus() / net_dma_count)
3434 		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3435 
3436 		while(n) {
3437 			per_cpu(softnet_data, cpu).net_dma = chan;
3438 			cpu = next_cpu(cpu, cpu_online_map);
3439 			n--;
3440 		}
3441 		i++;
3442 	}
3443 	rcu_read_unlock();
3444 }
3445 
3446 /**
3447  * netdev_dma_event - event callback for the net_dma_client
3448  * @client: should always be net_dma_client
3449  * @chan: DMA channel for the event
3450  * @event: event type
3451  */
3452 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3453 	enum dma_event event)
3454 {
3455 	spin_lock(&net_dma_event_lock);
3456 	switch (event) {
3457 	case DMA_RESOURCE_ADDED:
3458 		net_dma_count++;
3459 		net_dma_rebalance();
3460 		break;
3461 	case DMA_RESOURCE_REMOVED:
3462 		net_dma_count--;
3463 		net_dma_rebalance();
3464 		break;
3465 	default:
3466 		break;
3467 	}
3468 	spin_unlock(&net_dma_event_lock);
3469 }
3470 
3471 /**
3472  * netdev_dma_regiser - register the networking subsystem as a DMA client
3473  */
3474 static int __init netdev_dma_register(void)
3475 {
3476 	spin_lock_init(&net_dma_event_lock);
3477 	net_dma_client = dma_async_client_register(netdev_dma_event);
3478 	if (net_dma_client == NULL)
3479 		return -ENOMEM;
3480 
3481 	dma_async_client_chan_request(net_dma_client, num_online_cpus());
3482 	return 0;
3483 }
3484 
3485 #else
3486 static int __init netdev_dma_register(void) { return -ENODEV; }
3487 #endif /* CONFIG_NET_DMA */
3488 
3489 /*
3490  *	Initialize the DEV module. At boot time this walks the device list and
3491  *	unhooks any devices that fail to initialise (normally hardware not
3492  *	present) and leaves us with a valid list of present and active devices.
3493  *
3494  */
3495 
3496 /*
3497  *       This is called single threaded during boot, so no need
3498  *       to take the rtnl semaphore.
3499  */
3500 static int __init net_dev_init(void)
3501 {
3502 	int i, rc = -ENOMEM;
3503 
3504 	BUG_ON(!dev_boot_phase);
3505 
3506 	net_random_init();
3507 
3508 	if (dev_proc_init())
3509 		goto out;
3510 
3511 	if (netdev_sysfs_init())
3512 		goto out;
3513 
3514 	INIT_LIST_HEAD(&ptype_all);
3515 	for (i = 0; i < 16; i++)
3516 		INIT_LIST_HEAD(&ptype_base[i]);
3517 
3518 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3519 		INIT_HLIST_HEAD(&dev_name_head[i]);
3520 
3521 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3522 		INIT_HLIST_HEAD(&dev_index_head[i]);
3523 
3524 	/*
3525 	 *	Initialise the packet receive queues.
3526 	 */
3527 
3528 	for_each_possible_cpu(i) {
3529 		struct softnet_data *queue;
3530 
3531 		queue = &per_cpu(softnet_data, i);
3532 		skb_queue_head_init(&queue->input_pkt_queue);
3533 		queue->completion_queue = NULL;
3534 		INIT_LIST_HEAD(&queue->poll_list);
3535 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3536 		queue->backlog_dev.weight = weight_p;
3537 		queue->backlog_dev.poll = process_backlog;
3538 		atomic_set(&queue->backlog_dev.refcnt, 1);
3539 	}
3540 
3541 	netdev_dma_register();
3542 
3543 	dev_boot_phase = 0;
3544 
3545 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3546 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3547 
3548 	hotcpu_notifier(dev_cpu_callback, 0);
3549 	dst_init();
3550 	dev_mcast_init();
3551 	rc = 0;
3552 out:
3553 	return rc;
3554 }
3555 
3556 subsys_initcall(net_dev_init);
3557 
/* Unconditional exports, kept in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Exported only when bridging is built in or built as a module. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);
3604