xref: /linux/net/core/dev.c (revision ba6e8564f459211117ce300eae2c7fdd23befe34)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/sock.h>
96 #include <linux/rtnetlink.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/stat.h>
100 #include <linux/if_bridge.h>
101 #include <net/dst.h>
102 #include <net/pkt_sched.h>
103 #include <net/checksum.h>
104 #include <linux/highmem.h>
105 #include <linux/init.h>
106 #include <linux/kmod.h>
107 #include <linux/module.h>
108 #include <linux/kallsyms.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #include <linux/wireless.h>
113 #include <net/iw_handler.h>
114 #include <asm/current.h>
115 #include <linux/audit.h>
116 #include <linux/dmaengine.h>
117 #include <linux/err.h>
118 #include <linux/ctype.h>
119 
120 /*
121  *	The list of packet types we will receive (as opposed to discard)
122  *	and the routines to invoke.
123  *
124  *	Why 16? Because with 16 the only overlap we get on a hash of the
125  *	low nibble of the protocol value is RARP/SNAP/X.25.
126  *
127  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
128  *             sure which should go first, but I bet it won't make much
129  *             difference if we are running VLANs.  The good news is that
130  *             this protocol won't be in the list unless compiled in, so
131  *             the average user (w/out VLANs) will not be adversely affected.
132  *             --BLG
133  *
134  *		0800	IP
135  *		8100    802.1Q VLAN
136  *		0001	802.3
137  *		0002	AX.25
138  *		0004	802.2
139  *		8035	RARP
140  *		0005	SNAP
141  *		0805	X.25
142  *		0806	ARP
143  *		8137	IPX
144  *		0009	Localtalk
145  *		86DD	IPv6
146  */
147 
148 static DEFINE_SPINLOCK(ptype_lock);
149 static struct list_head ptype_base[16];	/* 16 way hashed list */
150 static struct list_head ptype_all;		/* Taps */
151 
152 #ifdef CONFIG_NET_DMA
153 static struct dma_client *net_dma_client;
154 static unsigned int net_dma_count;
155 static spinlock_t net_dma_event_lock;
156 #endif
157 
158 /*
159  * The @dev_base list is protected by @dev_base_lock and the rtnl
160  * semaphore.
161  *
162  * Pure readers hold dev_base_lock for reading.
163  *
164  * Writers must hold the rtnl semaphore while they loop through the
165  * dev_base list, and hold dev_base_lock for writing when they do the
166  * actual updates.  This allows pure readers to access the list even
167  * while a writer is preparing to update it.
168  *
169  * To put it another way, dev_base_lock is held for writing only to
170  * protect against pure readers; the rtnl semaphore provides the
171  * protection against other writers.
172  *
173  * See, for example usages, register_netdevice() and
174  * unregister_netdevice(), which must be called with the rtnl
175  * semaphore held.
176  */
177 struct net_device *dev_base;
178 static struct net_device **dev_tail = &dev_base;
179 DEFINE_RWLOCK(dev_base_lock);
180 
181 EXPORT_SYMBOL(dev_base);
182 EXPORT_SYMBOL(dev_base_lock);
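/*
 * Illustrative sketch: a pure reader walking the dev_base list under
 * dev_base_lock, following the locking rules described above.  The
 * example_* name is hypothetical and not part of this file.
 */
static int example_count_running_devices(void)
{
	struct net_device *d;
	int running = 0;

	read_lock(&dev_base_lock);
	for (d = dev_base; d; d = d->next)
		if (d->flags & IFF_UP)
			running++;
	read_unlock(&dev_base_lock);

	return running;
}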
183 
184 #define NETDEV_HASHBITS	8
185 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
186 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
187 
188 static inline struct hlist_head *dev_name_hash(const char *name)
189 {
190 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
191 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
192 }
193 
194 static inline struct hlist_head *dev_index_hash(int ifindex)
195 {
196 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
197 }
198 
199 /*
200  *	Our notifier list
201  */
202 
203 static RAW_NOTIFIER_HEAD(netdev_chain);
204 
205 /*
206  *	Device drivers call our routines to queue packets here. We empty the
207  *	queue in the local softnet handler.
208  */
209 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
210 
211 #ifdef CONFIG_SYSFS
212 extern int netdev_sysfs_init(void);
213 extern int netdev_register_sysfs(struct net_device *);
214 extern void netdev_unregister_sysfs(struct net_device *);
215 #else
216 #define netdev_sysfs_init()	 	(0)
217 #define netdev_register_sysfs(dev)	(0)
218 #define	netdev_unregister_sysfs(dev)	do { } while(0)
219 #endif
220 
221 
222 /*******************************************************************************
223 
224 		Protocol management and registration routines
225 
226 *******************************************************************************/
227 
228 /*
229  *	For efficiency
230  */
231 
232 static int netdev_nit;
233 
234 /*
235  *	Add a protocol ID to the list. Now that the input handler is
236  *	smarter we can dispense with all the messy stuff that used to be
237  *	here.
238  *
239  *	BEWARE!!! Protocol handlers that mangle input packets
240  *	MUST BE last in the hash buckets, and checking protocol handlers
241  *	MUST start from the promiscuous ptype_all chain in net_bh.
242  *	It is true now, do not change it.
243  *	Explanation follows: if a protocol handler that mangles packets
244  *	is the first on the list, it cannot sense that the packet
245  *	is cloned and should be copied-on-write, so it will
246  *	change it and subsequent readers will get a broken packet.
247  *							--ANK (980803)
248  */
249 
250 /**
251  *	dev_add_pack - add packet handler
252  *	@pt: packet type declaration
253  *
254  *	Add a protocol handler to the networking stack. The passed &packet_type
255  *	is linked into kernel lists and may not be freed until it has been
256  *	removed from the kernel lists.
257  *
258  *	This call does not sleep, therefore it cannot guarantee that
259  *	all CPUs that are in the middle of receiving packets will see
260  *	the new packet type (until the next received packet).
261  */
262 
263 void dev_add_pack(struct packet_type *pt)
264 {
265 	int hash;
266 
267 	spin_lock_bh(&ptype_lock);
268 	if (pt->type == htons(ETH_P_ALL)) {
269 		netdev_nit++;
270 		list_add_rcu(&pt->list, &ptype_all);
271 	} else {
272 		hash = ntohs(pt->type) & 15;
273 		list_add_rcu(&pt->list, &ptype_base[hash]);
274 	}
275 	spin_unlock_bh(&ptype_lock);
276 }
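/*
 * Illustrative sketch of how a protocol module might hook into the stack
 * with dev_add_pack().  The handler signature matches ptype->func as it is
 * invoked via deliver_skb() below; all example_* names are hypothetical.
 */
static int example_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	/* A real handler would parse skb->data here; this one just drops. */
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type example_packet_type = {
	.type	= __constant_htons(ETH_P_802_2),
	.func	= example_rcv,
	/* .dev left NULL: receive from every device */
};

static int __init example_proto_init(void)
{
	dev_add_pack(&example_packet_type);
	return 0;
}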
277 
278 /**
279  *	__dev_remove_pack	 - remove packet handler
280  *	@pt: packet type declaration
281  *
282  *	Remove a protocol handler that was previously added to the kernel
283  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
284  *	from the kernel lists and can be freed or reused once this function
285  *	returns.
286  *
287  *      The packet type might still be in use by receivers
288  *	and must not be freed until after all the CPUs have gone
289  *	through a quiescent state.
290  */
291 void __dev_remove_pack(struct packet_type *pt)
292 {
293 	struct list_head *head;
294 	struct packet_type *pt1;
295 
296 	spin_lock_bh(&ptype_lock);
297 
298 	if (pt->type == htons(ETH_P_ALL)) {
299 		netdev_nit--;
300 		head = &ptype_all;
301 	} else
302 		head = &ptype_base[ntohs(pt->type) & 15];
303 
304 	list_for_each_entry(pt1, head, list) {
305 		if (pt == pt1) {
306 			list_del_rcu(&pt->list);
307 			goto out;
308 		}
309 	}
310 
311 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
312 out:
313 	spin_unlock_bh(&ptype_lock);
314 }
315 /**
316  *	dev_remove_pack	 - remove packet handler
317  *	@pt: packet type declaration
318  *
319  *	Remove a protocol handler that was previously added to the kernel
320  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
321  *	from the kernel lists and can be freed or reused once this function
322  *	returns.
323  *
324  *	This call sleeps to guarantee that no CPU is looking at the packet
325  *	type after return.
326  */
327 void dev_remove_pack(struct packet_type *pt)
328 {
329 	__dev_remove_pack(pt);
330 
331 	synchronize_net();
332 }
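/*
 * Companion sketch to the hypothetical example_proto_init() above: the
 * handler must be torn down with dev_remove_pack() (or __dev_remove_pack()
 * followed by synchronize_net()) before its memory may be reused.
 */
static void __exit example_proto_exit(void)
{
	dev_remove_pack(&example_packet_type);
}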
333 
334 /******************************************************************************
335 
336 		      Device Boot-time Settings Routines
337 
338 *******************************************************************************/
339 
340 /* Boot time configuration table */
341 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
342 
343 /**
344  *	netdev_boot_setup_add	- add new setup entry
345  *	@name: name of the device
346  *	@map: configured settings for the device
347  *
348  *	Adds a new setup entry to the dev_boot_setup list.  The function
349  *	returns 0 on error and 1 on success.  This is a generic routine for
350  *	all netdevices.
351  */
352 static int netdev_boot_setup_add(char *name, struct ifmap *map)
353 {
354 	struct netdev_boot_setup *s;
355 	int i;
356 
357 	s = dev_boot_setup;
358 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
359 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
360 			memset(s[i].name, 0, sizeof(s[i].name));
361 			strcpy(s[i].name, name);
362 			memcpy(&s[i].map, map, sizeof(s[i].map));
363 			break;
364 		}
365 	}
366 
367 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
368 }
369 
370 /**
371  *	netdev_boot_setup_check	- check boot time settings
372  *	@dev: the netdevice
373  *
374  * 	Check boot time settings for the device.
375  *	Any settings found are copied into the device so they can be
376  *	used later during device probing.
377  *	Returns 0 if no settings are found, 1 if they are.
378  */
379 int netdev_boot_setup_check(struct net_device *dev)
380 {
381 	struct netdev_boot_setup *s = dev_boot_setup;
382 	int i;
383 
384 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
385 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
386 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
387 			dev->irq 	= s[i].map.irq;
388 			dev->base_addr 	= s[i].map.base_addr;
389 			dev->mem_start 	= s[i].map.mem_start;
390 			dev->mem_end 	= s[i].map.mem_end;
391 			return 1;
392 		}
393 	}
394 	return 0;
395 }
396 
397 
398 /**
399  *	netdev_boot_base	- get address from boot time settings
400  *	@prefix: prefix for network device
401  *	@unit: id for network device
402  *
403  * 	Check the boot time settings for the base address of the device.
404  *	The found setting is returned so it can be used later during
405  *	device probing.
406  *	Returns 0 if no setting is found.
407  */
408 unsigned long netdev_boot_base(const char *prefix, int unit)
409 {
410 	const struct netdev_boot_setup *s = dev_boot_setup;
411 	char name[IFNAMSIZ];
412 	int i;
413 
414 	sprintf(name, "%s%d", prefix, unit);
415 
416 	/*
417 	 * If device already registered then return base of 1
418 	 * to indicate not to probe for this interface
419 	 */
420 	if (__dev_get_by_name(name))
421 		return 1;
422 
423 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
424 		if (!strcmp(name, s[i].name))
425 			return s[i].map.base_addr;
426 	return 0;
427 }
428 
429 /*
430  * Saves the settings configured at boot time for any netdevice.
431  */
432 int __init netdev_boot_setup(char *str)
433 {
434 	int ints[5];
435 	struct ifmap map;
436 
437 	str = get_options(str, ARRAY_SIZE(ints), ints);
438 	if (!str || !*str)
439 		return 0;
440 
441 	/* Save settings */
442 	memset(&map, 0, sizeof(map));
443 	if (ints[0] > 0)
444 		map.irq = ints[1];
445 	if (ints[0] > 1)
446 		map.base_addr = ints[2];
447 	if (ints[0] > 2)
448 		map.mem_start = ints[3];
449 	if (ints[0] > 3)
450 		map.mem_end = ints[4];
451 
452 	/* Add new entry to the list */
453 	return netdev_boot_setup_add(str, &map);
454 }
455 
456 __setup("netdev=", netdev_boot_setup);
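/*
 * Illustrative sketch (hypothetical probe routine): with a boot parameter
 * such as "netdev=9,0x300,0,0,eth0", a legacy driver can pick the stored
 * irq/base_addr up through netdev_boot_setup_check() before touching the
 * hardware.
 */
static int example_legacy_probe(struct net_device *dev)
{
	if (netdev_boot_setup_check(dev)) {
		/* dev->irq, dev->base_addr etc. now hold the boot values */
		return 0;
	}
	return -ENODEV;
}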
457 
458 /*******************************************************************************
459 
460 			    Device Interface Subroutines
461 
462 *******************************************************************************/
463 
464 /**
465  *	__dev_get_by_name	- find a device by its name
466  *	@name: name to find
467  *
468  *	Find an interface by name. Must be called under RTNL semaphore
469  *	or @dev_base_lock. If the name is found a pointer to the device
470  *	is returned. If the name is not found then %NULL is returned. The
471  *	reference counters are not incremented so the caller must be
472  *	careful with locks.
473  */
474 
475 struct net_device *__dev_get_by_name(const char *name)
476 {
477 	struct hlist_node *p;
478 
479 	hlist_for_each(p, dev_name_hash(name)) {
480 		struct net_device *dev
481 			= hlist_entry(p, struct net_device, name_hlist);
482 		if (!strncmp(dev->name, name, IFNAMSIZ))
483 			return dev;
484 	}
485 	return NULL;
486 }
487 
488 /**
489  *	dev_get_by_name		- find a device by its name
490  *	@name: name to find
491  *
492  *	Find an interface by name. This can be called from any
493  *	context and does its own locking. The returned handle has
494  *	the usage count incremented and the caller must use dev_put() to
495  *	release it when it is no longer needed. %NULL is returned if no
496  *	matching device is found.
497  */
498 
499 struct net_device *dev_get_by_name(const char *name)
500 {
501 	struct net_device *dev;
502 
503 	read_lock(&dev_base_lock);
504 	dev = __dev_get_by_name(name);
505 	if (dev)
506 		dev_hold(dev);
507 	read_unlock(&dev_base_lock);
508 	return dev;
509 }
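/*
 * Illustrative sketch of the usual lookup/release pairing: every successful
 * dev_get_by_name() must be balanced by dev_put().  The example_* name is
 * hypothetical.
 */
static int example_is_loopback_up(void)
{
	struct net_device *dev;
	int up = 0;

	dev = dev_get_by_name("lo");
	if (dev) {
		up = (dev->flags & IFF_UP) != 0;
		dev_put(dev);
	}
	return up;
}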
510 
511 /**
512  *	__dev_get_by_index - find a device by its ifindex
513  *	@ifindex: index of device
514  *
515  *	Search for an interface by index. Returns a pointer to the device,
516  *	or %NULL if it is not found. The device has not had its reference
517  *	counter increased, so the caller must be careful
518  *	about locking. The caller must hold either the RTNL semaphore
519  *	or @dev_base_lock.
520  */
521 
522 struct net_device *__dev_get_by_index(int ifindex)
523 {
524 	struct hlist_node *p;
525 
526 	hlist_for_each(p, dev_index_hash(ifindex)) {
527 		struct net_device *dev
528 			= hlist_entry(p, struct net_device, index_hlist);
529 		if (dev->ifindex == ifindex)
530 			return dev;
531 	}
532 	return NULL;
533 }
534 
535 
536 /**
537  *	dev_get_by_index - find a device by its ifindex
538  *	@ifindex: index of device
539  *
540  *	Search for an interface by index. Returns a pointer to the device,
541  *	or NULL if it is not found. The device returned has
542  *	had a reference added and the pointer is safe until the user calls
543  *	dev_put to indicate they have finished with it.
544  */
545 
546 struct net_device *dev_get_by_index(int ifindex)
547 {
548 	struct net_device *dev;
549 
550 	read_lock(&dev_base_lock);
551 	dev = __dev_get_by_index(ifindex);
552 	if (dev)
553 		dev_hold(dev);
554 	read_unlock(&dev_base_lock);
555 	return dev;
556 }
557 
558 /**
559  *	dev_getbyhwaddr - find a device by its hardware address
560  *	@type: media type of device
561  *	@ha: hardware address
562  *
563  *	Search for an interface by MAC address. Returns a pointer to the
564  *	device, or NULL if it is not found. The caller must hold the
565  *	rtnl semaphore. The returned device has not had its ref count
566  *	increased, and the caller must therefore be careful about locking.
567  *
568  *	BUGS:
569  *	If the API was consistent this would be __dev_get_by_hwaddr
570  */
571 
572 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
573 {
574 	struct net_device *dev;
575 
576 	ASSERT_RTNL();
577 
578 	for (dev = dev_base; dev; dev = dev->next)
579 		if (dev->type == type &&
580 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
581 			break;
582 	return dev;
583 }
584 
585 EXPORT_SYMBOL(dev_getbyhwaddr);
586 
587 struct net_device *dev_getfirstbyhwtype(unsigned short type)
588 {
589 	struct net_device *dev;
590 
591 	rtnl_lock();
592 	for (dev = dev_base; dev; dev = dev->next) {
593 		if (dev->type == type) {
594 			dev_hold(dev);
595 			break;
596 		}
597 	}
598 	rtnl_unlock();
599 	return dev;
600 }
601 
602 EXPORT_SYMBOL(dev_getfirstbyhwtype);
603 
604 /**
605  *	dev_get_by_flags - find any device with given flags
606  *	@if_flags: IFF_* values
607  *	@mask: bitmask of bits in if_flags to check
608  *
609  *	Search for any interface with the given flags. Returns a pointer to
610  *	the first matching device, or NULL if none is found. The device returned has
611  *	had a reference added and the pointer is safe until the user calls
612  *	dev_put to indicate they have finished with it.
613  */
614 
615 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
616 {
617 	struct net_device *dev;
618 
619 	read_lock(&dev_base_lock);
620 	for (dev = dev_base; dev != NULL; dev = dev->next) {
621 		if (((dev->flags ^ if_flags) & mask) == 0) {
622 			dev_hold(dev);
623 			break;
624 		}
625 	}
626 	read_unlock(&dev_base_lock);
627 	return dev;
628 }
629 
630 /**
631  *	dev_valid_name - check if name is okay for network device
632  *	@name: name string
633  *
634  *	Network device names need to be valid file names to
635  *	allow sysfs to work.  We also disallow any kind of
636  *	whitespace.
637  */
638 int dev_valid_name(const char *name)
639 {
640 	if (*name == '\0')
641 		return 0;
642 	if (strlen(name) >= IFNAMSIZ)
643 		return 0;
644 	if (!strcmp(name, ".") || !strcmp(name, ".."))
645 		return 0;
646 
647 	while (*name) {
648 		if (*name == '/' || isspace(*name))
649 			return 0;
650 		name++;
651 	}
652 	return 1;
653 }
654 
655 /**
656  *	dev_alloc_name - allocate a name for a device
657  *	@dev: device
658  *	@name: name format string
659  *
660  *	Passed a format string - e.g. "lt%d" - it will try to find a suitable
661  *	id. It scans the list of devices to build up a free map, then chooses
662  *	the first empty slot. The caller must hold the dev_base or rtnl lock
663  *	while allocating the name and adding the device in order to avoid
664  *	duplicates.
665  *	Limited to bits_per_byte * page size devices (i.e. 32K on most platforms).
666  *	Returns the number of the unit assigned or a negative errno code.
667  */
668 
669 int dev_alloc_name(struct net_device *dev, const char *name)
670 {
671 	int i = 0;
672 	char buf[IFNAMSIZ];
673 	const char *p;
674 	const int max_netdevices = 8*PAGE_SIZE;
675 	long *inuse;
676 	struct net_device *d;
677 
678 	p = strnchr(name, IFNAMSIZ-1, '%');
679 	if (p) {
680 		/*
681 		 * Verify the string as this thing may have come from
682 		 * the user.  There must be either one "%d" and no other "%"
683 		 * characters.
684 		 */
685 		if (p[1] != 'd' || strchr(p + 2, '%'))
686 			return -EINVAL;
687 
688 		/* Use one page as a bit array of possible slots */
689 		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
690 		if (!inuse)
691 			return -ENOMEM;
692 
693 		for (d = dev_base; d; d = d->next) {
694 			if (!sscanf(d->name, name, &i))
695 				continue;
696 			if (i < 0 || i >= max_netdevices)
697 				continue;
698 
699 			/*  avoid cases where sscanf is not exact inverse of printf */
700 			snprintf(buf, sizeof(buf), name, i);
701 			if (!strncmp(buf, d->name, IFNAMSIZ))
702 				set_bit(i, inuse);
703 		}
704 
705 		i = find_first_zero_bit(inuse, max_netdevices);
706 		free_page((unsigned long) inuse);
707 	}
708 
709 	snprintf(buf, sizeof(buf), name, i);
710 	if (!__dev_get_by_name(buf)) {
711 		strlcpy(dev->name, buf, IFNAMSIZ);
712 		return i;
713 	}
714 
715 	/* It is possible to run out of possible slots
716 	 * when the name is long and there isn't enough space left
717 	 * for the digits, or if all bits are used.
718 	 */
719 	return -ENFILE;
720 }
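/*
 * Illustrative sketch (hypothetical caller): a driver that wants an
 * automatically numbered name passes a "%d" format while holding the
 * rtnl lock.
 */
static int example_pick_name(struct net_device *dev)
{
	int unit = dev_alloc_name(dev, "dummy%d");

	if (unit < 0)
		return unit;	/* -EINVAL, -ENOMEM or -ENFILE */
	/* dev->name now holds e.g. "dummy0"; unit is the number chosen */
	return 0;
}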
721 
722 
723 /**
724  *	dev_change_name - change name of a device
725  *	@dev: device
726  *	@newname: name (or format string) must be at least IFNAMSIZ
727  *
728  *	Change the name of a device. A format string such as "eth%d"
729  *	can be passed for wildcarding.
730  */
731 int dev_change_name(struct net_device *dev, char *newname)
732 {
733 	int err = 0;
734 
735 	ASSERT_RTNL();
736 
737 	if (dev->flags & IFF_UP)
738 		return -EBUSY;
739 
740 	if (!dev_valid_name(newname))
741 		return -EINVAL;
742 
743 	if (strchr(newname, '%')) {
744 		err = dev_alloc_name(dev, newname);
745 		if (err < 0)
746 			return err;
747 		strcpy(newname, dev->name);
748 	}
749 	else if (__dev_get_by_name(newname))
750 		return -EEXIST;
751 	else
752 		strlcpy(dev->name, newname, IFNAMSIZ);
753 
754 	device_rename(&dev->dev, dev->name);
755 	hlist_del(&dev->name_hlist);
756 	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
757 	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
758 
759 	return err;
760 }
761 
762 /**
763  *	netdev_features_change - device changes features
764  *	@dev: device to cause notification
765  *
766  *	Called to indicate a device has changed features.
767  */
768 void netdev_features_change(struct net_device *dev)
769 {
770 	raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
771 }
772 EXPORT_SYMBOL(netdev_features_change);
773 
774 /**
775  *	netdev_state_change - device changes state
776  *	@dev: device to cause notification
777  *
778  *	Called to indicate a device has changed state. This function calls
779  *	the notifier chains for netdev_chain and sends a NEWLINK message
780  *	to the routing socket.
781  */
782 void netdev_state_change(struct net_device *dev)
783 {
784 	if (dev->flags & IFF_UP) {
785 		raw_notifier_call_chain(&netdev_chain,
786 				NETDEV_CHANGE, dev);
787 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
788 	}
789 }
790 
791 /**
792  *	dev_load 	- load a network module
793  *	@name: name of interface
794  *
795  *	If a network interface is not present and the process has suitable
796  *	privileges this function loads the module. If module loading is not
797  *	available in this kernel then it becomes a nop.
798  */
799 
800 void dev_load(const char *name)
801 {
802 	struct net_device *dev;
803 
804 	read_lock(&dev_base_lock);
805 	dev = __dev_get_by_name(name);
806 	read_unlock(&dev_base_lock);
807 
808 	if (!dev && capable(CAP_SYS_MODULE))
809 		request_module("%s", name);
810 }
811 
812 static int default_rebuild_header(struct sk_buff *skb)
813 {
814 	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
815 	       skb->dev ? skb->dev->name : "NULL!!!");
816 	kfree_skb(skb);
817 	return 1;
818 }
819 
820 
821 /**
822  *	dev_open	- prepare an interface for use.
823  *	@dev:	device to open
824  *
825  *	Takes a device from down to up state. The device's private open
826  *	function is invoked and then the multicast lists are loaded. Finally
827  *	the device is moved into the up state and a %NETDEV_UP message is
828  *	sent to the netdev notifier chain.
829  *
830  *	Calling this function on an active interface is a nop. On a failure
831  *	a negative errno code is returned.
832  */
833 int dev_open(struct net_device *dev)
834 {
835 	int ret = 0;
836 
837 	/*
838 	 *	Is it already up?
839 	 */
840 
841 	if (dev->flags & IFF_UP)
842 		return 0;
843 
844 	/*
845 	 *	Is it even present?
846 	 */
847 	if (!netif_device_present(dev))
848 		return -ENODEV;
849 
850 	/*
851 	 *	Call device private open method
852 	 */
853 	set_bit(__LINK_STATE_START, &dev->state);
854 	if (dev->open) {
855 		ret = dev->open(dev);
856 		if (ret)
857 			clear_bit(__LINK_STATE_START, &dev->state);
858 	}
859 
860 	/*
861 	 *	If it went open OK then:
862 	 */
863 
864 	if (!ret) {
865 		/*
866 		 *	Set the flags.
867 		 */
868 		dev->flags |= IFF_UP;
869 
870 		/*
871 		 *	Initialize multicasting status
872 		 */
873 		dev_mc_upload(dev);
874 
875 		/*
876 		 *	Wakeup transmit queue engine
877 		 */
878 		dev_activate(dev);
879 
880 		/*
881 		 *	... and announce new interface.
882 		 */
883 		raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
884 	}
885 	return ret;
886 }
887 
888 /**
889  *	dev_close - shutdown an interface.
890  *	@dev: device to shutdown
891  *
892  *	This function moves an active device into down state. A
893  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
894  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
895  *	chain.
896  */
897 int dev_close(struct net_device *dev)
898 {
899 	if (!(dev->flags & IFF_UP))
900 		return 0;
901 
902 	/*
903 	 *	Tell people we are going down, so that they can
904 	 *	prepare for it while the device is still operating.
905 	 */
906 	raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
907 
908 	dev_deactivate(dev);
909 
910 	clear_bit(__LINK_STATE_START, &dev->state);
911 
912 	/* Synchronize to the scheduled poll. We cannot touch the poll list;
913 	 * it may even be on a different cpu. So just clear netif_running()
914 	 * and wait until the poll really happens. Actually, the best place
915 	 * for this is inside dev->stop() after the device has stopped its irq
916 	 * engine, but this requires more changes in devices. */
917 
918 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
919 	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
920 		/* No hurry. */
921 		msleep(1);
922 	}
923 
924 	/*
925 	 *	Call the device specific close. This cannot fail.
926 	 *	Only if device is UP
927 	 *
928 	 *	We allow it to be called even after a DETACH hot-plug
929 	 *	event.
930 	 */
931 	if (dev->stop)
932 		dev->stop(dev);
933 
934 	/*
935 	 *	Device is now down.
936 	 */
937 
938 	dev->flags &= ~IFF_UP;
939 
940 	/*
941 	 * Tell people we are down
942 	 */
943 	raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
944 
945 	return 0;
946 }
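/*
 * Illustrative sketch (hypothetical helper): bringing an interface
 * administratively up and then down again from inside the kernel.  Both
 * dev_open() and dev_close() expect the rtnl semaphore to be held, as the
 * ioctl and rtnetlink callers do.
 */
static int example_bounce(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_open(dev);
	if (!err)
		dev_close(dev);
	rtnl_unlock();

	return err;
}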
947 
948 
949 /*
950  *	Device change register/unregister. These are not inline or static
951  *	as we export them to the world.
952  */
953 
954 /**
955  *	register_netdevice_notifier - register a network notifier block
956  *	@nb: notifier
957  *
958  *	Register a notifier to be called when network device events occur.
959  *	The notifier passed is linked into the kernel structures and must
960  *	not be reused until it has been unregistered. A negative errno code
961  *	is returned on a failure.
962  *
963  * 	When registered, all registration and up events are replayed
964  *	to the new notifier to allow it to have a race-free
965  *	view of the network device list.
966  */
967 
968 int register_netdevice_notifier(struct notifier_block *nb)
969 {
970 	struct net_device *dev;
971 	int err;
972 
973 	rtnl_lock();
974 	err = raw_notifier_chain_register(&netdev_chain, nb);
975 	if (!err) {
976 		for (dev = dev_base; dev; dev = dev->next) {
977 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
978 
979 			if (dev->flags & IFF_UP)
980 				nb->notifier_call(nb, NETDEV_UP, dev);
981 		}
982 	}
983 	rtnl_unlock();
984 	return err;
985 }
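/*
 * Illustrative sketch of a minimal notifier consumer.  Because registration
 * replays NETDEV_REGISTER and NETDEV_UP for existing devices, the callback
 * sees every device regardless of load order.  The example_* names are
 * hypothetical.
 */
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UP)
		printk(KERN_INFO "example: %s is up\n", dev->name);

	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_notifier = {
	.notifier_call = example_netdev_event,
};

/* registered from an init path with:
 *	register_netdevice_notifier(&example_netdev_notifier);
 */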
986 
987 /**
988  *	unregister_netdevice_notifier - unregister a network notifier block
989  *	@nb: notifier
990  *
991  *	Unregister a notifier previously registered by
992  *	register_netdevice_notifier(). The notifier is unlinked from the
993  *	kernel structures and may then be reused. A negative errno code
994  *	is returned on a failure.
995  */
996 
997 int unregister_netdevice_notifier(struct notifier_block *nb)
998 {
999 	int err;
1000 
1001 	rtnl_lock();
1002 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1003 	rtnl_unlock();
1004 	return err;
1005 }
1006 
1007 /**
1008  *	call_netdevice_notifiers - call all network notifier blocks
1009  *      @val: value passed unmodified to notifier function
1010  *      @v:   pointer passed unmodified to notifier function
1011  *
1012  *	Call all network notifier blocks.  Parameters and return value
1013  *	are as for raw_notifier_call_chain().
1014  */
1015 
1016 int call_netdevice_notifiers(unsigned long val, void *v)
1017 {
1018 	return raw_notifier_call_chain(&netdev_chain, val, v);
1019 }
1020 
1021 /* When > 0 there are consumers of rx skb time stamps */
1022 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1023 
1024 void net_enable_timestamp(void)
1025 {
1026 	atomic_inc(&netstamp_needed);
1027 }
1028 
1029 void net_disable_timestamp(void)
1030 {
1031 	atomic_dec(&netstamp_needed);
1032 }
1033 
1034 void __net_timestamp(struct sk_buff *skb)
1035 {
1036 	struct timeval tv;
1037 
1038 	do_gettimeofday(&tv);
1039 	skb_set_timestamp(skb, &tv);
1040 }
1041 EXPORT_SYMBOL(__net_timestamp);
1042 
1043 static inline void net_timestamp(struct sk_buff *skb)
1044 {
1045 	if (atomic_read(&netstamp_needed))
1046 		__net_timestamp(skb);
1047 	else {
1048 		skb->tstamp.off_sec = 0;
1049 		skb->tstamp.off_usec = 0;
1050 	}
1051 }
1052 
1053 /*
1054  *	Support routine. Sends outgoing frames to any network
1055  *	taps currently in use.
1056  */
1057 
1058 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1059 {
1060 	struct packet_type *ptype;
1061 
1062 	net_timestamp(skb);
1063 
1064 	rcu_read_lock();
1065 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1066 		/* Never send packets back to the socket
1067 		 * they originated from - MvS (miquels@drinkel.ow.org)
1068 		 */
1069 		if ((ptype->dev == dev || !ptype->dev) &&
1070 		    (ptype->af_packet_priv == NULL ||
1071 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1072 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1073 			if (!skb2)
1074 				break;
1075 
1076 			/* skb->nh should be correctly
1077 			   set by sender, so that the second statement is
1078 			   just protection against buggy protocols.
1079 			 */
1080 			skb2->mac.raw = skb2->data;
1081 
1082 			if (skb2->nh.raw < skb2->data ||
1083 			    skb2->nh.raw > skb2->tail) {
1084 				if (net_ratelimit())
1085 					printk(KERN_CRIT "protocol %04x is "
1086 					       "buggy, dev %s\n",
1087 					       skb2->protocol, dev->name);
1088 				skb2->nh.raw = skb2->data;
1089 			}
1090 
1091 			skb2->h.raw = skb2->nh.raw;
1092 			skb2->pkt_type = PACKET_OUTGOING;
1093 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1094 		}
1095 	}
1096 	rcu_read_unlock();
1097 }
1098 
1099 
1100 void __netif_schedule(struct net_device *dev)
1101 {
1102 	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1103 		unsigned long flags;
1104 		struct softnet_data *sd;
1105 
1106 		local_irq_save(flags);
1107 		sd = &__get_cpu_var(softnet_data);
1108 		dev->next_sched = sd->output_queue;
1109 		sd->output_queue = dev;
1110 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1111 		local_irq_restore(flags);
1112 	}
1113 }
1114 EXPORT_SYMBOL(__netif_schedule);
1115 
1116 void __netif_rx_schedule(struct net_device *dev)
1117 {
1118 	unsigned long flags;
1119 
1120 	local_irq_save(flags);
1121 	dev_hold(dev);
1122 	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1123 	if (dev->quota < 0)
1124 		dev->quota += dev->weight;
1125 	else
1126 		dev->quota = dev->weight;
1127 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1128 	local_irq_restore(flags);
1129 }
1130 EXPORT_SYMBOL(__netif_rx_schedule);
1131 
1132 void dev_kfree_skb_any(struct sk_buff *skb)
1133 {
1134 	if (in_irq() || irqs_disabled())
1135 		dev_kfree_skb_irq(skb);
1136 	else
1137 		dev_kfree_skb(skb);
1138 }
1139 EXPORT_SYMBOL(dev_kfree_skb_any);
1140 
1141 
1142 /* Hot-plugging. */
1143 void netif_device_detach(struct net_device *dev)
1144 {
1145 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1146 	    netif_running(dev)) {
1147 		netif_stop_queue(dev);
1148 	}
1149 }
1150 EXPORT_SYMBOL(netif_device_detach);
1151 
1152 void netif_device_attach(struct net_device *dev)
1153 {
1154 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1155 	    netif_running(dev)) {
1156 		netif_wake_queue(dev);
1157 		__netdev_watchdog_up(dev);
1158 	}
1159 }
1160 EXPORT_SYMBOL(netif_device_attach);
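/*
 * Illustrative sketch of the usual pairing of these helpers in a driver's
 * suspend/resume methods.  The example_* names are hypothetical and the
 * hardware-specific steps are only hinted at in comments.
 */
static void example_suspend(struct net_device *dev)
{
	netif_device_detach(dev);	/* stop the queue, mark !present */
	/* ... driver-specific hardware quiescing would go here ... */
}

static void example_resume(struct net_device *dev)
{
	/* ... driver-specific hardware re-initialisation would go here ... */
	netif_device_attach(dev);	/* mark present, wake the queue */
}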
1161 
1162 
1163 /*
1164  * Invalidate hardware checksum when packet is to be mangled, and
1165  * complete checksum manually on outgoing path.
1166  */
1167 int skb_checksum_help(struct sk_buff *skb)
1168 {
1169 	__wsum csum;
1170 	int ret = 0, offset = skb->h.raw - skb->data;
1171 
1172 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1173 		goto out_set_summed;
1174 
1175 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1176 		/* Let GSO fix up the checksum. */
1177 		goto out_set_summed;
1178 	}
1179 
1180 	if (skb_cloned(skb)) {
1181 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1182 		if (ret)
1183 			goto out;
1184 	}
1185 
1186 	BUG_ON(offset > (int)skb->len);
1187 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1188 
1189 	offset = skb->tail - skb->h.raw;
1190 	BUG_ON(offset <= 0);
1191 	BUG_ON(skb->csum_offset + 2 > offset);
1192 
1193 	*(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
1194 
1195 out_set_summed:
1196 	skb->ip_summed = CHECKSUM_NONE;
1197 out:
1198 	return ret;
1199 }
1200 
1201 /**
1202  *	skb_gso_segment - Perform segmentation on skb.
1203  *	@skb: buffer to segment
1204  *	@features: features for the output path (see dev->features)
1205  *
1206  *	This function segments the given skb and returns a list of segments.
1207  *
1208  *	It may return NULL if the skb requires no segmentation.  This is
1209  *	only possible when GSO is used for verifying header integrity.
1210  */
1211 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1212 {
1213 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1214 	struct packet_type *ptype;
1215 	__be16 type = skb->protocol;
1216 	int err;
1217 
1218 	BUG_ON(skb_shinfo(skb)->frag_list);
1219 
1220 	skb->mac.raw = skb->data;
1221 	skb->mac_len = skb->nh.raw - skb->data;
1222 	__skb_pull(skb, skb->mac_len);
1223 
1224 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1225 		if (skb_header_cloned(skb) &&
1226 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1227 			return ERR_PTR(err);
1228 	}
1229 
1230 	rcu_read_lock();
1231 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1232 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1233 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1234 				err = ptype->gso_send_check(skb);
1235 				segs = ERR_PTR(err);
1236 				if (err || skb_gso_ok(skb, features))
1237 					break;
1238 				__skb_push(skb, skb->data - skb->nh.raw);
1239 			}
1240 			segs = ptype->gso_segment(skb, features);
1241 			break;
1242 		}
1243 	}
1244 	rcu_read_unlock();
1245 
1246 	__skb_push(skb, skb->data - skb->mac.raw);
1247 
1248 	return segs;
1249 }
1250 
1251 EXPORT_SYMBOL(skb_gso_segment);
1252 
1253 /* Take action when hardware reception checksum errors are detected. */
1254 #ifdef CONFIG_BUG
1255 void netdev_rx_csum_fault(struct net_device *dev)
1256 {
1257 	if (net_ratelimit()) {
1258 		printk(KERN_ERR "%s: hw csum failure.\n",
1259 			dev ? dev->name : "<unknown>");
1260 		dump_stack();
1261 	}
1262 }
1263 EXPORT_SYMBOL(netdev_rx_csum_fault);
1264 #endif
1265 
1266 /* Actually, we should eliminate this check as soon as we know, that:
1267  * 1. IOMMU is present and allows to map all the memory.
1268  * 2. No high memory really exists on this machine.
1269  */
1270 
1271 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1272 {
1273 #ifdef CONFIG_HIGHMEM
1274 	int i;
1275 
1276 	if (dev->features & NETIF_F_HIGHDMA)
1277 		return 0;
1278 
1279 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1280 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1281 			return 1;
1282 
1283 #endif
1284 	return 0;
1285 }
1286 
1287 struct dev_gso_cb {
1288 	void (*destructor)(struct sk_buff *skb);
1289 };
1290 
1291 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1292 
1293 static void dev_gso_skb_destructor(struct sk_buff *skb)
1294 {
1295 	struct dev_gso_cb *cb;
1296 
1297 	do {
1298 		struct sk_buff *nskb = skb->next;
1299 
1300 		skb->next = nskb->next;
1301 		nskb->next = NULL;
1302 		kfree_skb(nskb);
1303 	} while (skb->next);
1304 
1305 	cb = DEV_GSO_CB(skb);
1306 	if (cb->destructor)
1307 		cb->destructor(skb);
1308 }
1309 
1310 /**
1311  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1312  *	@skb: buffer to segment
1313  *
1314  *	This function segments the given skb and stores the list of segments
1315  *	in skb->next.
1316  */
1317 static int dev_gso_segment(struct sk_buff *skb)
1318 {
1319 	struct net_device *dev = skb->dev;
1320 	struct sk_buff *segs;
1321 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1322 					 NETIF_F_SG : 0);
1323 
1324 	segs = skb_gso_segment(skb, features);
1325 
1326 	/* Verifying header integrity only. */
1327 	if (!segs)
1328 		return 0;
1329 
1330 	if (unlikely(IS_ERR(segs)))
1331 		return PTR_ERR(segs);
1332 
1333 	skb->next = segs;
1334 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1335 	skb->destructor = dev_gso_skb_destructor;
1336 
1337 	return 0;
1338 }
1339 
1340 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1341 {
1342 	if (likely(!skb->next)) {
1343 		if (netdev_nit)
1344 			dev_queue_xmit_nit(skb, dev);
1345 
1346 		if (netif_needs_gso(dev, skb)) {
1347 			if (unlikely(dev_gso_segment(skb)))
1348 				goto out_kfree_skb;
1349 			if (skb->next)
1350 				goto gso;
1351 		}
1352 
1353 		return dev->hard_start_xmit(skb, dev);
1354 	}
1355 
1356 gso:
1357 	do {
1358 		struct sk_buff *nskb = skb->next;
1359 		int rc;
1360 
1361 		skb->next = nskb->next;
1362 		nskb->next = NULL;
1363 		rc = dev->hard_start_xmit(nskb, dev);
1364 		if (unlikely(rc)) {
1365 			nskb->next = skb->next;
1366 			skb->next = nskb;
1367 			return rc;
1368 		}
1369 		if (unlikely(netif_queue_stopped(dev) && skb->next))
1370 			return NETDEV_TX_BUSY;
1371 	} while (skb->next);
1372 
1373 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1374 
1375 out_kfree_skb:
1376 	kfree_skb(skb);
1377 	return 0;
1378 }
1379 
1380 #define HARD_TX_LOCK(dev, cpu) {			\
1381 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1382 		netif_tx_lock(dev);			\
1383 	}						\
1384 }
1385 
1386 #define HARD_TX_UNLOCK(dev) {				\
1387 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1388 		netif_tx_unlock(dev);			\
1389 	}						\
1390 }
1391 
1392 /**
1393  *	dev_queue_xmit - transmit a buffer
1394  *	@skb: buffer to transmit
1395  *
1396  *	Queue a buffer for transmission to a network device. The caller must
1397  *	have set the device and priority and built the buffer before calling
1398  *	this function. The function can be called from an interrupt.
1399  *
1400  *	A negative errno code is returned on a failure. A success does not
1401  *	guarantee the frame will be transmitted as it may be dropped due
1402  *	to congestion or traffic shaping.
1403  *
1404  * -----------------------------------------------------------------------------------
1405  *      I notice this method can also return errors from the queue disciplines,
1406  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1407  *      be positive.
1408  *
1409  *      Regardless of the return value, the skb is consumed, so it is currently
1410  *      difficult to retry a send to this method.  (You can bump the ref count
1411  *      before sending to hold a reference for retry if you are careful.)
1412  *
1413  *      When calling this method, interrupts MUST be enabled.  This is because
1414  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1415  *          --BLG
1416  */
1417 
1418 int dev_queue_xmit(struct sk_buff *skb)
1419 {
1420 	struct net_device *dev = skb->dev;
1421 	struct Qdisc *q;
1422 	int rc = -ENOMEM;
1423 
1424 	/* GSO will handle the following emulations directly. */
1425 	if (netif_needs_gso(dev, skb))
1426 		goto gso;
1427 
1428 	if (skb_shinfo(skb)->frag_list &&
1429 	    !(dev->features & NETIF_F_FRAGLIST) &&
1430 	    __skb_linearize(skb))
1431 		goto out_kfree_skb;
1432 
1433 	/* Fragmented skb is linearized if device does not support SG,
1434 	 * or if at least one of fragments is in highmem and device
1435 	 * does not support DMA from it.
1436 	 */
1437 	if (skb_shinfo(skb)->nr_frags &&
1438 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1439 	    __skb_linearize(skb))
1440 		goto out_kfree_skb;
1441 
1442 	/* If packet is not checksummed and device does not support
1443 	 * checksumming for this protocol, complete checksumming here.
1444 	 */
1445 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
1446 	    (!(dev->features & NETIF_F_GEN_CSUM) &&
1447 	     (!(dev->features & NETIF_F_IP_CSUM) ||
1448 	      skb->protocol != htons(ETH_P_IP))))
1449 		if (skb_checksum_help(skb))
1450 			goto out_kfree_skb;
1451 
1452 gso:
1453 	spin_lock_prefetch(&dev->queue_lock);
1454 
1455 	/* Disable soft irqs for various locks below. Also
1456 	 * stops preemption for RCU.
1457 	 */
1458 	rcu_read_lock_bh();
1459 
1460 	/* Updates of qdisc are serialized by queue_lock.
1461 	 * The struct Qdisc which is pointed to by qdisc is now a
1462 	 * rcu structure - it may be accessed without acquiring
1463 	 * a lock (but the structure may be stale.) The freeing of the
1464 	 * qdisc will be deferred until it's known that there are no
1465 	 * more references to it.
1466 	 *
1467 	 * If the qdisc has an enqueue function, we still need to
1468 	 * hold the queue_lock before calling it, since queue_lock
1469 	 * also serializes access to the device queue.
1470 	 */
1471 
1472 	q = rcu_dereference(dev->qdisc);
1473 #ifdef CONFIG_NET_CLS_ACT
1474 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1475 #endif
1476 	if (q->enqueue) {
1477 		/* Grab device queue */
1478 		spin_lock(&dev->queue_lock);
1479 		q = dev->qdisc;
1480 		if (q->enqueue) {
1481 			rc = q->enqueue(skb, q);
1482 			qdisc_run(dev);
1483 			spin_unlock(&dev->queue_lock);
1484 
1485 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1486 			goto out;
1487 		}
1488 		spin_unlock(&dev->queue_lock);
1489 	}
1490 
1491 	/* The device has no queue. Common case for software devices:
1492 	   loopback, all the sorts of tunnels...
1493 
1494 	   Really, it is unlikely that netif_tx_lock protection is necessary
1495 	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1496 	   counters.)
1497 	   However, it is possible, that they rely on protection
1498 	   made by us here.
1499 
1500 	   Check this and shoot the lock. It is not prone to deadlocks.
1501 	   Or shoot the noqueue qdisc, which is even simpler 8)
1502 	 */
1503 	if (dev->flags & IFF_UP) {
1504 		int cpu = smp_processor_id(); /* ok because BHs are off */
1505 
1506 		if (dev->xmit_lock_owner != cpu) {
1507 
1508 			HARD_TX_LOCK(dev, cpu);
1509 
1510 			if (!netif_queue_stopped(dev)) {
1511 				rc = 0;
1512 				if (!dev_hard_start_xmit(skb, dev)) {
1513 					HARD_TX_UNLOCK(dev);
1514 					goto out;
1515 				}
1516 			}
1517 			HARD_TX_UNLOCK(dev);
1518 			if (net_ratelimit())
1519 				printk(KERN_CRIT "Virtual device %s asks to "
1520 				       "queue packet!\n", dev->name);
1521 		} else {
1522 			/* Recursion is detected! It is possible,
1523 			 * unfortunately */
1524 			if (net_ratelimit())
1525 				printk(KERN_CRIT "Dead loop on virtual device "
1526 				       "%s, fix it urgently!\n", dev->name);
1527 		}
1528 	}
1529 
1530 	rc = -ENETDOWN;
1531 	rcu_read_unlock_bh();
1532 
1533 out_kfree_skb:
1534 	kfree_skb(skb);
1535 	return rc;
1536 out:
1537 	rcu_read_unlock_bh();
1538 	return rc;
1539 }
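/*
 * Illustrative sketch (hypothetical helper): queueing a pre-built
 * link-layer frame for transmission.  "buf" is assumed to already contain
 * the complete frame, including the hardware header; the protocol value
 * below is only illustrative.
 */
static int example_send_frame(struct net_device *dev, const void *buf,
			      unsigned int len)
{
	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;

	memcpy(skb_put(skb, len), buf, len);
	skb->dev = dev;
	skb->protocol = htons(ETH_P_802_2);
	skb->ip_summed = CHECKSUM_NONE;

	/* dev_queue_xmit() consumes the skb whatever the outcome. */
	return dev_queue_xmit(skb);
}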
1540 
1541 
1542 /*=======================================================================
1543 			Receiver routines
1544   =======================================================================*/
1545 
1546 int netdev_max_backlog = 1000;
1547 int netdev_budget = 300;
1548 int weight_p = 64;            /* old backlog weight */
1549 
1550 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1551 
1552 
1553 /**
1554  *	netif_rx	-	post buffer to the network code
1555  *	@skb: buffer to post
1556  *
1557  *	This function receives a packet from a device driver and queues it for
1558  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1559  *	may be dropped during processing for congestion control or by the
1560  *	protocol layers.
1561  *
1562  *	return values:
1563  *	NET_RX_SUCCESS	(no congestion)
1564  *	NET_RX_CN_LOW   (low congestion)
1565  *	NET_RX_CN_MOD   (moderate congestion)
1566  *	NET_RX_CN_HIGH  (high congestion)
1567  *	NET_RX_DROP     (packet was dropped)
1568  *
1569  */
1570 
1571 int netif_rx(struct sk_buff *skb)
1572 {
1573 	struct softnet_data *queue;
1574 	unsigned long flags;
1575 
1576 	/* if netpoll wants it, pretend we never saw it */
1577 	if (netpoll_rx(skb))
1578 		return NET_RX_DROP;
1579 
1580 	if (!skb->tstamp.off_sec)
1581 		net_timestamp(skb);
1582 
1583 	/*
1584 	 * The code is rearranged so that the path is the shortest
1585 	 * when the CPU is congested but still operating.
1586 	 */
1587 	local_irq_save(flags);
1588 	queue = &__get_cpu_var(softnet_data);
1589 
1590 	__get_cpu_var(netdev_rx_stat).total++;
1591 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1592 		if (queue->input_pkt_queue.qlen) {
1593 enqueue:
1594 			dev_hold(skb->dev);
1595 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1596 			local_irq_restore(flags);
1597 			return NET_RX_SUCCESS;
1598 		}
1599 
1600 		netif_rx_schedule(&queue->backlog_dev);
1601 		goto enqueue;
1602 	}
1603 
1604 	__get_cpu_var(netdev_rx_stat).dropped++;
1605 	local_irq_restore(flags);
1606 
1607 	kfree_skb(skb);
1608 	return NET_RX_DROP;
1609 }
1610 
1611 int netif_rx_ni(struct sk_buff *skb)
1612 {
1613 	int err;
1614 
1615 	preempt_disable();
1616 	err = netif_rx(skb);
1617 	if (local_softirq_pending())
1618 		do_softirq();
1619 	preempt_enable();
1620 
1621 	return err;
1622 }
1623 
1624 EXPORT_SYMBOL(netif_rx_ni);
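/*
 * Illustrative sketch of the classic non-NAPI receive path in a driver's
 * interrupt handler.  "data"/"len" are assumed to have been copied from
 * the hardware already; the example_* name is hypothetical.
 */
static void example_rx_frame(struct net_device *dev, const void *data,
			     unsigned int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);

	if (!skb)
		return;			/* drop; the stack keeps no state */

	skb_reserve(skb, 2);		/* align the IP header on a 16 byte boundary */
	memcpy(skb_put(skb, len), data, len);

	skb->protocol = eth_type_trans(skb, dev);
	skb->ip_summed = CHECKSUM_NONE;

	netif_rx(skb);			/* queue onto the per-cpu backlog */
}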
1625 
1626 static inline struct net_device *skb_bond(struct sk_buff *skb)
1627 {
1628 	struct net_device *dev = skb->dev;
1629 
1630 	if (dev->master) {
1631 		if (skb_bond_should_drop(skb)) {
1632 			kfree_skb(skb);
1633 			return NULL;
1634 		}
1635 		skb->dev = dev->master;
1636 	}
1637 
1638 	return dev;
1639 }
1640 
1641 static void net_tx_action(struct softirq_action *h)
1642 {
1643 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1644 
1645 	if (sd->completion_queue) {
1646 		struct sk_buff *clist;
1647 
1648 		local_irq_disable();
1649 		clist = sd->completion_queue;
1650 		sd->completion_queue = NULL;
1651 		local_irq_enable();
1652 
1653 		while (clist) {
1654 			struct sk_buff *skb = clist;
1655 			clist = clist->next;
1656 
1657 			BUG_TRAP(!atomic_read(&skb->users));
1658 			__kfree_skb(skb);
1659 		}
1660 	}
1661 
1662 	if (sd->output_queue) {
1663 		struct net_device *head;
1664 
1665 		local_irq_disable();
1666 		head = sd->output_queue;
1667 		sd->output_queue = NULL;
1668 		local_irq_enable();
1669 
1670 		while (head) {
1671 			struct net_device *dev = head;
1672 			head = head->next_sched;
1673 
1674 			smp_mb__before_clear_bit();
1675 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1676 
1677 			if (spin_trylock(&dev->queue_lock)) {
1678 				qdisc_run(dev);
1679 				spin_unlock(&dev->queue_lock);
1680 			} else {
1681 				netif_schedule(dev);
1682 			}
1683 		}
1684 	}
1685 }
1686 
1687 static __inline__ int deliver_skb(struct sk_buff *skb,
1688 				  struct packet_type *pt_prev,
1689 				  struct net_device *orig_dev)
1690 {
1691 	atomic_inc(&skb->users);
1692 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1693 }
1694 
1695 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1696 int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
1697 struct net_bridge;
1698 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1699 						unsigned char *addr);
1700 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
1701 
1702 static __inline__ int handle_bridge(struct sk_buff **pskb,
1703 				    struct packet_type **pt_prev, int *ret,
1704 				    struct net_device *orig_dev)
1705 {
1706 	struct net_bridge_port *port;
1707 
1708 	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
1709 	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
1710 		return 0;
1711 
1712 	if (*pt_prev) {
1713 		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
1714 		*pt_prev = NULL;
1715 	}
1716 
1717 	return br_handle_frame_hook(port, pskb);
1718 }
1719 #else
1720 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
1721 #endif
1722 
1723 #ifdef CONFIG_NET_CLS_ACT
1724 /* TODO: Maybe we should just force sch_ingress to be compiled in
1725  * when CONFIG_NET_CLS_ACT is? Otherwise, some useless instructions
1726  * (a compare and 2 extra stores) are executed right now if we don't
1727  * have it on but do have CONFIG_NET_CLS_ACT.
1728  * NOTE: This doesn't stop any functionality; if you don't have
1729  * the ingress scheduler, you just can't add policies on ingress.
1730  *
1731  */
1732 static int ing_filter(struct sk_buff *skb)
1733 {
1734 	struct Qdisc *q;
1735 	struct net_device *dev = skb->dev;
1736 	int result = TC_ACT_OK;
1737 
1738 	if (dev->qdisc_ingress) {
1739 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1740 		if (MAX_RED_LOOP < ttl++) {
1741 			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
1742 				skb->iif, skb->dev->ifindex);
1743 			return TC_ACT_SHOT;
1744 		}
1745 
1746 		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1747 
1748 		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1749 
1750 		spin_lock(&dev->queue_lock);
1751 		if ((q = dev->qdisc_ingress) != NULL)
1752 			result = q->enqueue(skb, q);
1753 		spin_unlock(&dev->queue_lock);
1754 
1755 	}
1756 
1757 	return result;
1758 }
1759 #endif
1760 
1761 int netif_receive_skb(struct sk_buff *skb)
1762 {
1763 	struct packet_type *ptype, *pt_prev;
1764 	struct net_device *orig_dev;
1765 	int ret = NET_RX_DROP;
1766 	__be16 type;
1767 
1768 	/* if we've gotten here through NAPI, check netpoll */
1769 	if (skb->dev->poll && netpoll_rx(skb))
1770 		return NET_RX_DROP;
1771 
1772 	if (!skb->tstamp.off_sec)
1773 		net_timestamp(skb);
1774 
1775 	if (!skb->iif)
1776 		skb->iif = skb->dev->ifindex;
1777 
1778 	orig_dev = skb_bond(skb);
1779 
1780 	if (!orig_dev)
1781 		return NET_RX_DROP;
1782 
1783 	__get_cpu_var(netdev_rx_stat).total++;
1784 
1785 	skb->h.raw = skb->nh.raw = skb->data;
1786 	skb->mac_len = skb->nh.raw - skb->mac.raw;
1787 
1788 	pt_prev = NULL;
1789 
1790 	rcu_read_lock();
1791 
1792 #ifdef CONFIG_NET_CLS_ACT
1793 	if (skb->tc_verd & TC_NCLS) {
1794 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1795 		goto ncls;
1796 	}
1797 #endif
1798 
1799 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1800 		if (!ptype->dev || ptype->dev == skb->dev) {
1801 			if (pt_prev)
1802 				ret = deliver_skb(skb, pt_prev, orig_dev);
1803 			pt_prev = ptype;
1804 		}
1805 	}
1806 
1807 #ifdef CONFIG_NET_CLS_ACT
1808 	if (pt_prev) {
1809 		ret = deliver_skb(skb, pt_prev, orig_dev);
1810 		pt_prev = NULL; /* no one else should process this after */
1811 	} else {
1812 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1813 	}
1814 
1815 	ret = ing_filter(skb);
1816 
1817 	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1818 		kfree_skb(skb);
1819 		goto out;
1820 	}
1821 
1822 	skb->tc_verd = 0;
1823 ncls:
1824 #endif
1825 
1826 	if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
1827 		goto out;
1828 
1829 	type = skb->protocol;
1830 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1831 		if (ptype->type == type &&
1832 		    (!ptype->dev || ptype->dev == skb->dev)) {
1833 			if (pt_prev)
1834 				ret = deliver_skb(skb, pt_prev, orig_dev);
1835 			pt_prev = ptype;
1836 		}
1837 	}
1838 
1839 	if (pt_prev) {
1840 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1841 	} else {
1842 		kfree_skb(skb);
1843 		/* Jamal, now you will not be able to escape explaining
1844 		 * to me how you were going to use this. :-)
1845 		 */
1846 		ret = NET_RX_DROP;
1847 	}
1848 
1849 out:
1850 	rcu_read_unlock();
1851 	return ret;
1852 }
1853 
1854 static int process_backlog(struct net_device *backlog_dev, int *budget)
1855 {
1856 	int work = 0;
1857 	int quota = min(backlog_dev->quota, *budget);
1858 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1859 	unsigned long start_time = jiffies;
1860 
1861 	backlog_dev->weight = weight_p;
1862 	for (;;) {
1863 		struct sk_buff *skb;
1864 		struct net_device *dev;
1865 
1866 		local_irq_disable();
1867 		skb = __skb_dequeue(&queue->input_pkt_queue);
1868 		if (!skb)
1869 			goto job_done;
1870 		local_irq_enable();
1871 
1872 		dev = skb->dev;
1873 
1874 		netif_receive_skb(skb);
1875 
1876 		dev_put(dev);
1877 
1878 		work++;
1879 
1880 		if (work >= quota || jiffies - start_time > 1)
1881 			break;
1882 
1883 	}
1884 
1885 	backlog_dev->quota -= work;
1886 	*budget -= work;
1887 	return -1;
1888 
1889 job_done:
1890 	backlog_dev->quota -= work;
1891 	*budget -= work;
1892 
1893 	list_del(&backlog_dev->poll_list);
1894 	smp_mb__before_clear_bit();
1895 	netif_poll_enable(backlog_dev);
1896 
1897 	local_irq_enable();
1898 	return 0;
1899 }
1900 
1901 static void net_rx_action(struct softirq_action *h)
1902 {
1903 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1904 	unsigned long start_time = jiffies;
1905 	int budget = netdev_budget;
1906 	void *have;
1907 
1908 	local_irq_disable();
1909 
1910 	while (!list_empty(&queue->poll_list)) {
1911 		struct net_device *dev;
1912 
1913 		if (budget <= 0 || jiffies - start_time > 1)
1914 			goto softnet_break;
1915 
1916 		local_irq_enable();
1917 
1918 		dev = list_entry(queue->poll_list.next,
1919 				 struct net_device, poll_list);
1920 		have = netpoll_poll_lock(dev);
1921 
1922 		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1923 			netpoll_poll_unlock(have);
1924 			local_irq_disable();
1925 			list_move_tail(&dev->poll_list, &queue->poll_list);
1926 			if (dev->quota < 0)
1927 				dev->quota += dev->weight;
1928 			else
1929 				dev->quota = dev->weight;
1930 		} else {
1931 			netpoll_poll_unlock(have);
1932 			dev_put(dev);
1933 			local_irq_disable();
1934 		}
1935 	}
1936 out:
1937 #ifdef CONFIG_NET_DMA
1938 	/*
1939 	 * There may not be any more sk_buffs coming right now, so push
1940 	 * any pending DMA copies to hardware
1941 	 */
1942 	if (net_dma_client) {
1943 		struct dma_chan *chan;
1944 		rcu_read_lock();
1945 		list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
1946 			dma_async_memcpy_issue_pending(chan);
1947 		rcu_read_unlock();
1948 	}
1949 #endif
1950 	local_irq_enable();
1951 	return;
1952 
1953 softnet_break:
1954 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
1955 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1956 	goto out;
1957 }
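
/*
 * For orientation, net_rx_action() invokes dev->poll() for every device on
 * the per-CPU poll list.  In this (pre-napi_struct) scheme a driver's poll
 * routine must honour both the device quota and the global budget.  The
 * sketch below is only illustrative; my_rx_clean() and my_enable_rx_irq()
 * are hypothetical driver helpers, not kernel APIs:
 *
 *	static int my_poll(struct net_device *dev, int *budget)
 *	{
 *		int quota = min(dev->quota, *budget);
 *		int work = my_rx_clean(dev, quota);
 *
 *		*budget -= work;
 *		dev->quota -= work;
 *
 *		if (work < quota) {
 *			netif_rx_complete(dev);
 *			my_enable_rx_irq(dev);
 *			return 0;
 *		}
 *		return 1;
 *	}
 */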
1958 
1959 static gifconf_func_t *gifconf_list[NPROTO];
1960 
1961 /**
1962  *	register_gifconf	-	register a SIOCGIFCONF handler
1963  *	@family: Address family
1964  *	@gifconf: Function handler
1965  *
1966  *	Register protocol dependent address dumping routines. The handler
1967  *	that is passed must not be freed or reused until it has been replaced
1968  *	by another handler.
1969  */
1970 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1971 {
1972 	if (family >= NPROTO)
1973 		return -EINVAL;
1974 	gifconf_list[family] = gifconf;
1975 	return 0;
1976 }
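
/*
 * As an illustration, an address family registers its dump routine once at
 * init time; IPv4 does roughly the following in net/ipv4/devinet.c (the
 * prototype is shown here only for context):
 *
 *	static int inet_gifconf(struct net_device *dev,
 *				char __user *buf, int len);
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * A handler called with a NULL buffer must only report how many bytes it
 * would have written; dev_ifconf() below relies on that behaviour.
 */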
1977 
1978 
1979 /*
1980  *	Map an interface index to its name (SIOCGIFNAME)
1981  */
1982 
1983 /*
1984  *	We need this ioctl for efficient implementation of the
1985  *	if_indextoname() function required by the IPv6 API.  Without
1986  *	it, we would have to search all the interfaces to find a
1987  *	match.  --pb
1988  */
1989 
1990 static int dev_ifname(struct ifreq __user *arg)
1991 {
1992 	struct net_device *dev;
1993 	struct ifreq ifr;
1994 
1995 	/*
1996 	 *	Fetch the caller's info block.
1997 	 */
1998 
1999 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2000 		return -EFAULT;
2001 
2002 	read_lock(&dev_base_lock);
2003 	dev = __dev_get_by_index(ifr.ifr_ifindex);
2004 	if (!dev) {
2005 		read_unlock(&dev_base_lock);
2006 		return -ENODEV;
2007 	}
2008 
2009 	strcpy(ifr.ifr_name, dev->name);
2010 	read_unlock(&dev_base_lock);
2011 
2012 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2013 		return -EFAULT;
2014 	return 0;
2015 }
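
/*
 * From user space this ioctl is normally reached through if_indextoname().
 * A minimal, illustrative caller (error handling omitted) looks roughly
 * like this; on return ifr.ifr_name holds the interface name:
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	ifr.ifr_ifindex = 1;
 *	ioctl(fd, SIOCGIFNAME, &ifr);
 */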
2016 
2017 /*
2018  *	Perform a SIOCGIFCONF call. This structure will change
2019  *	size eventually, and there is nothing I can do about it.
2020  *	Thus we will need a 'compatibility mode'.
2021  */
2022 
2023 static int dev_ifconf(char __user *arg)
2024 {
2025 	struct ifconf ifc;
2026 	struct net_device *dev;
2027 	char __user *pos;
2028 	int len;
2029 	int total;
2030 	int i;
2031 
2032 	/*
2033 	 *	Fetch the caller's info block.
2034 	 */
2035 
2036 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2037 		return -EFAULT;
2038 
2039 	pos = ifc.ifc_buf;
2040 	len = ifc.ifc_len;
2041 
2042 	/*
2043 	 *	Loop over the interfaces, and write an info block for each.
2044 	 */
2045 
2046 	total = 0;
2047 	for (dev = dev_base; dev; dev = dev->next) {
2048 		for (i = 0; i < NPROTO; i++) {
2049 			if (gifconf_list[i]) {
2050 				int done;
2051 				if (!pos)
2052 					done = gifconf_list[i](dev, NULL, 0);
2053 				else
2054 					done = gifconf_list[i](dev, pos + total,
2055 							       len - total);
2056 				if (done < 0)
2057 					return -EFAULT;
2058 				total += done;
2059 			}
2060 		}
2061 	}
2062 
2063 	/*
2064 	 *	All done.  Write the updated control block back to the caller.
2065 	 */
2066 	ifc.ifc_len = total;
2067 
2068 	/*
2069 	 * 	Both BSD and Solaris return 0 here, so we do too.
2070 	 */
2071 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2072 }
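
/*
 * A sketch of the corresponding user space call, kept deliberately small
 * (error handling omitted, fixed-size buffer assumed):
 *
 *	struct ifreq reqs[16];
 *	struct ifconf ifc;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	ifc.ifc_len = sizeof(reqs);
 *	ifc.ifc_req = reqs;
 *	ioctl(fd, SIOCGIFCONF, &ifc);
 *
 * On return ifc.ifc_len holds the number of bytes actually written, so
 * ifc.ifc_len / sizeof(struct ifreq) interfaces were reported.
 */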
2073 
2074 #ifdef CONFIG_PROC_FS
2075 /*
2076  *	This is invoked by the /proc filesystem handler to display a device
2077  *	in detail.
2078  */
2079 static __inline__ struct net_device *dev_get_idx(loff_t pos)
2080 {
2081 	struct net_device *dev;
2082 	loff_t i;
2083 
2084 	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2085 
2086 	return i == pos ? dev : NULL;
2087 }
2088 
2089 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2090 {
2091 	read_lock(&dev_base_lock);
2092 	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2093 }
2094 
2095 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2096 {
2097 	++*pos;
2098 	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2099 }
2100 
2101 void dev_seq_stop(struct seq_file *seq, void *v)
2102 {
2103 	read_unlock(&dev_base_lock);
2104 }
2105 
2106 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2107 {
2108 	if (dev->get_stats) {
2109 		struct net_device_stats *stats = dev->get_stats(dev);
2110 
2111 		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2112 				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2113 			   dev->name, stats->rx_bytes, stats->rx_packets,
2114 			   stats->rx_errors,
2115 			   stats->rx_dropped + stats->rx_missed_errors,
2116 			   stats->rx_fifo_errors,
2117 			   stats->rx_length_errors + stats->rx_over_errors +
2118 			     stats->rx_crc_errors + stats->rx_frame_errors,
2119 			   stats->rx_compressed, stats->multicast,
2120 			   stats->tx_bytes, stats->tx_packets,
2121 			   stats->tx_errors, stats->tx_dropped,
2122 			   stats->tx_fifo_errors, stats->collisions,
2123 			   stats->tx_carrier_errors +
2124 			     stats->tx_aborted_errors +
2125 			     stats->tx_window_errors +
2126 			     stats->tx_heartbeat_errors,
2127 			   stats->tx_compressed);
2128 	} else
2129 		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2130 }
2131 
2132 /*
2133  *	Called from the PROCfs module. This now uses the new arbitrary sized
2134  *	/proc/net interface to create /proc/net/dev
2135  */
2136 static int dev_seq_show(struct seq_file *seq, void *v)
2137 {
2138 	if (v == SEQ_START_TOKEN)
2139 		seq_puts(seq, "Inter-|   Receive                            "
2140 			      "                    |  Transmit\n"
2141 			      " face |bytes    packets errs drop fifo frame "
2142 			      "compressed multicast|bytes    packets errs "
2143 			      "drop fifo colls carrier compressed\n");
2144 	else
2145 		dev_seq_printf_stats(seq, v);
2146 	return 0;
2147 }
2148 
2149 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2150 {
2151 	struct netif_rx_stats *rc = NULL;
2152 
2153 	while (*pos < NR_CPUS)
2154 		if (cpu_online(*pos)) {
2155 			rc = &per_cpu(netdev_rx_stat, *pos);
2156 			break;
2157 		} else
2158 			++*pos;
2159 	return rc;
2160 }
2161 
2162 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2163 {
2164 	return softnet_get_online(pos);
2165 }
2166 
2167 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2168 {
2169 	++*pos;
2170 	return softnet_get_online(pos);
2171 }
2172 
2173 static void softnet_seq_stop(struct seq_file *seq, void *v)
2174 {
2175 }
2176 
2177 static int softnet_seq_show(struct seq_file *seq, void *v)
2178 {
2179 	struct netif_rx_stats *s = v;
2180 
2181 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2182 		   s->total, s->dropped, s->time_squeeze, 0,
2183 		   0, 0, 0, 0, /* was fastroute */
2184 		   s->cpu_collision);
2185 	return 0;
2186 }
2187 
2188 static struct seq_operations dev_seq_ops = {
2189 	.start = dev_seq_start,
2190 	.next  = dev_seq_next,
2191 	.stop  = dev_seq_stop,
2192 	.show  = dev_seq_show,
2193 };
2194 
2195 static int dev_seq_open(struct inode *inode, struct file *file)
2196 {
2197 	return seq_open(file, &dev_seq_ops);
2198 }
2199 
2200 static const struct file_operations dev_seq_fops = {
2201 	.owner	 = THIS_MODULE,
2202 	.open    = dev_seq_open,
2203 	.read    = seq_read,
2204 	.llseek  = seq_lseek,
2205 	.release = seq_release,
2206 };
2207 
2208 static struct seq_operations softnet_seq_ops = {
2209 	.start = softnet_seq_start,
2210 	.next  = softnet_seq_next,
2211 	.stop  = softnet_seq_stop,
2212 	.show  = softnet_seq_show,
2213 };
2214 
2215 static int softnet_seq_open(struct inode *inode, struct file *file)
2216 {
2217 	return seq_open(file, &softnet_seq_ops);
2218 }
2219 
2220 static const struct file_operations softnet_seq_fops = {
2221 	.owner	 = THIS_MODULE,
2222 	.open    = softnet_seq_open,
2223 	.read    = seq_read,
2224 	.llseek  = seq_lseek,
2225 	.release = seq_release,
2226 };
2227 
2228 #ifdef CONFIG_WIRELESS_EXT
2229 extern int wireless_proc_init(void);
2230 #else
2231 #define wireless_proc_init() 0
2232 #endif
2233 
2234 static int __init dev_proc_init(void)
2235 {
2236 	int rc = -ENOMEM;
2237 
2238 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2239 		goto out;
2240 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2241 		goto out_dev;
2242 	if (wireless_proc_init())
2243 		goto out_softnet;
2244 	rc = 0;
2245 out:
2246 	return rc;
2247 out_softnet:
2248 	proc_net_remove("softnet_stat");
2249 out_dev:
2250 	proc_net_remove("dev");
2251 	goto out;
2252 }
2253 #else
2254 #define dev_proc_init() 0
2255 #endif	/* CONFIG_PROC_FS */
2256 
2257 
2258 /**
2259  *	netdev_set_master	-	set up master/slave pair
2260  *	@slave: slave device
2261  *	@master: new master device
2262  *
2263  *	Changes the master device of the slave. Pass %NULL to break the
2264  *	bonding. The caller must hold the RTNL semaphore. On a failure
2265  *	a negative errno code is returned. On success the reference counts
2266  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2267  *	function returns zero.
2268  */
2269 int netdev_set_master(struct net_device *slave, struct net_device *master)
2270 {
2271 	struct net_device *old = slave->master;
2272 
2273 	ASSERT_RTNL();
2274 
2275 	if (master) {
2276 		if (old)
2277 			return -EBUSY;
2278 		dev_hold(master);
2279 	}
2280 
2281 	slave->master = master;
2282 
2283 	synchronize_net();
2284 
2285 	if (old)
2286 		dev_put(old);
2287 
2288 	if (master)
2289 		slave->flags |= IFF_SLAVE;
2290 	else
2291 		slave->flags &= ~IFF_SLAVE;
2292 
2293 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2294 	return 0;
2295 }
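
/*
 * Typical use: the bonding driver enslaves a device roughly like this,
 * with RTNL already held by the caller (bond_dev is the master,
 * slave_dev the slave):
 *
 *	err = netdev_set_master(slave_dev, bond_dev);
 *	if (err)
 *		return err;
 *
 * and later breaks the pairing with netdev_set_master(slave_dev, NULL).
 */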
2296 
2297 /**
2298  *	dev_set_promiscuity	- update promiscuity count on a device
2299  *	@dev: device
2300  *	@inc: modifier
2301  *
2302  *	Add or remove promiscuity from a device. While the count in the device
2303  *	remains above zero the interface remains promiscuous. Once it hits zero
2304  *	the device reverts back to normal filtering operation. A negative inc
2305  *	value is used to drop promiscuity on the device.
2306  */
2307 void dev_set_promiscuity(struct net_device *dev, int inc)
2308 {
2309 	unsigned short old_flags = dev->flags;
2310 
2311 	if ((dev->promiscuity += inc) == 0)
2312 		dev->flags &= ~IFF_PROMISC;
2313 	else
2314 		dev->flags |= IFF_PROMISC;
2315 	if (dev->flags != old_flags) {
2316 		dev_mc_upload(dev);
2317 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2318 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2319 							       "left");
2320 		audit_log(current->audit_context, GFP_ATOMIC,
2321 			AUDIT_ANOM_PROMISCUOUS,
2322 			"dev=%s prom=%d old_prom=%d auid=%u",
2323 			dev->name, (dev->flags & IFF_PROMISC),
2324 			(old_flags & IFF_PROMISC),
2325 			audit_get_loginuid(current->audit_context));
2326 	}
2327 }
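
/*
 * Callers such as the bridge and the packet socket code bump the counter
 * when they start needing every frame and drop it again when they are
 * done, typically with RTNL held.  Illustrative sketch only:
 *
 *	dev_set_promiscuity(dev, 1);	while capturing
 *	dev_set_promiscuity(dev, -1);	when finished
 */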
2328 
2329 /**
2330  *	dev_set_allmulti	- update allmulti count on a device
2331  *	@dev: device
2332  *	@inc: modifier
2333  *
2334  *	Add or remove reception of all multicast frames on a device. While the
2335  *	count in the device remains above zero the interface keeps listening
2336  *	to all multicast frames. Once it hits zero the device reverts back to normal
2337  *	filtering operation. A negative @inc value is used to drop the counter
2338  *	when releasing a resource needing all multicasts.
2339  */
2340 
2341 void dev_set_allmulti(struct net_device *dev, int inc)
2342 {
2343 	unsigned short old_flags = dev->flags;
2344 
2345 	dev->flags |= IFF_ALLMULTI;
2346 	if ((dev->allmulti += inc) == 0)
2347 		dev->flags &= ~IFF_ALLMULTI;
2348 	if (dev->flags ^ old_flags)
2349 		dev_mc_upload(dev);
2350 }
2351 
2352 unsigned dev_get_flags(const struct net_device *dev)
2353 {
2354 	unsigned flags;
2355 
2356 	flags = (dev->flags & ~(IFF_PROMISC |
2357 				IFF_ALLMULTI |
2358 				IFF_RUNNING |
2359 				IFF_LOWER_UP |
2360 				IFF_DORMANT)) |
2361 		(dev->gflags & (IFF_PROMISC |
2362 				IFF_ALLMULTI));
2363 
2364 	if (netif_running(dev)) {
2365 		if (netif_oper_up(dev))
2366 			flags |= IFF_RUNNING;
2367 		if (netif_carrier_ok(dev))
2368 			flags |= IFF_LOWER_UP;
2369 		if (netif_dormant(dev))
2370 			flags |= IFF_DORMANT;
2371 	}
2372 
2373 	return flags;
2374 }
2375 
2376 int dev_change_flags(struct net_device *dev, unsigned flags)
2377 {
2378 	int ret;
2379 	int old_flags = dev->flags;
2380 
2381 	/*
2382 	 *	Set the flags on our device.
2383 	 */
2384 
2385 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2386 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2387 			       IFF_AUTOMEDIA)) |
2388 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2389 				    IFF_ALLMULTI));
2390 
2391 	/*
2392 	 *	Load in the correct multicast list now the flags have changed.
2393 	 */
2394 
2395 	dev_mc_upload(dev);
2396 
2397 	/*
2398 	 *	Have we downed the interface? We handle IFF_UP ourselves
2399 	 *	according to user attempts to set it, rather than blindly
2400 	 *	setting it.
2401 	 */
2402 
2403 	ret = 0;
2404 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different? */
2405 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2406 
2407 		if (!ret)
2408 			dev_mc_upload(dev);
2409 	}
2410 
2411 	if (dev->flags & IFF_UP &&
2412 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2413 					  IFF_VOLATILE)))
2414 		raw_notifier_call_chain(&netdev_chain,
2415 				NETDEV_CHANGE, dev);
2416 
2417 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2418 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2419 		dev->gflags ^= IFF_PROMISC;
2420 		dev_set_promiscuity(dev, inc);
2421 	}
2422 
2423 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2424 	   is important. Some (broken) drivers set IFF_PROMISC when
2425 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
2426 	 */
2427 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2428 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2429 		dev->gflags ^= IFF_ALLMULTI;
2430 		dev_set_allmulti(dev, inc);
2431 	}
2432 
2433 	if (old_flags ^ dev->flags)
2434 		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2435 
2436 	return ret;
2437 }
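
/*
 * A minimal in-kernel sketch of bringing an interface up through this
 * helper; it is essentially what the SIOCSIFFLAGS path in dev_ifsioc()
 * below ends up doing (dev is assumed to be a valid, held net_device):
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 *	rtnl_unlock();
 */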
2438 
2439 int dev_set_mtu(struct net_device *dev, int new_mtu)
2440 {
2441 	int err;
2442 
2443 	if (new_mtu == dev->mtu)
2444 		return 0;
2445 
2446 	/*	MTU must be positive.	 */
2447 	if (new_mtu < 0)
2448 		return -EINVAL;
2449 
2450 	if (!netif_device_present(dev))
2451 		return -ENODEV;
2452 
2453 	err = 0;
2454 	if (dev->change_mtu)
2455 		err = dev->change_mtu(dev, new_mtu);
2456 	else
2457 		dev->mtu = new_mtu;
2458 	if (!err && dev->flags & IFF_UP)
2459 		raw_notifier_call_chain(&netdev_chain,
2460 				NETDEV_CHANGEMTU, dev);
2461 	return err;
2462 }
2463 
2464 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2465 {
2466 	int err;
2467 
2468 	if (!dev->set_mac_address)
2469 		return -EOPNOTSUPP;
2470 	if (sa->sa_family != dev->type)
2471 		return -EINVAL;
2472 	if (!netif_device_present(dev))
2473 		return -ENODEV;
2474 	err = dev->set_mac_address(dev, sa);
2475 	if (!err)
2476 		raw_notifier_call_chain(&netdev_chain,
2477 				NETDEV_CHANGEADDR, dev);
2478 	return err;
2479 }
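
/*
 * An illustrative in-kernel caller; new_mac is a hypothetical buffer of
 * dev->addr_len bytes and RTNL is assumed to be held:
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, dev->addr_len);
 *	err = dev_set_mac_address(dev, &sa);
 */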
2480 
2481 /*
2482  *	Perform the SIOCxIFxxx calls.
2483  */
2484 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2485 {
2486 	int err;
2487 	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2488 
2489 	if (!dev)
2490 		return -ENODEV;
2491 
2492 	switch (cmd) {
2493 		case SIOCGIFFLAGS:	/* Get interface flags */
2494 			ifr->ifr_flags = dev_get_flags(dev);
2495 			return 0;
2496 
2497 		case SIOCSIFFLAGS:	/* Set interface flags */
2498 			return dev_change_flags(dev, ifr->ifr_flags);
2499 
2500 		case SIOCGIFMETRIC:	/* Get the metric on the interface
2501 					   (currently unused) */
2502 			ifr->ifr_metric = 0;
2503 			return 0;
2504 
2505 		case SIOCSIFMETRIC:	/* Set the metric on the interface
2506 					   (currently unused) */
2507 			return -EOPNOTSUPP;
2508 
2509 		case SIOCGIFMTU:	/* Get the MTU of a device */
2510 			ifr->ifr_mtu = dev->mtu;
2511 			return 0;
2512 
2513 		case SIOCSIFMTU:	/* Set the MTU of a device */
2514 			return dev_set_mtu(dev, ifr->ifr_mtu);
2515 
2516 		case SIOCGIFHWADDR:
2517 			if (!dev->addr_len)
2518 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2519 			else
2520 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2521 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2522 			ifr->ifr_hwaddr.sa_family = dev->type;
2523 			return 0;
2524 
2525 		case SIOCSIFHWADDR:
2526 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2527 
2528 		case SIOCSIFHWBROADCAST:
2529 			if (ifr->ifr_hwaddr.sa_family != dev->type)
2530 				return -EINVAL;
2531 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2532 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2533 			raw_notifier_call_chain(&netdev_chain,
2534 					    NETDEV_CHANGEADDR, dev);
2535 			return 0;
2536 
2537 		case SIOCGIFMAP:
2538 			ifr->ifr_map.mem_start = dev->mem_start;
2539 			ifr->ifr_map.mem_end   = dev->mem_end;
2540 			ifr->ifr_map.base_addr = dev->base_addr;
2541 			ifr->ifr_map.irq       = dev->irq;
2542 			ifr->ifr_map.dma       = dev->dma;
2543 			ifr->ifr_map.port      = dev->if_port;
2544 			return 0;
2545 
2546 		case SIOCSIFMAP:
2547 			if (dev->set_config) {
2548 				if (!netif_device_present(dev))
2549 					return -ENODEV;
2550 				return dev->set_config(dev, &ifr->ifr_map);
2551 			}
2552 			return -EOPNOTSUPP;
2553 
2554 		case SIOCADDMULTI:
2555 			if (!dev->set_multicast_list ||
2556 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2557 				return -EINVAL;
2558 			if (!netif_device_present(dev))
2559 				return -ENODEV;
2560 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2561 					  dev->addr_len, 1);
2562 
2563 		case SIOCDELMULTI:
2564 			if (!dev->set_multicast_list ||
2565 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2566 				return -EINVAL;
2567 			if (!netif_device_present(dev))
2568 				return -ENODEV;
2569 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2570 					     dev->addr_len, 1);
2571 
2572 		case SIOCGIFINDEX:
2573 			ifr->ifr_ifindex = dev->ifindex;
2574 			return 0;
2575 
2576 		case SIOCGIFTXQLEN:
2577 			ifr->ifr_qlen = dev->tx_queue_len;
2578 			return 0;
2579 
2580 		case SIOCSIFTXQLEN:
2581 			if (ifr->ifr_qlen < 0)
2582 				return -EINVAL;
2583 			dev->tx_queue_len = ifr->ifr_qlen;
2584 			return 0;
2585 
2586 		case SIOCSIFNAME:
2587 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2588 			return dev_change_name(dev, ifr->ifr_newname);
2589 
2590 		/*
2591 		 *	Unknown or private ioctl
2592 		 */
2593 
2594 		default:
2595 			if ((cmd >= SIOCDEVPRIVATE &&
2596 			    cmd <= SIOCDEVPRIVATE + 15) ||
2597 			    cmd == SIOCBONDENSLAVE ||
2598 			    cmd == SIOCBONDRELEASE ||
2599 			    cmd == SIOCBONDSETHWADDR ||
2600 			    cmd == SIOCBONDSLAVEINFOQUERY ||
2601 			    cmd == SIOCBONDINFOQUERY ||
2602 			    cmd == SIOCBONDCHANGEACTIVE ||
2603 			    cmd == SIOCGMIIPHY ||
2604 			    cmd == SIOCGMIIREG ||
2605 			    cmd == SIOCSMIIREG ||
2606 			    cmd == SIOCBRADDIF ||
2607 			    cmd == SIOCBRDELIF ||
2608 			    cmd == SIOCWANDEV) {
2609 				err = -EOPNOTSUPP;
2610 				if (dev->do_ioctl) {
2611 					if (netif_device_present(dev))
2612 						err = dev->do_ioctl(dev, ifr,
2613 								    cmd);
2614 					else
2615 						err = -ENODEV;
2616 				}
2617 			} else
2618 				err = -EINVAL;
2619 
2620 	}
2621 	return err;
2622 }
2623 
2624 /*
2625  *	This function handles all "interface"-type I/O control requests. The actual
2626  *	'doing' part of this is dev_ifsioc above.
2627  */
2628 
2629 /**
2630  *	dev_ioctl	-	network device ioctl
2631  *	@cmd: command to issue
2632  *	@arg: pointer to a struct ifreq in user space
2633  *
2634  *	Issue ioctl functions to devices. This is normally called by the
2635  *	user space syscall interfaces but can sometimes be useful for
2636  *	other purposes. The return value is the return from the syscall if
2637  *	positive or a negative errno code on error.
2638  */
2639 
2640 int dev_ioctl(unsigned int cmd, void __user *arg)
2641 {
2642 	struct ifreq ifr;
2643 	int ret;
2644 	char *colon;
2645 
2646 	/* One special case: SIOCGIFCONF takes ifconf argument
2647 	   and requires shared lock, because it sleeps writing
2648 	   to user space.
2649 	 */
2650 
2651 	if (cmd == SIOCGIFCONF) {
2652 		rtnl_lock();
2653 		ret = dev_ifconf((char __user *) arg);
2654 		rtnl_unlock();
2655 		return ret;
2656 	}
2657 	if (cmd == SIOCGIFNAME)
2658 		return dev_ifname((struct ifreq __user *)arg);
2659 
2660 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2661 		return -EFAULT;
2662 
2663 	ifr.ifr_name[IFNAMSIZ-1] = 0;
2664 
2665 	colon = strchr(ifr.ifr_name, ':');
2666 	if (colon)
2667 		*colon = 0;
2668 
2669 	/*
2670 	 *	See which interface the caller is talking about.
2671 	 */
2672 
2673 	switch (cmd) {
2674 		/*
2675 		 *	These ioctl calls:
2676 		 *	- can be done by all.
2677 		 *	- atomic and do not require locking.
2678 		 *	- return a value
2679 		 */
2680 		case SIOCGIFFLAGS:
2681 		case SIOCGIFMETRIC:
2682 		case SIOCGIFMTU:
2683 		case SIOCGIFHWADDR:
2684 		case SIOCGIFSLAVE:
2685 		case SIOCGIFMAP:
2686 		case SIOCGIFINDEX:
2687 		case SIOCGIFTXQLEN:
2688 			dev_load(ifr.ifr_name);
2689 			read_lock(&dev_base_lock);
2690 			ret = dev_ifsioc(&ifr, cmd);
2691 			read_unlock(&dev_base_lock);
2692 			if (!ret) {
2693 				if (colon)
2694 					*colon = ':';
2695 				if (copy_to_user(arg, &ifr,
2696 						 sizeof(struct ifreq)))
2697 					ret = -EFAULT;
2698 			}
2699 			return ret;
2700 
2701 		case SIOCETHTOOL:
2702 			dev_load(ifr.ifr_name);
2703 			rtnl_lock();
2704 			ret = dev_ethtool(&ifr);
2705 			rtnl_unlock();
2706 			if (!ret) {
2707 				if (colon)
2708 					*colon = ':';
2709 				if (copy_to_user(arg, &ifr,
2710 						 sizeof(struct ifreq)))
2711 					ret = -EFAULT;
2712 			}
2713 			return ret;
2714 
2715 		/*
2716 		 *	These ioctl calls:
2717 		 *	- require superuser power.
2718 		 *	- require strict serialization.
2719 		 *	- return a value
2720 		 */
2721 		case SIOCGMIIPHY:
2722 		case SIOCGMIIREG:
2723 		case SIOCSIFNAME:
2724 			if (!capable(CAP_NET_ADMIN))
2725 				return -EPERM;
2726 			dev_load(ifr.ifr_name);
2727 			rtnl_lock();
2728 			ret = dev_ifsioc(&ifr, cmd);
2729 			rtnl_unlock();
2730 			if (!ret) {
2731 				if (colon)
2732 					*colon = ':';
2733 				if (copy_to_user(arg, &ifr,
2734 						 sizeof(struct ifreq)))
2735 					ret = -EFAULT;
2736 			}
2737 			return ret;
2738 
2739 		/*
2740 		 *	These ioctl calls:
2741 		 *	- require superuser power.
2742 		 *	- require strict serialization.
2743 		 *	- do not return a value
2744 		 */
2745 		case SIOCSIFFLAGS:
2746 		case SIOCSIFMETRIC:
2747 		case SIOCSIFMTU:
2748 		case SIOCSIFMAP:
2749 		case SIOCSIFHWADDR:
2750 		case SIOCSIFSLAVE:
2751 		case SIOCADDMULTI:
2752 		case SIOCDELMULTI:
2753 		case SIOCSIFHWBROADCAST:
2754 		case SIOCSIFTXQLEN:
2755 		case SIOCSMIIREG:
2756 		case SIOCBONDENSLAVE:
2757 		case SIOCBONDRELEASE:
2758 		case SIOCBONDSETHWADDR:
2759 		case SIOCBONDCHANGEACTIVE:
2760 		case SIOCBRADDIF:
2761 		case SIOCBRDELIF:
2762 			if (!capable(CAP_NET_ADMIN))
2763 				return -EPERM;
2764 			/* fall through */
2765 		case SIOCBONDSLAVEINFOQUERY:
2766 		case SIOCBONDINFOQUERY:
2767 			dev_load(ifr.ifr_name);
2768 			rtnl_lock();
2769 			ret = dev_ifsioc(&ifr, cmd);
2770 			rtnl_unlock();
2771 			return ret;
2772 
2773 		case SIOCGIFMEM:
2774 			/* Get the per device memory space. We can add this but
2775 			 * currently do not support it */
2776 		case SIOCSIFMEM:
2777 			/* Set the per device memory buffer space.
2778 			 * Not applicable in our case */
2779 		case SIOCSIFLINK:
2780 			return -EINVAL;
2781 
2782 		/*
2783 		 *	Unknown or private ioctl.
2784 		 */
2785 		default:
2786 			if (cmd == SIOCWANDEV ||
2787 			    (cmd >= SIOCDEVPRIVATE &&
2788 			     cmd <= SIOCDEVPRIVATE + 15)) {
2789 				dev_load(ifr.ifr_name);
2790 				rtnl_lock();
2791 				ret = dev_ifsioc(&ifr, cmd);
2792 				rtnl_unlock();
2793 				if (!ret && copy_to_user(arg, &ifr,
2794 							 sizeof(struct ifreq)))
2795 					ret = -EFAULT;
2796 				return ret;
2797 			}
2798 #ifdef CONFIG_WIRELESS_EXT
2799 			/* Take care of Wireless Extensions */
2800 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2801 				/* If command is `set a parameter', or
2802 				 * `get the encoding parameters', check if
2803 				 * the user has the right to do it */
2804 				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
2805 				    || cmd == SIOCGIWENCODEEXT) {
2806 					if (!capable(CAP_NET_ADMIN))
2807 						return -EPERM;
2808 				}
2809 				dev_load(ifr.ifr_name);
2810 				rtnl_lock();
2811 				/* Follow me in net/core/wireless.c */
2812 				ret = wireless_process_ioctl(&ifr, cmd);
2813 				rtnl_unlock();
2814 				if (IW_IS_GET(cmd) &&
2815 				    copy_to_user(arg, &ifr,
2816 						 sizeof(struct ifreq)))
2817 					ret = -EFAULT;
2818 				return ret;
2819 			}
2820 #endif	/* CONFIG_WIRELESS_EXT */
2821 			return -EINVAL;
2822 	}
2823 }
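
/*
 * For reference, a small user space example of the common "get" path
 * (SIOCGIFMTU), using <sys/socket.h>, <sys/ioctl.h> and <net/if.h> among
 * others; error handling omitted:
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCGIFMTU, &ifr);
 *	printf("%s mtu %d\n", ifr.ifr_name, ifr.ifr_mtu);
 */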
2824 
2825 
2826 /**
2827  *	dev_new_index	-	allocate an ifindex
2828  *
2829  *	Returns a suitable unique value for a new device interface
2830  *	number.  The caller must hold the rtnl semaphore or the
2831  *	dev_base_lock to be sure it remains unique.
2832  */
2833 static int dev_new_index(void)
2834 {
2835 	static int ifindex;
2836 	for (;;) {
2837 		if (++ifindex <= 0)
2838 			ifindex = 1;
2839 		if (!__dev_get_by_index(ifindex))
2840 			return ifindex;
2841 	}
2842 }
2843 
2844 static int dev_boot_phase = 1;
2845 
2846 /* Delayed registration/unregistration */
2847 static DEFINE_SPINLOCK(net_todo_list_lock);
2848 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2849 
2850 static inline void net_set_todo(struct net_device *dev)
2851 {
2852 	spin_lock(&net_todo_list_lock);
2853 	list_add_tail(&dev->todo_list, &net_todo_list);
2854 	spin_unlock(&net_todo_list_lock);
2855 }
2856 
2857 /**
2858  *	register_netdevice	- register a network device
2859  *	@dev: device to register
2860  *
2861  *	Take a completed network device structure and add it to the kernel
2862  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2863  *	chain. 0 is returned on success. A negative errno code is returned
2864  *	on a failure to set up the device, or if the name is a duplicate.
2865  *
2866  *	Callers must hold the rtnl semaphore. You may want
2867  *	register_netdev() instead of this.
2868  *
2869  *	BUGS:
2870  *	The locking appears insufficient to guarantee two parallel registers
2871  *	will not get the same name.
2872  */
2873 
2874 int register_netdevice(struct net_device *dev)
2875 {
2876 	struct hlist_head *head;
2877 	struct hlist_node *p;
2878 	int ret;
2879 
2880 	BUG_ON(dev_boot_phase);
2881 	ASSERT_RTNL();
2882 
2883 	might_sleep();
2884 
2885 	/* When net_devices are persistent, this will be fatal. */
2886 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2887 
2888 	spin_lock_init(&dev->queue_lock);
2889 	spin_lock_init(&dev->_xmit_lock);
2890 	dev->xmit_lock_owner = -1;
2891 #ifdef CONFIG_NET_CLS_ACT
2892 	spin_lock_init(&dev->ingress_lock);
2893 #endif
2894 
2895 	dev->iflink = -1;
2896 
2897 	/* Init, if this function is available */
2898 	if (dev->init) {
2899 		ret = dev->init(dev);
2900 		if (ret) {
2901 			if (ret > 0)
2902 				ret = -EIO;
2903 			goto out;
2904 		}
2905 	}
2906 
2907 	if (!dev_valid_name(dev->name)) {
2908 		ret = -EINVAL;
2909 		goto out;
2910 	}
2911 
2912 	dev->ifindex = dev_new_index();
2913 	if (dev->iflink == -1)
2914 		dev->iflink = dev->ifindex;
2915 
2916 	/* Check for existence of name */
2917 	head = dev_name_hash(dev->name);
2918 	hlist_for_each(p, head) {
2919 		struct net_device *d
2920 			= hlist_entry(p, struct net_device, name_hlist);
2921 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2922 			ret = -EEXIST;
2923 			goto out;
2924 		}
2925 	}
2926 
2927 	/* Fix illegal SG+CSUM combinations. */
2928 	if ((dev->features & NETIF_F_SG) &&
2929 	    !(dev->features & NETIF_F_ALL_CSUM)) {
2930 		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
2931 		       dev->name);
2932 		dev->features &= ~NETIF_F_SG;
2933 	}
2934 
2935 	/* TSO requires that SG is present as well. */
2936 	if ((dev->features & NETIF_F_TSO) &&
2937 	    !(dev->features & NETIF_F_SG)) {
2938 		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
2939 		       dev->name);
2940 		dev->features &= ~NETIF_F_TSO;
2941 	}
2942 	if (dev->features & NETIF_F_UFO) {
2943 		if (!(dev->features & NETIF_F_HW_CSUM)) {
2944 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2945 					"NETIF_F_HW_CSUM feature.\n",
2946 							dev->name);
2947 			dev->features &= ~NETIF_F_UFO;
2948 		}
2949 		if (!(dev->features & NETIF_F_SG)) {
2950 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2951 					"NETIF_F_SG feature.\n",
2952 					dev->name);
2953 			dev->features &= ~NETIF_F_UFO;
2954 		}
2955 	}
2956 
2957 	/*
2958 	 *	Default rebuild_header routine: it should never be called
2959 	 *	and exists only as a bug trap.
2960 	 */
2961 
2962 	if (!dev->rebuild_header)
2963 		dev->rebuild_header = default_rebuild_header;
2964 
2965 	ret = netdev_register_sysfs(dev);
2966 	if (ret)
2967 		goto out;
2968 	dev->reg_state = NETREG_REGISTERED;
2969 
2970 	/*
2971 	 *	Default initial state at registry is that the
2972 	 *	device is present.
2973 	 */
2974 
2975 	set_bit(__LINK_STATE_PRESENT, &dev->state);
2976 
2977 	dev->next = NULL;
2978 	dev_init_scheduler(dev);
2979 	write_lock_bh(&dev_base_lock);
2980 	*dev_tail = dev;
2981 	dev_tail = &dev->next;
2982 	hlist_add_head(&dev->name_hlist, head);
2983 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2984 	dev_hold(dev);
2985 	write_unlock_bh(&dev_base_lock);
2986 
2987 	/* Notify protocols, that a new device appeared. */
2988 	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2989 
2990 	ret = 0;
2991 
2992 out:
2993 	return ret;
2994 }
2995 
2996 /**
2997  *	register_netdev	- register a network device
2998  *	@dev: device to register
2999  *
3000  *	Take a completed network device structure and add it to the kernel
3001  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3002  *	chain. 0 is returned on success. A negative errno code is returned
3003  *	on a failure to set up the device, or if the name is a duplicate.
3004  *
3005  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
3006  *	and expands the device name if you passed a format string to
3007  *	alloc_netdev.
3008  */
3009 int register_netdev(struct net_device *dev)
3010 {
3011 	int err;
3012 
3013 	rtnl_lock();
3014 
3015 	/*
3016 	 * If the name is a format string the caller wants us to do a
3017 	 * name allocation.
3018 	 */
3019 	if (strchr(dev->name, '%')) {
3020 		err = dev_alloc_name(dev, dev->name);
3021 		if (err < 0)
3022 			goto out;
3023 	}
3024 
3025 	err = register_netdevice(dev);
3026 out:
3027 	rtnl_unlock();
3028 	return err;
3029 }
3030 EXPORT_SYMBOL(register_netdev);
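
/*
 * Typical driver-side usage, shown only as a sketch.  struct my_priv and
 * my_setup() are hypothetical; "mydev%d" demonstrates the format-string
 * name expansion mentioned above:
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "mydev%d", my_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *
 *	err = register_netdev(dev);	name becomes e.g. "mydev0"
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */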
3031 
3032 /*
3033  * netdev_wait_allrefs - wait until all references are gone.
3034  *
3035  * This is called when unregistering network devices.
3036  *
3037  * Any protocol or device that holds a reference should register
3038  * for netdevice notification, and cleanup and put back the
3039  * reference if they receive an UNREGISTER event.
3040  * We can get stuck here if buggy protocols don't correctly
3041  * call dev_put.
3042  */
3043 static void netdev_wait_allrefs(struct net_device *dev)
3044 {
3045 	unsigned long rebroadcast_time, warning_time;
3046 
3047 	rebroadcast_time = warning_time = jiffies;
3048 	while (atomic_read(&dev->refcnt) != 0) {
3049 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3050 			rtnl_lock();
3051 
3052 			/* Rebroadcast unregister notification */
3053 			raw_notifier_call_chain(&netdev_chain,
3054 					    NETDEV_UNREGISTER, dev);
3055 
3056 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3057 				     &dev->state)) {
3058 				/* We must not have linkwatch events
3059 				 * pending on unregister. If this
3060 				 * happens, we simply run the queue
3061 				 * unscheduled, resulting in a noop
3062 				 * for this device.
3063 				 */
3064 				linkwatch_run_queue();
3065 			}
3066 
3067 			__rtnl_unlock();
3068 
3069 			rebroadcast_time = jiffies;
3070 		}
3071 
3072 		msleep(250);
3073 
3074 		if (time_after(jiffies, warning_time + 10 * HZ)) {
3075 			printk(KERN_EMERG "unregister_netdevice: "
3076 			       "waiting for %s to become free. Usage "
3077 			       "count = %d\n",
3078 			       dev->name, atomic_read(&dev->refcnt));
3079 			warning_time = jiffies;
3080 		}
3081 	}
3082 }
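
/*
 * A sketch of a well-behaved reference holder, as described above.  The
 * names my_netdev_event, my_nb and my_cached_dev are hypothetical:
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER && dev == my_cached_dev) {
 *			dev_put(my_cached_dev);
 *			my_cached_dev = NULL;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 * registered with register_netdevice_notifier(&my_nb), so that the loop
 * in netdev_wait_allrefs() can complete.
 */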
3083 
3084 /* The sequence is:
3085  *
3086  *	rtnl_lock();
3087  *	...
3088  *	register_netdevice(x1);
3089  *	register_netdevice(x2);
3090  *	...
3091  *	unregister_netdevice(y1);
3092  *	unregister_netdevice(y2);
3093  *      ...
3094  *	rtnl_unlock();
3095  *	free_netdev(y1);
3096  *	free_netdev(y2);
3097  *
3098  * We are invoked by rtnl_unlock() after it drops the semaphore.
3099  * This allows us to deal with problems:
3100  * 1) We can delete sysfs objects which invoke hotplug
3101  *    without deadlocking with linkwatch via keventd.
3102  * 2) Since we run with the RTNL semaphore not held, we can sleep
3103  *    safely in order to wait for the netdev refcnt to drop to zero.
3104  */
3105 static DEFINE_MUTEX(net_todo_run_mutex);
3106 void netdev_run_todo(void)
3107 {
3108 	struct list_head list;
3109 
3110 	/* Need to guard against multiple cpu's getting out of order. */
3111 	mutex_lock(&net_todo_run_mutex);
3112 
3113 	/* Not safe to do outside the semaphore.  We must not return
3114 	 * until all unregister events invoked by the local processor
3115 	 * have been completed (either by this todo run, or one on
3116 	 * another cpu).
3117 	 */
3118 	if (list_empty(&net_todo_list))
3119 		goto out;
3120 
3121 	/* Snapshot list, allow later requests */
3122 	spin_lock(&net_todo_list_lock);
3123 	list_replace_init(&net_todo_list, &list);
3124 	spin_unlock(&net_todo_list_lock);
3125 
3126 	while (!list_empty(&list)) {
3127 		struct net_device *dev
3128 			= list_entry(list.next, struct net_device, todo_list);
3129 		list_del(&dev->todo_list);
3130 
3131 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3132 			printk(KERN_ERR "network todo '%s' but state %d\n",
3133 			       dev->name, dev->reg_state);
3134 			dump_stack();
3135 			continue;
3136 		}
3137 
3138 		netdev_unregister_sysfs(dev);
3139 		dev->reg_state = NETREG_UNREGISTERED;
3140 
3141 		netdev_wait_allrefs(dev);
3142 
3143 		/* paranoia */
3144 		BUG_ON(atomic_read(&dev->refcnt));
3145 		BUG_TRAP(!dev->ip_ptr);
3146 		BUG_TRAP(!dev->ip6_ptr);
3147 		BUG_TRAP(!dev->dn_ptr);
3148 
3149 		/* It must be the very last action,
3150 		 * after this 'dev' may point to freed up memory.
3151 		 */
3152 		if (dev->destructor)
3153 			dev->destructor(dev);
3154 	}
3155 
3156 out:
3157 	mutex_unlock(&net_todo_run_mutex);
3158 }
3159 
3160 /**
3161  *	alloc_netdev - allocate network device
3162  *	@sizeof_priv:	size of private data to allocate space for
3163  *	@name:		device name format string
3164  *	@setup:		callback to initialize device
3165  *
3166  *	Allocates a struct net_device with private data area for driver use
3167  *	and performs basic initialization.
3168  */
3169 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3170 		void (*setup)(struct net_device *))
3171 {
3172 	void *p;
3173 	struct net_device *dev;
3174 	int alloc_size;
3175 
3176 	BUG_ON(strlen(name) >= sizeof(dev->name));
3177 
3178 	/* ensure 32-byte alignment of both the device and private area */
3179 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3180 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3181 
3182 	p = kzalloc(alloc_size, GFP_KERNEL);
3183 	if (!p) {
3184 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3185 		return NULL;
3186 	}
3187 
3188 	dev = (struct net_device *)
3189 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3190 	dev->padded = (char *)dev - (char *)p;
3191 
3192 	if (sizeof_priv)
3193 		dev->priv = netdev_priv(dev);
3194 
3195 	setup(dev);
3196 	strcpy(dev->name, name);
3197 	return dev;
3198 }
3199 EXPORT_SYMBOL(alloc_netdev);
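
/*
 * Illustrative sketch of the private area handling; struct my_priv is a
 * hypothetical driver structure:
 *
 *	struct my_priv {
 *		spinlock_t	lock;
 *		int		link_up;
 *	};
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "dummy%d", ether_setup);
 *	priv = netdev_priv(dev);	aligned area right behind *dev
 */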
3200 
3201 /**
3202  *	free_netdev - free network device
3203  *	@dev: device
3204  *
3205  *	This function does the last stage of destroying an allocated device
3206  * 	interface. The reference to the device object is released.
3207  *	If this is the last reference then it will be freed.
3208  */
3209 void free_netdev(struct net_device *dev)
3210 {
3211 #ifdef CONFIG_SYSFS
3212 	/*  Compatibility with error handling in drivers */
3213 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3214 		kfree((char *)dev - dev->padded);
3215 		return;
3216 	}
3217 
3218 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3219 	dev->reg_state = NETREG_RELEASED;
3220 
3221 	/* will free via device release */
3222 	put_device(&dev->dev);
3223 #else
3224 	kfree((char *)dev - dev->padded);
3225 #endif
3226 }
3227 
3228 /* Synchronize with packet receive processing. */
3229 void synchronize_net(void)
3230 {
3231 	might_sleep();
3232 	synchronize_rcu();
3233 }
3234 
3235 /**
3236  *	unregister_netdevice - remove device from the kernel
3237  *	@dev: device
3238  *
3239  *	This function shuts down a device interface and removes it
3240  *	from the kernel tables. On success 0 is returned, on a failure
3241  *	a negative errno code is returned.
3242  *
3243  *	Callers must hold the rtnl semaphore.  You may want
3244  *	unregister_netdev() instead of this.
3245  */
3246 
3247 void unregister_netdevice(struct net_device *dev)
3248 {
3249 	struct net_device *d, **dp;
3250 
3251 	BUG_ON(dev_boot_phase);
3252 	ASSERT_RTNL();
3253 
3254 	/* Some devices call without registering for initialization unwind. */
3255 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3256 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3257 				  "was registered\n", dev->name, dev);
3258 
3259 		WARN_ON(1);
3260 		return;
3261 	}
3262 
3263 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3264 
3265 	/* If device is running, close it first. */
3266 	if (dev->flags & IFF_UP)
3267 		dev_close(dev);
3268 
3269 	/* And unlink it from device chain. */
3270 	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3271 		if (d == dev) {
3272 			write_lock_bh(&dev_base_lock);
3273 			hlist_del(&dev->name_hlist);
3274 			hlist_del(&dev->index_hlist);
3275 			if (dev_tail == &dev->next)
3276 				dev_tail = dp;
3277 			*dp = d->next;
3278 			write_unlock_bh(&dev_base_lock);
3279 			break;
3280 		}
3281 	}
3282 	BUG_ON(!d);
3283 
3284 	dev->reg_state = NETREG_UNREGISTERING;
3285 
3286 	synchronize_net();
3287 
3288 	/* Shutdown queueing discipline. */
3289 	dev_shutdown(dev);
3290 
3291 
3292 	/* Notify protocols, that we are about to destroy
3293 	   this device. They should clean all the things.
3294 	*/
3295 	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3296 
3297 	/*
3298 	 *	Flush the multicast chain
3299 	 */
3300 	dev_mc_discard(dev);
3301 
3302 	if (dev->uninit)
3303 		dev->uninit(dev);
3304 
3305 	/* Notifier chain MUST detach us from master device. */
3306 	BUG_TRAP(!dev->master);
3307 
3308 	/* Finish processing unregister after unlock */
3309 	net_set_todo(dev);
3310 
3311 	synchronize_net();
3312 
3313 	dev_put(dev);
3314 }
3315 
3316 /**
3317  *	unregister_netdev - remove device from the kernel
3318  *	@dev: device
3319  *
3320  *	This function shuts down a device interface and removes it
3321  *	from the kernel tables. On success 0 is returned, on a failure
3322  *	a negative errno code is returned.
3323  *
3324  *	This is just a wrapper for unregister_netdevice that takes
3325  *	the rtnl semaphore.  In general you want to use this and not
3326  *	unregister_netdevice.
3327  */
3328 void unregister_netdev(struct net_device *dev)
3329 {
3330 	rtnl_lock();
3331 	unregister_netdevice(dev);
3332 	rtnl_unlock();
3333 }
3334 
3335 EXPORT_SYMBOL(unregister_netdev);
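
/*
 * Typical module teardown, sketched with a hypothetical my_dev pointer.
 * free_netdev() may be called as soon as unregister_netdev() returns,
 * because the rtnl_unlock() inside it runs the todo list and waits for
 * all remaining references to go away:
 *
 *	static void __exit my_exit(void)
 *	{
 *		unregister_netdev(my_dev);
 *		free_netdev(my_dev);
 *	}
 */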
3336 
3337 static int dev_cpu_callback(struct notifier_block *nfb,
3338 			    unsigned long action,
3339 			    void *ocpu)
3340 {
3341 	struct sk_buff **list_skb;
3342 	struct net_device **list_net;
3343 	struct sk_buff *skb;
3344 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
3345 	struct softnet_data *sd, *oldsd;
3346 
3347 	if (action != CPU_DEAD)
3348 		return NOTIFY_OK;
3349 
3350 	local_irq_disable();
3351 	cpu = smp_processor_id();
3352 	sd = &per_cpu(softnet_data, cpu);
3353 	oldsd = &per_cpu(softnet_data, oldcpu);
3354 
3355 	/* Find end of our completion_queue. */
3356 	list_skb = &sd->completion_queue;
3357 	while (*list_skb)
3358 		list_skb = &(*list_skb)->next;
3359 	/* Append completion queue from offline CPU. */
3360 	*list_skb = oldsd->completion_queue;
3361 	oldsd->completion_queue = NULL;
3362 
3363 	/* Find end of our output_queue. */
3364 	list_net = &sd->output_queue;
3365 	while (*list_net)
3366 		list_net = &(*list_net)->next_sched;
3367 	/* Append output queue from offline CPU. */
3368 	*list_net = oldsd->output_queue;
3369 	oldsd->output_queue = NULL;
3370 
3371 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
3372 	local_irq_enable();
3373 
3374 	/* Process offline CPU's input_pkt_queue */
3375 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3376 		netif_rx(skb);
3377 
3378 	return NOTIFY_OK;
3379 }
3380 
3381 #ifdef CONFIG_NET_DMA
3382 /**
3383  * net_dma_rebalance - redistribute the allocated DMA channels among the CPUs
3384  * This is called when the number of channels allocated to the net_dma_client
3385  * changes.  The net_dma_client tries to have one DMA channel per CPU.
3386  */
3387 static void net_dma_rebalance(void)
3388 {
3389 	unsigned int cpu, i, n;
3390 	struct dma_chan *chan;
3391 
3392 	if (net_dma_count == 0) {
3393 		for_each_online_cpu(cpu)
3394 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3395 		return;
3396 	}
3397 
3398 	i = 0;
3399 	cpu = first_cpu(cpu_online_map);
3400 
3401 	rcu_read_lock();
3402 	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3403 		n = ((num_online_cpus() / net_dma_count)
3404 		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3405 
3406 		while(n) {
3407 		while (n) {
3408 			cpu = next_cpu(cpu, cpu_online_map);
3409 			n--;
3410 		}
3411 		i++;
3412 	}
3413 	rcu_read_unlock();
3414 }
3415 
3416 /**
3417  * netdev_dma_event - event callback for the net_dma_client
3418  * @client: should always be net_dma_client
3419  * @chan: DMA channel for the event
3420  * @event: event type
3421  */
3422 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3423 	enum dma_event event)
3424 {
3425 	spin_lock(&net_dma_event_lock);
3426 	switch (event) {
3427 	case DMA_RESOURCE_ADDED:
3428 		net_dma_count++;
3429 		net_dma_rebalance();
3430 		break;
3431 	case DMA_RESOURCE_REMOVED:
3432 		net_dma_count--;
3433 		net_dma_rebalance();
3434 		break;
3435 	default:
3436 		break;
3437 	}
3438 	spin_unlock(&net_dma_event_lock);
3439 }
3440 
3441 /**
3442  * netdev_dma_register - register the networking subsystem as a DMA client
3443  */
3444 static int __init netdev_dma_register(void)
3445 {
3446 	spin_lock_init(&net_dma_event_lock);
3447 	net_dma_client = dma_async_client_register(netdev_dma_event);
3448 	if (net_dma_client == NULL)
3449 		return -ENOMEM;
3450 
3451 	dma_async_client_chan_request(net_dma_client, num_online_cpus());
3452 	return 0;
3453 }
3454 
3455 #else
3456 static int __init netdev_dma_register(void) { return -ENODEV; }
3457 #endif /* CONFIG_NET_DMA */
3458 
3459 /*
3460  *	Initialize the DEV module. At boot time this walks the device list and
3461  *	unhooks any devices that fail to initialise (normally hardware not
3462  *	present) and leaves us with a valid list of present and active devices.
3463  *
3464  */
3465 
3466 /*
3467  *       This is called single threaded during boot, so no need
3468  *       to take the rtnl semaphore.
3469  */
3470 static int __init net_dev_init(void)
3471 {
3472 	int i, rc = -ENOMEM;
3473 
3474 	BUG_ON(!dev_boot_phase);
3475 
3476 	if (dev_proc_init())
3477 		goto out;
3478 
3479 	if (netdev_sysfs_init())
3480 		goto out;
3481 
3482 	INIT_LIST_HEAD(&ptype_all);
3483 	for (i = 0; i < 16; i++)
3484 		INIT_LIST_HEAD(&ptype_base[i]);
3485 
3486 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3487 		INIT_HLIST_HEAD(&dev_name_head[i]);
3488 
3489 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3490 		INIT_HLIST_HEAD(&dev_index_head[i]);
3491 
3492 	/*
3493 	 *	Initialise the packet receive queues.
3494 	 */
3495 
3496 	for_each_possible_cpu(i) {
3497 		struct softnet_data *queue;
3498 
3499 		queue = &per_cpu(softnet_data, i);
3500 		skb_queue_head_init(&queue->input_pkt_queue);
3501 		queue->completion_queue = NULL;
3502 		INIT_LIST_HEAD(&queue->poll_list);
3503 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3504 		queue->backlog_dev.weight = weight_p;
3505 		queue->backlog_dev.poll = process_backlog;
3506 		atomic_set(&queue->backlog_dev.refcnt, 1);
3507 	}
3508 
3509 	netdev_dma_register();
3510 
3511 	dev_boot_phase = 0;
3512 
3513 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3514 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3515 
3516 	hotcpu_notifier(dev_cpu_callback, 0);
3517 	dst_init();
3518 	dev_mcast_init();
3519 	rc = 0;
3520 out:
3521 	return rc;
3522 }
3523 
3524 subsys_initcall(net_dev_init);
3525 
3526 EXPORT_SYMBOL(__dev_get_by_index);
3527 EXPORT_SYMBOL(__dev_get_by_name);
3528 EXPORT_SYMBOL(__dev_remove_pack);
3529 EXPORT_SYMBOL(dev_valid_name);
3530 EXPORT_SYMBOL(dev_add_pack);
3531 EXPORT_SYMBOL(dev_alloc_name);
3532 EXPORT_SYMBOL(dev_close);
3533 EXPORT_SYMBOL(dev_get_by_flags);
3534 EXPORT_SYMBOL(dev_get_by_index);
3535 EXPORT_SYMBOL(dev_get_by_name);
3536 EXPORT_SYMBOL(dev_open);
3537 EXPORT_SYMBOL(dev_queue_xmit);
3538 EXPORT_SYMBOL(dev_remove_pack);
3539 EXPORT_SYMBOL(dev_set_allmulti);
3540 EXPORT_SYMBOL(dev_set_promiscuity);
3541 EXPORT_SYMBOL(dev_change_flags);
3542 EXPORT_SYMBOL(dev_set_mtu);
3543 EXPORT_SYMBOL(dev_set_mac_address);
3544 EXPORT_SYMBOL(free_netdev);
3545 EXPORT_SYMBOL(netdev_boot_setup_check);
3546 EXPORT_SYMBOL(netdev_set_master);
3547 EXPORT_SYMBOL(netdev_state_change);
3548 EXPORT_SYMBOL(netif_receive_skb);
3549 EXPORT_SYMBOL(netif_rx);
3550 EXPORT_SYMBOL(register_gifconf);
3551 EXPORT_SYMBOL(register_netdevice);
3552 EXPORT_SYMBOL(register_netdevice_notifier);
3553 EXPORT_SYMBOL(skb_checksum_help);
3554 EXPORT_SYMBOL(synchronize_net);
3555 EXPORT_SYMBOL(unregister_netdevice);
3556 EXPORT_SYMBOL(unregister_netdevice_notifier);
3557 EXPORT_SYMBOL(net_enable_timestamp);
3558 EXPORT_SYMBOL(net_disable_timestamp);
3559 EXPORT_SYMBOL(dev_get_flags);
3560 
3561 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3562 EXPORT_SYMBOL(br_handle_frame_hook);
3563 EXPORT_SYMBOL(br_fdb_get_hook);
3564 EXPORT_SYMBOL(br_fdb_put_hook);
3565 #endif
3566 
3567 #ifdef CONFIG_KMOD
3568 EXPORT_SYMBOL(dev_load);
3569 #endif
3570 
3571 EXPORT_PER_CPU_SYMBOL(softnet_data);
3572