xref: /linux/net/core/dev.c (revision f8343685643f2901fe11aa9d0358cafbeaf7b4c3)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/sock.h>
96 #include <linux/rtnetlink.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/stat.h>
100 #include <linux/if_bridge.h>
101 #include <net/dst.h>
102 #include <net/pkt_sched.h>
103 #include <net/checksum.h>
104 #include <linux/highmem.h>
105 #include <linux/init.h>
106 #include <linux/kmod.h>
107 #include <linux/module.h>
108 #include <linux/kallsyms.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #include <net/wext.h>
113 #include <net/iw_handler.h>
114 #include <asm/current.h>
115 #include <linux/audit.h>
116 #include <linux/dmaengine.h>
117 #include <linux/err.h>
118 #include <linux/ctype.h>
119 #include <linux/if_arp.h>
120 
121 /*
122  *	The list of packet types we will receive (as opposed to discard)
123  *	and the routines to invoke.
124  *
125  *	Why 16. Because with 16 the only overlap we get on a hash of the
126  *	low nibble of the protocol value is RARP/SNAP/X.25.
127  *
128  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
129  *             sure which should go first, but I bet it won't make much
130  *             difference if we are running VLANs.  The good news is that
131  *             this protocol won't be in the list unless compiled in, so
132  *             the average user (w/out VLANs) will not be adversely affected.
133  *             --BLG
134  *
135  *		0800	IP
136  *		8100    802.1Q VLAN
137  *		0001	802.3
138  *		0002	AX.25
139  *		0004	802.2
140  *		8035	RARP
141  *		0005	SNAP
142  *		0805	X.25
143  *		0806	ARP
144  *		8137	IPX
145  *		0009	Localtalk
146  *		86DD	IPv6
147  */
148 
149 static DEFINE_SPINLOCK(ptype_lock);
150 static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
151 static struct list_head ptype_all __read_mostly;	/* Taps */
152 
153 #ifdef CONFIG_NET_DMA
154 static struct dma_client *net_dma_client;
155 static unsigned int net_dma_count;
156 static spinlock_t net_dma_event_lock;
157 #endif
158 
159 /*
160  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
161  * semaphore.
162  *
163  * Pure readers hold dev_base_lock for reading.
164  *
165  * Writers must hold the rtnl semaphore while they loop through the
166  * dev_base_head list, and hold dev_base_lock for writing when they do the
167  * actual updates.  This allows pure readers to access the list even
168  * while a writer is preparing to update it.
169  *
170  * To put it another way, dev_base_lock is held for writing only to
171  * protect against pure readers; the rtnl semaphore provides the
172  * protection against other writers.
173  *
174  * See, for example usages, register_netdevice() and
175  * unregister_netdevice(), which must be called with the rtnl
176  * semaphore held.
177  */
178 LIST_HEAD(dev_base_head);
179 DEFINE_RWLOCK(dev_base_lock);
180 
181 EXPORT_SYMBOL(dev_base_head);
182 EXPORT_SYMBOL(dev_base_lock);
183 
184 #define NETDEV_HASHBITS	8
185 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
186 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
187 
188 static inline struct hlist_head *dev_name_hash(const char *name)
189 {
190 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
191 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
192 }
193 
194 static inline struct hlist_head *dev_index_hash(int ifindex)
195 {
196 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
197 }
198 
199 /*
200  *	Our notifier list
201  */
202 
203 static RAW_NOTIFIER_HEAD(netdev_chain);
204 
205 /*
206  *	Device drivers call our routines to queue packets here. We empty the
207  *	queue in the local softnet handler.
208  */
209 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
210 
211 #ifdef CONFIG_SYSFS
212 extern int netdev_sysfs_init(void);
213 extern int netdev_register_sysfs(struct net_device *);
214 extern void netdev_unregister_sysfs(struct net_device *);
215 #else
216 #define netdev_sysfs_init()	 	(0)
217 #define netdev_register_sysfs(dev)	(0)
218 #define	netdev_unregister_sysfs(dev)	do { } while(0)
219 #endif
220 
221 #ifdef CONFIG_DEBUG_LOCK_ALLOC
222 /*
223  * register_netdevice() inits dev->_xmit_lock and sets lockdep class
224  * according to dev->type
225  */
226 static const unsigned short netdev_lock_type[] =
227 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
228 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
229 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
230 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
231 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
232 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
233 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
234 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
235 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
236 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
237 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
238 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
239 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
240 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
241 	 ARPHRD_NONE};
242 
243 static const char *netdev_lock_name[] =
244 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
245 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
246 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
247 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
248 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
249 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
250 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
251 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
252 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
253 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
254 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
255 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
256 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
257 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
258 	 "_xmit_NONE"};
259 
260 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
261 
262 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
263 {
264 	int i;
265 
266 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
267 		if (netdev_lock_type[i] == dev_type)
268 			return i;
269 	/* the last key is used by default */
270 	return ARRAY_SIZE(netdev_lock_type) - 1;
271 }
272 
273 static inline void netdev_set_lockdep_class(spinlock_t *lock,
274 					    unsigned short dev_type)
275 {
276 	int i;
277 
278 	i = netdev_lock_pos(dev_type);
279 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
280 				   netdev_lock_name[i]);
281 }
282 #else
283 static inline void netdev_set_lockdep_class(spinlock_t *lock,
284 					    unsigned short dev_type)
285 {
286 }
287 #endif
288 
289 /*******************************************************************************
290 
291 		Protocol management and registration routines
292 
293 *******************************************************************************/
294 
295 /*
296  *	Add a protocol ID to the list. Now that the input handler is
297  *	smarter we can dispense with all the messy stuff that used to be
298  *	here.
299  *
300  *	BEWARE!!! Protocol handlers, mangling input packets,
301  *	MUST BE last in hash buckets and checking protocol handlers
302  *	MUST start from promiscuous ptype_all chain in net_bh.
303  *	It is true now, do not change it.
304  *	Explanation follows: if protocol handler, mangling packet, will
305  *	be the first on list, it is not able to sense, that packet
306  *	is cloned and should be copied-on-write, so that it will
307  *	change it and subsequent readers will get broken packet.
308  *							--ANK (980803)
309  */
310 
311 /**
312  *	dev_add_pack - add packet handler
313  *	@pt: packet type declaration
314  *
315  *	Add a protocol handler to the networking stack. The passed &packet_type
316  *	is linked into kernel lists and may not be freed until it has been
317  *	removed from the kernel lists.
318  *
319  *	This call does not sleep therefore it can not
320  *	guarantee all CPU's that are in middle of receiving packets
321  *	will see the new packet type (until the next received packet).
322  */
323 
324 void dev_add_pack(struct packet_type *pt)
325 {
326 	int hash;
327 
328 	spin_lock_bh(&ptype_lock);
329 	if (pt->type == htons(ETH_P_ALL))
330 		list_add_rcu(&pt->list, &ptype_all);
331 	else {
332 		hash = ntohs(pt->type) & 15;
333 		list_add_rcu(&pt->list, &ptype_base[hash]);
334 	}
335 	spin_unlock_bh(&ptype_lock);
336 }
337 
338 /**
339  *	__dev_remove_pack	 - remove packet handler
340  *	@pt: packet type declaration
341  *
342  *	Remove a protocol handler that was previously added to the kernel
343  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
344  *	from the kernel lists and can be freed or reused once this function
345  *	returns.
346  *
347  *      The packet type might still be in use by receivers
348  *	and must not be freed until after all the CPU's have gone
349  *	through a quiescent state.
350  */
351 void __dev_remove_pack(struct packet_type *pt)
352 {
353 	struct list_head *head;
354 	struct packet_type *pt1;
355 
356 	spin_lock_bh(&ptype_lock);
357 
358 	if (pt->type == htons(ETH_P_ALL))
359 		head = &ptype_all;
360 	else
361 		head = &ptype_base[ntohs(pt->type) & 15];
362 
363 	list_for_each_entry(pt1, head, list) {
364 		if (pt == pt1) {
365 			list_del_rcu(&pt->list);
366 			goto out;
367 		}
368 	}
369 
370 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
371 out:
372 	spin_unlock_bh(&ptype_lock);
373 }
374 /**
375  *	dev_remove_pack	 - remove packet handler
376  *	@pt: packet type declaration
377  *
378  *	Remove a protocol handler that was previously added to the kernel
379  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
380  *	from the kernel lists and can be freed or reused once this function
381  *	returns.
382  *
383  *	This call sleeps to guarantee that no CPU is looking at the packet
384  *	type after return.
385  */
386 void dev_remove_pack(struct packet_type *pt)
387 {
388 	__dev_remove_pack(pt);
389 
390 	synchronize_net();
391 }
392 
393 /******************************************************************************
394 
395 		      Device Boot-time Settings Routines
396 
397 *******************************************************************************/
398 
399 /* Boot time configuration table */
400 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
401 
402 /**
403  *	netdev_boot_setup_add	- add new setup entry
404  *	@name: name of the device
405  *	@map: configured settings for the device
406  *
407  *	Adds new setup entry to the dev_boot_setup list.  The function
408  *	returns 0 on error and 1 on success.  This is a generic routine to
409  *	all netdevices.
410  */
411 static int netdev_boot_setup_add(char *name, struct ifmap *map)
412 {
413 	struct netdev_boot_setup *s;
414 	int i;
415 
416 	s = dev_boot_setup;
417 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
418 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
419 			memset(s[i].name, 0, sizeof(s[i].name));
420 			strcpy(s[i].name, name);
421 			memcpy(&s[i].map, map, sizeof(s[i].map));
422 			break;
423 		}
424 	}
425 
426 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
427 }
428 
429 /**
430  *	netdev_boot_setup_check	- check boot time settings
431  *	@dev: the netdevice
432  *
433  * 	Check boot time settings for the device.
434  *	The found settings are set for the device to be used
435  *	later in the device probing.
436  *	Returns 0 if no settings found, 1 if they are.
437  */
438 int netdev_boot_setup_check(struct net_device *dev)
439 {
440 	struct netdev_boot_setup *s = dev_boot_setup;
441 	int i;
442 
443 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
444 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
445 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
446 			dev->irq 	= s[i].map.irq;
447 			dev->base_addr 	= s[i].map.base_addr;
448 			dev->mem_start 	= s[i].map.mem_start;
449 			dev->mem_end 	= s[i].map.mem_end;
450 			return 1;
451 		}
452 	}
453 	return 0;
454 }
455 
456 
457 /**
458  *	netdev_boot_base	- get address from boot time settings
459  *	@prefix: prefix for network device
460  *	@unit: id for network device
461  *
462  * 	Check boot time settings for the base address of device.
463  *	The found settings are set for the device to be used
464  *	later in the device probing.
465  *	Returns 0 if no settings found.
466  */
467 unsigned long netdev_boot_base(const char *prefix, int unit)
468 {
469 	const struct netdev_boot_setup *s = dev_boot_setup;
470 	char name[IFNAMSIZ];
471 	int i;
472 
473 	sprintf(name, "%s%d", prefix, unit);
474 
475 	/*
476 	 * If device already registered then return base of 1
477 	 * to indicate not to probe for this interface
478 	 */
479 	if (__dev_get_by_name(name))
480 		return 1;
481 
482 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
483 		if (!strcmp(name, s[i].name))
484 			return s[i].map.base_addr;
485 	return 0;
486 }
487 
488 /*
489  * Saves at boot time configured settings for any netdevice.
490  */
491 int __init netdev_boot_setup(char *str)
492 {
493 	int ints[5];
494 	struct ifmap map;
495 
496 	str = get_options(str, ARRAY_SIZE(ints), ints);
497 	if (!str || !*str)
498 		return 0;
499 
500 	/* Save settings */
501 	memset(&map, 0, sizeof(map));
502 	if (ints[0] > 0)
503 		map.irq = ints[1];
504 	if (ints[0] > 1)
505 		map.base_addr = ints[2];
506 	if (ints[0] > 2)
507 		map.mem_start = ints[3];
508 	if (ints[0] > 3)
509 		map.mem_end = ints[4];
510 
511 	/* Add new entry to the list */
512 	return netdev_boot_setup_add(str, &map);
513 }
514 
515 __setup("netdev=", netdev_boot_setup);
516 
517 /*******************************************************************************
518 
519 			    Device Interface Subroutines
520 
521 *******************************************************************************/
522 
523 /**
524  *	__dev_get_by_name	- find a device by its name
525  *	@name: name to find
526  *
527  *	Find an interface by name. Must be called under RTNL semaphore
528  *	or @dev_base_lock. If the name is found a pointer to the device
529  *	is returned. If the name is not found then %NULL is returned. The
530  *	reference counters are not incremented so the caller must be
531  *	careful with locks.
532  */
533 
534 struct net_device *__dev_get_by_name(const char *name)
535 {
536 	struct hlist_node *p;
537 
538 	hlist_for_each(p, dev_name_hash(name)) {
539 		struct net_device *dev
540 			= hlist_entry(p, struct net_device, name_hlist);
541 		if (!strncmp(dev->name, name, IFNAMSIZ))
542 			return dev;
543 	}
544 	return NULL;
545 }
546 
547 /**
548  *	dev_get_by_name		- find a device by its name
549  *	@name: name to find
550  *
551  *	Find an interface by name. This can be called from any
552  *	context and does its own locking. The returned handle has
553  *	the usage count incremented and the caller must use dev_put() to
554  *	release it when it is no longer needed. %NULL is returned if no
555  *	matching device is found.
556  */
557 
558 struct net_device *dev_get_by_name(const char *name)
559 {
560 	struct net_device *dev;
561 
562 	read_lock(&dev_base_lock);
563 	dev = __dev_get_by_name(name);
564 	if (dev)
565 		dev_hold(dev);
566 	read_unlock(&dev_base_lock);
567 	return dev;
568 }
569 
570 /**
571  *	__dev_get_by_index - find a device by its ifindex
572  *	@ifindex: index of device
573  *
574  *	Search for an interface by index. Returns %NULL if the device
575  *	is not found or a pointer to the device. The device has not
576  *	had its reference counter increased so the caller must be careful
577  *	about locking. The caller must hold either the RTNL semaphore
578  *	or @dev_base_lock.
579  */
580 
581 struct net_device *__dev_get_by_index(int ifindex)
582 {
583 	struct hlist_node *p;
584 
585 	hlist_for_each(p, dev_index_hash(ifindex)) {
586 		struct net_device *dev
587 			= hlist_entry(p, struct net_device, index_hlist);
588 		if (dev->ifindex == ifindex)
589 			return dev;
590 	}
591 	return NULL;
592 }
593 
594 
595 /**
596  *	dev_get_by_index - find a device by its ifindex
597  *	@ifindex: index of device
598  *
599  *	Search for an interface by index. Returns NULL if the device
600  *	is not found or a pointer to the device. The device returned has
601  *	had a reference added and the pointer is safe until the user calls
602  *	dev_put to indicate they have finished with it.
603  */
604 
605 struct net_device *dev_get_by_index(int ifindex)
606 {
607 	struct net_device *dev;
608 
609 	read_lock(&dev_base_lock);
610 	dev = __dev_get_by_index(ifindex);
611 	if (dev)
612 		dev_hold(dev);
613 	read_unlock(&dev_base_lock);
614 	return dev;
615 }
616 
617 /**
618  *	dev_getbyhwaddr - find a device by its hardware address
619  *	@type: media type of device
620  *	@ha: hardware address
621  *
622  *	Search for an interface by MAC address. Returns NULL if the device
623  *	is not found or a pointer to the device. The caller must hold the
624  *	rtnl semaphore. The returned device has not had its ref count increased
625  *	and the caller must therefore be careful about locking
626  *
627  *	BUGS:
628  *	If the API was consistent this would be __dev_get_by_hwaddr
629  */
630 
631 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
632 {
633 	struct net_device *dev;
634 
635 	ASSERT_RTNL();
636 
637 	for_each_netdev(dev)
638 		if (dev->type == type &&
639 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
640 			return dev;
641 
642 	return NULL;
643 }
644 
645 EXPORT_SYMBOL(dev_getbyhwaddr);
646 
647 struct net_device *__dev_getfirstbyhwtype(unsigned short type)
648 {
649 	struct net_device *dev;
650 
651 	ASSERT_RTNL();
652 	for_each_netdev(dev)
653 		if (dev->type == type)
654 			return dev;
655 
656 	return NULL;
657 }
658 
659 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
660 
661 struct net_device *dev_getfirstbyhwtype(unsigned short type)
662 {
663 	struct net_device *dev;
664 
665 	rtnl_lock();
666 	dev = __dev_getfirstbyhwtype(type);
667 	if (dev)
668 		dev_hold(dev);
669 	rtnl_unlock();
670 	return dev;
671 }
672 
673 EXPORT_SYMBOL(dev_getfirstbyhwtype);
674 
675 /**
676  *	dev_get_by_flags - find any device with given flags
677  *	@if_flags: IFF_* values
678  *	@mask: bitmask of bits in if_flags to check
679  *
680  *	Search for any interface with the given flags. Returns NULL if a device
681  *	is not found or a pointer to the device. The device returned has
682  *	had a reference added and the pointer is safe until the user calls
683  *	dev_put to indicate they have finished with it.
684  */
685 
686 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
687 {
688 	struct net_device *dev, *ret;
689 
690 	ret = NULL;
691 	read_lock(&dev_base_lock);
692 	for_each_netdev(dev) {
693 		if (((dev->flags ^ if_flags) & mask) == 0) {
694 			dev_hold(dev);
695 			ret = dev;
696 			break;
697 		}
698 	}
699 	read_unlock(&dev_base_lock);
700 	return ret;
701 }
702 
703 /**
704  *	dev_valid_name - check if name is okay for network device
705  *	@name: name string
706  *
707  *	Network device names need to be valid file names to
708  *	to allow sysfs to work.  We also disallow any kind of
709  *	whitespace.
710  */
711 int dev_valid_name(const char *name)
712 {
713 	if (*name == '\0')
714 		return 0;
715 	if (strlen(name) >= IFNAMSIZ)
716 		return 0;
717 	if (!strcmp(name, ".") || !strcmp(name, ".."))
718 		return 0;
719 
720 	while (*name) {
721 		if (*name == '/' || isspace(*name))
722 			return 0;
723 		name++;
724 	}
725 	return 1;
726 }
727 
728 /**
729  *	dev_alloc_name - allocate a name for a device
730  *	@dev: device
731  *	@name: name format string
732  *
733  *	Passed a format string - eg "lt%d" it will try and find a suitable
734  *	id. It scans list of devices to build up a free map, then chooses
735  *	the first empty slot. The caller must hold the dev_base or rtnl lock
736  *	while allocating the name and adding the device in order to avoid
737  *	duplicates.
738  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
739  *	Returns the number of the unit assigned or a negative errno code.
740  */
741 
742 int dev_alloc_name(struct net_device *dev, const char *name)
743 {
744 	int i = 0;
745 	char buf[IFNAMSIZ];
746 	const char *p;
747 	const int max_netdevices = 8*PAGE_SIZE;
748 	long *inuse;
749 	struct net_device *d;
750 
751 	p = strnchr(name, IFNAMSIZ-1, '%');
752 	if (p) {
753 		/*
754 		 * Verify the string as this thing may have come from
755 		 * the user.  There must be either one "%d" and no other "%"
756 		 * characters.
757 		 */
758 		if (p[1] != 'd' || strchr(p + 2, '%'))
759 			return -EINVAL;
760 
761 		/* Use one page as a bit array of possible slots */
762 		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
763 		if (!inuse)
764 			return -ENOMEM;
765 
766 		for_each_netdev(d) {
767 			if (!sscanf(d->name, name, &i))
768 				continue;
769 			if (i < 0 || i >= max_netdevices)
770 				continue;
771 
772 			/*  avoid cases where sscanf is not exact inverse of printf */
773 			snprintf(buf, sizeof(buf), name, i);
774 			if (!strncmp(buf, d->name, IFNAMSIZ))
775 				set_bit(i, inuse);
776 		}
777 
778 		i = find_first_zero_bit(inuse, max_netdevices);
779 		free_page((unsigned long) inuse);
780 	}
781 
782 	snprintf(buf, sizeof(buf), name, i);
783 	if (!__dev_get_by_name(buf)) {
784 		strlcpy(dev->name, buf, IFNAMSIZ);
785 		return i;
786 	}
787 
788 	/* It is possible to run out of possible slots
789 	 * when the name is long and there isn't enough space left
790 	 * for the digits, or if all bits are used.
791 	 */
792 	return -ENFILE;
793 }
794 
795 
796 /**
797  *	dev_change_name - change name of a device
798  *	@dev: device
799  *	@newname: name (or format string) must be at least IFNAMSIZ
800  *
801  *	Change name of a device, can pass format strings "eth%d".
802  *	for wildcarding.
803  */
804 int dev_change_name(struct net_device *dev, char *newname)
805 {
806 	int err = 0;
807 
808 	ASSERT_RTNL();
809 
810 	if (dev->flags & IFF_UP)
811 		return -EBUSY;
812 
813 	if (!dev_valid_name(newname))
814 		return -EINVAL;
815 
816 	if (strchr(newname, '%')) {
817 		err = dev_alloc_name(dev, newname);
818 		if (err < 0)
819 			return err;
820 		strcpy(newname, dev->name);
821 	}
822 	else if (__dev_get_by_name(newname))
823 		return -EEXIST;
824 	else
825 		strlcpy(dev->name, newname, IFNAMSIZ);
826 
827 	device_rename(&dev->dev, dev->name);
828 	hlist_del(&dev->name_hlist);
829 	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
830 	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
831 
832 	return err;
833 }
834 
835 /**
836  *	netdev_features_change - device changes features
837  *	@dev: device to cause notification
838  *
839  *	Called to indicate a device has changed features.
840  */
841 void netdev_features_change(struct net_device *dev)
842 {
843 	raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
844 }
845 EXPORT_SYMBOL(netdev_features_change);
846 
847 /**
848  *	netdev_state_change - device changes state
849  *	@dev: device to cause notification
850  *
851  *	Called to indicate a device has changed state. This function calls
852  *	the notifier chains for netdev_chain and sends a NEWLINK message
853  *	to the routing socket.
854  */
855 void netdev_state_change(struct net_device *dev)
856 {
857 	if (dev->flags & IFF_UP) {
858 		raw_notifier_call_chain(&netdev_chain,
859 				NETDEV_CHANGE, dev);
860 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
861 	}
862 }
863 
864 /**
865  *	dev_load 	- load a network module
866  *	@name: name of interface
867  *
868  *	If a network interface is not present and the process has suitable
869  *	privileges this function loads the module. If module loading is not
870  *	available in this kernel then it becomes a nop.
871  */
872 
873 void dev_load(const char *name)
874 {
875 	struct net_device *dev;
876 
877 	read_lock(&dev_base_lock);
878 	dev = __dev_get_by_name(name);
879 	read_unlock(&dev_base_lock);
880 
881 	if (!dev && capable(CAP_SYS_MODULE))
882 		request_module("%s", name);
883 }
884 
885 static int default_rebuild_header(struct sk_buff *skb)
886 {
887 	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
888 	       skb->dev ? skb->dev->name : "NULL!!!");
889 	kfree_skb(skb);
890 	return 1;
891 }
892 
893 /**
894  *	dev_open	- prepare an interface for use.
895  *	@dev:	device to open
896  *
897  *	Takes a device from down to up state. The device's private open
898  *	function is invoked and then the multicast lists are loaded. Finally
899  *	the device is moved into the up state and a %NETDEV_UP message is
900  *	sent to the netdev notifier chain.
901  *
902  *	Calling this function on an active interface is a nop. On a failure
903  *	a negative errno code is returned.
904  */
905 int dev_open(struct net_device *dev)
906 {
907 	int ret = 0;
908 
909 	/*
910 	 *	Is it already up?
911 	 */
912 
913 	if (dev->flags & IFF_UP)
914 		return 0;
915 
916 	/*
917 	 *	Is it even present?
918 	 */
919 	if (!netif_device_present(dev))
920 		return -ENODEV;
921 
922 	/*
923 	 *	Call device private open method
924 	 */
925 	set_bit(__LINK_STATE_START, &dev->state);
926 	if (dev->open) {
927 		ret = dev->open(dev);
928 		if (ret)
929 			clear_bit(__LINK_STATE_START, &dev->state);
930 	}
931 
932 	/*
933 	 *	If it went open OK then:
934 	 */
935 
936 	if (!ret) {
937 		/*
938 		 *	Set the flags.
939 		 */
940 		dev->flags |= IFF_UP;
941 
942 		/*
943 		 *	Initialize multicasting status
944 		 */
945 		dev_mc_upload(dev);
946 
947 		/*
948 		 *	Wakeup transmit queue engine
949 		 */
950 		dev_activate(dev);
951 
952 		/*
953 		 *	... and announce new interface.
954 		 */
955 		raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
956 	}
957 	return ret;
958 }
959 
960 /**
961  *	dev_close - shutdown an interface.
962  *	@dev: device to shutdown
963  *
964  *	This function moves an active device into down state. A
965  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
966  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
967  *	chain.
968  */
969 int dev_close(struct net_device *dev)
970 {
971 	if (!(dev->flags & IFF_UP))
972 		return 0;
973 
974 	/*
975 	 *	Tell people we are going down, so that they can
976 	 *	prepare to death, when device is still operating.
977 	 */
978 	raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
979 
980 	dev_deactivate(dev);
981 
982 	clear_bit(__LINK_STATE_START, &dev->state);
983 
984 	/* Synchronize to scheduled poll. We cannot touch poll list,
985 	 * it can be even on different cpu. So just clear netif_running(),
986 	 * and wait when poll really will happen. Actually, the best place
987 	 * for this is inside dev->stop() after device stopped its irq
988 	 * engine, but this requires more changes in devices. */
989 
990 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
991 	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
992 		/* No hurry. */
993 		msleep(1);
994 	}
995 
996 	/*
997 	 *	Call the device specific close. This cannot fail.
998 	 *	Only if device is UP
999 	 *
1000 	 *	We allow it to be called even after a DETACH hot-plug
1001 	 *	event.
1002 	 */
1003 	if (dev->stop)
1004 		dev->stop(dev);
1005 
1006 	/*
1007 	 *	Device is now down.
1008 	 */
1009 
1010 	dev->flags &= ~IFF_UP;
1011 
1012 	/*
1013 	 * Tell people we are down
1014 	 */
1015 	raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
1016 
1017 	return 0;
1018 }
1019 
1020 
1021 /*
1022  *	Device change register/unregister. These are not inline or static
1023  *	as we export them to the world.
1024  */
1025 
1026 /**
1027  *	register_netdevice_notifier - register a network notifier block
1028  *	@nb: notifier
1029  *
1030  *	Register a notifier to be called when network device events occur.
1031  *	The notifier passed is linked into the kernel structures and must
1032  *	not be reused until it has been unregistered. A negative errno code
1033  *	is returned on a failure.
1034  *
1035  * 	When registered all registration and up events are replayed
1036  *	to the new notifier to allow device to have a race free
1037  *	view of the network device list.
1038  */
1039 
1040 int register_netdevice_notifier(struct notifier_block *nb)
1041 {
1042 	struct net_device *dev;
1043 	int err;
1044 
1045 	rtnl_lock();
1046 	err = raw_notifier_chain_register(&netdev_chain, nb);
1047 	if (!err) {
1048 		for_each_netdev(dev) {
1049 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
1050 
1051 			if (dev->flags & IFF_UP)
1052 				nb->notifier_call(nb, NETDEV_UP, dev);
1053 		}
1054 	}
1055 	rtnl_unlock();
1056 	return err;
1057 }
1058 
1059 /**
1060  *	unregister_netdevice_notifier - unregister a network notifier block
1061  *	@nb: notifier
1062  *
1063  *	Unregister a notifier previously registered by
1064  *	register_netdevice_notifier(). The notifier is unlinked into the
1065  *	kernel structures and may then be reused. A negative errno code
1066  *	is returned on a failure.
1067  */
1068 
1069 int unregister_netdevice_notifier(struct notifier_block *nb)
1070 {
1071 	int err;
1072 
1073 	rtnl_lock();
1074 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1075 	rtnl_unlock();
1076 	return err;
1077 }
1078 
1079 /**
1080  *	call_netdevice_notifiers - call all network notifier blocks
1081  *      @val: value passed unmodified to notifier function
1082  *      @v:   pointer passed unmodified to notifier function
1083  *
1084  *	Call all network notifier blocks.  Parameters and return value
1085  *	are as for raw_notifier_call_chain().
1086  */
1087 
1088 int call_netdevice_notifiers(unsigned long val, void *v)
1089 {
1090 	return raw_notifier_call_chain(&netdev_chain, val, v);
1091 }
1092 
1093 /* When > 0 there are consumers of rx skb time stamps */
1094 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1095 
1096 void net_enable_timestamp(void)
1097 {
1098 	atomic_inc(&netstamp_needed);
1099 }
1100 
1101 void net_disable_timestamp(void)
1102 {
1103 	atomic_dec(&netstamp_needed);
1104 }
1105 
1106 static inline void net_timestamp(struct sk_buff *skb)
1107 {
1108 	if (atomic_read(&netstamp_needed))
1109 		__net_timestamp(skb);
1110 	else
1111 		skb->tstamp.tv64 = 0;
1112 }
1113 
1114 /*
1115  *	Support routine. Sends outgoing frames to any network
1116  *	taps currently in use.
1117  */
1118 
1119 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1120 {
1121 	struct packet_type *ptype;
1122 
1123 	net_timestamp(skb);
1124 
1125 	rcu_read_lock();
1126 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1127 		/* Never send packets back to the socket
1128 		 * they originated from - MvS (miquels@drinkel.ow.org)
1129 		 */
1130 		if ((ptype->dev == dev || !ptype->dev) &&
1131 		    (ptype->af_packet_priv == NULL ||
1132 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1133 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1134 			if (!skb2)
1135 				break;
1136 
1137 			/* skb->nh should be correctly
1138 			   set by sender, so that the second statement is
1139 			   just protection against buggy protocols.
1140 			 */
1141 			skb_reset_mac_header(skb2);
1142 
1143 			if (skb_network_header(skb2) < skb2->data ||
1144 			    skb2->network_header > skb2->tail) {
1145 				if (net_ratelimit())
1146 					printk(KERN_CRIT "protocol %04x is "
1147 					       "buggy, dev %s\n",
1148 					       skb2->protocol, dev->name);
1149 				skb_reset_network_header(skb2);
1150 			}
1151 
1152 			skb2->transport_header = skb2->network_header;
1153 			skb2->pkt_type = PACKET_OUTGOING;
1154 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1155 		}
1156 	}
1157 	rcu_read_unlock();
1158 }
1159 
1160 
1161 void __netif_schedule(struct net_device *dev)
1162 {
1163 	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1164 		unsigned long flags;
1165 		struct softnet_data *sd;
1166 
1167 		local_irq_save(flags);
1168 		sd = &__get_cpu_var(softnet_data);
1169 		dev->next_sched = sd->output_queue;
1170 		sd->output_queue = dev;
1171 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1172 		local_irq_restore(flags);
1173 	}
1174 }
1175 EXPORT_SYMBOL(__netif_schedule);
1176 
1177 void __netif_rx_schedule(struct net_device *dev)
1178 {
1179 	unsigned long flags;
1180 
1181 	local_irq_save(flags);
1182 	dev_hold(dev);
1183 	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1184 	if (dev->quota < 0)
1185 		dev->quota += dev->weight;
1186 	else
1187 		dev->quota = dev->weight;
1188 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1189 	local_irq_restore(flags);
1190 }
1191 EXPORT_SYMBOL(__netif_rx_schedule);
1192 
1193 void dev_kfree_skb_any(struct sk_buff *skb)
1194 {
1195 	if (in_irq() || irqs_disabled())
1196 		dev_kfree_skb_irq(skb);
1197 	else
1198 		dev_kfree_skb(skb);
1199 }
1200 EXPORT_SYMBOL(dev_kfree_skb_any);
1201 
1202 
1203 /* Hot-plugging. */
1204 void netif_device_detach(struct net_device *dev)
1205 {
1206 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1207 	    netif_running(dev)) {
1208 		netif_stop_queue(dev);
1209 	}
1210 }
1211 EXPORT_SYMBOL(netif_device_detach);
1212 
1213 void netif_device_attach(struct net_device *dev)
1214 {
1215 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1216 	    netif_running(dev)) {
1217 		netif_wake_queue(dev);
1218 		__netdev_watchdog_up(dev);
1219 	}
1220 }
1221 EXPORT_SYMBOL(netif_device_attach);
1222 
1223 
1224 /*
1225  * Invalidate hardware checksum when packet is to be mangled, and
1226  * complete checksum manually on outgoing path.
1227  */
1228 int skb_checksum_help(struct sk_buff *skb)
1229 {
1230 	__wsum csum;
1231 	int ret = 0, offset;
1232 
1233 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1234 		goto out_set_summed;
1235 
1236 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1237 		/* Let GSO fix up the checksum. */
1238 		goto out_set_summed;
1239 	}
1240 
1241 	if (skb_cloned(skb)) {
1242 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1243 		if (ret)
1244 			goto out;
1245 	}
1246 
1247 	offset = skb->csum_start - skb_headroom(skb);
1248 	BUG_ON(offset > (int)skb->len);
1249 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1250 
1251 	offset = skb_headlen(skb) - offset;
1252 	BUG_ON(offset <= 0);
1253 	BUG_ON(skb->csum_offset + 2 > offset);
1254 
1255 	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
1256 		csum_fold(csum);
1257 out_set_summed:
1258 	skb->ip_summed = CHECKSUM_NONE;
1259 out:
1260 	return ret;
1261 }
1262 
1263 /**
1264  *	skb_gso_segment - Perform segmentation on skb.
1265  *	@skb: buffer to segment
1266  *	@features: features for the output path (see dev->features)
1267  *
1268  *	This function segments the given skb and returns a list of segments.
1269  *
1270  *	It may return NULL if the skb requires no segmentation.  This is
1271  *	only possible when GSO is used for verifying header integrity.
1272  */
1273 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1274 {
1275 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1276 	struct packet_type *ptype;
1277 	__be16 type = skb->protocol;
1278 	int err;
1279 
1280 	BUG_ON(skb_shinfo(skb)->frag_list);
1281 
1282 	skb_reset_mac_header(skb);
1283 	skb->mac_len = skb->network_header - skb->mac_header;
1284 	__skb_pull(skb, skb->mac_len);
1285 
1286 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1287 		if (skb_header_cloned(skb) &&
1288 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1289 			return ERR_PTR(err);
1290 	}
1291 
1292 	rcu_read_lock();
1293 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1294 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1295 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1296 				err = ptype->gso_send_check(skb);
1297 				segs = ERR_PTR(err);
1298 				if (err || skb_gso_ok(skb, features))
1299 					break;
1300 				__skb_push(skb, (skb->data -
1301 						 skb_network_header(skb)));
1302 			}
1303 			segs = ptype->gso_segment(skb, features);
1304 			break;
1305 		}
1306 	}
1307 	rcu_read_unlock();
1308 
1309 	__skb_push(skb, skb->data - skb_mac_header(skb));
1310 
1311 	return segs;
1312 }
1313 
1314 EXPORT_SYMBOL(skb_gso_segment);
1315 
1316 /* Take action when hardware reception checksum errors are detected. */
1317 #ifdef CONFIG_BUG
1318 void netdev_rx_csum_fault(struct net_device *dev)
1319 {
1320 	if (net_ratelimit()) {
1321 		printk(KERN_ERR "%s: hw csum failure.\n",
1322 			dev ? dev->name : "<unknown>");
1323 		dump_stack();
1324 	}
1325 }
1326 EXPORT_SYMBOL(netdev_rx_csum_fault);
1327 #endif
1328 
1329 /* Actually, we should eliminate this check as soon as we know, that:
1330  * 1. IOMMU is present and allows to map all the memory.
1331  * 2. No high memory really exists on this machine.
1332  */
1333 
1334 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1335 {
1336 #ifdef CONFIG_HIGHMEM
1337 	int i;
1338 
1339 	if (dev->features & NETIF_F_HIGHDMA)
1340 		return 0;
1341 
1342 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1343 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1344 			return 1;
1345 
1346 #endif
1347 	return 0;
1348 }
1349 
1350 struct dev_gso_cb {
1351 	void (*destructor)(struct sk_buff *skb);
1352 };
1353 
1354 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1355 
1356 static void dev_gso_skb_destructor(struct sk_buff *skb)
1357 {
1358 	struct dev_gso_cb *cb;
1359 
1360 	do {
1361 		struct sk_buff *nskb = skb->next;
1362 
1363 		skb->next = nskb->next;
1364 		nskb->next = NULL;
1365 		kfree_skb(nskb);
1366 	} while (skb->next);
1367 
1368 	cb = DEV_GSO_CB(skb);
1369 	if (cb->destructor)
1370 		cb->destructor(skb);
1371 }
1372 
1373 /**
1374  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1375  *	@skb: buffer to segment
1376  *
1377  *	This function segments the given skb and stores the list of segments
1378  *	in skb->next.
1379  */
1380 static int dev_gso_segment(struct sk_buff *skb)
1381 {
1382 	struct net_device *dev = skb->dev;
1383 	struct sk_buff *segs;
1384 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1385 					 NETIF_F_SG : 0);
1386 
1387 	segs = skb_gso_segment(skb, features);
1388 
1389 	/* Verifying header integrity only. */
1390 	if (!segs)
1391 		return 0;
1392 
1393 	if (unlikely(IS_ERR(segs)))
1394 		return PTR_ERR(segs);
1395 
1396 	skb->next = segs;
1397 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1398 	skb->destructor = dev_gso_skb_destructor;
1399 
1400 	return 0;
1401 }
1402 
1403 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1404 {
1405 	if (likely(!skb->next)) {
1406 		if (!list_empty(&ptype_all))
1407 			dev_queue_xmit_nit(skb, dev);
1408 
1409 		if (netif_needs_gso(dev, skb)) {
1410 			if (unlikely(dev_gso_segment(skb)))
1411 				goto out_kfree_skb;
1412 			if (skb->next)
1413 				goto gso;
1414 		}
1415 
1416 		return dev->hard_start_xmit(skb, dev);
1417 	}
1418 
1419 gso:
1420 	do {
1421 		struct sk_buff *nskb = skb->next;
1422 		int rc;
1423 
1424 		skb->next = nskb->next;
1425 		nskb->next = NULL;
1426 		rc = dev->hard_start_xmit(nskb, dev);
1427 		if (unlikely(rc)) {
1428 			nskb->next = skb->next;
1429 			skb->next = nskb;
1430 			return rc;
1431 		}
1432 		if (unlikely(netif_queue_stopped(dev) && skb->next))
1433 			return NETDEV_TX_BUSY;
1434 	} while (skb->next);
1435 
1436 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1437 
1438 out_kfree_skb:
1439 	kfree_skb(skb);
1440 	return 0;
1441 }
1442 
1443 #define HARD_TX_LOCK(dev, cpu) {			\
1444 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1445 		netif_tx_lock(dev);			\
1446 	}						\
1447 }
1448 
1449 #define HARD_TX_UNLOCK(dev) {				\
1450 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1451 		netif_tx_unlock(dev);			\
1452 	}						\
1453 }
1454 
1455 /**
1456  *	dev_queue_xmit - transmit a buffer
1457  *	@skb: buffer to transmit
1458  *
1459  *	Queue a buffer for transmission to a network device. The caller must
1460  *	have set the device and priority and built the buffer before calling
1461  *	this function. The function can be called from an interrupt.
1462  *
1463  *	A negative errno code is returned on a failure. A success does not
1464  *	guarantee the frame will be transmitted as it may be dropped due
1465  *	to congestion or traffic shaping.
1466  *
1467  * -----------------------------------------------------------------------------------
1468  *      I notice this method can also return errors from the queue disciplines,
1469  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1470  *      be positive.
1471  *
1472  *      Regardless of the return value, the skb is consumed, so it is currently
1473  *      difficult to retry a send to this method.  (You can bump the ref count
1474  *      before sending to hold a reference for retry if you are careful.)
1475  *
1476  *      When calling this method, interrupts MUST be enabled.  This is because
1477  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1478  *          --BLG
1479  */
1480 
1481 int dev_queue_xmit(struct sk_buff *skb)
1482 {
1483 	struct net_device *dev = skb->dev;
1484 	struct Qdisc *q;
1485 	int rc = -ENOMEM;
1486 
1487 	/* GSO will handle the following emulations directly. */
1488 	if (netif_needs_gso(dev, skb))
1489 		goto gso;
1490 
1491 	if (skb_shinfo(skb)->frag_list &&
1492 	    !(dev->features & NETIF_F_FRAGLIST) &&
1493 	    __skb_linearize(skb))
1494 		goto out_kfree_skb;
1495 
1496 	/* Fragmented skb is linearized if device does not support SG,
1497 	 * or if at least one of fragments is in highmem and device
1498 	 * does not support DMA from it.
1499 	 */
1500 	if (skb_shinfo(skb)->nr_frags &&
1501 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1502 	    __skb_linearize(skb))
1503 		goto out_kfree_skb;
1504 
1505 	/* If packet is not checksummed and device does not support
1506 	 * checksumming for this protocol, complete checksumming here.
1507 	 */
1508 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1509 		skb_set_transport_header(skb, skb->csum_start -
1510 					      skb_headroom(skb));
1511 
1512 		if (!(dev->features & NETIF_F_GEN_CSUM) &&
1513 		    (!(dev->features & NETIF_F_IP_CSUM) ||
1514 		     skb->protocol != htons(ETH_P_IP)))
1515 			if (skb_checksum_help(skb))
1516 				goto out_kfree_skb;
1517 	}
1518 
1519 gso:
1520 	spin_lock_prefetch(&dev->queue_lock);
1521 
1522 	/* Disable soft irqs for various locks below. Also
1523 	 * stops preemption for RCU.
1524 	 */
1525 	rcu_read_lock_bh();
1526 
1527 	/* Updates of qdisc are serialized by queue_lock.
1528 	 * The struct Qdisc which is pointed to by qdisc is now a
1529 	 * rcu structure - it may be accessed without acquiring
1530 	 * a lock (but the structure may be stale.) The freeing of the
1531 	 * qdisc will be deferred until it's known that there are no
1532 	 * more references to it.
1533 	 *
1534 	 * If the qdisc has an enqueue function, we still need to
1535 	 * hold the queue_lock before calling it, since queue_lock
1536 	 * also serializes access to the device queue.
1537 	 */
1538 
1539 	q = rcu_dereference(dev->qdisc);
1540 #ifdef CONFIG_NET_CLS_ACT
1541 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1542 #endif
1543 	if (q->enqueue) {
1544 		/* Grab device queue */
1545 		spin_lock(&dev->queue_lock);
1546 		q = dev->qdisc;
1547 		if (q->enqueue) {
1548 			rc = q->enqueue(skb, q);
1549 			qdisc_run(dev);
1550 			spin_unlock(&dev->queue_lock);
1551 
1552 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1553 			goto out;
1554 		}
1555 		spin_unlock(&dev->queue_lock);
1556 	}
1557 
1558 	/* The device has no queue. Common case for software devices:
1559 	   loopback, all the sorts of tunnels...
1560 
1561 	   Really, it is unlikely that netif_tx_lock protection is necessary
1562 	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1563 	   counters.)
1564 	   However, it is possible, that they rely on protection
1565 	   made by us here.
1566 
1567 	   Check this and shot the lock. It is not prone from deadlocks.
1568 	   Either shot noqueue qdisc, it is even simpler 8)
1569 	 */
1570 	if (dev->flags & IFF_UP) {
1571 		int cpu = smp_processor_id(); /* ok because BHs are off */
1572 
1573 		if (dev->xmit_lock_owner != cpu) {
1574 
1575 			HARD_TX_LOCK(dev, cpu);
1576 
1577 			if (!netif_queue_stopped(dev)) {
1578 				rc = 0;
1579 				if (!dev_hard_start_xmit(skb, dev)) {
1580 					HARD_TX_UNLOCK(dev);
1581 					goto out;
1582 				}
1583 			}
1584 			HARD_TX_UNLOCK(dev);
1585 			if (net_ratelimit())
1586 				printk(KERN_CRIT "Virtual device %s asks to "
1587 				       "queue packet!\n", dev->name);
1588 		} else {
1589 			/* Recursion is detected! It is possible,
1590 			 * unfortunately */
1591 			if (net_ratelimit())
1592 				printk(KERN_CRIT "Dead loop on virtual device "
1593 				       "%s, fix it urgently!\n", dev->name);
1594 		}
1595 	}
1596 
1597 	rc = -ENETDOWN;
1598 	rcu_read_unlock_bh();
1599 
1600 out_kfree_skb:
1601 	kfree_skb(skb);
1602 	return rc;
1603 out:
1604 	rcu_read_unlock_bh();
1605 	return rc;
1606 }
1607 
1608 
1609 /*=======================================================================
1610 			Receiver routines
1611   =======================================================================*/
1612 
1613 int netdev_max_backlog __read_mostly = 1000;
1614 int netdev_budget __read_mostly = 300;
1615 int weight_p __read_mostly = 64;            /* old backlog weight */
1616 
1617 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1618 
1619 
1620 /**
1621  *	netif_rx	-	post buffer to the network code
1622  *	@skb: buffer to post
1623  *
1624  *	This function receives a packet from a device driver and queues it for
1625  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1626  *	may be dropped during processing for congestion control or by the
1627  *	protocol layers.
1628  *
1629  *	return values:
1630  *	NET_RX_SUCCESS	(no congestion)
1631  *	NET_RX_CN_LOW   (low congestion)
1632  *	NET_RX_CN_MOD   (moderate congestion)
1633  *	NET_RX_CN_HIGH  (high congestion)
1634  *	NET_RX_DROP     (packet was dropped)
1635  *
1636  */
1637 
1638 int netif_rx(struct sk_buff *skb)
1639 {
1640 	struct softnet_data *queue;
1641 	unsigned long flags;
1642 
1643 	/* if netpoll wants it, pretend we never saw it */
1644 	if (netpoll_rx(skb))
1645 		return NET_RX_DROP;
1646 
1647 	if (!skb->tstamp.tv64)
1648 		net_timestamp(skb);
1649 
1650 	/*
1651 	 * The code is rearranged so that the path is the most
1652 	 * short when CPU is congested, but is still operating.
1653 	 */
1654 	local_irq_save(flags);
1655 	queue = &__get_cpu_var(softnet_data);
1656 
1657 	__get_cpu_var(netdev_rx_stat).total++;
1658 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1659 		if (queue->input_pkt_queue.qlen) {
1660 enqueue:
1661 			dev_hold(skb->dev);
1662 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1663 			local_irq_restore(flags);
1664 			return NET_RX_SUCCESS;
1665 		}
1666 
1667 		netif_rx_schedule(&queue->backlog_dev);
1668 		goto enqueue;
1669 	}
1670 
1671 	__get_cpu_var(netdev_rx_stat).dropped++;
1672 	local_irq_restore(flags);
1673 
1674 	kfree_skb(skb);
1675 	return NET_RX_DROP;
1676 }
1677 
1678 int netif_rx_ni(struct sk_buff *skb)
1679 {
1680 	int err;
1681 
1682 	preempt_disable();
1683 	err = netif_rx(skb);
1684 	if (local_softirq_pending())
1685 		do_softirq();
1686 	preempt_enable();
1687 
1688 	return err;
1689 }
1690 
1691 EXPORT_SYMBOL(netif_rx_ni);
1692 
1693 static inline struct net_device *skb_bond(struct sk_buff *skb)
1694 {
1695 	struct net_device *dev = skb->dev;
1696 
1697 	if (dev->master) {
1698 		if (skb_bond_should_drop(skb)) {
1699 			kfree_skb(skb);
1700 			return NULL;
1701 		}
1702 		skb->dev = dev->master;
1703 	}
1704 
1705 	return dev;
1706 }
1707 
1708 static void net_tx_action(struct softirq_action *h)
1709 {
1710 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1711 
1712 	if (sd->completion_queue) {
1713 		struct sk_buff *clist;
1714 
1715 		local_irq_disable();
1716 		clist = sd->completion_queue;
1717 		sd->completion_queue = NULL;
1718 		local_irq_enable();
1719 
1720 		while (clist) {
1721 			struct sk_buff *skb = clist;
1722 			clist = clist->next;
1723 
1724 			BUG_TRAP(!atomic_read(&skb->users));
1725 			__kfree_skb(skb);
1726 		}
1727 	}
1728 
1729 	if (sd->output_queue) {
1730 		struct net_device *head;
1731 
1732 		local_irq_disable();
1733 		head = sd->output_queue;
1734 		sd->output_queue = NULL;
1735 		local_irq_enable();
1736 
1737 		while (head) {
1738 			struct net_device *dev = head;
1739 			head = head->next_sched;
1740 
1741 			smp_mb__before_clear_bit();
1742 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1743 
1744 			if (spin_trylock(&dev->queue_lock)) {
1745 				qdisc_run(dev);
1746 				spin_unlock(&dev->queue_lock);
1747 			} else {
1748 				netif_schedule(dev);
1749 			}
1750 		}
1751 	}
1752 }
1753 
1754 static inline int deliver_skb(struct sk_buff *skb,
1755 			      struct packet_type *pt_prev,
1756 			      struct net_device *orig_dev)
1757 {
1758 	atomic_inc(&skb->users);
1759 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1760 }
1761 
1762 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1763 /* These hooks defined here for ATM */
1764 struct net_bridge;
1765 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1766 						unsigned char *addr);
1767 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1768 
1769 /*
1770  * If bridge module is loaded call bridging hook.
1771  *  returns NULL if packet was consumed.
1772  */
1773 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1774 					struct sk_buff *skb) __read_mostly;
1775 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1776 					    struct packet_type **pt_prev, int *ret,
1777 					    struct net_device *orig_dev)
1778 {
1779 	struct net_bridge_port *port;
1780 
1781 	if (skb->pkt_type == PACKET_LOOPBACK ||
1782 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
1783 		return skb;
1784 
1785 	if (*pt_prev) {
1786 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
1787 		*pt_prev = NULL;
1788 	}
1789 
1790 	return br_handle_frame_hook(port, skb);
1791 }
1792 #else
1793 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
1794 #endif
1795 
1796 #ifdef CONFIG_NET_CLS_ACT
1797 /* TODO: Maybe we should just force sch_ingress to be compiled in
1798  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1799  * a compare and 2 stores extra right now if we dont have it on
1800  * but have CONFIG_NET_CLS_ACT
1801  * NOTE: This doesnt stop any functionality; if you dont have
1802  * the ingress scheduler, you just cant add policies on ingress.
1803  *
1804  */
1805 static int ing_filter(struct sk_buff *skb)
1806 {
1807 	struct Qdisc *q;
1808 	struct net_device *dev = skb->dev;
1809 	int result = TC_ACT_OK;
1810 
1811 	if (dev->qdisc_ingress) {
1812 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1813 		if (MAX_RED_LOOP < ttl++) {
1814 			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
1815 				skb->iif, skb->dev->ifindex);
1816 			return TC_ACT_SHOT;
1817 		}
1818 
1819 		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1820 
1821 		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1822 
1823 		spin_lock(&dev->ingress_lock);
1824 		if ((q = dev->qdisc_ingress) != NULL)
1825 			result = q->enqueue(skb, q);
1826 		spin_unlock(&dev->ingress_lock);
1827 
1828 	}
1829 
1830 	return result;
1831 }
1832 #endif
1833 
1834 int netif_receive_skb(struct sk_buff *skb)
1835 {
1836 	struct packet_type *ptype, *pt_prev;
1837 	struct net_device *orig_dev;
1838 	int ret = NET_RX_DROP;
1839 	__be16 type;
1840 
1841 	/* if we've gotten here through NAPI, check netpoll */
1842 	if (skb->dev->poll && netpoll_rx(skb))
1843 		return NET_RX_DROP;
1844 
1845 	if (!skb->tstamp.tv64)
1846 		net_timestamp(skb);
1847 
1848 	if (!skb->iif)
1849 		skb->iif = skb->dev->ifindex;
1850 
1851 	orig_dev = skb_bond(skb);
1852 
1853 	if (!orig_dev)
1854 		return NET_RX_DROP;
1855 
1856 	__get_cpu_var(netdev_rx_stat).total++;
1857 
1858 	skb_reset_network_header(skb);
1859 	skb_reset_transport_header(skb);
1860 	skb->mac_len = skb->network_header - skb->mac_header;
1861 
1862 	pt_prev = NULL;
1863 
1864 	rcu_read_lock();
1865 
1866 #ifdef CONFIG_NET_CLS_ACT
1867 	if (skb->tc_verd & TC_NCLS) {
1868 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1869 		goto ncls;
1870 	}
1871 #endif
1872 
1873 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1874 		if (!ptype->dev || ptype->dev == skb->dev) {
1875 			if (pt_prev)
1876 				ret = deliver_skb(skb, pt_prev, orig_dev);
1877 			pt_prev = ptype;
1878 		}
1879 	}
1880 
1881 #ifdef CONFIG_NET_CLS_ACT
1882 	if (pt_prev) {
1883 		ret = deliver_skb(skb, pt_prev, orig_dev);
1884 		pt_prev = NULL; /* noone else should process this after*/
1885 	} else {
1886 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1887 	}
1888 
1889 	ret = ing_filter(skb);
1890 
1891 	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1892 		kfree_skb(skb);
1893 		goto out;
1894 	}
1895 
1896 	skb->tc_verd = 0;
1897 ncls:
1898 #endif
1899 
1900 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
1901 	if (!skb)
1902 		goto out;
1903 
1904 	type = skb->protocol;
1905 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1906 		if (ptype->type == type &&
1907 		    (!ptype->dev || ptype->dev == skb->dev)) {
1908 			if (pt_prev)
1909 				ret = deliver_skb(skb, pt_prev, orig_dev);
1910 			pt_prev = ptype;
1911 		}
1912 	}
1913 
1914 	if (pt_prev) {
1915 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1916 	} else {
1917 		kfree_skb(skb);
1918 		/* Jamal, now you will not able to escape explaining
1919 		 * me how you were going to use this. :-)
1920 		 */
1921 		ret = NET_RX_DROP;
1922 	}
1923 
1924 out:
1925 	rcu_read_unlock();
1926 	return ret;
1927 }
1928 
1929 static int process_backlog(struct net_device *backlog_dev, int *budget)
1930 {
1931 	int work = 0;
1932 	int quota = min(backlog_dev->quota, *budget);
1933 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1934 	unsigned long start_time = jiffies;
1935 
1936 	backlog_dev->weight = weight_p;
1937 	for (;;) {
1938 		struct sk_buff *skb;
1939 		struct net_device *dev;
1940 
1941 		local_irq_disable();
1942 		skb = __skb_dequeue(&queue->input_pkt_queue);
1943 		if (!skb)
1944 			goto job_done;
1945 		local_irq_enable();
1946 
1947 		dev = skb->dev;
1948 
1949 		netif_receive_skb(skb);
1950 
1951 		dev_put(dev);
1952 
1953 		work++;
1954 
1955 		if (work >= quota || jiffies - start_time > 1)
1956 			break;
1957 
1958 	}
1959 
1960 	backlog_dev->quota -= work;
1961 	*budget -= work;
1962 	return -1;
1963 
1964 job_done:
1965 	backlog_dev->quota -= work;
1966 	*budget -= work;
1967 
1968 	list_del(&backlog_dev->poll_list);
1969 	smp_mb__before_clear_bit();
1970 	netif_poll_enable(backlog_dev);
1971 
1972 	local_irq_enable();
1973 	return 0;
1974 }
1975 
1976 static void net_rx_action(struct softirq_action *h)
1977 {
1978 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1979 	unsigned long start_time = jiffies;
1980 	int budget = netdev_budget;
1981 	void *have;
1982 
1983 	local_irq_disable();
1984 
1985 	while (!list_empty(&queue->poll_list)) {
1986 		struct net_device *dev;
1987 
1988 		if (budget <= 0 || jiffies - start_time > 1)
1989 			goto softnet_break;
1990 
1991 		local_irq_enable();
1992 
1993 		dev = list_entry(queue->poll_list.next,
1994 				 struct net_device, poll_list);
1995 		have = netpoll_poll_lock(dev);
1996 
1997 		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1998 			netpoll_poll_unlock(have);
1999 			local_irq_disable();
2000 			list_move_tail(&dev->poll_list, &queue->poll_list);
2001 			if (dev->quota < 0)
2002 				dev->quota += dev->weight;
2003 			else
2004 				dev->quota = dev->weight;
2005 		} else {
2006 			netpoll_poll_unlock(have);
2007 			dev_put(dev);
2008 			local_irq_disable();
2009 		}
2010 	}
2011 out:
2012 #ifdef CONFIG_NET_DMA
2013 	/*
2014 	 * There may not be any more sk_buffs coming right now, so push
2015 	 * any pending DMA copies to hardware
2016 	 */
2017 	if (net_dma_client) {
2018 		struct dma_chan *chan;
2019 		rcu_read_lock();
2020 		list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
2021 			dma_async_memcpy_issue_pending(chan);
2022 		rcu_read_unlock();
2023 	}
2024 #endif
2025 	local_irq_enable();
2026 	return;
2027 
2028 softnet_break:
2029 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2030 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2031 	goto out;
2032 }
2033 
2034 static gifconf_func_t * gifconf_list [NPROTO];
2035 
2036 /**
2037  *	register_gifconf	-	register a SIOCGIF handler
2038  *	@family: Address family
2039  *	@gifconf: Function handler
2040  *
2041  *	Register protocol dependent address dumping routines. The handler
2042  *	that is passed must not be freed or reused until it has been replaced
2043  *	by another handler.
2044  */
2045 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2046 {
2047 	if (family >= NPROTO)
2048 		return -EINVAL;
2049 	gifconf_list[family] = gifconf;
2050 	return 0;
2051 }
2052 
2053 
2054 /*
2055  *	Map an interface index to its name (SIOCGIFNAME)
2056  */
2057 
2058 /*
2059  *	We need this ioctl for efficient implementation of the
2060  *	if_indextoname() function required by the IPv6 API.  Without
2061  *	it, we would have to search all the interfaces to find a
2062  *	match.  --pb
2063  */
2064 
2065 static int dev_ifname(struct ifreq __user *arg)
2066 {
2067 	struct net_device *dev;
2068 	struct ifreq ifr;
2069 
2070 	/*
2071 	 *	Fetch the caller's info block.
2072 	 */
2073 
2074 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2075 		return -EFAULT;
2076 
2077 	read_lock(&dev_base_lock);
2078 	dev = __dev_get_by_index(ifr.ifr_ifindex);
2079 	if (!dev) {
2080 		read_unlock(&dev_base_lock);
2081 		return -ENODEV;
2082 	}
2083 
2084 	strcpy(ifr.ifr_name, dev->name);
2085 	read_unlock(&dev_base_lock);
2086 
2087 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2088 		return -EFAULT;
2089 	return 0;
2090 }
2091 
2092 /*
2093  *	Perform a SIOCGIFCONF call. This structure will change
2094  *	size eventually, and there is nothing I can do about it.
2095  *	Thus we will need a 'compatibility mode'.
2096  */
2097 
2098 static int dev_ifconf(char __user *arg)
2099 {
2100 	struct ifconf ifc;
2101 	struct net_device *dev;
2102 	char __user *pos;
2103 	int len;
2104 	int total;
2105 	int i;
2106 
2107 	/*
2108 	 *	Fetch the caller's info block.
2109 	 */
2110 
2111 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2112 		return -EFAULT;
2113 
2114 	pos = ifc.ifc_buf;
2115 	len = ifc.ifc_len;
2116 
2117 	/*
2118 	 *	Loop over the interfaces, and write an info block for each.
2119 	 */
2120 
2121 	total = 0;
2122 	for_each_netdev(dev) {
2123 		for (i = 0; i < NPROTO; i++) {
2124 			if (gifconf_list[i]) {
2125 				int done;
2126 				if (!pos)
2127 					done = gifconf_list[i](dev, NULL, 0);
2128 				else
2129 					done = gifconf_list[i](dev, pos + total,
2130 							       len - total);
2131 				if (done < 0)
2132 					return -EFAULT;
2133 				total += done;
2134 			}
2135 		}
2136 	}
2137 
2138 	/*
2139 	 *	All done.  Write the updated control block back to the caller.
2140 	 */
2141 	ifc.ifc_len = total;
2142 
2143 	/*
2144 	 * 	Both BSD and Solaris return 0 here, so we do too.
2145 	 */
2146 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2147 }
2148 
2149 #ifdef CONFIG_PROC_FS
2150 /*
2151  *	This is invoked by the /proc filesystem handler to display a device
2152  *	in detail.
2153  */
2154 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2155 {
2156 	loff_t off;
2157 	struct net_device *dev;
2158 
2159 	read_lock(&dev_base_lock);
2160 	if (!*pos)
2161 		return SEQ_START_TOKEN;
2162 
2163 	off = 1;
2164 	for_each_netdev(dev)
2165 		if (off++ == *pos)
2166 			return dev;
2167 
2168 	return NULL;
2169 }
2170 
2171 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2172 {
2173 	++*pos;
2174 	return v == SEQ_START_TOKEN ?
2175 		first_net_device() : next_net_device((struct net_device *)v);
2176 }
2177 
2178 void dev_seq_stop(struct seq_file *seq, void *v)
2179 {
2180 	read_unlock(&dev_base_lock);
2181 }
2182 
2183 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2184 {
2185 	struct net_device_stats *stats = dev->get_stats(dev);
2186 
2187 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2188 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2189 		   dev->name, stats->rx_bytes, stats->rx_packets,
2190 		   stats->rx_errors,
2191 		   stats->rx_dropped + stats->rx_missed_errors,
2192 		   stats->rx_fifo_errors,
2193 		   stats->rx_length_errors + stats->rx_over_errors +
2194 		    stats->rx_crc_errors + stats->rx_frame_errors,
2195 		   stats->rx_compressed, stats->multicast,
2196 		   stats->tx_bytes, stats->tx_packets,
2197 		   stats->tx_errors, stats->tx_dropped,
2198 		   stats->tx_fifo_errors, stats->collisions,
2199 		   stats->tx_carrier_errors +
2200 		    stats->tx_aborted_errors +
2201 		    stats->tx_window_errors +
2202 		    stats->tx_heartbeat_errors,
2203 		   stats->tx_compressed);
2204 }
2205 
2206 /*
2207  *	Called from the PROCfs module. This now uses the new arbitrary sized
2208  *	/proc/net interface to create /proc/net/dev
2209  */
2210 static int dev_seq_show(struct seq_file *seq, void *v)
2211 {
2212 	if (v == SEQ_START_TOKEN)
2213 		seq_puts(seq, "Inter-|   Receive                            "
2214 			      "                    |  Transmit\n"
2215 			      " face |bytes    packets errs drop fifo frame "
2216 			      "compressed multicast|bytes    packets errs "
2217 			      "drop fifo colls carrier compressed\n");
2218 	else
2219 		dev_seq_printf_stats(seq, v);
2220 	return 0;
2221 }
2222 
2223 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2224 {
2225 	struct netif_rx_stats *rc = NULL;
2226 
2227 	while (*pos < NR_CPUS)
2228 		if (cpu_online(*pos)) {
2229 			rc = &per_cpu(netdev_rx_stat, *pos);
2230 			break;
2231 		} else
2232 			++*pos;
2233 	return rc;
2234 }
2235 
2236 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2237 {
2238 	return softnet_get_online(pos);
2239 }
2240 
2241 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2242 {
2243 	++*pos;
2244 	return softnet_get_online(pos);
2245 }
2246 
2247 static void softnet_seq_stop(struct seq_file *seq, void *v)
2248 {
2249 }
2250 
2251 static int softnet_seq_show(struct seq_file *seq, void *v)
2252 {
2253 	struct netif_rx_stats *s = v;
2254 
2255 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2256 		   s->total, s->dropped, s->time_squeeze, 0,
2257 		   0, 0, 0, 0, /* was fastroute */
2258 		   s->cpu_collision );
2259 	return 0;
2260 }
2261 
2262 static const struct seq_operations dev_seq_ops = {
2263 	.start = dev_seq_start,
2264 	.next  = dev_seq_next,
2265 	.stop  = dev_seq_stop,
2266 	.show  = dev_seq_show,
2267 };
2268 
2269 static int dev_seq_open(struct inode *inode, struct file *file)
2270 {
2271 	return seq_open(file, &dev_seq_ops);
2272 }
2273 
2274 static const struct file_operations dev_seq_fops = {
2275 	.owner	 = THIS_MODULE,
2276 	.open    = dev_seq_open,
2277 	.read    = seq_read,
2278 	.llseek  = seq_lseek,
2279 	.release = seq_release,
2280 };
2281 
2282 static const struct seq_operations softnet_seq_ops = {
2283 	.start = softnet_seq_start,
2284 	.next  = softnet_seq_next,
2285 	.stop  = softnet_seq_stop,
2286 	.show  = softnet_seq_show,
2287 };
2288 
2289 static int softnet_seq_open(struct inode *inode, struct file *file)
2290 {
2291 	return seq_open(file, &softnet_seq_ops);
2292 }
2293 
2294 static const struct file_operations softnet_seq_fops = {
2295 	.owner	 = THIS_MODULE,
2296 	.open    = softnet_seq_open,
2297 	.read    = seq_read,
2298 	.llseek  = seq_lseek,
2299 	.release = seq_release,
2300 };
2301 
2302 static void *ptype_get_idx(loff_t pos)
2303 {
2304 	struct packet_type *pt = NULL;
2305 	loff_t i = 0;
2306 	int t;
2307 
2308 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2309 		if (i == pos)
2310 			return pt;
2311 		++i;
2312 	}
2313 
2314 	for (t = 0; t < 16; t++) {
2315 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2316 			if (i == pos)
2317 				return pt;
2318 			++i;
2319 		}
2320 	}
2321 	return NULL;
2322 }
2323 
2324 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2325 {
2326 	rcu_read_lock();
2327 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2328 }
2329 
2330 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2331 {
2332 	struct packet_type *pt;
2333 	struct list_head *nxt;
2334 	int hash;
2335 
2336 	++*pos;
2337 	if (v == SEQ_START_TOKEN)
2338 		return ptype_get_idx(0);
2339 
2340 	pt = v;
2341 	nxt = pt->list.next;
2342 	if (pt->type == htons(ETH_P_ALL)) {
2343 		if (nxt != &ptype_all)
2344 			goto found;
2345 		hash = 0;
2346 		nxt = ptype_base[0].next;
2347 	} else
2348 		hash = ntohs(pt->type) & 15;
2349 
2350 	while (nxt == &ptype_base[hash]) {
2351 		if (++hash >= 16)
2352 			return NULL;
2353 		nxt = ptype_base[hash].next;
2354 	}
2355 found:
2356 	return list_entry(nxt, struct packet_type, list);
2357 }
2358 
2359 static void ptype_seq_stop(struct seq_file *seq, void *v)
2360 {
2361 	rcu_read_unlock();
2362 }
2363 
2364 static void ptype_seq_decode(struct seq_file *seq, void *sym)
2365 {
2366 #ifdef CONFIG_KALLSYMS
2367 	unsigned long offset = 0, symsize;
2368 	const char *symname;
2369 	char *modname;
2370 	char namebuf[128];
2371 
2372 	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2373 				  &modname, namebuf);
2374 
2375 	if (symname) {
2376 		char *delim = ":";
2377 
2378 		if (!modname)
2379 			modname = delim = "";
2380 		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2381 			   symname, offset);
2382 		return;
2383 	}
2384 #endif
2385 
2386 	seq_printf(seq, "[%p]", sym);
2387 }
2388 
2389 static int ptype_seq_show(struct seq_file *seq, void *v)
2390 {
2391 	struct packet_type *pt = v;
2392 
2393 	if (v == SEQ_START_TOKEN)
2394 		seq_puts(seq, "Type Device      Function\n");
2395 	else {
2396 		if (pt->type == htons(ETH_P_ALL))
2397 			seq_puts(seq, "ALL ");
2398 		else
2399 			seq_printf(seq, "%04x", ntohs(pt->type));
2400 
2401 		seq_printf(seq, " %-8s ",
2402 			   pt->dev ? pt->dev->name : "");
2403 		ptype_seq_decode(seq,  pt->func);
2404 		seq_putc(seq, '\n');
2405 	}
2406 
2407 	return 0;
2408 }
2409 
2410 static const struct seq_operations ptype_seq_ops = {
2411 	.start = ptype_seq_start,
2412 	.next  = ptype_seq_next,
2413 	.stop  = ptype_seq_stop,
2414 	.show  = ptype_seq_show,
2415 };
2416 
2417 static int ptype_seq_open(struct inode *inode, struct file *file)
2418 {
2419 	return seq_open(file, &ptype_seq_ops);
2420 }
2421 
2422 static const struct file_operations ptype_seq_fops = {
2423 	.owner	 = THIS_MODULE,
2424 	.open    = ptype_seq_open,
2425 	.read    = seq_read,
2426 	.llseek  = seq_lseek,
2427 	.release = seq_release,
2428 };
2429 
2430 
2431 static int __init dev_proc_init(void)
2432 {
2433 	int rc = -ENOMEM;
2434 
2435 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2436 		goto out;
2437 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2438 		goto out_dev;
2439 	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
2440 		goto out_dev2;
2441 
2442 	if (wext_proc_init())
2443 		goto out_softnet;
2444 	rc = 0;
2445 out:
2446 	return rc;
2447 out_softnet:
2448 	proc_net_remove("ptype");
2449 out_dev2:
2450 	proc_net_remove("softnet_stat");
2451 out_dev:
2452 	proc_net_remove("dev");
2453 	goto out;
2454 }
2455 #else
2456 #define dev_proc_init() 0
2457 #endif	/* CONFIG_PROC_FS */
2458 
2459 
2460 /**
2461  *	netdev_set_master	-	set up master/slave pair
2462  *	@slave: slave device
2463  *	@master: new master device
2464  *
2465  *	Changes the master device of the slave. Pass %NULL to break the
2466  *	bonding. The caller must hold the RTNL semaphore. On a failure
2467  *	a negative errno code is returned. On success the reference counts
2468  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2469  *	function returns zero.
2470  */
2471 int netdev_set_master(struct net_device *slave, struct net_device *master)
2472 {
2473 	struct net_device *old = slave->master;
2474 
2475 	ASSERT_RTNL();
2476 
2477 	if (master) {
2478 		if (old)
2479 			return -EBUSY;
2480 		dev_hold(master);
2481 	}
2482 
2483 	slave->master = master;
2484 
2485 	synchronize_net();
2486 
2487 	if (old)
2488 		dev_put(old);
2489 
2490 	if (master)
2491 		slave->flags |= IFF_SLAVE;
2492 	else
2493 		slave->flags &= ~IFF_SLAVE;
2494 
2495 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2496 	return 0;
2497 }
2498 
2499 /**
2500  *	dev_set_promiscuity	- update promiscuity count on a device
2501  *	@dev: device
2502  *	@inc: modifier
2503  *
2504  *	Add or remove promiscuity from a device. While the count in the device
2505  *	remains above zero the interface remains promiscuous. Once it hits zero
2506  *	the device reverts back to normal filtering operation. A negative inc
2507  *	value is used to drop promiscuity on the device.
2508  */
2509 void dev_set_promiscuity(struct net_device *dev, int inc)
2510 {
2511 	unsigned short old_flags = dev->flags;
2512 
2513 	if ((dev->promiscuity += inc) == 0)
2514 		dev->flags &= ~IFF_PROMISC;
2515 	else
2516 		dev->flags |= IFF_PROMISC;
2517 	if (dev->flags != old_flags) {
2518 		dev_mc_upload(dev);
2519 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2520 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2521 							       "left");
2522 		audit_log(current->audit_context, GFP_ATOMIC,
2523 			AUDIT_ANOM_PROMISCUOUS,
2524 			"dev=%s prom=%d old_prom=%d auid=%u",
2525 			dev->name, (dev->flags & IFF_PROMISC),
2526 			(old_flags & IFF_PROMISC),
2527 			audit_get_loginuid(current->audit_context));
2528 	}
2529 }
2530 
2531 /**
2532  *	dev_set_allmulti	- update allmulti count on a device
2533  *	@dev: device
2534  *	@inc: modifier
2535  *
2536  *	Add or remove reception of all multicast frames to a device. While the
2537  *	count in the device remains above zero the interface remains listening
2538  *	to all interfaces. Once it hits zero the device reverts back to normal
2539  *	filtering operation. A negative @inc value is used to drop the counter
2540  *	when releasing a resource needing all multicasts.
2541  */
2542 
2543 void dev_set_allmulti(struct net_device *dev, int inc)
2544 {
2545 	unsigned short old_flags = dev->flags;
2546 
2547 	dev->flags |= IFF_ALLMULTI;
2548 	if ((dev->allmulti += inc) == 0)
2549 		dev->flags &= ~IFF_ALLMULTI;
2550 	if (dev->flags ^ old_flags)
2551 		dev_mc_upload(dev);
2552 }
2553 
2554 unsigned dev_get_flags(const struct net_device *dev)
2555 {
2556 	unsigned flags;
2557 
2558 	flags = (dev->flags & ~(IFF_PROMISC |
2559 				IFF_ALLMULTI |
2560 				IFF_RUNNING |
2561 				IFF_LOWER_UP |
2562 				IFF_DORMANT)) |
2563 		(dev->gflags & (IFF_PROMISC |
2564 				IFF_ALLMULTI));
2565 
2566 	if (netif_running(dev)) {
2567 		if (netif_oper_up(dev))
2568 			flags |= IFF_RUNNING;
2569 		if (netif_carrier_ok(dev))
2570 			flags |= IFF_LOWER_UP;
2571 		if (netif_dormant(dev))
2572 			flags |= IFF_DORMANT;
2573 	}
2574 
2575 	return flags;
2576 }
2577 
2578 int dev_change_flags(struct net_device *dev, unsigned flags)
2579 {
2580 	int ret;
2581 	int old_flags = dev->flags;
2582 
2583 	/*
2584 	 *	Set the flags on our device.
2585 	 */
2586 
2587 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2588 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2589 			       IFF_AUTOMEDIA)) |
2590 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2591 				    IFF_ALLMULTI));
2592 
2593 	/*
2594 	 *	Load in the correct multicast list now the flags have changed.
2595 	 */
2596 
2597 	dev_mc_upload(dev);
2598 
2599 	/*
2600 	 *	Have we downed the interface. We handle IFF_UP ourselves
2601 	 *	according to user attempts to set it, rather than blindly
2602 	 *	setting it.
2603 	 */
2604 
2605 	ret = 0;
2606 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
2607 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2608 
2609 		if (!ret)
2610 			dev_mc_upload(dev);
2611 	}
2612 
2613 	if (dev->flags & IFF_UP &&
2614 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2615 					  IFF_VOLATILE)))
2616 		raw_notifier_call_chain(&netdev_chain,
2617 				NETDEV_CHANGE, dev);
2618 
2619 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2620 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2621 		dev->gflags ^= IFF_PROMISC;
2622 		dev_set_promiscuity(dev, inc);
2623 	}
2624 
2625 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2626 	   is important. Some (broken) drivers set IFF_PROMISC, when
2627 	   IFF_ALLMULTI is requested not asking us and not reporting.
2628 	 */
2629 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2630 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2631 		dev->gflags ^= IFF_ALLMULTI;
2632 		dev_set_allmulti(dev, inc);
2633 	}
2634 
2635 	if (old_flags ^ dev->flags)
2636 		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2637 
2638 	return ret;
2639 }
2640 
2641 int dev_set_mtu(struct net_device *dev, int new_mtu)
2642 {
2643 	int err;
2644 
2645 	if (new_mtu == dev->mtu)
2646 		return 0;
2647 
2648 	/*	MTU must be positive.	 */
2649 	if (new_mtu < 0)
2650 		return -EINVAL;
2651 
2652 	if (!netif_device_present(dev))
2653 		return -ENODEV;
2654 
2655 	err = 0;
2656 	if (dev->change_mtu)
2657 		err = dev->change_mtu(dev, new_mtu);
2658 	else
2659 		dev->mtu = new_mtu;
2660 	if (!err && dev->flags & IFF_UP)
2661 		raw_notifier_call_chain(&netdev_chain,
2662 				NETDEV_CHANGEMTU, dev);
2663 	return err;
2664 }
2665 
2666 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2667 {
2668 	int err;
2669 
2670 	if (!dev->set_mac_address)
2671 		return -EOPNOTSUPP;
2672 	if (sa->sa_family != dev->type)
2673 		return -EINVAL;
2674 	if (!netif_device_present(dev))
2675 		return -ENODEV;
2676 	err = dev->set_mac_address(dev, sa);
2677 	if (!err)
2678 		raw_notifier_call_chain(&netdev_chain,
2679 				NETDEV_CHANGEADDR, dev);
2680 	return err;
2681 }
2682 
2683 /*
2684  *	Perform the SIOCxIFxxx calls.
2685  */
2686 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2687 {
2688 	int err;
2689 	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2690 
2691 	if (!dev)
2692 		return -ENODEV;
2693 
2694 	switch (cmd) {
2695 		case SIOCGIFFLAGS:	/* Get interface flags */
2696 			ifr->ifr_flags = dev_get_flags(dev);
2697 			return 0;
2698 
2699 		case SIOCSIFFLAGS:	/* Set interface flags */
2700 			return dev_change_flags(dev, ifr->ifr_flags);
2701 
2702 		case SIOCGIFMETRIC:	/* Get the metric on the interface
2703 					   (currently unused) */
2704 			ifr->ifr_metric = 0;
2705 			return 0;
2706 
2707 		case SIOCSIFMETRIC:	/* Set the metric on the interface
2708 					   (currently unused) */
2709 			return -EOPNOTSUPP;
2710 
2711 		case SIOCGIFMTU:	/* Get the MTU of a device */
2712 			ifr->ifr_mtu = dev->mtu;
2713 			return 0;
2714 
2715 		case SIOCSIFMTU:	/* Set the MTU of a device */
2716 			return dev_set_mtu(dev, ifr->ifr_mtu);
2717 
2718 		case SIOCGIFHWADDR:
2719 			if (!dev->addr_len)
2720 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2721 			else
2722 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2723 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2724 			ifr->ifr_hwaddr.sa_family = dev->type;
2725 			return 0;
2726 
2727 		case SIOCSIFHWADDR:
2728 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2729 
2730 		case SIOCSIFHWBROADCAST:
2731 			if (ifr->ifr_hwaddr.sa_family != dev->type)
2732 				return -EINVAL;
2733 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2734 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2735 			raw_notifier_call_chain(&netdev_chain,
2736 					    NETDEV_CHANGEADDR, dev);
2737 			return 0;
2738 
2739 		case SIOCGIFMAP:
2740 			ifr->ifr_map.mem_start = dev->mem_start;
2741 			ifr->ifr_map.mem_end   = dev->mem_end;
2742 			ifr->ifr_map.base_addr = dev->base_addr;
2743 			ifr->ifr_map.irq       = dev->irq;
2744 			ifr->ifr_map.dma       = dev->dma;
2745 			ifr->ifr_map.port      = dev->if_port;
2746 			return 0;
2747 
2748 		case SIOCSIFMAP:
2749 			if (dev->set_config) {
2750 				if (!netif_device_present(dev))
2751 					return -ENODEV;
2752 				return dev->set_config(dev, &ifr->ifr_map);
2753 			}
2754 			return -EOPNOTSUPP;
2755 
2756 		case SIOCADDMULTI:
2757 			if (!dev->set_multicast_list ||
2758 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2759 				return -EINVAL;
2760 			if (!netif_device_present(dev))
2761 				return -ENODEV;
2762 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2763 					  dev->addr_len, 1);
2764 
2765 		case SIOCDELMULTI:
2766 			if (!dev->set_multicast_list ||
2767 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2768 				return -EINVAL;
2769 			if (!netif_device_present(dev))
2770 				return -ENODEV;
2771 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2772 					     dev->addr_len, 1);
2773 
2774 		case SIOCGIFINDEX:
2775 			ifr->ifr_ifindex = dev->ifindex;
2776 			return 0;
2777 
2778 		case SIOCGIFTXQLEN:
2779 			ifr->ifr_qlen = dev->tx_queue_len;
2780 			return 0;
2781 
2782 		case SIOCSIFTXQLEN:
2783 			if (ifr->ifr_qlen < 0)
2784 				return -EINVAL;
2785 			dev->tx_queue_len = ifr->ifr_qlen;
2786 			return 0;
2787 
2788 		case SIOCSIFNAME:
2789 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2790 			return dev_change_name(dev, ifr->ifr_newname);
2791 
2792 		/*
2793 		 *	Unknown or private ioctl
2794 		 */
2795 
2796 		default:
2797 			if ((cmd >= SIOCDEVPRIVATE &&
2798 			    cmd <= SIOCDEVPRIVATE + 15) ||
2799 			    cmd == SIOCBONDENSLAVE ||
2800 			    cmd == SIOCBONDRELEASE ||
2801 			    cmd == SIOCBONDSETHWADDR ||
2802 			    cmd == SIOCBONDSLAVEINFOQUERY ||
2803 			    cmd == SIOCBONDINFOQUERY ||
2804 			    cmd == SIOCBONDCHANGEACTIVE ||
2805 			    cmd == SIOCGMIIPHY ||
2806 			    cmd == SIOCGMIIREG ||
2807 			    cmd == SIOCSMIIREG ||
2808 			    cmd == SIOCBRADDIF ||
2809 			    cmd == SIOCBRDELIF ||
2810 			    cmd == SIOCWANDEV) {
2811 				err = -EOPNOTSUPP;
2812 				if (dev->do_ioctl) {
2813 					if (netif_device_present(dev))
2814 						err = dev->do_ioctl(dev, ifr,
2815 								    cmd);
2816 					else
2817 						err = -ENODEV;
2818 				}
2819 			} else
2820 				err = -EINVAL;
2821 
2822 	}
2823 	return err;
2824 }
2825 
2826 /*
2827  *	This function handles all "interface"-type I/O control requests. The actual
2828  *	'doing' part of this is dev_ifsioc above.
2829  */
2830 
2831 /**
2832  *	dev_ioctl	-	network device ioctl
2833  *	@cmd: command to issue
2834  *	@arg: pointer to a struct ifreq in user space
2835  *
2836  *	Issue ioctl functions to devices. This is normally called by the
2837  *	user space syscall interfaces but can sometimes be useful for
2838  *	other purposes. The return value is the return from the syscall if
2839  *	positive or a negative errno code on error.
2840  */
2841 
2842 int dev_ioctl(unsigned int cmd, void __user *arg)
2843 {
2844 	struct ifreq ifr;
2845 	int ret;
2846 	char *colon;
2847 
2848 	/* One special case: SIOCGIFCONF takes ifconf argument
2849 	   and requires shared lock, because it sleeps writing
2850 	   to user space.
2851 	 */
2852 
2853 	if (cmd == SIOCGIFCONF) {
2854 		rtnl_lock();
2855 		ret = dev_ifconf((char __user *) arg);
2856 		rtnl_unlock();
2857 		return ret;
2858 	}
2859 	if (cmd == SIOCGIFNAME)
2860 		return dev_ifname((struct ifreq __user *)arg);
2861 
2862 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2863 		return -EFAULT;
2864 
2865 	ifr.ifr_name[IFNAMSIZ-1] = 0;
2866 
2867 	colon = strchr(ifr.ifr_name, ':');
2868 	if (colon)
2869 		*colon = 0;
2870 
2871 	/*
2872 	 *	See which interface the caller is talking about.
2873 	 */
2874 
2875 	switch (cmd) {
2876 		/*
2877 		 *	These ioctl calls:
2878 		 *	- can be done by all.
2879 		 *	- atomic and do not require locking.
2880 		 *	- return a value
2881 		 */
2882 		case SIOCGIFFLAGS:
2883 		case SIOCGIFMETRIC:
2884 		case SIOCGIFMTU:
2885 		case SIOCGIFHWADDR:
2886 		case SIOCGIFSLAVE:
2887 		case SIOCGIFMAP:
2888 		case SIOCGIFINDEX:
2889 		case SIOCGIFTXQLEN:
2890 			dev_load(ifr.ifr_name);
2891 			read_lock(&dev_base_lock);
2892 			ret = dev_ifsioc(&ifr, cmd);
2893 			read_unlock(&dev_base_lock);
2894 			if (!ret) {
2895 				if (colon)
2896 					*colon = ':';
2897 				if (copy_to_user(arg, &ifr,
2898 						 sizeof(struct ifreq)))
2899 					ret = -EFAULT;
2900 			}
2901 			return ret;
2902 
2903 		case SIOCETHTOOL:
2904 			dev_load(ifr.ifr_name);
2905 			rtnl_lock();
2906 			ret = dev_ethtool(&ifr);
2907 			rtnl_unlock();
2908 			if (!ret) {
2909 				if (colon)
2910 					*colon = ':';
2911 				if (copy_to_user(arg, &ifr,
2912 						 sizeof(struct ifreq)))
2913 					ret = -EFAULT;
2914 			}
2915 			return ret;
2916 
2917 		/*
2918 		 *	These ioctl calls:
2919 		 *	- require superuser power.
2920 		 *	- require strict serialization.
2921 		 *	- return a value
2922 		 */
2923 		case SIOCGMIIPHY:
2924 		case SIOCGMIIREG:
2925 		case SIOCSIFNAME:
2926 			if (!capable(CAP_NET_ADMIN))
2927 				return -EPERM;
2928 			dev_load(ifr.ifr_name);
2929 			rtnl_lock();
2930 			ret = dev_ifsioc(&ifr, cmd);
2931 			rtnl_unlock();
2932 			if (!ret) {
2933 				if (colon)
2934 					*colon = ':';
2935 				if (copy_to_user(arg, &ifr,
2936 						 sizeof(struct ifreq)))
2937 					ret = -EFAULT;
2938 			}
2939 			return ret;
2940 
2941 		/*
2942 		 *	These ioctl calls:
2943 		 *	- require superuser power.
2944 		 *	- require strict serialization.
2945 		 *	- do not return a value
2946 		 */
2947 		case SIOCSIFFLAGS:
2948 		case SIOCSIFMETRIC:
2949 		case SIOCSIFMTU:
2950 		case SIOCSIFMAP:
2951 		case SIOCSIFHWADDR:
2952 		case SIOCSIFSLAVE:
2953 		case SIOCADDMULTI:
2954 		case SIOCDELMULTI:
2955 		case SIOCSIFHWBROADCAST:
2956 		case SIOCSIFTXQLEN:
2957 		case SIOCSMIIREG:
2958 		case SIOCBONDENSLAVE:
2959 		case SIOCBONDRELEASE:
2960 		case SIOCBONDSETHWADDR:
2961 		case SIOCBONDCHANGEACTIVE:
2962 		case SIOCBRADDIF:
2963 		case SIOCBRDELIF:
2964 			if (!capable(CAP_NET_ADMIN))
2965 				return -EPERM;
2966 			/* fall through */
2967 		case SIOCBONDSLAVEINFOQUERY:
2968 		case SIOCBONDINFOQUERY:
2969 			dev_load(ifr.ifr_name);
2970 			rtnl_lock();
2971 			ret = dev_ifsioc(&ifr, cmd);
2972 			rtnl_unlock();
2973 			return ret;
2974 
2975 		case SIOCGIFMEM:
2976 			/* Get the per device memory space. We can add this but
2977 			 * currently do not support it */
2978 		case SIOCSIFMEM:
2979 			/* Set the per device memory buffer space.
2980 			 * Not applicable in our case */
2981 		case SIOCSIFLINK:
2982 			return -EINVAL;
2983 
2984 		/*
2985 		 *	Unknown or private ioctl.
2986 		 */
2987 		default:
2988 			if (cmd == SIOCWANDEV ||
2989 			    (cmd >= SIOCDEVPRIVATE &&
2990 			     cmd <= SIOCDEVPRIVATE + 15)) {
2991 				dev_load(ifr.ifr_name);
2992 				rtnl_lock();
2993 				ret = dev_ifsioc(&ifr, cmd);
2994 				rtnl_unlock();
2995 				if (!ret && copy_to_user(arg, &ifr,
2996 							 sizeof(struct ifreq)))
2997 					ret = -EFAULT;
2998 				return ret;
2999 			}
3000 			/* Take care of Wireless Extensions */
3001 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3002 				return wext_handle_ioctl(&ifr, cmd, arg);
3003 			return -EINVAL;
3004 	}
3005 }
3006 
3007 
3008 /**
3009  *	dev_new_index	-	allocate an ifindex
3010  *
3011  *	Returns a suitable unique value for a new device interface
3012  *	number.  The caller must hold the rtnl semaphore or the
3013  *	dev_base_lock to be sure it remains unique.
3014  */
3015 static int dev_new_index(void)
3016 {
3017 	static int ifindex;
3018 	for (;;) {
3019 		if (++ifindex <= 0)
3020 			ifindex = 1;
3021 		if (!__dev_get_by_index(ifindex))
3022 			return ifindex;
3023 	}
3024 }
3025 
3026 static int dev_boot_phase = 1;
3027 
3028 /* Delayed registration/unregisteration */
3029 static DEFINE_SPINLOCK(net_todo_list_lock);
3030 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3031 
3032 static void net_set_todo(struct net_device *dev)
3033 {
3034 	spin_lock(&net_todo_list_lock);
3035 	list_add_tail(&dev->todo_list, &net_todo_list);
3036 	spin_unlock(&net_todo_list_lock);
3037 }
3038 
3039 /**
3040  *	register_netdevice	- register a network device
3041  *	@dev: device to register
3042  *
3043  *	Take a completed network device structure and add it to the kernel
3044  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3045  *	chain. 0 is returned on success. A negative errno code is returned
3046  *	on a failure to set up the device, or if the name is a duplicate.
3047  *
3048  *	Callers must hold the rtnl semaphore. You may want
3049  *	register_netdev() instead of this.
3050  *
3051  *	BUGS:
3052  *	The locking appears insufficient to guarantee two parallel registers
3053  *	will not get the same name.
3054  */
3055 
3056 int register_netdevice(struct net_device *dev)
3057 {
3058 	struct hlist_head *head;
3059 	struct hlist_node *p;
3060 	int ret;
3061 
3062 	BUG_ON(dev_boot_phase);
3063 	ASSERT_RTNL();
3064 
3065 	might_sleep();
3066 
3067 	/* When net_device's are persistent, this will be fatal. */
3068 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3069 
3070 	spin_lock_init(&dev->queue_lock);
3071 	spin_lock_init(&dev->_xmit_lock);
3072 	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3073 	dev->xmit_lock_owner = -1;
3074 	spin_lock_init(&dev->ingress_lock);
3075 
3076 	dev->iflink = -1;
3077 
3078 	/* Init, if this function is available */
3079 	if (dev->init) {
3080 		ret = dev->init(dev);
3081 		if (ret) {
3082 			if (ret > 0)
3083 				ret = -EIO;
3084 			goto out;
3085 		}
3086 	}
3087 
3088 	if (!dev_valid_name(dev->name)) {
3089 		ret = -EINVAL;
3090 		goto out;
3091 	}
3092 
3093 	dev->ifindex = dev_new_index();
3094 	if (dev->iflink == -1)
3095 		dev->iflink = dev->ifindex;
3096 
3097 	/* Check for existence of name */
3098 	head = dev_name_hash(dev->name);
3099 	hlist_for_each(p, head) {
3100 		struct net_device *d
3101 			= hlist_entry(p, struct net_device, name_hlist);
3102 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3103 			ret = -EEXIST;
3104 			goto out;
3105 		}
3106 	}
3107 
3108 	/* Fix illegal SG+CSUM combinations. */
3109 	if ((dev->features & NETIF_F_SG) &&
3110 	    !(dev->features & NETIF_F_ALL_CSUM)) {
3111 		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3112 		       dev->name);
3113 		dev->features &= ~NETIF_F_SG;
3114 	}
3115 
3116 	/* TSO requires that SG is present as well. */
3117 	if ((dev->features & NETIF_F_TSO) &&
3118 	    !(dev->features & NETIF_F_SG)) {
3119 		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3120 		       dev->name);
3121 		dev->features &= ~NETIF_F_TSO;
3122 	}
3123 	if (dev->features & NETIF_F_UFO) {
3124 		if (!(dev->features & NETIF_F_HW_CSUM)) {
3125 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3126 					"NETIF_F_HW_CSUM feature.\n",
3127 							dev->name);
3128 			dev->features &= ~NETIF_F_UFO;
3129 		}
3130 		if (!(dev->features & NETIF_F_SG)) {
3131 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3132 					"NETIF_F_SG feature.\n",
3133 					dev->name);
3134 			dev->features &= ~NETIF_F_UFO;
3135 		}
3136 	}
3137 
3138 	/*
3139 	 *	nil rebuild_header routine,
3140 	 *	that should be never called and used as just bug trap.
3141 	 */
3142 
3143 	if (!dev->rebuild_header)
3144 		dev->rebuild_header = default_rebuild_header;
3145 
3146 	ret = netdev_register_sysfs(dev);
3147 	if (ret)
3148 		goto out;
3149 	dev->reg_state = NETREG_REGISTERED;
3150 
3151 	/*
3152 	 *	Default initial state at registry is that the
3153 	 *	device is present.
3154 	 */
3155 
3156 	set_bit(__LINK_STATE_PRESENT, &dev->state);
3157 
3158 	dev_init_scheduler(dev);
3159 	write_lock_bh(&dev_base_lock);
3160 	list_add_tail(&dev->dev_list, &dev_base_head);
3161 	hlist_add_head(&dev->name_hlist, head);
3162 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3163 	dev_hold(dev);
3164 	write_unlock_bh(&dev_base_lock);
3165 
3166 	/* Notify protocols, that a new device appeared. */
3167 	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3168 
3169 	ret = 0;
3170 
3171 out:
3172 	return ret;
3173 }
3174 
3175 /**
3176  *	register_netdev	- register a network device
3177  *	@dev: device to register
3178  *
3179  *	Take a completed network device structure and add it to the kernel
3180  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3181  *	chain. 0 is returned on success. A negative errno code is returned
3182  *	on a failure to set up the device, or if the name is a duplicate.
3183  *
3184  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
3185  *	and expands the device name if you passed a format string to
3186  *	alloc_netdev.
3187  */
3188 int register_netdev(struct net_device *dev)
3189 {
3190 	int err;
3191 
3192 	rtnl_lock();
3193 
3194 	/*
3195 	 * If the name is a format string the caller wants us to do a
3196 	 * name allocation.
3197 	 */
3198 	if (strchr(dev->name, '%')) {
3199 		err = dev_alloc_name(dev, dev->name);
3200 		if (err < 0)
3201 			goto out;
3202 	}
3203 
3204 	err = register_netdevice(dev);
3205 out:
3206 	rtnl_unlock();
3207 	return err;
3208 }
3209 EXPORT_SYMBOL(register_netdev);
3210 
3211 /*
3212  * netdev_wait_allrefs - wait until all references are gone.
3213  *
3214  * This is called when unregistering network devices.
3215  *
3216  * Any protocol or device that holds a reference should register
3217  * for netdevice notification, and cleanup and put back the
3218  * reference if they receive an UNREGISTER event.
3219  * We can get stuck here if buggy protocols don't correctly
3220  * call dev_put.
3221  */
3222 static void netdev_wait_allrefs(struct net_device *dev)
3223 {
3224 	unsigned long rebroadcast_time, warning_time;
3225 
3226 	rebroadcast_time = warning_time = jiffies;
3227 	while (atomic_read(&dev->refcnt) != 0) {
3228 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3229 			rtnl_lock();
3230 
3231 			/* Rebroadcast unregister notification */
3232 			raw_notifier_call_chain(&netdev_chain,
3233 					    NETDEV_UNREGISTER, dev);
3234 
3235 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3236 				     &dev->state)) {
3237 				/* We must not have linkwatch events
3238 				 * pending on unregister. If this
3239 				 * happens, we simply run the queue
3240 				 * unscheduled, resulting in a noop
3241 				 * for this device.
3242 				 */
3243 				linkwatch_run_queue();
3244 			}
3245 
3246 			__rtnl_unlock();
3247 
3248 			rebroadcast_time = jiffies;
3249 		}
3250 
3251 		msleep(250);
3252 
3253 		if (time_after(jiffies, warning_time + 10 * HZ)) {
3254 			printk(KERN_EMERG "unregister_netdevice: "
3255 			       "waiting for %s to become free. Usage "
3256 			       "count = %d\n",
3257 			       dev->name, atomic_read(&dev->refcnt));
3258 			warning_time = jiffies;
3259 		}
3260 	}
3261 }
3262 
3263 /* The sequence is:
3264  *
3265  *	rtnl_lock();
3266  *	...
3267  *	register_netdevice(x1);
3268  *	register_netdevice(x2);
3269  *	...
3270  *	unregister_netdevice(y1);
3271  *	unregister_netdevice(y2);
3272  *      ...
3273  *	rtnl_unlock();
3274  *	free_netdev(y1);
3275  *	free_netdev(y2);
3276  *
3277  * We are invoked by rtnl_unlock() after it drops the semaphore.
3278  * This allows us to deal with problems:
3279  * 1) We can delete sysfs objects which invoke hotplug
3280  *    without deadlocking with linkwatch via keventd.
3281  * 2) Since we run with the RTNL semaphore not held, we can sleep
3282  *    safely in order to wait for the netdev refcnt to drop to zero.
3283  */
3284 static DEFINE_MUTEX(net_todo_run_mutex);
3285 void netdev_run_todo(void)
3286 {
3287 	struct list_head list;
3288 
3289 	/* Need to guard against multiple cpu's getting out of order. */
3290 	mutex_lock(&net_todo_run_mutex);
3291 
3292 	/* Not safe to do outside the semaphore.  We must not return
3293 	 * until all unregister events invoked by the local processor
3294 	 * have been completed (either by this todo run, or one on
3295 	 * another cpu).
3296 	 */
3297 	if (list_empty(&net_todo_list))
3298 		goto out;
3299 
3300 	/* Snapshot list, allow later requests */
3301 	spin_lock(&net_todo_list_lock);
3302 	list_replace_init(&net_todo_list, &list);
3303 	spin_unlock(&net_todo_list_lock);
3304 
3305 	while (!list_empty(&list)) {
3306 		struct net_device *dev
3307 			= list_entry(list.next, struct net_device, todo_list);
3308 		list_del(&dev->todo_list);
3309 
3310 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3311 			printk(KERN_ERR "network todo '%s' but state %d\n",
3312 			       dev->name, dev->reg_state);
3313 			dump_stack();
3314 			continue;
3315 		}
3316 
3317 		dev->reg_state = NETREG_UNREGISTERED;
3318 
3319 		netdev_wait_allrefs(dev);
3320 
3321 		/* paranoia */
3322 		BUG_ON(atomic_read(&dev->refcnt));
3323 		BUG_TRAP(!dev->ip_ptr);
3324 		BUG_TRAP(!dev->ip6_ptr);
3325 		BUG_TRAP(!dev->dn_ptr);
3326 
3327 		if (dev->destructor)
3328 			dev->destructor(dev);
3329 
3330 		/* Free network device */
3331 		kobject_put(&dev->dev.kobj);
3332 	}
3333 
3334 out:
3335 	mutex_unlock(&net_todo_run_mutex);
3336 }
3337 
3338 static struct net_device_stats *internal_stats(struct net_device *dev)
3339 {
3340 	return &dev->stats;
3341 }
3342 
3343 /**
3344  *	alloc_netdev - allocate network device
3345  *	@sizeof_priv:	size of private data to allocate space for
3346  *	@name:		device name format string
3347  *	@setup:		callback to initialize device
3348  *
3349  *	Allocates a struct net_device with private data area for driver use
3350  *	and performs basic initialization.
3351  */
3352 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3353 		void (*setup)(struct net_device *))
3354 {
3355 	void *p;
3356 	struct net_device *dev;
3357 	int alloc_size;
3358 
3359 	BUG_ON(strlen(name) >= sizeof(dev->name));
3360 
3361 	/* ensure 32-byte alignment of both the device and private area */
3362 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3363 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3364 
3365 	p = kzalloc(alloc_size, GFP_KERNEL);
3366 	if (!p) {
3367 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3368 		return NULL;
3369 	}
3370 
3371 	dev = (struct net_device *)
3372 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3373 	dev->padded = (char *)dev - (char *)p;
3374 
3375 	if (sizeof_priv)
3376 		dev->priv = netdev_priv(dev);
3377 
3378 	dev->get_stats = internal_stats;
3379 	setup(dev);
3380 	strcpy(dev->name, name);
3381 	return dev;
3382 }
3383 EXPORT_SYMBOL(alloc_netdev);
3384 
3385 /**
3386  *	free_netdev - free network device
3387  *	@dev: device
3388  *
3389  *	This function does the last stage of destroying an allocated device
3390  * 	interface. The reference to the device object is released.
3391  *	If this is the last reference then it will be freed.
3392  */
3393 void free_netdev(struct net_device *dev)
3394 {
3395 #ifdef CONFIG_SYSFS
3396 	/*  Compatibility with error handling in drivers */
3397 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3398 		kfree((char *)dev - dev->padded);
3399 		return;
3400 	}
3401 
3402 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3403 	dev->reg_state = NETREG_RELEASED;
3404 
3405 	/* will free via device release */
3406 	put_device(&dev->dev);
3407 #else
3408 	kfree((char *)dev - dev->padded);
3409 #endif
3410 }
3411 
3412 /* Synchronize with packet receive processing. */
3413 void synchronize_net(void)
3414 {
3415 	might_sleep();
3416 	synchronize_rcu();
3417 }
3418 
3419 /**
3420  *	unregister_netdevice - remove device from the kernel
3421  *	@dev: device
3422  *
3423  *	This function shuts down a device interface and removes it
3424  *	from the kernel tables. On success 0 is returned, on a failure
3425  *	a negative errno code is returned.
3426  *
3427  *	Callers must hold the rtnl semaphore.  You may want
3428  *	unregister_netdev() instead of this.
3429  */
3430 
3431 void unregister_netdevice(struct net_device *dev)
3432 {
3433 	BUG_ON(dev_boot_phase);
3434 	ASSERT_RTNL();
3435 
3436 	/* Some devices call without registering for initialization unwind. */
3437 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3438 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3439 				  "was registered\n", dev->name, dev);
3440 
3441 		WARN_ON(1);
3442 		return;
3443 	}
3444 
3445 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3446 
3447 	/* If device is running, close it first. */
3448 	if (dev->flags & IFF_UP)
3449 		dev_close(dev);
3450 
3451 	/* And unlink it from device chain. */
3452 	write_lock_bh(&dev_base_lock);
3453 	list_del(&dev->dev_list);
3454 	hlist_del(&dev->name_hlist);
3455 	hlist_del(&dev->index_hlist);
3456 	write_unlock_bh(&dev_base_lock);
3457 
3458 	dev->reg_state = NETREG_UNREGISTERING;
3459 
3460 	synchronize_net();
3461 
3462 	/* Shutdown queueing discipline. */
3463 	dev_shutdown(dev);
3464 
3465 
3466 	/* Notify protocols, that we are about to destroy
3467 	   this device. They should clean all the things.
3468 	*/
3469 	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3470 
3471 	/*
3472 	 *	Flush the multicast chain
3473 	 */
3474 	dev_mc_discard(dev);
3475 
3476 	if (dev->uninit)
3477 		dev->uninit(dev);
3478 
3479 	/* Notifier chain MUST detach us from master device. */
3480 	BUG_TRAP(!dev->master);
3481 
3482 	/* Remove entries from sysfs */
3483 	netdev_unregister_sysfs(dev);
3484 
3485 	/* Finish processing unregister after unlock */
3486 	net_set_todo(dev);
3487 
3488 	synchronize_net();
3489 
3490 	dev_put(dev);
3491 }
3492 
3493 /**
3494  *	unregister_netdev - remove device from the kernel
3495  *	@dev: device
3496  *
3497  *	This function shuts down a device interface and removes it
3498  *	from the kernel tables. On success 0 is returned, on a failure
3499  *	a negative errno code is returned.
3500  *
3501  *	This is just a wrapper for unregister_netdevice that takes
3502  *	the rtnl semaphore.  In general you want to use this and not
3503  *	unregister_netdevice.
3504  */
3505 void unregister_netdev(struct net_device *dev)
3506 {
3507 	rtnl_lock();
3508 	unregister_netdevice(dev);
3509 	rtnl_unlock();
3510 }
3511 
3512 EXPORT_SYMBOL(unregister_netdev);
3513 
3514 static int dev_cpu_callback(struct notifier_block *nfb,
3515 			    unsigned long action,
3516 			    void *ocpu)
3517 {
3518 	struct sk_buff **list_skb;
3519 	struct net_device **list_net;
3520 	struct sk_buff *skb;
3521 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
3522 	struct softnet_data *sd, *oldsd;
3523 
3524 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
3525 		return NOTIFY_OK;
3526 
3527 	local_irq_disable();
3528 	cpu = smp_processor_id();
3529 	sd = &per_cpu(softnet_data, cpu);
3530 	oldsd = &per_cpu(softnet_data, oldcpu);
3531 
3532 	/* Find end of our completion_queue. */
3533 	list_skb = &sd->completion_queue;
3534 	while (*list_skb)
3535 		list_skb = &(*list_skb)->next;
3536 	/* Append completion queue from offline CPU. */
3537 	*list_skb = oldsd->completion_queue;
3538 	oldsd->completion_queue = NULL;
3539 
3540 	/* Find end of our output_queue. */
3541 	list_net = &sd->output_queue;
3542 	while (*list_net)
3543 		list_net = &(*list_net)->next_sched;
3544 	/* Append output queue from offline CPU. */
3545 	*list_net = oldsd->output_queue;
3546 	oldsd->output_queue = NULL;
3547 
3548 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
3549 	local_irq_enable();
3550 
3551 	/* Process offline CPU's input_pkt_queue */
3552 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3553 		netif_rx(skb);
3554 
3555 	return NOTIFY_OK;
3556 }
3557 
3558 #ifdef CONFIG_NET_DMA
3559 /**
3560  * net_dma_rebalance -
3561  * This is called when the number of channels allocated to the net_dma_client
3562  * changes.  The net_dma_client tries to have one DMA channel per CPU.
3563  */
3564 static void net_dma_rebalance(void)
3565 {
3566 	unsigned int cpu, i, n;
3567 	struct dma_chan *chan;
3568 
3569 	if (net_dma_count == 0) {
3570 		for_each_online_cpu(cpu)
3571 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3572 		return;
3573 	}
3574 
3575 	i = 0;
3576 	cpu = first_cpu(cpu_online_map);
3577 
3578 	rcu_read_lock();
3579 	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3580 		n = ((num_online_cpus() / net_dma_count)
3581 		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3582 
3583 		while(n) {
3584 			per_cpu(softnet_data, cpu).net_dma = chan;
3585 			cpu = next_cpu(cpu, cpu_online_map);
3586 			n--;
3587 		}
3588 		i++;
3589 	}
3590 	rcu_read_unlock();
3591 }
3592 
3593 /**
3594  * netdev_dma_event - event callback for the net_dma_client
3595  * @client: should always be net_dma_client
3596  * @chan: DMA channel for the event
3597  * @event: event type
3598  */
3599 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3600 	enum dma_event event)
3601 {
3602 	spin_lock(&net_dma_event_lock);
3603 	switch (event) {
3604 	case DMA_RESOURCE_ADDED:
3605 		net_dma_count++;
3606 		net_dma_rebalance();
3607 		break;
3608 	case DMA_RESOURCE_REMOVED:
3609 		net_dma_count--;
3610 		net_dma_rebalance();
3611 		break;
3612 	default:
3613 		break;
3614 	}
3615 	spin_unlock(&net_dma_event_lock);
3616 }
3617 
3618 /**
3619  * netdev_dma_regiser - register the networking subsystem as a DMA client
3620  */
3621 static int __init netdev_dma_register(void)
3622 {
3623 	spin_lock_init(&net_dma_event_lock);
3624 	net_dma_client = dma_async_client_register(netdev_dma_event);
3625 	if (net_dma_client == NULL)
3626 		return -ENOMEM;
3627 
3628 	dma_async_client_chan_request(net_dma_client, num_online_cpus());
3629 	return 0;
3630 }
3631 
3632 #else
3633 static int __init netdev_dma_register(void) { return -ENODEV; }
3634 #endif /* CONFIG_NET_DMA */
3635 
3636 /*
3637  *	Initialize the DEV module. At boot time this walks the device list and
3638  *	unhooks any devices that fail to initialise (normally hardware not
3639  *	present) and leaves us with a valid list of present and active devices.
3640  *
3641  */
3642 
3643 /*
3644  *       This is called single threaded during boot, so no need
3645  *       to take the rtnl semaphore.
3646  */
3647 static int __init net_dev_init(void)
3648 {
3649 	int i, rc = -ENOMEM;
3650 
3651 	BUG_ON(!dev_boot_phase);
3652 
3653 	if (dev_proc_init())
3654 		goto out;
3655 
3656 	if (netdev_sysfs_init())
3657 		goto out;
3658 
3659 	INIT_LIST_HEAD(&ptype_all);
3660 	for (i = 0; i < 16; i++)
3661 		INIT_LIST_HEAD(&ptype_base[i]);
3662 
3663 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3664 		INIT_HLIST_HEAD(&dev_name_head[i]);
3665 
3666 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3667 		INIT_HLIST_HEAD(&dev_index_head[i]);
3668 
3669 	/*
3670 	 *	Initialise the packet receive queues.
3671 	 */
3672 
3673 	for_each_possible_cpu(i) {
3674 		struct softnet_data *queue;
3675 
3676 		queue = &per_cpu(softnet_data, i);
3677 		skb_queue_head_init(&queue->input_pkt_queue);
3678 		queue->completion_queue = NULL;
3679 		INIT_LIST_HEAD(&queue->poll_list);
3680 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3681 		queue->backlog_dev.weight = weight_p;
3682 		queue->backlog_dev.poll = process_backlog;
3683 		atomic_set(&queue->backlog_dev.refcnt, 1);
3684 	}
3685 
3686 	netdev_dma_register();
3687 
3688 	dev_boot_phase = 0;
3689 
3690 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3691 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3692 
3693 	hotcpu_notifier(dev_cpu_callback, 0);
3694 	dst_init();
3695 	dev_mcast_init();
3696 	rc = 0;
3697 out:
3698 	return rc;
3699 }
3700 
3701 subsys_initcall(net_dev_init);
3702 
3703 EXPORT_SYMBOL(__dev_get_by_index);
3704 EXPORT_SYMBOL(__dev_get_by_name);
3705 EXPORT_SYMBOL(__dev_remove_pack);
3706 EXPORT_SYMBOL(dev_valid_name);
3707 EXPORT_SYMBOL(dev_add_pack);
3708 EXPORT_SYMBOL(dev_alloc_name);
3709 EXPORT_SYMBOL(dev_close);
3710 EXPORT_SYMBOL(dev_get_by_flags);
3711 EXPORT_SYMBOL(dev_get_by_index);
3712 EXPORT_SYMBOL(dev_get_by_name);
3713 EXPORT_SYMBOL(dev_open);
3714 EXPORT_SYMBOL(dev_queue_xmit);
3715 EXPORT_SYMBOL(dev_remove_pack);
3716 EXPORT_SYMBOL(dev_set_allmulti);
3717 EXPORT_SYMBOL(dev_set_promiscuity);
3718 EXPORT_SYMBOL(dev_change_flags);
3719 EXPORT_SYMBOL(dev_set_mtu);
3720 EXPORT_SYMBOL(dev_set_mac_address);
3721 EXPORT_SYMBOL(free_netdev);
3722 EXPORT_SYMBOL(netdev_boot_setup_check);
3723 EXPORT_SYMBOL(netdev_set_master);
3724 EXPORT_SYMBOL(netdev_state_change);
3725 EXPORT_SYMBOL(netif_receive_skb);
3726 EXPORT_SYMBOL(netif_rx);
3727 EXPORT_SYMBOL(register_gifconf);
3728 EXPORT_SYMBOL(register_netdevice);
3729 EXPORT_SYMBOL(register_netdevice_notifier);
3730 EXPORT_SYMBOL(skb_checksum_help);
3731 EXPORT_SYMBOL(synchronize_net);
3732 EXPORT_SYMBOL(unregister_netdevice);
3733 EXPORT_SYMBOL(unregister_netdevice_notifier);
3734 EXPORT_SYMBOL(net_enable_timestamp);
3735 EXPORT_SYMBOL(net_disable_timestamp);
3736 EXPORT_SYMBOL(dev_get_flags);
3737 
3738 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3739 EXPORT_SYMBOL(br_handle_frame_hook);
3740 EXPORT_SYMBOL(br_fdb_get_hook);
3741 EXPORT_SYMBOL(br_fdb_put_hook);
3742 #endif
3743 
3744 #ifdef CONFIG_KMOD
3745 EXPORT_SYMBOL(dev_load);
3746 #endif
3747 
3748 EXPORT_PER_CPU_SYMBOL(softnet_data);
3749