xref: /linux/net/core/dev.c (revision bf74b964775009071cf12f9d59d4dd5e388fbe0b)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/sock.h>
96 #include <linux/rtnetlink.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/stat.h>
100 #include <linux/if_bridge.h>
101 #include <net/dst.h>
102 #include <net/pkt_sched.h>
103 #include <net/checksum.h>
104 #include <linux/highmem.h>
105 #include <linux/init.h>
106 #include <linux/kmod.h>
107 #include <linux/module.h>
108 #include <linux/kallsyms.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #include <net/wext.h>
113 #include <net/iw_handler.h>
114 #include <asm/current.h>
115 #include <linux/audit.h>
116 #include <linux/dmaengine.h>
117 #include <linux/err.h>
118 #include <linux/ctype.h>
119 #include <linux/if_arp.h>
120 
121 /*
122  *	The list of packet types we will receive (as opposed to discard)
123  *	and the routines to invoke.
124  *
125  *	Why 16? Because with 16 the only overlap we get on a hash of the
126  *	low nibble of the protocol value is RARP/SNAP/X.25.
127  *
128  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
129  *             sure which should go first, but I bet it won't make much
130  *             difference if we are running VLANs.  The good news is that
131  *             this protocol won't be in the list unless compiled in, so
132  *             the average user (w/out VLANs) will not be adversely affected.
133  *             --BLG
134  *
135  *		0800	IP
136  *		8100    802.1Q VLAN
137  *		0001	802.3
138  *		0002	AX.25
139  *		0004	802.2
140  *		8035	RARP
141  *		0005	SNAP
142  *		0805	X.25
143  *		0806	ARP
144  *		8137	IPX
145  *		0009	Localtalk
146  *		86DD	IPv6
147  */
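/*
 * Worked example of the bucket selection used below (ntohs(type) & 15):
 * ETH_P_IP (0x0800) lands in bucket 0, while RARP (0x8035), SNAP (0x0005)
 * and X.25 (0x0805) all share bucket 5 -- the overlap noted above.
 */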
148 
149 static DEFINE_SPINLOCK(ptype_lock);
150 static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
151 static struct list_head ptype_all __read_mostly;	/* Taps */
152 
153 #ifdef CONFIG_NET_DMA
154 static struct dma_client *net_dma_client;
155 static unsigned int net_dma_count;
156 static spinlock_t net_dma_event_lock;
157 #endif
158 
159 /*
160  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
161  * semaphore.
162  *
163  * Pure readers hold dev_base_lock for reading.
164  *
165  * Writers must hold the rtnl semaphore while they loop through the
166  * dev_base_head list, and hold dev_base_lock for writing when they do the
167  * actual updates.  This allows pure readers to access the list even
168  * while a writer is preparing to update it.
169  *
170  * To put it another way, dev_base_lock is held for writing only to
171  * protect against pure readers; the rtnl semaphore provides the
172  * protection against other writers.
173  *
174  * For example usages, see register_netdevice() and
175  * unregister_netdevice(), which must be called with the rtnl
176  * semaphore held.
177  */
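/*
 * A minimal reader-side sketch of the rule above (illustrative only):
 *
 *	struct net_device *dev;
 *
 *	read_lock(&dev_base_lock);
 *	for_each_netdev(dev)
 *		printk(KERN_DEBUG "%s\n", dev->name);
 *	read_unlock(&dev_base_lock);
 *
 * Writers instead hold the rtnl semaphore for the whole walk and
 * additionally take write_lock(&dev_base_lock) around the actual update.
 */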
178 LIST_HEAD(dev_base_head);
179 DEFINE_RWLOCK(dev_base_lock);
180 
181 EXPORT_SYMBOL(dev_base_head);
182 EXPORT_SYMBOL(dev_base_lock);
183 
184 #define NETDEV_HASHBITS	8
185 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
186 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
187 
188 static inline struct hlist_head *dev_name_hash(const char *name)
189 {
190 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
191 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
192 }
193 
194 static inline struct hlist_head *dev_index_hash(int ifindex)
195 {
196 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
197 }
198 
199 /*
200  *	Our notifier list
201  */
202 
203 static RAW_NOTIFIER_HEAD(netdev_chain);
204 
205 /*
206  *	Device drivers call our routines to queue packets here. We empty the
207  *	queue in the local softnet handler.
208  */
209 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
210 
211 #ifdef CONFIG_SYSFS
212 extern int netdev_sysfs_init(void);
213 extern int netdev_register_sysfs(struct net_device *);
214 extern void netdev_unregister_sysfs(struct net_device *);
215 #else
216 #define netdev_sysfs_init()	 	(0)
217 #define netdev_register_sysfs(dev)	(0)
218 #define	netdev_unregister_sysfs(dev)	do { } while(0)
219 #endif
220 
221 #ifdef CONFIG_DEBUG_LOCK_ALLOC
222 /*
223  * register_netdevice() inits dev->_xmit_lock and sets lockdep class
224  * according to dev->type
225  */
226 static const unsigned short netdev_lock_type[] =
227 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
228 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
229 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
230 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
231 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
232 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
233 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
234 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
235 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
236 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
237 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
238 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
239 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
240 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
241 	 ARPHRD_NONE};
242 
243 static const char *netdev_lock_name[] =
244 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
245 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
246 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
247 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
248 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
249 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
250 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
251 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
252 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
253 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
254 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
255 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
256 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
257 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
258 	 "_xmit_NONE"};
259 
260 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
261 
262 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
263 {
264 	int i;
265 
266 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
267 		if (netdev_lock_type[i] == dev_type)
268 			return i;
269 	/* the last key is used by default */
270 	return ARRAY_SIZE(netdev_lock_type) - 1;
271 }
272 
273 static inline void netdev_set_lockdep_class(spinlock_t *lock,
274 					    unsigned short dev_type)
275 {
276 	int i;
277 
278 	i = netdev_lock_pos(dev_type);
279 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
280 				   netdev_lock_name[i]);
281 }
282 #else
283 static inline void netdev_set_lockdep_class(spinlock_t *lock,
284 					    unsigned short dev_type)
285 {
286 }
287 #endif
288 
289 /*******************************************************************************
290 
291 		Protocol management and registration routines
292 
293 *******************************************************************************/
294 
295 /*
296  *	Add a protocol ID to the list. Now that the input handler is
297  *	smarter we can dispense with all the messy stuff that used to be
298  *	here.
299  *
300  *	BEWARE!!! Protocol handlers that mangle input packets
301  *	MUST BE last in the hash buckets, and checking of protocol handlers
302  *	MUST start from the promiscuous ptype_all chain in net_bh.
303  *	It is true now, do not change it.
304  *	Explanation follows: if a packet-mangling protocol handler
305  *	were first on the list, it could not tell that the packet
306  *	is cloned and should be copied-on-write, so it would
307  *	change it and subsequent readers would get a broken packet.
308  *							--ANK (980803)
309  */
310 
311 /**
312  *	dev_add_pack - add packet handler
313  *	@pt: packet type declaration
314  *
315  *	Add a protocol handler to the networking stack. The passed &packet_type
316  *	is linked into kernel lists and may not be freed until it has been
317  *	removed from the kernel lists.
318  *
319  *	This call does not sleep, therefore it cannot
320  *	guarantee that all CPUs that are in the middle of receiving packets
321  *	will see the new packet type (until the next received packet).
322  */
323 
324 void dev_add_pack(struct packet_type *pt)
325 {
326 	int hash;
327 
328 	spin_lock_bh(&ptype_lock);
329 	if (pt->type == htons(ETH_P_ALL))
330 		list_add_rcu(&pt->list, &ptype_all);
331 	else {
332 		hash = ntohs(pt->type) & 15;
333 		list_add_rcu(&pt->list, &ptype_base[hash]);
334 	}
335 	spin_unlock_bh(&ptype_lock);
336 }
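/*
 * A minimal usage sketch (hypothetical names, for illustration only):
 *
 *	static struct packet_type my_packet_type = {
 *		.type	= __constant_htons(ETH_P_IP),
 *		.func	= my_rcv,
 *	};
 *
 *	dev_add_pack(&my_packet_type);
 *
 * where my_rcv() has the packet_type ->func signature, and the handler
 * is later removed with dev_remove_pack(&my_packet_type).
 */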
337 
338 /**
339  *	__dev_remove_pack	 - remove packet handler
340  *	@pt: packet type declaration
341  *
342  *	Remove a protocol handler that was previously added to the kernel
343  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
344  *	from the kernel lists and can be freed or reused once this function
345  *	returns.
346  *
347  *      The packet type might still be in use by receivers
348  *	and must not be freed until after all the CPUs have gone
349  *	through a quiescent state.
350  */
351 void __dev_remove_pack(struct packet_type *pt)
352 {
353 	struct list_head *head;
354 	struct packet_type *pt1;
355 
356 	spin_lock_bh(&ptype_lock);
357 
358 	if (pt->type == htons(ETH_P_ALL))
359 		head = &ptype_all;
360 	else
361 		head = &ptype_base[ntohs(pt->type) & 15];
362 
363 	list_for_each_entry(pt1, head, list) {
364 		if (pt == pt1) {
365 			list_del_rcu(&pt->list);
366 			goto out;
367 		}
368 	}
369 
370 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
371 out:
372 	spin_unlock_bh(&ptype_lock);
373 }
374 /**
375  *	dev_remove_pack	 - remove packet handler
376  *	@pt: packet type declaration
377  *
378  *	Remove a protocol handler that was previously added to the kernel
379  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
380  *	from the kernel lists and can be freed or reused once this function
381  *	returns.
382  *
383  *	This call sleeps to guarantee that no CPU is looking at the packet
384  *	type after return.
385  */
386 void dev_remove_pack(struct packet_type *pt)
387 {
388 	__dev_remove_pack(pt);
389 
390 	synchronize_net();
391 }
392 
393 /******************************************************************************
394 
395 		      Device Boot-time Settings Routines
396 
397 *******************************************************************************/
398 
399 /* Boot time configuration table */
400 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
401 
402 /**
403  *	netdev_boot_setup_add	- add new setup entry
404  *	@name: name of the device
405  *	@map: configured settings for the device
406  *
407  *	Adds a new setup entry to the dev_boot_setup list.  The function
408  *	returns 0 on error and 1 on success.  This is a generic routine for
409  *	all netdevices.
410  */
411 static int netdev_boot_setup_add(char *name, struct ifmap *map)
412 {
413 	struct netdev_boot_setup *s;
414 	int i;
415 
416 	s = dev_boot_setup;
417 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
418 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
419 			memset(s[i].name, 0, sizeof(s[i].name));
420 			strcpy(s[i].name, name);
421 			memcpy(&s[i].map, map, sizeof(s[i].map));
422 			break;
423 		}
424 	}
425 
426 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
427 }
428 
429 /**
430  *	netdev_boot_setup_check	- check boot time settings
431  *	@dev: the netdevice
432  *
433  * 	Check boot time settings for the device.
434  *	Any settings found are applied to the device, to be used
435  *	later during device probing.
436  *	Returns 1 if settings were found, 0 otherwise.
437  */
438 int netdev_boot_setup_check(struct net_device *dev)
439 {
440 	struct netdev_boot_setup *s = dev_boot_setup;
441 	int i;
442 
443 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
444 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
445 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
446 			dev->irq 	= s[i].map.irq;
447 			dev->base_addr 	= s[i].map.base_addr;
448 			dev->mem_start 	= s[i].map.mem_start;
449 			dev->mem_end 	= s[i].map.mem_end;
450 			return 1;
451 		}
452 	}
453 	return 0;
454 }
455 
456 
457 /**
458  *	netdev_boot_base	- get address from boot time settings
459  *	@prefix: prefix for network device
460  *	@unit: id for network device
461  *
462  * 	Check the boot time settings for the base address of the device,
463  *	to be used later during device probing.
464  *	Returns the configured base address, or 0 if no settings
465  *	were found.
466  */
467 unsigned long netdev_boot_base(const char *prefix, int unit)
468 {
469 	const struct netdev_boot_setup *s = dev_boot_setup;
470 	char name[IFNAMSIZ];
471 	int i;
472 
473 	sprintf(name, "%s%d", prefix, unit);
474 
475 	/*
476 	 * If device already registered then return base of 1
477 	 * to indicate not to probe for this interface
478 	 */
479 	if (__dev_get_by_name(name))
480 		return 1;
481 
482 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
483 		if (!strcmp(name, s[i].name))
484 			return s[i].map.base_addr;
485 	return 0;
486 }
487 
488 /*
489  * Saves at boot time configured settings for any netdevice.
490  */
491 int __init netdev_boot_setup(char *str)
492 {
493 	int ints[5];
494 	struct ifmap map;
495 
496 	str = get_options(str, ARRAY_SIZE(ints), ints);
497 	if (!str || !*str)
498 		return 0;
499 
500 	/* Save settings */
501 	memset(&map, 0, sizeof(map));
502 	if (ints[0] > 0)
503 		map.irq = ints[1];
504 	if (ints[0] > 1)
505 		map.base_addr = ints[2];
506 	if (ints[0] > 2)
507 		map.mem_start = ints[3];
508 	if (ints[0] > 3)
509 		map.mem_end = ints[4];
510 
511 	/* Add new entry to the list */
512 	return netdev_boot_setup_add(str, &map);
513 }
514 
515 __setup("netdev=", netdev_boot_setup);
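/*
 * For example, booting with "netdev=9,0x300,0,0,eth0" records irq 9 and
 * I/O base 0x300 under the name "eth0"; netdev_boot_setup_check() will
 * later copy these values into the matching device before probing.
 */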
516 
517 /*******************************************************************************
518 
519 			    Device Interface Subroutines
520 
521 *******************************************************************************/
522 
523 /**
524  *	__dev_get_by_name	- find a device by its name
525  *	@name: name to find
526  *
527  *	Find an interface by name. Must be called under RTNL semaphore
528  *	or @dev_base_lock. If the name is found a pointer to the device
529  *	is returned. If the name is not found then %NULL is returned. The
530  *	reference counters are not incremented so the caller must be
531  *	careful with locks.
532  */
533 
534 struct net_device *__dev_get_by_name(const char *name)
535 {
536 	struct hlist_node *p;
537 
538 	hlist_for_each(p, dev_name_hash(name)) {
539 		struct net_device *dev
540 			= hlist_entry(p, struct net_device, name_hlist);
541 		if (!strncmp(dev->name, name, IFNAMSIZ))
542 			return dev;
543 	}
544 	return NULL;
545 }
546 
547 /**
548  *	dev_get_by_name		- find a device by its name
549  *	@name: name to find
550  *
551  *	Find an interface by name. This can be called from any
552  *	context and does its own locking. The returned handle has
553  *	the usage count incremented and the caller must use dev_put() to
554  *	release it when it is no longer needed. %NULL is returned if no
555  *	matching device is found.
556  */
557 
558 struct net_device *dev_get_by_name(const char *name)
559 {
560 	struct net_device *dev;
561 
562 	read_lock(&dev_base_lock);
563 	dev = __dev_get_by_name(name);
564 	if (dev)
565 		dev_hold(dev);
566 	read_unlock(&dev_base_lock);
567 	return dev;
568 }
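/*
 * Typical usage sketch (illustrative only):
 *
 *	struct net_device *dev = dev_get_by_name("eth0");
 *	if (dev) {
 *		...
 *		dev_put(dev);
 *	}
 */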
569 
570 /**
571  *	__dev_get_by_index - find a device by its ifindex
572  *	@ifindex: index of device
573  *
574  *	Search for an interface by index. Returns %NULL if the device
575  *	is not found or a pointer to the device. The device has not
576  *	had its reference counter increased so the caller must be careful
577  *	about locking. The caller must hold either the RTNL semaphore
578  *	or @dev_base_lock.
579  */
580 
581 struct net_device *__dev_get_by_index(int ifindex)
582 {
583 	struct hlist_node *p;
584 
585 	hlist_for_each(p, dev_index_hash(ifindex)) {
586 		struct net_device *dev
587 			= hlist_entry(p, struct net_device, index_hlist);
588 		if (dev->ifindex == ifindex)
589 			return dev;
590 	}
591 	return NULL;
592 }
593 
594 
595 /**
596  *	dev_get_by_index - find a device by its ifindex
597  *	@ifindex: index of device
598  *
599  *	Search for an interface by index. Returns NULL if the device
600  *	is not found or a pointer to the device. The device returned has
601  *	had a reference added and the pointer is safe until the user calls
602  *	dev_put to indicate they have finished with it.
603  */
604 
605 struct net_device *dev_get_by_index(int ifindex)
606 {
607 	struct net_device *dev;
608 
609 	read_lock(&dev_base_lock);
610 	dev = __dev_get_by_index(ifindex);
611 	if (dev)
612 		dev_hold(dev);
613 	read_unlock(&dev_base_lock);
614 	return dev;
615 }
616 
617 /**
618  *	dev_getbyhwaddr - find a device by its hardware address
619  *	@type: media type of device
620  *	@ha: hardware address
621  *
622  *	Search for an interface by MAC address. Returns NULL if the device
623  *	is not found or a pointer to the device. The caller must hold the
624  *	rtnl semaphore. The returned device has not had its ref count increased
625  *	and the caller must therefore be careful about locking
626  *
627  *	BUGS:
628  *	If the API was consistent this would be __dev_get_by_hwaddr
629  */
630 
631 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
632 {
633 	struct net_device *dev;
634 
635 	ASSERT_RTNL();
636 
637 	for_each_netdev(dev)
638 		if (dev->type == type &&
639 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
640 			return dev;
641 
642 	return NULL;
643 }
644 
645 EXPORT_SYMBOL(dev_getbyhwaddr);
646 
647 struct net_device *__dev_getfirstbyhwtype(unsigned short type)
648 {
649 	struct net_device *dev;
650 
651 	ASSERT_RTNL();
652 	for_each_netdev(dev)
653 		if (dev->type == type)
654 			return dev;
655 
656 	return NULL;
657 }
658 
659 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
660 
661 struct net_device *dev_getfirstbyhwtype(unsigned short type)
662 {
663 	struct net_device *dev;
664 
665 	rtnl_lock();
666 	dev = __dev_getfirstbyhwtype(type);
667 	if (dev)
668 		dev_hold(dev);
669 	rtnl_unlock();
670 	return dev;
671 }
672 
673 EXPORT_SYMBOL(dev_getfirstbyhwtype);
674 
675 /**
676  *	dev_get_by_flags - find any device with given flags
677  *	@if_flags: IFF_* values
678  *	@mask: bitmask of bits in if_flags to check
679  *
680  *	Search for any interface with the given flags. Returns NULL if a device
681  *	is not found or a pointer to the device. The device returned has
682  *	had a reference added and the pointer is safe until the user calls
683  *	dev_put to indicate they have finished with it.
684  */
685 
686 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
687 {
688 	struct net_device *dev, *ret;
689 
690 	ret = NULL;
691 	read_lock(&dev_base_lock);
692 	for_each_netdev(dev) {
693 		if (((dev->flags ^ if_flags) & mask) == 0) {
694 			dev_hold(dev);
695 			ret = dev;
696 			break;
697 		}
698 	}
699 	read_unlock(&dev_base_lock);
700 	return ret;
701 }
702 
703 /**
704  *	dev_valid_name - check if name is okay for network device
705  *	@name: name string
706  *
707  *	Network device names need to be valid file names
708  *	to allow sysfs to work.  We also disallow any kind of
709  *	whitespace.
710  */
711 int dev_valid_name(const char *name)
712 {
713 	if (*name == '\0')
714 		return 0;
715 	if (strlen(name) >= IFNAMSIZ)
716 		return 0;
717 	if (!strcmp(name, ".") || !strcmp(name, ".."))
718 		return 0;
719 
720 	while (*name) {
721 		if (*name == '/' || isspace(*name))
722 			return 0;
723 		name++;
724 	}
725 	return 1;
726 }
727 
728 /**
729  *	dev_alloc_name - allocate a name for a device
730  *	@dev: device
731  *	@name: name format string
732  *
733  *	Passed a format string - eg "lt%d" - it will try to find a suitable
734  *	id. It scans the list of devices to build up a free map, then chooses
735  *	the first empty slot. The caller must hold the dev_base or rtnl lock
736  *	while allocating the name and adding the device in order to avoid
737  *	duplicates.
738  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
739  *	Returns the number of the unit assigned or a negative errno code.
740  */
741 
742 int dev_alloc_name(struct net_device *dev, const char *name)
743 {
744 	int i = 0;
745 	char buf[IFNAMSIZ];
746 	const char *p;
747 	const int max_netdevices = 8*PAGE_SIZE;
748 	long *inuse;
749 	struct net_device *d;
750 
751 	p = strnchr(name, IFNAMSIZ-1, '%');
752 	if (p) {
753 		/*
754 		 * Verify the string as this thing may have come from
755 		 * the user.  There must be either one "%d" and no other "%"
756 		 * characters.
757 		 */
758 		if (p[1] != 'd' || strchr(p + 2, '%'))
759 			return -EINVAL;
760 
761 		/* Use one page as a bit array of possible slots */
762 		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
763 		if (!inuse)
764 			return -ENOMEM;
765 
766 		for_each_netdev(d) {
767 			if (!sscanf(d->name, name, &i))
768 				continue;
769 			if (i < 0 || i >= max_netdevices)
770 				continue;
771 
772 			/*  avoid cases where sscanf is not exact inverse of printf */
773 			snprintf(buf, sizeof(buf), name, i);
774 			if (!strncmp(buf, d->name, IFNAMSIZ))
775 				set_bit(i, inuse);
776 		}
777 
778 		i = find_first_zero_bit(inuse, max_netdevices);
779 		free_page((unsigned long) inuse);
780 	}
781 
782 	snprintf(buf, sizeof(buf), name, i);
783 	if (!__dev_get_by_name(buf)) {
784 		strlcpy(dev->name, buf, IFNAMSIZ);
785 		return i;
786 	}
787 
788 	/* It is possible to run out of possible slots
789 	 * when the name is long and there isn't enough space left
790 	 * for the digits, or if all bits are used.
791 	 */
792 	return -ENFILE;
793 }
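/*
 * For example, with eth0 and eth2 already registered,
 * dev_alloc_name(dev, "eth%d") picks the first free slot, sets
 * dev->name to "eth1" and returns 1.
 */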
794 
795 
796 /**
797  *	dev_change_name - change name of a device
798  *	@dev: device
799  *	@newname: name (or format string) must be at least IFNAMSIZ
800  *
801  *	Change name of a device, can pass format strings "eth%d"
802  *	for wildcarding.
803  */
804 int dev_change_name(struct net_device *dev, char *newname)
805 {
806 	int err = 0;
807 
808 	ASSERT_RTNL();
809 
810 	if (dev->flags & IFF_UP)
811 		return -EBUSY;
812 
813 	if (!dev_valid_name(newname))
814 		return -EINVAL;
815 
816 	if (strchr(newname, '%')) {
817 		err = dev_alloc_name(dev, newname);
818 		if (err < 0)
819 			return err;
820 		strcpy(newname, dev->name);
821 	}
822 	else if (__dev_get_by_name(newname))
823 		return -EEXIST;
824 	else
825 		strlcpy(dev->name, newname, IFNAMSIZ);
826 
827 	device_rename(&dev->dev, dev->name);
828 	hlist_del(&dev->name_hlist);
829 	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
830 	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
831 
832 	return err;
833 }
834 
835 /**
836  *	netdev_features_change - device changes features
837  *	@dev: device to cause notification
838  *
839  *	Called to indicate a device has changed features.
840  */
841 void netdev_features_change(struct net_device *dev)
842 {
843 	raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
844 }
845 EXPORT_SYMBOL(netdev_features_change);
846 
847 /**
848  *	netdev_state_change - device changes state
849  *	@dev: device to cause notification
850  *
851  *	Called to indicate a device has changed state. This function calls
852  *	the notifier chains for netdev_chain and sends a NEWLINK message
853  *	to the routing socket.
854  */
855 void netdev_state_change(struct net_device *dev)
856 {
857 	if (dev->flags & IFF_UP) {
858 		raw_notifier_call_chain(&netdev_chain,
859 				NETDEV_CHANGE, dev);
860 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
861 	}
862 }
863 
864 /**
865  *	dev_load 	- load a network module
866  *	@name: name of interface
867  *
868  *	If a network interface is not present and the process has suitable
869  *	privileges this function loads the module. If module loading is not
870  *	available in this kernel then it becomes a nop.
871  */
872 
873 void dev_load(const char *name)
874 {
875 	struct net_device *dev;
876 
877 	read_lock(&dev_base_lock);
878 	dev = __dev_get_by_name(name);
879 	read_unlock(&dev_base_lock);
880 
881 	if (!dev && capable(CAP_SYS_MODULE))
882 		request_module("%s", name);
883 }
884 
885 static int default_rebuild_header(struct sk_buff *skb)
886 {
887 	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
888 	       skb->dev ? skb->dev->name : "NULL!!!");
889 	kfree_skb(skb);
890 	return 1;
891 }
892 
893 /**
894  *	dev_open	- prepare an interface for use.
895  *	@dev:	device to open
896  *
897  *	Takes a device from down to up state. The device's private open
898  *	function is invoked and then the multicast lists are loaded. Finally
899  *	the device is moved into the up state and a %NETDEV_UP message is
900  *	sent to the netdev notifier chain.
901  *
902  *	Calling this function on an active interface is a nop. On a failure
903  *	a negative errno code is returned.
904  */
905 int dev_open(struct net_device *dev)
906 {
907 	int ret = 0;
908 
909 	/*
910 	 *	Is it already up?
911 	 */
912 
913 	if (dev->flags & IFF_UP)
914 		return 0;
915 
916 	/*
917 	 *	Is it even present?
918 	 */
919 	if (!netif_device_present(dev))
920 		return -ENODEV;
921 
922 	/*
923 	 *	Call device private open method
924 	 */
925 	set_bit(__LINK_STATE_START, &dev->state);
926 	if (dev->open) {
927 		ret = dev->open(dev);
928 		if (ret)
929 			clear_bit(__LINK_STATE_START, &dev->state);
930 	}
931 
932 	/*
933 	 *	If it went open OK then:
934 	 */
935 
936 	if (!ret) {
937 		/*
938 		 *	Set the flags.
939 		 */
940 		dev->flags |= IFF_UP;
941 
942 		/*
943 		 *	Initialize multicasting status
944 		 */
945 		dev_mc_upload(dev);
946 
947 		/*
948 		 *	Wakeup transmit queue engine
949 		 */
950 		dev_activate(dev);
951 
952 		/*
953 		 *	... and announce new interface.
954 		 */
955 		raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
956 	}
957 	return ret;
958 }
959 
960 /**
961  *	dev_close - shutdown an interface.
962  *	@dev: device to shutdown
963  *
964  *	This function moves an active device into down state. A
965  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
966  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
967  *	chain.
968  */
969 int dev_close(struct net_device *dev)
970 {
971 	if (!(dev->flags & IFF_UP))
972 		return 0;
973 
974 	/*
975 	 *	Tell people we are going down, so that they can
976 	 *	prepare for it while the device is still operating.
977 	 */
978 	raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
979 
980 	dev_deactivate(dev);
981 
982 	clear_bit(__LINK_STATE_START, &dev->state);
983 
984 	/* Synchronize with any scheduled poll. We cannot touch the poll list,
985 	 * it may even be on a different cpu. So just clear netif_running(),
986 	 * and wait until the poll really happens. Actually, the best place
987 	 * for this is inside dev->stop() after the device has stopped its irq
988 	 * engine, but this requires more changes in devices. */
989 
990 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
991 	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
992 		/* No hurry. */
993 		msleep(1);
994 	}
995 
996 	/*
997 	 *	Call the device specific close. This cannot fail.
998 	 *	Only if device is UP
999 	 *
1000 	 *	We allow it to be called even after a DETACH hot-plug
1001 	 *	event.
1002 	 */
1003 	if (dev->stop)
1004 		dev->stop(dev);
1005 
1006 	/*
1007 	 *	Device is now down.
1008 	 */
1009 
1010 	dev->flags &= ~IFF_UP;
1011 
1012 	/*
1013 	 * Tell people we are down
1014 	 */
1015 	raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
1016 
1017 	return 0;
1018 }
1019 
1020 
1021 /*
1022  *	Device change register/unregister. These are not inline or static
1023  *	as we export them to the world.
1024  */
1025 
1026 /**
1027  *	register_netdevice_notifier - register a network notifier block
1028  *	@nb: notifier
1029  *
1030  *	Register a notifier to be called when network device events occur.
1031  *	The notifier passed is linked into the kernel structures and must
1032  *	not be reused until it has been unregistered. A negative errno code
1033  *	is returned on a failure.
1034  *
1035  * 	When registered, all registration and up events are replayed
1036  *	to the new notifier to allow it a race-free
1037  *	view of the network device list.
1038  */
1039 
1040 int register_netdevice_notifier(struct notifier_block *nb)
1041 {
1042 	struct net_device *dev;
1043 	int err;
1044 
1045 	rtnl_lock();
1046 	err = raw_notifier_chain_register(&netdev_chain, nb);
1047 	if (!err) {
1048 		for_each_netdev(dev) {
1049 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
1050 
1051 			if (dev->flags & IFF_UP)
1052 				nb->notifier_call(nb, NETDEV_UP, dev);
1053 		}
1054 	}
1055 	rtnl_unlock();
1056 	return err;
1057 }
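/*
 * A minimal usage sketch (hypothetical names, for illustration only):
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_DEBUG "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_netdev_notifier = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_netdev_notifier);
 */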
1058 
1059 /**
1060  *	unregister_netdevice_notifier - unregister a network notifier block
1061  *	@nb: notifier
1062  *
1063  *	Unregister a notifier previously registered by
1064  *	register_netdevice_notifier(). The notifier is unlinked from the
1065  *	kernel structures and may then be reused. A negative errno code
1066  *	is returned on a failure.
1067  */
1068 
1069 int unregister_netdevice_notifier(struct notifier_block *nb)
1070 {
1071 	int err;
1072 
1073 	rtnl_lock();
1074 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1075 	rtnl_unlock();
1076 	return err;
1077 }
1078 
1079 /**
1080  *	call_netdevice_notifiers - call all network notifier blocks
1081  *      @val: value passed unmodified to notifier function
1082  *      @v:   pointer passed unmodified to notifier function
1083  *
1084  *	Call all network notifier blocks.  Parameters and return value
1085  *	are as for raw_notifier_call_chain().
1086  */
1087 
1088 int call_netdevice_notifiers(unsigned long val, void *v)
1089 {
1090 	return raw_notifier_call_chain(&netdev_chain, val, v);
1091 }
1092 
1093 /* When > 0 there are consumers of rx skb time stamps */
1094 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1095 
1096 void net_enable_timestamp(void)
1097 {
1098 	atomic_inc(&netstamp_needed);
1099 }
1100 
1101 void net_disable_timestamp(void)
1102 {
1103 	atomic_dec(&netstamp_needed);
1104 }
1105 
1106 static inline void net_timestamp(struct sk_buff *skb)
1107 {
1108 	if (atomic_read(&netstamp_needed))
1109 		__net_timestamp(skb);
1110 	else
1111 		skb->tstamp.tv64 = 0;
1112 }
1113 
1114 /*
1115  *	Support routine. Sends outgoing frames to any network
1116  *	taps currently in use.
1117  */
1118 
1119 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1120 {
1121 	struct packet_type *ptype;
1122 
1123 	net_timestamp(skb);
1124 
1125 	rcu_read_lock();
1126 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1127 		/* Never send packets back to the socket
1128 		 * they originated from - MvS (miquels@drinkel.ow.org)
1129 		 */
1130 		if ((ptype->dev == dev || !ptype->dev) &&
1131 		    (ptype->af_packet_priv == NULL ||
1132 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1133 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1134 			if (!skb2)
1135 				break;
1136 
1137 			/* skb->nh should be correctly
1138 			   set by sender, so that the second statement is
1139 			   just protection against buggy protocols.
1140 			 */
1141 			skb_reset_mac_header(skb2);
1142 
1143 			if (skb_network_header(skb2) < skb2->data ||
1144 			    skb2->network_header > skb2->tail) {
1145 				if (net_ratelimit())
1146 					printk(KERN_CRIT "protocol %04x is "
1147 					       "buggy, dev %s\n",
1148 					       skb2->protocol, dev->name);
1149 				skb_reset_network_header(skb2);
1150 			}
1151 
1152 			skb2->transport_header = skb2->network_header;
1153 			skb2->pkt_type = PACKET_OUTGOING;
1154 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1155 		}
1156 	}
1157 	rcu_read_unlock();
1158 }
1159 
1160 
1161 void __netif_schedule(struct net_device *dev)
1162 {
1163 	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1164 		unsigned long flags;
1165 		struct softnet_data *sd;
1166 
1167 		local_irq_save(flags);
1168 		sd = &__get_cpu_var(softnet_data);
1169 		dev->next_sched = sd->output_queue;
1170 		sd->output_queue = dev;
1171 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1172 		local_irq_restore(flags);
1173 	}
1174 }
1175 EXPORT_SYMBOL(__netif_schedule);
1176 
1177 void __netif_rx_schedule(struct net_device *dev)
1178 {
1179 	unsigned long flags;
1180 
1181 	local_irq_save(flags);
1182 	dev_hold(dev);
1183 	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1184 	if (dev->quota < 0)
1185 		dev->quota += dev->weight;
1186 	else
1187 		dev->quota = dev->weight;
1188 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1189 	local_irq_restore(flags);
1190 }
1191 EXPORT_SYMBOL(__netif_rx_schedule);
1192 
1193 void dev_kfree_skb_any(struct sk_buff *skb)
1194 {
1195 	if (in_irq() || irqs_disabled())
1196 		dev_kfree_skb_irq(skb);
1197 	else
1198 		dev_kfree_skb(skb);
1199 }
1200 EXPORT_SYMBOL(dev_kfree_skb_any);
1201 
1202 
1203 /* Hot-plugging. */
1204 void netif_device_detach(struct net_device *dev)
1205 {
1206 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1207 	    netif_running(dev)) {
1208 		netif_stop_queue(dev);
1209 	}
1210 }
1211 EXPORT_SYMBOL(netif_device_detach);
1212 
1213 void netif_device_attach(struct net_device *dev)
1214 {
1215 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1216 	    netif_running(dev)) {
1217 		netif_wake_queue(dev);
1218 		__netdev_watchdog_up(dev);
1219 	}
1220 }
1221 EXPORT_SYMBOL(netif_device_attach);
1222 
1223 
1224 /*
1225  * Invalidate hardware checksum when packet is to be mangled, and
1226  * complete checksum manually on outgoing path.
1227  */
1228 int skb_checksum_help(struct sk_buff *skb)
1229 {
1230 	__wsum csum;
1231 	int ret = 0, offset;
1232 
1233 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1234 		goto out_set_summed;
1235 
1236 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1237 		/* Let GSO fix up the checksum. */
1238 		goto out_set_summed;
1239 	}
1240 
1241 	if (skb_cloned(skb)) {
1242 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1243 		if (ret)
1244 			goto out;
1245 	}
1246 
1247 	offset = skb->csum_start - skb_headroom(skb);
1248 	BUG_ON(offset > (int)skb->len);
1249 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1250 
1251 	offset = skb_headlen(skb) - offset;
1252 	BUG_ON(offset <= 0);
1253 	BUG_ON(skb->csum_offset + 2 > offset);
1254 
1255 	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
1256 		csum_fold(csum);
1257 out_set_summed:
1258 	skb->ip_summed = CHECKSUM_NONE;
1259 out:
1260 	return ret;
1261 }
1262 
1263 /**
1264  *	skb_gso_segment - Perform segmentation on skb.
1265  *	@skb: buffer to segment
1266  *	@features: features for the output path (see dev->features)
1267  *
1268  *	This function segments the given skb and returns a list of segments.
1269  *
1270  *	It may return NULL if the skb requires no segmentation.  This is
1271  *	only possible when GSO is used for verifying header integrity.
1272  */
1273 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1274 {
1275 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1276 	struct packet_type *ptype;
1277 	__be16 type = skb->protocol;
1278 	int err;
1279 
1280 	BUG_ON(skb_shinfo(skb)->frag_list);
1281 
1282 	skb_reset_mac_header(skb);
1283 	skb->mac_len = skb->network_header - skb->mac_header;
1284 	__skb_pull(skb, skb->mac_len);
1285 
1286 	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1287 		if (skb_header_cloned(skb) &&
1288 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1289 			return ERR_PTR(err);
1290 	}
1291 
1292 	rcu_read_lock();
1293 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1294 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1295 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1296 				err = ptype->gso_send_check(skb);
1297 				segs = ERR_PTR(err);
1298 				if (err || skb_gso_ok(skb, features))
1299 					break;
1300 				__skb_push(skb, (skb->data -
1301 						 skb_network_header(skb)));
1302 			}
1303 			segs = ptype->gso_segment(skb, features);
1304 			break;
1305 		}
1306 	}
1307 	rcu_read_unlock();
1308 
1309 	__skb_push(skb, skb->data - skb_mac_header(skb));
1310 
1311 	return segs;
1312 }
1313 
1314 EXPORT_SYMBOL(skb_gso_segment);
1315 
1316 /* Take action when hardware reception checksum errors are detected. */
1317 #ifdef CONFIG_BUG
1318 void netdev_rx_csum_fault(struct net_device *dev)
1319 {
1320 	if (net_ratelimit()) {
1321 		printk(KERN_ERR "%s: hw csum failure.\n",
1322 			dev ? dev->name : "<unknown>");
1323 		dump_stack();
1324 	}
1325 }
1326 EXPORT_SYMBOL(netdev_rx_csum_fault);
1327 #endif
1328 
1329 /* Actually, we should eliminate this check as soon as we know that:
1330  * 1. An IOMMU is present and can map all the memory.
1331  * 2. No high memory really exists on this machine.
1332  */
1333 
1334 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1335 {
1336 #ifdef CONFIG_HIGHMEM
1337 	int i;
1338 
1339 	if (dev->features & NETIF_F_HIGHDMA)
1340 		return 0;
1341 
1342 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1343 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1344 			return 1;
1345 
1346 #endif
1347 	return 0;
1348 }
1349 
1350 struct dev_gso_cb {
1351 	void (*destructor)(struct sk_buff *skb);
1352 };
1353 
1354 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1355 
1356 static void dev_gso_skb_destructor(struct sk_buff *skb)
1357 {
1358 	struct dev_gso_cb *cb;
1359 
1360 	do {
1361 		struct sk_buff *nskb = skb->next;
1362 
1363 		skb->next = nskb->next;
1364 		nskb->next = NULL;
1365 		kfree_skb(nskb);
1366 	} while (skb->next);
1367 
1368 	cb = DEV_GSO_CB(skb);
1369 	if (cb->destructor)
1370 		cb->destructor(skb);
1371 }
1372 
1373 /**
1374  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1375  *	@skb: buffer to segment
1376  *
1377  *	This function segments the given skb and stores the list of segments
1378  *	in skb->next.
1379  */
1380 static int dev_gso_segment(struct sk_buff *skb)
1381 {
1382 	struct net_device *dev = skb->dev;
1383 	struct sk_buff *segs;
1384 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1385 					 NETIF_F_SG : 0);
1386 
1387 	segs = skb_gso_segment(skb, features);
1388 
1389 	/* Verifying header integrity only. */
1390 	if (!segs)
1391 		return 0;
1392 
1393 	if (unlikely(IS_ERR(segs)))
1394 		return PTR_ERR(segs);
1395 
1396 	skb->next = segs;
1397 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1398 	skb->destructor = dev_gso_skb_destructor;
1399 
1400 	return 0;
1401 }
1402 
1403 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1404 {
1405 	if (likely(!skb->next)) {
1406 		if (!list_empty(&ptype_all))
1407 			dev_queue_xmit_nit(skb, dev);
1408 
1409 		if (netif_needs_gso(dev, skb)) {
1410 			if (unlikely(dev_gso_segment(skb)))
1411 				goto out_kfree_skb;
1412 			if (skb->next)
1413 				goto gso;
1414 		}
1415 
1416 		return dev->hard_start_xmit(skb, dev);
1417 	}
1418 
1419 gso:
1420 	do {
1421 		struct sk_buff *nskb = skb->next;
1422 		int rc;
1423 
1424 		skb->next = nskb->next;
1425 		nskb->next = NULL;
1426 		rc = dev->hard_start_xmit(nskb, dev);
1427 		if (unlikely(rc)) {
1428 			nskb->next = skb->next;
1429 			skb->next = nskb;
1430 			return rc;
1431 		}
1432 		if (unlikely(netif_queue_stopped(dev) && skb->next))
1433 			return NETDEV_TX_BUSY;
1434 	} while (skb->next);
1435 
1436 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1437 
1438 out_kfree_skb:
1439 	kfree_skb(skb);
1440 	return 0;
1441 }
1442 
1443 #define HARD_TX_LOCK(dev, cpu) {			\
1444 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1445 		netif_tx_lock(dev);			\
1446 	}						\
1447 }
1448 
1449 #define HARD_TX_UNLOCK(dev) {				\
1450 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1451 		netif_tx_unlock(dev);			\
1452 	}						\
1453 }
1454 
1455 /**
1456  *	dev_queue_xmit - transmit a buffer
1457  *	@skb: buffer to transmit
1458  *
1459  *	Queue a buffer for transmission to a network device. The caller must
1460  *	have set the device and priority and built the buffer before calling
1461  *	this function. The function can be called from an interrupt.
1462  *
1463  *	A negative errno code is returned on a failure. A success does not
1464  *	guarantee the frame will be transmitted as it may be dropped due
1465  *	to congestion or traffic shaping.
1466  *
1467  * -----------------------------------------------------------------------------------
1468  *      I notice this method can also return errors from the queue disciplines,
1469  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1470  *      be positive.
1471  *
1472  *      Regardless of the return value, the skb is consumed, so it is currently
1473  *      difficult to retry a send to this method.  (You can bump the ref count
1474  *      before sending to hold a reference for retry if you are careful.)
1475  *
1476  *      When calling this method, interrupts MUST be enabled.  This is because
1477  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1478  *          --BLG
1479  */
1480 
1481 int dev_queue_xmit(struct sk_buff *skb)
1482 {
1483 	struct net_device *dev = skb->dev;
1484 	struct Qdisc *q;
1485 	int rc = -ENOMEM;
1486 
1487 	/* GSO will handle the following emulations directly. */
1488 	if (netif_needs_gso(dev, skb))
1489 		goto gso;
1490 
1491 	if (skb_shinfo(skb)->frag_list &&
1492 	    !(dev->features & NETIF_F_FRAGLIST) &&
1493 	    __skb_linearize(skb))
1494 		goto out_kfree_skb;
1495 
1496 	/* A fragmented skb is linearized if the device does not support SG,
1497 	 * or if at least one of the fragments is in highmem and the device
1498 	 * does not support DMA from it.
1499 	 */
1500 	if (skb_shinfo(skb)->nr_frags &&
1501 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1502 	    __skb_linearize(skb))
1503 		goto out_kfree_skb;
1504 
1505 	/* If packet is not checksummed and device does not support
1506 	 * checksumming for this protocol, complete checksumming here.
1507 	 */
1508 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1509 		skb_set_transport_header(skb, skb->csum_start -
1510 					      skb_headroom(skb));
1511 
1512 		if (!(dev->features & NETIF_F_GEN_CSUM) &&
1513 		    (!(dev->features & NETIF_F_IP_CSUM) ||
1514 		     skb->protocol != htons(ETH_P_IP)))
1515 			if (skb_checksum_help(skb))
1516 				goto out_kfree_skb;
1517 	}
1518 
1519 gso:
1520 	spin_lock_prefetch(&dev->queue_lock);
1521 
1522 	/* Disable soft irqs for various locks below. Also
1523 	 * stops preemption for RCU.
1524 	 */
1525 	rcu_read_lock_bh();
1526 
1527 	/* Updates of qdisc are serialized by queue_lock.
1528 	 * The struct Qdisc which is pointed to by qdisc is now a
1529 	 * rcu structure - it may be accessed without acquiring
1530 	 * a lock (but the structure may be stale.) The freeing of the
1531 	 * qdisc will be deferred until it's known that there are no
1532 	 * more references to it.
1533 	 *
1534 	 * If the qdisc has an enqueue function, we still need to
1535 	 * hold the queue_lock before calling it, since queue_lock
1536 	 * also serializes access to the device queue.
1537 	 */
1538 
1539 	q = rcu_dereference(dev->qdisc);
1540 #ifdef CONFIG_NET_CLS_ACT
1541 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1542 #endif
1543 	if (q->enqueue) {
1544 		/* Grab device queue */
1545 		spin_lock(&dev->queue_lock);
1546 		q = dev->qdisc;
1547 		if (q->enqueue) {
1548 			rc = q->enqueue(skb, q);
1549 			qdisc_run(dev);
1550 			spin_unlock(&dev->queue_lock);
1551 
1552 			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1553 			goto out;
1554 		}
1555 		spin_unlock(&dev->queue_lock);
1556 	}
1557 
1558 	/* The device has no queue. Common case for software devices:
1559 	   loopback, all sorts of tunnels...
1560 
1561 	   Really, it is unlikely that netif_tx_lock protection is necessary
1562 	   here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
1563 	   counters.)
1564 	   However, it is possible that they rely on the protection
1565 	   we provide here.
1566 
1567 	   Check this and shoot the lock; it is not prone to deadlocks.
1568 	   Or shoot the noqueue qdisc, it is even simpler 8)
1569 	 */
1570 	if (dev->flags & IFF_UP) {
1571 		int cpu = smp_processor_id(); /* ok because BHs are off */
1572 
1573 		if (dev->xmit_lock_owner != cpu) {
1574 
1575 			HARD_TX_LOCK(dev, cpu);
1576 
1577 			if (!netif_queue_stopped(dev)) {
1578 				rc = 0;
1579 				if (!dev_hard_start_xmit(skb, dev)) {
1580 					HARD_TX_UNLOCK(dev);
1581 					goto out;
1582 				}
1583 			}
1584 			HARD_TX_UNLOCK(dev);
1585 			if (net_ratelimit())
1586 				printk(KERN_CRIT "Virtual device %s asks to "
1587 				       "queue packet!\n", dev->name);
1588 		} else {
1589 			/* Recursion is detected! It is possible,
1590 			 * unfortunately */
1591 			if (net_ratelimit())
1592 				printk(KERN_CRIT "Dead loop on virtual device "
1593 				       "%s, fix it urgently!\n", dev->name);
1594 		}
1595 	}
1596 
1597 	rc = -ENETDOWN;
1598 	rcu_read_unlock_bh();
1599 
1600 out_kfree_skb:
1601 	kfree_skb(skb);
1602 	return rc;
1603 out:
1604 	rcu_read_unlock_bh();
1605 	return rc;
1606 }
1607 
1608 
1609 /*=======================================================================
1610 			Receiver routines
1611   =======================================================================*/
1612 
1613 int netdev_max_backlog __read_mostly = 1000;
1614 int netdev_budget __read_mostly = 300;
1615 int weight_p __read_mostly = 64;            /* old backlog weight */
1616 
1617 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1618 
1619 
1620 /**
1621  *	netif_rx	-	post buffer to the network code
1622  *	@skb: buffer to post
1623  *
1624  *	This function receives a packet from a device driver and queues it for
1625  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1626  *	may be dropped during processing for congestion control or by the
1627  *	protocol layers.
1628  *
1629  *	return values:
1630  *	NET_RX_SUCCESS	(no congestion)
1631  *	NET_RX_CN_LOW   (low congestion)
1632  *	NET_RX_CN_MOD   (moderate congestion)
1633  *	NET_RX_CN_HIGH  (high congestion)
1634  *	NET_RX_DROP     (packet was dropped)
1635  *
1636  */
1637 
1638 int netif_rx(struct sk_buff *skb)
1639 {
1640 	struct softnet_data *queue;
1641 	unsigned long flags;
1642 
1643 	/* if netpoll wants it, pretend we never saw it */
1644 	if (netpoll_rx(skb))
1645 		return NET_RX_DROP;
1646 
1647 	if (!skb->tstamp.tv64)
1648 		net_timestamp(skb);
1649 
1650 	/*
1651 	 * The code is rearranged so that the path is the
1652 	 * shortest when the CPU is congested but still operating.
1653 	 */
1654 	local_irq_save(flags);
1655 	queue = &__get_cpu_var(softnet_data);
1656 
1657 	__get_cpu_var(netdev_rx_stat).total++;
1658 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1659 		if (queue->input_pkt_queue.qlen) {
1660 enqueue:
1661 			dev_hold(skb->dev);
1662 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1663 			local_irq_restore(flags);
1664 			return NET_RX_SUCCESS;
1665 		}
1666 
1667 		netif_rx_schedule(&queue->backlog_dev);
1668 		goto enqueue;
1669 	}
1670 
1671 	__get_cpu_var(netdev_rx_stat).dropped++;
1672 	local_irq_restore(flags);
1673 
1674 	kfree_skb(skb);
1675 	return NET_RX_DROP;
1676 }
1677 
1678 int netif_rx_ni(struct sk_buff *skb)
1679 {
1680 	int err;
1681 
1682 	preempt_disable();
1683 	err = netif_rx(skb);
1684 	if (local_softirq_pending())
1685 		do_softirq();
1686 	preempt_enable();
1687 
1688 	return err;
1689 }
1690 
1691 EXPORT_SYMBOL(netif_rx_ni);
1692 
1693 static inline struct net_device *skb_bond(struct sk_buff *skb)
1694 {
1695 	struct net_device *dev = skb->dev;
1696 
1697 	if (dev->master) {
1698 		if (skb_bond_should_drop(skb)) {
1699 			kfree_skb(skb);
1700 			return NULL;
1701 		}
1702 		skb->dev = dev->master;
1703 	}
1704 
1705 	return dev;
1706 }
1707 
1708 static void net_tx_action(struct softirq_action *h)
1709 {
1710 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1711 
1712 	if (sd->completion_queue) {
1713 		struct sk_buff *clist;
1714 
1715 		local_irq_disable();
1716 		clist = sd->completion_queue;
1717 		sd->completion_queue = NULL;
1718 		local_irq_enable();
1719 
1720 		while (clist) {
1721 			struct sk_buff *skb = clist;
1722 			clist = clist->next;
1723 
1724 			BUG_TRAP(!atomic_read(&skb->users));
1725 			__kfree_skb(skb);
1726 		}
1727 	}
1728 
1729 	if (sd->output_queue) {
1730 		struct net_device *head;
1731 
1732 		local_irq_disable();
1733 		head = sd->output_queue;
1734 		sd->output_queue = NULL;
1735 		local_irq_enable();
1736 
1737 		while (head) {
1738 			struct net_device *dev = head;
1739 			head = head->next_sched;
1740 
1741 			smp_mb__before_clear_bit();
1742 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1743 
1744 			if (spin_trylock(&dev->queue_lock)) {
1745 				qdisc_run(dev);
1746 				spin_unlock(&dev->queue_lock);
1747 			} else {
1748 				netif_schedule(dev);
1749 			}
1750 		}
1751 	}
1752 }
1753 
1754 static inline int deliver_skb(struct sk_buff *skb,
1755 			      struct packet_type *pt_prev,
1756 			      struct net_device *orig_dev)
1757 {
1758 	atomic_inc(&skb->users);
1759 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1760 }
1761 
1762 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1763 /* These hooks defined here for ATM */
1764 struct net_bridge;
1765 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1766 						unsigned char *addr);
1767 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1768 
1769 /*
1770  * If the bridge module is loaded, call the bridging hook.
1771  * Returns NULL if the packet was consumed.
1772  */
1773 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1774 					struct sk_buff *skb) __read_mostly;
1775 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1776 					    struct packet_type **pt_prev, int *ret,
1777 					    struct net_device *orig_dev)
1778 {
1779 	struct net_bridge_port *port;
1780 
1781 	if (skb->pkt_type == PACKET_LOOPBACK ||
1782 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
1783 		return skb;
1784 
1785 	if (*pt_prev) {
1786 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
1787 		*pt_prev = NULL;
1788 	}
1789 
1790 	return br_handle_frame_hook(port, skb);
1791 }
1792 #else
1793 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
1794 #endif
1795 
1796 #ifdef CONFIG_NET_CLS_ACT
1797 /* TODO: Maybe we should just force sch_ingress to be compiled in
1798  * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
1799  * instructions (a compare and 2 extra stores) right now if we don't
1800  * have it on but do have CONFIG_NET_CLS_ACT.
1801  * NOTE: This doesn't stop any functionality; if you don't have
1802  * the ingress scheduler, you just can't add policies on ingress.
1803  *
1804  */
1805 static int ing_filter(struct sk_buff *skb)
1806 {
1807 	struct Qdisc *q;
1808 	struct net_device *dev = skb->dev;
1809 	int result = TC_ACT_OK;
1810 
1811 	if (dev->qdisc_ingress) {
1812 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1813 		if (MAX_RED_LOOP < ttl++) {
1814 			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
1815 				skb->iif, skb->dev->ifindex);
1816 			return TC_ACT_SHOT;
1817 		}
1818 
1819 		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1820 
1821 		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1822 
1823 		spin_lock(&dev->ingress_lock);
1824 		if ((q = dev->qdisc_ingress) != NULL)
1825 			result = q->enqueue(skb, q);
1826 		spin_unlock(&dev->ingress_lock);
1827 
1828 	}
1829 
1830 	return result;
1831 }
1832 #endif
1833 
1834 int netif_receive_skb(struct sk_buff *skb)
1835 {
1836 	struct packet_type *ptype, *pt_prev;
1837 	struct net_device *orig_dev;
1838 	int ret = NET_RX_DROP;
1839 	__be16 type;
1840 
1841 	/* if we've gotten here through NAPI, check netpoll */
1842 	if (skb->dev->poll && netpoll_rx(skb))
1843 		return NET_RX_DROP;
1844 
1845 	if (!skb->tstamp.tv64)
1846 		net_timestamp(skb);
1847 
1848 	if (!skb->iif)
1849 		skb->iif = skb->dev->ifindex;
1850 
1851 	orig_dev = skb_bond(skb);
1852 
1853 	if (!orig_dev)
1854 		return NET_RX_DROP;
1855 
1856 	__get_cpu_var(netdev_rx_stat).total++;
1857 
1858 	skb_reset_network_header(skb);
1859 	skb_reset_transport_header(skb);
1860 	skb->mac_len = skb->network_header - skb->mac_header;
1861 
1862 	pt_prev = NULL;
1863 
1864 	rcu_read_lock();
1865 
1866 #ifdef CONFIG_NET_CLS_ACT
1867 	if (skb->tc_verd & TC_NCLS) {
1868 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1869 		goto ncls;
1870 	}
1871 #endif
1872 
1873 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1874 		if (!ptype->dev || ptype->dev == skb->dev) {
1875 			if (pt_prev)
1876 				ret = deliver_skb(skb, pt_prev, orig_dev);
1877 			pt_prev = ptype;
1878 		}
1879 	}
1880 
1881 #ifdef CONFIG_NET_CLS_ACT
1882 	if (pt_prev) {
1883 		ret = deliver_skb(skb, pt_prev, orig_dev);
		pt_prev = NULL; /* no one else should process this after */
1885 	} else {
1886 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1887 	}
1888 
1889 	ret = ing_filter(skb);
1890 
	if (ret == TC_ACT_SHOT || ret == TC_ACT_STOLEN) {
1892 		kfree_skb(skb);
1893 		goto out;
1894 	}
1895 
1896 	skb->tc_verd = 0;
1897 ncls:
1898 #endif
1899 
1900 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
1901 	if (!skb)
1902 		goto out;
1903 
1904 	type = skb->protocol;
1905 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1906 		if (ptype->type == type &&
1907 		    (!ptype->dev || ptype->dev == skb->dev)) {
1908 			if (pt_prev)
1909 				ret = deliver_skb(skb, pt_prev, orig_dev);
1910 			pt_prev = ptype;
1911 		}
1912 	}
1913 
1914 	if (pt_prev) {
1915 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1916 	} else {
1917 		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
1920 		 */
1921 		ret = NET_RX_DROP;
1922 	}
1923 
1924 out:
1925 	rcu_read_unlock();
1926 	return ret;
1927 }
1928 
1929 static int process_backlog(struct net_device *backlog_dev, int *budget)
1930 {
1931 	int work = 0;
1932 	int quota = min(backlog_dev->quota, *budget);
1933 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1934 	unsigned long start_time = jiffies;
1935 
1936 	backlog_dev->weight = weight_p;
1937 	for (;;) {
1938 		struct sk_buff *skb;
1939 		struct net_device *dev;
1940 
1941 		local_irq_disable();
1942 		skb = __skb_dequeue(&queue->input_pkt_queue);
1943 		if (!skb)
1944 			goto job_done;
1945 		local_irq_enable();
1946 
1947 		dev = skb->dev;
1948 
1949 		netif_receive_skb(skb);
1950 
1951 		dev_put(dev);
1952 
1953 		work++;
1954 
1955 		if (work >= quota || jiffies - start_time > 1)
1956 			break;
1957 
1958 	}
1959 
1960 	backlog_dev->quota -= work;
1961 	*budget -= work;
1962 	return -1;
1963 
1964 job_done:
1965 	backlog_dev->quota -= work;
1966 	*budget -= work;
1967 
1968 	list_del(&backlog_dev->poll_list);
1969 	smp_mb__before_clear_bit();
1970 	netif_poll_enable(backlog_dev);
1971 
1972 	local_irq_enable();
1973 	return 0;
1974 }
1975 
1976 static void net_rx_action(struct softirq_action *h)
1977 {
1978 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1979 	unsigned long start_time = jiffies;
1980 	int budget = netdev_budget;
1981 	void *have;
1982 
1983 	local_irq_disable();
1984 
1985 	while (!list_empty(&queue->poll_list)) {
1986 		struct net_device *dev;
1987 
1988 		if (budget <= 0 || jiffies - start_time > 1)
1989 			goto softnet_break;
1990 
1991 		local_irq_enable();
1992 
1993 		dev = list_entry(queue->poll_list.next,
1994 				 struct net_device, poll_list);
1995 		have = netpoll_poll_lock(dev);
1996 
1997 		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1998 			netpoll_poll_unlock(have);
1999 			local_irq_disable();
2000 			list_move_tail(&dev->poll_list, &queue->poll_list);
2001 			if (dev->quota < 0)
2002 				dev->quota += dev->weight;
2003 			else
2004 				dev->quota = dev->weight;
2005 		} else {
2006 			netpoll_poll_unlock(have);
2007 			dev_put(dev);
2008 			local_irq_disable();
2009 		}
2010 	}
2011 out:
2012 	local_irq_enable();
2013 #ifdef CONFIG_NET_DMA
2014 	/*
2015 	 * There may not be any more sk_buffs coming right now, so push
2016 	 * any pending DMA copies to hardware
2017 	 */
2018 	if (net_dma_client) {
2019 		struct dma_chan *chan;
2020 		rcu_read_lock();
2021 		list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
2022 			dma_async_memcpy_issue_pending(chan);
2023 		rcu_read_unlock();
2024 	}
2025 #endif
2026 	return;
2027 
2028 softnet_break:
2029 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2030 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2031 	goto out;
2032 }
2033 
static gifconf_func_t *gifconf_list[NPROTO];
2035 
2036 /**
2037  *	register_gifconf	-	register a SIOCGIF handler
2038  *	@family: Address family
2039  *	@gifconf: Function handler
2040  *
2041  *	Register protocol dependent address dumping routines. The handler
2042  *	that is passed must not be freed or reused until it has been replaced
2043  *	by another handler.
2044  */
2045 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2046 {
2047 	if (family >= NPROTO)
2048 		return -EINVAL;
2049 	gifconf_list[family] = gifconf;
2050 	return 0;
2051 }
2052 
2053 
2054 /*
2055  *	Map an interface index to its name (SIOCGIFNAME)
2056  */
2057 
2058 /*
2059  *	We need this ioctl for efficient implementation of the
2060  *	if_indextoname() function required by the IPv6 API.  Without
2061  *	it, we would have to search all the interfaces to find a
2062  *	match.  --pb
2063  */
2064 
2065 static int dev_ifname(struct ifreq __user *arg)
2066 {
2067 	struct net_device *dev;
2068 	struct ifreq ifr;
2069 
2070 	/*
2071 	 *	Fetch the caller's info block.
2072 	 */
2073 
2074 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2075 		return -EFAULT;
2076 
2077 	read_lock(&dev_base_lock);
2078 	dev = __dev_get_by_index(ifr.ifr_ifindex);
2079 	if (!dev) {
2080 		read_unlock(&dev_base_lock);
2081 		return -ENODEV;
2082 	}
2083 
2084 	strcpy(ifr.ifr_name, dev->name);
2085 	read_unlock(&dev_base_lock);
2086 
2087 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2088 		return -EFAULT;
2089 	return 0;
2090 }
2091 
2092 /*
2093  *	Perform a SIOCGIFCONF call. This structure will change
2094  *	size eventually, and there is nothing I can do about it.
2095  *	Thus we will need a 'compatibility mode'.
2096  */
2097 
2098 static int dev_ifconf(char __user *arg)
2099 {
2100 	struct ifconf ifc;
2101 	struct net_device *dev;
2102 	char __user *pos;
2103 	int len;
2104 	int total;
2105 	int i;
2106 
2107 	/*
2108 	 *	Fetch the caller's info block.
2109 	 */
2110 
2111 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2112 		return -EFAULT;
2113 
2114 	pos = ifc.ifc_buf;
2115 	len = ifc.ifc_len;
2116 
2117 	/*
2118 	 *	Loop over the interfaces, and write an info block for each.
2119 	 */
2120 
2121 	total = 0;
2122 	for_each_netdev(dev) {
2123 		for (i = 0; i < NPROTO; i++) {
2124 			if (gifconf_list[i]) {
2125 				int done;
2126 				if (!pos)
2127 					done = gifconf_list[i](dev, NULL, 0);
2128 				else
2129 					done = gifconf_list[i](dev, pos + total,
2130 							       len - total);
2131 				if (done < 0)
2132 					return -EFAULT;
2133 				total += done;
2134 			}
2135 		}
2136 	}
2137 
2138 	/*
2139 	 *	All done.  Write the updated control block back to the caller.
2140 	 */
2141 	ifc.ifc_len = total;
2142 
2143 	/*
2144 	 * 	Both BSD and Solaris return 0 here, so we do too.
2145 	 */
2146 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2147 }
2148 
2149 #ifdef CONFIG_PROC_FS
2150 /*
2151  *	This is invoked by the /proc filesystem handler to display a device
2152  *	in detail.
2153  */
2154 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2155 {
2156 	loff_t off;
2157 	struct net_device *dev;
2158 
2159 	read_lock(&dev_base_lock);
2160 	if (!*pos)
2161 		return SEQ_START_TOKEN;
2162 
2163 	off = 1;
2164 	for_each_netdev(dev)
2165 		if (off++ == *pos)
2166 			return dev;
2167 
2168 	return NULL;
2169 }
2170 
2171 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2172 {
2173 	++*pos;
2174 	return v == SEQ_START_TOKEN ?
2175 		first_net_device() : next_net_device((struct net_device *)v);
2176 }
2177 
2178 void dev_seq_stop(struct seq_file *seq, void *v)
2179 {
2180 	read_unlock(&dev_base_lock);
2181 }
2182 
2183 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2184 {
2185 	struct net_device_stats *stats = dev->get_stats(dev);
2186 
2187 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2188 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2189 		   dev->name, stats->rx_bytes, stats->rx_packets,
2190 		   stats->rx_errors,
2191 		   stats->rx_dropped + stats->rx_missed_errors,
2192 		   stats->rx_fifo_errors,
2193 		   stats->rx_length_errors + stats->rx_over_errors +
2194 		    stats->rx_crc_errors + stats->rx_frame_errors,
2195 		   stats->rx_compressed, stats->multicast,
2196 		   stats->tx_bytes, stats->tx_packets,
2197 		   stats->tx_errors, stats->tx_dropped,
2198 		   stats->tx_fifo_errors, stats->collisions,
2199 		   stats->tx_carrier_errors +
2200 		    stats->tx_aborted_errors +
2201 		    stats->tx_window_errors +
2202 		    stats->tx_heartbeat_errors,
2203 		   stats->tx_compressed);
2204 }
2205 
2206 /*
2207  *	Called from the PROCfs module. This now uses the new arbitrary sized
2208  *	/proc/net interface to create /proc/net/dev
2209  */
2210 static int dev_seq_show(struct seq_file *seq, void *v)
2211 {
2212 	if (v == SEQ_START_TOKEN)
2213 		seq_puts(seq, "Inter-|   Receive                            "
2214 			      "                    |  Transmit\n"
2215 			      " face |bytes    packets errs drop fifo frame "
2216 			      "compressed multicast|bytes    packets errs "
2217 			      "drop fifo colls carrier compressed\n");
2218 	else
2219 		dev_seq_printf_stats(seq, v);
2220 	return 0;
2221 }
2222 
2223 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2224 {
2225 	struct netif_rx_stats *rc = NULL;
2226 
2227 	while (*pos < NR_CPUS)
2228 		if (cpu_online(*pos)) {
2229 			rc = &per_cpu(netdev_rx_stat, *pos);
2230 			break;
2231 		} else
2232 			++*pos;
2233 	return rc;
2234 }
2235 
2236 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2237 {
2238 	return softnet_get_online(pos);
2239 }
2240 
2241 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2242 {
2243 	++*pos;
2244 	return softnet_get_online(pos);
2245 }
2246 
2247 static void softnet_seq_stop(struct seq_file *seq, void *v)
2248 {
2249 }
2250 
2251 static int softnet_seq_show(struct seq_file *seq, void *v)
2252 {
2253 	struct netif_rx_stats *s = v;
2254 
2255 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2256 		   s->total, s->dropped, s->time_squeeze, 0,
2257 		   0, 0, 0, 0, /* was fastroute */
2258 		   s->cpu_collision );
2259 	return 0;
2260 }
2261 
2262 static const struct seq_operations dev_seq_ops = {
2263 	.start = dev_seq_start,
2264 	.next  = dev_seq_next,
2265 	.stop  = dev_seq_stop,
2266 	.show  = dev_seq_show,
2267 };
2268 
2269 static int dev_seq_open(struct inode *inode, struct file *file)
2270 {
2271 	return seq_open(file, &dev_seq_ops);
2272 }
2273 
2274 static const struct file_operations dev_seq_fops = {
2275 	.owner	 = THIS_MODULE,
2276 	.open    = dev_seq_open,
2277 	.read    = seq_read,
2278 	.llseek  = seq_lseek,
2279 	.release = seq_release,
2280 };
2281 
2282 static const struct seq_operations softnet_seq_ops = {
2283 	.start = softnet_seq_start,
2284 	.next  = softnet_seq_next,
2285 	.stop  = softnet_seq_stop,
2286 	.show  = softnet_seq_show,
2287 };
2288 
2289 static int softnet_seq_open(struct inode *inode, struct file *file)
2290 {
2291 	return seq_open(file, &softnet_seq_ops);
2292 }
2293 
2294 static const struct file_operations softnet_seq_fops = {
2295 	.owner	 = THIS_MODULE,
2296 	.open    = softnet_seq_open,
2297 	.read    = seq_read,
2298 	.llseek  = seq_lseek,
2299 	.release = seq_release,
2300 };
2301 
2302 static void *ptype_get_idx(loff_t pos)
2303 {
2304 	struct packet_type *pt = NULL;
2305 	loff_t i = 0;
2306 	int t;
2307 
2308 	list_for_each_entry_rcu(pt, &ptype_all, list) {
2309 		if (i == pos)
2310 			return pt;
2311 		++i;
2312 	}
2313 
2314 	for (t = 0; t < 16; t++) {
2315 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2316 			if (i == pos)
2317 				return pt;
2318 			++i;
2319 		}
2320 	}
2321 	return NULL;
2322 }
2323 
2324 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2325 {
2326 	rcu_read_lock();
2327 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2328 }
2329 
2330 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2331 {
2332 	struct packet_type *pt;
2333 	struct list_head *nxt;
2334 	int hash;
2335 
2336 	++*pos;
2337 	if (v == SEQ_START_TOKEN)
2338 		return ptype_get_idx(0);
2339 
2340 	pt = v;
2341 	nxt = pt->list.next;
2342 	if (pt->type == htons(ETH_P_ALL)) {
2343 		if (nxt != &ptype_all)
2344 			goto found;
2345 		hash = 0;
2346 		nxt = ptype_base[0].next;
2347 	} else
2348 		hash = ntohs(pt->type) & 15;
2349 
2350 	while (nxt == &ptype_base[hash]) {
2351 		if (++hash >= 16)
2352 			return NULL;
2353 		nxt = ptype_base[hash].next;
2354 	}
2355 found:
2356 	return list_entry(nxt, struct packet_type, list);
2357 }
2358 
2359 static void ptype_seq_stop(struct seq_file *seq, void *v)
2360 {
2361 	rcu_read_unlock();
2362 }
2363 
2364 static void ptype_seq_decode(struct seq_file *seq, void *sym)
2365 {
2366 #ifdef CONFIG_KALLSYMS
2367 	unsigned long offset = 0, symsize;
2368 	const char *symname;
2369 	char *modname;
2370 	char namebuf[128];
2371 
2372 	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2373 				  &modname, namebuf);
2374 
2375 	if (symname) {
2376 		char *delim = ":";
2377 
2378 		if (!modname)
2379 			modname = delim = "";
2380 		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2381 			   symname, offset);
2382 		return;
2383 	}
2384 #endif
2385 
2386 	seq_printf(seq, "[%p]", sym);
2387 }
2388 
2389 static int ptype_seq_show(struct seq_file *seq, void *v)
2390 {
2391 	struct packet_type *pt = v;
2392 
2393 	if (v == SEQ_START_TOKEN)
2394 		seq_puts(seq, "Type Device      Function\n");
2395 	else {
2396 		if (pt->type == htons(ETH_P_ALL))
2397 			seq_puts(seq, "ALL ");
2398 		else
2399 			seq_printf(seq, "%04x", ntohs(pt->type));
2400 
2401 		seq_printf(seq, " %-8s ",
2402 			   pt->dev ? pt->dev->name : "");
2403 		ptype_seq_decode(seq,  pt->func);
2404 		seq_putc(seq, '\n');
2405 	}
2406 
2407 	return 0;
2408 }
2409 
2410 static const struct seq_operations ptype_seq_ops = {
2411 	.start = ptype_seq_start,
2412 	.next  = ptype_seq_next,
2413 	.stop  = ptype_seq_stop,
2414 	.show  = ptype_seq_show,
2415 };
2416 
2417 static int ptype_seq_open(struct inode *inode, struct file *file)
2418 {
2419 	return seq_open(file, &ptype_seq_ops);
2420 }
2421 
2422 static const struct file_operations ptype_seq_fops = {
2423 	.owner	 = THIS_MODULE,
2424 	.open    = ptype_seq_open,
2425 	.read    = seq_read,
2426 	.llseek  = seq_lseek,
2427 	.release = seq_release,
2428 };
2429 
2430 
2431 static int __init dev_proc_init(void)
2432 {
2433 	int rc = -ENOMEM;
2434 
2435 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2436 		goto out;
2437 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2438 		goto out_dev;
2439 	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
2440 		goto out_dev2;
2441 
2442 	if (wext_proc_init())
2443 		goto out_softnet;
2444 	rc = 0;
2445 out:
2446 	return rc;
2447 out_softnet:
2448 	proc_net_remove("ptype");
2449 out_dev2:
2450 	proc_net_remove("softnet_stat");
2451 out_dev:
2452 	proc_net_remove("dev");
2453 	goto out;
2454 }
2455 #else
2456 #define dev_proc_init() 0
2457 #endif	/* CONFIG_PROC_FS */
2458 
2459 
2460 /**
2461  *	netdev_set_master	-	set up master/slave pair
2462  *	@slave: slave device
2463  *	@master: new master device
2464  *
2465  *	Changes the master device of the slave. Pass %NULL to break the
2466  *	bonding. The caller must hold the RTNL semaphore. On a failure
2467  *	a negative errno code is returned. On success the reference counts
2468  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2469  *	function returns zero.
2470  */
2471 int netdev_set_master(struct net_device *slave, struct net_device *master)
2472 {
2473 	struct net_device *old = slave->master;
2474 
2475 	ASSERT_RTNL();
2476 
2477 	if (master) {
2478 		if (old)
2479 			return -EBUSY;
2480 		dev_hold(master);
2481 	}
2482 
2483 	slave->master = master;
2484 
2485 	synchronize_net();
2486 
2487 	if (old)
2488 		dev_put(old);
2489 
2490 	if (master)
2491 		slave->flags |= IFF_SLAVE;
2492 	else
2493 		slave->flags &= ~IFF_SLAVE;
2494 
2495 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2496 	return 0;
2497 }
2498 
2499 /**
2500  *	dev_set_promiscuity	- update promiscuity count on a device
2501  *	@dev: device
2502  *	@inc: modifier
2503  *
2504  *	Add or remove promiscuity from a device. While the count in the device
2505  *	remains above zero the interface remains promiscuous. Once it hits zero
2506  *	the device reverts back to normal filtering operation. A negative inc
2507  *	value is used to drop promiscuity on the device.
2508  */
2509 void dev_set_promiscuity(struct net_device *dev, int inc)
2510 {
2511 	unsigned short old_flags = dev->flags;
2512 
2513 	if ((dev->promiscuity += inc) == 0)
2514 		dev->flags &= ~IFF_PROMISC;
2515 	else
2516 		dev->flags |= IFF_PROMISC;
2517 	if (dev->flags != old_flags) {
2518 		dev_mc_upload(dev);
2519 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2520 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2521 							       "left");
2522 		audit_log(current->audit_context, GFP_ATOMIC,
2523 			AUDIT_ANOM_PROMISCUOUS,
2524 			"dev=%s prom=%d old_prom=%d auid=%u",
2525 			dev->name, (dev->flags & IFF_PROMISC),
2526 			(old_flags & IFF_PROMISC),
2527 			audit_get_loginuid(current->audit_context));
2528 	}
2529 }
2530 
2531 /**
2532  *	dev_set_allmulti	- update allmulti count on a device
2533  *	@dev: device
2534  *	@inc: modifier
2535  *
2536  *	Add or remove reception of all multicast frames to a device. While the
2537  *	count in the device remains above zero the interface remains listening
2538  *	to all interfaces. Once it hits zero the device reverts back to normal
2539  *	filtering operation. A negative @inc value is used to drop the counter
2540  *	when releasing a resource needing all multicasts.
2541  */
2542 
2543 void dev_set_allmulti(struct net_device *dev, int inc)
2544 {
2545 	unsigned short old_flags = dev->flags;
2546 
2547 	dev->flags |= IFF_ALLMULTI;
2548 	if ((dev->allmulti += inc) == 0)
2549 		dev->flags &= ~IFF_ALLMULTI;
2550 	if (dev->flags ^ old_flags)
2551 		dev_mc_upload(dev);
2552 }
2553 
2554 unsigned dev_get_flags(const struct net_device *dev)
2555 {
2556 	unsigned flags;
2557 
2558 	flags = (dev->flags & ~(IFF_PROMISC |
2559 				IFF_ALLMULTI |
2560 				IFF_RUNNING |
2561 				IFF_LOWER_UP |
2562 				IFF_DORMANT)) |
2563 		(dev->gflags & (IFF_PROMISC |
2564 				IFF_ALLMULTI));
2565 
2566 	if (netif_running(dev)) {
2567 		if (netif_oper_up(dev))
2568 			flags |= IFF_RUNNING;
2569 		if (netif_carrier_ok(dev))
2570 			flags |= IFF_LOWER_UP;
2571 		if (netif_dormant(dev))
2572 			flags |= IFF_DORMANT;
2573 	}
2574 
2575 	return flags;
2576 }
2577 
2578 int dev_change_flags(struct net_device *dev, unsigned flags)
2579 {
2580 	int ret, changes;
2581 	int old_flags = dev->flags;
2582 
2583 	/*
2584 	 *	Set the flags on our device.
2585 	 */
2586 
2587 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2588 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2589 			       IFF_AUTOMEDIA)) |
2590 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2591 				    IFF_ALLMULTI));
2592 
2593 	/*
2594 	 *	Load in the correct multicast list now the flags have changed.
2595 	 */
2596 
2597 	dev_mc_upload(dev);
2598 
2599 	/*
2600 	 *	Have we downed the interface. We handle IFF_UP ourselves
2601 	 *	according to user attempts to set it, rather than blindly
2602 	 *	setting it.
2603 	 */
2604 
2605 	ret = 0;
2606 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
2607 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2608 
2609 		if (!ret)
2610 			dev_mc_upload(dev);
2611 	}
2612 
2613 	if (dev->flags & IFF_UP &&
2614 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2615 					  IFF_VOLATILE)))
2616 		raw_notifier_call_chain(&netdev_chain,
2617 				NETDEV_CHANGE, dev);
2618 
2619 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2620 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2621 		dev->gflags ^= IFF_PROMISC;
2622 		dev_set_promiscuity(dev, inc);
2623 	}
2624 
	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important.  Some (broken) drivers set IFF_PROMISC when
	   IFF_ALLMULTI is requested, without asking us and without reporting.
2628 	 */
2629 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2630 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2631 		dev->gflags ^= IFF_ALLMULTI;
2632 		dev_set_allmulti(dev, inc);
2633 	}
2634 
2635 	/* Exclude state transition flags, already notified */
2636 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
2637 	if (changes)
2638 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
2639 
2640 	return ret;
2641 }
2642 
2643 int dev_set_mtu(struct net_device *dev, int new_mtu)
2644 {
2645 	int err;
2646 
2647 	if (new_mtu == dev->mtu)
2648 		return 0;
2649 
2650 	/*	MTU must be positive.	 */
2651 	if (new_mtu < 0)
2652 		return -EINVAL;
2653 
2654 	if (!netif_device_present(dev))
2655 		return -ENODEV;
2656 
2657 	err = 0;
2658 	if (dev->change_mtu)
2659 		err = dev->change_mtu(dev, new_mtu);
2660 	else
2661 		dev->mtu = new_mtu;
2662 	if (!err && dev->flags & IFF_UP)
2663 		raw_notifier_call_chain(&netdev_chain,
2664 				NETDEV_CHANGEMTU, dev);
2665 	return err;
2666 }
2667 
2668 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2669 {
2670 	int err;
2671 
2672 	if (!dev->set_mac_address)
2673 		return -EOPNOTSUPP;
2674 	if (sa->sa_family != dev->type)
2675 		return -EINVAL;
2676 	if (!netif_device_present(dev))
2677 		return -ENODEV;
2678 	err = dev->set_mac_address(dev, sa);
2679 	if (!err)
2680 		raw_notifier_call_chain(&netdev_chain,
2681 				NETDEV_CHANGEADDR, dev);
2682 	return err;
2683 }
2684 
2685 /*
2686  *	Perform the SIOCxIFxxx calls.
2687  */
2688 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2689 {
2690 	int err;
2691 	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2692 
2693 	if (!dev)
2694 		return -ENODEV;
2695 
2696 	switch (cmd) {
2697 		case SIOCGIFFLAGS:	/* Get interface flags */
2698 			ifr->ifr_flags = dev_get_flags(dev);
2699 			return 0;
2700 
2701 		case SIOCSIFFLAGS:	/* Set interface flags */
2702 			return dev_change_flags(dev, ifr->ifr_flags);
2703 
2704 		case SIOCGIFMETRIC:	/* Get the metric on the interface
2705 					   (currently unused) */
2706 			ifr->ifr_metric = 0;
2707 			return 0;
2708 
2709 		case SIOCSIFMETRIC:	/* Set the metric on the interface
2710 					   (currently unused) */
2711 			return -EOPNOTSUPP;
2712 
2713 		case SIOCGIFMTU:	/* Get the MTU of a device */
2714 			ifr->ifr_mtu = dev->mtu;
2715 			return 0;
2716 
2717 		case SIOCSIFMTU:	/* Set the MTU of a device */
2718 			return dev_set_mtu(dev, ifr->ifr_mtu);
2719 
2720 		case SIOCGIFHWADDR:
2721 			if (!dev->addr_len)
2722 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2723 			else
2724 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2725 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2726 			ifr->ifr_hwaddr.sa_family = dev->type;
2727 			return 0;
2728 
2729 		case SIOCSIFHWADDR:
2730 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2731 
2732 		case SIOCSIFHWBROADCAST:
2733 			if (ifr->ifr_hwaddr.sa_family != dev->type)
2734 				return -EINVAL;
2735 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2736 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2737 			raw_notifier_call_chain(&netdev_chain,
2738 					    NETDEV_CHANGEADDR, dev);
2739 			return 0;
2740 
2741 		case SIOCGIFMAP:
2742 			ifr->ifr_map.mem_start = dev->mem_start;
2743 			ifr->ifr_map.mem_end   = dev->mem_end;
2744 			ifr->ifr_map.base_addr = dev->base_addr;
2745 			ifr->ifr_map.irq       = dev->irq;
2746 			ifr->ifr_map.dma       = dev->dma;
2747 			ifr->ifr_map.port      = dev->if_port;
2748 			return 0;
2749 
2750 		case SIOCSIFMAP:
2751 			if (dev->set_config) {
2752 				if (!netif_device_present(dev))
2753 					return -ENODEV;
2754 				return dev->set_config(dev, &ifr->ifr_map);
2755 			}
2756 			return -EOPNOTSUPP;
2757 
2758 		case SIOCADDMULTI:
2759 			if (!dev->set_multicast_list ||
2760 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2761 				return -EINVAL;
2762 			if (!netif_device_present(dev))
2763 				return -ENODEV;
2764 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2765 					  dev->addr_len, 1);
2766 
2767 		case SIOCDELMULTI:
2768 			if (!dev->set_multicast_list ||
2769 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2770 				return -EINVAL;
2771 			if (!netif_device_present(dev))
2772 				return -ENODEV;
2773 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2774 					     dev->addr_len, 1);
2775 
2776 		case SIOCGIFINDEX:
2777 			ifr->ifr_ifindex = dev->ifindex;
2778 			return 0;
2779 
2780 		case SIOCGIFTXQLEN:
2781 			ifr->ifr_qlen = dev->tx_queue_len;
2782 			return 0;
2783 
2784 		case SIOCSIFTXQLEN:
2785 			if (ifr->ifr_qlen < 0)
2786 				return -EINVAL;
2787 			dev->tx_queue_len = ifr->ifr_qlen;
2788 			return 0;
2789 
2790 		case SIOCSIFNAME:
2791 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2792 			return dev_change_name(dev, ifr->ifr_newname);
2793 
2794 		/*
2795 		 *	Unknown or private ioctl
2796 		 */
2797 
2798 		default:
2799 			if ((cmd >= SIOCDEVPRIVATE &&
2800 			    cmd <= SIOCDEVPRIVATE + 15) ||
2801 			    cmd == SIOCBONDENSLAVE ||
2802 			    cmd == SIOCBONDRELEASE ||
2803 			    cmd == SIOCBONDSETHWADDR ||
2804 			    cmd == SIOCBONDSLAVEINFOQUERY ||
2805 			    cmd == SIOCBONDINFOQUERY ||
2806 			    cmd == SIOCBONDCHANGEACTIVE ||
2807 			    cmd == SIOCGMIIPHY ||
2808 			    cmd == SIOCGMIIREG ||
2809 			    cmd == SIOCSMIIREG ||
2810 			    cmd == SIOCBRADDIF ||
2811 			    cmd == SIOCBRDELIF ||
2812 			    cmd == SIOCWANDEV) {
2813 				err = -EOPNOTSUPP;
2814 				if (dev->do_ioctl) {
2815 					if (netif_device_present(dev))
2816 						err = dev->do_ioctl(dev, ifr,
2817 								    cmd);
2818 					else
2819 						err = -ENODEV;
2820 				}
2821 			} else
2822 				err = -EINVAL;
2823 
2824 	}
2825 	return err;
2826 }
2827 
2828 /*
2829  *	This function handles all "interface"-type I/O control requests. The actual
2830  *	'doing' part of this is dev_ifsioc above.
2831  */
2832 
2833 /**
2834  *	dev_ioctl	-	network device ioctl
2835  *	@cmd: command to issue
2836  *	@arg: pointer to a struct ifreq in user space
2837  *
2838  *	Issue ioctl functions to devices. This is normally called by the
2839  *	user space syscall interfaces but can sometimes be useful for
2840  *	other purposes. The return value is the return from the syscall if
2841  *	positive or a negative errno code on error.
2842  */
2843 
2844 int dev_ioctl(unsigned int cmd, void __user *arg)
2845 {
2846 	struct ifreq ifr;
2847 	int ret;
2848 	char *colon;
2849 
	/* One special case: SIOCGIFCONF takes an ifconf argument
	   and requires a shared lock, because it sleeps while writing
	   to user space.
2853 	 */
2854 
2855 	if (cmd == SIOCGIFCONF) {
2856 		rtnl_lock();
2857 		ret = dev_ifconf((char __user *) arg);
2858 		rtnl_unlock();
2859 		return ret;
2860 	}
2861 	if (cmd == SIOCGIFNAME)
2862 		return dev_ifname((struct ifreq __user *)arg);
2863 
2864 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2865 		return -EFAULT;
2866 
2867 	ifr.ifr_name[IFNAMSIZ-1] = 0;
2868 
2869 	colon = strchr(ifr.ifr_name, ':');
2870 	if (colon)
2871 		*colon = 0;
2872 
2873 	/*
2874 	 *	See which interface the caller is talking about.
2875 	 */
2876 
2877 	switch (cmd) {
2878 		/*
2879 		 *	These ioctl calls:
2880 		 *	- can be done by all.
2881 		 *	- atomic and do not require locking.
2882 		 *	- return a value
2883 		 */
2884 		case SIOCGIFFLAGS:
2885 		case SIOCGIFMETRIC:
2886 		case SIOCGIFMTU:
2887 		case SIOCGIFHWADDR:
2888 		case SIOCGIFSLAVE:
2889 		case SIOCGIFMAP:
2890 		case SIOCGIFINDEX:
2891 		case SIOCGIFTXQLEN:
2892 			dev_load(ifr.ifr_name);
2893 			read_lock(&dev_base_lock);
2894 			ret = dev_ifsioc(&ifr, cmd);
2895 			read_unlock(&dev_base_lock);
2896 			if (!ret) {
2897 				if (colon)
2898 					*colon = ':';
2899 				if (copy_to_user(arg, &ifr,
2900 						 sizeof(struct ifreq)))
2901 					ret = -EFAULT;
2902 			}
2903 			return ret;
2904 
2905 		case SIOCETHTOOL:
2906 			dev_load(ifr.ifr_name);
2907 			rtnl_lock();
2908 			ret = dev_ethtool(&ifr);
2909 			rtnl_unlock();
2910 			if (!ret) {
2911 				if (colon)
2912 					*colon = ':';
2913 				if (copy_to_user(arg, &ifr,
2914 						 sizeof(struct ifreq)))
2915 					ret = -EFAULT;
2916 			}
2917 			return ret;
2918 
2919 		/*
2920 		 *	These ioctl calls:
2921 		 *	- require superuser power.
2922 		 *	- require strict serialization.
2923 		 *	- return a value
2924 		 */
2925 		case SIOCGMIIPHY:
2926 		case SIOCGMIIREG:
2927 		case SIOCSIFNAME:
2928 			if (!capable(CAP_NET_ADMIN))
2929 				return -EPERM;
2930 			dev_load(ifr.ifr_name);
2931 			rtnl_lock();
2932 			ret = dev_ifsioc(&ifr, cmd);
2933 			rtnl_unlock();
2934 			if (!ret) {
2935 				if (colon)
2936 					*colon = ':';
2937 				if (copy_to_user(arg, &ifr,
2938 						 sizeof(struct ifreq)))
2939 					ret = -EFAULT;
2940 			}
2941 			return ret;
2942 
2943 		/*
2944 		 *	These ioctl calls:
2945 		 *	- require superuser power.
2946 		 *	- require strict serialization.
2947 		 *	- do not return a value
2948 		 */
2949 		case SIOCSIFFLAGS:
2950 		case SIOCSIFMETRIC:
2951 		case SIOCSIFMTU:
2952 		case SIOCSIFMAP:
2953 		case SIOCSIFHWADDR:
2954 		case SIOCSIFSLAVE:
2955 		case SIOCADDMULTI:
2956 		case SIOCDELMULTI:
2957 		case SIOCSIFHWBROADCAST:
2958 		case SIOCSIFTXQLEN:
2959 		case SIOCSMIIREG:
2960 		case SIOCBONDENSLAVE:
2961 		case SIOCBONDRELEASE:
2962 		case SIOCBONDSETHWADDR:
2963 		case SIOCBONDCHANGEACTIVE:
2964 		case SIOCBRADDIF:
2965 		case SIOCBRDELIF:
2966 			if (!capable(CAP_NET_ADMIN))
2967 				return -EPERM;
2968 			/* fall through */
2969 		case SIOCBONDSLAVEINFOQUERY:
2970 		case SIOCBONDINFOQUERY:
2971 			dev_load(ifr.ifr_name);
2972 			rtnl_lock();
2973 			ret = dev_ifsioc(&ifr, cmd);
2974 			rtnl_unlock();
2975 			return ret;
2976 
2977 		case SIOCGIFMEM:
2978 			/* Get the per device memory space. We can add this but
2979 			 * currently do not support it */
2980 		case SIOCSIFMEM:
2981 			/* Set the per device memory buffer space.
2982 			 * Not applicable in our case */
2983 		case SIOCSIFLINK:
2984 			return -EINVAL;
2985 
2986 		/*
2987 		 *	Unknown or private ioctl.
2988 		 */
2989 		default:
2990 			if (cmd == SIOCWANDEV ||
2991 			    (cmd >= SIOCDEVPRIVATE &&
2992 			     cmd <= SIOCDEVPRIVATE + 15)) {
2993 				dev_load(ifr.ifr_name);
2994 				rtnl_lock();
2995 				ret = dev_ifsioc(&ifr, cmd);
2996 				rtnl_unlock();
2997 				if (!ret && copy_to_user(arg, &ifr,
2998 							 sizeof(struct ifreq)))
2999 					ret = -EFAULT;
3000 				return ret;
3001 			}
3002 			/* Take care of Wireless Extensions */
3003 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3004 				return wext_handle_ioctl(&ifr, cmd, arg);
3005 			return -EINVAL;
3006 	}
3007 }
3008 
3009 
3010 /**
3011  *	dev_new_index	-	allocate an ifindex
3012  *
3013  *	Returns a suitable unique value for a new device interface
3014  *	number.  The caller must hold the rtnl semaphore or the
3015  *	dev_base_lock to be sure it remains unique.
3016  */
3017 static int dev_new_index(void)
3018 {
3019 	static int ifindex;
3020 	for (;;) {
3021 		if (++ifindex <= 0)
3022 			ifindex = 1;
3023 		if (!__dev_get_by_index(ifindex))
3024 			return ifindex;
3025 	}
3026 }
3027 
3028 static int dev_boot_phase = 1;
3029 
/* Delayed registration/unregistration */
3031 static DEFINE_SPINLOCK(net_todo_list_lock);
3032 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3033 
3034 static void net_set_todo(struct net_device *dev)
3035 {
3036 	spin_lock(&net_todo_list_lock);
3037 	list_add_tail(&dev->todo_list, &net_todo_list);
3038 	spin_unlock(&net_todo_list_lock);
3039 }
3040 
3041 /**
3042  *	register_netdevice	- register a network device
3043  *	@dev: device to register
3044  *
3045  *	Take a completed network device structure and add it to the kernel
3046  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3047  *	chain. 0 is returned on success. A negative errno code is returned
3048  *	on a failure to set up the device, or if the name is a duplicate.
3049  *
3050  *	Callers must hold the rtnl semaphore. You may want
3051  *	register_netdev() instead of this.
3052  *
3053  *	BUGS:
3054  *	The locking appears insufficient to guarantee two parallel registers
3055  *	will not get the same name.
3056  */
3057 
3058 int register_netdevice(struct net_device *dev)
3059 {
3060 	struct hlist_head *head;
3061 	struct hlist_node *p;
3062 	int ret;
3063 
3064 	BUG_ON(dev_boot_phase);
3065 	ASSERT_RTNL();
3066 
3067 	might_sleep();
3068 
	/* When net_devices are persistent, this will be fatal. */
3070 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3071 
3072 	spin_lock_init(&dev->queue_lock);
3073 	spin_lock_init(&dev->_xmit_lock);
3074 	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3075 	dev->xmit_lock_owner = -1;
3076 	spin_lock_init(&dev->ingress_lock);
3077 
3078 	dev->iflink = -1;
3079 
3080 	/* Init, if this function is available */
3081 	if (dev->init) {
3082 		ret = dev->init(dev);
3083 		if (ret) {
3084 			if (ret > 0)
3085 				ret = -EIO;
3086 			goto out;
3087 		}
3088 	}
3089 
3090 	if (!dev_valid_name(dev->name)) {
3091 		ret = -EINVAL;
3092 		goto out;
3093 	}
3094 
3095 	dev->ifindex = dev_new_index();
3096 	if (dev->iflink == -1)
3097 		dev->iflink = dev->ifindex;
3098 
3099 	/* Check for existence of name */
3100 	head = dev_name_hash(dev->name);
3101 	hlist_for_each(p, head) {
3102 		struct net_device *d
3103 			= hlist_entry(p, struct net_device, name_hlist);
3104 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3105 			ret = -EEXIST;
3106 			goto out;
3107 		}
3108 	}
3109 
3110 	/* Fix illegal SG+CSUM combinations. */
3111 	if ((dev->features & NETIF_F_SG) &&
3112 	    !(dev->features & NETIF_F_ALL_CSUM)) {
3113 		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3114 		       dev->name);
3115 		dev->features &= ~NETIF_F_SG;
3116 	}
3117 
3118 	/* TSO requires that SG is present as well. */
3119 	if ((dev->features & NETIF_F_TSO) &&
3120 	    !(dev->features & NETIF_F_SG)) {
3121 		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3122 		       dev->name);
3123 		dev->features &= ~NETIF_F_TSO;
3124 	}
3125 	if (dev->features & NETIF_F_UFO) {
3126 		if (!(dev->features & NETIF_F_HW_CSUM)) {
3127 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3128 					"NETIF_F_HW_CSUM feature.\n",
3129 							dev->name);
3130 			dev->features &= ~NETIF_F_UFO;
3131 		}
3132 		if (!(dev->features & NETIF_F_SG)) {
3133 			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3134 					"NETIF_F_SG feature.\n",
3135 					dev->name);
3136 			dev->features &= ~NETIF_F_UFO;
3137 		}
3138 	}
3139 
3140 	/*
3141 	 *	nil rebuild_header routine,
3142 	 *	that should be never called and used as just bug trap.
3143 	 */
3144 
3145 	if (!dev->rebuild_header)
3146 		dev->rebuild_header = default_rebuild_header;
3147 
3148 	ret = netdev_register_sysfs(dev);
3149 	if (ret)
3150 		goto out;
3151 	dev->reg_state = NETREG_REGISTERED;
3152 
3153 	/*
3154 	 *	Default initial state at registry is that the
3155 	 *	device is present.
3156 	 */
3157 
3158 	set_bit(__LINK_STATE_PRESENT, &dev->state);
3159 
3160 	dev_init_scheduler(dev);
3161 	write_lock_bh(&dev_base_lock);
3162 	list_add_tail(&dev->dev_list, &dev_base_head);
3163 	hlist_add_head(&dev->name_hlist, head);
3164 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3165 	dev_hold(dev);
3166 	write_unlock_bh(&dev_base_lock);
3167 
	/* Notify protocols that a new device appeared. */
3169 	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3170 
3171 	ret = 0;
3172 
3173 out:
3174 	return ret;
3175 }
3176 
3177 /**
3178  *	register_netdev	- register a network device
3179  *	@dev: device to register
3180  *
3181  *	Take a completed network device structure and add it to the kernel
3182  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3183  *	chain. 0 is returned on success. A negative errno code is returned
3184  *	on a failure to set up the device, or if the name is a duplicate.
3185  *
3186  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
3187  *	and expands the device name if you passed a format string to
3188  *	alloc_netdev.
3189  */
3190 int register_netdev(struct net_device *dev)
3191 {
3192 	int err;
3193 
3194 	rtnl_lock();
3195 
3196 	/*
3197 	 * If the name is a format string the caller wants us to do a
3198 	 * name allocation.
3199 	 */
3200 	if (strchr(dev->name, '%')) {
3201 		err = dev_alloc_name(dev, dev->name);
3202 		if (err < 0)
3203 			goto out;
3204 	}
3205 
3206 	err = register_netdevice(dev);
3207 out:
3208 	rtnl_unlock();
3209 	return err;
3210 }
3211 EXPORT_SYMBOL(register_netdev);
3212 
3213 /*
3214  * netdev_wait_allrefs - wait until all references are gone.
3215  *
3216  * This is called when unregistering network devices.
3217  *
3218  * Any protocol or device that holds a reference should register
3219  * for netdevice notification, and cleanup and put back the
3220  * reference if they receive an UNREGISTER event.
3221  * We can get stuck here if buggy protocols don't correctly
3222  * call dev_put.
3223  */
3224 static void netdev_wait_allrefs(struct net_device *dev)
3225 {
3226 	unsigned long rebroadcast_time, warning_time;
3227 
3228 	rebroadcast_time = warning_time = jiffies;
3229 	while (atomic_read(&dev->refcnt) != 0) {
3230 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3231 			rtnl_lock();
3232 
3233 			/* Rebroadcast unregister notification */
3234 			raw_notifier_call_chain(&netdev_chain,
3235 					    NETDEV_UNREGISTER, dev);
3236 
3237 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3238 				     &dev->state)) {
3239 				/* We must not have linkwatch events
3240 				 * pending on unregister. If this
3241 				 * happens, we simply run the queue
3242 				 * unscheduled, resulting in a noop
3243 				 * for this device.
3244 				 */
3245 				linkwatch_run_queue();
3246 			}
3247 
3248 			__rtnl_unlock();
3249 
3250 			rebroadcast_time = jiffies;
3251 		}
3252 
3253 		msleep(250);
3254 
3255 		if (time_after(jiffies, warning_time + 10 * HZ)) {
3256 			printk(KERN_EMERG "unregister_netdevice: "
3257 			       "waiting for %s to become free. Usage "
3258 			       "count = %d\n",
3259 			       dev->name, atomic_read(&dev->refcnt));
3260 			warning_time = jiffies;
3261 		}
3262 	}
3263 }
3264 
3265 /* The sequence is:
3266  *
3267  *	rtnl_lock();
3268  *	...
3269  *	register_netdevice(x1);
3270  *	register_netdevice(x2);
3271  *	...
3272  *	unregister_netdevice(y1);
3273  *	unregister_netdevice(y2);
3274  *      ...
3275  *	rtnl_unlock();
3276  *	free_netdev(y1);
3277  *	free_netdev(y2);
3278  *
3279  * We are invoked by rtnl_unlock() after it drops the semaphore.
3280  * This allows us to deal with problems:
3281  * 1) We can delete sysfs objects which invoke hotplug
3282  *    without deadlocking with linkwatch via keventd.
3283  * 2) Since we run with the RTNL semaphore not held, we can sleep
3284  *    safely in order to wait for the netdev refcnt to drop to zero.
3285  */
3286 static DEFINE_MUTEX(net_todo_run_mutex);
3287 void netdev_run_todo(void)
3288 {
3289 	struct list_head list;
3290 
	/* Need to guard against multiple CPUs getting out of order. */
3292 	mutex_lock(&net_todo_run_mutex);
3293 
3294 	/* Not safe to do outside the semaphore.  We must not return
3295 	 * until all unregister events invoked by the local processor
3296 	 * have been completed (either by this todo run, or one on
3297 	 * another cpu).
3298 	 */
3299 	if (list_empty(&net_todo_list))
3300 		goto out;
3301 
3302 	/* Snapshot list, allow later requests */
3303 	spin_lock(&net_todo_list_lock);
3304 	list_replace_init(&net_todo_list, &list);
3305 	spin_unlock(&net_todo_list_lock);
3306 
3307 	while (!list_empty(&list)) {
3308 		struct net_device *dev
3309 			= list_entry(list.next, struct net_device, todo_list);
3310 		list_del(&dev->todo_list);
3311 
3312 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3313 			printk(KERN_ERR "network todo '%s' but state %d\n",
3314 			       dev->name, dev->reg_state);
3315 			dump_stack();
3316 			continue;
3317 		}
3318 
3319 		dev->reg_state = NETREG_UNREGISTERED;
3320 
3321 		netdev_wait_allrefs(dev);
3322 
3323 		/* paranoia */
3324 		BUG_ON(atomic_read(&dev->refcnt));
3325 		BUG_TRAP(!dev->ip_ptr);
3326 		BUG_TRAP(!dev->ip6_ptr);
3327 		BUG_TRAP(!dev->dn_ptr);
3328 
3329 		if (dev->destructor)
3330 			dev->destructor(dev);
3331 
3332 		/* Free network device */
3333 		kobject_put(&dev->dev.kobj);
3334 	}
3335 
3336 out:
3337 	mutex_unlock(&net_todo_run_mutex);
3338 }
3339 
3340 static struct net_device_stats *internal_stats(struct net_device *dev)
3341 {
3342 	return &dev->stats;
3343 }
3344 
3345 /**
3346  *	alloc_netdev - allocate network device
3347  *	@sizeof_priv:	size of private data to allocate space for
3348  *	@name:		device name format string
3349  *	@setup:		callback to initialize device
3350  *
3351  *	Allocates a struct net_device with private data area for driver use
3352  *	and performs basic initialization.
3353  */
3354 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3355 		void (*setup)(struct net_device *))
3356 {
3357 	void *p;
3358 	struct net_device *dev;
3359 	int alloc_size;
3360 
3361 	BUG_ON(strlen(name) >= sizeof(dev->name));
3362 
3363 	/* ensure 32-byte alignment of both the device and private area */
3364 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3365 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3366 
3367 	p = kzalloc(alloc_size, GFP_KERNEL);
3368 	if (!p) {
3369 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3370 		return NULL;
3371 	}
3372 
3373 	dev = (struct net_device *)
3374 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3375 	dev->padded = (char *)dev - (char *)p;
3376 
3377 	if (sizeof_priv)
3378 		dev->priv = netdev_priv(dev);
3379 
3380 	dev->get_stats = internal_stats;
3381 	setup(dev);
3382 	strcpy(dev->name, name);
3383 	return dev;
3384 }
3385 EXPORT_SYMBOL(alloc_netdev);
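
/*
 * Minimal driver-side sketch of the allocate/register sequence (the names
 * my_priv, my_setup and "mydev%d" are hypothetical):
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "mydev%d", my_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);	(takes the RTNL lock internally)
 *	if (err)
 *		free_netdev(dev);	(undo a failed registration)
 */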
3386 
3387 /**
3388  *	free_netdev - free network device
3389  *	@dev: device
3390  *
3391  *	This function does the last stage of destroying an allocated device
3392  * 	interface. The reference to the device object is released.
3393  *	If this is the last reference then it will be freed.
3394  */
3395 void free_netdev(struct net_device *dev)
3396 {
3397 #ifdef CONFIG_SYSFS
3398 	/*  Compatibility with error handling in drivers */
3399 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3400 		kfree((char *)dev - dev->padded);
3401 		return;
3402 	}
3403 
3404 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3405 	dev->reg_state = NETREG_RELEASED;
3406 
3407 	/* will free via device release */
3408 	put_device(&dev->dev);
3409 #else
3410 	kfree((char *)dev - dev->padded);
3411 #endif
3412 }
3413 
3414 /* Synchronize with packet receive processing. */
3415 void synchronize_net(void)
3416 {
3417 	might_sleep();
3418 	synchronize_rcu();
3419 }
3420 
3421 /**
3422  *	unregister_netdevice - remove device from the kernel
3423  *	@dev: device
3424  *
3425  *	This function shuts down a device interface and removes it
3426  *	from the kernel tables. On success 0 is returned, on a failure
3427  *	a negative errno code is returned.
3428  *
3429  *	Callers must hold the rtnl semaphore.  You may want
3430  *	unregister_netdev() instead of this.
3431  */
3432 
3433 void unregister_netdevice(struct net_device *dev)
3434 {
3435 	BUG_ON(dev_boot_phase);
3436 	ASSERT_RTNL();
3437 
	/* Some devices call this without having registered, to unwind a
	 * failed initialization.
	 */
3439 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3440 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3441 				  "was registered\n", dev->name, dev);
3442 
3443 		WARN_ON(1);
3444 		return;
3445 	}
3446 
3447 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3448 
3449 	/* If device is running, close it first. */
3450 	if (dev->flags & IFF_UP)
3451 		dev_close(dev);
3452 
3453 	/* And unlink it from device chain. */
3454 	write_lock_bh(&dev_base_lock);
3455 	list_del(&dev->dev_list);
3456 	hlist_del(&dev->name_hlist);
3457 	hlist_del(&dev->index_hlist);
3458 	write_unlock_bh(&dev_base_lock);
3459 
3460 	dev->reg_state = NETREG_UNREGISTERING;
3461 
3462 	synchronize_net();
3463 
3464 	/* Shutdown queueing discipline. */
3465 	dev_shutdown(dev);
3466 
3467 
	/* Notify protocols that we are about to destroy this device;
	   they should clean up everything they have attached to it.
3470 	*/
3471 	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3472 
3473 	/*
3474 	 *	Flush the multicast chain
3475 	 */
3476 	dev_mc_discard(dev);
3477 
3478 	if (dev->uninit)
3479 		dev->uninit(dev);
3480 
3481 	/* Notifier chain MUST detach us from master device. */
3482 	BUG_TRAP(!dev->master);
3483 
3484 	/* Remove entries from sysfs */
3485 	netdev_unregister_sysfs(dev);
3486 
3487 	/* Finish processing unregister after unlock */
3488 	net_set_todo(dev);
3489 
3490 	synchronize_net();
3491 
3492 	dev_put(dev);
3493 }
3494 
3495 /**
3496  *	unregister_netdev - remove device from the kernel
3497  *	@dev: device
3498  *
3499  *	This function shuts down a device interface and removes it
3500  *	from the kernel tables. On success 0 is returned, on a failure
3501  *	a negative errno code is returned.
3502  *
3503  *	This is just a wrapper for unregister_netdevice that takes
3504  *	the rtnl semaphore.  In general you want to use this and not
3505  *	unregister_netdevice.
3506  */
3507 void unregister_netdev(struct net_device *dev)
3508 {
3509 	rtnl_lock();
3510 	unregister_netdevice(dev);
3511 	rtnl_unlock();
3512 }
3513 
3514 EXPORT_SYMBOL(unregister_netdev);
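
/*
 * Teardown sketch matching the allocation example above (see also the
 * "The sequence is:" comment before netdev_run_todo()):
 *
 *	unregister_netdev(dev);		(takes and releases the RTNL lock)
 *	free_netdev(dev);		(safe once unregistration has completed)
 */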
3515 
3516 static int dev_cpu_callback(struct notifier_block *nfb,
3517 			    unsigned long action,
3518 			    void *ocpu)
3519 {
3520 	struct sk_buff **list_skb;
3521 	struct net_device **list_net;
3522 	struct sk_buff *skb;
3523 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
3524 	struct softnet_data *sd, *oldsd;
3525 
3526 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
3527 		return NOTIFY_OK;
3528 
3529 	local_irq_disable();
3530 	cpu = smp_processor_id();
3531 	sd = &per_cpu(softnet_data, cpu);
3532 	oldsd = &per_cpu(softnet_data, oldcpu);
3533 
3534 	/* Find end of our completion_queue. */
3535 	list_skb = &sd->completion_queue;
3536 	while (*list_skb)
3537 		list_skb = &(*list_skb)->next;
3538 	/* Append completion queue from offline CPU. */
3539 	*list_skb = oldsd->completion_queue;
3540 	oldsd->completion_queue = NULL;
3541 
3542 	/* Find end of our output_queue. */
3543 	list_net = &sd->output_queue;
3544 	while (*list_net)
3545 		list_net = &(*list_net)->next_sched;
3546 	/* Append output queue from offline CPU. */
3547 	*list_net = oldsd->output_queue;
3548 	oldsd->output_queue = NULL;
3549 
3550 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
3551 	local_irq_enable();
3552 
3553 	/* Process offline CPU's input_pkt_queue */
3554 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3555 		netif_rx(skb);
3556 
3557 	return NOTIFY_OK;
3558 }
3559 
3560 #ifdef CONFIG_NET_DMA
3561 /**
 * net_dma_rebalance - redistribute DMA channels among online CPUs
 *
3563  * This is called when the number of channels allocated to the net_dma_client
3564  * changes.  The net_dma_client tries to have one DMA channel per CPU.
3565  */
3566 static void net_dma_rebalance(void)
3567 {
3568 	unsigned int cpu, i, n;
3569 	struct dma_chan *chan;
3570 
3571 	if (net_dma_count == 0) {
3572 		for_each_online_cpu(cpu)
3573 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3574 		return;
3575 	}
3576 
3577 	i = 0;
3578 	cpu = first_cpu(cpu_online_map);
3579 
3580 	rcu_read_lock();
3581 	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3582 		n = ((num_online_cpus() / net_dma_count)
3583 		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3584 
		while (n) {
3586 			per_cpu(softnet_data, cpu).net_dma = chan;
3587 			cpu = next_cpu(cpu, cpu_online_map);
3588 			n--;
3589 		}
3590 		i++;
3591 	}
3592 	rcu_read_unlock();
3593 }
3594 
3595 /**
3596  * netdev_dma_event - event callback for the net_dma_client
3597  * @client: should always be net_dma_client
3598  * @chan: DMA channel for the event
3599  * @event: event type
3600  */
3601 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3602 	enum dma_event event)
3603 {
3604 	spin_lock(&net_dma_event_lock);
3605 	switch (event) {
3606 	case DMA_RESOURCE_ADDED:
3607 		net_dma_count++;
3608 		net_dma_rebalance();
3609 		break;
3610 	case DMA_RESOURCE_REMOVED:
3611 		net_dma_count--;
3612 		net_dma_rebalance();
3613 		break;
3614 	default:
3615 		break;
3616 	}
3617 	spin_unlock(&net_dma_event_lock);
3618 }
3619 
3620 /**
 * netdev_dma_register - register the networking subsystem as a DMA client
3622  */
3623 static int __init netdev_dma_register(void)
3624 {
3625 	spin_lock_init(&net_dma_event_lock);
3626 	net_dma_client = dma_async_client_register(netdev_dma_event);
3627 	if (net_dma_client == NULL)
3628 		return -ENOMEM;
3629 
3630 	dma_async_client_chan_request(net_dma_client, num_online_cpus());
3631 	return 0;
3632 }
3633 
3634 #else
3635 static int __init netdev_dma_register(void) { return -ENODEV; }
3636 #endif /* CONFIG_NET_DMA */
3637 
3638 /*
3639  *	Initialize the DEV module. At boot time this walks the device list and
3640  *	unhooks any devices that fail to initialise (normally hardware not
3641  *	present) and leaves us with a valid list of present and active devices.
3642  *
3643  */
3644 
3645 /*
3646  *       This is called single threaded during boot, so no need
3647  *       to take the rtnl semaphore.
3648  */
3649 static int __init net_dev_init(void)
3650 {
3651 	int i, rc = -ENOMEM;
3652 
3653 	BUG_ON(!dev_boot_phase);
3654 
3655 	if (dev_proc_init())
3656 		goto out;
3657 
3658 	if (netdev_sysfs_init())
3659 		goto out;
3660 
3661 	INIT_LIST_HEAD(&ptype_all);
3662 	for (i = 0; i < 16; i++)
3663 		INIT_LIST_HEAD(&ptype_base[i]);
3664 
3665 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3666 		INIT_HLIST_HEAD(&dev_name_head[i]);
3667 
3668 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3669 		INIT_HLIST_HEAD(&dev_index_head[i]);
3670 
3671 	/*
3672 	 *	Initialise the packet receive queues.
3673 	 */
3674 
3675 	for_each_possible_cpu(i) {
3676 		struct softnet_data *queue;
3677 
3678 		queue = &per_cpu(softnet_data, i);
3679 		skb_queue_head_init(&queue->input_pkt_queue);
3680 		queue->completion_queue = NULL;
3681 		INIT_LIST_HEAD(&queue->poll_list);
3682 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3683 		queue->backlog_dev.weight = weight_p;
3684 		queue->backlog_dev.poll = process_backlog;
3685 		atomic_set(&queue->backlog_dev.refcnt, 1);
3686 	}
3687 
3688 	netdev_dma_register();
3689 
3690 	dev_boot_phase = 0;
3691 
3692 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3693 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3694 
3695 	hotcpu_notifier(dev_cpu_callback, 0);
3696 	dst_init();
3697 	dev_mcast_init();
3698 	rc = 0;
3699 out:
3700 	return rc;
3701 }
3702 
3703 subsys_initcall(net_dev_init);
3704 
3705 EXPORT_SYMBOL(__dev_get_by_index);
3706 EXPORT_SYMBOL(__dev_get_by_name);
3707 EXPORT_SYMBOL(__dev_remove_pack);
3708 EXPORT_SYMBOL(dev_valid_name);
3709 EXPORT_SYMBOL(dev_add_pack);
3710 EXPORT_SYMBOL(dev_alloc_name);
3711 EXPORT_SYMBOL(dev_close);
3712 EXPORT_SYMBOL(dev_get_by_flags);
3713 EXPORT_SYMBOL(dev_get_by_index);
3714 EXPORT_SYMBOL(dev_get_by_name);
3715 EXPORT_SYMBOL(dev_open);
3716 EXPORT_SYMBOL(dev_queue_xmit);
3717 EXPORT_SYMBOL(dev_remove_pack);
3718 EXPORT_SYMBOL(dev_set_allmulti);
3719 EXPORT_SYMBOL(dev_set_promiscuity);
3720 EXPORT_SYMBOL(dev_change_flags);
3721 EXPORT_SYMBOL(dev_set_mtu);
3722 EXPORT_SYMBOL(dev_set_mac_address);
3723 EXPORT_SYMBOL(free_netdev);
3724 EXPORT_SYMBOL(netdev_boot_setup_check);
3725 EXPORT_SYMBOL(netdev_set_master);
3726 EXPORT_SYMBOL(netdev_state_change);
3727 EXPORT_SYMBOL(netif_receive_skb);
3728 EXPORT_SYMBOL(netif_rx);
3729 EXPORT_SYMBOL(register_gifconf);
3730 EXPORT_SYMBOL(register_netdevice);
3731 EXPORT_SYMBOL(register_netdevice_notifier);
3732 EXPORT_SYMBOL(skb_checksum_help);
3733 EXPORT_SYMBOL(synchronize_net);
3734 EXPORT_SYMBOL(unregister_netdevice);
3735 EXPORT_SYMBOL(unregister_netdevice_notifier);
3736 EXPORT_SYMBOL(net_enable_timestamp);
3737 EXPORT_SYMBOL(net_disable_timestamp);
3738 EXPORT_SYMBOL(dev_get_flags);
3739 
3740 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3741 EXPORT_SYMBOL(br_handle_frame_hook);
3742 EXPORT_SYMBOL(br_fdb_get_hook);
3743 EXPORT_SYMBOL(br_fdb_put_hook);
3744 #endif
3745 
3746 #ifdef CONFIG_KMOD
3747 EXPORT_SYMBOL(dev_load);
3748 #endif
3749 
3750 EXPORT_PER_CPU_SYMBOL(softnet_data);
3751