xref: /linux/net/core/dev.c (revision 36ca1195ad7f760a6af3814cb002bd3a3d4b4db1)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/config.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/string.h>
84 #include <linux/mm.h>
85 #include <linux/socket.h>
86 #include <linux/sockios.h>
87 #include <linux/errno.h>
88 #include <linux/interrupt.h>
89 #include <linux/if_ether.h>
90 #include <linux/netdevice.h>
91 #include <linux/etherdevice.h>
92 #include <linux/notifier.h>
93 #include <linux/skbuff.h>
94 #include <net/sock.h>
95 #include <linux/rtnetlink.h>
96 #include <linux/proc_fs.h>
97 #include <linux/seq_file.h>
98 #include <linux/stat.h>
99 #include <linux/if_bridge.h>
100 #include <linux/divert.h>
101 #include <net/dst.h>
102 #include <net/pkt_sched.h>
103 #include <net/checksum.h>
104 #include <linux/highmem.h>
105 #include <linux/init.h>
106 #include <linux/kmod.h>
107 #include <linux/module.h>
108 #include <linux/kallsyms.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #ifdef CONFIG_NET_RADIO
113 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
114 #include <net/iw_handler.h>
115 #endif	/* CONFIG_NET_RADIO */
116 #include <asm/current.h>
117 
118 /* This define, if set, will randomly drop a packet when congestion
119  * is more than moderate.  It helps fairness in the multi-interface
120  * case when one of them is a hog, but it kills performance for the
121  * single interface case so it is off now by default.
122  */
123 #undef RAND_LIE
124 
125 /* Setting this will sample the queue lengths and thus congestion
126  * via a timer instead of as each packet is received.
127  */
128 #undef OFFLINE_SAMPLE
129 
130 /*
131  *	The list of packet types we will receive (as opposed to discard)
132  *	and the routines to invoke.
133  *
134  *	Why 16. Because with 16 the only overlap we get on a hash of the
135  *	low nibble of the protocol value is RARP/SNAP/X.25.
136  *
137  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
138  *             sure which should go first, but I bet it won't make much
139  *             difference if we are running VLANs.  The good news is that
140  *             this protocol won't be in the list unless compiled in, so
 *             the average user (w/out VLANs) will not be adversely affected.
142  *             --BLG
143  *
144  *		0800	IP
145  *		8100    802.1Q VLAN
146  *		0001	802.3
147  *		0002	AX.25
148  *		0004	802.2
149  *		8035	RARP
150  *		0005	SNAP
151  *		0805	X.25
152  *		0806	ARP
153  *		8137	IPX
154  *		0009	Localtalk
155  *		86DD	IPv6
156  */
157 
/* Taken (BH-safe) by dev_add_pack()/__dev_remove_pack() while they edit the lists below. */
static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16];	/* 16 way hashed list */
static struct list_head ptype_all;		/* Taps */
161 
#ifdef OFFLINE_SAMPLE
/* Timer-driven queue sampling; only compiled in when OFFLINE_SAMPLE is defined. */
static void sample_queue(unsigned long dummy);
static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
#endif
166 
167 /*
 * The @dev_base list is protected by @dev_base_lock and the rtnl
 * semaphore.
170  *
171  * Pure readers hold dev_base_lock for reading.
172  *
173  * Writers must hold the rtnl semaphore while they loop through the
174  * dev_base list, and hold dev_base_lock for writing when they do the
175  * actual updates.  This allows pure readers to access the list even
176  * while a writer is preparing to update it.
177  *
178  * To put it another way, dev_base_lock is held for writing only to
179  * protect against pure readers; the rtnl semaphore provides the
180  * protection against other writers.
181  *
182  * See, for example usages, register_netdevice() and
183  * unregister_netdevice(), which must be called with the rtnl
184  * semaphore held.
185  */
/* Head of the device list; entries are chained through ->next. */
struct net_device *dev_base;
/* Starts out pointing at the head pointer — presumably the append slot for new devices (confirm in register_netdevice()). */
static struct net_device **dev_tail = &dev_base;
/* Reader/writer lock guarding dev_base for pure readers. */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base);
EXPORT_SYMBOL(dev_base_lock);
192 
/* Both lookup tables have 1 << NETDEV_HASHBITS buckets. */
#define NETDEV_HASHBITS	8
/* Buckets indexed by dev_name_hash(): devices hashed by interface name. */
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
/* Buckets indexed by dev_index_hash(): devices hashed by ifindex. */
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
196 
197 static inline struct hlist_head *dev_name_hash(const char *name)
198 {
199 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
200 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
201 }
202 
203 static inline struct hlist_head *dev_index_hash(int ifindex)
204 {
205 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
206 }
207 
/*
 *	Our notifier list: head of the chain of notifier blocks that are
 *	called for network device events (NETDEV_UP, NETDEV_DOWN, ...).
 */

static struct notifier_block *netdev_chain;
213 
/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.  There is one instance per CPU.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
219 
/*
 * sysfs hooks.  With CONFIG_SYSFS they are real functions defined outside
 * this file; without it they collapse to no-ops that evaluate to 0.
 */
#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
#define netdev_sysfs_init()	 	(0)
#define netdev_register_sysfs(dev)	(0)
#define	netdev_unregister_sysfs(dev)	do { } while(0)
#endif
229 
230 
231 /*******************************************************************************
232 
233 		Protocol management and registration routines
234 
235 *******************************************************************************/
236 
/*
 *	For efficiency: counter kept by dev_add_pack() and
 *	__dev_remove_pack() as ETH_P_ALL handlers (taps) are added to and
 *	removed from ptype_all.
 */

int netdev_nit;
242 
243 /*
244  *	Add a protocol ID to the list. Now that the input handler is
245  *	smarter we can dispense with all the messy stuff that used to be
246  *	here.
247  *
248  *	BEWARE!!! Protocol handlers, mangling input packets,
249  *	MUST BE last in hash buckets and checking protocol handlers
250  *	MUST start from promiscuous ptype_all chain in net_bh.
251  *	It is true now, do not change it.
252  *	Explanation follows: if protocol handler, mangling packet, will
253  *	be the first on list, it is not able to sense, that packet
254  *	is cloned and should be copied-on-write, so that it will
255  *	change it and subsequent readers will get broken packet.
256  *							--ANK (980803)
257  */
258 
259 /**
260  *	dev_add_pack - add packet handler
261  *	@pt: packet type declaration
262  *
263  *	Add a protocol handler to the networking stack. The passed &packet_type
264  *	is linked into kernel lists and may not be freed until it has been
265  *	removed from the kernel lists.
266  *
267  *	This call does not sleep therefore it can not
268  *	guarantee all CPU's that are in middle of receiving packets
269  *	will see the new packet type (until the next received packet).
270  */
271 
272 void dev_add_pack(struct packet_type *pt)
273 {
274 	int hash;
275 
276 	spin_lock_bh(&ptype_lock);
277 	if (pt->type == htons(ETH_P_ALL)) {
278 		netdev_nit++;
279 		list_add_rcu(&pt->list, &ptype_all);
280 	} else {
281 		hash = ntohs(pt->type) & 15;
282 		list_add_rcu(&pt->list, &ptype_base[hash]);
283 	}
284 	spin_unlock_bh(&ptype_lock);
285 }
286 
/* Defined outside this section of the file — NOTE(review): confirm where it is called. */
extern void linkwatch_run_queue(void);
288 
289 
290 
291 /**
292  *	__dev_remove_pack	 - remove packet handler
293  *	@pt: packet type declaration
294  *
295  *	Remove a protocol handler that was previously added to the kernel
296  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
297  *	from the kernel lists and can be freed or reused once this function
298  *	returns.
299  *
300  *      The packet type might still be in use by receivers
301  *	and must not be freed until after all the CPU's have gone
302  *	through a quiescent state.
303  */
304 void __dev_remove_pack(struct packet_type *pt)
305 {
306 	struct list_head *head;
307 	struct packet_type *pt1;
308 
309 	spin_lock_bh(&ptype_lock);
310 
311 	if (pt->type == htons(ETH_P_ALL)) {
312 		netdev_nit--;
313 		head = &ptype_all;
314 	} else
315 		head = &ptype_base[ntohs(pt->type) & 15];
316 
317 	list_for_each_entry(pt1, head, list) {
318 		if (pt == pt1) {
319 			list_del_rcu(&pt->list);
320 			goto out;
321 		}
322 	}
323 
324 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
325 out:
326 	spin_unlock_bh(&ptype_lock);
327 }
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler that was previously added by
 *	dev_add_pack() and then wait in synchronize_net(), so that the
 *	passed &packet_type can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Unlink first ... */
	__dev_remove_pack(pt);

	/* ... then wait out receivers that may still be using it. */
	synchronize_net();
}
346 
347 /******************************************************************************
348 
349 		      Device Boot-time Settings Routines
350 
351 *******************************************************************************/
352 
/*
 * Boot time configuration table.  A slot whose name starts with '\0' or
 * a space is treated as unused by the routines below.
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
355 
356 /**
357  *	netdev_boot_setup_add	- add new setup entry
358  *	@name: name of the device
359  *	@map: configured settings for the device
360  *
361  *	Adds new setup entry to the dev_boot_setup list.  The function
362  *	returns 0 on error and 1 on success.  This is a generic routine to
363  *	all netdevices.
364  */
365 static int netdev_boot_setup_add(char *name, struct ifmap *map)
366 {
367 	struct netdev_boot_setup *s;
368 	int i;
369 
370 	s = dev_boot_setup;
371 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
372 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
373 			memset(s[i].name, 0, sizeof(s[i].name));
374 			strcpy(s[i].name, name);
375 			memcpy(&s[i].map, map, sizeof(s[i].map));
376 			break;
377 		}
378 	}
379 
380 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
381 }
382 
383 /**
384  *	netdev_boot_setup_check	- check boot time settings
385  *	@dev: the netdevice
386  *
387  * 	Check boot time settings for the device.
388  *	The found settings are set for the device to be used
389  *	later in the device probing.
390  *	Returns 0 if no settings found, 1 if they are.
391  */
392 int netdev_boot_setup_check(struct net_device *dev)
393 {
394 	struct netdev_boot_setup *s = dev_boot_setup;
395 	int i;
396 
397 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
398 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
399 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
400 			dev->irq 	= s[i].map.irq;
401 			dev->base_addr 	= s[i].map.base_addr;
402 			dev->mem_start 	= s[i].map.mem_start;
403 			dev->mem_end 	= s[i].map.mem_end;
404 			return 1;
405 		}
406 	}
407 	return 0;
408 }
409 
410 
411 /**
412  *	netdev_boot_base	- get address from boot time settings
413  *	@prefix: prefix for network device
414  *	@unit: id for network device
415  *
416  * 	Check boot time settings for the base address of device.
417  *	The found settings are set for the device to be used
418  *	later in the device probing.
419  *	Returns 0 if no settings found.
420  */
421 unsigned long netdev_boot_base(const char *prefix, int unit)
422 {
423 	const struct netdev_boot_setup *s = dev_boot_setup;
424 	char name[IFNAMSIZ];
425 	int i;
426 
427 	sprintf(name, "%s%d", prefix, unit);
428 
429 	/*
430 	 * If device already registered then return base of 1
431 	 * to indicate not to probe for this interface
432 	 */
433 	if (__dev_get_by_name(name))
434 		return 1;
435 
436 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
437 		if (!strcmp(name, s[i].name))
438 			return s[i].map.base_addr;
439 	return 0;
440 }
441 
442 /*
443  * Saves at boot time configured settings for any netdevice.
444  */
445 int __init netdev_boot_setup(char *str)
446 {
447 	int ints[5];
448 	struct ifmap map;
449 
450 	str = get_options(str, ARRAY_SIZE(ints), ints);
451 	if (!str || !*str)
452 		return 0;
453 
454 	/* Save settings */
455 	memset(&map, 0, sizeof(map));
456 	if (ints[0] > 0)
457 		map.irq = ints[1];
458 	if (ints[0] > 1)
459 		map.base_addr = ints[2];
460 	if (ints[0] > 2)
461 		map.mem_start = ints[3];
462 	if (ints[0] > 3)
463 		map.mem_end = ints[4];
464 
465 	/* Add new entry to the list */
466 	return netdev_boot_setup_add(str, &map);
467 }
468 
469 __setup("netdev=", netdev_boot_setup);
470 
471 /*******************************************************************************
472 
473 			    Device Interface Subroutines
474 
475 *******************************************************************************/
476 
477 /**
478  *	__dev_get_by_name	- find a device by its name
479  *	@name: name to find
480  *
481  *	Find an interface by name. Must be called under RTNL semaphore
482  *	or @dev_base_lock. If the name is found a pointer to the device
483  *	is returned. If the name is not found then %NULL is returned. The
484  *	reference counters are not incremented so the caller must be
485  *	careful with locks.
486  */
487 
488 struct net_device *__dev_get_by_name(const char *name)
489 {
490 	struct hlist_node *p;
491 
492 	hlist_for_each(p, dev_name_hash(name)) {
493 		struct net_device *dev
494 			= hlist_entry(p, struct net_device, name_hlist);
495 		if (!strncmp(dev->name, name, IFNAMSIZ))
496 			return dev;
497 	}
498 	return NULL;
499 }
500 
501 /**
502  *	dev_get_by_name		- find a device by its name
503  *	@name: name to find
504  *
505  *	Find an interface by name. This can be called from any
506  *	context and does its own locking. The returned handle has
507  *	the usage count incremented and the caller must use dev_put() to
508  *	release it when it is no longer needed. %NULL is returned if no
509  *	matching device is found.
510  */
511 
512 struct net_device *dev_get_by_name(const char *name)
513 {
514 	struct net_device *dev;
515 
516 	read_lock(&dev_base_lock);
517 	dev = __dev_get_by_name(name);
518 	if (dev)
519 		dev_hold(dev);
520 	read_unlock(&dev_base_lock);
521 	return dev;
522 }
523 
524 /**
525  *	__dev_get_by_index - find a device by its ifindex
526  *	@ifindex: index of device
527  *
528  *	Search for an interface by index. Returns %NULL if the device
529  *	is not found or a pointer to the device. The device has not
530  *	had its reference counter increased so the caller must be careful
531  *	about locking. The caller must hold either the RTNL semaphore
532  *	or @dev_base_lock.
533  */
534 
535 struct net_device *__dev_get_by_index(int ifindex)
536 {
537 	struct hlist_node *p;
538 
539 	hlist_for_each(p, dev_index_hash(ifindex)) {
540 		struct net_device *dev
541 			= hlist_entry(p, struct net_device, index_hlist);
542 		if (dev->ifindex == ifindex)
543 			return dev;
544 	}
545 	return NULL;
546 }
547 
548 
549 /**
550  *	dev_get_by_index - find a device by its ifindex
551  *	@ifindex: index of device
552  *
553  *	Search for an interface by index. Returns NULL if the device
554  *	is not found or a pointer to the device. The device returned has
555  *	had a reference added and the pointer is safe until the user calls
556  *	dev_put to indicate they have finished with it.
557  */
558 
559 struct net_device *dev_get_by_index(int ifindex)
560 {
561 	struct net_device *dev;
562 
563 	read_lock(&dev_base_lock);
564 	dev = __dev_get_by_index(ifindex);
565 	if (dev)
566 		dev_hold(dev);
567 	read_unlock(&dev_base_lock);
568 	return dev;
569 }
570 
571 /**
572  *	dev_getbyhwaddr - find a device by its hardware address
573  *	@type: media type of device
574  *	@ha: hardware address
575  *
576  *	Search for an interface by MAC address. Returns NULL if the device
577  *	is not found or a pointer to the device. The caller must hold the
578  *	rtnl semaphore. The returned device has not had its ref count increased
579  *	and the caller must therefore be careful about locking
580  *
581  *	BUGS:
582  *	If the API was consistent this would be __dev_get_by_hwaddr
583  */
584 
585 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
586 {
587 	struct net_device *dev;
588 
589 	ASSERT_RTNL();
590 
591 	for (dev = dev_base; dev; dev = dev->next)
592 		if (dev->type == type &&
593 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
594 			break;
595 	return dev;
596 }
597 
598 struct net_device *dev_getfirstbyhwtype(unsigned short type)
599 {
600 	struct net_device *dev;
601 
602 	rtnl_lock();
603 	for (dev = dev_base; dev; dev = dev->next) {
604 		if (dev->type == type) {
605 			dev_hold(dev);
606 			break;
607 		}
608 	}
609 	rtnl_unlock();
610 	return dev;
611 }
612 
613 EXPORT_SYMBOL(dev_getfirstbyhwtype);
614 
615 /**
616  *	dev_get_by_flags - find any device with given flags
617  *	@if_flags: IFF_* values
618  *	@mask: bitmask of bits in if_flags to check
619  *
620  *	Search for any interface with the given flags. Returns NULL if a device
621  *	is not found or a pointer to the device. The device returned has
622  *	had a reference added and the pointer is safe until the user calls
623  *	dev_put to indicate they have finished with it.
624  */
625 
626 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
627 {
628 	struct net_device *dev;
629 
630 	read_lock(&dev_base_lock);
631 	for (dev = dev_base; dev != NULL; dev = dev->next) {
632 		if (((dev->flags ^ if_flags) & mask) == 0) {
633 			dev_hold(dev);
634 			break;
635 		}
636 	}
637 	read_unlock(&dev_base_lock);
638 	return dev;
639 }
640 
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to allow sysfs
 *	to work: not empty, not "." or "..", and free of '/'.
 *	Returns 1 if the name is acceptable, 0 otherwise.
 */
static int dev_valid_name(const char *name)
{
	if (name[0] == '\0')
		return 0;
	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;
	if (strchr(name, '/') != NULL)
		return 0;
	return 1;
}
655 
/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. Not efficient for many devices, not called a lot. The caller
 *	must hold the dev_base or rtnl lock while allocating the name and
 *	adding the device in order to avoid duplicates. Returns the number
 *	of the unit assigned or a negative errno code: -EINVAL for a
 *	malformed format, -ENOMEM if the scratch page cannot be allocated,
 *	-ENFILE if the resulting name is already taken.
 *
 *	If @name contains no '%' in its first IFNAMSIZ-1 characters it is
 *	used as is and the unit reported on success is 0.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	int i = 0;
	char buf[IFNAMSIZ];
	const char *p;
	/* One page used as a bitmap gives this many candidate unit numbers. */
	const int max_netdevices = 8*PAGE_SIZE;
	long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be exactly one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		/* Mark every unit number already used by a name of this pattern. */
		for (d = dev_base; d; d = d->next) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/*  avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, sizeof(buf), name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		/* Lowest unit number not marked above. */
		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	/* Build the candidate name and make sure nobody owns it already. */
	snprintf(buf, sizeof(buf), name, i);
	if (!__dev_get_by_name(buf)) {
		strlcpy(dev->name, buf, IFNAMSIZ);
		return i;
	}

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}
720 
721 
722 /**
723  *	dev_change_name - change name of a device
724  *	@dev: device
725  *	@newname: name (or format string) must be at least IFNAMSIZ
726  *
727  *	Change name of a device, can pass format strings "eth%d".
728  *	for wildcarding.
729  */
730 int dev_change_name(struct net_device *dev, char *newname)
731 {
732 	int err = 0;
733 
734 	ASSERT_RTNL();
735 
736 	if (dev->flags & IFF_UP)
737 		return -EBUSY;
738 
739 	if (!dev_valid_name(newname))
740 		return -EINVAL;
741 
742 	if (strchr(newname, '%')) {
743 		err = dev_alloc_name(dev, newname);
744 		if (err < 0)
745 			return err;
746 		strcpy(newname, dev->name);
747 	}
748 	else if (__dev_get_by_name(newname))
749 		return -EEXIST;
750 	else
751 		strlcpy(dev->name, newname, IFNAMSIZ);
752 
753 	err = class_device_rename(&dev->class_dev, dev->name);
754 	if (!err) {
755 		hlist_del(&dev->name_hlist);
756 		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
757 		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
758 	}
759 
760 	return err;
761 }
762 
763 /**
764  *	netdev_features_change - device changes fatures
765  *	@dev: device to cause notification
766  *
767  *	Called to indicate a device has changed features.
768  */
769 void netdev_features_change(struct net_device *dev)
770 {
771 	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
772 }
773 EXPORT_SYMBOL(netdev_features_change);
774 
775 /**
776  *	netdev_state_change - device changes state
777  *	@dev: device to cause notification
778  *
779  *	Called to indicate a device has changed state. This function calls
780  *	the notifier chains for netdev_chain and sends a NEWLINK message
781  *	to the routing socket.
782  */
783 void netdev_state_change(struct net_device *dev)
784 {
785 	if (dev->flags & IFF_UP) {
786 		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
787 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
788 	}
789 }
790 
791 /**
792  *	dev_load 	- load a network module
793  *	@name: name of interface
794  *
795  *	If a network interface is not present and the process has suitable
796  *	privileges this function loads the module. If module loading is not
797  *	available in this kernel then it becomes a nop.
798  */
799 
800 void dev_load(const char *name)
801 {
802 	struct net_device *dev;
803 
804 	read_lock(&dev_base_lock);
805 	dev = __dev_get_by_name(name);
806 	read_unlock(&dev_base_lock);
807 
808 	if (!dev && capable(CAP_SYS_MODULE))
809 		request_module("%s", name);
810 }
811 
812 static int default_rebuild_header(struct sk_buff *skb)
813 {
814 	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
815 	       skb->dev ? skb->dev->name : "NULL!!!");
816 	kfree_skb(skb);
817 	return 1;
818 }
819 
820 
821 /**
822  *	dev_open	- prepare an interface for use.
823  *	@dev:	device to open
824  *
825  *	Takes a device from down to up state. The device's private open
826  *	function is invoked and then the multicast lists are loaded. Finally
827  *	the device is moved into the up state and a %NETDEV_UP message is
828  *	sent to the netdev notifier chain.
829  *
830  *	Calling this function on an active interface is a nop. On a failure
831  *	a negative errno code is returned.
832  */
833 int dev_open(struct net_device *dev)
834 {
835 	int ret = 0;
836 
837 	/*
838 	 *	Is it already up?
839 	 */
840 
841 	if (dev->flags & IFF_UP)
842 		return 0;
843 
844 	/*
845 	 *	Is it even present?
846 	 */
847 	if (!netif_device_present(dev))
848 		return -ENODEV;
849 
850 	/*
851 	 *	Call device private open method
852 	 */
853 	set_bit(__LINK_STATE_START, &dev->state);
854 	if (dev->open) {
855 		ret = dev->open(dev);
856 		if (ret)
857 			clear_bit(__LINK_STATE_START, &dev->state);
858 	}
859 
860  	/*
861 	 *	If it went open OK then:
862 	 */
863 
864 	if (!ret) {
865 		/*
866 		 *	Set the flags.
867 		 */
868 		dev->flags |= IFF_UP;
869 
870 		/*
871 		 *	Initialize multicasting status
872 		 */
873 		dev_mc_upload(dev);
874 
875 		/*
876 		 *	Wakeup transmit queue engine
877 		 */
878 		dev_activate(dev);
879 
880 		/*
881 		 *	... and announce new interface.
882 		 */
883 		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
884 	}
885 	return ret;
886 }
887 
888 /**
889  *	dev_close - shutdown an interface.
890  *	@dev: device to shutdown
891  *
892  *	This function moves an active device into down state. A
893  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
894  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
895  *	chain.
896  */
897 int dev_close(struct net_device *dev)
898 {
899 	if (!(dev->flags & IFF_UP))
900 		return 0;
901 
902 	/*
903 	 *	Tell people we are going down, so that they can
904 	 *	prepare to death, when device is still operating.
905 	 */
906 	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
907 
908 	dev_deactivate(dev);
909 
910 	clear_bit(__LINK_STATE_START, &dev->state);
911 
912 	/* Synchronize to scheduled poll. We cannot touch poll list,
913 	 * it can be even on different cpu. So just clear netif_running(),
914 	 * and wait when poll really will happen. Actually, the best place
915 	 * for this is inside dev->stop() after device stopped its irq
916 	 * engine, but this requires more changes in devices. */
917 
918 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
919 	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
920 		/* No hurry. */
921 		current->state = TASK_INTERRUPTIBLE;
922 		schedule_timeout(1);
923 	}
924 
925 	/*
926 	 *	Call the device specific close. This cannot fail.
927 	 *	Only if device is UP
928 	 *
929 	 *	We allow it to be called even after a DETACH hot-plug
930 	 *	event.
931 	 */
932 	if (dev->stop)
933 		dev->stop(dev);
934 
935 	/*
936 	 *	Device is now down.
937 	 */
938 
939 	dev->flags &= ~IFF_UP;
940 
941 	/*
942 	 * Tell people we are down
943 	 */
944 	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
945 
946 	return 0;
947 }
948 
949 
950 /*
951  *	Device change register/unregister. These are not inline or static
952  *	as we export them to the world.
953  */
954 
955 /**
956  *	register_netdevice_notifier - register a network notifier block
957  *	@nb: notifier
958  *
959  *	Register a notifier to be called when network device events occur.
960  *	The notifier passed is linked into the kernel structures and must
961  *	not be reused until it has been unregistered. A negative errno code
962  *	is returned on a failure.
963  *
964  * 	When registered all registration and up events are replayed
965  *	to the new notifier to allow device to have a race free
966  *	view of the network device list.
967  */
968 
969 int register_netdevice_notifier(struct notifier_block *nb)
970 {
971 	struct net_device *dev;
972 	int err;
973 
974 	rtnl_lock();
975 	err = notifier_chain_register(&netdev_chain, nb);
976 	if (!err) {
977 		for (dev = dev_base; dev; dev = dev->next) {
978 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
979 
980 			if (dev->flags & IFF_UP)
981 				nb->notifier_call(nb, NETDEV_UP, dev);
982 		}
983 	}
984 	rtnl_unlock();
985 	return err;
986 }
987 
988 /**
989  *	unregister_netdevice_notifier - unregister a network notifier block
990  *	@nb: notifier
991  *
992  *	Unregister a notifier previously registered by
993  *	register_netdevice_notifier(). The notifier is unlinked into the
994  *	kernel structures and may then be reused. A negative errno code
995  *	is returned on a failure.
996  */
997 
998 int unregister_netdevice_notifier(struct notifier_block *nb)
999 {
1000 	return notifier_chain_unregister(&netdev_chain, nb);
1001 }
1002 
1003 /**
1004  *	call_netdevice_notifiers - call all network notifier blocks
1005  *      @val: value passed unmodified to notifier function
1006  *      @v:   pointer passed unmodified to notifier function
1007  *
1008  *	Call all network notifier blocks.  Parameters and return value
1009  *	are as for notifier_call_chain().
1010  */
1011 
1012 int call_netdevice_notifiers(unsigned long val, void *v)
1013 {
1014 	return notifier_call_chain(&netdev_chain, val, v);
1015 }
1016 
/*
 * When > 0 there are consumers of rx skb time stamps.  Raised by
 * net_enable_timestamp() and lowered by net_disable_timestamp().
 */
static atomic_t netstamp_needed = ATOMIC_INIT(0);
1019 
1020 void net_enable_timestamp(void)
1021 {
1022 	atomic_inc(&netstamp_needed);
1023 }
1024 
1025 void net_disable_timestamp(void)
1026 {
1027 	atomic_dec(&netstamp_needed);
1028 }
1029 
1030 static inline void net_timestamp(struct timeval *stamp)
1031 {
1032 	if (atomic_read(&netstamp_needed))
1033 		do_gettimeofday(stamp);
1034 	else {
1035 		stamp->tv_sec = 0;
1036 		stamp->tv_usec = 0;
1037 	}
1038 }
1039 
1040 /*
1041  *	Support routine. Sends outgoing frames to any network
1042  *	taps currently in use.
1043  */
1044 
1045 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1046 {
1047 	struct packet_type *ptype;
1048 	net_timestamp(&skb->stamp);
1049 
1050 	rcu_read_lock();
1051 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1052 		/* Never send packets back to the socket
1053 		 * they originated from - MvS (miquels@drinkel.ow.org)
1054 		 */
1055 		if ((ptype->dev == dev || !ptype->dev) &&
1056 		    (ptype->af_packet_priv == NULL ||
1057 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1058 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1059 			if (!skb2)
1060 				break;
1061 
1062 			/* skb->nh should be correctly
1063 			   set by sender, so that the second statement is
1064 			   just protection against buggy protocols.
1065 			 */
1066 			skb2->mac.raw = skb2->data;
1067 
1068 			if (skb2->nh.raw < skb2->data ||
1069 			    skb2->nh.raw > skb2->tail) {
1070 				if (net_ratelimit())
1071 					printk(KERN_CRIT "protocol %04x is "
1072 					       "buggy, dev %s\n",
1073 					       skb2->protocol, dev->name);
1074 				skb2->nh.raw = skb2->data;
1075 			}
1076 
1077 			skb2->h.raw = skb2->nh.raw;
1078 			skb2->pkt_type = PACKET_OUTGOING;
1079 			ptype->func(skb2, skb->dev, ptype);
1080 		}
1081 	}
1082 	rcu_read_unlock();
1083 }
1084 
1085 /*
1086  * Invalidate hardware checksum when packet is to be mangled, and
1087  * complete checksum manually on outgoing path.
1088  */
1089 int skb_checksum_help(struct sk_buff *skb, int inward)
1090 {
1091 	unsigned int csum;
1092 	int ret = 0, offset = skb->h.raw - skb->data;
1093 
1094 	if (inward) {
1095 		skb->ip_summed = CHECKSUM_NONE;
1096 		goto out;
1097 	}
1098 
1099 	if (skb_cloned(skb)) {
1100 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1101 		if (ret)
1102 			goto out;
1103 	}
1104 
1105 	if (offset > (int)skb->len)
1106 		BUG();
1107 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1108 
1109 	offset = skb->tail - skb->h.raw;
1110 	if (offset <= 0)
1111 		BUG();
1112 	if (skb->csum + 2 > offset)
1113 		BUG();
1114 
1115 	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1116 	skb->ip_summed = CHECKSUM_NONE;
1117 out:
1118 	return ret;
1119 }
1120 
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/*
 * Returns 1 when @skb carries at least one page fragment in high memory
 * and @dev does not advertise NETIF_F_HIGHDMA, 0 otherwise.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int frag = 0;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		while (frag < skb_shinfo(skb)->nr_frags) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
			frag++;
		}
	}

	return 0;
}
#else
#define illegal_highdma(dev, skb)	(0)
#endif
1143 
1144 extern void skb_release_data(struct sk_buff *);
1145 
1146 /* Keep head the same: replace data */
1147 int __skb_linearize(struct sk_buff *skb, int gfp_mask)
1148 {
1149 	unsigned int size;
1150 	u8 *data;
1151 	long offset;
1152 	struct skb_shared_info *ninfo;
1153 	int headerlen = skb->data - skb->head;
1154 	int expand = (skb->tail + skb->data_len) - skb->end;
1155 
1156 	if (skb_shared(skb))
1157 		BUG();
1158 
1159 	if (expand <= 0)
1160 		expand = 0;
1161 
1162 	size = skb->end - skb->head + expand;
1163 	size = SKB_DATA_ALIGN(size);
1164 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1165 	if (!data)
1166 		return -ENOMEM;
1167 
1168 	/* Copy entire thing */
1169 	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1170 		BUG();
1171 
1172 	/* Set up shinfo */
1173 	ninfo = (struct skb_shared_info*)(data + size);
1174 	atomic_set(&ninfo->dataref, 1);
1175 	ninfo->tso_size = skb_shinfo(skb)->tso_size;
1176 	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1177 	ninfo->nr_frags = 0;
1178 	ninfo->frag_list = NULL;
1179 
1180 	/* Offset between the two in bytes */
1181 	offset = data - skb->head;
1182 
1183 	/* Free old data. */
1184 	skb_release_data(skb);
1185 
1186 	skb->head = data;
1187 	skb->end  = data + size;
1188 
1189 	/* Set up new pointers */
1190 	skb->h.raw   += offset;
1191 	skb->nh.raw  += offset;
1192 	skb->mac.raw += offset;
1193 	skb->tail    += offset;
1194 	skb->data    += offset;
1195 
1196 	/* We are no longer a clone, even if we were. */
1197 	skb->cloned    = 0;
1198 
1199 	skb->tail     += skb->data_len;
1200 	skb->data_len  = 0;
1201 	return 0;
1202 }
1203 
/*
 * Take dev->xmit_lock and record @cpu as its owner, unless the driver
 * does its own locking (NETIF_F_LLTX).
 *
 * Wrapped in do { } while (0) so that "HARD_TX_LOCK(dev, cpu);" is a
 * single statement and stays valid in an unbraced if/else; arguments
 * are parenthesized so arbitrary expressions can be passed.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock(&(dev)->xmit_lock);			\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)
1210 
/*
 * Clear the recorded owner and release dev->xmit_lock, unless the
 * driver does its own locking (NETIF_F_LLTX).
 *
 * Wrapped in do { } while (0) so that "HARD_TX_UNLOCK(dev);" is a single
 * statement and stays valid in an unbraced if/else; the argument is
 * parenthesized so arbitrary expressions can be passed.
 */
#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock(&(dev)->xmit_lock);			\
	}							\
} while (0)
1217 
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 *
 *	NOTE(review): the "can be called from an interrupt" statement above
 *	and BLG's "interrupts MUST be enabled" remark read as contradictory;
 *	confirm which contract callers may rely on.
 */

int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	/* Default for the early bail-outs below (linearize/checksum failure). */
	int rc = -ENOMEM;

	/* A frag_list the device cannot handle has to be flattened first. */
	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb, GFP_ATOMIC))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb, GFP_ATOMIC))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_HW &&
	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
	     (!(dev->features & NETIF_F_IP_CSUM) ||
	      skb->protocol != htons(ETH_P_IP))))
	      	if (skb_checksum_help(skb, 0))
	      		goto out_kfree_skb;

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	local_bh_disable();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);

		rc = q->enqueue(skb, q);

		qdisc_run(dev);

		spin_unlock(&dev->queue_lock);
		/* NET_XMIT_BYPASS is reported to the caller as success. */
		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that xmit_lock protection is necessary here.
	   (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and take the lock. It is not prone to deadlocks.
	   Alternatively drop the noqueue qdisc, which is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		/* Owner == this CPU means we re-entered while already
		 * transmitting on this device.
		 */
		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev)) {
				/* Feed the packet taps before the driver. */
				if (netdev_nit)
					dev_queue_xmit_nit(skb, dev);

				rc = 0;
				/* Zero from the driver: skb was taken. */
				if (!dev->hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	/* Device down, queue stopped, driver refusal or recursion. */
	rc = -ENETDOWN;
	local_bh_enable();

	/* Error exit: BHs are (again) enabled here and the skb is ours to free. */
out_kfree_skb:
	kfree_skb(skb);
	return rc;
	/* Normal exit: the skb has been handed on, only re-enable BHs. */
out:
	local_bh_enable();
	return rc;
}
1360 
1361 
1362 /*=======================================================================
1363 			Receiver routines
1364   =======================================================================*/
1365 
/* Upper bound on the per-CPU input_pkt_queue length checked by netif_rx();
 * also used as the initial budget of net_rx_action().
 */
int netdev_max_backlog = 300;
int weight_p = 64;            /* old backlog weight */
/* These numbers are selected based on intuition and some
 * experimentation; if you have a more scientific way of doing this
 * please go ahead and fix things.
 *
 * no_cong, lo_cong and mod_cong are the averaged-backlog thresholds
 * that get_sample_stats() compares against when picking a congestion
 * level.
 */
int no_cong_thresh = 10;
int no_cong = 20;
int lo_cong = 100;
int mod_cong = 290;

/* Per-CPU receive statistics (total, dropped, throttled, ...). */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1378 
1379 
1380 static void get_sample_stats(int cpu)
1381 {
1382 #ifdef RAND_LIE
1383 	unsigned long rd;
1384 	int rq;
1385 #endif
1386 	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
1387 	int blog = sd->input_pkt_queue.qlen;
1388 	int avg_blog = sd->avg_blog;
1389 
1390 	avg_blog = (avg_blog >> 1) + (blog >> 1);
1391 
1392 	if (avg_blog > mod_cong) {
1393 		/* Above moderate congestion levels. */
1394 		sd->cng_level = NET_RX_CN_HIGH;
1395 #ifdef RAND_LIE
1396 		rd = net_random();
1397 		rq = rd % netdev_max_backlog;
1398 		if (rq < avg_blog) /* unlucky bastard */
1399 			sd->cng_level = NET_RX_DROP;
1400 #endif
1401 	} else if (avg_blog > lo_cong) {
1402 		sd->cng_level = NET_RX_CN_MOD;
1403 #ifdef RAND_LIE
1404 		rd = net_random();
1405 		rq = rd % netdev_max_backlog;
1406 			if (rq < avg_blog) /* unlucky bastard */
1407 				sd->cng_level = NET_RX_CN_HIGH;
1408 #endif
1409 	} else if (avg_blog > no_cong)
1410 		sd->cng_level = NET_RX_CN_LOW;
1411 	else  /* no congestion */
1412 		sd->cng_level = NET_RX_SUCCESS;
1413 
1414 	sd->avg_blog = avg_blog;
1415 }
1416 
#ifdef OFFLINE_SAMPLE
/*
 * Timer callback: sample the backlog statistics of the current CPU and
 * re-arm samp_timer for the next tick.
 * @dummy: unused timer argument
 */
static void sample_queue(unsigned long dummy)
{
/* 10 ms or 1ms -- i don't care -- JHS */
	int cpu = smp_processor_id();

	get_sample_stats(cpu);
	/* jiffies is an unsigned long; compute the expiry in that type
	 * instead of squeezing it through an int, which truncates on
	 * 64-bit machines.
	 */
	mod_timer(&samp_timer, jiffies + 1);
}
#endif
1429 
1430 
1431 /**
1432  *	netif_rx	-	post buffer to the network code
1433  *	@skb: buffer to post
1434  *
1435  *	This function receives a packet from a device driver and queues it for
1436  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1437  *	may be dropped during processing for congestion control or by the
1438  *	protocol layers.
1439  *
1440  *	return values:
1441  *	NET_RX_SUCCESS	(no congestion)
1442  *	NET_RX_CN_LOW   (low congestion)
1443  *	NET_RX_CN_MOD   (moderate congestion)
1444  *	NET_RX_CN_HIGH  (high congestion)
1445  *	NET_RX_DROP     (packet was dropped)
1446  *
1447  */
1448 
1449 int netif_rx(struct sk_buff *skb)
1450 {
1451 	int this_cpu;
1452 	struct softnet_data *queue;
1453 	unsigned long flags;
1454 
1455 	/* if netpoll wants it, pretend we never saw it */
1456 	if (netpoll_rx(skb))
1457 		return NET_RX_DROP;
1458 
1459 	if (!skb->stamp.tv_sec)
1460 		net_timestamp(&skb->stamp);
1461 
1462 	/*
1463 	 * The code is rearranged so that the path is the most
1464 	 * short when CPU is congested, but is still operating.
1465 	 */
1466 	local_irq_save(flags);
1467 	this_cpu = smp_processor_id();
1468 	queue = &__get_cpu_var(softnet_data);
1469 
1470 	__get_cpu_var(netdev_rx_stat).total++;
1471 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1472 		if (queue->input_pkt_queue.qlen) {
1473 			if (queue->throttle)
1474 				goto drop;
1475 
1476 enqueue:
1477 			dev_hold(skb->dev);
1478 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1479 #ifndef OFFLINE_SAMPLE
1480 			get_sample_stats(this_cpu);
1481 #endif
1482 			local_irq_restore(flags);
1483 			return queue->cng_level;
1484 		}
1485 
1486 		if (queue->throttle)
1487 			queue->throttle = 0;
1488 
1489 		netif_rx_schedule(&queue->backlog_dev);
1490 		goto enqueue;
1491 	}
1492 
1493 	if (!queue->throttle) {
1494 		queue->throttle = 1;
1495 		__get_cpu_var(netdev_rx_stat).throttled++;
1496 	}
1497 
1498 drop:
1499 	__get_cpu_var(netdev_rx_stat).dropped++;
1500 	local_irq_restore(flags);
1501 
1502 	kfree_skb(skb);
1503 	return NET_RX_DROP;
1504 }
1505 
/*
 * netif_rx() wrapper that runs pending softirqs itself after queueing
 * the packet.  Returns the netif_rx() result.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int rc;

	preempt_disable();

	rc = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();
	return rc;
}

EXPORT_SYMBOL(netif_rx_ni);
1520 
1521 static __inline__ void skb_bond(struct sk_buff *skb)
1522 {
1523 	struct net_device *dev = skb->dev;
1524 
1525 	if (dev->master) {
1526 		skb->real_dev = skb->dev;
1527 		skb->dev = dev->master;
1528 	}
1529 }
1530 
1531 static void net_tx_action(struct softirq_action *h)
1532 {
1533 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1534 
1535 	if (sd->completion_queue) {
1536 		struct sk_buff *clist;
1537 
1538 		local_irq_disable();
1539 		clist = sd->completion_queue;
1540 		sd->completion_queue = NULL;
1541 		local_irq_enable();
1542 
1543 		while (clist) {
1544 			struct sk_buff *skb = clist;
1545 			clist = clist->next;
1546 
1547 			BUG_TRAP(!atomic_read(&skb->users));
1548 			__kfree_skb(skb);
1549 		}
1550 	}
1551 
1552 	if (sd->output_queue) {
1553 		struct net_device *head;
1554 
1555 		local_irq_disable();
1556 		head = sd->output_queue;
1557 		sd->output_queue = NULL;
1558 		local_irq_enable();
1559 
1560 		while (head) {
1561 			struct net_device *dev = head;
1562 			head = head->next_sched;
1563 
1564 			smp_mb__before_clear_bit();
1565 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1566 
1567 			if (spin_trylock(&dev->queue_lock)) {
1568 				qdisc_run(dev);
1569 				spin_unlock(&dev->queue_lock);
1570 			} else {
1571 				netif_schedule(dev);
1572 			}
1573 		}
1574 	}
1575 }
1576 
1577 static __inline__ int deliver_skb(struct sk_buff *skb,
1578 				  struct packet_type *pt_prev)
1579 {
1580 	atomic_inc(&skb->users);
1581 	return pt_prev->func(skb, skb->dev, pt_prev);
1582 }
1583 
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 * Give the bridge hook a chance to take the packet.  Loopback packets
 * and devices without a bridge port are left alone (returns 0).  A
 * pending handler in *pt_prev is served first and then cleared.
 * Otherwise returns whatever br_handle_frame_hook() returns.
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK)
		return 0;

	port = rcu_dereference((*pskb)->dev->br_port);
	if (port == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret)	(0)
#endif
1610 
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */

/*
 * Run @skb through the ingress qdisc of its device, if there is one.
 * Returns TC_ACT_OK when no ingress qdisc is attached, TC_ACT_SHOT when
 * the redirect loop counter exceeds MAX_RED_LOOP, otherwise the verdict
 * of the qdisc's enqueue operation.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int result = TC_ACT_OK;
	__u32 ttl;

	if (!dev->qdisc_ingress)
		return result;

	ttl = (__u32) G_TC_RTTL(skb->tc_verd);
	if (ttl > MAX_RED_LOOP) {
		printk("Redir loop detected Dropping packet (%s->%s)\n",
			skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
		return TC_ACT_SHOT;
	}
	ttl++;

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

	if (skb->input_dev == NULL) {
		skb->input_dev = skb->dev;
		printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
	}

	/* Re-check under the lock before using the qdisc. */
	spin_lock(&dev->ingress_lock);
	q = dev->qdisc_ingress;
	if (q != NULL)
		result = q->enqueue(skb, q);
	spin_unlock(&dev->ingress_lock);

	return result;
}
#endif
1651 
/*
 * Deliver a received packet to the protocol handlers.
 *
 * The packet goes first to every matching tap on ptype_all, then (with
 * CONFIG_NET_CLS_ACT) through the ingress filter, the diverter and the
 * bridge hook, and finally to the handlers registered for skb->protocol.
 *
 * Delivery is deferred by one step through pt_prev: each matching
 * handler but the last is called via deliver_skb(), which takes an extra
 * reference; the last one is called directly and thereby receives the
 * caller's reference.  When nobody matches, the skb is freed here.
 *
 * Returns the result of the last handler called, NET_RX_DROP when the
 * packet is taken by netpoll or has no handler, or the ingress filter
 * verdict when that filter shot or stole the packet.
 */
int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	int ret = NET_RX_DROP;
	unsigned short type;

	/* if we've gotten here through NAPI, check netpoll */
	if (skb->dev->poll && netpoll_rx(skb))
		return NET_RX_DROP;

	/* A zero tv_sec means the packet has not been stamped yet. */
	if (!skb->stamp.tv_sec)
		net_timestamp(&skb->stamp);

	skb_bond(skb);

	__get_cpu_var(netdev_rx_stat).total++;

	/* Network and transport headers start at the current data pointer. */
	skb->h.raw = skb->nh.raw = skb->data;
	skb->mac_len = skb->nh.raw - skb->mac.raw;

	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	/* TC_NCLS set: clear the mark and skip taps and ingress filter. */
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	/* Taps: handlers bound to this device or to any device. */
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	if (pt_prev) {
		ret = deliver_skb(skb, pt_prev);
		pt_prev = NULL; /* no one else should process this afterwards */
	} else {
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	ret = ing_filter(skb);

	/* Shot or stolen by the ingress filter: the packet ends here. */
	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
		kfree_skb(skb);
		goto out;
	}

	skb->tc_verd = 0;
ncls:
#endif

	handle_diverter(skb);

	/* Non-zero means the bridge code took care of the packet. */
	if (handle_bridge(&skb, &pt_prev, &ret))
		goto out;

	/* Protocol handlers are hashed on the low 4 bits of the type. */
	type = skb->protocol;
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		/* Last handler: called without an extra reference. */
		ret = pt_prev->func(skb, skb->dev, pt_prev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not able to escape explaining
		 * me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
1739 
1740 static int process_backlog(struct net_device *backlog_dev, int *budget)
1741 {
1742 	int work = 0;
1743 	int quota = min(backlog_dev->quota, *budget);
1744 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1745 	unsigned long start_time = jiffies;
1746 
1747 	backlog_dev->weight = weight_p;
1748 	for (;;) {
1749 		struct sk_buff *skb;
1750 		struct net_device *dev;
1751 
1752 		local_irq_disable();
1753 		skb = __skb_dequeue(&queue->input_pkt_queue);
1754 		if (!skb)
1755 			goto job_done;
1756 		local_irq_enable();
1757 
1758 		dev = skb->dev;
1759 
1760 		netif_receive_skb(skb);
1761 
1762 		dev_put(dev);
1763 
1764 		work++;
1765 
1766 		if (work >= quota || jiffies - start_time > 1)
1767 			break;
1768 
1769 	}
1770 
1771 	backlog_dev->quota -= work;
1772 	*budget -= work;
1773 	return -1;
1774 
1775 job_done:
1776 	backlog_dev->quota -= work;
1777 	*budget -= work;
1778 
1779 	list_del(&backlog_dev->poll_list);
1780 	smp_mb__before_clear_bit();
1781 	netif_poll_enable(backlog_dev);
1782 
1783 	if (queue->throttle)
1784 		queue->throttle = 0;
1785 	local_irq_enable();
1786 	return 0;
1787 }
1788 
1789 static void net_rx_action(struct softirq_action *h)
1790 {
1791 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1792 	unsigned long start_time = jiffies;
1793 	int budget = netdev_max_backlog;
1794 
1795 
1796 	local_irq_disable();
1797 
1798 	while (!list_empty(&queue->poll_list)) {
1799 		struct net_device *dev;
1800 
1801 		if (budget <= 0 || jiffies - start_time > 1)
1802 			goto softnet_break;
1803 
1804 		local_irq_enable();
1805 
1806 		dev = list_entry(queue->poll_list.next,
1807 				 struct net_device, poll_list);
1808 		netpoll_poll_lock(dev);
1809 
1810 		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1811 			netpoll_poll_unlock(dev);
1812 			local_irq_disable();
1813 			list_del(&dev->poll_list);
1814 			list_add_tail(&dev->poll_list, &queue->poll_list);
1815 			if (dev->quota < 0)
1816 				dev->quota += dev->weight;
1817 			else
1818 				dev->quota = dev->weight;
1819 		} else {
1820 			netpoll_poll_unlock(dev);
1821 			dev_put(dev);
1822 			local_irq_disable();
1823 		}
1824 	}
1825 out:
1826 	local_irq_enable();
1827 	return;
1828 
1829 softnet_break:
1830 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
1831 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1832 	goto out;
1833 }
1834 
1835 static gifconf_func_t * gifconf_list [NPROTO];
1836 
1837 /**
1838  *	register_gifconf	-	register a SIOCGIF handler
1839  *	@family: Address family
1840  *	@gifconf: Function handler
1841  *
1842  *	Register protocol dependent address dumping routines. The handler
1843  *	that is passed must not be freed or reused until it has been replaced
1844  *	by another handler.
1845  */
1846 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1847 {
1848 	if (family >= NPROTO)
1849 		return -EINVAL;
1850 	gifconf_list[family] = gifconf;
1851 	return 0;
1852 }
1853 
1854 
1855 /*
1856  *	Map an interface index to its name (SIOCGIFNAME)
1857  */
1858 
1859 /*
1860  *	We need this ioctl for efficient implementation of the
1861  *	if_indextoname() function required by the IPv6 API.  Without
1862  *	it, we would have to search all the interfaces to find a
1863  *	match.  --pb
1864  */
1865 
1866 static int dev_ifname(struct ifreq __user *arg)
1867 {
1868 	struct net_device *dev;
1869 	struct ifreq ifr;
1870 
1871 	/*
1872 	 *	Fetch the caller's info block.
1873 	 */
1874 
1875 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1876 		return -EFAULT;
1877 
1878 	read_lock(&dev_base_lock);
1879 	dev = __dev_get_by_index(ifr.ifr_ifindex);
1880 	if (!dev) {
1881 		read_unlock(&dev_base_lock);
1882 		return -ENODEV;
1883 	}
1884 
1885 	strcpy(ifr.ifr_name, dev->name);
1886 	read_unlock(&dev_base_lock);
1887 
1888 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1889 		return -EFAULT;
1890 	return 0;
1891 }
1892 
1893 /*
1894  *	Perform a SIOCGIFCONF call. This structure will change
1895  *	size eventually, and there is nothing I can do about it.
1896  *	Thus we will need a 'compatibility mode'.
1897  */
1898 
1899 static int dev_ifconf(char __user *arg)
1900 {
1901 	struct ifconf ifc;
1902 	struct net_device *dev;
1903 	char __user *pos;
1904 	int len;
1905 	int total;
1906 	int i;
1907 
1908 	/*
1909 	 *	Fetch the caller's info block.
1910 	 */
1911 
1912 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1913 		return -EFAULT;
1914 
1915 	pos = ifc.ifc_buf;
1916 	len = ifc.ifc_len;
1917 
1918 	/*
1919 	 *	Loop over the interfaces, and write an info block for each.
1920 	 */
1921 
1922 	total = 0;
1923 	for (dev = dev_base; dev; dev = dev->next) {
1924 		for (i = 0; i < NPROTO; i++) {
1925 			if (gifconf_list[i]) {
1926 				int done;
1927 				if (!pos)
1928 					done = gifconf_list[i](dev, NULL, 0);
1929 				else
1930 					done = gifconf_list[i](dev, pos + total,
1931 							       len - total);
1932 				if (done < 0)
1933 					return -EFAULT;
1934 				total += done;
1935 			}
1936 		}
1937   	}
1938 
1939 	/*
1940 	 *	All done.  Write the updated control block back to the caller.
1941 	 */
1942 	ifc.ifc_len = total;
1943 
1944 	/*
1945 	 * 	Both BSD and Solaris return 0 here, so we do too.
1946 	 */
1947 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1948 }
1949 
1950 #ifdef CONFIG_PROC_FS
1951 /*
1952  *	This is invoked by the /proc filesystem handler to display a device
1953  *	in detail.
1954  */
1955 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1956 {
1957 	struct net_device *dev;
1958 	loff_t i;
1959 
1960 	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1961 
1962 	return i == pos ? dev : NULL;
1963 }
1964 
1965 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1966 {
1967 	read_lock(&dev_base_lock);
1968 	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1969 }
1970 
1971 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1972 {
1973 	++*pos;
1974 	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1975 }
1976 
1977 void dev_seq_stop(struct seq_file *seq, void *v)
1978 {
1979 	read_unlock(&dev_base_lock);
1980 }
1981 
1982 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1983 {
1984 	if (dev->get_stats) {
1985 		struct net_device_stats *stats = dev->get_stats(dev);
1986 
1987 		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1988 				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1989 			   dev->name, stats->rx_bytes, stats->rx_packets,
1990 			   stats->rx_errors,
1991 			   stats->rx_dropped + stats->rx_missed_errors,
1992 			   stats->rx_fifo_errors,
1993 			   stats->rx_length_errors + stats->rx_over_errors +
1994 			     stats->rx_crc_errors + stats->rx_frame_errors,
1995 			   stats->rx_compressed, stats->multicast,
1996 			   stats->tx_bytes, stats->tx_packets,
1997 			   stats->tx_errors, stats->tx_dropped,
1998 			   stats->tx_fifo_errors, stats->collisions,
1999 			   stats->tx_carrier_errors +
2000 			     stats->tx_aborted_errors +
2001 			     stats->tx_window_errors +
2002 			     stats->tx_heartbeat_errors,
2003 			   stats->tx_compressed);
2004 	} else
2005 		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2006 }
2007 
2008 /*
2009  *	Called from the PROCfs module. This now uses the new arbitrary sized
2010  *	/proc/net interface to create /proc/net/dev
2011  */
2012 static int dev_seq_show(struct seq_file *seq, void *v)
2013 {
2014 	if (v == SEQ_START_TOKEN)
2015 		seq_puts(seq, "Inter-|   Receive                            "
2016 			      "                    |  Transmit\n"
2017 			      " face |bytes    packets errs drop fifo frame "
2018 			      "compressed multicast|bytes    packets errs "
2019 			      "drop fifo colls carrier compressed\n");
2020 	else
2021 		dev_seq_printf_stats(seq, v);
2022 	return 0;
2023 }
2024 
2025 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2026 {
2027 	struct netif_rx_stats *rc = NULL;
2028 
2029 	while (*pos < NR_CPUS)
2030 	       	if (cpu_online(*pos)) {
2031 			rc = &per_cpu(netdev_rx_stat, *pos);
2032 			break;
2033 		} else
2034 			++*pos;
2035 	return rc;
2036 }
2037 
2038 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2039 {
2040 	return softnet_get_online(pos);
2041 }
2042 
2043 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2044 {
2045 	++*pos;
2046 	return softnet_get_online(pos);
2047 }
2048 
/* seq_file stop: nothing was acquired in start, so nothing to release. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2052 
2053 static int softnet_seq_show(struct seq_file *seq, void *v)
2054 {
2055 	struct netif_rx_stats *s = v;
2056 
2057 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2058 		   s->total, s->dropped, s->time_squeeze, s->throttled,
2059 		   s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
2060 		   s->fastroute_deferred_out,
2061 #if 0
2062 		   s->fastroute_latency_reduction
2063 #else
2064 		   s->cpu_collision
2065 #endif
2066 		  );
2067 	return 0;
2068 }
2069 
2070 static struct seq_operations dev_seq_ops = {
2071 	.start = dev_seq_start,
2072 	.next  = dev_seq_next,
2073 	.stop  = dev_seq_stop,
2074 	.show  = dev_seq_show,
2075 };
2076 
2077 static int dev_seq_open(struct inode *inode, struct file *file)
2078 {
2079 	return seq_open(file, &dev_seq_ops);
2080 }
2081 
2082 static struct file_operations dev_seq_fops = {
2083 	.owner	 = THIS_MODULE,
2084 	.open    = dev_seq_open,
2085 	.read    = seq_read,
2086 	.llseek  = seq_lseek,
2087 	.release = seq_release,
2088 };
2089 
2090 static struct seq_operations softnet_seq_ops = {
2091 	.start = softnet_seq_start,
2092 	.next  = softnet_seq_next,
2093 	.stop  = softnet_seq_stop,
2094 	.show  = softnet_seq_show,
2095 };
2096 
2097 static int softnet_seq_open(struct inode *inode, struct file *file)
2098 {
2099 	return seq_open(file, &softnet_seq_ops);
2100 }
2101 
2102 static struct file_operations softnet_seq_fops = {
2103 	.owner	 = THIS_MODULE,
2104 	.open    = softnet_seq_open,
2105 	.read    = seq_read,
2106 	.llseek  = seq_lseek,
2107 	.release = seq_release,
2108 };
2109 
2110 #ifdef WIRELESS_EXT
2111 extern int wireless_proc_init(void);
2112 #else
2113 #define wireless_proc_init() 0
2114 #endif
2115 
2116 static int __init dev_proc_init(void)
2117 {
2118 	int rc = -ENOMEM;
2119 
2120 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2121 		goto out;
2122 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2123 		goto out_dev;
2124 	if (wireless_proc_init())
2125 		goto out_softnet;
2126 	rc = 0;
2127 out:
2128 	return rc;
2129 out_softnet:
2130 	proc_net_remove("softnet_stat");
2131 out_dev:
2132 	proc_net_remove("dev");
2133 	goto out;
2134 }
2135 #else
2136 #define dev_proc_init() 0
2137 #endif	/* CONFIG_PROC_FS */
2138 
2139 
2140 /**
2141  *	netdev_set_master	-	set up master/slave pair
2142  *	@slave: slave device
2143  *	@master: new master device
2144  *
2145  *	Changes the master device of the slave. Pass %NULL to break the
2146  *	bonding. The caller must hold the RTNL semaphore. On a failure
2147  *	a negative errno code is returned. On success the reference counts
2148  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2149  *	function returns zero.
2150  */
2151 int netdev_set_master(struct net_device *slave, struct net_device *master)
2152 {
2153 	struct net_device *old = slave->master;
2154 
2155 	ASSERT_RTNL();
2156 
2157 	if (master) {
2158 		if (old)
2159 			return -EBUSY;
2160 		dev_hold(master);
2161 	}
2162 
2163 	slave->master = master;
2164 
2165 	synchronize_net();
2166 
2167 	if (old)
2168 		dev_put(old);
2169 
2170 	if (master)
2171 		slave->flags |= IFF_SLAVE;
2172 	else
2173 		slave->flags &= ~IFF_SLAVE;
2174 
2175 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2176 	return 0;
2177 }
2178 
2179 /**
2180  *	dev_set_promiscuity	- update promiscuity count on a device
2181  *	@dev: device
2182  *	@inc: modifier
2183  *
2184  *	Add or remove promsicuity from a device. While the count in the device
2185  *	remains above zero the interface remains promiscuous. Once it hits zero
2186  *	the device reverts back to normal filtering operation. A negative inc
2187  *	value is used to drop promiscuity on the device.
2188  */
2189 void dev_set_promiscuity(struct net_device *dev, int inc)
2190 {
2191 	unsigned short old_flags = dev->flags;
2192 
2193 	dev->flags |= IFF_PROMISC;
2194 	if ((dev->promiscuity += inc) == 0)
2195 		dev->flags &= ~IFF_PROMISC;
2196 	if (dev->flags ^ old_flags) {
2197 		dev_mc_upload(dev);
2198 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2199 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2200 		       					       "left");
2201 	}
2202 }
2203 
2204 /**
2205  *	dev_set_allmulti	- update allmulti count on a device
2206  *	@dev: device
2207  *	@inc: modifier
2208  *
2209  *	Add or remove reception of all multicast frames to a device. While the
2210  *	count in the device remains above zero the interface remains listening
2211  *	to all interfaces. Once it hits zero the device reverts back to normal
2212  *	filtering operation. A negative @inc value is used to drop the counter
2213  *	when releasing a resource needing all multicasts.
2214  */
2215 
2216 void dev_set_allmulti(struct net_device *dev, int inc)
2217 {
2218 	unsigned short old_flags = dev->flags;
2219 
2220 	dev->flags |= IFF_ALLMULTI;
2221 	if ((dev->allmulti += inc) == 0)
2222 		dev->flags &= ~IFF_ALLMULTI;
2223 	if (dev->flags ^ old_flags)
2224 		dev_mc_upload(dev);
2225 }
2226 
2227 unsigned dev_get_flags(const struct net_device *dev)
2228 {
2229 	unsigned flags;
2230 
2231 	flags = (dev->flags & ~(IFF_PROMISC |
2232 				IFF_ALLMULTI |
2233 				IFF_RUNNING)) |
2234 		(dev->gflags & (IFF_PROMISC |
2235 				IFF_ALLMULTI));
2236 
2237 	if (netif_running(dev) && netif_carrier_ok(dev))
2238 		flags |= IFF_RUNNING;
2239 
2240 	return flags;
2241 }
2242 
2243 int dev_change_flags(struct net_device *dev, unsigned flags)
2244 {
2245 	int ret;
2246 	int old_flags = dev->flags;
2247 
2248 	/*
2249 	 *	Set the flags on our device.
2250 	 */
2251 
2252 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2253 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2254 			       IFF_AUTOMEDIA)) |
2255 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2256 				    IFF_ALLMULTI));
2257 
2258 	/*
2259 	 *	Load in the correct multicast list now the flags have changed.
2260 	 */
2261 
2262 	dev_mc_upload(dev);
2263 
2264 	/*
2265 	 *	Have we downed the interface. We handle IFF_UP ourselves
2266 	 *	according to user attempts to set it, rather than blindly
2267 	 *	setting it.
2268 	 */
2269 
2270 	ret = 0;
2271 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
2272 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2273 
2274 		if (!ret)
2275 			dev_mc_upload(dev);
2276 	}
2277 
2278 	if (dev->flags & IFF_UP &&
2279 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2280 					  IFF_VOLATILE)))
2281 		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2282 
2283 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2284 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2285 		dev->gflags ^= IFF_PROMISC;
2286 		dev_set_promiscuity(dev, inc);
2287 	}
2288 
2289 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2290 	   is important. Some (broken) drivers set IFF_PROMISC, when
2291 	   IFF_ALLMULTI is requested not asking us and not reporting.
2292 	 */
2293 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2294 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2295 		dev->gflags ^= IFF_ALLMULTI;
2296 		dev_set_allmulti(dev, inc);
2297 	}
2298 
2299 	if (old_flags ^ dev->flags)
2300 		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2301 
2302 	return ret;
2303 }
2304 
2305 int dev_set_mtu(struct net_device *dev, int new_mtu)
2306 {
2307 	int err;
2308 
2309 	if (new_mtu == dev->mtu)
2310 		return 0;
2311 
2312 	/*	MTU must be positive.	 */
2313 	if (new_mtu < 0)
2314 		return -EINVAL;
2315 
2316 	if (!netif_device_present(dev))
2317 		return -ENODEV;
2318 
2319 	err = 0;
2320 	if (dev->change_mtu)
2321 		err = dev->change_mtu(dev, new_mtu);
2322 	else
2323 		dev->mtu = new_mtu;
2324 	if (!err && dev->flags & IFF_UP)
2325 		notifier_call_chain(&netdev_chain,
2326 				    NETDEV_CHANGEMTU, dev);
2327 	return err;
2328 }
2329 
2330 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2331 {
2332 	int err;
2333 
2334 	if (!dev->set_mac_address)
2335 		return -EOPNOTSUPP;
2336 	if (sa->sa_family != dev->type)
2337 		return -EINVAL;
2338 	if (!netif_device_present(dev))
2339 		return -ENODEV;
2340 	err = dev->set_mac_address(dev, sa);
2341 	if (!err)
2342 		notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2343 	return err;
2344 }
2345 
2346 /*
2347  *	Perform the SIOCxIFxxx calls.
2348  */
2349 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2350 {
2351 	int err;
2352 	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2353 
2354 	if (!dev)
2355 		return -ENODEV;
2356 
2357 	switch (cmd) {
2358 		case SIOCGIFFLAGS:	/* Get interface flags */
2359 			ifr->ifr_flags = dev_get_flags(dev);
2360 			return 0;
2361 
2362 		case SIOCSIFFLAGS:	/* Set interface flags */
2363 			return dev_change_flags(dev, ifr->ifr_flags);
2364 
2365 		case SIOCGIFMETRIC:	/* Get the metric on the interface
2366 					   (currently unused) */
2367 			ifr->ifr_metric = 0;
2368 			return 0;
2369 
2370 		case SIOCSIFMETRIC:	/* Set the metric on the interface
2371 					   (currently unused) */
2372 			return -EOPNOTSUPP;
2373 
2374 		case SIOCGIFMTU:	/* Get the MTU of a device */
2375 			ifr->ifr_mtu = dev->mtu;
2376 			return 0;
2377 
2378 		case SIOCSIFMTU:	/* Set the MTU of a device */
2379 			return dev_set_mtu(dev, ifr->ifr_mtu);
2380 
2381 		case SIOCGIFHWADDR:
2382 			if (!dev->addr_len)
2383 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2384 			else
2385 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2386 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2387 			ifr->ifr_hwaddr.sa_family = dev->type;
2388 			return 0;
2389 
2390 		case SIOCSIFHWADDR:
2391 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2392 
2393 		case SIOCSIFHWBROADCAST:
2394 			if (ifr->ifr_hwaddr.sa_family != dev->type)
2395 				return -EINVAL;
2396 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2397 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2398 			notifier_call_chain(&netdev_chain,
2399 					    NETDEV_CHANGEADDR, dev);
2400 			return 0;
2401 
2402 		case SIOCGIFMAP:
2403 			ifr->ifr_map.mem_start = dev->mem_start;
2404 			ifr->ifr_map.mem_end   = dev->mem_end;
2405 			ifr->ifr_map.base_addr = dev->base_addr;
2406 			ifr->ifr_map.irq       = dev->irq;
2407 			ifr->ifr_map.dma       = dev->dma;
2408 			ifr->ifr_map.port      = dev->if_port;
2409 			return 0;
2410 
2411 		case SIOCSIFMAP:
2412 			if (dev->set_config) {
2413 				if (!netif_device_present(dev))
2414 					return -ENODEV;
2415 				return dev->set_config(dev, &ifr->ifr_map);
2416 			}
2417 			return -EOPNOTSUPP;
2418 
2419 		case SIOCADDMULTI:
2420 			if (!dev->set_multicast_list ||
2421 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2422 				return -EINVAL;
2423 			if (!netif_device_present(dev))
2424 				return -ENODEV;
2425 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2426 					  dev->addr_len, 1);
2427 
2428 		case SIOCDELMULTI:
2429 			if (!dev->set_multicast_list ||
2430 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2431 				return -EINVAL;
2432 			if (!netif_device_present(dev))
2433 				return -ENODEV;
2434 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2435 					     dev->addr_len, 1);
2436 
2437 		case SIOCGIFINDEX:
2438 			ifr->ifr_ifindex = dev->ifindex;
2439 			return 0;
2440 
2441 		case SIOCGIFTXQLEN:
2442 			ifr->ifr_qlen = dev->tx_queue_len;
2443 			return 0;
2444 
2445 		case SIOCSIFTXQLEN:
2446 			if (ifr->ifr_qlen < 0)
2447 				return -EINVAL;
2448 			dev->tx_queue_len = ifr->ifr_qlen;
2449 			return 0;
2450 
2451 		case SIOCSIFNAME:
2452 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2453 			return dev_change_name(dev, ifr->ifr_newname);
2454 
2455 		/*
2456 		 *	Unknown or private ioctl
2457 		 */
2458 
2459 		default:
2460 			if ((cmd >= SIOCDEVPRIVATE &&
2461 			    cmd <= SIOCDEVPRIVATE + 15) ||
2462 			    cmd == SIOCBONDENSLAVE ||
2463 			    cmd == SIOCBONDRELEASE ||
2464 			    cmd == SIOCBONDSETHWADDR ||
2465 			    cmd == SIOCBONDSLAVEINFOQUERY ||
2466 			    cmd == SIOCBONDINFOQUERY ||
2467 			    cmd == SIOCBONDCHANGEACTIVE ||
2468 			    cmd == SIOCGMIIPHY ||
2469 			    cmd == SIOCGMIIREG ||
2470 			    cmd == SIOCSMIIREG ||
2471 			    cmd == SIOCBRADDIF ||
2472 			    cmd == SIOCBRDELIF ||
2473 			    cmd == SIOCWANDEV) {
2474 				err = -EOPNOTSUPP;
2475 				if (dev->do_ioctl) {
2476 					if (netif_device_present(dev))
2477 						err = dev->do_ioctl(dev, ifr,
2478 								    cmd);
2479 					else
2480 						err = -ENODEV;
2481 				}
2482 			} else
2483 				err = -EINVAL;
2484 
2485 	}
2486 	return err;
2487 }
2488 
2489 /*
2490  *	This function handles all "interface"-type I/O control requests. The actual
2491  *	'doing' part of this is dev_ifsioc above.
2492  */
2493 
2494 /**
2495  *	dev_ioctl	-	network device ioctl
2496  *	@cmd: command to issue
2497  *	@arg: pointer to a struct ifreq in user space
2498  *
2499  *	Issue ioctl functions to devices. This is normally called by the
2500  *	user space syscall interfaces but can sometimes be useful for
2501  *	other purposes. The return value is the return from the syscall if
2502  *	positive or a negative errno code on error.
2503  */
2504 
2505 int dev_ioctl(unsigned int cmd, void __user *arg)
2506 {
2507 	struct ifreq ifr;
2508 	int ret;
2509 	char *colon;
2510 
2511 	/* One special case: SIOCGIFCONF takes ifconf argument
2512 	   and requires shared lock, because it sleeps writing
2513 	   to user space.
2514 	 */
2515 
2516 	if (cmd == SIOCGIFCONF) {
2517 		rtnl_shlock();
2518 		ret = dev_ifconf((char __user *) arg);
2519 		rtnl_shunlock();
2520 		return ret;
2521 	}
2522 	if (cmd == SIOCGIFNAME)
2523 		return dev_ifname((struct ifreq __user *)arg);
2524 
2525 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2526 		return -EFAULT;
2527 
2528 	ifr.ifr_name[IFNAMSIZ-1] = 0;
2529 
2530 	colon = strchr(ifr.ifr_name, ':');
2531 	if (colon)
2532 		*colon = 0;
2533 
2534 	/*
2535 	 *	See which interface the caller is talking about.
2536 	 */
2537 
2538 	switch (cmd) {
2539 		/*
2540 		 *	These ioctl calls:
2541 		 *	- can be done by all.
2542 		 *	- atomic and do not require locking.
2543 		 *	- return a value
2544 		 */
2545 		case SIOCGIFFLAGS:
2546 		case SIOCGIFMETRIC:
2547 		case SIOCGIFMTU:
2548 		case SIOCGIFHWADDR:
2549 		case SIOCGIFSLAVE:
2550 		case SIOCGIFMAP:
2551 		case SIOCGIFINDEX:
2552 		case SIOCGIFTXQLEN:
2553 			dev_load(ifr.ifr_name);
2554 			read_lock(&dev_base_lock);
2555 			ret = dev_ifsioc(&ifr, cmd);
2556 			read_unlock(&dev_base_lock);
2557 			if (!ret) {
2558 				if (colon)
2559 					*colon = ':';
2560 				if (copy_to_user(arg, &ifr,
2561 						 sizeof(struct ifreq)))
2562 					ret = -EFAULT;
2563 			}
2564 			return ret;
2565 
2566 		case SIOCETHTOOL:
2567 			dev_load(ifr.ifr_name);
2568 			rtnl_lock();
2569 			ret = dev_ethtool(&ifr);
2570 			rtnl_unlock();
2571 			if (!ret) {
2572 				if (colon)
2573 					*colon = ':';
2574 				if (copy_to_user(arg, &ifr,
2575 						 sizeof(struct ifreq)))
2576 					ret = -EFAULT;
2577 			}
2578 			return ret;
2579 
2580 		/*
2581 		 *	These ioctl calls:
2582 		 *	- require superuser power.
2583 		 *	- require strict serialization.
2584 		 *	- return a value
2585 		 */
2586 		case SIOCGMIIPHY:
2587 		case SIOCGMIIREG:
2588 		case SIOCSIFNAME:
2589 			if (!capable(CAP_NET_ADMIN))
2590 				return -EPERM;
2591 			dev_load(ifr.ifr_name);
2592 			rtnl_lock();
2593 			ret = dev_ifsioc(&ifr, cmd);
2594 			rtnl_unlock();
2595 			if (!ret) {
2596 				if (colon)
2597 					*colon = ':';
2598 				if (copy_to_user(arg, &ifr,
2599 						 sizeof(struct ifreq)))
2600 					ret = -EFAULT;
2601 			}
2602 			return ret;
2603 
2604 		/*
2605 		 *	These ioctl calls:
2606 		 *	- require superuser power.
2607 		 *	- require strict serialization.
2608 		 *	- do not return a value
2609 		 */
2610 		case SIOCSIFFLAGS:
2611 		case SIOCSIFMETRIC:
2612 		case SIOCSIFMTU:
2613 		case SIOCSIFMAP:
2614 		case SIOCSIFHWADDR:
2615 		case SIOCSIFSLAVE:
2616 		case SIOCADDMULTI:
2617 		case SIOCDELMULTI:
2618 		case SIOCSIFHWBROADCAST:
2619 		case SIOCSIFTXQLEN:
2620 		case SIOCSMIIREG:
2621 		case SIOCBONDENSLAVE:
2622 		case SIOCBONDRELEASE:
2623 		case SIOCBONDSETHWADDR:
2624 		case SIOCBONDSLAVEINFOQUERY:
2625 		case SIOCBONDINFOQUERY:
2626 		case SIOCBONDCHANGEACTIVE:
2627 		case SIOCBRADDIF:
2628 		case SIOCBRDELIF:
2629 			if (!capable(CAP_NET_ADMIN))
2630 				return -EPERM;
2631 			dev_load(ifr.ifr_name);
2632 			rtnl_lock();
2633 			ret = dev_ifsioc(&ifr, cmd);
2634 			rtnl_unlock();
2635 			return ret;
2636 
2637 		case SIOCGIFMEM:
2638 			/* Get the per device memory space. We can add this but
2639 			 * currently do not support it */
2640 		case SIOCSIFMEM:
2641 			/* Set the per device memory buffer space.
2642 			 * Not applicable in our case */
2643 		case SIOCSIFLINK:
2644 			return -EINVAL;
2645 
2646 		/*
2647 		 *	Unknown or private ioctl.
2648 		 */
2649 		default:
2650 			if (cmd == SIOCWANDEV ||
2651 			    (cmd >= SIOCDEVPRIVATE &&
2652 			     cmd <= SIOCDEVPRIVATE + 15)) {
2653 				dev_load(ifr.ifr_name);
2654 				rtnl_lock();
2655 				ret = dev_ifsioc(&ifr, cmd);
2656 				rtnl_unlock();
2657 				if (!ret && copy_to_user(arg, &ifr,
2658 							 sizeof(struct ifreq)))
2659 					ret = -EFAULT;
2660 				return ret;
2661 			}
2662 #ifdef WIRELESS_EXT
2663 			/* Take care of Wireless Extensions */
2664 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2665 				/* If command is `set a parameter', or
2666 				 * `get the encoding parameters', check if
2667 				 * the user has the right to do it */
2668 				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2669 					if (!capable(CAP_NET_ADMIN))
2670 						return -EPERM;
2671 				}
2672 				dev_load(ifr.ifr_name);
2673 				rtnl_lock();
2674 				/* Follow me in net/core/wireless.c */
2675 				ret = wireless_process_ioctl(&ifr, cmd);
2676 				rtnl_unlock();
2677 				if (IW_IS_GET(cmd) &&
2678 				    copy_to_user(arg, &ifr,
2679 					    	 sizeof(struct ifreq)))
2680 					ret = -EFAULT;
2681 				return ret;
2682 			}
2683 #endif	/* WIRELESS_EXT */
2684 			return -EINVAL;
2685 	}
2686 }
2687 
2688 
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Advances a static counter, wrapping back to 1 once it is no longer
 *	positive, until it reaches a value for which __dev_get_by_index()
 *	finds no device, and returns that value. The caller must hold the
 *	rtnl semaphore or the dev_base_lock to be sure it remains unique.
 */
static int dev_new_index(void)
{
	static int ifindex;

	do {
		ifindex++;
		if (ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(ifindex));

	return ifindex;
}
2706 
2707 static int dev_boot_phase = 1;
2708 
2709 /* Delayed registration/unregisteration */
2710 static DEFINE_SPINLOCK(net_todo_list_lock);
2711 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2712 
2713 static inline void net_set_todo(struct net_device *dev)
2714 {
2715 	spin_lock(&net_todo_list_lock);
2716 	list_add_tail(&dev->todo_list, &net_todo_list);
2717 	spin_unlock(&net_todo_list_lock);
2718 }
2719 
2720 /**
2721  *	register_netdevice	- register a network device
2722  *	@dev: device to register
2723  *
2724  *	Take a completed network device structure and add it to the kernel
2725  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2726  *	chain. 0 is returned on success. A negative errno code is returned
2727  *	on a failure to set up the device, or if the name is a duplicate.
2728  *
2729  *	Callers must hold the rtnl semaphore. You may want
2730  *	register_netdev() instead of this.
2731  *
2732  *	BUGS:
2733  *	The locking appears insufficient to guarantee two parallel registers
2734  *	will not get the same name.
2735  */
2736 
2737 int register_netdevice(struct net_device *dev)
2738 {
2739 	struct hlist_head *head;
2740 	struct hlist_node *p;
2741 	int ret;
2742 
2743 	BUG_ON(dev_boot_phase);
2744 	ASSERT_RTNL();
2745 
2746 	/* When net_device's are persistent, this will be fatal. */
2747 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2748 
2749 	spin_lock_init(&dev->queue_lock);
2750 	spin_lock_init(&dev->xmit_lock);
2751 	dev->xmit_lock_owner = -1;
2752 #ifdef CONFIG_NET_CLS_ACT
2753 	spin_lock_init(&dev->ingress_lock);
2754 #endif
2755 
2756 	ret = alloc_divert_blk(dev);
2757 	if (ret)
2758 		goto out;
2759 
2760 	dev->iflink = -1;
2761 
2762 	/* Init, if this function is available */
2763 	if (dev->init) {
2764 		ret = dev->init(dev);
2765 		if (ret) {
2766 			if (ret > 0)
2767 				ret = -EIO;
2768 			goto out_err;
2769 		}
2770 	}
2771 
2772 	if (!dev_valid_name(dev->name)) {
2773 		ret = -EINVAL;
2774 		goto out_err;
2775 	}
2776 
2777 	dev->ifindex = dev_new_index();
2778 	if (dev->iflink == -1)
2779 		dev->iflink = dev->ifindex;
2780 
2781 	/* Check for existence of name */
2782 	head = dev_name_hash(dev->name);
2783 	hlist_for_each(p, head) {
2784 		struct net_device *d
2785 			= hlist_entry(p, struct net_device, name_hlist);
2786 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2787 			ret = -EEXIST;
2788  			goto out_err;
2789 		}
2790  	}
2791 
2792 	/* Fix illegal SG+CSUM combinations. */
2793 	if ((dev->features & NETIF_F_SG) &&
2794 	    !(dev->features & (NETIF_F_IP_CSUM |
2795 			       NETIF_F_NO_CSUM |
2796 			       NETIF_F_HW_CSUM))) {
2797 		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2798 		       dev->name);
2799 		dev->features &= ~NETIF_F_SG;
2800 	}
2801 
2802 	/* TSO requires that SG is present as well. */
2803 	if ((dev->features & NETIF_F_TSO) &&
2804 	    !(dev->features & NETIF_F_SG)) {
2805 		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2806 		       dev->name);
2807 		dev->features &= ~NETIF_F_TSO;
2808 	}
2809 
2810 	/*
2811 	 *	nil rebuild_header routine,
2812 	 *	that should be never called and used as just bug trap.
2813 	 */
2814 
2815 	if (!dev->rebuild_header)
2816 		dev->rebuild_header = default_rebuild_header;
2817 
2818 	/*
2819 	 *	Default initial state at registry is that the
2820 	 *	device is present.
2821 	 */
2822 
2823 	set_bit(__LINK_STATE_PRESENT, &dev->state);
2824 
2825 	dev->next = NULL;
2826 	dev_init_scheduler(dev);
2827 	write_lock_bh(&dev_base_lock);
2828 	*dev_tail = dev;
2829 	dev_tail = &dev->next;
2830 	hlist_add_head(&dev->name_hlist, head);
2831 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2832 	dev_hold(dev);
2833 	dev->reg_state = NETREG_REGISTERING;
2834 	write_unlock_bh(&dev_base_lock);
2835 
2836 	/* Notify protocols, that a new device appeared. */
2837 	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2838 
2839 	/* Finish registration after unlock */
2840 	net_set_todo(dev);
2841 	ret = 0;
2842 
2843 out:
2844 	return ret;
2845 out_err:
2846 	free_divert_blk(dev);
2847 	goto out;
2848 }
2849 
2850 /**
2851  *	register_netdev	- register a network device
2852  *	@dev: device to register
2853  *
2854  *	Take a completed network device structure and add it to the kernel
2855  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2856  *	chain. 0 is returned on success. A negative errno code is returned
2857  *	on a failure to set up the device, or if the name is a duplicate.
2858  *
2859  *	This is a wrapper around register_netdev that takes the rtnl semaphore
2860  *	and expands the device name if you passed a format string to
2861  *	alloc_netdev.
2862  */
2863 int register_netdev(struct net_device *dev)
2864 {
2865 	int err;
2866 
2867 	rtnl_lock();
2868 
2869 	/*
2870 	 * If the name is a format string the caller wants us to do a
2871 	 * name allocation.
2872 	 */
2873 	if (strchr(dev->name, '%')) {
2874 		err = dev_alloc_name(dev, dev->name);
2875 		if (err < 0)
2876 			goto out;
2877 	}
2878 
2879 	/*
2880 	 * Back compatibility hook. Kill this one in 2.5
2881 	 */
2882 	if (dev->name[0] == 0 || dev->name[0] == ' ') {
2883 		err = dev_alloc_name(dev, "eth%d");
2884 		if (err < 0)
2885 			goto out;
2886 	}
2887 
2888 	err = register_netdevice(dev);
2889 out:
2890 	rtnl_unlock();
2891 	return err;
2892 }
2893 EXPORT_SYMBOL(register_netdev);
2894 
2895 /*
2896  * netdev_wait_allrefs - wait until all references are gone.
2897  *
2898  * This is called when unregistering network devices.
2899  *
2900  * Any protocol or device that holds a reference should register
2901  * for netdevice notification, and cleanup and put back the
2902  * reference if they receive an UNREGISTER event.
2903  * We can get stuck here if buggy protocols don't correctly
2904  * call dev_put.
2905  */
2906 static void netdev_wait_allrefs(struct net_device *dev)
2907 {
2908 	unsigned long rebroadcast_time, warning_time;
2909 
2910 	rebroadcast_time = warning_time = jiffies;
2911 	while (atomic_read(&dev->refcnt) != 0) {
2912 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2913 			rtnl_shlock();
2914 
2915 			/* Rebroadcast unregister notification */
2916 			notifier_call_chain(&netdev_chain,
2917 					    NETDEV_UNREGISTER, dev);
2918 
2919 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2920 				     &dev->state)) {
2921 				/* We must not have linkwatch events
2922 				 * pending on unregister. If this
2923 				 * happens, we simply run the queue
2924 				 * unscheduled, resulting in a noop
2925 				 * for this device.
2926 				 */
2927 				linkwatch_run_queue();
2928 			}
2929 
2930 			rtnl_shunlock();
2931 
2932 			rebroadcast_time = jiffies;
2933 		}
2934 
2935 		msleep(250);
2936 
2937 		if (time_after(jiffies, warning_time + 10 * HZ)) {
2938 			printk(KERN_EMERG "unregister_netdevice: "
2939 			       "waiting for %s to become free. Usage "
2940 			       "count = %d\n",
2941 			       dev->name, atomic_read(&dev->refcnt));
2942 			warning_time = jiffies;
2943 		}
2944 	}
2945 }
2946 
2947 /* The sequence is:
2948  *
2949  *	rtnl_lock();
2950  *	...
2951  *	register_netdevice(x1);
2952  *	register_netdevice(x2);
2953  *	...
2954  *	unregister_netdevice(y1);
2955  *	unregister_netdevice(y2);
2956  *      ...
2957  *	rtnl_unlock();
2958  *	free_netdev(y1);
2959  *	free_netdev(y2);
2960  *
2961  * We are invoked by rtnl_unlock() after it drops the semaphore.
2962  * This allows us to deal with problems:
2963  * 1) We can create/delete sysfs objects which invoke hotplug
2964  *    without deadlocking with linkwatch via keventd.
2965  * 2) Since we run with the RTNL semaphore not held, we can sleep
2966  *    safely in order to wait for the netdev refcnt to drop to zero.
2967  */
2968 static DECLARE_MUTEX(net_todo_run_mutex);
2969 void netdev_run_todo(void)
2970 {
2971 	struct list_head list = LIST_HEAD_INIT(list);
2972 	int err;
2973 
2974 
2975 	/* Need to guard against multiple cpu's getting out of order. */
2976 	down(&net_todo_run_mutex);
2977 
2978 	/* Not safe to do outside the semaphore.  We must not return
2979 	 * until all unregister events invoked by the local processor
2980 	 * have been completed (either by this todo run, or one on
2981 	 * another cpu).
2982 	 */
2983 	if (list_empty(&net_todo_list))
2984 		goto out;
2985 
2986 	/* Snapshot list, allow later requests */
2987 	spin_lock(&net_todo_list_lock);
2988 	list_splice_init(&net_todo_list, &list);
2989 	spin_unlock(&net_todo_list_lock);
2990 
2991 	while (!list_empty(&list)) {
2992 		struct net_device *dev
2993 			= list_entry(list.next, struct net_device, todo_list);
2994 		list_del(&dev->todo_list);
2995 
2996 		switch(dev->reg_state) {
2997 		case NETREG_REGISTERING:
2998 			err = netdev_register_sysfs(dev);
2999 			if (err)
3000 				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
3001 				       dev->name, err);
3002 			dev->reg_state = NETREG_REGISTERED;
3003 			break;
3004 
3005 		case NETREG_UNREGISTERING:
3006 			netdev_unregister_sysfs(dev);
3007 			dev->reg_state = NETREG_UNREGISTERED;
3008 
3009 			netdev_wait_allrefs(dev);
3010 
3011 			/* paranoia */
3012 			BUG_ON(atomic_read(&dev->refcnt));
3013 			BUG_TRAP(!dev->ip_ptr);
3014 			BUG_TRAP(!dev->ip6_ptr);
3015 			BUG_TRAP(!dev->dn_ptr);
3016 
3017 
3018 			/* It must be the very last action,
3019 			 * after this 'dev' may point to freed up memory.
3020 			 */
3021 			if (dev->destructor)
3022 				dev->destructor(dev);
3023 			break;
3024 
3025 		default:
3026 			printk(KERN_ERR "network todo '%s' but state %d\n",
3027 			       dev->name, dev->reg_state);
3028 			break;
3029 		}
3030 	}
3031 
3032 out:
3033 	up(&net_todo_run_mutex);
3034 }
3035 
3036 /**
3037  *	alloc_netdev - allocate network device
3038  *	@sizeof_priv:	size of private data to allocate space for
3039  *	@name:		device name format string
3040  *	@setup:		callback to initialize device
3041  *
3042  *	Allocates a struct net_device with private data area for driver use
3043  *	and performs basic initialization.
3044  */
3045 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3046 		void (*setup)(struct net_device *))
3047 {
3048 	void *p;
3049 	struct net_device *dev;
3050 	int alloc_size;
3051 
3052 	/* ensure 32-byte alignment of both the device and private area */
3053 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3054 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3055 
3056 	p = kmalloc(alloc_size, GFP_KERNEL);
3057 	if (!p) {
3058 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3059 		return NULL;
3060 	}
3061 	memset(p, 0, alloc_size);
3062 
3063 	dev = (struct net_device *)
3064 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3065 	dev->padded = (char *)dev - (char *)p;
3066 
3067 	if (sizeof_priv)
3068 		dev->priv = netdev_priv(dev);
3069 
3070 	setup(dev);
3071 	strcpy(dev->name, name);
3072 	return dev;
3073 }
3074 EXPORT_SYMBOL(alloc_netdev);
3075 
3076 /**
3077  *	free_netdev - free network device
3078  *	@dev: device
3079  *
3080  *	This function does the last stage of destroying an allocated device
3081  * 	interface. The reference to the device object is released.
3082  *	If this is the last reference then it will be freed.
3083  */
3084 void free_netdev(struct net_device *dev)
3085 {
3086 #ifdef CONFIG_SYSFS
3087 	/*  Compatiablity with error handling in drivers */
3088 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3089 		kfree((char *)dev - dev->padded);
3090 		return;
3091 	}
3092 
3093 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3094 	dev->reg_state = NETREG_RELEASED;
3095 
3096 	/* will free via class release */
3097 	class_device_put(&dev->class_dev);
3098 #else
3099 	kfree((char *)dev - dev->padded);
3100 #endif
3101 }
3102 
/*
 * Synchronize with packet receive processing.
 *
 * Implemented as synchronize_rcu(); the might_sleep() annotation comes
 * first because this call may block.
 */
void synchronize_net(void)
{
	might_sleep();

	synchronize_rcu();
}
3109 
3110 /**
3111  *	unregister_netdevice - remove device from the kernel
3112  *	@dev: device
3113  *
3114  *	This function shuts down a device interface and removes it
3115  *	from the kernel tables. On success 0 is returned, on a failure
3116  *	a negative errno code is returned.
3117  *
3118  *	Callers must hold the rtnl semaphore.  You may want
3119  *	unregister_netdev() instead of this.
3120  */
3121 
3122 int unregister_netdevice(struct net_device *dev)
3123 {
3124 	struct net_device *d, **dp;
3125 
3126 	BUG_ON(dev_boot_phase);
3127 	ASSERT_RTNL();
3128 
3129 	/* Some devices call without registering for initialization unwind. */
3130 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3131 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3132 				  "was registered\n", dev->name, dev);
3133 		return -ENODEV;
3134 	}
3135 
3136 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3137 
3138 	/* If device is running, close it first. */
3139 	if (dev->flags & IFF_UP)
3140 		dev_close(dev);
3141 
3142 	/* And unlink it from device chain. */
3143 	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3144 		if (d == dev) {
3145 			write_lock_bh(&dev_base_lock);
3146 			hlist_del(&dev->name_hlist);
3147 			hlist_del(&dev->index_hlist);
3148 			if (dev_tail == &dev->next)
3149 				dev_tail = dp;
3150 			*dp = d->next;
3151 			write_unlock_bh(&dev_base_lock);
3152 			break;
3153 		}
3154 	}
3155 	if (!d) {
3156 		printk(KERN_ERR "unregister net_device: '%s' not found\n",
3157 		       dev->name);
3158 		return -ENODEV;
3159 	}
3160 
3161 	dev->reg_state = NETREG_UNREGISTERING;
3162 
3163 	synchronize_net();
3164 
3165 	/* Shutdown queueing discipline. */
3166 	dev_shutdown(dev);
3167 
3168 
3169 	/* Notify protocols, that we are about to destroy
3170 	   this device. They should clean all the things.
3171 	*/
3172 	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3173 
3174 	/*
3175 	 *	Flush the multicast chain
3176 	 */
3177 	dev_mc_discard(dev);
3178 
3179 	if (dev->uninit)
3180 		dev->uninit(dev);
3181 
3182 	/* Notifier chain MUST detach us from master device. */
3183 	BUG_TRAP(!dev->master);
3184 
3185 	free_divert_blk(dev);
3186 
3187 	/* Finish processing unregister after unlock */
3188 	net_set_todo(dev);
3189 
3190 	synchronize_net();
3191 
3192 	dev_put(dev);
3193 	return 0;
3194 }
3195 
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	Shuts down a device interface and removes it from the kernel
 *	tables by calling unregister_netdevice() with the rtnl semaphore
 *	held. The result of unregister_netdevice() is not reported back
 *	to the caller.
 *
 *	In general you want to use this and not unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();

	/* Return value intentionally unused: this wrapper is void. */
	unregister_netdevice(dev);

	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3216 
3217 #ifdef CONFIG_HOTPLUG_CPU
3218 static int dev_cpu_callback(struct notifier_block *nfb,
3219 			    unsigned long action,
3220 			    void *ocpu)
3221 {
3222 	struct sk_buff **list_skb;
3223 	struct net_device **list_net;
3224 	struct sk_buff *skb;
3225 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
3226 	struct softnet_data *sd, *oldsd;
3227 
3228 	if (action != CPU_DEAD)
3229 		return NOTIFY_OK;
3230 
3231 	local_irq_disable();
3232 	cpu = smp_processor_id();
3233 	sd = &per_cpu(softnet_data, cpu);
3234 	oldsd = &per_cpu(softnet_data, oldcpu);
3235 
3236 	/* Find end of our completion_queue. */
3237 	list_skb = &sd->completion_queue;
3238 	while (*list_skb)
3239 		list_skb = &(*list_skb)->next;
3240 	/* Append completion queue from offline CPU. */
3241 	*list_skb = oldsd->completion_queue;
3242 	oldsd->completion_queue = NULL;
3243 
3244 	/* Find end of our output_queue. */
3245 	list_net = &sd->output_queue;
3246 	while (*list_net)
3247 		list_net = &(*list_net)->next_sched;
3248 	/* Append output queue from offline CPU. */
3249 	*list_net = oldsd->output_queue;
3250 	oldsd->output_queue = NULL;
3251 
3252 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
3253 	local_irq_enable();
3254 
3255 	/* Process offline CPU's input_pkt_queue */
3256 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3257 		netif_rx(skb);
3258 
3259 	return NOTIFY_OK;
3260 }
3261 #endif /* CONFIG_HOTPLUG_CPU */
3262 
3263 
3264 /*
3265  *	Initialize the DEV module. At boot time this walks the device list and
3266  *	unhooks any devices that fail to initialise (normally hardware not
3267  *	present) and leaves us with a valid list of present and active devices.
3268  *
3269  */
3270 
3271 /*
3272  *       This is called single threaded during boot, so no need
3273  *       to take the rtnl semaphore.
3274  */
3275 static int __init net_dev_init(void)
3276 {
3277 	int i, rc = -ENOMEM;
3278 
3279 	BUG_ON(!dev_boot_phase);
3280 
3281 	net_random_init();
3282 
3283 	if (dev_proc_init())
3284 		goto out;
3285 
3286 	if (netdev_sysfs_init())
3287 		goto out;
3288 
3289 	INIT_LIST_HEAD(&ptype_all);
3290 	for (i = 0; i < 16; i++)
3291 		INIT_LIST_HEAD(&ptype_base[i]);
3292 
3293 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3294 		INIT_HLIST_HEAD(&dev_name_head[i]);
3295 
3296 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3297 		INIT_HLIST_HEAD(&dev_index_head[i]);
3298 
3299 	/*
3300 	 *	Initialise the packet receive queues.
3301 	 */
3302 
3303 	for (i = 0; i < NR_CPUS; i++) {
3304 		struct softnet_data *queue;
3305 
3306 		queue = &per_cpu(softnet_data, i);
3307 		skb_queue_head_init(&queue->input_pkt_queue);
3308 		queue->throttle = 0;
3309 		queue->cng_level = 0;
3310 		queue->avg_blog = 10; /* arbitrary non-zero */
3311 		queue->completion_queue = NULL;
3312 		INIT_LIST_HEAD(&queue->poll_list);
3313 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3314 		queue->backlog_dev.weight = weight_p;
3315 		queue->backlog_dev.poll = process_backlog;
3316 		atomic_set(&queue->backlog_dev.refcnt, 1);
3317 	}
3318 
3319 #ifdef OFFLINE_SAMPLE
3320 	samp_timer.expires = jiffies + (10 * HZ);
3321 	add_timer(&samp_timer);
3322 #endif
3323 
3324 	dev_boot_phase = 0;
3325 
3326 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3327 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3328 
3329 	hotcpu_notifier(dev_cpu_callback, 0);
3330 	dst_init();
3331 	dev_mcast_init();
3332 	rc = 0;
3333 out:
3334 	return rc;
3335 }
3336 
3337 subsys_initcall(net_dev_init);
3338 
/*
 * Symbols exported from this file, listed in alphabetical order.
 */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(__skb_linearize);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_ioctl);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Bridge hooks: exported only when bridging is built in or built as a module. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

/* dev_load is exported only under CONFIG_KMOD. */
#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

/* Per-CPU softnet state. */
EXPORT_PER_CPU_SYMBOL(softnet_data);
3386