xref: /linux/net/core/dev.c (revision e7d163f7666560c90b163907b9d96ec6207e0f6f)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/config.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/string.h>
84 #include <linux/mm.h>
85 #include <linux/socket.h>
86 #include <linux/sockios.h>
87 #include <linux/errno.h>
88 #include <linux/interrupt.h>
89 #include <linux/if_ether.h>
90 #include <linux/netdevice.h>
91 #include <linux/etherdevice.h>
92 #include <linux/notifier.h>
93 #include <linux/skbuff.h>
94 #include <net/sock.h>
95 #include <linux/rtnetlink.h>
96 #include <linux/proc_fs.h>
97 #include <linux/seq_file.h>
98 #include <linux/stat.h>
99 #include <linux/if_bridge.h>
100 #include <linux/divert.h>
101 #include <net/dst.h>
102 #include <net/pkt_sched.h>
103 #include <net/checksum.h>
104 #include <linux/highmem.h>
105 #include <linux/init.h>
106 #include <linux/kmod.h>
107 #include <linux/module.h>
108 #include <linux/kallsyms.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #ifdef CONFIG_NET_RADIO
113 #include <linux/wireless.h>		/* Note : will define WIRELESS_EXT */
114 #include <net/iw_handler.h>
115 #endif	/* CONFIG_NET_RADIO */
116 #include <asm/current.h>
117 
118 /*
119  *	The list of packet types we will receive (as opposed to discard)
120  *	and the routines to invoke.
121  *
122  *	Why 16. Because with 16 the only overlap we get on a hash of the
123  *	low nibble of the protocol value is RARP/SNAP/X.25.
124  *
125  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
126  *             sure which should go first, but I bet it won't make much
127  *             difference if we are running VLANs.  The good news is that
128  *             this protocol won't be in the list unless compiled in, so
129  *             the average user (w/out VLANs) will not be adversely affected.
130  *             --BLG
131  *
132  *		0800	IP
133  *		8100    802.1Q VLAN
134  *		0001	802.3
135  *		0002	AX.25
136  *		0004	802.2
137  *		8035	RARP
138  *		0005	SNAP
139  *		0805	X.25
140  *		0806	ARP
141  *		8137	IPX
142  *		0009	Localtalk
143  *		86DD	IPv6
144  */
145 
146 static DEFINE_SPINLOCK(ptype_lock);
147 static struct list_head ptype_base[16];	/* 16 way hashed list */
148 static struct list_head ptype_all;		/* Taps */
149 
150 /*
151  * The @dev_base list is protected by @dev_base_lock and the rtnl
152  * semaphore.
153  *
154  * Pure readers hold dev_base_lock for reading.
155  *
156  * Writers must hold the rtnl semaphore while they loop through the
157  * dev_base list, and hold dev_base_lock for writing when they do the
158  * actual updates.  This allows pure readers to access the list even
159  * while a writer is preparing to update it.
160  *
161  * To put it another way, dev_base_lock is held for writing only to
162  * protect against pure readers; the rtnl semaphore provides the
163  * protection against other writers.
164  *
165  * For example usages, see register_netdevice() and
166  * unregister_netdevice(), which must be called with the rtnl
167  * semaphore held.
168  */
169 struct net_device *dev_base;
170 static struct net_device **dev_tail = &dev_base;
171 DEFINE_RWLOCK(dev_base_lock);
172 
173 EXPORT_SYMBOL(dev_base);
174 EXPORT_SYMBOL(dev_base_lock);
175 
176 #define NETDEV_HASHBITS	8
177 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
178 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
179 
180 static inline struct hlist_head *dev_name_hash(const char *name)
181 {
182 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
183 	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
184 }
185 
186 static inline struct hlist_head *dev_index_hash(int ifindex)
187 {
188 	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
189 }
190 
191 /*
192  *	Our notifier list
193  */
194 
195 static struct notifier_block *netdev_chain;
196 
197 /*
198  *	Device drivers call our routines to queue packets here. We empty the
199  *	queue in the local softnet handler.
200  */
201 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
202 
203 #ifdef CONFIG_SYSFS
204 extern int netdev_sysfs_init(void);
205 extern int netdev_register_sysfs(struct net_device *);
206 extern void netdev_unregister_sysfs(struct net_device *);
207 #else
208 #define netdev_sysfs_init()	 	(0)
209 #define netdev_register_sysfs(dev)	(0)
210 #define	netdev_unregister_sysfs(dev)	do { } while(0)
211 #endif
212 
213 
214 /*******************************************************************************
215 
216 		Protocol management and registration routines
217 
218 *******************************************************************************/
219 
220 /*
221  *	For efficiency
222  */
223 
224 int netdev_nit;
225 
226 /*
227  *	Add a protocol ID to the list. Now that the input handler is
228  *	smarter we can dispense with all the messy stuff that used to be
229  *	here.
230  *
231  *	BEWARE!!! Protocol handlers that mangle input packets
232  *	MUST BE last in the hash buckets, and checking protocol
233  *	handlers MUST start from the promiscuous ptype_all chain
234  *	in net_bh.  That is true now; do not change it.
235  *	Explanation: if a handler that mangles packets were first
236  *	on the list, it could not detect that the packet is cloned
237  *	and should be copied-on-write, so it would modify the data
238  *	in place and subsequent readers would see a broken packet.
239  *							--ANK (980803)
240  */
241 
242 /**
243  *	dev_add_pack - add packet handler
244  *	@pt: packet type declaration
245  *
246  *	Add a protocol handler to the networking stack. The passed &packet_type
247  *	is linked into kernel lists and may not be freed until it has been
248  *	removed from the kernel lists.
249  *
250  *	This call does not sleep, therefore it cannot
251  *	guarantee that all CPUs in the middle of receiving packets
252  *	will see the new packet type (until the next received packet).
253  */
254 
255 void dev_add_pack(struct packet_type *pt)
256 {
257 	int hash;
258 
259 	spin_lock_bh(&ptype_lock);
260 	if (pt->type == htons(ETH_P_ALL)) {
261 		netdev_nit++;
262 		list_add_rcu(&pt->list, &ptype_all);
263 	} else {
264 		hash = ntohs(pt->type) & 15;
265 		list_add_rcu(&pt->list, &ptype_base[hash]);
266 	}
267 	spin_unlock_bh(&ptype_lock);
268 }
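/*
 * Editor's note: a minimal usage sketch, not part of this file.  The
 * handler my_proto_rcv and the ethertype are hypothetical; a real
 * handler lives in the module that owns the protocol.  Matching this
 * file's era, a packet_type handler takes (skb, dev, pt):
 *
 *	static int my_proto_rcv(struct sk_buff *skb, struct net_device *dev,
 *				struct packet_type *pt)
 *	{
 *		kfree_skb(skb);		handler must consume or free the skb
 *		return 0;
 *	}
 *
 *	static struct packet_type my_proto = {
 *		.type = __constant_htons(0x88b5),	a local/experimental type
 *		.func = my_proto_rcv,			.dev == NULL: any device
 *	};
 *
 *	dev_add_pack(&my_proto);	ETH_P_ALL would land on ptype_all instead
 */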
269 
270 extern void linkwatch_run_queue(void);
271 
272 
273 
274 /**
275  *	__dev_remove_pack	 - remove packet handler
276  *	@pt: packet type declaration
277  *
278  *	Remove a protocol handler that was previously added to the kernel
279  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
280  *	from the kernel lists and can be freed or reused once this function
281  *	returns.
282  *
283  *      The packet type might still be in use by receivers
284  *	and must not be freed until after all the CPUs have gone
285  *	through a quiescent state.
286  */
287 void __dev_remove_pack(struct packet_type *pt)
288 {
289 	struct list_head *head;
290 	struct packet_type *pt1;
291 
292 	spin_lock_bh(&ptype_lock);
293 
294 	if (pt->type == htons(ETH_P_ALL)) {
295 		netdev_nit--;
296 		head = &ptype_all;
297 	} else
298 		head = &ptype_base[ntohs(pt->type) & 15];
299 
300 	list_for_each_entry(pt1, head, list) {
301 		if (pt == pt1) {
302 			list_del_rcu(&pt->list);
303 			goto out;
304 		}
305 	}
306 
307 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
308 out:
309 	spin_unlock_bh(&ptype_lock);
310 }
311 /**
312  *	dev_remove_pack	 - remove packet handler
313  *	@pt: packet type declaration
314  *
315  *	Remove a protocol handler that was previously added to the kernel
316  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
317  *	from the kernel lists and can be freed or reused once this function
318  *	returns.
319  *
320  *	This call sleeps to guarantee that no CPU is looking at the packet
321  *	type after return.
322  */
323 void dev_remove_pack(struct packet_type *pt)
324 {
325 	__dev_remove_pack(pt);
326 
327 	synchronize_net();
328 }
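/*
 * Editor's note: illustrative teardown for the hypothetical my_proto
 * above.  dev_remove_pack() sleeps in synchronize_net(), so call it
 * from process context; only once it returns is it safe to free the
 * packet_type or unload the owning module:
 *
 *	dev_remove_pack(&my_proto);
 *
 * __dev_remove_pack() alone merely unlinks the entry; the caller must
 * then wait out an RCU grace period itself before freeing.
 */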
329 
330 /******************************************************************************
331 
332 		      Device Boot-time Settings Routines
333 
334 *******************************************************************************/
335 
336 /* Boot time configuration table */
337 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
338 
339 /**
340  *	netdev_boot_setup_add	- add new setup entry
341  *	@name: name of the device
342  *	@map: configured settings for the device
343  *
344  *	Adds a new setup entry to the dev_boot_setup list.  The function
345  *	returns 0 on error and 1 on success.  This is a generic routine
346  *	for all netdevices.
347  */
348 static int netdev_boot_setup_add(char *name, struct ifmap *map)
349 {
350 	struct netdev_boot_setup *s;
351 	int i;
352 
353 	s = dev_boot_setup;
354 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
355 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
356 			memset(s[i].name, 0, sizeof(s[i].name));
357 			strcpy(s[i].name, name);
358 			memcpy(&s[i].map, map, sizeof(s[i].map));
359 			break;
360 		}
361 	}
362 
363 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
364 }
365 
366 /**
367  *	netdev_boot_setup_check	- check boot time settings
368  *	@dev: the netdevice
369  *
370  * 	Check boot time settings for the device.
371  *	Any settings found are applied to the device for use
372  *	later in device probing.
373  *	Returns 0 if no settings are found, 1 if they are.
374  */
375 int netdev_boot_setup_check(struct net_device *dev)
376 {
377 	struct netdev_boot_setup *s = dev_boot_setup;
378 	int i;
379 
380 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
381 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
382 		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
383 			dev->irq 	= s[i].map.irq;
384 			dev->base_addr 	= s[i].map.base_addr;
385 			dev->mem_start 	= s[i].map.mem_start;
386 			dev->mem_end 	= s[i].map.mem_end;
387 			return 1;
388 		}
389 	}
390 	return 0;
391 }
392 
393 
394 /**
395  *	netdev_boot_base	- get address from boot time settings
396  *	@prefix: prefix for network device
397  *	@unit: id for network device
398  *
399  * 	Check boot time settings for the base address of the device.
400  *	If the device is already registered, a base of 1 is returned
401  *	to indicate that the interface should not be probed.
402  *	Returns 0 if no settings are found.
403  */
404 unsigned long netdev_boot_base(const char *prefix, int unit)
405 {
406 	const struct netdev_boot_setup *s = dev_boot_setup;
407 	char name[IFNAMSIZ];
408 	int i;
409 
410 	sprintf(name, "%s%d", prefix, unit);
411 
412 	/*
413 	 * If device already registered then return base of 1
414 	 * to indicate not to probe for this interface
415 	 */
416 	if (__dev_get_by_name(name))
417 		return 1;
418 
419 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
420 		if (!strcmp(name, s[i].name))
421 			return s[i].map.base_addr;
422 	return 0;
423 }
424 
425 /*
426  * Saves at boot time configured settings for any netdevice.
427  */
428 int __init netdev_boot_setup(char *str)
429 {
430 	int ints[5];
431 	struct ifmap map;
432 
433 	str = get_options(str, ARRAY_SIZE(ints), ints);
434 	if (!str || !*str)
435 		return 0;
436 
437 	/* Save settings */
438 	memset(&map, 0, sizeof(map));
439 	if (ints[0] > 0)
440 		map.irq = ints[1];
441 	if (ints[0] > 1)
442 		map.base_addr = ints[2];
443 	if (ints[0] > 2)
444 		map.mem_start = ints[3];
445 	if (ints[0] > 3)
446 		map.mem_end = ints[4];
447 
448 	/* Add new entry to the list */
449 	return netdev_boot_setup_add(str, &map);
450 }
451 
452 __setup("netdev=", netdev_boot_setup);
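/*
 * Editor's note: an example of the boot parameter wired up above, for
 * illustration.  Up to four integers (irq, base_addr, mem_start,
 * mem_end) precede the device name:
 *
 *	netdev=5,0x340,0,0,eth0
 *
 * stores { .irq = 5, .base_addr = 0x340 } under the name "eth0" for
 * netdev_boot_setup_check() to apply during probing.
 */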
453 
454 /*******************************************************************************
455 
456 			    Device Interface Subroutines
457 
458 *******************************************************************************/
459 
460 /**
461  *	__dev_get_by_name	- find a device by its name
462  *	@name: name to find
463  *
464  *	Find an interface by name. Must be called under RTNL semaphore
465  *	or @dev_base_lock. If the name is found a pointer to the device
466  *	is returned. If the name is not found then %NULL is returned. The
467  *	reference counters are not incremented so the caller must be
468  *	careful with locks.
469  */
470 
471 struct net_device *__dev_get_by_name(const char *name)
472 {
473 	struct hlist_node *p;
474 
475 	hlist_for_each(p, dev_name_hash(name)) {
476 		struct net_device *dev
477 			= hlist_entry(p, struct net_device, name_hlist);
478 		if (!strncmp(dev->name, name, IFNAMSIZ))
479 			return dev;
480 	}
481 	return NULL;
482 }
483 
484 /**
485  *	dev_get_by_name		- find a device by its name
486  *	@name: name to find
487  *
488  *	Find an interface by name. This can be called from any
489  *	context and does its own locking. The returned handle has
490  *	the usage count incremented and the caller must use dev_put() to
491  *	release it when it is no longer needed. %NULL is returned if no
492  *	matching device is found.
493  */
494 
495 struct net_device *dev_get_by_name(const char *name)
496 {
497 	struct net_device *dev;
498 
499 	read_lock(&dev_base_lock);
500 	dev = __dev_get_by_name(name);
501 	if (dev)
502 		dev_hold(dev);
503 	read_unlock(&dev_base_lock);
504 	return dev;
505 }
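/*
 * Editor's note: a short sketch of the hold/put discipline this pair
 * of lookups expects (illustrative only):
 *
 *	struct net_device *dev = dev_get_by_name("eth0");
 *	if (dev) {
 *		... use dev; the reference keeps it from being freed ...
 *		dev_put(dev);
 *	}
 *
 * __dev_get_by_name() takes no reference, so its result is only valid
 * while the caller holds RTNL or dev_base_lock.
 */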
506 
507 /**
508  *	__dev_get_by_index - find a device by its ifindex
509  *	@ifindex: index of device
510  *
511  *	Search for an interface by index. Returns a pointer to the
512  *	device, or %NULL if it is not found. The device has not
513  *	had its reference counter increased so the caller must be careful
514  *	about locking. The caller must hold either the RTNL semaphore
515  *	or @dev_base_lock.
516  */
517 
518 struct net_device *__dev_get_by_index(int ifindex)
519 {
520 	struct hlist_node *p;
521 
522 	hlist_for_each(p, dev_index_hash(ifindex)) {
523 		struct net_device *dev
524 			= hlist_entry(p, struct net_device, index_hlist);
525 		if (dev->ifindex == ifindex)
526 			return dev;
527 	}
528 	return NULL;
529 }
530 
531 
532 /**
533  *	dev_get_by_index - find a device by its ifindex
534  *	@ifindex: index of device
535  *
536  *	Search for an interface by index. Returns a pointer to the
537  *	device, or NULL if it is not found. The device returned has
538  *	had a reference added and the pointer is safe until the user calls
539  *	dev_put to indicate they have finished with it.
540  */
541 
542 struct net_device *dev_get_by_index(int ifindex)
543 {
544 	struct net_device *dev;
545 
546 	read_lock(&dev_base_lock);
547 	dev = __dev_get_by_index(ifindex);
548 	if (dev)
549 		dev_hold(dev);
550 	read_unlock(&dev_base_lock);
551 	return dev;
552 }
553 
554 /**
555  *	dev_getbyhwaddr - find a device by its hardware address
556  *	@type: media type of device
557  *	@ha: hardware address
558  *
559  *	Search for an interface by MAC address. Returns a pointer to the
560  *	device, or NULL if it is not found. The caller must hold the
561  *	rtnl semaphore. The returned device has not had its ref count increased
562  *	and the caller must therefore be careful about locking.
563  *
564  *	BUGS:
565  *	If the API was consistent this would be __dev_get_by_hwaddr
566  */
567 
568 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
569 {
570 	struct net_device *dev;
571 
572 	ASSERT_RTNL();
573 
574 	for (dev = dev_base; dev; dev = dev->next)
575 		if (dev->type == type &&
576 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
577 			break;
578 	return dev;
579 }
580 
581 struct net_device *dev_getfirstbyhwtype(unsigned short type)
582 {
583 	struct net_device *dev;
584 
585 	rtnl_lock();
586 	for (dev = dev_base; dev; dev = dev->next) {
587 		if (dev->type == type) {
588 			dev_hold(dev);
589 			break;
590 		}
591 	}
592 	rtnl_unlock();
593 	return dev;
594 }
595 
596 EXPORT_SYMBOL(dev_getfirstbyhwtype);
597 
598 /**
599  *	dev_get_by_flags - find any device with given flags
600  *	@if_flags: IFF_* values
601  *	@mask: bitmask of bits in if_flags to check
602  *
603  *	Search for any interface with the given flags. Returns a pointer
604  *	to the first matching device, or NULL if none is found. The
605  *	device returned has had a reference added and the pointer is safe
606  *	until the user calls dev_put to indicate they have finished with it.
607  */
608 
609 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
610 {
611 	struct net_device *dev;
612 
613 	read_lock(&dev_base_lock);
614 	for (dev = dev_base; dev != NULL; dev = dev->next) {
615 		if (((dev->flags ^ if_flags) & mask) == 0) {
616 			dev_hold(dev);
617 			break;
618 		}
619 	}
620 	read_unlock(&dev_base_lock);
621 	return dev;
622 }
623 
624 /**
625  *	dev_valid_name - check if name is okay for network device
626  *	@name: name string
627  *
628  *	Network device names need to be valid file names
629  *	to allow sysfs to work.
630  */
631 static int dev_valid_name(const char *name)
632 {
633 	return !(*name == '\0'
634 		 || !strcmp(name, ".")
635 		 || !strcmp(name, "..")
636 		 || strchr(name, '/'));
637 }
638 
639 /**
640  *	dev_alloc_name - allocate a name for a device
641  *	@dev: device
642  *	@name: name format string
643  *
644  *	Passed a format string - eg "lt%d" - it will try to find a suitable
645  *	id. Not efficient for many devices, not called a lot. The caller
646  *	must hold the dev_base or rtnl lock while allocating the name and
647  *	adding the device in order to avoid duplicates. Returns the number
648  *	of the unit assigned or a negative errno code.
649  */
650 
651 int dev_alloc_name(struct net_device *dev, const char *name)
652 {
653 	int i = 0;
654 	char buf[IFNAMSIZ];
655 	const char *p;
656 	const int max_netdevices = 8*PAGE_SIZE;
657 	long *inuse;
658 	struct net_device *d;
659 
660 	p = strnchr(name, IFNAMSIZ-1, '%');
661 	if (p) {
662 		/*
663 		 * Verify the string as this thing may have come from
664 		 * the user.  There must be either one "%d" and no other "%"
665 		 * characters.
666 		 */
667 		if (p[1] != 'd' || strchr(p + 2, '%'))
668 			return -EINVAL;
669 
670 		/* Use one page as a bit array of possible slots */
671 		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
672 		if (!inuse)
673 			return -ENOMEM;
674 
675 		for (d = dev_base; d; d = d->next) {
676 			if (!sscanf(d->name, name, &i))
677 				continue;
678 			if (i < 0 || i >= max_netdevices)
679 				continue;
680 
681 			/*  avoid cases where sscanf is not exact inverse of printf */
682 			snprintf(buf, sizeof(buf), name, i);
683 			if (!strncmp(buf, d->name, IFNAMSIZ))
684 				set_bit(i, inuse);
685 		}
686 
687 		i = find_first_zero_bit(inuse, max_netdevices);
688 		free_page((unsigned long) inuse);
689 	}
690 
691 	snprintf(buf, sizeof(buf), name, i);
692 	if (!__dev_get_by_name(buf)) {
693 		strlcpy(dev->name, buf, IFNAMSIZ);
694 		return i;
695 	}
696 
697 	/* It is possible to run out of possible slots
698 	 * when the name is long and there isn't enough space left
699 	 * for the digits, or if all bits are used.
700 	 */
701 	return -ENFILE;
702 }
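/*
 * Editor's note: illustrative only.  With "eth0" and "eth1" already
 * registered, this assigns "eth2" and returns 2; a name containing no
 * '%' would instead be checked verbatim for uniqueness:
 *
 *	err = dev_alloc_name(dev, "eth%d");	writes dev->name on success
 *	if (err < 0)
 *		return err;
 */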
703 
704 
705 /**
706  *	dev_change_name - change name of a device
707  *	@dev: device
708  *	@newname: name (or format string) must be at least IFNAMSIZ
709  *
710  *	Change the name of a device. Format strings such as "eth%d"
711  *	may be passed for wildcarding.
712  */
713 int dev_change_name(struct net_device *dev, char *newname)
714 {
715 	int err = 0;
716 
717 	ASSERT_RTNL();
718 
719 	if (dev->flags & IFF_UP)
720 		return -EBUSY;
721 
722 	if (!dev_valid_name(newname))
723 		return -EINVAL;
724 
725 	if (strchr(newname, '%')) {
726 		err = dev_alloc_name(dev, newname);
727 		if (err < 0)
728 			return err;
729 		strcpy(newname, dev->name);
730 	}
731 	else if (__dev_get_by_name(newname))
732 		return -EEXIST;
733 	else
734 		strlcpy(dev->name, newname, IFNAMSIZ);
735 
736 	err = class_device_rename(&dev->class_dev, dev->name);
737 	if (!err) {
738 		hlist_del(&dev->name_hlist);
739 		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
740 		notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
741 	}
742 
743 	return err;
744 }
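/*
 * Editor's note: a hedged usage sketch.  The device must be down and
 * the caller must hold RTNL; note that when a format string is passed,
 * the allocated name is copied back, so newname must be a writable
 * buffer of at least IFNAMSIZ bytes:
 *
 *	rtnl_lock();
 *	err = dev_change_name(dev, newname);
 *	rtnl_unlock();
 */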
745 
746 /**
747  *	netdev_features_change - device changes features
748  *	@dev: device to cause notification
749  *
750  *	Called to indicate a device has changed features.
751  */
752 void netdev_features_change(struct net_device *dev)
753 {
754 	notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
755 }
756 EXPORT_SYMBOL(netdev_features_change);
757 
758 /**
759  *	netdev_state_change - device changes state
760  *	@dev: device to cause notification
761  *
762  *	Called to indicate a device has changed state. This function calls
763  *	the notifier chains for netdev_chain and sends a NEWLINK message
764  *	to the routing socket.
765  */
766 void netdev_state_change(struct net_device *dev)
767 {
768 	if (dev->flags & IFF_UP) {
769 		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
770 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
771 	}
772 }
773 
774 /**
775  *	dev_load 	- load a network module
776  *	@name: name of interface
777  *
778  *	If a network interface is not present and the process has suitable
779  *	privileges this function loads the module. If module loading is not
780  *	available in this kernel then it becomes a nop.
781  */
782 
783 void dev_load(const char *name)
784 {
785 	struct net_device *dev;
786 
787 	read_lock(&dev_base_lock);
788 	dev = __dev_get_by_name(name);
789 	read_unlock(&dev_base_lock);
790 
791 	if (!dev && capable(CAP_SYS_MODULE))
792 		request_module("%s", name);
793 }
794 
795 static int default_rebuild_header(struct sk_buff *skb)
796 {
797 	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
798 	       skb->dev ? skb->dev->name : "NULL!!!");
799 	kfree_skb(skb);
800 	return 1;
801 }
802 
803 
804 /**
805  *	dev_open	- prepare an interface for use.
806  *	@dev:	device to open
807  *
808  *	Takes a device from down to up state. The device's private open
809  *	function is invoked and then the multicast lists are loaded. Finally
810  *	the device is moved into the up state and a %NETDEV_UP message is
811  *	sent to the netdev notifier chain.
812  *
813  *	Calling this function on an active interface is a nop. On a failure
814  *	a negative errno code is returned.
815  */
816 int dev_open(struct net_device *dev)
817 {
818 	int ret = 0;
819 
820 	/*
821 	 *	Is it already up?
822 	 */
823 
824 	if (dev->flags & IFF_UP)
825 		return 0;
826 
827 	/*
828 	 *	Is it even present?
829 	 */
830 	if (!netif_device_present(dev))
831 		return -ENODEV;
832 
833 	/*
834 	 *	Call device private open method
835 	 */
836 	set_bit(__LINK_STATE_START, &dev->state);
837 	if (dev->open) {
838 		ret = dev->open(dev);
839 		if (ret)
840 			clear_bit(__LINK_STATE_START, &dev->state);
841 	}
842 
843  	/*
844 	 *	If it went open OK then:
845 	 */
846 
847 	if (!ret) {
848 		/*
849 		 *	Set the flags.
850 		 */
851 		dev->flags |= IFF_UP;
852 
853 		/*
854 		 *	Initialize multicasting status
855 		 */
856 		dev_mc_upload(dev);
857 
858 		/*
859 		 *	Wakeup transmit queue engine
860 		 */
861 		dev_activate(dev);
862 
863 		/*
864 		 *	... and announce new interface.
865 		 */
866 		notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
867 	}
868 	return ret;
869 }
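/*
 * Editor's note: an illustrative bring-up, assuming the caller holds
 * a reference on dev; like most configuration paths in this file, it
 * is expected to run under the rtnl semaphore:
 *
 *	rtnl_lock();
 *	err = dev_open(dev);		0 if it was already IFF_UP
 *	rtnl_unlock();
 *
 * dev_close() below is the symmetric teardown step.
 */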
870 
871 /**
872  *	dev_close - shutdown an interface.
873  *	@dev: device to shutdown
874  *
875  *	This function moves an active device into down state. A
876  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
877  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
878  *	chain.
879  */
880 int dev_close(struct net_device *dev)
881 {
882 	if (!(dev->flags & IFF_UP))
883 		return 0;
884 
885 	/*
886 	 *	Tell people we are going down, so that they can
887 	 *	prepare for its death while the device is still operating.
888 	 */
889 	notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
890 
891 	dev_deactivate(dev);
892 
893 	clear_bit(__LINK_STATE_START, &dev->state);
894 
895 	/* Synchronize with any scheduled poll. We cannot touch the poll list;
896 	 * it may even be on a different cpu. So just clear netif_running()
897 	 * and wait until the poll really happens. Actually, the best place
898 	 * for this is inside dev->stop() after the device has stopped its irq
899 	 * engine, but this requires more changes in devices. */
900 
901 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
902 	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
903 		/* No hurry. */
904 		current->state = TASK_INTERRUPTIBLE;
905 		schedule_timeout(1);
906 	}
907 
908 	/*
909 	 *	Call the device specific close. This cannot fail.
910 	 *	Only if device is UP
911 	 *
912 	 *	We allow it to be called even after a DETACH hot-plug
913 	 *	event.
914 	 */
915 	if (dev->stop)
916 		dev->stop(dev);
917 
918 	/*
919 	 *	Device is now down.
920 	 */
921 
922 	dev->flags &= ~IFF_UP;
923 
924 	/*
925 	 * Tell people we are down
926 	 */
927 	notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
928 
929 	return 0;
930 }
931 
932 
933 /*
934  *	Device change register/unregister. These are not inline or static
935  *	as we export them to the world.
936  */
937 
938 /**
939  *	register_netdevice_notifier - register a network notifier block
940  *	@nb: notifier
941  *
942  *	Register a notifier to be called when network device events occur.
943  *	The notifier passed is linked into the kernel structures and must
944  *	not be reused until it has been unregistered. A negative errno code
945  *	is returned on a failure.
946  *
947  * 	When registered, all registration and up events are replayed
948  *	to the new notifier to allow it to have a race-free
949  *	view of the network device list.
950  */
951 
952 int register_netdevice_notifier(struct notifier_block *nb)
953 {
954 	struct net_device *dev;
955 	int err;
956 
957 	rtnl_lock();
958 	err = notifier_chain_register(&netdev_chain, nb);
959 	if (!err) {
960 		for (dev = dev_base; dev; dev = dev->next) {
961 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
962 
963 			if (dev->flags & IFF_UP)
964 				nb->notifier_call(nb, NETDEV_UP, dev);
965 		}
966 	}
967 	rtnl_unlock();
968 	return err;
969 }
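/*
 * Editor's note: a minimal notifier sketch (the names are made up):
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_netdev_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_netdev_nb);
 *
 * As documented above, existing devices are replayed to the new
 * notifier as NETDEV_REGISTER (plus NETDEV_UP where applicable).
 */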
970 
971 /**
972  *	unregister_netdevice_notifier - unregister a network notifier block
973  *	@nb: notifier
974  *
975  *	Unregister a notifier previously registered by
976  *	register_netdevice_notifier(). The notifier is unlinked from the
977  *	kernel structures and may then be reused. A negative errno code
978  *	is returned on a failure.
979  */
980 
981 int unregister_netdevice_notifier(struct notifier_block *nb)
982 {
983 	return notifier_chain_unregister(&netdev_chain, nb);
984 }
985 
986 /**
987  *	call_netdevice_notifiers - call all network notifier blocks
988  *      @val: value passed unmodified to notifier function
989  *      @v:   pointer passed unmodified to notifier function
990  *
991  *	Call all network notifier blocks.  Parameters and return value
992  *	are as for notifier_call_chain().
993  */
994 
995 int call_netdevice_notifiers(unsigned long val, void *v)
996 {
997 	return notifier_call_chain(&netdev_chain, val, v);
998 }
999 
1000 /* When > 0 there are consumers of rx skb time stamps */
1001 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1002 
1003 void net_enable_timestamp(void)
1004 {
1005 	atomic_inc(&netstamp_needed);
1006 }
1007 
1008 void net_disable_timestamp(void)
1009 {
1010 	atomic_dec(&netstamp_needed);
1011 }
1012 
1013 static inline void net_timestamp(struct timeval *stamp)
1014 {
1015 	if (atomic_read(&netstamp_needed))
1016 		do_gettimeofday(stamp);
1017 	else {
1018 		stamp->tv_sec = 0;
1019 		stamp->tv_usec = 0;
1020 	}
1021 }
1022 
1023 /*
1024  *	Support routine. Sends outgoing frames to any network
1025  *	taps currently in use.
1026  */
1027 
1028 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1029 {
1030 	struct packet_type *ptype;
1031 	net_timestamp(&skb->stamp);
1032 
1033 	rcu_read_lock();
1034 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1035 		/* Never send packets back to the socket
1036 		 * they originated from - MvS (miquels@drinkel.ow.org)
1037 		 */
1038 		if ((ptype->dev == dev || !ptype->dev) &&
1039 		    (ptype->af_packet_priv == NULL ||
1040 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1041 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1042 			if (!skb2)
1043 				break;
1044 
1045 			/* skb->nh should have been correctly
1046 			   set by the sender, so the check below is
1047 			   just protection against buggy protocols.
1048 			 */
1049 			skb2->mac.raw = skb2->data;
1050 
1051 			if (skb2->nh.raw < skb2->data ||
1052 			    skb2->nh.raw > skb2->tail) {
1053 				if (net_ratelimit())
1054 					printk(KERN_CRIT "protocol %04x is "
1055 					       "buggy, dev %s\n",
1056 					       ntohs(skb2->protocol), dev->name);
1057 				skb2->nh.raw = skb2->data;
1058 			}
1059 
1060 			skb2->h.raw = skb2->nh.raw;
1061 			skb2->pkt_type = PACKET_OUTGOING;
1062 			ptype->func(skb2, skb->dev, ptype);
1063 		}
1064 	}
1065 	rcu_read_unlock();
1066 }
1067 
1068 /*
1069  * Invalidate hardware checksum when packet is to be mangled, and
1070  * complete checksum manually on outgoing path.
1071  */
1072 int skb_checksum_help(struct sk_buff *skb, int inward)
1073 {
1074 	unsigned int csum;
1075 	int ret = 0, offset = skb->h.raw - skb->data;
1076 
1077 	if (inward) {
1078 		skb->ip_summed = CHECKSUM_NONE;
1079 		goto out;
1080 	}
1081 
1082 	if (skb_cloned(skb)) {
1083 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1084 		if (ret)
1085 			goto out;
1086 	}
1087 
1088 	if (offset > (int)skb->len)
1089 		BUG();
1090 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
1091 
1092 	offset = skb->tail - skb->h.raw;
1093 	if (offset <= 0)
1094 		BUG();
1095 	if (skb->csum + 2 > offset)
1096 		BUG();
1097 
1098 	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1099 	skb->ip_summed = CHECKSUM_NONE;
1100 out:
1101 	return ret;
1102 }
1103 
1104 #ifdef CONFIG_HIGHMEM
1105 /* Actually, we should eliminate this check as soon as we know that:
1106  * 1. An IOMMU is present and allows mapping of all the memory.
1107  * 2. No high memory really exists on this machine.
1108  */
1109 
1110 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1111 {
1112 	int i;
1113 
1114 	if (dev->features & NETIF_F_HIGHDMA)
1115 		return 0;
1116 
1117 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1118 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1119 			return 1;
1120 
1121 	return 0;
1122 }
1123 #else
1124 #define illegal_highdma(dev, skb)	(0)
1125 #endif
1126 
1127 extern void skb_release_data(struct sk_buff *);
1128 
1129 /* Keep head the same: replace data */
1130 int __skb_linearize(struct sk_buff *skb, int gfp_mask)
1131 {
1132 	unsigned int size;
1133 	u8 *data;
1134 	long offset;
1135 	struct skb_shared_info *ninfo;
1136 	int headerlen = skb->data - skb->head;
1137 	int expand = (skb->tail + skb->data_len) - skb->end;
1138 
1139 	if (skb_shared(skb))
1140 		BUG();
1141 
1142 	if (expand <= 0)
1143 		expand = 0;
1144 
1145 	size = skb->end - skb->head + expand;
1146 	size = SKB_DATA_ALIGN(size);
1147 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1148 	if (!data)
1149 		return -ENOMEM;
1150 
1151 	/* Copy entire thing */
1152 	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1153 		BUG();
1154 
1155 	/* Set up shinfo */
1156 	ninfo = (struct skb_shared_info*)(data + size);
1157 	atomic_set(&ninfo->dataref, 1);
1158 	ninfo->tso_size = skb_shinfo(skb)->tso_size;
1159 	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1160 	ninfo->nr_frags = 0;
1161 	ninfo->frag_list = NULL;
1162 
1163 	/* Offset between the two in bytes */
1164 	offset = data - skb->head;
1165 
1166 	/* Free old data. */
1167 	skb_release_data(skb);
1168 
1169 	skb->head = data;
1170 	skb->end  = data + size;
1171 
1172 	/* Set up new pointers */
1173 	skb->h.raw   += offset;
1174 	skb->nh.raw  += offset;
1175 	skb->mac.raw += offset;
1176 	skb->tail    += offset;
1177 	skb->data    += offset;
1178 
1179 	/* We are no longer a clone, even if we were. */
1180 	skb->cloned    = 0;
1181 
1182 	skb->tail     += skb->data_len;
1183 	skb->data_len  = 0;
1184 	return 0;
1185 }
1186 
1187 #define HARD_TX_LOCK(dev, cpu) {			\
1188 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1189 		spin_lock(&dev->xmit_lock);		\
1190 		dev->xmit_lock_owner = cpu;		\
1191 	}						\
1192 }
1193 
1194 #define HARD_TX_UNLOCK(dev) {				\
1195 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
1196 		dev->xmit_lock_owner = -1;		\
1197 		spin_unlock(&dev->xmit_lock);		\
1198 	}						\
1199 }
1200 
1201 /**
1202  *	dev_queue_xmit - transmit a buffer
1203  *	@skb: buffer to transmit
1204  *
1205  *	Queue a buffer for transmission to a network device. The caller must
1206  *	have set the device and priority and built the buffer before calling
1207  *	this function. The function can be called from an interrupt.
1208  *
1209  *	A negative errno code is returned on a failure. A success does not
1210  *	guarantee the frame will be transmitted as it may be dropped due
1211  *	to congestion or traffic shaping.
1212  *
1213  * -----------------------------------------------------------------------------------
1214  *      I notice this method can also return errors from the queue disciplines,
1215  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1216  *      be positive.
1217  *
1218  *      Regardless of the return value, the skb is consumed, so it is currently
1219  *      difficult to retry a send to this method.  (You can bump the ref count
1220  *      before sending to hold a reference for retry if you are careful.)
1221  *
1222  *      When calling this method, interrupts MUST be enabled.  This is because
1223  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1224  *          --BLG
1225  */
1226 
1227 int dev_queue_xmit(struct sk_buff *skb)
1228 {
1229 	struct net_device *dev = skb->dev;
1230 	struct Qdisc *q;
1231 	int rc = -ENOMEM;
1232 
1233 	if (skb_shinfo(skb)->frag_list &&
1234 	    !(dev->features & NETIF_F_FRAGLIST) &&
1235 	    __skb_linearize(skb, GFP_ATOMIC))
1236 		goto out_kfree_skb;
1237 
1238 	/* Fragmented skb is linearized if device does not support SG,
1239 	 * or if at least one of fragments is in highmem and device
1240 	 * does not support DMA from it.
1241 	 */
1242 	if (skb_shinfo(skb)->nr_frags &&
1243 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1244 	    __skb_linearize(skb, GFP_ATOMIC))
1245 		goto out_kfree_skb;
1246 
1247 	/* If packet is not checksummed and device does not support
1248 	 * checksumming for this protocol, complete checksumming here.
1249 	 */
1250 	if (skb->ip_summed == CHECKSUM_HW &&
1251 	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1252 	     (!(dev->features & NETIF_F_IP_CSUM) ||
1253 	      skb->protocol != htons(ETH_P_IP))))
1254 	      	if (skb_checksum_help(skb, 0))
1255 	      		goto out_kfree_skb;
1256 
1257 	/* Disable soft irqs for various locks below. Also
1258 	 * stops preemption for RCU.
1259 	 */
1260 	local_bh_disable();
1261 
1262 	/* Updates of qdisc are serialized by queue_lock.
1263 	 * The struct Qdisc which is pointed to by qdisc is now a
1264 	 * rcu structure - it may be accessed without acquiring
1265 	 * a lock (but the structure may be stale.) The freeing of the
1266 	 * qdisc will be deferred until it's known that there are no
1267 	 * more references to it.
1268 	 *
1269 	 * If the qdisc has an enqueue function, we still need to
1270 	 * hold the queue_lock before calling it, since queue_lock
1271 	 * also serializes access to the device queue.
1272 	 */
1273 
1274 	q = rcu_dereference(dev->qdisc);
1275 #ifdef CONFIG_NET_CLS_ACT
1276 	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1277 #endif
1278 	if (q->enqueue) {
1279 		/* Grab device queue */
1280 		spin_lock(&dev->queue_lock);
1281 
1282 		rc = q->enqueue(skb, q);
1283 
1284 		qdisc_run(dev);
1285 
1286 		spin_unlock(&dev->queue_lock);
1287 		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1288 		goto out;
1289 	}
1290 
1291 	/* The device has no queue. Common case for software devices:
1292 	   loopback, all the sorts of tunnels...
1293 
1294 	   Really, it is unlikely that xmit_lock protection is necessary here.
1295 	   (f.e. loopback and IP tunnels are clean ignoring statistics
1296 	   counters.)
1297 	   However, it is possible that they rely on the protection
1298 	   made by us here.
1299 
1300 	   Check this and take the lock. It is not prone to deadlocks.
1301 	   Or just use the noqueue qdisc; it is even simpler 8)
1302 	 */
1303 	if (dev->flags & IFF_UP) {
1304 		int cpu = smp_processor_id(); /* ok because BHs are off */
1305 
1306 		if (dev->xmit_lock_owner != cpu) {
1307 
1308 			HARD_TX_LOCK(dev, cpu);
1309 
1310 			if (!netif_queue_stopped(dev)) {
1311 				if (netdev_nit)
1312 					dev_queue_xmit_nit(skb, dev);
1313 
1314 				rc = 0;
1315 				if (!dev->hard_start_xmit(skb, dev)) {
1316 					HARD_TX_UNLOCK(dev);
1317 					goto out;
1318 				}
1319 			}
1320 			HARD_TX_UNLOCK(dev);
1321 			if (net_ratelimit())
1322 				printk(KERN_CRIT "Virtual device %s asks to "
1323 				       "queue packet!\n", dev->name);
1324 		} else {
1325 			/* Recursion is detected! It is possible,
1326 			 * unfortunately */
1327 			if (net_ratelimit())
1328 				printk(KERN_CRIT "Dead loop on virtual device "
1329 				       "%s, fix it urgently!\n", dev->name);
1330 		}
1331 	}
1332 
1333 	rc = -ENETDOWN;
1334 	local_bh_enable();
1335 
1336 out_kfree_skb:
1337 	kfree_skb(skb);
1338 	return rc;
1339 out:
1340 	local_bh_enable();
1341 	return rc;
1342 }
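/*
 * Editor's note: an illustrative transmit call.  As the comment above
 * warns, the skb is consumed whatever the outcome, so the caller must
 * not touch it again afterwards:
 *
 *	skb->dev = dev;			device chosen by the caller
 *	rc = dev_queue_xmit(skb);	negative errno or NET_XMIT_* code
 *	... in either case skb belongs to the stack now ...
 *
 * A caller that wants to retry must skb_get() its own reference first.
 */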
1343 
1344 
1345 /*=======================================================================
1346 			Receiver routines
1347   =======================================================================*/
1348 
1349 int netdev_max_backlog = 1000;
1350 int netdev_budget = 300;
1351 int weight_p = 64;            /* old backlog weight */
1352 
1353 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1354 
1355 
1356 /**
1357  *	netif_rx	-	post buffer to the network code
1358  *	@skb: buffer to post
1359  *
1360  *	This function receives a packet from a device driver and queues it for
1361  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1362  *	may be dropped during processing for congestion control or by the
1363  *	protocol layers.
1364  *
1365  *	return values:
1366  *	NET_RX_SUCCESS	(no congestion)
1367  *	NET_RX_CN_LOW   (low congestion)
1368  *	NET_RX_CN_MOD   (moderate congestion)
1369  *	NET_RX_CN_HIGH  (high congestion)
1370  *	NET_RX_DROP     (packet was dropped)
1371  *
1372  */
1373 
1374 int netif_rx(struct sk_buff *skb)
1375 {
1376 	struct softnet_data *queue;
1377 	unsigned long flags;
1378 
1379 	/* if netpoll wants it, pretend we never saw it */
1380 	if (netpoll_rx(skb))
1381 		return NET_RX_DROP;
1382 
1383 	if (!skb->stamp.tv_sec)
1384 		net_timestamp(&skb->stamp);
1385 
1386 	/*
1387 	 * The code is arranged so that the path is shortest
1388 	 * when the CPU is congested but still operating.
1389 	 */
1390 	local_irq_save(flags);
1391 	queue = &__get_cpu_var(softnet_data);
1392 
1393 	__get_cpu_var(netdev_rx_stat).total++;
1394 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1395 		if (queue->input_pkt_queue.qlen) {
1396 enqueue:
1397 			dev_hold(skb->dev);
1398 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1399 			local_irq_restore(flags);
1400 			return NET_RX_SUCCESS;
1401 		}
1402 
1403 		netif_rx_schedule(&queue->backlog_dev);
1404 		goto enqueue;
1405 	}
1406 
1407 	__get_cpu_var(netdev_rx_stat).dropped++;
1408 	local_irq_restore(flags);
1409 
1410 	kfree_skb(skb);
1411 	return NET_RX_DROP;
1412 }
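/*
 * Editor's note: the classic interrupt-time receive path that feeds
 * netif_rx(), sketched for an Ethernet-style driver (len and data are
 * assumed to describe the received frame):
 *
 *	skb = dev_alloc_skb(len + 2);
 *	if (!skb)
 *		goto drop;
 *	skb_reserve(skb, 2);			align the IP header
 *	memcpy(skb_put(skb, len), data, len);
 *	skb->dev = dev;
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);				queued for the rx softirq
 */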
1413 
1414 int netif_rx_ni(struct sk_buff *skb)
1415 {
1416 	int err;
1417 
1418 	preempt_disable();
1419 	err = netif_rx(skb);
1420 	if (local_softirq_pending())
1421 		do_softirq();
1422 	preempt_enable();
1423 
1424 	return err;
1425 }
1426 
1427 EXPORT_SYMBOL(netif_rx_ni);
1428 
1429 static __inline__ void skb_bond(struct sk_buff *skb)
1430 {
1431 	struct net_device *dev = skb->dev;
1432 
1433 	if (dev->master) {
1434 		skb->real_dev = skb->dev;
1435 		skb->dev = dev->master;
1436 	}
1437 }
1438 
1439 static void net_tx_action(struct softirq_action *h)
1440 {
1441 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
1442 
1443 	if (sd->completion_queue) {
1444 		struct sk_buff *clist;
1445 
1446 		local_irq_disable();
1447 		clist = sd->completion_queue;
1448 		sd->completion_queue = NULL;
1449 		local_irq_enable();
1450 
1451 		while (clist) {
1452 			struct sk_buff *skb = clist;
1453 			clist = clist->next;
1454 
1455 			BUG_TRAP(!atomic_read(&skb->users));
1456 			__kfree_skb(skb);
1457 		}
1458 	}
1459 
1460 	if (sd->output_queue) {
1461 		struct net_device *head;
1462 
1463 		local_irq_disable();
1464 		head = sd->output_queue;
1465 		sd->output_queue = NULL;
1466 		local_irq_enable();
1467 
1468 		while (head) {
1469 			struct net_device *dev = head;
1470 			head = head->next_sched;
1471 
1472 			smp_mb__before_clear_bit();
1473 			clear_bit(__LINK_STATE_SCHED, &dev->state);
1474 
1475 			if (spin_trylock(&dev->queue_lock)) {
1476 				qdisc_run(dev);
1477 				spin_unlock(&dev->queue_lock);
1478 			} else {
1479 				netif_schedule(dev);
1480 			}
1481 		}
1482 	}
1483 }
1484 
1485 static __inline__ int deliver_skb(struct sk_buff *skb,
1486 				  struct packet_type *pt_prev)
1487 {
1488 	atomic_inc(&skb->users);
1489 	return pt_prev->func(skb, skb->dev, pt_prev);
1490 }
1491 
1492 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1493 int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
1494 struct net_bridge;
1495 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1496 						unsigned char *addr);
1497 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
1498 
1499 static __inline__ int handle_bridge(struct sk_buff **pskb,
1500 				    struct packet_type **pt_prev, int *ret)
1501 {
1502 	struct net_bridge_port *port;
1503 
1504 	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
1505 	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
1506 		return 0;
1507 
1508 	if (*pt_prev) {
1509 		*ret = deliver_skb(*pskb, *pt_prev);
1510 		*pt_prev = NULL;
1511 	}
1512 
1513 	return br_handle_frame_hook(port, pskb);
1514 }
1515 #else
1516 #define handle_bridge(skb, pt_prev, ret)	(0)
1517 #endif
1518 
1519 #ifdef CONFIG_NET_CLS_ACT
1520 /* TODO: Maybe we should just force sch_ingress to be compiled in
1521  * whenever CONFIG_NET_CLS_ACT is.  Otherwise we pay for some useless
1522  * instructions (a compare and two extra stores) when it is off
1523  * but CONFIG_NET_CLS_ACT is on.
1524  * NOTE: This doesn't remove any functionality; if you don't have
1525  * the ingress scheduler, you just can't add policies on ingress.
1526  *
1527  */
1528 static int ing_filter(struct sk_buff *skb)
1529 {
1530 	struct Qdisc *q;
1531 	struct net_device *dev = skb->dev;
1532 	int result = TC_ACT_OK;
1533 
1534 	if (dev->qdisc_ingress) {
1535 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1536 		if (MAX_RED_LOOP < ttl++) {
1537 			printk("Redir loop detected, dropping packet (%s->%s)\n",
1538 				skb->input_dev ? skb->input_dev->name : "??", skb->dev->name);
1539 			return TC_ACT_SHOT;
1540 		}
1541 
1542 		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1543 
1544 		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1545 		if (NULL == skb->input_dev) {
1546 			skb->input_dev = skb->dev;
1547 			printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
1548 		}
1549 		spin_lock(&dev->ingress_lock);
1550 		if ((q = dev->qdisc_ingress) != NULL)
1551 			result = q->enqueue(skb, q);
1552 		spin_unlock(&dev->ingress_lock);
1553 
1554 	}
1555 
1556 	return result;
1557 }
1558 #endif
1559 
1560 int netif_receive_skb(struct sk_buff *skb)
1561 {
1562 	struct packet_type *ptype, *pt_prev;
1563 	int ret = NET_RX_DROP;
1564 	unsigned short type;
1565 
1566 	/* if we've gotten here through NAPI, check netpoll */
1567 	if (skb->dev->poll && netpoll_rx(skb))
1568 		return NET_RX_DROP;
1569 
1570 	if (!skb->stamp.tv_sec)
1571 		net_timestamp(&skb->stamp);
1572 
1573 	skb_bond(skb);
1574 
1575 	__get_cpu_var(netdev_rx_stat).total++;
1576 
1577 	skb->h.raw = skb->nh.raw = skb->data;
1578 	skb->mac_len = skb->nh.raw - skb->mac.raw;
1579 
1580 	pt_prev = NULL;
1581 
1582 	rcu_read_lock();
1583 
1584 #ifdef CONFIG_NET_CLS_ACT
1585 	if (skb->tc_verd & TC_NCLS) {
1586 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1587 		goto ncls;
1588 	}
1589 #endif
1590 
1591 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1592 		if (!ptype->dev || ptype->dev == skb->dev) {
1593 			if (pt_prev)
1594 				ret = deliver_skb(skb, pt_prev);
1595 			pt_prev = ptype;
1596 		}
1597 	}
1598 
1599 #ifdef CONFIG_NET_CLS_ACT
1600 	if (pt_prev) {
1601 		ret = deliver_skb(skb, pt_prev);
1602 		pt_prev = NULL; /* no one else should process this after us */
1603 	} else {
1604 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1605 	}
1606 
1607 	ret = ing_filter(skb);
1608 
1609 	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1610 		kfree_skb(skb);
1611 		goto out;
1612 	}
1613 
1614 	skb->tc_verd = 0;
1615 ncls:
1616 #endif
1617 
1618 	handle_diverter(skb);
1619 
1620 	if (handle_bridge(&skb, &pt_prev, &ret))
1621 		goto out;
1622 
1623 	type = skb->protocol;
1624 	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1625 		if (ptype->type == type &&
1626 		    (!ptype->dev || ptype->dev == skb->dev)) {
1627 			if (pt_prev)
1628 				ret = deliver_skb(skb, pt_prev);
1629 			pt_prev = ptype;
1630 		}
1631 	}
1632 
1633 	if (pt_prev) {
1634 		ret = pt_prev->func(skb, skb->dev, pt_prev);
1635 	} else {
1636 		kfree_skb(skb);
1637 		/* Jamal, now you will not be able to escape explaining
1638 		 * to me how you were going to use this. :-)
1639 		 */
1640 		ret = NET_RX_DROP;
1641 	}
1642 
1643 out:
1644 	rcu_read_unlock();
1645 	return ret;
1646 }
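/*
 * Editor's note: NAPI drivers call netif_receive_skb() directly from
 * their ->poll() handler rather than using netif_rx(); a hedged sketch
 * against this file's dev->poll(dev, budget) convention:
 *
 *	static int my_poll(struct net_device *dev, int *budget)
 *	{
 *		while (... ring has packets and quota remains ...) {
 *			... build skb as in the netif_rx() example ...
 *			netif_receive_skb(skb);
 *		}
 *		netif_rx_complete(dev);		done: leave the poll list
 *		return 0;			and re-enable rx interrupts
 *	}
 */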
1647 
1648 static int process_backlog(struct net_device *backlog_dev, int *budget)
1649 {
1650 	int work = 0;
1651 	int quota = min(backlog_dev->quota, *budget);
1652 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1653 	unsigned long start_time = jiffies;
1654 
1655 	backlog_dev->weight = weight_p;
1656 	for (;;) {
1657 		struct sk_buff *skb;
1658 		struct net_device *dev;
1659 
1660 		local_irq_disable();
1661 		skb = __skb_dequeue(&queue->input_pkt_queue);
1662 		if (!skb)
1663 			goto job_done;
1664 		local_irq_enable();
1665 
1666 		dev = skb->dev;
1667 
1668 		netif_receive_skb(skb);
1669 
1670 		dev_put(dev);
1671 
1672 		work++;
1673 
1674 		if (work >= quota || jiffies - start_time > 1)
1675 			break;
1676 
1677 	}
1678 
1679 	backlog_dev->quota -= work;
1680 	*budget -= work;
1681 	return -1;
1682 
1683 job_done:
1684 	backlog_dev->quota -= work;
1685 	*budget -= work;
1686 
1687 	list_del(&backlog_dev->poll_list);
1688 	smp_mb__before_clear_bit();
1689 	netif_poll_enable(backlog_dev);
1690 
1691 	local_irq_enable();
1692 	return 0;
1693 }
1694 
1695 static void net_rx_action(struct softirq_action *h)
1696 {
1697 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
1698 	unsigned long start_time = jiffies;
1699 	int budget = netdev_budget;
1700 
1701 	local_irq_disable();
1702 
1703 	while (!list_empty(&queue->poll_list)) {
1704 		struct net_device *dev;
1705 
1706 		if (budget <= 0 || jiffies - start_time > 1)
1707 			goto softnet_break;
1708 
1709 		local_irq_enable();
1710 
1711 		dev = list_entry(queue->poll_list.next,
1712 				 struct net_device, poll_list);
1713 		netpoll_poll_lock(dev);
1714 
1715 		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1716 			netpoll_poll_unlock(dev);
1717 			local_irq_disable();
1718 			list_del(&dev->poll_list);
1719 			list_add_tail(&dev->poll_list, &queue->poll_list);
1720 			if (dev->quota < 0)
1721 				dev->quota += dev->weight;
1722 			else
1723 				dev->quota = dev->weight;
1724 		} else {
1725 			netpoll_poll_unlock(dev);
1726 			dev_put(dev);
1727 			local_irq_disable();
1728 		}
1729 	}
1730 out:
1731 	local_irq_enable();
1732 	return;
1733 
1734 softnet_break:
1735 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
1736 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
1737 	goto out;
1738 }
1739 
1740 static gifconf_func_t * gifconf_list [NPROTO];
1741 
1742 /**
1743  *	register_gifconf	-	register a SIOCGIF handler
1744  *	@family: Address family
1745  *	@gifconf: Function handler
1746  *
1747  *	Register protocol dependent address dumping routines. The handler
1748  *	that is passed must not be freed or reused until it has been replaced
1749  *	by another handler.
1750  */
1751 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1752 {
1753 	if (family >= NPROTO)
1754 		return -EINVAL;
1755 	gifconf_list[family] = gifconf;
1756 	return 0;
1757 }
1758 
1759 
1760 /*
1761  *	Map an interface index to its name (SIOCGIFNAME)
1762  */
1763 
1764 /*
1765  *	We need this ioctl for efficient implementation of the
1766  *	if_indextoname() function required by the IPv6 API.  Without
1767  *	it, we would have to search all the interfaces to find a
1768  *	match.  --pb
1769  */
1770 
1771 static int dev_ifname(struct ifreq __user *arg)
1772 {
1773 	struct net_device *dev;
1774 	struct ifreq ifr;
1775 
1776 	/*
1777 	 *	Fetch the caller's info block.
1778 	 */
1779 
1780 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1781 		return -EFAULT;
1782 
1783 	read_lock(&dev_base_lock);
1784 	dev = __dev_get_by_index(ifr.ifr_ifindex);
1785 	if (!dev) {
1786 		read_unlock(&dev_base_lock);
1787 		return -ENODEV;
1788 	}
1789 
1790 	strcpy(ifr.ifr_name, dev->name);
1791 	read_unlock(&dev_base_lock);
1792 
1793 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1794 		return -EFAULT;
1795 	return 0;
1796 }
1797 
1798 /*
1799  *	Perform a SIOCGIFCONF call. This structure will change
1800  *	size eventually, and there is nothing I can do about it.
1801  *	Thus we will need a 'compatibility mode'.
1802  */
1803 
1804 static int dev_ifconf(char __user *arg)
1805 {
1806 	struct ifconf ifc;
1807 	struct net_device *dev;
1808 	char __user *pos;
1809 	int len;
1810 	int total;
1811 	int i;
1812 
1813 	/*
1814 	 *	Fetch the caller's info block.
1815 	 */
1816 
1817 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1818 		return -EFAULT;
1819 
1820 	pos = ifc.ifc_buf;
1821 	len = ifc.ifc_len;
1822 
1823 	/*
1824 	 *	Loop over the interfaces, and write an info block for each.
1825 	 */
1826 
1827 	total = 0;
1828 	for (dev = dev_base; dev; dev = dev->next) {
1829 		for (i = 0; i < NPROTO; i++) {
1830 			if (gifconf_list[i]) {
1831 				int done;
1832 				if (!pos)
1833 					done = gifconf_list[i](dev, NULL, 0);
1834 				else
1835 					done = gifconf_list[i](dev, pos + total,
1836 							       len - total);
1837 				if (done < 0)
1838 					return -EFAULT;
1839 				total += done;
1840 			}
1841 		}
1842   	}
1843 
1844 	/*
1845 	 *	All done.  Write the updated control block back to the caller.
1846 	 */
1847 	ifc.ifc_len = total;
1848 
1849 	/*
1850 	 * 	Both BSD and Solaris return 0 here, so we do too.
1851 	 */
1852 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1853 }
1854 
1855 #ifdef CONFIG_PROC_FS
1856 /*
1857  *	This is invoked by the /proc filesystem handler to display a device
1858  *	in detail.
1859  */
1860 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1861 {
1862 	struct net_device *dev;
1863 	loff_t i;
1864 
1865 	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1866 
1867 	return i == pos ? dev : NULL;
1868 }
1869 
1870 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1871 {
1872 	read_lock(&dev_base_lock);
1873 	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1874 }
1875 
1876 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1877 {
1878 	++*pos;
1879 	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1880 }
1881 
1882 void dev_seq_stop(struct seq_file *seq, void *v)
1883 {
1884 	read_unlock(&dev_base_lock);
1885 }
1886 
1887 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1888 {
1889 	if (dev->get_stats) {
1890 		struct net_device_stats *stats = dev->get_stats(dev);
1891 
1892 		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1893 				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1894 			   dev->name, stats->rx_bytes, stats->rx_packets,
1895 			   stats->rx_errors,
1896 			   stats->rx_dropped + stats->rx_missed_errors,
1897 			   stats->rx_fifo_errors,
1898 			   stats->rx_length_errors + stats->rx_over_errors +
1899 			     stats->rx_crc_errors + stats->rx_frame_errors,
1900 			   stats->rx_compressed, stats->multicast,
1901 			   stats->tx_bytes, stats->tx_packets,
1902 			   stats->tx_errors, stats->tx_dropped,
1903 			   stats->tx_fifo_errors, stats->collisions,
1904 			   stats->tx_carrier_errors +
1905 			     stats->tx_aborted_errors +
1906 			     stats->tx_window_errors +
1907 			     stats->tx_heartbeat_errors,
1908 			   stats->tx_compressed);
1909 	} else
1910 		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1911 }
1912 
1913 /*
1914  *	Called from the PROCfs module. This now uses the new arbitrary-sized
1915  *	/proc/net interface to create /proc/net/dev.
1916  */
1917 static int dev_seq_show(struct seq_file *seq, void *v)
1918 {
1919 	if (v == SEQ_START_TOKEN)
1920 		seq_puts(seq, "Inter-|   Receive                            "
1921 			      "                    |  Transmit\n"
1922 			      " face |bytes    packets errs drop fifo frame "
1923 			      "compressed multicast|bytes    packets errs "
1924 			      "drop fifo colls carrier compressed\n");
1925 	else
1926 		dev_seq_printf_stats(seq, v);
1927 	return 0;
1928 }
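
/*
 * Example (illustrative userspace sketch; the function name is
 * hypothetical): consuming the table emitted above.  The first two
 * lines are the header printed for SEQ_START_TOKEN; everything after
 * that is one device per line.
 */
#if 0
#include <stdio.h>

int example_dump_dev_stats(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/dev", "r");

	if (!f)
		return -1;
	/* Skip the two header lines, then echo the per-device rows. */
	if (fgets(line, sizeof(line), f) && fgets(line, sizeof(line), f))
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
	fclose(f);
	return 0;
}
#endif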
1929 
1930 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
1931 {
1932 	struct netif_rx_stats *rc = NULL;
1933 
1934 	while (*pos < NR_CPUS)
1935 		if (cpu_online(*pos)) {
1936 			rc = &per_cpu(netdev_rx_stat, *pos);
1937 			break;
1938 		} else
1939 			++*pos;
1940 	return rc;
1941 }
1942 
1943 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
1944 {
1945 	return softnet_get_online(pos);
1946 }
1947 
1948 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1949 {
1950 	++*pos;
1951 	return softnet_get_online(pos);
1952 }
1953 
1954 static void softnet_seq_stop(struct seq_file *seq, void *v)
1955 {
1956 }
1957 
1958 static int softnet_seq_show(struct seq_file *seq, void *v)
1959 {
1960 	struct netif_rx_stats *s = v;
1961 
1962 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
1963 		   s->total, s->dropped, s->time_squeeze, 0,
1964 		   0, 0, 0, 0, /* was fastroute */
1965 		   s->cpu_collision);
1966 	return 0;
1967 }
1968 
1969 static struct seq_operations dev_seq_ops = {
1970 	.start = dev_seq_start,
1971 	.next  = dev_seq_next,
1972 	.stop  = dev_seq_stop,
1973 	.show  = dev_seq_show,
1974 };
1975 
1976 static int dev_seq_open(struct inode *inode, struct file *file)
1977 {
1978 	return seq_open(file, &dev_seq_ops);
1979 }
1980 
1981 static struct file_operations dev_seq_fops = {
1982 	.owner	 = THIS_MODULE,
1983 	.open    = dev_seq_open,
1984 	.read    = seq_read,
1985 	.llseek  = seq_lseek,
1986 	.release = seq_release,
1987 };
1988 
1989 static struct seq_operations softnet_seq_ops = {
1990 	.start = softnet_seq_start,
1991 	.next  = softnet_seq_next,
1992 	.stop  = softnet_seq_stop,
1993 	.show  = softnet_seq_show,
1994 };
1995 
1996 static int softnet_seq_open(struct inode *inode, struct file *file)
1997 {
1998 	return seq_open(file, &softnet_seq_ops);
1999 }
2000 
2001 static struct file_operations softnet_seq_fops = {
2002 	.owner	 = THIS_MODULE,
2003 	.open    = softnet_seq_open,
2004 	.read    = seq_read,
2005 	.llseek  = seq_lseek,
2006 	.release = seq_release,
2007 };
2008 
2009 #ifdef WIRELESS_EXT
2010 extern int wireless_proc_init(void);
2011 #else
2012 #define wireless_proc_init() 0
2013 #endif
2014 
2015 static int __init dev_proc_init(void)
2016 {
2017 	int rc = -ENOMEM;
2018 
2019 	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2020 		goto out;
2021 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2022 		goto out_dev;
2023 	if (wireless_proc_init())
2024 		goto out_softnet;
2025 	rc = 0;
2026 out:
2027 	return rc;
2028 out_softnet:
2029 	proc_net_remove("softnet_stat");
2030 out_dev:
2031 	proc_net_remove("dev");
2032 	goto out;
2033 }
2034 #else
2035 #define dev_proc_init() 0
2036 #endif	/* CONFIG_PROC_FS */
2037 
2038 
2039 /**
2040  *	netdev_set_master	-	set up master/slave pair
2041  *	@slave: slave device
2042  *	@master: new master device
2043  *
2044  *	Changes the master device of the slave. Pass %NULL to break the
2045  *	bonding. The caller must hold the RTNL semaphore. On a failure
2046  *	a negative errno code is returned. On success the reference counts
2047  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2048  *	function returns zero.
2049  */
2050 int netdev_set_master(struct net_device *slave, struct net_device *master)
2051 {
2052 	struct net_device *old = slave->master;
2053 
2054 	ASSERT_RTNL();
2055 
2056 	if (master) {
2057 		if (old)
2058 			return -EBUSY;
2059 		dev_hold(master);
2060 	}
2061 
2062 	slave->master = master;
2063 
2064 	synchronize_net();
2065 
2066 	if (old)
2067 		dev_put(old);
2068 
2069 	if (master)
2070 		slave->flags |= IFF_SLAVE;
2071 	else
2072 		slave->flags &= ~IFF_SLAVE;
2073 
2074 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2075 	return 0;
2076 }
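
/*
 * Example (illustrative sketch; example_enslave/example_release and the
 * device arguments are hypothetical): pairing and unpairing devices.
 * The RTNL semaphore must be held across the call, as documented above.
 */
#if 0
static int example_enslave(struct net_device *bond_dev,
			   struct net_device *slave_dev)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave_dev, bond_dev);	/* take a master */
	rtnl_unlock();
	return err;
}

static void example_release(struct net_device *slave_dev)
{
	rtnl_lock();
	netdev_set_master(slave_dev, NULL);	/* NULL breaks the bonding */
	rtnl_unlock();
}
#endif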
2077 
2078 /**
2079  *	dev_set_promiscuity	- update promiscuity count on a device
2080  *	@dev: device
2081  *	@inc: modifier
2082  *
2083  *	Add or remove promiscuity from a device. While the count in the device
2084  *	remains above zero the interface remains promiscuous. Once it hits zero
2085  *	the device reverts to normal filtering operation. A negative @inc
2086  *	value is used to drop promiscuity on the device.
2087  */
2088 void dev_set_promiscuity(struct net_device *dev, int inc)
2089 {
2090 	unsigned short old_flags = dev->flags;
2091 
2092 	dev->flags |= IFF_PROMISC;
2093 	if ((dev->promiscuity += inc) == 0)
2094 		dev->flags &= ~IFF_PROMISC;
2095 	if (dev->flags ^ old_flags) {
2096 		dev_mc_upload(dev);
2097 		printk(KERN_INFO "device %s %s promiscuous mode\n",
2098 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2099 		       					       "left");
2100 	}
2101 }
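
/*
 * Example (illustrative sketch; the function names are hypothetical): a
 * packet-capture style user of the promiscuity counter.  Matched +1/-1
 * calls keep dev->promiscuity balanced, so independent users overlap
 * safely.
 */
#if 0
static void example_capture_start(struct net_device *dev)
{
	dev_set_promiscuity(dev, 1);	/* count 0 -> 1 enters promisc */
}

static void example_capture_stop(struct net_device *dev)
{
	dev_set_promiscuity(dev, -1);	/* count 1 -> 0 leaves promisc */
}
#endif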
2102 
2103 /**
2104  *	dev_set_allmulti	- update allmulti count on a device
2105  *	@dev: device
2106  *	@inc: modifier
2107  *
2108  *	Add or remove reception of all multicast frames on a device. While the
2109  *	count in the device remains above zero the interface remains listening
2110  *	to all multicast frames. Once it hits zero the device reverts to normal
2111  *	filtering operation. A negative @inc value is used to drop the counter
2112  *	when releasing a resource needing all multicasts.
2113  */
2114 
2115 void dev_set_allmulti(struct net_device *dev, int inc)
2116 {
2117 	unsigned short old_flags = dev->flags;
2118 
2119 	dev->flags |= IFF_ALLMULTI;
2120 	if ((dev->allmulti += inc) == 0)
2121 		dev->flags &= ~IFF_ALLMULTI;
2122 	if (dev->flags ^ old_flags)
2123 		dev_mc_upload(dev);
2124 }
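
/*
 * Example (illustrative sketch; the function names are hypothetical): a
 * multicast-routing style user of the allmulti counter, mirroring the
 * promiscuity pattern above.
 */
#if 0
static void example_mroute_attach(struct net_device *dev)
{
	dev_set_allmulti(dev, 1);	/* start receiving all multicasts */
}

static void example_mroute_detach(struct net_device *dev)
{
	dev_set_allmulti(dev, -1);	/* back to filtered reception */
}
#endif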
2125 
2126 unsigned dev_get_flags(const struct net_device *dev)
2127 {
2128 	unsigned flags;
2129 
2130 	flags = (dev->flags & ~(IFF_PROMISC |
2131 				IFF_ALLMULTI |
2132 				IFF_RUNNING)) |
2133 		(dev->gflags & (IFF_PROMISC |
2134 				IFF_ALLMULTI));
2135 
2136 	if (netif_running(dev) && netif_carrier_ok(dev))
2137 		flags |= IFF_RUNNING;
2138 
2139 	return flags;
2140 }
2141 
2142 int dev_change_flags(struct net_device *dev, unsigned flags)
2143 {
2144 	int ret;
2145 	int old_flags = dev->flags;
2146 
2147 	/*
2148 	 *	Set the flags on our device.
2149 	 */
2150 
2151 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2152 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2153 			       IFF_AUTOMEDIA)) |
2154 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2155 				    IFF_ALLMULTI));
2156 
2157 	/*
2158 	 *	Load in the correct multicast list now the flags have changed.
2159 	 */
2160 
2161 	dev_mc_upload(dev);
2162 
2163 	/*
2164 	 *	Have we downed the interface? We handle IFF_UP ourselves
2165 	 *	according to user attempts to set it, rather than blindly
2166 	 *	setting it.
2167 	 */
2168 
2169 	ret = 0;
2170 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
2171 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2172 
2173 		if (!ret)
2174 			dev_mc_upload(dev);
2175 	}
2176 
2177 	if (dev->flags & IFF_UP &&
2178 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2179 					  IFF_VOLATILE)))
2180 		notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2181 
2182 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
2183 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
2184 		dev->gflags ^= IFF_PROMISC;
2185 		dev_set_promiscuity(dev, inc);
2186 	}
2187 
2188 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2189 	   is important. Some (broken) drivers set IFF_PROMISC when
2190 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
2191 	 */
2192 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2193 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2194 		dev->gflags ^= IFF_ALLMULTI;
2195 		dev_set_allmulti(dev, inc);
2196 	}
2197 
2198 	if (old_flags ^ dev->flags)
2199 		rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2200 
2201 	return ret;
2202 }
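
/*
 * Example (illustrative sketch; the function name is hypothetical):
 * bringing an interface up from inside the kernel via
 * dev_change_flags().  The RTNL semaphore serializes flag changes,
 * just as the SIOCSIFFLAGS path does.
 */
#if 0
static int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
	rtnl_unlock();
	return err;
}
#endif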
2203 
2204 int dev_set_mtu(struct net_device *dev, int new_mtu)
2205 {
2206 	int err;
2207 
2208 	if (new_mtu == dev->mtu)
2209 		return 0;
2210 
2211 	/*	MTU must not be negative.	 */
2212 	if (new_mtu < 0)
2213 		return -EINVAL;
2214 
2215 	if (!netif_device_present(dev))
2216 		return -ENODEV;
2217 
2218 	err = 0;
2219 	if (dev->change_mtu)
2220 		err = dev->change_mtu(dev, new_mtu);
2221 	else
2222 		dev->mtu = new_mtu;
2223 	if (!err && dev->flags & IFF_UP)
2224 		notifier_call_chain(&netdev_chain,
2225 				    NETDEV_CHANGEMTU, dev);
2226 	return err;
2227 }
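
/*
 * Example (illustrative sketch; the tunnel scenario and function name
 * are hypothetical): shrinking a device's MTU to leave room for
 * encapsulation overhead, under the RTNL semaphore like SIOCSIFMTU.
 */
#if 0
static int example_tunnel_fix_mtu(struct net_device *dev, int overhead)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(dev, dev->mtu - overhead);
	rtnl_unlock();
	return err;
}
#endif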
2228 
2229 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2230 {
2231 	int err;
2232 
2233 	if (!dev->set_mac_address)
2234 		return -EOPNOTSUPP;
2235 	if (sa->sa_family != dev->type)
2236 		return -EINVAL;
2237 	if (!netif_device_present(dev))
2238 		return -ENODEV;
2239 	err = dev->set_mac_address(dev, sa);
2240 	if (!err)
2241 		notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2242 	return err;
2243 }
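
/*
 * Example (illustrative sketch; the function name is hypothetical):
 * setting a new hardware address from a raw byte array.  sa_family
 * must match dev->type, as checked above.
 */
#if 0
#include <linux/if_ether.h>

static int example_set_hwaddr(struct net_device *dev,
			      const u8 addr[ETH_ALEN])
{
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;	/* e.g. ARPHRD_ETHER */
	memcpy(sa.sa_data, addr, ETH_ALEN);

	rtnl_lock();
	err = dev_set_mac_address(dev, &sa);
	rtnl_unlock();
	return err;
}
#endif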
2244 
2245 /*
2246  *	Perform the SIOCxIFxxx calls.
2247  */
2248 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2249 {
2250 	int err;
2251 	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2252 
2253 	if (!dev)
2254 		return -ENODEV;
2255 
2256 	switch (cmd) {
2257 		case SIOCGIFFLAGS:	/* Get interface flags */
2258 			ifr->ifr_flags = dev_get_flags(dev);
2259 			return 0;
2260 
2261 		case SIOCSIFFLAGS:	/* Set interface flags */
2262 			return dev_change_flags(dev, ifr->ifr_flags);
2263 
2264 		case SIOCGIFMETRIC:	/* Get the metric on the interface
2265 					   (currently unused) */
2266 			ifr->ifr_metric = 0;
2267 			return 0;
2268 
2269 		case SIOCSIFMETRIC:	/* Set the metric on the interface
2270 					   (currently unused) */
2271 			return -EOPNOTSUPP;
2272 
2273 		case SIOCGIFMTU:	/* Get the MTU of a device */
2274 			ifr->ifr_mtu = dev->mtu;
2275 			return 0;
2276 
2277 		case SIOCSIFMTU:	/* Set the MTU of a device */
2278 			return dev_set_mtu(dev, ifr->ifr_mtu);
2279 
2280 		case SIOCGIFHWADDR:
2281 			if (!dev->addr_len)
2282 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2283 			else
2284 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2285 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2286 			ifr->ifr_hwaddr.sa_family = dev->type;
2287 			return 0;
2288 
2289 		case SIOCSIFHWADDR:
2290 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2291 
2292 		case SIOCSIFHWBROADCAST:
2293 			if (ifr->ifr_hwaddr.sa_family != dev->type)
2294 				return -EINVAL;
2295 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2296 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2297 			notifier_call_chain(&netdev_chain,
2298 					    NETDEV_CHANGEADDR, dev);
2299 			return 0;
2300 
2301 		case SIOCGIFMAP:
2302 			ifr->ifr_map.mem_start = dev->mem_start;
2303 			ifr->ifr_map.mem_end   = dev->mem_end;
2304 			ifr->ifr_map.base_addr = dev->base_addr;
2305 			ifr->ifr_map.irq       = dev->irq;
2306 			ifr->ifr_map.dma       = dev->dma;
2307 			ifr->ifr_map.port      = dev->if_port;
2308 			return 0;
2309 
2310 		case SIOCSIFMAP:
2311 			if (dev->set_config) {
2312 				if (!netif_device_present(dev))
2313 					return -ENODEV;
2314 				return dev->set_config(dev, &ifr->ifr_map);
2315 			}
2316 			return -EOPNOTSUPP;
2317 
2318 		case SIOCADDMULTI:
2319 			if (!dev->set_multicast_list ||
2320 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2321 				return -EINVAL;
2322 			if (!netif_device_present(dev))
2323 				return -ENODEV;
2324 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2325 					  dev->addr_len, 1);
2326 
2327 		case SIOCDELMULTI:
2328 			if (!dev->set_multicast_list ||
2329 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2330 				return -EINVAL;
2331 			if (!netif_device_present(dev))
2332 				return -ENODEV;
2333 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2334 					     dev->addr_len, 1);
2335 
2336 		case SIOCGIFINDEX:
2337 			ifr->ifr_ifindex = dev->ifindex;
2338 			return 0;
2339 
2340 		case SIOCGIFTXQLEN:
2341 			ifr->ifr_qlen = dev->tx_queue_len;
2342 			return 0;
2343 
2344 		case SIOCSIFTXQLEN:
2345 			if (ifr->ifr_qlen < 0)
2346 				return -EINVAL;
2347 			dev->tx_queue_len = ifr->ifr_qlen;
2348 			return 0;
2349 
2350 		case SIOCSIFNAME:
2351 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2352 			return dev_change_name(dev, ifr->ifr_newname);
2353 
2354 		/*
2355 		 *	Unknown or private ioctl
2356 		 */
2357 
2358 		default:
2359 			if ((cmd >= SIOCDEVPRIVATE &&
2360 			    cmd <= SIOCDEVPRIVATE + 15) ||
2361 			    cmd == SIOCBONDENSLAVE ||
2362 			    cmd == SIOCBONDRELEASE ||
2363 			    cmd == SIOCBONDSETHWADDR ||
2364 			    cmd == SIOCBONDSLAVEINFOQUERY ||
2365 			    cmd == SIOCBONDINFOQUERY ||
2366 			    cmd == SIOCBONDCHANGEACTIVE ||
2367 			    cmd == SIOCGMIIPHY ||
2368 			    cmd == SIOCGMIIREG ||
2369 			    cmd == SIOCSMIIREG ||
2370 			    cmd == SIOCBRADDIF ||
2371 			    cmd == SIOCBRDELIF ||
2372 			    cmd == SIOCWANDEV) {
2373 				err = -EOPNOTSUPP;
2374 				if (dev->do_ioctl) {
2375 					if (netif_device_present(dev))
2376 						err = dev->do_ioctl(dev, ifr,
2377 								    cmd);
2378 					else
2379 						err = -ENODEV;
2380 				}
2381 			} else
2382 				err = -EINVAL;
2383 
2384 	}
2385 	return err;
2386 }
2387 
2388 /*
2389  *	This function handles all "interface"-type I/O control requests. The actual
2390  *	'doing' part of this is dev_ifsioc above.
2391  */
2392 
2393 /**
2394  *	dev_ioctl	-	network device ioctl
2395  *	@cmd: command to issue
2396  *	@arg: pointer to a struct ifreq in user space
2397  *
2398  *	Issue ioctl functions to devices. This is normally called by the
2399  *	user space syscall interfaces but can sometimes be useful for
2400  *	other purposes. The return value is the return from the syscall if
2401  *	positive, or a negative errno code on error.
2402  */
2403 
2404 int dev_ioctl(unsigned int cmd, void __user *arg)
2405 {
2406 	struct ifreq ifr;
2407 	int ret;
2408 	char *colon;
2409 
2410 	/* One special case: SIOCGIFCONF takes ifconf argument
2411 	   and requires shared lock, because it sleeps writing
2412 	   to user space.
2413 	 */
2414 
2415 	if (cmd == SIOCGIFCONF) {
2416 		rtnl_shlock();
2417 		ret = dev_ifconf((char __user *) arg);
2418 		rtnl_shunlock();
2419 		return ret;
2420 	}
2421 	if (cmd == SIOCGIFNAME)
2422 		return dev_ifname((struct ifreq __user *)arg);
2423 
2424 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2425 		return -EFAULT;
2426 
2427 	ifr.ifr_name[IFNAMSIZ-1] = 0;
2428 
2429 	colon = strchr(ifr.ifr_name, ':');
2430 	if (colon)
2431 		*colon = 0;
2432 
2433 	/*
2434 	 *	See which interface the caller is talking about.
2435 	 */
2436 
2437 	switch (cmd) {
2438 		/*
2439 		 *	These ioctl calls:
2440 		 *	- can be done by all.
2441 		 *	- are atomic and do not require the RTNL semaphore.
2442 		 *	- return a value
2443 		 */
2444 		case SIOCGIFFLAGS:
2445 		case SIOCGIFMETRIC:
2446 		case SIOCGIFMTU:
2447 		case SIOCGIFHWADDR:
2448 		case SIOCGIFSLAVE:
2449 		case SIOCGIFMAP:
2450 		case SIOCGIFINDEX:
2451 		case SIOCGIFTXQLEN:
2452 			dev_load(ifr.ifr_name);
2453 			read_lock(&dev_base_lock);
2454 			ret = dev_ifsioc(&ifr, cmd);
2455 			read_unlock(&dev_base_lock);
2456 			if (!ret) {
2457 				if (colon)
2458 					*colon = ':';
2459 				if (copy_to_user(arg, &ifr,
2460 						 sizeof(struct ifreq)))
2461 					ret = -EFAULT;
2462 			}
2463 			return ret;
2464 
2465 		case SIOCETHTOOL:
2466 			dev_load(ifr.ifr_name);
2467 			rtnl_lock();
2468 			ret = dev_ethtool(&ifr);
2469 			rtnl_unlock();
2470 			if (!ret) {
2471 				if (colon)
2472 					*colon = ':';
2473 				if (copy_to_user(arg, &ifr,
2474 						 sizeof(struct ifreq)))
2475 					ret = -EFAULT;
2476 			}
2477 			return ret;
2478 
2479 		/*
2480 		 *	These ioctl calls:
2481 		 *	- require superuser power.
2482 		 *	- require strict serialization.
2483 		 *	- return a value
2484 		 */
2485 		case SIOCGMIIPHY:
2486 		case SIOCGMIIREG:
2487 		case SIOCSIFNAME:
2488 			if (!capable(CAP_NET_ADMIN))
2489 				return -EPERM;
2490 			dev_load(ifr.ifr_name);
2491 			rtnl_lock();
2492 			ret = dev_ifsioc(&ifr, cmd);
2493 			rtnl_unlock();
2494 			if (!ret) {
2495 				if (colon)
2496 					*colon = ':';
2497 				if (copy_to_user(arg, &ifr,
2498 						 sizeof(struct ifreq)))
2499 					ret = -EFAULT;
2500 			}
2501 			return ret;
2502 
2503 		/*
2504 		 *	These ioctl calls:
2505 		 *	- require superuser power.
2506 		 *	- require strict serialization.
2507 		 *	- do not return a value
2508 		 */
2509 		case SIOCSIFFLAGS:
2510 		case SIOCSIFMETRIC:
2511 		case SIOCSIFMTU:
2512 		case SIOCSIFMAP:
2513 		case SIOCSIFHWADDR:
2514 		case SIOCSIFSLAVE:
2515 		case SIOCADDMULTI:
2516 		case SIOCDELMULTI:
2517 		case SIOCSIFHWBROADCAST:
2518 		case SIOCSIFTXQLEN:
2519 		case SIOCSMIIREG:
2520 		case SIOCBONDENSLAVE:
2521 		case SIOCBONDRELEASE:
2522 		case SIOCBONDSETHWADDR:
2523 		case SIOCBONDSLAVEINFOQUERY:
2524 		case SIOCBONDINFOQUERY:
2525 		case SIOCBONDCHANGEACTIVE:
2526 		case SIOCBRADDIF:
2527 		case SIOCBRDELIF:
2528 			if (!capable(CAP_NET_ADMIN))
2529 				return -EPERM;
2530 			dev_load(ifr.ifr_name);
2531 			rtnl_lock();
2532 			ret = dev_ifsioc(&ifr, cmd);
2533 			rtnl_unlock();
2534 			return ret;
2535 
2536 		case SIOCGIFMEM:
2537 			/* Get the per device memory space. We can add this but
2538 			 * currently do not support it */
2539 		case SIOCSIFMEM:
2540 			/* Set the per device memory buffer space.
2541 			 * Not applicable in our case */
2542 		case SIOCSIFLINK:
2543 			return -EINVAL;
2544 
2545 		/*
2546 		 *	Unknown or private ioctl.
2547 		 */
2548 		default:
2549 			if (cmd == SIOCWANDEV ||
2550 			    (cmd >= SIOCDEVPRIVATE &&
2551 			     cmd <= SIOCDEVPRIVATE + 15)) {
2552 				dev_load(ifr.ifr_name);
2553 				rtnl_lock();
2554 				ret = dev_ifsioc(&ifr, cmd);
2555 				rtnl_unlock();
2556 				if (!ret && copy_to_user(arg, &ifr,
2557 							 sizeof(struct ifreq)))
2558 					ret = -EFAULT;
2559 				return ret;
2560 			}
2561 #ifdef WIRELESS_EXT
2562 			/* Take care of Wireless Extensions */
2563 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2564 				/* If command is `set a parameter', or
2565 				 * `get the encoding parameters', check if
2566 				 * the user has the right to do it */
2567 				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2568 					if (!capable(CAP_NET_ADMIN))
2569 						return -EPERM;
2570 				}
2571 				dev_load(ifr.ifr_name);
2572 				rtnl_lock();
2573 				/* Follow me in net/core/wireless.c */
2574 				ret = wireless_process_ioctl(&ifr, cmd);
2575 				rtnl_unlock();
2576 				if (IW_IS_GET(cmd) &&
2577 				    copy_to_user(arg, &ifr,
2578 					    	 sizeof(struct ifreq)))
2579 					ret = -EFAULT;
2580 				return ret;
2581 			}
2582 #endif	/* WIRELESS_EXT */
2583 			return -EINVAL;
2584 	}
2585 }
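
/*
 * Example (illustrative userspace sketch; the function name is
 * hypothetical): the classic get/modify/set cycle on interface flags,
 * which travels through dev_ioctl() above.
 */
#if 0
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int example_if_up(const char *name)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

	if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0)	/* read current flags */
		goto err;
	ifr.ifr_flags |= IFF_UP;
	if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0)	/* write them back */
		goto err;
	close(fd);
	return 0;
err:
	close(fd);
	return -1;
}
#endif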
2586 
2587 
2588 /**
2589  *	dev_new_index	-	allocate an ifindex
2590  *
2591  *	Returns a suitable unique value for a new device interface
2592  *	number.  The caller must hold the rtnl semaphore or the
2593  *	dev_base_lock to be sure it remains unique.
2594  */
2595 static int dev_new_index(void)
2596 {
2597 	static int ifindex;
2598 	for (;;) {
2599 		if (++ifindex <= 0)
2600 			ifindex = 1;
2601 		if (!__dev_get_by_index(ifindex))
2602 			return ifindex;
2603 	}
2604 }
2605 
2606 static int dev_boot_phase = 1;
2607 
2608 /* Delayed registration/unregistration */
2609 static DEFINE_SPINLOCK(net_todo_list_lock);
2610 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2611 
2612 static inline void net_set_todo(struct net_device *dev)
2613 {
2614 	spin_lock(&net_todo_list_lock);
2615 	list_add_tail(&dev->todo_list, &net_todo_list);
2616 	spin_unlock(&net_todo_list_lock);
2617 }
2618 
2619 /**
2620  *	register_netdevice	- register a network device
2621  *	@dev: device to register
2622  *
2623  *	Take a completed network device structure and add it to the kernel
2624  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2625  *	chain. 0 is returned on success. A negative errno code is returned
2626  *	on a failure to set up the device, or if the name is a duplicate.
2627  *
2628  *	Callers must hold the rtnl semaphore. You may want
2629  *	register_netdev() instead of this.
2630  *
2631  *	BUGS:
2632  *	The locking appears insufficient to guarantee two parallel registers
2633  *	will not get the same name.
2634  */
2635 
2636 int register_netdevice(struct net_device *dev)
2637 {
2638 	struct hlist_head *head;
2639 	struct hlist_node *p;
2640 	int ret;
2641 
2642 	BUG_ON(dev_boot_phase);
2643 	ASSERT_RTNL();
2644 
2645 	/* When net_device's are persistent, this will be fatal. */
2646 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2647 
2648 	spin_lock_init(&dev->queue_lock);
2649 	spin_lock_init(&dev->xmit_lock);
2650 	dev->xmit_lock_owner = -1;
2651 #ifdef CONFIG_NET_CLS_ACT
2652 	spin_lock_init(&dev->ingress_lock);
2653 #endif
2654 
2655 	ret = alloc_divert_blk(dev);
2656 	if (ret)
2657 		goto out;
2658 
2659 	dev->iflink = -1;
2660 
2661 	/* Init, if this function is available */
2662 	if (dev->init) {
2663 		ret = dev->init(dev);
2664 		if (ret) {
2665 			if (ret > 0)
2666 				ret = -EIO;
2667 			goto out_err;
2668 		}
2669 	}
2670 
2671 	if (!dev_valid_name(dev->name)) {
2672 		ret = -EINVAL;
2673 		goto out_err;
2674 	}
2675 
2676 	dev->ifindex = dev_new_index();
2677 	if (dev->iflink == -1)
2678 		dev->iflink = dev->ifindex;
2679 
2680 	/* Check for existence of name */
2681 	head = dev_name_hash(dev->name);
2682 	hlist_for_each(p, head) {
2683 		struct net_device *d
2684 			= hlist_entry(p, struct net_device, name_hlist);
2685 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2686 			ret = -EEXIST;
2687 			goto out_err;
2688 		}
2689 	}
2690 
2691 	/* Fix illegal SG+CSUM combinations. */
2692 	if ((dev->features & NETIF_F_SG) &&
2693 	    !(dev->features & (NETIF_F_IP_CSUM |
2694 			       NETIF_F_NO_CSUM |
2695 			       NETIF_F_HW_CSUM))) {
2696 		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2697 		       dev->name);
2698 		dev->features &= ~NETIF_F_SG;
2699 	}
2700 
2701 	/* TSO requires that SG is present as well. */
2702 	if ((dev->features & NETIF_F_TSO) &&
2703 	    !(dev->features & NETIF_F_SG)) {
2704 		printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2705 		       dev->name);
2706 		dev->features &= ~NETIF_F_TSO;
2707 	}
2708 
2709 	/*
2710 	 *	Install a nil rebuild_header routine; it should never be
2711 	 *	called and serves only as a bug trap.
2712 	 */
2713 
2714 	if (!dev->rebuild_header)
2715 		dev->rebuild_header = default_rebuild_header;
2716 
2717 	/*
2718 	 *	Default initial state at registration is that the
2719 	 *	device is present.
2720 	 */
2721 
2722 	set_bit(__LINK_STATE_PRESENT, &dev->state);
2723 
2724 	dev->next = NULL;
2725 	dev_init_scheduler(dev);
2726 	write_lock_bh(&dev_base_lock);
2727 	*dev_tail = dev;
2728 	dev_tail = &dev->next;
2729 	hlist_add_head(&dev->name_hlist, head);
2730 	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2731 	dev_hold(dev);
2732 	dev->reg_state = NETREG_REGISTERING;
2733 	write_unlock_bh(&dev_base_lock);
2734 
2735 	/* Notify protocols, that a new device appeared. */
2736 	notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2737 
2738 	/* Finish registration after unlock */
2739 	net_set_todo(dev);
2740 	ret = 0;
2741 
2742 out:
2743 	return ret;
2744 out_err:
2745 	free_divert_blk(dev);
2746 	goto out;
2747 }
2748 
2749 /**
2750  *	register_netdev	- register a network device
2751  *	@dev: device to register
2752  *
2753  *	Take a completed network device structure and add it to the kernel
2754  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2755  *	chain. 0 is returned on success. A negative errno code is returned
2756  *	on a failure to set up the device, or if the name is a duplicate.
2757  *
2758  *	This is a wrapper around register_netdevice() that takes the rtnl semaphore
2759  *	and expands the device name if you passed a format string to
2760  *	alloc_netdev.
2761  */
2762 int register_netdev(struct net_device *dev)
2763 {
2764 	int err;
2765 
2766 	rtnl_lock();
2767 
2768 	/*
2769 	 * If the name is a format string the caller wants us to do a
2770 	 * name allocation.
2771 	 */
2772 	if (strchr(dev->name, '%')) {
2773 		err = dev_alloc_name(dev, dev->name);
2774 		if (err < 0)
2775 			goto out;
2776 	}
2777 
2778 	/*
2779 	 * Back compatibility hook. Kill this one in 2.5
2780 	 */
2781 	if (dev->name[0] == 0 || dev->name[0] == ' ') {
2782 		err = dev_alloc_name(dev, "eth%d");
2783 		if (err < 0)
2784 			goto out;
2785 	}
2786 
2787 	err = register_netdevice(dev);
2788 out:
2789 	rtnl_unlock();
2790 	return err;
2791 }
2792 EXPORT_SYMBOL(register_netdev);
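
/*
 * Example (illustrative driver sketch; example_priv, example_setup,
 * example_probe and the "exm%d" name are hypothetical): the usual
 * allocate/register pairing.  register_netdev() expands the '%d'
 * via dev_alloc_name().
 */
#if 0
#include <linux/etherdevice.h>

struct example_priv {
	int dummy;
};

static void example_setup(struct net_device *dev)
{
	ether_setup(dev);		/* sane Ethernet defaults */
}

static int example_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(sizeof(struct example_priv), "exm%d",
			   example_setup);
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);
	if (err)
		free_netdev(dev);	/* safe: still NETREG_UNINITIALIZED */
	return err;
}
#endif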
2793 
2794 /*
2795  * netdev_wait_allrefs - wait until all references are gone.
2796  *
2797  * This is called when unregistering network devices.
2798  *
2799  * Any protocol or device that holds a reference should register
2800  * for netdevice notification, and clean up and release the
2801  * reference if they receive an UNREGISTER event.
2802  * We can get stuck here if buggy protocols don't correctly
2803  * call dev_put.
2804  */
2805 static void netdev_wait_allrefs(struct net_device *dev)
2806 {
2807 	unsigned long rebroadcast_time, warning_time;
2808 
2809 	rebroadcast_time = warning_time = jiffies;
2810 	while (atomic_read(&dev->refcnt) != 0) {
2811 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2812 			rtnl_shlock();
2813 
2814 			/* Rebroadcast unregister notification */
2815 			notifier_call_chain(&netdev_chain,
2816 					    NETDEV_UNREGISTER, dev);
2817 
2818 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2819 				     &dev->state)) {
2820 				/* We must not have linkwatch events
2821 				 * pending on unregister. If this
2822 				 * happens, we simply run the queue
2823 				 * unscheduled, resulting in a noop
2824 				 * for this device.
2825 				 */
2826 				linkwatch_run_queue();
2827 			}
2828 
2829 			rtnl_shunlock();
2830 
2831 			rebroadcast_time = jiffies;
2832 		}
2833 
2834 		msleep(250);
2835 
2836 		if (time_after(jiffies, warning_time + 10 * HZ)) {
2837 			printk(KERN_EMERG "unregister_netdevice: "
2838 			       "waiting for %s to become free. Usage "
2839 			       "count = %d\n",
2840 			       dev->name, atomic_read(&dev->refcnt));
2841 			warning_time = jiffies;
2842 		}
2843 	}
2844 }
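
/*
 * Example (illustrative sketch; example_held_dev and the other names
 * are hypothetical): the notifier discipline the comment above asks
 * for.  A subsystem holding a device reference drops it on
 * NETDEV_UNREGISTER so that netdev_wait_allrefs() can make progress.
 */
#if 0
static struct net_device *example_held_dev;

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER && dev == example_held_dev) {
		example_held_dev = NULL;
		dev_put(dev);		/* release our reference */
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_notifier = {
	.notifier_call = example_netdev_event,
};
/* registered elsewhere via register_netdevice_notifier(&example_netdev_notifier) */
#endif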
2845 
2846 /* The sequence is:
2847  *
2848  *	rtnl_lock();
2849  *	...
2850  *	register_netdevice(x1);
2851  *	register_netdevice(x2);
2852  *	...
2853  *	unregister_netdevice(y1);
2854  *	unregister_netdevice(y2);
2855  *      ...
2856  *	rtnl_unlock();
2857  *	free_netdev(y1);
2858  *	free_netdev(y2);
2859  *
2860  * We are invoked by rtnl_unlock() after it drops the semaphore.
2861  * This allows us to deal with problems:
2862  * 1) We can create/delete sysfs objects which invoke hotplug
2863  *    without deadlocking with linkwatch via keventd.
2864  * 2) Since we run with the RTNL semaphore not held, we can sleep
2865  *    safely in order to wait for the netdev refcnt to drop to zero.
2866  */
2867 static DECLARE_MUTEX(net_todo_run_mutex);
2868 void netdev_run_todo(void)
2869 {
2870 	struct list_head list = LIST_HEAD_INIT(list);
2871 	int err;
2872 
2873 
2874 	/* Need to guard against multiple CPUs getting out of order. */
2875 	down(&net_todo_run_mutex);
2876 
2877 	/* Not safe to do outside the semaphore.  We must not return
2878 	 * until all unregister events invoked by the local processor
2879 	 * have been completed (either by this todo run, or one on
2880 	 * another cpu).
2881 	 */
2882 	if (list_empty(&net_todo_list))
2883 		goto out;
2884 
2885 	/* Snapshot list, allow later requests */
2886 	spin_lock(&net_todo_list_lock);
2887 	list_splice_init(&net_todo_list, &list);
2888 	spin_unlock(&net_todo_list_lock);
2889 
2890 	while (!list_empty(&list)) {
2891 		struct net_device *dev
2892 			= list_entry(list.next, struct net_device, todo_list);
2893 		list_del(&dev->todo_list);
2894 
2895 		switch(dev->reg_state) {
2896 		case NETREG_REGISTERING:
2897 			err = netdev_register_sysfs(dev);
2898 			if (err)
2899 				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
2900 				       dev->name, err);
2901 			dev->reg_state = NETREG_REGISTERED;
2902 			break;
2903 
2904 		case NETREG_UNREGISTERING:
2905 			netdev_unregister_sysfs(dev);
2906 			dev->reg_state = NETREG_UNREGISTERED;
2907 
2908 			netdev_wait_allrefs(dev);
2909 
2910 			/* paranoia */
2911 			BUG_ON(atomic_read(&dev->refcnt));
2912 			BUG_TRAP(!dev->ip_ptr);
2913 			BUG_TRAP(!dev->ip6_ptr);
2914 			BUG_TRAP(!dev->dn_ptr);
2915 
2916 
2917 			/* It must be the very last action,
2918 			 * after this 'dev' may point to freed up memory.
2919 			 */
2920 			if (dev->destructor)
2921 				dev->destructor(dev);
2922 			break;
2923 
2924 		default:
2925 			printk(KERN_ERR "network todo '%s' but state %d\n",
2926 			       dev->name, dev->reg_state);
2927 			break;
2928 		}
2929 	}
2930 
2931 out:
2932 	up(&net_todo_run_mutex);
2933 }
2934 
2935 /**
2936  *	alloc_netdev - allocate network device
2937  *	@sizeof_priv:	size of private data to allocate space for
2938  *	@name:		device name format string
2939  *	@setup:		callback to initialize device
2940  *
2941  *	Allocates a struct net_device with private data area for driver use
2942  *	and performs basic initialization.
2943  */
2944 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
2945 		void (*setup)(struct net_device *))
2946 {
2947 	void *p;
2948 	struct net_device *dev;
2949 	int alloc_size;
2950 
2951 	/* ensure 32-byte alignment of both the device and private area */
2952 	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
2953 	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
2954 
2955 	p = kmalloc(alloc_size, GFP_KERNEL);
2956 	if (!p) {
2957 		printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
2958 		return NULL;
2959 	}
2960 	memset(p, 0, alloc_size);
2961 
2962 	dev = (struct net_device *)
2963 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
2964 	dev->padded = (char *)dev - (char *)p;
2965 
2966 	if (sizeof_priv)
2967 		dev->priv = netdev_priv(dev);
2968 
2969 	setup(dev);
2970 	strcpy(dev->name, name);
2971 	return dev;
2972 }
2973 EXPORT_SYMBOL(alloc_netdev);
2974 
2975 /**
2976  *	free_netdev - free network device
2977  *	@dev: device
2978  *
2979  *	This function does the last stage of destroying an allocated device
2980  * 	interface. The reference to the device object is released.
2981  *	If this is the last reference then it will be freed.
2982  */
2983 void free_netdev(struct net_device *dev)
2984 {
2985 #ifdef CONFIG_SYSFS
2986 	/* Compatibility with error handling in drivers */
2987 	if (dev->reg_state == NETREG_UNINITIALIZED) {
2988 		kfree((char *)dev - dev->padded);
2989 		return;
2990 	}
2991 
2992 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
2993 	dev->reg_state = NETREG_RELEASED;
2994 
2995 	/* will free via class release */
2996 	class_device_put(&dev->class_dev);
2997 #else
2998 	kfree((char *)dev - dev->padded);
2999 #endif
3000 }
3001 
3002 /* Synchronize with packet receive processing. */
3003 void synchronize_net(void)
3004 {
3005 	might_sleep();
3006 	synchronize_rcu();
3007 }
3008 
3009 /**
3010  *	unregister_netdevice - remove device from the kernel
3011  *	@dev: device
3012  *
3013  *	This function shuts down a device interface and removes it
3014  *	from the kernel tables. On success 0 is returned, on a failure
3015  *	a negative errno code is returned.
3016  *
3017  *	Callers must hold the rtnl semaphore.  You may want
3018  *	unregister_netdev() instead of this.
3019  */
3020 
3021 int unregister_netdevice(struct net_device *dev)
3022 {
3023 	struct net_device *d, **dp;
3024 
3025 	BUG_ON(dev_boot_phase);
3026 	ASSERT_RTNL();
3027 
3028 	/* Some devices call this without ever having registered, to unwind a failed initialization. */
3029 	if (dev->reg_state == NETREG_UNINITIALIZED) {
3030 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3031 				  "was registered\n", dev->name, dev);
3032 		return -ENODEV;
3033 	}
3034 
3035 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
3036 
3037 	/* If device is running, close it first. */
3038 	if (dev->flags & IFF_UP)
3039 		dev_close(dev);
3040 
3041 	/* And unlink it from device chain. */
3042 	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3043 		if (d == dev) {
3044 			write_lock_bh(&dev_base_lock);
3045 			hlist_del(&dev->name_hlist);
3046 			hlist_del(&dev->index_hlist);
3047 			if (dev_tail == &dev->next)
3048 				dev_tail = dp;
3049 			*dp = d->next;
3050 			write_unlock_bh(&dev_base_lock);
3051 			break;
3052 		}
3053 	}
3054 	if (!d) {
3055 		printk(KERN_ERR "unregister net_device: '%s' not found\n",
3056 		       dev->name);
3057 		return -ENODEV;
3058 	}
3059 
3060 	dev->reg_state = NETREG_UNREGISTERING;
3061 
3062 	synchronize_net();
3063 
3064 	/* Shutdown queueing discipline. */
3065 	dev_shutdown(dev);
3066 
3067 
3068 	/* Notify protocols that we are about to destroy
3069 	   this device; they should clean up all their state.
3070 	*/
3071 	notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3072 
3073 	/*
3074 	 *	Flush the multicast chain
3075 	 */
3076 	dev_mc_discard(dev);
3077 
3078 	if (dev->uninit)
3079 		dev->uninit(dev);
3080 
3081 	/* Notifier chain MUST detach us from master device. */
3082 	BUG_TRAP(!dev->master);
3083 
3084 	free_divert_blk(dev);
3085 
3086 	/* Finish processing unregister after unlock */
3087 	net_set_todo(dev);
3088 
3089 	synchronize_net();
3090 
3091 	dev_put(dev);
3092 	return 0;
3093 }
3094 
3095 /**
3096  *	unregister_netdev - remove device from the kernel
3097  *	@dev: device
3098  *
3099  *	This function shuts down a device interface and removes it
3100  *	from the kernel tables. On success 0 is returned, on a failure
3101  *	a negative errno code is returned.
3102  *
3103  *	This is just a wrapper for unregister_netdevice that takes
3104  *	the rtnl semaphore.  In general you want to use this and not
3105  *	unregister_netdevice.
3106  */
3107 void unregister_netdev(struct net_device *dev)
3108 {
3109 	rtnl_lock();
3110 	unregister_netdevice(dev);
3111 	rtnl_unlock();
3112 }
3113 
3114 EXPORT_SYMBOL(unregister_netdev);
3115 
3116 #ifdef CONFIG_HOTPLUG_CPU
3117 static int dev_cpu_callback(struct notifier_block *nfb,
3118 			    unsigned long action,
3119 			    void *ocpu)
3120 {
3121 	struct sk_buff **list_skb;
3122 	struct net_device **list_net;
3123 	struct sk_buff *skb;
3124 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
3125 	struct softnet_data *sd, *oldsd;
3126 
3127 	if (action != CPU_DEAD)
3128 		return NOTIFY_OK;
3129 
3130 	local_irq_disable();
3131 	cpu = smp_processor_id();
3132 	sd = &per_cpu(softnet_data, cpu);
3133 	oldsd = &per_cpu(softnet_data, oldcpu);
3134 
3135 	/* Find end of our completion_queue. */
3136 	list_skb = &sd->completion_queue;
3137 	while (*list_skb)
3138 		list_skb = &(*list_skb)->next;
3139 	/* Append completion queue from offline CPU. */
3140 	*list_skb = oldsd->completion_queue;
3141 	oldsd->completion_queue = NULL;
3142 
3143 	/* Find end of our output_queue. */
3144 	list_net = &sd->output_queue;
3145 	while (*list_net)
3146 		list_net = &(*list_net)->next_sched;
3147 	/* Append output queue from offline CPU. */
3148 	*list_net = oldsd->output_queue;
3149 	oldsd->output_queue = NULL;
3150 
3151 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
3152 	local_irq_enable();
3153 
3154 	/* Process offline CPU's input_pkt_queue */
3155 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3156 		netif_rx(skb);
3157 
3158 	return NOTIFY_OK;
3159 }
3160 #endif /* CONFIG_HOTPLUG_CPU */
3161 
3162 
3163 /*
3164  *	Initialize the DEV module. At boot time this walks the device list and
3165  *	unhooks any devices that fail to initialise (normally hardware not
3166  *	present) and leaves us with a valid list of present and active devices.
3167  *
3168  */
3169 
3170 /*
3171  *       This is called single threaded during boot, so no need
3172  *       to take the rtnl semaphore.
3173  */
3174 static int __init net_dev_init(void)
3175 {
3176 	int i, rc = -ENOMEM;
3177 
3178 	BUG_ON(!dev_boot_phase);
3179 
3180 	net_random_init();
3181 
3182 	if (dev_proc_init())
3183 		goto out;
3184 
3185 	if (netdev_sysfs_init())
3186 		goto out;
3187 
3188 	INIT_LIST_HEAD(&ptype_all);
3189 	for (i = 0; i < 16; i++)
3190 		INIT_LIST_HEAD(&ptype_base[i]);
3191 
3192 	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3193 		INIT_HLIST_HEAD(&dev_name_head[i]);
3194 
3195 	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3196 		INIT_HLIST_HEAD(&dev_index_head[i]);
3197 
3198 	/*
3199 	 *	Initialise the packet receive queues.
3200 	 */
3201 
3202 	for (i = 0; i < NR_CPUS; i++) {
3203 		struct softnet_data *queue;
3204 
3205 		queue = &per_cpu(softnet_data, i);
3206 		skb_queue_head_init(&queue->input_pkt_queue);
3207 		queue->completion_queue = NULL;
3208 		INIT_LIST_HEAD(&queue->poll_list);
3209 		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3210 		queue->backlog_dev.weight = weight_p;
3211 		queue->backlog_dev.poll = process_backlog;
3212 		atomic_set(&queue->backlog_dev.refcnt, 1);
3213 	}
3214 
3215 	dev_boot_phase = 0;
3216 
3217 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3218 	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3219 
3220 	hotcpu_notifier(dev_cpu_callback, 0);
3221 	dst_init();
3222 	dev_mcast_init();
3223 	rc = 0;
3224 out:
3225 	return rc;
3226 }
3227 
3228 subsys_initcall(net_dev_init);
3229 
3230 EXPORT_SYMBOL(__dev_get_by_index);
3231 EXPORT_SYMBOL(__dev_get_by_name);
3232 EXPORT_SYMBOL(__dev_remove_pack);
3233 EXPORT_SYMBOL(__skb_linearize);
3234 EXPORT_SYMBOL(dev_add_pack);
3235 EXPORT_SYMBOL(dev_alloc_name);
3236 EXPORT_SYMBOL(dev_close);
3237 EXPORT_SYMBOL(dev_get_by_flags);
3238 EXPORT_SYMBOL(dev_get_by_index);
3239 EXPORT_SYMBOL(dev_get_by_name);
3240 EXPORT_SYMBOL(dev_ioctl);
3241 EXPORT_SYMBOL(dev_open);
3242 EXPORT_SYMBOL(dev_queue_xmit);
3243 EXPORT_SYMBOL(dev_remove_pack);
3244 EXPORT_SYMBOL(dev_set_allmulti);
3245 EXPORT_SYMBOL(dev_set_promiscuity);
3246 EXPORT_SYMBOL(dev_change_flags);
3247 EXPORT_SYMBOL(dev_set_mtu);
3248 EXPORT_SYMBOL(dev_set_mac_address);
3249 EXPORT_SYMBOL(free_netdev);
3250 EXPORT_SYMBOL(netdev_boot_setup_check);
3251 EXPORT_SYMBOL(netdev_set_master);
3252 EXPORT_SYMBOL(netdev_state_change);
3253 EXPORT_SYMBOL(netif_receive_skb);
3254 EXPORT_SYMBOL(netif_rx);
3255 EXPORT_SYMBOL(register_gifconf);
3256 EXPORT_SYMBOL(register_netdevice);
3257 EXPORT_SYMBOL(register_netdevice_notifier);
3258 EXPORT_SYMBOL(skb_checksum_help);
3259 EXPORT_SYMBOL(synchronize_net);
3260 EXPORT_SYMBOL(unregister_netdevice);
3261 EXPORT_SYMBOL(unregister_netdevice_notifier);
3262 EXPORT_SYMBOL(net_enable_timestamp);
3263 EXPORT_SYMBOL(net_disable_timestamp);
3264 EXPORT_SYMBOL(dev_get_flags);
3265 
3266 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3267 EXPORT_SYMBOL(br_handle_frame_hook);
3268 EXPORT_SYMBOL(br_fdb_get_hook);
3269 EXPORT_SYMBOL(br_fdb_put_hook);
3270 #endif
3271 
3272 #ifdef CONFIG_KMOD
3273 EXPORT_SYMBOL(dev_load);
3274 #endif
3275 
3276 EXPORT_PER_CPU_SYMBOL(softnet_data);
3277