xref: /linux/net/core/dev.c (revision 2277ab4a1df50e05bc732fe9488d4e902bb8399a)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 #include <trace/events/napi.h>
130 
131 #include "net-sysfs.h"
132 
133 /* Instead of increasing this, you should create a hash table. */
134 #define MAX_GRO_SKBS 8
135 
136 /* This should be increased if a protocol with a bigger head is added. */
137 #define GRO_MAX_HEAD (MAX_HEADER + 128)
138 
139 /*
140  *	The list of packet types we will receive (as opposed to discard)
141  *	and the routines to invoke.
142  *
143  *	Why 16?  Because with 16 the only overlap we get on a hash of the
144  *	low nibble of the protocol value is RARP/SNAP/X.25.
145  *
146  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
147  *             sure which should go first, but I bet it won't make much
148  *             difference if we are running VLANs.  The good news is that
149  *             this protocol won't be in the list unless compiled in, so
150  *             the average user (w/out VLANs) will not be adversely affected.
151  *             --BLG
152  *
153  *		0800	IP
154  *		8100    802.1Q VLAN
155  *		0001	802.3
156  *		0002	AX.25
157  *		0004	802.2
158  *		8035	RARP
159  *		0005	SNAP
160  *		0805	X.25
161  *		0806	ARP
162  *		8137	IPX
163  *		0009	Localtalk
164  *		86DD	IPv6
165  */
166 
167 #define PTYPE_HASH_SIZE	(16)
168 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
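
/*
 * For example: dev_add_pack() below picks the bucket as
 * ntohs(pt->type) & PTYPE_HASH_MASK, i.e. the low nibble of the protocol
 * value, so RARP (0x8035), SNAP (0x0005) and X.25 (0x0805) from the table
 * above do indeed all collide in bucket 5.
 */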
169 
170 static DEFINE_SPINLOCK(ptype_lock);
171 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
172 static struct list_head ptype_all __read_mostly;	/* Taps */
173 
174 /*
175  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
176  * semaphore.
177  *
178  * Pure readers hold dev_base_lock for reading.
179  *
180  * Writers must hold the rtnl semaphore while they loop through the
181  * dev_base_head list, and hold dev_base_lock for writing when they do the
182  * actual updates.  This allows pure readers to access the list even
183  * while a writer is preparing to update it.
184  *
185  * To put it another way, dev_base_lock is held for writing only to
186  * protect against pure readers; the rtnl semaphore provides the
187  * protection against other writers.
188  *
189  * For example usages, see register_netdevice() and
190  * unregister_netdevice(), which must be called with the rtnl
191  * semaphore held.
192  */
193 DEFINE_RWLOCK(dev_base_lock);
194 
195 EXPORT_SYMBOL(dev_base_lock);
196 
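/*
 * A minimal sketch of the pure-reader pattern described above (illustrative
 * only, not part of this file; example_count_netdevs() is a hypothetical
 * helper):
 *
 *	static int example_count_netdevs(struct net *net)
 *	{
 *		struct net_device *dev;
 *		int count = 0;
 *
 *		read_lock(&dev_base_lock);
 *		for_each_netdev(net, dev)
 *			count++;
 *		read_unlock(&dev_base_lock);
 *		return count;
 *	}
 *
 * Writers such as list_netdevice() below instead run under the rtnl
 * semaphore and take dev_base_lock for writing only around the actual
 * list update.
 */
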
197 #define NETDEV_HASHBITS	8
198 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
199 
200 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
201 {
202 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
203 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
204 }
205 
206 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
207 {
208 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
209 }
210 
211 /* Device list insertion */
212 static int list_netdevice(struct net_device *dev)
213 {
214 	struct net *net = dev_net(dev);
215 
216 	ASSERT_RTNL();
217 
218 	write_lock_bh(&dev_base_lock);
219 	list_add_tail(&dev->dev_list, &net->dev_base_head);
220 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
221 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
222 	write_unlock_bh(&dev_base_lock);
223 	return 0;
224 }
225 
226 /* Device list removal */
227 static void unlist_netdevice(struct net_device *dev)
228 {
229 	ASSERT_RTNL();
230 
231 	/* Unlink dev from the device chain */
232 	write_lock_bh(&dev_base_lock);
233 	list_del(&dev->dev_list);
234 	hlist_del(&dev->name_hlist);
235 	hlist_del(&dev->index_hlist);
236 	write_unlock_bh(&dev_base_lock);
237 }
238 
239 /*
240  *	Our notifier list
241  */
242 
243 static RAW_NOTIFIER_HEAD(netdev_chain);
244 
245 /*
246  *	Device drivers call our routines to queue packets here. We empty the
247  *	queue in the local softnet handler.
248  */
249 
250 DEFINE_PER_CPU(struct softnet_data, softnet_data);
251 
252 #ifdef CONFIG_LOCKDEP
253 /*
254  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
255  * according to dev->type
256  */
257 static const unsigned short netdev_lock_type[] =
258 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
259 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
260 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
261 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
262 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
263 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
264 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
265 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
266 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
267 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
268 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
269 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
270 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
271 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
272 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY,
273 	 ARPHRD_VOID, ARPHRD_NONE};
274 
275 static const char *netdev_lock_name[] =
276 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
277 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
278 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
279 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
280 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
281 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
282 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
283 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
284 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
285 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
286 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
287 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
288 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
289 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
290 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY",
291 	 "_xmit_VOID", "_xmit_NONE"};
292 
293 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
294 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
295 
296 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
297 {
298 	int i;
299 
300 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
301 		if (netdev_lock_type[i] == dev_type)
302 			return i;
303 	/* the last key is used by default */
304 	return ARRAY_SIZE(netdev_lock_type) - 1;
305 }
306 
307 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
308 						 unsigned short dev_type)
309 {
310 	int i;
311 
312 	i = netdev_lock_pos(dev_type);
313 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
314 				   netdev_lock_name[i]);
315 }
316 
317 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
318 {
319 	int i;
320 
321 	i = netdev_lock_pos(dev->type);
322 	lockdep_set_class_and_name(&dev->addr_list_lock,
323 				   &netdev_addr_lock_key[i],
324 				   netdev_lock_name[i]);
325 }
326 #else
327 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
328 						 unsigned short dev_type)
329 {
330 }
331 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
332 {
333 }
334 #endif
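
/*
 * For example, an ARPHRD_ETHER device gets its _xmit_lock and addr_list_lock
 * keyed as "_xmit_ETHER", while a device type missing from netdev_lock_type[]
 * falls through to the last entry and shares the "_xmit_NONE" class.
 */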
335 
336 /*******************************************************************************
337 
338 		Protocol management and registration routines
339 
340 *******************************************************************************/
341 
342 /*
343  *	Add a protocol ID to the list. Now that the input handler is
344  *	smarter we can dispense with all the messy stuff that used to be
345  *	here.
346  *
347  *	BEWARE!!! Protocol handlers, mangling input packets,
348  *	MUST BE last in hash buckets and checking protocol handlers
349  *	MUST start from promiscuous ptype_all chain in net_bh.
350  *	It is true now, do not change it.
351  *	Explanation follows: if protocol handler, mangling packet, will
352  *	be the first on list, it is not able to sense, that packet
353  *	is cloned and should be copied-on-write, so that it will
354  *	change it and subsequent readers will get broken packet.
355  *							--ANK (980803)
356  */
357 
358 /**
359  *	dev_add_pack - add packet handler
360  *	@pt: packet type declaration
361  *
362  *	Add a protocol handler to the networking stack. The passed &packet_type
363  *	is linked into kernel lists and may not be freed until it has been
364  *	removed from the kernel lists.
365  *
366  *	This call does not sleep, therefore it cannot
367  *	guarantee that all CPUs that are in the middle of receiving packets
368  *	will see the new packet type (until the next received packet).
369  */
370 
371 void dev_add_pack(struct packet_type *pt)
372 {
373 	int hash;
374 
375 	spin_lock_bh(&ptype_lock);
376 	if (pt->type == htons(ETH_P_ALL))
377 		list_add_rcu(&pt->list, &ptype_all);
378 	else {
379 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
380 		list_add_rcu(&pt->list, &ptype_base[hash]);
381 	}
382 	spin_unlock_bh(&ptype_lock);
383 }
384 
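/*
 * A sketch of how a protocol module typically uses dev_add_pack() and
 * dev_remove_pack() (illustrative only; example_rcv() and
 * example_packet_type are hypothetical and not part of this file):
 *
 *	static int example_rcv(struct sk_buff *skb, struct net_device *dev,
 *			       struct packet_type *pt,
 *			       struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type example_packet_type __read_mostly = {
 *		.type = cpu_to_be16(ETH_P_IP),
 *		.func = example_rcv,
 *	};
 *
 *	dev_add_pack(&example_packet_type);	from the module init path
 *	dev_remove_pack(&example_packet_type);	from the module exit path
 *
 * Note that dev_remove_pack() below sleeps in synchronize_net(), so it must
 * not be called from atomic context.
 */
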
385 /**
386  *	__dev_remove_pack	 - remove packet handler
387  *	@pt: packet type declaration
388  *
389  *	Remove a protocol handler that was previously added to the kernel
390  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
391  *	from the kernel lists and can be freed or reused once this function
392  *	returns.
393  *
394  *      The packet type might still be in use by receivers
395  *	and must not be freed until after all the CPUs have gone
396  *	through a quiescent state.
397  */
398 void __dev_remove_pack(struct packet_type *pt)
399 {
400 	struct list_head *head;
401 	struct packet_type *pt1;
402 
403 	spin_lock_bh(&ptype_lock);
404 
405 	if (pt->type == htons(ETH_P_ALL))
406 		head = &ptype_all;
407 	else
408 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
409 
410 	list_for_each_entry(pt1, head, list) {
411 		if (pt == pt1) {
412 			list_del_rcu(&pt->list);
413 			goto out;
414 		}
415 	}
416 
417 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
418 out:
419 	spin_unlock_bh(&ptype_lock);
420 }
421 /**
422  *	dev_remove_pack	 - remove packet handler
423  *	@pt: packet type declaration
424  *
425  *	Remove a protocol handler that was previously added to the kernel
426  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
427  *	from the kernel lists and can be freed or reused once this function
428  *	returns.
429  *
430  *	This call sleeps to guarantee that no CPU is looking at the packet
431  *	type after return.
432  */
433 void dev_remove_pack(struct packet_type *pt)
434 {
435 	__dev_remove_pack(pt);
436 
437 	synchronize_net();
438 }
439 
440 /******************************************************************************
441 
442 		      Device Boot-time Settings Routines
443 
444 *******************************************************************************/
445 
446 /* Boot time configuration table */
447 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
448 
449 /**
450  *	netdev_boot_setup_add	- add new setup entry
451  *	@name: name of the device
452  *	@map: configured settings for the device
453  *
454  *	Adds a new setup entry to the dev_boot_setup list.  The function
455  *	returns 0 on error and 1 on success.  This is a generic routine for
456  *	all netdevices.
457  */
458 static int netdev_boot_setup_add(char *name, struct ifmap *map)
459 {
460 	struct netdev_boot_setup *s;
461 	int i;
462 
463 	s = dev_boot_setup;
464 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
465 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
466 			memset(s[i].name, 0, sizeof(s[i].name));
467 			strlcpy(s[i].name, name, IFNAMSIZ);
468 			memcpy(&s[i].map, map, sizeof(s[i].map));
469 			break;
470 		}
471 	}
472 
473 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
474 }
475 
476 /**
477  *	netdev_boot_setup_check	- check boot time settings
478  *	@dev: the netdevice
479  *
480  * 	Check boot time settings for the device.
481  *	Any settings found are applied to the device so that they can be used
482  *	later during device probing.
483  *	Returns 0 if no settings are found, 1 if they are.
484  */
485 int netdev_boot_setup_check(struct net_device *dev)
486 {
487 	struct netdev_boot_setup *s = dev_boot_setup;
488 	int i;
489 
490 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
491 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
492 		    !strcmp(dev->name, s[i].name)) {
493 			dev->irq 	= s[i].map.irq;
494 			dev->base_addr 	= s[i].map.base_addr;
495 			dev->mem_start 	= s[i].map.mem_start;
496 			dev->mem_end 	= s[i].map.mem_end;
497 			return 1;
498 		}
499 	}
500 	return 0;
501 }
502 
503 
504 /**
505  *	netdev_boot_base	- get address from boot time settings
506  *	@prefix: prefix for network device
507  *	@unit: id for network device
508  *
509  * 	Check boot time settings for the base address of the device.
510  *	Any settings found are applied to the device so that they can be used
511  *	later during device probing.
512  *	Returns 0 if no settings are found.
513  */
514 unsigned long netdev_boot_base(const char *prefix, int unit)
515 {
516 	const struct netdev_boot_setup *s = dev_boot_setup;
517 	char name[IFNAMSIZ];
518 	int i;
519 
520 	sprintf(name, "%s%d", prefix, unit);
521 
522 	/*
523 	 * If device already registered then return base of 1
524 	 * to indicate not to probe for this interface
525 	 */
526 	if (__dev_get_by_name(&init_net, name))
527 		return 1;
528 
529 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
530 		if (!strcmp(name, s[i].name))
531 			return s[i].map.base_addr;
532 	return 0;
533 }
534 
535 /*
536  * Saves at boot time configured settings for any netdevice.
537  */
538 int __init netdev_boot_setup(char *str)
539 {
540 	int ints[5];
541 	struct ifmap map;
542 
543 	str = get_options(str, ARRAY_SIZE(ints), ints);
544 	if (!str || !*str)
545 		return 0;
546 
547 	/* Save settings */
548 	memset(&map, 0, sizeof(map));
549 	if (ints[0] > 0)
550 		map.irq = ints[1];
551 	if (ints[0] > 1)
552 		map.base_addr = ints[2];
553 	if (ints[0] > 2)
554 		map.mem_start = ints[3];
555 	if (ints[0] > 3)
556 		map.mem_end = ints[4];
557 
558 	/* Add new entry to the list */
559 	return netdev_boot_setup_add(str, &map);
560 }
561 
562 __setup("netdev=", netdev_boot_setup);
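
/*
 * For example, booting with
 *
 *	netdev=5,0x340,0,0,eth0
 *
 * records irq 5 and I/O base 0x340 for "eth0" in dev_boot_setup above, and a
 * later netdev_boot_setup_check() on that device picks the values up during
 * probing.  (Format: netdev=irq,io,mem_start,mem_end,name.)
 */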
563 
564 /*******************************************************************************
565 
566 			    Device Interface Subroutines
567 
568 *******************************************************************************/
569 
570 /**
571  *	__dev_get_by_name	- find a device by its name
572  *	@net: the applicable net namespace
573  *	@name: name to find
574  *
575  *	Find an interface by name. Must be called under RTNL semaphore
576  *	or @dev_base_lock. If the name is found a pointer to the device
577  *	is returned. If the name is not found then %NULL is returned. The
578  *	reference counters are not incremented so the caller must be
579  *	careful with locks.
580  */
581 
582 struct net_device *__dev_get_by_name(struct net *net, const char *name)
583 {
584 	struct hlist_node *p;
585 
586 	hlist_for_each(p, dev_name_hash(net, name)) {
587 		struct net_device *dev
588 			= hlist_entry(p, struct net_device, name_hlist);
589 		if (!strncmp(dev->name, name, IFNAMSIZ))
590 			return dev;
591 	}
592 	return NULL;
593 }
594 
595 /**
596  *	dev_get_by_name		- find a device by its name
597  *	@net: the applicable net namespace
598  *	@name: name to find
599  *
600  *	Find an interface by name. This can be called from any
601  *	context and does its own locking. The returned handle has
602  *	the usage count incremented and the caller must use dev_put() to
603  *	release it when it is no longer needed. %NULL is returned if no
604  *	matching device is found.
605  */
606 
607 struct net_device *dev_get_by_name(struct net *net, const char *name)
608 {
609 	struct net_device *dev;
610 
611 	read_lock(&dev_base_lock);
612 	dev = __dev_get_by_name(net, name);
613 	if (dev)
614 		dev_hold(dev);
615 	read_unlock(&dev_base_lock);
616 	return dev;
617 }
618 
619 /**
620  *	__dev_get_by_index - find a device by its ifindex
621  *	@net: the applicable net namespace
622  *	@ifindex: index of device
623  *
624  *	Search for an interface by index. Returns a pointer to the device,
625  *	or %NULL if the device is not found. The device has not
626  *	had its reference counter increased so the caller must be careful
627  *	about locking. The caller must hold either the RTNL semaphore
628  *	or @dev_base_lock.
629  */
630 
631 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
632 {
633 	struct hlist_node *p;
634 
635 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
636 		struct net_device *dev
637 			= hlist_entry(p, struct net_device, index_hlist);
638 		if (dev->ifindex == ifindex)
639 			return dev;
640 	}
641 	return NULL;
642 }
643 
644 
645 /**
646  *	dev_get_by_index - find a device by its ifindex
647  *	@net: the applicable net namespace
648  *	@ifindex: index of device
649  *
650  *	Search for an interface by index. Returns a pointer to the device,
651  *	or %NULL if the device is not found. The device returned has
652  *	had a reference added and the pointer is safe until the user calls
653  *	dev_put to indicate they have finished with it.
654  */
655 
656 struct net_device *dev_get_by_index(struct net *net, int ifindex)
657 {
658 	struct net_device *dev;
659 
660 	read_lock(&dev_base_lock);
661 	dev = __dev_get_by_index(net, ifindex);
662 	if (dev)
663 		dev_hold(dev);
664 	read_unlock(&dev_base_lock);
665 	return dev;
666 }
667 
668 /**
669  *	dev_getbyhwaddr - find a device by its hardware address
670  *	@net: the applicable net namespace
671  *	@type: media type of device
672  *	@ha: hardware address
673  *
674  *	Search for an interface by MAC address. Returns a pointer to the
675  *	device, or %NULL if the device is not found. The caller must hold the
676  *	rtnl semaphore. The returned device has not had its ref count increased
677  *	and the caller must therefore be careful about locking
678  *
679  *	BUGS:
680  *	If the API was consistent this would be __dev_get_by_hwaddr
681  */
682 
683 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
684 {
685 	struct net_device *dev;
686 
687 	ASSERT_RTNL();
688 
689 	for_each_netdev(net, dev)
690 		if (dev->type == type &&
691 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
692 			return dev;
693 
694 	return NULL;
695 }
696 
697 EXPORT_SYMBOL(dev_getbyhwaddr);
698 
699 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
700 {
701 	struct net_device *dev;
702 
703 	ASSERT_RTNL();
704 	for_each_netdev(net, dev)
705 		if (dev->type == type)
706 			return dev;
707 
708 	return NULL;
709 }
710 
711 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
712 
713 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
714 {
715 	struct net_device *dev;
716 
717 	rtnl_lock();
718 	dev = __dev_getfirstbyhwtype(net, type);
719 	if (dev)
720 		dev_hold(dev);
721 	rtnl_unlock();
722 	return dev;
723 }
724 
725 EXPORT_SYMBOL(dev_getfirstbyhwtype);
726 
727 /**
728  *	dev_get_by_flags - find any device with given flags
729  *	@net: the applicable net namespace
730  *	@if_flags: IFF_* values
731  *	@mask: bitmask of bits in if_flags to check
732  *
733  *	Search for any interface with the given flags. Returns a pointer to
734  *	the device, or %NULL if no matching device is found. The device returned has
735  *	had a reference added and the pointer is safe until the user calls
736  *	dev_put to indicate they have finished with it.
737  */
738 
739 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
740 {
741 	struct net_device *dev, *ret;
742 
743 	ret = NULL;
744 	read_lock(&dev_base_lock);
745 	for_each_netdev(net, dev) {
746 		if (((dev->flags ^ if_flags) & mask) == 0) {
747 			dev_hold(dev);
748 			ret = dev;
749 			break;
750 		}
751 	}
752 	read_unlock(&dev_base_lock);
753 	return ret;
754 }
755 
756 /**
757  *	dev_valid_name - check if name is okay for network device
758  *	@name: name string
759  *
760  *	Network device names need to be valid file names
761  *	to allow sysfs to work.  We also disallow any kind of
762  *	whitespace.
763  */
764 int dev_valid_name(const char *name)
765 {
766 	if (*name == '\0')
767 		return 0;
768 	if (strlen(name) >= IFNAMSIZ)
769 		return 0;
770 	if (!strcmp(name, ".") || !strcmp(name, ".."))
771 		return 0;
772 
773 	while (*name) {
774 		if (*name == '/' || isspace(*name))
775 			return 0;
776 		name++;
777 	}
778 	return 1;
779 }
780 
781 /**
782  *	__dev_alloc_name - allocate a name for a device
783  *	@net: network namespace to allocate the device name in
784  *	@name: name format string
785  *	@buf:  scratch buffer and result name string
786  *
787  *	Passed a format string - eg "lt%d" it will try and find a suitable
788  *	id. It scans list of devices to build up a free map, then chooses
789  *	the first empty slot. The caller must hold the dev_base or rtnl lock
790  *	while allocating the name and adding the device in order to avoid
791  *	duplicates.
792  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
793  *	Returns the number of the unit assigned or a negative errno code.
794  */
795 
796 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
797 {
798 	int i = 0;
799 	const char *p;
800 	const int max_netdevices = 8*PAGE_SIZE;
801 	unsigned long *inuse;
802 	struct net_device *d;
803 
804 	p = strnchr(name, IFNAMSIZ-1, '%');
805 	if (p) {
806 		/*
807 		 * Verify the string as this thing may have come from
808 		 * the user.  There must be either one "%d" and no other "%"
809 		 * characters.
810 		 */
811 		if (p[1] != 'd' || strchr(p + 2, '%'))
812 			return -EINVAL;
813 
814 		/* Use one page as a bit array of possible slots */
815 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
816 		if (!inuse)
817 			return -ENOMEM;
818 
819 		for_each_netdev(net, d) {
820 			if (!sscanf(d->name, name, &i))
821 				continue;
822 			if (i < 0 || i >= max_netdevices)
823 				continue;
824 
825 			/*  avoid cases where sscanf is not exact inverse of printf */
826 			snprintf(buf, IFNAMSIZ, name, i);
827 			if (!strncmp(buf, d->name, IFNAMSIZ))
828 				set_bit(i, inuse);
829 		}
830 
831 		i = find_first_zero_bit(inuse, max_netdevices);
832 		free_page((unsigned long) inuse);
833 	}
834 
835 	snprintf(buf, IFNAMSIZ, name, i);
836 	if (!__dev_get_by_name(net, buf))
837 		return i;
838 
839 	/* It is possible to run out of possible slots
840 	 * when the name is long and there isn't enough space left
841 	 * for the digits, or if all bits are used.
842 	 */
843 	return -ENFILE;
844 }
845 
846 /**
847  *	dev_alloc_name - allocate a name for a device
848  *	@dev: device
849  *	@name: name format string
850  *
851  *	Passed a format string - eg "lt%d" it will try and find a suitable
852  *	id. It scans list of devices to build up a free map, then chooses
853  *	the first empty slot. The caller must hold the dev_base or rtnl lock
854  *	while allocating the name and adding the device in order to avoid
855  *	duplicates.
856  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
857  *	Returns the number of the unit assigned or a negative errno code.
858  */
859 
860 int dev_alloc_name(struct net_device *dev, const char *name)
861 {
862 	char buf[IFNAMSIZ];
863 	struct net *net;
864 	int ret;
865 
866 	BUG_ON(!dev_net(dev));
867 	net = dev_net(dev);
868 	ret = __dev_alloc_name(net, name, buf);
869 	if (ret >= 0)
870 		strlcpy(dev->name, buf, IFNAMSIZ);
871 	return ret;
872 }
873 
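/*
 * Typical use (a sketch, not from this section): register_netdevice()
 * resolves a wildcard name before the device becomes visible, roughly
 *
 *	if (strchr(dev->name, '%')) {
 *		err = dev_alloc_name(dev, dev->name);
 *		if (err < 0)
 *			goto err_uninit;
 *	}
 *
 * so a driver that called alloc_netdev() with "eth%d" ends up with the first
 * free "eth0", "eth1", ...
 */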
874 
875 /**
876  *	dev_change_name - change name of a device
877  *	@dev: device
878  *	@newname: name (or format string) must be at least IFNAMSIZ
879  *
880  *	Change the name of a device; a format string such as "eth%d"
881  *	can be passed for wildcarding.
882  */
883 int dev_change_name(struct net_device *dev, const char *newname)
884 {
885 	char oldname[IFNAMSIZ];
886 	int err = 0;
887 	int ret;
888 	struct net *net;
889 
890 	ASSERT_RTNL();
891 	BUG_ON(!dev_net(dev));
892 
893 	net = dev_net(dev);
894 	if (dev->flags & IFF_UP)
895 		return -EBUSY;
896 
897 	if (!dev_valid_name(newname))
898 		return -EINVAL;
899 
900 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
901 		return 0;
902 
903 	memcpy(oldname, dev->name, IFNAMSIZ);
904 
905 	if (strchr(newname, '%')) {
906 		err = dev_alloc_name(dev, newname);
907 		if (err < 0)
908 			return err;
909 	}
910 	else if (__dev_get_by_name(net, newname))
911 		return -EEXIST;
912 	else
913 		strlcpy(dev->name, newname, IFNAMSIZ);
914 
915 rollback:
916 	/* For now only devices in the initial network namespace
917 	 * are in sysfs.
918 	 */
919 	if (net == &init_net) {
920 		ret = device_rename(&dev->dev, dev->name);
921 		if (ret) {
922 			memcpy(dev->name, oldname, IFNAMSIZ);
923 			return ret;
924 		}
925 	}
926 
927 	write_lock_bh(&dev_base_lock);
928 	hlist_del(&dev->name_hlist);
929 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
930 	write_unlock_bh(&dev_base_lock);
931 
932 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
933 	ret = notifier_to_errno(ret);
934 
935 	if (ret) {
936 		if (err) {
937 			printk(KERN_ERR
938 			       "%s: name change rollback failed: %d.\n",
939 			       dev->name, ret);
940 		} else {
941 			err = ret;
942 			memcpy(dev->name, oldname, IFNAMSIZ);
943 			goto rollback;
944 		}
945 	}
946 
947 	return err;
948 }
949 
950 /**
951  *	dev_set_alias - change ifalias of a device
952  *	@dev: device
953  *	@alias: name up to IFALIASZ
954  *	@len: limit of bytes to copy from @alias
955  *
956  *	Set the ifalias for a device.
957  */
958 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
959 {
960 	ASSERT_RTNL();
961 
962 	if (len >= IFALIASZ)
963 		return -EINVAL;
964 
965 	if (!len) {
966 		if (dev->ifalias) {
967 			kfree(dev->ifalias);
968 			dev->ifalias = NULL;
969 		}
970 		return 0;
971 	}
972 
973 	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
974 	if (!dev->ifalias)
975 		return -ENOMEM;
976 
977 	strlcpy(dev->ifalias, alias, len+1);
978 	return len;
979 }
980 
981 
982 /**
983  *	netdev_features_change - device changes features
984  *	@dev: device to cause notification
985  *
986  *	Called to indicate a device has changed features.
987  */
988 void netdev_features_change(struct net_device *dev)
989 {
990 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
991 }
992 EXPORT_SYMBOL(netdev_features_change);
993 
994 /**
995  *	netdev_state_change - device changes state
996  *	@dev: device to cause notification
997  *
998  *	Called to indicate a device has changed state. This function calls
999  *	the notifier chains for netdev_chain and sends a NEWLINK message
1000  *	to the routing socket.
1001  */
1002 void netdev_state_change(struct net_device *dev)
1003 {
1004 	if (dev->flags & IFF_UP) {
1005 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1006 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1007 	}
1008 }
1009 
1010 void netdev_bonding_change(struct net_device *dev)
1011 {
1012 	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1013 }
1014 EXPORT_SYMBOL(netdev_bonding_change);
1015 
1016 /**
1017  *	dev_load 	- load a network module
1018  *	@net: the applicable net namespace
1019  *	@name: name of interface
1020  *
1021  *	If a network interface is not present and the process has suitable
1022  *	privileges this function loads the module. If module loading is not
1023  *	available in this kernel then it becomes a nop.
1024  */
1025 
1026 void dev_load(struct net *net, const char *name)
1027 {
1028 	struct net_device *dev;
1029 
1030 	read_lock(&dev_base_lock);
1031 	dev = __dev_get_by_name(net, name);
1032 	read_unlock(&dev_base_lock);
1033 
1034 	if (!dev && capable(CAP_SYS_MODULE))
1035 		request_module("%s", name);
1036 }
1037 
1038 /**
1039  *	dev_open	- prepare an interface for use.
1040  *	@dev:	device to open
1041  *
1042  *	Takes a device from down to up state. The device's private open
1043  *	function is invoked and then the multicast lists are loaded. Finally
1044  *	the device is moved into the up state and a %NETDEV_UP message is
1045  *	sent to the netdev notifier chain.
1046  *
1047  *	Calling this function on an active interface is a nop. On a failure
1048  *	a negative errno code is returned.
1049  */
1050 int dev_open(struct net_device *dev)
1051 {
1052 	const struct net_device_ops *ops = dev->netdev_ops;
1053 	int ret;
1054 
1055 	ASSERT_RTNL();
1056 
1057 	/*
1058 	 *	Is it already up?
1059 	 */
1060 
1061 	if (dev->flags & IFF_UP)
1062 		return 0;
1063 
1064 	/*
1065 	 *	Is it even present?
1066 	 */
1067 	if (!netif_device_present(dev))
1068 		return -ENODEV;
1069 
1070 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1071 	ret = notifier_to_errno(ret);
1072 	if (ret)
1073 		return ret;
1074 
1075 	/*
1076 	 *	Call device private open method
1077 	 */
1078 	set_bit(__LINK_STATE_START, &dev->state);
1079 
1080 	if (ops->ndo_validate_addr)
1081 		ret = ops->ndo_validate_addr(dev);
1082 
1083 	if (!ret && ops->ndo_open)
1084 		ret = ops->ndo_open(dev);
1085 
1086 	/*
1087 	 *	If it went open OK then:
1088 	 */
1089 
1090 	if (ret)
1091 		clear_bit(__LINK_STATE_START, &dev->state);
1092 	else {
1093 		/*
1094 		 *	Set the flags.
1095 		 */
1096 		dev->flags |= IFF_UP;
1097 
1098 		/*
1099 		 *	Enable NET_DMA
1100 		 */
1101 		net_dmaengine_get();
1102 
1103 		/*
1104 		 *	Initialize multicasting status
1105 		 */
1106 		dev_set_rx_mode(dev);
1107 
1108 		/*
1109 		 *	Wakeup transmit queue engine
1110 		 */
1111 		dev_activate(dev);
1112 
1113 		/*
1114 		 *	... and announce new interface.
1115 		 */
1116 		call_netdevice_notifiers(NETDEV_UP, dev);
1117 	}
1118 
1119 	return ret;
1120 }
1121 
1122 /**
1123  *	dev_close - shutdown an interface.
1124  *	@dev: device to shutdown
1125  *
1126  *	This function moves an active device into down state. A
1127  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1128  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1129  *	chain.
1130  */
1131 int dev_close(struct net_device *dev)
1132 {
1133 	const struct net_device_ops *ops = dev->netdev_ops;
1134 	ASSERT_RTNL();
1135 
1136 	might_sleep();
1137 
1138 	if (!(dev->flags & IFF_UP))
1139 		return 0;
1140 
1141 	/*
1142 	 *	Tell people we are going down, so that they can
1143 	 *	prepare for it while the device is still operating.
1144 	 */
1145 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1146 
1147 	clear_bit(__LINK_STATE_START, &dev->state);
1148 
1149 	/* Synchronize to scheduled poll. We cannot touch poll list,
1150 	 * it can be even on different cpu. So just clear netif_running().
1151 	 *
1152 	 * dev->stop() will invoke napi_disable() on all of its
1153 	 * napi_struct instances on this device.
1154 	 */
1155 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1156 
1157 	dev_deactivate(dev);
1158 
1159 	/*
1160 	 *	Call the device specific close. This cannot fail.
1161 	 *	Only if device is UP
1162 	 *
1163 	 *	We allow it to be called even after a DETACH hot-plug
1164 	 *	event.
1165 	 */
1166 	if (ops->ndo_stop)
1167 		ops->ndo_stop(dev);
1168 
1169 	/*
1170 	 *	Device is now down.
1171 	 */
1172 
1173 	dev->flags &= ~IFF_UP;
1174 
1175 	/*
1176 	 * Tell people we are down
1177 	 */
1178 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1179 
1180 	/*
1181 	 *	Shutdown NET_DMA
1182 	 */
1183 	net_dmaengine_put();
1184 
1185 	return 0;
1186 }
1187 
1188 
1189 /**
1190  *	dev_disable_lro - disable Large Receive Offload on a device
1191  *	@dev: device
1192  *
1193  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1194  *	called under RTNL.  This is needed if received packets may be
1195  *	forwarded to another interface.
1196  */
1197 void dev_disable_lro(struct net_device *dev)
1198 {
1199 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1200 	    dev->ethtool_ops->set_flags) {
1201 		u32 flags = dev->ethtool_ops->get_flags(dev);
1202 		if (flags & ETH_FLAG_LRO) {
1203 			flags &= ~ETH_FLAG_LRO;
1204 			dev->ethtool_ops->set_flags(dev, flags);
1205 		}
1206 	}
1207 	WARN_ON(dev->features & NETIF_F_LRO);
1208 }
1209 EXPORT_SYMBOL(dev_disable_lro);
1210 
1211 
1212 static int dev_boot_phase = 1;
1213 
1214 /*
1215  *	Device change register/unregister. These are not inline or static
1216  *	as we export them to the world.
1217  */
1218 
1219 /**
1220  *	register_netdevice_notifier - register a network notifier block
1221  *	@nb: notifier
1222  *
1223  *	Register a notifier to be called when network device events occur.
1224  *	The notifier passed is linked into the kernel structures and must
1225  *	not be reused until it has been unregistered. A negative errno code
1226  *	is returned on a failure.
1227  *
1228  * 	When registered, all registration and up events are replayed
1229  *	to the new notifier to allow the caller to have a race-free
1230  *	view of the network device list.
1231  */
1232 
1233 int register_netdevice_notifier(struct notifier_block *nb)
1234 {
1235 	struct net_device *dev;
1236 	struct net_device *last;
1237 	struct net *net;
1238 	int err;
1239 
1240 	rtnl_lock();
1241 	err = raw_notifier_chain_register(&netdev_chain, nb);
1242 	if (err)
1243 		goto unlock;
1244 	if (dev_boot_phase)
1245 		goto unlock;
1246 	for_each_net(net) {
1247 		for_each_netdev(net, dev) {
1248 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1249 			err = notifier_to_errno(err);
1250 			if (err)
1251 				goto rollback;
1252 
1253 			if (!(dev->flags & IFF_UP))
1254 				continue;
1255 
1256 			nb->notifier_call(nb, NETDEV_UP, dev);
1257 		}
1258 	}
1259 
1260 unlock:
1261 	rtnl_unlock();
1262 	return err;
1263 
1264 rollback:
1265 	last = dev;
1266 	for_each_net(net) {
1267 		for_each_netdev(net, dev) {
1268 			if (dev == last)
1269 				break;
1270 
1271 			if (dev->flags & IFF_UP) {
1272 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1273 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1274 			}
1275 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1276 		}
1277 	}
1278 
1279 	raw_notifier_chain_unregister(&netdev_chain, nb);
1280 	goto unlock;
1281 }
1282 
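/*
 * A sketch of a typical notifier user (illustrative only; example_event()
 * and example_nb are hypothetical):
 *
 *	static int example_event(struct notifier_block *nb,
 *				 unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_event,
 *	};
 *
 *	register_netdevice_notifier(&example_nb);
 *
 * Because REGISTER and UP events are replayed at registration time,
 * example_event() also sees devices that were already up.
 */
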
1283 /**
1284  *	unregister_netdevice_notifier - unregister a network notifier block
1285  *	@nb: notifier
1286  *
1287  *	Unregister a notifier previously registered by
1288  *	register_netdevice_notifier(). The notifier is unlinked from the
1289  *	kernel structures and may then be reused. A negative errno code
1290  *	is returned on a failure.
1291  */
1292 
1293 int unregister_netdevice_notifier(struct notifier_block *nb)
1294 {
1295 	int err;
1296 
1297 	rtnl_lock();
1298 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1299 	rtnl_unlock();
1300 	return err;
1301 }
1302 
1303 /**
1304  *	call_netdevice_notifiers - call all network notifier blocks
1305  *      @val: value passed unmodified to notifier function
1306  *      @dev: net_device pointer passed unmodified to notifier function
1307  *
1308  *	Call all network notifier blocks.  Parameters and return value
1309  *	are as for raw_notifier_call_chain().
1310  */
1311 
1312 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1313 {
1314 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1315 }
1316 
1317 /* When > 0 there are consumers of rx skb time stamps */
1318 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1319 
1320 void net_enable_timestamp(void)
1321 {
1322 	atomic_inc(&netstamp_needed);
1323 }
1324 
1325 void net_disable_timestamp(void)
1326 {
1327 	atomic_dec(&netstamp_needed);
1328 }
1329 
1330 static inline void net_timestamp(struct sk_buff *skb)
1331 {
1332 	if (atomic_read(&netstamp_needed))
1333 		__net_timestamp(skb);
1334 	else
1335 		skb->tstamp.tv64 = 0;
1336 }
1337 
1338 /*
1339  *	Support routine. Sends outgoing frames to any network
1340  *	taps currently in use.
1341  */
1342 
1343 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1344 {
1345 	struct packet_type *ptype;
1346 
1347 #ifdef CONFIG_NET_CLS_ACT
1348 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1349 		net_timestamp(skb);
1350 #else
1351 	net_timestamp(skb);
1352 #endif
1353 
1354 	rcu_read_lock();
1355 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1356 		/* Never send packets back to the socket
1357 		 * they originated from - MvS (miquels@drinkel.ow.org)
1358 		 */
1359 		if ((ptype->dev == dev || !ptype->dev) &&
1360 		    (ptype->af_packet_priv == NULL ||
1361 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1362 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1363 			if (!skb2)
1364 				break;
1365 
1366 			/* skb->nh should be set correctly
1367 			   by the sender, so that the second statement is
1368 			   just protection against buggy protocols.
1369 			 */
1370 			skb_reset_mac_header(skb2);
1371 
1372 			if (skb_network_header(skb2) < skb2->data ||
1373 			    skb2->network_header > skb2->tail) {
1374 				if (net_ratelimit())
1375 					printk(KERN_CRIT "protocol %04x is "
1376 					       "buggy, dev %s\n",
1377 					       skb2->protocol, dev->name);
1378 				skb_reset_network_header(skb2);
1379 			}
1380 
1381 			skb2->transport_header = skb2->network_header;
1382 			skb2->pkt_type = PACKET_OUTGOING;
1383 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1384 		}
1385 	}
1386 	rcu_read_unlock();
1387 }
1388 
1389 
1390 static inline void __netif_reschedule(struct Qdisc *q)
1391 {
1392 	struct softnet_data *sd;
1393 	unsigned long flags;
1394 
1395 	local_irq_save(flags);
1396 	sd = &__get_cpu_var(softnet_data);
1397 	q->next_sched = sd->output_queue;
1398 	sd->output_queue = q;
1399 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1400 	local_irq_restore(flags);
1401 }
1402 
1403 void __netif_schedule(struct Qdisc *q)
1404 {
1405 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1406 		__netif_reschedule(q);
1407 }
1408 EXPORT_SYMBOL(__netif_schedule);
1409 
1410 void dev_kfree_skb_irq(struct sk_buff *skb)
1411 {
1412 	if (atomic_dec_and_test(&skb->users)) {
1413 		struct softnet_data *sd;
1414 		unsigned long flags;
1415 
1416 		local_irq_save(flags);
1417 		sd = &__get_cpu_var(softnet_data);
1418 		skb->next = sd->completion_queue;
1419 		sd->completion_queue = skb;
1420 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1421 		local_irq_restore(flags);
1422 	}
1423 }
1424 EXPORT_SYMBOL(dev_kfree_skb_irq);
1425 
1426 void dev_kfree_skb_any(struct sk_buff *skb)
1427 {
1428 	if (in_irq() || irqs_disabled())
1429 		dev_kfree_skb_irq(skb);
1430 	else
1431 		dev_kfree_skb(skb);
1432 }
1433 EXPORT_SYMBOL(dev_kfree_skb_any);
1434 
1435 
1436 /**
1437  * netif_device_detach - mark device as removed
1438  * @dev: network device
1439  *
1440  * Mark device as removed from system and therefore no longer available.
1441  */
1442 void netif_device_detach(struct net_device *dev)
1443 {
1444 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1445 	    netif_running(dev)) {
1446 		netif_tx_stop_all_queues(dev);
1447 	}
1448 }
1449 EXPORT_SYMBOL(netif_device_detach);
1450 
1451 /**
1452  * netif_device_attach - mark device as attached
1453  * @dev: network device
1454  *
1455  * Mark device as attached to the system and restart it if needed.
1456  */
1457 void netif_device_attach(struct net_device *dev)
1458 {
1459 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1460 	    netif_running(dev)) {
1461 		netif_tx_wake_all_queues(dev);
1462 		__netdev_watchdog_up(dev);
1463 	}
1464 }
1465 EXPORT_SYMBOL(netif_device_attach);
1466 
1467 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1468 {
1469 	return ((features & NETIF_F_GEN_CSUM) ||
1470 		((features & NETIF_F_IP_CSUM) &&
1471 		 protocol == htons(ETH_P_IP)) ||
1472 		((features & NETIF_F_IPV6_CSUM) &&
1473 		 protocol == htons(ETH_P_IPV6)) ||
1474 		((features & NETIF_F_FCOE_CRC) &&
1475 		 protocol == htons(ETH_P_FCOE)));
1476 }
1477 
1478 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1479 {
1480 	if (can_checksum_protocol(dev->features, skb->protocol))
1481 		return true;
1482 
1483 	if (skb->protocol == htons(ETH_P_8021Q)) {
1484 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1485 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1486 					  veh->h_vlan_encapsulated_proto))
1487 			return true;
1488 	}
1489 
1490 	return false;
1491 }
1492 
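/*
 * For example, a NIC advertising only NETIF_F_IP_CSUM can offload the
 * checksum of an ETH_P_IP packet but not of an ETH_P_IPV6 one; for the
 * latter, dev_queue_xmit() completes a CHECKSUM_PARTIAL packet in software
 * via skb_checksum_help() below.
 */
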
1493 /*
1494  * Invalidate hardware checksum when packet is to be mangled, and
1495  * complete checksum manually on outgoing path.
1496  */
1497 int skb_checksum_help(struct sk_buff *skb)
1498 {
1499 	__wsum csum;
1500 	int ret = 0, offset;
1501 
1502 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1503 		goto out_set_summed;
1504 
1505 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1506 		/* Let GSO fix up the checksum. */
1507 		goto out_set_summed;
1508 	}
1509 
1510 	offset = skb->csum_start - skb_headroom(skb);
1511 	BUG_ON(offset >= skb_headlen(skb));
1512 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1513 
1514 	offset += skb->csum_offset;
1515 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1516 
1517 	if (skb_cloned(skb) &&
1518 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1519 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1520 		if (ret)
1521 			goto out;
1522 	}
1523 
1524 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1525 out_set_summed:
1526 	skb->ip_summed = CHECKSUM_NONE;
1527 out:
1528 	return ret;
1529 }
1530 
1531 /**
1532  *	skb_gso_segment - Perform segmentation on skb.
1533  *	@skb: buffer to segment
1534  *	@features: features for the output path (see dev->features)
1535  *
1536  *	This function segments the given skb and returns a list of segments.
1537  *
1538  *	It may return NULL if the skb requires no segmentation.  This is
1539  *	only possible when GSO is used for verifying header integrity.
1540  */
1541 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1542 {
1543 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1544 	struct packet_type *ptype;
1545 	__be16 type = skb->protocol;
1546 	int err;
1547 
1548 	skb_reset_mac_header(skb);
1549 	skb->mac_len = skb->network_header - skb->mac_header;
1550 	__skb_pull(skb, skb->mac_len);
1551 
1552 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1553 		struct net_device *dev = skb->dev;
1554 		struct ethtool_drvinfo info = {};
1555 
1556 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1557 			dev->ethtool_ops->get_drvinfo(dev, &info);
1558 
1559 		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1560 			"ip_summed=%d",
1561 		     info.driver, dev ? dev->features : 0L,
1562 		     skb->sk ? skb->sk->sk_route_caps : 0L,
1563 		     skb->len, skb->data_len, skb->ip_summed);
1564 
1565 		if (skb_header_cloned(skb) &&
1566 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1567 			return ERR_PTR(err);
1568 	}
1569 
1570 	rcu_read_lock();
1571 	list_for_each_entry_rcu(ptype,
1572 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1573 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1574 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1575 				err = ptype->gso_send_check(skb);
1576 				segs = ERR_PTR(err);
1577 				if (err || skb_gso_ok(skb, features))
1578 					break;
1579 				__skb_push(skb, (skb->data -
1580 						 skb_network_header(skb)));
1581 			}
1582 			segs = ptype->gso_segment(skb, features);
1583 			break;
1584 		}
1585 	}
1586 	rcu_read_unlock();
1587 
1588 	__skb_push(skb, skb->data - skb_mac_header(skb));
1589 
1590 	return segs;
1591 }
1592 
1593 EXPORT_SYMBOL(skb_gso_segment);
1594 
1595 /* Take action when hardware reception checksum errors are detected. */
1596 #ifdef CONFIG_BUG
1597 void netdev_rx_csum_fault(struct net_device *dev)
1598 {
1599 	if (net_ratelimit()) {
1600 		printk(KERN_ERR "%s: hw csum failure.\n",
1601 			dev ? dev->name : "<unknown>");
1602 		dump_stack();
1603 	}
1604 }
1605 EXPORT_SYMBOL(netdev_rx_csum_fault);
1606 #endif
1607 
1608 /* Actually, we should eliminate this check as soon as we know that:
1609  * 1. An IOMMU is present and allows all the memory to be mapped.
1610  * 2. No high memory really exists on this machine.
1611  */
1612 
1613 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1614 {
1615 #ifdef CONFIG_HIGHMEM
1616 	int i;
1617 
1618 	if (dev->features & NETIF_F_HIGHDMA)
1619 		return 0;
1620 
1621 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1622 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1623 			return 1;
1624 
1625 #endif
1626 	return 0;
1627 }
1628 
1629 struct dev_gso_cb {
1630 	void (*destructor)(struct sk_buff *skb);
1631 };
1632 
1633 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1634 
1635 static void dev_gso_skb_destructor(struct sk_buff *skb)
1636 {
1637 	struct dev_gso_cb *cb;
1638 
1639 	do {
1640 		struct sk_buff *nskb = skb->next;
1641 
1642 		skb->next = nskb->next;
1643 		nskb->next = NULL;
1644 		kfree_skb(nskb);
1645 	} while (skb->next);
1646 
1647 	cb = DEV_GSO_CB(skb);
1648 	if (cb->destructor)
1649 		cb->destructor(skb);
1650 }
1651 
1652 /**
1653  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1654  *	@skb: buffer to segment
1655  *
1656  *	This function segments the given skb and stores the list of segments
1657  *	in skb->next.
1658  */
1659 static int dev_gso_segment(struct sk_buff *skb)
1660 {
1661 	struct net_device *dev = skb->dev;
1662 	struct sk_buff *segs;
1663 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1664 					 NETIF_F_SG : 0);
1665 
1666 	segs = skb_gso_segment(skb, features);
1667 
1668 	/* Verifying header integrity only. */
1669 	if (!segs)
1670 		return 0;
1671 
1672 	if (IS_ERR(segs))
1673 		return PTR_ERR(segs);
1674 
1675 	skb->next = segs;
1676 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1677 	skb->destructor = dev_gso_skb_destructor;
1678 
1679 	return 0;
1680 }
1681 
1682 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1683 			struct netdev_queue *txq)
1684 {
1685 	const struct net_device_ops *ops = dev->netdev_ops;
1686 	int rc;
1687 
1688 	if (likely(!skb->next)) {
1689 		if (!list_empty(&ptype_all))
1690 			dev_queue_xmit_nit(skb, dev);
1691 
1692 		if (netif_needs_gso(dev, skb)) {
1693 			if (unlikely(dev_gso_segment(skb)))
1694 				goto out_kfree_skb;
1695 			if (skb->next)
1696 				goto gso;
1697 		}
1698 
1699 		/*
1700 		 * If the device doesn't need skb->dst, release it right now while
1701 		 * it's hot in this CPU's cache.
1702 		 */
1703 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1704 			skb_dst_drop(skb);
1705 
1706 		rc = ops->ndo_start_xmit(skb, dev);
1707 		if (rc == 0)
1708 			txq_trans_update(txq);
1709 		/*
1710 		 * TODO: if skb_orphan() was called by
1711 		 * dev->hard_start_xmit() (for example, the unmodified
1712 		 * igb driver does that; bnx2 doesn't), then
1713 		 * skb_tx_software_timestamp() will be unable to send
1714 		 * back the time stamp.
1715 		 *
1716 		 * How can this be prevented? Always create another
1717 		 * reference to the socket before calling
1718 		 * dev->hard_start_xmit()? Prevent that skb_orphan()
1719 		 * does anything in dev->hard_start_xmit() by clearing
1720 		 * the skb destructor before the call and restoring it
1721 		 * afterwards, then doing the skb_orphan() ourselves?
1722 		 */
1723 		return rc;
1724 	}
1725 
1726 gso:
1727 	do {
1728 		struct sk_buff *nskb = skb->next;
1729 
1730 		skb->next = nskb->next;
1731 		nskb->next = NULL;
1732 		rc = ops->ndo_start_xmit(nskb, dev);
1733 		if (unlikely(rc)) {
1734 			nskb->next = skb->next;
1735 			skb->next = nskb;
1736 			return rc;
1737 		}
1738 		txq_trans_update(txq);
1739 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1740 			return NETDEV_TX_BUSY;
1741 	} while (skb->next);
1742 
1743 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1744 
1745 out_kfree_skb:
1746 	kfree_skb(skb);
1747 	return 0;
1748 }
1749 
1750 static u32 skb_tx_hashrnd;
1751 
1752 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1753 {
1754 	u32 hash;
1755 
1756 	if (skb_rx_queue_recorded(skb)) {
1757 		hash = skb_get_rx_queue(skb);
1758 		while (unlikely (hash >= dev->real_num_tx_queues))
1759 			hash -= dev->real_num_tx_queues;
1760 		return hash;
1761 	}
1762 
1763 	if (skb->sk && skb->sk->sk_hash)
1764 		hash = skb->sk->sk_hash;
1765 	else
1766 		hash = skb->protocol;
1767 
1768 	hash = jhash_1word(hash, skb_tx_hashrnd);
1769 
1770 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1771 }
1772 EXPORT_SYMBOL(skb_tx_hash);
1773 
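/*
 * The multiply-and-shift above maps the 32-bit hash uniformly onto
 * [0, real_num_tx_queues) without a modulo: e.g. with 4 tx queues a hash of
 * 0x80000000 gives ((u64)0x80000000 * 4) >> 32 = 2, i.e. queue 2.
 */
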
1774 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1775 					struct sk_buff *skb)
1776 {
1777 	const struct net_device_ops *ops = dev->netdev_ops;
1778 	u16 queue_index = 0;
1779 
1780 	if (ops->ndo_select_queue)
1781 		queue_index = ops->ndo_select_queue(dev, skb);
1782 	else if (dev->real_num_tx_queues > 1)
1783 		queue_index = skb_tx_hash(dev, skb);
1784 
1785 	skb_set_queue_mapping(skb, queue_index);
1786 	return netdev_get_tx_queue(dev, queue_index);
1787 }
1788 
1789 /**
1790  *	dev_queue_xmit - transmit a buffer
1791  *	@skb: buffer to transmit
1792  *
1793  *	Queue a buffer for transmission to a network device. The caller must
1794  *	have set the device and priority and built the buffer before calling
1795  *	this function. The function can be called from an interrupt.
1796  *
1797  *	A negative errno code is returned on a failure. A success does not
1798  *	guarantee the frame will be transmitted as it may be dropped due
1799  *	to congestion or traffic shaping.
1800  *
1801  * -----------------------------------------------------------------------------------
1802  *      I notice this method can also return errors from the queue disciplines,
1803  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1804  *      be positive.
1805  *
1806  *      Regardless of the return value, the skb is consumed, so it is currently
1807  *      difficult to retry a send to this method.  (You can bump the ref count
1808  *      before sending to hold a reference for retry if you are careful.)
1809  *
1810  *      When calling this method, interrupts MUST be enabled.  This is because
1811  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1812  *          --BLG
1813  */
1814 int dev_queue_xmit(struct sk_buff *skb)
1815 {
1816 	struct net_device *dev = skb->dev;
1817 	struct netdev_queue *txq;
1818 	struct Qdisc *q;
1819 	int rc = -ENOMEM;
1820 
1821 	/* GSO will handle the following emulations directly. */
1822 	if (netif_needs_gso(dev, skb))
1823 		goto gso;
1824 
1825 	if (skb_has_frags(skb) &&
1826 	    !(dev->features & NETIF_F_FRAGLIST) &&
1827 	    __skb_linearize(skb))
1828 		goto out_kfree_skb;
1829 
1830 	/* Fragmented skb is linearized if device does not support SG,
1831 	 * or if at least one of fragments is in highmem and device
1832 	 * does not support DMA from it.
1833 	 */
1834 	if (skb_shinfo(skb)->nr_frags &&
1835 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1836 	    __skb_linearize(skb))
1837 		goto out_kfree_skb;
1838 
1839 	/* If packet is not checksummed and device does not support
1840 	 * checksumming for this protocol, complete checksumming here.
1841 	 */
1842 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1843 		skb_set_transport_header(skb, skb->csum_start -
1844 					      skb_headroom(skb));
1845 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1846 			goto out_kfree_skb;
1847 	}
1848 
1849 gso:
1850 	/* Disable soft irqs for various locks below. Also
1851 	 * stops preemption for RCU.
1852 	 */
1853 	rcu_read_lock_bh();
1854 
1855 	txq = dev_pick_tx(dev, skb);
1856 	q = rcu_dereference(txq->qdisc);
1857 
1858 #ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1860 #endif
1861 	if (q->enqueue) {
1862 		spinlock_t *root_lock = qdisc_lock(q);
1863 
1864 		spin_lock(root_lock);
1865 
1866 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1867 			kfree_skb(skb);
1868 			rc = NET_XMIT_DROP;
1869 		} else {
1870 			rc = qdisc_enqueue_root(skb, q);
1871 			qdisc_run(q);
1872 		}
1873 		spin_unlock(root_lock);
1874 
1875 		goto out;
1876 	}
1877 
	/* The device has no queue. Common case for software devices:
	   loopback, all sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible that they rely on the protection
	   we provide here.

	   Check this and take the lock anyway; it is not prone to deadlocks.
	   Alternatively, shoot the noqueue qdisc, which is even simpler 8)
	 */
1890 	if (dev->flags & IFF_UP) {
1891 		int cpu = smp_processor_id(); /* ok because BHs are off */
1892 
1893 		if (txq->xmit_lock_owner != cpu) {
1894 
1895 			HARD_TX_LOCK(dev, txq, cpu);
1896 
1897 			if (!netif_tx_queue_stopped(txq)) {
1898 				rc = 0;
1899 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1900 					HARD_TX_UNLOCK(dev, txq);
1901 					goto out;
1902 				}
1903 			}
1904 			HARD_TX_UNLOCK(dev, txq);
1905 			if (net_ratelimit())
1906 				printk(KERN_CRIT "Virtual device %s asks to "
1907 				       "queue packet!\n", dev->name);
1908 		} else {
			/* Recursion detected! It is possible,
			 * unfortunately. */
1911 			if (net_ratelimit())
1912 				printk(KERN_CRIT "Dead loop on virtual device "
1913 				       "%s, fix it urgently!\n", dev->name);
1914 		}
1915 	}
1916 
1917 	rc = -ENETDOWN;
1918 	rcu_read_unlock_bh();
1919 
1920 out_kfree_skb:
1921 	kfree_skb(skb);
1922 	return rc;
1923 out:
1924 	rcu_read_unlock_bh();
1925 	return rc;
1926 }
1927 
1928 
1929 /*=======================================================================
1930 			Receiver routines
1931   =======================================================================*/
1932 
1933 int netdev_max_backlog __read_mostly = 1000;
1934 int netdev_budget __read_mostly = 300;
1935 int weight_p __read_mostly = 64;            /* old backlog weight */
1936 
1937 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1938 
1939 
1940 /**
1941  *	netif_rx	-	post buffer to the network code
1942  *	@skb: buffer to post
1943  *
1944  *	This function receives a packet from a device driver and queues it for
1945  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1946  *	may be dropped during processing for congestion control or by the
1947  *	protocol layers.
1948  *
1949  *	return values:
1950  *	NET_RX_SUCCESS	(no congestion)
1951  *	NET_RX_DROP     (packet was dropped)
1952  *
1953  */
1954 
1955 int netif_rx(struct sk_buff *skb)
1956 {
1957 	struct softnet_data *queue;
1958 	unsigned long flags;
1959 
1960 	/* if netpoll wants it, pretend we never saw it */
1961 	if (netpoll_rx(skb))
1962 		return NET_RX_DROP;
1963 
1964 	if (!skb->tstamp.tv64)
1965 		net_timestamp(skb);
1966 
1967 	/*
1968 	 * The code is rearranged so that the path is the most
1969 	 * short when CPU is congested, but is still operating.
1970 	 */
1971 	local_irq_save(flags);
1972 	queue = &__get_cpu_var(softnet_data);
1973 
1974 	__get_cpu_var(netdev_rx_stat).total++;
1975 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1976 		if (queue->input_pkt_queue.qlen) {
1977 enqueue:
1978 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1979 			local_irq_restore(flags);
1980 			return NET_RX_SUCCESS;
1981 		}
1982 
1983 		napi_schedule(&queue->backlog);
1984 		goto enqueue;
1985 	}
1986 
1987 	__get_cpu_var(netdev_rx_stat).dropped++;
1988 	local_irq_restore(flags);
1989 
1990 	kfree_skb(skb);
1991 	return NET_RX_DROP;
1992 }
1993 
1994 int netif_rx_ni(struct sk_buff *skb)
1995 {
1996 	int err;
1997 
1998 	preempt_disable();
1999 	err = netif_rx(skb);
2000 	if (local_softirq_pending())
2001 		do_softirq();
2002 	preempt_enable();
2003 
2004 	return err;
2005 }
2006 
2007 EXPORT_SYMBOL(netif_rx_ni);
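
/*
 * A minimal sketch of how a driver might feed the backlog from the two
 * contexts these helpers cover.  foo_rx_irq() and foo_rx_task() are
 * hypothetical: netif_rx() is the interrupt-context entry point, while
 * netif_rx_ni() is for process context, where it also runs any softirq
 * it raised.
 *
 *	static void foo_rx_irq(struct net_device *dev, struct sk_buff *skb)
 *	{
 *		skb->protocol = eth_type_trans(skb, dev);
 *		netif_rx(skb);
 *	}
 *
 *	static void foo_rx_task(struct net_device *dev, struct sk_buff *skb)
 *	{
 *		skb->protocol = eth_type_trans(skb, dev);
 *		netif_rx_ni(skb);
 *	}
 */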
2008 
2009 static void net_tx_action(struct softirq_action *h)
2010 {
2011 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2012 
2013 	if (sd->completion_queue) {
2014 		struct sk_buff *clist;
2015 
2016 		local_irq_disable();
2017 		clist = sd->completion_queue;
2018 		sd->completion_queue = NULL;
2019 		local_irq_enable();
2020 
2021 		while (clist) {
2022 			struct sk_buff *skb = clist;
2023 			clist = clist->next;
2024 
2025 			WARN_ON(atomic_read(&skb->users));
2026 			__kfree_skb(skb);
2027 		}
2028 	}
2029 
2030 	if (sd->output_queue) {
2031 		struct Qdisc *head;
2032 
2033 		local_irq_disable();
2034 		head = sd->output_queue;
2035 		sd->output_queue = NULL;
2036 		local_irq_enable();
2037 
2038 		while (head) {
2039 			struct Qdisc *q = head;
2040 			spinlock_t *root_lock;
2041 
2042 			head = head->next_sched;
2043 
2044 			root_lock = qdisc_lock(q);
2045 			if (spin_trylock(root_lock)) {
2046 				smp_mb__before_clear_bit();
2047 				clear_bit(__QDISC_STATE_SCHED,
2048 					  &q->state);
2049 				qdisc_run(q);
2050 				spin_unlock(root_lock);
2051 			} else {
2052 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2053 					      &q->state)) {
2054 					__netif_reschedule(q);
2055 				} else {
2056 					smp_mb__before_clear_bit();
2057 					clear_bit(__QDISC_STATE_SCHED,
2058 						  &q->state);
2059 				}
2060 			}
2061 		}
2062 	}
2063 }
2064 
2065 static inline int deliver_skb(struct sk_buff *skb,
2066 			      struct packet_type *pt_prev,
2067 			      struct net_device *orig_dev)
2068 {
2069 	atomic_inc(&skb->users);
2070 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2071 }
2072 
2073 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2074 
2075 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2076 /* This hook is defined here for ATM LANE */
2077 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2078 			     unsigned char *addr) __read_mostly;
2079 EXPORT_SYMBOL(br_fdb_test_addr_hook);
2080 #endif
2081 
2082 /*
 * If the bridge module is loaded, call the bridging hook.
 * Returns NULL if the packet was consumed.
2085  */
2086 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2087 					struct sk_buff *skb) __read_mostly;
2088 EXPORT_SYMBOL(br_handle_frame_hook);
2089 
2090 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2091 					    struct packet_type **pt_prev, int *ret,
2092 					    struct net_device *orig_dev)
2093 {
2094 	struct net_bridge_port *port;
2095 
2096 	if (skb->pkt_type == PACKET_LOOPBACK ||
2097 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2098 		return skb;
2099 
2100 	if (*pt_prev) {
2101 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2102 		*pt_prev = NULL;
2103 	}
2104 
2105 	return br_handle_frame_hook(port, skb);
2106 }
2107 #else
2108 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2109 #endif
2110 
2111 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2112 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2113 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2114 
2115 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2116 					     struct packet_type **pt_prev,
2117 					     int *ret,
2118 					     struct net_device *orig_dev)
2119 {
2120 	if (skb->dev->macvlan_port == NULL)
2121 		return skb;
2122 
2123 	if (*pt_prev) {
2124 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2125 		*pt_prev = NULL;
2126 	}
2127 	return macvlan_handle_frame_hook(skb);
2128 }
2129 #else
2130 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2131 #endif
2132 
2133 #ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * whenever CONFIG_NET_CLS_ACT is.  Otherwise we currently pay for some
 * useless instructions (a compare and two extra stores) when the ingress
 * scheduler is not built but CONFIG_NET_CLS_ACT is.
 * NOTE: This doesn't remove any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
2142 static int ing_filter(struct sk_buff *skb)
2143 {
2144 	struct net_device *dev = skb->dev;
2145 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2146 	struct netdev_queue *rxq;
2147 	int result = TC_ACT_OK;
2148 	struct Qdisc *q;
2149 
2150 	if (MAX_RED_LOOP < ttl++) {
2151 		printk(KERN_WARNING
2152 		       "Redir loop detected Dropping packet (%d->%d)\n",
2153 		       skb->iif, dev->ifindex);
2154 		return TC_ACT_SHOT;
2155 	}
2156 
2157 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2158 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2159 
2160 	rxq = &dev->rx_queue;
2161 
2162 	q = rxq->qdisc;
2163 	if (q != &noop_qdisc) {
2164 		spin_lock(qdisc_lock(q));
2165 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2166 			result = qdisc_enqueue_root(skb, q);
2167 		spin_unlock(qdisc_lock(q));
2168 	}
2169 
2170 	return result;
2171 }
2172 
2173 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2174 					 struct packet_type **pt_prev,
2175 					 int *ret, struct net_device *orig_dev)
2176 {
2177 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2178 		goto out;
2179 
2180 	if (*pt_prev) {
2181 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2182 		*pt_prev = NULL;
2183 	} else {
2184 		/* Huh? Why does turning on AF_PACKET affect this? */
2185 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2186 	}
2187 
2188 	switch (ing_filter(skb)) {
2189 	case TC_ACT_SHOT:
2190 	case TC_ACT_STOLEN:
2191 		kfree_skb(skb);
2192 		return NULL;
2193 	}
2194 
2195 out:
2196 	skb->tc_verd = 0;
2197 	return skb;
2198 }
2199 #endif
2200 
2201 /*
2202  * 	netif_nit_deliver - deliver received packets to network taps
2203  * 	@skb: buffer
2204  *
2205  * 	This function is used to deliver incoming packets to network
2206  * 	taps. It should be used when the normal netif_receive_skb path
2207  * 	is bypassed, for example because of VLAN acceleration.
2208  */
2209 void netif_nit_deliver(struct sk_buff *skb)
2210 {
2211 	struct packet_type *ptype;
2212 
2213 	if (list_empty(&ptype_all))
2214 		return;
2215 
2216 	skb_reset_network_header(skb);
2217 	skb_reset_transport_header(skb);
2218 	skb->mac_len = skb->network_header - skb->mac_header;
2219 
2220 	rcu_read_lock();
2221 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2222 		if (!ptype->dev || ptype->dev == skb->dev)
2223 			deliver_skb(skb, ptype, skb->dev);
2224 	}
2225 	rcu_read_unlock();
2226 }
2227 
2228 /**
2229  *	netif_receive_skb - process receive buffer from network
2230  *	@skb: buffer to process
2231  *
2232  *	netif_receive_skb() is the main receive data processing function.
2233  *	It always succeeds. The buffer may be dropped during processing
2234  *	for congestion control or by the protocol layers.
2235  *
2236  *	This function may only be called from softirq context and interrupts
2237  *	should be enabled.
2238  *
2239  *	Return values (usually ignored):
2240  *	NET_RX_SUCCESS: no congestion
2241  *	NET_RX_DROP: packet was dropped
2242  */
2243 int netif_receive_skb(struct sk_buff *skb)
2244 {
2245 	struct packet_type *ptype, *pt_prev;
2246 	struct net_device *orig_dev;
2247 	struct net_device *null_or_orig;
2248 	int ret = NET_RX_DROP;
2249 	__be16 type;
2250 
2251 	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2252 		return NET_RX_SUCCESS;
2253 
2254 	/* if we've gotten here through NAPI, check netpoll */
2255 	if (netpoll_receive_skb(skb))
2256 		return NET_RX_DROP;
2257 
2258 	if (!skb->tstamp.tv64)
2259 		net_timestamp(skb);
2260 
2261 	if (!skb->iif)
2262 		skb->iif = skb->dev->ifindex;
2263 
2264 	null_or_orig = NULL;
2265 	orig_dev = skb->dev;
2266 	if (orig_dev->master) {
2267 		if (skb_bond_should_drop(skb))
2268 			null_or_orig = orig_dev; /* deliver only exact match */
2269 		else
2270 			skb->dev = orig_dev->master;
2271 	}
2272 
2273 	__get_cpu_var(netdev_rx_stat).total++;
2274 
2275 	skb_reset_network_header(skb);
2276 	skb_reset_transport_header(skb);
2277 	skb->mac_len = skb->network_header - skb->mac_header;
2278 
2279 	pt_prev = NULL;
2280 
2281 	rcu_read_lock();
2282 
2283 #ifdef CONFIG_NET_CLS_ACT
2284 	if (skb->tc_verd & TC_NCLS) {
2285 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2286 		goto ncls;
2287 	}
2288 #endif
2289 
2290 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2291 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2292 		    ptype->dev == orig_dev) {
2293 			if (pt_prev)
2294 				ret = deliver_skb(skb, pt_prev, orig_dev);
2295 			pt_prev = ptype;
2296 		}
2297 	}
2298 
2299 #ifdef CONFIG_NET_CLS_ACT
2300 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2301 	if (!skb)
2302 		goto out;
2303 ncls:
2304 #endif
2305 
2306 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2307 	if (!skb)
2308 		goto out;
2309 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2310 	if (!skb)
2311 		goto out;
2312 
2313 	type = skb->protocol;
2314 	list_for_each_entry_rcu(ptype,
2315 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2316 		if (ptype->type == type &&
2317 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2318 		     ptype->dev == orig_dev)) {
2319 			if (pt_prev)
2320 				ret = deliver_skb(skb, pt_prev, orig_dev);
2321 			pt_prev = ptype;
2322 		}
2323 	}
2324 
2325 	if (pt_prev) {
2326 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2327 	} else {
2328 		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
2331 		 */
2332 		ret = NET_RX_DROP;
2333 	}
2334 
2335 out:
2336 	rcu_read_unlock();
2337 	return ret;
2338 }
2339 
2340 /* Network device is going away, flush any packets still pending  */
2341 static void flush_backlog(void *arg)
2342 {
2343 	struct net_device *dev = arg;
2344 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2345 	struct sk_buff *skb, *tmp;
2346 
2347 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2348 		if (skb->dev == dev) {
2349 			__skb_unlink(skb, &queue->input_pkt_queue);
2350 			kfree_skb(skb);
2351 		}
2352 }
2353 
2354 static int napi_gro_complete(struct sk_buff *skb)
2355 {
2356 	struct packet_type *ptype;
2357 	__be16 type = skb->protocol;
2358 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2359 	int err = -ENOENT;
2360 
2361 	if (NAPI_GRO_CB(skb)->count == 1) {
2362 		skb_shinfo(skb)->gso_size = 0;
2363 		goto out;
2364 	}
2365 
2366 	rcu_read_lock();
2367 	list_for_each_entry_rcu(ptype, head, list) {
2368 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2369 			continue;
2370 
2371 		err = ptype->gro_complete(skb);
2372 		break;
2373 	}
2374 	rcu_read_unlock();
2375 
2376 	if (err) {
2377 		WARN_ON(&ptype->list == head);
2378 		kfree_skb(skb);
2379 		return NET_RX_SUCCESS;
2380 	}
2381 
2382 out:
2383 	return netif_receive_skb(skb);
2384 }
2385 
2386 void napi_gro_flush(struct napi_struct *napi)
2387 {
2388 	struct sk_buff *skb, *next;
2389 
2390 	for (skb = napi->gro_list; skb; skb = next) {
2391 		next = skb->next;
2392 		skb->next = NULL;
2393 		napi_gro_complete(skb);
2394 	}
2395 
2396 	napi->gro_count = 0;
2397 	napi->gro_list = NULL;
2398 }
2399 EXPORT_SYMBOL(napi_gro_flush);
2400 
2401 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2402 {
2403 	struct sk_buff **pp = NULL;
2404 	struct packet_type *ptype;
2405 	__be16 type = skb->protocol;
2406 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2407 	int same_flow;
2408 	int mac_len;
2409 	int ret;
2410 
2411 	if (!(skb->dev->features & NETIF_F_GRO))
2412 		goto normal;
2413 
2414 	if (skb_is_gso(skb) || skb_has_frags(skb))
2415 		goto normal;
2416 
2417 	rcu_read_lock();
2418 	list_for_each_entry_rcu(ptype, head, list) {
2419 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2420 			continue;
2421 
2422 		skb_set_network_header(skb, skb_gro_offset(skb));
2423 		mac_len = skb->network_header - skb->mac_header;
2424 		skb->mac_len = mac_len;
2425 		NAPI_GRO_CB(skb)->same_flow = 0;
2426 		NAPI_GRO_CB(skb)->flush = 0;
2427 		NAPI_GRO_CB(skb)->free = 0;
2428 
2429 		pp = ptype->gro_receive(&napi->gro_list, skb);
2430 		break;
2431 	}
2432 	rcu_read_unlock();
2433 
2434 	if (&ptype->list == head)
2435 		goto normal;
2436 
2437 	same_flow = NAPI_GRO_CB(skb)->same_flow;
2438 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2439 
2440 	if (pp) {
2441 		struct sk_buff *nskb = *pp;
2442 
2443 		*pp = nskb->next;
2444 		nskb->next = NULL;
2445 		napi_gro_complete(nskb);
2446 		napi->gro_count--;
2447 	}
2448 
2449 	if (same_flow)
2450 		goto ok;
2451 
2452 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2453 		goto normal;
2454 
2455 	napi->gro_count++;
2456 	NAPI_GRO_CB(skb)->count = 1;
2457 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2458 	skb->next = napi->gro_list;
2459 	napi->gro_list = skb;
2460 	ret = GRO_HELD;
2461 
2462 pull:
2463 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
2464 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
2465 
2466 		BUG_ON(skb->end - skb->tail < grow);
2467 
2468 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2469 
2470 		skb->tail += grow;
2471 		skb->data_len -= grow;
2472 
2473 		skb_shinfo(skb)->frags[0].page_offset += grow;
2474 		skb_shinfo(skb)->frags[0].size -= grow;
2475 
2476 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2477 			put_page(skb_shinfo(skb)->frags[0].page);
			memmove(skb_shinfo(skb)->frags,
				skb_shinfo(skb)->frags + 1,
				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
2481 		}
2482 	}
2483 
2484 ok:
2485 	return ret;
2486 
2487 normal:
2488 	ret = GRO_NORMAL;
2489 	goto pull;
2490 }
2491 EXPORT_SYMBOL(dev_gro_receive);
2492 
2493 static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2494 {
2495 	struct sk_buff *p;
2496 
2497 	if (netpoll_rx_on(skb))
2498 		return GRO_NORMAL;
2499 
2500 	for (p = napi->gro_list; p; p = p->next) {
2501 		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2502 			&& !compare_ether_header(skb_mac_header(p),
2503 						 skb_gro_mac_header(skb));
2504 		NAPI_GRO_CB(p)->flush = 0;
2505 	}
2506 
2507 	return dev_gro_receive(napi, skb);
2508 }
2509 
2510 int napi_skb_finish(int ret, struct sk_buff *skb)
2511 {
2512 	int err = NET_RX_SUCCESS;
2513 
2514 	switch (ret) {
2515 	case GRO_NORMAL:
2516 		return netif_receive_skb(skb);
2517 
2518 	case GRO_DROP:
2519 		err = NET_RX_DROP;
2520 		/* fall through */
2521 
2522 	case GRO_MERGED_FREE:
2523 		kfree_skb(skb);
2524 		break;
2525 	}
2526 
2527 	return err;
2528 }
2529 EXPORT_SYMBOL(napi_skb_finish);
2530 
2531 void skb_gro_reset_offset(struct sk_buff *skb)
2532 {
2533 	NAPI_GRO_CB(skb)->data_offset = 0;
2534 	NAPI_GRO_CB(skb)->frag0 = NULL;
2535 	NAPI_GRO_CB(skb)->frag0_len = 0;
2536 
2537 	if (skb->mac_header == skb->tail &&
2538 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
2539 		NAPI_GRO_CB(skb)->frag0 =
2540 			page_address(skb_shinfo(skb)->frags[0].page) +
2541 			skb_shinfo(skb)->frags[0].page_offset;
2542 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2543 	}
2544 }
2545 EXPORT_SYMBOL(skb_gro_reset_offset);
2546 
2547 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2548 {
2549 	skb_gro_reset_offset(skb);
2550 
2551 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
2552 }
2553 EXPORT_SYMBOL(napi_gro_receive);
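
/*
 * A minimal sketch of a GRO-aware NAPI poll loop built on the helpers
 * above.  foo_poll(), struct foo_ring and its helpers are hypothetical;
 * the point is that each completed skb goes to napi_gro_receive() rather
 * than netif_receive_skb(), and napi_complete() is called once the ring
 * is drained within the budget.
 *
 *	static int foo_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct foo_ring *ring = container_of(napi, struct foo_ring, napi);
 *		struct sk_buff *skb;
 *		int work = 0;
 *
 *		while (work < budget && (skb = foo_ring_next_skb(ring))) {
 *			skb->protocol = eth_type_trans(skb, ring->netdev);
 *			napi_gro_receive(napi, skb);
 *			work++;
 *		}
 *		if (work < budget) {
 *			napi_complete(napi);
 *			foo_enable_rx_irq(ring);
 *		}
 *		return work;
 *	}
 */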
2554 
2555 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2556 {
2557 	__skb_pull(skb, skb_headlen(skb));
2558 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2559 
2560 	napi->skb = skb;
2561 }
2562 EXPORT_SYMBOL(napi_reuse_skb);
2563 
2564 struct sk_buff *napi_get_frags(struct napi_struct *napi)
2565 {
2566 	struct net_device *dev = napi->dev;
2567 	struct sk_buff *skb = napi->skb;
2568 
2569 	if (!skb) {
2570 		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2571 		if (!skb)
2572 			goto out;
2573 
2574 		skb_reserve(skb, NET_IP_ALIGN);
2575 
2576 		napi->skb = skb;
2577 	}
2578 
2579 out:
2580 	return skb;
2581 }
2582 EXPORT_SYMBOL(napi_get_frags);
2583 
2584 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2585 {
2586 	int err = NET_RX_SUCCESS;
2587 
2588 	switch (ret) {
2589 	case GRO_NORMAL:
2590 	case GRO_HELD:
2591 		skb->protocol = eth_type_trans(skb, napi->dev);
2592 
2593 		if (ret == GRO_NORMAL)
2594 			return netif_receive_skb(skb);
2595 
2596 		skb_gro_pull(skb, -ETH_HLEN);
2597 		break;
2598 
2599 	case GRO_DROP:
2600 		err = NET_RX_DROP;
2601 		/* fall through */
2602 
2603 	case GRO_MERGED_FREE:
2604 		napi_reuse_skb(napi, skb);
2605 		break;
2606 	}
2607 
2608 	return err;
2609 }
2610 EXPORT_SYMBOL(napi_frags_finish);
2611 
2612 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2613 {
2614 	struct sk_buff *skb = napi->skb;
2615 	struct ethhdr *eth;
2616 	unsigned int hlen;
2617 	unsigned int off;
2618 
2619 	napi->skb = NULL;
2620 
2621 	skb_reset_mac_header(skb);
2622 	skb_gro_reset_offset(skb);
2623 
2624 	off = skb_gro_offset(skb);
2625 	hlen = off + sizeof(*eth);
2626 	eth = skb_gro_header_fast(skb, off);
2627 	if (skb_gro_header_hard(skb, hlen)) {
2628 		eth = skb_gro_header_slow(skb, hlen, off);
2629 		if (unlikely(!eth)) {
2630 			napi_reuse_skb(napi, skb);
2631 			skb = NULL;
2632 			goto out;
2633 		}
2634 	}
2635 
2636 	skb_gro_pull(skb, sizeof(*eth));
2637 
2638 	/*
2639 	 * This works because the only protocols we care about don't require
2640 	 * special handling.  We'll fix it up properly at the end.
2641 	 */
2642 	skb->protocol = eth->h_proto;
2643 
2644 out:
2645 	return skb;
2646 }
2647 EXPORT_SYMBOL(napi_frags_skb);
2648 
2649 int napi_gro_frags(struct napi_struct *napi)
2650 {
2651 	struct sk_buff *skb = napi_frags_skb(napi);
2652 
2653 	if (!skb)
2654 		return NET_RX_DROP;
2655 
2656 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2657 }
2658 EXPORT_SYMBOL(napi_gro_frags);
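
/*
 * A minimal sketch of the header-less GRO path for drivers that receive
 * directly into pages.  foo_rx_page() is hypothetical; the pattern is to
 * borrow napi->skb via napi_get_frags(), attach the page as a fragment,
 * and hand it back through napi_gro_frags(), which parses the Ethernet
 * header itself.
 *
 *	static void foo_rx_page(struct napi_struct *napi, struct page *page,
 *				unsigned int len)
 *	{
 *		struct sk_buff *skb = napi_get_frags(napi);
 *
 *		if (!skb) {
 *			put_page(page);
 *			return;
 *		}
 *		skb_fill_page_desc(skb, 0, page, 0, len);
 *		skb->len += len;
 *		skb->data_len += len;
 *		skb->truesize += len;
 *		napi_gro_frags(napi);
 *	}
 */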
2659 
2660 static int process_backlog(struct napi_struct *napi, int quota)
2661 {
2662 	int work = 0;
2663 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2664 	unsigned long start_time = jiffies;
2665 
2666 	napi->weight = weight_p;
2667 	do {
2668 		struct sk_buff *skb;
2669 
2670 		local_irq_disable();
2671 		skb = __skb_dequeue(&queue->input_pkt_queue);
2672 		if (!skb) {
2673 			__napi_complete(napi);
2674 			local_irq_enable();
2675 			break;
2676 		}
2677 		local_irq_enable();
2678 
2679 		netif_receive_skb(skb);
2680 	} while (++work < quota && jiffies == start_time);
2681 
2682 	return work;
2683 }
2684 
2685 /**
2686  * __napi_schedule - schedule for receive
2687  * @n: entry to schedule
2688  *
2689  * The entry's receive function will be scheduled to run
2690  */
2691 void __napi_schedule(struct napi_struct *n)
2692 {
2693 	unsigned long flags;
2694 
2695 	local_irq_save(flags);
2696 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2697 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2698 	local_irq_restore(flags);
2699 }
2700 EXPORT_SYMBOL(__napi_schedule);
2701 
2702 void __napi_complete(struct napi_struct *n)
2703 {
2704 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2705 	BUG_ON(n->gro_list);
2706 
2707 	list_del(&n->poll_list);
2708 	smp_mb__before_clear_bit();
2709 	clear_bit(NAPI_STATE_SCHED, &n->state);
2710 }
2711 EXPORT_SYMBOL(__napi_complete);
2712 
2713 void napi_complete(struct napi_struct *n)
2714 {
2715 	unsigned long flags;
2716 
2717 	/*
2718 	 * don't let napi dequeue from the cpu poll list
2719 	 * just in case its running on a different cpu
2720 	 */
2721 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2722 		return;
2723 
2724 	napi_gro_flush(n);
2725 	local_irq_save(flags);
2726 	__napi_complete(n);
2727 	local_irq_restore(flags);
2728 }
2729 EXPORT_SYMBOL(napi_complete);
2730 
2731 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2732 		    int (*poll)(struct napi_struct *, int), int weight)
2733 {
2734 	INIT_LIST_HEAD(&napi->poll_list);
2735 	napi->gro_count = 0;
2736 	napi->gro_list = NULL;
2737 	napi->skb = NULL;
2738 	napi->poll = poll;
2739 	napi->weight = weight;
2740 	list_add(&napi->dev_list, &dev->napi_list);
2741 	napi->dev = dev;
2742 #ifdef CONFIG_NETPOLL
2743 	spin_lock_init(&napi->poll_lock);
2744 	napi->poll_owner = -1;
2745 #endif
2746 	set_bit(NAPI_STATE_SCHED, &napi->state);
2747 }
2748 EXPORT_SYMBOL(netif_napi_add);
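
/*
 * A minimal sketch of wiring a driver into NAPI with the helper above.
 * foo_interrupt(), foo_setup_napi(), struct foo_priv and the poll routine
 * they reference are hypothetical: netif_napi_add() is called once at
 * setup time, napi_schedule() from the RX interrupt, and the poll routine
 * calls napi_complete() when it does not exhaust its budget.
 *
 *	static irqreturn_t foo_interrupt(int irq, void *dev_id)
 *	{
 *		struct foo_priv *priv = dev_id;
 *
 *		foo_disable_rx_irq(priv);
 *		napi_schedule(&priv->napi);
 *		return IRQ_HANDLED;
 *	}
 *
 *	static void foo_setup_napi(struct net_device *dev, struct foo_priv *priv)
 *	{
 *		netif_napi_add(dev, &priv->napi, foo_poll, 64);
 *	}
 */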
2749 
2750 void netif_napi_del(struct napi_struct *napi)
2751 {
2752 	struct sk_buff *skb, *next;
2753 
2754 	list_del_init(&napi->dev_list);
2755 	napi_free_frags(napi);
2756 
2757 	for (skb = napi->gro_list; skb; skb = next) {
2758 		next = skb->next;
2759 		skb->next = NULL;
2760 		kfree_skb(skb);
2761 	}
2762 
2763 	napi->gro_list = NULL;
2764 	napi->gro_count = 0;
2765 }
2766 EXPORT_SYMBOL(netif_napi_del);
2767 
2768 
2769 static void net_rx_action(struct softirq_action *h)
2770 {
2771 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2772 	unsigned long time_limit = jiffies + 2;
2773 	int budget = netdev_budget;
2774 	void *have;
2775 
2776 	local_irq_disable();
2777 
2778 	while (!list_empty(list)) {
2779 		struct napi_struct *n;
2780 		int work, weight;
2781 
		/* If the softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies, which gives an
		 * average latency of 1.5/HZ.
		 */
2786 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2787 			goto softnet_break;
2788 
2789 		local_irq_enable();
2790 
2791 		/* Even though interrupts have been re-enabled, this
2792 		 * access is safe because interrupts can only add new
2793 		 * entries to the tail of this list, and only ->poll()
2794 		 * calls can remove this head entry from the list.
2795 		 */
2796 		n = list_entry(list->next, struct napi_struct, poll_list);
2797 
2798 		have = netpoll_poll_lock(n);
2799 
2800 		weight = n->weight;
2801 
2802 		/* This NAPI_STATE_SCHED test is for avoiding a race
2803 		 * with netpoll's poll_napi().  Only the entity which
2804 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2805 		 * actually make the ->poll() call.  Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
2807 		 */
2808 		work = 0;
2809 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
2810 			work = n->poll(n, weight);
2811 			trace_napi_poll(n);
2812 		}
2813 
2814 		WARN_ON_ONCE(work > weight);
2815 
2816 		budget -= work;
2817 
2818 		local_irq_disable();
2819 
2820 		/* Drivers must not modify the NAPI state if they
2821 		 * consume the entire weight.  In such cases this code
2822 		 * still "owns" the NAPI instance and therefore can
2823 		 * move the instance around on the list at-will.
2824 		 */
2825 		if (unlikely(work == weight)) {
2826 			if (unlikely(napi_disable_pending(n))) {
2827 				local_irq_enable();
2828 				napi_complete(n);
2829 				local_irq_disable();
2830 			} else
2831 				list_move_tail(&n->poll_list, list);
2832 		}
2833 
2834 		netpoll_poll_unlock(have);
2835 	}
2836 out:
2837 	local_irq_enable();
2838 
2839 #ifdef CONFIG_NET_DMA
2840 	/*
2841 	 * There may not be any more sk_buffs coming right now, so push
2842 	 * any pending DMA copies to hardware
2843 	 */
2844 	dma_issue_pending_all();
2845 #endif
2846 
2847 	return;
2848 
2849 softnet_break:
2850 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2851 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2852 	goto out;
2853 }
2854 
static gifconf_func_t *gifconf_list[NPROTO];
2856 
2857 /**
2858  *	register_gifconf	-	register a SIOCGIF handler
2859  *	@family: Address family
2860  *	@gifconf: Function handler
2861  *
2862  *	Register protocol dependent address dumping routines. The handler
2863  *	that is passed must not be freed or reused until it has been replaced
2864  *	by another handler.
2865  */
2866 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2867 {
2868 	if (family >= NPROTO)
2869 		return -EINVAL;
2870 	gifconf_list[family] = gifconf;
2871 	return 0;
2872 }
2873 
2874 
2875 /*
2876  *	Map an interface index to its name (SIOCGIFNAME)
2877  */
2878 
2879 /*
2880  *	We need this ioctl for efficient implementation of the
2881  *	if_indextoname() function required by the IPv6 API.  Without
2882  *	it, we would have to search all the interfaces to find a
2883  *	match.  --pb
2884  */
2885 
2886 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2887 {
2888 	struct net_device *dev;
2889 	struct ifreq ifr;
2890 
2891 	/*
2892 	 *	Fetch the caller's info block.
2893 	 */
2894 
2895 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2896 		return -EFAULT;
2897 
2898 	read_lock(&dev_base_lock);
2899 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2900 	if (!dev) {
2901 		read_unlock(&dev_base_lock);
2902 		return -ENODEV;
2903 	}
2904 
2905 	strcpy(ifr.ifr_name, dev->name);
2906 	read_unlock(&dev_base_lock);
2907 
2908 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2909 		return -EFAULT;
2910 	return 0;
2911 }
2912 
2913 /*
2914  *	Perform a SIOCGIFCONF call. This structure will change
2915  *	size eventually, and there is nothing I can do about it.
2916  *	Thus we will need a 'compatibility mode'.
2917  */
2918 
2919 static int dev_ifconf(struct net *net, char __user *arg)
2920 {
2921 	struct ifconf ifc;
2922 	struct net_device *dev;
2923 	char __user *pos;
2924 	int len;
2925 	int total;
2926 	int i;
2927 
2928 	/*
2929 	 *	Fetch the caller's info block.
2930 	 */
2931 
2932 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2933 		return -EFAULT;
2934 
2935 	pos = ifc.ifc_buf;
2936 	len = ifc.ifc_len;
2937 
2938 	/*
2939 	 *	Loop over the interfaces, and write an info block for each.
2940 	 */
2941 
2942 	total = 0;
2943 	for_each_netdev(net, dev) {
2944 		for (i = 0; i < NPROTO; i++) {
2945 			if (gifconf_list[i]) {
2946 				int done;
2947 				if (!pos)
2948 					done = gifconf_list[i](dev, NULL, 0);
2949 				else
2950 					done = gifconf_list[i](dev, pos + total,
2951 							       len - total);
2952 				if (done < 0)
2953 					return -EFAULT;
2954 				total += done;
2955 			}
2956 		}
2957 	}
2958 
2959 	/*
2960 	 *	All done.  Write the updated control block back to the caller.
2961 	 */
2962 	ifc.ifc_len = total;
2963 
2964 	/*
2965 	 * 	Both BSD and Solaris return 0 here, so we do too.
2966 	 */
2967 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2968 }
2969 
2970 #ifdef CONFIG_PROC_FS
2971 /*
2972  *	This is invoked by the /proc filesystem handler to display a device
2973  *	in detail.
2974  */
2975 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2976 	__acquires(dev_base_lock)
2977 {
2978 	struct net *net = seq_file_net(seq);
2979 	loff_t off;
2980 	struct net_device *dev;
2981 
2982 	read_lock(&dev_base_lock);
2983 	if (!*pos)
2984 		return SEQ_START_TOKEN;
2985 
2986 	off = 1;
2987 	for_each_netdev(net, dev)
2988 		if (off++ == *pos)
2989 			return dev;
2990 
2991 	return NULL;
2992 }
2993 
2994 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2995 {
2996 	struct net *net = seq_file_net(seq);
2997 	++*pos;
2998 	return v == SEQ_START_TOKEN ?
2999 		first_net_device(net) : next_net_device((struct net_device *)v);
3000 }
3001 
3002 void dev_seq_stop(struct seq_file *seq, void *v)
3003 	__releases(dev_base_lock)
3004 {
3005 	read_unlock(&dev_base_lock);
3006 }
3007 
3008 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3009 {
3010 	const struct net_device_stats *stats = dev_get_stats(dev);
3011 
3012 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3013 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3014 		   dev->name, stats->rx_bytes, stats->rx_packets,
3015 		   stats->rx_errors,
3016 		   stats->rx_dropped + stats->rx_missed_errors,
3017 		   stats->rx_fifo_errors,
3018 		   stats->rx_length_errors + stats->rx_over_errors +
3019 		    stats->rx_crc_errors + stats->rx_frame_errors,
3020 		   stats->rx_compressed, stats->multicast,
3021 		   stats->tx_bytes, stats->tx_packets,
3022 		   stats->tx_errors, stats->tx_dropped,
3023 		   stats->tx_fifo_errors, stats->collisions,
3024 		   stats->tx_carrier_errors +
3025 		    stats->tx_aborted_errors +
3026 		    stats->tx_window_errors +
3027 		    stats->tx_heartbeat_errors,
3028 		   stats->tx_compressed);
3029 }
3030 
3031 /*
3032  *	Called from the PROCfs module. This now uses the new arbitrary sized
3033  *	/proc/net interface to create /proc/net/dev
3034  */
3035 static int dev_seq_show(struct seq_file *seq, void *v)
3036 {
3037 	if (v == SEQ_START_TOKEN)
3038 		seq_puts(seq, "Inter-|   Receive                            "
3039 			      "                    |  Transmit\n"
3040 			      " face |bytes    packets errs drop fifo frame "
3041 			      "compressed multicast|bytes    packets errs "
3042 			      "drop fifo colls carrier compressed\n");
3043 	else
3044 		dev_seq_printf_stats(seq, v);
3045 	return 0;
3046 }
3047 
3048 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3049 {
3050 	struct netif_rx_stats *rc = NULL;
3051 
3052 	while (*pos < nr_cpu_ids)
3053 		if (cpu_online(*pos)) {
3054 			rc = &per_cpu(netdev_rx_stat, *pos);
3055 			break;
3056 		} else
3057 			++*pos;
3058 	return rc;
3059 }
3060 
3061 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3062 {
3063 	return softnet_get_online(pos);
3064 }
3065 
3066 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3067 {
3068 	++*pos;
3069 	return softnet_get_online(pos);
3070 }
3071 
3072 static void softnet_seq_stop(struct seq_file *seq, void *v)
3073 {
3074 }
3075 
3076 static int softnet_seq_show(struct seq_file *seq, void *v)
3077 {
3078 	struct netif_rx_stats *s = v;
3079 
3080 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3081 		   s->total, s->dropped, s->time_squeeze, 0,
3082 		   0, 0, 0, 0, /* was fastroute */
3083 		   s->cpu_collision );
3084 	return 0;
3085 }
3086 
3087 static const struct seq_operations dev_seq_ops = {
3088 	.start = dev_seq_start,
3089 	.next  = dev_seq_next,
3090 	.stop  = dev_seq_stop,
3091 	.show  = dev_seq_show,
3092 };
3093 
3094 static int dev_seq_open(struct inode *inode, struct file *file)
3095 {
3096 	return seq_open_net(inode, file, &dev_seq_ops,
3097 			    sizeof(struct seq_net_private));
3098 }
3099 
3100 static const struct file_operations dev_seq_fops = {
3101 	.owner	 = THIS_MODULE,
3102 	.open    = dev_seq_open,
3103 	.read    = seq_read,
3104 	.llseek  = seq_lseek,
3105 	.release = seq_release_net,
3106 };
3107 
3108 static const struct seq_operations softnet_seq_ops = {
3109 	.start = softnet_seq_start,
3110 	.next  = softnet_seq_next,
3111 	.stop  = softnet_seq_stop,
3112 	.show  = softnet_seq_show,
3113 };
3114 
3115 static int softnet_seq_open(struct inode *inode, struct file *file)
3116 {
3117 	return seq_open(file, &softnet_seq_ops);
3118 }
3119 
3120 static const struct file_operations softnet_seq_fops = {
3121 	.owner	 = THIS_MODULE,
3122 	.open    = softnet_seq_open,
3123 	.read    = seq_read,
3124 	.llseek  = seq_lseek,
3125 	.release = seq_release,
3126 };
3127 
3128 static void *ptype_get_idx(loff_t pos)
3129 {
3130 	struct packet_type *pt = NULL;
3131 	loff_t i = 0;
3132 	int t;
3133 
3134 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3135 		if (i == pos)
3136 			return pt;
3137 		++i;
3138 	}
3139 
3140 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3141 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3142 			if (i == pos)
3143 				return pt;
3144 			++i;
3145 		}
3146 	}
3147 	return NULL;
3148 }
3149 
3150 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3151 	__acquires(RCU)
3152 {
3153 	rcu_read_lock();
3154 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3155 }
3156 
3157 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3158 {
3159 	struct packet_type *pt;
3160 	struct list_head *nxt;
3161 	int hash;
3162 
3163 	++*pos;
3164 	if (v == SEQ_START_TOKEN)
3165 		return ptype_get_idx(0);
3166 
3167 	pt = v;
3168 	nxt = pt->list.next;
3169 	if (pt->type == htons(ETH_P_ALL)) {
3170 		if (nxt != &ptype_all)
3171 			goto found;
3172 		hash = 0;
3173 		nxt = ptype_base[0].next;
3174 	} else
3175 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3176 
3177 	while (nxt == &ptype_base[hash]) {
3178 		if (++hash >= PTYPE_HASH_SIZE)
3179 			return NULL;
3180 		nxt = ptype_base[hash].next;
3181 	}
3182 found:
3183 	return list_entry(nxt, struct packet_type, list);
3184 }
3185 
3186 static void ptype_seq_stop(struct seq_file *seq, void *v)
3187 	__releases(RCU)
3188 {
3189 	rcu_read_unlock();
3190 }
3191 
3192 static int ptype_seq_show(struct seq_file *seq, void *v)
3193 {
3194 	struct packet_type *pt = v;
3195 
3196 	if (v == SEQ_START_TOKEN)
3197 		seq_puts(seq, "Type Device      Function\n");
3198 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3199 		if (pt->type == htons(ETH_P_ALL))
3200 			seq_puts(seq, "ALL ");
3201 		else
3202 			seq_printf(seq, "%04x", ntohs(pt->type));
3203 
3204 		seq_printf(seq, " %-8s %pF\n",
3205 			   pt->dev ? pt->dev->name : "", pt->func);
3206 	}
3207 
3208 	return 0;
3209 }
3210 
3211 static const struct seq_operations ptype_seq_ops = {
3212 	.start = ptype_seq_start,
3213 	.next  = ptype_seq_next,
3214 	.stop  = ptype_seq_stop,
3215 	.show  = ptype_seq_show,
3216 };
3217 
3218 static int ptype_seq_open(struct inode *inode, struct file *file)
3219 {
3220 	return seq_open_net(inode, file, &ptype_seq_ops,
3221 			sizeof(struct seq_net_private));
3222 }
3223 
3224 static const struct file_operations ptype_seq_fops = {
3225 	.owner	 = THIS_MODULE,
3226 	.open    = ptype_seq_open,
3227 	.read    = seq_read,
3228 	.llseek  = seq_lseek,
3229 	.release = seq_release_net,
3230 };
3231 
3232 
3233 static int __net_init dev_proc_net_init(struct net *net)
3234 {
3235 	int rc = -ENOMEM;
3236 
3237 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3238 		goto out;
3239 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3240 		goto out_dev;
3241 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3242 		goto out_softnet;
3243 
3244 	if (wext_proc_init(net))
3245 		goto out_ptype;
3246 	rc = 0;
3247 out:
3248 	return rc;
3249 out_ptype:
3250 	proc_net_remove(net, "ptype");
3251 out_softnet:
3252 	proc_net_remove(net, "softnet_stat");
3253 out_dev:
3254 	proc_net_remove(net, "dev");
3255 	goto out;
3256 }
3257 
3258 static void __net_exit dev_proc_net_exit(struct net *net)
3259 {
3260 	wext_proc_exit(net);
3261 
3262 	proc_net_remove(net, "ptype");
3263 	proc_net_remove(net, "softnet_stat");
3264 	proc_net_remove(net, "dev");
3265 }
3266 
3267 static struct pernet_operations __net_initdata dev_proc_ops = {
3268 	.init = dev_proc_net_init,
3269 	.exit = dev_proc_net_exit,
3270 };
3271 
3272 static int __init dev_proc_init(void)
3273 {
3274 	return register_pernet_subsys(&dev_proc_ops);
3275 }
3276 #else
3277 #define dev_proc_init() 0
3278 #endif	/* CONFIG_PROC_FS */
3279 
3280 
3281 /**
3282  *	netdev_set_master	-	set up master/slave pair
3283  *	@slave: slave device
3284  *	@master: new master device
3285  *
3286  *	Changes the master device of the slave. Pass %NULL to break the
3287  *	bonding. The caller must hold the RTNL semaphore. On a failure
3288  *	a negative errno code is returned. On success the reference counts
3289  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3290  *	function returns zero.
3291  */
3292 int netdev_set_master(struct net_device *slave, struct net_device *master)
3293 {
3294 	struct net_device *old = slave->master;
3295 
3296 	ASSERT_RTNL();
3297 
3298 	if (master) {
3299 		if (old)
3300 			return -EBUSY;
3301 		dev_hold(master);
3302 	}
3303 
3304 	slave->master = master;
3305 
3306 	synchronize_net();
3307 
3308 	if (old)
3309 		dev_put(old);
3310 
3311 	if (master)
3312 		slave->flags |= IFF_SLAVE;
3313 	else
3314 		slave->flags &= ~IFF_SLAVE;
3315 
3316 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3317 	return 0;
3318 }
3319 
3320 static void dev_change_rx_flags(struct net_device *dev, int flags)
3321 {
3322 	const struct net_device_ops *ops = dev->netdev_ops;
3323 
3324 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3325 		ops->ndo_change_rx_flags(dev, flags);
3326 }
3327 
3328 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3329 {
3330 	unsigned short old_flags = dev->flags;
3331 	uid_t uid;
3332 	gid_t gid;
3333 
3334 	ASSERT_RTNL();
3335 
3336 	dev->flags |= IFF_PROMISC;
3337 	dev->promiscuity += inc;
3338 	if (dev->promiscuity == 0) {
3339 		/*
3340 		 * Avoid overflow.
3341 		 * If inc causes overflow, untouch promisc and return error.
3342 		 */
3343 		if (inc < 0)
3344 			dev->flags &= ~IFF_PROMISC;
3345 		else {
3346 			dev->promiscuity -= inc;
3347 			printk(KERN_WARNING "%s: promiscuity touches roof, "
3348 				"set promiscuity failed, promiscuity feature "
3349 				"of device might be broken.\n", dev->name);
3350 			return -EOVERFLOW;
3351 		}
3352 	}
3353 	if (dev->flags != old_flags) {
3354 		printk(KERN_INFO "device %s %s promiscuous mode\n",
3355 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3356 							       "left");
3357 		if (audit_enabled) {
3358 			current_uid_gid(&uid, &gid);
3359 			audit_log(current->audit_context, GFP_ATOMIC,
3360 				AUDIT_ANOM_PROMISCUOUS,
3361 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3362 				dev->name, (dev->flags & IFF_PROMISC),
3363 				(old_flags & IFF_PROMISC),
3364 				audit_get_loginuid(current),
3365 				uid, gid,
3366 				audit_get_sessionid(current));
3367 		}
3368 
3369 		dev_change_rx_flags(dev, IFF_PROMISC);
3370 	}
3371 	return 0;
3372 }
3373 
3374 /**
3375  *	dev_set_promiscuity	- update promiscuity count on a device
3376  *	@dev: device
3377  *	@inc: modifier
3378  *
3379  *	Add or remove promiscuity from a device. While the count in the device
3380  *	remains above zero the interface remains promiscuous. Once it hits zero
3381  *	the device reverts back to normal filtering operation. A negative inc
3382  *	value is used to drop promiscuity on the device.
3383  *	Return 0 if successful or a negative errno code on error.
3384  */
3385 int dev_set_promiscuity(struct net_device *dev, int inc)
3386 {
3387 	unsigned short old_flags = dev->flags;
3388 	int err;
3389 
3390 	err = __dev_set_promiscuity(dev, inc);
3391 	if (err < 0)
3392 		return err;
3393 	if (dev->flags != old_flags)
3394 		dev_set_rx_mode(dev);
3395 	return err;
3396 }
3397 
3398 /**
3399  *	dev_set_allmulti	- update allmulti count on a device
3400  *	@dev: device
3401  *	@inc: modifier
3402  *
3403  *	Add or remove reception of all multicast frames to a device. While the
3404  *	count in the device remains above zero the interface remains listening
3405  *	to all interfaces. Once it hits zero the device reverts back to normal
3406  *	filtering operation. A negative @inc value is used to drop the counter
3407  *	when releasing a resource needing all multicasts.
3408  *	Return 0 if successful or a negative errno code on error.
3409  */
3410 
3411 int dev_set_allmulti(struct net_device *dev, int inc)
3412 {
3413 	unsigned short old_flags = dev->flags;
3414 
3415 	ASSERT_RTNL();
3416 
3417 	dev->flags |= IFF_ALLMULTI;
3418 	dev->allmulti += inc;
3419 	if (dev->allmulti == 0) {
3420 		/*
3421 		 * Avoid overflow.
3422 		 * If inc causes overflow, untouch allmulti and return error.
3423 		 */
3424 		if (inc < 0)
3425 			dev->flags &= ~IFF_ALLMULTI;
3426 		else {
3427 			dev->allmulti -= inc;
3428 			printk(KERN_WARNING "%s: allmulti touches roof, "
3429 				"set allmulti failed, allmulti feature of "
3430 				"device might be broken.\n", dev->name);
3431 			return -EOVERFLOW;
3432 		}
3433 	}
3434 	if (dev->flags ^ old_flags) {
3435 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3436 		dev_set_rx_mode(dev);
3437 	}
3438 	return 0;
3439 }
3440 
3441 /*
3442  *	Upload unicast and multicast address lists to device and
3443  *	configure RX filtering. When the device doesn't support unicast
3444  *	filtering it is put in promiscuous mode while unicast addresses
3445  *	are present.
3446  */
3447 void __dev_set_rx_mode(struct net_device *dev)
3448 {
3449 	const struct net_device_ops *ops = dev->netdev_ops;
3450 
3451 	/* dev_open will call this function so the list will stay sane. */
3452 	if (!(dev->flags&IFF_UP))
3453 		return;
3454 
3455 	if (!netif_device_present(dev))
3456 		return;
3457 
3458 	if (ops->ndo_set_rx_mode)
3459 		ops->ndo_set_rx_mode(dev);
3460 	else {
		/* Unicast address changes may only happen under the rtnl,
3462 		 * therefore calling __dev_set_promiscuity here is safe.
3463 		 */
3464 		if (dev->uc.count > 0 && !dev->uc_promisc) {
3465 			__dev_set_promiscuity(dev, 1);
3466 			dev->uc_promisc = 1;
3467 		} else if (dev->uc.count == 0 && dev->uc_promisc) {
3468 			__dev_set_promiscuity(dev, -1);
3469 			dev->uc_promisc = 0;
3470 		}
3471 
3472 		if (ops->ndo_set_multicast_list)
3473 			ops->ndo_set_multicast_list(dev);
3474 	}
3475 }
3476 
3477 void dev_set_rx_mode(struct net_device *dev)
3478 {
3479 	netif_addr_lock_bh(dev);
3480 	__dev_set_rx_mode(dev);
3481 	netif_addr_unlock_bh(dev);
3482 }
3483 
3484 /* hw addresses list handling functions */
3485 
3486 static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3487 			 int addr_len, unsigned char addr_type)
3488 {
3489 	struct netdev_hw_addr *ha;
3490 	int alloc_size;
3491 
3492 	if (addr_len > MAX_ADDR_LEN)
3493 		return -EINVAL;
3494 
3495 	list_for_each_entry(ha, &list->list, list) {
3496 		if (!memcmp(ha->addr, addr, addr_len) &&
3497 		    ha->type == addr_type) {
3498 			ha->refcount++;
3499 			return 0;
3500 		}
3501 	}
3502 
3503 
3504 	alloc_size = sizeof(*ha);
3505 	if (alloc_size < L1_CACHE_BYTES)
3506 		alloc_size = L1_CACHE_BYTES;
3507 	ha = kmalloc(alloc_size, GFP_ATOMIC);
3508 	if (!ha)
3509 		return -ENOMEM;
3510 	memcpy(ha->addr, addr, addr_len);
3511 	ha->type = addr_type;
3512 	ha->refcount = 1;
3513 	ha->synced = false;
3514 	list_add_tail_rcu(&ha->list, &list->list);
3515 	list->count++;
3516 	return 0;
3517 }
3518 
3519 static void ha_rcu_free(struct rcu_head *head)
3520 {
3521 	struct netdev_hw_addr *ha;
3522 
3523 	ha = container_of(head, struct netdev_hw_addr, rcu_head);
3524 	kfree(ha);
3525 }
3526 
3527 static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3528 			 int addr_len, unsigned char addr_type)
3529 {
3530 	struct netdev_hw_addr *ha;
3531 
3532 	list_for_each_entry(ha, &list->list, list) {
3533 		if (!memcmp(ha->addr, addr, addr_len) &&
3534 		    (ha->type == addr_type || !addr_type)) {
3535 			if (--ha->refcount)
3536 				return 0;
3537 			list_del_rcu(&ha->list);
3538 			call_rcu(&ha->rcu_head, ha_rcu_free);
3539 			list->count--;
3540 			return 0;
3541 		}
3542 	}
3543 	return -ENOENT;
3544 }
3545 
3546 static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3547 				  struct netdev_hw_addr_list *from_list,
3548 				  int addr_len,
3549 				  unsigned char addr_type)
3550 {
3551 	int err;
3552 	struct netdev_hw_addr *ha, *ha2;
3553 	unsigned char type;
3554 
3555 	list_for_each_entry(ha, &from_list->list, list) {
3556 		type = addr_type ? addr_type : ha->type;
3557 		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3558 		if (err)
3559 			goto unroll;
3560 	}
3561 	return 0;
3562 
3563 unroll:
3564 	list_for_each_entry(ha2, &from_list->list, list) {
3565 		if (ha2 == ha)
3566 			break;
3567 		type = addr_type ? addr_type : ha2->type;
3568 		__hw_addr_del(to_list, ha2->addr, addr_len, type);
3569 	}
3570 	return err;
3571 }
3572 
3573 static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3574 				   struct netdev_hw_addr_list *from_list,
3575 				   int addr_len,
3576 				   unsigned char addr_type)
3577 {
3578 	struct netdev_hw_addr *ha;
3579 	unsigned char type;
3580 
3581 	list_for_each_entry(ha, &from_list->list, list) {
3582 		type = addr_type ? addr_type : ha->type;
		__hw_addr_del(to_list, ha->addr, addr_len, type);
3584 	}
3585 }
3586 
3587 static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3588 			  struct netdev_hw_addr_list *from_list,
3589 			  int addr_len)
3590 {
3591 	int err = 0;
3592 	struct netdev_hw_addr *ha, *tmp;
3593 
3594 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3595 		if (!ha->synced) {
3596 			err = __hw_addr_add(to_list, ha->addr,
3597 					    addr_len, ha->type);
3598 			if (err)
3599 				break;
3600 			ha->synced = true;
3601 			ha->refcount++;
3602 		} else if (ha->refcount == 1) {
3603 			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3604 			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3605 		}
3606 	}
3607 	return err;
3608 }
3609 
3610 static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3611 			     struct netdev_hw_addr_list *from_list,
3612 			     int addr_len)
3613 {
3614 	struct netdev_hw_addr *ha, *tmp;
3615 
3616 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3617 		if (ha->synced) {
3618 			__hw_addr_del(to_list, ha->addr,
3619 				      addr_len, ha->type);
3620 			ha->synced = false;
3621 			__hw_addr_del(from_list, ha->addr,
3622 				      addr_len, ha->type);
3623 		}
3624 	}
3625 }
3626 
3627 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3628 {
3629 	struct netdev_hw_addr *ha, *tmp;
3630 
3631 	list_for_each_entry_safe(ha, tmp, &list->list, list) {
3632 		list_del_rcu(&ha->list);
3633 		call_rcu(&ha->rcu_head, ha_rcu_free);
3634 	}
3635 	list->count = 0;
3636 }
3637 
3638 static void __hw_addr_init(struct netdev_hw_addr_list *list)
3639 {
3640 	INIT_LIST_HEAD(&list->list);
3641 	list->count = 0;
3642 }
3643 
3644 /* Device addresses handling functions */
3645 
3646 static void dev_addr_flush(struct net_device *dev)
3647 {
3648 	/* rtnl_mutex must be held here */
3649 
3650 	__hw_addr_flush(&dev->dev_addrs);
3651 	dev->dev_addr = NULL;
3652 }
3653 
3654 static int dev_addr_init(struct net_device *dev)
3655 {
3656 	unsigned char addr[MAX_ADDR_LEN];
3657 	struct netdev_hw_addr *ha;
3658 	int err;
3659 
3660 	/* rtnl_mutex must be held here */
3661 
3662 	__hw_addr_init(&dev->dev_addrs);
3663 	memset(addr, 0, sizeof(addr));
3664 	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3665 			    NETDEV_HW_ADDR_T_LAN);
3666 	if (!err) {
3667 		/*
3668 		 * Get the first (previously created) address from the list
3669 		 * and set dev_addr pointer to this location.
3670 		 */
3671 		ha = list_first_entry(&dev->dev_addrs.list,
3672 				      struct netdev_hw_addr, list);
3673 		dev->dev_addr = ha->addr;
3674 	}
3675 	return err;
3676 }
3677 
3678 /**
3679  *	dev_addr_add	- Add a device address
3680  *	@dev: device
3681  *	@addr: address to add
3682  *	@addr_type: address type
3683  *
3684  *	Add a device address to the device or increase the reference count if
3685  *	it already exists.
3686  *
3687  *	The caller must hold the rtnl_mutex.
3688  */
3689 int dev_addr_add(struct net_device *dev, unsigned char *addr,
3690 		 unsigned char addr_type)
3691 {
3692 	int err;
3693 
3694 	ASSERT_RTNL();
3695 
3696 	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3697 	if (!err)
3698 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3699 	return err;
3700 }
3701 EXPORT_SYMBOL(dev_addr_add);
3702 
3703 /**
3704  *	dev_addr_del	- Release a device address.
3705  *	@dev: device
3706  *	@addr: address to delete
3707  *	@addr_type: address type
3708  *
3709  *	Release reference to a device address and remove it from the device
3710  *	if the reference count drops to zero.
3711  *
3712  *	The caller must hold the rtnl_mutex.
3713  */
3714 int dev_addr_del(struct net_device *dev, unsigned char *addr,
3715 		 unsigned char addr_type)
3716 {
3717 	int err;
3718 	struct netdev_hw_addr *ha;
3719 
3720 	ASSERT_RTNL();
3721 
3722 	/*
	 * We cannot remove the first address from the list because
	 * dev->dev_addr points to it.
3725 	 */
3726 	ha = list_first_entry(&dev->dev_addrs.list,
3727 			      struct netdev_hw_addr, list);
3728 	if (ha->addr == dev->dev_addr && ha->refcount == 1)
3729 		return -ENOENT;
3730 
3731 	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3732 			    addr_type);
3733 	if (!err)
3734 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3735 	return err;
3736 }
3737 EXPORT_SYMBOL(dev_addr_del);
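
/*
 * A minimal sketch of the reference-counted device address list above: a
 * hypothetical driver adds a secondary MAC at setup and releases it on
 * teardown, holding RTNL around both calls as required.  The mac buffer
 * is assumed to be dev->addr_len bytes.
 *
 *	static int foo_add_secondary_mac(struct net_device *dev,
 *					 unsigned char *mac)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = dev_addr_add(dev, mac, NETDEV_HW_ADDR_T_LAN);
 *		rtnl_unlock();
 *		return err;
 *	}
 *
 *	static void foo_del_secondary_mac(struct net_device *dev,
 *					  unsigned char *mac)
 *	{
 *		rtnl_lock();
 *		dev_addr_del(dev, mac, NETDEV_HW_ADDR_T_LAN);
 *		rtnl_unlock();
 *	}
 */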
3738 
3739 /**
3740  *	dev_addr_add_multiple	- Add device addresses from another device
3741  *	@to_dev: device to which addresses will be added
3742  *	@from_dev: device from which addresses will be added
3743  *	@addr_type: address type - 0 means type will be used from from_dev
3744  *
3745  *	Add device addresses of the one device to another.
3746  **
3747  *	The caller must hold the rtnl_mutex.
3748  */
3749 int dev_addr_add_multiple(struct net_device *to_dev,
3750 			  struct net_device *from_dev,
3751 			  unsigned char addr_type)
3752 {
3753 	int err;
3754 
3755 	ASSERT_RTNL();
3756 
3757 	if (from_dev->addr_len != to_dev->addr_len)
3758 		return -EINVAL;
3759 	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3760 				     to_dev->addr_len, addr_type);
3761 	if (!err)
3762 		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3763 	return err;
3764 }
3765 EXPORT_SYMBOL(dev_addr_add_multiple);
3766 
3767 /**
3768  *	dev_addr_del_multiple	- Delete device addresses by another device
3769  *	@to_dev: device where the addresses will be deleted
3770  *	@from_dev: device by which addresses the addresses will be deleted
3771  *	@addr_type: address type - 0 means type will used from from_dev
3772  *
3773  *	Deletes addresses in to device by the list of addresses in from device.
3774  *
3775  *	The caller must hold the rtnl_mutex.
3776  */
3777 int dev_addr_del_multiple(struct net_device *to_dev,
3778 			  struct net_device *from_dev,
3779 			  unsigned char addr_type)
3780 {
3781 	ASSERT_RTNL();
3782 
3783 	if (from_dev->addr_len != to_dev->addr_len)
3784 		return -EINVAL;
3785 	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3786 			       to_dev->addr_len, addr_type);
3787 	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3788 	return 0;
3789 }
3790 EXPORT_SYMBOL(dev_addr_del_multiple);
3791 
3792 /* multicast addresses handling functions */
3793 
3794 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3795 		      void *addr, int alen, int glbl)
3796 {
3797 	struct dev_addr_list *da;
3798 
3799 	for (; (da = *list) != NULL; list = &da->next) {
3800 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3801 		    alen == da->da_addrlen) {
3802 			if (glbl) {
3803 				int old_glbl = da->da_gusers;
3804 				da->da_gusers = 0;
3805 				if (old_glbl == 0)
3806 					break;
3807 			}
3808 			if (--da->da_users)
3809 				return 0;
3810 
3811 			*list = da->next;
3812 			kfree(da);
3813 			(*count)--;
3814 			return 0;
3815 		}
3816 	}
3817 	return -ENOENT;
3818 }
3819 
3820 int __dev_addr_add(struct dev_addr_list **list, int *count,
3821 		   void *addr, int alen, int glbl)
3822 {
3823 	struct dev_addr_list *da;
3824 
3825 	for (da = *list; da != NULL; da = da->next) {
3826 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3827 		    da->da_addrlen == alen) {
3828 			if (glbl) {
3829 				int old_glbl = da->da_gusers;
3830 				da->da_gusers = 1;
3831 				if (old_glbl)
3832 					return 0;
3833 			}
3834 			da->da_users++;
3835 			return 0;
3836 		}
3837 	}
3838 
3839 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3840 	if (da == NULL)
3841 		return -ENOMEM;
3842 	memcpy(da->da_addr, addr, alen);
3843 	da->da_addrlen = alen;
3844 	da->da_users = 1;
3845 	da->da_gusers = glbl ? 1 : 0;
3846 	da->next = *list;
3847 	*list = da;
3848 	(*count)++;
3849 	return 0;
3850 }
3851 
3852 /**
3853  *	dev_unicast_delete	- Release secondary unicast address.
3854  *	@dev: device
3855  *	@addr: address to delete
3856  *
3857  *	Release reference to a secondary unicast address and remove it
3858  *	from the device if the reference count drops to zero.
3859  *
3860  * 	The caller must hold the rtnl_mutex.
3861  */
3862 int dev_unicast_delete(struct net_device *dev, void *addr)
3863 {
3864 	int err;
3865 
3866 	ASSERT_RTNL();
3867 
3868 	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
3869 			    NETDEV_HW_ADDR_T_UNICAST);
3870 	if (!err)
3871 		__dev_set_rx_mode(dev);
3872 	return err;
3873 }
3874 EXPORT_SYMBOL(dev_unicast_delete);
3875 
3876 /**
3877  *	dev_unicast_add		- add a secondary unicast address
3878  *	@dev: device
3879  *	@addr: address to add
3880  *
3881  *	Add a secondary unicast address to the device or increase
3882  *	the reference count if it already exists.
3883  *
3884  *	The caller must hold the rtnl_mutex.
3885  */
3886 int dev_unicast_add(struct net_device *dev, void *addr)
3887 {
3888 	int err;
3889 
3890 	ASSERT_RTNL();
3891 
3892 	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
3893 			    NETDEV_HW_ADDR_T_UNICAST);
3894 	if (!err)
3895 		__dev_set_rx_mode(dev);
3896 	return err;
3897 }
3898 EXPORT_SYMBOL(dev_unicast_add);
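
/*
 * Illustrative sketch only (the lower device "lowerdev" and the address
 * buffer "extra_addr" are hypothetical): a virtual device that needs
 * frames for an additional MAC address on its lower device adds and
 * later releases a secondary unicast address, with RTNL held throughout.
 *
 *	rtnl_lock();
 *	err = dev_unicast_add(lowerdev, extra_addr);
 *	...
 *	if (!err)
 *		dev_unicast_delete(lowerdev, extra_addr);
 *	rtnl_unlock();
 */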
3899 
3900 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3901 		    struct dev_addr_list **from, int *from_count)
3902 {
3903 	struct dev_addr_list *da, *next;
3904 	int err = 0;
3905 
3906 	da = *from;
3907 	while (da != NULL) {
3908 		next = da->next;
3909 		if (!da->da_synced) {
3910 			err = __dev_addr_add(to, to_count,
3911 					     da->da_addr, da->da_addrlen, 0);
3912 			if (err < 0)
3913 				break;
3914 			da->da_synced = 1;
3915 			da->da_users++;
3916 		} else if (da->da_users == 1) {
3917 			__dev_addr_delete(to, to_count,
3918 					  da->da_addr, da->da_addrlen, 0);
3919 			__dev_addr_delete(from, from_count,
3920 					  da->da_addr, da->da_addrlen, 0);
3921 		}
3922 		da = next;
3923 	}
3924 	return err;
3925 }
3926 
3927 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3928 		       struct dev_addr_list **from, int *from_count)
3929 {
3930 	struct dev_addr_list *da, *next;
3931 
3932 	da = *from;
3933 	while (da != NULL) {
3934 		next = da->next;
3935 		if (da->da_synced) {
3936 			__dev_addr_delete(to, to_count,
3937 					  da->da_addr, da->da_addrlen, 0);
3938 			da->da_synced = 0;
3939 			__dev_addr_delete(from, from_count,
3940 					  da->da_addr, da->da_addrlen, 0);
3941 		}
3942 		da = next;
3943 	}
3944 }
3945 
3946 /**
3947  *	dev_unicast_sync - Synchronize device's unicast list to another device
3948  *	@to: destination device
3949  *	@from: source device
3950  *
3951  *	Add newly added addresses to the destination device and release
3952  *	addresses that have no users left.
3953  *
3954  *	This function is intended to be called from the dev->set_rx_mode
3955  *	function of layered software devices.
3956  */
3957 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3958 {
3959 	int err = 0;
3960 
3961 	ASSERT_RTNL();
3962 
3963 	if (to->addr_len != from->addr_len)
3964 		return -EINVAL;
3965 
3966 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
3967 	if (!err)
3968 		__dev_set_rx_mode(to);
3969 	return err;
3970 }
3971 EXPORT_SYMBOL(dev_unicast_sync);
3972 
3973 /**
3974  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3975  *	@to: destination device
3976  *	@from: source device
3977  *
3978  *	Remove all addresses that were added to the destination device by
3979  *	dev_unicast_sync(). This function is intended to be called from the
3980  *	dev->stop function of layered software devices.
3981  */
3982 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3983 {
3984 	ASSERT_RTNL();
3985 
3986 	if (to->addr_len != from->addr_len)
3987 		return;
3988 
3989 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
3990 	__dev_set_rx_mode(to);
3991 }
3992 EXPORT_SYMBOL(dev_unicast_unsync);
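
/*
 * Illustrative sketch (hypothetical layered driver "foo"): the sync
 * helper is meant to run from the upper device's ndo_set_rx_mode and the
 * unsync helper from its ndo_stop; both expect the caller to hold RTNL,
 * as asserted above.
 *
 *	static void foo_set_rx_mode(struct net_device *upper)
 *	{
 *		struct foo_priv *priv = netdev_priv(upper);
 *
 *		dev_unicast_sync(priv->lowerdev, upper);
 *	}
 *
 *	static int foo_stop(struct net_device *upper)
 *	{
 *		struct foo_priv *priv = netdev_priv(upper);
 *
 *		dev_unicast_unsync(priv->lowerdev, upper);
 *		return 0;
 *	}
 */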
3993 
3994 static void dev_unicast_flush(struct net_device *dev)
3995 {
3996 	/* rtnl_mutex must be held here */
3997 
3998 	__hw_addr_flush(&dev->uc);
3999 }
4000 
4001 static void dev_unicast_init(struct net_device *dev)
4002 {
4003 	/* rtnl_mutex must be held here */
4004 
4005 	__hw_addr_init(&dev->uc);
4006 }
4007 
4008 
4009 static void __dev_addr_discard(struct dev_addr_list **list)
4010 {
4011 	struct dev_addr_list *tmp;
4012 
4013 	while (*list != NULL) {
4014 		tmp = *list;
4015 		*list = tmp->next;
4016 		if (tmp->da_users > tmp->da_gusers)
4017 			printk("__dev_addr_discard: address leakage! "
4018 			       "da_users=%d\n", tmp->da_users);
4019 		kfree(tmp);
4020 	}
4021 }
4022 
4023 static void dev_addr_discard(struct net_device *dev)
4024 {
4025 	netif_addr_lock_bh(dev);
4026 
4027 	__dev_addr_discard(&dev->mc_list);
4028 	dev->mc_count = 0;
4029 
4030 	netif_addr_unlock_bh(dev);
4031 }
4032 
4033 /**
4034  *	dev_get_flags - get flags reported to userspace
4035  *	@dev: device
4036  *
4037  *	Get the combination of flag bits exported through APIs to userspace.
4038  */
4039 unsigned dev_get_flags(const struct net_device *dev)
4040 {
4041 	unsigned flags;
4042 
4043 	flags = (dev->flags & ~(IFF_PROMISC |
4044 				IFF_ALLMULTI |
4045 				IFF_RUNNING |
4046 				IFF_LOWER_UP |
4047 				IFF_DORMANT)) |
4048 		(dev->gflags & (IFF_PROMISC |
4049 				IFF_ALLMULTI));
4050 
4051 	if (netif_running(dev)) {
4052 		if (netif_oper_up(dev))
4053 			flags |= IFF_RUNNING;
4054 		if (netif_carrier_ok(dev))
4055 			flags |= IFF_LOWER_UP;
4056 		if (netif_dormant(dev))
4057 			flags |= IFF_DORMANT;
4058 	}
4059 
4060 	return flags;
4061 }
4062 
4063 /**
4064  *	dev_change_flags - change device settings
4065  *	@dev: device
4066  *	@flags: device state flags
4067  *
4068  *	Change settings on a device based on the given state flags. The
4069  *	flags are in the userspace-exported format.
4070  */
4071 int dev_change_flags(struct net_device *dev, unsigned flags)
4072 {
4073 	int ret, changes;
4074 	int old_flags = dev->flags;
4075 
4076 	ASSERT_RTNL();
4077 
4078 	/*
4079 	 *	Set the flags on our device.
4080 	 */
4081 
4082 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4083 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4084 			       IFF_AUTOMEDIA)) |
4085 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4086 				    IFF_ALLMULTI));
4087 
4088 	/*
4089 	 *	Load in the correct multicast list now the flags have changed.
4090 	 */
4091 
4092 	if ((old_flags ^ flags) & IFF_MULTICAST)
4093 		dev_change_rx_flags(dev, IFF_MULTICAST);
4094 
4095 	dev_set_rx_mode(dev);
4096 
4097 	/*
4098 	 *	Have we downed the interface? We handle IFF_UP ourselves
4099 	 *	according to user attempts to set it, rather than blindly
4100 	 *	setting it.
4101 	 */
4102 
4103 	ret = 0;
4104 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4105 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
4106 
4107 		if (!ret)
4108 			dev_set_rx_mode(dev);
4109 	}
4110 
4111 	if (dev->flags & IFF_UP &&
4112 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
4113 					  IFF_VOLATILE)))
4114 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4115 
4116 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4117 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
4118 		dev->gflags ^= IFF_PROMISC;
4119 		dev_set_promiscuity(dev, inc);
4120 	}
4121 
4122 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4123 	   is important. Some (broken) drivers set IFF_PROMISC when
4124 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4125 	 */
4126 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4127 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
4128 		dev->gflags ^= IFF_ALLMULTI;
4129 		dev_set_allmulti(dev, inc);
4130 	}
4131 
4132 	/* Exclude state transition flags, already notified */
4133 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4134 	if (changes)
4135 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4136 
4137 	return ret;
4138 }
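
/*
 * Illustrative sketch: in-kernel callers change the userspace-visible
 * flags through this helper with RTNL held, mirroring the SIOCSIFFLAGS
 * read-modify-write on the flag word ("dev" is assumed to be held by
 * the caller).
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP | IFF_PROMISC);
 *	rtnl_unlock();
 */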
4139 
4140 /**
4141  *	dev_set_mtu - Change maximum transfer unit
4142  *	@dev: device
4143  *	@new_mtu: new transfer unit
4144  *
4145  *	Change the maximum transfer size of the network device.
4146  */
4147 int dev_set_mtu(struct net_device *dev, int new_mtu)
4148 {
4149 	const struct net_device_ops *ops = dev->netdev_ops;
4150 	int err;
4151 
4152 	if (new_mtu == dev->mtu)
4153 		return 0;
4154 
4155 	/*	MTU must be positive.	 */
4156 	if (new_mtu < 0)
4157 		return -EINVAL;
4158 
4159 	if (!netif_device_present(dev))
4160 		return -ENODEV;
4161 
4162 	err = 0;
4163 	if (ops->ndo_change_mtu)
4164 		err = ops->ndo_change_mtu(dev, new_mtu);
4165 	else
4166 		dev->mtu = new_mtu;
4167 
4168 	if (!err && dev->flags & IFF_UP)
4169 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4170 	return err;
4171 }
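
/*
 * Illustrative sketch (the header length "encap_hdr_len" is
 * hypothetical): tunnel-style drivers typically shrink the MTU of an
 * underlying device to leave room for their encapsulation header,
 * normally with RTNL held.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, dev->mtu - encap_hdr_len);
 *	rtnl_unlock();
 */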
4172 
4173 /**
4174  *	dev_set_mac_address - Change Media Access Control Address
4175  *	@dev: device
4176  *	@sa: new address
4177  *
4178  *	Change the hardware (MAC) address of the device
4179  */
4180 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4181 {
4182 	const struct net_device_ops *ops = dev->netdev_ops;
4183 	int err;
4184 
4185 	if (!ops->ndo_set_mac_address)
4186 		return -EOPNOTSUPP;
4187 	if (sa->sa_family != dev->type)
4188 		return -EINVAL;
4189 	if (!netif_device_present(dev))
4190 		return -ENODEV;
4191 	err = ops->ndo_set_mac_address(dev, sa);
4192 	if (!err)
4193 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4194 	return err;
4195 }
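
/*
 * Illustrative sketch (the buffer "new_mac" is hypothetical): the new
 * address is passed as a struct sockaddr whose sa_family must match
 * dev->type, e.g. ARPHRD_ETHER for Ethernet devices.
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, dev->addr_len);
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */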
4196 
4197 /*
4198  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
4199  */
4200 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4201 {
4202 	int err;
4203 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4204 
4205 	if (!dev)
4206 		return -ENODEV;
4207 
4208 	switch (cmd) {
4209 		case SIOCGIFFLAGS:	/* Get interface flags */
4210 			ifr->ifr_flags = (short) dev_get_flags(dev);
4211 			return 0;
4212 
4213 		case SIOCGIFMETRIC:	/* Get the metric on the interface
4214 					   (currently unused) */
4215 			ifr->ifr_metric = 0;
4216 			return 0;
4217 
4218 		case SIOCGIFMTU:	/* Get the MTU of a device */
4219 			ifr->ifr_mtu = dev->mtu;
4220 			return 0;
4221 
4222 		case SIOCGIFHWADDR:
4223 			if (!dev->addr_len)
4224 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4225 			else
4226 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4227 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4228 			ifr->ifr_hwaddr.sa_family = dev->type;
4229 			return 0;
4230 
4231 		case SIOCGIFSLAVE:
4232 			err = -EINVAL;
4233 			break;
4234 
4235 		case SIOCGIFMAP:
4236 			ifr->ifr_map.mem_start = dev->mem_start;
4237 			ifr->ifr_map.mem_end   = dev->mem_end;
4238 			ifr->ifr_map.base_addr = dev->base_addr;
4239 			ifr->ifr_map.irq       = dev->irq;
4240 			ifr->ifr_map.dma       = dev->dma;
4241 			ifr->ifr_map.port      = dev->if_port;
4242 			return 0;
4243 
4244 		case SIOCGIFINDEX:
4245 			ifr->ifr_ifindex = dev->ifindex;
4246 			return 0;
4247 
4248 		case SIOCGIFTXQLEN:
4249 			ifr->ifr_qlen = dev->tx_queue_len;
4250 			return 0;
4251 
4252 		default:
4253 			/* dev_ioctl() should ensure this case
4254 			 * is never reached
4255 			 */
4256 			WARN_ON(1);
4257 			err = -EINVAL;
4258 			break;
4259 
4260 	}
4261 	return err;
4262 }
4263 
4264 /*
4265  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4266  */
4267 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4268 {
4269 	int err;
4270 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4271 	const struct net_device_ops *ops;
4272 
4273 	if (!dev)
4274 		return -ENODEV;
4275 
4276 	ops = dev->netdev_ops;
4277 
4278 	switch (cmd) {
4279 		case SIOCSIFFLAGS:	/* Set interface flags */
4280 			return dev_change_flags(dev, ifr->ifr_flags);
4281 
4282 		case SIOCSIFMETRIC:	/* Set the metric on the interface
4283 					   (currently unused) */
4284 			return -EOPNOTSUPP;
4285 
4286 		case SIOCSIFMTU:	/* Set the MTU of a device */
4287 			return dev_set_mtu(dev, ifr->ifr_mtu);
4288 
4289 		case SIOCSIFHWADDR:
4290 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4291 
4292 		case SIOCSIFHWBROADCAST:
4293 			if (ifr->ifr_hwaddr.sa_family != dev->type)
4294 				return -EINVAL;
4295 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4296 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4297 			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4298 			return 0;
4299 
4300 		case SIOCSIFMAP:
4301 			if (ops->ndo_set_config) {
4302 				if (!netif_device_present(dev))
4303 					return -ENODEV;
4304 				return ops->ndo_set_config(dev, &ifr->ifr_map);
4305 			}
4306 			return -EOPNOTSUPP;
4307 
4308 		case SIOCADDMULTI:
4309 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4310 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4311 				return -EINVAL;
4312 			if (!netif_device_present(dev))
4313 				return -ENODEV;
4314 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4315 					  dev->addr_len, 1);
4316 
4317 		case SIOCDELMULTI:
4318 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4319 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4320 				return -EINVAL;
4321 			if (!netif_device_present(dev))
4322 				return -ENODEV;
4323 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4324 					     dev->addr_len, 1);
4325 
4326 		case SIOCSIFTXQLEN:
4327 			if (ifr->ifr_qlen < 0)
4328 				return -EINVAL;
4329 			dev->tx_queue_len = ifr->ifr_qlen;
4330 			return 0;
4331 
4332 		case SIOCSIFNAME:
4333 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4334 			return dev_change_name(dev, ifr->ifr_newname);
4335 
4336 		/*
4337 		 *	Unknown or private ioctl
4338 		 */
4339 
4340 		default:
4341 			if ((cmd >= SIOCDEVPRIVATE &&
4342 			    cmd <= SIOCDEVPRIVATE + 15) ||
4343 			    cmd == SIOCBONDENSLAVE ||
4344 			    cmd == SIOCBONDRELEASE ||
4345 			    cmd == SIOCBONDSETHWADDR ||
4346 			    cmd == SIOCBONDSLAVEINFOQUERY ||
4347 			    cmd == SIOCBONDINFOQUERY ||
4348 			    cmd == SIOCBONDCHANGEACTIVE ||
4349 			    cmd == SIOCGMIIPHY ||
4350 			    cmd == SIOCGMIIREG ||
4351 			    cmd == SIOCSMIIREG ||
4352 			    cmd == SIOCBRADDIF ||
4353 			    cmd == SIOCBRDELIF ||
4354 			    cmd == SIOCSHWTSTAMP ||
4355 			    cmd == SIOCWANDEV) {
4356 				err = -EOPNOTSUPP;
4357 				if (ops->ndo_do_ioctl) {
4358 					if (netif_device_present(dev))
4359 						err = ops->ndo_do_ioctl(dev, ifr, cmd);
4360 					else
4361 						err = -ENODEV;
4362 				}
4363 			} else
4364 				err = -EINVAL;
4365 
4366 	}
4367 	return err;
4368 }
4369 
4370 /*
4371  *	This function handles all "interface"-type I/O control requests. The actual
4372  *	'doing' part of this is dev_ifsioc above.
4373  */
4374 
4375 /**
4376  *	dev_ioctl	-	network device ioctl
4377  *	@net: the applicable net namespace
4378  *	@cmd: command to issue
4379  *	@arg: pointer to a struct ifreq in user space
4380  *
4381  *	Issue ioctl functions to devices. This is normally called by the
4382  *	user space syscall interfaces but can sometimes be useful for
4383  *	other purposes. The return value is the return from the syscall if
4384  *	positive or a negative errno code on error.
4385  */
4386 
4387 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4388 {
4389 	struct ifreq ifr;
4390 	int ret;
4391 	char *colon;
4392 
4393 	/* One special case: SIOCGIFCONF takes ifconf argument
4394 	   and requires shared lock, because it sleeps writing
4395 	   to user space.
4396 	 */
4397 
4398 	if (cmd == SIOCGIFCONF) {
4399 		rtnl_lock();
4400 		ret = dev_ifconf(net, (char __user *) arg);
4401 		rtnl_unlock();
4402 		return ret;
4403 	}
4404 	if (cmd == SIOCGIFNAME)
4405 		return dev_ifname(net, (struct ifreq __user *)arg);
4406 
4407 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4408 		return -EFAULT;
4409 
4410 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4411 
4412 	colon = strchr(ifr.ifr_name, ':');
4413 	if (colon)
4414 		*colon = 0;
4415 
4416 	/*
4417 	 *	See which interface the caller is talking about.
4418 	 */
4419 
4420 	switch (cmd) {
4421 		/*
4422 		 *	These ioctl calls:
4423 		 *	- can be done by all.
4424 		 *	- atomic and do not require locking.
4425 		 *	- return a value
4426 		 */
4427 		case SIOCGIFFLAGS:
4428 		case SIOCGIFMETRIC:
4429 		case SIOCGIFMTU:
4430 		case SIOCGIFHWADDR:
4431 		case SIOCGIFSLAVE:
4432 		case SIOCGIFMAP:
4433 		case SIOCGIFINDEX:
4434 		case SIOCGIFTXQLEN:
4435 			dev_load(net, ifr.ifr_name);
4436 			read_lock(&dev_base_lock);
4437 			ret = dev_ifsioc_locked(net, &ifr, cmd);
4438 			read_unlock(&dev_base_lock);
4439 			if (!ret) {
4440 				if (colon)
4441 					*colon = ':';
4442 				if (copy_to_user(arg, &ifr,
4443 						 sizeof(struct ifreq)))
4444 					ret = -EFAULT;
4445 			}
4446 			return ret;
4447 
4448 		case SIOCETHTOOL:
4449 			dev_load(net, ifr.ifr_name);
4450 			rtnl_lock();
4451 			ret = dev_ethtool(net, &ifr);
4452 			rtnl_unlock();
4453 			if (!ret) {
4454 				if (colon)
4455 					*colon = ':';
4456 				if (copy_to_user(arg, &ifr,
4457 						 sizeof(struct ifreq)))
4458 					ret = -EFAULT;
4459 			}
4460 			return ret;
4461 
4462 		/*
4463 		 *	These ioctl calls:
4464 		 *	- require superuser power.
4465 		 *	- require strict serialization.
4466 		 *	- return a value
4467 		 */
4468 		case SIOCGMIIPHY:
4469 		case SIOCGMIIREG:
4470 		case SIOCSIFNAME:
4471 			if (!capable(CAP_NET_ADMIN))
4472 				return -EPERM;
4473 			dev_load(net, ifr.ifr_name);
4474 			rtnl_lock();
4475 			ret = dev_ifsioc(net, &ifr, cmd);
4476 			rtnl_unlock();
4477 			if (!ret) {
4478 				if (colon)
4479 					*colon = ':';
4480 				if (copy_to_user(arg, &ifr,
4481 						 sizeof(struct ifreq)))
4482 					ret = -EFAULT;
4483 			}
4484 			return ret;
4485 
4486 		/*
4487 		 *	These ioctl calls:
4488 		 *	- require superuser power.
4489 		 *	- require strict serialization.
4490 		 *	- do not return a value
4491 		 */
4492 		case SIOCSIFFLAGS:
4493 		case SIOCSIFMETRIC:
4494 		case SIOCSIFMTU:
4495 		case SIOCSIFMAP:
4496 		case SIOCSIFHWADDR:
4497 		case SIOCSIFSLAVE:
4498 		case SIOCADDMULTI:
4499 		case SIOCDELMULTI:
4500 		case SIOCSIFHWBROADCAST:
4501 		case SIOCSIFTXQLEN:
4502 		case SIOCSMIIREG:
4503 		case SIOCBONDENSLAVE:
4504 		case SIOCBONDRELEASE:
4505 		case SIOCBONDSETHWADDR:
4506 		case SIOCBONDCHANGEACTIVE:
4507 		case SIOCBRADDIF:
4508 		case SIOCBRDELIF:
4509 		case SIOCSHWTSTAMP:
4510 			if (!capable(CAP_NET_ADMIN))
4511 				return -EPERM;
4512 			/* fall through */
4513 		case SIOCBONDSLAVEINFOQUERY:
4514 		case SIOCBONDINFOQUERY:
4515 			dev_load(net, ifr.ifr_name);
4516 			rtnl_lock();
4517 			ret = dev_ifsioc(net, &ifr, cmd);
4518 			rtnl_unlock();
4519 			return ret;
4520 
4521 		case SIOCGIFMEM:
4522 			/* Get the per device memory space. We can add this but
4523 			 * currently do not support it */
4524 		case SIOCSIFMEM:
4525 			/* Set the per device memory buffer space.
4526 			 * Not applicable in our case */
4527 		case SIOCSIFLINK:
4528 			return -EINVAL;
4529 
4530 		/*
4531 		 *	Unknown or private ioctl.
4532 		 */
4533 		default:
4534 			if (cmd == SIOCWANDEV ||
4535 			    (cmd >= SIOCDEVPRIVATE &&
4536 			     cmd <= SIOCDEVPRIVATE + 15)) {
4537 				dev_load(net, ifr.ifr_name);
4538 				rtnl_lock();
4539 				ret = dev_ifsioc(net, &ifr, cmd);
4540 				rtnl_unlock();
4541 				if (!ret && copy_to_user(arg, &ifr,
4542 							 sizeof(struct ifreq)))
4543 					ret = -EFAULT;
4544 				return ret;
4545 			}
4546 			/* Take care of Wireless Extensions */
4547 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4548 				return wext_handle_ioctl(net, &ifr, cmd, arg);
4549 			return -EINVAL;
4550 	}
4551 }
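
/*
 * Illustrative sketch of the user-space side of this interface (not
 * kernel code; the device name "eth0" is just an example): querying a
 * device's MTU goes through SIOCGIFMTU on any socket.
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("mtu %d\n", ifr.ifr_mtu);
 */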
4552 
4553 
4554 /**
4555  *	dev_new_index	-	allocate an ifindex
4556  *	@net: the applicable net namespace
4557  *
4558  *	Returns a suitable unique value for a new device interface
4559  *	number.  The caller must hold the rtnl semaphore or the
4560  *	dev_base_lock to be sure it remains unique.
4561  */
4562 static int dev_new_index(struct net *net)
4563 {
4564 	static int ifindex;
4565 	for (;;) {
4566 		if (++ifindex <= 0)
4567 			ifindex = 1;
4568 		if (!__dev_get_by_index(net, ifindex))
4569 			return ifindex;
4570 	}
4571 }
4572 
4573 /* Delayed registration/unregistration */
4574 static LIST_HEAD(net_todo_list);
4575 
4576 static void net_set_todo(struct net_device *dev)
4577 {
4578 	list_add_tail(&dev->todo_list, &net_todo_list);
4579 }
4580 
4581 static void rollback_registered(struct net_device *dev)
4582 {
4583 	BUG_ON(dev_boot_phase);
4584 	ASSERT_RTNL();
4585 
4586 	/* Some devices call this without registering, to unwind failed initialization. */
4587 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4588 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4589 				  "was registered\n", dev->name, dev);
4590 
4591 		WARN_ON(1);
4592 		return;
4593 	}
4594 
4595 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
4596 
4597 	/* If device is running, close it first. */
4598 	dev_close(dev);
4599 
4600 	/* And unlink it from device chain. */
4601 	unlist_netdevice(dev);
4602 
4603 	dev->reg_state = NETREG_UNREGISTERING;
4604 
4605 	synchronize_net();
4606 
4607 	/* Shutdown queueing discipline. */
4608 	dev_shutdown(dev);
4609 
4610 
4611 	/* Notify protocols that we are about to destroy
4612 	   this device. They should clean up all of their state.
4613 	*/
4614 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4615 
4616 	/*
4617 	 *	Flush the unicast and multicast chains
4618 	 */
4619 	dev_unicast_flush(dev);
4620 	dev_addr_discard(dev);
4621 
4622 	if (dev->netdev_ops->ndo_uninit)
4623 		dev->netdev_ops->ndo_uninit(dev);
4624 
4625 	/* Notifier chain MUST detach us from master device. */
4626 	WARN_ON(dev->master);
4627 
4628 	/* Remove entries from kobject tree */
4629 	netdev_unregister_kobject(dev);
4630 
4631 	synchronize_net();
4632 
4633 	dev_put(dev);
4634 }
4635 
4636 static void __netdev_init_queue_locks_one(struct net_device *dev,
4637 					  struct netdev_queue *dev_queue,
4638 					  void *_unused)
4639 {
4640 	spin_lock_init(&dev_queue->_xmit_lock);
4641 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4642 	dev_queue->xmit_lock_owner = -1;
4643 }
4644 
4645 static void netdev_init_queue_locks(struct net_device *dev)
4646 {
4647 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4648 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4649 }
4650 
4651 unsigned long netdev_fix_features(unsigned long features, const char *name)
4652 {
4653 	/* Fix illegal SG+CSUM combinations. */
4654 	if ((features & NETIF_F_SG) &&
4655 	    !(features & NETIF_F_ALL_CSUM)) {
4656 		if (name)
4657 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4658 			       "checksum feature.\n", name);
4659 		features &= ~NETIF_F_SG;
4660 	}
4661 
4662 	/* TSO requires that SG is present as well. */
4663 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4664 		if (name)
4665 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4666 			       "SG feature.\n", name);
4667 		features &= ~NETIF_F_TSO;
4668 	}
4669 
4670 	if (features & NETIF_F_UFO) {
4671 		if (!(features & NETIF_F_GEN_CSUM)) {
4672 			if (name)
4673 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4674 				       "since no NETIF_F_HW_CSUM feature.\n",
4675 				       name);
4676 			features &= ~NETIF_F_UFO;
4677 		}
4678 
4679 		if (!(features & NETIF_F_SG)) {
4680 			if (name)
4681 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4682 				       "since no NETIF_F_SG feature.\n", name);
4683 			features &= ~NETIF_F_UFO;
4684 		}
4685 	}
4686 
4687 	return features;
4688 }
4689 EXPORT_SYMBOL(netdev_fix_features);
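
/*
 * Illustrative sketch (hypothetical feature choice): a driver that sets
 * its feature bits conditionally can run the result through
 * netdev_fix_features() before registration; here NETIF_F_SG is dropped
 * because no checksum feature is set, and NETIF_F_TSO is then dropped
 * because it requires SG.
 *
 *	dev->features = NETIF_F_SG | NETIF_F_TSO;
 *	dev->features = netdev_fix_features(dev->features, dev->name);
 */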
4690 
4691 /**
4692  *	register_netdevice	- register a network device
4693  *	@dev: device to register
4694  *
4695  *	Take a completed network device structure and add it to the kernel
4696  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4697  *	chain. 0 is returned on success. A negative errno code is returned
4698  *	on a failure to set up the device, or if the name is a duplicate.
4699  *
4700  *	Callers must hold the rtnl semaphore. You may want
4701  *	register_netdev() instead of this.
4702  *
4703  *	BUGS:
4704  *	The locking appears insufficient to guarantee two parallel registers
4705  *	will not get the same name.
4706  */
4707 
4708 int register_netdevice(struct net_device *dev)
4709 {
4710 	struct hlist_head *head;
4711 	struct hlist_node *p;
4712 	int ret;
4713 	struct net *net = dev_net(dev);
4714 
4715 	BUG_ON(dev_boot_phase);
4716 	ASSERT_RTNL();
4717 
4718 	might_sleep();
4719 
4720 	/* When net_devices are persistent, this will be fatal. */
4721 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4722 	BUG_ON(!net);
4723 
4724 	spin_lock_init(&dev->addr_list_lock);
4725 	netdev_set_addr_lockdep_class(dev);
4726 	netdev_init_queue_locks(dev);
4727 
4728 	dev->iflink = -1;
4729 
4730 	/* Init, if this function is available */
4731 	if (dev->netdev_ops->ndo_init) {
4732 		ret = dev->netdev_ops->ndo_init(dev);
4733 		if (ret) {
4734 			if (ret > 0)
4735 				ret = -EIO;
4736 			goto out;
4737 		}
4738 	}
4739 
4740 	if (!dev_valid_name(dev->name)) {
4741 		ret = -EINVAL;
4742 		goto err_uninit;
4743 	}
4744 
4745 	dev->ifindex = dev_new_index(net);
4746 	if (dev->iflink == -1)
4747 		dev->iflink = dev->ifindex;
4748 
4749 	/* Check for existence of name */
4750 	head = dev_name_hash(net, dev->name);
4751 	hlist_for_each(p, head) {
4752 		struct net_device *d
4753 			= hlist_entry(p, struct net_device, name_hlist);
4754 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4755 			ret = -EEXIST;
4756 			goto err_uninit;
4757 		}
4758 	}
4759 
4760 	/* Fix illegal checksum combinations */
4761 	if ((dev->features & NETIF_F_HW_CSUM) &&
4762 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4763 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4764 		       dev->name);
4765 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4766 	}
4767 
4768 	if ((dev->features & NETIF_F_NO_CSUM) &&
4769 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4770 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4771 		       dev->name);
4772 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4773 	}
4774 
4775 	dev->features = netdev_fix_features(dev->features, dev->name);
4776 
4777 	/* Enable software GSO if SG is supported. */
4778 	if (dev->features & NETIF_F_SG)
4779 		dev->features |= NETIF_F_GSO;
4780 
4781 	netdev_initialize_kobject(dev);
4782 	ret = netdev_register_kobject(dev);
4783 	if (ret)
4784 		goto err_uninit;
4785 	dev->reg_state = NETREG_REGISTERED;
4786 
4787 	/*
4788 	 *	Default initial state at registration is that the
4789 	 *	device is present.
4790 	 */
4791 
4792 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4793 
4794 	dev_init_scheduler(dev);
4795 	dev_hold(dev);
4796 	list_netdevice(dev);
4797 
4798 	/* Notify protocols that a new device appeared. */
4799 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4800 	ret = notifier_to_errno(ret);
4801 	if (ret) {
4802 		rollback_registered(dev);
4803 		dev->reg_state = NETREG_UNREGISTERED;
4804 	}
4805 
4806 out:
4807 	return ret;
4808 
4809 err_uninit:
4810 	if (dev->netdev_ops->ndo_uninit)
4811 		dev->netdev_ops->ndo_uninit(dev);
4812 	goto out;
4813 }
4814 
4815 /**
4816  *	init_dummy_netdev	- init a dummy network device for NAPI
4817  *	@dev: device to init
4818  *
4819  *	This takes a network device structure and initializes the minimum
4820  *	number of fields so it can be used to schedule NAPI polls without
4821  *	registering a full blown interface. This is to be used by drivers
4822  *	that need to tie several hardware interfaces to a single NAPI
4823  *	poll scheduler due to HW limitations.
4824  */
4825 int init_dummy_netdev(struct net_device *dev)
4826 {
4827 	/* Clear everything. Note we don't initialize spinlocks
4828 	 * as they aren't supposed to be taken by any of the
4829 	 * NAPI code and this dummy netdev is supposed to be
4830 	 * only ever used for NAPI polls
4831 	 */
4832 	memset(dev, 0, sizeof(struct net_device));
4833 
4834 	/* make sure we BUG if trying to hit standard
4835 	 * register/unregister code path
4836 	 */
4837 	dev->reg_state = NETREG_DUMMY;
4838 
4839 	/* initialize the ref count */
4840 	atomic_set(&dev->refcnt, 1);
4841 
4842 	/* NAPI wants this */
4843 	INIT_LIST_HEAD(&dev->napi_list);
4844 
4845 	/* a dummy interface is started by default */
4846 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4847 	set_bit(__LINK_STATE_START, &dev->state);
4848 
4849 	return 0;
4850 }
4851 EXPORT_SYMBOL_GPL(init_dummy_netdev);
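
/*
 * Illustrative sketch (hypothetical driver "foo" with one piece of
 * hardware but several NAPI contexts): the dummy netdev only exists so
 * netif_napi_add() has a device to attach the contexts to.
 *
 *	init_dummy_netdev(&priv->napi_dev);
 *	netif_napi_add(&priv->napi_dev, &priv->rx_napi, foo_rx_poll, 64);
 *	netif_napi_add(&priv->napi_dev, &priv->tx_napi, foo_tx_poll, 64);
 */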
4852 
4853 
4854 /**
4855  *	register_netdev	- register a network device
4856  *	@dev: device to register
4857  *
4858  *	Take a completed network device structure and add it to the kernel
4859  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4860  *	chain. 0 is returned on success. A negative errno code is returned
4861  *	on a failure to set up the device, or if the name is a duplicate.
4862  *
4863  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4864  *	and expands the device name if you passed a format string to
4865  *	alloc_netdev.
4866  */
4867 int register_netdev(struct net_device *dev)
4868 {
4869 	int err;
4870 
4871 	rtnl_lock();
4872 
4873 	/*
4874 	 * If the name is a format string the caller wants us to do a
4875 	 * name allocation.
4876 	 */
4877 	if (strchr(dev->name, '%')) {
4878 		err = dev_alloc_name(dev, dev->name);
4879 		if (err < 0)
4880 			goto out;
4881 	}
4882 
4883 	err = register_netdevice(dev);
4884 out:
4885 	rtnl_unlock();
4886 	return err;
4887 }
4888 EXPORT_SYMBOL(register_netdev);
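
/*
 * Illustrative sketch of a typical probe sequence (hypothetical Ethernet
 * driver "foo"); note that register_netdev() takes RTNL itself, so the
 * caller must not already hold it.
 *
 *	dev = alloc_etherdev(sizeof(struct foo_priv));
 *	if (!dev)
 *		return -ENOMEM;
 *	dev->netdev_ops = &foo_netdev_ops;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */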
4889 
4890 /*
4891  * netdev_wait_allrefs - wait until all references are gone.
4892  *
4893  * This is called when unregistering network devices.
4894  *
4895  * Any protocol or device that holds a reference should register
4896  * for netdevice notification, and cleanup and put back the
4897  * reference if they receive an UNREGISTER event.
4898  * We can get stuck here if buggy protocols don't correctly
4899  * call dev_put.
4900  */
4901 static void netdev_wait_allrefs(struct net_device *dev)
4902 {
4903 	unsigned long rebroadcast_time, warning_time;
4904 
4905 	rebroadcast_time = warning_time = jiffies;
4906 	while (atomic_read(&dev->refcnt) != 0) {
4907 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4908 			rtnl_lock();
4909 
4910 			/* Rebroadcast unregister notification */
4911 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4912 
4913 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4914 				     &dev->state)) {
4915 				/* We must not have linkwatch events
4916 				 * pending on unregister. If this
4917 				 * happens, we simply run the queue
4918 				 * unscheduled, resulting in a noop
4919 				 * for this device.
4920 				 */
4921 				linkwatch_run_queue();
4922 			}
4923 
4924 			__rtnl_unlock();
4925 
4926 			rebroadcast_time = jiffies;
4927 		}
4928 
4929 		msleep(250);
4930 
4931 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4932 			printk(KERN_EMERG "unregister_netdevice: "
4933 			       "waiting for %s to become free. Usage "
4934 			       "count = %d\n",
4935 			       dev->name, atomic_read(&dev->refcnt));
4936 			warning_time = jiffies;
4937 		}
4938 	}
4939 }
4940 
4941 /* The sequence is:
4942  *
4943  *	rtnl_lock();
4944  *	...
4945  *	register_netdevice(x1);
4946  *	register_netdevice(x2);
4947  *	...
4948  *	unregister_netdevice(y1);
4949  *	unregister_netdevice(y2);
4950  *      ...
4951  *	rtnl_unlock();
4952  *	free_netdev(y1);
4953  *	free_netdev(y2);
4954  *
4955  * We are invoked by rtnl_unlock().
4956  * This allows us to deal with problems:
4957  * 1) We can delete sysfs objects which invoke hotplug
4958  *    without deadlocking with linkwatch via keventd.
4959  * 2) Since we run with the RTNL semaphore not held, we can sleep
4960  *    safely in order to wait for the netdev refcnt to drop to zero.
4961  *
4962  * We must not return until all unregister events added during
4963  * the interval the lock was held have been completed.
4964  */
4965 void netdev_run_todo(void)
4966 {
4967 	struct list_head list;
4968 
4969 	/* Snapshot list, allow later requests */
4970 	list_replace_init(&net_todo_list, &list);
4971 
4972 	__rtnl_unlock();
4973 
4974 	while (!list_empty(&list)) {
4975 		struct net_device *dev
4976 			= list_entry(list.next, struct net_device, todo_list);
4977 		list_del(&dev->todo_list);
4978 
4979 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4980 			printk(KERN_ERR "network todo '%s' but state %d\n",
4981 			       dev->name, dev->reg_state);
4982 			dump_stack();
4983 			continue;
4984 		}
4985 
4986 		dev->reg_state = NETREG_UNREGISTERED;
4987 
4988 		on_each_cpu(flush_backlog, dev, 1);
4989 
4990 		netdev_wait_allrefs(dev);
4991 
4992 		/* paranoia */
4993 		BUG_ON(atomic_read(&dev->refcnt));
4994 		WARN_ON(dev->ip_ptr);
4995 		WARN_ON(dev->ip6_ptr);
4996 		WARN_ON(dev->dn_ptr);
4997 
4998 		if (dev->destructor)
4999 			dev->destructor(dev);
5000 
5001 		/* Free network device */
5002 		kobject_put(&dev->dev.kobj);
5003 	}
5004 }
5005 
5006 /**
5007  *	dev_get_stats	- get network device statistics
5008  *	@dev: device to get statistics from
5009  *
5010  *	Get network statistics from device. The device driver may provide
5011  *	its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
5012  *	the internal statistics structure is used.
5013  */
5014 const struct net_device_stats *dev_get_stats(struct net_device *dev)
5015 {
5016 	const struct net_device_ops *ops = dev->netdev_ops;
5017 
5018 	if (ops->ndo_get_stats)
5019 		return ops->ndo_get_stats(dev);
5020 	else {
5021 		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5022 		struct net_device_stats *stats = &dev->stats;
5023 		unsigned int i;
5024 		struct netdev_queue *txq;
5025 
5026 		for (i = 0; i < dev->num_tx_queues; i++) {
5027 			txq = netdev_get_tx_queue(dev, i);
5028 			tx_bytes   += txq->tx_bytes;
5029 			tx_packets += txq->tx_packets;
5030 			tx_dropped += txq->tx_dropped;
5031 		}
5032 		if (tx_bytes || tx_packets || tx_dropped) {
5033 			stats->tx_bytes   = tx_bytes;
5034 			stats->tx_packets = tx_packets;
5035 			stats->tx_dropped = tx_dropped;
5036 		}
5037 		return stats;
5038 	}
5039 }
5040 EXPORT_SYMBOL(dev_get_stats);
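
/*
 * Illustrative sketch (hypothetical driver): a driver that keeps its own
 * counters supplies ndo_get_stats and fills in a net_device_stats
 * structure itself; otherwise the default path above is used.
 *
 *	static struct net_device_stats *foo_get_stats(struct net_device *dev)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		dev->stats.rx_packets = priv->hw_rx_packets;
 *		dev->stats.rx_bytes   = priv->hw_rx_bytes;
 *		return &dev->stats;
 *	}
 */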
5041 
5042 static void netdev_init_one_queue(struct net_device *dev,
5043 				  struct netdev_queue *queue,
5044 				  void *_unused)
5045 {
5046 	queue->dev = dev;
5047 }
5048 
5049 static void netdev_init_queues(struct net_device *dev)
5050 {
5051 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5052 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5053 	spin_lock_init(&dev->tx_global_lock);
5054 }
5055 
5056 /**
5057  *	alloc_netdev_mq - allocate network device
5058  *	@sizeof_priv:	size of private data to allocate space for
5059  *	@name:		device name format string
5060  *	@setup:		callback to initialize device
5061  *	@queue_count:	the number of subqueues to allocate
5062  *
5063  *	Allocates a struct net_device with private data area for driver use
5064  *	and performs basic initialization.  Also allocates subqueue structs
5065  *	for each queue on the device at the end of the netdevice.
5066  */
5067 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5068 		void (*setup)(struct net_device *), unsigned int queue_count)
5069 {
5070 	struct netdev_queue *tx;
5071 	struct net_device *dev;
5072 	size_t alloc_size;
5073 	struct net_device *p;
5074 
5075 	BUG_ON(strlen(name) >= sizeof(dev->name));
5076 
5077 	alloc_size = sizeof(struct net_device);
5078 	if (sizeof_priv) {
5079 		/* ensure 32-byte alignment of private area */
5080 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5081 		alloc_size += sizeof_priv;
5082 	}
5083 	/* ensure 32-byte alignment of whole construct */
5084 	alloc_size += NETDEV_ALIGN - 1;
5085 
5086 	p = kzalloc(alloc_size, GFP_KERNEL);
5087 	if (!p) {
5088 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5089 		return NULL;
5090 	}
5091 
5092 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5093 	if (!tx) {
5094 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5095 		       "tx qdiscs.\n");
5096 		goto free_p;
5097 	}
5098 
5099 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5100 	dev->padded = (char *)dev - (char *)p;
5101 
5102 	if (dev_addr_init(dev))
5103 		goto free_tx;
5104 
5105 	dev_unicast_init(dev);
5106 
5107 	dev_net_set(dev, &init_net);
5108 
5109 	dev->_tx = tx;
5110 	dev->num_tx_queues = queue_count;
5111 	dev->real_num_tx_queues = queue_count;
5112 
5113 	dev->gso_max_size = GSO_MAX_SIZE;
5114 
5115 	netdev_init_queues(dev);
5116 
5117 	INIT_LIST_HEAD(&dev->napi_list);
5118 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5119 	setup(dev);
5120 	strcpy(dev->name, name);
5121 	return dev;
5122 
5123 free_tx:
5124 	kfree(tx);
5125 
5126 free_p:
5127 	kfree(p);
5128 	return NULL;
5129 }
5130 EXPORT_SYMBOL(alloc_netdev_mq);
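
/*
 * Illustrative sketch (hypothetical multiqueue Ethernet driver): the
 * common alloc_netdev() helper is simply this function with a queue
 * count of one.
 *
 *	dev = alloc_netdev_mq(sizeof(struct foo_priv), "foo%d",
 *			      ether_setup, num_tx_queues);
 *	if (!dev)
 *		return -ENOMEM;
 *	priv = netdev_priv(dev);
 */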
5131 
5132 /**
5133  *	free_netdev - free network device
5134  *	@dev: device
5135  *
5136  *	This function does the last stage of destroying an allocated device
5137  * 	interface. The reference to the device object is released.
5138  *	If this is the last reference then it will be freed.
5139  */
5140 void free_netdev(struct net_device *dev)
5141 {
5142 	struct napi_struct *p, *n;
5143 
5144 	release_net(dev_net(dev));
5145 
5146 	kfree(dev->_tx);
5147 
5148 	/* Flush device addresses */
5149 	dev_addr_flush(dev);
5150 
5151 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5152 		netif_napi_del(p);
5153 
5154 	/*  Compatibility with error handling in drivers */
5155 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5156 		kfree((char *)dev - dev->padded);
5157 		return;
5158 	}
5159 
5160 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5161 	dev->reg_state = NETREG_RELEASED;
5162 
5163 	/* will free via device release */
5164 	put_device(&dev->dev);
5165 }
5166 
5167 /**
5168  *	synchronize_net -  Synchronize with packet receive processing
5169  *
5170  *	Wait for packets currently being received to be done.
5171  *	Does not block later packets from starting.
5172  */
5173 void synchronize_net(void)
5174 {
5175 	might_sleep();
5176 	synchronize_rcu();
5177 }
5178 
5179 /**
5180  *	unregister_netdevice - remove device from the kernel
5181  *	@dev: device
5182  *
5183  *	This function shuts down a device interface and removes it
5184  *	from the kernel tables.
5185  *
5186  *	Callers must hold the rtnl semaphore.  You may want
5187  *	unregister_netdev() instead of this.
5188  */
5189 
5190 void unregister_netdevice(struct net_device *dev)
5191 {
5192 	ASSERT_RTNL();
5193 
5194 	rollback_registered(dev);
5195 	/* Finish processing unregister after unlock */
5196 	net_set_todo(dev);
5197 }
5198 
5199 /**
5200  *	unregister_netdev - remove device from the kernel
5201  *	@dev: device
5202  *
5203  *	This function shuts down a device interface and removes it
5204  *	from the kernel tables.
5205  *
5206  *	This is just a wrapper for unregister_netdevice that takes
5207  *	the rtnl semaphore.  In general you want to use this and not
5208  *	unregister_netdevice.
5209  */
5210 void unregister_netdev(struct net_device *dev)
5211 {
5212 	rtnl_lock();
5213 	unregister_netdevice(dev);
5214 	rtnl_unlock();
5215 }
5216 
5217 EXPORT_SYMBOL(unregister_netdev);
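
/*
 * Illustrative sketch of the matching remove sequence for the probe
 * example above: unregister first (which waits for all references via
 * the todo list), then free the structure.
 *
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */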
5218 
5219 /**
5220  *	dev_change_net_namespace - move device to a different network namespace
5221  *	@dev: device
5222  *	@net: network namespace
5223  *	@pat: If not NULL name pattern to try if the current device name
5224  *	      is already taken in the destination network namespace.
5225  *
5226  *	This function shuts down a device interface and moves it
5227  *	to a new network namespace. On success 0 is returned, on
5228  *	a failure a netagive errno code is returned.
5229  *	a failure a negative errno code is returned.
5230  *	Callers must hold the rtnl semaphore.
5231  */
5232 
5233 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5234 {
5235 	char buf[IFNAMSIZ];
5236 	const char *destname;
5237 	int err;
5238 
5239 	ASSERT_RTNL();
5240 
5241 	/* Don't allow namespace local devices to be moved. */
5242 	err = -EINVAL;
5243 	if (dev->features & NETIF_F_NETNS_LOCAL)
5244 		goto out;
5245 
5246 #ifdef CONFIG_SYSFS
5247 	/* Don't allow real devices to be moved when sysfs
5248 	 * is enabled.
5249 	 */
5250 	err = -EINVAL;
5251 	if (dev->dev.parent)
5252 		goto out;
5253 #endif
5254 
5255 	/* Ensure the device has been registered */
5256 	err = -EINVAL;
5257 	if (dev->reg_state != NETREG_REGISTERED)
5258 		goto out;
5259 
5260 	/* Get out if there is nothing to do */
5261 	err = 0;
5262 	if (net_eq(dev_net(dev), net))
5263 		goto out;
5264 
5265 	/* Pick the destination device name, and ensure
5266 	 * we can use it in the destination network namespace.
5267 	 */
5268 	err = -EEXIST;
5269 	destname = dev->name;
5270 	if (__dev_get_by_name(net, destname)) {
5271 		/* We get here if we can't use the current device name */
5272 		if (!pat)
5273 			goto out;
5274 		if (!dev_valid_name(pat))
5275 			goto out;
5276 		if (strchr(pat, '%')) {
5277 			if (__dev_alloc_name(net, pat, buf) < 0)
5278 				goto out;
5279 			destname = buf;
5280 		} else
5281 			destname = pat;
5282 		if (__dev_get_by_name(net, destname))
5283 			goto out;
5284 	}
5285 
5286 	/*
5287 	 * And now a mini version of register_netdevice and unregister_netdevice.
5288 	 */
5289 
5290 	/* If device is running close it first. */
5291 	dev_close(dev);
5292 
5293 	/* And unlink it from device chain */
5294 	err = -ENODEV;
5295 	unlist_netdevice(dev);
5296 
5297 	synchronize_net();
5298 
5299 	/* Shutdown queueing discipline. */
5300 	dev_shutdown(dev);
5301 
5302 	/* Notify protocols that we are about to destroy
5303 	   this device. They should clean up all of their state.
5304 	*/
5305 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5306 
5307 	/*
5308 	 *	Flush the unicast and multicast chains
5309 	 */
5310 	dev_unicast_flush(dev);
5311 	dev_addr_discard(dev);
5312 
5313 	netdev_unregister_kobject(dev);
5314 
5315 	/* Actually switch the network namespace */
5316 	dev_net_set(dev, net);
5317 
5318 	/* Assign the new device name */
5319 	if (destname != dev->name)
5320 		strcpy(dev->name, destname);
5321 
5322 	/* If there is an ifindex conflict assign a new one */
5323 	if (__dev_get_by_index(net, dev->ifindex)) {
5324 		int iflink = (dev->iflink == dev->ifindex);
5325 		dev->ifindex = dev_new_index(net);
5326 		if (iflink)
5327 			dev->iflink = dev->ifindex;
5328 	}
5329 
5330 	/* Fixup kobjects */
5331 	err = netdev_register_kobject(dev);
5332 	WARN_ON(err);
5333 
5334 	/* Add the device back in the hashes */
5335 	list_netdevice(dev);
5336 
5337 	/* Notify protocols that a new device appeared. */
5338 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5339 
5340 	synchronize_net();
5341 	err = 0;
5342 out:
5343 	return err;
5344 }
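
/*
 * Illustrative sketch (hypothetical in-kernel caller; the rtnetlink
 * "ip link set DEV netns" path is the usual user of this function):
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(dev, newnet, "eth%d");
 *	rtnl_unlock();
 */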
5345 
5346 static int dev_cpu_callback(struct notifier_block *nfb,
5347 			    unsigned long action,
5348 			    void *ocpu)
5349 {
5350 	struct sk_buff **list_skb;
5351 	struct Qdisc **list_net;
5352 	struct sk_buff *skb;
5353 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5354 	struct softnet_data *sd, *oldsd;
5355 
5356 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5357 		return NOTIFY_OK;
5358 
5359 	local_irq_disable();
5360 	cpu = smp_processor_id();
5361 	sd = &per_cpu(softnet_data, cpu);
5362 	oldsd = &per_cpu(softnet_data, oldcpu);
5363 
5364 	/* Find end of our completion_queue. */
5365 	list_skb = &sd->completion_queue;
5366 	while (*list_skb)
5367 		list_skb = &(*list_skb)->next;
5368 	/* Append completion queue from offline CPU. */
5369 	*list_skb = oldsd->completion_queue;
5370 	oldsd->completion_queue = NULL;
5371 
5372 	/* Find end of our output_queue. */
5373 	list_net = &sd->output_queue;
5374 	while (*list_net)
5375 		list_net = &(*list_net)->next_sched;
5376 	/* Append output queue from offline CPU. */
5377 	*list_net = oldsd->output_queue;
5378 	oldsd->output_queue = NULL;
5379 
5380 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5381 	local_irq_enable();
5382 
5383 	/* Process offline CPU's input_pkt_queue */
5384 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5385 		netif_rx(skb);
5386 
5387 	return NOTIFY_OK;
5388 }
5389 
5390 
5391 /**
5392  *	netdev_increment_features - increment feature set by one
5393  *	@all: current feature set
5394  *	@one: new feature set
5395  *	@mask: mask feature set
5396  *
5397  *	Computes a new feature set after adding a device with feature set
5398  *	@one to the master device with current feature set @all.  Will not
5399  *	enable anything that is off in @mask. Returns the new feature set.
5400  */
5401 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5402 					unsigned long mask)
5403 {
5404 	/* If device needs checksumming, downgrade to it. */
5405 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5406 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5407 	else if (mask & NETIF_F_ALL_CSUM) {
5408 		/* If one device supports v4/v6 checksumming, set for all. */
5409 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5410 		    !(all & NETIF_F_GEN_CSUM)) {
5411 			all &= ~NETIF_F_ALL_CSUM;
5412 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5413 		}
5414 
5415 		/* If one device supports hw checksumming, set for all. */
5416 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5417 			all &= ~NETIF_F_ALL_CSUM;
5418 			all |= NETIF_F_HW_CSUM;
5419 		}
5420 	}
5421 
5422 	one |= NETIF_F_ALL_CSUM;
5423 
5424 	one |= all & NETIF_F_ONE_FOR_ALL;
5425 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5426 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5427 
5428 	return all;
5429 }
5430 EXPORT_SYMBOL(netdev_increment_features);
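
/*
 * Illustrative sketch (hypothetical bonding-style master "foo"; the
 * slave list and feature mask are made up): a master device folds in
 * each slave's features to compute its own.
 *
 *	unsigned long features = FOO_INITIAL_FEATURES;
 *
 *	list_for_each_entry(slave, &master->slaves, list)
 *		features = netdev_increment_features(features,
 *						     slave->dev->features,
 *						     FOO_FEATURE_MASK);
 *	master->dev->features = features;
 */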
5431 
5432 static struct hlist_head *netdev_create_hash(void)
5433 {
5434 	int i;
5435 	struct hlist_head *hash;
5436 
5437 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5438 	if (hash != NULL)
5439 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5440 			INIT_HLIST_HEAD(&hash[i]);
5441 
5442 	return hash;
5443 }
5444 
5445 /* Initialize per network namespace state */
5446 static int __net_init netdev_init(struct net *net)
5447 {
5448 	INIT_LIST_HEAD(&net->dev_base_head);
5449 
5450 	net->dev_name_head = netdev_create_hash();
5451 	if (net->dev_name_head == NULL)
5452 		goto err_name;
5453 
5454 	net->dev_index_head = netdev_create_hash();
5455 	if (net->dev_index_head == NULL)
5456 		goto err_idx;
5457 
5458 	return 0;
5459 
5460 err_idx:
5461 	kfree(net->dev_name_head);
5462 err_name:
5463 	return -ENOMEM;
5464 }
5465 
5466 /**
5467  *	netdev_drivername - network driver for the device
5468  *	@dev: network device
5469  *	@buffer: buffer for resulting name
5470  *	@len: size of buffer
5471  *
5472  *	Determine network driver for device.
5473  */
5474 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5475 {
5476 	const struct device_driver *driver;
5477 	const struct device *parent;
5478 
5479 	if (len <= 0 || !buffer)
5480 		return buffer;
5481 	buffer[0] = 0;
5482 
5483 	parent = dev->dev.parent;
5484 
5485 	if (!parent)
5486 		return buffer;
5487 
5488 	driver = parent->driver;
5489 	if (driver && driver->name)
5490 		strlcpy(buffer, driver->name, len);
5491 	return buffer;
5492 }
5493 
5494 static void __net_exit netdev_exit(struct net *net)
5495 {
5496 	kfree(net->dev_name_head);
5497 	kfree(net->dev_index_head);
5498 }
5499 
5500 static struct pernet_operations __net_initdata netdev_net_ops = {
5501 	.init = netdev_init,
5502 	.exit = netdev_exit,
5503 };
5504 
5505 static void __net_exit default_device_exit(struct net *net)
5506 {
5507 	struct net_device *dev;
5508 	/*
5509 	 * Push all migratable network devices back to the
5510 	 * initial network namespace.
5511 	 */
5512 	rtnl_lock();
5513 restart:
5514 	for_each_netdev(net, dev) {
5515 		int err;
5516 		char fb_name[IFNAMSIZ];
5517 
5518 		/* Ignore unmovable devices (e.g. loopback) */
5519 		if (dev->features & NETIF_F_NETNS_LOCAL)
5520 			continue;
5521 
5522 		/* Delete virtual devices */
5523 		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
5524 			dev->rtnl_link_ops->dellink(dev);
5525 			goto restart;
5526 		}
5527 
5528 		/* Push remaining network devices to init_net */
5529 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5530 		err = dev_change_net_namespace(dev, &init_net, fb_name);
5531 		if (err) {
5532 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5533 				__func__, dev->name, err);
5534 			BUG();
5535 		}
5536 		goto restart;
5537 	}
5538 	rtnl_unlock();
5539 }
5540 
5541 static struct pernet_operations __net_initdata default_device_ops = {
5542 	.exit = default_device_exit,
5543 };
5544 
5545 /*
5546  *	Initialize the DEV module. At boot time this walks the device list and
5547  *	unhooks any devices that fail to initialise (normally hardware not
5548  *	present) and leaves us with a valid list of present and active devices.
5549  *
5550  */
5551 
5552 /*
5553  *       This is called single threaded during boot, so no need
5554  *       to take the rtnl semaphore.
5555  */
5556 static int __init net_dev_init(void)
5557 {
5558 	int i, rc = -ENOMEM;
5559 
5560 	BUG_ON(!dev_boot_phase);
5561 
5562 	if (dev_proc_init())
5563 		goto out;
5564 
5565 	if (netdev_kobject_init())
5566 		goto out;
5567 
5568 	INIT_LIST_HEAD(&ptype_all);
5569 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
5570 		INIT_LIST_HEAD(&ptype_base[i]);
5571 
5572 	if (register_pernet_subsys(&netdev_net_ops))
5573 		goto out;
5574 
5575 	/*
5576 	 *	Initialise the packet receive queues.
5577 	 */
5578 
5579 	for_each_possible_cpu(i) {
5580 		struct softnet_data *queue;
5581 
5582 		queue = &per_cpu(softnet_data, i);
5583 		skb_queue_head_init(&queue->input_pkt_queue);
5584 		queue->completion_queue = NULL;
5585 		INIT_LIST_HEAD(&queue->poll_list);
5586 
5587 		queue->backlog.poll = process_backlog;
5588 		queue->backlog.weight = weight_p;
5589 		queue->backlog.gro_list = NULL;
5590 		queue->backlog.gro_count = 0;
5591 	}
5592 
5593 	dev_boot_phase = 0;
5594 
5595 	/* The loopback device is special: if any other network device
5596 	 * is present in a network namespace, the loopback device must
5597 	 * be present too. Since we now dynamically allocate and free
5598 	 * the loopback device, ensure this invariant is maintained by
5599 	 * keeping the loopback device as the first device on the
5600 	 * list of network devices, ensuring that the loopback device
5601 	 * is the first device that appears and the last network device
5602 	 * that disappears.
5603 	 */
5604 	if (register_pernet_device(&loopback_net_ops))
5605 		goto out;
5606 
5607 	if (register_pernet_device(&default_device_ops))
5608 		goto out;
5609 
5610 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5611 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5612 
5613 	hotcpu_notifier(dev_cpu_callback, 0);
5614 	dst_init();
5615 	dev_mcast_init();
5616 	rc = 0;
5617 out:
5618 	return rc;
5619 }
5620 
5621 subsys_initcall(net_dev_init);
5622 
5623 static int __init initialize_hashrnd(void)
5624 {
5625 	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5626 	return 0;
5627 }
5628 
5629 late_initcall_sync(initialize_hashrnd);
5630 
5631 EXPORT_SYMBOL(__dev_get_by_index);
5632 EXPORT_SYMBOL(__dev_get_by_name);
5633 EXPORT_SYMBOL(__dev_remove_pack);
5634 EXPORT_SYMBOL(dev_valid_name);
5635 EXPORT_SYMBOL(dev_add_pack);
5636 EXPORT_SYMBOL(dev_alloc_name);
5637 EXPORT_SYMBOL(dev_close);
5638 EXPORT_SYMBOL(dev_get_by_flags);
5639 EXPORT_SYMBOL(dev_get_by_index);
5640 EXPORT_SYMBOL(dev_get_by_name);
5641 EXPORT_SYMBOL(dev_open);
5642 EXPORT_SYMBOL(dev_queue_xmit);
5643 EXPORT_SYMBOL(dev_remove_pack);
5644 EXPORT_SYMBOL(dev_set_allmulti);
5645 EXPORT_SYMBOL(dev_set_promiscuity);
5646 EXPORT_SYMBOL(dev_change_flags);
5647 EXPORT_SYMBOL(dev_set_mtu);
5648 EXPORT_SYMBOL(dev_set_mac_address);
5649 EXPORT_SYMBOL(free_netdev);
5650 EXPORT_SYMBOL(netdev_boot_setup_check);
5651 EXPORT_SYMBOL(netdev_set_master);
5652 EXPORT_SYMBOL(netdev_state_change);
5653 EXPORT_SYMBOL(netif_receive_skb);
5654 EXPORT_SYMBOL(netif_rx);
5655 EXPORT_SYMBOL(register_gifconf);
5656 EXPORT_SYMBOL(register_netdevice);
5657 EXPORT_SYMBOL(register_netdevice_notifier);
5658 EXPORT_SYMBOL(skb_checksum_help);
5659 EXPORT_SYMBOL(synchronize_net);
5660 EXPORT_SYMBOL(unregister_netdevice);
5661 EXPORT_SYMBOL(unregister_netdevice_notifier);
5662 EXPORT_SYMBOL(net_enable_timestamp);
5663 EXPORT_SYMBOL(net_disable_timestamp);
5664 EXPORT_SYMBOL(dev_get_flags);
5665 
5666 EXPORT_SYMBOL(dev_load);
5667 
5668 EXPORT_PER_CPU_SYMBOL(softnet_data);
5669