xref: /linux/net/core/dev.c (revision ac6a0cf6716bb46813d0161024c66c2af66e53d1)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 #include <trace/events/napi.h>
130 
131 #include "net-sysfs.h"
132 
133 /* Instead of increasing this, you should create a hash table. */
134 #define MAX_GRO_SKBS 8
135 
136 /* This should be increased if a protocol with a bigger head is added. */
137 #define GRO_MAX_HEAD (MAX_HEADER + 128)
138 
139 /*
140  *	The list of packet types we will receive (as opposed to discard)
141  *	and the routines to invoke.
142  *
143  *	Why 16? Because with 16 the only overlap we get on a hash of the
144  *	low nibble of the protocol value is RARP/SNAP/X.25.
145  *
146  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
147  *             sure which should go first, but I bet it won't make much
148  *             difference if we are running VLANs.  The good news is that
149  *             this protocol won't be in the list unless compiled in, so
150  *             the average user (w/out VLANs) will not be adversely affected.
151  *             --BLG
152  *
153  *		0800	IP
154  *		8100    802.1Q VLAN
155  *		0001	802.3
156  *		0002	AX.25
157  *		0004	802.2
158  *		8035	RARP
159  *		0005	SNAP
160  *		0805	X.25
161  *		0806	ARP
162  *		8137	IPX
163  *		0009	Localtalk
164  *		86DD	IPv6
165  */
166 
167 #define PTYPE_HASH_SIZE	(16)
168 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
169 
170 static DEFINE_SPINLOCK(ptype_lock);
171 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
172 static struct list_head ptype_all __read_mostly;	/* Taps */
173 
174 /*
175  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
176  * semaphore.
177  *
178  * Pure readers hold dev_base_lock for reading.
179  *
180  * Writers must hold the rtnl semaphore while they loop through the
181  * dev_base_head list, and hold dev_base_lock for writing when they do the
182  * actual updates.  This allows pure readers to access the list even
183  * while a writer is preparing to update it.
184  *
185  * To put it another way, dev_base_lock is held for writing only to
186  * protect against pure readers; the rtnl semaphore provides the
187  * protection against other writers.
188  *
189  * See, for example usages, register_netdevice() and
190  * unregister_netdevice(), which must be called with the rtnl
191  * semaphore held.
192  */
193 DEFINE_RWLOCK(dev_base_lock);
194 
195 EXPORT_SYMBOL(dev_base_lock);
196 
197 #define NETDEV_HASHBITS	8
198 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
199 
200 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
201 {
202 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
203 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
204 }
205 
206 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
207 {
208 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
209 }
210 
211 /* Device list insertion */
212 static int list_netdevice(struct net_device *dev)
213 {
214 	struct net *net = dev_net(dev);
215 
216 	ASSERT_RTNL();
217 
218 	write_lock_bh(&dev_base_lock);
219 	list_add_tail(&dev->dev_list, &net->dev_base_head);
220 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
221 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
222 	write_unlock_bh(&dev_base_lock);
223 	return 0;
224 }
225 
226 /* Device list removal */
227 static void unlist_netdevice(struct net_device *dev)
228 {
229 	ASSERT_RTNL();
230 
231 	/* Unlink dev from the device chain */
232 	write_lock_bh(&dev_base_lock);
233 	list_del(&dev->dev_list);
234 	hlist_del(&dev->name_hlist);
235 	hlist_del(&dev->index_hlist);
236 	write_unlock_bh(&dev_base_lock);
237 }
238 
239 /*
240  *	Our notifier list
241  */
242 
243 static RAW_NOTIFIER_HEAD(netdev_chain);
244 
245 /*
246  *	Device drivers call our routines to queue packets here. We empty the
247  *	queue in the local softnet handler.
248  */
249 
250 DEFINE_PER_CPU(struct softnet_data, softnet_data);
251 
252 #ifdef CONFIG_LOCKDEP
253 /*
254  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
255  * according to dev->type
256  */
257 static const unsigned short netdev_lock_type[] =
258 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
259 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
260 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
261 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
262 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
263 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
264 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
265 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
266 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
267 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
268 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
269 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
270 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
271 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
272 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY,
273 	 ARPHRD_VOID, ARPHRD_NONE};
274 
275 static const char *netdev_lock_name[] =
276 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
277 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
278 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
279 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
280 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
281 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
282 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
283 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
284 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
285 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
286 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
287 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
288 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
289 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
290 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY",
291 	 "_xmit_VOID", "_xmit_NONE"};
292 
293 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
294 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
295 
296 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
297 {
298 	int i;
299 
300 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
301 		if (netdev_lock_type[i] == dev_type)
302 			return i;
303 	/* the last key is used by default */
304 	return ARRAY_SIZE(netdev_lock_type) - 1;
305 }
306 
307 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
308 						 unsigned short dev_type)
309 {
310 	int i;
311 
312 	i = netdev_lock_pos(dev_type);
313 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
314 				   netdev_lock_name[i]);
315 }
316 
317 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
318 {
319 	int i;
320 
321 	i = netdev_lock_pos(dev->type);
322 	lockdep_set_class_and_name(&dev->addr_list_lock,
323 				   &netdev_addr_lock_key[i],
324 				   netdev_lock_name[i]);
325 }
326 #else
327 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
328 						 unsigned short dev_type)
329 {
330 }
331 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
332 {
333 }
334 #endif
335 
336 /*******************************************************************************
337 
338 		Protocol management and registration routines
339 
340 *******************************************************************************/
341 
342 /*
343  *	Add a protocol ID to the list. Now that the input handler is
344  *	smarter we can dispense with all the messy stuff that used to be
345  *	here.
346  *
347  *	BEWARE!!! Protocol handlers that mangle input packets
348  *	MUST BE last in the hash buckets, and checking of protocol handlers
349  *	MUST start from the promiscuous ptype_all chain in net_bh.
350  *	This is true now; do not change it.
351  *	Explanation: if a packet-mangling protocol handler were
352  *	the first on the list, it could not sense that the packet
353  *	is cloned and should be copied-on-write, so it would
354  *	change it and subsequent readers would get a broken packet.
355  *							--ANK (980803)
356  */
357 
358 /**
359  *	dev_add_pack - add packet handler
360  *	@pt: packet type declaration
361  *
362  *	Add a protocol handler to the networking stack. The passed &packet_type
363  *	is linked into kernel lists and may not be freed until it has been
364  *	removed from the kernel lists.
365  *
366  *	This call does not sleep and therefore cannot
367  *	guarantee that all CPUs that are in the middle of receiving packets
368  *	will see the new packet type (until the next received packet).
369  */
370 
371 void dev_add_pack(struct packet_type *pt)
372 {
373 	int hash;
374 
375 	spin_lock_bh(&ptype_lock);
376 	if (pt->type == htons(ETH_P_ALL))
377 		list_add_rcu(&pt->list, &ptype_all);
378 	else {
379 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
380 		list_add_rcu(&pt->list, &ptype_base[hash]);
381 	}
382 	spin_unlock_bh(&ptype_lock);
383 }
384 
385 /**
386  *	__dev_remove_pack	 - remove packet handler
387  *	@pt: packet type declaration
388  *
389  *	Remove a protocol handler that was previously added to the kernel
390  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
391  *	from the kernel lists and can be freed or reused once this function
392  *	returns.
393  *
394  *      The packet type might still be in use by receivers
395  *	and must not be freed until after all the CPUs have gone
396  *	through a quiescent state.
397  */
398 void __dev_remove_pack(struct packet_type *pt)
399 {
400 	struct list_head *head;
401 	struct packet_type *pt1;
402 
403 	spin_lock_bh(&ptype_lock);
404 
405 	if (pt->type == htons(ETH_P_ALL))
406 		head = &ptype_all;
407 	else
408 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
409 
410 	list_for_each_entry(pt1, head, list) {
411 		if (pt == pt1) {
412 			list_del_rcu(&pt->list);
413 			goto out;
414 		}
415 	}
416 
417 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
418 out:
419 	spin_unlock_bh(&ptype_lock);
420 }
421 /**
422  *	dev_remove_pack	 - remove packet handler
423  *	@pt: packet type declaration
424  *
425  *	Remove a protocol handler that was previously added to the kernel
426  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
427  *	from the kernel lists and can be freed or reused once this function
428  *	returns.
429  *
430  *	This call sleeps to guarantee that no CPU is looking at the packet
431  *	type after return.
432  */
433 void dev_remove_pack(struct packet_type *pt)
434 {
435 	__dev_remove_pack(pt);
436 
437 	synchronize_net();
438 }
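
/*
 * Illustrative sketch only (not part of this file): how a protocol module
 * might typically use dev_add_pack()/dev_remove_pack().  The handler and
 * variable names, and the choice of ETH_P_IP, are assumptions made for the
 * example.
 *
 *	static int example_rcv(struct sk_buff *skb, struct net_device *dev,
 *			       struct packet_type *pt,
 *			       struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type example_packet_type __read_mostly = {
 *		.type = cpu_to_be16(ETH_P_IP),
 *		.func = example_rcv,
 *	};
 *
 * The module would call dev_add_pack(&example_packet_type) from its init
 * function and dev_remove_pack(&example_packet_type) from its exit function.
 */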
439 
440 /******************************************************************************
441 
442 		      Device Boot-time Settings Routines
443 
444 *******************************************************************************/
445 
446 /* Boot time configuration table */
447 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
448 
449 /**
450  *	netdev_boot_setup_add	- add new setup entry
451  *	@name: name of the device
452  *	@map: configured settings for the device
453  *
454  *	Adds new setup entry to the dev_boot_setup list.  The function
455  *	returns 0 on error and 1 on success.  This is a generic routine for
456  *	all netdevices.
457  */
458 static int netdev_boot_setup_add(char *name, struct ifmap *map)
459 {
460 	struct netdev_boot_setup *s;
461 	int i;
462 
463 	s = dev_boot_setup;
464 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
465 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
466 			memset(s[i].name, 0, sizeof(s[i].name));
467 			strlcpy(s[i].name, name, IFNAMSIZ);
468 			memcpy(&s[i].map, map, sizeof(s[i].map));
469 			break;
470 		}
471 	}
472 
473 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
474 }
475 
476 /**
477  *	netdev_boot_setup_check	- check boot time settings
478  *	@dev: the netdevice
479  *
480  * 	Check boot time settings for the device.
481  *	Any settings found are applied to the device for use
482  *	later during device probing.
483  *	Returns 0 if no settings were found, 1 if they were.
484  */
485 int netdev_boot_setup_check(struct net_device *dev)
486 {
487 	struct netdev_boot_setup *s = dev_boot_setup;
488 	int i;
489 
490 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
491 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
492 		    !strcmp(dev->name, s[i].name)) {
493 			dev->irq 	= s[i].map.irq;
494 			dev->base_addr 	= s[i].map.base_addr;
495 			dev->mem_start 	= s[i].map.mem_start;
496 			dev->mem_end 	= s[i].map.mem_end;
497 			return 1;
498 		}
499 	}
500 	return 0;
501 }
502 
503 
504 /**
505  *	netdev_boot_base	- get address from boot time settings
506  *	@prefix: prefix for network device
507  *	@unit: id for network device
508  *
509  * 	Check boot time settings for the base address of the device.
510  *	Returns 1 if the device is already registered (so it should
511  *	not be probed), the configured base address if settings are
512  *	found, and 0 otherwise.
513  */
514 unsigned long netdev_boot_base(const char *prefix, int unit)
515 {
516 	const struct netdev_boot_setup *s = dev_boot_setup;
517 	char name[IFNAMSIZ];
518 	int i;
519 
520 	sprintf(name, "%s%d", prefix, unit);
521 
522 	/*
523 	 * If device already registered then return base of 1
524 	 * to indicate not to probe for this interface
525 	 */
526 	if (__dev_get_by_name(&init_net, name))
527 		return 1;
528 
529 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
530 		if (!strcmp(name, s[i].name))
531 			return s[i].map.base_addr;
532 	return 0;
533 }
534 
535 /*
536  * Saves at boot time configured settings for any netdevice.
537  */
538 int __init netdev_boot_setup(char *str)
539 {
540 	int ints[5];
541 	struct ifmap map;
542 
543 	str = get_options(str, ARRAY_SIZE(ints), ints);
544 	if (!str || !*str)
545 		return 0;
546 
547 	/* Save settings */
548 	memset(&map, 0, sizeof(map));
549 	if (ints[0] > 0)
550 		map.irq = ints[1];
551 	if (ints[0] > 1)
552 		map.base_addr = ints[2];
553 	if (ints[0] > 2)
554 		map.mem_start = ints[3];
555 	if (ints[0] > 3)
556 		map.mem_end = ints[4];
557 
558 	/* Add new entry to the list */
559 	return netdev_boot_setup_add(str, &map);
560 }
561 
562 __setup("netdev=", netdev_boot_setup);
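
/*
 * Example (illustrative): the option parsed above has the form
 * "netdev=irq,base_addr,mem_start,mem_end,name", so a command line such as
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * records IRQ 9 and I/O base 0x300 for "eth0", to be applied by
 * netdev_boot_setup_check() when the device is later probed.
 */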
563 
564 /*******************************************************************************
565 
566 			    Device Interface Subroutines
567 
568 *******************************************************************************/
569 
570 /**
571  *	__dev_get_by_name	- find a device by its name
572  *	@net: the applicable net namespace
573  *	@name: name to find
574  *
575  *	Find an interface by name. Must be called under RTNL semaphore
576  *	or @dev_base_lock. If the name is found a pointer to the device
577  *	is returned. If the name is not found then %NULL is returned. The
578  *	reference counters are not incremented so the caller must be
579  *	careful with locks.
580  */
581 
582 struct net_device *__dev_get_by_name(struct net *net, const char *name)
583 {
584 	struct hlist_node *p;
585 
586 	hlist_for_each(p, dev_name_hash(net, name)) {
587 		struct net_device *dev
588 			= hlist_entry(p, struct net_device, name_hlist);
589 		if (!strncmp(dev->name, name, IFNAMSIZ))
590 			return dev;
591 	}
592 	return NULL;
593 }
594 
595 /**
596  *	dev_get_by_name		- find a device by its name
597  *	@net: the applicable net namespace
598  *	@name: name to find
599  *
600  *	Find an interface by name. This can be called from any
601  *	context and does its own locking. The returned handle has
602  *	the usage count incremented and the caller must use dev_put() to
603  *	release it when it is no longer needed. %NULL is returned if no
604  *	matching device is found.
605  */
606 
607 struct net_device *dev_get_by_name(struct net *net, const char *name)
608 {
609 	struct net_device *dev;
610 
611 	read_lock(&dev_base_lock);
612 	dev = __dev_get_by_name(net, name);
613 	if (dev)
614 		dev_hold(dev);
615 	read_unlock(&dev_base_lock);
616 	return dev;
617 }
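
/*
 * Typical use (illustrative sketch): look a device up by name from process
 * context and release the reference when finished with it.
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *
 *	if (dev) {
 *		...
 *		dev_put(dev);
 *	}
 */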
618 
619 /**
620  *	__dev_get_by_index - find a device by its ifindex
621  *	@net: the applicable net namespace
622  *	@ifindex: index of device
623  *
624  *	Search for an interface by index. Returns a pointer to the device,
625  *	or %NULL if it is not found. The device has not
626  *	had its reference counter increased so the caller must be careful
627  *	about locking. The caller must hold either the RTNL semaphore
628  *	or @dev_base_lock.
629  */
630 
631 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
632 {
633 	struct hlist_node *p;
634 
635 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
636 		struct net_device *dev
637 			= hlist_entry(p, struct net_device, index_hlist);
638 		if (dev->ifindex == ifindex)
639 			return dev;
640 	}
641 	return NULL;
642 }
643 
644 
645 /**
646  *	dev_get_by_index - find a device by its ifindex
647  *	@net: the applicable net namespace
648  *	@ifindex: index of device
649  *
650  *	Search for an interface by index. Returns a pointer to the device,
651  *	or NULL if it is not found. The device returned has
652  *	had a reference added and the pointer is safe until the user calls
653  *	dev_put to indicate they have finished with it.
654  */
655 
656 struct net_device *dev_get_by_index(struct net *net, int ifindex)
657 {
658 	struct net_device *dev;
659 
660 	read_lock(&dev_base_lock);
661 	dev = __dev_get_by_index(net, ifindex);
662 	if (dev)
663 		dev_hold(dev);
664 	read_unlock(&dev_base_lock);
665 	return dev;
666 }
667 
668 /**
669  *	dev_getbyhwaddr - find a device by its hardware address
670  *	@net: the applicable net namespace
671  *	@type: media type of device
672  *	@ha: hardware address
673  *
674  *	Search for an interface by MAC address. Returns a pointer to the
675  *	device, or NULL if it is not found. The caller must hold the
676  *	rtnl semaphore. The returned device has not had its ref count increased
677  *	and the caller must therefore be careful about locking
678  *
679  *	BUGS:
680  *	If the API was consistent this would be __dev_get_by_hwaddr
681  */
682 
683 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
684 {
685 	struct net_device *dev;
686 
687 	ASSERT_RTNL();
688 
689 	for_each_netdev(net, dev)
690 		if (dev->type == type &&
691 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
692 			return dev;
693 
694 	return NULL;
695 }
696 
697 EXPORT_SYMBOL(dev_getbyhwaddr);
698 
699 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
700 {
701 	struct net_device *dev;
702 
703 	ASSERT_RTNL();
704 	for_each_netdev(net, dev)
705 		if (dev->type == type)
706 			return dev;
707 
708 	return NULL;
709 }
710 
711 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
712 
713 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
714 {
715 	struct net_device *dev;
716 
717 	rtnl_lock();
718 	dev = __dev_getfirstbyhwtype(net, type);
719 	if (dev)
720 		dev_hold(dev);
721 	rtnl_unlock();
722 	return dev;
723 }
724 
725 EXPORT_SYMBOL(dev_getfirstbyhwtype);
726 
727 /**
728  *	dev_get_by_flags - find any device with given flags
729  *	@net: the applicable net namespace
730  *	@if_flags: IFF_* values
731  *	@mask: bitmask of bits in if_flags to check
732  *
733  *	Search for any interface with the given flags. Returns a pointer to
734  *	the device, or NULL if none is found. The device returned has
735  *	had a reference added and the pointer is safe until the user calls
736  *	dev_put to indicate they have finished with it.
737  */
738 
739 struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
740 {
741 	struct net_device *dev, *ret;
742 
743 	ret = NULL;
744 	read_lock(&dev_base_lock);
745 	for_each_netdev(net, dev) {
746 		if (((dev->flags ^ if_flags) & mask) == 0) {
747 			dev_hold(dev);
748 			ret = dev;
749 			break;
750 		}
751 	}
752 	read_unlock(&dev_base_lock);
753 	return ret;
754 }
755 
756 /**
757  *	dev_valid_name - check if name is okay for network device
758  *	@name: name string
759  *
760  *	Network device names need to be valid file names
761  *	to allow sysfs to work.  We also disallow any kind of
762  *	whitespace.
763  */
764 int dev_valid_name(const char *name)
765 {
766 	if (*name == '\0')
767 		return 0;
768 	if (strlen(name) >= IFNAMSIZ)
769 		return 0;
770 	if (!strcmp(name, ".") || !strcmp(name, ".."))
771 		return 0;
772 
773 	while (*name) {
774 		if (*name == '/' || isspace(*name))
775 			return 0;
776 		name++;
777 	}
778 	return 1;
779 }
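
/*
 * For example (illustrative): "eth0", "tap12" and "wlan%d" pass
 * dev_valid_name(), while "", ".", "..", "eth/0", names containing
 * whitespace and names of IFNAMSIZ characters or more are rejected.
 */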
780 
781 /**
782  *	__dev_alloc_name - allocate a name for a device
783  *	@net: network namespace to allocate the device name in
784  *	@name: name format string
785  *	@buf:  scratch buffer and result name string
786  *
787  *	Passed a format string - eg "lt%d" - it will try to find a suitable
788  *	id. It scans the list of devices to build up a free map, then chooses
789  *	the first empty slot. The caller must hold the dev_base or rtnl lock
790  *	while allocating the name and adding the device in order to avoid
791  *	duplicates.
792  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
793  *	Returns the number of the unit assigned or a negative errno code.
794  */
795 
796 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
797 {
798 	int i = 0;
799 	const char *p;
800 	const int max_netdevices = 8*PAGE_SIZE;
801 	unsigned long *inuse;
802 	struct net_device *d;
803 
804 	p = strnchr(name, IFNAMSIZ-1, '%');
805 	if (p) {
806 		/*
807 		 * Verify the string as this thing may have come from
808 		 * the user.  There must be exactly one "%d" and no other "%"
809 		 * characters.
810 		 */
811 		if (p[1] != 'd' || strchr(p + 2, '%'))
812 			return -EINVAL;
813 
814 		/* Use one page as a bit array of possible slots */
815 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
816 		if (!inuse)
817 			return -ENOMEM;
818 
819 		for_each_netdev(net, d) {
820 			if (!sscanf(d->name, name, &i))
821 				continue;
822 			if (i < 0 || i >= max_netdevices)
823 				continue;
824 
825 			/*  avoid cases where sscanf is not exact inverse of printf */
826 			snprintf(buf, IFNAMSIZ, name, i);
827 			if (!strncmp(buf, d->name, IFNAMSIZ))
828 				set_bit(i, inuse);
829 		}
830 
831 		i = find_first_zero_bit(inuse, max_netdevices);
832 		free_page((unsigned long) inuse);
833 	}
834 
835 	snprintf(buf, IFNAMSIZ, name, i);
836 	if (!__dev_get_by_name(net, buf))
837 		return i;
838 
839 	/* It is possible to run out of possible slots
840 	 * when the name is long and there isn't enough space left
841 	 * for the digits, or if all bits are used.
842 	 */
843 	return -ENFILE;
844 }
845 
846 /**
847  *	dev_alloc_name - allocate a name for a device
848  *	@dev: device
849  *	@name: name format string
850  *
851  *	Passed a format string - eg "lt%d" - it will try to find a suitable
852  *	id. It scans the list of devices to build up a free map, then chooses
853  *	the first empty slot. The caller must hold the dev_base or rtnl lock
854  *	while allocating the name and adding the device in order to avoid
855  *	duplicates.
856  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
857  *	Returns the number of the unit assigned or a negative errno code.
858  */
859 
860 int dev_alloc_name(struct net_device *dev, const char *name)
861 {
862 	char buf[IFNAMSIZ];
863 	struct net *net;
864 	int ret;
865 
866 	BUG_ON(!dev_net(dev));
867 	net = dev_net(dev);
868 	ret = __dev_alloc_name(net, name, buf);
869 	if (ret >= 0)
870 		strlcpy(dev->name, buf, IFNAMSIZ);
871 	return ret;
872 }
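
/*
 * Example (illustrative): with "eth0" and "eth2" already registered in the
 * device's namespace,
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *
 * writes "eth1" (the first free slot) into dev->name and returns 1; a
 * negative errno is returned if the format is invalid or no slot is free.
 */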
873 
874 
875 /**
876  *	dev_change_name - change name of a device
877  *	@dev: device
878  *	@newname: name (or format string) must be at least IFNAMSIZ
879  *
880  *	Change the name of a device. A format string such as "eth%d"
881  *	can be passed for wildcarding.
882  */
883 int dev_change_name(struct net_device *dev, const char *newname)
884 {
885 	char oldname[IFNAMSIZ];
886 	int err = 0;
887 	int ret;
888 	struct net *net;
889 
890 	ASSERT_RTNL();
891 	BUG_ON(!dev_net(dev));
892 
893 	net = dev_net(dev);
894 	if (dev->flags & IFF_UP)
895 		return -EBUSY;
896 
897 	if (!dev_valid_name(newname))
898 		return -EINVAL;
899 
900 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
901 		return 0;
902 
903 	memcpy(oldname, dev->name, IFNAMSIZ);
904 
905 	if (strchr(newname, '%')) {
906 		err = dev_alloc_name(dev, newname);
907 		if (err < 0)
908 			return err;
909 	}
910 	else if (__dev_get_by_name(net, newname))
911 		return -EEXIST;
912 	else
913 		strlcpy(dev->name, newname, IFNAMSIZ);
914 
915 rollback:
916 	/* For now only devices in the initial network namespace
917 	 * are in sysfs.
918 	 */
919 	if (net == &init_net) {
920 		ret = device_rename(&dev->dev, dev->name);
921 		if (ret) {
922 			memcpy(dev->name, oldname, IFNAMSIZ);
923 			return ret;
924 		}
925 	}
926 
927 	write_lock_bh(&dev_base_lock);
928 	hlist_del(&dev->name_hlist);
929 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
930 	write_unlock_bh(&dev_base_lock);
931 
932 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
933 	ret = notifier_to_errno(ret);
934 
935 	if (ret) {
936 		if (err) {
937 			printk(KERN_ERR
938 			       "%s: name change rollback failed: %d.\n",
939 			       dev->name, ret);
940 		} else {
941 			err = ret;
942 			memcpy(dev->name, oldname, IFNAMSIZ);
943 			goto rollback;
944 		}
945 	}
946 
947 	return err;
948 }
949 
950 /**
951  *	dev_set_alias - change ifalias of a device
952  *	@dev: device
953  *	@alias: name up to IFALIASZ
954  *	@len: limit of bytes to copy from info
955  *
956  *	Set ifalias for a device,
957  *	Set ifalias for a device.
958 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
959 {
960 	ASSERT_RTNL();
961 
962 	if (len >= IFALIASZ)
963 		return -EINVAL;
964 
965 	if (!len) {
966 		if (dev->ifalias) {
967 			kfree(dev->ifalias);
968 			dev->ifalias = NULL;
969 		}
970 		return 0;
971 	}
972 
973 	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
974 	if (!dev->ifalias)
975 		return -ENOMEM;
976 
977 	strlcpy(dev->ifalias, alias, len+1);
978 	return len;
979 }
980 
981 
982 /**
983  *	netdev_features_change - device changes features
984  *	@dev: device to cause notification
985  *
986  *	Called to indicate a device has changed features.
987  */
988 void netdev_features_change(struct net_device *dev)
989 {
990 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
991 }
992 EXPORT_SYMBOL(netdev_features_change);
993 
994 /**
995  *	netdev_state_change - device changes state
996  *	@dev: device to cause notification
997  *
998  *	Called to indicate a device has changed state. This function calls
999  *	the notifier chains for netdev_chain and sends a NEWLINK message
1000  *	to the routing socket.
1001  */
1002 void netdev_state_change(struct net_device *dev)
1003 {
1004 	if (dev->flags & IFF_UP) {
1005 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1006 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1007 	}
1008 }
1009 
1010 void netdev_bonding_change(struct net_device *dev)
1011 {
1012 	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1013 }
1014 EXPORT_SYMBOL(netdev_bonding_change);
1015 
1016 /**
1017  *	dev_load 	- load a network module
1018  *	@net: the applicable net namespace
1019  *	@name: name of interface
1020  *
1021  *	If a network interface is not present and the process has suitable
1022  *	privileges this function loads the module. If module loading is not
1023  *	available in this kernel then it becomes a nop.
1024  */
1025 
1026 void dev_load(struct net *net, const char *name)
1027 {
1028 	struct net_device *dev;
1029 
1030 	read_lock(&dev_base_lock);
1031 	dev = __dev_get_by_name(net, name);
1032 	read_unlock(&dev_base_lock);
1033 
1034 	if (!dev && capable(CAP_SYS_MODULE))
1035 		request_module("%s", name);
1036 }
1037 
1038 /**
1039  *	dev_open	- prepare an interface for use.
1040  *	@dev:	device to open
1041  *
1042  *	Takes a device from down to up state. The device's private open
1043  *	function is invoked and then the multicast lists are loaded. Finally
1044  *	the device is moved into the up state and a %NETDEV_UP message is
1045  *	sent to the netdev notifier chain.
1046  *
1047  *	Calling this function on an active interface is a nop. On a failure
1048  *	a negative errno code is returned.
1049  */
1050 int dev_open(struct net_device *dev)
1051 {
1052 	const struct net_device_ops *ops = dev->netdev_ops;
1053 	int ret;
1054 
1055 	ASSERT_RTNL();
1056 
1057 	/*
1058 	 *	Is it already up?
1059 	 */
1060 
1061 	if (dev->flags & IFF_UP)
1062 		return 0;
1063 
1064 	/*
1065 	 *	Is it even present?
1066 	 */
1067 	if (!netif_device_present(dev))
1068 		return -ENODEV;
1069 
1070 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1071 	ret = notifier_to_errno(ret);
1072 	if (ret)
1073 		return ret;
1074 
1075 	/*
1076 	 *	Call device private open method
1077 	 */
1078 	set_bit(__LINK_STATE_START, &dev->state);
1079 
1080 	if (ops->ndo_validate_addr)
1081 		ret = ops->ndo_validate_addr(dev);
1082 
1083 	if (!ret && ops->ndo_open)
1084 		ret = ops->ndo_open(dev);
1085 
1086 	/*
1087 	 *	If it went open OK then:
1088 	 */
1089 
1090 	if (ret)
1091 		clear_bit(__LINK_STATE_START, &dev->state);
1092 	else {
1093 		/*
1094 		 *	Set the flags.
1095 		 */
1096 		dev->flags |= IFF_UP;
1097 
1098 		/*
1099 		 *	Enable NET_DMA
1100 		 */
1101 		net_dmaengine_get();
1102 
1103 		/*
1104 		 *	Initialize multicasting status
1105 		 */
1106 		dev_set_rx_mode(dev);
1107 
1108 		/*
1109 		 *	Wakeup transmit queue engine
1110 		 */
1111 		dev_activate(dev);
1112 
1113 		/*
1114 		 *	... and announce new interface.
1115 		 */
1116 		call_netdevice_notifiers(NETDEV_UP, dev);
1117 	}
1118 
1119 	return ret;
1120 }
1121 
1122 /**
1123  *	dev_close - shutdown an interface.
1124  *	@dev: device to shutdown
1125  *
1126  *	This function moves an active device into down state. A
1127  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1128  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1129  *	chain.
1130  */
1131 int dev_close(struct net_device *dev)
1132 {
1133 	const struct net_device_ops *ops = dev->netdev_ops;
1134 	ASSERT_RTNL();
1135 
1136 	might_sleep();
1137 
1138 	if (!(dev->flags & IFF_UP))
1139 		return 0;
1140 
1141 	/*
1142 	 *	Tell people we are going down, so that they can
1143 	 *	prepare for death while the device is still operating.
1144 	 */
1145 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1146 
1147 	clear_bit(__LINK_STATE_START, &dev->state);
1148 
1149 	/* Synchronize to scheduled poll. We cannot touch poll list,
1150 	 * it can even be on a different CPU. So just clear netif_running().
1151 	 *
1152 	 * dev->stop() will invoke napi_disable() on all of its
1153 	 * napi_struct instances on this device.
1154 	 */
1155 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1156 
1157 	dev_deactivate(dev);
1158 
1159 	/*
1160 	 *	Call the device specific close. This cannot fail
1161 	 *	and is only done when the device is UP.
1162 	 *
1163 	 *	We allow it to be called even after a DETACH hot-plug
1164 	 *	event.
1165 	 */
1166 	if (ops->ndo_stop)
1167 		ops->ndo_stop(dev);
1168 
1169 	/*
1170 	 *	Device is now down.
1171 	 */
1172 
1173 	dev->flags &= ~IFF_UP;
1174 
1175 	/*
1176 	 * Tell people we are down
1177 	 */
1178 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1179 
1180 	/*
1181 	 *	Shutdown NET_DMA
1182 	 */
1183 	net_dmaengine_put();
1184 
1185 	return 0;
1186 }
1187 
1188 
1189 /**
1190  *	dev_disable_lro - disable Large Receive Offload on a device
1191  *	@dev: device
1192  *
1193  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1194  *	called under RTNL.  This is needed if received packets may be
1195  *	forwarded to another interface.
1196  */
1197 void dev_disable_lro(struct net_device *dev)
1198 {
1199 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1200 	    dev->ethtool_ops->set_flags) {
1201 		u32 flags = dev->ethtool_ops->get_flags(dev);
1202 		if (flags & ETH_FLAG_LRO) {
1203 			flags &= ~ETH_FLAG_LRO;
1204 			dev->ethtool_ops->set_flags(dev, flags);
1205 		}
1206 	}
1207 	WARN_ON(dev->features & NETIF_F_LRO);
1208 }
1209 EXPORT_SYMBOL(dev_disable_lro);
1210 
1211 
1212 static int dev_boot_phase = 1;
1213 
1214 /*
1215  *	Device change register/unregister. These are not inline or static
1216  *	as we export them to the world.
1217  */
1218 
1219 /**
1220  *	register_netdevice_notifier - register a network notifier block
1221  *	@nb: notifier
1222  *
1223  *	Register a notifier to be called when network device events occur.
1224  *	The notifier passed is linked into the kernel structures and must
1225  *	not be reused until it has been unregistered. A negative errno code
1226  *	is returned on a failure.
1227  *
1228  * 	When registered, all registration and up events are replayed
1229  *	to the new notifier to allow it to have a race-free
1230  *	view of the network device list.
1231  */
1232 
1233 int register_netdevice_notifier(struct notifier_block *nb)
1234 {
1235 	struct net_device *dev;
1236 	struct net_device *last;
1237 	struct net *net;
1238 	int err;
1239 
1240 	rtnl_lock();
1241 	err = raw_notifier_chain_register(&netdev_chain, nb);
1242 	if (err)
1243 		goto unlock;
1244 	if (dev_boot_phase)
1245 		goto unlock;
1246 	for_each_net(net) {
1247 		for_each_netdev(net, dev) {
1248 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1249 			err = notifier_to_errno(err);
1250 			if (err)
1251 				goto rollback;
1252 
1253 			if (!(dev->flags & IFF_UP))
1254 				continue;
1255 
1256 			nb->notifier_call(nb, NETDEV_UP, dev);
1257 		}
1258 	}
1259 
1260 unlock:
1261 	rtnl_unlock();
1262 	return err;
1263 
1264 rollback:
1265 	last = dev;
1266 	for_each_net(net) {
1267 		for_each_netdev(net, dev) {
1268 			if (dev == last)
1269 				break;
1270 
1271 			if (dev->flags & IFF_UP) {
1272 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1273 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1274 			}
1275 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1276 		}
1277 	}
1278 
1279 	raw_notifier_chain_unregister(&netdev_chain, nb);
1280 	goto unlock;
1281 }
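
/*
 * Illustrative sketch (not part of this file) of registering a notifier;
 * the callback and variable names are assumptions made for the example.
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_netdev_notifier = {
 *		.notifier_call = example_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&example_netdev_notifier);
 */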
1282 
1283 /**
1284  *	unregister_netdevice_notifier - unregister a network notifier block
1285  *	@nb: notifier
1286  *
1287  *	Unregister a notifier previously registered by
1288  *	register_netdevice_notifier(). The notifier is unlinked from the
1289  *	kernel structures and may then be reused. A negative errno code
1290  *	is returned on a failure.
1291  */
1292 
1293 int unregister_netdevice_notifier(struct notifier_block *nb)
1294 {
1295 	int err;
1296 
1297 	rtnl_lock();
1298 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1299 	rtnl_unlock();
1300 	return err;
1301 }
1302 
1303 /**
1304  *	call_netdevice_notifiers - call all network notifier blocks
1305  *      @val: value passed unmodified to notifier function
1306  *      @dev: net_device pointer passed unmodified to notifier function
1307  *
1308  *	Call all network notifier blocks.  Parameters and return value
1309  *	are as for raw_notifier_call_chain().
1310  */
1311 
1312 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1313 {
1314 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1315 }
1316 
1317 /* When > 0 there are consumers of rx skb time stamps */
1318 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1319 
1320 void net_enable_timestamp(void)
1321 {
1322 	atomic_inc(&netstamp_needed);
1323 }
1324 
1325 void net_disable_timestamp(void)
1326 {
1327 	atomic_dec(&netstamp_needed);
1328 }
1329 
1330 static inline void net_timestamp(struct sk_buff *skb)
1331 {
1332 	if (atomic_read(&netstamp_needed))
1333 		__net_timestamp(skb);
1334 	else
1335 		skb->tstamp.tv64 = 0;
1336 }
1337 
1338 /*
1339  *	Support routine. Sends outgoing frames to any network
1340  *	taps currently in use.
1341  */
1342 
1343 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1344 {
1345 	struct packet_type *ptype;
1346 
1347 #ifdef CONFIG_NET_CLS_ACT
1348 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1349 		net_timestamp(skb);
1350 #else
1351 	net_timestamp(skb);
1352 #endif
1353 
1354 	rcu_read_lock();
1355 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1356 		/* Never send packets back to the socket
1357 		 * they originated from - MvS (miquels@drinkel.ow.org)
1358 		 */
1359 		if ((ptype->dev == dev || !ptype->dev) &&
1360 		    (ptype->af_packet_priv == NULL ||
1361 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1362 			struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1363 			if (!skb2)
1364 				break;
1365 
1366 			/* The network header should be correctly set by
1367 			   the sender, so the check below is just
1368 			   protection against buggy protocols.
1369 			 */
1370 			skb_reset_mac_header(skb2);
1371 
1372 			if (skb_network_header(skb2) < skb2->data ||
1373 			    skb2->network_header > skb2->tail) {
1374 				if (net_ratelimit())
1375 					printk(KERN_CRIT "protocol %04x is "
1376 					       "buggy, dev %s\n",
1377 					       skb2->protocol, dev->name);
1378 				skb_reset_network_header(skb2);
1379 			}
1380 
1381 			skb2->transport_header = skb2->network_header;
1382 			skb2->pkt_type = PACKET_OUTGOING;
1383 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1384 		}
1385 	}
1386 	rcu_read_unlock();
1387 }
1388 
1389 
1390 static inline void __netif_reschedule(struct Qdisc *q)
1391 {
1392 	struct softnet_data *sd;
1393 	unsigned long flags;
1394 
1395 	local_irq_save(flags);
1396 	sd = &__get_cpu_var(softnet_data);
1397 	q->next_sched = sd->output_queue;
1398 	sd->output_queue = q;
1399 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1400 	local_irq_restore(flags);
1401 }
1402 
1403 void __netif_schedule(struct Qdisc *q)
1404 {
1405 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1406 		__netif_reschedule(q);
1407 }
1408 EXPORT_SYMBOL(__netif_schedule);
1409 
1410 void dev_kfree_skb_irq(struct sk_buff *skb)
1411 {
1412 	if (atomic_dec_and_test(&skb->users)) {
1413 		struct softnet_data *sd;
1414 		unsigned long flags;
1415 
1416 		local_irq_save(flags);
1417 		sd = &__get_cpu_var(softnet_data);
1418 		skb->next = sd->completion_queue;
1419 		sd->completion_queue = skb;
1420 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1421 		local_irq_restore(flags);
1422 	}
1423 }
1424 EXPORT_SYMBOL(dev_kfree_skb_irq);
1425 
1426 void dev_kfree_skb_any(struct sk_buff *skb)
1427 {
1428 	if (in_irq() || irqs_disabled())
1429 		dev_kfree_skb_irq(skb);
1430 	else
1431 		dev_kfree_skb(skb);
1432 }
1433 EXPORT_SYMBOL(dev_kfree_skb_any);
1434 
1435 
1436 /**
1437  * netif_device_detach - mark device as removed
1438  * @dev: network device
1439  *
1440  * Mark device as removed from system and therefore no longer available.
1441  */
1442 void netif_device_detach(struct net_device *dev)
1443 {
1444 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1445 	    netif_running(dev)) {
1446 		netif_tx_stop_all_queues(dev);
1447 	}
1448 }
1449 EXPORT_SYMBOL(netif_device_detach);
1450 
1451 /**
1452  * netif_device_attach - mark device as attached
1453  * @dev: network device
1454  *
1455  * Mark device as attached to the system and restart it if needed.
1456  */
1457 void netif_device_attach(struct net_device *dev)
1458 {
1459 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1460 	    netif_running(dev)) {
1461 		netif_tx_wake_all_queues(dev);
1462 		__netdev_watchdog_up(dev);
1463 	}
1464 }
1465 EXPORT_SYMBOL(netif_device_attach);
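
/*
 * Typical use (illustrative sketch): a driver calls netif_device_detach()
 * from its suspend handler and netif_device_attach() from its resume
 * handler, so the stack stops handing the device packets while the hardware
 * is powered down.  The function and variable names below are assumptions.
 *
 *	static int example_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *netdev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(netdev);
 *		...
 *		return 0;
 *	}
 */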
1466 
1467 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1468 {
1469 	return ((features & NETIF_F_GEN_CSUM) ||
1470 		((features & NETIF_F_IP_CSUM) &&
1471 		 protocol == htons(ETH_P_IP)) ||
1472 		((features & NETIF_F_IPV6_CSUM) &&
1473 		 protocol == htons(ETH_P_IPV6)) ||
1474 		((features & NETIF_F_FCOE_CRC) &&
1475 		 protocol == htons(ETH_P_FCOE)));
1476 }
1477 
1478 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1479 {
1480 	if (can_checksum_protocol(dev->features, skb->protocol))
1481 		return true;
1482 
1483 	if (skb->protocol == htons(ETH_P_8021Q)) {
1484 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1485 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1486 					  veh->h_vlan_encapsulated_proto))
1487 			return true;
1488 	}
1489 
1490 	return false;
1491 }
1492 
1493 /*
1494  * Invalidate hardware checksum when packet is to be mangled, and
1495  * complete checksum manually on outgoing path.
1496  */
1497 int skb_checksum_help(struct sk_buff *skb)
1498 {
1499 	__wsum csum;
1500 	int ret = 0, offset;
1501 
1502 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1503 		goto out_set_summed;
1504 
1505 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1506 		/* Let GSO fix up the checksum. */
1507 		goto out_set_summed;
1508 	}
1509 
1510 	offset = skb->csum_start - skb_headroom(skb);
1511 	BUG_ON(offset >= skb_headlen(skb));
1512 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1513 
1514 	offset += skb->csum_offset;
1515 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1516 
1517 	if (skb_cloned(skb) &&
1518 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1519 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1520 		if (ret)
1521 			goto out;
1522 	}
1523 
1524 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1525 out_set_summed:
1526 	skb->ip_summed = CHECKSUM_NONE;
1527 out:
1528 	return ret;
1529 }
1530 
1531 /**
1532  *	skb_gso_segment - Perform segmentation on skb.
1533  *	@skb: buffer to segment
1534  *	@features: features for the output path (see dev->features)
1535  *
1536  *	This function segments the given skb and returns a list of segments.
1537  *
1538  *	It may return NULL if the skb requires no segmentation.  This is
1539  *	only possible when GSO is used for verifying header integrity.
1540  */
1541 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1542 {
1543 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1544 	struct packet_type *ptype;
1545 	__be16 type = skb->protocol;
1546 	int err;
1547 
1548 	skb_reset_mac_header(skb);
1549 	skb->mac_len = skb->network_header - skb->mac_header;
1550 	__skb_pull(skb, skb->mac_len);
1551 
1552 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1553 		struct net_device *dev = skb->dev;
1554 		struct ethtool_drvinfo info = {};
1555 
1556 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1557 			dev->ethtool_ops->get_drvinfo(dev, &info);
1558 
1559 		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1560 			"ip_summed=%d",
1561 		     info.driver, dev ? dev->features : 0L,
1562 		     skb->sk ? skb->sk->sk_route_caps : 0L,
1563 		     skb->len, skb->data_len, skb->ip_summed);
1564 
1565 		if (skb_header_cloned(skb) &&
1566 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1567 			return ERR_PTR(err);
1568 	}
1569 
1570 	rcu_read_lock();
1571 	list_for_each_entry_rcu(ptype,
1572 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1573 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1574 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1575 				err = ptype->gso_send_check(skb);
1576 				segs = ERR_PTR(err);
1577 				if (err || skb_gso_ok(skb, features))
1578 					break;
1579 				__skb_push(skb, (skb->data -
1580 						 skb_network_header(skb)));
1581 			}
1582 			segs = ptype->gso_segment(skb, features);
1583 			break;
1584 		}
1585 	}
1586 	rcu_read_unlock();
1587 
1588 	__skb_push(skb, skb->data - skb_mac_header(skb));
1589 
1590 	return segs;
1591 }
1592 
1593 EXPORT_SYMBOL(skb_gso_segment);
1594 
1595 /* Take action when hardware reception checksum errors are detected. */
1596 #ifdef CONFIG_BUG
1597 void netdev_rx_csum_fault(struct net_device *dev)
1598 {
1599 	if (net_ratelimit()) {
1600 		printk(KERN_ERR "%s: hw csum failure.\n",
1601 			dev ? dev->name : "<unknown>");
1602 		dump_stack();
1603 	}
1604 }
1605 EXPORT_SYMBOL(netdev_rx_csum_fault);
1606 #endif
1607 
1608 /* Actually, we should eliminate this check as soon as we know that:
1609  * 1. An IOMMU is present and allows mapping all of memory.
1610  * 2. No high memory really exists on this machine.
1611  */
1612 
1613 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1614 {
1615 #ifdef CONFIG_HIGHMEM
1616 	int i;
1617 
1618 	if (dev->features & NETIF_F_HIGHDMA)
1619 		return 0;
1620 
1621 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1622 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1623 			return 1;
1624 
1625 #endif
1626 	return 0;
1627 }
1628 
1629 struct dev_gso_cb {
1630 	void (*destructor)(struct sk_buff *skb);
1631 };
1632 
1633 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1634 
1635 static void dev_gso_skb_destructor(struct sk_buff *skb)
1636 {
1637 	struct dev_gso_cb *cb;
1638 
1639 	do {
1640 		struct sk_buff *nskb = skb->next;
1641 
1642 		skb->next = nskb->next;
1643 		nskb->next = NULL;
1644 		kfree_skb(nskb);
1645 	} while (skb->next);
1646 
1647 	cb = DEV_GSO_CB(skb);
1648 	if (cb->destructor)
1649 		cb->destructor(skb);
1650 }
1651 
1652 /**
1653  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1654  *	@skb: buffer to segment
1655  *
1656  *	This function segments the given skb and stores the list of segments
1657  *	in skb->next.
1658  */
1659 static int dev_gso_segment(struct sk_buff *skb)
1660 {
1661 	struct net_device *dev = skb->dev;
1662 	struct sk_buff *segs;
1663 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1664 					 NETIF_F_SG : 0);
1665 
1666 	segs = skb_gso_segment(skb, features);
1667 
1668 	/* Verifying header integrity only. */
1669 	if (!segs)
1670 		return 0;
1671 
1672 	if (IS_ERR(segs))
1673 		return PTR_ERR(segs);
1674 
1675 	skb->next = segs;
1676 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1677 	skb->destructor = dev_gso_skb_destructor;
1678 
1679 	return 0;
1680 }
1681 
1682 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1683 			struct netdev_queue *txq)
1684 {
1685 	const struct net_device_ops *ops = dev->netdev_ops;
1686 	int rc;
1687 
1688 	if (likely(!skb->next)) {
1689 		if (!list_empty(&ptype_all))
1690 			dev_queue_xmit_nit(skb, dev);
1691 
1692 		if (netif_needs_gso(dev, skb)) {
1693 			if (unlikely(dev_gso_segment(skb)))
1694 				goto out_kfree_skb;
1695 			if (skb->next)
1696 				goto gso;
1697 		}
1698 
1699 		/*
1700 		 * If the device doesn't need skb->dst, release it right now while
1701 		 * it's hot in this CPU's cache
1702 		 */
1703 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1704 			skb_dst_drop(skb);
1705 
1706 		rc = ops->ndo_start_xmit(skb, dev);
1707 		if (rc == 0)
1708 			txq_trans_update(txq);
1709 		/*
1710 		 * TODO: if skb_orphan() was called by
1711 		 * dev->hard_start_xmit() (for example, the unmodified
1712 		 * igb driver does that; bnx2 doesn't), then
1713 		 * skb_tx_software_timestamp() will be unable to send
1714 		 * back the time stamp.
1715 		 *
1716 		 * How can this be prevented? Always create another
1717 		 * reference to the socket before calling
1718 		 * dev->hard_start_xmit()? Prevent that skb_orphan()
1719 		 * does anything in dev->hard_start_xmit() by clearing
1720 		 * the skb destructor before the call and restoring it
1721 		 * afterwards, then doing the skb_orphan() ourselves?
1722 		 */
1723 		return rc;
1724 	}
1725 
1726 gso:
1727 	do {
1728 		struct sk_buff *nskb = skb->next;
1729 
1730 		skb->next = nskb->next;
1731 		nskb->next = NULL;
1732 		rc = ops->ndo_start_xmit(nskb, dev);
1733 		if (unlikely(rc)) {
1734 			nskb->next = skb->next;
1735 			skb->next = nskb;
1736 			return rc;
1737 		}
1738 		txq_trans_update(txq);
1739 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1740 			return NETDEV_TX_BUSY;
1741 	} while (skb->next);
1742 
1743 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1744 
1745 out_kfree_skb:
1746 	kfree_skb(skb);
1747 	return 0;
1748 }
1749 
1750 static u32 skb_tx_hashrnd;
1751 
1752 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1753 {
1754 	u32 hash;
1755 
1756 	if (skb_rx_queue_recorded(skb)) {
1757 		hash = skb_get_rx_queue(skb);
1758 		while (unlikely (hash >= dev->real_num_tx_queues))
1759 			hash -= dev->real_num_tx_queues;
1760 		return hash;
1761 	}
1762 
1763 	if (skb->sk && skb->sk->sk_hash)
1764 		hash = skb->sk->sk_hash;
1765 	else
1766 		hash = skb->protocol;
1767 
1768 	hash = jhash_1word(hash, skb_tx_hashrnd);
1769 
1770 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1771 }
1772 EXPORT_SYMBOL(skb_tx_hash);
1773 
1774 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1775 					struct sk_buff *skb)
1776 {
1777 	const struct net_device_ops *ops = dev->netdev_ops;
1778 	u16 queue_index = 0;
1779 
1780 	if (ops->ndo_select_queue)
1781 		queue_index = ops->ndo_select_queue(dev, skb);
1782 	else if (dev->real_num_tx_queues > 1)
1783 		queue_index = skb_tx_hash(dev, skb);
1784 
1785 	skb_set_queue_mapping(skb, queue_index);
1786 	return netdev_get_tx_queue(dev, queue_index);
1787 }
1788 
1789 /**
1790  *	dev_queue_xmit - transmit a buffer
1791  *	@skb: buffer to transmit
1792  *
1793  *	Queue a buffer for transmission to a network device. The caller must
1794  *	have set the device and priority and built the buffer before calling
1795  *	this function. The function can be called from an interrupt.
1796  *
1797  *	A negative errno code is returned on a failure. A success does not
1798  *	guarantee the frame will be transmitted as it may be dropped due
1799  *	to congestion or traffic shaping.
1800  *
1801  * -----------------------------------------------------------------------------------
1802  *      I notice this method can also return errors from the queue disciplines,
1803  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1804  *      be positive.
1805  *
1806  *      Regardless of the return value, the skb is consumed, so it is currently
1807  *      difficult to retry a send to this method.  (You can bump the ref count
1808  *      before sending to hold a reference for retry if you are careful.)
1809  *
1810  *      When calling this method, interrupts MUST be enabled.  This is because
1811  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1812  *          --BLG
1813  */
1814 int dev_queue_xmit(struct sk_buff *skb)
1815 {
1816 	struct net_device *dev = skb->dev;
1817 	struct netdev_queue *txq;
1818 	struct Qdisc *q;
1819 	int rc = -ENOMEM;
1820 
1821 	/* GSO will handle the following emulations directly. */
1822 	if (netif_needs_gso(dev, skb))
1823 		goto gso;
1824 
1825 	if (skb_has_frags(skb) &&
1826 	    !(dev->features & NETIF_F_FRAGLIST) &&
1827 	    __skb_linearize(skb))
1828 		goto out_kfree_skb;
1829 
1830 	/* Fragmented skb is linearized if the device does not support SG,
1831 	 * or if at least one of the fragments is in highmem and the device
1832 	 * does not support DMA from it.
1833 	 */
1834 	if (skb_shinfo(skb)->nr_frags &&
1835 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1836 	    __skb_linearize(skb))
1837 		goto out_kfree_skb;
1838 
1839 	/* If packet is not checksummed and device does not support
1840 	 * checksumming for this protocol, complete checksumming here.
1841 	 */
1842 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1843 		skb_set_transport_header(skb, skb->csum_start -
1844 					      skb_headroom(skb));
1845 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1846 			goto out_kfree_skb;
1847 	}
1848 
1849 gso:
1850 	/* Disable soft irqs for various locks below. Also
1851 	 * stops preemption for RCU.
1852 	 */
1853 	rcu_read_lock_bh();
1854 
1855 	txq = dev_pick_tx(dev, skb);
1856 	q = rcu_dereference(txq->qdisc);
1857 
1858 #ifdef CONFIG_NET_CLS_ACT
1859 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1860 #endif
1861 	if (q->enqueue) {
1862 		spinlock_t *root_lock = qdisc_lock(q);
1863 
1864 		spin_lock(root_lock);
1865 
1866 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1867 			kfree_skb(skb);
1868 			rc = NET_XMIT_DROP;
1869 		} else {
1870 			rc = qdisc_enqueue_root(skb, q);
1871 			qdisc_run(q);
1872 		}
1873 		spin_unlock(root_lock);
1874 
1875 		goto out;
1876 	}
1877 
1878 	/* The device has no queue. Common case for software devices:
1879 	   loopback, all sorts of tunnels...
1880 
1881 	   Really, it is unlikely that netif_tx_lock protection is necessary
1882 	   here.  (f.e. loopback and IP tunnels are clean, ignoring statistics
1883 	   counters.)
1884 	   However, it is possible that they rely on the protection
1885 	   we provide here.
1886 
1887 	   Check this and shoot the lock; it is not prone to deadlocks.
1888 	   Or just shoot the noqueue qdisc, which is even simpler 8)
1889 	 */
1890 	if (dev->flags & IFF_UP) {
1891 		int cpu = smp_processor_id(); /* ok because BHs are off */
1892 
1893 		if (txq->xmit_lock_owner != cpu) {
1894 
1895 			HARD_TX_LOCK(dev, txq, cpu);
1896 
1897 			if (!netif_tx_queue_stopped(txq)) {
1898 				rc = 0;
1899 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1900 					HARD_TX_UNLOCK(dev, txq);
1901 					goto out;
1902 				}
1903 			}
1904 			HARD_TX_UNLOCK(dev, txq);
1905 			if (net_ratelimit())
1906 				printk(KERN_CRIT "Virtual device %s asks to "
1907 				       "queue packet!\n", dev->name);
1908 		} else {
1909 			/* Recursion is detected! It is possible,
1910 			 * unfortunately */
1911 			if (net_ratelimit())
1912 				printk(KERN_CRIT "Dead loop on virtual device "
1913 				       "%s, fix it urgently!\n", dev->name);
1914 		}
1915 	}
1916 
1917 	rc = -ENETDOWN;
1918 	rcu_read_unlock_bh();
1919 
1920 out_kfree_skb:
1921 	kfree_skb(skb);
1922 	return rc;
1923 out:
1924 	rcu_read_unlock_bh();
1925 	return rc;
1926 }
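
/*
 * Example caller (an illustrative sketch, not lifted from a particular
 * protocol): code that has built a buffer and chosen the output device
 * typically does
 *
 *	skb->dev = dev;
 *	skb->protocol = htons(ETH_P_IP);
 *	rc = dev_queue_xmit(skb);
 *	if (rc)
 *		tx_dropped++;		(a hypothetical counter)
 *
 * and performs no kfree_skb() on failure, because dev_queue_xmit()
 * consumes the buffer regardless of the return value, as noted above.
 */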
1927 
1928 
1929 /*=======================================================================
1930 			Receiver routines
1931   =======================================================================*/
1932 
1933 int netdev_max_backlog __read_mostly = 1000;
1934 int netdev_budget __read_mostly = 300;
1935 int weight_p __read_mostly = 64;            /* old backlog weight */
1936 
1937 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1938 
1939 
1940 /**
1941  *	netif_rx	-	post buffer to the network code
1942  *	@skb: buffer to post
1943  *
1944  *	This function receives a packet from a device driver and queues it for
1945  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1946  *	may be dropped during processing for congestion control or by the
1947  *	protocol layers.
1948  *
1949  *	return values:
1950  *	NET_RX_SUCCESS	(no congestion)
1951  *	NET_RX_DROP     (packet was dropped)
1952  *
1953  */
1954 
1955 int netif_rx(struct sk_buff *skb)
1956 {
1957 	struct softnet_data *queue;
1958 	unsigned long flags;
1959 
1960 	/* if netpoll wants it, pretend we never saw it */
1961 	if (netpoll_rx(skb))
1962 		return NET_RX_DROP;
1963 
1964 	if (!skb->tstamp.tv64)
1965 		net_timestamp(skb);
1966 
1967 	/*
1968 	 * The code is arranged so that the path is shortest when
1969 	 * the CPU is congested but still operating.
1970 	 */
1971 	local_irq_save(flags);
1972 	queue = &__get_cpu_var(softnet_data);
1973 
1974 	__get_cpu_var(netdev_rx_stat).total++;
1975 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1976 		if (queue->input_pkt_queue.qlen) {
1977 enqueue:
1978 			__skb_queue_tail(&queue->input_pkt_queue, skb);
1979 			local_irq_restore(flags);
1980 			return NET_RX_SUCCESS;
1981 		}
1982 
1983 		napi_schedule(&queue->backlog);
1984 		goto enqueue;
1985 	}
1986 
1987 	__get_cpu_var(netdev_rx_stat).dropped++;
1988 	local_irq_restore(flags);
1989 
1990 	kfree_skb(skb);
1991 	return NET_RX_DROP;
1992 }
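
/*
 * Example (sketch of a hypothetical non-NAPI driver): the RX interrupt
 * handler builds an skb for the received frame and hands it off with
 *
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 *
 * netif_rx() only queues the buffer on the per-CPU backlog, so calling
 * it from hard interrupt context, as here, is fine.
 */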
1993 
1994 int netif_rx_ni(struct sk_buff *skb)
1995 {
1996 	int err;
1997 
1998 	preempt_disable();
1999 	err = netif_rx(skb);
2000 	if (local_softirq_pending())
2001 		do_softirq();
2002 	preempt_enable();
2003 
2004 	return err;
2005 }
2006 
2007 EXPORT_SYMBOL(netif_rx_ni);
2008 
2009 static void net_tx_action(struct softirq_action *h)
2010 {
2011 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2012 
2013 	if (sd->completion_queue) {
2014 		struct sk_buff *clist;
2015 
2016 		local_irq_disable();
2017 		clist = sd->completion_queue;
2018 		sd->completion_queue = NULL;
2019 		local_irq_enable();
2020 
2021 		while (clist) {
2022 			struct sk_buff *skb = clist;
2023 			clist = clist->next;
2024 
2025 			WARN_ON(atomic_read(&skb->users));
2026 			__kfree_skb(skb);
2027 		}
2028 	}
2029 
2030 	if (sd->output_queue) {
2031 		struct Qdisc *head;
2032 
2033 		local_irq_disable();
2034 		head = sd->output_queue;
2035 		sd->output_queue = NULL;
2036 		local_irq_enable();
2037 
2038 		while (head) {
2039 			struct Qdisc *q = head;
2040 			spinlock_t *root_lock;
2041 
2042 			head = head->next_sched;
2043 
2044 			root_lock = qdisc_lock(q);
2045 			if (spin_trylock(root_lock)) {
2046 				smp_mb__before_clear_bit();
2047 				clear_bit(__QDISC_STATE_SCHED,
2048 					  &q->state);
2049 				qdisc_run(q);
2050 				spin_unlock(root_lock);
2051 			} else {
2052 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2053 					      &q->state)) {
2054 					__netif_reschedule(q);
2055 				} else {
2056 					smp_mb__before_clear_bit();
2057 					clear_bit(__QDISC_STATE_SCHED,
2058 						  &q->state);
2059 				}
2060 			}
2061 		}
2062 	}
2063 }
2064 
2065 static inline int deliver_skb(struct sk_buff *skb,
2066 			      struct packet_type *pt_prev,
2067 			      struct net_device *orig_dev)
2068 {
2069 	atomic_inc(&skb->users);
2070 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2071 }
2072 
2073 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2074 
2075 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2076 /* This hook is defined here for ATM LANE */
2077 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2078 			     unsigned char *addr) __read_mostly;
2079 EXPORT_SYMBOL(br_fdb_test_addr_hook);
2080 #endif
2081 
2082 /*
2083  * If the bridge module is loaded, call the bridging hook.
2084  * Returns NULL if the packet was consumed.
2085  */
2086 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2087 					struct sk_buff *skb) __read_mostly;
2088 EXPORT_SYMBOL(br_handle_frame_hook);
2089 
2090 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2091 					    struct packet_type **pt_prev, int *ret,
2092 					    struct net_device *orig_dev)
2093 {
2094 	struct net_bridge_port *port;
2095 
2096 	if (skb->pkt_type == PACKET_LOOPBACK ||
2097 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2098 		return skb;
2099 
2100 	if (*pt_prev) {
2101 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2102 		*pt_prev = NULL;
2103 	}
2104 
2105 	return br_handle_frame_hook(port, skb);
2106 }
2107 #else
2108 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2109 #endif
2110 
2111 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2112 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2113 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2114 
2115 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2116 					     struct packet_type **pt_prev,
2117 					     int *ret,
2118 					     struct net_device *orig_dev)
2119 {
2120 	if (skb->dev->macvlan_port == NULL)
2121 		return skb;
2122 
2123 	if (*pt_prev) {
2124 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2125 		*pt_prev = NULL;
2126 	}
2127 	return macvlan_handle_frame_hook(skb);
2128 }
2129 #else
2130 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2131 #endif
2132 
2133 #ifdef CONFIG_NET_CLS_ACT
2134 /* TODO: Maybe we should just force sch_ingress to be compiled in
2135  * when CONFIG_NET_CLS_ACT is enabled; otherwise we currently pay for a
2136  * few useless instructions (a compare and two extra stores) when the
2137  * ingress scheduler is not built but CONFIG_NET_CLS_ACT is set.
2138  * NOTE: This doesn't remove any functionality; without the ingress
2139  * scheduler, you just can't add policies on ingress.
2140  *
2141  */
2142 static int ing_filter(struct sk_buff *skb)
2143 {
2144 	struct net_device *dev = skb->dev;
2145 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2146 	struct netdev_queue *rxq;
2147 	int result = TC_ACT_OK;
2148 	struct Qdisc *q;
2149 
2150 	if (MAX_RED_LOOP < ttl++) {
2151 		printk(KERN_WARNING
2152 		       "Redir loop detected, dropping packet (%d->%d)\n",
2153 		       skb->iif, dev->ifindex);
2154 		return TC_ACT_SHOT;
2155 	}
2156 
2157 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2158 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2159 
2160 	rxq = &dev->rx_queue;
2161 
2162 	q = rxq->qdisc;
2163 	if (q != &noop_qdisc) {
2164 		spin_lock(qdisc_lock(q));
2165 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2166 			result = qdisc_enqueue_root(skb, q);
2167 		spin_unlock(qdisc_lock(q));
2168 	}
2169 
2170 	return result;
2171 }
2172 
2173 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2174 					 struct packet_type **pt_prev,
2175 					 int *ret, struct net_device *orig_dev)
2176 {
2177 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2178 		goto out;
2179 
2180 	if (*pt_prev) {
2181 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2182 		*pt_prev = NULL;
2183 	} else {
2184 		/* Huh? Why does turning on AF_PACKET affect this? */
2185 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2186 	}
2187 
2188 	switch (ing_filter(skb)) {
2189 	case TC_ACT_SHOT:
2190 	case TC_ACT_STOLEN:
2191 		kfree_skb(skb);
2192 		return NULL;
2193 	}
2194 
2195 out:
2196 	skb->tc_verd = 0;
2197 	return skb;
2198 }
2199 #endif
2200 
2201 /*
2202  * 	netif_nit_deliver - deliver received packets to network taps
2203  * 	@skb: buffer
2204  *
2205  * 	This function is used to deliver incoming packets to network
2206  * 	taps. It should be used when the normal netif_receive_skb path
2207  * 	is bypassed, for example because of VLAN acceleration.
2208  */
2209 void netif_nit_deliver(struct sk_buff *skb)
2210 {
2211 	struct packet_type *ptype;
2212 
2213 	if (list_empty(&ptype_all))
2214 		return;
2215 
2216 	skb_reset_network_header(skb);
2217 	skb_reset_transport_header(skb);
2218 	skb->mac_len = skb->network_header - skb->mac_header;
2219 
2220 	rcu_read_lock();
2221 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2222 		if (!ptype->dev || ptype->dev == skb->dev)
2223 			deliver_skb(skb, ptype, skb->dev);
2224 	}
2225 	rcu_read_unlock();
2226 }
2227 
2228 /**
2229  *	netif_receive_skb - process receive buffer from network
2230  *	@skb: buffer to process
2231  *
2232  *	netif_receive_skb() is the main receive data processing function.
2233  *	It always succeeds. The buffer may be dropped during processing
2234  *	for congestion control or by the protocol layers.
2235  *
2236  *	This function may only be called from softirq context and interrupts
2237  *	should be enabled.
2238  *
2239  *	Return values (usually ignored):
2240  *	NET_RX_SUCCESS: no congestion
2241  *	NET_RX_DROP: packet was dropped
2242  */
2243 int netif_receive_skb(struct sk_buff *skb)
2244 {
2245 	struct packet_type *ptype, *pt_prev;
2246 	struct net_device *orig_dev;
2247 	struct net_device *null_or_orig;
2248 	int ret = NET_RX_DROP;
2249 	__be16 type;
2250 
2251 	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2252 		return NET_RX_SUCCESS;
2253 
2254 	/* if we've gotten here through NAPI, check netpoll */
2255 	if (netpoll_receive_skb(skb))
2256 		return NET_RX_DROP;
2257 
2258 	if (!skb->tstamp.tv64)
2259 		net_timestamp(skb);
2260 
2261 	if (!skb->iif)
2262 		skb->iif = skb->dev->ifindex;
2263 
2264 	null_or_orig = NULL;
2265 	orig_dev = skb->dev;
2266 	if (orig_dev->master) {
2267 		if (skb_bond_should_drop(skb))
2268 			null_or_orig = orig_dev; /* deliver only exact match */
2269 		else
2270 			skb->dev = orig_dev->master;
2271 	}
2272 
2273 	__get_cpu_var(netdev_rx_stat).total++;
2274 
2275 	skb_reset_network_header(skb);
2276 	skb_reset_transport_header(skb);
2277 	skb->mac_len = skb->network_header - skb->mac_header;
2278 
2279 	pt_prev = NULL;
2280 
2281 	rcu_read_lock();
2282 
2283 #ifdef CONFIG_NET_CLS_ACT
2284 	if (skb->tc_verd & TC_NCLS) {
2285 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2286 		goto ncls;
2287 	}
2288 #endif
2289 
2290 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2291 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2292 		    ptype->dev == orig_dev) {
2293 			if (pt_prev)
2294 				ret = deliver_skb(skb, pt_prev, orig_dev);
2295 			pt_prev = ptype;
2296 		}
2297 	}
2298 
2299 #ifdef CONFIG_NET_CLS_ACT
2300 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2301 	if (!skb)
2302 		goto out;
2303 ncls:
2304 #endif
2305 
2306 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2307 	if (!skb)
2308 		goto out;
2309 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2310 	if (!skb)
2311 		goto out;
2312 
2313 	type = skb->protocol;
2314 	list_for_each_entry_rcu(ptype,
2315 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2316 		if (ptype->type == type &&
2317 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2318 		     ptype->dev == orig_dev)) {
2319 			if (pt_prev)
2320 				ret = deliver_skb(skb, pt_prev, orig_dev);
2321 			pt_prev = ptype;
2322 		}
2323 	}
2324 
2325 	if (pt_prev) {
2326 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2327 	} else {
2328 		kfree_skb(skb);
2329 		/* Jamal, now you will not be able to escape explaining
2330 		 * to me how you were going to use this. :-)
2331 		 */
2332 		ret = NET_RX_DROP;
2333 	}
2334 
2335 out:
2336 	rcu_read_unlock();
2337 	return ret;
2338 }
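
/*
 * Example (illustrative): a NAPI driver's poll routine delivers each
 * completed buffer from softirq context with
 *
 *	skb->protocol = eth_type_trans(skb, netdev);
 *	netif_receive_skb(skb);
 *
 * much as process_backlog() below does for skbs queued by netif_rx().
 */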
2339 
2340 /* Network device is going away, flush any packets still pending  */
2341 static void flush_backlog(void *arg)
2342 {
2343 	struct net_device *dev = arg;
2344 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2345 	struct sk_buff *skb, *tmp;
2346 
2347 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2348 		if (skb->dev == dev) {
2349 			__skb_unlink(skb, &queue->input_pkt_queue);
2350 			kfree_skb(skb);
2351 		}
2352 }
2353 
2354 static int napi_gro_complete(struct sk_buff *skb)
2355 {
2356 	struct packet_type *ptype;
2357 	__be16 type = skb->protocol;
2358 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2359 	int err = -ENOENT;
2360 
2361 	if (NAPI_GRO_CB(skb)->count == 1) {
2362 		skb_shinfo(skb)->gso_size = 0;
2363 		goto out;
2364 	}
2365 
2366 	rcu_read_lock();
2367 	list_for_each_entry_rcu(ptype, head, list) {
2368 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2369 			continue;
2370 
2371 		err = ptype->gro_complete(skb);
2372 		break;
2373 	}
2374 	rcu_read_unlock();
2375 
2376 	if (err) {
2377 		WARN_ON(&ptype->list == head);
2378 		kfree_skb(skb);
2379 		return NET_RX_SUCCESS;
2380 	}
2381 
2382 out:
2383 	return netif_receive_skb(skb);
2384 }
2385 
2386 void napi_gro_flush(struct napi_struct *napi)
2387 {
2388 	struct sk_buff *skb, *next;
2389 
2390 	for (skb = napi->gro_list; skb; skb = next) {
2391 		next = skb->next;
2392 		skb->next = NULL;
2393 		napi_gro_complete(skb);
2394 	}
2395 
2396 	napi->gro_count = 0;
2397 	napi->gro_list = NULL;
2398 }
2399 EXPORT_SYMBOL(napi_gro_flush);
2400 
2401 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2402 {
2403 	struct sk_buff **pp = NULL;
2404 	struct packet_type *ptype;
2405 	__be16 type = skb->protocol;
2406 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2407 	int same_flow;
2408 	int mac_len;
2409 	int ret;
2410 
2411 	if (!(skb->dev->features & NETIF_F_GRO))
2412 		goto normal;
2413 
2414 	if (skb_is_gso(skb) || skb_has_frags(skb))
2415 		goto normal;
2416 
2417 	rcu_read_lock();
2418 	list_for_each_entry_rcu(ptype, head, list) {
2419 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2420 			continue;
2421 
2422 		skb_set_network_header(skb, skb_gro_offset(skb));
2423 		mac_len = skb->network_header - skb->mac_header;
2424 		skb->mac_len = mac_len;
2425 		NAPI_GRO_CB(skb)->same_flow = 0;
2426 		NAPI_GRO_CB(skb)->flush = 0;
2427 		NAPI_GRO_CB(skb)->free = 0;
2428 
2429 		pp = ptype->gro_receive(&napi->gro_list, skb);
2430 		break;
2431 	}
2432 	rcu_read_unlock();
2433 
2434 	if (&ptype->list == head)
2435 		goto normal;
2436 
2437 	same_flow = NAPI_GRO_CB(skb)->same_flow;
2438 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2439 
2440 	if (pp) {
2441 		struct sk_buff *nskb = *pp;
2442 
2443 		*pp = nskb->next;
2444 		nskb->next = NULL;
2445 		napi_gro_complete(nskb);
2446 		napi->gro_count--;
2447 	}
2448 
2449 	if (same_flow)
2450 		goto ok;
2451 
2452 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2453 		goto normal;
2454 
2455 	napi->gro_count++;
2456 	NAPI_GRO_CB(skb)->count = 1;
2457 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2458 	skb->next = napi->gro_list;
2459 	napi->gro_list = skb;
2460 	ret = GRO_HELD;
2461 
2462 pull:
2463 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
2464 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
2465 
2466 		BUG_ON(skb->end - skb->tail < grow);
2467 
2468 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2469 
2470 		skb->tail += grow;
2471 		skb->data_len -= grow;
2472 
2473 		skb_shinfo(skb)->frags[0].page_offset += grow;
2474 		skb_shinfo(skb)->frags[0].size -= grow;
2475 
2476 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2477 			put_page(skb_shinfo(skb)->frags[0].page);
2478 			memmove(skb_shinfo(skb)->frags,
2479 				skb_shinfo(skb)->frags + 1,
2480 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
2481 		}
2482 	}
2483 
2484 ok:
2485 	return ret;
2486 
2487 normal:
2488 	ret = GRO_NORMAL;
2489 	goto pull;
2490 }
2491 EXPORT_SYMBOL(dev_gro_receive);
2492 
2493 static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2494 {
2495 	struct sk_buff *p;
2496 
2497 	if (netpoll_rx_on(skb))
2498 		return GRO_NORMAL;
2499 
2500 	for (p = napi->gro_list; p; p = p->next) {
2501 		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2502 			&& !compare_ether_header(skb_mac_header(p),
2503 						 skb_gro_mac_header(skb));
2504 		NAPI_GRO_CB(p)->flush = 0;
2505 	}
2506 
2507 	return dev_gro_receive(napi, skb);
2508 }
2509 
2510 int napi_skb_finish(int ret, struct sk_buff *skb)
2511 {
2512 	int err = NET_RX_SUCCESS;
2513 
2514 	switch (ret) {
2515 	case GRO_NORMAL:
2516 		return netif_receive_skb(skb);
2517 
2518 	case GRO_DROP:
2519 		err = NET_RX_DROP;
2520 		/* fall through */
2521 
2522 	case GRO_MERGED_FREE:
2523 		kfree_skb(skb);
2524 		break;
2525 	}
2526 
2527 	return err;
2528 }
2529 EXPORT_SYMBOL(napi_skb_finish);
2530 
2531 void skb_gro_reset_offset(struct sk_buff *skb)
2532 {
2533 	NAPI_GRO_CB(skb)->data_offset = 0;
2534 	NAPI_GRO_CB(skb)->frag0 = NULL;
2535 	NAPI_GRO_CB(skb)->frag0_len = 0;
2536 
2537 	if (skb->mac_header == skb->tail &&
2538 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
2539 		NAPI_GRO_CB(skb)->frag0 =
2540 			page_address(skb_shinfo(skb)->frags[0].page) +
2541 			skb_shinfo(skb)->frags[0].page_offset;
2542 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2543 	}
2544 }
2545 EXPORT_SYMBOL(skb_gro_reset_offset);
2546 
2547 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2548 {
2549 	skb_gro_reset_offset(skb);
2550 
2551 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
2552 }
2553 EXPORT_SYMBOL(napi_gro_receive);
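
/*
 * Example (illustrative): a NAPI driver opting in to GRO simply
 * substitutes napi_gro_receive() for netif_receive_skb() in its poll
 * routine:
 *
 *	skb->protocol = eth_type_trans(skb, netdev);
 *	napi_gro_receive(&priv->napi, skb);
 *
 * where "priv" stands for the driver's private state and is only an
 * assumption of this sketch.
 */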
2554 
2555 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2556 {
2557 	__skb_pull(skb, skb_headlen(skb));
2558 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2559 
2560 	napi->skb = skb;
2561 }
2562 EXPORT_SYMBOL(napi_reuse_skb);
2563 
2564 struct sk_buff *napi_get_frags(struct napi_struct *napi)
2565 {
2566 	struct net_device *dev = napi->dev;
2567 	struct sk_buff *skb = napi->skb;
2568 
2569 	if (!skb) {
2570 		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2571 		if (!skb)
2572 			goto out;
2573 
2574 		skb_reserve(skb, NET_IP_ALIGN);
2575 
2576 		napi->skb = skb;
2577 	}
2578 
2579 out:
2580 	return skb;
2581 }
2582 EXPORT_SYMBOL(napi_get_frags);
2583 
2584 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2585 {
2586 	int err = NET_RX_SUCCESS;
2587 
2588 	switch (ret) {
2589 	case GRO_NORMAL:
2590 	case GRO_HELD:
2591 		skb->protocol = eth_type_trans(skb, napi->dev);
2592 
2593 		if (ret == GRO_NORMAL)
2594 			return netif_receive_skb(skb);
2595 
2596 		skb_gro_pull(skb, -ETH_HLEN);
2597 		break;
2598 
2599 	case GRO_DROP:
2600 		err = NET_RX_DROP;
2601 		/* fall through */
2602 
2603 	case GRO_MERGED_FREE:
2604 		napi_reuse_skb(napi, skb);
2605 		break;
2606 	}
2607 
2608 	return err;
2609 }
2610 EXPORT_SYMBOL(napi_frags_finish);
2611 
2612 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2613 {
2614 	struct sk_buff *skb = napi->skb;
2615 	struct ethhdr *eth;
2616 	unsigned int hlen;
2617 	unsigned int off;
2618 
2619 	napi->skb = NULL;
2620 
2621 	skb_reset_mac_header(skb);
2622 	skb_gro_reset_offset(skb);
2623 
2624 	off = skb_gro_offset(skb);
2625 	hlen = off + sizeof(*eth);
2626 	eth = skb_gro_header_fast(skb, off);
2627 	if (skb_gro_header_hard(skb, hlen)) {
2628 		eth = skb_gro_header_slow(skb, hlen, off);
2629 		if (unlikely(!eth)) {
2630 			napi_reuse_skb(napi, skb);
2631 			skb = NULL;
2632 			goto out;
2633 		}
2634 	}
2635 
2636 	skb_gro_pull(skb, sizeof(*eth));
2637 
2638 	/*
2639 	 * This works because the only protocols we care about don't require
2640 	 * special handling.  We'll fix it up properly at the end.
2641 	 */
2642 	skb->protocol = eth->h_proto;
2643 
2644 out:
2645 	return skb;
2646 }
2647 EXPORT_SYMBOL(napi_frags_skb);
2648 
2649 int napi_gro_frags(struct napi_struct *napi)
2650 {
2651 	struct sk_buff *skb = napi_frags_skb(napi);
2652 
2653 	if (!skb)
2654 		return NET_RX_DROP;
2655 
2656 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2657 }
2658 EXPORT_SYMBOL(napi_gro_frags);
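
/*
 * Example (rough sketch; the buffer management details are driver
 * specific): a driver that receives directly into pages can use the
 * frag interface above instead of building a linear skb:
 *
 *	skb = napi_get_frags(napi);
 *	if (!skb)
 *		return;			(recycle the RX buffer instead)
 *	skb_fill_page_desc(skb, 0, page, offset, len);
 *	skb->len += len;
 *	skb->data_len += len;
 *	skb->truesize += len;
 *	napi_gro_frags(napi);
 *
 * napi_frags_skb() then pulls the Ethernet header out of frag0 before
 * the packet is fed to dev_gro_receive().
 */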
2659 
2660 static int process_backlog(struct napi_struct *napi, int quota)
2661 {
2662 	int work = 0;
2663 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2664 	unsigned long start_time = jiffies;
2665 
2666 	napi->weight = weight_p;
2667 	do {
2668 		struct sk_buff *skb;
2669 
2670 		local_irq_disable();
2671 		skb = __skb_dequeue(&queue->input_pkt_queue);
2672 		if (!skb) {
2673 			__napi_complete(napi);
2674 			local_irq_enable();
2675 			break;
2676 		}
2677 		local_irq_enable();
2678 
2679 		netif_receive_skb(skb);
2680 	} while (++work < quota && jiffies == start_time);
2681 
2682 	return work;
2683 }
2684 
2685 /**
2686  * __napi_schedule - schedule for receive
2687  * @n: entry to schedule
2688  *
2689  * The entry's receive function will be scheduled to run
2690  */
2691 void __napi_schedule(struct napi_struct *n)
2692 {
2693 	unsigned long flags;
2694 
2695 	local_irq_save(flags);
2696 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2697 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2698 	local_irq_restore(flags);
2699 }
2700 EXPORT_SYMBOL(__napi_schedule);
2701 
2702 void __napi_complete(struct napi_struct *n)
2703 {
2704 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2705 	BUG_ON(n->gro_list);
2706 
2707 	list_del(&n->poll_list);
2708 	smp_mb__before_clear_bit();
2709 	clear_bit(NAPI_STATE_SCHED, &n->state);
2710 }
2711 EXPORT_SYMBOL(__napi_complete);
2712 
2713 void napi_complete(struct napi_struct *n)
2714 {
2715 	unsigned long flags;
2716 
2717 	/*
2718 	 * Don't let NAPI dequeue from the CPU poll list
2719 	 * just in case it's running on a different CPU.
2720 	 */
2721 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2722 		return;
2723 
2724 	napi_gro_flush(n);
2725 	local_irq_save(flags);
2726 	__napi_complete(n);
2727 	local_irq_restore(flags);
2728 }
2729 EXPORT_SYMBOL(napi_complete);
2730 
2731 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2732 		    int (*poll)(struct napi_struct *, int), int weight)
2733 {
2734 	INIT_LIST_HEAD(&napi->poll_list);
2735 	napi->gro_count = 0;
2736 	napi->gro_list = NULL;
2737 	napi->skb = NULL;
2738 	napi->poll = poll;
2739 	napi->weight = weight;
2740 	list_add(&napi->dev_list, &dev->napi_list);
2741 	napi->dev = dev;
2742 #ifdef CONFIG_NETPOLL
2743 	spin_lock_init(&napi->poll_lock);
2744 	napi->poll_owner = -1;
2745 #endif
2746 	set_bit(NAPI_STATE_SCHED, &napi->state);
2747 }
2748 EXPORT_SYMBOL(netif_napi_add);
2749 
2750 void netif_napi_del(struct napi_struct *napi)
2751 {
2752 	struct sk_buff *skb, *next;
2753 
2754 	list_del_init(&napi->dev_list);
2755 	napi_free_frags(napi);
2756 
2757 	for (skb = napi->gro_list; skb; skb = next) {
2758 		next = skb->next;
2759 		skb->next = NULL;
2760 		kfree_skb(skb);
2761 	}
2762 
2763 	napi->gro_list = NULL;
2764 	napi->gro_count = 0;
2765 }
2766 EXPORT_SYMBOL(netif_napi_del);
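
/*
 * Example of the usual NAPI life cycle (an illustrative sketch; the
 * "foo"/"priv" names are placeholders, not a real driver):
 *
 *	probe:	netif_napi_add(netdev, &priv->napi, foo_poll, 64);
 *	open:	napi_enable(&priv->napi);
 *	irq:	mask device RX interrupts, then napi_schedule(&priv->napi);
 *	poll:	work = foo_clean_rx(priv, budget);
 *		if (work < budget) {
 *			napi_complete(&priv->napi);
 *			unmask device RX interrupts;
 *		}
 *		return work;
 *	close:	napi_disable(&priv->napi);
 *	remove:	netif_napi_del(&priv->napi);
 */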
2767 
2768 
2769 static void net_rx_action(struct softirq_action *h)
2770 {
2771 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2772 	unsigned long time_limit = jiffies + 2;
2773 	int budget = netdev_budget;
2774 	void *have;
2775 
2776 	local_irq_disable();
2777 
2778 	while (!list_empty(list)) {
2779 		struct napi_struct *n;
2780 		int work, weight;
2781 
2782 		/* If the softirq window is exhausted then punt.
2783 		 * Allow this to run for 2 jiffies, which will allow
2784 		 * an average latency of 1.5/HZ.
2785 		 */
2786 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2787 			goto softnet_break;
2788 
2789 		local_irq_enable();
2790 
2791 		/* Even though interrupts have been re-enabled, this
2792 		 * access is safe because interrupts can only add new
2793 		 * entries to the tail of this list, and only ->poll()
2794 		 * calls can remove this head entry from the list.
2795 		 */
2796 		n = list_entry(list->next, struct napi_struct, poll_list);
2797 
2798 		have = netpoll_poll_lock(n);
2799 
2800 		weight = n->weight;
2801 
2802 		/* This NAPI_STATE_SCHED test is for avoiding a race
2803 		 * with netpoll's poll_napi().  Only the entity which
2804 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2805 		 * actually make the ->poll() call.  Therefore we avoid
2806 	 * accidentally calling ->poll() when NAPI is not scheduled.
2807 		 */
2808 		work = 0;
2809 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
2810 			work = n->poll(n, weight);
2811 			trace_napi_poll(n);
2812 		}
2813 
2814 		WARN_ON_ONCE(work > weight);
2815 
2816 		budget -= work;
2817 
2818 		local_irq_disable();
2819 
2820 		/* Drivers must not modify the NAPI state if they
2821 		 * consume the entire weight.  In such cases this code
2822 		 * still "owns" the NAPI instance and therefore can
2823 		 * move the instance around on the list at-will.
2824 		 */
2825 		if (unlikely(work == weight)) {
2826 			if (unlikely(napi_disable_pending(n))) {
2827 				local_irq_enable();
2828 				napi_complete(n);
2829 				local_irq_disable();
2830 			} else
2831 				list_move_tail(&n->poll_list, list);
2832 		}
2833 
2834 		netpoll_poll_unlock(have);
2835 	}
2836 out:
2837 	local_irq_enable();
2838 
2839 #ifdef CONFIG_NET_DMA
2840 	/*
2841 	 * There may not be any more sk_buffs coming right now, so push
2842 	 * any pending DMA copies to hardware
2843 	 */
2844 	dma_issue_pending_all();
2845 #endif
2846 
2847 	return;
2848 
2849 softnet_break:
2850 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2851 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2852 	goto out;
2853 }
2854 
2855 static gifconf_func_t *gifconf_list[NPROTO];
2856 
2857 /**
2858  *	register_gifconf	-	register a SIOCGIF handler
2859  *	@family: Address family
2860  *	@gifconf: Function handler
2861  *
2862  *	Register protocol dependent address dumping routines. The handler
2863  *	that is passed must not be freed or reused until it has been replaced
2864  *	by another handler.
2865  */
2866 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2867 {
2868 	if (family >= NPROTO)
2869 		return -EINVAL;
2870 	gifconf_list[family] = gifconf;
2871 	return 0;
2872 }
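
/*
 * Example: IPv4 hooks up its SIOCGIFCONF implementation at init time
 * with a call along the lines of
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * (handler name shown for illustration; it lives in the IPv4 code, not
 * here).
 */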
2873 
2874 
2875 /*
2876  *	Map an interface index to its name (SIOCGIFNAME)
2877  */
2878 
2879 /*
2880  *	We need this ioctl for efficient implementation of the
2881  *	if_indextoname() function required by the IPv6 API.  Without
2882  *	it, we would have to search all the interfaces to find a
2883  *	match.  --pb
2884  */
2885 
2886 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2887 {
2888 	struct net_device *dev;
2889 	struct ifreq ifr;
2890 
2891 	/*
2892 	 *	Fetch the caller's info block.
2893 	 */
2894 
2895 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2896 		return -EFAULT;
2897 
2898 	read_lock(&dev_base_lock);
2899 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2900 	if (!dev) {
2901 		read_unlock(&dev_base_lock);
2902 		return -ENODEV;
2903 	}
2904 
2905 	strcpy(ifr.ifr_name, dev->name);
2906 	read_unlock(&dev_base_lock);
2907 
2908 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2909 		return -EFAULT;
2910 	return 0;
2911 }
2912 
2913 /*
2914  *	Perform a SIOCGIFCONF call. This structure will change
2915  *	size eventually, and there is nothing I can do about it.
2916  *	Thus we will need a 'compatibility mode'.
2917  */
2918 
2919 static int dev_ifconf(struct net *net, char __user *arg)
2920 {
2921 	struct ifconf ifc;
2922 	struct net_device *dev;
2923 	char __user *pos;
2924 	int len;
2925 	int total;
2926 	int i;
2927 
2928 	/*
2929 	 *	Fetch the caller's info block.
2930 	 */
2931 
2932 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2933 		return -EFAULT;
2934 
2935 	pos = ifc.ifc_buf;
2936 	len = ifc.ifc_len;
2937 
2938 	/*
2939 	 *	Loop over the interfaces, and write an info block for each.
2940 	 */
2941 
2942 	total = 0;
2943 	for_each_netdev(net, dev) {
2944 		for (i = 0; i < NPROTO; i++) {
2945 			if (gifconf_list[i]) {
2946 				int done;
2947 				if (!pos)
2948 					done = gifconf_list[i](dev, NULL, 0);
2949 				else
2950 					done = gifconf_list[i](dev, pos + total,
2951 							       len - total);
2952 				if (done < 0)
2953 					return -EFAULT;
2954 				total += done;
2955 			}
2956 		}
2957 	}
2958 
2959 	/*
2960 	 *	All done.  Write the updated control block back to the caller.
2961 	 */
2962 	ifc.ifc_len = total;
2963 
2964 	/*
2965 	 * 	Both BSD and Solaris return 0 here, so we do too.
2966 	 */
2967 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2968 }
2969 
2970 #ifdef CONFIG_PROC_FS
2971 /*
2972  *	This is invoked by the /proc filesystem handler to display a device
2973  *	in detail.
2974  */
2975 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2976 	__acquires(dev_base_lock)
2977 {
2978 	struct net *net = seq_file_net(seq);
2979 	loff_t off;
2980 	struct net_device *dev;
2981 
2982 	read_lock(&dev_base_lock);
2983 	if (!*pos)
2984 		return SEQ_START_TOKEN;
2985 
2986 	off = 1;
2987 	for_each_netdev(net, dev)
2988 		if (off++ == *pos)
2989 			return dev;
2990 
2991 	return NULL;
2992 }
2993 
2994 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2995 {
2996 	struct net *net = seq_file_net(seq);
2997 	++*pos;
2998 	return v == SEQ_START_TOKEN ?
2999 		first_net_device(net) : next_net_device((struct net_device *)v);
3000 }
3001 
3002 void dev_seq_stop(struct seq_file *seq, void *v)
3003 	__releases(dev_base_lock)
3004 {
3005 	read_unlock(&dev_base_lock);
3006 }
3007 
3008 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3009 {
3010 	const struct net_device_stats *stats = dev_get_stats(dev);
3011 
3012 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3013 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3014 		   dev->name, stats->rx_bytes, stats->rx_packets,
3015 		   stats->rx_errors,
3016 		   stats->rx_dropped + stats->rx_missed_errors,
3017 		   stats->rx_fifo_errors,
3018 		   stats->rx_length_errors + stats->rx_over_errors +
3019 		    stats->rx_crc_errors + stats->rx_frame_errors,
3020 		   stats->rx_compressed, stats->multicast,
3021 		   stats->tx_bytes, stats->tx_packets,
3022 		   stats->tx_errors, stats->tx_dropped,
3023 		   stats->tx_fifo_errors, stats->collisions,
3024 		   stats->tx_carrier_errors +
3025 		    stats->tx_aborted_errors +
3026 		    stats->tx_window_errors +
3027 		    stats->tx_heartbeat_errors,
3028 		   stats->tx_compressed);
3029 }
3030 
3031 /*
3032  *	Called from the PROCfs module. This now uses the new arbitrary sized
3033  *	/proc/net interface to create /proc/net/dev
3034  */
3035 static int dev_seq_show(struct seq_file *seq, void *v)
3036 {
3037 	if (v == SEQ_START_TOKEN)
3038 		seq_puts(seq, "Inter-|   Receive                            "
3039 			      "                    |  Transmit\n"
3040 			      " face |bytes    packets errs drop fifo frame "
3041 			      "compressed multicast|bytes    packets errs "
3042 			      "drop fifo colls carrier compressed\n");
3043 	else
3044 		dev_seq_printf_stats(seq, v);
3045 	return 0;
3046 }
3047 
3048 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3049 {
3050 	struct netif_rx_stats *rc = NULL;
3051 
3052 	while (*pos < nr_cpu_ids)
3053 		if (cpu_online(*pos)) {
3054 			rc = &per_cpu(netdev_rx_stat, *pos);
3055 			break;
3056 		} else
3057 			++*pos;
3058 	return rc;
3059 }
3060 
3061 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3062 {
3063 	return softnet_get_online(pos);
3064 }
3065 
3066 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3067 {
3068 	++*pos;
3069 	return softnet_get_online(pos);
3070 }
3071 
3072 static void softnet_seq_stop(struct seq_file *seq, void *v)
3073 {
3074 }
3075 
3076 static int softnet_seq_show(struct seq_file *seq, void *v)
3077 {
3078 	struct netif_rx_stats *s = v;
3079 
3080 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3081 		   s->total, s->dropped, s->time_squeeze, 0,
3082 		   0, 0, 0, 0, /* was fastroute */
3083 		   s->cpu_collision);
3084 	return 0;
3085 }
3086 
3087 static const struct seq_operations dev_seq_ops = {
3088 	.start = dev_seq_start,
3089 	.next  = dev_seq_next,
3090 	.stop  = dev_seq_stop,
3091 	.show  = dev_seq_show,
3092 };
3093 
3094 static int dev_seq_open(struct inode *inode, struct file *file)
3095 {
3096 	return seq_open_net(inode, file, &dev_seq_ops,
3097 			    sizeof(struct seq_net_private));
3098 }
3099 
3100 static const struct file_operations dev_seq_fops = {
3101 	.owner	 = THIS_MODULE,
3102 	.open    = dev_seq_open,
3103 	.read    = seq_read,
3104 	.llseek  = seq_lseek,
3105 	.release = seq_release_net,
3106 };
3107 
3108 static const struct seq_operations softnet_seq_ops = {
3109 	.start = softnet_seq_start,
3110 	.next  = softnet_seq_next,
3111 	.stop  = softnet_seq_stop,
3112 	.show  = softnet_seq_show,
3113 };
3114 
3115 static int softnet_seq_open(struct inode *inode, struct file *file)
3116 {
3117 	return seq_open(file, &softnet_seq_ops);
3118 }
3119 
3120 static const struct file_operations softnet_seq_fops = {
3121 	.owner	 = THIS_MODULE,
3122 	.open    = softnet_seq_open,
3123 	.read    = seq_read,
3124 	.llseek  = seq_lseek,
3125 	.release = seq_release,
3126 };
3127 
3128 static void *ptype_get_idx(loff_t pos)
3129 {
3130 	struct packet_type *pt = NULL;
3131 	loff_t i = 0;
3132 	int t;
3133 
3134 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3135 		if (i == pos)
3136 			return pt;
3137 		++i;
3138 	}
3139 
3140 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3141 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3142 			if (i == pos)
3143 				return pt;
3144 			++i;
3145 		}
3146 	}
3147 	return NULL;
3148 }
3149 
3150 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3151 	__acquires(RCU)
3152 {
3153 	rcu_read_lock();
3154 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3155 }
3156 
3157 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3158 {
3159 	struct packet_type *pt;
3160 	struct list_head *nxt;
3161 	int hash;
3162 
3163 	++*pos;
3164 	if (v == SEQ_START_TOKEN)
3165 		return ptype_get_idx(0);
3166 
3167 	pt = v;
3168 	nxt = pt->list.next;
3169 	if (pt->type == htons(ETH_P_ALL)) {
3170 		if (nxt != &ptype_all)
3171 			goto found;
3172 		hash = 0;
3173 		nxt = ptype_base[0].next;
3174 	} else
3175 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3176 
3177 	while (nxt == &ptype_base[hash]) {
3178 		if (++hash >= PTYPE_HASH_SIZE)
3179 			return NULL;
3180 		nxt = ptype_base[hash].next;
3181 	}
3182 found:
3183 	return list_entry(nxt, struct packet_type, list);
3184 }
3185 
3186 static void ptype_seq_stop(struct seq_file *seq, void *v)
3187 	__releases(RCU)
3188 {
3189 	rcu_read_unlock();
3190 }
3191 
3192 static int ptype_seq_show(struct seq_file *seq, void *v)
3193 {
3194 	struct packet_type *pt = v;
3195 
3196 	if (v == SEQ_START_TOKEN)
3197 		seq_puts(seq, "Type Device      Function\n");
3198 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3199 		if (pt->type == htons(ETH_P_ALL))
3200 			seq_puts(seq, "ALL ");
3201 		else
3202 			seq_printf(seq, "%04x", ntohs(pt->type));
3203 
3204 		seq_printf(seq, " %-8s %pF\n",
3205 			   pt->dev ? pt->dev->name : "", pt->func);
3206 	}
3207 
3208 	return 0;
3209 }
3210 
3211 static const struct seq_operations ptype_seq_ops = {
3212 	.start = ptype_seq_start,
3213 	.next  = ptype_seq_next,
3214 	.stop  = ptype_seq_stop,
3215 	.show  = ptype_seq_show,
3216 };
3217 
3218 static int ptype_seq_open(struct inode *inode, struct file *file)
3219 {
3220 	return seq_open_net(inode, file, &ptype_seq_ops,
3221 			sizeof(struct seq_net_private));
3222 }
3223 
3224 static const struct file_operations ptype_seq_fops = {
3225 	.owner	 = THIS_MODULE,
3226 	.open    = ptype_seq_open,
3227 	.read    = seq_read,
3228 	.llseek  = seq_lseek,
3229 	.release = seq_release_net,
3230 };
3231 
3232 
3233 static int __net_init dev_proc_net_init(struct net *net)
3234 {
3235 	int rc = -ENOMEM;
3236 
3237 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3238 		goto out;
3239 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3240 		goto out_dev;
3241 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3242 		goto out_softnet;
3243 
3244 	if (wext_proc_init(net))
3245 		goto out_ptype;
3246 	rc = 0;
3247 out:
3248 	return rc;
3249 out_ptype:
3250 	proc_net_remove(net, "ptype");
3251 out_softnet:
3252 	proc_net_remove(net, "softnet_stat");
3253 out_dev:
3254 	proc_net_remove(net, "dev");
3255 	goto out;
3256 }
3257 
3258 static void __net_exit dev_proc_net_exit(struct net *net)
3259 {
3260 	wext_proc_exit(net);
3261 
3262 	proc_net_remove(net, "ptype");
3263 	proc_net_remove(net, "softnet_stat");
3264 	proc_net_remove(net, "dev");
3265 }
3266 
3267 static struct pernet_operations __net_initdata dev_proc_ops = {
3268 	.init = dev_proc_net_init,
3269 	.exit = dev_proc_net_exit,
3270 };
3271 
3272 static int __init dev_proc_init(void)
3273 {
3274 	return register_pernet_subsys(&dev_proc_ops);
3275 }
3276 #else
3277 #define dev_proc_init() 0
3278 #endif	/* CONFIG_PROC_FS */
3279 
3280 
3281 /**
3282  *	netdev_set_master	-	set up master/slave pair
3283  *	@slave: slave device
3284  *	@master: new master device
3285  *
3286  *	Changes the master device of the slave. Pass %NULL to break the
3287  *	bonding. The caller must hold the RTNL semaphore. On a failure
3288  *	a negative errno code is returned. On success the reference counts
3289  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3290  *	function returns zero.
3291  */
3292 int netdev_set_master(struct net_device *slave, struct net_device *master)
3293 {
3294 	struct net_device *old = slave->master;
3295 
3296 	ASSERT_RTNL();
3297 
3298 	if (master) {
3299 		if (old)
3300 			return -EBUSY;
3301 		dev_hold(master);
3302 	}
3303 
3304 	slave->master = master;
3305 
3306 	synchronize_net();
3307 
3308 	if (old)
3309 		dev_put(old);
3310 
3311 	if (master)
3312 		slave->flags |= IFF_SLAVE;
3313 	else
3314 		slave->flags &= ~IFF_SLAVE;
3315 
3316 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3317 	return 0;
3318 }
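
/*
 * Example (hedged sketch of a bonding-style driver): enslaving and later
 * releasing a device brackets the slave's lifetime with
 *
 *	rtnl_lock();
 *	err = netdev_set_master(slave_dev, bond_dev);	(enslave)
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	netdev_set_master(slave_dev, NULL);		(release)
 *	rtnl_unlock();
 *
 * where bond_dev and slave_dev are placeholders; the RTNL requirement
 * comes from the kernel-doc above.
 */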
3319 
3320 static void dev_change_rx_flags(struct net_device *dev, int flags)
3321 {
3322 	const struct net_device_ops *ops = dev->netdev_ops;
3323 
3324 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3325 		ops->ndo_change_rx_flags(dev, flags);
3326 }
3327 
3328 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3329 {
3330 	unsigned short old_flags = dev->flags;
3331 	uid_t uid;
3332 	gid_t gid;
3333 
3334 	ASSERT_RTNL();
3335 
3336 	dev->flags |= IFF_PROMISC;
3337 	dev->promiscuity += inc;
3338 	if (dev->promiscuity == 0) {
3339 		/*
3340 		 * Avoid overflow.
3341 		 * If inc causes overflow, untouch promisc and return error.
3342 		 */
3343 		if (inc < 0)
3344 			dev->flags &= ~IFF_PROMISC;
3345 		else {
3346 			dev->promiscuity -= inc;
3347 			printk(KERN_WARNING "%s: promiscuity counter overflowed, "
3348 				"set promiscuity failed; the promiscuous mode "
3349 				"feature of the device might be broken.\n", dev->name);
3350 			return -EOVERFLOW;
3351 		}
3352 	}
3353 	if (dev->flags != old_flags) {
3354 		printk(KERN_INFO "device %s %s promiscuous mode\n",
3355 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3356 							       "left");
3357 		if (audit_enabled) {
3358 			current_uid_gid(&uid, &gid);
3359 			audit_log(current->audit_context, GFP_ATOMIC,
3360 				AUDIT_ANOM_PROMISCUOUS,
3361 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3362 				dev->name, (dev->flags & IFF_PROMISC),
3363 				(old_flags & IFF_PROMISC),
3364 				audit_get_loginuid(current),
3365 				uid, gid,
3366 				audit_get_sessionid(current));
3367 		}
3368 
3369 		dev_change_rx_flags(dev, IFF_PROMISC);
3370 	}
3371 	return 0;
3372 }
3373 
3374 /**
3375  *	dev_set_promiscuity	- update promiscuity count on a device
3376  *	@dev: device
3377  *	@inc: modifier
3378  *
3379  *	Add or remove promiscuity from a device. While the count in the device
3380  *	remains above zero the interface remains promiscuous. Once it hits zero
3381  *	the device reverts back to normal filtering operation. A negative inc
3382  *	the device reverts to normal filtering operation. A negative @inc
3383  *	Return 0 if successful or a negative errno code on error.
3384  */
3385 int dev_set_promiscuity(struct net_device *dev, int inc)
3386 {
3387 	unsigned short old_flags = dev->flags;
3388 	int err;
3389 
3390 	err = __dev_set_promiscuity(dev, inc);
3391 	if (err < 0)
3392 		return err;
3393 	if (dev->flags != old_flags)
3394 		dev_set_rx_mode(dev);
3395 	return err;
3396 }
3397 
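
/*
 * Example (illustrative): a packet capture socket that needs to see all
 * traffic on a device typically does, under RTNL,
 *
 *	dev_set_promiscuity(dev, 1);	when capture starts
 *	dev_set_promiscuity(dev, -1);	when capture stops
 *
 * so several independent users can stack their requests; the interface
 * only leaves promiscuous mode once the last user drops its reference.
 */
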
3398 /**
3399  *	dev_set_allmulti	- update allmulti count on a device
3400  *	@dev: device
3401  *	@inc: modifier
3402  *
3403  *	Add or remove reception of all multicast frames on a device. While the
3404  *	count in the device remains above zero the interface keeps listening
3405  *	to all multicast frames. Once it hits zero the device reverts to normal
3406  *	filtering operation. A negative @inc value is used to drop the counter
3407  *	when releasing a resource needing all multicasts.
3408  *	Return 0 if successful or a negative errno code on error.
3409  */
3410 
3411 int dev_set_allmulti(struct net_device *dev, int inc)
3412 {
3413 	unsigned short old_flags = dev->flags;
3414 
3415 	ASSERT_RTNL();
3416 
3417 	dev->flags |= IFF_ALLMULTI;
3418 	dev->allmulti += inc;
3419 	if (dev->allmulti == 0) {
3420 		/*
3421 		 * Avoid overflow.
3422 		 * If inc causes overflow, untouch allmulti and return error.
3423 		 */
3424 		if (inc < 0)
3425 			dev->flags &= ~IFF_ALLMULTI;
3426 		else {
3427 			dev->allmulti -= inc;
3428 			printk(KERN_WARNING "%s: allmulti counter overflowed, "
3429 				"set allmulti failed; the allmulti feature "
3430 				"of the device might be broken.\n", dev->name);
3431 			return -EOVERFLOW;
3432 		}
3433 	}
3434 	if (dev->flags ^ old_flags) {
3435 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3436 		dev_set_rx_mode(dev);
3437 	}
3438 	return 0;
3439 }
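
/*
 * Example (illustrative): a multicast routing or discovery protocol that
 * must see every multicast frame follows the same pattern:
 * dev_set_allmulti(dev, 1) while it is bound to the device, and
 * dev_set_allmulti(dev, -1) when it detaches, both under RTNL.
 */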
3440 
3441 /*
3442  *	Upload unicast and multicast address lists to device and
3443  *	configure RX filtering. When the device doesn't support unicast
3444  *	filtering it is put in promiscuous mode while unicast addresses
3445  *	are present.
3446  */
3447 void __dev_set_rx_mode(struct net_device *dev)
3448 {
3449 	const struct net_device_ops *ops = dev->netdev_ops;
3450 
3451 	/* dev_open will call this function so the list will stay sane. */
3452 	if (!(dev->flags&IFF_UP))
3453 		return;
3454 
3455 	if (!netif_device_present(dev))
3456 		return;
3457 
3458 	if (ops->ndo_set_rx_mode)
3459 		ops->ndo_set_rx_mode(dev);
3460 	else {
3461 		/* Unicast addresses changes may only happen under the rtnl,
3462 		 * therefore calling __dev_set_promiscuity here is safe.
3463 		 */
3464 		if (dev->uc.count > 0 && !dev->uc_promisc) {
3465 			__dev_set_promiscuity(dev, 1);
3466 			dev->uc_promisc = 1;
3467 		} else if (dev->uc.count == 0 && dev->uc_promisc) {
3468 			__dev_set_promiscuity(dev, -1);
3469 			dev->uc_promisc = 0;
3470 		}
3471 
3472 		if (ops->ndo_set_multicast_list)
3473 			ops->ndo_set_multicast_list(dev);
3474 	}
3475 }
3476 
3477 void dev_set_rx_mode(struct net_device *dev)
3478 {
3479 	netif_addr_lock_bh(dev);
3480 	__dev_set_rx_mode(dev);
3481 	netif_addr_unlock_bh(dev);
3482 }
3483 
3484 /* hw addresses list handling functions */
3485 
3486 static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3487 			 int addr_len, unsigned char addr_type)
3488 {
3489 	struct netdev_hw_addr *ha;
3490 	int alloc_size;
3491 
3492 	if (addr_len > MAX_ADDR_LEN)
3493 		return -EINVAL;
3494 
3495 	list_for_each_entry(ha, &list->list, list) {
3496 		if (!memcmp(ha->addr, addr, addr_len) &&
3497 		    ha->type == addr_type) {
3498 			ha->refcount++;
3499 			return 0;
3500 		}
3501 	}
3502 
3503 
3504 	alloc_size = sizeof(*ha);
3505 	if (alloc_size < L1_CACHE_BYTES)
3506 		alloc_size = L1_CACHE_BYTES;
3507 	ha = kmalloc(alloc_size, GFP_ATOMIC);
3508 	if (!ha)
3509 		return -ENOMEM;
3510 	memcpy(ha->addr, addr, addr_len);
3511 	ha->type = addr_type;
3512 	ha->refcount = 1;
3513 	ha->synced = false;
3514 	list_add_tail_rcu(&ha->list, &list->list);
3515 	list->count++;
3516 	return 0;
3517 }
3518 
3519 static void ha_rcu_free(struct rcu_head *head)
3520 {
3521 	struct netdev_hw_addr *ha;
3522 
3523 	ha = container_of(head, struct netdev_hw_addr, rcu_head);
3524 	kfree(ha);
3525 }
3526 
3527 static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3528 			 int addr_len, unsigned char addr_type)
3529 {
3530 	struct netdev_hw_addr *ha;
3531 
3532 	list_for_each_entry(ha, &list->list, list) {
3533 		if (!memcmp(ha->addr, addr, addr_len) &&
3534 		    (ha->type == addr_type || !addr_type)) {
3535 			if (--ha->refcount)
3536 				return 0;
3537 			list_del_rcu(&ha->list);
3538 			call_rcu(&ha->rcu_head, ha_rcu_free);
3539 			list->count--;
3540 			return 0;
3541 		}
3542 	}
3543 	return -ENOENT;
3544 }
3545 
3546 static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3547 				  struct netdev_hw_addr_list *from_list,
3548 				  int addr_len,
3549 				  unsigned char addr_type)
3550 {
3551 	int err;
3552 	struct netdev_hw_addr *ha, *ha2;
3553 	unsigned char type;
3554 
3555 	list_for_each_entry(ha, &from_list->list, list) {
3556 		type = addr_type ? addr_type : ha->type;
3557 		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3558 		if (err)
3559 			goto unroll;
3560 	}
3561 	return 0;
3562 
3563 unroll:
3564 	list_for_each_entry(ha2, &from_list->list, list) {
3565 		if (ha2 == ha)
3566 			break;
3567 		type = addr_type ? addr_type : ha2->type;
3568 		__hw_addr_del(to_list, ha2->addr, addr_len, type);
3569 	}
3570 	return err;
3571 }
3572 
3573 static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3574 				   struct netdev_hw_addr_list *from_list,
3575 				   int addr_len,
3576 				   unsigned char addr_type)
3577 {
3578 	struct netdev_hw_addr *ha;
3579 	unsigned char type;
3580 
3581 	list_for_each_entry(ha, &from_list->list, list) {
3582 		type = addr_type ? addr_type : ha->type;
3583 		__hw_addr_del(to_list, ha->addr, addr_len, type);
3584 	}
3585 }
3586 
3587 static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3588 			  struct netdev_hw_addr_list *from_list,
3589 			  int addr_len)
3590 {
3591 	int err = 0;
3592 	struct netdev_hw_addr *ha, *tmp;
3593 
3594 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3595 		if (!ha->synced) {
3596 			err = __hw_addr_add(to_list, ha->addr,
3597 					    addr_len, ha->type);
3598 			if (err)
3599 				break;
3600 			ha->synced = true;
3601 			ha->refcount++;
3602 		} else if (ha->refcount == 1) {
3603 			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3604 			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3605 		}
3606 	}
3607 	return err;
3608 }
3609 
3610 static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3611 			     struct netdev_hw_addr_list *from_list,
3612 			     int addr_len)
3613 {
3614 	struct netdev_hw_addr *ha, *tmp;
3615 
3616 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3617 		if (ha->synced) {
3618 			__hw_addr_del(to_list, ha->addr,
3619 				      addr_len, ha->type);
3620 			ha->synced = false;
3621 			__hw_addr_del(from_list, ha->addr,
3622 				      addr_len, ha->type);
3623 		}
3624 	}
3625 }
3626 
3627 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3628 {
3629 	struct netdev_hw_addr *ha, *tmp;
3630 
3631 	list_for_each_entry_safe(ha, tmp, &list->list, list) {
3632 		list_del_rcu(&ha->list);
3633 		call_rcu(&ha->rcu_head, ha_rcu_free);
3634 	}
3635 	list->count = 0;
3636 }
3637 
3638 static void __hw_addr_init(struct netdev_hw_addr_list *list)
3639 {
3640 	INIT_LIST_HEAD(&list->list);
3641 	list->count = 0;
3642 }
3643 
3644 /* Device addresses handling functions */
3645 
3646 static void dev_addr_flush(struct net_device *dev)
3647 {
3648 	/* rtnl_mutex must be held here */
3649 
3650 	__hw_addr_flush(&dev->dev_addrs);
3651 	dev->dev_addr = NULL;
3652 }
3653 
3654 static int dev_addr_init(struct net_device *dev)
3655 {
3656 	unsigned char addr[MAX_ADDR_LEN];
3657 	struct netdev_hw_addr *ha;
3658 	int err;
3659 
3660 	/* rtnl_mutex must be held here */
3661 
3662 	__hw_addr_init(&dev->dev_addrs);
3663 	memset(addr, 0, sizeof(addr));
3664 	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3665 			    NETDEV_HW_ADDR_T_LAN);
3666 	if (!err) {
3667 		/*
3668 		 * Get the first (previously created) address from the list
3669 		 * and set dev_addr pointer to this location.
3670 		 */
3671 		ha = list_first_entry(&dev->dev_addrs.list,
3672 				      struct netdev_hw_addr, list);
3673 		dev->dev_addr = ha->addr;
3674 	}
3675 	return err;
3676 }
3677 
3678 /**
3679  *	dev_addr_add	- Add a device address
3680  *	@dev: device
3681  *	@addr: address to add
3682  *	@addr_type: address type
3683  *
3684  *	Add a device address to the device or increase the reference count if
3685  *	it already exists.
3686  *
3687  *	The caller must hold the rtnl_mutex.
3688  */
3689 int dev_addr_add(struct net_device *dev, unsigned char *addr,
3690 		 unsigned char addr_type)
3691 {
3692 	int err;
3693 
3694 	ASSERT_RTNL();
3695 
3696 	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3697 	if (!err)
3698 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3699 	return err;
3700 }
3701 EXPORT_SYMBOL(dev_addr_add);
3702 
3703 /**
3704  *	dev_addr_del	- Release a device address.
3705  *	@dev: device
3706  *	@addr: address to delete
3707  *	@addr_type: address type
3708  *
3709  *	Release reference to a device address and remove it from the device
3710  *	if the reference count drops to zero.
3711  *
3712  *	The caller must hold the rtnl_mutex.
3713  */
3714 int dev_addr_del(struct net_device *dev, unsigned char *addr,
3715 		 unsigned char addr_type)
3716 {
3717 	int err;
3718 	struct netdev_hw_addr *ha;
3719 
3720 	ASSERT_RTNL();
3721 
3722 	/*
3723 	 * We can not remove the first address from the list because
3724 	 * dev->dev_addr points to that.
3725 	 */
3726 	ha = list_first_entry(&dev->dev_addrs.list,
3727 			      struct netdev_hw_addr, list);
3728 	if (ha->addr == dev->dev_addr && ha->refcount == 1)
3729 		return -ENOENT;
3730 
3731 	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3732 			    addr_type);
3733 	if (!err)
3734 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3735 	return err;
3736 }
3737 EXPORT_SYMBOL(dev_addr_del);
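
/*
 * Example (hedged sketch): a driver exposing an additional MAC address,
 * say for a management function, could register and later drop it with
 *
 *	rtnl_lock();
 *	err = dev_addr_add(dev, mgmt_addr, NETDEV_HW_ADDR_T_LAN);
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_addr_del(dev, mgmt_addr, NETDEV_HW_ADDR_T_LAN);
 *	rtnl_unlock();
 *
 * mgmt_addr is a placeholder for an addr_len sized buffer.
 */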
3738 
3739 /**
3740  *	dev_addr_add_multiple	- Add device addresses from another device
3741  *	@to_dev: device to which addresses will be added
3742  *	@from_dev: device from which addresses will be added
3743  *	@addr_type: address type - 0 means type will be used from from_dev
3744  *
3745  *	Add the device addresses of one device to another.
3746  *
3747  *	The caller must hold the rtnl_mutex.
3748  */
3749 int dev_addr_add_multiple(struct net_device *to_dev,
3750 			  struct net_device *from_dev,
3751 			  unsigned char addr_type)
3752 {
3753 	int err;
3754 
3755 	ASSERT_RTNL();
3756 
3757 	if (from_dev->addr_len != to_dev->addr_len)
3758 		return -EINVAL;
3759 	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3760 				     to_dev->addr_len, addr_type);
3761 	if (!err)
3762 		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3763 	return err;
3764 }
3765 EXPORT_SYMBOL(dev_addr_add_multiple);
3766 
3767 /**
3768  *	dev_addr_del_multiple	- Delete device addresses by another device
3769  *	@to_dev: device where the addresses will be deleted
3770  *	@from_dev: device supplying the addresses to be deleted
3771  *	@addr_type: address type - 0 means type will be used from from_dev
3772  *
3773  *	Deletes the addresses listed in @from_dev from @to_dev.
3774  *
3775  *	The caller must hold the rtnl_mutex.
3776  */
3777 int dev_addr_del_multiple(struct net_device *to_dev,
3778 			  struct net_device *from_dev,
3779 			  unsigned char addr_type)
3780 {
3781 	ASSERT_RTNL();
3782 
3783 	if (from_dev->addr_len != to_dev->addr_len)
3784 		return -EINVAL;
3785 	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3786 			       to_dev->addr_len, addr_type);
3787 	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3788 	return 0;
3789 }
3790 EXPORT_SYMBOL(dev_addr_del_multiple);
3791 
3792 /* multicast addresses handling functions */
3793 
3794 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3795 		      void *addr, int alen, int glbl)
3796 {
3797 	struct dev_addr_list *da;
3798 
3799 	for (; (da = *list) != NULL; list = &da->next) {
3800 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3801 		    alen == da->da_addrlen) {
3802 			if (glbl) {
3803 				int old_glbl = da->da_gusers;
3804 				da->da_gusers = 0;
3805 				if (old_glbl == 0)
3806 					break;
3807 			}
3808 			if (--da->da_users)
3809 				return 0;
3810 
3811 			*list = da->next;
3812 			kfree(da);
3813 			(*count)--;
3814 			return 0;
3815 		}
3816 	}
3817 	return -ENOENT;
3818 }
3819 
3820 int __dev_addr_add(struct dev_addr_list **list, int *count,
3821 		   void *addr, int alen, int glbl)
3822 {
3823 	struct dev_addr_list *da;
3824 
3825 	for (da = *list; da != NULL; da = da->next) {
3826 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3827 		    da->da_addrlen == alen) {
3828 			if (glbl) {
3829 				int old_glbl = da->da_gusers;
3830 				da->da_gusers = 1;
3831 				if (old_glbl)
3832 					return 0;
3833 			}
3834 			da->da_users++;
3835 			return 0;
3836 		}
3837 	}
3838 
3839 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3840 	if (da == NULL)
3841 		return -ENOMEM;
3842 	memcpy(da->da_addr, addr, alen);
3843 	da->da_addrlen = alen;
3844 	da->da_users = 1;
3845 	da->da_gusers = glbl ? 1 : 0;
3846 	da->next = *list;
3847 	*list = da;
3848 	(*count)++;
3849 	return 0;
3850 }
3851 
3852 /**
3853  *	dev_unicast_delete	- Release secondary unicast address.
3854  *	@dev: device
3855  *	@addr: address to delete
3856  *
3857  *	Release reference to a secondary unicast address and remove it
3858  *	from the device if the reference count drops to zero.
3859  *
3860  * 	The caller must hold the rtnl_mutex.
3861  */
3862 int dev_unicast_delete(struct net_device *dev, void *addr)
3863 {
3864 	int err;
3865 
3866 	ASSERT_RTNL();
3867 
3868 	netif_addr_lock_bh(dev);
3869 	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
3870 			    NETDEV_HW_ADDR_T_UNICAST);
3871 	if (!err)
3872 		__dev_set_rx_mode(dev);
3873 	netif_addr_unlock_bh(dev);
3874 	return err;
3875 }
3876 EXPORT_SYMBOL(dev_unicast_delete);
3877 
3878 /**
3879  *	dev_unicast_add		- add a secondary unicast address
3880  *	@dev: device
3881  *	@addr: address to add
3882  *
3883  *	Add a secondary unicast address to the device or increase
3884  *	the reference count if it already exists.
3885  *
3886  *	The caller must hold the rtnl_mutex.
3887  */
3888 int dev_unicast_add(struct net_device *dev, void *addr)
3889 {
3890 	int err;
3891 
3892 	ASSERT_RTNL();
3893 
3894 	netif_addr_lock_bh(dev);
3895 	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
3896 			    NETDEV_HW_ADDR_T_UNICAST);
3897 	if (!err)
3898 		__dev_set_rx_mode(dev);
3899 	netif_addr_unlock_bh(dev);
3900 	return err;
3901 }
3902 EXPORT_SYMBOL(dev_unicast_add);
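/*
 * Illustrative sketch (not part of this file): a driver that wants to
 * receive frames on an extra, secondary MAC address could use the pair of
 * helpers above roughly as follows.  The address value below is made up.
 *
 *	static const unsigned char extra_mac[ETH_ALEN] =
 *		{ 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *
 *	rtnl_lock();
 *	err = dev_unicast_add(dev, (void *)extra_mac);	(takes a reference)
 *	...
 *	dev_unicast_delete(dev, (void *)extra_mac);	(drops it again)
 *	rtnl_unlock();
 */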
3903 
3904 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3905 		    struct dev_addr_list **from, int *from_count)
3906 {
3907 	struct dev_addr_list *da, *next;
3908 	int err = 0;
3909 
3910 	da = *from;
3911 	while (da != NULL) {
3912 		next = da->next;
3913 		if (!da->da_synced) {
3914 			err = __dev_addr_add(to, to_count,
3915 					     da->da_addr, da->da_addrlen, 0);
3916 			if (err < 0)
3917 				break;
3918 			da->da_synced = 1;
3919 			da->da_users++;
3920 		} else if (da->da_users == 1) {
3921 			__dev_addr_delete(to, to_count,
3922 					  da->da_addr, da->da_addrlen, 0);
3923 			__dev_addr_delete(from, from_count,
3924 					  da->da_addr, da->da_addrlen, 0);
3925 		}
3926 		da = next;
3927 	}
3928 	return err;
3929 }
3930 
3931 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3932 		       struct dev_addr_list **from, int *from_count)
3933 {
3934 	struct dev_addr_list *da, *next;
3935 
3936 	da = *from;
3937 	while (da != NULL) {
3938 		next = da->next;
3939 		if (da->da_synced) {
3940 			__dev_addr_delete(to, to_count,
3941 					  da->da_addr, da->da_addrlen, 0);
3942 			da->da_synced = 0;
3943 			__dev_addr_delete(from, from_count,
3944 					  da->da_addr, da->da_addrlen, 0);
3945 		}
3946 		da = next;
3947 	}
3948 }
3949 
3950 /**
3951  *	dev_unicast_sync - Synchronize device's unicast list to another device
3952  *	@to: destination device
3953  *	@from: source device
3954  *
3955  *	Add newly added addresses to the destination device and release
3956  *	addresses that have no users left. The source device must be
3957  *	locked by netif_addr_lock_bh.
3958  *
3959  *	This function is intended to be called from the ndo_set_rx_mode
3960  *	function of layered software devices.
3961  */
3962 int dev_unicast_sync(struct net_device *to, struct net_device *from)
3963 {
3964 	int err = 0;
3965 
3966 	if (to->addr_len != from->addr_len)
3967 		return -EINVAL;
3968 
3969 	netif_addr_lock_bh(to);
3970 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
3971 	if (!err)
3972 		__dev_set_rx_mode(to);
3973 	netif_addr_unlock_bh(to);
3974 	return err;
3975 }
3976 EXPORT_SYMBOL(dev_unicast_sync);
3977 
3978 /**
3979  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
3980  *	@to: destination device
3981  *	@from: source device
3982  *
3983  *	Remove all addresses that were added to the destination device by
3984  *	dev_unicast_sync(). This function is intended to be called from the
3985  *	ndo_stop function of layered software devices.
3986  */
3987 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3988 {
3989 	if (to->addr_len != from->addr_len)
3990 		return;
3991 
3992 	netif_addr_lock_bh(from);
3993 	netif_addr_lock(to);
3994 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
3995 	__dev_set_rx_mode(to);
3996 	netif_addr_unlock(to);
3997 	netif_addr_unlock_bh(from);
3998 }
3999 EXPORT_SYMBOL(dev_unicast_unsync);
4000 
4001 static void dev_unicast_flush(struct net_device *dev)
4002 {
4003 	netif_addr_lock_bh(dev);
4004 	__hw_addr_flush(&dev->uc);
4005 	netif_addr_unlock_bh(dev);
4006 }
4007 
4008 static void dev_unicast_init(struct net_device *dev)
4009 {
4010 	__hw_addr_init(&dev->uc);
4011 }
4012 
4013 
4014 static void __dev_addr_discard(struct dev_addr_list **list)
4015 {
4016 	struct dev_addr_list *tmp;
4017 
4018 	while (*list != NULL) {
4019 		tmp = *list;
4020 		*list = tmp->next;
4021 		if (tmp->da_users > tmp->da_gusers)
4022 			printk("__dev_addr_discard: address leakage! "
4023 			       "da_users=%d\n", tmp->da_users);
4024 		kfree(tmp);
4025 	}
4026 }
4027 
4028 static void dev_addr_discard(struct net_device *dev)
4029 {
4030 	netif_addr_lock_bh(dev);
4031 
4032 	__dev_addr_discard(&dev->mc_list);
4033 	dev->mc_count = 0;
4034 
4035 	netif_addr_unlock_bh(dev);
4036 }
4037 
4038 /**
4039  *	dev_get_flags - get flags reported to userspace
4040  *	@dev: device
4041  *
4042  *	Get the combination of flag bits exported through APIs to userspace.
4043  */
4044 unsigned dev_get_flags(const struct net_device *dev)
4045 {
4046 	unsigned flags;
4047 
4048 	flags = (dev->flags & ~(IFF_PROMISC |
4049 				IFF_ALLMULTI |
4050 				IFF_RUNNING |
4051 				IFF_LOWER_UP |
4052 				IFF_DORMANT)) |
4053 		(dev->gflags & (IFF_PROMISC |
4054 				IFF_ALLMULTI));
4055 
4056 	if (netif_running(dev)) {
4057 		if (netif_oper_up(dev))
4058 			flags |= IFF_RUNNING;
4059 		if (netif_carrier_ok(dev))
4060 			flags |= IFF_LOWER_UP;
4061 		if (netif_dormant(dev))
4062 			flags |= IFF_DORMANT;
4063 	}
4064 
4065 	return flags;
4066 }
4067 
4068 /**
4069  *	dev_change_flags - change device settings
4070  *	@dev: device
4071  *	@flags: device state flags
4072  *
4073  *	Change settings on device based state flags. The flags are
4074  *	in the userspace exported format.
4075  */
4076 int dev_change_flags(struct net_device *dev, unsigned flags)
4077 {
4078 	int ret, changes;
4079 	int old_flags = dev->flags;
4080 
4081 	ASSERT_RTNL();
4082 
4083 	/*
4084 	 *	Set the flags on our device.
4085 	 */
4086 
4087 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4088 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4089 			       IFF_AUTOMEDIA)) |
4090 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4091 				    IFF_ALLMULTI));
4092 
4093 	/*
4094 	 *	Load in the correct multicast list now the flags have changed.
4095 	 */
4096 
4097 	if ((old_flags ^ flags) & IFF_MULTICAST)
4098 		dev_change_rx_flags(dev, IFF_MULTICAST);
4099 
4100 	dev_set_rx_mode(dev);
4101 
4102 	/*
4103 	 *	Have we downed the interface?  We handle IFF_UP ourselves
4104 	 *	according to user attempts to set it, rather than blindly
4105 	 *	setting it.
4106 	 */
4107 
4108 	ret = 0;
4109 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4110 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
4111 
4112 		if (!ret)
4113 			dev_set_rx_mode(dev);
4114 	}
4115 
4116 	if (dev->flags & IFF_UP &&
4117 	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
4118 					  IFF_VOLATILE)))
4119 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4120 
4121 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4122 		int inc = (flags & IFF_PROMISC) ? +1 : -1;
4123 		dev->gflags ^= IFF_PROMISC;
4124 		dev_set_promiscuity(dev, inc);
4125 	}
4126 
4127 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4128 	   is important.  Some (broken) drivers set IFF_PROMISC when
4129 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4130 	 */
4131 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4132 		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
4133 		dev->gflags ^= IFF_ALLMULTI;
4134 		dev_set_allmulti(dev, inc);
4135 	}
4136 
4137 	/* Exclude state transition flags, already notified */
4138 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4139 	if (changes)
4140 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4141 
4142 	return ret;
4143 }
4144 
4145 /**
4146  *	dev_set_mtu - Change maximum transfer unit
4147  *	@dev: device
4148  *	@new_mtu: new transfer unit
4149  *
4150  *	Change the maximum transfer size of the network device.
4151  */
4152 int dev_set_mtu(struct net_device *dev, int new_mtu)
4153 {
4154 	const struct net_device_ops *ops = dev->netdev_ops;
4155 	int err;
4156 
4157 	if (new_mtu == dev->mtu)
4158 		return 0;
4159 
4160 	/*	MTU must be positive.	 */
4161 	if (new_mtu < 0)
4162 		return -EINVAL;
4163 
4164 	if (!netif_device_present(dev))
4165 		return -ENODEV;
4166 
4167 	err = 0;
4168 	if (ops->ndo_change_mtu)
4169 		err = ops->ndo_change_mtu(dev, new_mtu);
4170 	else
4171 		dev->mtu = new_mtu;
4172 
4173 	if (!err && dev->flags & IFF_UP)
4174 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4175 	return err;
4176 }
4177 
4178 /**
4179  *	dev_set_mac_address - Change Media Access Control Address
4180  *	@dev: device
4181  *	@sa: new address
4182  *
4183  *	Change the hardware (MAC) address of the device
4184  */
4185 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4186 {
4187 	const struct net_device_ops *ops = dev->netdev_ops;
4188 	int err;
4189 
4190 	if (!ops->ndo_set_mac_address)
4191 		return -EOPNOTSUPP;
4192 	if (sa->sa_family != dev->type)
4193 		return -EINVAL;
4194 	if (!netif_device_present(dev))
4195 		return -ENODEV;
4196 	err = ops->ndo_set_mac_address(dev, sa);
4197 	if (!err)
4198 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4199 	return err;
4200 }
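/*
 * Illustrative sketch (not part of this file): an in-kernel caller that
 * already holds the rtnl semaphore, as dev_ifsioc() below does, could
 * reconfigure a device with the two helpers above.  The MTU value and the
 * new_mac buffer are examples only.
 *
 *	struct sockaddr sa;
 *
 *	ASSERT_RTNL();
 *	err = dev_set_mtu(dev, 1500);
 *	if (!err) {
 *		sa.sa_family = dev->type;
 *		memcpy(sa.sa_data, new_mac, dev->addr_len);
 *		err = dev_set_mac_address(dev, &sa);
 *	}
 */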
4201 
4202 /*
4203  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
4204  */
4205 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4206 {
4207 	int err;
4208 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4209 
4210 	if (!dev)
4211 		return -ENODEV;
4212 
4213 	switch (cmd) {
4214 		case SIOCGIFFLAGS:	/* Get interface flags */
4215 			ifr->ifr_flags = (short) dev_get_flags(dev);
4216 			return 0;
4217 
4218 		case SIOCGIFMETRIC:	/* Get the metric on the interface
4219 					   (currently unused) */
4220 			ifr->ifr_metric = 0;
4221 			return 0;
4222 
4223 		case SIOCGIFMTU:	/* Get the MTU of a device */
4224 			ifr->ifr_mtu = dev->mtu;
4225 			return 0;
4226 
4227 		case SIOCGIFHWADDR:
4228 			if (!dev->addr_len)
4229 				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4230 			else
4231 				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4232 				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4233 			ifr->ifr_hwaddr.sa_family = dev->type;
4234 			return 0;
4235 
4236 		case SIOCGIFSLAVE:
4237 			err = -EINVAL;
4238 			break;
4239 
4240 		case SIOCGIFMAP:
4241 			ifr->ifr_map.mem_start = dev->mem_start;
4242 			ifr->ifr_map.mem_end   = dev->mem_end;
4243 			ifr->ifr_map.base_addr = dev->base_addr;
4244 			ifr->ifr_map.irq       = dev->irq;
4245 			ifr->ifr_map.dma       = dev->dma;
4246 			ifr->ifr_map.port      = dev->if_port;
4247 			return 0;
4248 
4249 		case SIOCGIFINDEX:
4250 			ifr->ifr_ifindex = dev->ifindex;
4251 			return 0;
4252 
4253 		case SIOCGIFTXQLEN:
4254 			ifr->ifr_qlen = dev->tx_queue_len;
4255 			return 0;
4256 
4257 		default:
4258 			/* dev_ioctl() should ensure this case
4259 			 * is never reached
4260 			 */
4261 			WARN_ON(1);
4262 			err = -EINVAL;
4263 			break;
4264 
4265 	}
4266 	return err;
4267 }
4268 
4269 /*
4270  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4271  */
4272 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4273 {
4274 	int err;
4275 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4276 	const struct net_device_ops *ops;
4277 
4278 	if (!dev)
4279 		return -ENODEV;
4280 
4281 	ops = dev->netdev_ops;
4282 
4283 	switch (cmd) {
4284 		case SIOCSIFFLAGS:	/* Set interface flags */
4285 			return dev_change_flags(dev, ifr->ifr_flags);
4286 
4287 		case SIOCSIFMETRIC:	/* Set the metric on the interface
4288 					   (currently unused) */
4289 			return -EOPNOTSUPP;
4290 
4291 		case SIOCSIFMTU:	/* Set the MTU of a device */
4292 			return dev_set_mtu(dev, ifr->ifr_mtu);
4293 
4294 		case SIOCSIFHWADDR:
4295 			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4296 
4297 		case SIOCSIFHWBROADCAST:
4298 			if (ifr->ifr_hwaddr.sa_family != dev->type)
4299 				return -EINVAL;
4300 			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4301 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4302 			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4303 			return 0;
4304 
4305 		case SIOCSIFMAP:
4306 			if (ops->ndo_set_config) {
4307 				if (!netif_device_present(dev))
4308 					return -ENODEV;
4309 				return ops->ndo_set_config(dev, &ifr->ifr_map);
4310 			}
4311 			return -EOPNOTSUPP;
4312 
4313 		case SIOCADDMULTI:
4314 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4315 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4316 				return -EINVAL;
4317 			if (!netif_device_present(dev))
4318 				return -ENODEV;
4319 			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4320 					  dev->addr_len, 1);
4321 
4322 		case SIOCDELMULTI:
4323 			if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4324 			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4325 				return -EINVAL;
4326 			if (!netif_device_present(dev))
4327 				return -ENODEV;
4328 			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4329 					     dev->addr_len, 1);
4330 
4331 		case SIOCSIFTXQLEN:
4332 			if (ifr->ifr_qlen < 0)
4333 				return -EINVAL;
4334 			dev->tx_queue_len = ifr->ifr_qlen;
4335 			return 0;
4336 
4337 		case SIOCSIFNAME:
4338 			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4339 			return dev_change_name(dev, ifr->ifr_newname);
4340 
4341 		/*
4342 		 *	Unknown or private ioctl
4343 		 */
4344 
4345 		default:
4346 			if ((cmd >= SIOCDEVPRIVATE &&
4347 			    cmd <= SIOCDEVPRIVATE + 15) ||
4348 			    cmd == SIOCBONDENSLAVE ||
4349 			    cmd == SIOCBONDRELEASE ||
4350 			    cmd == SIOCBONDSETHWADDR ||
4351 			    cmd == SIOCBONDSLAVEINFOQUERY ||
4352 			    cmd == SIOCBONDINFOQUERY ||
4353 			    cmd == SIOCBONDCHANGEACTIVE ||
4354 			    cmd == SIOCGMIIPHY ||
4355 			    cmd == SIOCGMIIREG ||
4356 			    cmd == SIOCSMIIREG ||
4357 			    cmd == SIOCBRADDIF ||
4358 			    cmd == SIOCBRDELIF ||
4359 			    cmd == SIOCSHWTSTAMP ||
4360 			    cmd == SIOCWANDEV) {
4361 				err = -EOPNOTSUPP;
4362 				if (ops->ndo_do_ioctl) {
4363 					if (netif_device_present(dev))
4364 						err = ops->ndo_do_ioctl(dev, ifr, cmd);
4365 					else
4366 						err = -ENODEV;
4367 				}
4368 			} else
4369 				err = -EINVAL;
4370 
4371 	}
4372 	return err;
4373 }
4374 
4375 /*
4376  *	This function handles all "interface"-type I/O control requests. The actual
4377  *	'doing' part of this is dev_ifsioc above.
4378  */
4379 
4380 /**
4381  *	dev_ioctl	-	network device ioctl
4382  *	@net: the applicable net namespace
4383  *	@cmd: command to issue
4384  *	@arg: pointer to a struct ifreq in user space
4385  *
4386  *	Issue ioctl functions to devices. This is normally called by the
4387  *	user space syscall interfaces but can sometimes be useful for
4388  *	other purposes. The return value is the return from the syscall if
4389  *	positive or a negative errno code on error.
4390  */
4391 
4392 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4393 {
4394 	struct ifreq ifr;
4395 	int ret;
4396 	char *colon;
4397 
4398 	/* One special case: SIOCGIFCONF takes ifconf argument
4399 	   and requires shared lock, because it sleeps writing
4400 	   to user space.
4401 	 */
4402 
4403 	if (cmd == SIOCGIFCONF) {
4404 		rtnl_lock();
4405 		ret = dev_ifconf(net, (char __user *) arg);
4406 		rtnl_unlock();
4407 		return ret;
4408 	}
4409 	if (cmd == SIOCGIFNAME)
4410 		return dev_ifname(net, (struct ifreq __user *)arg);
4411 
4412 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4413 		return -EFAULT;
4414 
4415 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4416 
4417 	colon = strchr(ifr.ifr_name, ':');
4418 	if (colon)
4419 		*colon = 0;
4420 
4421 	/*
4422 	 *	See which interface the caller is talking about.
4423 	 */
4424 
4425 	switch (cmd) {
4426 		/*
4427 		 *	These ioctl calls:
4428 		 *	- can be done by all.
4429 		 *	- atomic and do not require locking.
4430 		 *	- return a value
4431 		 */
4432 		case SIOCGIFFLAGS:
4433 		case SIOCGIFMETRIC:
4434 		case SIOCGIFMTU:
4435 		case SIOCGIFHWADDR:
4436 		case SIOCGIFSLAVE:
4437 		case SIOCGIFMAP:
4438 		case SIOCGIFINDEX:
4439 		case SIOCGIFTXQLEN:
4440 			dev_load(net, ifr.ifr_name);
4441 			read_lock(&dev_base_lock);
4442 			ret = dev_ifsioc_locked(net, &ifr, cmd);
4443 			read_unlock(&dev_base_lock);
4444 			if (!ret) {
4445 				if (colon)
4446 					*colon = ':';
4447 				if (copy_to_user(arg, &ifr,
4448 						 sizeof(struct ifreq)))
4449 					ret = -EFAULT;
4450 			}
4451 			return ret;
4452 
4453 		case SIOCETHTOOL:
4454 			dev_load(net, ifr.ifr_name);
4455 			rtnl_lock();
4456 			ret = dev_ethtool(net, &ifr);
4457 			rtnl_unlock();
4458 			if (!ret) {
4459 				if (colon)
4460 					*colon = ':';
4461 				if (copy_to_user(arg, &ifr,
4462 						 sizeof(struct ifreq)))
4463 					ret = -EFAULT;
4464 			}
4465 			return ret;
4466 
4467 		/*
4468 		 *	These ioctl calls:
4469 		 *	- require superuser power.
4470 		 *	- require strict serialization.
4471 		 *	- return a value
4472 		 */
4473 		case SIOCGMIIPHY:
4474 		case SIOCGMIIREG:
4475 		case SIOCSIFNAME:
4476 			if (!capable(CAP_NET_ADMIN))
4477 				return -EPERM;
4478 			dev_load(net, ifr.ifr_name);
4479 			rtnl_lock();
4480 			ret = dev_ifsioc(net, &ifr, cmd);
4481 			rtnl_unlock();
4482 			if (!ret) {
4483 				if (colon)
4484 					*colon = ':';
4485 				if (copy_to_user(arg, &ifr,
4486 						 sizeof(struct ifreq)))
4487 					ret = -EFAULT;
4488 			}
4489 			return ret;
4490 
4491 		/*
4492 		 *	These ioctl calls:
4493 		 *	- require superuser power.
4494 		 *	- require strict serialization.
4495 		 *	- do not return a value
4496 		 */
4497 		case SIOCSIFFLAGS:
4498 		case SIOCSIFMETRIC:
4499 		case SIOCSIFMTU:
4500 		case SIOCSIFMAP:
4501 		case SIOCSIFHWADDR:
4502 		case SIOCSIFSLAVE:
4503 		case SIOCADDMULTI:
4504 		case SIOCDELMULTI:
4505 		case SIOCSIFHWBROADCAST:
4506 		case SIOCSIFTXQLEN:
4507 		case SIOCSMIIREG:
4508 		case SIOCBONDENSLAVE:
4509 		case SIOCBONDRELEASE:
4510 		case SIOCBONDSETHWADDR:
4511 		case SIOCBONDCHANGEACTIVE:
4512 		case SIOCBRADDIF:
4513 		case SIOCBRDELIF:
4514 		case SIOCSHWTSTAMP:
4515 			if (!capable(CAP_NET_ADMIN))
4516 				return -EPERM;
4517 			/* fall through */
4518 		case SIOCBONDSLAVEINFOQUERY:
4519 		case SIOCBONDINFOQUERY:
4520 			dev_load(net, ifr.ifr_name);
4521 			rtnl_lock();
4522 			ret = dev_ifsioc(net, &ifr, cmd);
4523 			rtnl_unlock();
4524 			return ret;
4525 
4526 		case SIOCGIFMEM:
4527 			/* Get the per device memory space. We can add this but
4528 			 * currently do not support it */
4529 		case SIOCSIFMEM:
4530 			/* Set the per device memory buffer space.
4531 			 * Not applicable in our case */
4532 		case SIOCSIFLINK:
4533 			return -EINVAL;
4534 
4535 		/*
4536 		 *	Unknown or private ioctl.
4537 		 */
4538 		default:
4539 			if (cmd == SIOCWANDEV ||
4540 			    (cmd >= SIOCDEVPRIVATE &&
4541 			     cmd <= SIOCDEVPRIVATE + 15)) {
4542 				dev_load(net, ifr.ifr_name);
4543 				rtnl_lock();
4544 				ret = dev_ifsioc(net, &ifr, cmd);
4545 				rtnl_unlock();
4546 				if (!ret && copy_to_user(arg, &ifr,
4547 							 sizeof(struct ifreq)))
4548 					ret = -EFAULT;
4549 				return ret;
4550 			}
4551 			/* Take care of Wireless Extensions */
4552 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4553 				return wext_handle_ioctl(net, &ifr, cmd, arg);
4554 			return -EINVAL;
4555 	}
4556 }
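/*
 * Illustrative sketch (not part of this file): from user space the path
 * above is reached through ioctl() on any socket; the interface name is an
 * example only.
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("mtu %d\n", ifr.ifr_mtu);
 *
 * SIOCGIFMTU lands in dev_ifsioc_locked() under dev_base_lock, while a
 * SIOCSIFMTU on the same ifreq goes through dev_ifsioc() under rtnl_lock(),
 * as the switch in dev_ioctl() above shows.
 */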
4557 
4558 
4559 /**
4560  *	dev_new_index	-	allocate an ifindex
4561  *	@net: the applicable net namespace
4562  *
4563  *	Returns a suitable unique value for a new device interface
4564  *	number.  The caller must hold the rtnl semaphore or the
4565  *	dev_base_lock to be sure it remains unique.
4566  */
4567 static int dev_new_index(struct net *net)
4568 {
4569 	static int ifindex;
4570 	for (;;) {
4571 		if (++ifindex <= 0)
4572 			ifindex = 1;
4573 		if (!__dev_get_by_index(net, ifindex))
4574 			return ifindex;
4575 	}
4576 }
4577 
4578 /* Delayed registration/unregisteration */
4579 static LIST_HEAD(net_todo_list);
4580 
4581 static void net_set_todo(struct net_device *dev)
4582 {
4583 	list_add_tail(&dev->todo_list, &net_todo_list);
4584 }
4585 
4586 static void rollback_registered(struct net_device *dev)
4587 {
4588 	BUG_ON(dev_boot_phase);
4589 	ASSERT_RTNL();
4590 
4591 	/* Some devices call this without having registered, to unwind a failed initialization. */
4592 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4593 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4594 				  "was registered\n", dev->name, dev);
4595 
4596 		WARN_ON(1);
4597 		return;
4598 	}
4599 
4600 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
4601 
4602 	/* If device is running, close it first. */
4603 	dev_close(dev);
4604 
4605 	/* And unlink it from device chain. */
4606 	unlist_netdevice(dev);
4607 
4608 	dev->reg_state = NETREG_UNREGISTERING;
4609 
4610 	synchronize_net();
4611 
4612 	/* Shutdown queueing discipline. */
4613 	dev_shutdown(dev);
4614 
4615 
4616 	/* Notify protocols, that we are about to destroy
4617 	   this device. They should clean all the things.
4618 	*/
4619 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4620 
4621 	/*
4622 	 *	Flush the unicast and multicast chains
4623 	 */
4624 	dev_unicast_flush(dev);
4625 	dev_addr_discard(dev);
4626 
4627 	if (dev->netdev_ops->ndo_uninit)
4628 		dev->netdev_ops->ndo_uninit(dev);
4629 
4630 	/* Notifier chain MUST detach us from master device. */
4631 	WARN_ON(dev->master);
4632 
4633 	/* Remove entries from kobject tree */
4634 	netdev_unregister_kobject(dev);
4635 
4636 	synchronize_net();
4637 
4638 	dev_put(dev);
4639 }
4640 
4641 static void __netdev_init_queue_locks_one(struct net_device *dev,
4642 					  struct netdev_queue *dev_queue,
4643 					  void *_unused)
4644 {
4645 	spin_lock_init(&dev_queue->_xmit_lock);
4646 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4647 	dev_queue->xmit_lock_owner = -1;
4648 }
4649 
4650 static void netdev_init_queue_locks(struct net_device *dev)
4651 {
4652 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4653 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4654 }
4655 
4656 unsigned long netdev_fix_features(unsigned long features, const char *name)
4657 {
4658 	/* Fix illegal SG+CSUM combinations. */
4659 	if ((features & NETIF_F_SG) &&
4660 	    !(features & NETIF_F_ALL_CSUM)) {
4661 		if (name)
4662 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4663 			       "checksum feature.\n", name);
4664 		features &= ~NETIF_F_SG;
4665 	}
4666 
4667 	/* TSO requires that SG is present as well. */
4668 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4669 		if (name)
4670 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4671 			       "SG feature.\n", name);
4672 		features &= ~NETIF_F_TSO;
4673 	}
4674 
4675 	if (features & NETIF_F_UFO) {
4676 		if (!(features & NETIF_F_GEN_CSUM)) {
4677 			if (name)
4678 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4679 				       "since no NETIF_F_HW_CSUM feature.\n",
4680 				       name);
4681 			features &= ~NETIF_F_UFO;
4682 		}
4683 
4684 		if (!(features & NETIF_F_SG)) {
4685 			if (name)
4686 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4687 				       "since no NETIF_F_SG feature.\n", name);
4688 			features &= ~NETIF_F_UFO;
4689 		}
4690 	}
4691 
4692 	return features;
4693 }
4694 EXPORT_SYMBOL(netdev_fix_features);
4695 
4696 /**
4697  *	register_netdevice	- register a network device
4698  *	@dev: device to register
4699  *
4700  *	Take a completed network device structure and add it to the kernel
4701  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4702  *	chain. 0 is returned on success. A negative errno code is returned
4703  *	on a failure to set up the device, or if the name is a duplicate.
4704  *
4705  *	Callers must hold the rtnl semaphore. You may want
4706  *	register_netdev() instead of this.
4707  *
4708  *	BUGS:
4709  *	The locking appears insufficient to guarantee two parallel registers
4710  *	will not get the same name.
4711  */
4712 
4713 int register_netdevice(struct net_device *dev)
4714 {
4715 	struct hlist_head *head;
4716 	struct hlist_node *p;
4717 	int ret;
4718 	struct net *net = dev_net(dev);
4719 
4720 	BUG_ON(dev_boot_phase);
4721 	ASSERT_RTNL();
4722 
4723 	might_sleep();
4724 
4725 	/* When net_devices are persistent, this will be fatal. */
4726 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4727 	BUG_ON(!net);
4728 
4729 	spin_lock_init(&dev->addr_list_lock);
4730 	netdev_set_addr_lockdep_class(dev);
4731 	netdev_init_queue_locks(dev);
4732 
4733 	dev->iflink = -1;
4734 
4735 	/* Init, if this function is available */
4736 	if (dev->netdev_ops->ndo_init) {
4737 		ret = dev->netdev_ops->ndo_init(dev);
4738 		if (ret) {
4739 			if (ret > 0)
4740 				ret = -EIO;
4741 			goto out;
4742 		}
4743 	}
4744 
4745 	if (!dev_valid_name(dev->name)) {
4746 		ret = -EINVAL;
4747 		goto err_uninit;
4748 	}
4749 
4750 	dev->ifindex = dev_new_index(net);
4751 	if (dev->iflink == -1)
4752 		dev->iflink = dev->ifindex;
4753 
4754 	/* Check for existence of name */
4755 	head = dev_name_hash(net, dev->name);
4756 	hlist_for_each(p, head) {
4757 		struct net_device *d
4758 			= hlist_entry(p, struct net_device, name_hlist);
4759 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4760 			ret = -EEXIST;
4761 			goto err_uninit;
4762 		}
4763 	}
4764 
4765 	/* Fix illegal checksum combinations */
4766 	if ((dev->features & NETIF_F_HW_CSUM) &&
4767 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4768 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4769 		       dev->name);
4770 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4771 	}
4772 
4773 	if ((dev->features & NETIF_F_NO_CSUM) &&
4774 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4775 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4776 		       dev->name);
4777 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4778 	}
4779 
4780 	dev->features = netdev_fix_features(dev->features, dev->name);
4781 
4782 	/* Enable software GSO if SG is supported. */
4783 	if (dev->features & NETIF_F_SG)
4784 		dev->features |= NETIF_F_GSO;
4785 
4786 	netdev_initialize_kobject(dev);
4787 	ret = netdev_register_kobject(dev);
4788 	if (ret)
4789 		goto err_uninit;
4790 	dev->reg_state = NETREG_REGISTERED;
4791 
4792 	/*
4793 	 *	Default initial state at registry is that the
4794 	 *	device is present.
4795 	 */
4796 
4797 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4798 
4799 	dev_init_scheduler(dev);
4800 	dev_hold(dev);
4801 	list_netdevice(dev);
4802 
4803 	/* Notify protocols, that a new device appeared. */
4804 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4805 	ret = notifier_to_errno(ret);
4806 	if (ret) {
4807 		rollback_registered(dev);
4808 		dev->reg_state = NETREG_UNREGISTERED;
4809 	}
4810 
4811 out:
4812 	return ret;
4813 
4814 err_uninit:
4815 	if (dev->netdev_ops->ndo_uninit)
4816 		dev->netdev_ops->ndo_uninit(dev);
4817 	goto out;
4818 }
4819 
4820 /**
4821  *	init_dummy_netdev	- init a dummy network device for NAPI
4822  *	@dev: device to init
4823  *
4824  *	This takes a network device structure and initialize the minimum
4825  *	amount of fields so it can be used to schedule NAPI polls without
4826  *	registering a full blown interface. This is to be used by drivers
4827  *	that need to tie several hardware interfaces to a single NAPI
4828  *	poll scheduler due to HW limitations.
4829  */
4830 int init_dummy_netdev(struct net_device *dev)
4831 {
4832 	/* Clear everything. Note we don't initialize spinlocks
4833 	 * as they aren't supposed to be taken by any of the
4834 	 * NAPI code and this dummy netdev is supposed to be
4835 	 * only ever used for NAPI polls
4836 	 */
4837 	memset(dev, 0, sizeof(struct net_device));
4838 
4839 	/* make sure we BUG if trying to hit standard
4840 	 * register/unregister code path
4841 	 */
4842 	dev->reg_state = NETREG_DUMMY;
4843 
4844 	/* initialize the ref count */
4845 	atomic_set(&dev->refcnt, 1);
4846 
4847 	/* NAPI wants this */
4848 	INIT_LIST_HEAD(&dev->napi_list);
4849 
4850 	/* a dummy interface is started by default */
4851 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4852 	set_bit(__LINK_STATE_START, &dev->state);
4853 
4854 	return 0;
4855 }
4856 EXPORT_SYMBOL_GPL(init_dummy_netdev);
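/*
 * Illustrative sketch (not part of this file): a driver whose single
 * interrupt serves several hardware ports can hang its NAPI context off a
 * dummy netdev instead of one of the real interfaces.  All names here are
 * hypothetical.
 *
 *	static struct net_device dummy_dev;
 *	static struct napi_struct my_napi;
 *
 *	init_dummy_netdev(&dummy_dev);
 *	netif_napi_add(&dummy_dev, &my_napi, my_poll, 64);
 *	...
 *	napi_schedule(&my_napi);	(from the interrupt handler)
 */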
4857 
4858 
4859 /**
4860  *	register_netdev	- register a network device
4861  *	@dev: device to register
4862  *
4863  *	Take a completed network device structure and add it to the kernel
4864  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4865  *	chain. 0 is returned on success. A negative errno code is returned
4866  *	on a failure to set up the device, or if the name is a duplicate.
4867  *
4868  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4869  *	and expands the device name if you passed a format string to
4870  *	alloc_netdev.
4871  */
4872 int register_netdev(struct net_device *dev)
4873 {
4874 	int err;
4875 
4876 	rtnl_lock();
4877 
4878 	/*
4879 	 * If the name is a format string the caller wants us to do a
4880 	 * name allocation.
4881 	 */
4882 	if (strchr(dev->name, '%')) {
4883 		err = dev_alloc_name(dev, dev->name);
4884 		if (err < 0)
4885 			goto out;
4886 	}
4887 
4888 	err = register_netdevice(dev);
4889 out:
4890 	rtnl_unlock();
4891 	return err;
4892 }
4893 EXPORT_SYMBOL(register_netdev);
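/*
 * Illustrative sketch (not part of this file): the usual driver life cycle
 * around register_netdev()/unregister_netdev().  Error handling is trimmed
 * and the private struct and ops names are hypothetical.
 *
 *	dev = alloc_etherdev(sizeof(struct my_priv));
 *	if (!dev)
 *		return -ENOMEM;
 *	dev->netdev_ops = &my_netdev_ops;
 *	err = register_netdev(dev);		(takes rtnl internally)
 *	if (err)
 *		free_netdev(dev);
 *	...
 *	unregister_netdev(dev);			(on removal)
 *	free_netdev(dev);
 */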
4894 
4895 /*
4896  * netdev_wait_allrefs - wait until all references are gone.
4897  *
4898  * This is called when unregistering network devices.
4899  *
4900  * Any protocol or device that holds a reference should register
4901  * for netdevice notification, and cleanup and put back the
4902  * reference if they receive an UNREGISTER event.
4903  * We can get stuck here if buggy protocols don't correctly
4904  * call dev_put.
4905  */
4906 static void netdev_wait_allrefs(struct net_device *dev)
4907 {
4908 	unsigned long rebroadcast_time, warning_time;
4909 
4910 	rebroadcast_time = warning_time = jiffies;
4911 	while (atomic_read(&dev->refcnt) != 0) {
4912 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4913 			rtnl_lock();
4914 
4915 			/* Rebroadcast unregister notification */
4916 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4917 
4918 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4919 				     &dev->state)) {
4920 				/* We must not have linkwatch events
4921 				 * pending on unregister. If this
4922 				 * happens, we simply run the queue
4923 				 * unscheduled, resulting in a noop
4924 				 * for this device.
4925 				 */
4926 				linkwatch_run_queue();
4927 			}
4928 
4929 			__rtnl_unlock();
4930 
4931 			rebroadcast_time = jiffies;
4932 		}
4933 
4934 		msleep(250);
4935 
4936 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4937 			printk(KERN_EMERG "unregister_netdevice: "
4938 			       "waiting for %s to become free. Usage "
4939 			       "count = %d\n",
4940 			       dev->name, atomic_read(&dev->refcnt));
4941 			warning_time = jiffies;
4942 		}
4943 	}
4944 }
4945 
4946 /* The sequence is:
4947  *
4948  *	rtnl_lock();
4949  *	...
4950  *	register_netdevice(x1);
4951  *	register_netdevice(x2);
4952  *	...
4953  *	unregister_netdevice(y1);
4954  *	unregister_netdevice(y2);
4955  *      ...
4956  *	rtnl_unlock();
4957  *	free_netdev(y1);
4958  *	free_netdev(y2);
4959  *
4960  * We are invoked by rtnl_unlock().
4961  * This allows us to deal with problems:
4962  * 1) We can delete sysfs objects which invoke hotplug
4963  *    without deadlocking with linkwatch via keventd.
4964  * 2) Since we run with the RTNL semaphore not held, we can sleep
4965  *    safely in order to wait for the netdev refcnt to drop to zero.
4966  *
4967  * We must not return until all unregister events added during
4968  * the interval the lock was held have been completed.
4969  */
4970 void netdev_run_todo(void)
4971 {
4972 	struct list_head list;
4973 
4974 	/* Snapshot list, allow later requests */
4975 	list_replace_init(&net_todo_list, &list);
4976 
4977 	__rtnl_unlock();
4978 
4979 	while (!list_empty(&list)) {
4980 		struct net_device *dev
4981 			= list_entry(list.next, struct net_device, todo_list);
4982 		list_del(&dev->todo_list);
4983 
4984 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4985 			printk(KERN_ERR "network todo '%s' but state %d\n",
4986 			       dev->name, dev->reg_state);
4987 			dump_stack();
4988 			continue;
4989 		}
4990 
4991 		dev->reg_state = NETREG_UNREGISTERED;
4992 
4993 		on_each_cpu(flush_backlog, dev, 1);
4994 
4995 		netdev_wait_allrefs(dev);
4996 
4997 		/* paranoia */
4998 		BUG_ON(atomic_read(&dev->refcnt));
4999 		WARN_ON(dev->ip_ptr);
5000 		WARN_ON(dev->ip6_ptr);
5001 		WARN_ON(dev->dn_ptr);
5002 
5003 		if (dev->destructor)
5004 			dev->destructor(dev);
5005 
5006 		/* Free network device */
5007 		kobject_put(&dev->dev.kobj);
5008 	}
5009 }
5010 
5011 /**
5012  *	dev_get_stats	- get network device statistics
5013  *	@dev: device to get statistics from
5014  *
5015  *	Get network statistics from device. The device driver may provide
5016  *	its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
5017  *	the internal statistics structure is used.
5018  */
5019 const struct net_device_stats *dev_get_stats(struct net_device *dev)
5020 {
5021 	const struct net_device_ops *ops = dev->netdev_ops;
5022 
5023 	if (ops->ndo_get_stats)
5024 		return ops->ndo_get_stats(dev);
5025 	else {
5026 		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5027 		struct net_device_stats *stats = &dev->stats;
5028 		unsigned int i;
5029 		struct netdev_queue *txq;
5030 
5031 		for (i = 0; i < dev->num_tx_queues; i++) {
5032 			txq = netdev_get_tx_queue(dev, i);
5033 			tx_bytes   += txq->tx_bytes;
5034 			tx_packets += txq->tx_packets;
5035 			tx_dropped += txq->tx_dropped;
5036 		}
5037 		if (tx_bytes || tx_packets || tx_dropped) {
5038 			stats->tx_bytes   = tx_bytes;
5039 			stats->tx_packets = tx_packets;
5040 			stats->tx_dropped = tx_dropped;
5041 		}
5042 		return stats;
5043 	}
5044 }
5045 EXPORT_SYMBOL(dev_get_stats);
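/*
 * Illustrative sketch (not part of this file): a driver that keeps its own
 * counters can override the default behaviour of dev_get_stats() by
 * providing ndo_get_stats; otherwise &dev->stats is returned as above.
 * struct my_priv and its fields are hypothetical.
 *
 *	static struct net_device_stats *my_get_stats(struct net_device *dev)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *
 *		dev->stats.rx_packets = priv->hw_rx_packets;
 *		dev->stats.tx_packets = priv->hw_tx_packets;
 *		return &dev->stats;
 *	}
 *
 * with .ndo_get_stats = my_get_stats in the driver's net_device_ops.
 */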
5046 
5047 static void netdev_init_one_queue(struct net_device *dev,
5048 				  struct netdev_queue *queue,
5049 				  void *_unused)
5050 {
5051 	queue->dev = dev;
5052 }
5053 
5054 static void netdev_init_queues(struct net_device *dev)
5055 {
5056 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5057 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5058 	spin_lock_init(&dev->tx_global_lock);
5059 }
5060 
5061 /**
5062  *	alloc_netdev_mq - allocate network device
5063  *	@sizeof_priv:	size of private data to allocate space for
5064  *	@name:		device name format string
5065  *	@setup:		callback to initialize device
5066  *	@queue_count:	the number of subqueues to allocate
5067  *
5068  *	Allocates a struct net_device with private data area for driver use
5069  *	and performs basic initialization.  Also allocates subqueue structs
5070  *	for each queue on the device at the end of the netdevice.
5071  */
5072 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5073 		void (*setup)(struct net_device *), unsigned int queue_count)
5074 {
5075 	struct netdev_queue *tx;
5076 	struct net_device *dev;
5077 	size_t alloc_size;
5078 	struct net_device *p;
5079 
5080 	BUG_ON(strlen(name) >= sizeof(dev->name));
5081 
5082 	alloc_size = sizeof(struct net_device);
5083 	if (sizeof_priv) {
5084 		/* ensure 32-byte alignment of private area */
5085 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5086 		alloc_size += sizeof_priv;
5087 	}
5088 	/* ensure 32-byte alignment of whole construct */
5089 	alloc_size += NETDEV_ALIGN - 1;
5090 
5091 	p = kzalloc(alloc_size, GFP_KERNEL);
5092 	if (!p) {
5093 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5094 		return NULL;
5095 	}
5096 
5097 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5098 	if (!tx) {
5099 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5100 		       "tx qdiscs.\n");
5101 		goto free_p;
5102 	}
5103 
5104 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5105 	dev->padded = (char *)dev - (char *)p;
5106 
5107 	if (dev_addr_init(dev))
5108 		goto free_tx;
5109 
5110 	dev_unicast_init(dev);
5111 
5112 	dev_net_set(dev, &init_net);
5113 
5114 	dev->_tx = tx;
5115 	dev->num_tx_queues = queue_count;
5116 	dev->real_num_tx_queues = queue_count;
5117 
5118 	dev->gso_max_size = GSO_MAX_SIZE;
5119 
5120 	netdev_init_queues(dev);
5121 
5122 	INIT_LIST_HEAD(&dev->napi_list);
5123 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5124 	setup(dev);
5125 	strcpy(dev->name, name);
5126 	return dev;
5127 
5128 free_tx:
5129 	kfree(tx);
5130 
5131 free_p:
5132 	kfree(p);
5133 	return NULL;
5134 }
5135 EXPORT_SYMBOL(alloc_netdev_mq);
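/*
 * Illustrative sketch (not part of this file): a multiqueue ethernet driver
 * usually reaches this through the alloc_etherdev_mq() wrapper, which
 * supplies ether_setup() and an "eth%d" name; struct my_priv is hypothetical.
 *
 *	dev = alloc_netdev_mq(sizeof(struct my_priv), "eth%d",
 *			      ether_setup, 8);
 *
 * allocates a device with eight TX queues; the "%d" is expanded later by
 * dev_alloc_name() when the device is registered.
 */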
5136 
5137 /**
5138  *	free_netdev - free network device
5139  *	@dev: device
5140  *
5141  *	This function does the last stage of destroying an allocated device
5142  * 	interface. The reference to the device object is released.
5143  *	If this is the last reference then it will be freed.
5144  */
5145 void free_netdev(struct net_device *dev)
5146 {
5147 	struct napi_struct *p, *n;
5148 
5149 	release_net(dev_net(dev));
5150 
5151 	kfree(dev->_tx);
5152 
5153 	/* Flush device addresses */
5154 	dev_addr_flush(dev);
5155 
5156 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5157 		netif_napi_del(p);
5158 
5159 	/*  Compatibility with error handling in drivers */
5160 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5161 		kfree((char *)dev - dev->padded);
5162 		return;
5163 	}
5164 
5165 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5166 	dev->reg_state = NETREG_RELEASED;
5167 
5168 	/* will free via device release */
5169 	put_device(&dev->dev);
5170 }
5171 
5172 /**
5173  *	synchronize_net -  Synchronize with packet receive processing
5174  *
5175  *	Wait for packets currently being received to be done.
5176  *	Does not block later packets from starting.
5177  */
5178 void synchronize_net(void)
5179 {
5180 	might_sleep();
5181 	synchronize_rcu();
5182 }
5183 
5184 /**
5185  *	unregister_netdevice - remove device from the kernel
5186  *	@dev: device
5187  *
5188  *	This function shuts down a device interface and removes it
5189  *	from the kernel tables.
5190  *
5191  *	Callers must hold the rtnl semaphore.  You may want
5192  *	unregister_netdev() instead of this.
5193  */
5194 
5195 void unregister_netdevice(struct net_device *dev)
5196 {
5197 	ASSERT_RTNL();
5198 
5199 	rollback_registered(dev);
5200 	/* Finish processing unregister after unlock */
5201 	net_set_todo(dev);
5202 }
5203 
5204 /**
5205  *	unregister_netdev - remove device from the kernel
5206  *	@dev: device
5207  *
5208  *	This function shuts down a device interface and removes it
5209  *	from the kernel tables.
5210  *
5211  *	This is just a wrapper for unregister_netdevice that takes
5212  *	the rtnl semaphore.  In general you want to use this and not
5213  *	unregister_netdevice.
5214  */
5215 void unregister_netdev(struct net_device *dev)
5216 {
5217 	rtnl_lock();
5218 	unregister_netdevice(dev);
5219 	rtnl_unlock();
5220 }
5221 
5222 EXPORT_SYMBOL(unregister_netdev);
5223 
5224 /**
5225  *	dev_change_net_namespace - move device to a different network namespace
5226  *	@dev: device
5227  *	@net: network namespace
5228  *	@pat: If not NULL name pattern to try if the current device name
5229  *	      is already taken in the destination network namespace.
5230  *
5231  *	This function shuts down a device interface and moves it
5232  *	to a new network namespace. On success 0 is returned, on
5233  *	a failure a negative errno code is returned.
5234  *
5235  *	Callers must hold the rtnl semaphore.
5236  */
5237 
5238 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5239 {
5240 	char buf[IFNAMSIZ];
5241 	const char *destname;
5242 	int err;
5243 
5244 	ASSERT_RTNL();
5245 
5246 	/* Don't allow namespace local devices to be moved. */
5247 	err = -EINVAL;
5248 	if (dev->features & NETIF_F_NETNS_LOCAL)
5249 		goto out;
5250 
5251 #ifdef CONFIG_SYSFS
5252 	/* Don't allow real devices to be moved when sysfs
5253 	 * is enabled.
5254 	 */
5255 	err = -EINVAL;
5256 	if (dev->dev.parent)
5257 		goto out;
5258 #endif
5259 
5260 	/* Ensure the device has been registered */
5261 	err = -EINVAL;
5262 	if (dev->reg_state != NETREG_REGISTERED)
5263 		goto out;
5264 
5265 	/* Get out if there is nothing to do */
5266 	err = 0;
5267 	if (net_eq(dev_net(dev), net))
5268 		goto out;
5269 
5270 	/* Pick the destination device name, and ensure
5271 	 * we can use it in the destination network namespace.
5272 	 */
5273 	err = -EEXIST;
5274 	destname = dev->name;
5275 	if (__dev_get_by_name(net, destname)) {
5276 		/* We get here if we can't use the current device name */
5277 		if (!pat)
5278 			goto out;
5279 		if (!dev_valid_name(pat))
5280 			goto out;
5281 		if (strchr(pat, '%')) {
5282 			if (__dev_alloc_name(net, pat, buf) < 0)
5283 				goto out;
5284 			destname = buf;
5285 		} else
5286 			destname = pat;
5287 		if (__dev_get_by_name(net, destname))
5288 			goto out;
5289 	}
5290 
5291 	/*
5292 	 * And now a mini version of register_netdevice and unregister_netdevice.
5293 	 */
5294 
5295 	/* If device is running close it first. */
5296 	dev_close(dev);
5297 
5298 	/* And unlink it from device chain */
5299 	err = -ENODEV;
5300 	unlist_netdevice(dev);
5301 
5302 	synchronize_net();
5303 
5304 	/* Shutdown queueing discipline. */
5305 	dev_shutdown(dev);
5306 
5307 	/* Notify protocols, that we are about to destroy
5308 	   this device. They should clean all the things.
5309 	*/
5310 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5311 
5312 	/*
5313 	 *	Flush the unicast and multicast chains
5314 	 */
5315 	dev_unicast_flush(dev);
5316 	dev_addr_discard(dev);
5317 
5318 	netdev_unregister_kobject(dev);
5319 
5320 	/* Actually switch the network namespace */
5321 	dev_net_set(dev, net);
5322 
5323 	/* Assign the new device name */
5324 	if (destname != dev->name)
5325 		strcpy(dev->name, destname);
5326 
5327 	/* If there is an ifindex conflict assign a new one */
5328 	if (__dev_get_by_index(net, dev->ifindex)) {
5329 		int iflink = (dev->iflink == dev->ifindex);
5330 		dev->ifindex = dev_new_index(net);
5331 		if (iflink)
5332 			dev->iflink = dev->ifindex;
5333 	}
5334 
5335 	/* Fixup kobjects */
5336 	err = netdev_register_kobject(dev);
5337 	WARN_ON(err);
5338 
5339 	/* Add the device back in the hashes */
5340 	list_netdevice(dev);
5341 
5342 	/* Notify protocols, that a new device appeared. */
5343 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5344 
5345 	synchronize_net();
5346 	err = 0;
5347 out:
5348 	return err;
5349 }
5350 
5351 static int dev_cpu_callback(struct notifier_block *nfb,
5352 			    unsigned long action,
5353 			    void *ocpu)
5354 {
5355 	struct sk_buff **list_skb;
5356 	struct Qdisc **list_net;
5357 	struct sk_buff *skb;
5358 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5359 	struct softnet_data *sd, *oldsd;
5360 
5361 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5362 		return NOTIFY_OK;
5363 
5364 	local_irq_disable();
5365 	cpu = smp_processor_id();
5366 	sd = &per_cpu(softnet_data, cpu);
5367 	oldsd = &per_cpu(softnet_data, oldcpu);
5368 
5369 	/* Find end of our completion_queue. */
5370 	list_skb = &sd->completion_queue;
5371 	while (*list_skb)
5372 		list_skb = &(*list_skb)->next;
5373 	/* Append completion queue from offline CPU. */
5374 	*list_skb = oldsd->completion_queue;
5375 	oldsd->completion_queue = NULL;
5376 
5377 	/* Find end of our output_queue. */
5378 	list_net = &sd->output_queue;
5379 	while (*list_net)
5380 		list_net = &(*list_net)->next_sched;
5381 	/* Append output queue from offline CPU. */
5382 	*list_net = oldsd->output_queue;
5383 	oldsd->output_queue = NULL;
5384 
5385 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5386 	local_irq_enable();
5387 
5388 	/* Process offline CPU's input_pkt_queue */
5389 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5390 		netif_rx(skb);
5391 
5392 	return NOTIFY_OK;
5393 }
5394 
5395 
5396 /**
5397  *	netdev_increment_features - increment feature set by one
5398  *	@all: current feature set
5399  *	@one: new feature set
5400  *	@mask: mask feature set
5401  *
5402  *	Computes a new feature set after adding a device with feature set
5403  *	@one to the master device with current feature set @all.  Will not
5404  *	enable anything that is off in @mask. Returns the new feature set.
5405  */
5406 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5407 					unsigned long mask)
5408 {
5409 	/* If device needs checksumming, downgrade to it. */
5410 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5411 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5412 	else if (mask & NETIF_F_ALL_CSUM) {
5413 		/* If one device supports v4/v6 checksumming, set for all. */
5414 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5415 		    !(all & NETIF_F_GEN_CSUM)) {
5416 			all &= ~NETIF_F_ALL_CSUM;
5417 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5418 		}
5419 
5420 		/* If one device supports hw checksumming, set for all. */
5421 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5422 			all &= ~NETIF_F_ALL_CSUM;
5423 			all |= NETIF_F_HW_CSUM;
5424 		}
5425 	}
5426 
5427 	one |= NETIF_F_ALL_CSUM;
5428 
5429 	one |= all & NETIF_F_ONE_FOR_ALL;
5430 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5431 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5432 
5433 	return all;
5434 }
5435 EXPORT_SYMBOL(netdev_increment_features);
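/*
 * Illustrative sketch (not part of this file): a master device such as
 * bonding recomputes its feature set by folding every slave's features into
 * a running value, roughly as below.  The slave iteration is abbreviated and
 * the feature mask shown is only an example.
 *
 *	unsigned long features = master_dev->features & EXAMPLE_FEATURE_MASK;
 *
 *	(for each slave device:)
 *		features = netdev_increment_features(features,
 *						     slave_dev->features,
 *						     EXAMPLE_FEATURE_MASK);
 *	master_dev->features = features;
 */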
5436 
5437 static struct hlist_head *netdev_create_hash(void)
5438 {
5439 	int i;
5440 	struct hlist_head *hash;
5441 
5442 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5443 	if (hash != NULL)
5444 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5445 			INIT_HLIST_HEAD(&hash[i]);
5446 
5447 	return hash;
5448 }
5449 
5450 /* Initialize per network namespace state */
5451 static int __net_init netdev_init(struct net *net)
5452 {
5453 	INIT_LIST_HEAD(&net->dev_base_head);
5454 
5455 	net->dev_name_head = netdev_create_hash();
5456 	if (net->dev_name_head == NULL)
5457 		goto err_name;
5458 
5459 	net->dev_index_head = netdev_create_hash();
5460 	if (net->dev_index_head == NULL)
5461 		goto err_idx;
5462 
5463 	return 0;
5464 
5465 err_idx:
5466 	kfree(net->dev_name_head);
5467 err_name:
5468 	return -ENOMEM;
5469 }
5470 
5471 /**
5472  *	netdev_drivername - network driver for the device
5473  *	@dev: network device
5474  *	@buffer: buffer for resulting name
5475  *	@len: size of buffer
5476  *
5477  *	Determine network driver for device.
5478  */
5479 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5480 {
5481 	const struct device_driver *driver;
5482 	const struct device *parent;
5483 
5484 	if (len <= 0 || !buffer)
5485 		return buffer;
5486 	buffer[0] = 0;
5487 
5488 	parent = dev->dev.parent;
5489 
5490 	if (!parent)
5491 		return buffer;
5492 
5493 	driver = parent->driver;
5494 	if (driver && driver->name)
5495 		strlcpy(buffer, driver->name, len);
5496 	return buffer;
5497 }
5498 
5499 static void __net_exit netdev_exit(struct net *net)
5500 {
5501 	kfree(net->dev_name_head);
5502 	kfree(net->dev_index_head);
5503 }
5504 
5505 static struct pernet_operations __net_initdata netdev_net_ops = {
5506 	.init = netdev_init,
5507 	.exit = netdev_exit,
5508 };
5509 
5510 static void __net_exit default_device_exit(struct net *net)
5511 {
5512 	struct net_device *dev;
5513 	/*
5514 	 * Push all migratable network devices back to the
5515 	 * initial network namespace
5516 	 */
5517 	rtnl_lock();
5518 restart:
5519 	for_each_netdev(net, dev) {
5520 		int err;
5521 		char fb_name[IFNAMSIZ];
5522 
5523 		/* Ignore unmovable devices (e.g. loopback) */
5524 		if (dev->features & NETIF_F_NETNS_LOCAL)
5525 			continue;
5526 
5527 		/* Delete virtual devices */
5528 		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
5529 			dev->rtnl_link_ops->dellink(dev);
5530 			goto restart;
5531 		}
5532 
5533 		/* Push remaining network devices to init_net */
5534 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5535 		err = dev_change_net_namespace(dev, &init_net, fb_name);
5536 		if (err) {
5537 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5538 				__func__, dev->name, err);
5539 			BUG();
5540 		}
5541 		goto restart;
5542 	}
5543 	rtnl_unlock();
5544 }
5545 
5546 static struct pernet_operations __net_initdata default_device_ops = {
5547 	.exit = default_device_exit,
5548 };
5549 
5550 /*
5551  *	Initialize the DEV module. At boot time this walks the device list and
5552  *	unhooks any devices that fail to initialise (normally hardware not
5553  *	present) and leaves us with a valid list of present and active devices.
5554  *
5555  */
5556 
5557 /*
5558  *       This is called single threaded during boot, so no need
5559  *       to take the rtnl semaphore.
5560  */
5561 static int __init net_dev_init(void)
5562 {
5563 	int i, rc = -ENOMEM;
5564 
5565 	BUG_ON(!dev_boot_phase);
5566 
5567 	if (dev_proc_init())
5568 		goto out;
5569 
5570 	if (netdev_kobject_init())
5571 		goto out;
5572 
5573 	INIT_LIST_HEAD(&ptype_all);
5574 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
5575 		INIT_LIST_HEAD(&ptype_base[i]);
5576 
5577 	if (register_pernet_subsys(&netdev_net_ops))
5578 		goto out;
5579 
5580 	/*
5581 	 *	Initialise the packet receive queues.
5582 	 */
5583 
5584 	for_each_possible_cpu(i) {
5585 		struct softnet_data *queue;
5586 
5587 		queue = &per_cpu(softnet_data, i);
5588 		skb_queue_head_init(&queue->input_pkt_queue);
5589 		queue->completion_queue = NULL;
5590 		INIT_LIST_HEAD(&queue->poll_list);
5591 
5592 		queue->backlog.poll = process_backlog;
5593 		queue->backlog.weight = weight_p;
5594 		queue->backlog.gro_list = NULL;
5595 		queue->backlog.gro_count = 0;
5596 	}
5597 
5598 	dev_boot_phase = 0;
5599 
5600 	/* The loopback device is special: if any other network device
5601 	 * is present in a network namespace, the loopback device must
5602 	 * be present as well.  Since we now dynamically allocate and free
5603 	 * the loopback device, ensure this invariant is maintained by
5604 	 * keeping the loopback device the first device on the list of
5605 	 * network devices: it is the first device that appears and the
5606 	 * last network device that disappears.
5608 	 */
5609 	if (register_pernet_device(&loopback_net_ops))
5610 		goto out;
5611 
5612 	if (register_pernet_device(&default_device_ops))
5613 		goto out;
5614 
5615 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5616 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5617 
5618 	hotcpu_notifier(dev_cpu_callback, 0);
5619 	dst_init();
5620 	dev_mcast_init();
5621 	rc = 0;
5622 out:
5623 	return rc;
5624 }
5625 
5626 subsys_initcall(net_dev_init);
5627 
5628 static int __init initialize_hashrnd(void)
5629 {
5630 	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5631 	return 0;
5632 }
5633 
5634 late_initcall_sync(initialize_hashrnd);
5635 
5636 EXPORT_SYMBOL(__dev_get_by_index);
5637 EXPORT_SYMBOL(__dev_get_by_name);
5638 EXPORT_SYMBOL(__dev_remove_pack);
5639 EXPORT_SYMBOL(dev_valid_name);
5640 EXPORT_SYMBOL(dev_add_pack);
5641 EXPORT_SYMBOL(dev_alloc_name);
5642 EXPORT_SYMBOL(dev_close);
5643 EXPORT_SYMBOL(dev_get_by_flags);
5644 EXPORT_SYMBOL(dev_get_by_index);
5645 EXPORT_SYMBOL(dev_get_by_name);
5646 EXPORT_SYMBOL(dev_open);
5647 EXPORT_SYMBOL(dev_queue_xmit);
5648 EXPORT_SYMBOL(dev_remove_pack);
5649 EXPORT_SYMBOL(dev_set_allmulti);
5650 EXPORT_SYMBOL(dev_set_promiscuity);
5651 EXPORT_SYMBOL(dev_change_flags);
5652 EXPORT_SYMBOL(dev_set_mtu);
5653 EXPORT_SYMBOL(dev_set_mac_address);
5654 EXPORT_SYMBOL(free_netdev);
5655 EXPORT_SYMBOL(netdev_boot_setup_check);
5656 EXPORT_SYMBOL(netdev_set_master);
5657 EXPORT_SYMBOL(netdev_state_change);
5658 EXPORT_SYMBOL(netif_receive_skb);
5659 EXPORT_SYMBOL(netif_rx);
5660 EXPORT_SYMBOL(register_gifconf);
5661 EXPORT_SYMBOL(register_netdevice);
5662 EXPORT_SYMBOL(register_netdevice_notifier);
5663 EXPORT_SYMBOL(skb_checksum_help);
5664 EXPORT_SYMBOL(synchronize_net);
5665 EXPORT_SYMBOL(unregister_netdevice);
5666 EXPORT_SYMBOL(unregister_netdevice_notifier);
5667 EXPORT_SYMBOL(net_enable_timestamp);
5668 EXPORT_SYMBOL(net_disable_timestamp);
5669 EXPORT_SYMBOL(dev_get_flags);
5670 
5671 EXPORT_SYMBOL(dev_load);
5672 
5673 EXPORT_PER_CPU_SYMBOL(softnet_data);
5674