xref: /linux/net/core/dev.c (revision cb299ba8b5ef2239429484072fea394cd7581bd7)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call per packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/hash.h>
83 #include <linux/slab.h>
84 #include <linux/sched.h>
85 #include <linux/mutex.h>
86 #include <linux/string.h>
87 #include <linux/mm.h>
88 #include <linux/socket.h>
89 #include <linux/sockios.h>
90 #include <linux/errno.h>
91 #include <linux/interrupt.h>
92 #include <linux/if_ether.h>
93 #include <linux/netdevice.h>
94 #include <linux/etherdevice.h>
95 #include <linux/ethtool.h>
96 #include <linux/notifier.h>
97 #include <linux/skbuff.h>
98 #include <net/net_namespace.h>
99 #include <net/sock.h>
100 #include <linux/rtnetlink.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/stat.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <net/xfrm.h>
108 #include <linux/highmem.h>
109 #include <linux/init.h>
110 #include <linux/kmod.h>
111 #include <linux/module.h>
112 #include <linux/netpoll.h>
113 #include <linux/rcupdate.h>
114 #include <linux/delay.h>
115 #include <net/wext.h>
116 #include <net/iw_handler.h>
117 #include <asm/current.h>
118 #include <linux/audit.h>
119 #include <linux/dmaengine.h>
120 #include <linux/err.h>
121 #include <linux/ctype.h>
122 #include <linux/if_arp.h>
123 #include <linux/if_vlan.h>
124 #include <linux/ip.h>
125 #include <net/ip.h>
126 #include <linux/ipv6.h>
127 #include <linux/in.h>
128 #include <linux/jhash.h>
129 #include <linux/random.h>
130 #include <trace/events/napi.h>
131 #include <trace/events/net.h>
132 #include <trace/events/skb.h>
133 #include <linux/pci.h>
134 #include <linux/inetdevice.h>
135 
136 #include "net-sysfs.h"
137 
138 /* Instead of increasing this, you should create a hash table. */
139 #define MAX_GRO_SKBS 8
140 
141 /* This should be increased if a protocol with a bigger head is added. */
142 #define GRO_MAX_HEAD (MAX_HEADER + 128)
143 
144 /*
145  *	The list of packet types we will receive (as opposed to discard)
146  *	and the routines to invoke.
147  *
148  *	Why 16? Because with 16 the only overlap we get on a hash of the
149  *	low nibble of the protocol value is RARP/SNAP/X.25.
150  *
151  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
152  *             sure which should go first, but I bet it won't make much
153  *             difference if we are running VLANs.  The good news is that
154  *             this protocol won't be in the list unless compiled in, so
155  *             the average user (w/out VLANs) will not be adversely affected.
156  *             --BLG
157  *
158  *		0800	IP
159  *		8100    802.1Q VLAN
160  *		0001	802.3
161  *		0002	AX.25
162  *		0004	802.2
163  *		8035	RARP
164  *		0005	SNAP
165  *		0805	X.25
166  *		0806	ARP
167  *		8137	IPX
168  *		0009	Localtalk
169  *		86DD	IPv6
170  */
171 
172 #define PTYPE_HASH_SIZE	(16)
173 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
174 
175 static DEFINE_SPINLOCK(ptype_lock);
176 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
177 static struct list_head ptype_all __read_mostly;	/* Taps */
178 
179 /*
180  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
181  * semaphore.
182  *
183  * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
184  *
185  * Writers must hold the rtnl semaphore while they loop through the
186  * dev_base_head list, and hold dev_base_lock for writing when they do the
187  * actual updates.  This allows pure readers to access the list even
188  * while a writer is preparing to update it.
189  *
190  * To put it another way, dev_base_lock is held for writing only to
191  * protect against pure readers; the rtnl semaphore provides the
192  * protection against other writers.
193  *
194  * See, for example usages, register_netdevice() and
195  * unregister_netdevice(), which must be called with the rtnl
196  * semaphore held.
197  */
198 DEFINE_RWLOCK(dev_base_lock);
199 EXPORT_SYMBOL(dev_base_lock);
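
/*
 * Illustrative sketch of the locking rules described above; the helper
 * name example_count_netdevs() is an assumption, not a kernel symbol.
 * A pure reader only needs dev_base_lock (or rcu_read_lock() with the
 * _rcu iterators); a writer must additionally hold the RTNL semaphore.
 */
static int example_count_netdevs(struct net *net)
{
	struct net_device *dev;
	int count = 0;

	read_lock(&dev_base_lock);	/* pure reader, no RTNL required */
	for_each_netdev(net, dev)
		count++;
	read_unlock(&dev_base_lock);

	return count;
}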
200 
201 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
202 {
203 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
204 	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
205 }
206 
207 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
208 {
209 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
210 }
211 
212 static inline void rps_lock(struct softnet_data *sd)
213 {
214 #ifdef CONFIG_RPS
215 	spin_lock(&sd->input_pkt_queue.lock);
216 #endif
217 }
218 
219 static inline void rps_unlock(struct softnet_data *sd)
220 {
221 #ifdef CONFIG_RPS
222 	spin_unlock(&sd->input_pkt_queue.lock);
223 #endif
224 }
225 
226 /* Device list insertion */
227 static int list_netdevice(struct net_device *dev)
228 {
229 	struct net *net = dev_net(dev);
230 
231 	ASSERT_RTNL();
232 
233 	write_lock_bh(&dev_base_lock);
234 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
235 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
236 	hlist_add_head_rcu(&dev->index_hlist,
237 			   dev_index_hash(net, dev->ifindex));
238 	write_unlock_bh(&dev_base_lock);
239 	return 0;
240 }
241 
242 /* Device list removal
243  * caller must respect a RCU grace period before freeing/reusing dev
244  */
245 static void unlist_netdevice(struct net_device *dev)
246 {
247 	ASSERT_RTNL();
248 
249 	/* Unlink dev from the device chain */
250 	write_lock_bh(&dev_base_lock);
251 	list_del_rcu(&dev->dev_list);
252 	hlist_del_rcu(&dev->name_hlist);
253 	hlist_del_rcu(&dev->index_hlist);
254 	write_unlock_bh(&dev_base_lock);
255 }
256 
257 /*
258  *	Our notifier list
259  */
260 
261 static RAW_NOTIFIER_HEAD(netdev_chain);
262 
263 /*
264  *	Device drivers call our routines to queue packets here. We empty the
265  *	queue in the local softnet handler.
266  */
267 
268 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
269 EXPORT_PER_CPU_SYMBOL(softnet_data);
270 
271 #ifdef CONFIG_LOCKDEP
272 /*
273  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
274  * according to dev->type
275  */
276 static const unsigned short netdev_lock_type[] =
277 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
278 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
279 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
280 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
281 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
282 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
283 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
284 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
285 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
286 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
287 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
288 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
289 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
290 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
291 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
292 	 ARPHRD_VOID, ARPHRD_NONE};
293 
294 static const char *const netdev_lock_name[] =
295 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
296 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
297 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
298 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
299 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
300 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
301 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
302 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
303 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
304 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
305 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
306 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
307 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
308 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
309 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
310 	 "_xmit_VOID", "_xmit_NONE"};
311 
312 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
313 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
314 
315 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
316 {
317 	int i;
318 
319 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
320 		if (netdev_lock_type[i] == dev_type)
321 			return i;
322 	/* the last key is used by default */
323 	return ARRAY_SIZE(netdev_lock_type) - 1;
324 }
325 
326 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
327 						 unsigned short dev_type)
328 {
329 	int i;
330 
331 	i = netdev_lock_pos(dev_type);
332 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
333 				   netdev_lock_name[i]);
334 }
335 
336 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
337 {
338 	int i;
339 
340 	i = netdev_lock_pos(dev->type);
341 	lockdep_set_class_and_name(&dev->addr_list_lock,
342 				   &netdev_addr_lock_key[i],
343 				   netdev_lock_name[i]);
344 }
345 #else
346 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
347 						 unsigned short dev_type)
348 {
349 }
350 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
351 {
352 }
353 #endif
354 
355 /*******************************************************************************
356 
357 		Protocol management and registration routines
358 
359 *******************************************************************************/
360 
361 /*
362  *	Add a protocol ID to the list. Now that the input handler is
363  *	smarter we can dispense with all the messy stuff that used to be
364  *	here.
365  *
366  *	BEWARE!!! Protocol handlers that mangle input packets
367  *	MUST BE last in the hash buckets, and protocol checking
368  *	MUST start from the promiscuous ptype_all chain in net_bh.
369  *	This is true now; do not change it.
370  *	Explanation: if a protocol handler that mangles packets were
371  *	first in the list, it could not tell that the packet is
372  *	cloned and must be copied-on-write, so it would modify the
373  *	clone and subsequent readers would see a broken packet.
374  *							--ANK (980803)
375  */
376 
377 static inline struct list_head *ptype_head(const struct packet_type *pt)
378 {
379 	if (pt->type == htons(ETH_P_ALL))
380 		return &ptype_all;
381 	else
382 		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
383 }
384 
385 /**
386  *	dev_add_pack - add packet handler
387  *	@pt: packet type declaration
388  *
389  *	Add a protocol handler to the networking stack. The passed &packet_type
390  *	is linked into kernel lists and may not be freed until it has been
391  *	removed from the kernel lists.
392  *
393  *	This call does not sleep, therefore it can not
394  *	guarantee that all CPUs that are in the middle of receiving
395  *	packets will see the new packet type (until the next received packet).
396  */
397 
398 void dev_add_pack(struct packet_type *pt)
399 {
400 	struct list_head *head = ptype_head(pt);
401 
402 	spin_lock(&ptype_lock);
403 	list_add_rcu(&pt->list, head);
404 	spin_unlock(&ptype_lock);
405 }
406 EXPORT_SYMBOL(dev_add_pack);
407 
408 /**
409  *	__dev_remove_pack	 - remove packet handler
410  *	@pt: packet type declaration
411  *
412  *	Remove a protocol handler that was previously added to the kernel
413  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
414  *	from the kernel lists and can be freed or reused once this function
415  *	returns.
416  *
417  *      The packet type might still be in use by receivers
418  *	and must not be freed until after all the CPUs have gone
419  *	through a quiescent state.
420  */
421 void __dev_remove_pack(struct packet_type *pt)
422 {
423 	struct list_head *head = ptype_head(pt);
424 	struct packet_type *pt1;
425 
426 	spin_lock(&ptype_lock);
427 
428 	list_for_each_entry(pt1, head, list) {
429 		if (pt == pt1) {
430 			list_del_rcu(&pt->list);
431 			goto out;
432 		}
433 	}
434 
435 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
436 out:
437 	spin_unlock(&ptype_lock);
438 }
439 EXPORT_SYMBOL(__dev_remove_pack);
440 
441 /**
442  *	dev_remove_pack	 - remove packet handler
443  *	@pt: packet type declaration
444  *
445  *	Remove a protocol handler that was previously added to the kernel
446  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
447  *	from the kernel lists and can be freed or reused once this function
448  *	returns.
449  *
450  *	This call sleeps to guarantee that no CPU is looking at the packet
451  *	type after return.
452  */
453 void dev_remove_pack(struct packet_type *pt)
454 {
455 	__dev_remove_pack(pt);
456 
457 	synchronize_net();
458 }
459 EXPORT_SYMBOL(dev_remove_pack);
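
/*
 * Usage sketch for the pair above. The handler and packet_type below
 * (example_ip_rcv, example_ip_ptype) are hypothetical and not part of
 * any real protocol; a module would add the handler on load and remove
 * it on unload.
 */
static int example_ip_rcv(struct sk_buff *skb, struct net_device *dev,
			  struct packet_type *pt, struct net_device *orig_dev)
{
	/* A handler owns the skb it is handed: consume it or pass it on. */
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type example_ip_ptype __read_mostly = {
	.type	= cpu_to_be16(ETH_P_IP),
	.func	= example_ip_rcv,
};

/*
 *	dev_add_pack(&example_ip_ptype);
 *	...
 *	dev_remove_pack(&example_ip_ptype);
 */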
460 
461 /******************************************************************************
462 
463 		      Device Boot-time Settings Routines
464 
465 *******************************************************************************/
466 
467 /* Boot time configuration table */
468 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
469 
470 /**
471  *	netdev_boot_setup_add	- add new setup entry
472  *	@name: name of the device
473  *	@map: configured settings for the device
474  *
475  *	Adds new setup entry to the dev_boot_setup list.  The function
476  *	returns 0 on error and 1 on success.  This is a generic routine for
477  *	all netdevices.
478  */
479 static int netdev_boot_setup_add(char *name, struct ifmap *map)
480 {
481 	struct netdev_boot_setup *s;
482 	int i;
483 
484 	s = dev_boot_setup;
485 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
486 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
487 			memset(s[i].name, 0, sizeof(s[i].name));
488 			strlcpy(s[i].name, name, IFNAMSIZ);
489 			memcpy(&s[i].map, map, sizeof(s[i].map));
490 			break;
491 		}
492 	}
493 
494 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
495 }
496 
497 /**
498  *	netdev_boot_setup_check	- check boot time settings
499  *	@dev: the netdevice
500  *
501  * 	Check boot time settings for the device.
502  *	The found settings are set for the device to be used
503  *	later in the device probing.
504  *	Returns 0 if no settings are found, 1 if they are.
505  */
506 int netdev_boot_setup_check(struct net_device *dev)
507 {
508 	struct netdev_boot_setup *s = dev_boot_setup;
509 	int i;
510 
511 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
512 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
513 		    !strcmp(dev->name, s[i].name)) {
514 			dev->irq 	= s[i].map.irq;
515 			dev->base_addr 	= s[i].map.base_addr;
516 			dev->mem_start 	= s[i].map.mem_start;
517 			dev->mem_end 	= s[i].map.mem_end;
518 			return 1;
519 		}
520 	}
521 	return 0;
522 }
523 EXPORT_SYMBOL(netdev_boot_setup_check);
524 
525 
526 /**
527  *	netdev_boot_base	- get address from boot time settings
528  *	@prefix: prefix for network device
529  *	@unit: id for network device
530  *
531  * 	Check boot time settings for the base address of device.
532  *	The found settings are set for the device to be used
533  *	later in the device probing.
534  *	Returns 0 if no settings found.
535  */
536 unsigned long netdev_boot_base(const char *prefix, int unit)
537 {
538 	const struct netdev_boot_setup *s = dev_boot_setup;
539 	char name[IFNAMSIZ];
540 	int i;
541 
542 	sprintf(name, "%s%d", prefix, unit);
543 
544 	/*
545 	 * If device already registered then return base of 1
546 	 * to indicate not to probe for this interface
547 	 */
548 	if (__dev_get_by_name(&init_net, name))
549 		return 1;
550 
551 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
552 		if (!strcmp(name, s[i].name))
553 			return s[i].map.base_addr;
554 	return 0;
555 }
556 
557 /*
558  * Saves at boot time configured settings for any netdevice.
559  */
560 int __init netdev_boot_setup(char *str)
561 {
562 	int ints[5];
563 	struct ifmap map;
564 
565 	str = get_options(str, ARRAY_SIZE(ints), ints);
566 	if (!str || !*str)
567 		return 0;
568 
569 	/* Save settings */
570 	memset(&map, 0, sizeof(map));
571 	if (ints[0] > 0)
572 		map.irq = ints[1];
573 	if (ints[0] > 1)
574 		map.base_addr = ints[2];
575 	if (ints[0] > 2)
576 		map.mem_start = ints[3];
577 	if (ints[0] > 3)
578 		map.mem_end = ints[4];
579 
580 	/* Add new entry to the list */
581 	return netdev_boot_setup_add(str, &map);
582 }
583 
584 __setup("netdev=", netdev_boot_setup);
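
/*
 * Example (kernel command line): the "netdev=" option parsed above takes
 * up to four integers followed by the interface name, in the order used
 * by the code: irq, base_addr, mem_start, mem_end.  The values below are
 * purely illustrative:
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * fills map.irq = 9 and map.base_addr = 0x300 for "eth0", which
 * netdev_boot_setup_check() later copies into the matching net_device.
 */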
585 
586 /*******************************************************************************
587 
588 			    Device Interface Subroutines
589 
590 *******************************************************************************/
591 
592 /**
593  *	__dev_get_by_name	- find a device by its name
594  *	@net: the applicable net namespace
595  *	@name: name to find
596  *
597  *	Find an interface by name. Must be called under RTNL semaphore
598  *	or @dev_base_lock. If the name is found a pointer to the device
599  *	is returned. If the name is not found then %NULL is returned. The
600  *	reference counters are not incremented so the caller must be
601  *	careful with locks.
602  */
603 
604 struct net_device *__dev_get_by_name(struct net *net, const char *name)
605 {
606 	struct hlist_node *p;
607 	struct net_device *dev;
608 	struct hlist_head *head = dev_name_hash(net, name);
609 
610 	hlist_for_each_entry(dev, p, head, name_hlist)
611 		if (!strncmp(dev->name, name, IFNAMSIZ))
612 			return dev;
613 
614 	return NULL;
615 }
616 EXPORT_SYMBOL(__dev_get_by_name);
617 
618 /**
619  *	dev_get_by_name_rcu	- find a device by its name
620  *	@net: the applicable net namespace
621  *	@name: name to find
622  *
623  *	Find an interface by name.
624  *	If the name is found a pointer to the device is returned.
625  * 	If the name is not found then %NULL is returned.
626  *	The reference counters are not incremented so the caller must be
627  *	careful with locks. The caller must hold RCU lock.
628  */
629 
630 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
631 {
632 	struct hlist_node *p;
633 	struct net_device *dev;
634 	struct hlist_head *head = dev_name_hash(net, name);
635 
636 	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
637 		if (!strncmp(dev->name, name, IFNAMSIZ))
638 			return dev;
639 
640 	return NULL;
641 }
642 EXPORT_SYMBOL(dev_get_by_name_rcu);
643 
644 /**
645  *	dev_get_by_name		- find a device by its name
646  *	@net: the applicable net namespace
647  *	@name: name to find
648  *
649  *	Find an interface by name. This can be called from any
650  *	context and does its own locking. The returned handle has
651  *	the usage count incremented and the caller must use dev_put() to
652  *	release it when it is no longer needed. %NULL is returned if no
653  *	matching device is found.
654  */
655 
656 struct net_device *dev_get_by_name(struct net *net, const char *name)
657 {
658 	struct net_device *dev;
659 
660 	rcu_read_lock();
661 	dev = dev_get_by_name_rcu(net, name);
662 	if (dev)
663 		dev_hold(dev);
664 	rcu_read_unlock();
665 	return dev;
666 }
667 EXPORT_SYMBOL(dev_get_by_name);
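
/*
 * Locking sketch for the name lookups above; example_name_is_up() is a
 * hypothetical helper.  The _rcu variant returns a pointer that is only
 * valid inside the RCU read-side critical section, whereas
 * dev_get_by_name() takes a reference the caller must drop with dev_put().
 */
static bool example_name_is_up(struct net *net, const char *name)
{
	struct net_device *dev;
	bool up = false;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		up = !!(dev->flags & IFF_UP);
	rcu_read_unlock();

	return up;
}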
668 
669 /**
670  *	__dev_get_by_index - find a device by its ifindex
671  *	@net: the applicable net namespace
672  *	@ifindex: index of device
673  *
674  *	Search for an interface by index. Returns %NULL if the device
675  *	is not found or a pointer to the device. The device has not
676  *	had its reference counter increased so the caller must be careful
677  *	about locking. The caller must hold either the RTNL semaphore
678  *	or @dev_base_lock.
679  */
680 
681 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
682 {
683 	struct hlist_node *p;
684 	struct net_device *dev;
685 	struct hlist_head *head = dev_index_hash(net, ifindex);
686 
687 	hlist_for_each_entry(dev, p, head, index_hlist)
688 		if (dev->ifindex == ifindex)
689 			return dev;
690 
691 	return NULL;
692 }
693 EXPORT_SYMBOL(__dev_get_by_index);
694 
695 /**
696  *	dev_get_by_index_rcu - find a device by its ifindex
697  *	@net: the applicable net namespace
698  *	@ifindex: index of device
699  *
700  *	Search for an interface by index. Returns %NULL if the device
701  *	is not found or a pointer to the device. The device has not
702  *	had its reference counter increased so the caller must be careful
703  *	about locking. The caller must hold RCU lock.
704  */
705 
706 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
707 {
708 	struct hlist_node *p;
709 	struct net_device *dev;
710 	struct hlist_head *head = dev_index_hash(net, ifindex);
711 
712 	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
713 		if (dev->ifindex == ifindex)
714 			return dev;
715 
716 	return NULL;
717 }
718 EXPORT_SYMBOL(dev_get_by_index_rcu);
719 
720 
721 /**
722  *	dev_get_by_index - find a device by its ifindex
723  *	@net: the applicable net namespace
724  *	@ifindex: index of device
725  *
726  *	Search for an interface by index. Returns NULL if the device
727  *	is not found or a pointer to the device. The device returned has
728  *	had a reference added and the pointer is safe until the user calls
729  *	dev_put to indicate they have finished with it.
730  */
731 
732 struct net_device *dev_get_by_index(struct net *net, int ifindex)
733 {
734 	struct net_device *dev;
735 
736 	rcu_read_lock();
737 	dev = dev_get_by_index_rcu(net, ifindex);
738 	if (dev)
739 		dev_hold(dev);
740 	rcu_read_unlock();
741 	return dev;
742 }
743 EXPORT_SYMBOL(dev_get_by_index);
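
/*
 * Reference-counting sketch for dev_get_by_index(); example_poke_ifindex()
 * is a hypothetical caller.  The returned device stays pinned until
 * dev_put(), so it may be used outside any lock, unlike the __/_rcu
 * variants above.
 */
static void example_poke_ifindex(struct net *net, int ifindex)
{
	struct net_device *dev = dev_get_by_index(net, ifindex);

	if (!dev)
		return;
	/* ... safe to dereference dev here ... */
	dev_put(dev);		/* drop the reference taken by the lookup */
}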
744 
745 /**
746  *	dev_getbyhwaddr - find a device by its hardware address
747  *	@net: the applicable net namespace
748  *	@type: media type of device
749  *	@ha: hardware address
750  *
751  *	Search for an interface by MAC address. Returns NULL if the device
752  *	is not found or a pointer to the device. The caller must hold the
753  *	rtnl semaphore. The returned device has not had its ref count increased
754  *	and the caller must therefore be careful about locking
755  *
756  *	BUGS:
757  *	If the API was consistent this would be __dev_get_by_hwaddr
758  */
759 
760 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
761 {
762 	struct net_device *dev;
763 
764 	ASSERT_RTNL();
765 
766 	for_each_netdev(net, dev)
767 		if (dev->type == type &&
768 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
769 			return dev;
770 
771 	return NULL;
772 }
773 EXPORT_SYMBOL(dev_getbyhwaddr);
774 
775 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
776 {
777 	struct net_device *dev;
778 
779 	ASSERT_RTNL();
780 	for_each_netdev(net, dev)
781 		if (dev->type == type)
782 			return dev;
783 
784 	return NULL;
785 }
786 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
787 
788 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
789 {
790 	struct net_device *dev, *ret = NULL;
791 
792 	rcu_read_lock();
793 	for_each_netdev_rcu(net, dev)
794 		if (dev->type == type) {
795 			dev_hold(dev);
796 			ret = dev;
797 			break;
798 		}
799 	rcu_read_unlock();
800 	return ret;
801 }
802 EXPORT_SYMBOL(dev_getfirstbyhwtype);
803 
804 /**
805  *	dev_get_by_flags_rcu - find any device with given flags
806  *	@net: the applicable net namespace
807  *	@if_flags: IFF_* values
808  *	@mask: bitmask of bits in if_flags to check
809  *
810  *	Search for any interface with the given flags. Returns NULL if a device
811  *	is not found or a pointer to the device. Must be called inside
812  *	rcu_read_lock(), and result refcount is unchanged.
813  */
814 
815 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
816 				    unsigned short mask)
817 {
818 	struct net_device *dev, *ret;
819 
820 	ret = NULL;
821 	for_each_netdev_rcu(net, dev) {
822 		if (((dev->flags ^ if_flags) & mask) == 0) {
823 			ret = dev;
824 			break;
825 		}
826 	}
827 	return ret;
828 }
829 EXPORT_SYMBOL(dev_get_by_flags_rcu);
830 
831 /**
832  *	dev_valid_name - check if name is okay for network device
833  *	@name: name string
834  *
835  *	Network device names need to be valid file names to
836  *	allow sysfs to work.  We also disallow any kind of
837  *	whitespace.
838  */
839 int dev_valid_name(const char *name)
840 {
841 	if (*name == '\0')
842 		return 0;
843 	if (strlen(name) >= IFNAMSIZ)
844 		return 0;
845 	if (!strcmp(name, ".") || !strcmp(name, ".."))
846 		return 0;
847 
848 	while (*name) {
849 		if (*name == '/' || isspace(*name))
850 			return 0;
851 		name++;
852 	}
853 	return 1;
854 }
855 EXPORT_SYMBOL(dev_valid_name);
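
/*
 * Examples of the rule above (illustrative names only): "eth0", "br-lan"
 * and "wlan%d" are accepted, while "", ".", "..", "a/b", "my if" (embedded
 * whitespace) and any name of IFNAMSIZ or more characters are rejected.
 */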
856 
857 /**
858  *	__dev_alloc_name - allocate a name for a device
859  *	@net: network namespace to allocate the device name in
860  *	@name: name format string
861  *	@buf:  scratch buffer and result name string
862  *
863  *	Passed a format string - eg "lt%d" - it will try to find a suitable
864  *	id. It scans list of devices to build up a free map, then chooses
865  *	the first empty slot. The caller must hold the dev_base or rtnl lock
866  *	while allocating the name and adding the device in order to avoid
867  *	duplicates.
868  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
869  *	Returns the number of the unit assigned or a negative errno code.
870  */
871 
872 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
873 {
874 	int i = 0;
875 	const char *p;
876 	const int max_netdevices = 8*PAGE_SIZE;
877 	unsigned long *inuse;
878 	struct net_device *d;
879 
880 	p = strnchr(name, IFNAMSIZ-1, '%');
881 	if (p) {
882 		/*
883 		 * Verify the string as this thing may have come from
884 		 * the user.  There must be either one "%d" and no other "%"
885 		 * characters.
886 		 */
887 		if (p[1] != 'd' || strchr(p + 2, '%'))
888 			return -EINVAL;
889 
890 		/* Use one page as a bit array of possible slots */
891 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
892 		if (!inuse)
893 			return -ENOMEM;
894 
895 		for_each_netdev(net, d) {
896 			if (!sscanf(d->name, name, &i))
897 				continue;
898 			if (i < 0 || i >= max_netdevices)
899 				continue;
900 
901 			/*  avoid cases where sscanf is not exact inverse of printf */
902 			snprintf(buf, IFNAMSIZ, name, i);
903 			if (!strncmp(buf, d->name, IFNAMSIZ))
904 				set_bit(i, inuse);
905 		}
906 
907 		i = find_first_zero_bit(inuse, max_netdevices);
908 		free_page((unsigned long) inuse);
909 	}
910 
911 	if (buf != name)
912 		snprintf(buf, IFNAMSIZ, name, i);
913 	if (!__dev_get_by_name(net, buf))
914 		return i;
915 
916 	/* It is possible to run out of possible slots
917 	 * when the name is long and there isn't enough space left
918 	 * for the digits, or if all bits are used.
919 	 */
920 	return -ENFILE;
921 }
922 
923 /**
924  *	dev_alloc_name - allocate a name for a device
925  *	@dev: device
926  *	@name: name format string
927  *
928  *	Passed a format string - eg "lt%d" - it will try to find a suitable
929  *	id. It scans list of devices to build up a free map, then chooses
930  *	the first empty slot. The caller must hold the dev_base or rtnl lock
931  *	while allocating the name and adding the device in order to avoid
932  *	duplicates.
933  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
934  *	Returns the number of the unit assigned or a negative errno code.
935  */
936 
937 int dev_alloc_name(struct net_device *dev, const char *name)
938 {
939 	char buf[IFNAMSIZ];
940 	struct net *net;
941 	int ret;
942 
943 	BUG_ON(!dev_net(dev));
944 	net = dev_net(dev);
945 	ret = __dev_alloc_name(net, name, buf);
946 	if (ret >= 0)
947 		strlcpy(dev->name, buf, IFNAMSIZ);
948 	return ret;
949 }
950 EXPORT_SYMBOL(dev_alloc_name);
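
/*
 * Typical call-site sketch (the surrounding driver code and the "veth%d"
 * template are assumptions for illustration):
 *
 *	err = dev_alloc_name(dev, "veth%d");
 *	if (err < 0)
 *		goto fail;
 *
 * On success dev->name holds e.g. "veth0" and err is the unit number.
 */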
951 
952 static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
953 {
954 	struct net *net;
955 
956 	BUG_ON(!dev_net(dev));
957 	net = dev_net(dev);
958 
959 	if (!dev_valid_name(name))
960 		return -EINVAL;
961 
962 	if (fmt && strchr(name, '%'))
963 		return dev_alloc_name(dev, name);
964 	else if (__dev_get_by_name(net, name))
965 		return -EEXIST;
966 	else if (dev->name != name)
967 		strlcpy(dev->name, name, IFNAMSIZ);
968 
969 	return 0;
970 }
971 
972 /**
973  *	dev_change_name - change name of a device
974  *	@dev: device
975  *	@newname: name (or format string) must be at least IFNAMSIZ
976  *
977  *	Change the name of a device. Format strings such as "eth%d"
978  *	can be passed for wildcarding.
979  */
980 int dev_change_name(struct net_device *dev, const char *newname)
981 {
982 	char oldname[IFNAMSIZ];
983 	int err = 0;
984 	int ret;
985 	struct net *net;
986 
987 	ASSERT_RTNL();
988 	BUG_ON(!dev_net(dev));
989 
990 	net = dev_net(dev);
991 	if (dev->flags & IFF_UP)
992 		return -EBUSY;
993 
994 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
995 		return 0;
996 
997 	memcpy(oldname, dev->name, IFNAMSIZ);
998 
999 	err = dev_get_valid_name(dev, newname, 1);
1000 	if (err < 0)
1001 		return err;
1002 
1003 rollback:
1004 	ret = device_rename(&dev->dev, dev->name);
1005 	if (ret) {
1006 		memcpy(dev->name, oldname, IFNAMSIZ);
1007 		return ret;
1008 	}
1009 
1010 	write_lock_bh(&dev_base_lock);
1011 	hlist_del(&dev->name_hlist);
1012 	write_unlock_bh(&dev_base_lock);
1013 
1014 	synchronize_rcu();
1015 
1016 	write_lock_bh(&dev_base_lock);
1017 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1018 	write_unlock_bh(&dev_base_lock);
1019 
1020 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1021 	ret = notifier_to_errno(ret);
1022 
1023 	if (ret) {
1024 		/* err >= 0 after dev_alloc_name() or stores the first errno */
1025 		if (err >= 0) {
1026 			err = ret;
1027 			memcpy(dev->name, oldname, IFNAMSIZ);
1028 			goto rollback;
1029 		} else {
1030 			printk(KERN_ERR
1031 			       "%s: name change rollback failed: %d.\n",
1032 			       dev->name, ret);
1033 		}
1034 	}
1035 
1036 	return err;
1037 }
1038 
1039 /**
1040  *	dev_set_alias - change ifalias of a device
1041  *	@dev: device
1042  *	@alias: name up to IFALIASZ
1043  *	@len: limit of bytes to copy from info
1044  *
1045  *	Set ifalias for a device,
1046  *	Set the ifalias for a device.
1047 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1048 {
1049 	ASSERT_RTNL();
1050 
1051 	if (len >= IFALIASZ)
1052 		return -EINVAL;
1053 
1054 	if (!len) {
1055 		if (dev->ifalias) {
1056 			kfree(dev->ifalias);
1057 			dev->ifalias = NULL;
1058 		}
1059 		return 0;
1060 	}
1061 
1062 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1063 	if (!dev->ifalias)
1064 		return -ENOMEM;
1065 
1066 	strlcpy(dev->ifalias, alias, len+1);
1067 	return len;
1068 }
1069 
1070 
1071 /**
1072  *	netdev_features_change - device changes features
1073  *	@dev: device to cause notification
1074  *
1075  *	Called to indicate a device has changed features.
1076  */
1077 void netdev_features_change(struct net_device *dev)
1078 {
1079 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1080 }
1081 EXPORT_SYMBOL(netdev_features_change);
1082 
1083 /**
1084  *	netdev_state_change - device changes state
1085  *	@dev: device to cause notification
1086  *
1087  *	Called to indicate a device has changed state. This function calls
1088  *	the notifier chains for netdev_chain and sends a NEWLINK message
1089  *	to the routing socket.
1090  */
1091 void netdev_state_change(struct net_device *dev)
1092 {
1093 	if (dev->flags & IFF_UP) {
1094 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1095 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1096 	}
1097 }
1098 EXPORT_SYMBOL(netdev_state_change);
1099 
1100 int netdev_bonding_change(struct net_device *dev, unsigned long event)
1101 {
1102 	return call_netdevice_notifiers(event, dev);
1103 }
1104 EXPORT_SYMBOL(netdev_bonding_change);
1105 
1106 /**
1107  *	dev_load 	- load a network module
1108  *	@net: the applicable net namespace
1109  *	@name: name of interface
1110  *
1111  *	If a network interface is not present and the process has suitable
1112  *	privileges, this function loads the module. If module loading is not
1113  *	available in this kernel then it becomes a nop.
1114  */
1115 
1116 void dev_load(struct net *net, const char *name)
1117 {
1118 	struct net_device *dev;
1119 
1120 	rcu_read_lock();
1121 	dev = dev_get_by_name_rcu(net, name);
1122 	rcu_read_unlock();
1123 
1124 	if (!dev && capable(CAP_NET_ADMIN))
1125 		request_module("%s", name);
1126 }
1127 EXPORT_SYMBOL(dev_load);
1128 
1129 static int __dev_open(struct net_device *dev)
1130 {
1131 	const struct net_device_ops *ops = dev->netdev_ops;
1132 	int ret;
1133 
1134 	ASSERT_RTNL();
1135 
1136 	/*
1137 	 *	Is it even present?
1138 	 */
1139 	if (!netif_device_present(dev))
1140 		return -ENODEV;
1141 
1142 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1143 	ret = notifier_to_errno(ret);
1144 	if (ret)
1145 		return ret;
1146 
1147 	/*
1148 	 *	Call device private open method
1149 	 */
1150 	set_bit(__LINK_STATE_START, &dev->state);
1151 
1152 	if (ops->ndo_validate_addr)
1153 		ret = ops->ndo_validate_addr(dev);
1154 
1155 	if (!ret && ops->ndo_open)
1156 		ret = ops->ndo_open(dev);
1157 
1158 	/*
1159 	 *	If it went open OK then:
1160 	 */
1161 
1162 	if (ret)
1163 		clear_bit(__LINK_STATE_START, &dev->state);
1164 	else {
1165 		/*
1166 		 *	Set the flags.
1167 		 */
1168 		dev->flags |= IFF_UP;
1169 
1170 		/*
1171 		 *	Enable NET_DMA
1172 		 */
1173 		net_dmaengine_get();
1174 
1175 		/*
1176 		 *	Initialize multicasting status
1177 		 */
1178 		dev_set_rx_mode(dev);
1179 
1180 		/*
1181 		 *	Wakeup transmit queue engine
1182 		 */
1183 		dev_activate(dev);
1184 	}
1185 
1186 	return ret;
1187 }
1188 
1189 /**
1190  *	dev_open	- prepare an interface for use.
1191  *	@dev:	device to open
1192  *
1193  *	Takes a device from down to up state. The device's private open
1194  *	function is invoked and then the multicast lists are loaded. Finally
1195  *	the device is moved into the up state and a %NETDEV_UP message is
1196  *	sent to the netdev notifier chain.
1197  *
1198  *	Calling this function on an active interface is a nop. On a failure
1199  *	a negative errno code is returned.
1200  */
1201 int dev_open(struct net_device *dev)
1202 {
1203 	int ret;
1204 
1205 	/*
1206 	 *	Is it already up?
1207 	 */
1208 	if (dev->flags & IFF_UP)
1209 		return 0;
1210 
1211 	/*
1212 	 *	Open device
1213 	 */
1214 	ret = __dev_open(dev);
1215 	if (ret < 0)
1216 		return ret;
1217 
1218 	/*
1219 	 *	... and announce new interface.
1220 	 */
1221 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1222 	call_netdevice_notifiers(NETDEV_UP, dev);
1223 
1224 	return ret;
1225 }
1226 EXPORT_SYMBOL(dev_open);
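
/*
 * Call-site sketch: dev_open() must run under the RTNL semaphore, as
 * __dev_open() asserts.  The error handling below is illustrative only.
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 *	if (err)
 *		printk(KERN_WARNING "%s: open failed: %d\n", dev->name, err);
 */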
1227 
1228 static int __dev_close(struct net_device *dev)
1229 {
1230 	const struct net_device_ops *ops = dev->netdev_ops;
1231 
1232 	ASSERT_RTNL();
1233 	might_sleep();
1234 
1235 	/*
1236 	 *	Tell people we are going down, so that they can
1237 	 *	prepare for death while the device is still operating.
1238 	 */
1239 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1240 
1241 	clear_bit(__LINK_STATE_START, &dev->state);
1242 
1243 	/* Synchronize to scheduled poll. We cannot touch the poll list;
1244 	 * it can even be on a different cpu. So just clear netif_running().
1245 	 *
1246 	 * dev->stop() will invoke napi_disable() on all of its
1247 	 * napi_struct instances on this device.
1248 	 */
1249 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1250 
1251 	dev_deactivate(dev);
1252 
1253 	/*
1254 	 *	Call the device specific close. This cannot fail.
1255 	 *	Only if device is UP
1256 	 *
1257 	 *	We allow it to be called even after a DETACH hot-plug
1258 	 *	event.
1259 	 */
1260 	if (ops->ndo_stop)
1261 		ops->ndo_stop(dev);
1262 
1263 	/*
1264 	 *	Device is now down.
1265 	 */
1266 
1267 	dev->flags &= ~IFF_UP;
1268 
1269 	/*
1270 	 *	Shutdown NET_DMA
1271 	 */
1272 	net_dmaengine_put();
1273 
1274 	return 0;
1275 }
1276 
1277 /**
1278  *	dev_close - shutdown an interface.
1279  *	@dev: device to shutdown
1280  *
1281  *	This function moves an active device into down state. A
1282  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1283  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1284  *	chain.
1285  */
1286 int dev_close(struct net_device *dev)
1287 {
1288 	if (!(dev->flags & IFF_UP))
1289 		return 0;
1290 
1291 	__dev_close(dev);
1292 
1293 	/*
1294 	 * Tell people we are down
1295 	 */
1296 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1297 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1298 
1299 	return 0;
1300 }
1301 EXPORT_SYMBOL(dev_close);
1302 
1303 
1304 /**
1305  *	dev_disable_lro - disable Large Receive Offload on a device
1306  *	@dev: device
1307  *
1308  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1309  *	called under RTNL.  This is needed if received packets may be
1310  *	forwarded to another interface.
1311  */
1312 void dev_disable_lro(struct net_device *dev)
1313 {
1314 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1315 	    dev->ethtool_ops->set_flags) {
1316 		u32 flags = dev->ethtool_ops->get_flags(dev);
1317 		if (flags & ETH_FLAG_LRO) {
1318 			flags &= ~ETH_FLAG_LRO;
1319 			dev->ethtool_ops->set_flags(dev, flags);
1320 		}
1321 	}
1322 	WARN_ON(dev->features & NETIF_F_LRO);
1323 }
1324 EXPORT_SYMBOL(dev_disable_lro);
1325 
1326 
1327 static int dev_boot_phase = 1;
1328 
1329 /*
1330  *	Device change register/unregister. These are not inline or static
1331  *	as we export them to the world.
1332  */
1333 
1334 /**
1335  *	register_netdevice_notifier - register a network notifier block
1336  *	@nb: notifier
1337  *
1338  *	Register a notifier to be called when network device events occur.
1339  *	The notifier passed is linked into the kernel structures and must
1340  *	not be reused until it has been unregistered. A negative errno code
1341  *	is returned on a failure.
1342  *
1343  * 	When registered, all registration and up events are replayed
1344  *	to the new notifier to allow the device to have a race-free
1345  *	view of the network device list.
1346  */
1347 
1348 int register_netdevice_notifier(struct notifier_block *nb)
1349 {
1350 	struct net_device *dev;
1351 	struct net_device *last;
1352 	struct net *net;
1353 	int err;
1354 
1355 	rtnl_lock();
1356 	err = raw_notifier_chain_register(&netdev_chain, nb);
1357 	if (err)
1358 		goto unlock;
1359 	if (dev_boot_phase)
1360 		goto unlock;
1361 	for_each_net(net) {
1362 		for_each_netdev(net, dev) {
1363 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1364 			err = notifier_to_errno(err);
1365 			if (err)
1366 				goto rollback;
1367 
1368 			if (!(dev->flags & IFF_UP))
1369 				continue;
1370 
1371 			nb->notifier_call(nb, NETDEV_UP, dev);
1372 		}
1373 	}
1374 
1375 unlock:
1376 	rtnl_unlock();
1377 	return err;
1378 
1379 rollback:
1380 	last = dev;
1381 	for_each_net(net) {
1382 		for_each_netdev(net, dev) {
1383 			if (dev == last)
1384 				break;
1385 
1386 			if (dev->flags & IFF_UP) {
1387 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1388 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1389 			}
1390 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1391 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1392 		}
1393 	}
1394 
1395 	raw_notifier_chain_unregister(&netdev_chain, nb);
1396 	goto unlock;
1397 }
1398 EXPORT_SYMBOL(register_netdevice_notifier);
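
/*
 * Notifier sketch; example_netdev_event and example_netdev_nb are
 * hypothetical.  Because registration/up events are replayed (see above),
 * the callback must cope with NETDEV_REGISTER/NETDEV_UP for devices that
 * already existed when it was registered.  In this kernel the notifier
 * payload is the struct net_device pointer itself.
 */
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		printk(KERN_DEBUG "%s: came up\n", dev->name);
		break;
	case NETDEV_GOING_DOWN:
		printk(KERN_DEBUG "%s: going down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_nb = {
	.notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_netdev_nb); */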
1399 
1400 /**
1401  *	unregister_netdevice_notifier - unregister a network notifier block
1402  *	@nb: notifier
1403  *
1404  *	Unregister a notifier previously registered by
1405  *	register_netdevice_notifier(). The notifier is unlinked from the
1406  *	kernel structures and may then be reused. A negative errno code
1407  *	is returned on a failure.
1408  */
1409 
1410 int unregister_netdevice_notifier(struct notifier_block *nb)
1411 {
1412 	int err;
1413 
1414 	rtnl_lock();
1415 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1416 	rtnl_unlock();
1417 	return err;
1418 }
1419 EXPORT_SYMBOL(unregister_netdevice_notifier);
1420 
1421 /**
1422  *	call_netdevice_notifiers - call all network notifier blocks
1423  *      @val: value passed unmodified to notifier function
1424  *      @dev: net_device pointer passed unmodified to notifier function
1425  *
1426  *	Call all network notifier blocks.  Parameters and return value
1427  *	are as for raw_notifier_call_chain().
1428  */
1429 
1430 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1431 {
1432 	ASSERT_RTNL();
1433 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1434 }
1435 
1436 /* When > 0 there are consumers of rx skb time stamps */
1437 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1438 
1439 void net_enable_timestamp(void)
1440 {
1441 	atomic_inc(&netstamp_needed);
1442 }
1443 EXPORT_SYMBOL(net_enable_timestamp);
1444 
1445 void net_disable_timestamp(void)
1446 {
1447 	atomic_dec(&netstamp_needed);
1448 }
1449 EXPORT_SYMBOL(net_disable_timestamp);
1450 
1451 static inline void net_timestamp_set(struct sk_buff *skb)
1452 {
1453 	if (atomic_read(&netstamp_needed))
1454 		__net_timestamp(skb);
1455 	else
1456 		skb->tstamp.tv64 = 0;
1457 }
1458 
1459 static inline void net_timestamp_check(struct sk_buff *skb)
1460 {
1461 	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
1462 		__net_timestamp(skb);
1463 }
1464 
1465 /**
1466  * dev_forward_skb - loopback an skb to another netif
1467  *
1468  * @dev: destination network device
1469  * @skb: buffer to forward
1470  *
1471  * return values:
1472  *	NET_RX_SUCCESS	(no congestion)
1473  *	NET_RX_DROP     (packet was dropped, but freed)
1474  *
1475  * dev_forward_skb can be used for injecting an skb from the
1476  * start_xmit function of one device into the receive queue
1477  * of another device.
1478  *
1479  * The receiving device may be in another namespace, so
1480  * we have to clear all information in the skb that could
1481  * impact namespace isolation.
1482  */
1483 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1484 {
1485 	skb_orphan(skb);
1486 	nf_reset(skb);
1487 
1488 	if (unlikely(!(dev->flags & IFF_UP) ||
1489 		     (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
1490 		atomic_long_inc(&dev->rx_dropped);
1491 		kfree_skb(skb);
1492 		return NET_RX_DROP;
1493 	}
1494 	skb_set_dev(skb, dev);
1495 	skb->tstamp.tv64 = 0;
1496 	skb->pkt_type = PACKET_HOST;
1497 	skb->protocol = eth_type_trans(skb, dev);
1498 	return netif_rx(skb);
1499 }
1500 EXPORT_SYMBOL_GPL(dev_forward_skb);
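
/*
 * Typical use, as described above, is from a pair-style driver's xmit
 * path.  The sketch below is hypothetical; example_get_peer() stands for
 * whatever driver-private means the device uses to find its partner:
 *
 *	static netdev_tx_t example_pair_xmit(struct sk_buff *skb,
 *					     struct net_device *dev)
 *	{
 *		struct net_device *peer = example_get_peer(dev);
 *
 *		dev_forward_skb(peer, skb);
 *		return NETDEV_TX_OK;
 *	}
 */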
1501 
1502 /*
1503  *	Support routine. Sends outgoing frames to any network
1504  *	taps currently in use.
1505  */
1506 
1507 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1508 {
1509 	struct packet_type *ptype;
1510 
1511 #ifdef CONFIG_NET_CLS_ACT
1512 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1513 		net_timestamp_set(skb);
1514 #else
1515 	net_timestamp_set(skb);
1516 #endif
1517 
1518 	rcu_read_lock();
1519 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1520 		/* Never send packets back to the socket
1521 		 * they originated from - MvS (miquels@drinkel.ow.org)
1522 		 */
1523 		if ((ptype->dev == dev || !ptype->dev) &&
1524 		    (ptype->af_packet_priv == NULL ||
1525 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1526 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1527 			if (!skb2)
1528 				break;
1529 
1530 			/* The network header should be correctly
1531 			   set by the sender, so the check below is
1532 			   just protection against buggy protocols.
1533 			 */
1534 			skb_reset_mac_header(skb2);
1535 
1536 			if (skb_network_header(skb2) < skb2->data ||
1537 			    skb2->network_header > skb2->tail) {
1538 				if (net_ratelimit())
1539 					printk(KERN_CRIT "protocol %04x is "
1540 					       "buggy, dev %s\n",
1541 					       ntohs(skb2->protocol),
1542 					       dev->name);
1543 				skb_reset_network_header(skb2);
1544 			}
1545 
1546 			skb2->transport_header = skb2->network_header;
1547 			skb2->pkt_type = PACKET_OUTGOING;
1548 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1549 		}
1550 	}
1551 	rcu_read_unlock();
1552 }
1553 
1554 /*
1555  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1556  * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
1557  */
1558 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1559 {
1560 	if (txq < 1 || txq > dev->num_tx_queues)
1561 		return -EINVAL;
1562 
1563 	if (dev->reg_state == NETREG_REGISTERED) {
1564 		ASSERT_RTNL();
1565 
1566 		if (txq < dev->real_num_tx_queues)
1567 			qdisc_reset_all_tx_gt(dev, txq);
1568 	}
1569 
1570 	dev->real_num_tx_queues = txq;
1571 	return 0;
1572 }
1573 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1574 
1575 #ifdef CONFIG_RPS
1576 /**
1577  *	netif_set_real_num_rx_queues - set actual number of RX queues used
1578  *	@dev: Network device
1579  *	@rxq: Actual number of RX queues
1580  *
1581  *	This must be called either with the rtnl_lock held or before
1582  *	registration of the net device.  Returns 0 on success, or a
1583  *	negative error code.  If called before registration, it always
1584  *	succeeds.
1585  */
1586 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1587 {
1588 	int rc;
1589 
1590 	if (rxq < 1 || rxq > dev->num_rx_queues)
1591 		return -EINVAL;
1592 
1593 	if (dev->reg_state == NETREG_REGISTERED) {
1594 		ASSERT_RTNL();
1595 
1596 		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1597 						  rxq);
1598 		if (rc)
1599 			return rc;
1600 	}
1601 
1602 	dev->real_num_rx_queues = rxq;
1603 	return 0;
1604 }
1605 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1606 #endif
1607 
1608 static inline void __netif_reschedule(struct Qdisc *q)
1609 {
1610 	struct softnet_data *sd;
1611 	unsigned long flags;
1612 
1613 	local_irq_save(flags);
1614 	sd = &__get_cpu_var(softnet_data);
1615 	q->next_sched = NULL;
1616 	*sd->output_queue_tailp = q;
1617 	sd->output_queue_tailp = &q->next_sched;
1618 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1619 	local_irq_restore(flags);
1620 }
1621 
1622 void __netif_schedule(struct Qdisc *q)
1623 {
1624 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1625 		__netif_reschedule(q);
1626 }
1627 EXPORT_SYMBOL(__netif_schedule);
1628 
1629 void dev_kfree_skb_irq(struct sk_buff *skb)
1630 {
1631 	if (atomic_dec_and_test(&skb->users)) {
1632 		struct softnet_data *sd;
1633 		unsigned long flags;
1634 
1635 		local_irq_save(flags);
1636 		sd = &__get_cpu_var(softnet_data);
1637 		skb->next = sd->completion_queue;
1638 		sd->completion_queue = skb;
1639 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1640 		local_irq_restore(flags);
1641 	}
1642 }
1643 EXPORT_SYMBOL(dev_kfree_skb_irq);
1644 
1645 void dev_kfree_skb_any(struct sk_buff *skb)
1646 {
1647 	if (in_irq() || irqs_disabled())
1648 		dev_kfree_skb_irq(skb);
1649 	else
1650 		dev_kfree_skb(skb);
1651 }
1652 EXPORT_SYMBOL(dev_kfree_skb_any);
1653 
1654 
1655 /**
1656  * netif_device_detach - mark device as removed
1657  * @dev: network device
1658  *
1659  * Mark device as removed from the system and therefore no longer available.
1660  */
1661 void netif_device_detach(struct net_device *dev)
1662 {
1663 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1664 	    netif_running(dev)) {
1665 		netif_tx_stop_all_queues(dev);
1666 	}
1667 }
1668 EXPORT_SYMBOL(netif_device_detach);
1669 
1670 /**
1671  * netif_device_attach - mark device as attached
1672  * @dev: network device
1673  *
1674  * Mark device as attached to the system and restart if needed.
1675  */
1676 void netif_device_attach(struct net_device *dev)
1677 {
1678 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1679 	    netif_running(dev)) {
1680 		netif_tx_wake_all_queues(dev);
1681 		__netdev_watchdog_up(dev);
1682 	}
1683 }
1684 EXPORT_SYMBOL(netif_device_attach);
1685 
1686 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1687 {
1688 	return ((features & NETIF_F_GEN_CSUM) ||
1689 		((features & NETIF_F_IP_CSUM) &&
1690 		 protocol == htons(ETH_P_IP)) ||
1691 		((features & NETIF_F_IPV6_CSUM) &&
1692 		 protocol == htons(ETH_P_IPV6)) ||
1693 		((features & NETIF_F_FCOE_CRC) &&
1694 		 protocol == htons(ETH_P_FCOE)));
1695 }
1696 
1697 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1698 {
1699 	int features = dev->features;
1700 
1701 	if (vlan_tx_tag_present(skb))
1702 		features &= dev->vlan_features;
1703 
1704 	if (can_checksum_protocol(features, skb->protocol))
1705 		return true;
1706 
1707 	if (skb->protocol == htons(ETH_P_8021Q)) {
1708 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1709 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1710 					  veh->h_vlan_encapsulated_proto))
1711 			return true;
1712 	}
1713 
1714 	return false;
1715 }
1716 
1717 /**
1718  * skb_dev_set -- assign a new device to a buffer
1719  * @skb: buffer for the new device
1720  * @dev: network device
1721  *
1722  * If an skb is owned by a device already, we have to reset
1723  * all data private to the namespace a device belongs to
1724  * before assigning it a new device.
1725  */
1726 #ifdef CONFIG_NET_NS
1727 void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
1728 {
1729 	skb_dst_drop(skb);
1730 	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
1731 		secpath_reset(skb);
1732 		nf_reset(skb);
1733 		skb_init_secmark(skb);
1734 		skb->mark = 0;
1735 		skb->priority = 0;
1736 		skb->nf_trace = 0;
1737 		skb->ipvs_property = 0;
1738 #ifdef CONFIG_NET_SCHED
1739 		skb->tc_index = 0;
1740 #endif
1741 	}
1742 	skb->dev = dev;
1743 }
1744 EXPORT_SYMBOL(skb_set_dev);
1745 #endif /* CONFIG_NET_NS */
1746 
1747 /*
1748  * Invalidate hardware checksum when packet is to be mangled, and
1749  * complete checksum manually on outgoing path.
1750  */
1751 int skb_checksum_help(struct sk_buff *skb)
1752 {
1753 	__wsum csum;
1754 	int ret = 0, offset;
1755 
1756 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1757 		goto out_set_summed;
1758 
1759 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1760 		/* Let GSO fix up the checksum. */
1761 		goto out_set_summed;
1762 	}
1763 
1764 	offset = skb->csum_start - skb_headroom(skb);
1765 	BUG_ON(offset >= skb_headlen(skb));
1766 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1767 
1768 	offset += skb->csum_offset;
1769 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1770 
1771 	if (skb_cloned(skb) &&
1772 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1773 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1774 		if (ret)
1775 			goto out;
1776 	}
1777 
1778 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1779 out_set_summed:
1780 	skb->ip_summed = CHECKSUM_NONE;
1781 out:
1782 	return ret;
1783 }
1784 EXPORT_SYMBOL(skb_checksum_help);
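
/*
 * Common call-site pattern (illustrative): resolve a CHECKSUM_PARTIAL skb
 * in software when the device cannot offload the checksum:
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    skb_checksum_help(skb))
 *		goto drop;
 *
 * On success the checksum is completed in the packet data and ip_summed
 * is set to CHECKSUM_NONE.
 */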
1785 
1786 /**
1787  *	skb_gso_segment - Perform segmentation on skb.
1788  *	@skb: buffer to segment
1789  *	@features: features for the output path (see dev->features)
1790  *
1791  *	This function segments the given skb and returns a list of segments.
1792  *
1793  *	It may return NULL if the skb requires no segmentation.  This is
1794  *	only possible when GSO is used for verifying header integrity.
1795  */
1796 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1797 {
1798 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1799 	struct packet_type *ptype;
1800 	__be16 type = skb->protocol;
1801 	int err;
1802 
1803 	if (type == htons(ETH_P_8021Q)) {
1804 		struct vlan_ethhdr *veh;
1805 
1806 		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
1807 			return ERR_PTR(-EINVAL);
1808 
1809 		veh = (struct vlan_ethhdr *)skb->data;
1810 		type = veh->h_vlan_encapsulated_proto;
1811 	}
1812 
1813 	skb_reset_mac_header(skb);
1814 	skb->mac_len = skb->network_header - skb->mac_header;
1815 	__skb_pull(skb, skb->mac_len);
1816 
1817 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1818 		struct net_device *dev = skb->dev;
1819 		struct ethtool_drvinfo info = {};
1820 
1821 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1822 			dev->ethtool_ops->get_drvinfo(dev, &info);
1823 
1824 		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1825 			"ip_summed=%d",
1826 		     info.driver, dev ? dev->features : 0L,
1827 		     skb->sk ? skb->sk->sk_route_caps : 0L,
1828 		     skb->len, skb->data_len, skb->ip_summed);
1829 
1830 		if (skb_header_cloned(skb) &&
1831 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1832 			return ERR_PTR(err);
1833 	}
1834 
1835 	rcu_read_lock();
1836 	list_for_each_entry_rcu(ptype,
1837 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1838 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1839 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1840 				err = ptype->gso_send_check(skb);
1841 				segs = ERR_PTR(err);
1842 				if (err || skb_gso_ok(skb, features))
1843 					break;
1844 				__skb_push(skb, (skb->data -
1845 						 skb_network_header(skb)));
1846 			}
1847 			segs = ptype->gso_segment(skb, features);
1848 			break;
1849 		}
1850 	}
1851 	rcu_read_unlock();
1852 
1853 	__skb_push(skb, skb->data - skb_mac_header(skb));
1854 
1855 	return segs;
1856 }
1857 EXPORT_SYMBOL(skb_gso_segment);
1858 
1859 /* Take action when hardware reception checksum errors are detected. */
1860 #ifdef CONFIG_BUG
1861 void netdev_rx_csum_fault(struct net_device *dev)
1862 {
1863 	if (net_ratelimit()) {
1864 		printk(KERN_ERR "%s: hw csum failure.\n",
1865 			dev ? dev->name : "<unknown>");
1866 		dump_stack();
1867 	}
1868 }
1869 EXPORT_SYMBOL(netdev_rx_csum_fault);
1870 #endif
1871 
1872 /* Actually, we should eliminate this check as soon as we know that:
1873  * 1. An IOMMU is present and can map all of the memory.
1874  * 2. No high memory really exists on this machine.
1875  */
1876 
1877 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1878 {
1879 #ifdef CONFIG_HIGHMEM
1880 	int i;
1881 	if (!(dev->features & NETIF_F_HIGHDMA)) {
1882 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1883 			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1884 				return 1;
1885 	}
1886 
1887 	if (PCI_DMA_BUS_IS_PHYS) {
1888 		struct device *pdev = dev->dev.parent;
1889 
1890 		if (!pdev)
1891 			return 0;
1892 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1893 			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
1894 			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
1895 				return 1;
1896 		}
1897 	}
1898 #endif
1899 	return 0;
1900 }
1901 
1902 struct dev_gso_cb {
1903 	void (*destructor)(struct sk_buff *skb);
1904 };
1905 
1906 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1907 
1908 static void dev_gso_skb_destructor(struct sk_buff *skb)
1909 {
1910 	struct dev_gso_cb *cb;
1911 
1912 	do {
1913 		struct sk_buff *nskb = skb->next;
1914 
1915 		skb->next = nskb->next;
1916 		nskb->next = NULL;
1917 		kfree_skb(nskb);
1918 	} while (skb->next);
1919 
1920 	cb = DEV_GSO_CB(skb);
1921 	if (cb->destructor)
1922 		cb->destructor(skb);
1923 }
1924 
1925 /**
1926  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1927  *	@skb: buffer to segment
1928  *
1929  *	This function segments the given skb and stores the list of segments
1930  *	in skb->next.
1931  */
1932 static int dev_gso_segment(struct sk_buff *skb)
1933 {
1934 	struct net_device *dev = skb->dev;
1935 	struct sk_buff *segs;
1936 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1937 					 NETIF_F_SG : 0);
1938 
1939 	segs = skb_gso_segment(skb, features);
1940 
1941 	/* Verifying header integrity only. */
1942 	if (!segs)
1943 		return 0;
1944 
1945 	if (IS_ERR(segs))
1946 		return PTR_ERR(segs);
1947 
1948 	skb->next = segs;
1949 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1950 	skb->destructor = dev_gso_skb_destructor;
1951 
1952 	return 0;
1953 }
1954 
1955 /*
1956  * Try to orphan skb early, right before transmission by the device.
1957  * We cannot orphan the skb if a tx timestamp is requested or the sk reference
1958  * is needed at the driver level for other reasons, e.g. see net/can/raw.c
1959  */
1960 static inline void skb_orphan_try(struct sk_buff *skb)
1961 {
1962 	struct sock *sk = skb->sk;
1963 
1964 	if (sk && !skb_shinfo(skb)->tx_flags) {
1965 		/* skb_tx_hash() won't be able to get sk.
1966 		 * We copy sk_hash into skb->rxhash.
1967 		 */
1968 		if (!skb->rxhash)
1969 			skb->rxhash = sk->sk_hash;
1970 		skb_orphan(skb);
1971 	}
1972 }
1973 
1974 /*
1975  * Returns true if either:
1976  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
1977  *	2. skb is fragmented and the device does not support SG, or if
1978  *	   at least one of the fragments is in highmem and the device does not
1979  *	   support DMA from it.
1980  */
1981 static inline int skb_needs_linearize(struct sk_buff *skb,
1982 				      struct net_device *dev)
1983 {
1984 	int features = dev->features;
1985 
1986 	if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
1987 		features &= dev->vlan_features;
1988 
1989 	return skb_is_nonlinear(skb) &&
1990 	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
1991 		(skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
1992 					      illegal_highdma(dev, skb))));
1993 }
1994 
1995 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1996 			struct netdev_queue *txq)
1997 {
1998 	const struct net_device_ops *ops = dev->netdev_ops;
1999 	int rc = NETDEV_TX_OK;
2000 
2001 	if (likely(!skb->next)) {
2002 		if (!list_empty(&ptype_all))
2003 			dev_queue_xmit_nit(skb, dev);
2004 
2005 		/*
2006 		 * If the device doesn't need skb->dst, release it right now while
2007 		 * it's hot in this CPU's cache
2008 		 */
2009 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2010 			skb_dst_drop(skb);
2011 
2012 		skb_orphan_try(skb);
2013 
2014 		if (vlan_tx_tag_present(skb) &&
2015 		    !(dev->features & NETIF_F_HW_VLAN_TX)) {
2016 			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2017 			if (unlikely(!skb))
2018 				goto out;
2019 
2020 			skb->vlan_tci = 0;
2021 		}
2022 
2023 		if (netif_needs_gso(dev, skb)) {
2024 			if (unlikely(dev_gso_segment(skb)))
2025 				goto out_kfree_skb;
2026 			if (skb->next)
2027 				goto gso;
2028 		} else {
2029 			if (skb_needs_linearize(skb, dev) &&
2030 			    __skb_linearize(skb))
2031 				goto out_kfree_skb;
2032 
2033 			/* If packet is not checksummed and device does not
2034 			 * support checksumming for this protocol, complete
2035 			 * checksumming here.
2036 			 */
2037 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
2038 				skb_set_transport_header(skb, skb->csum_start -
2039 					      skb_headroom(skb));
2040 				if (!dev_can_checksum(dev, skb) &&
2041 				     skb_checksum_help(skb))
2042 					goto out_kfree_skb;
2043 			}
2044 		}
2045 
2046 		rc = ops->ndo_start_xmit(skb, dev);
2047 		trace_net_dev_xmit(skb, rc);
2048 		if (rc == NETDEV_TX_OK)
2049 			txq_trans_update(txq);
2050 		return rc;
2051 	}
2052 
2053 gso:
2054 	do {
2055 		struct sk_buff *nskb = skb->next;
2056 
2057 		skb->next = nskb->next;
2058 		nskb->next = NULL;
2059 
2060 		/*
2061 		 * If the device doesn't need nskb->dst, release it right now while
2062 		 * it's hot in this CPU's cache
2063 		 */
2064 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2065 			skb_dst_drop(nskb);
2066 
2067 		rc = ops->ndo_start_xmit(nskb, dev);
2068 		trace_net_dev_xmit(nskb, rc);
2069 		if (unlikely(rc != NETDEV_TX_OK)) {
2070 			if (rc & ~NETDEV_TX_MASK)
2071 				goto out_kfree_gso_skb;
2072 			nskb->next = skb->next;
2073 			skb->next = nskb;
2074 			return rc;
2075 		}
2076 		txq_trans_update(txq);
2077 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
2078 			return NETDEV_TX_BUSY;
2079 	} while (skb->next);
2080 
2081 out_kfree_gso_skb:
2082 	if (likely(skb->next == NULL))
2083 		skb->destructor = DEV_GSO_CB(skb)->destructor;
2084 out_kfree_skb:
2085 	kfree_skb(skb);
2086 out:
2087 	return rc;
2088 }
2089 
2090 static u32 hashrnd __read_mostly;
2091 
2092 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
2093 {
2094 	u32 hash;
2095 
2096 	if (skb_rx_queue_recorded(skb)) {
2097 		hash = skb_get_rx_queue(skb);
2098 		while (unlikely(hash >= dev->real_num_tx_queues))
2099 			hash -= dev->real_num_tx_queues;
2100 		return hash;
2101 	}
2102 
2103 	if (skb->sk && skb->sk->sk_hash)
2104 		hash = skb->sk->sk_hash;
2105 	else
2106 		hash = (__force u16) skb->protocol ^ skb->rxhash;
2107 	hash = jhash_1word(hash, hashrnd);
2108 
2109 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
2110 }
2111 EXPORT_SYMBOL(skb_tx_hash);
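/*
 * Editor's note on the mapping above (illustrative, not from the original
 * source): ((u64) hash * real_num_tx_queues) >> 32 scales a 32-bit hash
 * uniformly onto [0, real_num_tx_queues) without a modulo.  Worked example:
 * with hash = 0x80000000 and 8 TX queues,
 * (0x80000000 * 8) >> 32 = 0x400000000 >> 32 = 4, i.e. the midpoint of the
 * hash space lands in the middle queue.
 */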
2112 
2113 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2114 {
2115 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
2116 		if (net_ratelimit()) {
2117 			pr_warning("%s selects TX queue %d, but "
2118 				"real number of TX queues is %d\n",
2119 				dev->name, queue_index, dev->real_num_tx_queues);
2120 		}
2121 		return 0;
2122 	}
2123 	return queue_index;
2124 }
2125 
2126 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2127 					struct sk_buff *skb)
2128 {
2129 	int queue_index;
2130 	const struct net_device_ops *ops = dev->netdev_ops;
2131 
2132 	if (ops->ndo_select_queue) {
2133 		queue_index = ops->ndo_select_queue(dev, skb);
2134 		queue_index = dev_cap_txqueue(dev, queue_index);
2135 	} else {
2136 		struct sock *sk = skb->sk;
2137 		queue_index = sk_tx_queue_get(sk);
2138 		if (queue_index < 0) {
2139 
2140 			queue_index = 0;
2141 			if (dev->real_num_tx_queues > 1)
2142 				queue_index = skb_tx_hash(dev, skb);
2143 
2144 			if (sk) {
2145 				struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
2146 
2147 				if (dst && skb_dst(skb) == dst)
2148 					sk_tx_queue_set(sk, queue_index);
2149 			}
2150 		}
2151 	}
2152 
2153 	skb_set_queue_mapping(skb, queue_index);
2154 	return netdev_get_tx_queue(dev, queue_index);
2155 }
2156 
2157 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2158 				 struct net_device *dev,
2159 				 struct netdev_queue *txq)
2160 {
2161 	spinlock_t *root_lock = qdisc_lock(q);
2162 	bool contended = qdisc_is_running(q);
2163 	int rc;
2164 
2165 	/*
2166 	 * Heuristic to force contended enqueues to serialize on a
2167 	 * separate lock before trying to get the qdisc main lock.
2168 	 * This permits the __QDISC_STATE_RUNNING owner to get the lock more often
2169 	 * and dequeue packets faster.
2170 	 */
2171 	if (unlikely(contended))
2172 		spin_lock(&q->busylock);
2173 
2174 	spin_lock(root_lock);
2175 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2176 		kfree_skb(skb);
2177 		rc = NET_XMIT_DROP;
2178 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2179 		   qdisc_run_begin(q)) {
2180 		/*
2181 		 * This is a work-conserving queue; there are no old skbs
2182 		 * waiting to be sent out; and the qdisc is not running -
2183 		 * xmit the skb directly.
2184 		 */
2185 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2186 			skb_dst_force(skb);
2187 		__qdisc_update_bstats(q, skb->len);
2188 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2189 			if (unlikely(contended)) {
2190 				spin_unlock(&q->busylock);
2191 				contended = false;
2192 			}
2193 			__qdisc_run(q);
2194 		} else
2195 			qdisc_run_end(q);
2196 
2197 		rc = NET_XMIT_SUCCESS;
2198 	} else {
2199 		skb_dst_force(skb);
2200 		rc = qdisc_enqueue_root(skb, q);
2201 		if (qdisc_run_begin(q)) {
2202 			if (unlikely(contended)) {
2203 				spin_unlock(&q->busylock);
2204 				contended = false;
2205 			}
2206 			__qdisc_run(q);
2207 		}
2208 	}
2209 	spin_unlock(root_lock);
2210 	if (unlikely(contended))
2211 		spin_unlock(&q->busylock);
2212 	return rc;
2213 }
2214 
2215 static DEFINE_PER_CPU(int, xmit_recursion);
2216 #define RECURSION_LIMIT 3
2217 
2218 /**
2219  *	dev_queue_xmit - transmit a buffer
2220  *	@skb: buffer to transmit
2221  *
2222  *	Queue a buffer for transmission to a network device. The caller must
2223  *	have set the device and priority and built the buffer before calling
2224  *	this function. The function can be called from an interrupt.
2225  *
2226  *	A negative errno code is returned on a failure. A success does not
2227  *	guarantee the frame will be transmitted as it may be dropped due
2228  *	to congestion or traffic shaping.
2229  *
2230  * -----------------------------------------------------------------------------------
2231  *      I notice this method can also return errors from the queue disciplines,
2232  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
2233  *      be positive.
2234  *
2235  *      Regardless of the return value, the skb is consumed, so it is currently
2236  *      difficult to retry a send to this method.  (You can bump the ref count
2237  *      before sending to hold a reference for retry if you are careful.)
2238  *
2239  *      When calling this method, interrupts MUST be enabled.  This is because
2240  *      the BH enable code must have IRQs enabled so that it will not deadlock.
2241  *          --BLG
2242  */
2243 int dev_queue_xmit(struct sk_buff *skb)
2244 {
2245 	struct net_device *dev = skb->dev;
2246 	struct netdev_queue *txq;
2247 	struct Qdisc *q;
2248 	int rc = -ENOMEM;
2249 
2250 	/* Disable soft irqs for various locks below. Also
2251 	 * stops preemption for RCU.
2252 	 */
2253 	rcu_read_lock_bh();
2254 
2255 	txq = dev_pick_tx(dev, skb);
2256 	q = rcu_dereference_bh(txq->qdisc);
2257 
2258 #ifdef CONFIG_NET_CLS_ACT
2259 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2260 #endif
2261 	trace_net_dev_queue(skb);
2262 	if (q->enqueue) {
2263 		rc = __dev_xmit_skb(skb, q, dev, txq);
2264 		goto out;
2265 	}
2266 
2267 	/* The device has no queue. Common case for software devices:
2268 	   loopback, all sorts of tunnels...
2269 
2270 	   Really, it is unlikely that netif_tx_lock protection is necessary
2271 	   here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
2272 	   counters.)
2273 	   However, it is possible that they rely on the protection
2274 	   we provide here.
2275 
2276 	   Check this and take the lock; it is not prone to deadlocks.
2277 	   Alternatively, shoot the noqueue qdisc; that is even simpler 8)
2278 	 */
2279 	if (dev->flags & IFF_UP) {
2280 		int cpu = smp_processor_id(); /* ok because BHs are off */
2281 
2282 		if (txq->xmit_lock_owner != cpu) {
2283 
2284 			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2285 				goto recursion_alert;
2286 
2287 			HARD_TX_LOCK(dev, txq, cpu);
2288 
2289 			if (!netif_tx_queue_stopped(txq)) {
2290 				__this_cpu_inc(xmit_recursion);
2291 				rc = dev_hard_start_xmit(skb, dev, txq);
2292 				__this_cpu_dec(xmit_recursion);
2293 				if (dev_xmit_complete(rc)) {
2294 					HARD_TX_UNLOCK(dev, txq);
2295 					goto out;
2296 				}
2297 			}
2298 			HARD_TX_UNLOCK(dev, txq);
2299 			if (net_ratelimit())
2300 				printk(KERN_CRIT "Virtual device %s asks to "
2301 				       "queue packet!\n", dev->name);
2302 		} else {
2303 			/* Recursion is detected! It is possible,
2304 			 * unfortunately
2305 			 */
2306 recursion_alert:
2307 			if (net_ratelimit())
2308 				printk(KERN_CRIT "Dead loop on virtual device "
2309 				       "%s, fix it urgently!\n", dev->name);
2310 		}
2311 	}
2312 
2313 	rc = -ENETDOWN;
2314 	rcu_read_unlock_bh();
2315 
2316 	kfree_skb(skb);
2317 	return rc;
2318 out:
2319 	rcu_read_unlock_bh();
2320 	return rc;
2321 }
2322 EXPORT_SYMBOL(dev_queue_xmit);
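/*
 * Editor's illustrative sketch (hypothetical function name, not part of the
 * original source): the caller contract described in the kernel-doc above --
 * build the skb, set skb->dev (and optionally skb->priority), then hand it
 * off with interrupts enabled.  dev_queue_xmit() consumes the skb whether or
 * not the transmit succeeds.
 */
static inline int example_xmit_on(struct net_device *dev, struct sk_buff *skb)
{
	skb->dev = dev;			/* required before dev_queue_xmit() */
	skb->priority = 0;		/* best-effort; qdiscs may use this */
	return dev_queue_xmit(skb);	/* skb is consumed in all cases */
}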
2323 
2324 
2325 /*=======================================================================
2326 			Receiver routines
2327   =======================================================================*/
2328 
2329 int netdev_max_backlog __read_mostly = 1000;
2330 int netdev_tstamp_prequeue __read_mostly = 1;
2331 int netdev_budget __read_mostly = 300;
2332 int weight_p __read_mostly = 64;            /* old backlog weight */
2333 
2334 /* Called with irq disabled */
2335 static inline void ____napi_schedule(struct softnet_data *sd,
2336 				     struct napi_struct *napi)
2337 {
2338 	list_add_tail(&napi->poll_list, &sd->poll_list);
2339 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2340 }
2341 
2342 /*
2343  * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2344  * and src/dst port numbers. Returns a non-zero hash number on success
2345  * and 0 on failure.
2346  */
2347 __u32 __skb_get_rxhash(struct sk_buff *skb)
2348 {
2349 	int nhoff, hash = 0, poff;
2350 	struct ipv6hdr *ip6;
2351 	struct iphdr *ip;
2352 	u8 ip_proto;
2353 	u32 addr1, addr2, ihl;
2354 	union {
2355 		u32 v32;
2356 		u16 v16[2];
2357 	} ports;
2358 
2359 	nhoff = skb_network_offset(skb);
2360 
2361 	switch (skb->protocol) {
2362 	case __constant_htons(ETH_P_IP):
2363 		if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
2364 			goto done;
2365 
2366 		ip = (struct iphdr *) (skb->data + nhoff);
2367 		if (ip->frag_off & htons(IP_MF | IP_OFFSET))
2368 			ip_proto = 0;
2369 		else
2370 			ip_proto = ip->protocol;
2371 		addr1 = (__force u32) ip->saddr;
2372 		addr2 = (__force u32) ip->daddr;
2373 		ihl = ip->ihl;
2374 		break;
2375 	case __constant_htons(ETH_P_IPV6):
2376 		if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
2377 			goto done;
2378 
2379 		ip6 = (struct ipv6hdr *) (skb->data + nhoff);
2380 		ip_proto = ip6->nexthdr;
2381 		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2382 		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2383 		ihl = (40 >> 2);
2384 		break;
2385 	default:
2386 		goto done;
2387 	}
2388 
2389 	ports.v32 = 0;
2390 	poff = proto_ports_offset(ip_proto);
2391 	if (poff >= 0) {
2392 		nhoff += ihl * 4 + poff;
2393 		if (pskb_may_pull(skb, nhoff + 4)) {
2394 			ports.v32 = * (__force u32 *) (skb->data + nhoff);
2395 			if (ports.v16[1] < ports.v16[0])
2396 				swap(ports.v16[0], ports.v16[1]);
2397 		}
2398 	}
2399 
2400 	/* get a consistent hash (same value on both flow directions) */
2401 	if (addr2 < addr1)
2402 		swap(addr1, addr2);
2403 
2404 	hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2405 	if (!hash)
2406 		hash = 1;
2407 
2408 done:
2409 	return hash;
2410 }
2411 EXPORT_SYMBOL(__skb_get_rxhash);
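/*
 * Editor's note (illustrative, not from the original source): the swaps above
 * make the hash direction-independent.  For example, a TCP flow
 * 10.0.0.1:1234 -> 10.0.0.2:80 and its reverse 10.0.0.2:80 -> 10.0.0.1:1234
 * both reduce to the same (addr1, addr2, ports) triple after the
 * "addr2 < addr1" and "v16[1] < v16[0]" swaps, so both directions of the
 * flow get the same rxhash and are steered to the same RPS CPU.
 */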
2412 
2413 #ifdef CONFIG_RPS
2414 
2415 /* One global table that all flow-based protocols share. */
2416 struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2417 EXPORT_SYMBOL(rps_sock_flow_table);
2418 
2419 /*
2420  * get_rps_cpu is called on the receive path and returns the target
2421  * CPU from the RPS map of the receiving queue for a given skb.
2422  * rcu_read_lock must be held on entry.
2423  */
2424 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2425 		       struct rps_dev_flow **rflowp)
2426 {
2427 	struct netdev_rx_queue *rxqueue;
2428 	struct rps_map *map = NULL;
2429 	struct rps_dev_flow_table *flow_table;
2430 	struct rps_sock_flow_table *sock_flow_table;
2431 	int cpu = -1;
2432 	u16 tcpu;
2433 
2434 	if (skb_rx_queue_recorded(skb)) {
2435 		u16 index = skb_get_rx_queue(skb);
2436 		if (unlikely(index >= dev->real_num_rx_queues)) {
2437 			WARN_ONCE(dev->real_num_rx_queues > 1,
2438 				  "%s received packet on queue %u, but number "
2439 				  "of RX queues is %u\n",
2440 				  dev->name, index, dev->real_num_rx_queues);
2441 			goto done;
2442 		}
2443 		rxqueue = dev->_rx + index;
2444 	} else
2445 		rxqueue = dev->_rx;
2446 
2447 	if (rxqueue->rps_map) {
2448 		map = rcu_dereference(rxqueue->rps_map);
2449 		if (map && map->len == 1) {
2450 			tcpu = map->cpus[0];
2451 			if (cpu_online(tcpu))
2452 				cpu = tcpu;
2453 			goto done;
2454 		}
2455 	} else if (!rxqueue->rps_flow_table) {
2456 		goto done;
2457 	}
2458 
2459 	skb_reset_network_header(skb);
2460 	if (!skb_get_rxhash(skb))
2461 		goto done;
2462 
2463 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2464 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
2465 	if (flow_table && sock_flow_table) {
2466 		u16 next_cpu;
2467 		struct rps_dev_flow *rflow;
2468 
2469 		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2470 		tcpu = rflow->cpu;
2471 
2472 		next_cpu = sock_flow_table->ents[skb->rxhash &
2473 		    sock_flow_table->mask];
2474 
2475 		/*
2476 		 * If the desired CPU (where last recvmsg was done) is
2477 		 * different from current CPU (one in the rx-queue flow
2478 		 * table entry), switch if one of the following holds:
2479 		 *   - Current CPU is unset (equal to RPS_NO_CPU).
2480 		 *   - Current CPU is offline.
2481 		 *   - The current CPU's queue tail has advanced beyond the
2482 		 *     last packet that was enqueued using this table entry.
2483 		 *     This guarantees that all previous packets for the flow
2484 		 *     have been dequeued, thus preserving in order delivery.
2485 		 */
2486 		if (unlikely(tcpu != next_cpu) &&
2487 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2488 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2489 		      rflow->last_qtail)) >= 0)) {
2490 			tcpu = rflow->cpu = next_cpu;
2491 			if (tcpu != RPS_NO_CPU)
2492 				rflow->last_qtail = per_cpu(softnet_data,
2493 				    tcpu).input_queue_head;
2494 		}
2495 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2496 			*rflowp = rflow;
2497 			cpu = tcpu;
2498 			goto done;
2499 		}
2500 	}
2501 
2502 	if (map) {
2503 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2504 
2505 		if (cpu_online(tcpu)) {
2506 			cpu = tcpu;
2507 			goto done;
2508 		}
2509 	}
2510 
2511 done:
2512 	return cpu;
2513 }
2514 
2515 /* Called from hardirq (IPI) context */
2516 static void rps_trigger_softirq(void *data)
2517 {
2518 	struct softnet_data *sd = data;
2519 
2520 	____napi_schedule(sd, &sd->backlog);
2521 	sd->received_rps++;
2522 }
2523 
2524 #endif /* CONFIG_RPS */
2525 
2526 /*
2527  * Check whether this softnet_data structure belongs to another CPU.
2528  * If so, queue it on our IPI list and return 1;
2529  * otherwise return 0.
2530  */
2531 static int rps_ipi_queued(struct softnet_data *sd)
2532 {
2533 #ifdef CONFIG_RPS
2534 	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2535 
2536 	if (sd != mysd) {
2537 		sd->rps_ipi_next = mysd->rps_ipi_list;
2538 		mysd->rps_ipi_list = sd;
2539 
2540 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2541 		return 1;
2542 	}
2543 #endif /* CONFIG_RPS */
2544 	return 0;
2545 }
2546 
2547 /*
2548  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2549  * queue (may be a remote CPU queue).
2550  */
2551 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2552 			      unsigned int *qtail)
2553 {
2554 	struct softnet_data *sd;
2555 	unsigned long flags;
2556 
2557 	sd = &per_cpu(softnet_data, cpu);
2558 
2559 	local_irq_save(flags);
2560 
2561 	rps_lock(sd);
2562 	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2563 		if (skb_queue_len(&sd->input_pkt_queue)) {
2564 enqueue:
2565 			__skb_queue_tail(&sd->input_pkt_queue, skb);
2566 			input_queue_tail_incr_save(sd, qtail);
2567 			rps_unlock(sd);
2568 			local_irq_restore(flags);
2569 			return NET_RX_SUCCESS;
2570 		}
2571 
2572 		/* Schedule NAPI for the backlog device.
2573 		 * We can use a non-atomic operation since we own the queue lock
2574 		 */
2575 		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2576 			if (!rps_ipi_queued(sd))
2577 				____napi_schedule(sd, &sd->backlog);
2578 		}
2579 		goto enqueue;
2580 	}
2581 
2582 	sd->dropped++;
2583 	rps_unlock(sd);
2584 
2585 	local_irq_restore(flags);
2586 
2587 	atomic_long_inc(&skb->dev->rx_dropped);
2588 	kfree_skb(skb);
2589 	return NET_RX_DROP;
2590 }
2591 
2592 /**
2593  *	netif_rx	-	post buffer to the network code
2594  *	@skb: buffer to post
2595  *
2596  *	This function receives a packet from a device driver and queues it for
2597  *	the upper (protocol) levels to process.  It always succeeds. The buffer
2598  *	may be dropped during processing for congestion control or by the
2599  *	protocol layers.
2600  *
2601  *	return values:
2602  *	NET_RX_SUCCESS	(no congestion)
2603  *	NET_RX_DROP     (packet was dropped)
2604  *
2605  */
2606 
2607 int netif_rx(struct sk_buff *skb)
2608 {
2609 	int ret;
2610 
2611 	/* if netpoll wants it, pretend we never saw it */
2612 	if (netpoll_rx(skb))
2613 		return NET_RX_DROP;
2614 
2615 	if (netdev_tstamp_prequeue)
2616 		net_timestamp_check(skb);
2617 
2618 	trace_netif_rx(skb);
2619 #ifdef CONFIG_RPS
2620 	{
2621 		struct rps_dev_flow voidflow, *rflow = &voidflow;
2622 		int cpu;
2623 
2624 		preempt_disable();
2625 		rcu_read_lock();
2626 
2627 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
2628 		if (cpu < 0)
2629 			cpu = smp_processor_id();
2630 
2631 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2632 
2633 		rcu_read_unlock();
2634 		preempt_enable();
2635 	}
2636 #else
2637 	{
2638 		unsigned int qtail;
2639 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2640 		put_cpu();
2641 	}
2642 #endif
2643 	return ret;
2644 }
2645 EXPORT_SYMBOL(netif_rx);
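/*
 * Editor's illustrative sketch (hypothetical function name, not part of the
 * original source): typical use of netif_rx() from a legacy, non-NAPI
 * driver's receive interrupt.  The driver sets the protocol via
 * eth_type_trans() and queues the skb on the per-CPU backlog; the return
 * value only reports congestion.
 */
static inline void example_legacy_rx(struct net_device *dev,
				     struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, dev);	/* also sets skb->dev */
	netif_rx(skb);					/* always consumes skb */
}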
2646 
2647 int netif_rx_ni(struct sk_buff *skb)
2648 {
2649 	int err;
2650 
2651 	preempt_disable();
2652 	err = netif_rx(skb);
2653 	if (local_softirq_pending())
2654 		do_softirq();
2655 	preempt_enable();
2656 
2657 	return err;
2658 }
2659 EXPORT_SYMBOL(netif_rx_ni);
2660 
2661 static void net_tx_action(struct softirq_action *h)
2662 {
2663 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2664 
2665 	if (sd->completion_queue) {
2666 		struct sk_buff *clist;
2667 
2668 		local_irq_disable();
2669 		clist = sd->completion_queue;
2670 		sd->completion_queue = NULL;
2671 		local_irq_enable();
2672 
2673 		while (clist) {
2674 			struct sk_buff *skb = clist;
2675 			clist = clist->next;
2676 
2677 			WARN_ON(atomic_read(&skb->users));
2678 			trace_kfree_skb(skb, net_tx_action);
2679 			__kfree_skb(skb);
2680 		}
2681 	}
2682 
2683 	if (sd->output_queue) {
2684 		struct Qdisc *head;
2685 
2686 		local_irq_disable();
2687 		head = sd->output_queue;
2688 		sd->output_queue = NULL;
2689 		sd->output_queue_tailp = &sd->output_queue;
2690 		local_irq_enable();
2691 
2692 		while (head) {
2693 			struct Qdisc *q = head;
2694 			spinlock_t *root_lock;
2695 
2696 			head = head->next_sched;
2697 
2698 			root_lock = qdisc_lock(q);
2699 			if (spin_trylock(root_lock)) {
2700 				smp_mb__before_clear_bit();
2701 				clear_bit(__QDISC_STATE_SCHED,
2702 					  &q->state);
2703 				qdisc_run(q);
2704 				spin_unlock(root_lock);
2705 			} else {
2706 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2707 					      &q->state)) {
2708 					__netif_reschedule(q);
2709 				} else {
2710 					smp_mb__before_clear_bit();
2711 					clear_bit(__QDISC_STATE_SCHED,
2712 						  &q->state);
2713 				}
2714 			}
2715 		}
2716 	}
2717 }
2718 
2719 static inline int deliver_skb(struct sk_buff *skb,
2720 			      struct packet_type *pt_prev,
2721 			      struct net_device *orig_dev)
2722 {
2723 	atomic_inc(&skb->users);
2724 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2725 }
2726 
2727 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
2728     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
2729 /* This hook is defined here for ATM LANE */
2730 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2731 			     unsigned char *addr) __read_mostly;
2732 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2733 #endif
2734 
2735 #ifdef CONFIG_NET_CLS_ACT
2736 /* TODO: Maybe we should just force sch_ingress to be compiled in
2737  * when CONFIG_NET_CLS_ACT is?  Otherwise we pay for some useless
2738  * instructions (a compare and two extra stores) when the ingress
2739  * scheduler is not built but CONFIG_NET_CLS_ACT is.
2740  * NOTE: This doesn't remove any functionality; if you don't have
2741  * the ingress scheduler, you just can't add policies on ingress.
2742  *
2743  */
2744 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
2745 {
2746 	struct net_device *dev = skb->dev;
2747 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2748 	int result = TC_ACT_OK;
2749 	struct Qdisc *q;
2750 
2751 	if (unlikely(MAX_RED_LOOP < ttl++)) {
2752 		if (net_ratelimit())
2753 			pr_warning("Redir loop detected, dropping packet (%d->%d)\n",
2754 			       skb->skb_iif, dev->ifindex);
2755 		return TC_ACT_SHOT;
2756 	}
2757 
2758 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2759 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2760 
2761 	q = rxq->qdisc;
2762 	if (q != &noop_qdisc) {
2763 		spin_lock(qdisc_lock(q));
2764 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2765 			result = qdisc_enqueue_root(skb, q);
2766 		spin_unlock(qdisc_lock(q));
2767 	}
2768 
2769 	return result;
2770 }
2771 
2772 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2773 					 struct packet_type **pt_prev,
2774 					 int *ret, struct net_device *orig_dev)
2775 {
2776 	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
2777 
2778 	if (!rxq || rxq->qdisc == &noop_qdisc)
2779 		goto out;
2780 
2781 	if (*pt_prev) {
2782 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2783 		*pt_prev = NULL;
2784 	}
2785 
2786 	switch (ing_filter(skb, rxq)) {
2787 	case TC_ACT_SHOT:
2788 	case TC_ACT_STOLEN:
2789 		kfree_skb(skb);
2790 		return NULL;
2791 	}
2792 
2793 out:
2794 	skb->tc_verd = 0;
2795 	return skb;
2796 }
2797 #endif
2798 
2799 /**
2800  *	netdev_rx_handler_register - register receive handler
2801  *	@dev: device to register a handler for
2802  *	@rx_handler: receive handler to register
2803  *	@rx_handler_data: data pointer that is used by rx handler
2804  *
2805  *	Register a receive handler for a device. This handler will then be
2806  *	called from __netif_receive_skb. A negative errno code is returned
2807  *	on a failure.
2808  *
2809  *	The caller must hold the rtnl_mutex.
2810  */
2811 int netdev_rx_handler_register(struct net_device *dev,
2812 			       rx_handler_func_t *rx_handler,
2813 			       void *rx_handler_data)
2814 {
2815 	ASSERT_RTNL();
2816 
2817 	if (dev->rx_handler)
2818 		return -EBUSY;
2819 
2820 	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
2821 	rcu_assign_pointer(dev->rx_handler, rx_handler);
2822 
2823 	return 0;
2824 }
2825 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
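/*
 * Editor's illustrative sketch (hypothetical names, not part of the original
 * source): attaching a handler the way a bridge- or macvlan-like user would.
 * The registration must run under rtnl_lock(), and, per __netif_receive_skb()
 * below, the handler may consume the skb by returning NULL.
 */
static struct sk_buff *example_rx_handler(struct sk_buff *skb)
{
	/* inspect, mangle or redirect skb here; return NULL if consumed */
	return skb;
}

static inline int example_attach_rx_handler(struct net_device *dev, void *priv)
{
	int err;

	rtnl_lock();
	err = netdev_rx_handler_register(dev, example_rx_handler, priv);
	rtnl_unlock();
	return err;
}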
2826 
2827 /**
2828  *	netdev_rx_handler_unregister - unregister receive handler
2829  *	@dev: device to unregister a handler from
2830  *
2831  *	Unregister a receive handler from a device.
2832  *
2833  *	The caller must hold the rtnl_mutex.
2834  */
2835 void netdev_rx_handler_unregister(struct net_device *dev)
2836 {
2837 
2838 	ASSERT_RTNL();
2839 	rcu_assign_pointer(dev->rx_handler, NULL);
2840 	rcu_assign_pointer(dev->rx_handler_data, NULL);
2841 }
2842 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2843 
2844 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2845 					      struct net_device *master)
2846 {
2847 	if (skb->pkt_type == PACKET_HOST) {
2848 		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2849 
2850 		memcpy(dest, master->dev_addr, ETH_ALEN);
2851 	}
2852 }
2853 
2854 /* On bonding slaves other than the currently active slave, suppress
2855  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2856  * ARP on active-backup slaves with arp_validate enabled.
2857  */
2858 int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2859 {
2860 	struct net_device *dev = skb->dev;
2861 
2862 	if (master->priv_flags & IFF_MASTER_ARPMON)
2863 		dev->last_rx = jiffies;
2864 
2865 	if ((master->priv_flags & IFF_MASTER_ALB) &&
2866 	    (master->priv_flags & IFF_BRIDGE_PORT)) {
2867 		/* Unmangle the address. The local destination address
2868 		 * will always be the one the master has. This provides the right
2869 		 * functionality in a bridge.
2870 		 */
2871 		skb_bond_set_mac_by_master(skb, master);
2872 	}
2873 
2874 	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2875 		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2876 		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
2877 			return 0;
2878 
2879 		if (master->priv_flags & IFF_MASTER_ALB) {
2880 			if (skb->pkt_type != PACKET_BROADCAST &&
2881 			    skb->pkt_type != PACKET_MULTICAST)
2882 				return 0;
2883 		}
2884 		if (master->priv_flags & IFF_MASTER_8023AD &&
2885 		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2886 			return 0;
2887 
2888 		return 1;
2889 	}
2890 	return 0;
2891 }
2892 EXPORT_SYMBOL(__skb_bond_should_drop);
2893 
2894 static int __netif_receive_skb(struct sk_buff *skb)
2895 {
2896 	struct packet_type *ptype, *pt_prev;
2897 	rx_handler_func_t *rx_handler;
2898 	struct net_device *orig_dev;
2899 	struct net_device *master;
2900 	struct net_device *null_or_orig;
2901 	struct net_device *orig_or_bond;
2902 	int ret = NET_RX_DROP;
2903 	__be16 type;
2904 
2905 	if (!netdev_tstamp_prequeue)
2906 		net_timestamp_check(skb);
2907 
2908 	trace_netif_receive_skb(skb);
2909 
2910 	/* if we've gotten here through NAPI, check netpoll */
2911 	if (netpoll_receive_skb(skb))
2912 		return NET_RX_DROP;
2913 
2914 	if (!skb->skb_iif)
2915 		skb->skb_iif = skb->dev->ifindex;
2916 
2917 	/*
2918 	 * bonding note: skbs received on inactive slaves should only
2919 	 * be delivered to pkt handlers that are exact matches.  Also
2920 	 * the deliver_no_wcard flag will be set.  If packet handlers
2921 	 * are sensitive to duplicate packets, these skbs will need to
2922 	 * be dropped at the handler.
2923 	 */
2924 	null_or_orig = NULL;
2925 	orig_dev = skb->dev;
2926 	master = ACCESS_ONCE(orig_dev->master);
2927 	if (skb->deliver_no_wcard)
2928 		null_or_orig = orig_dev;
2929 	else if (master) {
2930 		if (skb_bond_should_drop(skb, master)) {
2931 			skb->deliver_no_wcard = 1;
2932 			null_or_orig = orig_dev; /* deliver only exact match */
2933 		} else
2934 			skb->dev = master;
2935 	}
2936 
2937 	__this_cpu_inc(softnet_data.processed);
2938 	skb_reset_network_header(skb);
2939 	skb_reset_transport_header(skb);
2940 	skb->mac_len = skb->network_header - skb->mac_header;
2941 
2942 	pt_prev = NULL;
2943 
2944 	rcu_read_lock();
2945 
2946 #ifdef CONFIG_NET_CLS_ACT
2947 	if (skb->tc_verd & TC_NCLS) {
2948 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2949 		goto ncls;
2950 	}
2951 #endif
2952 
2953 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2954 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2955 		    ptype->dev == orig_dev) {
2956 			if (pt_prev)
2957 				ret = deliver_skb(skb, pt_prev, orig_dev);
2958 			pt_prev = ptype;
2959 		}
2960 	}
2961 
2962 #ifdef CONFIG_NET_CLS_ACT
2963 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2964 	if (!skb)
2965 		goto out;
2966 ncls:
2967 #endif
2968 
2969 	/* Handle special case of bridge or macvlan */
2970 	rx_handler = rcu_dereference(skb->dev->rx_handler);
2971 	if (rx_handler) {
2972 		if (pt_prev) {
2973 			ret = deliver_skb(skb, pt_prev, orig_dev);
2974 			pt_prev = NULL;
2975 		}
2976 		skb = rx_handler(skb);
2977 		if (!skb)
2978 			goto out;
2979 	}
2980 
2981 	if (vlan_tx_tag_present(skb)) {
2982 		if (pt_prev) {
2983 			ret = deliver_skb(skb, pt_prev, orig_dev);
2984 			pt_prev = NULL;
2985 		}
2986 		if (vlan_hwaccel_do_receive(&skb)) {
2987 			ret = __netif_receive_skb(skb);
2988 			goto out;
2989 		} else if (unlikely(!skb))
2990 			goto out;
2991 	}
2992 
2993 	/*
2994 	 * Make sure frames received on VLAN interfaces stacked on
2995 	 * bonding interfaces still make their way to any base bonding
2996 	 * device that may have registered for a specific ptype.  The
2997 	 * handler may have to adjust skb->dev and orig_dev.
2998 	 */
2999 	orig_or_bond = orig_dev;
3000 	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
3001 	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
3002 		orig_or_bond = vlan_dev_real_dev(skb->dev);
3003 	}
3004 
3005 	type = skb->protocol;
3006 	list_for_each_entry_rcu(ptype,
3007 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3008 		if (ptype->type == type && (ptype->dev == null_or_orig ||
3009 		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
3010 		     ptype->dev == orig_or_bond)) {
3011 			if (pt_prev)
3012 				ret = deliver_skb(skb, pt_prev, orig_dev);
3013 			pt_prev = ptype;
3014 		}
3015 	}
3016 
3017 	if (pt_prev) {
3018 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3019 	} else {
3020 		atomic_long_inc(&skb->dev->rx_dropped);
3021 		kfree_skb(skb);
3022 		/* Jamal, now you will not be able to escape explaining
3023 		 * to me how you were going to use this. :-)
3024 		 */
3025 		ret = NET_RX_DROP;
3026 	}
3027 
3028 out:
3029 	rcu_read_unlock();
3030 	return ret;
3031 }
3032 
3033 /**
3034  *	netif_receive_skb - process receive buffer from network
3035  *	@skb: buffer to process
3036  *
3037  *	netif_receive_skb() is the main receive data processing function.
3038  *	It always succeeds. The buffer may be dropped during processing
3039  *	for congestion control or by the protocol layers.
3040  *
3041  *	This function may only be called from softirq context and interrupts
3042  *	should be enabled.
3043  *
3044  *	Return values (usually ignored):
3045  *	NET_RX_SUCCESS: no congestion
3046  *	NET_RX_DROP: packet was dropped
3047  */
3048 int netif_receive_skb(struct sk_buff *skb)
3049 {
3050 	if (netdev_tstamp_prequeue)
3051 		net_timestamp_check(skb);
3052 
3053 	if (skb_defer_rx_timestamp(skb))
3054 		return NET_RX_SUCCESS;
3055 
3056 #ifdef CONFIG_RPS
3057 	{
3058 		struct rps_dev_flow voidflow, *rflow = &voidflow;
3059 		int cpu, ret;
3060 
3061 		rcu_read_lock();
3062 
3063 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
3064 
3065 		if (cpu >= 0) {
3066 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3067 			rcu_read_unlock();
3068 		} else {
3069 			rcu_read_unlock();
3070 			ret = __netif_receive_skb(skb);
3071 		}
3072 
3073 		return ret;
3074 	}
3075 #else
3076 	return __netif_receive_skb(skb);
3077 #endif
3078 }
3079 EXPORT_SYMBOL(netif_receive_skb);
3080 
3081 /* The network device is going away; flush any packets still pending.
3082  * Called with IRQs disabled.
3083  */
3084 static void flush_backlog(void *arg)
3085 {
3086 	struct net_device *dev = arg;
3087 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3088 	struct sk_buff *skb, *tmp;
3089 
3090 	rps_lock(sd);
3091 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3092 		if (skb->dev == dev) {
3093 			__skb_unlink(skb, &sd->input_pkt_queue);
3094 			kfree_skb(skb);
3095 			input_queue_head_incr(sd);
3096 		}
3097 	}
3098 	rps_unlock(sd);
3099 
3100 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3101 		if (skb->dev == dev) {
3102 			__skb_unlink(skb, &sd->process_queue);
3103 			kfree_skb(skb);
3104 			input_queue_head_incr(sd);
3105 		}
3106 	}
3107 }
3108 
3109 static int napi_gro_complete(struct sk_buff *skb)
3110 {
3111 	struct packet_type *ptype;
3112 	__be16 type = skb->protocol;
3113 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3114 	int err = -ENOENT;
3115 
3116 	if (NAPI_GRO_CB(skb)->count == 1) {
3117 		skb_shinfo(skb)->gso_size = 0;
3118 		goto out;
3119 	}
3120 
3121 	rcu_read_lock();
3122 	list_for_each_entry_rcu(ptype, head, list) {
3123 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3124 			continue;
3125 
3126 		err = ptype->gro_complete(skb);
3127 		break;
3128 	}
3129 	rcu_read_unlock();
3130 
3131 	if (err) {
3132 		WARN_ON(&ptype->list == head);
3133 		kfree_skb(skb);
3134 		return NET_RX_SUCCESS;
3135 	}
3136 
3137 out:
3138 	return netif_receive_skb(skb);
3139 }
3140 
3141 inline void napi_gro_flush(struct napi_struct *napi)
3142 {
3143 	struct sk_buff *skb, *next;
3144 
3145 	for (skb = napi->gro_list; skb; skb = next) {
3146 		next = skb->next;
3147 		skb->next = NULL;
3148 		napi_gro_complete(skb);
3149 	}
3150 
3151 	napi->gro_count = 0;
3152 	napi->gro_list = NULL;
3153 }
3154 EXPORT_SYMBOL(napi_gro_flush);
3155 
3156 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3157 {
3158 	struct sk_buff **pp = NULL;
3159 	struct packet_type *ptype;
3160 	__be16 type = skb->protocol;
3161 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3162 	int same_flow;
3163 	int mac_len;
3164 	enum gro_result ret;
3165 
3166 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3167 		goto normal;
3168 
3169 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
3170 		goto normal;
3171 
3172 	rcu_read_lock();
3173 	list_for_each_entry_rcu(ptype, head, list) {
3174 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3175 			continue;
3176 
3177 		skb_set_network_header(skb, skb_gro_offset(skb));
3178 		mac_len = skb->network_header - skb->mac_header;
3179 		skb->mac_len = mac_len;
3180 		NAPI_GRO_CB(skb)->same_flow = 0;
3181 		NAPI_GRO_CB(skb)->flush = 0;
3182 		NAPI_GRO_CB(skb)->free = 0;
3183 
3184 		pp = ptype->gro_receive(&napi->gro_list, skb);
3185 		break;
3186 	}
3187 	rcu_read_unlock();
3188 
3189 	if (&ptype->list == head)
3190 		goto normal;
3191 
3192 	same_flow = NAPI_GRO_CB(skb)->same_flow;
3193 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3194 
3195 	if (pp) {
3196 		struct sk_buff *nskb = *pp;
3197 
3198 		*pp = nskb->next;
3199 		nskb->next = NULL;
3200 		napi_gro_complete(nskb);
3201 		napi->gro_count--;
3202 	}
3203 
3204 	if (same_flow)
3205 		goto ok;
3206 
3207 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3208 		goto normal;
3209 
3210 	napi->gro_count++;
3211 	NAPI_GRO_CB(skb)->count = 1;
3212 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3213 	skb->next = napi->gro_list;
3214 	napi->gro_list = skb;
3215 	ret = GRO_HELD;
3216 
3217 pull:
3218 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
3219 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
3220 
3221 		BUG_ON(skb->end - skb->tail < grow);
3222 
3223 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3224 
3225 		skb->tail += grow;
3226 		skb->data_len -= grow;
3227 
3228 		skb_shinfo(skb)->frags[0].page_offset += grow;
3229 		skb_shinfo(skb)->frags[0].size -= grow;
3230 
3231 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
3232 			put_page(skb_shinfo(skb)->frags[0].page);
3233 			memmove(skb_shinfo(skb)->frags,
3234 				skb_shinfo(skb)->frags + 1,
3235 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3236 		}
3237 	}
3238 
3239 ok:
3240 	return ret;
3241 
3242 normal:
3243 	ret = GRO_NORMAL;
3244 	goto pull;
3245 }
3246 EXPORT_SYMBOL(dev_gro_receive);
3247 
3248 static inline gro_result_t
3249 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3250 {
3251 	struct sk_buff *p;
3252 
3253 	for (p = napi->gro_list; p; p = p->next) {
3254 		unsigned long diffs;
3255 
3256 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3257 		diffs |= p->vlan_tci ^ skb->vlan_tci;
3258 		diffs |= compare_ether_header(skb_mac_header(p),
3259 					      skb_gro_mac_header(skb));
3260 		NAPI_GRO_CB(p)->same_flow = !diffs;
3261 		NAPI_GRO_CB(p)->flush = 0;
3262 	}
3263 
3264 	return dev_gro_receive(napi, skb);
3265 }
3266 
3267 gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3268 {
3269 	switch (ret) {
3270 	case GRO_NORMAL:
3271 		if (netif_receive_skb(skb))
3272 			ret = GRO_DROP;
3273 		break;
3274 
3275 	case GRO_DROP:
3276 	case GRO_MERGED_FREE:
3277 		kfree_skb(skb);
3278 		break;
3279 
3280 	case GRO_HELD:
3281 	case GRO_MERGED:
3282 		break;
3283 	}
3284 
3285 	return ret;
3286 }
3287 EXPORT_SYMBOL(napi_skb_finish);
3288 
3289 void skb_gro_reset_offset(struct sk_buff *skb)
3290 {
3291 	NAPI_GRO_CB(skb)->data_offset = 0;
3292 	NAPI_GRO_CB(skb)->frag0 = NULL;
3293 	NAPI_GRO_CB(skb)->frag0_len = 0;
3294 
3295 	if (skb->mac_header == skb->tail &&
3296 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
3297 		NAPI_GRO_CB(skb)->frag0 =
3298 			page_address(skb_shinfo(skb)->frags[0].page) +
3299 			skb_shinfo(skb)->frags[0].page_offset;
3300 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
3301 	}
3302 }
3303 EXPORT_SYMBOL(skb_gro_reset_offset);
3304 
3305 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3306 {
3307 	skb_gro_reset_offset(skb);
3308 
3309 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3310 }
3311 EXPORT_SYMBOL(napi_gro_receive);
3312 
3313 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3314 {
3315 	__skb_pull(skb, skb_headlen(skb));
3316 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
3317 	skb->vlan_tci = 0;
3318 
3319 	napi->skb = skb;
3320 }
3321 
3322 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3323 {
3324 	struct sk_buff *skb = napi->skb;
3325 
3326 	if (!skb) {
3327 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3328 		if (skb)
3329 			napi->skb = skb;
3330 	}
3331 	return skb;
3332 }
3333 EXPORT_SYMBOL(napi_get_frags);
3334 
3335 gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3336 			       gro_result_t ret)
3337 {
3338 	switch (ret) {
3339 	case GRO_NORMAL:
3340 	case GRO_HELD:
3341 		skb->protocol = eth_type_trans(skb, skb->dev);
3342 
3343 		if (ret == GRO_HELD)
3344 			skb_gro_pull(skb, -ETH_HLEN);
3345 		else if (netif_receive_skb(skb))
3346 			ret = GRO_DROP;
3347 		break;
3348 
3349 	case GRO_DROP:
3350 	case GRO_MERGED_FREE:
3351 		napi_reuse_skb(napi, skb);
3352 		break;
3353 
3354 	case GRO_MERGED:
3355 		break;
3356 	}
3357 
3358 	return ret;
3359 }
3360 EXPORT_SYMBOL(napi_frags_finish);
3361 
3362 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3363 {
3364 	struct sk_buff *skb = napi->skb;
3365 	struct ethhdr *eth;
3366 	unsigned int hlen;
3367 	unsigned int off;
3368 
3369 	napi->skb = NULL;
3370 
3371 	skb_reset_mac_header(skb);
3372 	skb_gro_reset_offset(skb);
3373 
3374 	off = skb_gro_offset(skb);
3375 	hlen = off + sizeof(*eth);
3376 	eth = skb_gro_header_fast(skb, off);
3377 	if (skb_gro_header_hard(skb, hlen)) {
3378 		eth = skb_gro_header_slow(skb, hlen, off);
3379 		if (unlikely(!eth)) {
3380 			napi_reuse_skb(napi, skb);
3381 			skb = NULL;
3382 			goto out;
3383 		}
3384 	}
3385 
3386 	skb_gro_pull(skb, sizeof(*eth));
3387 
3388 	/*
3389 	 * This works because the only protocols we care about don't require
3390 	 * special handling.  We'll fix it up properly at the end.
3391 	 */
3392 	skb->protocol = eth->h_proto;
3393 
3394 out:
3395 	return skb;
3396 }
3397 EXPORT_SYMBOL(napi_frags_skb);
3398 
3399 gro_result_t napi_gro_frags(struct napi_struct *napi)
3400 {
3401 	struct sk_buff *skb = napi_frags_skb(napi);
3402 
3403 	if (!skb)
3404 		return GRO_DROP;
3405 
3406 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3407 }
3408 EXPORT_SYMBOL(napi_gro_frags);
3409 
3410 /*
3411  * net_rps_action sends any pending IPIs for RPS.
3412  * Note: called with local irq disabled, but exits with local irq enabled.
3413  */
3414 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3415 {
3416 #ifdef CONFIG_RPS
3417 	struct softnet_data *remsd = sd->rps_ipi_list;
3418 
3419 	if (remsd) {
3420 		sd->rps_ipi_list = NULL;
3421 
3422 		local_irq_enable();
3423 
3424 		/* Send pending IPIs to kick RPS processing on remote CPUs. */
3425 		while (remsd) {
3426 			struct softnet_data *next = remsd->rps_ipi_next;
3427 
3428 			if (cpu_online(remsd->cpu))
3429 				__smp_call_function_single(remsd->cpu,
3430 							   &remsd->csd, 0);
3431 			remsd = next;
3432 		}
3433 	} else
3434 #endif
3435 		local_irq_enable();
3436 }
3437 
3438 static int process_backlog(struct napi_struct *napi, int quota)
3439 {
3440 	int work = 0;
3441 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
3442 
3443 #ifdef CONFIG_RPS
3444 	/* Check if we have pending IPIs; it's better to send them now
3445 	 * than to wait for net_rx_action() to end.
3446 	 */
3447 	if (sd->rps_ipi_list) {
3448 		local_irq_disable();
3449 		net_rps_action_and_irq_enable(sd);
3450 	}
3451 #endif
3452 	napi->weight = weight_p;
3453 	local_irq_disable();
3454 	while (work < quota) {
3455 		struct sk_buff *skb;
3456 		unsigned int qlen;
3457 
3458 		while ((skb = __skb_dequeue(&sd->process_queue))) {
3459 			local_irq_enable();
3460 			__netif_receive_skb(skb);
3461 			local_irq_disable();
3462 			input_queue_head_incr(sd);
3463 			if (++work >= quota) {
3464 				local_irq_enable();
3465 				return work;
3466 			}
3467 		}
3468 
3469 		rps_lock(sd);
3470 		qlen = skb_queue_len(&sd->input_pkt_queue);
3471 		if (qlen)
3472 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
3473 						   &sd->process_queue);
3474 
3475 		if (qlen < quota - work) {
3476 			/*
3477 			 * Inline a custom version of __napi_complete().
3478 			 * Only the current CPU owns and manipulates this napi,
3479 			 * and NAPI_STATE_SCHED is the only possible flag set on the backlog.
3480 			 * We can use a plain write instead of clear_bit(),
3481 			 * and we don't need an smp_mb() memory barrier.
3482 			 */
3483 			list_del(&napi->poll_list);
3484 			napi->state = 0;
3485 
3486 			quota = work + qlen;
3487 		}
3488 		rps_unlock(sd);
3489 	}
3490 	local_irq_enable();
3491 
3492 	return work;
3493 }
3494 
3495 /**
3496  * __napi_schedule - schedule for receive
3497  * @n: entry to schedule
3498  *
3499  * The entry's receive function will be scheduled to run
3500  */
3501 void __napi_schedule(struct napi_struct *n)
3502 {
3503 	unsigned long flags;
3504 
3505 	local_irq_save(flags);
3506 	____napi_schedule(&__get_cpu_var(softnet_data), n);
3507 	local_irq_restore(flags);
3508 }
3509 EXPORT_SYMBOL(__napi_schedule);
3510 
3511 void __napi_complete(struct napi_struct *n)
3512 {
3513 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3514 	BUG_ON(n->gro_list);
3515 
3516 	list_del(&n->poll_list);
3517 	smp_mb__before_clear_bit();
3518 	clear_bit(NAPI_STATE_SCHED, &n->state);
3519 }
3520 EXPORT_SYMBOL(__napi_complete);
3521 
3522 void napi_complete(struct napi_struct *n)
3523 {
3524 	unsigned long flags;
3525 
3526 	/*
3527 	 * don't let NAPI dequeue from the CPU poll list
3528 	 * just in case it's running on a different CPU
3529 	 */
3530 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3531 		return;
3532 
3533 	napi_gro_flush(n);
3534 	local_irq_save(flags);
3535 	__napi_complete(n);
3536 	local_irq_restore(flags);
3537 }
3538 EXPORT_SYMBOL(napi_complete);
3539 
3540 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3541 		    int (*poll)(struct napi_struct *, int), int weight)
3542 {
3543 	INIT_LIST_HEAD(&napi->poll_list);
3544 	napi->gro_count = 0;
3545 	napi->gro_list = NULL;
3546 	napi->skb = NULL;
3547 	napi->poll = poll;
3548 	napi->weight = weight;
3549 	list_add(&napi->dev_list, &dev->napi_list);
3550 	napi->dev = dev;
3551 #ifdef CONFIG_NETPOLL
3552 	spin_lock_init(&napi->poll_lock);
3553 	napi->poll_owner = -1;
3554 #endif
3555 	set_bit(NAPI_STATE_SCHED, &napi->state);
3556 }
3557 EXPORT_SYMBOL(netif_napi_add);
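/*
 * Editor's illustrative sketch (hypothetical driver poll, not part of the
 * original source): the usual pairing of netif_napi_add() with a poll
 * callback that feeds frames to napi_gro_receive() and calls napi_complete()
 * once it has done less work than its budget.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
	int work = 0;

	while (work < budget) {
		struct sk_buff *skb = NULL;	/* dequeue one frame from the RX ring here */

		if (!skb)			/* ring empty */
			break;
		napi_gro_receive(napi, skb);
		work++;
	}

	if (work < budget)
		napi_complete(napi);		/* re-enable device RX interrupts after this */

	return work;
}

static inline void example_setup_napi(struct net_device *dev,
				      struct napi_struct *napi)
{
	netif_napi_add(dev, napi, example_poll, 64);	/* 64 is a common weight */
}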
3558 
3559 void netif_napi_del(struct napi_struct *napi)
3560 {
3561 	struct sk_buff *skb, *next;
3562 
3563 	list_del_init(&napi->dev_list);
3564 	napi_free_frags(napi);
3565 
3566 	for (skb = napi->gro_list; skb; skb = next) {
3567 		next = skb->next;
3568 		skb->next = NULL;
3569 		kfree_skb(skb);
3570 	}
3571 
3572 	napi->gro_list = NULL;
3573 	napi->gro_count = 0;
3574 }
3575 EXPORT_SYMBOL(netif_napi_del);
3576 
3577 static void net_rx_action(struct softirq_action *h)
3578 {
3579 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3580 	unsigned long time_limit = jiffies + 2;
3581 	int budget = netdev_budget;
3582 	void *have;
3583 
3584 	local_irq_disable();
3585 
3586 	while (!list_empty(&sd->poll_list)) {
3587 		struct napi_struct *n;
3588 		int work, weight;
3589 
3590 		/* If the softirq window is exhausted then punt.
3591 		 * Allow this to run for 2 jiffies, which will allow
3592 		 * an average latency of 1.5/HZ.
3593 		 */
3594 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3595 			goto softnet_break;
3596 
3597 		local_irq_enable();
3598 
3599 		/* Even though interrupts have been re-enabled, this
3600 		 * access is safe because interrupts can only add new
3601 		 * entries to the tail of this list, and only ->poll()
3602 		 * calls can remove this head entry from the list.
3603 		 */
3604 		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3605 
3606 		have = netpoll_poll_lock(n);
3607 
3608 		weight = n->weight;
3609 
3610 		/* This NAPI_STATE_SCHED test is for avoiding a race
3611 		 * with netpoll's poll_napi().  Only the entity which
3612 		 * obtains the lock and sees NAPI_STATE_SCHED set will
3613 		 * actually make the ->poll() call.  Therefore we avoid
3614 		 * accidentally calling ->poll() when NAPI is not scheduled.
3615 		 */
3616 		work = 0;
3617 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3618 			work = n->poll(n, weight);
3619 			trace_napi_poll(n);
3620 		}
3621 
3622 		WARN_ON_ONCE(work > weight);
3623 
3624 		budget -= work;
3625 
3626 		local_irq_disable();
3627 
3628 		/* Drivers must not modify the NAPI state if they
3629 		 * consume the entire weight.  In such cases this code
3630 		 * still "owns" the NAPI instance and therefore can
3631 		 * move the instance around on the list at-will.
3632 		 */
3633 		if (unlikely(work == weight)) {
3634 			if (unlikely(napi_disable_pending(n))) {
3635 				local_irq_enable();
3636 				napi_complete(n);
3637 				local_irq_disable();
3638 			} else
3639 				list_move_tail(&n->poll_list, &sd->poll_list);
3640 		}
3641 
3642 		netpoll_poll_unlock(have);
3643 	}
3644 out:
3645 	net_rps_action_and_irq_enable(sd);
3646 
3647 #ifdef CONFIG_NET_DMA
3648 	/*
3649 	 * There may not be any more sk_buffs coming right now, so push
3650 	 * any pending DMA copies to hardware
3651 	 */
3652 	dma_issue_pending_all();
3653 #endif
3654 
3655 	return;
3656 
3657 softnet_break:
3658 	sd->time_squeeze++;
3659 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
3660 	goto out;
3661 }
3662 
3663 static gifconf_func_t *gifconf_list[NPROTO];
3664 
3665 /**
3666  *	register_gifconf	-	register a SIOCGIF handler
3667  *	@family: Address family
3668  *	@gifconf: Function handler
3669  *
3670  *	Register protocol dependent address dumping routines. The handler
3671  *	that is passed must not be freed or reused until it has been replaced
3672  *	by another handler.
3673  */
3674 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3675 {
3676 	if (family >= NPROTO)
3677 		return -EINVAL;
3678 	gifconf_list[family] = gifconf;
3679 	return 0;
3680 }
3681 EXPORT_SYMBOL(register_gifconf);
3682 
3683 
3684 /*
3685  *	Map an interface index to its name (SIOCGIFNAME)
3686  */
3687 
3688 /*
3689  *	We need this ioctl for efficient implementation of the
3690  *	if_indextoname() function required by the IPv6 API.  Without
3691  *	it, we would have to search all the interfaces to find a
3692  *	match.  --pb
3693  */
3694 
3695 static int dev_ifname(struct net *net, struct ifreq __user *arg)
3696 {
3697 	struct net_device *dev;
3698 	struct ifreq ifr;
3699 
3700 	/*
3701 	 *	Fetch the caller's info block.
3702 	 */
3703 
3704 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3705 		return -EFAULT;
3706 
3707 	rcu_read_lock();
3708 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
3709 	if (!dev) {
3710 		rcu_read_unlock();
3711 		return -ENODEV;
3712 	}
3713 
3714 	strcpy(ifr.ifr_name, dev->name);
3715 	rcu_read_unlock();
3716 
3717 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3718 		return -EFAULT;
3719 	return 0;
3720 }
3721 
3722 /*
3723  *	Perform a SIOCGIFCONF call. This structure will change
3724  *	size eventually, and there is nothing I can do about it.
3725  *	Thus we will need a 'compatibility mode'.
3726  */
3727 
3728 static int dev_ifconf(struct net *net, char __user *arg)
3729 {
3730 	struct ifconf ifc;
3731 	struct net_device *dev;
3732 	char __user *pos;
3733 	int len;
3734 	int total;
3735 	int i;
3736 
3737 	/*
3738 	 *	Fetch the caller's info block.
3739 	 */
3740 
3741 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3742 		return -EFAULT;
3743 
3744 	pos = ifc.ifc_buf;
3745 	len = ifc.ifc_len;
3746 
3747 	/*
3748 	 *	Loop over the interfaces, and write an info block for each.
3749 	 */
3750 
3751 	total = 0;
3752 	for_each_netdev(net, dev) {
3753 		for (i = 0; i < NPROTO; i++) {
3754 			if (gifconf_list[i]) {
3755 				int done;
3756 				if (!pos)
3757 					done = gifconf_list[i](dev, NULL, 0);
3758 				else
3759 					done = gifconf_list[i](dev, pos + total,
3760 							       len - total);
3761 				if (done < 0)
3762 					return -EFAULT;
3763 				total += done;
3764 			}
3765 		}
3766 	}
3767 
3768 	/*
3769 	 *	All done.  Write the updated control block back to the caller.
3770 	 */
3771 	ifc.ifc_len = total;
3772 
3773 	/*
3774 	 * 	Both BSD and Solaris return 0 here, so we do too.
3775 	 */
3776 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3777 }
3778 
3779 #ifdef CONFIG_PROC_FS
3780 /*
3781  *	This is invoked by the /proc filesystem handler to display a device
3782  *	in detail.
3783  */
3784 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3785 	__acquires(RCU)
3786 {
3787 	struct net *net = seq_file_net(seq);
3788 	loff_t off;
3789 	struct net_device *dev;
3790 
3791 	rcu_read_lock();
3792 	if (!*pos)
3793 		return SEQ_START_TOKEN;
3794 
3795 	off = 1;
3796 	for_each_netdev_rcu(net, dev)
3797 		if (off++ == *pos)
3798 			return dev;
3799 
3800 	return NULL;
3801 }
3802 
3803 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3804 {
3805 	struct net_device *dev = (v == SEQ_START_TOKEN) ?
3806 				  first_net_device(seq_file_net(seq)) :
3807 				  next_net_device((struct net_device *)v);
3808 
3809 	++*pos;
3810 	return rcu_dereference(dev);
3811 }
3812 
3813 void dev_seq_stop(struct seq_file *seq, void *v)
3814 	__releases(RCU)
3815 {
3816 	rcu_read_unlock();
3817 }
3818 
3819 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3820 {
3821 	struct rtnl_link_stats64 temp;
3822 	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
3823 
3824 	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
3825 		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
3826 		   dev->name, stats->rx_bytes, stats->rx_packets,
3827 		   stats->rx_errors,
3828 		   stats->rx_dropped + stats->rx_missed_errors,
3829 		   stats->rx_fifo_errors,
3830 		   stats->rx_length_errors + stats->rx_over_errors +
3831 		    stats->rx_crc_errors + stats->rx_frame_errors,
3832 		   stats->rx_compressed, stats->multicast,
3833 		   stats->tx_bytes, stats->tx_packets,
3834 		   stats->tx_errors, stats->tx_dropped,
3835 		   stats->tx_fifo_errors, stats->collisions,
3836 		   stats->tx_carrier_errors +
3837 		    stats->tx_aborted_errors +
3838 		    stats->tx_window_errors +
3839 		    stats->tx_heartbeat_errors,
3840 		   stats->tx_compressed);
3841 }
3842 
3843 /*
3844  *	Called from the PROCfs module. This now uses the new arbitrary sized
3845  *	/proc/net interface to create /proc/net/dev
3846  */
3847 static int dev_seq_show(struct seq_file *seq, void *v)
3848 {
3849 	if (v == SEQ_START_TOKEN)
3850 		seq_puts(seq, "Inter-|   Receive                            "
3851 			      "                    |  Transmit\n"
3852 			      " face |bytes    packets errs drop fifo frame "
3853 			      "compressed multicast|bytes    packets errs "
3854 			      "drop fifo colls carrier compressed\n");
3855 	else
3856 		dev_seq_printf_stats(seq, v);
3857 	return 0;
3858 }
3859 
3860 static struct softnet_data *softnet_get_online(loff_t *pos)
3861 {
3862 	struct softnet_data *sd = NULL;
3863 
3864 	while (*pos < nr_cpu_ids)
3865 		if (cpu_online(*pos)) {
3866 			sd = &per_cpu(softnet_data, *pos);
3867 			break;
3868 		} else
3869 			++*pos;
3870 	return sd;
3871 }
3872 
3873 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3874 {
3875 	return softnet_get_online(pos);
3876 }
3877 
3878 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3879 {
3880 	++*pos;
3881 	return softnet_get_online(pos);
3882 }
3883 
3884 static void softnet_seq_stop(struct seq_file *seq, void *v)
3885 {
3886 }
3887 
3888 static int softnet_seq_show(struct seq_file *seq, void *v)
3889 {
3890 	struct softnet_data *sd = v;
3891 
3892 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3893 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
3894 		   0, 0, 0, 0, /* was fastroute */
3895 		   sd->cpu_collision, sd->received_rps);
3896 	return 0;
3897 }
3898 
3899 static const struct seq_operations dev_seq_ops = {
3900 	.start = dev_seq_start,
3901 	.next  = dev_seq_next,
3902 	.stop  = dev_seq_stop,
3903 	.show  = dev_seq_show,
3904 };
3905 
3906 static int dev_seq_open(struct inode *inode, struct file *file)
3907 {
3908 	return seq_open_net(inode, file, &dev_seq_ops,
3909 			    sizeof(struct seq_net_private));
3910 }
3911 
3912 static const struct file_operations dev_seq_fops = {
3913 	.owner	 = THIS_MODULE,
3914 	.open    = dev_seq_open,
3915 	.read    = seq_read,
3916 	.llseek  = seq_lseek,
3917 	.release = seq_release_net,
3918 };
3919 
3920 static const struct seq_operations softnet_seq_ops = {
3921 	.start = softnet_seq_start,
3922 	.next  = softnet_seq_next,
3923 	.stop  = softnet_seq_stop,
3924 	.show  = softnet_seq_show,
3925 };
3926 
3927 static int softnet_seq_open(struct inode *inode, struct file *file)
3928 {
3929 	return seq_open(file, &softnet_seq_ops);
3930 }
3931 
3932 static const struct file_operations softnet_seq_fops = {
3933 	.owner	 = THIS_MODULE,
3934 	.open    = softnet_seq_open,
3935 	.read    = seq_read,
3936 	.llseek  = seq_lseek,
3937 	.release = seq_release,
3938 };
3939 
3940 static void *ptype_get_idx(loff_t pos)
3941 {
3942 	struct packet_type *pt = NULL;
3943 	loff_t i = 0;
3944 	int t;
3945 
3946 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3947 		if (i == pos)
3948 			return pt;
3949 		++i;
3950 	}
3951 
3952 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3953 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3954 			if (i == pos)
3955 				return pt;
3956 			++i;
3957 		}
3958 	}
3959 	return NULL;
3960 }
3961 
3962 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3963 	__acquires(RCU)
3964 {
3965 	rcu_read_lock();
3966 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3967 }
3968 
3969 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3970 {
3971 	struct packet_type *pt;
3972 	struct list_head *nxt;
3973 	int hash;
3974 
3975 	++*pos;
3976 	if (v == SEQ_START_TOKEN)
3977 		return ptype_get_idx(0);
3978 
3979 	pt = v;
3980 	nxt = pt->list.next;
3981 	if (pt->type == htons(ETH_P_ALL)) {
3982 		if (nxt != &ptype_all)
3983 			goto found;
3984 		hash = 0;
3985 		nxt = ptype_base[0].next;
3986 	} else
3987 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3988 
3989 	while (nxt == &ptype_base[hash]) {
3990 		if (++hash >= PTYPE_HASH_SIZE)
3991 			return NULL;
3992 		nxt = ptype_base[hash].next;
3993 	}
3994 found:
3995 	return list_entry(nxt, struct packet_type, list);
3996 }
3997 
3998 static void ptype_seq_stop(struct seq_file *seq, void *v)
3999 	__releases(RCU)
4000 {
4001 	rcu_read_unlock();
4002 }
4003 
4004 static int ptype_seq_show(struct seq_file *seq, void *v)
4005 {
4006 	struct packet_type *pt = v;
4007 
4008 	if (v == SEQ_START_TOKEN)
4009 		seq_puts(seq, "Type Device      Function\n");
4010 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
4011 		if (pt->type == htons(ETH_P_ALL))
4012 			seq_puts(seq, "ALL ");
4013 		else
4014 			seq_printf(seq, "%04x", ntohs(pt->type));
4015 
4016 		seq_printf(seq, " %-8s %pF\n",
4017 			   pt->dev ? pt->dev->name : "", pt->func);
4018 	}
4019 
4020 	return 0;
4021 }
4022 
4023 static const struct seq_operations ptype_seq_ops = {
4024 	.start = ptype_seq_start,
4025 	.next  = ptype_seq_next,
4026 	.stop  = ptype_seq_stop,
4027 	.show  = ptype_seq_show,
4028 };
4029 
4030 static int ptype_seq_open(struct inode *inode, struct file *file)
4031 {
4032 	return seq_open_net(inode, file, &ptype_seq_ops,
4033 			sizeof(struct seq_net_private));
4034 }
4035 
4036 static const struct file_operations ptype_seq_fops = {
4037 	.owner	 = THIS_MODULE,
4038 	.open    = ptype_seq_open,
4039 	.read    = seq_read,
4040 	.llseek  = seq_lseek,
4041 	.release = seq_release_net,
4042 };
4043 
4044 
4045 static int __net_init dev_proc_net_init(struct net *net)
4046 {
4047 	int rc = -ENOMEM;
4048 
4049 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4050 		goto out;
4051 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4052 		goto out_dev;
4053 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4054 		goto out_softnet;
4055 
4056 	if (wext_proc_init(net))
4057 		goto out_ptype;
4058 	rc = 0;
4059 out:
4060 	return rc;
4061 out_ptype:
4062 	proc_net_remove(net, "ptype");
4063 out_softnet:
4064 	proc_net_remove(net, "softnet_stat");
4065 out_dev:
4066 	proc_net_remove(net, "dev");
4067 	goto out;
4068 }
4069 
4070 static void __net_exit dev_proc_net_exit(struct net *net)
4071 {
4072 	wext_proc_exit(net);
4073 
4074 	proc_net_remove(net, "ptype");
4075 	proc_net_remove(net, "softnet_stat");
4076 	proc_net_remove(net, "dev");
4077 }
4078 
4079 static struct pernet_operations __net_initdata dev_proc_ops = {
4080 	.init = dev_proc_net_init,
4081 	.exit = dev_proc_net_exit,
4082 };
4083 
4084 static int __init dev_proc_init(void)
4085 {
4086 	return register_pernet_subsys(&dev_proc_ops);
4087 }
4088 #else
4089 #define dev_proc_init() 0
4090 #endif	/* CONFIG_PROC_FS */
4091 
4092 
4093 /**
4094  *	netdev_set_master	-	set up master/slave pair
4095  *	@slave: slave device
4096  *	@master: new master device
4097  *
4098  *	Changes the master device of the slave. Pass %NULL to break the
4099  *	bonding. The caller must hold the RTNL semaphore. On a failure
4100  *	a negative errno code is returned. On success the reference counts
4101  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
4102  *	function returns zero.
4103  */
4104 int netdev_set_master(struct net_device *slave, struct net_device *master)
4105 {
4106 	struct net_device *old = slave->master;
4107 
4108 	ASSERT_RTNL();
4109 
4110 	if (master) {
4111 		if (old)
4112 			return -EBUSY;
4113 		dev_hold(master);
4114 	}
4115 
4116 	slave->master = master;
4117 
4118 	if (old) {
4119 		synchronize_net();
4120 		dev_put(old);
4121 	}
4122 	if (master)
4123 		slave->flags |= IFF_SLAVE;
4124 	else
4125 		slave->flags &= ~IFF_SLAVE;
4126 
4127 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4128 	return 0;
4129 }
4130 EXPORT_SYMBOL(netdev_set_master);
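
/*
 * Illustrative sketch (not part of dev.c): a bonding-style driver enslaving
 * and later releasing a device.  "bond_dev" and "slave_dev" are hypothetical
 * pointers owned by the caller, which must hold the RTNL lock as the
 * kernel-doc above requires.
 *
 *	ASSERT_RTNL();
 *	err = netdev_set_master(slave_dev, bond_dev);	(enslave)
 *	if (err)
 *		return err;
 *	...
 *	netdev_set_master(slave_dev, NULL);		(break the pairing)
 */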
4131 
4132 static void dev_change_rx_flags(struct net_device *dev, int flags)
4133 {
4134 	const struct net_device_ops *ops = dev->netdev_ops;
4135 
4136 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4137 		ops->ndo_change_rx_flags(dev, flags);
4138 }
4139 
4140 static int __dev_set_promiscuity(struct net_device *dev, int inc)
4141 {
4142 	unsigned short old_flags = dev->flags;
4143 	uid_t uid;
4144 	gid_t gid;
4145 
4146 	ASSERT_RTNL();
4147 
4148 	dev->flags |= IFF_PROMISC;
4149 	dev->promiscuity += inc;
4150 	if (dev->promiscuity == 0) {
4151 		/*
4152 		 * Avoid overflow.
4153 		 * If inc causes overflow, leave promisc untouched and return an error.
4154 		 */
4155 		if (inc < 0)
4156 			dev->flags &= ~IFF_PROMISC;
4157 		else {
4158 			dev->promiscuity -= inc;
4159 			printk(KERN_WARNING "%s: promiscuity touches roof, "
4160 				"set promiscuity failed, promiscuity feature "
4161 				"of device might be broken.\n", dev->name);
4162 			return -EOVERFLOW;
4163 		}
4164 	}
4165 	if (dev->flags != old_flags) {
4166 		printk(KERN_INFO "device %s %s promiscuous mode\n",
4167 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
4168 							       "left");
4169 		if (audit_enabled) {
4170 			current_uid_gid(&uid, &gid);
4171 			audit_log(current->audit_context, GFP_ATOMIC,
4172 				AUDIT_ANOM_PROMISCUOUS,
4173 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4174 				dev->name, (dev->flags & IFF_PROMISC),
4175 				(old_flags & IFF_PROMISC),
4176 				audit_get_loginuid(current),
4177 				uid, gid,
4178 				audit_get_sessionid(current));
4179 		}
4180 
4181 		dev_change_rx_flags(dev, IFF_PROMISC);
4182 	}
4183 	return 0;
4184 }
4185 
4186 /**
4187  *	dev_set_promiscuity	- update promiscuity count on a device
4188  *	@dev: device
4189  *	@inc: modifier
4190  *
4191  *	Add or remove promiscuity from a device. While the count in the device
4192  *	remains above zero the interface remains promiscuous. Once it hits zero
4193  *	the device reverts back to normal filtering operation. A negative inc
4194  *	value is used to drop promiscuity on the device.
4195  *	Return 0 if successful or a negative errno code on error.
4196  */
4197 int dev_set_promiscuity(struct net_device *dev, int inc)
4198 {
4199 	unsigned short old_flags = dev->flags;
4200 	int err;
4201 
4202 	err = __dev_set_promiscuity(dev, inc);
4203 	if (err < 0)
4204 		return err;
4205 	if (dev->flags != old_flags)
4206 		dev_set_rx_mode(dev);
4207 	return err;
4208 }
4209 EXPORT_SYMBOL(dev_set_promiscuity);
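
/*
 * Illustrative sketch (not part of dev.c): a packet-capture style user of the
 * promiscuity counter.  "dev" is a hypothetical device the caller already
 * holds a reference on; the RTNL lock is required because
 * __dev_set_promiscuity() asserts it.
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);	(start receiving everything)
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);		(drop our reference again)
 *	rtnl_unlock();
 */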
4210 
4211 /**
4212  *	dev_set_allmulti	- update allmulti count on a device
4213  *	@dev: device
4214  *	@inc: modifier
4215  *
4216  *	Add or remove reception of all multicast frames to a device. While the
4217  *	count in the device remains above zero the interface remains listening
4218  *	to all multicast frames. Once it hits zero the device reverts back to normal
4219  *	filtering operation. A negative @inc value is used to drop the counter
4220  *	when releasing a resource needing all multicasts.
4221  *	Return 0 if successful or a negative errno code on error.
4222  */
4223 
4224 int dev_set_allmulti(struct net_device *dev, int inc)
4225 {
4226 	unsigned short old_flags = dev->flags;
4227 
4228 	ASSERT_RTNL();
4229 
4230 	dev->flags |= IFF_ALLMULTI;
4231 	dev->allmulti += inc;
4232 	if (dev->allmulti == 0) {
4233 		/*
4234 		 * Avoid overflow.
4235 		 * If inc causes overflow, untouch allmulti and return error.
4236 		 * If inc causes overflow, leave allmulti untouched and return an error.
4237 		if (inc < 0)
4238 			dev->flags &= ~IFF_ALLMULTI;
4239 		else {
4240 			dev->allmulti -= inc;
4241 			printk(KERN_WARNING "%s: allmulti touches roof, "
4242 				"set allmulti failed, allmulti feature of "
4243 				"device might be broken.\n", dev->name);
4244 			return -EOVERFLOW;
4245 		}
4246 	}
4247 	if (dev->flags ^ old_flags) {
4248 		dev_change_rx_flags(dev, IFF_ALLMULTI);
4249 		dev_set_rx_mode(dev);
4250 	}
4251 	return 0;
4252 }
4253 EXPORT_SYMBOL(dev_set_allmulti);
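
/*
 * Illustrative sketch (not part of dev.c): a multicast routing component that
 * needs every multicast frame while a resource is active.  "dev" is
 * hypothetical; RTNL must be held, mirroring the ASSERT_RTNL() above.
 *
 *	rtnl_lock();
 *	err = dev_set_allmulti(dev, 1);		(take an allmulti reference)
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_allmulti(dev, -1);		(release it when done)
 *	rtnl_unlock();
 */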
4254 
4255 /*
4256  *	Upload unicast and multicast address lists to device and
4257  *	configure RX filtering. When the device doesn't support unicast
4258  *	filtering it is put in promiscuous mode while unicast addresses
4259  *	are present.
4260  */
4261 void __dev_set_rx_mode(struct net_device *dev)
4262 {
4263 	const struct net_device_ops *ops = dev->netdev_ops;
4264 
4265 	/* dev_open will call this function so the list will stay sane. */
4266 	if (!(dev->flags&IFF_UP))
4267 		return;
4268 
4269 	if (!netif_device_present(dev))
4270 		return;
4271 
4272 	if (ops->ndo_set_rx_mode)
4273 		ops->ndo_set_rx_mode(dev);
4274 	else {
4275 		/* Unicast address changes may only happen under the rtnl,
4276 		 * therefore calling __dev_set_promiscuity here is safe.
4277 		 */
4278 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4279 			__dev_set_promiscuity(dev, 1);
4280 			dev->uc_promisc = 1;
4281 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4282 			__dev_set_promiscuity(dev, -1);
4283 			dev->uc_promisc = 0;
4284 		}
4285 
4286 		if (ops->ndo_set_multicast_list)
4287 			ops->ndo_set_multicast_list(dev);
4288 	}
4289 }
4290 
4291 void dev_set_rx_mode(struct net_device *dev)
4292 {
4293 	netif_addr_lock_bh(dev);
4294 	__dev_set_rx_mode(dev);
4295 	netif_addr_unlock_bh(dev);
4296 }
4297 
4298 /**
4299  *	dev_get_flags - get flags reported to userspace
4300  *	@dev: device
4301  *
4302  *	Get the combination of flag bits exported through APIs to userspace.
4303  */
4304 unsigned dev_get_flags(const struct net_device *dev)
4305 {
4306 	unsigned flags;
4307 
4308 	flags = (dev->flags & ~(IFF_PROMISC |
4309 				IFF_ALLMULTI |
4310 				IFF_RUNNING |
4311 				IFF_LOWER_UP |
4312 				IFF_DORMANT)) |
4313 		(dev->gflags & (IFF_PROMISC |
4314 				IFF_ALLMULTI));
4315 
4316 	if (netif_running(dev)) {
4317 		if (netif_oper_up(dev))
4318 			flags |= IFF_RUNNING;
4319 		if (netif_carrier_ok(dev))
4320 			flags |= IFF_LOWER_UP;
4321 		if (netif_dormant(dev))
4322 			flags |= IFF_DORMANT;
4323 	}
4324 
4325 	return flags;
4326 }
4327 EXPORT_SYMBOL(dev_get_flags);
4328 
4329 int __dev_change_flags(struct net_device *dev, unsigned int flags)
4330 {
4331 	int old_flags = dev->flags;
4332 	int ret;
4333 
4334 	ASSERT_RTNL();
4335 
4336 	/*
4337 	 *	Set the flags on our device.
4338 	 */
4339 
4340 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4341 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4342 			       IFF_AUTOMEDIA)) |
4343 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4344 				    IFF_ALLMULTI));
4345 
4346 	/*
4347 	 *	Load in the correct multicast list now the flags have changed.
4348 	 */
4349 
4350 	if ((old_flags ^ flags) & IFF_MULTICAST)
4351 		dev_change_rx_flags(dev, IFF_MULTICAST);
4352 
4353 	dev_set_rx_mode(dev);
4354 
4355 	/*
4356 	 *	Have we downed the interface. We handle IFF_UP ourselves
4357 	 *	according to user attempts to set it, rather than blindly
4358 	 *	setting it.
4359 	 */
4360 
4361 	ret = 0;
4362 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4363 		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4364 
4365 		if (!ret)
4366 			dev_set_rx_mode(dev);
4367 	}
4368 
4369 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4370 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4371 
4372 		dev->gflags ^= IFF_PROMISC;
4373 		dev_set_promiscuity(dev, inc);
4374 	}
4375 
4376 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4377 	   is important. Some (broken) drivers set IFF_PROMISC when
4378 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4379 	 */
4380 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4381 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4382 
4383 		dev->gflags ^= IFF_ALLMULTI;
4384 		dev_set_allmulti(dev, inc);
4385 	}
4386 
4387 	return ret;
4388 }
4389 
4390 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4391 {
4392 	unsigned int changes = dev->flags ^ old_flags;
4393 
4394 	if (changes & IFF_UP) {
4395 		if (dev->flags & IFF_UP)
4396 			call_netdevice_notifiers(NETDEV_UP, dev);
4397 		else
4398 			call_netdevice_notifiers(NETDEV_DOWN, dev);
4399 	}
4400 
4401 	if (dev->flags & IFF_UP &&
4402 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4403 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4404 }
4405 
4406 /**
4407  *	dev_change_flags - change device settings
4408  *	@dev: device
4409  *	@flags: device state flags
4410  *
4411  *	Change settings on device based state flags. The flags are
4412  *	in the userspace exported format.
4413  */
4414 int dev_change_flags(struct net_device *dev, unsigned flags)
4415 {
4416 	int ret, changes;
4417 	int old_flags = dev->flags;
4418 
4419 	ret = __dev_change_flags(dev, flags);
4420 	if (ret < 0)
4421 		return ret;
4422 
4423 	changes = old_flags ^ dev->flags;
4424 	if (changes)
4425 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4426 
4427 	__dev_notify_flags(dev, old_flags);
4428 	return ret;
4429 }
4430 EXPORT_SYMBOL(dev_change_flags);
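
/*
 * Illustrative sketch (not part of dev.c): bringing an interface up from
 * kernel code, the same way the SIOCSIFFLAGS handler below does.  "dev" is
 * hypothetical and the caller must hold RTNL.
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 *	rtnl_unlock();
 */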
4431 
4432 /**
4433  *	dev_set_mtu - Change maximum transfer unit
4434  *	@dev: device
4435  *	@new_mtu: new transfer unit
4436  *
4437  *	Change the maximum transfer size of the network device.
4438  */
4439 int dev_set_mtu(struct net_device *dev, int new_mtu)
4440 {
4441 	const struct net_device_ops *ops = dev->netdev_ops;
4442 	int err;
4443 
4444 	if (new_mtu == dev->mtu)
4445 		return 0;
4446 
4447 	/*	MTU must be positive.	 */
4448 	if (new_mtu < 0)
4449 		return -EINVAL;
4450 
4451 	if (!netif_device_present(dev))
4452 		return -ENODEV;
4453 
4454 	err = 0;
4455 	if (ops->ndo_change_mtu)
4456 		err = ops->ndo_change_mtu(dev, new_mtu);
4457 	else
4458 		dev->mtu = new_mtu;
4459 
4460 	if (!err && dev->flags & IFF_UP)
4461 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4462 	return err;
4463 }
4464 EXPORT_SYMBOL(dev_set_mtu);
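
/*
 * Illustrative sketch (not part of dev.c): shrinking the MTU of a device, for
 * example underneath a tunnel.  "dev" and the value 1400 are hypothetical;
 * callers normally hold RTNL so that NETDEV_CHANGEMTU notifiers run in the
 * expected context.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 1400);
 *	rtnl_unlock();
 *	if (err)
 *		printk(KERN_WARNING "MTU change failed: %d\n", err);
 */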
4465 
4466 /**
4467  *	dev_set_mac_address - Change Media Access Control Address
4468  *	@dev: device
4469  *	@sa: new address
4470  *
4471  *	Change the hardware (MAC) address of the device
4472  */
4473 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4474 {
4475 	const struct net_device_ops *ops = dev->netdev_ops;
4476 	int err;
4477 
4478 	if (!ops->ndo_set_mac_address)
4479 		return -EOPNOTSUPP;
4480 	if (sa->sa_family != dev->type)
4481 		return -EINVAL;
4482 	if (!netif_device_present(dev))
4483 		return -ENODEV;
4484 	err = ops->ndo_set_mac_address(dev, sa);
4485 	if (!err)
4486 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4487 	return err;
4488 }
4489 EXPORT_SYMBOL(dev_set_mac_address);
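
/*
 * Illustrative sketch (not part of dev.c): setting a new hardware address on
 * an Ethernet device.  "dev" and "new_mac" are hypothetical; sa_family must
 * match dev->type or -EINVAL is returned above.
 *
 *	struct sockaddr sa;
 *	unsigned char new_mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *
 *	sa.sa_family = dev->type;		(ARPHRD_ETHER for Ethernet)
 *	memcpy(sa.sa_data, new_mac, ETH_ALEN);
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */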
4490 
4491 /*
4492  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4493  */
4494 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4495 {
4496 	int err;
4497 	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4498 
4499 	if (!dev)
4500 		return -ENODEV;
4501 
4502 	switch (cmd) {
4503 	case SIOCGIFFLAGS:	/* Get interface flags */
4504 		ifr->ifr_flags = (short) dev_get_flags(dev);
4505 		return 0;
4506 
4507 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4508 				   (currently unused) */
4509 		ifr->ifr_metric = 0;
4510 		return 0;
4511 
4512 	case SIOCGIFMTU:	/* Get the MTU of a device */
4513 		ifr->ifr_mtu = dev->mtu;
4514 		return 0;
4515 
4516 	case SIOCGIFHWADDR:
4517 		if (!dev->addr_len)
4518 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4519 		else
4520 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4521 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4522 		ifr->ifr_hwaddr.sa_family = dev->type;
4523 		return 0;
4524 
4525 	case SIOCGIFSLAVE:
4526 		err = -EINVAL;
4527 		break;
4528 
4529 	case SIOCGIFMAP:
4530 		ifr->ifr_map.mem_start = dev->mem_start;
4531 		ifr->ifr_map.mem_end   = dev->mem_end;
4532 		ifr->ifr_map.base_addr = dev->base_addr;
4533 		ifr->ifr_map.irq       = dev->irq;
4534 		ifr->ifr_map.dma       = dev->dma;
4535 		ifr->ifr_map.port      = dev->if_port;
4536 		return 0;
4537 
4538 	case SIOCGIFINDEX:
4539 		ifr->ifr_ifindex = dev->ifindex;
4540 		return 0;
4541 
4542 	case SIOCGIFTXQLEN:
4543 		ifr->ifr_qlen = dev->tx_queue_len;
4544 		return 0;
4545 
4546 	default:
4547 		/* dev_ioctl() should ensure this case
4548 		 * is never reached
4549 		 */
4550 		WARN_ON(1);
4551 		err = -EINVAL;
4552 		break;
4553 
4554 	}
4555 	return err;
4556 }
4557 
4558 /*
4559  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4560  */
4561 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4562 {
4563 	int err;
4564 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4565 	const struct net_device_ops *ops;
4566 
4567 	if (!dev)
4568 		return -ENODEV;
4569 
4570 	ops = dev->netdev_ops;
4571 
4572 	switch (cmd) {
4573 	case SIOCSIFFLAGS:	/* Set interface flags */
4574 		return dev_change_flags(dev, ifr->ifr_flags);
4575 
4576 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4577 				   (currently unused) */
4578 		return -EOPNOTSUPP;
4579 
4580 	case SIOCSIFMTU:	/* Set the MTU of a device */
4581 		return dev_set_mtu(dev, ifr->ifr_mtu);
4582 
4583 	case SIOCSIFHWADDR:
4584 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4585 
4586 	case SIOCSIFHWBROADCAST:
4587 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4588 			return -EINVAL;
4589 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4590 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4591 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4592 		return 0;
4593 
4594 	case SIOCSIFMAP:
4595 		if (ops->ndo_set_config) {
4596 			if (!netif_device_present(dev))
4597 				return -ENODEV;
4598 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4599 		}
4600 		return -EOPNOTSUPP;
4601 
4602 	case SIOCADDMULTI:
4603 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4604 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4605 			return -EINVAL;
4606 		if (!netif_device_present(dev))
4607 			return -ENODEV;
4608 		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4609 
4610 	case SIOCDELMULTI:
4611 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4612 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4613 			return -EINVAL;
4614 		if (!netif_device_present(dev))
4615 			return -ENODEV;
4616 		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4617 
4618 	case SIOCSIFTXQLEN:
4619 		if (ifr->ifr_qlen < 0)
4620 			return -EINVAL;
4621 		dev->tx_queue_len = ifr->ifr_qlen;
4622 		return 0;
4623 
4624 	case SIOCSIFNAME:
4625 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4626 		return dev_change_name(dev, ifr->ifr_newname);
4627 
4628 	/*
4629 	 *	Unknown or private ioctl
4630 	 */
4631 	default:
4632 		if ((cmd >= SIOCDEVPRIVATE &&
4633 		    cmd <= SIOCDEVPRIVATE + 15) ||
4634 		    cmd == SIOCBONDENSLAVE ||
4635 		    cmd == SIOCBONDRELEASE ||
4636 		    cmd == SIOCBONDSETHWADDR ||
4637 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4638 		    cmd == SIOCBONDINFOQUERY ||
4639 		    cmd == SIOCBONDCHANGEACTIVE ||
4640 		    cmd == SIOCGMIIPHY ||
4641 		    cmd == SIOCGMIIREG ||
4642 		    cmd == SIOCSMIIREG ||
4643 		    cmd == SIOCBRADDIF ||
4644 		    cmd == SIOCBRDELIF ||
4645 		    cmd == SIOCSHWTSTAMP ||
4646 		    cmd == SIOCWANDEV) {
4647 			err = -EOPNOTSUPP;
4648 			if (ops->ndo_do_ioctl) {
4649 				if (netif_device_present(dev))
4650 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4651 				else
4652 					err = -ENODEV;
4653 			}
4654 		} else
4655 			err = -EINVAL;
4656 
4657 	}
4658 	return err;
4659 }
4660 
4661 /*
4662  *	This function handles all "interface"-type I/O control requests. The actual
4663  *	'doing' part of this is dev_ifsioc above.
4664  */
4665 
4666 /**
4667  *	dev_ioctl	-	network device ioctl
4668  *	@net: the applicable net namespace
4669  *	@cmd: command to issue
4670  *	@arg: pointer to a struct ifreq in user space
4671  *
4672  *	Issue ioctl functions to devices. This is normally called by the
4673  *	user space syscall interfaces but can sometimes be useful for
4674  *	other purposes. The return value is the return from the syscall if
4675  *	positive or a negative errno code on error.
4676  */
4677 
4678 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4679 {
4680 	struct ifreq ifr;
4681 	int ret;
4682 	char *colon;
4683 
4684 	/* One special case: SIOCGIFCONF takes ifconf argument
4685 	   and requires shared lock, because it sleeps writing
4686 	   to user space.
4687 	 */
4688 
4689 	if (cmd == SIOCGIFCONF) {
4690 		rtnl_lock();
4691 		ret = dev_ifconf(net, (char __user *) arg);
4692 		rtnl_unlock();
4693 		return ret;
4694 	}
4695 	if (cmd == SIOCGIFNAME)
4696 		return dev_ifname(net, (struct ifreq __user *)arg);
4697 
4698 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4699 		return -EFAULT;
4700 
4701 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4702 
4703 	colon = strchr(ifr.ifr_name, ':');
4704 	if (colon)
4705 		*colon = 0;
4706 
4707 	/*
4708 	 *	See which interface the caller is talking about.
4709 	 */
4710 
4711 	switch (cmd) {
4712 	/*
4713 	 *	These ioctl calls:
4714 	 *	- can be done by all.
4715 	 *	- atomic and do not require locking.
4716 	 *	- return a value
4717 	 */
4718 	case SIOCGIFFLAGS:
4719 	case SIOCGIFMETRIC:
4720 	case SIOCGIFMTU:
4721 	case SIOCGIFHWADDR:
4722 	case SIOCGIFSLAVE:
4723 	case SIOCGIFMAP:
4724 	case SIOCGIFINDEX:
4725 	case SIOCGIFTXQLEN:
4726 		dev_load(net, ifr.ifr_name);
4727 		rcu_read_lock();
4728 		ret = dev_ifsioc_locked(net, &ifr, cmd);
4729 		rcu_read_unlock();
4730 		if (!ret) {
4731 			if (colon)
4732 				*colon = ':';
4733 			if (copy_to_user(arg, &ifr,
4734 					 sizeof(struct ifreq)))
4735 				ret = -EFAULT;
4736 		}
4737 		return ret;
4738 
4739 	case SIOCETHTOOL:
4740 		dev_load(net, ifr.ifr_name);
4741 		rtnl_lock();
4742 		ret = dev_ethtool(net, &ifr);
4743 		rtnl_unlock();
4744 		if (!ret) {
4745 			if (colon)
4746 				*colon = ':';
4747 			if (copy_to_user(arg, &ifr,
4748 					 sizeof(struct ifreq)))
4749 				ret = -EFAULT;
4750 		}
4751 		return ret;
4752 
4753 	/*
4754 	 *	These ioctl calls:
4755 	 *	- require superuser power.
4756 	 *	- require strict serialization.
4757 	 *	- return a value
4758 	 */
4759 	case SIOCGMIIPHY:
4760 	case SIOCGMIIREG:
4761 	case SIOCSIFNAME:
4762 		if (!capable(CAP_NET_ADMIN))
4763 			return -EPERM;
4764 		dev_load(net, ifr.ifr_name);
4765 		rtnl_lock();
4766 		ret = dev_ifsioc(net, &ifr, cmd);
4767 		rtnl_unlock();
4768 		if (!ret) {
4769 			if (colon)
4770 				*colon = ':';
4771 			if (copy_to_user(arg, &ifr,
4772 					 sizeof(struct ifreq)))
4773 				ret = -EFAULT;
4774 		}
4775 		return ret;
4776 
4777 	/*
4778 	 *	These ioctl calls:
4779 	 *	- require superuser power.
4780 	 *	- require strict serialization.
4781 	 *	- do not return a value
4782 	 */
4783 	case SIOCSIFFLAGS:
4784 	case SIOCSIFMETRIC:
4785 	case SIOCSIFMTU:
4786 	case SIOCSIFMAP:
4787 	case SIOCSIFHWADDR:
4788 	case SIOCSIFSLAVE:
4789 	case SIOCADDMULTI:
4790 	case SIOCDELMULTI:
4791 	case SIOCSIFHWBROADCAST:
4792 	case SIOCSIFTXQLEN:
4793 	case SIOCSMIIREG:
4794 	case SIOCBONDENSLAVE:
4795 	case SIOCBONDRELEASE:
4796 	case SIOCBONDSETHWADDR:
4797 	case SIOCBONDCHANGEACTIVE:
4798 	case SIOCBRADDIF:
4799 	case SIOCBRDELIF:
4800 	case SIOCSHWTSTAMP:
4801 		if (!capable(CAP_NET_ADMIN))
4802 			return -EPERM;
4803 		/* fall through */
4804 	case SIOCBONDSLAVEINFOQUERY:
4805 	case SIOCBONDINFOQUERY:
4806 		dev_load(net, ifr.ifr_name);
4807 		rtnl_lock();
4808 		ret = dev_ifsioc(net, &ifr, cmd);
4809 		rtnl_unlock();
4810 		return ret;
4811 
4812 	case SIOCGIFMEM:
4813 		/* Get the per device memory space. We can add this but
4814 		 * currently do not support it */
4815 	case SIOCSIFMEM:
4816 		/* Set the per device memory buffer space.
4817 		 * Not applicable in our case */
4818 	case SIOCSIFLINK:
4819 		return -EINVAL;
4820 
4821 	/*
4822 	 *	Unknown or private ioctl.
4823 	 */
4824 	default:
4825 		if (cmd == SIOCWANDEV ||
4826 		    (cmd >= SIOCDEVPRIVATE &&
4827 		     cmd <= SIOCDEVPRIVATE + 15)) {
4828 			dev_load(net, ifr.ifr_name);
4829 			rtnl_lock();
4830 			ret = dev_ifsioc(net, &ifr, cmd);
4831 			rtnl_unlock();
4832 			if (!ret && copy_to_user(arg, &ifr,
4833 						 sizeof(struct ifreq)))
4834 				ret = -EFAULT;
4835 			return ret;
4836 		}
4837 		/* Take care of Wireless Extensions */
4838 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4839 			return wext_handle_ioctl(net, &ifr, cmd, arg);
4840 		return -EINVAL;
4841 	}
4842 }
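
/*
 * Illustrative sketch (not part of dev.c): the user space side of the ioctl
 * path above, reading the MTU of "eth0" through SIOCGIFMTU.  The interface
 * name is an arbitrary example and error handling is omitted.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *			printf("%s mtu %d\n", ifr.ifr_name, ifr.ifr_mtu);
 *		close(fd);
 *		return 0;
 *	}
 */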
4843 
4844 
4845 /**
4846  *	dev_new_index	-	allocate an ifindex
4847  *	@net: the applicable net namespace
4848  *
4849  *	Returns a suitable unique value for a new device interface
4850  *	number.  The caller must hold the rtnl semaphore or the
4851  *	dev_base_lock to be sure it remains unique.
4852  */
4853 static int dev_new_index(struct net *net)
4854 {
4855 	static int ifindex;
4856 	for (;;) {
4857 		if (++ifindex <= 0)
4858 			ifindex = 1;
4859 		if (!__dev_get_by_index(net, ifindex))
4860 			return ifindex;
4861 	}
4862 }
4863 
4864 /* Delayed registration/unregisteration */
4865 static LIST_HEAD(net_todo_list);
4866 
4867 static void net_set_todo(struct net_device *dev)
4868 {
4869 	list_add_tail(&dev->todo_list, &net_todo_list);
4870 }
4871 
4872 static void rollback_registered_many(struct list_head *head)
4873 {
4874 	struct net_device *dev, *tmp;
4875 
4876 	BUG_ON(dev_boot_phase);
4877 	ASSERT_RTNL();
4878 
4879 	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
4880 		/* Some devices get unregistered here without ever having been
4881 		 * registered, as part of initialization unwind. Remove those
4882 		 * devices and proceed with the remaining.
4883 		 */
4884 		if (dev->reg_state == NETREG_UNINITIALIZED) {
4885 			pr_debug("unregister_netdevice: device %s/%p never "
4886 				 "was registered\n", dev->name, dev);
4887 
4888 			WARN_ON(1);
4889 			list_del(&dev->unreg_list);
4890 			continue;
4891 		}
4892 
4893 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
4894 
4895 		/* If device is running, close it first. */
4896 		dev_close(dev);
4897 
4898 		/* And unlink it from device chain. */
4899 		unlist_netdevice(dev);
4900 
4901 		dev->reg_state = NETREG_UNREGISTERING;
4902 	}
4903 
4904 	synchronize_net();
4905 
4906 	list_for_each_entry(dev, head, unreg_list) {
4907 		/* Shutdown queueing discipline. */
4908 		dev_shutdown(dev);
4909 
4910 
4911 		/* Notify protocols that we are about to destroy
4912 		   this device. They should clean all the things.
4913 		*/
4914 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4915 
4916 		if (!dev->rtnl_link_ops ||
4917 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
4918 			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
4919 
4920 		/*
4921 		 *	Flush the unicast and multicast chains
4922 		 */
4923 		dev_uc_flush(dev);
4924 		dev_mc_flush(dev);
4925 
4926 		if (dev->netdev_ops->ndo_uninit)
4927 			dev->netdev_ops->ndo_uninit(dev);
4928 
4929 		/* Notifier chain MUST detach us from master device. */
4930 		WARN_ON(dev->master);
4931 
4932 		/* Remove entries from kobject tree */
4933 		netdev_unregister_kobject(dev);
4934 	}
4935 
4936 	/* Process any work delayed until the end of the batch */
4937 	dev = list_first_entry(head, struct net_device, unreg_list);
4938 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4939 
4940 	rcu_barrier();
4941 
4942 	list_for_each_entry(dev, head, unreg_list)
4943 		dev_put(dev);
4944 }
4945 
4946 static void rollback_registered(struct net_device *dev)
4947 {
4948 	LIST_HEAD(single);
4949 
4950 	list_add(&dev->unreg_list, &single);
4951 	rollback_registered_many(&single);
4952 }
4953 
4954 unsigned long netdev_fix_features(unsigned long features, const char *name)
4955 {
4956 	/* Fix illegal SG+CSUM combinations. */
4957 	if ((features & NETIF_F_SG) &&
4958 	    !(features & NETIF_F_ALL_CSUM)) {
4959 		if (name)
4960 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4961 			       "checksum feature.\n", name);
4962 		features &= ~NETIF_F_SG;
4963 	}
4964 
4965 	/* TSO requires that SG is present as well. */
4966 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4967 		if (name)
4968 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4969 			       "SG feature.\n", name);
4970 		features &= ~NETIF_F_TSO;
4971 	}
4972 
4973 	if (features & NETIF_F_UFO) {
4974 		if (!(features & NETIF_F_GEN_CSUM)) {
4975 			if (name)
4976 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4977 				       "since no NETIF_F_HW_CSUM feature.\n",
4978 				       name);
4979 			features &= ~NETIF_F_UFO;
4980 		}
4981 
4982 		if (!(features & NETIF_F_SG)) {
4983 			if (name)
4984 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4985 				       "since no NETIF_F_SG feature.\n", name);
4986 			features &= ~NETIF_F_UFO;
4987 		}
4988 	}
4989 
4990 	return features;
4991 }
4992 EXPORT_SYMBOL(netdev_fix_features);
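
/*
 * Illustrative sketch (not part of dev.c): what the dependency fixups above
 * do to an inconsistent feature mask.  The mask below asks for TSO without
 * scatter/gather, so NETIF_F_TSO is stripped (with a notice logged) and only
 * the checksum flag survives.  "example0" is a hypothetical device name.
 *
 *	unsigned long features = NETIF_F_TSO | NETIF_F_IP_CSUM;
 *
 *	features = netdev_fix_features(features, "example0");
 *	(features is now NETIF_F_IP_CSUM: TSO dropped for lack of NETIF_F_SG)
 */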
4993 
4994 /**
4995  *	netif_stacked_transfer_operstate -	transfer operstate
4996  *	@rootdev: the root or lower level device to transfer state from
4997  *	@dev: the device to transfer operstate to
4998  *
4999  *	Transfer operational state from root to device. This is normally
5000  *	called when a stacking relationship exists between the root
5001  *	device and the device (a leaf device).
5002  */
5003 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5004 					struct net_device *dev)
5005 {
5006 	if (rootdev->operstate == IF_OPER_DORMANT)
5007 		netif_dormant_on(dev);
5008 	else
5009 		netif_dormant_off(dev);
5010 
5011 	if (netif_carrier_ok(rootdev)) {
5012 		if (!netif_carrier_ok(dev))
5013 			netif_carrier_on(dev);
5014 	} else {
5015 		if (netif_carrier_ok(dev))
5016 			netif_carrier_off(dev);
5017 	}
5018 }
5019 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
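
/*
 * Illustrative sketch (not part of dev.c): a stacking driver (vlan/macvlan
 * style) propagating carrier and dormant state from its lower device when a
 * NETDEV_CHANGE event fires in its notifier.  "lower_dev" and "upper_dev"
 * are hypothetical.
 *
 *	case NETDEV_CHANGE:
 *		netif_stacked_transfer_operstate(lower_dev, upper_dev);
 *		break;
 */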
5020 
5021 static int netif_alloc_rx_queues(struct net_device *dev)
5022 {
5023 #ifdef CONFIG_RPS
5024 	unsigned int i, count = dev->num_rx_queues;
5025 	struct netdev_rx_queue *rx;
5026 
5027 	BUG_ON(count < 1);
5028 
5029 	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5030 	if (!rx) {
5031 		pr_err("netdev: Unable to allocate %u rx queues.\n", count);
5032 		return -ENOMEM;
5033 	}
5034 	dev->_rx = rx;
5035 
5036 	/*
5037 	 * Set a pointer to first element in the array which holds the
5038 	 * reference count.
5039 	 */
5040 	for (i = 0; i < count; i++)
5041 		rx[i].first = rx;
5042 #endif
5043 	return 0;
5044 }
5045 
5046 static int netif_alloc_netdev_queues(struct net_device *dev)
5047 {
5048 	unsigned int count = dev->num_tx_queues;
5049 	struct netdev_queue *tx;
5050 
5051 	BUG_ON(count < 1);
5052 
5053 	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5054 	if (!tx) {
5055 		pr_err("netdev: Unable to allocate %u tx queues.\n",
5056 		       count);
5057 		return -ENOMEM;
5058 	}
5059 	dev->_tx = tx;
5060 	return 0;
5061 }
5062 
5063 static void netdev_init_one_queue(struct net_device *dev,
5064 				  struct netdev_queue *queue,
5065 				  void *_unused)
5066 {
5067 	queue->dev = dev;
5068 
5069 	/* Initialize queue lock */
5070 	spin_lock_init(&queue->_xmit_lock);
5071 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5072 	queue->xmit_lock_owner = -1;
5073 }
5074 
5075 static void netdev_init_queues(struct net_device *dev)
5076 {
5077 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5078 	spin_lock_init(&dev->tx_global_lock);
5079 }
5080 
5081 /**
5082  *	register_netdevice	- register a network device
5083  *	@dev: device to register
5084  *
5085  *	Take a completed network device structure and add it to the kernel
5086  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5087  *	chain. 0 is returned on success. A negative errno code is returned
5088  *	on a failure to set up the device, or if the name is a duplicate.
5089  *
5090  *	Callers must hold the rtnl semaphore. You may want
5091  *	register_netdev() instead of this.
5092  *
5093  *	BUGS:
5094  *	The locking appears insufficient to guarantee two parallel registers
5095  *	will not get the same name.
5096  */
5097 
5098 int register_netdevice(struct net_device *dev)
5099 {
5100 	int ret;
5101 	struct net *net = dev_net(dev);
5102 
5103 	BUG_ON(dev_boot_phase);
5104 	ASSERT_RTNL();
5105 
5106 	might_sleep();
5107 
5108 	/* When net_devices are persistent, this will be fatal. */
5109 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5110 	BUG_ON(!net);
5111 
5112 	spin_lock_init(&dev->addr_list_lock);
5113 	netdev_set_addr_lockdep_class(dev);
5114 
5115 	dev->iflink = -1;
5116 
5117 	ret = netif_alloc_rx_queues(dev);
5118 	if (ret)
5119 		goto out;
5120 
5121 	ret = netif_alloc_netdev_queues(dev);
5122 	if (ret)
5123 		goto out;
5124 
5125 	netdev_init_queues(dev);
5126 
5127 	/* Init, if this function is available */
5128 	if (dev->netdev_ops->ndo_init) {
5129 		ret = dev->netdev_ops->ndo_init(dev);
5130 		if (ret) {
5131 			if (ret > 0)
5132 				ret = -EIO;
5133 			goto out;
5134 		}
5135 	}
5136 
5137 	ret = dev_get_valid_name(dev, dev->name, 0);
5138 	if (ret)
5139 		goto err_uninit;
5140 
5141 	dev->ifindex = dev_new_index(net);
5142 	if (dev->iflink == -1)
5143 		dev->iflink = dev->ifindex;
5144 
5145 	/* Fix illegal checksum combinations */
5146 	if ((dev->features & NETIF_F_HW_CSUM) &&
5147 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5148 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
5149 		       dev->name);
5150 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5151 	}
5152 
5153 	if ((dev->features & NETIF_F_NO_CSUM) &&
5154 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5155 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
5156 		       dev->name);
5157 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5158 	}
5159 
5160 	dev->features = netdev_fix_features(dev->features, dev->name);
5161 
5162 	/* Enable software GSO if SG is supported. */
5163 	if (dev->features & NETIF_F_SG)
5164 		dev->features |= NETIF_F_GSO;
5165 
5166 	/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5167 	 * vlan_dev_init() will do the dev->features check, so these features
5168 	 * are enabled only if supported by underlying device.
5169 	 */
5170 	dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
5171 
5172 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5173 	ret = notifier_to_errno(ret);
5174 	if (ret)
5175 		goto err_uninit;
5176 
5177 	ret = netdev_register_kobject(dev);
5178 	if (ret)
5179 		goto err_uninit;
5180 	dev->reg_state = NETREG_REGISTERED;
5181 
5182 	/*
5183 	 *	Default initial state at registry is that the
5184 	 *	device is present.
5185 	 */
5186 
5187 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5188 
5189 	dev_init_scheduler(dev);
5190 	dev_hold(dev);
5191 	list_netdevice(dev);
5192 
5193 	/* Notify protocols that a new device appeared. */
5194 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5195 	ret = notifier_to_errno(ret);
5196 	if (ret) {
5197 		rollback_registered(dev);
5198 		dev->reg_state = NETREG_UNREGISTERED;
5199 	}
5200 	/*
5201 	 *	Prevent userspace races by waiting until the network
5202 	 *	device is fully setup before sending notifications.
5203 	 */
5204 	if (!dev->rtnl_link_ops ||
5205 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5206 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5207 
5208 out:
5209 	return ret;
5210 
5211 err_uninit:
5212 	if (dev->netdev_ops->ndo_uninit)
5213 		dev->netdev_ops->ndo_uninit(dev);
5214 	goto out;
5215 }
5216 EXPORT_SYMBOL(register_netdevice);
5217 
5218 /**
5219  *	init_dummy_netdev	- init a dummy network device for NAPI
5220  *	@dev: device to init
5221  *
5222  *	This takes a network device structure and initializes the minimum
5223  *	number of fields so it can be used to schedule NAPI polls without
5224  *	registering a full blown interface. This is to be used by drivers
5225  *	that need to tie several hardware interfaces to a single NAPI
5226  *	poll scheduler due to HW limitations.
5227  */
5228 int init_dummy_netdev(struct net_device *dev)
5229 {
5230 	/* Clear everything. Note we don't initialize spinlocks
5231 	 * as they aren't supposed to be taken by any of the
5232 	 * NAPI code and this dummy netdev is supposed to be
5233 	 * only ever used for NAPI polls
5234 	 */
5235 	memset(dev, 0, sizeof(struct net_device));
5236 
5237 	/* make sure we BUG if trying to hit standard
5238 	 * register/unregister code path
5239 	 */
5240 	dev->reg_state = NETREG_DUMMY;
5241 
5242 	/* NAPI wants this */
5243 	INIT_LIST_HEAD(&dev->napi_list);
5244 
5245 	/* a dummy interface is started by default */
5246 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5247 	set_bit(__LINK_STATE_START, &dev->state);
5248 
5249 	/* Note : We don't allocate pcpu_refcnt for dummy devices,
5250 	 * because users of this 'device' don't need to change
5251 	 * its refcount.
5252 	 */
5253 
5254 	return 0;
5255 }
5256 EXPORT_SYMBOL_GPL(init_dummy_netdev);
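
/*
 * Illustrative sketch (not part of dev.c): the intended use of a dummy
 * netdev - a driver with one NAPI context shared by several hardware ports.
 * "adapter", its napi_dev/napi fields and "my_poll" are hypothetical.
 *
 *	init_dummy_netdev(&adapter->napi_dev);
 *	netif_napi_add(&adapter->napi_dev, &adapter->napi, my_poll, 64);
 *	...
 *	napi_enable(&adapter->napi);
 */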
5257 
5258 
5259 /**
5260  *	register_netdev	- register a network device
5261  *	@dev: device to register
5262  *
5263  *	Take a completed network device structure and add it to the kernel
5264  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5265  *	chain. 0 is returned on success. A negative errno code is returned
5266  *	on a failure to set up the device, or if the name is a duplicate.
5267  *
5268  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
5269  *	and expands the device name if you passed a format string to
5270  *	alloc_netdev.
5271  */
5272 int register_netdev(struct net_device *dev)
5273 {
5274 	int err;
5275 
5276 	rtnl_lock();
5277 
5278 	/*
5279 	 * If the name is a format string the caller wants us to do a
5280 	 * name allocation.
5281 	 */
5282 	if (strchr(dev->name, '%')) {
5283 		err = dev_alloc_name(dev, dev->name);
5284 		if (err < 0)
5285 			goto out;
5286 	}
5287 
5288 	err = register_netdevice(dev);
5289 out:
5290 	rtnl_unlock();
5291 	return err;
5292 }
5293 EXPORT_SYMBOL(register_netdev);
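
/*
 * Illustrative sketch (not part of dev.c): the usual probe/remove pairing
 * for an Ethernet driver.  "struct my_priv" is hypothetical.
 *
 *	dev = alloc_etherdev(sizeof(struct my_priv));
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);	(expands the "eth%d" template name)
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 *
 * and on removal:
 *
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */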
5294 
5295 int netdev_refcnt_read(const struct net_device *dev)
5296 {
5297 	int i, refcnt = 0;
5298 
5299 	for_each_possible_cpu(i)
5300 		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5301 	return refcnt;
5302 }
5303 EXPORT_SYMBOL(netdev_refcnt_read);
5304 
5305 /*
5306  * netdev_wait_allrefs - wait until all references are gone.
5307  *
5308  * This is called when unregistering network devices.
5309  *
5310  * Any protocol or device that holds a reference should register
5311  * for netdevice notification, and cleanup and put back the
5312  * reference if they receive an UNREGISTER event.
5313  * We can get stuck here if buggy protocols don't correctly
5314  * call dev_put.
5315  */
5316 static void netdev_wait_allrefs(struct net_device *dev)
5317 {
5318 	unsigned long rebroadcast_time, warning_time;
5319 	int refcnt;
5320 
5321 	linkwatch_forget_dev(dev);
5322 
5323 	rebroadcast_time = warning_time = jiffies;
5324 	refcnt = netdev_refcnt_read(dev);
5325 
5326 	while (refcnt != 0) {
5327 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5328 			rtnl_lock();
5329 
5330 			/* Rebroadcast unregister notification */
5331 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5332 			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5333 			 * should have already handled it the first time */
5334 
5335 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5336 				     &dev->state)) {
5337 				/* We must not have linkwatch events
5338 				 * pending on unregister. If this
5339 				 * happens, we simply run the queue
5340 				 * unscheduled, resulting in a noop
5341 				 * for this device.
5342 				 */
5343 				linkwatch_run_queue();
5344 			}
5345 
5346 			__rtnl_unlock();
5347 
5348 			rebroadcast_time = jiffies;
5349 		}
5350 
5351 		msleep(250);
5352 
5353 		refcnt = netdev_refcnt_read(dev);
5354 
5355 		if (time_after(jiffies, warning_time + 10 * HZ)) {
5356 			printk(KERN_EMERG "unregister_netdevice: "
5357 			       "waiting for %s to become free. Usage "
5358 			       "count = %d\n",
5359 			       dev->name, refcnt);
5360 			warning_time = jiffies;
5361 		}
5362 	}
5363 }
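
/*
 * Illustrative sketch (not part of dev.c): the notifier protocol the comment
 * above relies on.  A subsystem that takes long-lived references with
 * dev_hold() should drop them when the rebroadcast NETDEV_UNREGISTER event
 * arrives, otherwise this function loops forever.  In this kernel the
 * notifier's ptr argument is the net_device itself; "my_table_flush" is a
 * hypothetical helper, and the block is registered with
 * register_netdevice_notifier().
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER) {
 *			my_table_flush(dev);	(forget cached entries)
 *			dev_put(dev);		(release our dev_hold)
 *		}
 *		return NOTIFY_DONE;
 *	}
 */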
5364 
5365 /* The sequence is:
5366  *
5367  *	rtnl_lock();
5368  *	...
5369  *	register_netdevice(x1);
5370  *	register_netdevice(x2);
5371  *	...
5372  *	unregister_netdevice(y1);
5373  *	unregister_netdevice(y2);
5374  *      ...
5375  *	rtnl_unlock();
5376  *	free_netdev(y1);
5377  *	free_netdev(y2);
5378  *
5379  * We are invoked by rtnl_unlock().
5380  * This allows us to deal with problems:
5381  * 1) We can delete sysfs objects which invoke hotplug
5382  *    without deadlocking with linkwatch via keventd.
5383  * 2) Since we run with the RTNL semaphore not held, we can sleep
5384  *    safely in order to wait for the netdev refcnt to drop to zero.
5385  *
5386  * We must not return until all unregister events added during
5387  * the interval the lock was held have been completed.
5388  */
5389 void netdev_run_todo(void)
5390 {
5391 	struct list_head list;
5392 
5393 	/* Snapshot list, allow later requests */
5394 	list_replace_init(&net_todo_list, &list);
5395 
5396 	__rtnl_unlock();
5397 
5398 	while (!list_empty(&list)) {
5399 		struct net_device *dev
5400 			= list_first_entry(&list, struct net_device, todo_list);
5401 		list_del(&dev->todo_list);
5402 
5403 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5404 			printk(KERN_ERR "network todo '%s' but state %d\n",
5405 			       dev->name, dev->reg_state);
5406 			dump_stack();
5407 			continue;
5408 		}
5409 
5410 		dev->reg_state = NETREG_UNREGISTERED;
5411 
5412 		on_each_cpu(flush_backlog, dev, 1);
5413 
5414 		netdev_wait_allrefs(dev);
5415 
5416 		/* paranoia */
5417 		BUG_ON(netdev_refcnt_read(dev));
5418 		WARN_ON(rcu_dereference_raw(dev->ip_ptr));
5419 		WARN_ON(dev->ip6_ptr);
5420 		WARN_ON(dev->dn_ptr);
5421 
5422 		if (dev->destructor)
5423 			dev->destructor(dev);
5424 
5425 		/* Free network device */
5426 		kobject_put(&dev->dev.kobj);
5427 	}
5428 }
5429 
5430 /**
5431  *	dev_txq_stats_fold - fold tx_queues stats
5432  *	@dev: device to get statistics from
5433  *	@stats: struct rtnl_link_stats64 to hold results
5434  */
5435 void dev_txq_stats_fold(const struct net_device *dev,
5436 			struct rtnl_link_stats64 *stats)
5437 {
5438 	u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5439 	unsigned int i;
5440 	struct netdev_queue *txq;
5441 
5442 	for (i = 0; i < dev->num_tx_queues; i++) {
5443 		txq = netdev_get_tx_queue(dev, i);
5444 		spin_lock_bh(&txq->_xmit_lock);
5445 		tx_bytes   += txq->tx_bytes;
5446 		tx_packets += txq->tx_packets;
5447 		tx_dropped += txq->tx_dropped;
5448 		spin_unlock_bh(&txq->_xmit_lock);
5449 	}
5450 	if (tx_bytes || tx_packets || tx_dropped) {
5451 		stats->tx_bytes   = tx_bytes;
5452 		stats->tx_packets = tx_packets;
5453 		stats->tx_dropped = tx_dropped;
5454 	}
5455 }
5456 EXPORT_SYMBOL(dev_txq_stats_fold);
5457 
5458 /* Convert net_device_stats to rtnl_link_stats64.  They have the same
5459  * fields in the same order, with only the type differing.
5460  */
5461 static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
5462 				    const struct net_device_stats *netdev_stats)
5463 {
5464 #if BITS_PER_LONG == 64
5465 	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
5466 	memcpy(stats64, netdev_stats, sizeof(*stats64));
5467 #else
5468 	size_t i, n = sizeof(*stats64) / sizeof(u64);
5469 	const unsigned long *src = (const unsigned long *)netdev_stats;
5470 	u64 *dst = (u64 *)stats64;
5471 
5472 	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
5473 		     sizeof(*stats64) / sizeof(u64));
5474 	for (i = 0; i < n; i++)
5475 		dst[i] = src[i];
5476 #endif
5477 }
5478 
5479 /**
5480  *	dev_get_stats	- get network device statistics
5481  *	@dev: device to get statistics from
5482  *	@storage: place to store stats
5483  *
5484  *	Get network statistics from device. Return @storage.
5485  *	The device driver may provide its own method by setting
5486  *	dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
5487  *	otherwise the internal statistics structure is used.
5488  */
5489 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5490 					struct rtnl_link_stats64 *storage)
5491 {
5492 	const struct net_device_ops *ops = dev->netdev_ops;
5493 
5494 	if (ops->ndo_get_stats64) {
5495 		memset(storage, 0, sizeof(*storage));
5496 		ops->ndo_get_stats64(dev, storage);
5497 	} else if (ops->ndo_get_stats) {
5498 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
5499 	} else {
5500 		netdev_stats_to_stats64(storage, &dev->stats);
5501 		dev_txq_stats_fold(dev, storage);
5502 	}
5503 	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
5504 	return storage;
5505 }
5506 EXPORT_SYMBOL(dev_get_stats);
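
/*
 * Illustrative sketch (not part of dev.c): a reader of the 64-bit statistics,
 * following the same pattern dev_seq_printf_stats() uses above.  The storage
 * must be supplied by the caller because ndo_get_stats64 drivers fill it in
 * directly; "dev" is hypothetical.
 *
 *	struct rtnl_link_stats64 temp;
 *	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 *
 *	pr_info("%s: %llu rx packets\n", dev->name,
 *		(unsigned long long)stats->rx_packets);
 */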
5507 
5508 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
5509 {
5510 	struct netdev_queue *queue = dev_ingress_queue(dev);
5511 
5512 #ifdef CONFIG_NET_CLS_ACT
5513 	if (queue)
5514 		return queue;
5515 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
5516 	if (!queue)
5517 		return NULL;
5518 	netdev_init_one_queue(dev, queue, NULL);
5519 	queue->qdisc = &noop_qdisc;
5520 	queue->qdisc_sleeping = &noop_qdisc;
5521 	rcu_assign_pointer(dev->ingress_queue, queue);
5522 #endif
5523 	return queue;
5524 }
5525 
5526 /**
5527  *	alloc_netdev_mq - allocate network device
5528  *	@sizeof_priv:	size of private data to allocate space for
5529  *	@name:		device name format string
5530  *	@setup:		callback to initialize device
5531  *	@queue_count:	the number of subqueues to allocate
5532  *
5533  *	Allocates a struct net_device with private data area for driver use
5534  *	and performs basic initialization.  Also allocates subqueue structs
5535  *	for each queue on the device at the end of the netdevice.
5536  */
5537 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5538 		void (*setup)(struct net_device *), unsigned int queue_count)
5539 {
5540 	struct net_device *dev;
5541 	size_t alloc_size;
5542 	struct net_device *p;
5543 
5544 	BUG_ON(strlen(name) >= sizeof(dev->name));
5545 
5546 	if (queue_count < 1) {
5547 		pr_err("alloc_netdev: Unable to allocate device "
5548 		       "with zero queues.\n");
5549 		return NULL;
5550 	}
5551 
5552 	alloc_size = sizeof(struct net_device);
5553 	if (sizeof_priv) {
5554 		/* ensure 32-byte alignment of private area */
5555 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5556 		alloc_size += sizeof_priv;
5557 	}
5558 	/* ensure 32-byte alignment of whole construct */
5559 	alloc_size += NETDEV_ALIGN - 1;
5560 
5561 	p = kzalloc(alloc_size, GFP_KERNEL);
5562 	if (!p) {
5563 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5564 		return NULL;
5565 	}
5566 
5567 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5568 	dev->padded = (char *)dev - (char *)p;
5569 
5570 	dev->pcpu_refcnt = alloc_percpu(int);
5571 	if (!dev->pcpu_refcnt)
5572 		goto free_p;
5573 
5574 	if (dev_addr_init(dev))
5575 		goto free_pcpu;
5576 
5577 	dev_mc_init(dev);
5578 	dev_uc_init(dev);
5579 
5580 	dev_net_set(dev, &init_net);
5581 
5582 	dev->num_tx_queues = queue_count;
5583 	dev->real_num_tx_queues = queue_count;
5584 
5585 #ifdef CONFIG_RPS
5586 	dev->num_rx_queues = queue_count;
5587 	dev->real_num_rx_queues = queue_count;
5588 #endif
5589 
5590 	dev->gso_max_size = GSO_MAX_SIZE;
5591 
5592 	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5593 	dev->ethtool_ntuple_list.count = 0;
5594 	INIT_LIST_HEAD(&dev->napi_list);
5595 	INIT_LIST_HEAD(&dev->unreg_list);
5596 	INIT_LIST_HEAD(&dev->link_watch_list);
5597 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5598 	setup(dev);
5599 	strcpy(dev->name, name);
5600 	return dev;
5601 
5602 free_pcpu:
5603 	free_percpu(dev->pcpu_refcnt);
5604 free_p:
5605 	kfree(p);
5606 	return NULL;
5607 }
5608 EXPORT_SYMBOL(alloc_netdev_mq);
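
/*
 * Illustrative sketch (not part of dev.c): allocating a multiqueue Ethernet
 * device with room for driver private data.  "struct my_priv", the name
 * template and the queue count of 4 are hypothetical; ether_setup() fills in
 * the Ethernet defaults.
 *
 *	struct net_device *dev;
 *	struct my_priv *priv;
 *
 *	dev = alloc_netdev_mq(sizeof(struct my_priv), "myeth%d", ether_setup, 4);
 *	if (!dev)
 *		return -ENOMEM;
 *	priv = netdev_priv(dev);	(points into the aligned private area)
 */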
5609 
5610 /**
5611  *	free_netdev - free network device
5612  *	@dev: device
5613  *
5614  *	This function does the last stage of destroying an allocated device
5615  * 	interface. The reference to the device object is released.
5616  *	If this is the last reference then it will be freed.
5617  */
5618 void free_netdev(struct net_device *dev)
5619 {
5620 	struct napi_struct *p, *n;
5621 
5622 	release_net(dev_net(dev));
5623 
5624 	kfree(dev->_tx);
5625 
5626 	kfree(rcu_dereference_raw(dev->ingress_queue));
5627 
5628 	/* Flush device addresses */
5629 	dev_addr_flush(dev);
5630 
5631 	/* Clear ethtool n-tuple list */
5632 	ethtool_ntuple_flush(dev);
5633 
5634 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5635 		netif_napi_del(p);
5636 
5637 	free_percpu(dev->pcpu_refcnt);
5638 	dev->pcpu_refcnt = NULL;
5639 
5640 	/*  Compatibility with error handling in drivers */
5641 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5642 		kfree((char *)dev - dev->padded);
5643 		return;
5644 	}
5645 
5646 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5647 	dev->reg_state = NETREG_RELEASED;
5648 
5649 	/* will free via device release */
5650 	put_device(&dev->dev);
5651 }
5652 EXPORT_SYMBOL(free_netdev);
5653 
5654 /**
5655  *	synchronize_net -  Synchronize with packet receive processing
5656  *
5657  *	Wait for packets currently being received to be done.
5658  *	Does not block later packets from starting.
5659  */
5660 void synchronize_net(void)
5661 {
5662 	might_sleep();
5663 	synchronize_rcu();
5664 }
5665 EXPORT_SYMBOL(synchronize_net);
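
/*
 * Illustrative sketch (not part of dev.c): the classic unpublish-then-free
 * pattern this helper supports, e.g. when a packet handler is removed.
 * "entry" and its RCU-protected list are hypothetical.
 *
 *	list_del_rcu(&entry->list);	(no new readers can find it)
 *	synchronize_net();		(wait out readers already in flight)
 *	kfree(entry);			(nobody can still be using it)
 */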
5666 
5667 /**
5668  *	unregister_netdevice_queue - remove device from the kernel
5669  *	@dev: device
5670  *	@head: list
5671  *
5672  *	This function shuts down a device interface and removes it
5673  *	from the kernel tables.
5674  *	If @head is not NULL, the device is queued to be unregistered later.
5675  *
5676  *	Callers must hold the rtnl semaphore.  You may want
5677  *	unregister_netdev() instead of this.
5678  */
5679 
5680 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
5681 {
5682 	ASSERT_RTNL();
5683 
5684 	if (head) {
5685 		list_move_tail(&dev->unreg_list, head);
5686 	} else {
5687 		rollback_registered(dev);
5688 		/* Finish processing unregister after unlock */
5689 		net_set_todo(dev);
5690 	}
5691 }
5692 EXPORT_SYMBOL(unregister_netdevice_queue);
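
/*
 * Illustrative sketch (not part of dev.c): batching several unregistrations
 * so the expensive synchronization in rollback_registered_many() is paid
 * only once.  "dev1" and "dev2" come from the hypothetical caller.
 *
 *	LIST_HEAD(tmp_list);
 *
 *	rtnl_lock();
 *	unregister_netdevice_queue(dev1, &tmp_list);
 *	unregister_netdevice_queue(dev2, &tmp_list);
 *	unregister_netdevice_many(&tmp_list);
 *	rtnl_unlock();
 */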
5693 
5694 /**
5695  *	unregister_netdevice_many - unregister many devices
5696  *	@head: list of devices
5697  */
5698 void unregister_netdevice_many(struct list_head *head)
5699 {
5700 	struct net_device *dev;
5701 
5702 	if (!list_empty(head)) {
5703 		rollback_registered_many(head);
5704 		list_for_each_entry(dev, head, unreg_list)
5705 			net_set_todo(dev);
5706 	}
5707 }
5708 EXPORT_SYMBOL(unregister_netdevice_many);
5709 
5710 /**
5711  *	unregister_netdev - remove device from the kernel
5712  *	@dev: device
5713  *
5714  *	This function shuts down a device interface and removes it
5715  *	from the kernel tables.
5716  *
5717  *	This is just a wrapper for unregister_netdevice that takes
5718  *	the rtnl semaphore.  In general you want to use this and not
5719  *	unregister_netdevice.
5720  */
5721 void unregister_netdev(struct net_device *dev)
5722 {
5723 	rtnl_lock();
5724 	unregister_netdevice(dev);
5725 	rtnl_unlock();
5726 }
5727 EXPORT_SYMBOL(unregister_netdev);
5728 
5729 /**
5730  *	dev_change_net_namespace - move device to a different network namespace
5731  *	@dev: device
5732  *	@net: network namespace
5733  *	@pat: If not NULL name pattern to try if the current device name
5734  *	      is already taken in the destination network namespace.
5735  *
5736  *	This function shuts down a device interface and moves it
5737  *	to a new network namespace. On success 0 is returned, on
5738  *	a failure a negative errno code is returned.
5739  *
5740  *	Callers must hold the rtnl semaphore.
5741  */
5742 
5743 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5744 {
5745 	int err;
5746 
5747 	ASSERT_RTNL();
5748 
5749 	/* Don't allow namespace local devices to be moved. */
5750 	err = -EINVAL;
5751 	if (dev->features & NETIF_F_NETNS_LOCAL)
5752 		goto out;
5753 
5754 	/* Ensure the device has been registered */
5755 	err = -EINVAL;
5756 	if (dev->reg_state != NETREG_REGISTERED)
5757 		goto out;
5758 
5759 	/* Get out if there is nothing to do */
5760 	err = 0;
5761 	if (net_eq(dev_net(dev), net))
5762 		goto out;
5763 
5764 	/* Pick the destination device name, and ensure
5765 	 * we can use it in the destination network namespace.
5766 	 */
5767 	err = -EEXIST;
5768 	if (__dev_get_by_name(net, dev->name)) {
5769 		/* We get here if we can't use the current device name */
5770 		if (!pat)
5771 			goto out;
5772 		if (dev_get_valid_name(dev, pat, 1))
5773 			goto out;
5774 	}
5775 
5776 	/*
5777 	 * And now a mini version of register_netdevice and unregister_netdevice.
5778 	 */
5779 
5780 	/* If device is running close it first. */
5781 	dev_close(dev);
5782 
5783 	/* And unlink it from device chain */
5784 	err = -ENODEV;
5785 	unlist_netdevice(dev);
5786 
5787 	synchronize_net();
5788 
5789 	/* Shutdown queueing discipline. */
5790 	dev_shutdown(dev);
5791 
5792 	/* Notify protocols that we are about to destroy
5793 	   this device. They should clean up all of their state.
5794 
5795 	   Note that dev->reg_state stays at NETREG_REGISTERED.
5796 	   This is intentional: this way 8021q and macvlan know
5797 	   the device is just moving and can keep their slaves up.
5798 	*/
5799 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5800 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5801 
5802 	/*
5803 	 *	Flush the unicast and multicast chains
5804 	 */
5805 	dev_uc_flush(dev);
5806 	dev_mc_flush(dev);
5807 
5808 	/* Actually switch the network namespace */
5809 	dev_net_set(dev, net);
5810 
5811 	/* If there is an ifindex conflict assign a new one */
5812 	if (__dev_get_by_index(net, dev->ifindex)) {
5813 		int iflink = (dev->iflink == dev->ifindex);
5814 		dev->ifindex = dev_new_index(net);
5815 		if (iflink)
5816 			dev->iflink = dev->ifindex;
5817 	}
5818 
5819 	/* Fixup kobjects */
5820 	err = device_rename(&dev->dev, dev->name);
5821 	WARN_ON(err);
5822 
5823 	/* Add the device back in the hashes */
5824 	list_netdevice(dev);
5825 
5826 	/* Notify protocols that a new device has appeared. */
5827 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5828 
5829 	/*
5830 	 *	Prevent userspace races by waiting until the network
5831 	 *	device is fully set up before sending notifications.
5832 	 */
5833 	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5834 
5835 	synchronize_net();
5836 	err = 0;
5837 out:
5838 	return err;
5839 }
5840 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
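
/*
 * Usage sketch (illustrative, not part of this file): a caller already
 * holding the RTNL lock can move a device into another namespace, supplying
 * a printf-style fallback pattern in case the current name is taken there.
 * target_net and the "moved%d" pattern are hypothetical:
 *
 *	err = dev_change_net_namespace(dev, target_net, "moved%d");
 *	if (err)
 *		return err;	// device stays registered in its old namespace
 */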
5841 
5842 static int dev_cpu_callback(struct notifier_block *nfb,
5843 			    unsigned long action,
5844 			    void *ocpu)
5845 {
5846 	struct sk_buff **list_skb;
5847 	struct sk_buff *skb;
5848 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5849 	struct softnet_data *sd, *oldsd;
5850 
5851 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5852 		return NOTIFY_OK;
5853 
5854 	local_irq_disable();
5855 	cpu = smp_processor_id();
5856 	sd = &per_cpu(softnet_data, cpu);
5857 	oldsd = &per_cpu(softnet_data, oldcpu);
5858 
5859 	/* Find end of our completion_queue. */
5860 	list_skb = &sd->completion_queue;
5861 	while (*list_skb)
5862 		list_skb = &(*list_skb)->next;
5863 	/* Append completion queue from offline CPU. */
5864 	*list_skb = oldsd->completion_queue;
5865 	oldsd->completion_queue = NULL;
5866 
5867 	/* Append output queue from offline CPU. */
5868 	if (oldsd->output_queue) {
5869 		*sd->output_queue_tailp = oldsd->output_queue;
5870 		sd->output_queue_tailp = oldsd->output_queue_tailp;
5871 		oldsd->output_queue = NULL;
5872 		oldsd->output_queue_tailp = &oldsd->output_queue;
5873 	}
5874 
5875 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5876 	local_irq_enable();
5877 
5878 	/* Process offline CPU's input_pkt_queue */
5879 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
5880 		netif_rx(skb);
5881 		input_queue_head_incr(oldsd);
5882 	}
5883 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
5884 		netif_rx(skb);
5885 		input_queue_head_incr(oldsd);
5886 	}
5887 
5888 	return NOTIFY_OK;
5889 }
5890 
5891 
5892 /**
5893  *	netdev_increment_features - increment feature set by one
5894  *	@all: current feature set
5895  *	@one: new feature set
5896  *	@mask: mask feature set
5897  *
5898  *	Computes a new feature set after adding a device with feature set
5899  *	@one to the master device with current feature set @all.  Will not
5900  *	enable anything that is off in @mask. Returns the new feature set.
5901  */
5902 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5903 					unsigned long mask)
5904 {
5905 	/* If device needs checksumming, downgrade to it. */
5906 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5907 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5908 	else if (mask & NETIF_F_ALL_CSUM) {
5909 		/* If one device supports v4/v6 checksumming, set for all. */
5910 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5911 		    !(all & NETIF_F_GEN_CSUM)) {
5912 			all &= ~NETIF_F_ALL_CSUM;
5913 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5914 		}
5915 
5916 		/* If one device supports hw checksumming, set for all. */
5917 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5918 			all &= ~NETIF_F_ALL_CSUM;
5919 			all |= NETIF_F_HW_CSUM;
5920 		}
5921 	}
5922 
5923 	one |= NETIF_F_ALL_CSUM;
5924 
5925 	one |= all & NETIF_F_ONE_FOR_ALL;
5926 	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
5927 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5928 
5929 	return all;
5930 }
5931 EXPORT_SYMBOL(netdev_increment_features);
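
/*
 * Usage sketch (illustrative, not part of this file): a master device such
 * as bonding or a bridge would typically fold its slaves' feature sets
 * together with this helper.  master, slave and slaves are hypothetical:
 *
 *	unsigned long features = master->features & ~NETIF_F_ONE_FOR_ALL;
 *
 *	list_for_each_entry(slave, &slaves, list)
 *		features = netdev_increment_features(features,
 *						     slave->dev->features,
 *						     NETIF_F_ONE_FOR_ALL);
 *	master->features = features;
 */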
5932 
5933 static struct hlist_head *netdev_create_hash(void)
5934 {
5935 	int i;
5936 	struct hlist_head *hash;
5937 
5938 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5939 	if (hash != NULL)
5940 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5941 			INIT_HLIST_HEAD(&hash[i]);
5942 
5943 	return hash;
5944 }
5945 
5946 /* Initialize per network namespace state */
5947 static int __net_init netdev_init(struct net *net)
5948 {
5949 	INIT_LIST_HEAD(&net->dev_base_head);
5950 
5951 	net->dev_name_head = netdev_create_hash();
5952 	if (net->dev_name_head == NULL)
5953 		goto err_name;
5954 
5955 	net->dev_index_head = netdev_create_hash();
5956 	if (net->dev_index_head == NULL)
5957 		goto err_idx;
5958 
5959 	return 0;
5960 
5961 err_idx:
5962 	kfree(net->dev_name_head);
5963 err_name:
5964 	return -ENOMEM;
5965 }
5966 
5967 /**
5968  *	netdev_drivername - network driver for the device
5969  *	@dev: network device
5970  *	@buffer: buffer for resulting name
5971  *	@len: size of buffer
5972  *
5973  *	Determine network driver for device.
5974  */
5975 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5976 {
5977 	const struct device_driver *driver;
5978 	const struct device *parent;
5979 
5980 	if (len <= 0 || !buffer)
5981 		return buffer;
5982 	buffer[0] = 0;
5983 
5984 	parent = dev->dev.parent;
5985 
5986 	if (!parent)
5987 		return buffer;
5988 
5989 	driver = parent->driver;
5990 	if (driver && driver->name)
5991 		strlcpy(buffer, driver->name, len);
5992 	return buffer;
5993 }
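
/*
 * Usage sketch (illustrative, not part of this file): callers such as the
 * transmit watchdog pass a small stack buffer; the buffer is returned
 * (possibly empty) so the call can be used inline:
 *
 *	char drivername[64];
 *
 *	printk(KERN_WARNING "driver %s timed out\n",
 *	       netdev_drivername(dev, drivername, sizeof(drivername)));
 */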
5994 
5995 static int __netdev_printk(const char *level, const struct net_device *dev,
5996 			   struct va_format *vaf)
5997 {
5998 	int r;
5999 
6000 	if (dev && dev->dev.parent)
6001 		r = dev_printk(level, dev->dev.parent, "%s: %pV",
6002 			       netdev_name(dev), vaf);
6003 	else if (dev)
6004 		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6005 	else
6006 		r = printk("%s(NULL net_device): %pV", level, vaf);
6007 
6008 	return r;
6009 }
6010 
6011 int netdev_printk(const char *level, const struct net_device *dev,
6012 		  const char *format, ...)
6013 {
6014 	struct va_format vaf;
6015 	va_list args;
6016 	int r;
6017 
6018 	va_start(args, format);
6019 
6020 	vaf.fmt = format;
6021 	vaf.va = &args;
6022 
6023 	r = __netdev_printk(level, dev, &vaf);
6024 	va_end(args);
6025 
6026 	return r;
6027 }
6028 EXPORT_SYMBOL(netdev_printk);
6029 
6030 #define define_netdev_printk_level(func, level)			\
6031 int func(const struct net_device *dev, const char *fmt, ...)	\
6032 {								\
6033 	int r;							\
6034 	struct va_format vaf;					\
6035 	va_list args;						\
6036 								\
6037 	va_start(args, fmt);					\
6038 								\
6039 	vaf.fmt = fmt;						\
6040 	vaf.va = &args;						\
6041 								\
6042 	r = __netdev_printk(level, dev, &vaf);			\
6043 	va_end(args);						\
6044 								\
6045 	return r;						\
6046 }								\
6047 EXPORT_SYMBOL(func);
6048 
6049 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
6050 define_netdev_printk_level(netdev_alert, KERN_ALERT);
6051 define_netdev_printk_level(netdev_crit, KERN_CRIT);
6052 define_netdev_printk_level(netdev_err, KERN_ERR);
6053 define_netdev_printk_level(netdev_warn, KERN_WARNING);
6054 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
6055 define_netdev_printk_level(netdev_info, KERN_INFO);
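
/*
 * Usage sketch (illustrative, not part of this file): netdev_printk() takes
 * an explicit KERN_* level, while the wrappers generated above bake one in;
 * both prefix the message with the interface (and, if known, parent device)
 * name.  The message text here is made up:
 *
 *	netdev_printk(KERN_DEBUG, dev, "ring %d initialised\n", i);
 *	netdev_err(dev, "link training failed, error %d\n", err);
 */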
6056 
6057 static void __net_exit netdev_exit(struct net *net)
6058 {
6059 	kfree(net->dev_name_head);
6060 	kfree(net->dev_index_head);
6061 }
6062 
6063 static struct pernet_operations __net_initdata netdev_net_ops = {
6064 	.init = netdev_init,
6065 	.exit = netdev_exit,
6066 };
6067 
6068 static void __net_exit default_device_exit(struct net *net)
6069 {
6070 	struct net_device *dev, *aux;
6071 	/*
6072 	 * Push all migratable network devices back to the
6073 	 * initial network namespace
6074 	 */
6075 	rtnl_lock();
6076 	for_each_netdev_safe(net, dev, aux) {
6077 		int err;
6078 		char fb_name[IFNAMSIZ];
6079 
6080 		/* Ignore unmovable devices (e.g. loopback) */
6081 		if (dev->features & NETIF_F_NETNS_LOCAL)
6082 			continue;
6083 
6084 		/* Leave virtual devices for the generic cleanup */
6085 		if (dev->rtnl_link_ops)
6086 			continue;
6087 
6088 		/* Push remaining network devices to init_net */
6089 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6090 		err = dev_change_net_namespace(dev, &init_net, fb_name);
6091 		if (err) {
6092 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
6093 				__func__, dev->name, err);
6094 			BUG();
6095 		}
6096 	}
6097 	rtnl_unlock();
6098 }
6099 
6100 static void __net_exit default_device_exit_batch(struct list_head *net_list)
6101 {
6102 	/* At exit all network devices must be removed from a network
6103 	 * namespace.  Do this in the reverse order of registration.
6104 	 * Do this across as many network namespaces as possible to
6105 	 * improve batching efficiency.
6106 	 */
6107 	struct net_device *dev;
6108 	struct net *net;
6109 	LIST_HEAD(dev_kill_list);
6110 
6111 	rtnl_lock();
6112 	list_for_each_entry(net, net_list, exit_list) {
6113 		for_each_netdev_reverse(net, dev) {
6114 			if (dev->rtnl_link_ops)
6115 				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
6116 			else
6117 				unregister_netdevice_queue(dev, &dev_kill_list);
6118 		}
6119 	}
6120 	unregister_netdevice_many(&dev_kill_list);
6121 	rtnl_unlock();
6122 }
6123 
6124 static struct pernet_operations __net_initdata default_device_ops = {
6125 	.exit = default_device_exit,
6126 	.exit_batch = default_device_exit_batch,
6127 };
6128 
6129 /*
6130  *	Initialize the DEV module. At boot time this walks the device list and
6131  *	unhooks any devices that fail to initialise (normally hardware not
6132  *	present) and leaves us with a valid list of present and active devices.
6133  *
6134  */
6135 
6136 /*
6137  *       This is called single-threaded during boot, so no need
6138  *       to take the rtnl semaphore.
6139  */
6140 static int __init net_dev_init(void)
6141 {
6142 	int i, rc = -ENOMEM;
6143 
6144 	BUG_ON(!dev_boot_phase);
6145 
6146 	if (dev_proc_init())
6147 		goto out;
6148 
6149 	if (netdev_kobject_init())
6150 		goto out;
6151 
6152 	INIT_LIST_HEAD(&ptype_all);
6153 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
6154 		INIT_LIST_HEAD(&ptype_base[i]);
6155 
6156 	if (register_pernet_subsys(&netdev_net_ops))
6157 		goto out;
6158 
6159 	/*
6160 	 *	Initialise the packet receive queues.
6161 	 */
6162 
6163 	for_each_possible_cpu(i) {
6164 		struct softnet_data *sd = &per_cpu(softnet_data, i);
6165 
6166 		memset(sd, 0, sizeof(*sd));
6167 		skb_queue_head_init(&sd->input_pkt_queue);
6168 		skb_queue_head_init(&sd->process_queue);
6169 		sd->completion_queue = NULL;
6170 		INIT_LIST_HEAD(&sd->poll_list);
6171 		sd->output_queue = NULL;
6172 		sd->output_queue_tailp = &sd->output_queue;
6173 #ifdef CONFIG_RPS
6174 		sd->csd.func = rps_trigger_softirq;
6175 		sd->csd.info = sd;
6176 		sd->csd.flags = 0;
6177 		sd->cpu = i;
6178 #endif
6179 
6180 		sd->backlog.poll = process_backlog;
6181 		sd->backlog.weight = weight_p;
6182 		sd->backlog.gro_list = NULL;
6183 		sd->backlog.gro_count = 0;
6184 	}
6185 
6186 	dev_boot_phase = 0;
6187 
6188 	/* The loopback device is special: if any other network device
6189 	 * is present in a network namespace, the loopback device must
6190 	 * be present too. Since we now dynamically allocate and free the
6191 	 * loopback device, ensure this invariant is maintained by
6192 	 * keeping the loopback device as the first device on the
6193 	 * list of network devices.  This ensures the loopback device
6194 	 * is the first device that appears and the last network device
6195 	 * that disappears.
6196 	 */
6197 	if (register_pernet_device(&loopback_net_ops))
6198 		goto out;
6199 
6200 	if (register_pernet_device(&default_device_ops))
6201 		goto out;
6202 
6203 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6204 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6205 
6206 	hotcpu_notifier(dev_cpu_callback, 0);
6207 	dst_init();
6208 	dev_mcast_init();
6209 	rc = 0;
6210 out:
6211 	return rc;
6212 }
6213 
6214 subsys_initcall(net_dev_init);
6215 
6216 static int __init initialize_hashrnd(void)
6217 {
6218 	get_random_bytes(&hashrnd, sizeof(hashrnd));
6219 	return 0;
6220 }
6221 
6222 late_initcall_sync(initialize_hashrnd);
6223 
6224