xref: /linux/net/core/dev.c (revision 0ea6e61122196509af82cc4f36cbdaacbefb8227)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/hash.h>
83 #include <linux/slab.h>
84 #include <linux/sched.h>
85 #include <linux/mutex.h>
86 #include <linux/string.h>
87 #include <linux/mm.h>
88 #include <linux/socket.h>
89 #include <linux/sockios.h>
90 #include <linux/errno.h>
91 #include <linux/interrupt.h>
92 #include <linux/if_ether.h>
93 #include <linux/netdevice.h>
94 #include <linux/etherdevice.h>
95 #include <linux/ethtool.h>
96 #include <linux/notifier.h>
97 #include <linux/skbuff.h>
98 #include <net/net_namespace.h>
99 #include <net/sock.h>
100 #include <linux/rtnetlink.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/stat.h>
104 #include <linux/if_bridge.h>
105 #include <linux/if_macvlan.h>
106 #include <net/dst.h>
107 #include <net/pkt_sched.h>
108 #include <net/checksum.h>
109 #include <net/xfrm.h>
110 #include <linux/highmem.h>
111 #include <linux/init.h>
112 #include <linux/kmod.h>
113 #include <linux/module.h>
114 #include <linux/netpoll.h>
115 #include <linux/rcupdate.h>
116 #include <linux/delay.h>
117 #include <net/wext.h>
118 #include <net/iw_handler.h>
119 #include <asm/current.h>
120 #include <linux/audit.h>
121 #include <linux/dmaengine.h>
122 #include <linux/err.h>
123 #include <linux/ctype.h>
124 #include <linux/if_arp.h>
125 #include <linux/if_vlan.h>
126 #include <linux/ip.h>
127 #include <net/ip.h>
128 #include <linux/ipv6.h>
129 #include <linux/in.h>
130 #include <linux/jhash.h>
131 #include <linux/random.h>
132 #include <trace/events/napi.h>
133 #include <linux/pci.h>
134 
135 #include "net-sysfs.h"
136 
137 /* Instead of increasing this, you should create a hash table. */
138 #define MAX_GRO_SKBS 8
139 
140 /* This should be increased if a protocol with a bigger head is added. */
141 #define GRO_MAX_HEAD (MAX_HEADER + 128)
142 
143 /*
144  *	The list of packet types we will receive (as opposed to discard)
145  *	and the routines to invoke.
146  *
147  *	Why 16? Because with 16 the only overlap we get on a hash of the
148  *	low nibble of the protocol value is RARP/SNAP/X.25.
149  *
150  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
151  *             sure which should go first, but I bet it won't make much
152  *             difference if we are running VLANs.  The good news is that
153  *             this protocol won't be in the list unless compiled in, so
154  *             the average user (w/out VLANs) will not be adversely affected.
155  *             --BLG
156  *
157  *		0800	IP
158  *		8100    802.1Q VLAN
159  *		0001	802.3
160  *		0002	AX.25
161  *		0004	802.2
162  *		8035	RARP
163  *		0005	SNAP
164  *		0805	X.25
165  *		0806	ARP
166  *		8137	IPX
167  *		0009	Localtalk
168  *		86DD	IPv6
169  */
170 
171 #define PTYPE_HASH_SIZE	(16)
172 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
173 
174 static DEFINE_SPINLOCK(ptype_lock);
175 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
176 static struct list_head ptype_all __read_mostly;	/* Taps */
177 
178 /*
179  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
180  * semaphore.
181  *
182  * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
183  *
184  * Writers must hold the rtnl semaphore while they loop through the
185  * dev_base_head list, and hold dev_base_lock for writing when they do the
186  * actual updates.  This allows pure readers to access the list even
187  * while a writer is preparing to update it.
188  *
189  * To put it another way, dev_base_lock is held for writing only to
190  * protect against pure readers; the rtnl semaphore provides the
191  * protection against other writers.
192  *
193  * See, for example usages, register_netdevice() and
194  * unregister_netdevice(), which must be called with the rtnl
195  * semaphore held.
196  */
197 DEFINE_RWLOCK(dev_base_lock);
198 EXPORT_SYMBOL(dev_base_lock);
199 
200 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
201 {
202 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
203 	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
204 }
205 
206 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
207 {
208 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
209 }
210 
211 static inline void rps_lock(struct softnet_data *sd)
212 {
213 #ifdef CONFIG_RPS
214 	spin_lock(&sd->input_pkt_queue.lock);
215 #endif
216 }
217 
218 static inline void rps_unlock(struct softnet_data *sd)
219 {
220 #ifdef CONFIG_RPS
221 	spin_unlock(&sd->input_pkt_queue.lock);
222 #endif
223 }
224 
225 /* Device list insertion */
226 static int list_netdevice(struct net_device *dev)
227 {
228 	struct net *net = dev_net(dev);
229 
230 	ASSERT_RTNL();
231 
232 	write_lock_bh(&dev_base_lock);
233 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
234 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
235 	hlist_add_head_rcu(&dev->index_hlist,
236 			   dev_index_hash(net, dev->ifindex));
237 	write_unlock_bh(&dev_base_lock);
238 	return 0;
239 }
240 
241 /* Device list removal
242  * caller must respect a RCU grace period before freeing/reusing dev
243  */
244 static void unlist_netdevice(struct net_device *dev)
245 {
246 	ASSERT_RTNL();
247 
248 	/* Unlink dev from the device chain */
249 	write_lock_bh(&dev_base_lock);
250 	list_del_rcu(&dev->dev_list);
251 	hlist_del_rcu(&dev->name_hlist);
252 	hlist_del_rcu(&dev->index_hlist);
253 	write_unlock_bh(&dev_base_lock);
254 }
255 
256 /*
257  *	Our notifier list
258  */
259 
260 static RAW_NOTIFIER_HEAD(netdev_chain);
261 
262 /*
263  *	Device drivers call our routines to queue packets here. We empty the
264  *	queue in the local softnet handler.
265  */
266 
267 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
268 EXPORT_PER_CPU_SYMBOL(softnet_data);
269 
270 #ifdef CONFIG_LOCKDEP
271 /*
272  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
273  * according to dev->type
274  */
275 static const unsigned short netdev_lock_type[] =
276 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
277 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
278 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
279 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
280 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
281 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
282 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
283 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
284 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
285 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
286 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
287 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
288 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
289 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
290 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
291 	 ARPHRD_VOID, ARPHRD_NONE};
292 
293 static const char *const netdev_lock_name[] =
294 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
295 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
296 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
297 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
298 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
299 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
300 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
301 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
302 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
303 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
304 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
305 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
306 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
307 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
308 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
309 	 "_xmit_VOID", "_xmit_NONE"};
310 
311 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
312 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
313 
314 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
315 {
316 	int i;
317 
318 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
319 		if (netdev_lock_type[i] == dev_type)
320 			return i;
321 	/* the last key is used by default */
322 	return ARRAY_SIZE(netdev_lock_type) - 1;
323 }
324 
325 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
326 						 unsigned short dev_type)
327 {
328 	int i;
329 
330 	i = netdev_lock_pos(dev_type);
331 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
332 				   netdev_lock_name[i]);
333 }
334 
335 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
336 {
337 	int i;
338 
339 	i = netdev_lock_pos(dev->type);
340 	lockdep_set_class_and_name(&dev->addr_list_lock,
341 				   &netdev_addr_lock_key[i],
342 				   netdev_lock_name[i]);
343 }
344 #else
345 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
346 						 unsigned short dev_type)
347 {
348 }
349 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
350 {
351 }
352 #endif
353 
354 /*******************************************************************************
355 
356 		Protocol management and registration routines
357 
358 *******************************************************************************/
359 
360 /*
361  *	Add a protocol ID to the list. Now that the input handler is
362  *	smarter we can dispense with all the messy stuff that used to be
363  *	here.
364  *
365  *	BEWARE!!! Protocol handlers, mangling input packets,
366  *	MUST BE last in hash buckets and checking protocol handlers
367  *	MUST start from promiscuous ptype_all chain in net_bh.
368  *	It is true now, do not change it.
369  *	Explanation follows: if a protocol handler that mangles packets were
370  *	first on the list, it could not detect that the packet is cloned
371  *	and should be copied-on-write; it would modify the clone in place
372  *	and subsequent readers would get a broken packet.
373  *							--ANK (980803)
374  */
375 
376 /**
377  *	dev_add_pack - add packet handler
378  *	@pt: packet type declaration
379  *
380  *	Add a protocol handler to the networking stack. The passed &packet_type
381  *	is linked into kernel lists and may not be freed until it has been
382  *	removed from the kernel lists.
383  *
384  *	This call does not sleep and therefore cannot guarantee that
385  *	all CPUs that are in the middle of receiving packets will see
386  *	the new packet type (until the next packet is received).
387  */
388 
389 void dev_add_pack(struct packet_type *pt)
390 {
391 	int hash;
392 
393 	spin_lock_bh(&ptype_lock);
394 	if (pt->type == htons(ETH_P_ALL))
395 		list_add_rcu(&pt->list, &ptype_all);
396 	else {
397 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
398 		list_add_rcu(&pt->list, &ptype_base[hash]);
399 	}
400 	spin_unlock_bh(&ptype_lock);
401 }
402 EXPORT_SYMBOL(dev_add_pack);
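
/*
 * Example (an illustrative sketch, not part of this file): a module that
 * taps every incoming packet by registering an ETH_P_ALL handler. The
 * handler owns the skb reference it is given and must consume or free it.
 * The names my_pkt_rcv and my_ptype are hypothetical.
 *
 *	static int my_pkt_rcv(struct sk_buff *skb, struct net_device *dev,
 *			      struct packet_type *pt,
 *			      struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return 0;
 *	}
 *
 *	static struct packet_type my_ptype = {
 *		.type = htons(ETH_P_ALL),
 *		.func = my_pkt_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);	// typically from module_init()
 */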
403 
404 /**
405  *	__dev_remove_pack	 - remove packet handler
406  *	@pt: packet type declaration
407  *
408  *	Remove a protocol handler that was previously added to the kernel
409  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
410  *	from the kernel lists and can be freed or reused once this function
411  *	returns.
412  *
413  *      The packet type might still be in use by receivers
414  *	and must not be freed until after all the CPUs have gone
415  *	through a quiescent state.
416  */
417 void __dev_remove_pack(struct packet_type *pt)
418 {
419 	struct list_head *head;
420 	struct packet_type *pt1;
421 
422 	spin_lock_bh(&ptype_lock);
423 
424 	if (pt->type == htons(ETH_P_ALL))
425 		head = &ptype_all;
426 	else
427 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
428 
429 	list_for_each_entry(pt1, head, list) {
430 		if (pt == pt1) {
431 			list_del_rcu(&pt->list);
432 			goto out;
433 		}
434 	}
435 
436 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
437 out:
438 	spin_unlock_bh(&ptype_lock);
439 }
440 EXPORT_SYMBOL(__dev_remove_pack);
441 
442 /**
443  *	dev_remove_pack	 - remove packet handler
444  *	@pt: packet type declaration
445  *
446  *	Remove a protocol handler that was previously added to the kernel
447  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
448  *	from the kernel lists and can be freed or reused once this function
449  *	returns.
450  *
451  *	This call sleeps to guarantee that no CPU is looking at the packet
452  *	type after return.
453  */
454 void dev_remove_pack(struct packet_type *pt)
455 {
456 	__dev_remove_pack(pt);
457 
458 	synchronize_net();
459 }
460 EXPORT_SYMBOL(dev_remove_pack);
461 
462 /******************************************************************************
463 
464 		      Device Boot-time Settings Routines
465 
466 *******************************************************************************/
467 
468 /* Boot time configuration table */
469 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
470 
471 /**
472  *	netdev_boot_setup_add	- add new setup entry
473  *	@name: name of the device
474  *	@map: configured settings for the device
475  *
476  *	Adds a new setup entry to the dev_boot_setup list.  The function
477  *	returns 0 on error and 1 on success.  This is a generic routine for
478  *	all netdevices.
479  */
480 static int netdev_boot_setup_add(char *name, struct ifmap *map)
481 {
482 	struct netdev_boot_setup *s;
483 	int i;
484 
485 	s = dev_boot_setup;
486 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
487 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
488 			memset(s[i].name, 0, sizeof(s[i].name));
489 			strlcpy(s[i].name, name, IFNAMSIZ);
490 			memcpy(&s[i].map, map, sizeof(s[i].map));
491 			break;
492 		}
493 	}
494 
495 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
496 }
497 
498 /**
499  *	netdev_boot_setup_check	- check boot time settings
500  *	@dev: the netdevice
501  *
502  * 	Check boot time settings for the device.
503  *	Any settings found are applied to the device so that they can be
504  *	used later during device probing.
505  *	Returns 1 if settings were found, 0 otherwise.
506  */
507 int netdev_boot_setup_check(struct net_device *dev)
508 {
509 	struct netdev_boot_setup *s = dev_boot_setup;
510 	int i;
511 
512 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
513 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
514 		    !strcmp(dev->name, s[i].name)) {
515 			dev->irq 	= s[i].map.irq;
516 			dev->base_addr 	= s[i].map.base_addr;
517 			dev->mem_start 	= s[i].map.mem_start;
518 			dev->mem_end 	= s[i].map.mem_end;
519 			return 1;
520 		}
521 	}
522 	return 0;
523 }
524 EXPORT_SYMBOL(netdev_boot_setup_check);
525 
526 
527 /**
528  *	netdev_boot_base	- get address from boot time settings
529  *	@prefix: prefix for network device
530  *	@unit: id for network device
531  *
532  * 	Check boot time settings for the base address of the device.
533  *	Returns 1 if the device is already registered (to indicate that
534  *	it should not be probed for), the configured base address if a
535  *	matching entry is found, or 0 if there are no settings.
536  */
537 unsigned long netdev_boot_base(const char *prefix, int unit)
538 {
539 	const struct netdev_boot_setup *s = dev_boot_setup;
540 	char name[IFNAMSIZ];
541 	int i;
542 
543 	sprintf(name, "%s%d", prefix, unit);
544 
545 	/*
546 	 * If device already registered then return base of 1
547 	 * to indicate not to probe for this interface
548 	 */
549 	if (__dev_get_by_name(&init_net, name))
550 		return 1;
551 
552 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
553 		if (!strcmp(name, s[i].name))
554 			return s[i].map.base_addr;
555 	return 0;
556 }
557 
558 /*
559  * Saves at boot time configured settings for any netdevice.
560  */
561 int __init netdev_boot_setup(char *str)
562 {
563 	int ints[5];
564 	struct ifmap map;
565 
566 	str = get_options(str, ARRAY_SIZE(ints), ints);
567 	if (!str || !*str)
568 		return 0;
569 
570 	/* Save settings */
571 	memset(&map, 0, sizeof(map));
572 	if (ints[0] > 0)
573 		map.irq = ints[1];
574 	if (ints[0] > 1)
575 		map.base_addr = ints[2];
576 	if (ints[0] > 2)
577 		map.mem_start = ints[3];
578 	if (ints[0] > 3)
579 		map.mem_end = ints[4];
580 
581 	/* Add new entry to the list */
582 	return netdev_boot_setup_add(str, &map);
583 }
584 
585 __setup("netdev=", netdev_boot_setup);
586 
587 /*******************************************************************************
588 
589 			    Device Interface Subroutines
590 
591 *******************************************************************************/
592 
593 /**
594  *	__dev_get_by_name	- find a device by its name
595  *	@net: the applicable net namespace
596  *	@name: name to find
597  *
598  *	Find an interface by name. Must be called under RTNL semaphore
599  *	or @dev_base_lock. If the name is found a pointer to the device
600  *	is returned. If the name is not found then %NULL is returned. The
601  *	reference counters are not incremented so the caller must be
602  *	careful with locks.
603  */
604 
605 struct net_device *__dev_get_by_name(struct net *net, const char *name)
606 {
607 	struct hlist_node *p;
608 	struct net_device *dev;
609 	struct hlist_head *head = dev_name_hash(net, name);
610 
611 	hlist_for_each_entry(dev, p, head, name_hlist)
612 		if (!strncmp(dev->name, name, IFNAMSIZ))
613 			return dev;
614 
615 	return NULL;
616 }
617 EXPORT_SYMBOL(__dev_get_by_name);
618 
619 /**
620  *	dev_get_by_name_rcu	- find a device by its name
621  *	@net: the applicable net namespace
622  *	@name: name to find
623  *
624  *	Find an interface by name.
625  *	If the name is found a pointer to the device is returned.
626  * 	If the name is not found then %NULL is returned.
627  *	The reference counters are not incremented so the caller must be
628  *	careful with locks. The caller must hold RCU lock.
629  */
630 
631 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
632 {
633 	struct hlist_node *p;
634 	struct net_device *dev;
635 	struct hlist_head *head = dev_name_hash(net, name);
636 
637 	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
638 		if (!strncmp(dev->name, name, IFNAMSIZ))
639 			return dev;
640 
641 	return NULL;
642 }
643 EXPORT_SYMBOL(dev_get_by_name_rcu);
644 
645 /**
646  *	dev_get_by_name		- find a device by its name
647  *	@net: the applicable net namespace
648  *	@name: name to find
649  *
650  *	Find an interface by name. This can be called from any
651  *	context and does its own locking. The returned handle has
652  *	the usage count incremented and the caller must use dev_put() to
653  *	release it when it is no longer needed. %NULL is returned if no
654  *	matching device is found.
655  */
656 
657 struct net_device *dev_get_by_name(struct net *net, const char *name)
658 {
659 	struct net_device *dev;
660 
661 	rcu_read_lock();
662 	dev = dev_get_by_name_rcu(net, name);
663 	if (dev)
664 		dev_hold(dev);
665 	rcu_read_unlock();
666 	return dev;
667 }
668 EXPORT_SYMBOL(dev_get_by_name);
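
/*
 * Example (an illustrative sketch): a lookup from process context; the
 * reference obtained must be dropped with dev_put() when done.
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */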
669 
670 /**
671  *	__dev_get_by_index - find a device by its ifindex
672  *	@net: the applicable net namespace
673  *	@ifindex: index of device
674  *
675  *	Search for an interface by index. Returns %NULL if the device
676  *	is not found or a pointer to the device. The device has not
677  *	had its reference counter increased so the caller must be careful
678  *	about locking. The caller must hold either the RTNL semaphore
679  *	or @dev_base_lock.
680  */
681 
682 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
683 {
684 	struct hlist_node *p;
685 	struct net_device *dev;
686 	struct hlist_head *head = dev_index_hash(net, ifindex);
687 
688 	hlist_for_each_entry(dev, p, head, index_hlist)
689 		if (dev->ifindex == ifindex)
690 			return dev;
691 
692 	return NULL;
693 }
694 EXPORT_SYMBOL(__dev_get_by_index);
695 
696 /**
697  *	dev_get_by_index_rcu - find a device by its ifindex
698  *	@net: the applicable net namespace
699  *	@ifindex: index of device
700  *
701  *	Search for an interface by index. Returns %NULL if the device
702  *	is not found or a pointer to the device. The device has not
703  *	had its reference counter increased so the caller must be careful
704  *	about locking. The caller must hold RCU lock.
705  */
706 
707 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
708 {
709 	struct hlist_node *p;
710 	struct net_device *dev;
711 	struct hlist_head *head = dev_index_hash(net, ifindex);
712 
713 	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
714 		if (dev->ifindex == ifindex)
715 			return dev;
716 
717 	return NULL;
718 }
719 EXPORT_SYMBOL(dev_get_by_index_rcu);
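
/*
 * Example (an illustrative sketch): a lockless lookup; the pointer is
 * only valid inside the RCU read-side section unless a reference is
 * taken with dev_hold().
 *
 *	struct net_device *dev;
 *
 *	rcu_read_lock();
 *	dev = dev_get_by_index_rcu(net, ifindex);
 *	if (dev)
 *		... read dev fields; do not sleep or keep the pointer ...
 *	rcu_read_unlock();
 */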
720 
721 
722 /**
723  *	dev_get_by_index - find a device by its ifindex
724  *	@net: the applicable net namespace
725  *	@ifindex: index of device
726  *
727  *	Search for an interface by index. Returns NULL if the device
728  *	is not found or a pointer to the device. The device returned has
729  *	had a reference added and the pointer is safe until the user calls
730  *	dev_put to indicate they have finished with it.
731  */
732 
733 struct net_device *dev_get_by_index(struct net *net, int ifindex)
734 {
735 	struct net_device *dev;
736 
737 	rcu_read_lock();
738 	dev = dev_get_by_index_rcu(net, ifindex);
739 	if (dev)
740 		dev_hold(dev);
741 	rcu_read_unlock();
742 	return dev;
743 }
744 EXPORT_SYMBOL(dev_get_by_index);
745 
746 /**
747  *	dev_getbyhwaddr - find a device by its hardware address
748  *	@net: the applicable net namespace
749  *	@type: media type of device
750  *	@ha: hardware address
751  *
752  *	Search for an interface by MAC address. Returns NULL if the device
753  *	is not found or a pointer to the device. The caller must hold the
754  *	rtnl semaphore. The returned device has not had its ref count increased
755  *	and the caller must therefore be careful about locking
756  *
757  *	BUGS:
758  *	If the API was consistent this would be __dev_get_by_hwaddr
759  */
760 
761 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
762 {
763 	struct net_device *dev;
764 
765 	ASSERT_RTNL();
766 
767 	for_each_netdev(net, dev)
768 		if (dev->type == type &&
769 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
770 			return dev;
771 
772 	return NULL;
773 }
774 EXPORT_SYMBOL(dev_getbyhwaddr);
775 
776 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
777 {
778 	struct net_device *dev;
779 
780 	ASSERT_RTNL();
781 	for_each_netdev(net, dev)
782 		if (dev->type == type)
783 			return dev;
784 
785 	return NULL;
786 }
787 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
788 
789 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
790 {
791 	struct net_device *dev, *ret = NULL;
792 
793 	rcu_read_lock();
794 	for_each_netdev_rcu(net, dev)
795 		if (dev->type == type) {
796 			dev_hold(dev);
797 			ret = dev;
798 			break;
799 		}
800 	rcu_read_unlock();
801 	return ret;
802 }
803 EXPORT_SYMBOL(dev_getfirstbyhwtype);
804 
805 /**
806  *	dev_get_by_flags - find any device with given flags
807  *	@net: the applicable net namespace
808  *	@if_flags: IFF_* values
809  *	@mask: bitmask of bits in if_flags to check
810  *
811  *	Search for any interface with the given flags. Returns NULL if a device
812  *	is not found or a pointer to the device. The device returned has
813  *	had a reference added and the pointer is safe until the user calls
814  *	dev_put to indicate they have finished with it.
815  */
816 
817 struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
818 				    unsigned short mask)
819 {
820 	struct net_device *dev, *ret;
821 
822 	ret = NULL;
823 	rcu_read_lock();
824 	for_each_netdev_rcu(net, dev) {
825 		if (((dev->flags ^ if_flags) & mask) == 0) {
826 			dev_hold(dev);
827 			ret = dev;
828 			break;
829 		}
830 	}
831 	rcu_read_unlock();
832 	return ret;
833 }
834 EXPORT_SYMBOL(dev_get_by_flags);
835 
836 /**
837  *	dev_valid_name - check if name is okay for network device
838  *	@name: name string
839  *
840  *	Network device names need to be valid file names to
841  *	allow sysfs to work.  We also disallow any kind of
842  *	whitespace.
843  */
844 int dev_valid_name(const char *name)
845 {
846 	if (*name == '\0')
847 		return 0;
848 	if (strlen(name) >= IFNAMSIZ)
849 		return 0;
850 	if (!strcmp(name, ".") || !strcmp(name, ".."))
851 		return 0;
852 
853 	while (*name) {
854 		if (*name == '/' || isspace(*name))
855 			return 0;
856 		name++;
857 	}
858 	return 1;
859 }
860 EXPORT_SYMBOL(dev_valid_name);
861 
862 /**
863  *	__dev_alloc_name - allocate a name for a device
864  *	@net: network namespace to allocate the device name in
865  *	@name: name format string
866  *	@buf:  scratch buffer and result name string
867  *
868  *	Passed a format string - eg "lt%d" - it will try to find a suitable
869  *	id. It scans the list of devices to build up a free map, then chooses
870  *	the first empty slot. The caller must hold the dev_base or rtnl lock
871  *	while allocating the name and adding the device in order to avoid
872  *	duplicates.
873  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
874  *	Returns the number of the unit assigned or a negative errno code.
875  */
876 
877 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
878 {
879 	int i = 0;
880 	const char *p;
881 	const int max_netdevices = 8*PAGE_SIZE;
882 	unsigned long *inuse;
883 	struct net_device *d;
884 
885 	p = strnchr(name, IFNAMSIZ-1, '%');
886 	if (p) {
887 		/*
888 		 * Verify the string as this thing may have come from
889 		 * the user.  There must be either one "%d" and no other "%"
890 		 * characters.
891 		 */
892 		if (p[1] != 'd' || strchr(p + 2, '%'))
893 			return -EINVAL;
894 
895 		/* Use one page as a bit array of possible slots */
896 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
897 		if (!inuse)
898 			return -ENOMEM;
899 
900 		for_each_netdev(net, d) {
901 			if (!sscanf(d->name, name, &i))
902 				continue;
903 			if (i < 0 || i >= max_netdevices)
904 				continue;
905 
906 			/*  avoid cases where sscanf is not exact inverse of printf */
907 			snprintf(buf, IFNAMSIZ, name, i);
908 			if (!strncmp(buf, d->name, IFNAMSIZ))
909 				set_bit(i, inuse);
910 		}
911 
912 		i = find_first_zero_bit(inuse, max_netdevices);
913 		free_page((unsigned long) inuse);
914 	}
915 
916 	if (buf != name)
917 		snprintf(buf, IFNAMSIZ, name, i);
918 	if (!__dev_get_by_name(net, buf))
919 		return i;
920 
921 	/* It is possible to run out of possible slots
922 	 * when the name is long and there isn't enough space left
923 	 * for the digits, or if all bits are used.
924 	 */
925 	return -ENFILE;
926 }
927 
928 /**
929  *	dev_alloc_name - allocate a name for a device
930  *	@dev: device
931  *	@name: name format string
932  *
933  *	Passed a format string - eg "lt%d" it will try and find a suitable
934  *	id. It scans list of devices to build up a free map, then chooses
935  *	the first empty slot. The caller must hold the dev_base or rtnl lock
936  *	while allocating the name and adding the device in order to avoid
937  *	duplicates.
938  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
939  *	Returns the number of the unit assigned or a negative errno code.
940  */
941 
942 int dev_alloc_name(struct net_device *dev, const char *name)
943 {
944 	char buf[IFNAMSIZ];
945 	struct net *net;
946 	int ret;
947 
948 	BUG_ON(!dev_net(dev));
949 	net = dev_net(dev);
950 	ret = __dev_alloc_name(net, name, buf);
951 	if (ret >= 0)
952 		strlcpy(dev->name, buf, IFNAMSIZ);
953 	return ret;
954 }
955 EXPORT_SYMBOL(dev_alloc_name);
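
/*
 * Example (illustrative): a driver asking for the next free "eth%d"
 * slot before registration; the error label is hypothetical.
 *
 *	err = dev_alloc_name(dev, "eth%d");	// dev->name becomes e.g. "eth2"
 *	if (err < 0)
 *		goto out_free;
 */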
956 
957 static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
958 {
959 	struct net *net;
960 
961 	BUG_ON(!dev_net(dev));
962 	net = dev_net(dev);
963 
964 	if (!dev_valid_name(name))
965 		return -EINVAL;
966 
967 	if (fmt && strchr(name, '%'))
968 		return dev_alloc_name(dev, name);
969 	else if (__dev_get_by_name(net, name))
970 		return -EEXIST;
971 	else if (dev->name != name)
972 		strlcpy(dev->name, name, IFNAMSIZ);
973 
974 	return 0;
975 }
976 
977 /**
978  *	dev_change_name - change name of a device
979  *	@dev: device
980  *	@newname: name (or format string) must be at least IFNAMSIZ
981  *
982  *	Change the name of a device. Format strings such as "eth%d"
983  *	may be passed for wildcarding.
984  */
985 int dev_change_name(struct net_device *dev, const char *newname)
986 {
987 	char oldname[IFNAMSIZ];
988 	int err = 0;
989 	int ret;
990 	struct net *net;
991 
992 	ASSERT_RTNL();
993 	BUG_ON(!dev_net(dev));
994 
995 	net = dev_net(dev);
996 	if (dev->flags & IFF_UP)
997 		return -EBUSY;
998 
999 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
1000 		return 0;
1001 
1002 	memcpy(oldname, dev->name, IFNAMSIZ);
1003 
1004 	err = dev_get_valid_name(dev, newname, 1);
1005 	if (err < 0)
1006 		return err;
1007 
1008 rollback:
1009 	ret = device_rename(&dev->dev, dev->name);
1010 	if (ret) {
1011 		memcpy(dev->name, oldname, IFNAMSIZ);
1012 		return ret;
1013 	}
1014 
1015 	write_lock_bh(&dev_base_lock);
1016 	hlist_del(&dev->name_hlist);
1017 	write_unlock_bh(&dev_base_lock);
1018 
1019 	synchronize_rcu();
1020 
1021 	write_lock_bh(&dev_base_lock);
1022 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1023 	write_unlock_bh(&dev_base_lock);
1024 
1025 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1026 	ret = notifier_to_errno(ret);
1027 
1028 	if (ret) {
1029 		/* err >= 0 after dev_alloc_name() or stores the first errno */
1030 		if (err >= 0) {
1031 			err = ret;
1032 			memcpy(dev->name, oldname, IFNAMSIZ);
1033 			goto rollback;
1034 		} else {
1035 			printk(KERN_ERR
1036 			       "%s: name change rollback failed: %d.\n",
1037 			       dev->name, ret);
1038 		}
1039 	}
1040 
1041 	return err;
1042 }
1043 
1044 /**
1045  *	dev_set_alias - change ifalias of a device
1046  *	@dev: device
1047  *	@alias: name up to IFALIASZ
1048  *	@len: limit of bytes to copy from info
1049  *
1050  *	Set the ifalias for a device.
1051  */
1052 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1053 {
1054 	ASSERT_RTNL();
1055 
1056 	if (len >= IFALIASZ)
1057 		return -EINVAL;
1058 
1059 	if (!len) {
1060 		if (dev->ifalias) {
1061 			kfree(dev->ifalias);
1062 			dev->ifalias = NULL;
1063 		}
1064 		return 0;
1065 	}
1066 
1067 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1068 	if (!dev->ifalias)
1069 		return -ENOMEM;
1070 
1071 	strlcpy(dev->ifalias, alias, len+1);
1072 	return len;
1073 }
1074 
1075 
1076 /**
1077  *	netdev_features_change - device changes features
1078  *	@dev: device to cause notification
1079  *
1080  *	Called to indicate a device has changed features.
1081  */
1082 void netdev_features_change(struct net_device *dev)
1083 {
1084 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1085 }
1086 EXPORT_SYMBOL(netdev_features_change);
1087 
1088 /**
1089  *	netdev_state_change - device changes state
1090  *	@dev: device to cause notification
1091  *
1092  *	Called to indicate a device has changed state. This function calls
1093  *	the notifier chains for netdev_chain and sends a NEWLINK message
1094  *	to the routing socket.
1095  */
1096 void netdev_state_change(struct net_device *dev)
1097 {
1098 	if (dev->flags & IFF_UP) {
1099 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1100 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1101 	}
1102 }
1103 EXPORT_SYMBOL(netdev_state_change);
1104 
1105 int netdev_bonding_change(struct net_device *dev, unsigned long event)
1106 {
1107 	return call_netdevice_notifiers(event, dev);
1108 }
1109 EXPORT_SYMBOL(netdev_bonding_change);
1110 
1111 /**
1112  *	dev_load 	- load a network module
1113  *	@net: the applicable net namespace
1114  *	@name: name of interface
1115  *
1116  *	If a network interface is not present and the process has suitable
1117  *	privileges, this function loads the module. If module loading is not
1118  *	available in this kernel then it becomes a nop.
1119  */
1120 
1121 void dev_load(struct net *net, const char *name)
1122 {
1123 	struct net_device *dev;
1124 
1125 	rcu_read_lock();
1126 	dev = dev_get_by_name_rcu(net, name);
1127 	rcu_read_unlock();
1128 
1129 	if (!dev && capable(CAP_NET_ADMIN))
1130 		request_module("%s", name);
1131 }
1132 EXPORT_SYMBOL(dev_load);
1133 
1134 static int __dev_open(struct net_device *dev)
1135 {
1136 	const struct net_device_ops *ops = dev->netdev_ops;
1137 	int ret;
1138 
1139 	ASSERT_RTNL();
1140 
1141 	/*
1142 	 *	Is it even present?
1143 	 */
1144 	if (!netif_device_present(dev))
1145 		return -ENODEV;
1146 
1147 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1148 	ret = notifier_to_errno(ret);
1149 	if (ret)
1150 		return ret;
1151 
1152 	/*
1153 	 *	Call device private open method
1154 	 */
1155 	set_bit(__LINK_STATE_START, &dev->state);
1156 
1157 	if (ops->ndo_validate_addr)
1158 		ret = ops->ndo_validate_addr(dev);
1159 
1160 	if (!ret && ops->ndo_open)
1161 		ret = ops->ndo_open(dev);
1162 
1163 	/*
1164 	 *	If it went open OK then:
1165 	 */
1166 
1167 	if (ret)
1168 		clear_bit(__LINK_STATE_START, &dev->state);
1169 	else {
1170 		/*
1171 		 *	Set the flags.
1172 		 */
1173 		dev->flags |= IFF_UP;
1174 
1175 		/*
1176 		 *	Enable NET_DMA
1177 		 */
1178 		net_dmaengine_get();
1179 
1180 		/*
1181 		 *	Initialize multicasting status
1182 		 */
1183 		dev_set_rx_mode(dev);
1184 
1185 		/*
1186 		 *	Wakeup transmit queue engine
1187 		 */
1188 		dev_activate(dev);
1189 	}
1190 
1191 	return ret;
1192 }
1193 
1194 /**
1195  *	dev_open	- prepare an interface for use.
1196  *	@dev:	device to open
1197  *
1198  *	Takes a device from down to up state. The device's private open
1199  *	function is invoked and then the multicast lists are loaded. Finally
1200  *	the device is moved into the up state and a %NETDEV_UP message is
1201  *	sent to the netdev notifier chain.
1202  *
1203  *	Calling this function on an active interface is a nop. On a failure
1204  *	a negative errno code is returned.
1205  */
1206 int dev_open(struct net_device *dev)
1207 {
1208 	int ret;
1209 
1210 	/*
1211 	 *	Is it already up?
1212 	 */
1213 	if (dev->flags & IFF_UP)
1214 		return 0;
1215 
1216 	/*
1217 	 *	Open device
1218 	 */
1219 	ret = __dev_open(dev);
1220 	if (ret < 0)
1221 		return ret;
1222 
1223 	/*
1224 	 *	... and announce new interface.
1225 	 */
1226 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1227 	call_netdevice_notifiers(NETDEV_UP, dev);
1228 
1229 	return ret;
1230 }
1231 EXPORT_SYMBOL(dev_open);
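
/*
 * Example (illustrative): callers outside the core bring an interface
 * up under RTNL, since __dev_open() asserts it.
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */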
1232 
1233 static int __dev_close(struct net_device *dev)
1234 {
1235 	const struct net_device_ops *ops = dev->netdev_ops;
1236 
1237 	ASSERT_RTNL();
1238 	might_sleep();
1239 
1240 	/*
1241 	 *	Tell people we are going down, so that they can
1242 	 *	prepare for death while the device is still operating.
1243 	 */
1244 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1245 
1246 	clear_bit(__LINK_STATE_START, &dev->state);
1247 
1248 	/* Synchronize to scheduled poll. We cannot touch poll list,
1249 	 * it can even be on a different cpu. So just clear netif_running().
1250 	 *
1251 	 * dev->stop() will invoke napi_disable() on all of its
1252 	 * napi_struct instances on this device.
1253 	 */
1254 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1255 
1256 	dev_deactivate(dev);
1257 
1258 	/*
1259 	 *	Call the device specific close. This cannot fail.
1260 	 *	Only if device is UP
1261 	 *
1262 	 *	We allow it to be called even after a DETACH hot-plug
1263 	 *	event.
1264 	 */
1265 	if (ops->ndo_stop)
1266 		ops->ndo_stop(dev);
1267 
1268 	/*
1269 	 *	Device is now down.
1270 	 */
1271 
1272 	dev->flags &= ~IFF_UP;
1273 
1274 	/*
1275 	 *	Shutdown NET_DMA
1276 	 */
1277 	net_dmaengine_put();
1278 
1279 	return 0;
1280 }
1281 
1282 /**
1283  *	dev_close - shutdown an interface.
1284  *	@dev: device to shutdown
1285  *
1286  *	This function moves an active device into down state. A
1287  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1288  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1289  *	chain.
1290  */
1291 int dev_close(struct net_device *dev)
1292 {
1293 	if (!(dev->flags & IFF_UP))
1294 		return 0;
1295 
1296 	__dev_close(dev);
1297 
1298 	/*
1299 	 * Tell people we are down
1300 	 */
1301 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1302 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1303 
1304 	return 0;
1305 }
1306 EXPORT_SYMBOL(dev_close);
1307 
1308 
1309 /**
1310  *	dev_disable_lro - disable Large Receive Offload on a device
1311  *	@dev: device
1312  *
1313  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1314  *	called under RTNL.  This is needed if received packets may be
1315  *	forwarded to another interface.
1316  */
1317 void dev_disable_lro(struct net_device *dev)
1318 {
1319 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1320 	    dev->ethtool_ops->set_flags) {
1321 		u32 flags = dev->ethtool_ops->get_flags(dev);
1322 		if (flags & ETH_FLAG_LRO) {
1323 			flags &= ~ETH_FLAG_LRO;
1324 			dev->ethtool_ops->set_flags(dev, flags);
1325 		}
1326 	}
1327 	WARN_ON(dev->features & NETIF_F_LRO);
1328 }
1329 EXPORT_SYMBOL(dev_disable_lro);
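
/*
 * Example (illustrative): a forwarding setup, such as enslaving a
 * device to a bridge, calls this under RTNL before frames received on
 * the device may be forwarded.
 *
 *	ASSERT_RTNL();
 *	dev_disable_lro(dev);
 */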
1330 
1331 
1332 static int dev_boot_phase = 1;
1333 
1334 /*
1335  *	Device change register/unregister. These are not inline or static
1336  *	as we export them to the world.
1337  */
1338 
1339 /**
1340  *	register_netdevice_notifier - register a network notifier block
1341  *	@nb: notifier
1342  *
1343  *	Register a notifier to be called when network device events occur.
1344  *	The notifier passed is linked into the kernel structures and must
1345  *	not be reused until it has been unregistered. A negative errno code
1346  *	is returned on a failure.
1347  *
1348  * 	When registered, all registration and up events are replayed
1349  *	to the new notifier to allow it to have a race-free
1350  *	view of the network device list.
1351  */
1352 
1353 int register_netdevice_notifier(struct notifier_block *nb)
1354 {
1355 	struct net_device *dev;
1356 	struct net_device *last;
1357 	struct net *net;
1358 	int err;
1359 
1360 	rtnl_lock();
1361 	err = raw_notifier_chain_register(&netdev_chain, nb);
1362 	if (err)
1363 		goto unlock;
1364 	if (dev_boot_phase)
1365 		goto unlock;
1366 	for_each_net(net) {
1367 		for_each_netdev(net, dev) {
1368 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1369 			err = notifier_to_errno(err);
1370 			if (err)
1371 				goto rollback;
1372 
1373 			if (!(dev->flags & IFF_UP))
1374 				continue;
1375 
1376 			nb->notifier_call(nb, NETDEV_UP, dev);
1377 		}
1378 	}
1379 
1380 unlock:
1381 	rtnl_unlock();
1382 	return err;
1383 
1384 rollback:
1385 	last = dev;
1386 	for_each_net(net) {
1387 		for_each_netdev(net, dev) {
1388 			if (dev == last)
1389 				break;
1390 
1391 			if (dev->flags & IFF_UP) {
1392 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1393 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1394 			}
1395 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1396 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1397 		}
1398 	}
1399 
1400 	raw_notifier_chain_unregister(&netdev_chain, nb);
1401 	goto unlock;
1402 }
1403 EXPORT_SYMBOL(register_netdevice_notifier);
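
/*
 * Example (an illustrative sketch): a notifier that logs interfaces
 * coming up. In this chain the data pointer passed to the callback is
 * the net_device itself; my_netdev_event and my_nb are hypothetical.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 */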
1404 
1405 /**
1406  *	unregister_netdevice_notifier - unregister a network notifier block
1407  *	@nb: notifier
1408  *
1409  *	Unregister a notifier previously registered by
1410  *	register_netdevice_notifier(). The notifier is unlinked from the
1411  *	kernel structures and may then be reused. A negative errno code
1412  *	is returned on a failure.
1413  */
1414 
1415 int unregister_netdevice_notifier(struct notifier_block *nb)
1416 {
1417 	int err;
1418 
1419 	rtnl_lock();
1420 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1421 	rtnl_unlock();
1422 	return err;
1423 }
1424 EXPORT_SYMBOL(unregister_netdevice_notifier);
1425 
1426 /**
1427  *	call_netdevice_notifiers - call all network notifier blocks
1428  *      @val: value passed unmodified to notifier function
1429  *      @dev: net_device pointer passed unmodified to notifier function
1430  *
1431  *	Call all network notifier blocks.  Parameters and return value
1432  *	are as for raw_notifier_call_chain().
1433  */
1434 
1435 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1436 {
1437 	ASSERT_RTNL();
1438 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1439 }
1440 
1441 /* When > 0 there are consumers of rx skb time stamps */
1442 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1443 
1444 void net_enable_timestamp(void)
1445 {
1446 	atomic_inc(&netstamp_needed);
1447 }
1448 EXPORT_SYMBOL(net_enable_timestamp);
1449 
1450 void net_disable_timestamp(void)
1451 {
1452 	atomic_dec(&netstamp_needed);
1453 }
1454 EXPORT_SYMBOL(net_disable_timestamp);
1455 
1456 static inline void net_timestamp_set(struct sk_buff *skb)
1457 {
1458 	if (atomic_read(&netstamp_needed))
1459 		__net_timestamp(skb);
1460 	else
1461 		skb->tstamp.tv64 = 0;
1462 }
1463 
1464 static inline void net_timestamp_check(struct sk_buff *skb)
1465 {
1466 	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
1467 		__net_timestamp(skb);
1468 }
1469 
1470 /**
1471  * dev_forward_skb - loopback an skb to another netif
1472  *
1473  * @dev: destination network device
1474  * @skb: buffer to forward
1475  *
1476  * return values:
1477  *	NET_RX_SUCCESS	(no congestion)
1478  *	NET_RX_DROP     (packet was dropped, but freed)
1479  *
1480  * dev_forward_skb can be used for injecting an skb from the
1481  * start_xmit function of one device into the receive queue
1482  * of another device.
1483  *
1484  * The receiving device may be in another namespace, so
1485  * we have to clear all information in the skb that could
1486  * impact namespace isolation.
1487  */
1488 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1489 {
1490 	skb_orphan(skb);
1491 	nf_reset(skb);
1492 
1493 	if (!(dev->flags & IFF_UP) ||
1494 	    (skb->len > (dev->mtu + dev->hard_header_len))) {
1495 		kfree_skb(skb);
1496 		return NET_RX_DROP;
1497 	}
1498 	skb_set_dev(skb, dev);
1499 	skb->tstamp.tv64 = 0;
1500 	skb->pkt_type = PACKET_HOST;
1501 	skb->protocol = eth_type_trans(skb, dev);
1502 	return netif_rx(skb);
1503 }
1504 EXPORT_SYMBOL_GPL(dev_forward_skb);
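
/*
 * Example (an illustrative sketch): the pattern a veth-style driver
 * uses in its ndo_start_xmit to hand a frame, link-layer header
 * included, to its peer device ("peer" is a hypothetical priv field).
 *
 *	static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *
 *		dev_forward_skb(priv->peer, skb);	// consumes skb either way
 *		return NETDEV_TX_OK;
 *	}
 */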
1505 
1506 /*
1507  *	Support routine. Sends outgoing frames to any network
1508  *	taps currently in use.
1509  */
1510 
1511 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1512 {
1513 	struct packet_type *ptype;
1514 
1515 #ifdef CONFIG_NET_CLS_ACT
1516 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1517 		net_timestamp_set(skb);
1518 #else
1519 	net_timestamp_set(skb);
1520 #endif
1521 
1522 	rcu_read_lock();
1523 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1524 		/* Never send packets back to the socket
1525 		 * they originated from - MvS (miquels@drinkel.ow.org)
1526 		 */
1527 		if ((ptype->dev == dev || !ptype->dev) &&
1528 		    (ptype->af_packet_priv == NULL ||
1529 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1530 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1531 			if (!skb2)
1532 				break;
1533 
1534 			/* skb->nh should be correctly
1535 			   set by sender, so that the second statement is
1536 			   just protection against buggy protocols.
1537 			 */
1538 			skb_reset_mac_header(skb2);
1539 
1540 			if (skb_network_header(skb2) < skb2->data ||
1541 			    skb2->network_header > skb2->tail) {
1542 				if (net_ratelimit())
1543 					printk(KERN_CRIT "protocol %04x is "
1544 					       "buggy, dev %s\n",
1545 					       skb2->protocol, dev->name);
1546 				skb_reset_network_header(skb2);
1547 			}
1548 
1549 			skb2->transport_header = skb2->network_header;
1550 			skb2->pkt_type = PACKET_OUTGOING;
1551 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1552 		}
1553 	}
1554 	rcu_read_unlock();
1555 }
1556 
1557 /*
1558  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1559  * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
1560  */
1561 void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1562 {
1563 	unsigned int real_num = dev->real_num_tx_queues;
1564 
1565 	if (unlikely(txq > dev->num_tx_queues))
1566 		;
1567 	else if (txq > real_num)
1568 		dev->real_num_tx_queues = txq;
1569 	else if (txq < real_num) {
1570 		dev->real_num_tx_queues = txq;
1571 		qdisc_reset_all_tx_gt(dev, txq);
1572 	}
1573 }
1574 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
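
/*
 * Example (illustrative): a multiqueue driver that allocated the
 * maximum number of queues can shrink the active set once it knows
 * how many it will actually use.
 *
 *	netif_set_real_num_tx_queues(dev, min_t(unsigned int,
 *						dev->num_tx_queues,
 *						num_online_cpus()));
 */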
1575 
1576 static inline void __netif_reschedule(struct Qdisc *q)
1577 {
1578 	struct softnet_data *sd;
1579 	unsigned long flags;
1580 
1581 	local_irq_save(flags);
1582 	sd = &__get_cpu_var(softnet_data);
1583 	q->next_sched = NULL;
1584 	*sd->output_queue_tailp = q;
1585 	sd->output_queue_tailp = &q->next_sched;
1586 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1587 	local_irq_restore(flags);
1588 }
1589 
1590 void __netif_schedule(struct Qdisc *q)
1591 {
1592 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1593 		__netif_reschedule(q);
1594 }
1595 EXPORT_SYMBOL(__netif_schedule);
1596 
1597 void dev_kfree_skb_irq(struct sk_buff *skb)
1598 {
1599 	if (atomic_dec_and_test(&skb->users)) {
1600 		struct softnet_data *sd;
1601 		unsigned long flags;
1602 
1603 		local_irq_save(flags);
1604 		sd = &__get_cpu_var(softnet_data);
1605 		skb->next = sd->completion_queue;
1606 		sd->completion_queue = skb;
1607 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1608 		local_irq_restore(flags);
1609 	}
1610 }
1611 EXPORT_SYMBOL(dev_kfree_skb_irq);
1612 
1613 void dev_kfree_skb_any(struct sk_buff *skb)
1614 {
1615 	if (in_irq() || irqs_disabled())
1616 		dev_kfree_skb_irq(skb);
1617 	else
1618 		dev_kfree_skb(skb);
1619 }
1620 EXPORT_SYMBOL(dev_kfree_skb_any);
1621 
1622 
1623 /**
1624  * netif_device_detach - mark device as removed
1625  * @dev: network device
1626  *
1627  * Mark device as removed from the system and therefore no longer available.
1628  */
1629 void netif_device_detach(struct net_device *dev)
1630 {
1631 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1632 	    netif_running(dev)) {
1633 		netif_tx_stop_all_queues(dev);
1634 	}
1635 }
1636 EXPORT_SYMBOL(netif_device_detach);
1637 
1638 /**
1639  * netif_device_attach - mark device as attached
1640  * @dev: network device
1641  *
1642  * Mark device as attached to the system and restart if needed.
1643  */
1644 void netif_device_attach(struct net_device *dev)
1645 {
1646 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1647 	    netif_running(dev)) {
1648 		netif_tx_wake_all_queues(dev);
1649 		__netdev_watchdog_up(dev);
1650 	}
1651 }
1652 EXPORT_SYMBOL(netif_device_attach);
1653 
1654 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1655 {
1656 	return ((features & NETIF_F_GEN_CSUM) ||
1657 		((features & NETIF_F_IP_CSUM) &&
1658 		 protocol == htons(ETH_P_IP)) ||
1659 		((features & NETIF_F_IPV6_CSUM) &&
1660 		 protocol == htons(ETH_P_IPV6)) ||
1661 		((features & NETIF_F_FCOE_CRC) &&
1662 		 protocol == htons(ETH_P_FCOE)));
1663 }
1664 
1665 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1666 {
1667 	if (can_checksum_protocol(dev->features, skb->protocol))
1668 		return true;
1669 
1670 	if (skb->protocol == htons(ETH_P_8021Q)) {
1671 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1672 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1673 					  veh->h_vlan_encapsulated_proto))
1674 			return true;
1675 	}
1676 
1677 	return false;
1678 }
1679 
1680 /**
1681  * skb_dev_set -- assign a new device to a buffer
1682  * @skb: buffer for the new device
1683  * @dev: network device
1684  *
1685  * If an skb is owned by a device already, we have to reset
1686  * all data private to the namespace a device belongs to
1687  * before assigning it a new device.
1688  */
1689 #ifdef CONFIG_NET_NS
1690 void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
1691 {
1692 	skb_dst_drop(skb);
1693 	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
1694 		secpath_reset(skb);
1695 		nf_reset(skb);
1696 		skb_init_secmark(skb);
1697 		skb->mark = 0;
1698 		skb->priority = 0;
1699 		skb->nf_trace = 0;
1700 		skb->ipvs_property = 0;
1701 #ifdef CONFIG_NET_SCHED
1702 		skb->tc_index = 0;
1703 #endif
1704 	}
1705 	skb->dev = dev;
1706 }
1707 EXPORT_SYMBOL(skb_set_dev);
1708 #endif /* CONFIG_NET_NS */
1709 
1710 /*
1711  * Invalidate hardware checksum when packet is to be mangled, and
1712  * complete checksum manually on outgoing path.
1713  */
1714 int skb_checksum_help(struct sk_buff *skb)
1715 {
1716 	__wsum csum;
1717 	int ret = 0, offset;
1718 
1719 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1720 		goto out_set_summed;
1721 
1722 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1723 		/* Let GSO fix up the checksum. */
1724 		goto out_set_summed;
1725 	}
1726 
1727 	offset = skb->csum_start - skb_headroom(skb);
1728 	BUG_ON(offset >= skb_headlen(skb));
1729 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1730 
1731 	offset += skb->csum_offset;
1732 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1733 
1734 	if (skb_cloned(skb) &&
1735 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1736 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1737 		if (ret)
1738 			goto out;
1739 	}
1740 
1741 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1742 out_set_summed:
1743 	skb->ip_summed = CHECKSUM_NONE;
1744 out:
1745 	return ret;
1746 }
1747 EXPORT_SYMBOL(skb_checksum_help);
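
/*
 * Example (illustrative): the usual software fallback when the device
 * cannot checksum a CHECKSUM_PARTIAL packet on the transmit path; the
 * error label is hypothetical.
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    skb_checksum_help(skb))
 *		goto out_kfree_skb;
 */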
1748 
1749 /**
1750  *	skb_gso_segment - Perform segmentation on skb.
1751  *	@skb: buffer to segment
1752  *	@features: features for the output path (see dev->features)
1753  *
1754  *	This function segments the given skb and returns a list of segments.
1755  *
1756  *	It may return NULL if the skb requires no segmentation.  This is
1757  *	only possible when GSO is used for verifying header integrity.
1758  */
1759 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1760 {
1761 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1762 	struct packet_type *ptype;
1763 	__be16 type = skb->protocol;
1764 	int err;
1765 
1766 	skb_reset_mac_header(skb);
1767 	skb->mac_len = skb->network_header - skb->mac_header;
1768 	__skb_pull(skb, skb->mac_len);
1769 
1770 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1771 		struct net_device *dev = skb->dev;
1772 		struct ethtool_drvinfo info = {};
1773 
1774 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1775 			dev->ethtool_ops->get_drvinfo(dev, &info);
1776 
1777 		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1778 			"ip_summed=%d",
1779 		     info.driver, dev ? dev->features : 0L,
1780 		     skb->sk ? skb->sk->sk_route_caps : 0L,
1781 		     skb->len, skb->data_len, skb->ip_summed);
1782 
1783 		if (skb_header_cloned(skb) &&
1784 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1785 			return ERR_PTR(err);
1786 	}
1787 
1788 	rcu_read_lock();
1789 	list_for_each_entry_rcu(ptype,
1790 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1791 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1792 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1793 				err = ptype->gso_send_check(skb);
1794 				segs = ERR_PTR(err);
1795 				if (err || skb_gso_ok(skb, features))
1796 					break;
1797 				__skb_push(skb, (skb->data -
1798 						 skb_network_header(skb)));
1799 			}
1800 			segs = ptype->gso_segment(skb, features);
1801 			break;
1802 		}
1803 	}
1804 	rcu_read_unlock();
1805 
1806 	__skb_push(skb, skb->data - skb_mac_header(skb));
1807 
1808 	return segs;
1809 }
1810 EXPORT_SYMBOL(skb_gso_segment);
1811 
1812 /* Take action when hardware reception checksum errors are detected. */
1813 #ifdef CONFIG_BUG
1814 void netdev_rx_csum_fault(struct net_device *dev)
1815 {
1816 	if (net_ratelimit()) {
1817 		printk(KERN_ERR "%s: hw csum failure.\n",
1818 			dev ? dev->name : "<unknown>");
1819 		dump_stack();
1820 	}
1821 }
1822 EXPORT_SYMBOL(netdev_rx_csum_fault);
1823 #endif
1824 
1825 /* Actually, we should eliminate this check as soon as we know that:
1826  * 1. An IOMMU is present and allows mapping all the memory.
1827  * 2. No high memory really exists on this machine.
1828  */
1829 
1830 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1831 {
1832 #ifdef CONFIG_HIGHMEM
1833 	int i;
1834 	if (!(dev->features & NETIF_F_HIGHDMA)) {
1835 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1836 			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1837 				return 1;
1838 	}
1839 
1840 	if (PCI_DMA_BUS_IS_PHYS) {
1841 		struct device *pdev = dev->dev.parent;
1842 
1843 		if (!pdev)
1844 			return 0;
1845 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1846 			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
1847 			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
1848 				return 1;
1849 		}
1850 	}
1851 #endif
1852 	return 0;
1853 }
1854 
1855 struct dev_gso_cb {
1856 	void (*destructor)(struct sk_buff *skb);
1857 };
1858 
1859 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1860 
1861 static void dev_gso_skb_destructor(struct sk_buff *skb)
1862 {
1863 	struct dev_gso_cb *cb;
1864 
1865 	do {
1866 		struct sk_buff *nskb = skb->next;
1867 
1868 		skb->next = nskb->next;
1869 		nskb->next = NULL;
1870 		kfree_skb(nskb);
1871 	} while (skb->next);
1872 
1873 	cb = DEV_GSO_CB(skb);
1874 	if (cb->destructor)
1875 		cb->destructor(skb);
1876 }
1877 
1878 /**
1879  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1880  *	@skb: buffer to segment
1881  *
1882  *	This function segments the given skb and stores the list of segments
1883  *	in skb->next.
1884  */
1885 static int dev_gso_segment(struct sk_buff *skb)
1886 {
1887 	struct net_device *dev = skb->dev;
1888 	struct sk_buff *segs;
1889 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1890 					 NETIF_F_SG : 0);
1891 
1892 	segs = skb_gso_segment(skb, features);
1893 
1894 	/* Verifying header integrity only. */
1895 	if (!segs)
1896 		return 0;
1897 
1898 	if (IS_ERR(segs))
1899 		return PTR_ERR(segs);
1900 
1901 	skb->next = segs;
1902 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1903 	skb->destructor = dev_gso_skb_destructor;
1904 
1905 	return 0;
1906 }
1907 
1908 /*
1909  * Try to orphan skb early, right before transmission by the device.
1910  * We cannot orphan skb if tx timestamp is requested, since
1911  * drivers need to call skb_tstamp_tx() to send the timestamp.
1912  */
1913 static inline void skb_orphan_try(struct sk_buff *skb)
1914 {
1915 	struct sock *sk = skb->sk;
1916 
1917 	if (sk && !skb_tx(skb)->flags) {
1918 		/* skb_tx_hash() won't be able to get the sk once the skb is
1919 		 * orphaned, so copy sk_hash into skb->rxhash first.
1920 		 */
1921 		if (!skb->rxhash)
1922 			skb->rxhash = sk->sk_hash;
1923 		skb_orphan(skb);
1924 	}
1925 }
1926 
1927 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1928 			struct netdev_queue *txq)
1929 {
1930 	const struct net_device_ops *ops = dev->netdev_ops;
1931 	int rc = NETDEV_TX_OK;
1932 
1933 	if (likely(!skb->next)) {
1934 		if (!list_empty(&ptype_all))
1935 			dev_queue_xmit_nit(skb, dev);
1936 
1937 		/*
1938 		 * If the device doesn't need skb->dst, release it right now
1939 		 * while it's still hot in this CPU's cache.
1940 		 */
1941 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1942 			skb_dst_drop(skb);
1943 
1944 		skb_orphan_try(skb);
1945 
1946 		if (netif_needs_gso(dev, skb)) {
1947 			if (unlikely(dev_gso_segment(skb)))
1948 				goto out_kfree_skb;
1949 			if (skb->next)
1950 				goto gso;
1951 		}
1952 
1953 		rc = ops->ndo_start_xmit(skb, dev);
1954 		if (rc == NETDEV_TX_OK)
1955 			txq_trans_update(txq);
1956 		return rc;
1957 	}
1958 
1959 gso:
1960 	do {
1961 		struct sk_buff *nskb = skb->next;
1962 
1963 		skb->next = nskb->next;
1964 		nskb->next = NULL;
1965 
1966 		/*
1967 		 * If the device doesn't need nskb->dst, release it right now
1968 		 * while it's still hot in this CPU's cache.
1969 		 */
1970 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1971 			skb_dst_drop(nskb);
1972 
1973 		rc = ops->ndo_start_xmit(nskb, dev);
1974 		if (unlikely(rc != NETDEV_TX_OK)) {
1975 			if (rc & ~NETDEV_TX_MASK)
1976 				goto out_kfree_gso_skb;
1977 			nskb->next = skb->next;
1978 			skb->next = nskb;
1979 			return rc;
1980 		}
1981 		txq_trans_update(txq);
1982 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1983 			return NETDEV_TX_BUSY;
1984 	} while (skb->next);
1985 
1986 out_kfree_gso_skb:
1987 	if (likely(skb->next == NULL))
1988 		skb->destructor = DEV_GSO_CB(skb)->destructor;
1989 out_kfree_skb:
1990 	kfree_skb(skb);
1991 	return rc;
1992 }
1993 
1994 static u32 hashrnd __read_mostly;
1995 
1996 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1997 {
1998 	u32 hash;
1999 
2000 	if (skb_rx_queue_recorded(skb)) {
2001 		hash = skb_get_rx_queue(skb);
2002 		while (unlikely(hash >= dev->real_num_tx_queues))
2003 			hash -= dev->real_num_tx_queues;
2004 		return hash;
2005 	}
2006 
2007 	if (skb->sk && skb->sk->sk_hash)
2008 		hash = skb->sk->sk_hash;
2009 	else
2010 		hash = (__force u16) skb->protocol ^ skb->rxhash;
2011 	hash = jhash_1word(hash, hashrnd);
2012 
2013 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
2014 }
2015 EXPORT_SYMBOL(skb_tx_hash);
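
/*
 * Worked example (editorial addition): the final scaling step maps a
 * 32-bit hash uniformly onto [0, real_num_tx_queues) without modulo
 * bias. For hash = 0x80000000 and 4 TX queues:
 *
 *	((u64)0x80000000 * 4) >> 32 == 2
 *
 * so this flow is pinned to TX queue 2.
 */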
2016 
2017 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2018 {
2019 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
2020 		if (net_ratelimit()) {
2021 			pr_warning("%s selects TX queue %d, but "
2022 				"real number of TX queues is %d\n",
2023 				dev->name, queue_index, dev->real_num_tx_queues);
2024 		}
2025 		return 0;
2026 	}
2027 	return queue_index;
2028 }
2029 
2030 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2031 					struct sk_buff *skb)
2032 {
2033 	int queue_index;
2034 	struct sock *sk = skb->sk;
2035 
2036 	queue_index = sk_tx_queue_get(sk);
2037 	if (queue_index < 0) {
2038 		const struct net_device_ops *ops = dev->netdev_ops;
2039 
2040 		if (ops->ndo_select_queue) {
2041 			queue_index = ops->ndo_select_queue(dev, skb);
2042 			queue_index = dev_cap_txqueue(dev, queue_index);
2043 		} else {
2044 			queue_index = 0;
2045 			if (dev->real_num_tx_queues > 1)
2046 				queue_index = skb_tx_hash(dev, skb);
2047 
2048 			if (sk) {
2049 				struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
2050 
2051 				if (dst && skb_dst(skb) == dst)
2052 					sk_tx_queue_set(sk, queue_index);
2053 			}
2054 		}
2055 	}
2056 
2057 	skb_set_queue_mapping(skb, queue_index);
2058 	return netdev_get_tx_queue(dev, queue_index);
2059 }
2060 
2061 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2062 				 struct net_device *dev,
2063 				 struct netdev_queue *txq)
2064 {
2065 	spinlock_t *root_lock = qdisc_lock(q);
2066 	int rc;
2067 
2068 	spin_lock(root_lock);
2069 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2070 		kfree_skb(skb);
2071 		rc = NET_XMIT_DROP;
2072 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2073 		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
2074 		/*
2075 		 * This is a work-conserving queue; there are no old skbs
2076 		 * waiting to be sent out; and the qdisc is not running -
2077 		 * xmit the skb directly.
2078 		 */
2079 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2080 			skb_dst_force(skb);
2081 		__qdisc_update_bstats(q, skb->len);
2082 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
2083 			__qdisc_run(q);
2084 		else
2085 			clear_bit(__QDISC_STATE_RUNNING, &q->state);
2086 
2087 		rc = NET_XMIT_SUCCESS;
2088 	} else {
2089 		skb_dst_force(skb);
2090 		rc = qdisc_enqueue_root(skb, q);
2091 		qdisc_run(q);
2092 	}
2093 	spin_unlock(root_lock);
2094 
2095 	return rc;
2096 }
2097 
2098 /*
2099  * Returns true if either:
2100  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
2101  *	2. skb is fragmented and the device does not support SG, or
2102  *	   at least one of the fragments is in highmem and the device
2103  *	   does not support DMA from it.
2104  */
2105 static inline int skb_needs_linearize(struct sk_buff *skb,
2106 				      struct net_device *dev)
2107 {
2108 	return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
2109 	       (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
2110 					      illegal_highdma(dev, skb)));
2111 }
2112 
2113 /**
2114  *	dev_queue_xmit - transmit a buffer
2115  *	@skb: buffer to transmit
2116  *
2117  *	Queue a buffer for transmission to a network device. The caller must
2118  *	have set the device and priority and built the buffer before calling
2119  *	this function. The function can be called from an interrupt.
2120  *
2121  *	A negative errno code is returned on a failure. A success does not
2122  *	guarantee the frame will be transmitted as it may be dropped due
2123  *	to congestion or traffic shaping.
2124  *
2125  * -----------------------------------------------------------------------------------
2126  *      I notice this method can also return errors from the queue disciplines,
2127  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
2128  *      be positive.
2129  *
2130  *      Regardless of the return value, the skb is consumed, so it is currently
2131  *      difficult to retry a send to this method.  (You can bump the ref count
2132  *      before sending to hold a reference for retry if you are careful.)
2133  *
2134  *      When calling this method, interrupts MUST be enabled.  This is because
2135  *      the BH enable code must have IRQs enabled so that it will not deadlock.
2136  *          --BLG
2137  */
2138 int dev_queue_xmit(struct sk_buff *skb)
2139 {
2140 	struct net_device *dev = skb->dev;
2141 	struct netdev_queue *txq;
2142 	struct Qdisc *q;
2143 	int rc = -ENOMEM;
2144 
2145 	/* GSO will handle the following emulations directly. */
2146 	if (netif_needs_gso(dev, skb))
2147 		goto gso;
2148 
2149 	/* Convert a paged skb to linear, if required */
2150 	if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
2151 		goto out_kfree_skb;
2152 
2153 	/* If packet is not checksummed and device does not support
2154 	 * checksumming for this protocol, complete checksumming here.
2155 	 */
2156 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2157 		skb_set_transport_header(skb, skb->csum_start -
2158 					      skb_headroom(skb));
2159 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
2160 			goto out_kfree_skb;
2161 	}
2162 
2163 gso:
2164 	/* Disable soft irqs for various locks below. Also
2165 	 * stops preemption for RCU.
2166 	 */
2167 	rcu_read_lock_bh();
2168 
2169 	txq = dev_pick_tx(dev, skb);
2170 	q = rcu_dereference_bh(txq->qdisc);
2171 
2172 #ifdef CONFIG_NET_CLS_ACT
2173 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2174 #endif
2175 	if (q->enqueue) {
2176 		rc = __dev_xmit_skb(skb, q, dev, txq);
2177 		goto out;
2178 	}
2179 
2180 	/* The device has no queue. Common case for software devices:
2181 	   loopback, all sorts of tunnels...
2182 
2183 	   Really, it is unlikely that netif_tx_lock protection is necessary
2184 	   here.  (f.e. loopback and IP tunnels are clean, ignoring statistics
2185 	   counters.)
2186 	   However, it is possible that they rely on the protection
2187 	   we provide here.
2188 
2189 	   Check this and take the lock. It is not prone to deadlocks.
2190 	   Alternatively, use the noqueue qdisc; it is even simpler 8)
2191 	 */
2192 	if (dev->flags & IFF_UP) {
2193 		int cpu = smp_processor_id(); /* ok because BHs are off */
2194 
2195 		if (txq->xmit_lock_owner != cpu) {
2196 
2197 			HARD_TX_LOCK(dev, txq, cpu);
2198 
2199 			if (!netif_tx_queue_stopped(txq)) {
2200 				rc = dev_hard_start_xmit(skb, dev, txq);
2201 				if (dev_xmit_complete(rc)) {
2202 					HARD_TX_UNLOCK(dev, txq);
2203 					goto out;
2204 				}
2205 			}
2206 			HARD_TX_UNLOCK(dev, txq);
2207 			if (net_ratelimit())
2208 				printk(KERN_CRIT "Virtual device %s asks to "
2209 				       "queue packet!\n", dev->name);
2210 		} else {
2211 			/* Recursion is detected! It is possible,
2212 			 * unfortunately */
2213 			if (net_ratelimit())
2214 				printk(KERN_CRIT "Dead loop on virtual device "
2215 				       "%s, fix it urgently!\n", dev->name);
2216 		}
2217 	}
2218 
2219 	rc = -ENETDOWN;
2220 	rcu_read_unlock_bh();
2221 
2222 out_kfree_skb:
2223 	kfree_skb(skb);
2224 	return rc;
2225 out:
2226 	rcu_read_unlock_bh();
2227 	return rc;
2228 }
2229 EXPORT_SYMBOL(dev_queue_xmit);
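
/*
 * Minimal caller-side sketch (an assumed usage pattern, not code from
 * this file): a protocol layer sets the egress device and protocol,
 * then hands the skb off. dev_queue_xmit() consumes the skb whatever
 * the outcome:
 *
 *	skb->dev = dev;
 *	skb->protocol = htons(ETH_P_IP);
 *	err = dev_queue_xmit(skb);
 *
 * where err is a negative errno or a NET_XMIT_* code.
 */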
2230 
2231 
2232 /*=======================================================================
2233 			Receiver routines
2234   =======================================================================*/
2235 
2236 int netdev_max_backlog __read_mostly = 1000;
2237 int netdev_tstamp_prequeue __read_mostly = 1;
2238 int netdev_budget __read_mostly = 300;
2239 int weight_p __read_mostly = 64;            /* old backlog weight */
2240 
2241 /* Called with irq disabled */
2242 static inline void ____napi_schedule(struct softnet_data *sd,
2243 				     struct napi_struct *napi)
2244 {
2245 	list_add_tail(&napi->poll_list, &sd->poll_list);
2246 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2247 }
2248 
2249 #ifdef CONFIG_RPS
2250 
2251 /* One global table that all flow-based protocols share. */
2252 struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2253 EXPORT_SYMBOL(rps_sock_flow_table);
2254 
2255 /*
2256  * get_rps_cpu is called from netif_receive_skb and returns the target
2257  * CPU from the RPS map of the receiving queue for a given skb.
2258  * rcu_read_lock must be held on entry.
2259  */
2260 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2261 		       struct rps_dev_flow **rflowp)
2262 {
2263 	struct ipv6hdr *ip6;
2264 	struct iphdr *ip;
2265 	struct netdev_rx_queue *rxqueue;
2266 	struct rps_map *map;
2267 	struct rps_dev_flow_table *flow_table;
2268 	struct rps_sock_flow_table *sock_flow_table;
2269 	int cpu = -1;
2270 	u8 ip_proto;
2271 	u16 tcpu;
2272 	u32 addr1, addr2, ihl;
2273 	union {
2274 		u32 v32;
2275 		u16 v16[2];
2276 	} ports;
2277 
2278 	if (skb_rx_queue_recorded(skb)) {
2279 		u16 index = skb_get_rx_queue(skb);
2280 		if (unlikely(index >= dev->num_rx_queues)) {
2281 			WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2282 				"on queue %u, but number of RX queues is %u\n",
2283 				dev->name, index, dev->num_rx_queues);
2284 			goto done;
2285 		}
2286 		rxqueue = dev->_rx + index;
2287 	} else
2288 		rxqueue = dev->_rx;
2289 
2290 	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2291 		goto done;
2292 
2293 	if (skb->rxhash)
2294 		goto got_hash; /* Skip hash computation on packet header */
2295 
2296 	switch (skb->protocol) {
2297 	case __constant_htons(ETH_P_IP):
2298 		if (!pskb_may_pull(skb, sizeof(*ip)))
2299 			goto done;
2300 
2301 		ip = (struct iphdr *) skb->data;
2302 		ip_proto = ip->protocol;
2303 		addr1 = (__force u32) ip->saddr;
2304 		addr2 = (__force u32) ip->daddr;
2305 		ihl = ip->ihl;
2306 		break;
2307 	case __constant_htons(ETH_P_IPV6):
2308 		if (!pskb_may_pull(skb, sizeof(*ip6)))
2309 			goto done;
2310 
2311 		ip6 = (struct ipv6hdr *) skb->data;
2312 		ip_proto = ip6->nexthdr;
2313 		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2314 		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2315 		ihl = (40 >> 2);
2316 		break;
2317 	default:
2318 		goto done;
2319 	}
2320 	switch (ip_proto) {
2321 	case IPPROTO_TCP:
2322 	case IPPROTO_UDP:
2323 	case IPPROTO_DCCP:
2324 	case IPPROTO_ESP:
2325 	case IPPROTO_AH:
2326 	case IPPROTO_SCTP:
2327 	case IPPROTO_UDPLITE:
2328 		if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2329 			ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2330 			if (ports.v16[1] < ports.v16[0])
2331 				swap(ports.v16[0], ports.v16[1]);
2332 			break;
2333 		}
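		/* pskb_may_pull() failed: fall through and hash with
		 * zeroed ports.
		 */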
2334 	default:
2335 		ports.v32 = 0;
2336 		break;
2337 	}
2338 
2339 	/* get a consistent hash (same value on both flow directions) */
2340 	if (addr2 < addr1)
2341 		swap(addr1, addr2);
2342 	skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2343 	if (!skb->rxhash)
2344 		skb->rxhash = 1;
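	/*
	 * Editorial note: sorting the address pair and the port pair
	 * before hashing makes the hash direction-agnostic. For example,
	 * 10.0.0.1:80 -> 10.0.0.2:5000 and its reply both canonicalize
	 * to the same (addr1, addr2, ports) triple, so jhash_3words()
	 * yields a single rxhash per flow.
	 */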
2345 
2346 got_hash:
2347 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2348 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
2349 	if (flow_table && sock_flow_table) {
2350 		u16 next_cpu;
2351 		struct rps_dev_flow *rflow;
2352 
2353 		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2354 		tcpu = rflow->cpu;
2355 
2356 		next_cpu = sock_flow_table->ents[skb->rxhash &
2357 		    sock_flow_table->mask];
2358 
2359 		/*
2360 		 * If the desired CPU (where last recvmsg was done) is
2361 		 * different from current CPU (one in the rx-queue flow
2362 		 * table entry), switch if one of the following holds:
2363 		 *   - Current CPU is unset (equal to RPS_NO_CPU).
2364 		 *   - Current CPU is offline.
2365 		 *   - The current CPU's queue tail has advanced beyond the
2366 		 *     last packet that was enqueued using this table entry.
2367 		 *     This guarantees that all previous packets for the flow
2368 		 *     have been dequeued, thus preserving in order delivery.
2369 		 */
2370 		if (unlikely(tcpu != next_cpu) &&
2371 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2372 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2373 		      rflow->last_qtail)) >= 0)) {
2374 			tcpu = rflow->cpu = next_cpu;
2375 			if (tcpu != RPS_NO_CPU)
2376 				rflow->last_qtail = per_cpu(softnet_data,
2377 				    tcpu).input_queue_head;
2378 		}
2379 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2380 			*rflowp = rflow;
2381 			cpu = tcpu;
2382 			goto done;
2383 		}
2384 	}
2385 
2386 	map = rcu_dereference(rxqueue->rps_map);
2387 	if (map) {
2388 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2389 
2390 		if (cpu_online(tcpu)) {
2391 			cpu = tcpu;
2392 			goto done;
2393 		}
2394 	}
2395 
2396 done:
2397 	return cpu;
2398 }
2399 
2400 /* Called from hardirq (IPI) context */
2401 static void rps_trigger_softirq(void *data)
2402 {
2403 	struct softnet_data *sd = data;
2404 
2405 	____napi_schedule(sd, &sd->backlog);
2406 	sd->received_rps++;
2407 }
2408 
2409 #endif /* CONFIG_RPS */
2410 
2411 /*
2412  * Check if this softnet_data structure belongs to another CPU.
2413  * If yes, queue it to our IPI list and return 1;
2414  * if no, return 0.
2415  */
2416 static int rps_ipi_queued(struct softnet_data *sd)
2417 {
2418 #ifdef CONFIG_RPS
2419 	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2420 
2421 	if (sd != mysd) {
2422 		sd->rps_ipi_next = mysd->rps_ipi_list;
2423 		mysd->rps_ipi_list = sd;
2424 
2425 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2426 		return 1;
2427 	}
2428 #endif /* CONFIG_RPS */
2429 	return 0;
2430 }
2431 
2432 /*
2433  * enqueue_to_backlog is called to queue an skb to a per-CPU backlog
2434  * queue (which may be a remote CPU's queue).
2435  */
2436 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2437 			      unsigned int *qtail)
2438 {
2439 	struct softnet_data *sd;
2440 	unsigned long flags;
2441 
2442 	sd = &per_cpu(softnet_data, cpu);
2443 
2444 	local_irq_save(flags);
2445 
2446 	rps_lock(sd);
2447 	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2448 		if (skb_queue_len(&sd->input_pkt_queue)) {
2449 enqueue:
2450 			__skb_queue_tail(&sd->input_pkt_queue, skb);
2451 			input_queue_tail_incr_save(sd, qtail);
2452 			rps_unlock(sd);
2453 			local_irq_restore(flags);
2454 			return NET_RX_SUCCESS;
2455 		}
2456 
2457 		/* Schedule NAPI for the backlog device.
2458 		 * We can use a non-atomic operation since we own the queue lock.
2459 		 */
2460 		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2461 			if (!rps_ipi_queued(sd))
2462 				____napi_schedule(sd, &sd->backlog);
2463 		}
2464 		goto enqueue;
2465 	}
2466 
2467 	sd->dropped++;
2468 	rps_unlock(sd);
2469 
2470 	local_irq_restore(flags);
2471 
2472 	kfree_skb(skb);
2473 	return NET_RX_DROP;
2474 }
2475 
2476 /**
2477  *	netif_rx	-	post buffer to the network code
2478  *	@skb: buffer to post
2479  *
2480  *	This function receives a packet from a device driver and queues it for
2481  *	the upper (protocol) levels to process.  It always succeeds. The buffer
2482  *	may be dropped during processing for congestion control or by the
2483  *	protocol layers.
2484  *
2485  *	return values:
2486  *	NET_RX_SUCCESS	(no congestion)
2487  *	NET_RX_DROP     (packet was dropped)
2488  *
2489  */
2490 
2491 int netif_rx(struct sk_buff *skb)
2492 {
2493 	int ret;
2494 
2495 	/* if netpoll wants it, pretend we never saw it */
2496 	if (netpoll_rx(skb))
2497 		return NET_RX_DROP;
2498 
2499 	if (netdev_tstamp_prequeue)
2500 		net_timestamp_check(skb);
2501 
2502 #ifdef CONFIG_RPS
2503 	{
2504 		struct rps_dev_flow voidflow, *rflow = &voidflow;
2505 		int cpu;
2506 
2507 		rcu_read_lock();
2508 
2509 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
2510 		if (cpu < 0)
2511 			cpu = smp_processor_id();
2512 
2513 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2514 
2515 		rcu_read_unlock();
2516 	}
2517 #else
2518 	{
2519 		unsigned int qtail;
2520 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2521 		put_cpu();
2522 	}
2523 #endif
2524 	return ret;
2525 }
2526 EXPORT_SYMBOL(netif_rx);
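
/*
 * Illustrative driver-side usage (a sketch, not from this file): a
 * non-NAPI driver typically feeds each received frame to the stack
 * from its interrupt handler:
 *
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 *
 * which queues the skb on a per-CPU backlog; NAPI drivers should call
 * netif_receive_skb() from their poll routine instead.
 */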
2527 
2528 int netif_rx_ni(struct sk_buff *skb)
2529 {
2530 	int err;
2531 
2532 	preempt_disable();
2533 	err = netif_rx(skb);
2534 	if (local_softirq_pending())
2535 		do_softirq();
2536 	preempt_enable();
2537 
2538 	return err;
2539 }
2540 EXPORT_SYMBOL(netif_rx_ni);
2541 
2542 static void net_tx_action(struct softirq_action *h)
2543 {
2544 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2545 
2546 	if (sd->completion_queue) {
2547 		struct sk_buff *clist;
2548 
2549 		local_irq_disable();
2550 		clist = sd->completion_queue;
2551 		sd->completion_queue = NULL;
2552 		local_irq_enable();
2553 
2554 		while (clist) {
2555 			struct sk_buff *skb = clist;
2556 			clist = clist->next;
2557 
2558 			WARN_ON(atomic_read(&skb->users));
2559 			__kfree_skb(skb);
2560 		}
2561 	}
2562 
2563 	if (sd->output_queue) {
2564 		struct Qdisc *head;
2565 
2566 		local_irq_disable();
2567 		head = sd->output_queue;
2568 		sd->output_queue = NULL;
2569 		sd->output_queue_tailp = &sd->output_queue;
2570 		local_irq_enable();
2571 
2572 		while (head) {
2573 			struct Qdisc *q = head;
2574 			spinlock_t *root_lock;
2575 
2576 			head = head->next_sched;
2577 
2578 			root_lock = qdisc_lock(q);
2579 			if (spin_trylock(root_lock)) {
2580 				smp_mb__before_clear_bit();
2581 				clear_bit(__QDISC_STATE_SCHED,
2582 					  &q->state);
2583 				qdisc_run(q);
2584 				spin_unlock(root_lock);
2585 			} else {
2586 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2587 					      &q->state)) {
2588 					__netif_reschedule(q);
2589 				} else {
2590 					smp_mb__before_clear_bit();
2591 					clear_bit(__QDISC_STATE_SCHED,
2592 						  &q->state);
2593 				}
2594 			}
2595 		}
2596 	}
2597 }
2598 
2599 static inline int deliver_skb(struct sk_buff *skb,
2600 			      struct packet_type *pt_prev,
2601 			      struct net_device *orig_dev)
2602 {
2603 	atomic_inc(&skb->users);
2604 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2605 }
2606 
2607 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2608 
2609 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2610 /* This hook is defined here for ATM LANE */
2611 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2612 			     unsigned char *addr) __read_mostly;
2613 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2614 #endif
2615 
2616 /*
2617  * If the bridge module is loaded, call the bridging hook.
2618  * Returns NULL if the packet was consumed.
2619  */
2620 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2621 					struct sk_buff *skb) __read_mostly;
2622 EXPORT_SYMBOL_GPL(br_handle_frame_hook);
2623 
2624 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2625 					    struct packet_type **pt_prev, int *ret,
2626 					    struct net_device *orig_dev)
2627 {
2628 	struct net_bridge_port *port;
2629 
2630 	if (skb->pkt_type == PACKET_LOOPBACK ||
2631 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2632 		return skb;
2633 
2634 	if (*pt_prev) {
2635 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2636 		*pt_prev = NULL;
2637 	}
2638 
2639 	return br_handle_frame_hook(port, skb);
2640 }
2641 #else
2642 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2643 #endif
2644 
2645 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2646 struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
2647 					     struct sk_buff *skb) __read_mostly;
2648 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2649 
2650 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2651 					     struct packet_type **pt_prev,
2652 					     int *ret,
2653 					     struct net_device *orig_dev)
2654 {
2655 	struct macvlan_port *port;
2656 
2657 	port = rcu_dereference(skb->dev->macvlan_port);
2658 	if (!port)
2659 		return skb;
2660 
2661 	if (*pt_prev) {
2662 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2663 		*pt_prev = NULL;
2664 	}
2665 	return macvlan_handle_frame_hook(port, skb);
2666 }
2667 #else
2668 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2669 #endif
2670 
2671 #ifdef CONFIG_NET_CLS_ACT
2672 /* TODO: Maybe we should just force sch_ingress to be compiled in
2673  * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
2674  * instructions (a compare and two extra stores) whenever it is off
2675  * but CONFIG_NET_CLS_ACT is on.
2676  * NOTE: This doesn't remove any functionality; if you don't have
2677  * the ingress scheduler, you just can't add policies on ingress.
2678  *
2679  */
2680 static int ing_filter(struct sk_buff *skb)
2681 {
2682 	struct net_device *dev = skb->dev;
2683 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2684 	struct netdev_queue *rxq;
2685 	int result = TC_ACT_OK;
2686 	struct Qdisc *q;
2687 
2688 	if (MAX_RED_LOOP < ttl++) {
2689 		printk(KERN_WARNING
2690 		       "Redir loop detected, dropping packet (%d->%d)\n",
2691 		       skb->skb_iif, dev->ifindex);
2692 		return TC_ACT_SHOT;
2693 	}
2694 
2695 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2696 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2697 
2698 	rxq = &dev->rx_queue;
2699 
2700 	q = rxq->qdisc;
2701 	if (q != &noop_qdisc) {
2702 		spin_lock(qdisc_lock(q));
2703 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2704 			result = qdisc_enqueue_root(skb, q);
2705 		spin_unlock(qdisc_lock(q));
2706 	}
2707 
2708 	return result;
2709 }
2710 
2711 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2712 					 struct packet_type **pt_prev,
2713 					 int *ret, struct net_device *orig_dev)
2714 {
2715 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2716 		goto out;
2717 
2718 	if (*pt_prev) {
2719 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2720 		*pt_prev = NULL;
2721 	} else {
2722 		/* Huh? Why does turning on AF_PACKET affect this? */
2723 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2724 	}
2725 
2726 	switch (ing_filter(skb)) {
2727 	case TC_ACT_SHOT:
2728 	case TC_ACT_STOLEN:
2729 		kfree_skb(skb);
2730 		return NULL;
2731 	}
2732 
2733 out:
2734 	skb->tc_verd = 0;
2735 	return skb;
2736 }
2737 #endif
2738 
2739 /*
2740  * 	netif_nit_deliver - deliver received packets to network taps
2741  * 	@skb: buffer
2742  *
2743  * 	This function is used to deliver incoming packets to network
2744  * 	taps. It should be used when the normal netif_receive_skb path
2745  * 	is bypassed, for example because of VLAN acceleration.
2746  */
2747 void netif_nit_deliver(struct sk_buff *skb)
2748 {
2749 	struct packet_type *ptype;
2750 
2751 	if (list_empty(&ptype_all))
2752 		return;
2753 
2754 	skb_reset_network_header(skb);
2755 	skb_reset_transport_header(skb);
2756 	skb->mac_len = skb->network_header - skb->mac_header;
2757 
2758 	rcu_read_lock();
2759 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2760 		if (!ptype->dev || ptype->dev == skb->dev)
2761 			deliver_skb(skb, ptype, skb->dev);
2762 	}
2763 	rcu_read_unlock();
2764 }
2765 
2766 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2767 					      struct net_device *master)
2768 {
2769 	if (skb->pkt_type == PACKET_HOST) {
2770 		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2771 
2772 		memcpy(dest, master->dev_addr, ETH_ALEN);
2773 	}
2774 }
2775 
2776 /* On bonding slaves other than the currently active slave, suppress
2777  * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2778  * ARP on active-backup slaves with arp_validate enabled.
2779  */
2780 int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2781 {
2782 	struct net_device *dev = skb->dev;
2783 
2784 	if (master->priv_flags & IFF_MASTER_ARPMON)
2785 		dev->last_rx = jiffies;
2786 
2787 	if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
2788 		/* Unmangle the address: the local destination address
2789 		 * will always be the one the master has. This provides
2790 		 * the right functionality in a bridge.
2791 		 */
2792 		skb_bond_set_mac_by_master(skb, master);
2793 	}
2794 
2795 	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2796 		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2797 		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
2798 			return 0;
2799 
2800 		if (master->priv_flags & IFF_MASTER_ALB) {
2801 			if (skb->pkt_type != PACKET_BROADCAST &&
2802 			    skb->pkt_type != PACKET_MULTICAST)
2803 				return 0;
2804 		}
2805 		if (master->priv_flags & IFF_MASTER_8023AD &&
2806 		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2807 			return 0;
2808 
2809 		return 1;
2810 	}
2811 	return 0;
2812 }
2813 EXPORT_SYMBOL(__skb_bond_should_drop);
2814 
2815 static int __netif_receive_skb(struct sk_buff *skb)
2816 {
2817 	struct packet_type *ptype, *pt_prev;
2818 	struct net_device *orig_dev;
2819 	struct net_device *master;
2820 	struct net_device *null_or_orig;
2821 	struct net_device *orig_or_bond;
2822 	int ret = NET_RX_DROP;
2823 	__be16 type;
2824 
2825 	if (!netdev_tstamp_prequeue)
2826 		net_timestamp_check(skb);
2827 
2828 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2829 		return NET_RX_SUCCESS;
2830 
2831 	/* if we've gotten here through NAPI, check netpoll */
2832 	if (netpoll_receive_skb(skb))
2833 		return NET_RX_DROP;
2834 
2835 	if (!skb->skb_iif)
2836 		skb->skb_iif = skb->dev->ifindex;
2837 
2838 	/*
2839 	 * bonding note: skbs received on inactive slaves should only
2840 	 * be delivered to pkt handlers that are exact matches.  Also
2841 	 * the deliver_no_wcard flag will be set.  If packet handlers
2842 	 * are sensitive to duplicate packets, these skbs will need to
2843 	 * be dropped at the handler.  The vlan accel path may have
2844 	 * already set the deliver_no_wcard flag.
2845 	 */
2846 	null_or_orig = NULL;
2847 	orig_dev = skb->dev;
2848 	master = ACCESS_ONCE(orig_dev->master);
2849 	if (skb->deliver_no_wcard)
2850 		null_or_orig = orig_dev;
2851 	else if (master) {
2852 		if (skb_bond_should_drop(skb, master)) {
2853 			skb->deliver_no_wcard = 1;
2854 			null_or_orig = orig_dev; /* deliver only exact match */
2855 		} else
2856 			skb->dev = master;
2857 	}
2858 
2859 	__get_cpu_var(softnet_data).processed++;
2860 
2861 	skb_reset_network_header(skb);
2862 	skb_reset_transport_header(skb);
2863 	skb->mac_len = skb->network_header - skb->mac_header;
2864 
2865 	pt_prev = NULL;
2866 
2867 	rcu_read_lock();
2868 
2869 #ifdef CONFIG_NET_CLS_ACT
2870 	if (skb->tc_verd & TC_NCLS) {
2871 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2872 		goto ncls;
2873 	}
2874 #endif
2875 
2876 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2877 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2878 		    ptype->dev == orig_dev) {
2879 			if (pt_prev)
2880 				ret = deliver_skb(skb, pt_prev, orig_dev);
2881 			pt_prev = ptype;
2882 		}
2883 	}
2884 
2885 #ifdef CONFIG_NET_CLS_ACT
2886 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2887 	if (!skb)
2888 		goto out;
2889 ncls:
2890 #endif
2891 
2892 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2893 	if (!skb)
2894 		goto out;
2895 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2896 	if (!skb)
2897 		goto out;
2898 
2899 	/*
2900 	 * Make sure frames received on VLAN interfaces stacked on
2901 	 * bonding interfaces still make their way to any base bonding
2902 	 * device that may have registered for a specific ptype.  The
2903 	 * handler may have to adjust skb->dev and orig_dev.
2904 	 */
2905 	orig_or_bond = orig_dev;
2906 	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2907 	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
2908 		orig_or_bond = vlan_dev_real_dev(skb->dev);
2909 	}
2910 
2911 	type = skb->protocol;
2912 	list_for_each_entry_rcu(ptype,
2913 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2914 		if (ptype->type == type && (ptype->dev == null_or_orig ||
2915 		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
2916 		     ptype->dev == orig_or_bond)) {
2917 			if (pt_prev)
2918 				ret = deliver_skb(skb, pt_prev, orig_dev);
2919 			pt_prev = ptype;
2920 		}
2921 	}
2922 
2923 	if (pt_prev) {
2924 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2925 	} else {
2926 		kfree_skb(skb);
2927 		/* Jamal, now you will not be able to escape explaining
2928 		 * to me how you were going to use this. :-)
2929 		 */
2930 		ret = NET_RX_DROP;
2931 	}
2932 
2933 out:
2934 	rcu_read_unlock();
2935 	return ret;
2936 }
2937 
2938 /**
2939  *	netif_receive_skb - process receive buffer from network
2940  *	@skb: buffer to process
2941  *
2942  *	netif_receive_skb() is the main receive data processing function.
2943  *	It always succeeds. The buffer may be dropped during processing
2944  *	for congestion control or by the protocol layers.
2945  *
2946  *	This function may only be called from softirq context and interrupts
2947  *	should be enabled.
2948  *
2949  *	Return values (usually ignored):
2950  *	NET_RX_SUCCESS: no congestion
2951  *	NET_RX_DROP: packet was dropped
2952  */
2953 int netif_receive_skb(struct sk_buff *skb)
2954 {
2955 	if (netdev_tstamp_prequeue)
2956 		net_timestamp_check(skb);
2957 
2958 #ifdef CONFIG_RPS
2959 	{
2960 		struct rps_dev_flow voidflow, *rflow = &voidflow;
2961 		int cpu, ret;
2962 
2963 		rcu_read_lock();
2964 
2965 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
2966 
2967 		if (cpu >= 0) {
2968 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2969 			rcu_read_unlock();
2970 		} else {
2971 			rcu_read_unlock();
2972 			ret = __netif_receive_skb(skb);
2973 		}
2974 
2975 		return ret;
2976 	}
2977 #else
2978 	return __netif_receive_skb(skb);
2979 #endif
2980 }
2981 EXPORT_SYMBOL(netif_receive_skb);
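
/*
 * Editorial note on the RPS path above: steering is a cheap
 * re-dispatch. If, say, get_rps_cpu() selects CPU 3 while we run on
 * CPU 0, the skb is queued to CPU 3's backlog and an IPI is scheduled;
 * otherwise __netif_receive_skb() runs inline on this CPU.
 */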
2982 
2983 /* Network device is going away; flush any packets still pending.
2984  * Called with irqs disabled.
2985  */
2986 static void flush_backlog(void *arg)
2987 {
2988 	struct net_device *dev = arg;
2989 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2990 	struct sk_buff *skb, *tmp;
2991 
2992 	rps_lock(sd);
2993 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
2994 		if (skb->dev == dev) {
2995 			__skb_unlink(skb, &sd->input_pkt_queue);
2996 			kfree_skb(skb);
2997 			input_queue_head_incr(sd);
2998 		}
2999 	}
3000 	rps_unlock(sd);
3001 
3002 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3003 		if (skb->dev == dev) {
3004 			__skb_unlink(skb, &sd->process_queue);
3005 			kfree_skb(skb);
3006 			input_queue_head_incr(sd);
3007 		}
3008 	}
3009 }
3010 
3011 static int napi_gro_complete(struct sk_buff *skb)
3012 {
3013 	struct packet_type *ptype;
3014 	__be16 type = skb->protocol;
3015 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3016 	int err = -ENOENT;
3017 
3018 	if (NAPI_GRO_CB(skb)->count == 1) {
3019 		skb_shinfo(skb)->gso_size = 0;
3020 		goto out;
3021 	}
3022 
3023 	rcu_read_lock();
3024 	list_for_each_entry_rcu(ptype, head, list) {
3025 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3026 			continue;
3027 
3028 		err = ptype->gro_complete(skb);
3029 		break;
3030 	}
3031 	rcu_read_unlock();
3032 
3033 	if (err) {
3034 		WARN_ON(&ptype->list == head);
3035 		kfree_skb(skb);
3036 		return NET_RX_SUCCESS;
3037 	}
3038 
3039 out:
3040 	return netif_receive_skb(skb);
3041 }
3042 
3043 static void napi_gro_flush(struct napi_struct *napi)
3044 {
3045 	struct sk_buff *skb, *next;
3046 
3047 	for (skb = napi->gro_list; skb; skb = next) {
3048 		next = skb->next;
3049 		skb->next = NULL;
3050 		napi_gro_complete(skb);
3051 	}
3052 
3053 	napi->gro_count = 0;
3054 	napi->gro_list = NULL;
3055 }
3056 
3057 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3058 {
3059 	struct sk_buff **pp = NULL;
3060 	struct packet_type *ptype;
3061 	__be16 type = skb->protocol;
3062 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3063 	int same_flow;
3064 	int mac_len;
3065 	enum gro_result ret;
3066 
3067 	if (!(skb->dev->features & NETIF_F_GRO))
3068 		goto normal;
3069 
3070 	if (skb_is_gso(skb) || skb_has_frags(skb))
3071 		goto normal;
3072 
3073 	rcu_read_lock();
3074 	list_for_each_entry_rcu(ptype, head, list) {
3075 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3076 			continue;
3077 
3078 		skb_set_network_header(skb, skb_gro_offset(skb));
3079 		mac_len = skb->network_header - skb->mac_header;
3080 		skb->mac_len = mac_len;
3081 		NAPI_GRO_CB(skb)->same_flow = 0;
3082 		NAPI_GRO_CB(skb)->flush = 0;
3083 		NAPI_GRO_CB(skb)->free = 0;
3084 
3085 		pp = ptype->gro_receive(&napi->gro_list, skb);
3086 		break;
3087 	}
3088 	rcu_read_unlock();
3089 
3090 	if (&ptype->list == head)
3091 		goto normal;
3092 
3093 	same_flow = NAPI_GRO_CB(skb)->same_flow;
3094 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3095 
3096 	if (pp) {
3097 		struct sk_buff *nskb = *pp;
3098 
3099 		*pp = nskb->next;
3100 		nskb->next = NULL;
3101 		napi_gro_complete(nskb);
3102 		napi->gro_count--;
3103 	}
3104 
3105 	if (same_flow)
3106 		goto ok;
3107 
3108 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3109 		goto normal;
3110 
3111 	napi->gro_count++;
3112 	NAPI_GRO_CB(skb)->count = 1;
3113 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3114 	skb->next = napi->gro_list;
3115 	napi->gro_list = skb;
3116 	ret = GRO_HELD;
3117 
3118 pull:
3119 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
3120 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
3121 
3122 		BUG_ON(skb->end - skb->tail < grow);
3123 
3124 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3125 
3126 		skb->tail += grow;
3127 		skb->data_len -= grow;
3128 
3129 		skb_shinfo(skb)->frags[0].page_offset += grow;
3130 		skb_shinfo(skb)->frags[0].size -= grow;
3131 
3132 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
3133 			put_page(skb_shinfo(skb)->frags[0].page);
3134 			memmove(skb_shinfo(skb)->frags,
3135 				skb_shinfo(skb)->frags + 1,
3136 				--skb_shinfo(skb)->nr_frags);
3137 		}
3138 	}
3139 
3140 ok:
3141 	return ret;
3142 
3143 normal:
3144 	ret = GRO_NORMAL;
3145 	goto pull;
3146 }
3147 EXPORT_SYMBOL(dev_gro_receive);
3148 
3149 static gro_result_t
3150 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3151 {
3152 	struct sk_buff *p;
3153 
3154 	if (netpoll_rx_on(skb))
3155 		return GRO_NORMAL;
3156 
3157 	for (p = napi->gro_list; p; p = p->next) {
3158 		NAPI_GRO_CB(p)->same_flow =
3159 			(p->dev == skb->dev) &&
3160 			!compare_ether_header(skb_mac_header(p),
3161 					      skb_gro_mac_header(skb));
3162 		NAPI_GRO_CB(p)->flush = 0;
3163 	}
3164 
3165 	return dev_gro_receive(napi, skb);
3166 }
3167 
3168 gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3169 {
3170 	switch (ret) {
3171 	case GRO_NORMAL:
3172 		if (netif_receive_skb(skb))
3173 			ret = GRO_DROP;
3174 		break;
3175 
3176 	case GRO_DROP:
3177 	case GRO_MERGED_FREE:
3178 		kfree_skb(skb);
3179 		break;
3180 
3181 	case GRO_HELD:
3182 	case GRO_MERGED:
3183 		break;
3184 	}
3185 
3186 	return ret;
3187 }
3188 EXPORT_SYMBOL(napi_skb_finish);
3189 
3190 void skb_gro_reset_offset(struct sk_buff *skb)
3191 {
3192 	NAPI_GRO_CB(skb)->data_offset = 0;
3193 	NAPI_GRO_CB(skb)->frag0 = NULL;
3194 	NAPI_GRO_CB(skb)->frag0_len = 0;
3195 
3196 	if (skb->mac_header == skb->tail &&
3197 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
3198 		NAPI_GRO_CB(skb)->frag0 =
3199 			page_address(skb_shinfo(skb)->frags[0].page) +
3200 			skb_shinfo(skb)->frags[0].page_offset;
3201 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
3202 	}
3203 }
3204 EXPORT_SYMBOL(skb_gro_reset_offset);
3205 
3206 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3207 {
3208 	skb_gro_reset_offset(skb);
3209 
3210 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3211 }
3212 EXPORT_SYMBOL(napi_gro_receive);
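
/*
 * Usage sketch (assumed; the priv name is hypothetical): a GRO-aware
 * driver substitutes napi_gro_receive() for netif_receive_skb() in its
 * poll loop so consecutive same-flow frames can be merged before
 * protocol processing:
 *
 *	skb->protocol = eth_type_trans(skb, netdev);
 *	napi_gro_receive(&priv->napi, skb);
 */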
3213 
3214 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3215 {
3216 	__skb_pull(skb, skb_headlen(skb));
3217 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
3218 
3219 	napi->skb = skb;
3220 }
3221 EXPORT_SYMBOL(napi_reuse_skb);
3222 
3223 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3224 {
3225 	struct sk_buff *skb = napi->skb;
3226 
3227 	if (!skb) {
3228 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3229 		if (skb)
3230 			napi->skb = skb;
3231 	}
3232 	return skb;
3233 }
3234 EXPORT_SYMBOL(napi_get_frags);
3235 
3236 gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3237 			       gro_result_t ret)
3238 {
3239 	switch (ret) {
3240 	case GRO_NORMAL:
3241 	case GRO_HELD:
3242 		skb->protocol = eth_type_trans(skb, skb->dev);
3243 
3244 		if (ret == GRO_HELD)
3245 			skb_gro_pull(skb, -ETH_HLEN);
3246 		else if (netif_receive_skb(skb))
3247 			ret = GRO_DROP;
3248 		break;
3249 
3250 	case GRO_DROP:
3251 	case GRO_MERGED_FREE:
3252 		napi_reuse_skb(napi, skb);
3253 		break;
3254 
3255 	case GRO_MERGED:
3256 		break;
3257 	}
3258 
3259 	return ret;
3260 }
3261 EXPORT_SYMBOL(napi_frags_finish);
3262 
3263 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3264 {
3265 	struct sk_buff *skb = napi->skb;
3266 	struct ethhdr *eth;
3267 	unsigned int hlen;
3268 	unsigned int off;
3269 
3270 	napi->skb = NULL;
3271 
3272 	skb_reset_mac_header(skb);
3273 	skb_gro_reset_offset(skb);
3274 
3275 	off = skb_gro_offset(skb);
3276 	hlen = off + sizeof(*eth);
3277 	eth = skb_gro_header_fast(skb, off);
3278 	if (skb_gro_header_hard(skb, hlen)) {
3279 		eth = skb_gro_header_slow(skb, hlen, off);
3280 		if (unlikely(!eth)) {
3281 			napi_reuse_skb(napi, skb);
3282 			skb = NULL;
3283 			goto out;
3284 		}
3285 	}
3286 
3287 	skb_gro_pull(skb, sizeof(*eth));
3288 
3289 	/*
3290 	 * This works because the only protocols we care about don't require
3291 	 * special handling.  We'll fix it up properly at the end.
3292 	 */
3293 	skb->protocol = eth->h_proto;
3294 
3295 out:
3296 	return skb;
3297 }
3298 EXPORT_SYMBOL(napi_frags_skb);
3299 
3300 gro_result_t napi_gro_frags(struct napi_struct *napi)
3301 {
3302 	struct sk_buff *skb = napi_frags_skb(napi);
3303 
3304 	if (!skb)
3305 		return GRO_DROP;
3306 
3307 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3308 }
3309 EXPORT_SYMBOL(napi_gro_frags);
3310 
3311 /*
3312  * net_rps_action_and_irq_enable sends any pending IPIs for RPS.
3313  * Note: called with local irq disabled, but exits with local irq enabled.
3314  */
3315 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3316 {
3317 #ifdef CONFIG_RPS
3318 	struct softnet_data *remsd = sd->rps_ipi_list;
3319 
3320 	if (remsd) {
3321 		sd->rps_ipi_list = NULL;
3322 
3323 		local_irq_enable();
3324 
3325 		/* Send pending IPIs to kick RPS processing on remote CPUs. */
3326 		while (remsd) {
3327 			struct softnet_data *next = remsd->rps_ipi_next;
3328 
3329 			if (cpu_online(remsd->cpu))
3330 				__smp_call_function_single(remsd->cpu,
3331 							   &remsd->csd, 0);
3332 			remsd = next;
3333 		}
3334 	} else
3335 #endif
3336 		local_irq_enable();
3337 }
3338 
3339 static int process_backlog(struct napi_struct *napi, int quota)
3340 {
3341 	int work = 0;
3342 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
3343 
3344 #ifdef CONFIG_RPS
3345 	/* Check if we have pending IPIs; it's better to send them now
3346 	 * rather than waiting for net_rx_action() to end.
3347 	 */
3348 	if (sd->rps_ipi_list) {
3349 		local_irq_disable();
3350 		net_rps_action_and_irq_enable(sd);
3351 	}
3352 #endif
3353 	napi->weight = weight_p;
3354 	local_irq_disable();
3355 	while (work < quota) {
3356 		struct sk_buff *skb;
3357 		unsigned int qlen;
3358 
3359 		while ((skb = __skb_dequeue(&sd->process_queue))) {
3360 			local_irq_enable();
3361 			__netif_receive_skb(skb);
3362 			local_irq_disable();
3363 			input_queue_head_incr(sd);
3364 			if (++work >= quota) {
3365 				local_irq_enable();
3366 				return work;
3367 			}
3368 		}
3369 
3370 		rps_lock(sd);
3371 		qlen = skb_queue_len(&sd->input_pkt_queue);
3372 		if (qlen)
3373 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
3374 						   &sd->process_queue);
3375 
3376 		if (qlen < quota - work) {
3377 			/*
3378 			 * Inline a custom version of __napi_complete().
3379 			 * Only the current CPU owns and manipulates this napi,
3380 			 * and NAPI_STATE_SCHED is the only possible flag set on
3381 			 * the backlog, so we can use a plain write instead of
3382 			 * clear_bit() and don't need an smp_mb() memory barrier.
3383 			 */
3384 			list_del(&napi->poll_list);
3385 			napi->state = 0;
3386 
3387 			quota = work + qlen;
3388 		}
3389 		rps_unlock(sd);
3390 	}
3391 	local_irq_enable();
3392 
3393 	return work;
3394 }
3395 
3396 /**
3397  * __napi_schedule - schedule for receive
3398  * @n: entry to schedule
3399  *
3400  * The entry's receive function will be scheduled to run
3401  */
3402 void __napi_schedule(struct napi_struct *n)
3403 {
3404 	unsigned long flags;
3405 
3406 	local_irq_save(flags);
3407 	____napi_schedule(&__get_cpu_var(softnet_data), n);
3408 	local_irq_restore(flags);
3409 }
3410 EXPORT_SYMBOL(__napi_schedule);
3411 
3412 void __napi_complete(struct napi_struct *n)
3413 {
3414 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3415 	BUG_ON(n->gro_list);
3416 
3417 	list_del(&n->poll_list);
3418 	smp_mb__before_clear_bit();
3419 	clear_bit(NAPI_STATE_SCHED, &n->state);
3420 }
3421 EXPORT_SYMBOL(__napi_complete);
3422 
3423 void napi_complete(struct napi_struct *n)
3424 {
3425 	unsigned long flags;
3426 
3427 	/*
3428 	 * Don't let napi dequeue from the CPU poll list,
3429 	 * just in case it's running on a different CPU.
3430 	 */
3431 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3432 		return;
3433 
3434 	napi_gro_flush(n);
3435 	local_irq_save(flags);
3436 	__napi_complete(n);
3437 	local_irq_restore(flags);
3438 }
3439 EXPORT_SYMBOL(napi_complete);
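
/*
 * Canonical completion pattern (a sketch; the my_* helpers are
 * hypothetical): a driver's poll routine completes NAPI only when it
 * consumed less than its budget, then re-enables its RX interrupt:
 *
 *	work = my_clean_rx(priv, budget);
 *	if (work < budget) {
 *		napi_complete(napi);
 *		my_enable_rx_irq(priv);
 *	}
 *	return work;
 */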
3440 
3441 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3442 		    int (*poll)(struct napi_struct *, int), int weight)
3443 {
3444 	INIT_LIST_HEAD(&napi->poll_list);
3445 	napi->gro_count = 0;
3446 	napi->gro_list = NULL;
3447 	napi->skb = NULL;
3448 	napi->poll = poll;
3449 	napi->weight = weight;
3450 	list_add(&napi->dev_list, &dev->napi_list);
3451 	napi->dev = dev;
3452 #ifdef CONFIG_NETPOLL
3453 	spin_lock_init(&napi->poll_lock);
3454 	napi->poll_owner = -1;
3455 #endif
3456 	set_bit(NAPI_STATE_SCHED, &napi->state);
3457 }
3458 EXPORT_SYMBOL(netif_napi_add);
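
/*
 * Registration sketch (illustrative; my_poll is a hypothetical driver
 * callback): drivers typically register their poll routine at probe
 * time with the common weight of 64:
 *
 *	netif_napi_add(netdev, &priv->napi, my_poll, 64);
 *
 * and later arm it from the RX interrupt via napi_schedule().
 */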
3459 
3460 void netif_napi_del(struct napi_struct *napi)
3461 {
3462 	struct sk_buff *skb, *next;
3463 
3464 	list_del_init(&napi->dev_list);
3465 	napi_free_frags(napi);
3466 
3467 	for (skb = napi->gro_list; skb; skb = next) {
3468 		next = skb->next;
3469 		skb->next = NULL;
3470 		kfree_skb(skb);
3471 	}
3472 
3473 	napi->gro_list = NULL;
3474 	napi->gro_count = 0;
3475 }
3476 EXPORT_SYMBOL(netif_napi_del);
3477 
3478 static void net_rx_action(struct softirq_action *h)
3479 {
3480 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3481 	unsigned long time_limit = jiffies + 2;
3482 	int budget = netdev_budget;
3483 	void *have;
3484 
3485 	local_irq_disable();
3486 
3487 	while (!list_empty(&sd->poll_list)) {
3488 		struct napi_struct *n;
3489 		int work, weight;
3490 
3491 		/* If the softirq window is exhausted then punt.
3492 		 * Allow this to run for 2 jiffies, which allows
3493 		 * an average latency of 1.5/HZ.
3494 		 */
3495 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3496 			goto softnet_break;
3497 
3498 		local_irq_enable();
3499 
3500 		/* Even though interrupts have been re-enabled, this
3501 		 * access is safe because interrupts can only add new
3502 		 * entries to the tail of this list, and only ->poll()
3503 		 * calls can remove this head entry from the list.
3504 		 */
3505 		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3506 
3507 		have = netpoll_poll_lock(n);
3508 
3509 		weight = n->weight;
3510 
3511 		/* This NAPI_STATE_SCHED test is for avoiding a race
3512 		 * with netpoll's poll_napi().  Only the entity which
3513 		 * obtains the lock and sees NAPI_STATE_SCHED set will
3514 		 * actually make the ->poll() call.  Therefore we avoid
3515 		 * accidentally calling ->poll() when NAPI is not scheduled.
3516 		 */
3517 		work = 0;
3518 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3519 			work = n->poll(n, weight);
3520 			trace_napi_poll(n);
3521 		}
3522 
3523 		WARN_ON_ONCE(work > weight);
3524 
3525 		budget -= work;
3526 
3527 		local_irq_disable();
3528 
3529 		/* Drivers must not modify the NAPI state if they
3530 		 * consume the entire weight.  In such cases this code
3531 		 * still "owns" the NAPI instance and therefore can
3532 		 * move the instance around on the list at-will.
3533 		 */
3534 		if (unlikely(work == weight)) {
3535 			if (unlikely(napi_disable_pending(n))) {
3536 				local_irq_enable();
3537 				napi_complete(n);
3538 				local_irq_disable();
3539 			} else
3540 				list_move_tail(&n->poll_list, &sd->poll_list);
3541 		}
3542 
3543 		netpoll_poll_unlock(have);
3544 	}
3545 out:
3546 	net_rps_action_and_irq_enable(sd);
3547 
3548 #ifdef CONFIG_NET_DMA
3549 	/*
3550 	 * There may not be any more sk_buffs coming right now, so push
3551 	 * any pending DMA copies to hardware
3552 	 */
3553 	dma_issue_pending_all();
3554 #endif
3555 
3556 	return;
3557 
3558 softnet_break:
3559 	sd->time_squeeze++;
3560 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
3561 	goto out;
3562 }
3563 
3564 static gifconf_func_t *gifconf_list[NPROTO];
3565 
3566 /**
3567  *	register_gifconf	-	register a SIOCGIF handler
3568  *	@family: Address family
3569  *	@gifconf: Function handler
3570  *
3571  *	Register protocol dependent address dumping routines. The handler
3572  *	that is passed must not be freed or reused until it has been replaced
3573  *	by another handler.
3574  */
3575 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3576 {
3577 	if (family >= NPROTO)
3578 		return -EINVAL;
3579 	gifconf_list[family] = gifconf;
3580 	return 0;
3581 }
3582 EXPORT_SYMBOL(register_gifconf);
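
/*
 * Usage note (editorial; the exact call site lives elsewhere in the
 * tree): address families register their dumpers at init time, e.g.
 * IPv4 does roughly:
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * after which dev_ifconf() below can enumerate IPv4 addresses.
 */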
3583 
3584 
3585 /*
3586  *	Map an interface index to its name (SIOCGIFNAME)
3587  */
3588 
3589 /*
3590  *	We need this ioctl for efficient implementation of the
3591  *	if_indextoname() function required by the IPv6 API.  Without
3592  *	it, we would have to search all the interfaces to find a
3593  *	match.  --pb
3594  */
3595 
3596 static int dev_ifname(struct net *net, struct ifreq __user *arg)
3597 {
3598 	struct net_device *dev;
3599 	struct ifreq ifr;
3600 
3601 	/*
3602 	 *	Fetch the caller's info block.
3603 	 */
3604 
3605 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3606 		return -EFAULT;
3607 
3608 	rcu_read_lock();
3609 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
3610 	if (!dev) {
3611 		rcu_read_unlock();
3612 		return -ENODEV;
3613 	}
3614 
3615 	strcpy(ifr.ifr_name, dev->name);
3616 	rcu_read_unlock();
3617 
3618 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3619 		return -EFAULT;
3620 	return 0;
3621 }
3622 
3623 /*
3624  *	Perform a SIOCGIFCONF call. This structure will change
3625  *	size eventually, and there is nothing I can do about it.
3626  *	Thus we will need a 'compatibility mode'.
3627  */
3628 
3629 static int dev_ifconf(struct net *net, char __user *arg)
3630 {
3631 	struct ifconf ifc;
3632 	struct net_device *dev;
3633 	char __user *pos;
3634 	int len;
3635 	int total;
3636 	int i;
3637 
3638 	/*
3639 	 *	Fetch the caller's info block.
3640 	 */
3641 
3642 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3643 		return -EFAULT;
3644 
3645 	pos = ifc.ifc_buf;
3646 	len = ifc.ifc_len;
3647 
3648 	/*
3649 	 *	Loop over the interfaces, and write an info block for each.
3650 	 */
3651 
3652 	total = 0;
3653 	for_each_netdev(net, dev) {
3654 		for (i = 0; i < NPROTO; i++) {
3655 			if (gifconf_list[i]) {
3656 				int done;
3657 				if (!pos)
3658 					done = gifconf_list[i](dev, NULL, 0);
3659 				else
3660 					done = gifconf_list[i](dev, pos + total,
3661 							       len - total);
3662 				if (done < 0)
3663 					return -EFAULT;
3664 				total += done;
3665 			}
3666 		}
3667 	}
3668 
3669 	/*
3670 	 *	All done.  Write the updated control block back to the caller.
3671 	 */
3672 	ifc.ifc_len = total;
3673 
3674 	/*
3675 	 * 	Both BSD and Solaris return 0 here, so we do too.
3676 	 */
3677 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3678 }
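
/*
 * Userspace view (an illustrative sketch): because a NULL ifc_buf only
 * computes the required length, SIOCGIFCONF is commonly issued twice:
 *
 *	struct ifconf ifc = { .ifc_len = 0, .ifc_buf = NULL };
 *	ioctl(fd, SIOCGIFCONF, &ifc);	(first call: sets ifc_len)
 *	ifc.ifc_buf = malloc(ifc.ifc_len);
 *	ioctl(fd, SIOCGIFCONF, &ifc);	(second call: fills the ifreq array)
 */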
3679 
3680 #ifdef CONFIG_PROC_FS
3681 /*
3682  *	This is invoked by the /proc filesystem handler to display a device
3683  *	in detail.
3684  */
3685 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3686 	__acquires(RCU)
3687 {
3688 	struct net *net = seq_file_net(seq);
3689 	loff_t off;
3690 	struct net_device *dev;
3691 
3692 	rcu_read_lock();
3693 	if (!*pos)
3694 		return SEQ_START_TOKEN;
3695 
3696 	off = 1;
3697 	for_each_netdev_rcu(net, dev)
3698 		if (off++ == *pos)
3699 			return dev;
3700 
3701 	return NULL;
3702 }
3703 
3704 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3705 {
3706 	struct net_device *dev = (v == SEQ_START_TOKEN) ?
3707 				  first_net_device(seq_file_net(seq)) :
3708 				  next_net_device((struct net_device *)v);
3709 
3710 	++*pos;
3711 	return rcu_dereference(dev);
3712 }
3713 
3714 void dev_seq_stop(struct seq_file *seq, void *v)
3715 	__releases(RCU)
3716 {
3717 	rcu_read_unlock();
3718 }
3719 
3720 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3721 {
3722 	const struct net_device_stats *stats = dev_get_stats(dev);
3723 
3724 	seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3725 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3726 		   dev->name, stats->rx_bytes, stats->rx_packets,
3727 		   stats->rx_errors,
3728 		   stats->rx_dropped + stats->rx_missed_errors,
3729 		   stats->rx_fifo_errors,
3730 		   stats->rx_length_errors + stats->rx_over_errors +
3731 		    stats->rx_crc_errors + stats->rx_frame_errors,
3732 		   stats->rx_compressed, stats->multicast,
3733 		   stats->tx_bytes, stats->tx_packets,
3734 		   stats->tx_errors, stats->tx_dropped,
3735 		   stats->tx_fifo_errors, stats->collisions,
3736 		   stats->tx_carrier_errors +
3737 		    stats->tx_aborted_errors +
3738 		    stats->tx_window_errors +
3739 		    stats->tx_heartbeat_errors,
3740 		   stats->tx_compressed);
3741 }
3742 
3743 /*
3744  *	Called from the PROCfs module. This now uses the new arbitrary-sized
3745  *	/proc/net interface to create /proc/net/dev.
3746  */
3747 static int dev_seq_show(struct seq_file *seq, void *v)
3748 {
3749 	if (v == SEQ_START_TOKEN)
3750 		seq_puts(seq, "Inter-|   Receive                            "
3751 			      "                    |  Transmit\n"
3752 			      " face |bytes    packets errs drop fifo frame "
3753 			      "compressed multicast|bytes    packets errs "
3754 			      "drop fifo colls carrier compressed\n");
3755 	else
3756 		dev_seq_printf_stats(seq, v);
3757 	return 0;
3758 }
3759 
3760 static struct softnet_data *softnet_get_online(loff_t *pos)
3761 {
3762 	struct softnet_data *sd = NULL;
3763 
3764 	while (*pos < nr_cpu_ids)
3765 		if (cpu_online(*pos)) {
3766 			sd = &per_cpu(softnet_data, *pos);
3767 			break;
3768 		} else
3769 			++*pos;
3770 	return sd;
3771 }
3772 
3773 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3774 {
3775 	return softnet_get_online(pos);
3776 }
3777 
3778 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3779 {
3780 	++*pos;
3781 	return softnet_get_online(pos);
3782 }
3783 
3784 static void softnet_seq_stop(struct seq_file *seq, void *v)
3785 {
3786 }
3787 
3788 static int softnet_seq_show(struct seq_file *seq, void *v)
3789 {
3790 	struct softnet_data *sd = v;
3791 
3792 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3793 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
3794 		   0, 0, 0, 0, /* was fastroute */
3795 		   sd->cpu_collision, sd->received_rps);
3796 	return 0;
3797 }
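
/*
 * Editorial note: each /proc/net/softnet_stat row printed above is one
 * online CPU; the hex columns are, in order: processed, dropped,
 * time_squeeze, five always-zero placeholders (the old fastroute
 * fields), cpu_collision and received_rps.
 */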
3798 
3799 static const struct seq_operations dev_seq_ops = {
3800 	.start = dev_seq_start,
3801 	.next  = dev_seq_next,
3802 	.stop  = dev_seq_stop,
3803 	.show  = dev_seq_show,
3804 };
3805 
3806 static int dev_seq_open(struct inode *inode, struct file *file)
3807 {
3808 	return seq_open_net(inode, file, &dev_seq_ops,
3809 			    sizeof(struct seq_net_private));
3810 }
3811 
3812 static const struct file_operations dev_seq_fops = {
3813 	.owner	 = THIS_MODULE,
3814 	.open    = dev_seq_open,
3815 	.read    = seq_read,
3816 	.llseek  = seq_lseek,
3817 	.release = seq_release_net,
3818 };
3819 
3820 static const struct seq_operations softnet_seq_ops = {
3821 	.start = softnet_seq_start,
3822 	.next  = softnet_seq_next,
3823 	.stop  = softnet_seq_stop,
3824 	.show  = softnet_seq_show,
3825 };
3826 
3827 static int softnet_seq_open(struct inode *inode, struct file *file)
3828 {
3829 	return seq_open(file, &softnet_seq_ops);
3830 }
3831 
3832 static const struct file_operations softnet_seq_fops = {
3833 	.owner	 = THIS_MODULE,
3834 	.open    = softnet_seq_open,
3835 	.read    = seq_read,
3836 	.llseek  = seq_lseek,
3837 	.release = seq_release,
3838 };
3839 
3840 static void *ptype_get_idx(loff_t pos)
3841 {
3842 	struct packet_type *pt = NULL;
3843 	loff_t i = 0;
3844 	int t;
3845 
3846 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3847 		if (i == pos)
3848 			return pt;
3849 		++i;
3850 	}
3851 
3852 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3853 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3854 			if (i == pos)
3855 				return pt;
3856 			++i;
3857 		}
3858 	}
3859 	return NULL;
3860 }
3861 
3862 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3863 	__acquires(RCU)
3864 {
3865 	rcu_read_lock();
3866 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3867 }
3868 
3869 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3870 {
3871 	struct packet_type *pt;
3872 	struct list_head *nxt;
3873 	int hash;
3874 
3875 	++*pos;
3876 	if (v == SEQ_START_TOKEN)
3877 		return ptype_get_idx(0);
3878 
3879 	pt = v;
3880 	nxt = pt->list.next;
3881 	if (pt->type == htons(ETH_P_ALL)) {
3882 		if (nxt != &ptype_all)
3883 			goto found;
3884 		hash = 0;
3885 		nxt = ptype_base[0].next;
3886 	} else
3887 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3888 
3889 	while (nxt == &ptype_base[hash]) {
3890 		if (++hash >= PTYPE_HASH_SIZE)
3891 			return NULL;
3892 		nxt = ptype_base[hash].next;
3893 	}
3894 found:
3895 	return list_entry(nxt, struct packet_type, list);
3896 }
3897 
3898 static void ptype_seq_stop(struct seq_file *seq, void *v)
3899 	__releases(RCU)
3900 {
3901 	rcu_read_unlock();
3902 }
3903 
3904 static int ptype_seq_show(struct seq_file *seq, void *v)
3905 {
3906 	struct packet_type *pt = v;
3907 
3908 	if (v == SEQ_START_TOKEN)
3909 		seq_puts(seq, "Type Device      Function\n");
3910 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3911 		if (pt->type == htons(ETH_P_ALL))
3912 			seq_puts(seq, "ALL ");
3913 		else
3914 			seq_printf(seq, "%04x", ntohs(pt->type));
3915 
3916 		seq_printf(seq, " %-8s %pF\n",
3917 			   pt->dev ? pt->dev->name : "", pt->func);
3918 	}
3919 
3920 	return 0;
3921 }
3922 
3923 static const struct seq_operations ptype_seq_ops = {
3924 	.start = ptype_seq_start,
3925 	.next  = ptype_seq_next,
3926 	.stop  = ptype_seq_stop,
3927 	.show  = ptype_seq_show,
3928 };
3929 
3930 static int ptype_seq_open(struct inode *inode, struct file *file)
3931 {
3932 	return seq_open_net(inode, file, &ptype_seq_ops,
3933 			sizeof(struct seq_net_private));
3934 }
3935 
3936 static const struct file_operations ptype_seq_fops = {
3937 	.owner	 = THIS_MODULE,
3938 	.open    = ptype_seq_open,
3939 	.read    = seq_read,
3940 	.llseek  = seq_lseek,
3941 	.release = seq_release_net,
3942 };
3943 
3944 
3945 static int __net_init dev_proc_net_init(struct net *net)
3946 {
3947 	int rc = -ENOMEM;
3948 
3949 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3950 		goto out;
3951 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3952 		goto out_dev;
3953 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3954 		goto out_softnet;
3955 
3956 	if (wext_proc_init(net))
3957 		goto out_ptype;
3958 	rc = 0;
3959 out:
3960 	return rc;
3961 out_ptype:
3962 	proc_net_remove(net, "ptype");
3963 out_softnet:
3964 	proc_net_remove(net, "softnet_stat");
3965 out_dev:
3966 	proc_net_remove(net, "dev");
3967 	goto out;
3968 }
3969 
3970 static void __net_exit dev_proc_net_exit(struct net *net)
3971 {
3972 	wext_proc_exit(net);
3973 
3974 	proc_net_remove(net, "ptype");
3975 	proc_net_remove(net, "softnet_stat");
3976 	proc_net_remove(net, "dev");
3977 }
3978 
3979 static struct pernet_operations __net_initdata dev_proc_ops = {
3980 	.init = dev_proc_net_init,
3981 	.exit = dev_proc_net_exit,
3982 };
3983 
3984 static int __init dev_proc_init(void)
3985 {
3986 	return register_pernet_subsys(&dev_proc_ops);
3987 }
3988 #else
3989 #define dev_proc_init() 0
3990 #endif	/* CONFIG_PROC_FS */
3991 
3992 
3993 /**
3994  *	netdev_set_master	-	set up master/slave pair
3995  *	@slave: slave device
3996  *	@master: new master device
3997  *
3998  *	Changes the master device of the slave. Pass %NULL to break the
3999  *	bonding. The caller must hold the RTNL semaphore. On a failure
4000  *	a negative errno code is returned. On success the reference counts
4001  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
4002  *	function returns zero.
4003  */
4004 int netdev_set_master(struct net_device *slave, struct net_device *master)
4005 {
4006 	struct net_device *old = slave->master;
4007 
4008 	ASSERT_RTNL();
4009 
4010 	if (master) {
4011 		if (old)
4012 			return -EBUSY;
4013 		dev_hold(master);
4014 	}
4015 
4016 	slave->master = master;
4017 
4018 	if (old) {
4019 		synchronize_net();
4020 		dev_put(old);
4021 	}
4022 	if (master)
4023 		slave->flags |= IFF_SLAVE;
4024 	else
4025 		slave->flags &= ~IFF_SLAVE;
4026 
4027 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4028 	return 0;
4029 }
4030 EXPORT_SYMBOL(netdev_set_master);
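
/*
 * Example (illustrative sketch): a bonding-style driver pairing and later
 * unpairing two devices; `bond_dev' and `slave_dev' are hypothetical.
 *
 *	rtnl_lock();
 *	err = netdev_set_master(slave_dev, bond_dev);
 *	rtnl_unlock();
 *
 * and to break the pairing again:
 *
 *	rtnl_lock();
 *	err = netdev_set_master(slave_dev, NULL);
 *	rtnl_unlock();
 */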
4031 
4032 static void dev_change_rx_flags(struct net_device *dev, int flags)
4033 {
4034 	const struct net_device_ops *ops = dev->netdev_ops;
4035 
4036 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4037 		ops->ndo_change_rx_flags(dev, flags);
4038 }
4039 
4040 static int __dev_set_promiscuity(struct net_device *dev, int inc)
4041 {
4042 	unsigned short old_flags = dev->flags;
4043 	uid_t uid;
4044 	gid_t gid;
4045 
4046 	ASSERT_RTNL();
4047 
4048 	dev->flags |= IFF_PROMISC;
4049 	dev->promiscuity += inc;
4050 	if (dev->promiscuity == 0) {
4051 		/*
4052 		 * Avoid overflow.
4053 		 * If inc would overflow, leave promiscuity unchanged and return an error.
4054 		 */
4055 		if (inc < 0)
4056 			dev->flags &= ~IFF_PROMISC;
4057 		else {
4058 			dev->promiscuity -= inc;
4059 			printk(KERN_WARNING "%s: promiscuity counter would "
4060 				"overflow, leaving it unchanged; promiscuous "
4061 				"mode on this device may be broken.\n", dev->name);
4062 			return -EOVERFLOW;
4063 		}
4064 	}
4065 	if (dev->flags != old_flags) {
4066 		printk(KERN_INFO "device %s %s promiscuous mode\n",
4067 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
4068 							       "left");
4069 		if (audit_enabled) {
4070 			current_uid_gid(&uid, &gid);
4071 			audit_log(current->audit_context, GFP_ATOMIC,
4072 				AUDIT_ANOM_PROMISCUOUS,
4073 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4074 				dev->name, (dev->flags & IFF_PROMISC),
4075 				(old_flags & IFF_PROMISC),
4076 				audit_get_loginuid(current),
4077 				uid, gid,
4078 				audit_get_sessionid(current));
4079 		}
4080 
4081 		dev_change_rx_flags(dev, IFF_PROMISC);
4082 	}
4083 	return 0;
4084 }
4085 
4086 /**
4087  *	dev_set_promiscuity	- update promiscuity count on a device
4088  *	@dev: device
4089  *	@inc: modifier
4090  *
4091  *	Add or remove promiscuity from a device. While the count in the device
4092  *	remains above zero the interface remains promiscuous. Once it hits zero
4093  *	the device reverts back to normal filtering operation. A negative inc
4094  *	value is used to drop promiscuity on the device.
4095  *	Return 0 if successful or a negative errno code on error.
4096  */
4097 int dev_set_promiscuity(struct net_device *dev, int inc)
4098 {
4099 	unsigned short old_flags = dev->flags;
4100 	int err;
4101 
4102 	err = __dev_set_promiscuity(dev, inc);
4103 	if (err < 0)
4104 		return err;
4105 	if (dev->flags != old_flags)
4106 		dev_set_rx_mode(dev);
4107 	return err;
4108 }
4109 EXPORT_SYMBOL(dev_set_promiscuity);
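
/*
 * Example (illustrative sketch): a packet-capture style user taking and
 * later dropping one promiscuity reference; calls must balance and be
 * made under RTNL.
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);
 *	rtnl_unlock();
 *
 * and when done:
 *
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);
 *	rtnl_unlock();
 */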
4110 
4111 /**
4112  *	dev_set_allmulti	- update allmulti count on a device
4113  *	@dev: device
4114  *	@inc: modifier
4115  *
4116  *	Add or remove reception of all multicast frames on a device. While the
4117  *	count in the device remains above zero the interface remains listening
4118  *	to all multicast frames. Once it hits zero the device reverts to normal
4119  *	filtering operation. A negative @inc value is used to drop the counter
4120  *	when releasing a resource needing all multicasts.
4121  *	Return 0 if successful or a negative errno code on error.
4122  */
4123 
4124 int dev_set_allmulti(struct net_device *dev, int inc)
4125 {
4126 	unsigned short old_flags = dev->flags;
4127 
4128 	ASSERT_RTNL();
4129 
4130 	dev->flags |= IFF_ALLMULTI;
4131 	dev->allmulti += inc;
4132 	if (dev->allmulti == 0) {
4133 		/*
4134 		 * Avoid overflow.
4135 		 * If inc would overflow, leave allmulti unchanged and return an error.
4136 		 */
4137 		if (inc < 0)
4138 			dev->flags &= ~IFF_ALLMULTI;
4139 		else {
4140 			dev->allmulti -= inc;
4141 			printk(KERN_WARNING "%s: allmulti counter would "
4142 				"overflow, leaving it unchanged; allmulti on "
4143 				"this device may be broken.\n", dev->name);
4144 			return -EOVERFLOW;
4145 		}
4146 	}
4147 	if (dev->flags ^ old_flags) {
4148 		dev_change_rx_flags(dev, IFF_ALLMULTI);
4149 		dev_set_rx_mode(dev);
4150 	}
4151 	return 0;
4152 }
4153 EXPORT_SYMBOL(dev_set_allmulti);
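
/*
 * Example (illustrative sketch): pinning all-multicast reception while a
 * multicast routing component needs it, mirroring the promiscuity
 * pattern above.
 *
 *	rtnl_lock();
 *	err = dev_set_allmulti(dev, 1);
 *	rtnl_unlock();
 */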
4154 
4155 /*
4156  *	Upload unicast and multicast address lists to device and
4157  *	configure RX filtering. When the device doesn't support unicast
4158  *	filtering it is put in promiscuous mode while unicast addresses
4159  *	are present.
4160  */
4161 void __dev_set_rx_mode(struct net_device *dev)
4162 {
4163 	const struct net_device_ops *ops = dev->netdev_ops;
4164 
4165 	/* dev_open will call this function so the list will stay sane. */
4166 	if (!(dev->flags&IFF_UP))
4167 		return;
4168 
4169 	if (!netif_device_present(dev))
4170 		return;
4171 
4172 	if (ops->ndo_set_rx_mode)
4173 		ops->ndo_set_rx_mode(dev);
4174 	else {
4175 		/* Unicast address changes may only happen under the rtnl,
4176 		 * therefore calling __dev_set_promiscuity here is safe.
4177 		 */
4178 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4179 			__dev_set_promiscuity(dev, 1);
4180 			dev->uc_promisc = 1;
4181 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4182 			__dev_set_promiscuity(dev, -1);
4183 			dev->uc_promisc = 0;
4184 		}
4185 
4186 		if (ops->ndo_set_multicast_list)
4187 			ops->ndo_set_multicast_list(dev);
4188 	}
4189 }
4190 
4191 void dev_set_rx_mode(struct net_device *dev)
4192 {
4193 	netif_addr_lock_bh(dev);
4194 	__dev_set_rx_mode(dev);
4195 	netif_addr_unlock_bh(dev);
4196 }
4197 
4198 /**
4199  *	dev_get_flags - get flags reported to userspace
4200  *	@dev: device
4201  *
4202  *	Get the combination of flag bits exported through APIs to userspace.
4203  */
4204 unsigned dev_get_flags(const struct net_device *dev)
4205 {
4206 	unsigned flags;
4207 
4208 	flags = (dev->flags & ~(IFF_PROMISC |
4209 				IFF_ALLMULTI |
4210 				IFF_RUNNING |
4211 				IFF_LOWER_UP |
4212 				IFF_DORMANT)) |
4213 		(dev->gflags & (IFF_PROMISC |
4214 				IFF_ALLMULTI));
4215 
4216 	if (netif_running(dev)) {
4217 		if (netif_oper_up(dev))
4218 			flags |= IFF_RUNNING;
4219 		if (netif_carrier_ok(dev))
4220 			flags |= IFF_LOWER_UP;
4221 		if (netif_dormant(dev))
4222 			flags |= IFF_DORMANT;
4223 	}
4224 
4225 	return flags;
4226 }
4227 EXPORT_SYMBOL(dev_get_flags);
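
/*
 * Example (illustrative sketch): reading the userspace view of the flags;
 * `oper' is non-zero when the link is administratively up and the
 * operational state is good.
 *
 *	unsigned flags = dev_get_flags(dev);
 *	int oper = (flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING);
 */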
4228 
4229 int __dev_change_flags(struct net_device *dev, unsigned int flags)
4230 {
4231 	int old_flags = dev->flags;
4232 	int ret;
4233 
4234 	ASSERT_RTNL();
4235 
4236 	/*
4237 	 *	Set the flags on our device.
4238 	 */
4239 
4240 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4241 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4242 			       IFF_AUTOMEDIA)) |
4243 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4244 				    IFF_ALLMULTI));
4245 
4246 	/*
4247 	 *	Load in the correct multicast list now the flags have changed.
4248 	 */
4249 
4250 	if ((old_flags ^ flags) & IFF_MULTICAST)
4251 		dev_change_rx_flags(dev, IFF_MULTICAST);
4252 
4253 	dev_set_rx_mode(dev);
4254 
4255 	/*
4256 	 *	Have we downed the interface? We handle IFF_UP ourselves
4257 	 *	according to user attempts to set it, rather than blindly
4258 	 *	setting it.
4259 	 */
4260 
4261 	ret = 0;
4262 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4263 		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4264 
4265 		if (!ret)
4266 			dev_set_rx_mode(dev);
4267 	}
4268 
4269 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4270 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4271 
4272 		dev->gflags ^= IFF_PROMISC;
4273 		dev_set_promiscuity(dev, inc);
4274 	}
4275 
4276 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4277 	   is important. Some (broken) drivers set IFF_PROMISC when
4278 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4279 	 */
4280 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4281 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4282 
4283 		dev->gflags ^= IFF_ALLMULTI;
4284 		dev_set_allmulti(dev, inc);
4285 	}
4286 
4287 	return ret;
4288 }
4289 
4290 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4291 {
4292 	unsigned int changes = dev->flags ^ old_flags;
4293 
4294 	if (changes & IFF_UP) {
4295 		if (dev->flags & IFF_UP)
4296 			call_netdevice_notifiers(NETDEV_UP, dev);
4297 		else
4298 			call_netdevice_notifiers(NETDEV_DOWN, dev);
4299 	}
4300 
4301 	if (dev->flags & IFF_UP &&
4302 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4303 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4304 }
4305 
4306 /**
4307  *	dev_change_flags - change device settings
4308  *	@dev: device
4309  *	@flags: device state flags
4310  *
4311  *	Change settings on a device based on the given state flags. The
4312  *	flags are in the format exported to userspace.
4313  */
4314 int dev_change_flags(struct net_device *dev, unsigned flags)
4315 {
4316 	int ret, changes;
4317 	int old_flags = dev->flags;
4318 
4319 	ret = __dev_change_flags(dev, flags);
4320 	if (ret < 0)
4321 		return ret;
4322 
4323 	changes = old_flags ^ dev->flags;
4324 	if (changes)
4325 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4326 
4327 	__dev_notify_flags(dev, old_flags);
4328 	return ret;
4329 }
4330 EXPORT_SYMBOL(dev_change_flags);
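
/*
 * Example (illustrative sketch): bringing an interface administratively
 * up, the in-kernel counterpart of SIOCSIFFLAGS with IFF_UP set.
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 *	rtnl_unlock();
 */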
4331 
4332 /**
4333  *	dev_set_mtu - Change maximum transfer unit
4334  *	@dev: device
4335  *	@new_mtu: new transfer unit
4336  *
4337  *	Change the maximum transfer size of the network device.
4338  */
4339 int dev_set_mtu(struct net_device *dev, int new_mtu)
4340 {
4341 	const struct net_device_ops *ops = dev->netdev_ops;
4342 	int err;
4343 
4344 	if (new_mtu == dev->mtu)
4345 		return 0;
4346 
4347 	/*	MTU must not be negative.	 */
4348 	if (new_mtu < 0)
4349 		return -EINVAL;
4350 
4351 	if (!netif_device_present(dev))
4352 		return -ENODEV;
4353 
4354 	err = 0;
4355 	if (ops->ndo_change_mtu)
4356 		err = ops->ndo_change_mtu(dev, new_mtu);
4357 	else
4358 		dev->mtu = new_mtu;
4359 
4360 	if (!err && dev->flags & IFF_UP)
4361 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4362 	return err;
4363 }
4364 EXPORT_SYMBOL(dev_set_mtu);
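
/*
 * Example (illustrative sketch): switching a device to jumbo frames; the
 * driver's ndo_change_mtu, if it has one, may still reject the value.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 9000);
 *	rtnl_unlock();
 */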
4365 
4366 /**
4367  *	dev_set_mac_address - Change Media Access Control Address
4368  *	@dev: device
4369  *	@sa: new address
4370  *
4371  *	Change the hardware (MAC) address of the device
4372  */
4373 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4374 {
4375 	const struct net_device_ops *ops = dev->netdev_ops;
4376 	int err;
4377 
4378 	if (!ops->ndo_set_mac_address)
4379 		return -EOPNOTSUPP;
4380 	if (sa->sa_family != dev->type)
4381 		return -EINVAL;
4382 	if (!netif_device_present(dev))
4383 		return -ENODEV;
4384 	err = ops->ndo_set_mac_address(dev, sa);
4385 	if (!err)
4386 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4387 	return err;
4388 }
4389 EXPORT_SYMBOL(dev_set_mac_address);
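
/*
 * Example (illustrative sketch): setting a new Ethernet station address.
 * `new_mac' is a hypothetical ETH_ALEN-byte buffer; sa_family must match
 * dev->type or -EINVAL is returned.
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, ETH_ALEN);
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */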
4390 
4391 /*
4392  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4393  */
4394 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4395 {
4396 	int err;
4397 	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4398 
4399 	if (!dev)
4400 		return -ENODEV;
4401 
4402 	switch (cmd) {
4403 	case SIOCGIFFLAGS:	/* Get interface flags */
4404 		ifr->ifr_flags = (short) dev_get_flags(dev);
4405 		return 0;
4406 
4407 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4408 				   (currently unused) */
4409 		ifr->ifr_metric = 0;
4410 		return 0;
4411 
4412 	case SIOCGIFMTU:	/* Get the MTU of a device */
4413 		ifr->ifr_mtu = dev->mtu;
4414 		return 0;
4415 
4416 	case SIOCGIFHWADDR:
4417 		if (!dev->addr_len)
4418 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4419 		else
4420 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4421 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4422 		ifr->ifr_hwaddr.sa_family = dev->type;
4423 		return 0;
4424 
4425 	case SIOCGIFSLAVE:
4426 		err = -EINVAL;
4427 		break;
4428 
4429 	case SIOCGIFMAP:
4430 		ifr->ifr_map.mem_start = dev->mem_start;
4431 		ifr->ifr_map.mem_end   = dev->mem_end;
4432 		ifr->ifr_map.base_addr = dev->base_addr;
4433 		ifr->ifr_map.irq       = dev->irq;
4434 		ifr->ifr_map.dma       = dev->dma;
4435 		ifr->ifr_map.port      = dev->if_port;
4436 		return 0;
4437 
4438 	case SIOCGIFINDEX:
4439 		ifr->ifr_ifindex = dev->ifindex;
4440 		return 0;
4441 
4442 	case SIOCGIFTXQLEN:
4443 		ifr->ifr_qlen = dev->tx_queue_len;
4444 		return 0;
4445 
4446 	default:
4447 		/* dev_ioctl() should ensure this case
4448 		 * is never reached
4449 		 */
4450 		WARN_ON(1);
4451 		err = -EINVAL;
4452 		break;
4453 
4454 	}
4455 	return err;
4456 }
4457 
4458 /*
4459  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4460  */
4461 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4462 {
4463 	int err;
4464 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4465 	const struct net_device_ops *ops;
4466 
4467 	if (!dev)
4468 		return -ENODEV;
4469 
4470 	ops = dev->netdev_ops;
4471 
4472 	switch (cmd) {
4473 	case SIOCSIFFLAGS:	/* Set interface flags */
4474 		return dev_change_flags(dev, ifr->ifr_flags);
4475 
4476 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4477 				   (currently unused) */
4478 		return -EOPNOTSUPP;
4479 
4480 	case SIOCSIFMTU:	/* Set the MTU of a device */
4481 		return dev_set_mtu(dev, ifr->ifr_mtu);
4482 
4483 	case SIOCSIFHWADDR:
4484 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4485 
4486 	case SIOCSIFHWBROADCAST:
4487 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4488 			return -EINVAL;
4489 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4490 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4491 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4492 		return 0;
4493 
4494 	case SIOCSIFMAP:
4495 		if (ops->ndo_set_config) {
4496 			if (!netif_device_present(dev))
4497 				return -ENODEV;
4498 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4499 		}
4500 		return -EOPNOTSUPP;
4501 
4502 	case SIOCADDMULTI:
4503 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4504 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4505 			return -EINVAL;
4506 		if (!netif_device_present(dev))
4507 			return -ENODEV;
4508 		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4509 
4510 	case SIOCDELMULTI:
4511 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4512 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4513 			return -EINVAL;
4514 		if (!netif_device_present(dev))
4515 			return -ENODEV;
4516 		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4517 
4518 	case SIOCSIFTXQLEN:
4519 		if (ifr->ifr_qlen < 0)
4520 			return -EINVAL;
4521 		dev->tx_queue_len = ifr->ifr_qlen;
4522 		return 0;
4523 
4524 	case SIOCSIFNAME:
4525 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4526 		return dev_change_name(dev, ifr->ifr_newname);
4527 
4528 	/*
4529 	 *	Unknown or private ioctl
4530 	 */
4531 	default:
4532 		if ((cmd >= SIOCDEVPRIVATE &&
4533 		    cmd <= SIOCDEVPRIVATE + 15) ||
4534 		    cmd == SIOCBONDENSLAVE ||
4535 		    cmd == SIOCBONDRELEASE ||
4536 		    cmd == SIOCBONDSETHWADDR ||
4537 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4538 		    cmd == SIOCBONDINFOQUERY ||
4539 		    cmd == SIOCBONDCHANGEACTIVE ||
4540 		    cmd == SIOCGMIIPHY ||
4541 		    cmd == SIOCGMIIREG ||
4542 		    cmd == SIOCSMIIREG ||
4543 		    cmd == SIOCBRADDIF ||
4544 		    cmd == SIOCBRDELIF ||
4545 		    cmd == SIOCSHWTSTAMP ||
4546 		    cmd == SIOCWANDEV) {
4547 			err = -EOPNOTSUPP;
4548 			if (ops->ndo_do_ioctl) {
4549 				if (netif_device_present(dev))
4550 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4551 				else
4552 					err = -ENODEV;
4553 			}
4554 		} else
4555 			err = -EINVAL;
4556 
4557 	}
4558 	return err;
4559 }
4560 
4561 /*
4562  *	This function handles all "interface"-type I/O control requests. The actual
4563  *	'doing' part of this is dev_ifsioc above.
4564  */
4565 
4566 /**
4567  *	dev_ioctl	-	network device ioctl
4568  *	@net: the applicable net namespace
4569  *	@cmd: command to issue
4570  *	@arg: pointer to a struct ifreq in user space
4571  *
4572  *	Issue ioctl functions to devices. This is normally called by the
4573  *	user space syscall interfaces but can sometimes be useful for
4574  *	other purposes. The return value is the return from the syscall if
4575  *	positive or a negative errno code on error.
4576  */
4577 
4578 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4579 {
4580 	struct ifreq ifr;
4581 	int ret;
4582 	char *colon;
4583 
4584 	/* One special case: SIOCGIFCONF takes an ifconf argument
4585 	   and requires a shared lock, because it sleeps writing
4586 	   to user space.
4587 	 */
4588 
4589 	if (cmd == SIOCGIFCONF) {
4590 		rtnl_lock();
4591 		ret = dev_ifconf(net, (char __user *) arg);
4592 		rtnl_unlock();
4593 		return ret;
4594 	}
4595 	if (cmd == SIOCGIFNAME)
4596 		return dev_ifname(net, (struct ifreq __user *)arg);
4597 
4598 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4599 		return -EFAULT;
4600 
4601 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4602 
4603 	colon = strchr(ifr.ifr_name, ':');
4604 	if (colon)
4605 		*colon = 0;
4606 
4607 	/*
4608 	 *	See which interface the caller is talking about.
4609 	 */
4610 
4611 	switch (cmd) {
4612 	/*
4613 	 *	These ioctl calls:
4614 	 *	- can be done by all.
4615 	 *	- atomic and do not require locking.
4616 	 *	- return a value
4617 	 */
4618 	case SIOCGIFFLAGS:
4619 	case SIOCGIFMETRIC:
4620 	case SIOCGIFMTU:
4621 	case SIOCGIFHWADDR:
4622 	case SIOCGIFSLAVE:
4623 	case SIOCGIFMAP:
4624 	case SIOCGIFINDEX:
4625 	case SIOCGIFTXQLEN:
4626 		dev_load(net, ifr.ifr_name);
4627 		rcu_read_lock();
4628 		ret = dev_ifsioc_locked(net, &ifr, cmd);
4629 		rcu_read_unlock();
4630 		if (!ret) {
4631 			if (colon)
4632 				*colon = ':';
4633 			if (copy_to_user(arg, &ifr,
4634 					 sizeof(struct ifreq)))
4635 				ret = -EFAULT;
4636 		}
4637 		return ret;
4638 
4639 	case SIOCETHTOOL:
4640 		dev_load(net, ifr.ifr_name);
4641 		rtnl_lock();
4642 		ret = dev_ethtool(net, &ifr);
4643 		rtnl_unlock();
4644 		if (!ret) {
4645 			if (colon)
4646 				*colon = ':';
4647 			if (copy_to_user(arg, &ifr,
4648 					 sizeof(struct ifreq)))
4649 				ret = -EFAULT;
4650 		}
4651 		return ret;
4652 
4653 	/*
4654 	 *	These ioctl calls:
4655 	 *	- require superuser power.
4656 	 *	- require strict serialization.
4657 	 *	- return a value
4658 	 */
4659 	case SIOCGMIIPHY:
4660 	case SIOCGMIIREG:
4661 	case SIOCSIFNAME:
4662 		if (!capable(CAP_NET_ADMIN))
4663 			return -EPERM;
4664 		dev_load(net, ifr.ifr_name);
4665 		rtnl_lock();
4666 		ret = dev_ifsioc(net, &ifr, cmd);
4667 		rtnl_unlock();
4668 		if (!ret) {
4669 			if (colon)
4670 				*colon = ':';
4671 			if (copy_to_user(arg, &ifr,
4672 					 sizeof(struct ifreq)))
4673 				ret = -EFAULT;
4674 		}
4675 		return ret;
4676 
4677 	/*
4678 	 *	These ioctl calls:
4679 	 *	- require superuser power.
4680 	 *	- require strict serialization.
4681 	 *	- do not return a value
4682 	 */
4683 	case SIOCSIFFLAGS:
4684 	case SIOCSIFMETRIC:
4685 	case SIOCSIFMTU:
4686 	case SIOCSIFMAP:
4687 	case SIOCSIFHWADDR:
4688 	case SIOCSIFSLAVE:
4689 	case SIOCADDMULTI:
4690 	case SIOCDELMULTI:
4691 	case SIOCSIFHWBROADCAST:
4692 	case SIOCSIFTXQLEN:
4693 	case SIOCSMIIREG:
4694 	case SIOCBONDENSLAVE:
4695 	case SIOCBONDRELEASE:
4696 	case SIOCBONDSETHWADDR:
4697 	case SIOCBONDCHANGEACTIVE:
4698 	case SIOCBRADDIF:
4699 	case SIOCBRDELIF:
4700 	case SIOCSHWTSTAMP:
4701 		if (!capable(CAP_NET_ADMIN))
4702 			return -EPERM;
4703 		/* fall through */
4704 	case SIOCBONDSLAVEINFOQUERY:
4705 	case SIOCBONDINFOQUERY:
4706 		dev_load(net, ifr.ifr_name);
4707 		rtnl_lock();
4708 		ret = dev_ifsioc(net, &ifr, cmd);
4709 		rtnl_unlock();
4710 		return ret;
4711 
4712 	case SIOCGIFMEM:
4713 		/* Get the per device memory space. We can add this but
4714 		 * currently do not support it */
4715 	case SIOCSIFMEM:
4716 		/* Set the per device memory buffer space.
4717 		 * Not applicable in our case */
4718 	case SIOCSIFLINK:
4719 		return -EINVAL;
4720 
4721 	/*
4722 	 *	Unknown or private ioctl.
4723 	 */
4724 	default:
4725 		if (cmd == SIOCWANDEV ||
4726 		    (cmd >= SIOCDEVPRIVATE &&
4727 		     cmd <= SIOCDEVPRIVATE + 15)) {
4728 			dev_load(net, ifr.ifr_name);
4729 			rtnl_lock();
4730 			ret = dev_ifsioc(net, &ifr, cmd);
4731 			rtnl_unlock();
4732 			if (!ret && copy_to_user(arg, &ifr,
4733 						 sizeof(struct ifreq)))
4734 				ret = -EFAULT;
4735 			return ret;
4736 		}
4737 		/* Take care of Wireless Extensions */
4738 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4739 			return wext_handle_ioctl(net, &ifr, cmd, arg);
4740 		return -EINVAL;
4741 	}
4742 }
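
/*
 * Example (illustrative sketch, user space): how the path above is
 * typically reached, here reading an MTU with SIOCGIFMTU on any socket.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("%s mtu %d\n", ifr.ifr_name, ifr.ifr_mtu);
 *	close(fd);
 */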
4743 
4744 
4745 /**
4746  *	dev_new_index	-	allocate an ifindex
4747  *	@net: the applicable net namespace
4748  *
4749  *	Returns a suitable unique value for a new device interface
4750  *	number.  The caller must hold the rtnl semaphore or the
4751  *	dev_base_lock to be sure it remains unique.
4752  */
4753 static int dev_new_index(struct net *net)
4754 {
4755 	static int ifindex;
4756 	for (;;) {
4757 		if (++ifindex <= 0)
4758 			ifindex = 1;
4759 		if (!__dev_get_by_index(net, ifindex))
4760 			return ifindex;
4761 	}
4762 }
4763 
4764 /* Delayed registration/unregistration */
4765 static LIST_HEAD(net_todo_list);
4766 
4767 static void net_set_todo(struct net_device *dev)
4768 {
4769 	list_add_tail(&dev->todo_list, &net_todo_list);
4770 }
4771 
4772 static void rollback_registered_many(struct list_head *head)
4773 {
4774 	struct net_device *dev, *tmp;
4775 
4776 	BUG_ON(dev_boot_phase);
4777 	ASSERT_RTNL();
4778 
4779 	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
4780 		/* Some devices get here without ever having been
4781 		 * registered, to unwind a failed initialization.
4782 		 * Remove those devices and proceed with the remaining.
4783 		 */
4784 		if (dev->reg_state == NETREG_UNINITIALIZED) {
4785 			pr_debug("unregister_netdevice: device %s/%p never "
4786 				 "was registered\n", dev->name, dev);
4787 
4788 			WARN_ON(1);
4789 			list_del(&dev->unreg_list);
4790 			continue;
4791 		}
4792 
4793 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
4794 
4795 		/* If device is running, close it first. */
4796 		dev_close(dev);
4797 
4798 		/* And unlink it from device chain. */
4799 		unlist_netdevice(dev);
4800 
4801 		dev->reg_state = NETREG_UNREGISTERING;
4802 	}
4803 
4804 	synchronize_net();
4805 
4806 	list_for_each_entry(dev, head, unreg_list) {
4807 		/* Shutdown queueing discipline. */
4808 		dev_shutdown(dev);
4809 
4810 
4811 		/* Notify protocols that we are about to destroy
4812 		   this device. They should clean up all of their state.
4813 		*/
4814 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4815 
4816 		if (!dev->rtnl_link_ops ||
4817 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
4818 			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
4819 
4820 		/*
4821 		 *	Flush the unicast and multicast chains
4822 		 */
4823 		dev_uc_flush(dev);
4824 		dev_mc_flush(dev);
4825 
4826 		if (dev->netdev_ops->ndo_uninit)
4827 			dev->netdev_ops->ndo_uninit(dev);
4828 
4829 		/* Notifier chain MUST detach us from master device. */
4830 		WARN_ON(dev->master);
4831 
4832 		/* Remove entries from kobject tree */
4833 		netdev_unregister_kobject(dev);
4834 	}
4835 
4836 	/* Process any work delayed until the end of the batch */
4837 	dev = list_first_entry(head, struct net_device, unreg_list);
4838 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4839 
4840 	synchronize_net();
4841 
4842 	list_for_each_entry(dev, head, unreg_list)
4843 		dev_put(dev);
4844 }
4845 
4846 static void rollback_registered(struct net_device *dev)
4847 {
4848 	LIST_HEAD(single);
4849 
4850 	list_add(&dev->unreg_list, &single);
4851 	rollback_registered_many(&single);
4852 }
4853 
4854 static void __netdev_init_queue_locks_one(struct net_device *dev,
4855 					  struct netdev_queue *dev_queue,
4856 					  void *_unused)
4857 {
4858 	spin_lock_init(&dev_queue->_xmit_lock);
4859 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4860 	dev_queue->xmit_lock_owner = -1;
4861 }
4862 
4863 static void netdev_init_queue_locks(struct net_device *dev)
4864 {
4865 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4866 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4867 }
4868 
4869 unsigned long netdev_fix_features(unsigned long features, const char *name)
4870 {
4871 	/* Fix illegal SG+CSUM combinations. */
4872 	if ((features & NETIF_F_SG) &&
4873 	    !(features & NETIF_F_ALL_CSUM)) {
4874 		if (name)
4875 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4876 			       "checksum feature.\n", name);
4877 		features &= ~NETIF_F_SG;
4878 	}
4879 
4880 	/* TSO requires that SG is present as well. */
4881 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4882 		if (name)
4883 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4884 			       "SG feature.\n", name);
4885 		features &= ~NETIF_F_TSO;
4886 	}
4887 
4888 	if (features & NETIF_F_UFO) {
4889 		if (!(features & NETIF_F_GEN_CSUM)) {
4890 			if (name)
4891 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4892 				       "since no NETIF_F_HW_CSUM feature.\n",
4893 				       name);
4894 			features &= ~NETIF_F_UFO;
4895 		}
4896 
4897 		if (!(features & NETIF_F_SG)) {
4898 			if (name)
4899 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4900 				       "since no NETIF_F_SG feature.\n", name);
4901 			features &= ~NETIF_F_UFO;
4902 		}
4903 	}
4904 
4905 	return features;
4906 }
4907 EXPORT_SYMBOL(netdev_fix_features);
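
/*
 * Example (illustrative sketch): a driver sanitizing the features it
 * advertises. Here NETIF_F_TSO is silently dropped because NETIF_F_SG
 * is absent.
 *
 *	dev->features = NETIF_F_TSO | NETIF_F_HW_CSUM;
 *	dev->features = netdev_fix_features(dev->features, dev->name);
 */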
4908 
4909 /**
4910  *	netif_stacked_transfer_operstate -	transfer operstate
4911  *	@rootdev: the root or lower level device to transfer state from
4912  *	@dev: the device to transfer operstate to
4913  *
4914  *	Transfer operational state from root to device. This is normally
4915  *	called when a stacking relationship exists between the root
4916  *	device and the device (a leaf device).
4917  */
4918 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
4919 					struct net_device *dev)
4920 {
4921 	if (rootdev->operstate == IF_OPER_DORMANT)
4922 		netif_dormant_on(dev);
4923 	else
4924 		netif_dormant_off(dev);
4925 
4926 	if (netif_carrier_ok(rootdev)) {
4927 		if (!netif_carrier_ok(dev))
4928 			netif_carrier_on(dev);
4929 	} else {
4930 		if (netif_carrier_ok(dev))
4931 			netif_carrier_off(dev);
4932 	}
4933 }
4934 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
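
/*
 * Example (illustrative sketch): a VLAN-style driver propagating carrier
 * and dormancy from its lower device after a link event; `real_dev' and
 * `vlan_dev' are hypothetical lower and upper devices.
 *
 *	netif_stacked_transfer_operstate(real_dev, vlan_dev);
 */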
4935 
4936 /**
4937  *	register_netdevice	- register a network device
4938  *	@dev: device to register
4939  *
4940  *	Take a completed network device structure and add it to the kernel
4941  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4942  *	chain. 0 is returned on success. A negative errno code is returned
4943  *	on a failure to set up the device, or if the name is a duplicate.
4944  *
4945  *	Callers must hold the rtnl semaphore. You may want
4946  *	register_netdev() instead of this.
4947  *
4948  *	BUGS:
4949  *	The locking appears insufficient to guarantee two parallel registers
4950  *	will not get the same name.
4951  */
4952 
4953 int register_netdevice(struct net_device *dev)
4954 {
4955 	int ret;
4956 	struct net *net = dev_net(dev);
4957 
4958 	BUG_ON(dev_boot_phase);
4959 	ASSERT_RTNL();
4960 
4961 	might_sleep();
4962 
4963 	/* When net_devices are persistent, this will be fatal. */
4964 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4965 	BUG_ON(!net);
4966 
4967 	spin_lock_init(&dev->addr_list_lock);
4968 	netdev_set_addr_lockdep_class(dev);
4969 	netdev_init_queue_locks(dev);
4970 
4971 	dev->iflink = -1;
4972 
4973 #ifdef CONFIG_RPS
4974 	if (!dev->num_rx_queues) {
4975 		/*
4976 		 * Allocate a single RX queue if driver never called
4977 		 * alloc_netdev_mq
4978 		 */
4979 
4980 		dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4981 		if (!dev->_rx) {
4982 			ret = -ENOMEM;
4983 			goto out;
4984 		}
4985 
4986 		dev->_rx->first = dev->_rx;
4987 		atomic_set(&dev->_rx->count, 1);
4988 		dev->num_rx_queues = 1;
4989 	}
4990 #endif
4991 	/* Init, if this function is available */
4992 	if (dev->netdev_ops->ndo_init) {
4993 		ret = dev->netdev_ops->ndo_init(dev);
4994 		if (ret) {
4995 			if (ret > 0)
4996 				ret = -EIO;
4997 			goto out;
4998 		}
4999 	}
5000 
5001 	ret = dev_get_valid_name(dev, dev->name, 0);
5002 	if (ret)
5003 		goto err_uninit;
5004 
5005 	dev->ifindex = dev_new_index(net);
5006 	if (dev->iflink == -1)
5007 		dev->iflink = dev->ifindex;
5008 
5009 	/* Fix illegal checksum combinations */
5010 	if ((dev->features & NETIF_F_HW_CSUM) &&
5011 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5012 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
5013 		       dev->name);
5014 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5015 	}
5016 
5017 	if ((dev->features & NETIF_F_NO_CSUM) &&
5018 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5019 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
5020 		       dev->name);
5021 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5022 	}
5023 
5024 	dev->features = netdev_fix_features(dev->features, dev->name);
5025 
5026 	/* Enable software GSO if SG is supported. */
5027 	if (dev->features & NETIF_F_SG)
5028 		dev->features |= NETIF_F_GSO;
5029 
5030 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5031 	ret = notifier_to_errno(ret);
5032 	if (ret)
5033 		goto err_uninit;
5034 
5035 	ret = netdev_register_kobject(dev);
5036 	if (ret)
5037 		goto err_uninit;
5038 	dev->reg_state = NETREG_REGISTERED;
5039 
5040 	/*
5041 	 *	Default initial state at registration is that the
5042 	 *	device is present.
5043 	 */
5044 
5045 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5046 
5047 	dev_init_scheduler(dev);
5048 	dev_hold(dev);
5049 	list_netdevice(dev);
5050 
5051 	/* Notify protocols, that a new device appeared. */
5052 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5053 	ret = notifier_to_errno(ret);
5054 	if (ret) {
5055 		rollback_registered(dev);
5056 		dev->reg_state = NETREG_UNREGISTERED;
5057 	}
5058 	/*
5059 	 *	Prevent userspace races by waiting until the network
5060 	 *	device is fully set up before sending notifications.
5061 	 */
5062 	if (!dev->rtnl_link_ops ||
5063 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5064 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5065 
5066 out:
5067 	return ret;
5068 
5069 err_uninit:
5070 	if (dev->netdev_ops->ndo_uninit)
5071 		dev->netdev_ops->ndo_uninit(dev);
5072 	goto out;
5073 }
5074 EXPORT_SYMBOL(register_netdevice);
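
/*
 * Example (illustrative sketch): registering while RTNL is already held,
 * as an rtnl_link_ops->newlink() implementation would.
 *
 *	ASSERT_RTNL();
 *	err = register_netdevice(dev);
 *	if (err)
 *		free_netdev(dev);
 */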
5075 
5076 /**
5077  *	init_dummy_netdev	- init a dummy network device for NAPI
5078  *	@dev: device to init
5079  *
5080  *	This takes a network device structure and initializes the minimum
5081  *	number of fields so it can be used to schedule NAPI polls without
5082  *	registering a full blown interface. This is to be used by drivers
5083  *	that need to tie several hardware interfaces to a single NAPI
5084  *	poll scheduler due to HW limitations.
5085  */
5086 int init_dummy_netdev(struct net_device *dev)
5087 {
5088 	/* Clear everything. Note we don't initialize spinlocks
5089 	 * as they aren't supposed to be taken by any of the
5090 	 * NAPI code and this dummy netdev is supposed to be
5091 	 * only ever used for NAPI polls
5092 	 */
5093 	memset(dev, 0, sizeof(struct net_device));
5094 
5095 	/* make sure we BUG if trying to hit standard
5096 	 * register/unregister code path
5097 	 */
5098 	dev->reg_state = NETREG_DUMMY;
5099 
5100 	/* initialize the ref count */
5101 	atomic_set(&dev->refcnt, 1);
5102 
5103 	/* NAPI wants this */
5104 	INIT_LIST_HEAD(&dev->napi_list);
5105 
5106 	/* a dummy interface is started by default */
5107 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5108 	set_bit(__LINK_STATE_START, &dev->state);
5109 
5110 	return 0;
5111 }
5112 EXPORT_SYMBOL_GPL(init_dummy_netdev);
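
/*
 * Example (illustrative sketch): a driver funnelling several hardware
 * channels into one NAPI context hung off a dummy device; `priv' and
 * `my_poll' are hypothetical.
 *
 *	init_dummy_netdev(&priv->dummy_dev);
 *	netif_napi_add(&priv->dummy_dev, &priv->napi, my_poll, 64);
 *	napi_enable(&priv->napi);
 */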
5113 
5114 
5115 /**
5116  *	register_netdev	- register a network device
5117  *	@dev: device to register
5118  *
5119  *	Take a completed network device structure and add it to the kernel
5120  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5121  *	chain. 0 is returned on success. A negative errno code is returned
5122  *	on a failure to set up the device, or if the name is a duplicate.
5123  *
5124  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
5125  *	and expands the device name if you passed a format string to
5126  *	alloc_netdev.
5127  */
5128 int register_netdev(struct net_device *dev)
5129 {
5130 	int err;
5131 
5132 	rtnl_lock();
5133 
5134 	/*
5135 	 * If the name is a format string the caller wants us to do a
5136 	 * name allocation.
5137 	 */
5138 	if (strchr(dev->name, '%')) {
5139 		err = dev_alloc_name(dev, dev->name);
5140 		if (err < 0)
5141 			goto out;
5142 	}
5143 
5144 	err = register_netdevice(dev);
5145 out:
5146 	rtnl_unlock();
5147 	return err;
5148 }
5149 EXPORT_SYMBOL(register_netdev);
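
/*
 * Example (illustrative sketch): the usual probe-time sequence; "%d" in
 * the name requests expansion, and `struct my_priv' and `my_setup' are
 * hypothetical.
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "mydev%d", my_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */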
5150 
5151 /*
5152  * netdev_wait_allrefs - wait until all references are gone.
5153  *
5154  * This is called when unregistering network devices.
5155  *
5156  * Any protocol or device that holds a reference should register
5157  * for netdevice notification, and cleanup and put back the
5158  * reference if they receive an UNREGISTER event.
5159  * We can get stuck here if buggy protocols don't correctly
5160  * call dev_put.
5161  */
5162 static void netdev_wait_allrefs(struct net_device *dev)
5163 {
5164 	unsigned long rebroadcast_time, warning_time;
5165 
5166 	linkwatch_forget_dev(dev);
5167 
5168 	rebroadcast_time = warning_time = jiffies;
5169 	while (atomic_read(&dev->refcnt) != 0) {
5170 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5171 			rtnl_lock();
5172 
5173 			/* Rebroadcast unregister notification */
5174 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5175 			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5176 			 * should have already handled it the first time */
5177 
5178 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5179 				     &dev->state)) {
5180 				/* We must not have linkwatch events
5181 				 * pending on unregister. If this
5182 				 * happens, we simply run the queue
5183 				 * unscheduled, resulting in a noop
5184 				 * for this device.
5185 				 */
5186 				linkwatch_run_queue();
5187 			}
5188 
5189 			__rtnl_unlock();
5190 
5191 			rebroadcast_time = jiffies;
5192 		}
5193 
5194 		msleep(250);
5195 
5196 		if (time_after(jiffies, warning_time + 10 * HZ)) {
5197 			printk(KERN_EMERG "unregister_netdevice: "
5198 			       "waiting for %s to become free. Usage "
5199 			       "count = %d\n",
5200 			       dev->name, atomic_read(&dev->refcnt));
5201 			warning_time = jiffies;
5202 		}
5203 	}
5204 }
5205 
5206 /* The sequence is:
5207  *
5208  *	rtnl_lock();
5209  *	...
5210  *	register_netdevice(x1);
5211  *	register_netdevice(x2);
5212  *	...
5213  *	unregister_netdevice(y1);
5214  *	unregister_netdevice(y2);
5215  *      ...
5216  *	rtnl_unlock();
5217  *	free_netdev(y1);
5218  *	free_netdev(y2);
5219  *
5220  * We are invoked by rtnl_unlock().
5221  * This allows us to deal with problems:
5222  * 1) We can delete sysfs objects which invoke hotplug
5223  *    without deadlocking with linkwatch via keventd.
5224  * 2) Since we run with the RTNL semaphore not held, we can sleep
5225  *    safely in order to wait for the netdev refcnt to drop to zero.
5226  *
5227  * We must not return until all unregister events added during
5228  * the interval the lock was held have been completed.
5229  */
5230 void netdev_run_todo(void)
5231 {
5232 	struct list_head list;
5233 
5234 	/* Snapshot list, allow later requests */
5235 	list_replace_init(&net_todo_list, &list);
5236 
5237 	__rtnl_unlock();
5238 
5239 	while (!list_empty(&list)) {
5240 		struct net_device *dev
5241 			= list_first_entry(&list, struct net_device, todo_list);
5242 		list_del(&dev->todo_list);
5243 
5244 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5245 			printk(KERN_ERR "network todo '%s' but state %d\n",
5246 			       dev->name, dev->reg_state);
5247 			dump_stack();
5248 			continue;
5249 		}
5250 
5251 		dev->reg_state = NETREG_UNREGISTERED;
5252 
5253 		on_each_cpu(flush_backlog, dev, 1);
5254 
5255 		netdev_wait_allrefs(dev);
5256 
5257 		/* paranoia */
5258 		BUG_ON(atomic_read(&dev->refcnt));
5259 		WARN_ON(dev->ip_ptr);
5260 		WARN_ON(dev->ip6_ptr);
5261 		WARN_ON(dev->dn_ptr);
5262 
5263 		if (dev->destructor)
5264 			dev->destructor(dev);
5265 
5266 		/* Free network device */
5267 		kobject_put(&dev->dev.kobj);
5268 	}
5269 }
5270 
5271 /**
5272  *	dev_txq_stats_fold - fold tx_queues stats
5273  *	@dev: device to get statistics from
5274  *	@stats: struct net_device_stats to hold results
5275  */
5276 void dev_txq_stats_fold(const struct net_device *dev,
5277 			struct net_device_stats *stats)
5278 {
5279 	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5280 	unsigned int i;
5281 	struct netdev_queue *txq;
5282 
5283 	for (i = 0; i < dev->num_tx_queues; i++) {
5284 		txq = netdev_get_tx_queue(dev, i);
5285 		tx_bytes   += txq->tx_bytes;
5286 		tx_packets += txq->tx_packets;
5287 		tx_dropped += txq->tx_dropped;
5288 	}
5289 	if (tx_bytes || tx_packets || tx_dropped) {
5290 		stats->tx_bytes   = tx_bytes;
5291 		stats->tx_packets = tx_packets;
5292 		stats->tx_dropped = tx_dropped;
5293 	}
5294 }
5295 EXPORT_SYMBOL(dev_txq_stats_fold);
5296 
5297 /**
5298  *	dev_get_stats	- get network device statistics
5299  *	@dev: device to get statistics from
5300  *
5301  *	Get network statistics from device. The device driver may provide
5302  *	its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
5303  *	the internal statistics structure is used.
5304  */
5305 const struct net_device_stats *dev_get_stats(struct net_device *dev)
5306 {
5307 	const struct net_device_ops *ops = dev->netdev_ops;
5308 
5309 	if (ops->ndo_get_stats)
5310 		return ops->ndo_get_stats(dev);
5311 
5312 	dev_txq_stats_fold(dev, &dev->stats);
5313 	return &dev->stats;
5314 }
5315 EXPORT_SYMBOL(dev_get_stats);
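
/*
 * Example (illustrative sketch): consuming the returned snapshot; the
 * pointer aliases state inside the device, so hold a reference on dev
 * while reading it.
 *
 *	const struct net_device_stats *stats = dev_get_stats(dev);
 *
 *	pr_debug("%s: rx %lu tx %lu\n", dev->name,
 *		 stats->rx_packets, stats->tx_packets);
 */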
5316 
5317 static void netdev_init_one_queue(struct net_device *dev,
5318 				  struct netdev_queue *queue,
5319 				  void *_unused)
5320 {
5321 	queue->dev = dev;
5322 }
5323 
5324 static void netdev_init_queues(struct net_device *dev)
5325 {
5326 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5327 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5328 	spin_lock_init(&dev->tx_global_lock);
5329 }
5330 
5331 /**
5332  *	alloc_netdev_mq - allocate network device
5333  *	@sizeof_priv:	size of private data to allocate space for
5334  *	@name:		device name format string
5335  *	@setup:		callback to initialize device
5336  *	@queue_count:	the number of subqueues to allocate
5337  *
5338  *	Allocates a struct net_device with private data area for driver use
5339  *	and performs basic initialization.  Also allocates subqueue structs
5340  *	for each queue on the device at the end of the netdevice.
5341  */
5342 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5343 		void (*setup)(struct net_device *), unsigned int queue_count)
5344 {
5345 	struct netdev_queue *tx;
5346 	struct net_device *dev;
5347 	size_t alloc_size;
5348 	struct net_device *p;
5349 #ifdef CONFIG_RPS
5350 	struct netdev_rx_queue *rx;
5351 	int i;
5352 #endif
5353 
5354 	BUG_ON(strlen(name) >= sizeof(dev->name));
5355 
5356 	alloc_size = sizeof(struct net_device);
5357 	if (sizeof_priv) {
5358 		/* ensure 32-byte alignment of private area */
5359 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5360 		alloc_size += sizeof_priv;
5361 	}
5362 	/* ensure 32-byte alignment of whole construct */
5363 	alloc_size += NETDEV_ALIGN - 1;
5364 
5365 	p = kzalloc(alloc_size, GFP_KERNEL);
5366 	if (!p) {
5367 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5368 		return NULL;
5369 	}
5370 
5371 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5372 	if (!tx) {
5373 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5374 		       "tx qdiscs.\n");
5375 		goto free_p;
5376 	}
5377 
5378 #ifdef CONFIG_RPS
5379 	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5380 	if (!rx) {
5381 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5382 		       "rx queues.\n");
5383 		goto free_tx;
5384 	}
5385 
5386 	atomic_set(&rx->count, queue_count);
5387 
5388 	/*
5389 	 * Set a pointer to first element in the array which holds the
5390 	 * reference count.
5391 	 */
5392 	for (i = 0; i < queue_count; i++)
5393 		rx[i].first = rx;
5394 #endif
5395 
5396 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5397 	dev->padded = (char *)dev - (char *)p;
5398 
5399 	if (dev_addr_init(dev))
5400 		goto free_rx;
5401 
5402 	dev_mc_init(dev);
5403 	dev_uc_init(dev);
5404 
5405 	dev_net_set(dev, &init_net);
5406 
5407 	dev->_tx = tx;
5408 	dev->num_tx_queues = queue_count;
5409 	dev->real_num_tx_queues = queue_count;
5410 
5411 #ifdef CONFIG_RPS
5412 	dev->_rx = rx;
5413 	dev->num_rx_queues = queue_count;
5414 #endif
5415 
5416 	dev->gso_max_size = GSO_MAX_SIZE;
5417 
5418 	netdev_init_queues(dev);
5419 
5420 	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5421 	dev->ethtool_ntuple_list.count = 0;
5422 	INIT_LIST_HEAD(&dev->napi_list);
5423 	INIT_LIST_HEAD(&dev->unreg_list);
5424 	INIT_LIST_HEAD(&dev->link_watch_list);
5425 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5426 	setup(dev);
5427 	strcpy(dev->name, name);
5428 	return dev;
5429 
5430 free_rx:
5431 #ifdef CONFIG_RPS
5432 	kfree(rx);
5433 free_tx:
5434 #endif
5435 	kfree(tx);
5436 free_p:
5437 	kfree(p);
5438 	return NULL;
5439 }
5440 EXPORT_SYMBOL(alloc_netdev_mq);
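
/*
 * Example (illustrative sketch): an Ethernet driver allocating a device
 * with eight queue structures; `struct my_priv' is hypothetical.
 *
 *	dev = alloc_netdev_mq(sizeof(struct my_priv), "eth%d",
 *			      ether_setup, 8);
 *	if (!dev)
 *		return -ENOMEM;
 */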
5441 
5442 /**
5443  *	free_netdev - free network device
5444  *	@dev: device
5445  *
5446  *	This function does the last stage of destroying an allocated device
5447  * 	interface. The reference to the device object is released.
5448  *	If this is the last reference then it will be freed.
5449  */
5450 void free_netdev(struct net_device *dev)
5451 {
5452 	struct napi_struct *p, *n;
5453 
5454 	release_net(dev_net(dev));
5455 
5456 	kfree(dev->_tx);
5457 
5458 	/* Flush device addresses */
5459 	dev_addr_flush(dev);
5460 
5461 	/* Clear ethtool n-tuple list */
5462 	ethtool_ntuple_flush(dev);
5463 
5464 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5465 		netif_napi_del(p);
5466 
5467 	/*  Compatibility with error handling in drivers */
5468 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5469 		kfree((char *)dev - dev->padded);
5470 		return;
5471 	}
5472 
5473 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5474 	dev->reg_state = NETREG_RELEASED;
5475 
5476 	/* will free via device release */
5477 	put_device(&dev->dev);
5478 }
5479 EXPORT_SYMBOL(free_netdev);
5480 
5481 /**
5482  *	synchronize_net -  Synchronize with packet receive processing
5483  *
5484  *	Wait for packets currently being received to be done.
5485  *	Does not block later packets from starting.
5486  */
5487 void synchronize_net(void)
5488 {
5489 	might_sleep();
5490 	synchronize_rcu();
5491 }
5492 EXPORT_SYMBOL(synchronize_net);
5493 
5494 /**
5495  *	unregister_netdevice_queue - remove device from the kernel
5496  *	@dev: device
5497  *	@head: list
5498  *
5499  *	This function shuts down a device interface and removes it
5500  *	from the kernel tables.
5501  *	If @head is not NULL, the device is queued to be unregistered later.
5502  *
5503  *	Callers must hold the rtnl semaphore.  You may want
5504  *	unregister_netdev() instead of this.
5505  */
5506 
5507 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
5508 {
5509 	ASSERT_RTNL();
5510 
5511 	if (head) {
5512 		list_move_tail(&dev->unreg_list, head);
5513 	} else {
5514 		rollback_registered(dev);
5515 		/* Finish processing unregister after unlock */
5516 		net_set_todo(dev);
5517 	}
5518 }
5519 EXPORT_SYMBOL(unregister_netdevice_queue);
5520 
5521 /**
5522  *	unregister_netdevice_many - unregister many devices
5523  *	@head: list of devices
5524  */
5525 void unregister_netdevice_many(struct list_head *head)
5526 {
5527 	struct net_device *dev;
5528 
5529 	if (!list_empty(head)) {
5530 		rollback_registered_many(head);
5531 		list_for_each_entry(dev, head, unreg_list)
5532 			net_set_todo(dev);
5533 	}
5534 }
5535 EXPORT_SYMBOL(unregister_netdevice_many);
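
/*
 * Example (illustrative sketch): batching several unregisters so the
 * expensive synchronize_net() calls in rollback_registered_many() are
 * paid once for the whole group; `dev1' and `dev2' are hypothetical.
 *
 *	LIST_HEAD(kill_list);
 *
 *	rtnl_lock();
 *	unregister_netdevice_queue(dev1, &kill_list);
 *	unregister_netdevice_queue(dev2, &kill_list);
 *	unregister_netdevice_many(&kill_list);
 *	rtnl_unlock();
 */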
5536 
5537 /**
5538  *	unregister_netdev - remove device from the kernel
5539  *	@dev: device
5540  *
5541  *	This function shuts down a device interface and removes it
5542  *	from the kernel tables.
5543  *
5544  *	This is just a wrapper for unregister_netdevice that takes
5545  *	the rtnl semaphore.  In general you want to use this and not
5546  *	unregister_netdevice.
5547  */
5548 void unregister_netdev(struct net_device *dev)
5549 {
5550 	rtnl_lock();
5551 	unregister_netdevice(dev);
5552 	rtnl_unlock();
5553 }
5554 EXPORT_SYMBOL(unregister_netdev);
5555 
5556 /**
5557  *	dev_change_net_namespace - move device to a different network namespace
5558  *	@dev: device
5559  *	@net: network namespace
5560  *	@pat: If not NULL name pattern to try if the current device name
5561  *	      is already taken in the destination network namespace.
5562  *
5563  *	This function shuts down a device interface and moves it
5564  *	to a new network namespace. On success 0 is returned, on
5565  *	a failure a negative errno code is returned.
5566  *
5567  *	Callers must hold the rtnl semaphore.
5568  */
5569 
5570 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5571 {
5572 	int err;
5573 
5574 	ASSERT_RTNL();
5575 
5576 	/* Don't allow namespace local devices to be moved. */
5577 	err = -EINVAL;
5578 	if (dev->features & NETIF_F_NETNS_LOCAL)
5579 		goto out;
5580 
5581 	/* Ensure the device has been registered */
5582 	err = -EINVAL;
5583 	if (dev->reg_state != NETREG_REGISTERED)
5584 		goto out;
5585 
5586 	/* Get out if there is nothing to do */
5587 	err = 0;
5588 	if (net_eq(dev_net(dev), net))
5589 		goto out;
5590 
5591 	/* Pick the destination device name, and ensure
5592 	 * we can use it in the destination network namespace.
5593 	 */
5594 	err = -EEXIST;
5595 	if (__dev_get_by_name(net, dev->name)) {
5596 		/* We get here if we can't use the current device name */
5597 		if (!pat)
5598 			goto out;
5599 		if (dev_get_valid_name(dev, pat, 1))
5600 			goto out;
5601 	}
5602 
5603 	/*
5604 	 * And now a mini version of register_netdevice and unregister_netdevice.
5605 	 */
5606 
5607 	/* If device is running close it first. */
5608 	dev_close(dev);
5609 
5610 	/* And unlink it from device chain */
5611 	err = -ENODEV;
5612 	unlist_netdevice(dev);
5613 
5614 	synchronize_net();
5615 
5616 	/* Shutdown queueing discipline. */
5617 	dev_shutdown(dev);
5618 
5619 	/* Notify protocols that we are about to destroy
5620 	   this device. They should clean up all of their state.
5621 	*/
5622 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5623 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5624 
5625 	/*
5626 	 *	Flush the unicast and multicast chains
5627 	 */
5628 	dev_uc_flush(dev);
5629 	dev_mc_flush(dev);
5630 
5631 	/* Actually switch the network namespace */
5632 	dev_net_set(dev, net);
5633 
5634 	/* If there is an ifindex conflict assign a new one */
5635 	if (__dev_get_by_index(net, dev->ifindex)) {
5636 		int iflink = (dev->iflink == dev->ifindex);
5637 		dev->ifindex = dev_new_index(net);
5638 		if (iflink)
5639 			dev->iflink = dev->ifindex;
5640 	}
5641 
5642 	/* Fixup kobjects */
5643 	err = device_rename(&dev->dev, dev->name);
5644 	WARN_ON(err);
5645 
5646 	/* Add the device back in the hashes */
5647 	list_netdevice(dev);
5648 
5649 	/* Notify protocols, that a new device appeared. */
5650 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5651 
5652 	/*
5653 	 *	Prevent userspace races by waiting until the network
5654 	 *	device is fully set up before sending notifications.
5655 	 */
5656 	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5657 
5658 	synchronize_net();
5659 	err = 0;
5660 out:
5661 	return err;
5662 }
5663 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
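
/*
 * Example (illustrative sketch): moving a device into another namespace,
 * falling back to a "dev%d" name if the current one is taken there;
 * `target_net' is hypothetical.
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(dev, target_net, "dev%d");
 *	rtnl_unlock();
 */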
5664 
5665 static int dev_cpu_callback(struct notifier_block *nfb,
5666 			    unsigned long action,
5667 			    void *ocpu)
5668 {
5669 	struct sk_buff **list_skb;
5670 	struct sk_buff *skb;
5671 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5672 	struct softnet_data *sd, *oldsd;
5673 
5674 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5675 		return NOTIFY_OK;
5676 
5677 	local_irq_disable();
5678 	cpu = smp_processor_id();
5679 	sd = &per_cpu(softnet_data, cpu);
5680 	oldsd = &per_cpu(softnet_data, oldcpu);
5681 
5682 	/* Find end of our completion_queue. */
5683 	list_skb = &sd->completion_queue;
5684 	while (*list_skb)
5685 		list_skb = &(*list_skb)->next;
5686 	/* Append completion queue from offline CPU. */
5687 	*list_skb = oldsd->completion_queue;
5688 	oldsd->completion_queue = NULL;
5689 
5690 	/* Append output queue from offline CPU. */
5691 	if (oldsd->output_queue) {
5692 		*sd->output_queue_tailp = oldsd->output_queue;
5693 		sd->output_queue_tailp = oldsd->output_queue_tailp;
5694 		oldsd->output_queue = NULL;
5695 		oldsd->output_queue_tailp = &oldsd->output_queue;
5696 	}
5697 
5698 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5699 	local_irq_enable();
5700 
5701 	/* Process offline CPU's input_pkt_queue */
5702 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
5703 		netif_rx(skb);
5704 		input_queue_head_incr(oldsd);
5705 	}
5706 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
5707 		netif_rx(skb);
5708 		input_queue_head_incr(oldsd);
5709 	}
5710 
5711 	return NOTIFY_OK;
5712 }
5713 
5714 
5715 /**
5716  *	netdev_increment_features - increment feature set by one
5717  *	@all: current feature set
5718  *	@one: new feature set
5719  *	@mask: mask feature set
5720  *
5721  *	Computes a new feature set after adding a device with feature set
5722  *	@one to the master device with current feature set @all.  Will not
5723  *	enable anything that is off in @mask. Returns the new feature set.
5724  */
5725 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5726 					unsigned long mask)
5727 {
5728 	/* If device needs checksumming, downgrade to it. */
5729 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5730 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5731 	else if (mask & NETIF_F_ALL_CSUM) {
5732 		/* If one device supports v4/v6 checksumming, set for all. */
5733 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5734 		    !(all & NETIF_F_GEN_CSUM)) {
5735 			all &= ~NETIF_F_ALL_CSUM;
5736 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5737 		}
5738 
5739 		/* If one device supports hw checksumming, set for all. */
5740 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5741 			all &= ~NETIF_F_ALL_CSUM;
5742 			all |= NETIF_F_HW_CSUM;
5743 		}
5744 	}
5745 
5746 	one |= NETIF_F_ALL_CSUM;
5747 
5748 	one |= all & NETIF_F_ONE_FOR_ALL;
5749 	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
5750 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5751 
5752 	return all;
5753 }
5754 EXPORT_SYMBOL(netdev_increment_features);
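
/*
 * Example (illustrative sketch): a master device folding a slave's
 * feature set into its own, never enabling anything outside the mask;
 * `master_dev' and `slave_dev' are hypothetical.
 *
 *	unsigned long mask = NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_TSO;
 *
 *	master_dev->features = netdev_increment_features(
 *				master_dev->features,
 *				slave_dev->features, mask);
 */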
5755 
5756 static struct hlist_head *netdev_create_hash(void)
5757 {
5758 	int i;
5759 	struct hlist_head *hash;
5760 
5761 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5762 	if (hash != NULL)
5763 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5764 			INIT_HLIST_HEAD(&hash[i]);
5765 
5766 	return hash;
5767 }
5768 
5769 /* Initialize per network namespace state */
5770 static int __net_init netdev_init(struct net *net)
5771 {
5772 	INIT_LIST_HEAD(&net->dev_base_head);
5773 
5774 	net->dev_name_head = netdev_create_hash();
5775 	if (net->dev_name_head == NULL)
5776 		goto err_name;
5777 
5778 	net->dev_index_head = netdev_create_hash();
5779 	if (net->dev_index_head == NULL)
5780 		goto err_idx;
5781 
5782 	return 0;
5783 
5784 err_idx:
5785 	kfree(net->dev_name_head);
5786 err_name:
5787 	return -ENOMEM;
5788 }
5789 
5790 /**
5791  *	netdev_drivername - network driver for the device
5792  *	@dev: network device
5793  *	@buffer: buffer for resulting name
5794  *	@len: size of buffer
5795  *
5796  *	Determine the network driver for the device.
5797  */
5798 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5799 {
5800 	const struct device_driver *driver;
5801 	const struct device *parent;
5802 
5803 	if (len <= 0 || !buffer)
5804 		return buffer;
5805 	buffer[0] = 0;
5806 
5807 	parent = dev->dev.parent;
5808 
5809 	if (!parent)
5810 		return buffer;
5811 
5812 	driver = parent->driver;
5813 	if (driver && driver->name)
5814 		strlcpy(buffer, driver->name, len);
5815 	return buffer;
5816 }
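
/*
 * A minimal usage sketch (hypothetical caller, modelled on the tx
 * watchdog): name the driver when logging a device problem:
 *
 *	char drivername[64];
 *
 *	printk(KERN_WARNING "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 *	       dev->name, netdev_drivername(dev, drivername, 64));
 */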
5817 
5818 static void __net_exit netdev_exit(struct net *net)
5819 {
5820 	kfree(net->dev_name_head);
5821 	kfree(net->dev_index_head);
5822 }
5823 
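/* Per-namespace setup/teardown; hooked up via register_pernet_subsys()
 * in net_dev_init() below.
 */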
5824 static struct pernet_operations __net_initdata netdev_net_ops = {
5825 	.init = netdev_init,
5826 	.exit = netdev_exit,
5827 };
5828 
5829 static void __net_exit default_device_exit(struct net *net)
5830 {
5831 	struct net_device *dev, *aux;
5832 	/*
5833 	 * Push all migratable network devices back to the
5834 	 * initial network namespace
5835 	 */
5836 	rtnl_lock();
5837 	for_each_netdev_safe(net, dev, aux) {
5838 		int err;
5839 		char fb_name[IFNAMSIZ];
5840 
5841 		/* Ignore unmovable devices (e.g. loopback) */
5842 		if (dev->features & NETIF_F_NETNS_LOCAL)
5843 			continue;
5844 
5845 		/* Leave virtual devices for the generic cleanup */
5846 		if (dev->rtnl_link_ops)
5847 			continue;
5848 
5849 		/* Push remaining network devices to init_net */
5850 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5851 		err = dev_change_net_namespace(dev, &init_net, fb_name);
5852 		if (err) {
5853 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5854 				__func__, dev->name, err);
5855 			BUG();
5856 		}
5857 	}
5858 	rtnl_unlock();
5859 }
5860 
5861 static void __net_exit default_device_exit_batch(struct list_head *net_list)
5862 {
5863 	/* At exit all network devices must be removed from a network
5864 	 * namespace.  Do this in the reverse order of registration.
5865 	 * Do this across as many network namespaces as possible to
5866 	 * improve batching efficiency.
5867 	 */
5868 	struct net_device *dev;
5869 	struct net *net;
5870 	LIST_HEAD(dev_kill_list);
5871 
5872 	rtnl_lock();
5873 	list_for_each_entry(net, net_list, exit_list) {
5874 		for_each_netdev_reverse(net, dev) {
5875 			if (dev->rtnl_link_ops)
5876 				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
5877 			else
5878 				unregister_netdevice_queue(dev, &dev_kill_list);
5879 		}
5880 	}
5881 	unregister_netdevice_many(&dev_kill_list);
5882 	rtnl_unlock();
5883 }
5884 
5885 static struct pernet_operations __net_initdata default_device_ops = {
5886 	.exit = default_device_exit,
5887 	.exit_batch = default_device_exit_batch,
5888 };
5889 
5890 /*
5891  *	Initialize the DEV module: set up the packet type lists, the
5892  *	per-namespace state, the per-cpu packet receive queues, the
5893  *	NET_TX/NET_RX softirqs and the CPU hotplug callback.
5894  *
5895  */
5896 
5897 /*
5898  *       This is called single-threaded during boot, so there is
5899  *       no need to take the rtnl semaphore.
5900  */
5901 static int __init net_dev_init(void)
5902 {
5903 	int i, rc = -ENOMEM;
5904 
5905 	BUG_ON(!dev_boot_phase);
5906 
5907 	if (dev_proc_init())
5908 		goto out;
5909 
5910 	if (netdev_kobject_init())
5911 		goto out;
5912 
5913 	INIT_LIST_HEAD(&ptype_all);
5914 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
5915 		INIT_LIST_HEAD(&ptype_base[i]);
5916 
5917 	if (register_pernet_subsys(&netdev_net_ops))
5918 		goto out;
5919 
5920 	/*
5921 	 *	Initialise the packet receive queues.
5922 	 */
5923 
5924 	for_each_possible_cpu(i) {
5925 		struct softnet_data *sd = &per_cpu(softnet_data, i);
5926 
5927 		memset(sd, 0, sizeof(*sd));
5928 		skb_queue_head_init(&sd->input_pkt_queue);
5929 		skb_queue_head_init(&sd->process_queue);
5930 		sd->completion_queue = NULL;
5931 		INIT_LIST_HEAD(&sd->poll_list);
5932 		sd->output_queue = NULL;
5933 		sd->output_queue_tailp = &sd->output_queue;
5934 #ifdef CONFIG_RPS
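		/* csd lets another CPU IPI this one to drain its backlog */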
5935 		sd->csd.func = rps_trigger_softirq;
5936 		sd->csd.info = sd;
5937 		sd->csd.flags = 0;
5938 		sd->cpu = i;
5939 #endif
5940 
5941 		sd->backlog.poll = process_backlog;
5942 		sd->backlog.weight = weight_p;
5943 		sd->backlog.gro_list = NULL;
5944 		sd->backlog.gro_count = 0;
5945 	}
5946 
5947 	dev_boot_phase = 0;
5948 
5949 	/* The loopback device is special: if any other network device
5950 	 * is present in a network namespace, the loopback device must
5951 	 * be present too. Since we now dynamically allocate and free the
5952 	 * loopback device, ensure this invariant is maintained by
5953 	 * keeping the loopback device as the first device on the
5954 	 * list of network devices: the loopback device is the first
5955 	 * device that appears and the last network device that
5956 	 * disappears.
5957 	 */
5958 	if (register_pernet_device(&loopback_net_ops))
5959 		goto out;
5960 
5961 	if (register_pernet_device(&default_device_ops))
5962 		goto out;
5963 
5964 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5965 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5966 
5967 	hotcpu_notifier(dev_cpu_callback, 0);
5968 	dst_init();
5969 	dev_mcast_init();
5970 	rc = 0;
5971 out:
5972 	return rc;
5973 }
5974 
5975 subsys_initcall(net_dev_init);
5976 
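/*
 * Seed the secret used by the flow hashing in this file (e.g.
 * skb_tx_hash); running as a late initcall gives the random pool
 * time to gather entropy first.
 */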
5977 static int __init initialize_hashrnd(void)
5978 {
5979 	get_random_bytes(&hashrnd, sizeof(hashrnd));
5980 	return 0;
5981 }
5982 
5983 late_initcall_sync(initialize_hashrnd);
5984 
5985