xref: /linux/net/core/dev.c (revision 2dbf708448c836754d25fe6108c5bfe1f5697c95)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <linux/bitops.h>
77 #include <linux/capability.h>
78 #include <linux/cpu.h>
79 #include <linux/types.h>
80 #include <linux/kernel.h>
81 #include <linux/hash.h>
82 #include <linux/slab.h>
83 #include <linux/sched.h>
84 #include <linux/mutex.h>
85 #include <linux/string.h>
86 #include <linux/mm.h>
87 #include <linux/socket.h>
88 #include <linux/sockios.h>
89 #include <linux/errno.h>
90 #include <linux/interrupt.h>
91 #include <linux/if_ether.h>
92 #include <linux/netdevice.h>
93 #include <linux/etherdevice.h>
94 #include <linux/ethtool.h>
95 #include <linux/notifier.h>
96 #include <linux/skbuff.h>
97 #include <net/net_namespace.h>
98 #include <net/sock.h>
99 #include <linux/rtnetlink.h>
100 #include <linux/proc_fs.h>
101 #include <linux/seq_file.h>
102 #include <linux/stat.h>
103 #include <net/dst.h>
104 #include <net/pkt_sched.h>
105 #include <net/checksum.h>
106 #include <net/xfrm.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 #include <trace/events/napi.h>
130 #include <trace/events/net.h>
131 #include <trace/events/skb.h>
132 #include <linux/pci.h>
133 #include <linux/inetdevice.h>
134 #include <linux/cpu_rmap.h>
135 #include <linux/net_tstamp.h>
136 #include <linux/static_key.h>
137 #include <net/flow_keys.h>
138 
139 #include "net-sysfs.h"
140 
141 /* Instead of increasing this, you should create a hash table. */
142 #define MAX_GRO_SKBS 8
143 
144 /* This should be increased if a protocol with a bigger head is added. */
145 #define GRO_MAX_HEAD (MAX_HEADER + 128)
146 
147 /*
148  *	The list of packet types we will receive (as opposed to discard)
149  *	and the routines to invoke.
150  *
 151  *	Why 16? Because with 16 the only overlap we get on a hash of the
152  *	low nibble of the protocol value is RARP/SNAP/X.25.
153  *
154  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
155  *             sure which should go first, but I bet it won't make much
156  *             difference if we are running VLANs.  The good news is that
157  *             this protocol won't be in the list unless compiled in, so
158  *             the average user (w/out VLANs) will not be adversely affected.
159  *             --BLG
160  *
161  *		0800	IP
162  *		8100    802.1Q VLAN
163  *		0001	802.3
164  *		0002	AX.25
165  *		0004	802.2
166  *		8035	RARP
167  *		0005	SNAP
168  *		0805	X.25
169  *		0806	ARP
170  *		8137	IPX
171  *		0009	Localtalk
172  *		86DD	IPv6
173  */
174 
175 #define PTYPE_HASH_SIZE	(16)
176 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
177 
178 static DEFINE_SPINLOCK(ptype_lock);
179 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
180 static struct list_head ptype_all __read_mostly;	/* Taps */
181 
182 /*
183  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
184  * semaphore.
185  *
186  * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
187  *
188  * Writers must hold the rtnl semaphore while they loop through the
189  * dev_base_head list, and hold dev_base_lock for writing when they do the
190  * actual updates.  This allows pure readers to access the list even
191  * while a writer is preparing to update it.
192  *
193  * To put it another way, dev_base_lock is held for writing only to
194  * protect against pure readers; the rtnl semaphore provides the
195  * protection against other writers.
196  *
197  * See, for example usages, register_netdevice() and
198  * unregister_netdevice(), which must be called with the rtnl
199  * semaphore held.
200  */
201 DEFINE_RWLOCK(dev_base_lock);
202 EXPORT_SYMBOL(dev_base_lock);
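
/*
 * An illustrative sketch (not part of this file) of the reader-side
 * discipline described above: a pure reader may take dev_base_lock for
 * reading (or use rcu_read_lock()) while walking the device list.  The
 * function name count_running_devices() is hypothetical.
 *
 *	static int count_running_devices(struct net *net)
 *	{
 *		struct net_device *dev;
 *		int n = 0;
 *
 *		read_lock(&dev_base_lock);
 *		for_each_netdev(net, dev)
 *			if (dev->flags & IFF_UP)
 *				n++;
 *		read_unlock(&dev_base_lock);
 *		return n;
 *	}
 */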
203 
204 static inline void dev_base_seq_inc(struct net *net)
205 {
206 	while (++net->dev_base_seq == 0);
207 }
208 
209 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
210 {
211 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
212 	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
213 }
214 
215 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
216 {
217 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
218 }
219 
220 static inline void rps_lock(struct softnet_data *sd)
221 {
222 #ifdef CONFIG_RPS
223 	spin_lock(&sd->input_pkt_queue.lock);
224 #endif
225 }
226 
227 static inline void rps_unlock(struct softnet_data *sd)
228 {
229 #ifdef CONFIG_RPS
230 	spin_unlock(&sd->input_pkt_queue.lock);
231 #endif
232 }
233 
234 /* Device list insertion */
235 static int list_netdevice(struct net_device *dev)
236 {
237 	struct net *net = dev_net(dev);
238 
239 	ASSERT_RTNL();
240 
241 	write_lock_bh(&dev_base_lock);
242 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
243 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
244 	hlist_add_head_rcu(&dev->index_hlist,
245 			   dev_index_hash(net, dev->ifindex));
246 	write_unlock_bh(&dev_base_lock);
247 
248 	dev_base_seq_inc(net);
249 
250 	return 0;
251 }
252 
253 /* Device list removal
254  * caller must respect a RCU grace period before freeing/reusing dev
255  */
256 static void unlist_netdevice(struct net_device *dev)
257 {
258 	ASSERT_RTNL();
259 
260 	/* Unlink dev from the device chain */
261 	write_lock_bh(&dev_base_lock);
262 	list_del_rcu(&dev->dev_list);
263 	hlist_del_rcu(&dev->name_hlist);
264 	hlist_del_rcu(&dev->index_hlist);
265 	write_unlock_bh(&dev_base_lock);
266 
267 	dev_base_seq_inc(dev_net(dev));
268 }
269 
270 /*
271  *	Our notifier list
272  */
273 
274 static RAW_NOTIFIER_HEAD(netdev_chain);
275 
276 /*
277  *	Device drivers call our routines to queue packets here. We empty the
278  *	queue in the local softnet handler.
279  */
280 
281 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
282 EXPORT_PER_CPU_SYMBOL(softnet_data);
283 
284 #ifdef CONFIG_LOCKDEP
285 /*
286  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
287  * according to dev->type
288  */
289 static const unsigned short netdev_lock_type[] =
290 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
291 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
292 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
293 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
294 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
295 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
296 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
297 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
298 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
299 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
300 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
301 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
302 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
303 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
304 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
305 	 ARPHRD_VOID, ARPHRD_NONE};
306 
307 static const char *const netdev_lock_name[] =
308 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
309 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
310 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
311 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
312 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
313 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
314 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
315 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
316 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
317 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
318 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
319 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
320 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
321 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
322 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
323 	 "_xmit_VOID", "_xmit_NONE"};
324 
325 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
326 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
327 
328 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
329 {
330 	int i;
331 
332 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
333 		if (netdev_lock_type[i] == dev_type)
334 			return i;
335 	/* the last key is used by default */
336 	return ARRAY_SIZE(netdev_lock_type) - 1;
337 }
338 
339 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
340 						 unsigned short dev_type)
341 {
342 	int i;
343 
344 	i = netdev_lock_pos(dev_type);
345 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
346 				   netdev_lock_name[i]);
347 }
348 
349 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
350 {
351 	int i;
352 
353 	i = netdev_lock_pos(dev->type);
354 	lockdep_set_class_and_name(&dev->addr_list_lock,
355 				   &netdev_addr_lock_key[i],
356 				   netdev_lock_name[i]);
357 }
358 #else
359 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
360 						 unsigned short dev_type)
361 {
362 }
363 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
364 {
365 }
366 #endif
367 
368 /*******************************************************************************
369 
370 		Protocol management and registration routines
371 
372 *******************************************************************************/
373 
374 /*
375  *	Add a protocol ID to the list. Now that the input handler is
376  *	smarter we can dispense with all the messy stuff that used to be
377  *	here.
378  *
379  *	BEWARE!!! Protocol handlers, mangling input packets,
380  *	MUST BE last in hash buckets and checking protocol handlers
381  *	MUST start from promiscuous ptype_all chain in net_bh.
382  *	It is true now, do not change it.
383  *	Explanation follows: if protocol handler, mangling packet, will
384  *	be the first on list, it is not able to sense, that packet
385  *	is cloned and should be copied-on-write, so that it will
386  *	change it and subsequent readers will get broken packet.
387  *							--ANK (980803)
388  */
389 
390 static inline struct list_head *ptype_head(const struct packet_type *pt)
391 {
392 	if (pt->type == htons(ETH_P_ALL))
393 		return &ptype_all;
394 	else
395 		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
396 }
397 
398 /**
399  *	dev_add_pack - add packet handler
400  *	@pt: packet type declaration
401  *
402  *	Add a protocol handler to the networking stack. The passed &packet_type
403  *	is linked into kernel lists and may not be freed until it has been
404  *	removed from the kernel lists.
405  *
 406  *	This call does not sleep, therefore it cannot guarantee
 407  *	that all CPUs that are in the middle of receiving packets
 408  *	will see the new packet type (until the next received packet).
409  */
410 
411 void dev_add_pack(struct packet_type *pt)
412 {
413 	struct list_head *head = ptype_head(pt);
414 
415 	spin_lock(&ptype_lock);
416 	list_add_rcu(&pt->list, head);
417 	spin_unlock(&ptype_lock);
418 }
419 EXPORT_SYMBOL(dev_add_pack);
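
/*
 * An illustrative sketch (not from this file): registering a handler for
 * one ethertype with dev_add_pack() and removing it again with
 * dev_remove_pack().  my_proto_rcv(), my_ptype and the 0x88b5 (local
 * experimental) ethertype are made up for the example.
 *
 *	static int my_proto_rcv(struct sk_buff *skb, struct net_device *dev,
 *				struct packet_type *pt,
 *				struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type my_ptype __read_mostly = {
 *		.type	= cpu_to_be16(0x88b5),
 *		.func	= my_proto_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);
 *	...
 *	dev_remove_pack(&my_ptype);
 */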
420 
421 /**
422  *	__dev_remove_pack	 - remove packet handler
423  *	@pt: packet type declaration
424  *
425  *	Remove a protocol handler that was previously added to the kernel
426  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
427  *	from the kernel lists and can be freed or reused once this function
428  *	returns.
429  *
 430  *	The packet type might still be in use by receivers
 431  *	and must not be freed until after all the CPUs have gone
432  *	through a quiescent state.
433  */
434 void __dev_remove_pack(struct packet_type *pt)
435 {
436 	struct list_head *head = ptype_head(pt);
437 	struct packet_type *pt1;
438 
439 	spin_lock(&ptype_lock);
440 
441 	list_for_each_entry(pt1, head, list) {
442 		if (pt == pt1) {
443 			list_del_rcu(&pt->list);
444 			goto out;
445 		}
446 	}
447 
448 	pr_warn("dev_remove_pack: %p not found\n", pt);
449 out:
450 	spin_unlock(&ptype_lock);
451 }
452 EXPORT_SYMBOL(__dev_remove_pack);
453 
454 /**
455  *	dev_remove_pack	 - remove packet handler
456  *	@pt: packet type declaration
457  *
458  *	Remove a protocol handler that was previously added to the kernel
459  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
460  *	from the kernel lists and can be freed or reused once this function
461  *	returns.
462  *
463  *	This call sleeps to guarantee that no CPU is looking at the packet
464  *	type after return.
465  */
466 void dev_remove_pack(struct packet_type *pt)
467 {
468 	__dev_remove_pack(pt);
469 
470 	synchronize_net();
471 }
472 EXPORT_SYMBOL(dev_remove_pack);
473 
474 /******************************************************************************
475 
476 		      Device Boot-time Settings Routines
477 
478 *******************************************************************************/
479 
480 /* Boot time configuration table */
481 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
482 
483 /**
484  *	netdev_boot_setup_add	- add new setup entry
485  *	@name: name of the device
486  *	@map: configured settings for the device
487  *
 488  *	Adds a new setup entry to the dev_boot_setup list.  The function
 489  *	returns 0 on error and 1 on success.  This is a generic routine
 490  *	for all netdevices.
491  */
492 static int netdev_boot_setup_add(char *name, struct ifmap *map)
493 {
494 	struct netdev_boot_setup *s;
495 	int i;
496 
497 	s = dev_boot_setup;
498 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
499 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
500 			memset(s[i].name, 0, sizeof(s[i].name));
501 			strlcpy(s[i].name, name, IFNAMSIZ);
502 			memcpy(&s[i].map, map, sizeof(s[i].map));
503 			break;
504 		}
505 	}
506 
507 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
508 }
509 
510 /**
511  *	netdev_boot_setup_check	- check boot time settings
512  *	@dev: the netdevice
513  *
514  * 	Check boot time settings for the device.
 515  *	Any settings found are applied to the device for use
 516  *	later during device probing.
 517  *	Returns 0 if no settings are found, 1 if they are.
518  */
519 int netdev_boot_setup_check(struct net_device *dev)
520 {
521 	struct netdev_boot_setup *s = dev_boot_setup;
522 	int i;
523 
524 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
525 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
526 		    !strcmp(dev->name, s[i].name)) {
527 			dev->irq 	= s[i].map.irq;
528 			dev->base_addr 	= s[i].map.base_addr;
529 			dev->mem_start 	= s[i].map.mem_start;
530 			dev->mem_end 	= s[i].map.mem_end;
531 			return 1;
532 		}
533 	}
534 	return 0;
535 }
536 EXPORT_SYMBOL(netdev_boot_setup_check);
537 
538 
539 /**
540  *	netdev_boot_base	- get address from boot time settings
541  *	@prefix: prefix for network device
542  *	@unit: id for network device
543  *
 544  * 	Check boot time settings for the base address of the device.
 545  *	Any settings found are applied to the device for use
 546  *	later during device probing.
 547  *	Returns 0 if no settings are found.
548  */
549 unsigned long netdev_boot_base(const char *prefix, int unit)
550 {
551 	const struct netdev_boot_setup *s = dev_boot_setup;
552 	char name[IFNAMSIZ];
553 	int i;
554 
555 	sprintf(name, "%s%d", prefix, unit);
556 
557 	/*
558 	 * If device already registered then return base of 1
559 	 * to indicate not to probe for this interface
560 	 */
561 	if (__dev_get_by_name(&init_net, name))
562 		return 1;
563 
564 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
565 		if (!strcmp(name, s[i].name))
566 			return s[i].map.base_addr;
567 	return 0;
568 }
569 
570 /*
 571  * Saves the settings configured at boot time for any netdevice.
572  */
573 int __init netdev_boot_setup(char *str)
574 {
575 	int ints[5];
576 	struct ifmap map;
577 
578 	str = get_options(str, ARRAY_SIZE(ints), ints);
579 	if (!str || !*str)
580 		return 0;
581 
582 	/* Save settings */
583 	memset(&map, 0, sizeof(map));
584 	if (ints[0] > 0)
585 		map.irq = ints[1];
586 	if (ints[0] > 1)
587 		map.base_addr = ints[2];
588 	if (ints[0] > 2)
589 		map.mem_start = ints[3];
590 	if (ints[0] > 3)
591 		map.mem_end = ints[4];
592 
593 	/* Add new entry to the list */
594 	return netdev_boot_setup_add(str, &map);
595 }
596 
597 __setup("netdev=", netdev_boot_setup);
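
/*
 * An illustrative example of the boot parameter parsed above (the values
 * are made up): up to four integers followed by the interface name, e.g.
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * which records IRQ 9 and I/O base 0x300 for eth0 in the ifmap saved by
 * netdev_boot_setup_add() and applied later by netdev_boot_setup_check().
 */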
598 
599 /*******************************************************************************
600 
601 			    Device Interface Subroutines
602 
603 *******************************************************************************/
604 
605 /**
606  *	__dev_get_by_name	- find a device by its name
607  *	@net: the applicable net namespace
608  *	@name: name to find
609  *
610  *	Find an interface by name. Must be called under RTNL semaphore
611  *	or @dev_base_lock. If the name is found a pointer to the device
612  *	is returned. If the name is not found then %NULL is returned. The
613  *	reference counters are not incremented so the caller must be
614  *	careful with locks.
615  */
616 
617 struct net_device *__dev_get_by_name(struct net *net, const char *name)
618 {
619 	struct hlist_node *p;
620 	struct net_device *dev;
621 	struct hlist_head *head = dev_name_hash(net, name);
622 
623 	hlist_for_each_entry(dev, p, head, name_hlist)
624 		if (!strncmp(dev->name, name, IFNAMSIZ))
625 			return dev;
626 
627 	return NULL;
628 }
629 EXPORT_SYMBOL(__dev_get_by_name);
630 
631 /**
632  *	dev_get_by_name_rcu	- find a device by its name
633  *	@net: the applicable net namespace
634  *	@name: name to find
635  *
636  *	Find an interface by name.
637  *	If the name is found a pointer to the device is returned.
638  * 	If the name is not found then %NULL is returned.
639  *	The reference counters are not incremented so the caller must be
640  *	careful with locks. The caller must hold RCU lock.
641  */
642 
643 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
644 {
645 	struct hlist_node *p;
646 	struct net_device *dev;
647 	struct hlist_head *head = dev_name_hash(net, name);
648 
649 	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
650 		if (!strncmp(dev->name, name, IFNAMSIZ))
651 			return dev;
652 
653 	return NULL;
654 }
655 EXPORT_SYMBOL(dev_get_by_name_rcu);
656 
657 /**
658  *	dev_get_by_name		- find a device by its name
659  *	@net: the applicable net namespace
660  *	@name: name to find
661  *
662  *	Find an interface by name. This can be called from any
663  *	context and does its own locking. The returned handle has
664  *	the usage count incremented and the caller must use dev_put() to
665  *	release it when it is no longer needed. %NULL is returned if no
666  *	matching device is found.
667  */
668 
669 struct net_device *dev_get_by_name(struct net *net, const char *name)
670 {
671 	struct net_device *dev;
672 
673 	rcu_read_lock();
674 	dev = dev_get_by_name_rcu(net, name);
675 	if (dev)
676 		dev_hold(dev);
677 	rcu_read_unlock();
678 	return dev;
679 }
680 EXPORT_SYMBOL(dev_get_by_name);
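
/*
 * An illustrative sketch (not from this file) of the reference discipline
 * described above: each successful dev_get_by_name() must be paired with
 * a dev_put() once the caller is done with the device.
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *
 *	if (dev) {
 *		pr_info("%s has ifindex %d\n", dev->name, dev->ifindex);
 *		dev_put(dev);
 *	}
 */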
681 
682 /**
683  *	__dev_get_by_index - find a device by its ifindex
684  *	@net: the applicable net namespace
685  *	@ifindex: index of device
686  *
687  *	Search for an interface by index. Returns %NULL if the device
688  *	is not found or a pointer to the device. The device has not
689  *	had its reference counter increased so the caller must be careful
690  *	about locking. The caller must hold either the RTNL semaphore
691  *	or @dev_base_lock.
692  */
693 
694 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
695 {
696 	struct hlist_node *p;
697 	struct net_device *dev;
698 	struct hlist_head *head = dev_index_hash(net, ifindex);
699 
700 	hlist_for_each_entry(dev, p, head, index_hlist)
701 		if (dev->ifindex == ifindex)
702 			return dev;
703 
704 	return NULL;
705 }
706 EXPORT_SYMBOL(__dev_get_by_index);
707 
708 /**
709  *	dev_get_by_index_rcu - find a device by its ifindex
710  *	@net: the applicable net namespace
711  *	@ifindex: index of device
712  *
713  *	Search for an interface by index. Returns %NULL if the device
714  *	is not found or a pointer to the device. The device has not
715  *	had its reference counter increased so the caller must be careful
716  *	about locking. The caller must hold RCU lock.
717  */
718 
719 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
720 {
721 	struct hlist_node *p;
722 	struct net_device *dev;
723 	struct hlist_head *head = dev_index_hash(net, ifindex);
724 
725 	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
726 		if (dev->ifindex == ifindex)
727 			return dev;
728 
729 	return NULL;
730 }
731 EXPORT_SYMBOL(dev_get_by_index_rcu);
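
/*
 * An illustrative sketch (not from this file): the _rcu lookup above must
 * run inside an RCU read-side critical section, and the returned pointer
 * must not be used after rcu_read_unlock() unless a reference was taken
 * with dev_hold().
 *
 *	rcu_read_lock();
 *	dev = dev_get_by_index_rcu(net, ifindex);
 *	if (dev)
 *		pr_info("ifindex %d is %s\n", ifindex, dev->name);
 *	rcu_read_unlock();
 */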
732 
733 
734 /**
735  *	dev_get_by_index - find a device by its ifindex
736  *	@net: the applicable net namespace
737  *	@ifindex: index of device
738  *
739  *	Search for an interface by index. Returns NULL if the device
740  *	is not found or a pointer to the device. The device returned has
741  *	had a reference added and the pointer is safe until the user calls
742  *	dev_put to indicate they have finished with it.
743  */
744 
745 struct net_device *dev_get_by_index(struct net *net, int ifindex)
746 {
747 	struct net_device *dev;
748 
749 	rcu_read_lock();
750 	dev = dev_get_by_index_rcu(net, ifindex);
751 	if (dev)
752 		dev_hold(dev);
753 	rcu_read_unlock();
754 	return dev;
755 }
756 EXPORT_SYMBOL(dev_get_by_index);
757 
758 /**
759  *	dev_getbyhwaddr_rcu - find a device by its hardware address
760  *	@net: the applicable net namespace
761  *	@type: media type of device
762  *	@ha: hardware address
763  *
764  *	Search for an interface by MAC address. Returns NULL if the device
765  *	is not found or a pointer to the device.
766  *	The caller must hold RCU or RTNL.
767  *	The returned device has not had its ref count increased
768  *	and the caller must therefore be careful about locking
769  *
770  */
771 
772 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
773 				       const char *ha)
774 {
775 	struct net_device *dev;
776 
777 	for_each_netdev_rcu(net, dev)
778 		if (dev->type == type &&
779 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
780 			return dev;
781 
782 	return NULL;
783 }
784 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
785 
786 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
787 {
788 	struct net_device *dev;
789 
790 	ASSERT_RTNL();
791 	for_each_netdev(net, dev)
792 		if (dev->type == type)
793 			return dev;
794 
795 	return NULL;
796 }
797 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
798 
799 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
800 {
801 	struct net_device *dev, *ret = NULL;
802 
803 	rcu_read_lock();
804 	for_each_netdev_rcu(net, dev)
805 		if (dev->type == type) {
806 			dev_hold(dev);
807 			ret = dev;
808 			break;
809 		}
810 	rcu_read_unlock();
811 	return ret;
812 }
813 EXPORT_SYMBOL(dev_getfirstbyhwtype);
814 
815 /**
816  *	dev_get_by_flags_rcu - find any device with given flags
817  *	@net: the applicable net namespace
818  *	@if_flags: IFF_* values
819  *	@mask: bitmask of bits in if_flags to check
820  *
821  *	Search for any interface with the given flags. Returns NULL if a device
822  *	is not found or a pointer to the device. Must be called inside
823  *	rcu_read_lock(), and result refcount is unchanged.
824  */
825 
826 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
827 				    unsigned short mask)
828 {
829 	struct net_device *dev, *ret;
830 
831 	ret = NULL;
832 	for_each_netdev_rcu(net, dev) {
833 		if (((dev->flags ^ if_flags) & mask) == 0) {
834 			ret = dev;
835 			break;
836 		}
837 	}
838 	return ret;
839 }
840 EXPORT_SYMBOL(dev_get_by_flags_rcu);
841 
842 /**
843  *	dev_valid_name - check if name is okay for network device
844  *	@name: name string
845  *
 846  *	Network device names need to be valid file names
 847  *	to allow sysfs to work.  We also disallow any kind of
848  *	whitespace.
849  */
850 bool dev_valid_name(const char *name)
851 {
852 	if (*name == '\0')
853 		return false;
854 	if (strlen(name) >= IFNAMSIZ)
855 		return false;
856 	if (!strcmp(name, ".") || !strcmp(name, ".."))
857 		return false;
858 
859 	while (*name) {
860 		if (*name == '/' || isspace(*name))
861 			return false;
862 		name++;
863 	}
864 	return true;
865 }
866 EXPORT_SYMBOL(dev_valid_name);
867 
868 /**
869  *	__dev_alloc_name - allocate a name for a device
870  *	@net: network namespace to allocate the device name in
871  *	@name: name format string
872  *	@buf:  scratch buffer and result name string
873  *
 874  *	Passed a format string - eg "lt%d" - it will try to find a suitable
875  *	id. It scans list of devices to build up a free map, then chooses
876  *	the first empty slot. The caller must hold the dev_base or rtnl lock
877  *	while allocating the name and adding the device in order to avoid
878  *	duplicates.
879  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
880  *	Returns the number of the unit assigned or a negative errno code.
881  */
882 
883 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
884 {
885 	int i = 0;
886 	const char *p;
887 	const int max_netdevices = 8*PAGE_SIZE;
888 	unsigned long *inuse;
889 	struct net_device *d;
890 
891 	p = strnchr(name, IFNAMSIZ-1, '%');
892 	if (p) {
893 		/*
894 		 * Verify the string as this thing may have come from
895 		 * the user.  There must be either one "%d" and no other "%"
896 		 * characters.
897 		 */
898 		if (p[1] != 'd' || strchr(p + 2, '%'))
899 			return -EINVAL;
900 
901 		/* Use one page as a bit array of possible slots */
902 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
903 		if (!inuse)
904 			return -ENOMEM;
905 
906 		for_each_netdev(net, d) {
907 			if (!sscanf(d->name, name, &i))
908 				continue;
909 			if (i < 0 || i >= max_netdevices)
910 				continue;
911 
912 			/*  avoid cases where sscanf is not exact inverse of printf */
913 			snprintf(buf, IFNAMSIZ, name, i);
914 			if (!strncmp(buf, d->name, IFNAMSIZ))
915 				set_bit(i, inuse);
916 		}
917 
918 		i = find_first_zero_bit(inuse, max_netdevices);
919 		free_page((unsigned long) inuse);
920 	}
921 
922 	if (buf != name)
923 		snprintf(buf, IFNAMSIZ, name, i);
924 	if (!__dev_get_by_name(net, buf))
925 		return i;
926 
927 	/* It is possible to run out of possible slots
928 	 * when the name is long and there isn't enough space left
929 	 * for the digits, or if all bits are used.
930 	 */
931 	return -ENFILE;
932 }
933 
934 /**
935  *	dev_alloc_name - allocate a name for a device
936  *	@dev: device
937  *	@name: name format string
938  *
 939  *	Passed a format string - eg "lt%d" - it will try to find a suitable
940  *	id. It scans list of devices to build up a free map, then chooses
941  *	the first empty slot. The caller must hold the dev_base or rtnl lock
942  *	while allocating the name and adding the device in order to avoid
943  *	duplicates.
944  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
945  *	Returns the number of the unit assigned or a negative errno code.
946  */
947 
948 int dev_alloc_name(struct net_device *dev, const char *name)
949 {
950 	char buf[IFNAMSIZ];
951 	struct net *net;
952 	int ret;
953 
954 	BUG_ON(!dev_net(dev));
955 	net = dev_net(dev);
956 	ret = __dev_alloc_name(net, name, buf);
957 	if (ret >= 0)
958 		strlcpy(dev->name, buf, IFNAMSIZ);
959 	return ret;
960 }
961 EXPORT_SYMBOL(dev_alloc_name);
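
/*
 * An illustrative sketch (not from this file): with a format string such
 * as "dummy%d", dev_alloc_name() picks the first free unit number, so if
 * dummy0 and dummy1 already exist the call below writes "dummy2" into
 * dev->name and returns 2.
 *
 *	err = dev_alloc_name(dev, "dummy%d");
 *	if (err < 0)
 *		goto fail;
 */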
962 
963 static int dev_get_valid_name(struct net_device *dev, const char *name)
964 {
965 	struct net *net;
966 
967 	BUG_ON(!dev_net(dev));
968 	net = dev_net(dev);
969 
970 	if (!dev_valid_name(name))
971 		return -EINVAL;
972 
973 	if (strchr(name, '%'))
974 		return dev_alloc_name(dev, name);
975 	else if (__dev_get_by_name(net, name))
976 		return -EEXIST;
977 	else if (dev->name != name)
978 		strlcpy(dev->name, name, IFNAMSIZ);
979 
980 	return 0;
981 }
982 
983 /**
984  *	dev_change_name - change name of a device
985  *	@dev: device
986  *	@newname: name (or format string) must be at least IFNAMSIZ
987  *
 988  *	Change the name of a device; format strings such as "eth%d"
 989  *	can be passed for wildcarding.
990  */
991 int dev_change_name(struct net_device *dev, const char *newname)
992 {
993 	char oldname[IFNAMSIZ];
994 	int err = 0;
995 	int ret;
996 	struct net *net;
997 
998 	ASSERT_RTNL();
999 	BUG_ON(!dev_net(dev));
1000 
1001 	net = dev_net(dev);
1002 	if (dev->flags & IFF_UP)
1003 		return -EBUSY;
1004 
1005 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
1006 		return 0;
1007 
1008 	memcpy(oldname, dev->name, IFNAMSIZ);
1009 
1010 	err = dev_get_valid_name(dev, newname);
1011 	if (err < 0)
1012 		return err;
1013 
1014 rollback:
1015 	ret = device_rename(&dev->dev, dev->name);
1016 	if (ret) {
1017 		memcpy(dev->name, oldname, IFNAMSIZ);
1018 		return ret;
1019 	}
1020 
1021 	write_lock_bh(&dev_base_lock);
1022 	hlist_del_rcu(&dev->name_hlist);
1023 	write_unlock_bh(&dev_base_lock);
1024 
1025 	synchronize_rcu();
1026 
1027 	write_lock_bh(&dev_base_lock);
1028 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1029 	write_unlock_bh(&dev_base_lock);
1030 
1031 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1032 	ret = notifier_to_errno(ret);
1033 
1034 	if (ret) {
1035 		/* err >= 0 after dev_alloc_name() or stores the first errno */
1036 		if (err >= 0) {
1037 			err = ret;
1038 			memcpy(dev->name, oldname, IFNAMSIZ);
1039 			goto rollback;
1040 		} else {
1041 			pr_err("%s: name change rollback failed: %d\n",
1042 			       dev->name, ret);
1043 		}
1044 	}
1045 
1046 	return err;
1047 }
1048 
1049 /**
1050  *	dev_set_alias - change ifalias of a device
1051  *	@dev: device
1052  *	@alias: name up to IFALIASZ
1053  *	@len: limit of bytes to copy from info
1054  *
 1055  *	Set the ifalias for a device.
1056  */
1057 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1058 {
1059 	ASSERT_RTNL();
1060 
1061 	if (len >= IFALIASZ)
1062 		return -EINVAL;
1063 
1064 	if (!len) {
1065 		if (dev->ifalias) {
1066 			kfree(dev->ifalias);
1067 			dev->ifalias = NULL;
1068 		}
1069 		return 0;
1070 	}
1071 
1072 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1073 	if (!dev->ifalias)
1074 		return -ENOMEM;
1075 
1076 	strlcpy(dev->ifalias, alias, len+1);
1077 	return len;
1078 }
1079 
1080 
1081 /**
1082  *	netdev_features_change - device changes features
1083  *	@dev: device to cause notification
1084  *
1085  *	Called to indicate a device has changed features.
1086  */
1087 void netdev_features_change(struct net_device *dev)
1088 {
1089 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1090 }
1091 EXPORT_SYMBOL(netdev_features_change);
1092 
1093 /**
1094  *	netdev_state_change - device changes state
1095  *	@dev: device to cause notification
1096  *
1097  *	Called to indicate a device has changed state. This function calls
1098  *	the notifier chains for netdev_chain and sends a NEWLINK message
1099  *	to the routing socket.
1100  */
1101 void netdev_state_change(struct net_device *dev)
1102 {
1103 	if (dev->flags & IFF_UP) {
1104 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1105 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1106 	}
1107 }
1108 EXPORT_SYMBOL(netdev_state_change);
1109 
1110 int netdev_bonding_change(struct net_device *dev, unsigned long event)
1111 {
1112 	return call_netdevice_notifiers(event, dev);
1113 }
1114 EXPORT_SYMBOL(netdev_bonding_change);
1115 
1116 /**
1117  *	dev_load 	- load a network module
1118  *	@net: the applicable net namespace
1119  *	@name: name of interface
1120  *
1121  *	If a network interface is not present and the process has suitable
 1122  *	privileges, this function loads the module. If module loading is not
1123  *	available in this kernel then it becomes a nop.
1124  */
1125 
1126 void dev_load(struct net *net, const char *name)
1127 {
1128 	struct net_device *dev;
1129 	int no_module;
1130 
1131 	rcu_read_lock();
1132 	dev = dev_get_by_name_rcu(net, name);
1133 	rcu_read_unlock();
1134 
1135 	no_module = !dev;
1136 	if (no_module && capable(CAP_NET_ADMIN))
1137 		no_module = request_module("netdev-%s", name);
1138 	if (no_module && capable(CAP_SYS_MODULE)) {
1139 		if (!request_module("%s", name))
1140 			pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
1141 			       name);
1142 	}
1143 }
1144 EXPORT_SYMBOL(dev_load);
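
/*
 * An illustrative sketch (not from this file): a driver that provides a
 * virtual interface can make itself loadable through the "netdev-%s"
 * request above by declaring a matching module alias.  The device name
 * "mytap0" is hypothetical.
 *
 *	MODULE_ALIAS_NETDEV("mytap0");
 */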
1145 
1146 static int __dev_open(struct net_device *dev)
1147 {
1148 	const struct net_device_ops *ops = dev->netdev_ops;
1149 	int ret;
1150 
1151 	ASSERT_RTNL();
1152 
1153 	if (!netif_device_present(dev))
1154 		return -ENODEV;
1155 
1156 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1157 	ret = notifier_to_errno(ret);
1158 	if (ret)
1159 		return ret;
1160 
1161 	set_bit(__LINK_STATE_START, &dev->state);
1162 
1163 	if (ops->ndo_validate_addr)
1164 		ret = ops->ndo_validate_addr(dev);
1165 
1166 	if (!ret && ops->ndo_open)
1167 		ret = ops->ndo_open(dev);
1168 
1169 	if (ret)
1170 		clear_bit(__LINK_STATE_START, &dev->state);
1171 	else {
1172 		dev->flags |= IFF_UP;
1173 		net_dmaengine_get();
1174 		dev_set_rx_mode(dev);
1175 		dev_activate(dev);
1176 	}
1177 
1178 	return ret;
1179 }
1180 
1181 /**
1182  *	dev_open	- prepare an interface for use.
1183  *	@dev:	device to open
1184  *
1185  *	Takes a device from down to up state. The device's private open
1186  *	function is invoked and then the multicast lists are loaded. Finally
1187  *	the device is moved into the up state and a %NETDEV_UP message is
1188  *	sent to the netdev notifier chain.
1189  *
1190  *	Calling this function on an active interface is a nop. On a failure
1191  *	a negative errno code is returned.
1192  */
1193 int dev_open(struct net_device *dev)
1194 {
1195 	int ret;
1196 
1197 	if (dev->flags & IFF_UP)
1198 		return 0;
1199 
1200 	ret = __dev_open(dev);
1201 	if (ret < 0)
1202 		return ret;
1203 
1204 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1205 	call_netdevice_notifiers(NETDEV_UP, dev);
1206 
1207 	return ret;
1208 }
1209 EXPORT_SYMBOL(dev_open);
1210 
1211 static int __dev_close_many(struct list_head *head)
1212 {
1213 	struct net_device *dev;
1214 
1215 	ASSERT_RTNL();
1216 	might_sleep();
1217 
1218 	list_for_each_entry(dev, head, unreg_list) {
1219 		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1220 
1221 		clear_bit(__LINK_STATE_START, &dev->state);
1222 
1223 		/* Synchronize to scheduled poll. We cannot touch poll list, it
1224 		 * can be even on different cpu. So just clear netif_running().
1225 		 *
 1226 		 * dev->stop() will invoke napi_disable() on all of its
1227 		 * napi_struct instances on this device.
1228 		 */
1229 		smp_mb__after_clear_bit(); /* Commit netif_running(). */
1230 	}
1231 
1232 	dev_deactivate_many(head);
1233 
1234 	list_for_each_entry(dev, head, unreg_list) {
1235 		const struct net_device_ops *ops = dev->netdev_ops;
1236 
1237 		/*
1238 		 *	Call the device specific close. This cannot fail.
1239 		 *	Only if device is UP
1240 		 *
1241 		 *	We allow it to be called even after a DETACH hot-plug
1242 		 *	event.
1243 		 */
1244 		if (ops->ndo_stop)
1245 			ops->ndo_stop(dev);
1246 
1247 		dev->flags &= ~IFF_UP;
1248 		net_dmaengine_put();
1249 	}
1250 
1251 	return 0;
1252 }
1253 
1254 static int __dev_close(struct net_device *dev)
1255 {
1256 	int retval;
1257 	LIST_HEAD(single);
1258 
1259 	list_add(&dev->unreg_list, &single);
1260 	retval = __dev_close_many(&single);
1261 	list_del(&single);
1262 	return retval;
1263 }
1264 
1265 static int dev_close_many(struct list_head *head)
1266 {
1267 	struct net_device *dev, *tmp;
1268 	LIST_HEAD(tmp_list);
1269 
1270 	list_for_each_entry_safe(dev, tmp, head, unreg_list)
1271 		if (!(dev->flags & IFF_UP))
1272 			list_move(&dev->unreg_list, &tmp_list);
1273 
1274 	__dev_close_many(head);
1275 
1276 	list_for_each_entry(dev, head, unreg_list) {
1277 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1278 		call_netdevice_notifiers(NETDEV_DOWN, dev);
1279 	}
1280 
1281 	/* rollback_registered_many needs the complete original list */
1282 	list_splice(&tmp_list, head);
1283 	return 0;
1284 }
1285 
1286 /**
1287  *	dev_close - shutdown an interface.
1288  *	@dev: device to shutdown
1289  *
1290  *	This function moves an active device into down state. A
1291  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1292  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1293  *	chain.
1294  */
1295 int dev_close(struct net_device *dev)
1296 {
1297 	if (dev->flags & IFF_UP) {
1298 		LIST_HEAD(single);
1299 
1300 		list_add(&dev->unreg_list, &single);
1301 		dev_close_many(&single);
1302 		list_del(&single);
1303 	}
1304 	return 0;
1305 }
1306 EXPORT_SYMBOL(dev_close);
1307 
1308 
1309 /**
1310  *	dev_disable_lro - disable Large Receive Offload on a device
1311  *	@dev: device
1312  *
1313  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1314  *	called under RTNL.  This is needed if received packets may be
1315  *	forwarded to another interface.
1316  */
1317 void dev_disable_lro(struct net_device *dev)
1318 {
1319 	/*
1320 	 * If we're trying to disable lro on a vlan device
1321 	 * use the underlying physical device instead
1322 	 */
1323 	if (is_vlan_dev(dev))
1324 		dev = vlan_dev_real_dev(dev);
1325 
1326 	dev->wanted_features &= ~NETIF_F_LRO;
1327 	netdev_update_features(dev);
1328 
1329 	if (unlikely(dev->features & NETIF_F_LRO))
1330 		netdev_WARN(dev, "failed to disable LRO!\n");
1331 }
1332 EXPORT_SYMBOL(dev_disable_lro);
1333 
1334 
1335 static int dev_boot_phase = 1;
1336 
1337 /**
1338  *	register_netdevice_notifier - register a network notifier block
1339  *	@nb: notifier
1340  *
1341  *	Register a notifier to be called when network device events occur.
1342  *	The notifier passed is linked into the kernel structures and must
1343  *	not be reused until it has been unregistered. A negative errno code
1344  *	is returned on a failure.
1345  *
 1346  *	When registered, all registration and up events are replayed
 1347  *	to the new notifier so that it has a race-free view of the
 1348  *	network device list.
1349  */
1350 
1351 int register_netdevice_notifier(struct notifier_block *nb)
1352 {
1353 	struct net_device *dev;
1354 	struct net_device *last;
1355 	struct net *net;
1356 	int err;
1357 
1358 	rtnl_lock();
1359 	err = raw_notifier_chain_register(&netdev_chain, nb);
1360 	if (err)
1361 		goto unlock;
1362 	if (dev_boot_phase)
1363 		goto unlock;
1364 	for_each_net(net) {
1365 		for_each_netdev(net, dev) {
1366 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1367 			err = notifier_to_errno(err);
1368 			if (err)
1369 				goto rollback;
1370 
1371 			if (!(dev->flags & IFF_UP))
1372 				continue;
1373 
1374 			nb->notifier_call(nb, NETDEV_UP, dev);
1375 		}
1376 	}
1377 
1378 unlock:
1379 	rtnl_unlock();
1380 	return err;
1381 
1382 rollback:
1383 	last = dev;
1384 	for_each_net(net) {
1385 		for_each_netdev(net, dev) {
1386 			if (dev == last)
1387 				goto outroll;
1388 
1389 			if (dev->flags & IFF_UP) {
1390 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1391 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1392 			}
1393 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1394 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1395 		}
1396 	}
1397 
1398 outroll:
1399 	raw_notifier_chain_unregister(&netdev_chain, nb);
1400 	goto unlock;
1401 }
1402 EXPORT_SYMBOL(register_netdevice_notifier);
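
/*
 * An illustrative sketch (not from this file): a minimal subscriber to
 * the notifier chain registered above.  my_netdev_event() and
 * my_notifier are hypothetical names.  Note that on this chain the
 * notifier data pointer is the struct net_device itself.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_info("%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_notifier = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_notifier);
 */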
1403 
1404 /**
1405  *	unregister_netdevice_notifier - unregister a network notifier block
1406  *	@nb: notifier
1407  *
1408  *	Unregister a notifier previously registered by
 1409  *	register_netdevice_notifier(). The notifier is unlinked from the
1410  *	kernel structures and may then be reused. A negative errno code
1411  *	is returned on a failure.
1412  *
 1413  *	After unregistering, unregister and down device events are synthesized
 1414  *	for all devices on the device list and delivered to the removed notifier,
 1415  *	removing the need for special case cleanup code.
1416  */
1417 
1418 int unregister_netdevice_notifier(struct notifier_block *nb)
1419 {
1420 	struct net_device *dev;
1421 	struct net *net;
1422 	int err;
1423 
1424 	rtnl_lock();
1425 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1426 	if (err)
1427 		goto unlock;
1428 
1429 	for_each_net(net) {
1430 		for_each_netdev(net, dev) {
1431 			if (dev->flags & IFF_UP) {
1432 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1433 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1434 			}
1435 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1436 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1437 		}
1438 	}
1439 unlock:
1440 	rtnl_unlock();
1441 	return err;
1442 }
1443 EXPORT_SYMBOL(unregister_netdevice_notifier);
1444 
1445 /**
1446  *	call_netdevice_notifiers - call all network notifier blocks
1447  *      @val: value passed unmodified to notifier function
1448  *      @dev: net_device pointer passed unmodified to notifier function
1449  *
1450  *	Call all network notifier blocks.  Parameters and return value
1451  *	are as for raw_notifier_call_chain().
1452  */
1453 
1454 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1455 {
1456 	ASSERT_RTNL();
1457 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1458 }
1459 EXPORT_SYMBOL(call_netdevice_notifiers);
1460 
1461 static struct static_key netstamp_needed __read_mostly;
1462 #ifdef HAVE_JUMP_LABEL
 1463 /* We are not allowed to call static_key_slow_dec() from irq context.
1464  * If net_disable_timestamp() is called from irq context, defer the
1465  * static_key_slow_dec() calls.
1466  */
1467 static atomic_t netstamp_needed_deferred;
1468 #endif
1469 
1470 void net_enable_timestamp(void)
1471 {
1472 #ifdef HAVE_JUMP_LABEL
1473 	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1474 
1475 	if (deferred) {
1476 		while (--deferred)
1477 			static_key_slow_dec(&netstamp_needed);
1478 		return;
1479 	}
1480 #endif
1481 	WARN_ON(in_interrupt());
1482 	static_key_slow_inc(&netstamp_needed);
1483 }
1484 EXPORT_SYMBOL(net_enable_timestamp);
1485 
1486 void net_disable_timestamp(void)
1487 {
1488 #ifdef HAVE_JUMP_LABEL
1489 	if (in_interrupt()) {
1490 		atomic_inc(&netstamp_needed_deferred);
1491 		return;
1492 	}
1493 #endif
1494 	static_key_slow_dec(&netstamp_needed);
1495 }
1496 EXPORT_SYMBOL(net_disable_timestamp);
1497 
1498 static inline void net_timestamp_set(struct sk_buff *skb)
1499 {
1500 	skb->tstamp.tv64 = 0;
1501 	if (static_key_false(&netstamp_needed))
1502 		__net_timestamp(skb);
1503 }
1504 
1505 #define net_timestamp_check(COND, SKB)			\
1506 	if (static_key_false(&netstamp_needed)) {		\
1507 		if ((COND) && !(SKB)->tstamp.tv64)	\
1508 			__net_timestamp(SKB);		\
1509 	}						\
1510 
1511 static int net_hwtstamp_validate(struct ifreq *ifr)
1512 {
1513 	struct hwtstamp_config cfg;
1514 	enum hwtstamp_tx_types tx_type;
1515 	enum hwtstamp_rx_filters rx_filter;
1516 	int tx_type_valid = 0;
1517 	int rx_filter_valid = 0;
1518 
1519 	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
1520 		return -EFAULT;
1521 
1522 	if (cfg.flags) /* reserved for future extensions */
1523 		return -EINVAL;
1524 
1525 	tx_type = cfg.tx_type;
1526 	rx_filter = cfg.rx_filter;
1527 
1528 	switch (tx_type) {
1529 	case HWTSTAMP_TX_OFF:
1530 	case HWTSTAMP_TX_ON:
1531 	case HWTSTAMP_TX_ONESTEP_SYNC:
1532 		tx_type_valid = 1;
1533 		break;
1534 	}
1535 
1536 	switch (rx_filter) {
1537 	case HWTSTAMP_FILTER_NONE:
1538 	case HWTSTAMP_FILTER_ALL:
1539 	case HWTSTAMP_FILTER_SOME:
1540 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
1541 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
1542 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
1543 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
1544 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
1545 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
1546 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
1547 	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
1548 	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
1549 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
1550 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
1551 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
1552 		rx_filter_valid = 1;
1553 		break;
1554 	}
1555 
1556 	if (!tx_type_valid || !rx_filter_valid)
1557 		return -ERANGE;
1558 
1559 	return 0;
1560 }
1561 
1562 static inline bool is_skb_forwardable(struct net_device *dev,
1563 				      struct sk_buff *skb)
1564 {
1565 	unsigned int len;
1566 
1567 	if (!(dev->flags & IFF_UP))
1568 		return false;
1569 
1570 	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1571 	if (skb->len <= len)
1572 		return true;
1573 
1574 	/* if TSO is enabled, we don't care about the length as the packet
1575 	 * could be forwarded without being segmented before
1576 	 */
1577 	if (skb_is_gso(skb))
1578 		return true;
1579 
1580 	return false;
1581 }
1582 
1583 /**
1584  * dev_forward_skb - loopback an skb to another netif
1585  *
1586  * @dev: destination network device
1587  * @skb: buffer to forward
1588  *
1589  * return values:
1590  *	NET_RX_SUCCESS	(no congestion)
1591  *	NET_RX_DROP     (packet was dropped, but freed)
1592  *
1593  * dev_forward_skb can be used for injecting an skb from the
1594  * start_xmit function of one device into the receive queue
1595  * of another device.
1596  *
1597  * The receiving device may be in another namespace, so
1598  * we have to clear all information in the skb that could
1599  * impact namespace isolation.
1600  */
1601 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1602 {
1603 	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1604 		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1605 			atomic_long_inc(&dev->rx_dropped);
1606 			kfree_skb(skb);
1607 			return NET_RX_DROP;
1608 		}
1609 	}
1610 
1611 	skb_orphan(skb);
1612 	nf_reset(skb);
1613 
1614 	if (unlikely(!is_skb_forwardable(dev, skb))) {
1615 		atomic_long_inc(&dev->rx_dropped);
1616 		kfree_skb(skb);
1617 		return NET_RX_DROP;
1618 	}
1619 	skb->skb_iif = 0;
1620 	skb_set_dev(skb, dev);
1621 	skb->tstamp.tv64 = 0;
1622 	skb->pkt_type = PACKET_HOST;
1623 	skb->protocol = eth_type_trans(skb, dev);
1624 	return netif_rx(skb);
1625 }
1626 EXPORT_SYMBOL_GPL(dev_forward_skb);
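
/*
 * An illustrative sketch (not from this file) of the usage described
 * above: a pseudo-device handing frames to a peer device from its
 * ndo_start_xmit().  my_get_peer() is a hypothetical helper returning
 * the paired net_device.
 *
 *	static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
 *	{
 *		struct net_device *peer = my_get_peer(dev);
 *
 *		dev_forward_skb(peer, skb);
 *		return NETDEV_TX_OK;
 *	}
 */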
1627 
1628 static inline int deliver_skb(struct sk_buff *skb,
1629 			      struct packet_type *pt_prev,
1630 			      struct net_device *orig_dev)
1631 {
1632 	atomic_inc(&skb->users);
1633 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1634 }
1635 
1636 /*
1637  *	Support routine. Sends outgoing frames to any network
1638  *	taps currently in use.
1639  */
1640 
1641 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1642 {
1643 	struct packet_type *ptype;
1644 	struct sk_buff *skb2 = NULL;
1645 	struct packet_type *pt_prev = NULL;
1646 
1647 	rcu_read_lock();
1648 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1649 		/* Never send packets back to the socket
1650 		 * they originated from - MvS (miquels@drinkel.ow.org)
1651 		 */
1652 		if ((ptype->dev == dev || !ptype->dev) &&
1653 		    (ptype->af_packet_priv == NULL ||
1654 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1655 			if (pt_prev) {
1656 				deliver_skb(skb2, pt_prev, skb->dev);
1657 				pt_prev = ptype;
1658 				continue;
1659 			}
1660 
1661 			skb2 = skb_clone(skb, GFP_ATOMIC);
1662 			if (!skb2)
1663 				break;
1664 
1665 			net_timestamp_set(skb2);
1666 
 1667 			/* The network header should be correctly
 1668 			 * set by the sender, so the check below is
 1669 			 * just protection against buggy protocols.
 1670 			 */
1671 			skb_reset_mac_header(skb2);
1672 
1673 			if (skb_network_header(skb2) < skb2->data ||
1674 			    skb2->network_header > skb2->tail) {
1675 				if (net_ratelimit())
1676 					pr_crit("protocol %04x is buggy, dev %s\n",
1677 						ntohs(skb2->protocol),
1678 						dev->name);
1679 				skb_reset_network_header(skb2);
1680 			}
1681 
1682 			skb2->transport_header = skb2->network_header;
1683 			skb2->pkt_type = PACKET_OUTGOING;
1684 			pt_prev = ptype;
1685 		}
1686 	}
1687 	if (pt_prev)
1688 		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1689 	rcu_read_unlock();
1690 }
1691 
1692 /* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1693  * @dev: Network device
1694  * @txq: number of queues available
1695  *
1696  * If real_num_tx_queues is changed the tc mappings may no longer be
 1697  * valid. To resolve this, verify that the tc mapping remains valid and,
 1698  * if not, NULL the mapping. With no priorities mapping to this
 1699  * offset/count pair it will no longer be used. In the worst case, if TC0
 1700  * is invalid, nothing can be done, so disable priority mappings. It is
 1701  * expected that drivers will fix this mapping if they can before
1702  * calling netif_set_real_num_tx_queues.
1703  */
1704 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1705 {
1706 	int i;
1707 	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1708 
1709 	/* If TC0 is invalidated disable TC mapping */
1710 	if (tc->offset + tc->count > txq) {
1711 		pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1712 		dev->num_tc = 0;
1713 		return;
1714 	}
1715 
1716 	/* Invalidated prio to tc mappings set to TC0 */
1717 	for (i = 1; i < TC_BITMASK + 1; i++) {
1718 		int q = netdev_get_prio_tc_map(dev, i);
1719 
1720 		tc = &dev->tc_to_txq[q];
1721 		if (tc->offset + tc->count > txq) {
1722 			pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1723 				i, q);
1724 			netdev_set_prio_tc_map(dev, i, 0);
1725 		}
1726 	}
1727 }
1728 
1729 /*
1730  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
 1731  * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
1732  */
1733 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1734 {
1735 	int rc;
1736 
1737 	if (txq < 1 || txq > dev->num_tx_queues)
1738 		return -EINVAL;
1739 
1740 	if (dev->reg_state == NETREG_REGISTERED ||
1741 	    dev->reg_state == NETREG_UNREGISTERING) {
1742 		ASSERT_RTNL();
1743 
1744 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
1745 						  txq);
1746 		if (rc)
1747 			return rc;
1748 
1749 		if (dev->num_tc)
1750 			netif_setup_tc(dev, txq);
1751 
1752 		if (txq < dev->real_num_tx_queues)
1753 			qdisc_reset_all_tx_gt(dev, txq);
1754 	}
1755 
1756 	dev->real_num_tx_queues = txq;
1757 	return 0;
1758 }
1759 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
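
/*
 * An illustrative sketch (not from this file): a multiqueue driver that
 * allocated the maximum number of tx queues up front can later shrink
 * the number actually used once it knows the hardware limits.  The
 * variable nr_hw_queues is hypothetical.
 *
 *	err = netif_set_real_num_tx_queues(dev, nr_hw_queues);
 *	if (err)
 *		return err;
 */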
1760 
1761 #ifdef CONFIG_RPS
1762 /**
1763  *	netif_set_real_num_rx_queues - set actual number of RX queues used
1764  *	@dev: Network device
1765  *	@rxq: Actual number of RX queues
1766  *
1767  *	This must be called either with the rtnl_lock held or before
1768  *	registration of the net device.  Returns 0 on success, or a
1769  *	negative error code.  If called before registration, it always
1770  *	succeeds.
1771  */
1772 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1773 {
1774 	int rc;
1775 
1776 	if (rxq < 1 || rxq > dev->num_rx_queues)
1777 		return -EINVAL;
1778 
1779 	if (dev->reg_state == NETREG_REGISTERED) {
1780 		ASSERT_RTNL();
1781 
1782 		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1783 						  rxq);
1784 		if (rc)
1785 			return rc;
1786 	}
1787 
1788 	dev->real_num_rx_queues = rxq;
1789 	return 0;
1790 }
1791 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1792 #endif
1793 
1794 static inline void __netif_reschedule(struct Qdisc *q)
1795 {
1796 	struct softnet_data *sd;
1797 	unsigned long flags;
1798 
1799 	local_irq_save(flags);
1800 	sd = &__get_cpu_var(softnet_data);
1801 	q->next_sched = NULL;
1802 	*sd->output_queue_tailp = q;
1803 	sd->output_queue_tailp = &q->next_sched;
1804 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1805 	local_irq_restore(flags);
1806 }
1807 
1808 void __netif_schedule(struct Qdisc *q)
1809 {
1810 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1811 		__netif_reschedule(q);
1812 }
1813 EXPORT_SYMBOL(__netif_schedule);
1814 
1815 void dev_kfree_skb_irq(struct sk_buff *skb)
1816 {
1817 	if (atomic_dec_and_test(&skb->users)) {
1818 		struct softnet_data *sd;
1819 		unsigned long flags;
1820 
1821 		local_irq_save(flags);
1822 		sd = &__get_cpu_var(softnet_data);
1823 		skb->next = sd->completion_queue;
1824 		sd->completion_queue = skb;
1825 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1826 		local_irq_restore(flags);
1827 	}
1828 }
1829 EXPORT_SYMBOL(dev_kfree_skb_irq);
1830 
1831 void dev_kfree_skb_any(struct sk_buff *skb)
1832 {
1833 	if (in_irq() || irqs_disabled())
1834 		dev_kfree_skb_irq(skb);
1835 	else
1836 		dev_kfree_skb(skb);
1837 }
1838 EXPORT_SYMBOL(dev_kfree_skb_any);
1839 
1840 
1841 /**
1842  * netif_device_detach - mark device as removed
1843  * @dev: network device
1844  *
 * Mark device as removed from the system and therefore no longer available.
1846  */
1847 void netif_device_detach(struct net_device *dev)
1848 {
1849 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1850 	    netif_running(dev)) {
1851 		netif_tx_stop_all_queues(dev);
1852 	}
1853 }
1854 EXPORT_SYMBOL(netif_device_detach);
1855 
1856 /**
1857  * netif_device_attach - mark device as attached
1858  * @dev: network device
1859  *
 * Mark device as attached to the system and restart if needed.
1861  */
1862 void netif_device_attach(struct net_device *dev)
1863 {
1864 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1865 	    netif_running(dev)) {
1866 		netif_tx_wake_all_queues(dev);
1867 		__netdev_watchdog_up(dev);
1868 	}
1869 }
1870 EXPORT_SYMBOL(netif_device_attach);
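
/*
 * Example (illustrative sketch, hypothetical driver "foo"): a typical
 * suspend/resume pair.  Detaching stops all TX queues so the stack
 * stops handing us packets while the hardware is powered down;
 * attaching wakes the queues and re-arms the watchdog.  foo_hw_down()
 * and foo_hw_up() stand in for driver-private hardware handling.
 *
 *	static int foo_suspend(struct device *d)
 *	{
 *		struct net_device *dev = dev_get_drvdata(d);
 *
 *		netif_device_detach(dev);
 *		foo_hw_down(netdev_priv(dev));
 *		return 0;
 *	}
 *
 *	static int foo_resume(struct device *d)
 *	{
 *		struct net_device *dev = dev_get_drvdata(d);
 *
 *		foo_hw_up(netdev_priv(dev));
 *		netif_device_attach(dev);
 *		return 0;
 *	}
 */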
1871 
1872 /**
 * skb_set_dev - assign a new device to a buffer
1874  * @skb: buffer for the new device
1875  * @dev: network device
1876  *
 * If an skb is already owned by a device, we have to reset
 * all data private to the namespace that device belongs to
 * before assigning it a new device.
1880  */
1881 #ifdef CONFIG_NET_NS
1882 void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
1883 {
1884 	skb_dst_drop(skb);
1885 	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
1886 		secpath_reset(skb);
1887 		nf_reset(skb);
1888 		skb_init_secmark(skb);
1889 		skb->mark = 0;
1890 		skb->priority = 0;
1891 		skb->nf_trace = 0;
1892 		skb->ipvs_property = 0;
1893 #ifdef CONFIG_NET_SCHED
1894 		skb->tc_index = 0;
1895 #endif
1896 	}
1897 	skb->dev = dev;
1898 }
1899 EXPORT_SYMBOL(skb_set_dev);
1900 #endif /* CONFIG_NET_NS */
1901 
1902 static void skb_warn_bad_offload(const struct sk_buff *skb)
1903 {
1904 	static const netdev_features_t null_features = 0;
1905 	struct net_device *dev = skb->dev;
1906 	const char *driver = "";
1907 
1908 	if (dev && dev->dev.parent)
1909 		driver = dev_driver_string(dev->dev.parent);
1910 
1911 	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
1912 	     "gso_type=%d ip_summed=%d\n",
1913 	     driver, dev ? &dev->features : &null_features,
1914 	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
1915 	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
1916 	     skb_shinfo(skb)->gso_type, skb->ip_summed);
1917 }
1918 
1919 /*
1920  * Invalidate hardware checksum when packet is to be mangled, and
1921  * complete checksum manually on outgoing path.
1922  */
1923 int skb_checksum_help(struct sk_buff *skb)
1924 {
1925 	__wsum csum;
1926 	int ret = 0, offset;
1927 
1928 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1929 		goto out_set_summed;
1930 
1931 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1932 		skb_warn_bad_offload(skb);
1933 		return -EINVAL;
1934 	}
1935 
1936 	offset = skb_checksum_start_offset(skb);
1937 	BUG_ON(offset >= skb_headlen(skb));
1938 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1939 
1940 	offset += skb->csum_offset;
1941 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1942 
1943 	if (skb_cloned(skb) &&
1944 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1945 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1946 		if (ret)
1947 			goto out;
1948 	}
1949 
1950 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1951 out_set_summed:
1952 	skb->ip_summed = CHECKSUM_NONE;
1953 out:
1954 	return ret;
1955 }
1956 EXPORT_SYMBOL(skb_checksum_help);
1957 
1958 /**
1959  *	skb_gso_segment - Perform segmentation on skb.
1960  *	@skb: buffer to segment
1961  *	@features: features for the output path (see dev->features)
1962  *
1963  *	This function segments the given skb and returns a list of segments.
1964  *
1965  *	It may return NULL if the skb requires no segmentation.  This is
1966  *	only possible when GSO is used for verifying header integrity.
1967  */
1968 struct sk_buff *skb_gso_segment(struct sk_buff *skb,
1969 	netdev_features_t features)
1970 {
1971 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1972 	struct packet_type *ptype;
1973 	__be16 type = skb->protocol;
1974 	int vlan_depth = ETH_HLEN;
1975 	int err;
1976 
1977 	while (type == htons(ETH_P_8021Q)) {
1978 		struct vlan_hdr *vh;
1979 
1980 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
1981 			return ERR_PTR(-EINVAL);
1982 
1983 		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
1984 		type = vh->h_vlan_encapsulated_proto;
1985 		vlan_depth += VLAN_HLEN;
1986 	}
1987 
1988 	skb_reset_mac_header(skb);
1989 	skb->mac_len = skb->network_header - skb->mac_header;
1990 	__skb_pull(skb, skb->mac_len);
1991 
1992 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1993 		skb_warn_bad_offload(skb);
1994 
1995 		if (skb_header_cloned(skb) &&
1996 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1997 			return ERR_PTR(err);
1998 	}
1999 
2000 	rcu_read_lock();
2001 	list_for_each_entry_rcu(ptype,
2002 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2003 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
2004 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
2005 				err = ptype->gso_send_check(skb);
2006 				segs = ERR_PTR(err);
2007 				if (err || skb_gso_ok(skb, features))
2008 					break;
2009 				__skb_push(skb, (skb->data -
2010 						 skb_network_header(skb)));
2011 			}
2012 			segs = ptype->gso_segment(skb, features);
2013 			break;
2014 		}
2015 	}
2016 	rcu_read_unlock();
2017 
2018 	__skb_push(skb, skb->data - skb_mac_header(skb));
2019 
2020 	return segs;
2021 }
2022 EXPORT_SYMBOL(skb_gso_segment);
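
/*
 * Example (illustrative sketch): the NULL / ERR_PTR / list handling a
 * caller of skb_gso_segment() has to do.  dev_gso_segment() below is
 * the real in-tree caller; foo_xmit_one() here is a hypothetical
 * per-segment transmit helper.
 *
 *	segs = skb_gso_segment(skb, features);
 *	if (IS_ERR(segs))
 *		return PTR_ERR(segs);
 *	if (!segs)
 *		return foo_xmit_one(skb);
 *	consume_skb(skb);
 *	while (segs) {
 *		struct sk_buff *next = segs->next;
 *
 *		segs->next = NULL;
 *		foo_xmit_one(segs);
 *		segs = next;
 *	}
 */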
2023 
2024 /* Take action when hardware reception checksum errors are detected. */
2025 #ifdef CONFIG_BUG
2026 void netdev_rx_csum_fault(struct net_device *dev)
2027 {
2028 	if (net_ratelimit()) {
2029 		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2030 		dump_stack();
2031 	}
2032 }
2033 EXPORT_SYMBOL(netdev_rx_csum_fault);
2034 #endif
2035 
/* Actually, we should eliminate this check as soon as we know that:
 * 1. An IOMMU is present and is able to map all of memory.
 * 2. No high memory really exists on this machine.
 */
2040 
2041 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2042 {
2043 #ifdef CONFIG_HIGHMEM
2044 	int i;
2045 	if (!(dev->features & NETIF_F_HIGHDMA)) {
2046 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2047 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2048 			if (PageHighMem(skb_frag_page(frag)))
2049 				return 1;
2050 		}
2051 	}
2052 
2053 	if (PCI_DMA_BUS_IS_PHYS) {
2054 		struct device *pdev = dev->dev.parent;
2055 
2056 		if (!pdev)
2057 			return 0;
2058 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2059 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2060 			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2061 			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2062 				return 1;
2063 		}
2064 	}
2065 #endif
2066 	return 0;
2067 }
2068 
2069 struct dev_gso_cb {
2070 	void (*destructor)(struct sk_buff *skb);
2071 };
2072 
2073 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2074 
2075 static void dev_gso_skb_destructor(struct sk_buff *skb)
2076 {
2077 	struct dev_gso_cb *cb;
2078 
2079 	do {
2080 		struct sk_buff *nskb = skb->next;
2081 
2082 		skb->next = nskb->next;
2083 		nskb->next = NULL;
2084 		kfree_skb(nskb);
2085 	} while (skb->next);
2086 
2087 	cb = DEV_GSO_CB(skb);
2088 	if (cb->destructor)
2089 		cb->destructor(skb);
2090 }
2091 
2092 /**
2093  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
2094  *	@skb: buffer to segment
2095  *	@features: device features as applicable to this skb
2096  *
2097  *	This function segments the given skb and stores the list of segments
2098  *	in skb->next.
2099  */
2100 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2101 {
2102 	struct sk_buff *segs;
2103 
2104 	segs = skb_gso_segment(skb, features);
2105 
2106 	/* Verifying header integrity only. */
2107 	if (!segs)
2108 		return 0;
2109 
2110 	if (IS_ERR(segs))
2111 		return PTR_ERR(segs);
2112 
2113 	skb->next = segs;
2114 	DEV_GSO_CB(skb)->destructor = skb->destructor;
2115 	skb->destructor = dev_gso_skb_destructor;
2116 
2117 	return 0;
2118 }
2119 
/*
 * Try to orphan the skb early, right before transmission by the device.
 * We cannot orphan the skb if a tx timestamp is requested, or if the sk
 * reference is needed at the driver level for other reasons,
 * e.g. see net/can/raw.c
 */
2125 static inline void skb_orphan_try(struct sk_buff *skb)
2126 {
2127 	struct sock *sk = skb->sk;
2128 
2129 	if (sk && !skb_shinfo(skb)->tx_flags) {
		/* skb_tx_hash() won't be able to get the sk.
		 * We copy sk_hash into skb->rxhash
		 */
2133 		if (!skb->rxhash)
2134 			skb->rxhash = sk->sk_hash;
2135 		skb_orphan(skb);
2136 	}
2137 }
2138 
2139 static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2140 {
2141 	return ((features & NETIF_F_GEN_CSUM) ||
2142 		((features & NETIF_F_V4_CSUM) &&
2143 		 protocol == htons(ETH_P_IP)) ||
2144 		((features & NETIF_F_V6_CSUM) &&
2145 		 protocol == htons(ETH_P_IPV6)) ||
2146 		((features & NETIF_F_FCOE_CRC) &&
2147 		 protocol == htons(ETH_P_FCOE)));
2148 }
2149 
2150 static netdev_features_t harmonize_features(struct sk_buff *skb,
2151 	__be16 protocol, netdev_features_t features)
2152 {
2153 	if (!can_checksum_protocol(features, protocol)) {
2154 		features &= ~NETIF_F_ALL_CSUM;
2155 		features &= ~NETIF_F_SG;
2156 	} else if (illegal_highdma(skb->dev, skb)) {
2157 		features &= ~NETIF_F_SG;
2158 	}
2159 
2160 	return features;
2161 }
2162 
2163 netdev_features_t netif_skb_features(struct sk_buff *skb)
2164 {
2165 	__be16 protocol = skb->protocol;
2166 	netdev_features_t features = skb->dev->features;
2167 
2168 	if (protocol == htons(ETH_P_8021Q)) {
2169 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2170 		protocol = veh->h_vlan_encapsulated_proto;
2171 	} else if (!vlan_tx_tag_present(skb)) {
2172 		return harmonize_features(skb, protocol, features);
2173 	}
2174 
2175 	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
2176 
2177 	if (protocol != htons(ETH_P_8021Q)) {
2178 		return harmonize_features(skb, protocol, features);
2179 	} else {
2180 		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2181 				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
2182 		return harmonize_features(skb, protocol, features);
2183 	}
2184 }
2185 EXPORT_SYMBOL(netif_skb_features);
2186 
/*
 * Returns true if either:
 *	1. skb has frag_list and the device doesn't support FRAGLIST, or
 *	2. skb is fragmented and the device does not support SG, or if
 *	   at least one of the fragments is in highmem and the device
 *	   does not support DMA from it.
 */
2194 static inline int skb_needs_linearize(struct sk_buff *skb,
2195 				      int features)
2196 {
2197 	return skb_is_nonlinear(skb) &&
2198 			((skb_has_frag_list(skb) &&
2199 				!(features & NETIF_F_FRAGLIST)) ||
2200 			(skb_shinfo(skb)->nr_frags &&
2201 				!(features & NETIF_F_SG)));
2202 }
2203 
2204 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2205 			struct netdev_queue *txq)
2206 {
2207 	const struct net_device_ops *ops = dev->netdev_ops;
2208 	int rc = NETDEV_TX_OK;
2209 	unsigned int skb_len;
2210 
2211 	if (likely(!skb->next)) {
2212 		netdev_features_t features;
2213 
		/*
		 * If the device doesn't need skb->dst, release it right now
		 * while it's hot in this CPU's cache.
		 */
2218 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2219 			skb_dst_drop(skb);
2220 
2221 		if (!list_empty(&ptype_all))
2222 			dev_queue_xmit_nit(skb, dev);
2223 
2224 		skb_orphan_try(skb);
2225 
2226 		features = netif_skb_features(skb);
2227 
2228 		if (vlan_tx_tag_present(skb) &&
2229 		    !(features & NETIF_F_HW_VLAN_TX)) {
2230 			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2231 			if (unlikely(!skb))
2232 				goto out;
2233 
2234 			skb->vlan_tci = 0;
2235 		}
2236 
2237 		if (netif_needs_gso(skb, features)) {
2238 			if (unlikely(dev_gso_segment(skb, features)))
2239 				goto out_kfree_skb;
2240 			if (skb->next)
2241 				goto gso;
2242 		} else {
2243 			if (skb_needs_linearize(skb, features) &&
2244 			    __skb_linearize(skb))
2245 				goto out_kfree_skb;
2246 
			/* If the packet is not checksummed and the device
			 * does not support checksumming for this protocol,
			 * complete the checksum here.
			 */
2251 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
2252 				skb_set_transport_header(skb,
2253 					skb_checksum_start_offset(skb));
2254 				if (!(features & NETIF_F_ALL_CSUM) &&
2255 				     skb_checksum_help(skb))
2256 					goto out_kfree_skb;
2257 			}
2258 		}
2259 
2260 		skb_len = skb->len;
2261 		rc = ops->ndo_start_xmit(skb, dev);
2262 		trace_net_dev_xmit(skb, rc, dev, skb_len);
2263 		if (rc == NETDEV_TX_OK)
2264 			txq_trans_update(txq);
2265 		return rc;
2266 	}
2267 
2268 gso:
2269 	do {
2270 		struct sk_buff *nskb = skb->next;
2271 
2272 		skb->next = nskb->next;
2273 		nskb->next = NULL;
2274 
		/*
		 * If the device doesn't need nskb->dst, release it right now
		 * while it's hot in this CPU's cache.
		 */
2279 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2280 			skb_dst_drop(nskb);
2281 
2282 		skb_len = nskb->len;
2283 		rc = ops->ndo_start_xmit(nskb, dev);
2284 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
2285 		if (unlikely(rc != NETDEV_TX_OK)) {
2286 			if (rc & ~NETDEV_TX_MASK)
2287 				goto out_kfree_gso_skb;
2288 			nskb->next = skb->next;
2289 			skb->next = nskb;
2290 			return rc;
2291 		}
2292 		txq_trans_update(txq);
2293 		if (unlikely(netif_xmit_stopped(txq) && skb->next))
2294 			return NETDEV_TX_BUSY;
2295 	} while (skb->next);
2296 
2297 out_kfree_gso_skb:
2298 	if (likely(skb->next == NULL))
2299 		skb->destructor = DEV_GSO_CB(skb)->destructor;
2300 out_kfree_skb:
2301 	kfree_skb(skb);
2302 out:
2303 	return rc;
2304 }
2305 
2306 static u32 hashrnd __read_mostly;
2307 
/*
 * Returns a Tx hash based on the given packet descriptor and a Tx queue
 * count to be used as the distribution range.
 */
2312 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2313 		  unsigned int num_tx_queues)
2314 {
2315 	u32 hash;
2316 	u16 qoffset = 0;
2317 	u16 qcount = num_tx_queues;
2318 
2319 	if (skb_rx_queue_recorded(skb)) {
2320 		hash = skb_get_rx_queue(skb);
2321 		while (unlikely(hash >= num_tx_queues))
2322 			hash -= num_tx_queues;
2323 		return hash;
2324 	}
2325 
2326 	if (dev->num_tc) {
2327 		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2328 		qoffset = dev->tc_to_txq[tc].offset;
2329 		qcount = dev->tc_to_txq[tc].count;
2330 	}
2331 
2332 	if (skb->sk && skb->sk->sk_hash)
2333 		hash = skb->sk->sk_hash;
2334 	else
2335 		hash = (__force u16) skb->protocol ^ skb->rxhash;
2336 	hash = jhash_1word(hash, hashrnd);
2337 
2338 	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
2339 }
2340 EXPORT_SYMBOL(__skb_tx_hash);
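
/*
 * Example (illustrative sketch, hypothetical driver "foo"): a driver
 * implementing ndo_select_queue() can fall back to the stack's hash by
 * calling skb_tx_hash(), which wraps __skb_tx_hash() with
 * dev->real_num_tx_queues as the distribution range.  foo_is_mgmt()
 * stands in for any driver-private classification.
 *
 *	static u16 foo_select_queue(struct net_device *dev, struct sk_buff *skb)
 *	{
 *		if (foo_is_mgmt(skb))
 *			return 0;
 *		return skb_tx_hash(dev, skb);
 *	}
 */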
2341 
2342 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2343 {
2344 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
2345 		if (net_ratelimit()) {
2346 			pr_warn("%s selects TX queue %d, but real number of TX queues is %d\n",
2347 				dev->name, queue_index,
2348 				dev->real_num_tx_queues);
2349 		}
2350 		return 0;
2351 	}
2352 	return queue_index;
2353 }
2354 
2355 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
2356 {
2357 #ifdef CONFIG_XPS
2358 	struct xps_dev_maps *dev_maps;
2359 	struct xps_map *map;
2360 	int queue_index = -1;
2361 
2362 	rcu_read_lock();
2363 	dev_maps = rcu_dereference(dev->xps_maps);
2364 	if (dev_maps) {
2365 		map = rcu_dereference(
2366 		    dev_maps->cpu_map[raw_smp_processor_id()]);
2367 		if (map) {
2368 			if (map->len == 1)
2369 				queue_index = map->queues[0];
2370 			else {
2371 				u32 hash;
2372 				if (skb->sk && skb->sk->sk_hash)
2373 					hash = skb->sk->sk_hash;
2374 				else
2375 					hash = (__force u16) skb->protocol ^
2376 					    skb->rxhash;
2377 				hash = jhash_1word(hash, hashrnd);
2378 				queue_index = map->queues[
2379 				    ((u64)hash * map->len) >> 32];
2380 			}
2381 			if (unlikely(queue_index >= dev->real_num_tx_queues))
2382 				queue_index = -1;
2383 		}
2384 	}
2385 	rcu_read_unlock();
2386 
2387 	return queue_index;
2388 #else
2389 	return -1;
2390 #endif
2391 }
2392 
2393 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2394 					struct sk_buff *skb)
2395 {
2396 	int queue_index;
2397 	const struct net_device_ops *ops = dev->netdev_ops;
2398 
2399 	if (dev->real_num_tx_queues == 1)
2400 		queue_index = 0;
2401 	else if (ops->ndo_select_queue) {
2402 		queue_index = ops->ndo_select_queue(dev, skb);
2403 		queue_index = dev_cap_txqueue(dev, queue_index);
2404 	} else {
2405 		struct sock *sk = skb->sk;
2406 		queue_index = sk_tx_queue_get(sk);
2407 
2408 		if (queue_index < 0 || skb->ooo_okay ||
2409 		    queue_index >= dev->real_num_tx_queues) {
2410 			int old_index = queue_index;
2411 
2412 			queue_index = get_xps_queue(dev, skb);
2413 			if (queue_index < 0)
2414 				queue_index = skb_tx_hash(dev, skb);
2415 
2416 			if (queue_index != old_index && sk) {
2417 				struct dst_entry *dst =
2418 				    rcu_dereference_check(sk->sk_dst_cache, 1);
2419 
2420 				if (dst && skb_dst(skb) == dst)
2421 					sk_tx_queue_set(sk, queue_index);
2422 			}
2423 		}
2424 	}
2425 
2426 	skb_set_queue_mapping(skb, queue_index);
2427 	return netdev_get_tx_queue(dev, queue_index);
2428 }
2429 
2430 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2431 				 struct net_device *dev,
2432 				 struct netdev_queue *txq)
2433 {
2434 	spinlock_t *root_lock = qdisc_lock(q);
2435 	bool contended;
2436 	int rc;
2437 
2438 	qdisc_skb_cb(skb)->pkt_len = skb->len;
2439 	qdisc_calculate_pkt_len(skb, q);
2440 	/*
2441 	 * Heuristic to force contended enqueues to serialize on a
2442 	 * separate lock before trying to get qdisc main lock.
2443 	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2444 	 * and dequeue packets faster.
2445 	 */
2446 	contended = qdisc_is_running(q);
2447 	if (unlikely(contended))
2448 		spin_lock(&q->busylock);
2449 
2450 	spin_lock(root_lock);
2451 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2452 		kfree_skb(skb);
2453 		rc = NET_XMIT_DROP;
2454 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2455 		   qdisc_run_begin(q)) {
2456 		/*
2457 		 * This is a work-conserving queue; there are no old skbs
2458 		 * waiting to be sent out; and the qdisc is not running -
2459 		 * xmit the skb directly.
2460 		 */
2461 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2462 			skb_dst_force(skb);
2463 
2464 		qdisc_bstats_update(q, skb);
2465 
2466 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2467 			if (unlikely(contended)) {
2468 				spin_unlock(&q->busylock);
2469 				contended = false;
2470 			}
2471 			__qdisc_run(q);
2472 		} else
2473 			qdisc_run_end(q);
2474 
2475 		rc = NET_XMIT_SUCCESS;
2476 	} else {
2477 		skb_dst_force(skb);
2478 		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2479 		if (qdisc_run_begin(q)) {
2480 			if (unlikely(contended)) {
2481 				spin_unlock(&q->busylock);
2482 				contended = false;
2483 			}
2484 			__qdisc_run(q);
2485 		}
2486 	}
2487 	spin_unlock(root_lock);
2488 	if (unlikely(contended))
2489 		spin_unlock(&q->busylock);
2490 	return rc;
2491 }
2492 
2493 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2494 static void skb_update_prio(struct sk_buff *skb)
2495 {
2496 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2497 
2498 	if ((!skb->priority) && (skb->sk) && map)
2499 		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
2500 }
2501 #else
2502 #define skb_update_prio(skb)
2503 #endif
2504 
2505 static DEFINE_PER_CPU(int, xmit_recursion);
2506 #define RECURSION_LIMIT 10
2507 
2508 /**
2509  *	dev_queue_xmit - transmit a buffer
2510  *	@skb: buffer to transmit
2511  *
2512  *	Queue a buffer for transmission to a network device. The caller must
2513  *	have set the device and priority and built the buffer before calling
2514  *	this function. The function can be called from an interrupt.
2515  *
2516  *	A negative errno code is returned on a failure. A success does not
2517  *	guarantee the frame will be transmitted as it may be dropped due
2518  *	to congestion or traffic shaping.
2519  *
2520  * -----------------------------------------------------------------------------------
2521  *      I notice this method can also return errors from the queue disciplines,
2522  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
2523  *      be positive.
2524  *
2525  *      Regardless of the return value, the skb is consumed, so it is currently
2526  *      difficult to retry a send to this method.  (You can bump the ref count
2527  *      before sending to hold a reference for retry if you are careful.)
2528  *
2529  *      When calling this method, interrupts MUST be enabled.  This is because
2530  *      the BH enable code must have IRQs enabled so that it will not deadlock.
2531  *          --BLG
2532  */
2533 int dev_queue_xmit(struct sk_buff *skb)
2534 {
2535 	struct net_device *dev = skb->dev;
2536 	struct netdev_queue *txq;
2537 	struct Qdisc *q;
2538 	int rc = -ENOMEM;
2539 
2540 	/* Disable soft irqs for various locks below. Also
2541 	 * stops preemption for RCU.
2542 	 */
2543 	rcu_read_lock_bh();
2544 
2545 	skb_update_prio(skb);
2546 
2547 	txq = dev_pick_tx(dev, skb);
2548 	q = rcu_dereference_bh(txq->qdisc);
2549 
2550 #ifdef CONFIG_NET_CLS_ACT
2551 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2552 #endif
2553 	trace_net_dev_queue(skb);
2554 	if (q->enqueue) {
2555 		rc = __dev_xmit_skb(skb, q, dev, txq);
2556 		goto out;
2557 	}
2558 
	/* The device has no queue. Common case for software devices:
	   loopback, all sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
	   counters.)
	   However, it is possible that they rely on the protection
	   we provide here.

	   Check this and shoot the lock. It is not prone to deadlocks.
	   Either shoot the noqueue qdisc, it is even simpler 8)
	 */
2571 	if (dev->flags & IFF_UP) {
2572 		int cpu = smp_processor_id(); /* ok because BHs are off */
2573 
2574 		if (txq->xmit_lock_owner != cpu) {
2575 
2576 			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2577 				goto recursion_alert;
2578 
2579 			HARD_TX_LOCK(dev, txq, cpu);
2580 
2581 			if (!netif_xmit_stopped(txq)) {
2582 				__this_cpu_inc(xmit_recursion);
2583 				rc = dev_hard_start_xmit(skb, dev, txq);
2584 				__this_cpu_dec(xmit_recursion);
2585 				if (dev_xmit_complete(rc)) {
2586 					HARD_TX_UNLOCK(dev, txq);
2587 					goto out;
2588 				}
2589 			}
2590 			HARD_TX_UNLOCK(dev, txq);
2591 			if (net_ratelimit())
2592 				pr_crit("Virtual device %s asks to queue packet!\n",
2593 					dev->name);
2594 		} else {
2595 			/* Recursion is detected! It is possible,
2596 			 * unfortunately
2597 			 */
2598 recursion_alert:
2599 			if (net_ratelimit())
2600 				pr_crit("Dead loop on virtual device %s, fix it urgently!\n",
2601 					dev->name);
2602 		}
2603 	}
2604 
2605 	rc = -ENETDOWN;
2606 	rcu_read_unlock_bh();
2607 
2608 	kfree_skb(skb);
2609 	return rc;
2610 out:
2611 	rcu_read_unlock_bh();
2612 	return rc;
2613 }
2614 EXPORT_SYMBOL(dev_queue_xmit);
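
/*
 * Example (illustrative sketch): how an in-kernel sender typically hands
 * a fully built packet to dev_queue_xmit().  "daddr" is a caller-supplied
 * destination hardware address; error handling is trimmed to the minimum.
 * Note the skb is consumed whatever dev_queue_xmit() returns.
 *
 *	skb->dev = dev;
 *	skb->protocol = htons(ETH_P_IP);
 *	if (dev_hard_header(skb, dev, ETH_P_IP, daddr,
 *			    dev->dev_addr, skb->len) < 0) {
 *		kfree_skb(skb);
 *		return -EINVAL;
 *	}
 *	return dev_queue_xmit(skb);
 */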
2615 
2616 
2617 /*=======================================================================
2618 			Receiver routines
2619   =======================================================================*/
2620 
2621 int netdev_max_backlog __read_mostly = 1000;
2622 int netdev_tstamp_prequeue __read_mostly = 1;
2623 int netdev_budget __read_mostly = 300;
2624 int weight_p __read_mostly = 64;            /* old backlog weight */
2625 
2626 /* Called with irq disabled */
2627 static inline void ____napi_schedule(struct softnet_data *sd,
2628 				     struct napi_struct *napi)
2629 {
2630 	list_add_tail(&napi->poll_list, &sd->poll_list);
2631 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2632 }
2633 
/*
 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
 * and src/dst port numbers.  Sets rxhash in the skb to a non-zero hash
 * value on success; zero indicates no valid hash.  Also sets l4_rxhash
 * in the skb if the hash is a canonical 4-tuple hash over transport ports.
 */
2640 void __skb_get_rxhash(struct sk_buff *skb)
2641 {
2642 	struct flow_keys keys;
2643 	u32 hash;
2644 
2645 	if (!skb_flow_dissect(skb, &keys))
2646 		return;
2647 
2648 	if (keys.ports) {
2649 		if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
2650 			swap(keys.port16[0], keys.port16[1]);
2651 		skb->l4_rxhash = 1;
2652 	}
2653 
2654 	/* get a consistent hash (same value on both flow directions) */
2655 	if ((__force u32)keys.dst < (__force u32)keys.src)
2656 		swap(keys.dst, keys.src);
2657 
2658 	hash = jhash_3words((__force u32)keys.dst,
2659 			    (__force u32)keys.src,
2660 			    (__force u32)keys.ports, hashrnd);
2661 	if (!hash)
2662 		hash = 1;
2663 
2664 	skb->rxhash = hash;
2665 }
2666 EXPORT_SYMBOL(__skb_get_rxhash);
2667 
2668 #ifdef CONFIG_RPS
2669 
2670 /* One global table that all flow-based protocols share. */
2671 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2672 EXPORT_SYMBOL(rps_sock_flow_table);
2673 
2674 struct static_key rps_needed __read_mostly;
2675 
2676 static struct rps_dev_flow *
2677 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2678 	    struct rps_dev_flow *rflow, u16 next_cpu)
2679 {
2680 	if (next_cpu != RPS_NO_CPU) {
2681 #ifdef CONFIG_RFS_ACCEL
2682 		struct netdev_rx_queue *rxqueue;
2683 		struct rps_dev_flow_table *flow_table;
2684 		struct rps_dev_flow *old_rflow;
2685 		u32 flow_id;
2686 		u16 rxq_index;
2687 		int rc;
2688 
2689 		/* Should we steer this flow to a different hardware queue? */
2690 		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2691 		    !(dev->features & NETIF_F_NTUPLE))
2692 			goto out;
2693 		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2694 		if (rxq_index == skb_get_rx_queue(skb))
2695 			goto out;
2696 
2697 		rxqueue = dev->_rx + rxq_index;
2698 		flow_table = rcu_dereference(rxqueue->rps_flow_table);
2699 		if (!flow_table)
2700 			goto out;
2701 		flow_id = skb->rxhash & flow_table->mask;
2702 		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2703 							rxq_index, flow_id);
2704 		if (rc < 0)
2705 			goto out;
2706 		old_rflow = rflow;
2707 		rflow = &flow_table->flows[flow_id];
2708 		rflow->filter = rc;
2709 		if (old_rflow->filter == rflow->filter)
2710 			old_rflow->filter = RPS_NO_FILTER;
2711 	out:
2712 #endif
2713 		rflow->last_qtail =
2714 			per_cpu(softnet_data, next_cpu).input_queue_head;
2715 	}
2716 
2717 	rflow->cpu = next_cpu;
2718 	return rflow;
2719 }
2720 
2721 /*
2722  * get_rps_cpu is called from netif_receive_skb and returns the target
2723  * CPU from the RPS map of the receiving queue for a given skb.
2724  * rcu_read_lock must be held on entry.
2725  */
2726 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2727 		       struct rps_dev_flow **rflowp)
2728 {
2729 	struct netdev_rx_queue *rxqueue;
2730 	struct rps_map *map;
2731 	struct rps_dev_flow_table *flow_table;
2732 	struct rps_sock_flow_table *sock_flow_table;
2733 	int cpu = -1;
2734 	u16 tcpu;
2735 
2736 	if (skb_rx_queue_recorded(skb)) {
2737 		u16 index = skb_get_rx_queue(skb);
2738 		if (unlikely(index >= dev->real_num_rx_queues)) {
2739 			WARN_ONCE(dev->real_num_rx_queues > 1,
2740 				  "%s received packet on queue %u, but number "
2741 				  "of RX queues is %u\n",
2742 				  dev->name, index, dev->real_num_rx_queues);
2743 			goto done;
2744 		}
2745 		rxqueue = dev->_rx + index;
2746 	} else
2747 		rxqueue = dev->_rx;
2748 
2749 	map = rcu_dereference(rxqueue->rps_map);
2750 	if (map) {
2751 		if (map->len == 1 &&
2752 		    !rcu_access_pointer(rxqueue->rps_flow_table)) {
2753 			tcpu = map->cpus[0];
2754 			if (cpu_online(tcpu))
2755 				cpu = tcpu;
2756 			goto done;
2757 		}
2758 	} else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
2759 		goto done;
2760 	}
2761 
2762 	skb_reset_network_header(skb);
2763 	if (!skb_get_rxhash(skb))
2764 		goto done;
2765 
2766 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2767 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
2768 	if (flow_table && sock_flow_table) {
2769 		u16 next_cpu;
2770 		struct rps_dev_flow *rflow;
2771 
2772 		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2773 		tcpu = rflow->cpu;
2774 
2775 		next_cpu = sock_flow_table->ents[skb->rxhash &
2776 		    sock_flow_table->mask];
2777 
2778 		/*
2779 		 * If the desired CPU (where last recvmsg was done) is
2780 		 * different from current CPU (one in the rx-queue flow
2781 		 * table entry), switch if one of the following holds:
2782 		 *   - Current CPU is unset (equal to RPS_NO_CPU).
2783 		 *   - Current CPU is offline.
2784 		 *   - The current CPU's queue tail has advanced beyond the
2785 		 *     last packet that was enqueued using this table entry.
2786 		 *     This guarantees that all previous packets for the flow
2787 		 *     have been dequeued, thus preserving in order delivery.
2788 		 */
2789 		if (unlikely(tcpu != next_cpu) &&
2790 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2791 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2792 		      rflow->last_qtail)) >= 0))
2793 			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
2794 
2795 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2796 			*rflowp = rflow;
2797 			cpu = tcpu;
2798 			goto done;
2799 		}
2800 	}
2801 
2802 	if (map) {
2803 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2804 
2805 		if (cpu_online(tcpu)) {
2806 			cpu = tcpu;
2807 			goto done;
2808 		}
2809 	}
2810 
2811 done:
2812 	return cpu;
2813 }
2814 
2815 #ifdef CONFIG_RFS_ACCEL
2816 
2817 /**
2818  * rps_may_expire_flow - check whether an RFS hardware filter may be removed
2819  * @dev: Device on which the filter was set
2820  * @rxq_index: RX queue index
2821  * @flow_id: Flow ID passed to ndo_rx_flow_steer()
2822  * @filter_id: Filter ID returned by ndo_rx_flow_steer()
2823  *
2824  * Drivers that implement ndo_rx_flow_steer() should periodically call
2825  * this function for each installed filter and remove the filters for
2826  * which it returns %true.
2827  */
2828 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2829 			 u32 flow_id, u16 filter_id)
2830 {
2831 	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2832 	struct rps_dev_flow_table *flow_table;
2833 	struct rps_dev_flow *rflow;
2834 	bool expire = true;
2835 	int cpu;
2836 
2837 	rcu_read_lock();
2838 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2839 	if (flow_table && flow_id <= flow_table->mask) {
2840 		rflow = &flow_table->flows[flow_id];
2841 		cpu = ACCESS_ONCE(rflow->cpu);
2842 		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2843 		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2844 			   rflow->last_qtail) <
2845 		     (int)(10 * flow_table->mask)))
2846 			expire = false;
2847 	}
2848 	rcu_read_unlock();
2849 	return expire;
2850 }
2851 EXPORT_SYMBOL(rps_may_expire_flow);
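
/*
 * Example (illustrative sketch, hypothetical driver "foo"): the periodic
 * scan an RFS-capable driver might run over its installed filters,
 * removing the ones the stack says may expire.  struct foo_priv,
 * struct foo_filter and foo_remove_filter() are assumptions, not a real
 * driver API; the filter_id passed back is the value that
 * ndo_rx_flow_steer() originally returned.
 *
 *	static void foo_expire_filters(struct foo_priv *priv)
 *	{
 *		int i;
 *
 *		for (i = 0; i < priv->n_filters; i++) {
 *			struct foo_filter *f = &priv->filters[i];
 *
 *			if (!f->installed)
 *				continue;
 *			if (rps_may_expire_flow(priv->netdev, f->rxq_index,
 *						f->flow_id, i))
 *				foo_remove_filter(priv, f);
 *		}
 *	}
 */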
2852 
2853 #endif /* CONFIG_RFS_ACCEL */
2854 
2855 /* Called from hardirq (IPI) context */
2856 static void rps_trigger_softirq(void *data)
2857 {
2858 	struct softnet_data *sd = data;
2859 
2860 	____napi_schedule(sd, &sd->backlog);
2861 	sd->received_rps++;
2862 }
2863 
2864 #endif /* CONFIG_RPS */
2865 
/*
 * Check if this softnet_data structure belongs to another CPU.
 * If yes, queue it to our IPI list and return 1.
 * If no, return 0.
 */
2871 static int rps_ipi_queued(struct softnet_data *sd)
2872 {
2873 #ifdef CONFIG_RPS
2874 	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2875 
2876 	if (sd != mysd) {
2877 		sd->rps_ipi_next = mysd->rps_ipi_list;
2878 		mysd->rps_ipi_list = sd;
2879 
2880 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2881 		return 1;
2882 	}
2883 #endif /* CONFIG_RPS */
2884 	return 0;
2885 }
2886 
2887 /*
2888  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2889  * queue (may be a remote CPU queue).
2890  */
2891 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2892 			      unsigned int *qtail)
2893 {
2894 	struct softnet_data *sd;
2895 	unsigned long flags;
2896 
2897 	sd = &per_cpu(softnet_data, cpu);
2898 
2899 	local_irq_save(flags);
2900 
2901 	rps_lock(sd);
2902 	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2903 		if (skb_queue_len(&sd->input_pkt_queue)) {
2904 enqueue:
2905 			__skb_queue_tail(&sd->input_pkt_queue, skb);
2906 			input_queue_tail_incr_save(sd, qtail);
2907 			rps_unlock(sd);
2908 			local_irq_restore(flags);
2909 			return NET_RX_SUCCESS;
2910 		}
2911 
		/* Schedule NAPI for the backlog device.
		 * We can use a non-atomic operation since we own the queue lock.
		 */
2915 		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2916 			if (!rps_ipi_queued(sd))
2917 				____napi_schedule(sd, &sd->backlog);
2918 		}
2919 		goto enqueue;
2920 	}
2921 
2922 	sd->dropped++;
2923 	rps_unlock(sd);
2924 
2925 	local_irq_restore(flags);
2926 
2927 	atomic_long_inc(&skb->dev->rx_dropped);
2928 	kfree_skb(skb);
2929 	return NET_RX_DROP;
2930 }
2931 
2932 /**
2933  *	netif_rx	-	post buffer to the network code
2934  *	@skb: buffer to post
2935  *
2936  *	This function receives a packet from a device driver and queues it for
2937  *	the upper (protocol) levels to process.  It always succeeds. The buffer
2938  *	may be dropped during processing for congestion control or by the
2939  *	protocol layers.
2940  *
2941  *	return values:
2942  *	NET_RX_SUCCESS	(no congestion)
2943  *	NET_RX_DROP     (packet was dropped)
2944  *
2945  */
2946 
2947 int netif_rx(struct sk_buff *skb)
2948 {
2949 	int ret;
2950 
2951 	/* if netpoll wants it, pretend we never saw it */
2952 	if (netpoll_rx(skb))
2953 		return NET_RX_DROP;
2954 
2955 	net_timestamp_check(netdev_tstamp_prequeue, skb);
2956 
2957 	trace_netif_rx(skb);
2958 #ifdef CONFIG_RPS
2959 	if (static_key_false(&rps_needed)) {
2960 		struct rps_dev_flow voidflow, *rflow = &voidflow;
2961 		int cpu;
2962 
2963 		preempt_disable();
2964 		rcu_read_lock();
2965 
2966 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
2967 		if (cpu < 0)
2968 			cpu = smp_processor_id();
2969 
2970 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2971 
2972 		rcu_read_unlock();
2973 		preempt_enable();
2974 	} else
2975 #endif
2976 	{
2977 		unsigned int qtail;
2978 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2979 		put_cpu();
2980 	}
2981 	return ret;
2982 }
2983 EXPORT_SYMBOL(netif_rx);
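
/*
 * Example (illustrative sketch, hypothetical non-NAPI driver "foo"):
 * the classic RX path.  The driver copies the frame into a fresh skb,
 * lets eth_type_trans() set skb->protocol and skb->dev, then posts the
 * buffer with netif_rx() from interrupt context.
 *
 *	static void foo_rx(struct net_device *dev, const void *data, int len)
 *	{
 *		struct sk_buff *skb;
 *
 *		skb = netdev_alloc_skb_ip_align(dev, len);
 *		if (!skb) {
 *			dev->stats.rx_dropped++;
 *			return;
 *		}
 *		memcpy(skb_put(skb, len), data, len);
 *		skb->protocol = eth_type_trans(skb, dev);
 *		netif_rx(skb);
 *	}
 */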
2984 
2985 int netif_rx_ni(struct sk_buff *skb)
2986 {
2987 	int err;
2988 
2989 	preempt_disable();
2990 	err = netif_rx(skb);
2991 	if (local_softirq_pending())
2992 		do_softirq();
2993 	preempt_enable();
2994 
2995 	return err;
2996 }
2997 EXPORT_SYMBOL(netif_rx_ni);
2998 
2999 static void net_tx_action(struct softirq_action *h)
3000 {
3001 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3002 
3003 	if (sd->completion_queue) {
3004 		struct sk_buff *clist;
3005 
3006 		local_irq_disable();
3007 		clist = sd->completion_queue;
3008 		sd->completion_queue = NULL;
3009 		local_irq_enable();
3010 
3011 		while (clist) {
3012 			struct sk_buff *skb = clist;
3013 			clist = clist->next;
3014 
3015 			WARN_ON(atomic_read(&skb->users));
3016 			trace_kfree_skb(skb, net_tx_action);
3017 			__kfree_skb(skb);
3018 		}
3019 	}
3020 
3021 	if (sd->output_queue) {
3022 		struct Qdisc *head;
3023 
3024 		local_irq_disable();
3025 		head = sd->output_queue;
3026 		sd->output_queue = NULL;
3027 		sd->output_queue_tailp = &sd->output_queue;
3028 		local_irq_enable();
3029 
3030 		while (head) {
3031 			struct Qdisc *q = head;
3032 			spinlock_t *root_lock;
3033 
3034 			head = head->next_sched;
3035 
3036 			root_lock = qdisc_lock(q);
3037 			if (spin_trylock(root_lock)) {
3038 				smp_mb__before_clear_bit();
3039 				clear_bit(__QDISC_STATE_SCHED,
3040 					  &q->state);
3041 				qdisc_run(q);
3042 				spin_unlock(root_lock);
3043 			} else {
3044 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
3045 					      &q->state)) {
3046 					__netif_reschedule(q);
3047 				} else {
3048 					smp_mb__before_clear_bit();
3049 					clear_bit(__QDISC_STATE_SCHED,
3050 						  &q->state);
3051 				}
3052 			}
3053 		}
3054 	}
3055 }
3056 
3057 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3058     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3059 /* This hook is defined here for ATM LANE */
3060 int (*br_fdb_test_addr_hook)(struct net_device *dev,
3061 			     unsigned char *addr) __read_mostly;
3062 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3063 #endif
3064 
3065 #ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is?  Otherwise we pay for some useless
 * instructions (a compare and two extra stores) whenever we have
 * CONFIG_NET_CLS_ACT but the ingress scheduler is not configured.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
3074 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3075 {
3076 	struct net_device *dev = skb->dev;
3077 	u32 ttl = G_TC_RTTL(skb->tc_verd);
3078 	int result = TC_ACT_OK;
3079 	struct Qdisc *q;
3080 
3081 	if (unlikely(MAX_RED_LOOP < ttl++)) {
3082 		if (net_ratelimit())
3083 			pr_warn("Redir loop detected Dropping packet (%d->%d)\n",
3084 				skb->skb_iif, dev->ifindex);
3085 		return TC_ACT_SHOT;
3086 	}
3087 
3088 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3089 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3090 
3091 	q = rxq->qdisc;
3092 	if (q != &noop_qdisc) {
3093 		spin_lock(qdisc_lock(q));
3094 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3095 			result = qdisc_enqueue_root(skb, q);
3096 		spin_unlock(qdisc_lock(q));
3097 	}
3098 
3099 	return result;
3100 }
3101 
3102 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3103 					 struct packet_type **pt_prev,
3104 					 int *ret, struct net_device *orig_dev)
3105 {
3106 	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3107 
3108 	if (!rxq || rxq->qdisc == &noop_qdisc)
3109 		goto out;
3110 
3111 	if (*pt_prev) {
3112 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
3113 		*pt_prev = NULL;
3114 	}
3115 
3116 	switch (ing_filter(skb, rxq)) {
3117 	case TC_ACT_SHOT:
3118 	case TC_ACT_STOLEN:
3119 		kfree_skb(skb);
3120 		return NULL;
3121 	}
3122 
3123 out:
3124 	skb->tc_verd = 0;
3125 	return skb;
3126 }
3127 #endif
3128 
3129 /**
3130  *	netdev_rx_handler_register - register receive handler
3131  *	@dev: device to register a handler for
3132  *	@rx_handler: receive handler to register
3133  *	@rx_handler_data: data pointer that is used by rx handler
3134  *
 *	Register a receive handler for a device. This handler will then be
3136  *	called from __netif_receive_skb. A negative errno code is returned
3137  *	on a failure.
3138  *
3139  *	The caller must hold the rtnl_mutex.
3140  *
3141  *	For a general description of rx_handler, see enum rx_handler_result.
3142  */
3143 int netdev_rx_handler_register(struct net_device *dev,
3144 			       rx_handler_func_t *rx_handler,
3145 			       void *rx_handler_data)
3146 {
3147 	ASSERT_RTNL();
3148 
3149 	if (dev->rx_handler)
3150 		return -EBUSY;
3151 
3152 	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3153 	rcu_assign_pointer(dev->rx_handler, rx_handler);
3154 
3155 	return 0;
3156 }
3157 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
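
/*
 * Example (illustrative sketch): what a bridge/macvlan-style user of the
 * rx_handler hook looks like.  The "foo" names are hypothetical; this
 * handler only inspects the frame and lets normal delivery continue by
 * returning RX_HANDLER_PASS.
 *
 *	static rx_handler_result_t foo_handle_frame(struct sk_buff **pskb)
 *	{
 *		struct foo_port *port;
 *
 *		port = rcu_dereference((*pskb)->dev->rx_handler_data);
 *		foo_count_frame(port, *pskb);
 *		return RX_HANDLER_PASS;
 *	}
 *
 *	rtnl_lock();
 *	err = netdev_rx_handler_register(dev, foo_handle_frame, port);
 *	rtnl_unlock();
 */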
3158 
3159 /**
3160  *	netdev_rx_handler_unregister - unregister receive handler
3161  *	@dev: device to unregister a handler from
3162  *
 *	Unregister a receive handler from a device.
3164  *
3165  *	The caller must hold the rtnl_mutex.
3166  */
3167 void netdev_rx_handler_unregister(struct net_device *dev)
3168 {
3169 
3170 	ASSERT_RTNL();
3171 	RCU_INIT_POINTER(dev->rx_handler, NULL);
3172 	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3173 }
3174 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3175 
3176 static int __netif_receive_skb(struct sk_buff *skb)
3177 {
3178 	struct packet_type *ptype, *pt_prev;
3179 	rx_handler_func_t *rx_handler;
3180 	struct net_device *orig_dev;
3181 	struct net_device *null_or_dev;
3182 	bool deliver_exact = false;
3183 	int ret = NET_RX_DROP;
3184 	__be16 type;
3185 
3186 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
3187 
3188 	trace_netif_receive_skb(skb);
3189 
3190 	/* if we've gotten here through NAPI, check netpoll */
3191 	if (netpoll_receive_skb(skb))
3192 		return NET_RX_DROP;
3193 
3194 	if (!skb->skb_iif)
3195 		skb->skb_iif = skb->dev->ifindex;
3196 	orig_dev = skb->dev;
3197 
3198 	skb_reset_network_header(skb);
3199 	skb_reset_transport_header(skb);
3200 	skb_reset_mac_len(skb);
3201 
3202 	pt_prev = NULL;
3203 
3204 	rcu_read_lock();
3205 
3206 another_round:
3207 
3208 	__this_cpu_inc(softnet_data.processed);
3209 
3210 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3211 		skb = vlan_untag(skb);
3212 		if (unlikely(!skb))
3213 			goto out;
3214 	}
3215 
3216 #ifdef CONFIG_NET_CLS_ACT
3217 	if (skb->tc_verd & TC_NCLS) {
3218 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3219 		goto ncls;
3220 	}
3221 #endif
3222 
3223 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
3224 		if (!ptype->dev || ptype->dev == skb->dev) {
3225 			if (pt_prev)
3226 				ret = deliver_skb(skb, pt_prev, orig_dev);
3227 			pt_prev = ptype;
3228 		}
3229 	}
3230 
3231 #ifdef CONFIG_NET_CLS_ACT
3232 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3233 	if (!skb)
3234 		goto out;
3235 ncls:
3236 #endif
3237 
3238 	rx_handler = rcu_dereference(skb->dev->rx_handler);
3239 	if (vlan_tx_tag_present(skb)) {
3240 		if (pt_prev) {
3241 			ret = deliver_skb(skb, pt_prev, orig_dev);
3242 			pt_prev = NULL;
3243 		}
3244 		if (vlan_do_receive(&skb, !rx_handler))
3245 			goto another_round;
3246 		else if (unlikely(!skb))
3247 			goto out;
3248 	}
3249 
3250 	if (rx_handler) {
3251 		if (pt_prev) {
3252 			ret = deliver_skb(skb, pt_prev, orig_dev);
3253 			pt_prev = NULL;
3254 		}
3255 		switch (rx_handler(&skb)) {
3256 		case RX_HANDLER_CONSUMED:
3257 			goto out;
3258 		case RX_HANDLER_ANOTHER:
3259 			goto another_round;
3260 		case RX_HANDLER_EXACT:
3261 			deliver_exact = true;
3262 		case RX_HANDLER_PASS:
3263 			break;
3264 		default:
3265 			BUG();
3266 		}
3267 	}
3268 
3269 	/* deliver only exact match when indicated */
3270 	null_or_dev = deliver_exact ? skb->dev : NULL;
3271 
3272 	type = skb->protocol;
3273 	list_for_each_entry_rcu(ptype,
3274 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3275 		if (ptype->type == type &&
3276 		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3277 		     ptype->dev == orig_dev)) {
3278 			if (pt_prev)
3279 				ret = deliver_skb(skb, pt_prev, orig_dev);
3280 			pt_prev = ptype;
3281 		}
3282 	}
3283 
3284 	if (pt_prev) {
3285 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3286 	} else {
3287 		atomic_long_inc(&skb->dev->rx_dropped);
3288 		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
		 */
3292 		ret = NET_RX_DROP;
3293 	}
3294 
3295 out:
3296 	rcu_read_unlock();
3297 	return ret;
3298 }
3299 
3300 /**
3301  *	netif_receive_skb - process receive buffer from network
3302  *	@skb: buffer to process
3303  *
3304  *	netif_receive_skb() is the main receive data processing function.
3305  *	It always succeeds. The buffer may be dropped during processing
3306  *	for congestion control or by the protocol layers.
3307  *
3308  *	This function may only be called from softirq context and interrupts
3309  *	should be enabled.
3310  *
3311  *	Return values (usually ignored):
3312  *	NET_RX_SUCCESS: no congestion
3313  *	NET_RX_DROP: packet was dropped
3314  */
3315 int netif_receive_skb(struct sk_buff *skb)
3316 {
3317 	net_timestamp_check(netdev_tstamp_prequeue, skb);
3318 
3319 	if (skb_defer_rx_timestamp(skb))
3320 		return NET_RX_SUCCESS;
3321 
3322 #ifdef CONFIG_RPS
3323 	if (static_key_false(&rps_needed)) {
3324 		struct rps_dev_flow voidflow, *rflow = &voidflow;
3325 		int cpu, ret;
3326 
3327 		rcu_read_lock();
3328 
3329 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
3330 
3331 		if (cpu >= 0) {
3332 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3333 			rcu_read_unlock();
3334 			return ret;
3335 		}
3336 		rcu_read_unlock();
3337 	}
3338 #endif
3339 	return __netif_receive_skb(skb);
3340 }
3341 EXPORT_SYMBOL(netif_receive_skb);
3342 
/* Network device is going away; flush any packets still pending.
 * Called with irqs disabled.
 */
3346 static void flush_backlog(void *arg)
3347 {
3348 	struct net_device *dev = arg;
3349 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3350 	struct sk_buff *skb, *tmp;
3351 
3352 	rps_lock(sd);
3353 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3354 		if (skb->dev == dev) {
3355 			__skb_unlink(skb, &sd->input_pkt_queue);
3356 			kfree_skb(skb);
3357 			input_queue_head_incr(sd);
3358 		}
3359 	}
3360 	rps_unlock(sd);
3361 
3362 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3363 		if (skb->dev == dev) {
3364 			__skb_unlink(skb, &sd->process_queue);
3365 			kfree_skb(skb);
3366 			input_queue_head_incr(sd);
3367 		}
3368 	}
3369 }
3370 
3371 static int napi_gro_complete(struct sk_buff *skb)
3372 {
3373 	struct packet_type *ptype;
3374 	__be16 type = skb->protocol;
3375 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3376 	int err = -ENOENT;
3377 
3378 	if (NAPI_GRO_CB(skb)->count == 1) {
3379 		skb_shinfo(skb)->gso_size = 0;
3380 		goto out;
3381 	}
3382 
3383 	rcu_read_lock();
3384 	list_for_each_entry_rcu(ptype, head, list) {
3385 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3386 			continue;
3387 
3388 		err = ptype->gro_complete(skb);
3389 		break;
3390 	}
3391 	rcu_read_unlock();
3392 
3393 	if (err) {
3394 		WARN_ON(&ptype->list == head);
3395 		kfree_skb(skb);
3396 		return NET_RX_SUCCESS;
3397 	}
3398 
3399 out:
3400 	return netif_receive_skb(skb);
3401 }
3402 
3403 inline void napi_gro_flush(struct napi_struct *napi)
3404 {
3405 	struct sk_buff *skb, *next;
3406 
3407 	for (skb = napi->gro_list; skb; skb = next) {
3408 		next = skb->next;
3409 		skb->next = NULL;
3410 		napi_gro_complete(skb);
3411 	}
3412 
3413 	napi->gro_count = 0;
3414 	napi->gro_list = NULL;
3415 }
3416 EXPORT_SYMBOL(napi_gro_flush);
3417 
3418 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3419 {
3420 	struct sk_buff **pp = NULL;
3421 	struct packet_type *ptype;
3422 	__be16 type = skb->protocol;
3423 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3424 	int same_flow;
3425 	int mac_len;
3426 	enum gro_result ret;
3427 
3428 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3429 		goto normal;
3430 
3431 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
3432 		goto normal;
3433 
3434 	rcu_read_lock();
3435 	list_for_each_entry_rcu(ptype, head, list) {
3436 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3437 			continue;
3438 
3439 		skb_set_network_header(skb, skb_gro_offset(skb));
3440 		mac_len = skb->network_header - skb->mac_header;
3441 		skb->mac_len = mac_len;
3442 		NAPI_GRO_CB(skb)->same_flow = 0;
3443 		NAPI_GRO_CB(skb)->flush = 0;
3444 		NAPI_GRO_CB(skb)->free = 0;
3445 
3446 		pp = ptype->gro_receive(&napi->gro_list, skb);
3447 		break;
3448 	}
3449 	rcu_read_unlock();
3450 
3451 	if (&ptype->list == head)
3452 		goto normal;
3453 
3454 	same_flow = NAPI_GRO_CB(skb)->same_flow;
3455 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3456 
3457 	if (pp) {
3458 		struct sk_buff *nskb = *pp;
3459 
3460 		*pp = nskb->next;
3461 		nskb->next = NULL;
3462 		napi_gro_complete(nskb);
3463 		napi->gro_count--;
3464 	}
3465 
3466 	if (same_flow)
3467 		goto ok;
3468 
3469 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3470 		goto normal;
3471 
3472 	napi->gro_count++;
3473 	NAPI_GRO_CB(skb)->count = 1;
3474 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3475 	skb->next = napi->gro_list;
3476 	napi->gro_list = skb;
3477 	ret = GRO_HELD;
3478 
3479 pull:
3480 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
3481 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
3482 
3483 		BUG_ON(skb->end - skb->tail < grow);
3484 
3485 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3486 
3487 		skb->tail += grow;
3488 		skb->data_len -= grow;
3489 
3490 		skb_shinfo(skb)->frags[0].page_offset += grow;
3491 		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3492 
3493 		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3494 			skb_frag_unref(skb, 0);
3495 			memmove(skb_shinfo(skb)->frags,
3496 				skb_shinfo(skb)->frags + 1,
3497 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3498 		}
3499 	}
3500 
3501 ok:
3502 	return ret;
3503 
3504 normal:
3505 	ret = GRO_NORMAL;
3506 	goto pull;
3507 }
3508 EXPORT_SYMBOL(dev_gro_receive);
3509 
3510 static inline gro_result_t
3511 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3512 {
3513 	struct sk_buff *p;
3514 	unsigned int maclen = skb->dev->hard_header_len;
3515 
3516 	for (p = napi->gro_list; p; p = p->next) {
3517 		unsigned long diffs;
3518 
3519 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3520 		diffs |= p->vlan_tci ^ skb->vlan_tci;
3521 		if (maclen == ETH_HLEN)
3522 			diffs |= compare_ether_header(skb_mac_header(p),
3523 						      skb_gro_mac_header(skb));
3524 		else if (!diffs)
3525 			diffs = memcmp(skb_mac_header(p),
3526 				       skb_gro_mac_header(skb),
3527 				       maclen);
3528 		NAPI_GRO_CB(p)->same_flow = !diffs;
3529 		NAPI_GRO_CB(p)->flush = 0;
3530 	}
3531 
3532 	return dev_gro_receive(napi, skb);
3533 }
3534 
3535 gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3536 {
3537 	switch (ret) {
3538 	case GRO_NORMAL:
3539 		if (netif_receive_skb(skb))
3540 			ret = GRO_DROP;
3541 		break;
3542 
3543 	case GRO_DROP:
3544 	case GRO_MERGED_FREE:
3545 		kfree_skb(skb);
3546 		break;
3547 
3548 	case GRO_HELD:
3549 	case GRO_MERGED:
3550 		break;
3551 	}
3552 
3553 	return ret;
3554 }
3555 EXPORT_SYMBOL(napi_skb_finish);
3556 
3557 void skb_gro_reset_offset(struct sk_buff *skb)
3558 {
3559 	NAPI_GRO_CB(skb)->data_offset = 0;
3560 	NAPI_GRO_CB(skb)->frag0 = NULL;
3561 	NAPI_GRO_CB(skb)->frag0_len = 0;
3562 
3563 	if (skb->mac_header == skb->tail &&
3564 	    !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
3565 		NAPI_GRO_CB(skb)->frag0 =
3566 			skb_frag_address(&skb_shinfo(skb)->frags[0]);
3567 		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
3568 	}
3569 }
3570 EXPORT_SYMBOL(skb_gro_reset_offset);
3571 
3572 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3573 {
3574 	skb_gro_reset_offset(skb);
3575 
3576 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3577 }
3578 EXPORT_SYMBOL(napi_gro_receive);
3579 
3580 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3581 {
3582 	__skb_pull(skb, skb_headlen(skb));
3583 	/* restore the reserve we had after netdev_alloc_skb_ip_align() */
3584 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3585 	skb->vlan_tci = 0;
3586 	skb->dev = napi->dev;
3587 	skb->skb_iif = 0;
3588 
3589 	napi->skb = skb;
3590 }
3591 
3592 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3593 {
3594 	struct sk_buff *skb = napi->skb;
3595 
3596 	if (!skb) {
3597 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3598 		if (skb)
3599 			napi->skb = skb;
3600 	}
3601 	return skb;
3602 }
3603 EXPORT_SYMBOL(napi_get_frags);
3604 
3605 gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3606 			       gro_result_t ret)
3607 {
3608 	switch (ret) {
3609 	case GRO_NORMAL:
3610 	case GRO_HELD:
3611 		skb->protocol = eth_type_trans(skb, skb->dev);
3612 
3613 		if (ret == GRO_HELD)
3614 			skb_gro_pull(skb, -ETH_HLEN);
3615 		else if (netif_receive_skb(skb))
3616 			ret = GRO_DROP;
3617 		break;
3618 
3619 	case GRO_DROP:
3620 	case GRO_MERGED_FREE:
3621 		napi_reuse_skb(napi, skb);
3622 		break;
3623 
3624 	case GRO_MERGED:
3625 		break;
3626 	}
3627 
3628 	return ret;
3629 }
3630 EXPORT_SYMBOL(napi_frags_finish);
3631 
3632 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3633 {
3634 	struct sk_buff *skb = napi->skb;
3635 	struct ethhdr *eth;
3636 	unsigned int hlen;
3637 	unsigned int off;
3638 
3639 	napi->skb = NULL;
3640 
3641 	skb_reset_mac_header(skb);
3642 	skb_gro_reset_offset(skb);
3643 
3644 	off = skb_gro_offset(skb);
3645 	hlen = off + sizeof(*eth);
3646 	eth = skb_gro_header_fast(skb, off);
3647 	if (skb_gro_header_hard(skb, hlen)) {
3648 		eth = skb_gro_header_slow(skb, hlen, off);
3649 		if (unlikely(!eth)) {
3650 			napi_reuse_skb(napi, skb);
3651 			skb = NULL;
3652 			goto out;
3653 		}
3654 	}
3655 
3656 	skb_gro_pull(skb, sizeof(*eth));
3657 
3658 	/*
3659 	 * This works because the only protocols we care about don't require
3660 	 * special handling.  We'll fix it up properly at the end.
3661 	 */
3662 	skb->protocol = eth->h_proto;
3663 
3664 out:
3665 	return skb;
3666 }
3667 EXPORT_SYMBOL(napi_frags_skb);
3668 
3669 gro_result_t napi_gro_frags(struct napi_struct *napi)
3670 {
3671 	struct sk_buff *skb = napi_frags_skb(napi);
3672 
3673 	if (!skb)
3674 		return GRO_DROP;
3675 
3676 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3677 }
3678 EXPORT_SYMBOL(napi_gro_frags);
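
/*
 * Example (illustrative sketch, hypothetical page-based driver "foo"):
 * the napi_get_frags()/napi_gro_frags() pattern.  The driver attaches
 * its RX page fragment to the stack-owned skb; napi_frags_skb() above
 * pulls the Ethernet header itself, so no eth_type_trans() call is
 * needed in the driver.
 *
 *	skb = napi_get_frags(napi);
 *	if (!skb)
 *		return;
 *	skb_fill_page_desc(skb, 0, page, offset, len);
 *	skb->len += len;
 *	skb->data_len += len;
 *	skb->truesize += PAGE_SIZE;
 *	napi_gro_frags(napi);
 */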
3679 
3680 /*
3681  * net_rps_action sends any pending IPI's for rps.
3682  * Note: called with local irq disabled, but exits with local irq enabled.
3683  */
3684 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3685 {
3686 #ifdef CONFIG_RPS
3687 	struct softnet_data *remsd = sd->rps_ipi_list;
3688 
3689 	if (remsd) {
3690 		sd->rps_ipi_list = NULL;
3691 
3692 		local_irq_enable();
3693 
3694 		/* Send pending IPI's to kick RPS processing on remote cpus. */
3695 		while (remsd) {
3696 			struct softnet_data *next = remsd->rps_ipi_next;
3697 
3698 			if (cpu_online(remsd->cpu))
3699 				__smp_call_function_single(remsd->cpu,
3700 							   &remsd->csd, 0);
3701 			remsd = next;
3702 		}
3703 	} else
3704 #endif
3705 		local_irq_enable();
3706 }
3707 
3708 static int process_backlog(struct napi_struct *napi, int quota)
3709 {
3710 	int work = 0;
3711 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
3712 
3713 #ifdef CONFIG_RPS
	/* Check if we have pending IPIs; it's better to send them now
	 * rather than waiting for net_rx_action() to end.
	 */
3717 	if (sd->rps_ipi_list) {
3718 		local_irq_disable();
3719 		net_rps_action_and_irq_enable(sd);
3720 	}
3721 #endif
3722 	napi->weight = weight_p;
3723 	local_irq_disable();
3724 	while (work < quota) {
3725 		struct sk_buff *skb;
3726 		unsigned int qlen;
3727 
3728 		while ((skb = __skb_dequeue(&sd->process_queue))) {
3729 			local_irq_enable();
3730 			__netif_receive_skb(skb);
3731 			local_irq_disable();
3732 			input_queue_head_incr(sd);
3733 			if (++work >= quota) {
3734 				local_irq_enable();
3735 				return work;
3736 			}
3737 		}
3738 
3739 		rps_lock(sd);
3740 		qlen = skb_queue_len(&sd->input_pkt_queue);
3741 		if (qlen)
3742 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
3743 						   &sd->process_queue);
3744 
3745 		if (qlen < quota - work) {
			/*
			 * Inline a custom version of __napi_complete().
			 * Only the current CPU owns and manipulates this napi,
			 * and NAPI_STATE_SCHED is the only possible flag set
			 * on the backlog.
			 * We can use a plain write instead of clear_bit(),
			 * and we don't need an smp_mb() memory barrier.
			 */
3753 			list_del(&napi->poll_list);
3754 			napi->state = 0;
3755 
3756 			quota = work + qlen;
3757 		}
3758 		rps_unlock(sd);
3759 	}
3760 	local_irq_enable();
3761 
3762 	return work;
3763 }
3764 
3765 /**
3766  * __napi_schedule - schedule for receive
3767  * @n: entry to schedule
3768  *
3769  * The entry's receive function will be scheduled to run
3770  */
3771 void __napi_schedule(struct napi_struct *n)
3772 {
3773 	unsigned long flags;
3774 
3775 	local_irq_save(flags);
3776 	____napi_schedule(&__get_cpu_var(softnet_data), n);
3777 	local_irq_restore(flags);
3778 }
3779 EXPORT_SYMBOL(__napi_schedule);
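
/*
 * Illustrative usage sketch (not part of this file): a driver interrupt
 * handler typically defers receive work to NAPI.  The mydrv_* names and
 * the priv->napi member are assumptions made for the example only.
 *
 *	static irqreturn_t mydrv_irq(int irq, void *data)
 *	{
 *		struct mydrv_priv *priv = data;
 *
 *		mydrv_disable_rx_irq(priv);
 *		if (napi_schedule_prep(&priv->napi))
 *			__napi_schedule(&priv->napi);
 *		return IRQ_HANDLED;
 *	}
 */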
3780 
3781 void __napi_complete(struct napi_struct *n)
3782 {
3783 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3784 	BUG_ON(n->gro_list);
3785 
3786 	list_del(&n->poll_list);
3787 	smp_mb__before_clear_bit();
3788 	clear_bit(NAPI_STATE_SCHED, &n->state);
3789 }
3790 EXPORT_SYMBOL(__napi_complete);
3791 
3792 void napi_complete(struct napi_struct *n)
3793 {
3794 	unsigned long flags;
3795 
3796 	/*
3797 	 * Don't let napi dequeue from the cpu poll list
3798 	 * just in case it is running on a different cpu.
3799 	 */
3800 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3801 		return;
3802 
3803 	napi_gro_flush(n);
3804 	local_irq_save(flags);
3805 	__napi_complete(n);
3806 	local_irq_restore(flags);
3807 }
3808 EXPORT_SYMBOL(napi_complete);
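
/*
 * Illustrative usage sketch (hypothetical driver): a ->poll() callback
 * calls napi_complete() only when it did less work than its budget, and
 * then re-enables the device interrupt.  The mydrv_* names are assumptions.
 *
 *	static int mydrv_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct mydrv_priv *priv = container_of(napi, struct mydrv_priv, napi);
 *		int work = mydrv_clean_rx_ring(priv, budget);
 *
 *		if (work < budget) {
 *			napi_complete(napi);
 *			mydrv_enable_rx_irq(priv);
 *		}
 *		return work;
 *	}
 */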
3809 
3810 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3811 		    int (*poll)(struct napi_struct *, int), int weight)
3812 {
3813 	INIT_LIST_HEAD(&napi->poll_list);
3814 	napi->gro_count = 0;
3815 	napi->gro_list = NULL;
3816 	napi->skb = NULL;
3817 	napi->poll = poll;
3818 	napi->weight = weight;
3819 	list_add(&napi->dev_list, &dev->napi_list);
3820 	napi->dev = dev;
3821 #ifdef CONFIG_NETPOLL
3822 	spin_lock_init(&napi->poll_lock);
3823 	napi->poll_owner = -1;
3824 #endif
3825 	set_bit(NAPI_STATE_SCHED, &napi->state);
3826 }
3827 EXPORT_SYMBOL(netif_napi_add);
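
/*
 * Illustrative usage sketch: a driver registers its NAPI context once at
 * probe time, before the interface is brought up, and removes it again on
 * teardown with netif_napi_del() below.  The weight of 64 is the customary
 * default for Ethernet drivers; mydrv_poll is a hypothetical poll routine.
 *
 *	netif_napi_add(netdev, &priv->napi, mydrv_poll, 64);
 *	...
 *	netif_napi_del(&priv->napi);
 */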
3828 
3829 void netif_napi_del(struct napi_struct *napi)
3830 {
3831 	struct sk_buff *skb, *next;
3832 
3833 	list_del_init(&napi->dev_list);
3834 	napi_free_frags(napi);
3835 
3836 	for (skb = napi->gro_list; skb; skb = next) {
3837 		next = skb->next;
3838 		skb->next = NULL;
3839 		kfree_skb(skb);
3840 	}
3841 
3842 	napi->gro_list = NULL;
3843 	napi->gro_count = 0;
3844 }
3845 EXPORT_SYMBOL(netif_napi_del);
3846 
3847 static void net_rx_action(struct softirq_action *h)
3848 {
3849 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3850 	unsigned long time_limit = jiffies + 2;
3851 	int budget = netdev_budget;
3852 	void *have;
3853 
3854 	local_irq_disable();
3855 
3856 	while (!list_empty(&sd->poll_list)) {
3857 		struct napi_struct *n;
3858 		int work, weight;
3859 
3860 		/* If the softirq window is exhausted then punt.
3861 		 * Allow this to run for 2 jiffies, which allows
3862 		 * an average latency of 1.5/HZ.
3863 		 */
3864 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3865 			goto softnet_break;
3866 
3867 		local_irq_enable();
3868 
3869 		/* Even though interrupts have been re-enabled, this
3870 		 * access is safe because interrupts can only add new
3871 		 * entries to the tail of this list, and only ->poll()
3872 		 * calls can remove this head entry from the list.
3873 		 */
3874 		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3875 
3876 		have = netpoll_poll_lock(n);
3877 
3878 		weight = n->weight;
3879 
3880 		/* This NAPI_STATE_SCHED test is for avoiding a race
3881 		 * with netpoll's poll_napi().  Only the entity which
3882 		 * obtains the lock and sees NAPI_STATE_SCHED set will
3883 		 * actually make the ->poll() call.  Therefore we avoid
3884 		 * accidentally calling ->poll() when NAPI is not scheduled.
3885 		 */
3886 		work = 0;
3887 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3888 			work = n->poll(n, weight);
3889 			trace_napi_poll(n);
3890 		}
3891 
3892 		WARN_ON_ONCE(work > weight);
3893 
3894 		budget -= work;
3895 
3896 		local_irq_disable();
3897 
3898 		/* Drivers must not modify the NAPI state if they
3899 		 * consume the entire weight.  In such cases this code
3900 		 * still "owns" the NAPI instance and therefore can
3901 		 * move the instance around on the list at-will.
3902 		 */
3903 		if (unlikely(work == weight)) {
3904 			if (unlikely(napi_disable_pending(n))) {
3905 				local_irq_enable();
3906 				napi_complete(n);
3907 				local_irq_disable();
3908 			} else
3909 				list_move_tail(&n->poll_list, &sd->poll_list);
3910 		}
3911 
3912 		netpoll_poll_unlock(have);
3913 	}
3914 out:
3915 	net_rps_action_and_irq_enable(sd);
3916 
3917 #ifdef CONFIG_NET_DMA
3918 	/*
3919 	 * There may not be any more sk_buffs coming right now, so push
3920 	 * any pending DMA copies to hardware
3921 	 */
3922 	dma_issue_pending_all();
3923 #endif
3924 
3925 	return;
3926 
3927 softnet_break:
3928 	sd->time_squeeze++;
3929 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
3930 	goto out;
3931 }
3932 
3933 static gifconf_func_t *gifconf_list[NPROTO];
3934 
3935 /**
3936  *	register_gifconf	-	register a SIOCGIFCONF handler
3937  *	@family: Address family
3938  *	@gifconf: Function handler
3939  *
3940  *	Register protocol dependent address dumping routines. The handler
3941  *	that is passed must not be freed or reused until it has been replaced
3942  *	by another handler.
3943  */
3944 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3945 {
3946 	if (family >= NPROTO)
3947 		return -EINVAL;
3948 	gifconf_list[family] = gifconf;
3949 	return 0;
3950 }
3951 EXPORT_SYMBOL(register_gifconf);
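
/*
 * Usage sketch: an address family registers its dump routine once at
 * init time, roughly the way the IPv4 code does it in net/ipv4/devinet.c:
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 */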
3952 
3953 
3954 /*
3955  *	Map an interface index to its name (SIOCGIFNAME)
3956  */
3957 
3958 /*
3959  *	We need this ioctl for efficient implementation of the
3960  *	if_indextoname() function required by the IPv6 API.  Without
3961  *	it, we would have to search all the interfaces to find a
3962  *	match.  --pb
3963  */
3964 
3965 static int dev_ifname(struct net *net, struct ifreq __user *arg)
3966 {
3967 	struct net_device *dev;
3968 	struct ifreq ifr;
3969 
3970 	/*
3971 	 *	Fetch the caller's info block.
3972 	 */
3973 
3974 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3975 		return -EFAULT;
3976 
3977 	rcu_read_lock();
3978 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
3979 	if (!dev) {
3980 		rcu_read_unlock();
3981 		return -ENODEV;
3982 	}
3983 
3984 	strcpy(ifr.ifr_name, dev->name);
3985 	rcu_read_unlock();
3986 
3987 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3988 		return -EFAULT;
3989 	return 0;
3990 }
3991 
3992 /*
3993  *	Perform a SIOCGIFCONF call. This structure will change
3994  *	size eventually, and there is nothing I can do about it.
3995  *	Thus we will need a 'compatibility mode'.
3996  */
3997 
3998 static int dev_ifconf(struct net *net, char __user *arg)
3999 {
4000 	struct ifconf ifc;
4001 	struct net_device *dev;
4002 	char __user *pos;
4003 	int len;
4004 	int total;
4005 	int i;
4006 
4007 	/*
4008 	 *	Fetch the caller's info block.
4009 	 */
4010 
4011 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
4012 		return -EFAULT;
4013 
4014 	pos = ifc.ifc_buf;
4015 	len = ifc.ifc_len;
4016 
4017 	/*
4018 	 *	Loop over the interfaces, and write an info block for each.
4019 	 */
4020 
4021 	total = 0;
4022 	for_each_netdev(net, dev) {
4023 		for (i = 0; i < NPROTO; i++) {
4024 			if (gifconf_list[i]) {
4025 				int done;
4026 				if (!pos)
4027 					done = gifconf_list[i](dev, NULL, 0);
4028 				else
4029 					done = gifconf_list[i](dev, pos + total,
4030 							       len - total);
4031 				if (done < 0)
4032 					return -EFAULT;
4033 				total += done;
4034 			}
4035 		}
4036 	}
4037 
4038 	/*
4039 	 *	All done.  Write the updated control block back to the caller.
4040 	 */
4041 	ifc.ifc_len = total;
4042 
4043 	/*
4044 	 * 	Both BSD and Solaris return 0 here, so we do too.
4045 	 */
4046 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4047 }
4048 
4049 #ifdef CONFIG_PROC_FS
4050 
4051 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
4052 
4053 #define get_bucket(x) ((x) >> BUCKET_SPACE)
4054 #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4055 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4056 
4057 static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
4058 {
4059 	struct net *net = seq_file_net(seq);
4060 	struct net_device *dev;
4061 	struct hlist_node *p;
4062 	struct hlist_head *h;
4063 	unsigned int count = 0, offset = get_offset(*pos);
4064 
4065 	h = &net->dev_name_head[get_bucket(*pos)];
4066 	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
4067 		if (++count == offset)
4068 			return dev;
4069 	}
4070 
4071 	return NULL;
4072 }
4073 
4074 static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
4075 {
4076 	struct net_device *dev;
4077 	unsigned int bucket;
4078 
4079 	do {
4080 		dev = dev_from_same_bucket(seq, pos);
4081 		if (dev)
4082 			return dev;
4083 
4084 		bucket = get_bucket(*pos) + 1;
4085 		*pos = set_bucket_offset(bucket, 1);
4086 	} while (bucket < NETDEV_HASHENTRIES);
4087 
4088 	return NULL;
4089 }
4090 
4091 /*
4092  *	This is invoked by the /proc filesystem handler to display a device
4093  *	in detail.
4094  */
4095 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
4096 	__acquires(RCU)
4097 {
4098 	rcu_read_lock();
4099 	if (!*pos)
4100 		return SEQ_START_TOKEN;
4101 
4102 	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
4103 		return NULL;
4104 
4105 	return dev_from_bucket(seq, pos);
4106 }
4107 
4108 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4109 {
4110 	++*pos;
4111 	return dev_from_bucket(seq, pos);
4112 }
4113 
4114 void dev_seq_stop(struct seq_file *seq, void *v)
4115 	__releases(RCU)
4116 {
4117 	rcu_read_unlock();
4118 }
4119 
4120 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4121 {
4122 	struct rtnl_link_stats64 temp;
4123 	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4124 
4125 	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
4126 		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4127 		   dev->name, stats->rx_bytes, stats->rx_packets,
4128 		   stats->rx_errors,
4129 		   stats->rx_dropped + stats->rx_missed_errors,
4130 		   stats->rx_fifo_errors,
4131 		   stats->rx_length_errors + stats->rx_over_errors +
4132 		    stats->rx_crc_errors + stats->rx_frame_errors,
4133 		   stats->rx_compressed, stats->multicast,
4134 		   stats->tx_bytes, stats->tx_packets,
4135 		   stats->tx_errors, stats->tx_dropped,
4136 		   stats->tx_fifo_errors, stats->collisions,
4137 		   stats->tx_carrier_errors +
4138 		    stats->tx_aborted_errors +
4139 		    stats->tx_window_errors +
4140 		    stats->tx_heartbeat_errors,
4141 		   stats->tx_compressed);
4142 }
4143 
4144 /*
4145  *	Called from the procfs module. This now uses the new arbitrary-sized
4146  *	/proc/net interface to create /proc/net/dev
4147  */
4148 static int dev_seq_show(struct seq_file *seq, void *v)
4149 {
4150 	if (v == SEQ_START_TOKEN)
4151 		seq_puts(seq, "Inter-|   Receive                            "
4152 			      "                    |  Transmit\n"
4153 			      " face |bytes    packets errs drop fifo frame "
4154 			      "compressed multicast|bytes    packets errs "
4155 			      "drop fifo colls carrier compressed\n");
4156 	else
4157 		dev_seq_printf_stats(seq, v);
4158 	return 0;
4159 }
4160 
4161 static struct softnet_data *softnet_get_online(loff_t *pos)
4162 {
4163 	struct softnet_data *sd = NULL;
4164 
4165 	while (*pos < nr_cpu_ids)
4166 		if (cpu_online(*pos)) {
4167 			sd = &per_cpu(softnet_data, *pos);
4168 			break;
4169 		} else
4170 			++*pos;
4171 	return sd;
4172 }
4173 
4174 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4175 {
4176 	return softnet_get_online(pos);
4177 }
4178 
4179 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4180 {
4181 	++*pos;
4182 	return softnet_get_online(pos);
4183 }
4184 
4185 static void softnet_seq_stop(struct seq_file *seq, void *v)
4186 {
4187 }
4188 
4189 static int softnet_seq_show(struct seq_file *seq, void *v)
4190 {
4191 	struct softnet_data *sd = v;
4192 
4193 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
4194 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
4195 		   0, 0, 0, 0, /* was fastroute */
4196 		   sd->cpu_collision, sd->received_rps);
4197 	return 0;
4198 }
4199 
4200 static const struct seq_operations dev_seq_ops = {
4201 	.start = dev_seq_start,
4202 	.next  = dev_seq_next,
4203 	.stop  = dev_seq_stop,
4204 	.show  = dev_seq_show,
4205 };
4206 
4207 static int dev_seq_open(struct inode *inode, struct file *file)
4208 {
4209 	return seq_open_net(inode, file, &dev_seq_ops,
4210 			    sizeof(struct seq_net_private));
4211 }
4212 
4213 static const struct file_operations dev_seq_fops = {
4214 	.owner	 = THIS_MODULE,
4215 	.open    = dev_seq_open,
4216 	.read    = seq_read,
4217 	.llseek  = seq_lseek,
4218 	.release = seq_release_net,
4219 };
4220 
4221 static const struct seq_operations softnet_seq_ops = {
4222 	.start = softnet_seq_start,
4223 	.next  = softnet_seq_next,
4224 	.stop  = softnet_seq_stop,
4225 	.show  = softnet_seq_show,
4226 };
4227 
4228 static int softnet_seq_open(struct inode *inode, struct file *file)
4229 {
4230 	return seq_open(file, &softnet_seq_ops);
4231 }
4232 
4233 static const struct file_operations softnet_seq_fops = {
4234 	.owner	 = THIS_MODULE,
4235 	.open    = softnet_seq_open,
4236 	.read    = seq_read,
4237 	.llseek  = seq_lseek,
4238 	.release = seq_release,
4239 };
4240 
4241 static void *ptype_get_idx(loff_t pos)
4242 {
4243 	struct packet_type *pt = NULL;
4244 	loff_t i = 0;
4245 	int t;
4246 
4247 	list_for_each_entry_rcu(pt, &ptype_all, list) {
4248 		if (i == pos)
4249 			return pt;
4250 		++i;
4251 	}
4252 
4253 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
4254 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
4255 			if (i == pos)
4256 				return pt;
4257 			++i;
4258 		}
4259 	}
4260 	return NULL;
4261 }
4262 
4263 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
4264 	__acquires(RCU)
4265 {
4266 	rcu_read_lock();
4267 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4268 }
4269 
4270 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4271 {
4272 	struct packet_type *pt;
4273 	struct list_head *nxt;
4274 	int hash;
4275 
4276 	++*pos;
4277 	if (v == SEQ_START_TOKEN)
4278 		return ptype_get_idx(0);
4279 
4280 	pt = v;
4281 	nxt = pt->list.next;
4282 	if (pt->type == htons(ETH_P_ALL)) {
4283 		if (nxt != &ptype_all)
4284 			goto found;
4285 		hash = 0;
4286 		nxt = ptype_base[0].next;
4287 	} else
4288 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4289 
4290 	while (nxt == &ptype_base[hash]) {
4291 		if (++hash >= PTYPE_HASH_SIZE)
4292 			return NULL;
4293 		nxt = ptype_base[hash].next;
4294 	}
4295 found:
4296 	return list_entry(nxt, struct packet_type, list);
4297 }
4298 
4299 static void ptype_seq_stop(struct seq_file *seq, void *v)
4300 	__releases(RCU)
4301 {
4302 	rcu_read_unlock();
4303 }
4304 
4305 static int ptype_seq_show(struct seq_file *seq, void *v)
4306 {
4307 	struct packet_type *pt = v;
4308 
4309 	if (v == SEQ_START_TOKEN)
4310 		seq_puts(seq, "Type Device      Function\n");
4311 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
4312 		if (pt->type == htons(ETH_P_ALL))
4313 			seq_puts(seq, "ALL ");
4314 		else
4315 			seq_printf(seq, "%04x", ntohs(pt->type));
4316 
4317 		seq_printf(seq, " %-8s %pF\n",
4318 			   pt->dev ? pt->dev->name : "", pt->func);
4319 	}
4320 
4321 	return 0;
4322 }
4323 
4324 static const struct seq_operations ptype_seq_ops = {
4325 	.start = ptype_seq_start,
4326 	.next  = ptype_seq_next,
4327 	.stop  = ptype_seq_stop,
4328 	.show  = ptype_seq_show,
4329 };
4330 
4331 static int ptype_seq_open(struct inode *inode, struct file *file)
4332 {
4333 	return seq_open_net(inode, file, &ptype_seq_ops,
4334 			sizeof(struct seq_net_private));
4335 }
4336 
4337 static const struct file_operations ptype_seq_fops = {
4338 	.owner	 = THIS_MODULE,
4339 	.open    = ptype_seq_open,
4340 	.read    = seq_read,
4341 	.llseek  = seq_lseek,
4342 	.release = seq_release_net,
4343 };
4344 
4345 
4346 static int __net_init dev_proc_net_init(struct net *net)
4347 {
4348 	int rc = -ENOMEM;
4349 
4350 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4351 		goto out;
4352 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4353 		goto out_dev;
4354 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4355 		goto out_softnet;
4356 
4357 	if (wext_proc_init(net))
4358 		goto out_ptype;
4359 	rc = 0;
4360 out:
4361 	return rc;
4362 out_ptype:
4363 	proc_net_remove(net, "ptype");
4364 out_softnet:
4365 	proc_net_remove(net, "softnet_stat");
4366 out_dev:
4367 	proc_net_remove(net, "dev");
4368 	goto out;
4369 }
4370 
4371 static void __net_exit dev_proc_net_exit(struct net *net)
4372 {
4373 	wext_proc_exit(net);
4374 
4375 	proc_net_remove(net, "ptype");
4376 	proc_net_remove(net, "softnet_stat");
4377 	proc_net_remove(net, "dev");
4378 }
4379 
4380 static struct pernet_operations __net_initdata dev_proc_ops = {
4381 	.init = dev_proc_net_init,
4382 	.exit = dev_proc_net_exit,
4383 };
4384 
4385 static int __init dev_proc_init(void)
4386 {
4387 	return register_pernet_subsys(&dev_proc_ops);
4388 }
4389 #else
4390 #define dev_proc_init() 0
4391 #endif	/* CONFIG_PROC_FS */
4392 
4393 
4394 /**
4395  *	netdev_set_master	-	set up master pointer
4396  *	@slave: slave device
4397  *	@master: new master device
4398  *
4399  *	Changes the master device of the slave. Pass %NULL to break the
4400  *	bonding. The caller must hold the RTNL semaphore. On a failure
4401  *	a negative errno code is returned. On success the reference counts
4402  *	are adjusted and the function returns zero.
4403  */
4404 int netdev_set_master(struct net_device *slave, struct net_device *master)
4405 {
4406 	struct net_device *old = slave->master;
4407 
4408 	ASSERT_RTNL();
4409 
4410 	if (master) {
4411 		if (old)
4412 			return -EBUSY;
4413 		dev_hold(master);
4414 	}
4415 
4416 	slave->master = master;
4417 
4418 	if (old)
4419 		dev_put(old);
4420 	return 0;
4421 }
4422 EXPORT_SYMBOL(netdev_set_master);
4423 
4424 /**
4425  *	netdev_set_bond_master	-	set up bonding master/slave pair
4426  *	@slave: slave device
4427  *	@master: new master device
4428  *
4429  *	Changes the master device of the slave. Pass %NULL to break the
4430  *	bonding. The caller must hold the RTNL semaphore. On a failure
4431  *	a negative errno code is returned. On success %RTM_NEWLINK is sent
4432  *	to the routing socket and the function returns zero.
4433  */
4434 int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4435 {
4436 	int err;
4437 
4438 	ASSERT_RTNL();
4439 
4440 	err = netdev_set_master(slave, master);
4441 	if (err)
4442 		return err;
4443 	if (master)
4444 		slave->flags |= IFF_SLAVE;
4445 	else
4446 		slave->flags &= ~IFF_SLAVE;
4447 
4448 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4449 	return 0;
4450 }
4451 EXPORT_SYMBOL(netdev_set_bond_master);
4452 
4453 static void dev_change_rx_flags(struct net_device *dev, int flags)
4454 {
4455 	const struct net_device_ops *ops = dev->netdev_ops;
4456 
4457 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4458 		ops->ndo_change_rx_flags(dev, flags);
4459 }
4460 
4461 static int __dev_set_promiscuity(struct net_device *dev, int inc)
4462 {
4463 	unsigned int old_flags = dev->flags;
4464 	uid_t uid;
4465 	gid_t gid;
4466 
4467 	ASSERT_RTNL();
4468 
4469 	dev->flags |= IFF_PROMISC;
4470 	dev->promiscuity += inc;
4471 	if (dev->promiscuity == 0) {
4472 		/*
4473 		 * Avoid overflow.
4474 		 * If inc causes overflow, leave promiscuity untouched and return an error.
4475 		 */
4476 		if (inc < 0)
4477 			dev->flags &= ~IFF_PROMISC;
4478 		else {
4479 			dev->promiscuity -= inc;
4480 			pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
4481 				dev->name);
4482 			return -EOVERFLOW;
4483 		}
4484 	}
4485 	if (dev->flags != old_flags) {
4486 		pr_info("device %s %s promiscuous mode\n",
4487 			dev->name,
4488 			dev->flags & IFF_PROMISC ? "entered" : "left");
4489 		if (audit_enabled) {
4490 			current_uid_gid(&uid, &gid);
4491 			audit_log(current->audit_context, GFP_ATOMIC,
4492 				AUDIT_ANOM_PROMISCUOUS,
4493 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4494 				dev->name, (dev->flags & IFF_PROMISC),
4495 				(old_flags & IFF_PROMISC),
4496 				audit_get_loginuid(current),
4497 				uid, gid,
4498 				audit_get_sessionid(current));
4499 		}
4500 
4501 		dev_change_rx_flags(dev, IFF_PROMISC);
4502 	}
4503 	return 0;
4504 }
4505 
4506 /**
4507  *	dev_set_promiscuity	- update promiscuity count on a device
4508  *	@dev: device
4509  *	@inc: modifier
4510  *
4511  *	Add or remove promiscuity from a device. While the count in the device
4512  *	remains above zero the interface remains promiscuous. Once it hits zero
4513  *	the device reverts back to normal filtering operation. A negative inc
4514  *	value is used to drop promiscuity on the device.
4515  *	Return 0 if successful or a negative errno code on error.
4516  */
4517 int dev_set_promiscuity(struct net_device *dev, int inc)
4518 {
4519 	unsigned int old_flags = dev->flags;
4520 	int err;
4521 
4522 	err = __dev_set_promiscuity(dev, inc);
4523 	if (err < 0)
4524 		return err;
4525 	if (dev->flags != old_flags)
4526 		dev_set_rx_mode(dev);
4527 	return err;
4528 }
4529 EXPORT_SYMBOL(dev_set_promiscuity);
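
/*
 * Illustrative usage sketch: a feature that needs to see every frame
 * (a packet tap, for instance) bumps the promiscuity count while it is
 * active and drops it again when done, holding the RTNL lock:
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);
 *	...
 *	dev_set_promiscuity(dev, -1);
 *	rtnl_unlock();
 */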
4530 
4531 /**
4532  *	dev_set_allmulti	- update allmulti count on a device
4533  *	@dev: device
4534  *	@inc: modifier
4535  *
4536  *	Add or remove reception of all multicast frames to a device. While the
4537  *	count in the device remains above zero the interface remains listening
4538  *	to all multicast frames. Once it hits zero the device reverts back to normal
4539  *	filtering operation. A negative @inc value is used to drop the counter
4540  *	when releasing a resource needing all multicasts.
4541  *	Return 0 if successful or a negative errno code on error.
4542  */
4543 
4544 int dev_set_allmulti(struct net_device *dev, int inc)
4545 {
4546 	unsigned int old_flags = dev->flags;
4547 
4548 	ASSERT_RTNL();
4549 
4550 	dev->flags |= IFF_ALLMULTI;
4551 	dev->allmulti += inc;
4552 	if (dev->allmulti == 0) {
4553 		/*
4554 		 * Avoid overflow.
4555 		 * If inc causes overflow, leave allmulti untouched and return an error.
4556 		 */
4557 		if (inc < 0)
4558 			dev->flags &= ~IFF_ALLMULTI;
4559 		else {
4560 			dev->allmulti -= inc;
4561 			pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
4562 				dev->name);
4563 			return -EOVERFLOW;
4564 		}
4565 	}
4566 	if (dev->flags ^ old_flags) {
4567 		dev_change_rx_flags(dev, IFF_ALLMULTI);
4568 		dev_set_rx_mode(dev);
4569 	}
4570 	return 0;
4571 }
4572 EXPORT_SYMBOL(dev_set_allmulti);
4573 
4574 /*
4575  *	Upload unicast and multicast address lists to device and
4576  *	configure RX filtering. When the device doesn't support unicast
4577  *	filtering it is put in promiscuous mode while unicast addresses
4578  *	are present.
4579  */
4580 void __dev_set_rx_mode(struct net_device *dev)
4581 {
4582 	const struct net_device_ops *ops = dev->netdev_ops;
4583 
4584 	/* dev_open will call this function so the list will stay sane. */
4585 	if (!(dev->flags&IFF_UP))
4586 		return;
4587 
4588 	if (!netif_device_present(dev))
4589 		return;
4590 
4591 	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
4592 		/* Unicast address changes may only happen under the rtnl,
4593 		 * therefore calling __dev_set_promiscuity here is safe.
4594 		 */
4595 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4596 			__dev_set_promiscuity(dev, 1);
4597 			dev->uc_promisc = true;
4598 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4599 			__dev_set_promiscuity(dev, -1);
4600 			dev->uc_promisc = false;
4601 		}
4602 	}
4603 
4604 	if (ops->ndo_set_rx_mode)
4605 		ops->ndo_set_rx_mode(dev);
4606 }
4607 
4608 void dev_set_rx_mode(struct net_device *dev)
4609 {
4610 	netif_addr_lock_bh(dev);
4611 	__dev_set_rx_mode(dev);
4612 	netif_addr_unlock_bh(dev);
4613 }
4614 
4615 /**
4616  *	dev_get_flags - get flags reported to userspace
4617  *	@dev: device
4618  *
4619  *	Get the combination of flag bits exported through APIs to userspace.
4620  */
4621 unsigned dev_get_flags(const struct net_device *dev)
4622 {
4623 	unsigned flags;
4624 
4625 	flags = (dev->flags & ~(IFF_PROMISC |
4626 				IFF_ALLMULTI |
4627 				IFF_RUNNING |
4628 				IFF_LOWER_UP |
4629 				IFF_DORMANT)) |
4630 		(dev->gflags & (IFF_PROMISC |
4631 				IFF_ALLMULTI));
4632 
4633 	if (netif_running(dev)) {
4634 		if (netif_oper_up(dev))
4635 			flags |= IFF_RUNNING;
4636 		if (netif_carrier_ok(dev))
4637 			flags |= IFF_LOWER_UP;
4638 		if (netif_dormant(dev))
4639 			flags |= IFF_DORMANT;
4640 	}
4641 
4642 	return flags;
4643 }
4644 EXPORT_SYMBOL(dev_get_flags);
4645 
4646 int __dev_change_flags(struct net_device *dev, unsigned int flags)
4647 {
4648 	unsigned int old_flags = dev->flags;
4649 	int ret;
4650 
4651 	ASSERT_RTNL();
4652 
4653 	/*
4654 	 *	Set the flags on our device.
4655 	 */
4656 
4657 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4658 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4659 			       IFF_AUTOMEDIA)) |
4660 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4661 				    IFF_ALLMULTI));
4662 
4663 	/*
4664 	 *	Load in the correct multicast list now that the flags have changed.
4665 	 */
4666 
4667 	if ((old_flags ^ flags) & IFF_MULTICAST)
4668 		dev_change_rx_flags(dev, IFF_MULTICAST);
4669 
4670 	dev_set_rx_mode(dev);
4671 
4672 	/*
4673 	 *	Have we downed the interface? We handle IFF_UP ourselves
4674 	 *	according to user attempts to set it, rather than blindly
4675 	 *	setting it.
4676 	 */
4677 
4678 	ret = 0;
4679 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4680 		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4681 
4682 		if (!ret)
4683 			dev_set_rx_mode(dev);
4684 	}
4685 
4686 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4687 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4688 
4689 		dev->gflags ^= IFF_PROMISC;
4690 		dev_set_promiscuity(dev, inc);
4691 	}
4692 
4693 	/* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4694 	   is important. Some (broken) drivers set IFF_PROMISC when
4695 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4696 	 */
4697 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4698 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4699 
4700 		dev->gflags ^= IFF_ALLMULTI;
4701 		dev_set_allmulti(dev, inc);
4702 	}
4703 
4704 	return ret;
4705 }
4706 
4707 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4708 {
4709 	unsigned int changes = dev->flags ^ old_flags;
4710 
4711 	if (changes & IFF_UP) {
4712 		if (dev->flags & IFF_UP)
4713 			call_netdevice_notifiers(NETDEV_UP, dev);
4714 		else
4715 			call_netdevice_notifiers(NETDEV_DOWN, dev);
4716 	}
4717 
4718 	if (dev->flags & IFF_UP &&
4719 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4720 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4721 }
4722 
4723 /**
4724  *	dev_change_flags - change device settings
4725  *	@dev: device
4726  *	@flags: device state flags
4727  *
4728  *	Change settings on device based state flags. The flags are
4729  *	in the userspace exported format.
4730  */
4731 int dev_change_flags(struct net_device *dev, unsigned int flags)
4732 {
4733 	int ret;
4734 	unsigned int changes, old_flags = dev->flags;
4735 
4736 	ret = __dev_change_flags(dev, flags);
4737 	if (ret < 0)
4738 		return ret;
4739 
4740 	changes = old_flags ^ dev->flags;
4741 	if (changes)
4742 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4743 
4744 	__dev_notify_flags(dev, old_flags);
4745 	return ret;
4746 }
4747 EXPORT_SYMBOL(dev_change_flags);
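
/*
 * Illustrative usage sketch: bringing an interface administratively up
 * from kernel code, under the RTNL lock (the flags are in the userspace
 * exported format, as noted above):
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 *	rtnl_unlock();
 */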
4748 
4749 /**
4750  *	dev_set_mtu - Change maximum transfer unit
4751  *	@dev: device
4752  *	@new_mtu: new transfer unit
4753  *
4754  *	Change the maximum transfer size of the network device.
4755  */
4756 int dev_set_mtu(struct net_device *dev, int new_mtu)
4757 {
4758 	const struct net_device_ops *ops = dev->netdev_ops;
4759 	int err;
4760 
4761 	if (new_mtu == dev->mtu)
4762 		return 0;
4763 
4764 	/*	MTU must be positive.	 */
4765 	if (new_mtu < 0)
4766 		return -EINVAL;
4767 
4768 	if (!netif_device_present(dev))
4769 		return -ENODEV;
4770 
4771 	err = 0;
4772 	if (ops->ndo_change_mtu)
4773 		err = ops->ndo_change_mtu(dev, new_mtu);
4774 	else
4775 		dev->mtu = new_mtu;
4776 
4777 	if (!err && dev->flags & IFF_UP)
4778 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4779 	return err;
4780 }
4781 EXPORT_SYMBOL(dev_set_mtu);
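
/*
 * Illustrative usage sketch: changing the MTU from kernel code with the
 * RTNL lock held, as the SIOCSIFMTU ioctl path below does:
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 9000);
 *	rtnl_unlock();
 */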
4782 
4783 /**
4784  *	dev_set_group - Change group this device belongs to
4785  *	@dev: device
4786  *	@new_group: group this device should belong to
4787  */
4788 void dev_set_group(struct net_device *dev, int new_group)
4789 {
4790 	dev->group = new_group;
4791 }
4792 EXPORT_SYMBOL(dev_set_group);
4793 
4794 /**
4795  *	dev_set_mac_address - Change Media Access Control Address
4796  *	@dev: device
4797  *	@sa: new address
4798  *
4799  *	Change the hardware (MAC) address of the device
4800  */
4801 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4802 {
4803 	const struct net_device_ops *ops = dev->netdev_ops;
4804 	int err;
4805 
4806 	if (!ops->ndo_set_mac_address)
4807 		return -EOPNOTSUPP;
4808 	if (sa->sa_family != dev->type)
4809 		return -EINVAL;
4810 	if (!netif_device_present(dev))
4811 		return -ENODEV;
4812 	err = ops->ndo_set_mac_address(dev, sa);
4813 	if (!err)
4814 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4815 	return err;
4816 }
4817 EXPORT_SYMBOL(dev_set_mac_address);
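
/*
 * Illustrative usage sketch: setting a new hardware address from kernel
 * code under the RTNL lock, as the SIOCSIFHWADDR path below does.  The
 * sa_family must match dev->type; new_mac is a hypothetical buffer of
 * dev->addr_len bytes.
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, dev->addr_len);
 *	err = dev_set_mac_address(dev, &sa);
 */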
4818 
4819 /*
4820  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4821  */
4822 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4823 {
4824 	int err;
4825 	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4826 
4827 	if (!dev)
4828 		return -ENODEV;
4829 
4830 	switch (cmd) {
4831 	case SIOCGIFFLAGS:	/* Get interface flags */
4832 		ifr->ifr_flags = (short) dev_get_flags(dev);
4833 		return 0;
4834 
4835 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4836 				   (currently unused) */
4837 		ifr->ifr_metric = 0;
4838 		return 0;
4839 
4840 	case SIOCGIFMTU:	/* Get the MTU of a device */
4841 		ifr->ifr_mtu = dev->mtu;
4842 		return 0;
4843 
4844 	case SIOCGIFHWADDR:
4845 		if (!dev->addr_len)
4846 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4847 		else
4848 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4849 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4850 		ifr->ifr_hwaddr.sa_family = dev->type;
4851 		return 0;
4852 
4853 	case SIOCGIFSLAVE:
4854 		err = -EINVAL;
4855 		break;
4856 
4857 	case SIOCGIFMAP:
4858 		ifr->ifr_map.mem_start = dev->mem_start;
4859 		ifr->ifr_map.mem_end   = dev->mem_end;
4860 		ifr->ifr_map.base_addr = dev->base_addr;
4861 		ifr->ifr_map.irq       = dev->irq;
4862 		ifr->ifr_map.dma       = dev->dma;
4863 		ifr->ifr_map.port      = dev->if_port;
4864 		return 0;
4865 
4866 	case SIOCGIFINDEX:
4867 		ifr->ifr_ifindex = dev->ifindex;
4868 		return 0;
4869 
4870 	case SIOCGIFTXQLEN:
4871 		ifr->ifr_qlen = dev->tx_queue_len;
4872 		return 0;
4873 
4874 	default:
4875 		/* dev_ioctl() should ensure this case
4876 		 * is never reached
4877 		 */
4878 		WARN_ON(1);
4879 		err = -ENOTTY;
4880 		break;
4881 
4882 	}
4883 	return err;
4884 }
4885 
4886 /*
4887  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4888  */
4889 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4890 {
4891 	int err;
4892 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4893 	const struct net_device_ops *ops;
4894 
4895 	if (!dev)
4896 		return -ENODEV;
4897 
4898 	ops = dev->netdev_ops;
4899 
4900 	switch (cmd) {
4901 	case SIOCSIFFLAGS:	/* Set interface flags */
4902 		return dev_change_flags(dev, ifr->ifr_flags);
4903 
4904 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4905 				   (currently unused) */
4906 		return -EOPNOTSUPP;
4907 
4908 	case SIOCSIFMTU:	/* Set the MTU of a device */
4909 		return dev_set_mtu(dev, ifr->ifr_mtu);
4910 
4911 	case SIOCSIFHWADDR:
4912 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4913 
4914 	case SIOCSIFHWBROADCAST:
4915 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4916 			return -EINVAL;
4917 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4918 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4919 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4920 		return 0;
4921 
4922 	case SIOCSIFMAP:
4923 		if (ops->ndo_set_config) {
4924 			if (!netif_device_present(dev))
4925 				return -ENODEV;
4926 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4927 		}
4928 		return -EOPNOTSUPP;
4929 
4930 	case SIOCADDMULTI:
4931 		if (!ops->ndo_set_rx_mode ||
4932 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4933 			return -EINVAL;
4934 		if (!netif_device_present(dev))
4935 			return -ENODEV;
4936 		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4937 
4938 	case SIOCDELMULTI:
4939 		if (!ops->ndo_set_rx_mode ||
4940 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4941 			return -EINVAL;
4942 		if (!netif_device_present(dev))
4943 			return -ENODEV;
4944 		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4945 
4946 	case SIOCSIFTXQLEN:
4947 		if (ifr->ifr_qlen < 0)
4948 			return -EINVAL;
4949 		dev->tx_queue_len = ifr->ifr_qlen;
4950 		return 0;
4951 
4952 	case SIOCSIFNAME:
4953 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4954 		return dev_change_name(dev, ifr->ifr_newname);
4955 
4956 	case SIOCSHWTSTAMP:
4957 		err = net_hwtstamp_validate(ifr);
4958 		if (err)
4959 			return err;
4960 		/* fall through */
4961 
4962 	/*
4963 	 *	Unknown or private ioctl
4964 	 */
4965 	default:
4966 		if ((cmd >= SIOCDEVPRIVATE &&
4967 		    cmd <= SIOCDEVPRIVATE + 15) ||
4968 		    cmd == SIOCBONDENSLAVE ||
4969 		    cmd == SIOCBONDRELEASE ||
4970 		    cmd == SIOCBONDSETHWADDR ||
4971 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4972 		    cmd == SIOCBONDINFOQUERY ||
4973 		    cmd == SIOCBONDCHANGEACTIVE ||
4974 		    cmd == SIOCGMIIPHY ||
4975 		    cmd == SIOCGMIIREG ||
4976 		    cmd == SIOCSMIIREG ||
4977 		    cmd == SIOCBRADDIF ||
4978 		    cmd == SIOCBRDELIF ||
4979 		    cmd == SIOCSHWTSTAMP ||
4980 		    cmd == SIOCWANDEV) {
4981 			err = -EOPNOTSUPP;
4982 			if (ops->ndo_do_ioctl) {
4983 				if (netif_device_present(dev))
4984 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4985 				else
4986 					err = -ENODEV;
4987 			}
4988 		} else
4989 			err = -EINVAL;
4990 
4991 	}
4992 	return err;
4993 }
4994 
4995 /*
4996  *	This function handles all "interface"-type I/O control requests. The actual
4997  *	'doing' part of this is dev_ifsioc above.
4998  */
4999 
5000 /**
5001  *	dev_ioctl	-	network device ioctl
5002  *	@net: the applicable net namespace
5003  *	@cmd: command to issue
5004  *	@arg: pointer to a struct ifreq in user space
5005  *
5006  *	Issue ioctl functions to devices. This is normally called by the
5007  *	user space syscall interfaces but can sometimes be useful for
5008  *	other purposes. The return value is the return from the syscall if
5009  *	positive or a negative errno code on error.
5010  */
5011 
5012 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
5013 {
5014 	struct ifreq ifr;
5015 	int ret;
5016 	char *colon;
5017 
5018 	/* One special case: SIOCGIFCONF takes an ifconf argument
5019 	   and requires a shared lock, because it may sleep while writing
5020 	   to user space.
5021 	 */
5022 
5023 	if (cmd == SIOCGIFCONF) {
5024 		rtnl_lock();
5025 		ret = dev_ifconf(net, (char __user *) arg);
5026 		rtnl_unlock();
5027 		return ret;
5028 	}
5029 	if (cmd == SIOCGIFNAME)
5030 		return dev_ifname(net, (struct ifreq __user *)arg);
5031 
5032 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5033 		return -EFAULT;
5034 
5035 	ifr.ifr_name[IFNAMSIZ-1] = 0;
5036 
5037 	colon = strchr(ifr.ifr_name, ':');
5038 	if (colon)
5039 		*colon = 0;
5040 
5041 	/*
5042 	 *	See which interface the caller is talking about.
5043 	 */
5044 
5045 	switch (cmd) {
5046 	/*
5047 	 *	These ioctl calls:
5048 	 *	- can be done by all.
5049 	 *	- atomic and do not require locking.
5050 	 *	- return a value
5051 	 */
5052 	case SIOCGIFFLAGS:
5053 	case SIOCGIFMETRIC:
5054 	case SIOCGIFMTU:
5055 	case SIOCGIFHWADDR:
5056 	case SIOCGIFSLAVE:
5057 	case SIOCGIFMAP:
5058 	case SIOCGIFINDEX:
5059 	case SIOCGIFTXQLEN:
5060 		dev_load(net, ifr.ifr_name);
5061 		rcu_read_lock();
5062 		ret = dev_ifsioc_locked(net, &ifr, cmd);
5063 		rcu_read_unlock();
5064 		if (!ret) {
5065 			if (colon)
5066 				*colon = ':';
5067 			if (copy_to_user(arg, &ifr,
5068 					 sizeof(struct ifreq)))
5069 				ret = -EFAULT;
5070 		}
5071 		return ret;
5072 
5073 	case SIOCETHTOOL:
5074 		dev_load(net, ifr.ifr_name);
5075 		rtnl_lock();
5076 		ret = dev_ethtool(net, &ifr);
5077 		rtnl_unlock();
5078 		if (!ret) {
5079 			if (colon)
5080 				*colon = ':';
5081 			if (copy_to_user(arg, &ifr,
5082 					 sizeof(struct ifreq)))
5083 				ret = -EFAULT;
5084 		}
5085 		return ret;
5086 
5087 	/*
5088 	 *	These ioctl calls:
5089 	 *	- require superuser power.
5090 	 *	- require strict serialization.
5091 	 *	- return a value
5092 	 */
5093 	case SIOCGMIIPHY:
5094 	case SIOCGMIIREG:
5095 	case SIOCSIFNAME:
5096 		if (!capable(CAP_NET_ADMIN))
5097 			return -EPERM;
5098 		dev_load(net, ifr.ifr_name);
5099 		rtnl_lock();
5100 		ret = dev_ifsioc(net, &ifr, cmd);
5101 		rtnl_unlock();
5102 		if (!ret) {
5103 			if (colon)
5104 				*colon = ':';
5105 			if (copy_to_user(arg, &ifr,
5106 					 sizeof(struct ifreq)))
5107 				ret = -EFAULT;
5108 		}
5109 		return ret;
5110 
5111 	/*
5112 	 *	These ioctl calls:
5113 	 *	- require superuser power.
5114 	 *	- require strict serialization.
5115 	 *	- do not return a value
5116 	 */
5117 	case SIOCSIFFLAGS:
5118 	case SIOCSIFMETRIC:
5119 	case SIOCSIFMTU:
5120 	case SIOCSIFMAP:
5121 	case SIOCSIFHWADDR:
5122 	case SIOCSIFSLAVE:
5123 	case SIOCADDMULTI:
5124 	case SIOCDELMULTI:
5125 	case SIOCSIFHWBROADCAST:
5126 	case SIOCSIFTXQLEN:
5127 	case SIOCSMIIREG:
5128 	case SIOCBONDENSLAVE:
5129 	case SIOCBONDRELEASE:
5130 	case SIOCBONDSETHWADDR:
5131 	case SIOCBONDCHANGEACTIVE:
5132 	case SIOCBRADDIF:
5133 	case SIOCBRDELIF:
5134 	case SIOCSHWTSTAMP:
5135 		if (!capable(CAP_NET_ADMIN))
5136 			return -EPERM;
5137 		/* fall through */
5138 	case SIOCBONDSLAVEINFOQUERY:
5139 	case SIOCBONDINFOQUERY:
5140 		dev_load(net, ifr.ifr_name);
5141 		rtnl_lock();
5142 		ret = dev_ifsioc(net, &ifr, cmd);
5143 		rtnl_unlock();
5144 		return ret;
5145 
5146 	case SIOCGIFMEM:
5147 		/* Get the per device memory space. We can add this but
5148 		 * currently do not support it */
5149 	case SIOCSIFMEM:
5150 		/* Set the per device memory buffer space.
5151 		 * Not applicable in our case */
5152 	case SIOCSIFLINK:
5153 		return -ENOTTY;
5154 
5155 	/*
5156 	 *	Unknown or private ioctl.
5157 	 */
5158 	default:
5159 		if (cmd == SIOCWANDEV ||
5160 		    (cmd >= SIOCDEVPRIVATE &&
5161 		     cmd <= SIOCDEVPRIVATE + 15)) {
5162 			dev_load(net, ifr.ifr_name);
5163 			rtnl_lock();
5164 			ret = dev_ifsioc(net, &ifr, cmd);
5165 			rtnl_unlock();
5166 			if (!ret && copy_to_user(arg, &ifr,
5167 						 sizeof(struct ifreq)))
5168 				ret = -EFAULT;
5169 			return ret;
5170 		}
5171 		/* Take care of Wireless Extensions */
5172 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5173 			return wext_handle_ioctl(net, &ifr, cmd, arg);
5174 		return -ENOTTY;
5175 	}
5176 }
5177 
5178 
5179 /**
5180  *	dev_new_index	-	allocate an ifindex
5181  *	@net: the applicable net namespace
5182  *
5183  *	Returns a suitable unique value for a new device interface
5184  *	number.  The caller must hold the rtnl semaphore or the
5185  *	dev_base_lock to be sure it remains unique.
5186  */
5187 static int dev_new_index(struct net *net)
5188 {
5189 	static int ifindex;
5190 	for (;;) {
5191 		if (++ifindex <= 0)
5192 			ifindex = 1;
5193 		if (!__dev_get_by_index(net, ifindex))
5194 			return ifindex;
5195 	}
5196 }
5197 
5198 /* Delayed registration/unregistration */
5199 static LIST_HEAD(net_todo_list);
5200 
5201 static void net_set_todo(struct net_device *dev)
5202 {
5203 	list_add_tail(&dev->todo_list, &net_todo_list);
5204 }
5205 
5206 static void rollback_registered_many(struct list_head *head)
5207 {
5208 	struct net_device *dev, *tmp;
5209 
5210 	BUG_ON(dev_boot_phase);
5211 	ASSERT_RTNL();
5212 
5213 	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5214 		/* Some devices call this without ever having been
5215 		 * registered, to unwind a failed initialization. Remove
5216 		 * those devices and proceed with the remaining ones.
5217 		 */
5218 		if (dev->reg_state == NETREG_UNINITIALIZED) {
5219 			pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5220 				 dev->name, dev);
5221 
5222 			WARN_ON(1);
5223 			list_del(&dev->unreg_list);
5224 			continue;
5225 		}
5226 		dev->dismantle = true;
5227 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
5228 	}
5229 
5230 	/* If device is running, close it first. */
5231 	dev_close_many(head);
5232 
5233 	list_for_each_entry(dev, head, unreg_list) {
5234 		/* And unlink it from device chain. */
5235 		unlist_netdevice(dev);
5236 
5237 		dev->reg_state = NETREG_UNREGISTERING;
5238 	}
5239 
5240 	synchronize_net();
5241 
5242 	list_for_each_entry(dev, head, unreg_list) {
5243 		/* Shutdown queueing discipline. */
5244 		dev_shutdown(dev);
5245 
5246 
5247 		/* Notify protocols that we are about to destroy
5248 		   this device. They should clean up all of their state.
5249 		*/
5250 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5251 
5252 		if (!dev->rtnl_link_ops ||
5253 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5254 			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
5255 
5256 		/*
5257 		 *	Flush the unicast and multicast chains
5258 		 */
5259 		dev_uc_flush(dev);
5260 		dev_mc_flush(dev);
5261 
5262 		if (dev->netdev_ops->ndo_uninit)
5263 			dev->netdev_ops->ndo_uninit(dev);
5264 
5265 		/* Notifier chain MUST detach us from master device. */
5266 		WARN_ON(dev->master);
5267 
5268 		/* Remove entries from kobject tree */
5269 		netdev_unregister_kobject(dev);
5270 	}
5271 
5272 	/* Process any work delayed until the end of the batch */
5273 	dev = list_first_entry(head, struct net_device, unreg_list);
5274 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5275 
5276 	synchronize_net();
5277 
5278 	list_for_each_entry(dev, head, unreg_list)
5279 		dev_put(dev);
5280 }
5281 
5282 static void rollback_registered(struct net_device *dev)
5283 {
5284 	LIST_HEAD(single);
5285 
5286 	list_add(&dev->unreg_list, &single);
5287 	rollback_registered_many(&single);
5288 	list_del(&single);
5289 }
5290 
5291 static netdev_features_t netdev_fix_features(struct net_device *dev,
5292 	netdev_features_t features)
5293 {
5294 	/* Fix illegal checksum combinations */
5295 	if ((features & NETIF_F_HW_CSUM) &&
5296 	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5297 		netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5298 		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5299 	}
5300 
5301 	/* Fix illegal SG+CSUM combinations. */
5302 	if ((features & NETIF_F_SG) &&
5303 	    !(features & NETIF_F_ALL_CSUM)) {
5304 		netdev_dbg(dev,
5305 			"Dropping NETIF_F_SG since no checksum feature.\n");
5306 		features &= ~NETIF_F_SG;
5307 	}
5308 
5309 	/* TSO requires that SG is present as well. */
5310 	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5311 		netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5312 		features &= ~NETIF_F_ALL_TSO;
5313 	}
5314 
5315 	/* TSO ECN requires that TSO is present as well. */
5316 	if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5317 		features &= ~NETIF_F_TSO_ECN;
5318 
5319 	/* Software GSO depends on SG. */
5320 	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5321 		netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5322 		features &= ~NETIF_F_GSO;
5323 	}
5324 
5325 	/* UFO needs SG and checksumming */
5326 	if (features & NETIF_F_UFO) {
5327 		/* maybe split UFO into V4 and V6? */
5328 		if (!((features & NETIF_F_GEN_CSUM) ||
5329 		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5330 			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5331 			netdev_dbg(dev,
5332 				"Dropping NETIF_F_UFO since no checksum offload features.\n");
5333 			features &= ~NETIF_F_UFO;
5334 		}
5335 
5336 		if (!(features & NETIF_F_SG)) {
5337 			netdev_dbg(dev,
5338 				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5339 			features &= ~NETIF_F_UFO;
5340 		}
5341 	}
5342 
5343 	return features;
5344 }
5345 
5346 int __netdev_update_features(struct net_device *dev)
5347 {
5348 	netdev_features_t features;
5349 	int err = 0;
5350 
5351 	ASSERT_RTNL();
5352 
5353 	features = netdev_get_wanted_features(dev);
5354 
5355 	if (dev->netdev_ops->ndo_fix_features)
5356 		features = dev->netdev_ops->ndo_fix_features(dev, features);
5357 
5358 	/* driver might be less strict about feature dependencies */
5359 	features = netdev_fix_features(dev, features);
5360 
5361 	if (dev->features == features)
5362 		return 0;
5363 
5364 	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5365 		&dev->features, &features);
5366 
5367 	if (dev->netdev_ops->ndo_set_features)
5368 		err = dev->netdev_ops->ndo_set_features(dev, features);
5369 
5370 	if (unlikely(err < 0)) {
5371 		netdev_err(dev,
5372 			"set_features() failed (%d); wanted %pNF, left %pNF\n",
5373 			err, &features, &dev->features);
5374 		return -1;
5375 	}
5376 
5377 	if (!err)
5378 		dev->features = features;
5379 
5380 	return 1;
5381 }
5382 
5383 /**
5384  *	netdev_update_features - recalculate device features
5385  *	@dev: the device to check
5386  *
5387  *	Recalculate dev->features set and send notifications if it
5388  *	has changed. Should be called after driver or hardware dependent
5389  *	conditions might have changed that influence the features.
5390  */
5391 void netdev_update_features(struct net_device *dev)
5392 {
5393 	if (__netdev_update_features(dev))
5394 		netdev_features_change(dev);
5395 }
5396 EXPORT_SYMBOL(netdev_update_features);
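
/*
 * Illustrative usage sketch: after a hardware-dependent condition changes
 * (say, a firmware capability was toggled), a driver asks the core to
 * re-evaluate the feature set under the RTNL lock.  The priv->rx_csum_ok
 * flag is an assumption that the driver's ndo_fix_features would consult.
 *
 *	priv->rx_csum_ok = false;
 *	netdev_update_features(netdev);
 */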
5397 
5398 /**
5399  *	netdev_change_features - recalculate device features
5400  *	@dev: the device to check
5401  *
5402  *	Recalculate dev->features set and send notifications even
5403  *	if they have not changed. Should be called instead of
5404  *	netdev_update_features() if also dev->vlan_features might
5405  *	have changed to allow the changes to be propagated to stacked
5406  *	VLAN devices.
5407  */
5408 void netdev_change_features(struct net_device *dev)
5409 {
5410 	__netdev_update_features(dev);
5411 	netdev_features_change(dev);
5412 }
5413 EXPORT_SYMBOL(netdev_change_features);
5414 
5415 /**
5416  *	netif_stacked_transfer_operstate -	transfer operstate
5417  *	@rootdev: the root or lower level device to transfer state from
5418  *	@dev: the device to transfer operstate to
5419  *
5420  *	Transfer operational state from root to device. This is normally
5421  *	called when a stacking relationship exists between the root
5422  *	device and the device (a leaf device).
5423  */
5424 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5425 					struct net_device *dev)
5426 {
5427 	if (rootdev->operstate == IF_OPER_DORMANT)
5428 		netif_dormant_on(dev);
5429 	else
5430 		netif_dormant_off(dev);
5431 
5432 	if (netif_carrier_ok(rootdev)) {
5433 		if (!netif_carrier_ok(dev))
5434 			netif_carrier_on(dev);
5435 	} else {
5436 		if (netif_carrier_ok(dev))
5437 			netif_carrier_off(dev);
5438 	}
5439 }
5440 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5441 
5442 #ifdef CONFIG_RPS
5443 static int netif_alloc_rx_queues(struct net_device *dev)
5444 {
5445 	unsigned int i, count = dev->num_rx_queues;
5446 	struct netdev_rx_queue *rx;
5447 
5448 	BUG_ON(count < 1);
5449 
5450 	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5451 	if (!rx) {
5452 		pr_err("netdev: Unable to allocate %u rx queues\n", count);
5453 		return -ENOMEM;
5454 	}
5455 	dev->_rx = rx;
5456 
5457 	for (i = 0; i < count; i++)
5458 		rx[i].dev = dev;
5459 	return 0;
5460 }
5461 #endif
5462 
5463 static void netdev_init_one_queue(struct net_device *dev,
5464 				  struct netdev_queue *queue, void *_unused)
5465 {
5466 	/* Initialize queue lock */
5467 	spin_lock_init(&queue->_xmit_lock);
5468 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5469 	queue->xmit_lock_owner = -1;
5470 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
5471 	queue->dev = dev;
5472 #ifdef CONFIG_BQL
5473 	dql_init(&queue->dql, HZ);
5474 #endif
5475 }
5476 
5477 static int netif_alloc_netdev_queues(struct net_device *dev)
5478 {
5479 	unsigned int count = dev->num_tx_queues;
5480 	struct netdev_queue *tx;
5481 
5482 	BUG_ON(count < 1);
5483 
5484 	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5485 	if (!tx) {
5486 		pr_err("netdev: Unable to allocate %u tx queues\n", count);
5487 		return -ENOMEM;
5488 	}
5489 	dev->_tx = tx;
5490 
5491 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5492 	spin_lock_init(&dev->tx_global_lock);
5493 
5494 	return 0;
5495 }
5496 
5497 /**
5498  *	register_netdevice	- register a network device
5499  *	@dev: device to register
5500  *
5501  *	Take a completed network device structure and add it to the kernel
5502  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5503  *	chain. 0 is returned on success. A negative errno code is returned
5504  *	on a failure to set up the device, or if the name is a duplicate.
5505  *
5506  *	Callers must hold the rtnl semaphore. You may want
5507  *	register_netdev() instead of this.
5508  *
5509  *	BUGS:
5510  *	The locking appears insufficient to guarantee two parallel registers
5511  *	will not get the same name.
5512  */
5513 
5514 int register_netdevice(struct net_device *dev)
5515 {
5516 	int ret;
5517 	struct net *net = dev_net(dev);
5518 
5519 	BUG_ON(dev_boot_phase);
5520 	ASSERT_RTNL();
5521 
5522 	might_sleep();
5523 
5524 	/* When net_device's are persistent, this will be fatal. */
5525 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5526 	BUG_ON(!net);
5527 
5528 	spin_lock_init(&dev->addr_list_lock);
5529 	netdev_set_addr_lockdep_class(dev);
5530 
5531 	dev->iflink = -1;
5532 
5533 	ret = dev_get_valid_name(dev, dev->name);
5534 	if (ret < 0)
5535 		goto out;
5536 
5537 	/* Init, if this function is available */
5538 	if (dev->netdev_ops->ndo_init) {
5539 		ret = dev->netdev_ops->ndo_init(dev);
5540 		if (ret) {
5541 			if (ret > 0)
5542 				ret = -EIO;
5543 			goto out;
5544 		}
5545 	}
5546 
5547 	dev->ifindex = dev_new_index(net);
5548 	if (dev->iflink == -1)
5549 		dev->iflink = dev->ifindex;
5550 
5551 	/* Transfer changeable features to wanted_features and enable
5552 	 * software offloads (GSO and GRO).
5553 	 */
5554 	dev->hw_features |= NETIF_F_SOFT_FEATURES;
5555 	dev->features |= NETIF_F_SOFT_FEATURES;
5556 	dev->wanted_features = dev->features & dev->hw_features;
5557 
5558 	/* Turn on no cache copy if HW is doing checksum */
5559 	if (!(dev->flags & IFF_LOOPBACK)) {
5560 		dev->hw_features |= NETIF_F_NOCACHE_COPY;
5561 		if (dev->features & NETIF_F_ALL_CSUM) {
5562 			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5563 			dev->features |= NETIF_F_NOCACHE_COPY;
5564 		}
5565 	}
5566 
5567 	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
5568 	 */
5569 	dev->vlan_features |= NETIF_F_HIGHDMA;
5570 
5571 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5572 	ret = notifier_to_errno(ret);
5573 	if (ret)
5574 		goto err_uninit;
5575 
5576 	ret = netdev_register_kobject(dev);
5577 	if (ret)
5578 		goto err_uninit;
5579 	dev->reg_state = NETREG_REGISTERED;
5580 
5581 	__netdev_update_features(dev);
5582 
5583 	/*
5584 	 *	Default initial state at registration is that the
5585 	 *	device is present.
5586 	 */
5587 
5588 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5589 
5590 	dev_init_scheduler(dev);
5591 	dev_hold(dev);
5592 	list_netdevice(dev);
5593 
5594 	/* Notify protocols that a new device appeared. */
5595 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5596 	ret = notifier_to_errno(ret);
5597 	if (ret) {
5598 		rollback_registered(dev);
5599 		dev->reg_state = NETREG_UNREGISTERED;
5600 	}
5601 	/*
5602 	 *	Prevent userspace races by waiting until the network
5603 	 *	device is fully set up before sending notifications.
5604 	 */
5605 	if (!dev->rtnl_link_ops ||
5606 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5607 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5608 
5609 out:
5610 	return ret;
5611 
5612 err_uninit:
5613 	if (dev->netdev_ops->ndo_uninit)
5614 		dev->netdev_ops->ndo_uninit(dev);
5615 	goto out;
5616 }
5617 EXPORT_SYMBOL(register_netdevice);
5618 
5619 /**
5620  *	init_dummy_netdev	- init a dummy network device for NAPI
5621  *	@dev: device to init
5622  *
5623  *	This takes a network device structure and initializes the minimum
5624  *	number of fields so it can be used to schedule NAPI polls without
5625  *	registering a full-blown interface. This is to be used by drivers
5626  *	that need to tie several hardware interfaces to a single NAPI
5627  *	poll scheduler due to HW limitations.
5628  */
5629 int init_dummy_netdev(struct net_device *dev)
5630 {
5631 	/* Clear everything. Note we don't initialize spinlocks
5632 	 * as they aren't supposed to be taken by any of the
5633 	 * NAPI code, and this dummy netdev is supposed to be
5634 	 * used only for NAPI polls.
5635 	 */
5636 	memset(dev, 0, sizeof(struct net_device));
5637 
5638 	/* make sure we BUG if trying to hit standard
5639 	 * register/unregister code path
5640 	 */
5641 	dev->reg_state = NETREG_DUMMY;
5642 
5643 	/* NAPI wants this */
5644 	INIT_LIST_HEAD(&dev->napi_list);
5645 
5646 	/* a dummy interface is started by default */
5647 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5648 	set_bit(__LINK_STATE_START, &dev->state);
5649 
5650 	/* Note: We don't allocate pcpu_refcnt for dummy devices,
5651 	 * because users of this 'device' don't need to change
5652 	 * its refcount.
5653 	 */
5654 
5655 	return 0;
5656 }
5657 EXPORT_SYMBOL_GPL(init_dummy_netdev);
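
/*
 * Illustrative usage sketch: a driver that multiplexes several hardware
 * interfaces over one poll routine can hang its NAPI context off a dummy
 * netdev instead of a real one.  The names are assumptions.
 *
 *	init_dummy_netdev(&priv->dummy_dev);
 *	netif_napi_add(&priv->dummy_dev, &priv->napi, mydrv_poll, 64);
 */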
5658 
5659 
5660 /**
5661  *	register_netdev	- register a network device
5662  *	@dev: device to register
5663  *
5664  *	Take a completed network device structure and add it to the kernel
5665  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5666  *	chain. 0 is returned on success. A negative errno code is returned
5667  *	on a failure to set up the device, or if the name is a duplicate.
5668  *
5669  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
5670  *	and expands the device name if you passed a format string to
5671  *	alloc_netdev.
5672  */
5673 int register_netdev(struct net_device *dev)
5674 {
5675 	int err;
5676 
5677 	rtnl_lock();
5678 	err = register_netdevice(dev);
5679 	rtnl_unlock();
5680 	return err;
5681 }
5682 EXPORT_SYMBOL(register_netdev);
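
/*
 * Illustrative usage sketch (hypothetical Ethernet driver probe): allocate,
 * fill in the ops, then register; on failure the caller still owns and
 * must free the device.
 *
 *	netdev = alloc_etherdev(sizeof(struct mydrv_priv));
 *	if (!netdev)
 *		return -ENOMEM;
 *	netdev->netdev_ops = &mydrv_netdev_ops;
 *	err = register_netdev(netdev);
 *	if (err) {
 *		free_netdev(netdev);
 *		return err;
 *	}
 */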
5683 
5684 int netdev_refcnt_read(const struct net_device *dev)
5685 {
5686 	int i, refcnt = 0;
5687 
5688 	for_each_possible_cpu(i)
5689 		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5690 	return refcnt;
5691 }
5692 EXPORT_SYMBOL(netdev_refcnt_read);
5693 
5694 /*
5695  * netdev_wait_allrefs - wait until all references are gone.
5696  *
5697  * This is called when unregistering network devices.
5698  *
5699  * Any protocol or device that holds a reference should register
5700  * for netdevice notification, and cleanup and put back the
5701  * reference if they receive an UNREGISTER event.
5702  * We can get stuck here if buggy protocols don't correctly
5703  * call dev_put.
5704  */
5705 static void netdev_wait_allrefs(struct net_device *dev)
5706 {
5707 	unsigned long rebroadcast_time, warning_time;
5708 	int refcnt;
5709 
5710 	linkwatch_forget_dev(dev);
5711 
5712 	rebroadcast_time = warning_time = jiffies;
5713 	refcnt = netdev_refcnt_read(dev);
5714 
5715 	while (refcnt != 0) {
5716 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5717 			rtnl_lock();
5718 
5719 			/* Rebroadcast unregister notification */
5720 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5721 			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5722 			 * should have already handled it the first time */
5723 
5724 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5725 				     &dev->state)) {
5726 				/* We must not have linkwatch events
5727 				 * pending on unregister. If this
5728 				 * happens, we simply run the queue
5729 				 * unscheduled, resulting in a noop
5730 				 * for this device.
5731 				 */
5732 				linkwatch_run_queue();
5733 			}
5734 
5735 			__rtnl_unlock();
5736 
5737 			rebroadcast_time = jiffies;
5738 		}
5739 
5740 		msleep(250);
5741 
5742 		refcnt = netdev_refcnt_read(dev);
5743 
5744 		if (time_after(jiffies, warning_time + 10 * HZ)) {
5745 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
5746 				 dev->name, refcnt);
5747 			warning_time = jiffies;
5748 		}
5749 	}
5750 }
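
/* Illustrative only: what the comment above asks of reference holders.
 * A minimal sketch, assuming a hypothetical subsystem that keeps a
 * dev_hold() reference in "example_dev" and registers this notifier
 * with register_netdevice_notifier().
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER && dev == example_dev) {
 *			example_dev = NULL;
 *			dev_put(dev);	// lets netdev_wait_allrefs() finish
 *		}
 *		return NOTIFY_DONE;
 *	}
 */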
5751 
5752 /* The sequence is:
5753  *
5754  *	rtnl_lock();
5755  *	...
5756  *	register_netdevice(x1);
5757  *	register_netdevice(x2);
5758  *	...
5759  *	unregister_netdevice(y1);
5760  *	unregister_netdevice(y2);
5761  *      ...
5762  *	rtnl_unlock();
5763  *	free_netdev(y1);
5764  *	free_netdev(y2);
5765  *
5766  * We are invoked by rtnl_unlock().
5767  * This allows us to deal with problems:
5768  * 1) We can delete sysfs objects which invoke hotplug
5769  *    without deadlocking with linkwatch via keventd.
5770  * 2) Since we run with the RTNL semaphore not held, we can sleep
5771  *    safely in order to wait for the netdev refcnt to drop to zero.
5772  *
5773  * We must not return until all unregister events added during
5774  * the interval the lock was held have been completed.
5775  */
5776 void netdev_run_todo(void)
5777 {
5778 	struct list_head list;
5779 
5780 	/* Snapshot list, allow later requests */
5781 	list_replace_init(&net_todo_list, &list);
5782 
5783 	__rtnl_unlock();
5784 
5785 	/* Wait for rcu callbacks to finish before attempting to drain
5786 	 * the device list.  This usually avoids a 250ms wait.
5787 	 */
5788 	if (!list_empty(&list))
5789 		rcu_barrier();
5790 
5791 	while (!list_empty(&list)) {
5792 		struct net_device *dev
5793 			= list_first_entry(&list, struct net_device, todo_list);
5794 		list_del(&dev->todo_list);
5795 
5796 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5797 			pr_err("network todo '%s' but state %d\n",
5798 			       dev->name, dev->reg_state);
5799 			dump_stack();
5800 			continue;
5801 		}
5802 
5803 		dev->reg_state = NETREG_UNREGISTERED;
5804 
5805 		on_each_cpu(flush_backlog, dev, 1);
5806 
5807 		netdev_wait_allrefs(dev);
5808 
5809 		/* paranoia */
5810 		BUG_ON(netdev_refcnt_read(dev));
5811 		WARN_ON(rcu_access_pointer(dev->ip_ptr));
5812 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
5813 		WARN_ON(dev->dn_ptr);
5814 
5815 		if (dev->destructor)
5816 			dev->destructor(dev);
5817 
5818 		/* Free network device */
5819 		kobject_put(&dev->dev.kobj);
5820 	}
5821 }
5822 
5823 /* Convert net_device_stats to rtnl_link_stats64.  They have the same
5824  * fields in the same order, with only the type differing.
5825  */
5826 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
5827 			     const struct net_device_stats *netdev_stats)
5828 {
5829 #if BITS_PER_LONG == 64
5830 	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
5831 	memcpy(stats64, netdev_stats, sizeof(*stats64));
5832 #else
5833 	size_t i, n = sizeof(*stats64) / sizeof(u64);
5834 	const unsigned long *src = (const unsigned long *)netdev_stats;
5835 	u64 *dst = (u64 *)stats64;
5836 
5837 	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
5838 		     sizeof(*stats64) / sizeof(u64));
5839 	for (i = 0; i < n; i++)
5840 		dst[i] = src[i];
5841 #endif
5842 }
5843 EXPORT_SYMBOL(netdev_stats_to_stats64);
5844 
5845 /**
5846  *	dev_get_stats	- get network device statistics
5847  *	@dev: device to get statistics from
5848  *	@storage: place to store stats
5849  *
5850  *	Get network statistics from device. Return @storage.
5851  *	The device driver may provide its own method by setting
5852  *	dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
5853  *	otherwise the internal statistics structure is used.
5854  */
5855 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5856 					struct rtnl_link_stats64 *storage)
5857 {
5858 	const struct net_device_ops *ops = dev->netdev_ops;
5859 
5860 	if (ops->ndo_get_stats64) {
5861 		memset(storage, 0, sizeof(*storage));
5862 		ops->ndo_get_stats64(dev, storage);
5863 	} else if (ops->ndo_get_stats) {
5864 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
5865 	} else {
5866 		netdev_stats_to_stats64(storage, &dev->stats);
5867 	}
5868 	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
5869 	return storage;
5870 }
5871 EXPORT_SYMBOL(dev_get_stats);
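
/* Illustrative only: a minimal sketch of the ndo_get_stats64 hook that
 * dev_get_stats() prefers above.  The driver, its private struct and its
 * counters are hypothetical; real drivers typically fold in per-queue or
 * per-cpu counters here.
 *
 *	static struct rtnl_link_stats64 *
 *	example_get_stats64(struct net_device *dev,
 *			    struct rtnl_link_stats64 *storage)
 *	{
 *		struct example_priv *priv = netdev_priv(dev);
 *
 *		storage->rx_packets = priv->rx_packets;
 *		storage->tx_packets = priv->tx_packets;
 *		storage->rx_bytes   = priv->rx_bytes;
 *		storage->tx_bytes   = priv->tx_bytes;
 *		return storage;
 *	}
 *
 *	static const struct net_device_ops example_netdev_ops = {
 *		.ndo_get_stats64	= example_get_stats64,
 *	};
 */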
5872 
5873 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
5874 {
5875 	struct netdev_queue *queue = dev_ingress_queue(dev);
5876 
5877 #ifdef CONFIG_NET_CLS_ACT
5878 	if (queue)
5879 		return queue;
5880 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
5881 	if (!queue)
5882 		return NULL;
5883 	netdev_init_one_queue(dev, queue, NULL);
5884 	queue->qdisc = &noop_qdisc;
5885 	queue->qdisc_sleeping = &noop_qdisc;
5886 	rcu_assign_pointer(dev->ingress_queue, queue);
5887 #endif
5888 	return queue;
5889 }
5890 
5891 /**
5892  *	alloc_netdev_mqs - allocate network device
5893  *	@sizeof_priv:	size of private data to allocate space for
5894  *	@name:		device name format string
5895  *	@setup:		callback to initialize device
5896  *	@txqs:		the number of TX subqueues to allocate
5897  *	@rxqs:		the number of RX subqueues to allocate
5898  *
5899  *	Allocates a struct net_device with private data area for driver use
5900 	 *	and performs basic initialization.  Also allocates subqueue structs
5901  *	for each queue on the device.
5902  */
5903 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5904 		void (*setup)(struct net_device *),
5905 		unsigned int txqs, unsigned int rxqs)
5906 {
5907 	struct net_device *dev;
5908 	size_t alloc_size;
5909 	struct net_device *p;
5910 
5911 	BUG_ON(strlen(name) >= sizeof(dev->name));
5912 
5913 	if (txqs < 1) {
5914 		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
5915 		return NULL;
5916 	}
5917 
5918 #ifdef CONFIG_RPS
5919 	if (rxqs < 1) {
5920 		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
5921 		return NULL;
5922 	}
5923 #endif
5924 
5925 	alloc_size = sizeof(struct net_device);
5926 	if (sizeof_priv) {
5927 		/* ensure 32-byte alignment of private area */
5928 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5929 		alloc_size += sizeof_priv;
5930 	}
5931 	/* ensure 32-byte alignment of whole construct */
5932 	alloc_size += NETDEV_ALIGN - 1;
5933 
5934 	p = kzalloc(alloc_size, GFP_KERNEL);
5935 	if (!p) {
5936 		pr_err("alloc_netdev: Unable to allocate device\n");
5937 		return NULL;
5938 	}
5939 
5940 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5941 	dev->padded = (char *)dev - (char *)p;
5942 
5943 	dev->pcpu_refcnt = alloc_percpu(int);
5944 	if (!dev->pcpu_refcnt)
5945 		goto free_p;
5946 
5947 	if (dev_addr_init(dev))
5948 		goto free_pcpu;
5949 
5950 	dev_mc_init(dev);
5951 	dev_uc_init(dev);
5952 
5953 	dev_net_set(dev, &init_net);
5954 
5955 	dev->gso_max_size = GSO_MAX_SIZE;
5956 
5957 	INIT_LIST_HEAD(&dev->napi_list);
5958 	INIT_LIST_HEAD(&dev->unreg_list);
5959 	INIT_LIST_HEAD(&dev->link_watch_list);
5960 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5961 	setup(dev);
5962 
5963 	dev->num_tx_queues = txqs;
5964 	dev->real_num_tx_queues = txqs;
5965 	if (netif_alloc_netdev_queues(dev))
5966 		goto free_all;
5967 
5968 #ifdef CONFIG_RPS
5969 	dev->num_rx_queues = rxqs;
5970 	dev->real_num_rx_queues = rxqs;
5971 	if (netif_alloc_rx_queues(dev))
5972 		goto free_all;
5973 #endif
5974 
5975 	strcpy(dev->name, name);
5976 	dev->group = INIT_NETDEV_GROUP;
5977 	return dev;
5978 
5979 free_all:
5980 	free_netdev(dev);
5981 	return NULL;
5982 
5983 free_pcpu:
5984 	free_percpu(dev->pcpu_refcnt);
5985 	kfree(dev->_tx);
5986 #ifdef CONFIG_RPS
5987 	kfree(dev->_rx);
5988 #endif
5989 
5990 free_p:
5991 	kfree(p);
5992 	return NULL;
5993 }
5994 EXPORT_SYMBOL(alloc_netdev_mqs);
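
/* Illustrative only: a minimal sketch of calling alloc_netdev_mqs()
 * directly for a multiqueue device.  The private struct, name format and
 * queue counts are hypothetical; most Ethernet drivers use the
 * alloc_etherdev_mqs() wrapper instead.
 *
 *	dev = alloc_netdev_mqs(sizeof(struct example_priv), "ex%d",
 *			       ether_setup, 8, 8);	// 8 TX and 8 RX queues
 *	if (!dev)
 *		return -ENOMEM;
 */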
5995 
5996 /**
5997  *	free_netdev - free network device
5998  *	@dev: device
5999  *
6000  *	This function does the last stage of destroying an allocated device
6001  * 	interface. The reference to the device object is released.
6002  *	If this is the last reference then it will be freed.
6003  */
6004 void free_netdev(struct net_device *dev)
6005 {
6006 	struct napi_struct *p, *n;
6007 
6008 	release_net(dev_net(dev));
6009 
6010 	kfree(dev->_tx);
6011 #ifdef CONFIG_RPS
6012 	kfree(dev->_rx);
6013 #endif
6014 
6015 	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
6016 
6017 	/* Flush device addresses */
6018 	dev_addr_flush(dev);
6019 
6020 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6021 		netif_napi_del(p);
6022 
6023 	free_percpu(dev->pcpu_refcnt);
6024 	dev->pcpu_refcnt = NULL;
6025 
6026 	/*  Compatibility with error handling in drivers */
6027 	if (dev->reg_state == NETREG_UNINITIALIZED) {
6028 		kfree((char *)dev - dev->padded);
6029 		return;
6030 	}
6031 
6032 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6033 	dev->reg_state = NETREG_RELEASED;
6034 
6035 	/* will free via device release */
6036 	put_device(&dev->dev);
6037 }
6038 EXPORT_SYMBOL(free_netdev);
6039 
6040 /**
6041  *	synchronize_net -  Synchronize with packet receive processing
6042  *
6043  *	Wait for packets currently being received to be done.
6044  *	Does not block later packets from starting.
6045  */
6046 void synchronize_net(void)
6047 {
6048 	might_sleep();
6049 	if (rtnl_is_locked())
6050 		synchronize_rcu_expedited();
6051 	else
6052 		synchronize_rcu();
6053 }
6054 EXPORT_SYMBOL(synchronize_net);
6055 
6056 /**
6057  *	unregister_netdevice_queue - remove device from the kernel
6058  *	@dev: device
6059  *	@head: list
6060  *
6061  *	This function shuts down a device interface and removes it
6062  *	from the kernel tables.
6063  *	If head not NULL, device is queued to be unregistered later.
6064  *	If @head is not NULL, the device is queued to be unregistered later.
6065  *	Callers must hold the rtnl semaphore.  You may want
6066  *	unregister_netdev() instead of this.
6067  */
6068 
6069 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
6070 {
6071 	ASSERT_RTNL();
6072 
6073 	if (head) {
6074 		list_move_tail(&dev->unreg_list, head);
6075 	} else {
6076 		rollback_registered(dev);
6077 		/* Finish processing unregister after unlock */
6078 		net_set_todo(dev);
6079 	}
6080 }
6081 EXPORT_SYMBOL(unregister_netdevice_queue);
6082 
6083 /**
6084  *	unregister_netdevice_many - unregister many devices
6085  *	@head: list of devices
6086  */
6087 void unregister_netdevice_many(struct list_head *head)
6088 {
6089 	struct net_device *dev;
6090 
6091 	if (!list_empty(head)) {
6092 		rollback_registered_many(head);
6093 		list_for_each_entry(dev, head, unreg_list)
6094 			net_set_todo(dev);
6095 	}
6096 }
6097 EXPORT_SYMBOL(unregister_netdevice_many);
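
/* Illustrative only: a minimal sketch of batched unregistration using the
 * @head list, assuming a hypothetical caller tearing down several devices
 * under one rtnl_lock()/rtnl_unlock() pair.  The final free_netdev() (or
 * the device destructor) still runs after rtnl_unlock(), as described in
 * the sequence comment above netdev_run_todo().
 *
 *	LIST_HEAD(kill_list);
 *
 *	rtnl_lock();
 *	unregister_netdevice_queue(dev1, &kill_list);
 *	unregister_netdevice_queue(dev2, &kill_list);
 *	unregister_netdevice_many(&kill_list);
 *	rtnl_unlock();
 */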
6098 
6099 /**
6100  *	unregister_netdev - remove device from the kernel
6101  *	@dev: device
6102  *
6103  *	This function shuts down a device interface and removes it
6104  *	from the kernel tables.
6105  *
6106  *	This is just a wrapper for unregister_netdevice that takes
6107  *	the rtnl semaphore.  In general you want to use this and not
6108  *	unregister_netdevice.
6109  */
6110 void unregister_netdev(struct net_device *dev)
6111 {
6112 	rtnl_lock();
6113 	unregister_netdevice(dev);
6114 	rtnl_unlock();
6115 }
6116 EXPORT_SYMBOL(unregister_netdev);
6117 
6118 /**
6119  *	dev_change_net_namespace - move device to a different network namespace
6120  *	@dev: device
6121  *	@net: network namespace
6122  *	@pat: If not NULL name pattern to try if the current device name
6123  *	      is already taken in the destination network namespace.
6124  *
6125  *	This function shuts down a device interface and moves it
6126  *	to a new network namespace. On success 0 is returned, on
6127  *	a failure a negative errno code is returned.
6128  *
6129  *	Callers must hold the rtnl semaphore.
6130  */
6131 
6132 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6133 {
6134 	int err;
6135 
6136 	ASSERT_RTNL();
6137 
6138 	/* Don't allow namespace local devices to be moved. */
6139 	err = -EINVAL;
6140 	if (dev->features & NETIF_F_NETNS_LOCAL)
6141 		goto out;
6142 
6143 	/* Ensure the device has been registered */
6144 	err = -EINVAL;
6145 	if (dev->reg_state != NETREG_REGISTERED)
6146 		goto out;
6147 
6148 	/* Get out if there is nothing to do */
6149 	err = 0;
6150 	if (net_eq(dev_net(dev), net))
6151 		goto out;
6152 
6153 	/* Pick the destination device name, and ensure
6154 	 * we can use it in the destination network namespace.
6155 	 */
6156 	err = -EEXIST;
6157 	if (__dev_get_by_name(net, dev->name)) {
6158 		/* We get here if we can't use the current device name */
6159 		if (!pat)
6160 			goto out;
6161 		if (dev_get_valid_name(dev, pat) < 0)
6162 			goto out;
6163 	}
6164 
6165 	/*
6166 	 * And now a mini version of register_netdevice and unregister_netdevice.
6167 	 */
6168 
6169 	/* If device is running close it first. */
6170 	dev_close(dev);
6171 
6172 	/* And unlink it from device chain */
6173 	err = -ENODEV;
6174 	unlist_netdevice(dev);
6175 
6176 	synchronize_net();
6177 
6178 	/* Shutdown queueing discipline. */
6179 	dev_shutdown(dev);
6180 
6181 	/* Notify protocols that we are about to destroy
6182 	   this device. They should clean all the things.
6183 
6184 	   Note that dev->reg_state stays at NETREG_REGISTERED.
6185 	   This is wanted because this way 8021q and macvlan know
6186 	   the device is just moving and can keep their slaves up.
6187 	*/
6188 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6189 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
6190 	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
6191 
6192 	/*
6193 	 *	Flush the unicast and multicast chains
6194 	 */
6195 	dev_uc_flush(dev);
6196 	dev_mc_flush(dev);
6197 
6198 	/* Actually switch the network namespace */
6199 	dev_net_set(dev, net);
6200 
6201 	/* If there is an ifindex conflict assign a new one */
6202 	if (__dev_get_by_index(net, dev->ifindex)) {
6203 		int iflink = (dev->iflink == dev->ifindex);
6204 		dev->ifindex = dev_new_index(net);
6205 		if (iflink)
6206 			dev->iflink = dev->ifindex;
6207 	}
6208 
6209 	/* Fixup kobjects */
6210 	err = device_rename(&dev->dev, dev->name);
6211 	WARN_ON(err);
6212 
6213 	/* Add the device back in the hashes */
6214 	list_netdevice(dev);
6215 
6216 	/* Notify protocols that a new device appeared. */
6217 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
6218 
6219 	/*
6220 	 *	Prevent userspace races by waiting until the network
6221 	 *	device is fully set up before sending notifications.
6222 	 */
6223 	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
6224 
6225 	synchronize_net();
6226 	err = 0;
6227 out:
6228 	return err;
6229 }
6230 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
6231 
6232 static int dev_cpu_callback(struct notifier_block *nfb,
6233 			    unsigned long action,
6234 			    void *ocpu)
6235 {
6236 	struct sk_buff **list_skb;
6237 	struct sk_buff *skb;
6238 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
6239 	struct softnet_data *sd, *oldsd;
6240 
6241 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
6242 		return NOTIFY_OK;
6243 
6244 	local_irq_disable();
6245 	cpu = smp_processor_id();
6246 	sd = &per_cpu(softnet_data, cpu);
6247 	oldsd = &per_cpu(softnet_data, oldcpu);
6248 
6249 	/* Find end of our completion_queue. */
6250 	list_skb = &sd->completion_queue;
6251 	while (*list_skb)
6252 		list_skb = &(*list_skb)->next;
6253 	/* Append completion queue from offline CPU. */
6254 	*list_skb = oldsd->completion_queue;
6255 	oldsd->completion_queue = NULL;
6256 
6257 	/* Append output queue from offline CPU. */
6258 	if (oldsd->output_queue) {
6259 		*sd->output_queue_tailp = oldsd->output_queue;
6260 		sd->output_queue_tailp = oldsd->output_queue_tailp;
6261 		oldsd->output_queue = NULL;
6262 		oldsd->output_queue_tailp = &oldsd->output_queue;
6263 	}
6264 	/* Append NAPI poll list from offline CPU. */
6265 	if (!list_empty(&oldsd->poll_list)) {
6266 		list_splice_init(&oldsd->poll_list, &sd->poll_list);
6267 		raise_softirq_irqoff(NET_RX_SOFTIRQ);
6268 	}
6269 
6270 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
6271 	local_irq_enable();
6272 
6273 	/* Process offline CPU's input_pkt_queue */
6274 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6275 		netif_rx(skb);
6276 		input_queue_head_incr(oldsd);
6277 	}
6278 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6279 		netif_rx(skb);
6280 		input_queue_head_incr(oldsd);
6281 	}
6282 
6283 	return NOTIFY_OK;
6284 }
6285 
6286 
6287 /**
6288  *	netdev_increment_features - increment feature set by one
6289  *	@all: current feature set
6290  *	@one: new feature set
6291  *	@mask: mask feature set
6292  *
6293  *	Computes a new feature set after adding a device with feature set
6294  *	@one to the master device with current feature set @all.  Will not
6295  *	enable anything that is off in @mask. Returns the new feature set.
6296  */
6297 netdev_features_t netdev_increment_features(netdev_features_t all,
6298 	netdev_features_t one, netdev_features_t mask)
6299 {
6300 	if (mask & NETIF_F_GEN_CSUM)
6301 		mask |= NETIF_F_ALL_CSUM;
6302 	mask |= NETIF_F_VLAN_CHALLENGED;
6303 
6304 	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6305 	all &= one | ~NETIF_F_ALL_FOR_ALL;
6306 
6307 	/* If one device supports hw checksumming, set for all. */
6308 	if (all & NETIF_F_GEN_CSUM)
6309 		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
6310 
6311 	return all;
6312 }
6313 EXPORT_SYMBOL(netdev_increment_features);
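
/* Illustrative only: one possible pattern for a master device (bonding or
 * bridge style) recomputing its feature set from its slaves with
 * netdev_increment_features().  The slave list, master pointer and mask
 * below are hypothetical.
 *
 *	netdev_features_t features = master->vlan_features;
 *
 *	list_for_each_entry(slave, &slaves, list)
 *		features = netdev_increment_features(features,
 *						     slave->dev->features,
 *						     master->vlan_features);
 *	master->features = features;
 */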
6314 
6315 static struct hlist_head *netdev_create_hash(void)
6316 {
6317 	int i;
6318 	struct hlist_head *hash;
6319 
6320 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
6321 	if (hash != NULL)
6322 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
6323 			INIT_HLIST_HEAD(&hash[i]);
6324 
6325 	return hash;
6326 }
6327 
6328 /* Initialize per network namespace state */
6329 static int __net_init netdev_init(struct net *net)
6330 {
6331 	INIT_LIST_HEAD(&net->dev_base_head);
6332 
6333 	net->dev_name_head = netdev_create_hash();
6334 	if (net->dev_name_head == NULL)
6335 		goto err_name;
6336 
6337 	net->dev_index_head = netdev_create_hash();
6338 	if (net->dev_index_head == NULL)
6339 		goto err_idx;
6340 
6341 	return 0;
6342 
6343 err_idx:
6344 	kfree(net->dev_name_head);
6345 err_name:
6346 	return -ENOMEM;
6347 }
6348 
6349 /**
6350  *	netdev_drivername - network driver for the device
6351  *	@dev: network device
6352  *
6353  *	Determine network driver for device.
6354  */
6355 const char *netdev_drivername(const struct net_device *dev)
6356 {
6357 	const struct device_driver *driver;
6358 	const struct device *parent;
6359 	const char *empty = "";
6360 
6361 	parent = dev->dev.parent;
6362 	if (!parent)
6363 		return empty;
6364 
6365 	driver = parent->driver;
6366 	if (driver && driver->name)
6367 		return driver->name;
6368 	return empty;
6369 }
6370 
6371 int __netdev_printk(const char *level, const struct net_device *dev,
6372 			   struct va_format *vaf)
6373 {
6374 	int r;
6375 
6376 	if (dev && dev->dev.parent)
6377 		r = dev_printk(level, dev->dev.parent, "%s: %pV",
6378 			       netdev_name(dev), vaf);
6379 	else if (dev)
6380 		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6381 	else
6382 		r = printk("%s(NULL net_device): %pV", level, vaf);
6383 
6384 	return r;
6385 }
6386 EXPORT_SYMBOL(__netdev_printk);
6387 
6388 int netdev_printk(const char *level, const struct net_device *dev,
6389 		  const char *format, ...)
6390 {
6391 	struct va_format vaf;
6392 	va_list args;
6393 	int r;
6394 
6395 	va_start(args, format);
6396 
6397 	vaf.fmt = format;
6398 	vaf.va = &args;
6399 
6400 	r = __netdev_printk(level, dev, &vaf);
6401 	va_end(args);
6402 
6403 	return r;
6404 }
6405 EXPORT_SYMBOL(netdev_printk);
6406 
6407 #define define_netdev_printk_level(func, level)			\
6408 int func(const struct net_device *dev, const char *fmt, ...)	\
6409 {								\
6410 	int r;							\
6411 	struct va_format vaf;					\
6412 	va_list args;						\
6413 								\
6414 	va_start(args, fmt);					\
6415 								\
6416 	vaf.fmt = fmt;						\
6417 	vaf.va = &args;						\
6418 								\
6419 	r = __netdev_printk(level, dev, &vaf);			\
6420 	va_end(args);						\
6421 								\
6422 	return r;						\
6423 }								\
6424 EXPORT_SYMBOL(func);
6425 
6426 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
6427 define_netdev_printk_level(netdev_alert, KERN_ALERT);
6428 define_netdev_printk_level(netdev_crit, KERN_CRIT);
6429 define_netdev_printk_level(netdev_err, KERN_ERR);
6430 define_netdev_printk_level(netdev_warn, KERN_WARNING);
6431 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
6432 define_netdev_printk_level(netdev_info, KERN_INFO);
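
/* Illustrative only: the helpers generated above are used like printk()
 * but prefix the message with the driver and device names, e.g. (with a
 * hypothetical queue index and link speed):
 *
 *	netdev_err(dev, "TX timeout on queue %d\n", queue);
 *	netdev_info(dev, "link up, %u Mbps\n", speed);
 */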
6433 
6434 static void __net_exit netdev_exit(struct net *net)
6435 {
6436 	kfree(net->dev_name_head);
6437 	kfree(net->dev_index_head);
6438 }
6439 
6440 static struct pernet_operations __net_initdata netdev_net_ops = {
6441 	.init = netdev_init,
6442 	.exit = netdev_exit,
6443 };
6444 
6445 static void __net_exit default_device_exit(struct net *net)
6446 {
6447 	struct net_device *dev, *aux;
6448 	/*
6449 	 * Push all migratable network devices back to the
6450 	 * initial network namespace
6451 	 */
6452 	rtnl_lock();
6453 	for_each_netdev_safe(net, dev, aux) {
6454 		int err;
6455 		char fb_name[IFNAMSIZ];
6456 
6457 		/* Ignore unmoveable devices (i.e. loopback) */
6458 		if (dev->features & NETIF_F_NETNS_LOCAL)
6459 			continue;
6460 
6461 		/* Leave virtual devices for the generic cleanup */
6462 		if (dev->rtnl_link_ops)
6463 			continue;
6464 
6465 		/* Push remaining network devices to init_net */
6466 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6467 		err = dev_change_net_namespace(dev, &init_net, fb_name);
6468 		if (err) {
6469 			pr_emerg("%s: failed to move %s to init_net: %d\n",
6470 				 __func__, dev->name, err);
6471 			BUG();
6472 		}
6473 	}
6474 	rtnl_unlock();
6475 }
6476 
6477 static void __net_exit default_device_exit_batch(struct list_head *net_list)
6478 {
6479 	/* At exit all network devices must be removed from a network
6480 	 * namespace.  Do this in the reverse order of registration.
6481 	 * Do this across as many network namespaces as possible to
6482 	 * improve batching efficiency.
6483 	 */
6484 	struct net_device *dev;
6485 	struct net *net;
6486 	LIST_HEAD(dev_kill_list);
6487 
6488 	rtnl_lock();
6489 	list_for_each_entry(net, net_list, exit_list) {
6490 		for_each_netdev_reverse(net, dev) {
6491 			if (dev->rtnl_link_ops)
6492 				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
6493 			else
6494 				unregister_netdevice_queue(dev, &dev_kill_list);
6495 		}
6496 	}
6497 	unregister_netdevice_many(&dev_kill_list);
6498 	list_del(&dev_kill_list);
6499 	rtnl_unlock();
6500 }
6501 
6502 static struct pernet_operations __net_initdata default_device_ops = {
6503 	.exit = default_device_exit,
6504 	.exit_batch = default_device_exit_batch,
6505 };
6506 
6507 /*
6508  *	Initialize the DEV module. At boot time this walks the device list and
6509  *	unhooks any devices that fail to initialise (normally hardware not
6510  *	present) and leaves us with a valid list of present and active devices.
6511  *
6512  */
6513 
6514 /*
6515  *       This is called single threaded during boot, so no need
6516  *       to take the rtnl semaphore.
6517  */
6518 static int __init net_dev_init(void)
6519 {
6520 	int i, rc = -ENOMEM;
6521 
6522 	BUG_ON(!dev_boot_phase);
6523 
6524 	if (dev_proc_init())
6525 		goto out;
6526 
6527 	if (netdev_kobject_init())
6528 		goto out;
6529 
6530 	INIT_LIST_HEAD(&ptype_all);
6531 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
6532 		INIT_LIST_HEAD(&ptype_base[i]);
6533 
6534 	if (register_pernet_subsys(&netdev_net_ops))
6535 		goto out;
6536 
6537 	/*
6538 	 *	Initialise the packet receive queues.
6539 	 */
6540 
6541 	for_each_possible_cpu(i) {
6542 		struct softnet_data *sd = &per_cpu(softnet_data, i);
6543 
6544 		memset(sd, 0, sizeof(*sd));
6545 		skb_queue_head_init(&sd->input_pkt_queue);
6546 		skb_queue_head_init(&sd->process_queue);
6547 		sd->completion_queue = NULL;
6548 		INIT_LIST_HEAD(&sd->poll_list);
6549 		sd->output_queue = NULL;
6550 		sd->output_queue_tailp = &sd->output_queue;
6551 #ifdef CONFIG_RPS
6552 		sd->csd.func = rps_trigger_softirq;
6553 		sd->csd.info = sd;
6554 		sd->csd.flags = 0;
6555 		sd->cpu = i;
6556 #endif
6557 
6558 		sd->backlog.poll = process_backlog;
6559 		sd->backlog.weight = weight_p;
6560 		sd->backlog.gro_list = NULL;
6561 		sd->backlog.gro_count = 0;
6562 	}
6563 
6564 	dev_boot_phase = 0;
6565 
6566 	/* The loopback device is special: if any other network device
6567 	 * is present in a network namespace, the loopback device must
6568 	 * be present too. Since we now dynamically allocate and free the
6569 	 * loopback device, ensure this invariant is maintained by
6570 	 * keeping the loopback device as the first device on the
6571 	 * list of network devices, so that the loopback device is the
6572 	 * first device that appears and the last network device
6573 	 * that disappears.
6574 	 */
6575 	if (register_pernet_device(&loopback_net_ops))
6576 		goto out;
6577 
6578 	if (register_pernet_device(&default_device_ops))
6579 		goto out;
6580 
6581 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6582 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6583 
6584 	hotcpu_notifier(dev_cpu_callback, 0);
6585 	dst_init();
6586 	dev_mcast_init();
6587 	rc = 0;
6588 out:
6589 	return rc;
6590 }
6591 
6592 subsys_initcall(net_dev_init);
6593 
6594 static int __init initialize_hashrnd(void)
6595 {
6596 	get_random_bytes(&hashrnd, sizeof(hashrnd));
6597 	return 0;
6598 }
6599 
6600 late_initcall_sync(initialize_hashrnd);
6601 
6602