xref: /linux/net/core/dev.c (revision d89dffa976bcd13fd87eb76e02e3b71c3a7868e3)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <linux/bitops.h>
77 #include <linux/capability.h>
78 #include <linux/cpu.h>
79 #include <linux/types.h>
80 #include <linux/kernel.h>
81 #include <linux/hash.h>
82 #include <linux/slab.h>
83 #include <linux/sched.h>
84 #include <linux/mutex.h>
85 #include <linux/string.h>
86 #include <linux/mm.h>
87 #include <linux/socket.h>
88 #include <linux/sockios.h>
89 #include <linux/errno.h>
90 #include <linux/interrupt.h>
91 #include <linux/if_ether.h>
92 #include <linux/netdevice.h>
93 #include <linux/etherdevice.h>
94 #include <linux/ethtool.h>
95 #include <linux/notifier.h>
96 #include <linux/skbuff.h>
97 #include <net/net_namespace.h>
98 #include <net/sock.h>
99 #include <linux/rtnetlink.h>
100 #include <linux/proc_fs.h>
101 #include <linux/seq_file.h>
102 #include <linux/stat.h>
103 #include <net/dst.h>
104 #include <net/pkt_sched.h>
105 #include <net/checksum.h>
106 #include <net/xfrm.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 #include <trace/events/napi.h>
130 #include <trace/events/net.h>
131 #include <trace/events/skb.h>
132 #include <linux/pci.h>
133 #include <linux/inetdevice.h>
134 #include <linux/cpu_rmap.h>
135 #include <linux/net_tstamp.h>
136 #include <linux/static_key.h>
137 #include <net/flow_keys.h>
138 
139 #include "net-sysfs.h"
140 
141 /* Instead of increasing this, you should create a hash table. */
142 #define MAX_GRO_SKBS 8
143 
144 /* This should be increased if a protocol with a bigger head is added. */
145 #define GRO_MAX_HEAD (MAX_HEADER + 128)
146 
147 /*
148  *	The list of packet types we will receive (as opposed to discard)
149  *	and the routines to invoke.
150  *
151  *	Why 16?  Because with 16 the only overlap we get on a hash of the
152  *	low nibble of the protocol value is RARP/SNAP/X.25.
153  *
154  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
155  *             sure which should go first, but I bet it won't make much
156  *             difference if we are running VLANs.  The good news is that
157  *             this protocol won't be in the list unless compiled in, so
158  *             the average user (w/out VLANs) will not be adversely affected.
159  *             --BLG
160  *
161  *		0800	IP
162  *		8100    802.1Q VLAN
163  *		0001	802.3
164  *		0002	AX.25
165  *		0004	802.2
166  *		8035	RARP
167  *		0005	SNAP
168  *		0805	X.25
169  *		0806	ARP
170  *		8137	IPX
171  *		0009	Localtalk
172  *		86DD	IPv6
173  */
174 
175 #define PTYPE_HASH_SIZE	(16)
176 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
177 
178 static DEFINE_SPINLOCK(ptype_lock);
179 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
180 static struct list_head ptype_all __read_mostly;	/* Taps */
181 
182 /*
183  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
184  * semaphore.
185  *
186  * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
187  *
188  * Writers must hold the rtnl semaphore while they loop through the
189  * dev_base_head list, and hold dev_base_lock for writing when they do the
190  * actual updates.  This allows pure readers to access the list even
191  * while a writer is preparing to update it.
192  *
193  * To put it another way, dev_base_lock is held for writing only to
194  * protect against pure readers; the rtnl semaphore provides the
195  * protection against other writers.
196  *
197  * See, for example usages, register_netdevice() and
198  * unregister_netdevice(), which must be called with the rtnl
199  * semaphore held.
200  */
201 DEFINE_RWLOCK(dev_base_lock);
202 EXPORT_SYMBOL(dev_base_lock);
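/*
 * Illustrative sketch (not part of the original file): a pure reader
 * walking the device list under RCU, per the locking rules above.  The
 * function name and the pr_info() output are made up for the example.
 */
static void example_list_devices(struct net *net)
{
	struct net_device *dev;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		pr_info("%s: ifindex %d\n", dev->name, dev->ifindex);
	rcu_read_unlock();
}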
203 
204 static inline void dev_base_seq_inc(struct net *net)
205 {
206 	while (++net->dev_base_seq == 0);
207 }
208 
209 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
210 {
211 	unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
212 
213 	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
214 }
215 
216 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
217 {
218 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
219 }
220 
221 static inline void rps_lock(struct softnet_data *sd)
222 {
223 #ifdef CONFIG_RPS
224 	spin_lock(&sd->input_pkt_queue.lock);
225 #endif
226 }
227 
228 static inline void rps_unlock(struct softnet_data *sd)
229 {
230 #ifdef CONFIG_RPS
231 	spin_unlock(&sd->input_pkt_queue.lock);
232 #endif
233 }
234 
235 /* Device list insertion */
236 static int list_netdevice(struct net_device *dev)
237 {
238 	struct net *net = dev_net(dev);
239 
240 	ASSERT_RTNL();
241 
242 	write_lock_bh(&dev_base_lock);
243 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
244 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
245 	hlist_add_head_rcu(&dev->index_hlist,
246 			   dev_index_hash(net, dev->ifindex));
247 	write_unlock_bh(&dev_base_lock);
248 
249 	dev_base_seq_inc(net);
250 
251 	return 0;
252 }
253 
254 /* Device list removal
255  * caller must respect a RCU grace period before freeing/reusing dev
256  */
257 static void unlist_netdevice(struct net_device *dev)
258 {
259 	ASSERT_RTNL();
260 
261 	/* Unlink dev from the device chain */
262 	write_lock_bh(&dev_base_lock);
263 	list_del_rcu(&dev->dev_list);
264 	hlist_del_rcu(&dev->name_hlist);
265 	hlist_del_rcu(&dev->index_hlist);
266 	write_unlock_bh(&dev_base_lock);
267 
268 	dev_base_seq_inc(dev_net(dev));
269 }
270 
271 /*
272  *	Our notifier list
273  */
274 
275 static RAW_NOTIFIER_HEAD(netdev_chain);
276 
277 /*
278  *	Device drivers call our routines to queue packets here. We empty the
279  *	queue in the local softnet handler.
280  */
281 
282 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
283 EXPORT_PER_CPU_SYMBOL(softnet_data);
284 
285 #ifdef CONFIG_LOCKDEP
286 /*
287  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
288  * according to dev->type
289  */
290 static const unsigned short netdev_lock_type[] =
291 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
292 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
293 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
294 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
295 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
296 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
297 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
298 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
299 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
300 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
301 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
302 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
303 	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
304 	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
305 	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
306 
307 static const char *const netdev_lock_name[] =
308 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
309 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
310 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
311 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
312 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
313 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
314 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
315 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
316 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
317 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
318 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
319 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
320 	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
321 	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
322 	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
323 
324 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
325 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
326 
327 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
328 {
329 	int i;
330 
331 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
332 		if (netdev_lock_type[i] == dev_type)
333 			return i;
334 	/* the last key is used by default */
335 	return ARRAY_SIZE(netdev_lock_type) - 1;
336 }
337 
338 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
339 						 unsigned short dev_type)
340 {
341 	int i;
342 
343 	i = netdev_lock_pos(dev_type);
344 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
345 				   netdev_lock_name[i]);
346 }
347 
348 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
349 {
350 	int i;
351 
352 	i = netdev_lock_pos(dev->type);
353 	lockdep_set_class_and_name(&dev->addr_list_lock,
354 				   &netdev_addr_lock_key[i],
355 				   netdev_lock_name[i]);
356 }
357 #else
358 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
359 						 unsigned short dev_type)
360 {
361 }
362 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
363 {
364 }
365 #endif
366 
367 /*******************************************************************************
368 
369 		Protocol management and registration routines
370 
371 *******************************************************************************/
372 
373 /*
374  *	Add a protocol ID to the list. Now that the input handler is
375  *	smarter we can dispense with all the messy stuff that used to be
376  *	here.
377  *
378  *	BEWARE!!! Protocol handlers, mangling input packets,
379  *	MUST BE last in hash buckets and checking protocol handlers
380  *	MUST start from promiscuous ptype_all chain in net_bh.
381  *	It is true now, do not change it.
382  *	Explanation follows: if protocol handler, mangling packet, will
383  *	be the first on list, it is not able to sense, that packet
384  *	is cloned and should be copied-on-write, so that it will
385  *	change it and subsequent readers will get broken packet.
386  *							--ANK (980803)
387  */
388 
389 static inline struct list_head *ptype_head(const struct packet_type *pt)
390 {
391 	if (pt->type == htons(ETH_P_ALL))
392 		return &ptype_all;
393 	else
394 		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
395 }
396 
397 /**
398  *	dev_add_pack - add packet handler
399  *	@pt: packet type declaration
400  *
401  *	Add a protocol handler to the networking stack. The passed &packet_type
402  *	is linked into kernel lists and may not be freed until it has been
403  *	removed from the kernel lists.
404  *
405  *	This call does not sleep, so it cannot guarantee that all
406  *	CPUs currently in the middle of receiving packets will see
407  *	the new packet type until the next packet is received.
408  */
409 
410 void dev_add_pack(struct packet_type *pt)
411 {
412 	struct list_head *head = ptype_head(pt);
413 
414 	spin_lock(&ptype_lock);
415 	list_add_rcu(&pt->list, head);
416 	spin_unlock(&ptype_lock);
417 }
418 EXPORT_SYMBOL(dev_add_pack);
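/*
 * Illustrative sketch (not part of the original file): registering a tap
 * that sees every packet.  The handler and variable names are made up; a
 * real handler must consume or free the skb it is given.
 */
static int example_tap_rcv(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type example_tap __read_mostly = {
	.type	= cpu_to_be16(ETH_P_ALL),
	.func	= example_tap_rcv,
};

/* Pair dev_add_pack(&example_tap) with dev_remove_pack(&example_tap). */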
419 
420 /**
421  *	__dev_remove_pack	 - remove packet handler
422  *	@pt: packet type declaration
423  *
424  *	Remove a protocol handler that was previously added to the kernel
425  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
426  *	from the kernel lists and can be freed or reused once this function
427  *	returns.
428  *
429  *      The packet type might still be in use by receivers
430  *	and must not be freed until after all the CPUs have gone
431  *	through a quiescent state.
432  */
433 void __dev_remove_pack(struct packet_type *pt)
434 {
435 	struct list_head *head = ptype_head(pt);
436 	struct packet_type *pt1;
437 
438 	spin_lock(&ptype_lock);
439 
440 	list_for_each_entry(pt1, head, list) {
441 		if (pt == pt1) {
442 			list_del_rcu(&pt->list);
443 			goto out;
444 		}
445 	}
446 
447 	pr_warn("dev_remove_pack: %p not found\n", pt);
448 out:
449 	spin_unlock(&ptype_lock);
450 }
451 EXPORT_SYMBOL(__dev_remove_pack);
452 
453 /**
454  *	dev_remove_pack	 - remove packet handler
455  *	@pt: packet type declaration
456  *
457  *	Remove a protocol handler that was previously added to the kernel
458  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
459  *	from the kernel lists and can be freed or reused once this function
460  *	returns.
461  *
462  *	This call sleeps to guarantee that no CPU is looking at the packet
463  *	type after return.
464  */
465 void dev_remove_pack(struct packet_type *pt)
466 {
467 	__dev_remove_pack(pt);
468 
469 	synchronize_net();
470 }
471 EXPORT_SYMBOL(dev_remove_pack);
472 
473 /******************************************************************************
474 
475 		      Device Boot-time Settings Routines
476 
477 *******************************************************************************/
478 
479 /* Boot time configuration table */
480 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
481 
482 /**
483  *	netdev_boot_setup_add	- add new setup entry
484  *	@name: name of the device
485  *	@map: configured settings for the device
486  *
487  *	Adds new setup entry to the dev_boot_setup list.  The function
488  *	Adds a new setup entry to the dev_boot_setup list.  The function
489  *	returns 0 on error and 1 on success.  This is a generic routine for
490  *	all netdevices.
491 static int netdev_boot_setup_add(char *name, struct ifmap *map)
492 {
493 	struct netdev_boot_setup *s;
494 	int i;
495 
496 	s = dev_boot_setup;
497 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
498 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
499 			memset(s[i].name, 0, sizeof(s[i].name));
500 			strlcpy(s[i].name, name, IFNAMSIZ);
501 			memcpy(&s[i].map, map, sizeof(s[i].map));
502 			break;
503 		}
504 	}
505 
506 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
507 }
508 
509 /**
510  *	netdev_boot_setup_check	- check boot time settings
511  *	@dev: the netdevice
512  *
513  * 	Check the boot time settings for the device.
514  *	Any settings found are applied to the device for use later
515  *	during device probing.
516  *	Returns 0 if no settings are found, 1 if they are.
517  */
518 int netdev_boot_setup_check(struct net_device *dev)
519 {
520 	struct netdev_boot_setup *s = dev_boot_setup;
521 	int i;
522 
523 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
524 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
525 		    !strcmp(dev->name, s[i].name)) {
526 			dev->irq 	= s[i].map.irq;
527 			dev->base_addr 	= s[i].map.base_addr;
528 			dev->mem_start 	= s[i].map.mem_start;
529 			dev->mem_end 	= s[i].map.mem_end;
530 			return 1;
531 		}
532 	}
533 	return 0;
534 }
535 EXPORT_SYMBOL(netdev_boot_setup_check);
536 
537 
538 /**
539  *	netdev_boot_base	- get address from boot time settings
540  *	@prefix: prefix for network device
541  *	@unit: id for network device
542  *
543  * 	Check the boot time settings for the base address of the device.
544  *	The address found is returned for use later during device probing
545  *	(a return of 1 means the device is already registered and should
546  *	not be probed).  Returns 0 if no settings are found.
547  */
548 unsigned long netdev_boot_base(const char *prefix, int unit)
549 {
550 	const struct netdev_boot_setup *s = dev_boot_setup;
551 	char name[IFNAMSIZ];
552 	int i;
553 
554 	sprintf(name, "%s%d", prefix, unit);
555 
556 	/*
557 	 * If device already registered then return base of 1
558 	 * to indicate not to probe for this interface
559 	 */
560 	if (__dev_get_by_name(&init_net, name))
561 		return 1;
562 
563 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
564 		if (!strcmp(name, s[i].name))
565 			return s[i].map.base_addr;
566 	return 0;
567 }
568 
569 /*
570  * Saves at boot time configured settings for any netdevice.
571  */
572 int __init netdev_boot_setup(char *str)
573 {
574 	int ints[5];
575 	struct ifmap map;
576 
577 	str = get_options(str, ARRAY_SIZE(ints), ints);
578 	if (!str || !*str)
579 		return 0;
580 
581 	/* Save settings */
582 	memset(&map, 0, sizeof(map));
583 	if (ints[0] > 0)
584 		map.irq = ints[1];
585 	if (ints[0] > 1)
586 		map.base_addr = ints[2];
587 	if (ints[0] > 2)
588 		map.mem_start = ints[3];
589 	if (ints[0] > 3)
590 		map.mem_end = ints[4];
591 
592 	/* Add new entry to the list */
593 	return netdev_boot_setup_add(str, &map);
594 }
595 
596 __setup("netdev=", netdev_boot_setup);
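/*
 * Example (assuming the classic "netdev=" syntax): booting with
 * "netdev=5,0x300,0,0,eth1" stores irq=5 and base_addr=0x300 for "eth1",
 * which a probing driver can later pick up via netdev_boot_setup_check().
 */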
597 
598 /*******************************************************************************
599 
600 			    Device Interface Subroutines
601 
602 *******************************************************************************/
603 
604 /**
605  *	__dev_get_by_name	- find a device by its name
606  *	@net: the applicable net namespace
607  *	@name: name to find
608  *
609  *	Find an interface by name. Must be called under RTNL semaphore
610  *	or @dev_base_lock. If the name is found a pointer to the device
611  *	is returned. If the name is not found then %NULL is returned. The
612  *	reference counters are not incremented so the caller must be
613  *	careful with locks.
614  */
615 
616 struct net_device *__dev_get_by_name(struct net *net, const char *name)
617 {
618 	struct hlist_node *p;
619 	struct net_device *dev;
620 	struct hlist_head *head = dev_name_hash(net, name);
621 
622 	hlist_for_each_entry(dev, p, head, name_hlist)
623 		if (!strncmp(dev->name, name, IFNAMSIZ))
624 			return dev;
625 
626 	return NULL;
627 }
628 EXPORT_SYMBOL(__dev_get_by_name);
629 
630 /**
631  *	dev_get_by_name_rcu	- find a device by its name
632  *	@net: the applicable net namespace
633  *	@name: name to find
634  *
635  *	Find an interface by name.
636  *	If the name is found a pointer to the device is returned.
637  * 	If the name is not found then %NULL is returned.
638  *	The reference counters are not incremented so the caller must be
639  *	careful with locks. The caller must hold RCU lock.
640  */
641 
642 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
643 {
644 	struct hlist_node *p;
645 	struct net_device *dev;
646 	struct hlist_head *head = dev_name_hash(net, name);
647 
648 	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
649 		if (!strncmp(dev->name, name, IFNAMSIZ))
650 			return dev;
651 
652 	return NULL;
653 }
654 EXPORT_SYMBOL(dev_get_by_name_rcu);
655 
656 /**
657  *	dev_get_by_name		- find a device by its name
658  *	@net: the applicable net namespace
659  *	@name: name to find
660  *
661  *	Find an interface by name. This can be called from any
662  *	context and does its own locking. The returned handle has
663  *	the usage count incremented and the caller must use dev_put() to
664  *	release it when it is no longer needed. %NULL is returned if no
665  *	matching device is found.
666  */
667 
668 struct net_device *dev_get_by_name(struct net *net, const char *name)
669 {
670 	struct net_device *dev;
671 
672 	rcu_read_lock();
673 	dev = dev_get_by_name_rcu(net, name);
674 	if (dev)
675 		dev_hold(dev);
676 	rcu_read_unlock();
677 	return dev;
678 }
679 EXPORT_SYMBOL(dev_get_by_name);
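/*
 * Illustrative sketch (not part of the original file): the two lookup
 * styles side by side.  The device name "eth0" is arbitrary.
 */
static void example_lookup_by_name(struct net *net)
{
	struct net_device *dev;

	/* Sleepable context: take a reference and drop it when done. */
	dev = dev_get_by_name(net, "eth0");
	if (dev) {
		pr_info("%s has ifindex %d\n", dev->name, dev->ifindex);
		dev_put(dev);
	}

	/* Lock-free read: no reference taken, stay inside the RCU section. */
	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, "eth0");
	if (dev)
		pr_info("%s is %s\n", dev->name,
			netif_running(dev) ? "running" : "down");
	rcu_read_unlock();
}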
680 
681 /**
682  *	__dev_get_by_index - find a device by its ifindex
683  *	@net: the applicable net namespace
684  *	@ifindex: index of device
685  *
686  *	Search for an interface by index. Returns a pointer to the device,
687  *	or %NULL if the device is not found. The device has not
688  *	had its reference counter increased so the caller must be careful
689  *	about locking. The caller must hold either the RTNL semaphore
690  *	or @dev_base_lock.
691  */
692 
693 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
694 {
695 	struct hlist_node *p;
696 	struct net_device *dev;
697 	struct hlist_head *head = dev_index_hash(net, ifindex);
698 
699 	hlist_for_each_entry(dev, p, head, index_hlist)
700 		if (dev->ifindex == ifindex)
701 			return dev;
702 
703 	return NULL;
704 }
705 EXPORT_SYMBOL(__dev_get_by_index);
706 
707 /**
708  *	dev_get_by_index_rcu - find a device by its ifindex
709  *	@net: the applicable net namespace
710  *	@ifindex: index of device
711  *
712  *	Search for an interface by index. Returns a pointer to the device,
713  *	or %NULL if the device is not found. The device has not
714  *	had its reference counter increased so the caller must be careful
715  *	about locking. The caller must hold RCU lock.
716  */
717 
718 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
719 {
720 	struct hlist_node *p;
721 	struct net_device *dev;
722 	struct hlist_head *head = dev_index_hash(net, ifindex);
723 
724 	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
725 		if (dev->ifindex == ifindex)
726 			return dev;
727 
728 	return NULL;
729 }
730 EXPORT_SYMBOL(dev_get_by_index_rcu);
731 
732 
733 /**
734  *	dev_get_by_index - find a device by its ifindex
735  *	@net: the applicable net namespace
736  *	@ifindex: index of device
737  *
738  *	Search for an interface by index. Returns a pointer to the device,
739  *	or NULL if the device is not found. The device returned has
740  *	had a reference added and the pointer is safe until the user calls
741  *	dev_put to indicate they have finished with it.
742  */
743 
744 struct net_device *dev_get_by_index(struct net *net, int ifindex)
745 {
746 	struct net_device *dev;
747 
748 	rcu_read_lock();
749 	dev = dev_get_by_index_rcu(net, ifindex);
750 	if (dev)
751 		dev_hold(dev);
752 	rcu_read_unlock();
753 	return dev;
754 }
755 EXPORT_SYMBOL(dev_get_by_index);
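/*
 * Illustrative sketch (not part of the original file): an ifindex lookup
 * that holds a reference across the use of the device.
 */
static void example_lookup_by_index(struct net *net, int ifindex)
{
	struct net_device *dev = dev_get_by_index(net, ifindex);

	if (dev) {
		pr_info("ifindex %d is %s\n", ifindex, dev->name);
		dev_put(dev);
	}
}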
756 
757 /**
758  *	dev_getbyhwaddr_rcu - find a device by its hardware address
759  *	@net: the applicable net namespace
760  *	@type: media type of device
761  *	@ha: hardware address
762  *
763  *	Search for an interface by MAC address. Returns a pointer to the
764  *	device, or NULL if the device is not found.
765  *	The caller must hold RCU or RTNL.
766  *	The returned device has not had its ref count increased
767  *	and the caller must therefore be careful about locking.
768  *
769  */
770 
771 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
772 				       const char *ha)
773 {
774 	struct net_device *dev;
775 
776 	for_each_netdev_rcu(net, dev)
777 		if (dev->type == type &&
778 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
779 			return dev;
780 
781 	return NULL;
782 }
783 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
784 
785 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
786 {
787 	struct net_device *dev;
788 
789 	ASSERT_RTNL();
790 	for_each_netdev(net, dev)
791 		if (dev->type == type)
792 			return dev;
793 
794 	return NULL;
795 }
796 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
797 
798 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
799 {
800 	struct net_device *dev, *ret = NULL;
801 
802 	rcu_read_lock();
803 	for_each_netdev_rcu(net, dev)
804 		if (dev->type == type) {
805 			dev_hold(dev);
806 			ret = dev;
807 			break;
808 		}
809 	rcu_read_unlock();
810 	return ret;
811 }
812 EXPORT_SYMBOL(dev_getfirstbyhwtype);
813 
814 /**
815  *	dev_get_by_flags_rcu - find any device with given flags
816  *	@net: the applicable net namespace
817  *	@if_flags: IFF_* values
818  *	@mask: bitmask of bits in if_flags to check
819  *
820  *	Search for any interface with the given flags. Returns a pointer to
821  *	the device, or NULL if no device is found. Must be called inside
822  *	rcu_read_lock(), and result refcount is unchanged.
823  */
824 
825 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
826 				    unsigned short mask)
827 {
828 	struct net_device *dev, *ret;
829 
830 	ret = NULL;
831 	for_each_netdev_rcu(net, dev) {
832 		if (((dev->flags ^ if_flags) & mask) == 0) {
833 			ret = dev;
834 			break;
835 		}
836 	}
837 	return ret;
838 }
839 EXPORT_SYMBOL(dev_get_by_flags_rcu);
840 
841 /**
842  *	dev_valid_name - check if name is okay for network device
843  *	@name: name string
844  *
845  *	Network device names need to be valid file names
846  *	to allow sysfs to work.  We also disallow any kind of
847  *	whitespace.
848  */
849 bool dev_valid_name(const char *name)
850 {
851 	if (*name == '\0')
852 		return false;
853 	if (strlen(name) >= IFNAMSIZ)
854 		return false;
855 	if (!strcmp(name, ".") || !strcmp(name, ".."))
856 		return false;
857 
858 	while (*name) {
859 		if (*name == '/' || isspace(*name))
860 			return false;
861 		name++;
862 	}
863 	return true;
864 }
865 EXPORT_SYMBOL(dev_valid_name);
866 
867 /**
868  *	__dev_alloc_name - allocate a name for a device
869  *	@net: network namespace to allocate the device name in
870  *	@name: name format string
871  *	@buf:  scratch buffer and result name string
872  *
873  *	Passed a format string - e.g. "lt%d" - it will try to find a suitable
874  *	id. It scans the list of devices to build up a free map, then chooses
875  *	the first empty slot. The caller must hold the dev_base or rtnl lock
876  *	while allocating the name and adding the device in order to avoid
877  *	duplicates.
878  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
879  *	Returns the number of the unit assigned or a negative errno code.
880  */
881 
882 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
883 {
884 	int i = 0;
885 	const char *p;
886 	const int max_netdevices = 8*PAGE_SIZE;
887 	unsigned long *inuse;
888 	struct net_device *d;
889 
890 	p = strnchr(name, IFNAMSIZ-1, '%');
891 	if (p) {
892 		/*
893 		 * Verify the string as this thing may have come from
894 		 * the user.  There must be either one "%d" and no other "%"
895 		 * characters.
896 		 */
897 		if (p[1] != 'd' || strchr(p + 2, '%'))
898 			return -EINVAL;
899 
900 		/* Use one page as a bit array of possible slots */
901 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
902 		if (!inuse)
903 			return -ENOMEM;
904 
905 		for_each_netdev(net, d) {
906 			if (!sscanf(d->name, name, &i))
907 				continue;
908 			if (i < 0 || i >= max_netdevices)
909 				continue;
910 
911 			/*  avoid cases where sscanf is not exact inverse of printf */
912 			snprintf(buf, IFNAMSIZ, name, i);
913 			if (!strncmp(buf, d->name, IFNAMSIZ))
914 				set_bit(i, inuse);
915 		}
916 
917 		i = find_first_zero_bit(inuse, max_netdevices);
918 		free_page((unsigned long) inuse);
919 	}
920 
921 	if (buf != name)
922 		snprintf(buf, IFNAMSIZ, name, i);
923 	if (!__dev_get_by_name(net, buf))
924 		return i;
925 
926 	/* It is possible to run out of possible slots
927 	 * when the name is long and there isn't enough space left
928 	 * for the digits, or if all bits are used.
929 	 */
930 	return -ENFILE;
931 }
932 
933 /**
934  *	dev_alloc_name - allocate a name for a device
935  *	@dev: device
936  *	@name: name format string
937  *
938  *	Passed a format string - e.g. "lt%d" - it will try to find a suitable
939  *	id. It scans the list of devices to build up a free map, then chooses
940  *	the first empty slot. The caller must hold the dev_base or rtnl lock
941  *	while allocating the name and adding the device in order to avoid
942  *	duplicates.
943  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
944  *	Returns the number of the unit assigned or a negative errno code.
945  */
946 
947 int dev_alloc_name(struct net_device *dev, const char *name)
948 {
949 	char buf[IFNAMSIZ];
950 	struct net *net;
951 	int ret;
952 
953 	BUG_ON(!dev_net(dev));
954 	net = dev_net(dev);
955 	ret = __dev_alloc_name(net, name, buf);
956 	if (ret >= 0)
957 		strlcpy(dev->name, buf, IFNAMSIZ);
958 	return ret;
959 }
960 EXPORT_SYMBOL(dev_alloc_name);
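/*
 * Illustrative sketch (not part of the original file): a driver asking for
 * the next free "foo%d" name before registering the device.  The "foo%d"
 * format string is made up; the caller is assumed to hold RTNL as required
 * above.
 */
static int example_name_device(struct net_device *dev)
{
	int unit = dev_alloc_name(dev, "foo%d");

	if (unit < 0)
		return unit;
	/* dev->name now holds e.g. "foo0"; unit is the assigned number. */
	return 0;
}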
961 
962 static int dev_get_valid_name(struct net_device *dev, const char *name)
963 {
964 	struct net *net;
965 
966 	BUG_ON(!dev_net(dev));
967 	net = dev_net(dev);
968 
969 	if (!dev_valid_name(name))
970 		return -EINVAL;
971 
972 	if (strchr(name, '%'))
973 		return dev_alloc_name(dev, name);
974 	else if (__dev_get_by_name(net, name))
975 		return -EEXIST;
976 	else if (dev->name != name)
977 		strlcpy(dev->name, name, IFNAMSIZ);
978 
979 	return 0;
980 }
981 
982 /**
983  *	dev_change_name - change name of a device
984  *	@dev: device
985  *	@newname: name (or format string) must be at least IFNAMSIZ
986  *
987  *	Change the name of a device. A format string such as "eth%d"
988  *	can be passed for wildcarding.
989  */
990 int dev_change_name(struct net_device *dev, const char *newname)
991 {
992 	char oldname[IFNAMSIZ];
993 	int err = 0;
994 	int ret;
995 	struct net *net;
996 
997 	ASSERT_RTNL();
998 	BUG_ON(!dev_net(dev));
999 
1000 	net = dev_net(dev);
1001 	if (dev->flags & IFF_UP)
1002 		return -EBUSY;
1003 
1004 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
1005 		return 0;
1006 
1007 	memcpy(oldname, dev->name, IFNAMSIZ);
1008 
1009 	err = dev_get_valid_name(dev, newname);
1010 	if (err < 0)
1011 		return err;
1012 
1013 rollback:
1014 	ret = device_rename(&dev->dev, dev->name);
1015 	if (ret) {
1016 		memcpy(dev->name, oldname, IFNAMSIZ);
1017 		return ret;
1018 	}
1019 
1020 	write_lock_bh(&dev_base_lock);
1021 	hlist_del_rcu(&dev->name_hlist);
1022 	write_unlock_bh(&dev_base_lock);
1023 
1024 	synchronize_rcu();
1025 
1026 	write_lock_bh(&dev_base_lock);
1027 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1028 	write_unlock_bh(&dev_base_lock);
1029 
1030 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1031 	ret = notifier_to_errno(ret);
1032 
1033 	if (ret) {
1034 		/* err >= 0 after dev_alloc_name() or stores the first errno */
1035 		if (err >= 0) {
1036 			err = ret;
1037 			memcpy(dev->name, oldname, IFNAMSIZ);
1038 			goto rollback;
1039 		} else {
1040 			pr_err("%s: name change rollback failed: %d\n",
1041 			       dev->name, ret);
1042 		}
1043 	}
1044 
1045 	return err;
1046 }
1047 
1048 /**
1049  *	dev_set_alias - change ifalias of a device
1050  *	@dev: device
1051  *	@alias: name up to IFALIASZ
1052  *	@len: limit of bytes to copy from @alias
1053  *
1054  *	Set the ifalias for a device.
1055  */
1056 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1057 {
1058 	ASSERT_RTNL();
1059 
1060 	if (len >= IFALIASZ)
1061 		return -EINVAL;
1062 
1063 	if (!len) {
1064 		if (dev->ifalias) {
1065 			kfree(dev->ifalias);
1066 			dev->ifalias = NULL;
1067 		}
1068 		return 0;
1069 	}
1070 
1071 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1072 	if (!dev->ifalias)
1073 		return -ENOMEM;
1074 
1075 	strlcpy(dev->ifalias, alias, len+1);
1076 	return len;
1077 }
1078 
1079 
1080 /**
1081  *	netdev_features_change - device changes features
1082  *	@dev: device to cause notification
1083  *
1084  *	Called to indicate a device has changed features.
1085  */
1086 void netdev_features_change(struct net_device *dev)
1087 {
1088 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1089 }
1090 EXPORT_SYMBOL(netdev_features_change);
1091 
1092 /**
1093  *	netdev_state_change - device changes state
1094  *	@dev: device to cause notification
1095  *
1096  *	Called to indicate a device has changed state. This function calls
1097  *	the notifier chains for netdev_chain and sends a NEWLINK message
1098  *	to the routing socket.
1099  */
1100 void netdev_state_change(struct net_device *dev)
1101 {
1102 	if (dev->flags & IFF_UP) {
1103 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1104 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1105 	}
1106 }
1107 EXPORT_SYMBOL(netdev_state_change);
1108 
1109 int netdev_bonding_change(struct net_device *dev, unsigned long event)
1110 {
1111 	return call_netdevice_notifiers(event, dev);
1112 }
1113 EXPORT_SYMBOL(netdev_bonding_change);
1114 
1115 /**
1116  *	dev_load 	- load a network module
1117  *	@net: the applicable net namespace
1118  *	@name: name of interface
1119  *
1120  *	If a network interface is not present and the process has suitable
1121  *	privileges this function loads the module. If module loading is not
1122  *	available in this kernel then it becomes a nop.
1123  */
1124 
1125 void dev_load(struct net *net, const char *name)
1126 {
1127 	struct net_device *dev;
1128 	int no_module;
1129 
1130 	rcu_read_lock();
1131 	dev = dev_get_by_name_rcu(net, name);
1132 	rcu_read_unlock();
1133 
1134 	no_module = !dev;
1135 	if (no_module && capable(CAP_NET_ADMIN))
1136 		no_module = request_module("netdev-%s", name);
1137 	if (no_module && capable(CAP_SYS_MODULE)) {
1138 		if (!request_module("%s", name))
1139 			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
1140 				name);
1141 	}
1142 }
1143 EXPORT_SYMBOL(dev_load);
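/*
 * Example (illustrative): auto-loading relies on a "netdev-<ifname>" module
 * alias.  A driver can declare one with, e.g., MODULE_ALIAS_NETDEV("mytap0"),
 * or an administrator can add "alias netdev-mytap0 mytap" to the modprobe
 * configuration; "mytap" is a made-up module name.
 */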
1144 
1145 static int __dev_open(struct net_device *dev)
1146 {
1147 	const struct net_device_ops *ops = dev->netdev_ops;
1148 	int ret;
1149 
1150 	ASSERT_RTNL();
1151 
1152 	if (!netif_device_present(dev))
1153 		return -ENODEV;
1154 
1155 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1156 	ret = notifier_to_errno(ret);
1157 	if (ret)
1158 		return ret;
1159 
1160 	set_bit(__LINK_STATE_START, &dev->state);
1161 
1162 	if (ops->ndo_validate_addr)
1163 		ret = ops->ndo_validate_addr(dev);
1164 
1165 	if (!ret && ops->ndo_open)
1166 		ret = ops->ndo_open(dev);
1167 
1168 	if (ret)
1169 		clear_bit(__LINK_STATE_START, &dev->state);
1170 	else {
1171 		dev->flags |= IFF_UP;
1172 		net_dmaengine_get();
1173 		dev_set_rx_mode(dev);
1174 		dev_activate(dev);
1175 	}
1176 
1177 	return ret;
1178 }
1179 
1180 /**
1181  *	dev_open	- prepare an interface for use.
1182  *	@dev:	device to open
1183  *
1184  *	Takes a device from down to up state. The device's private open
1185  *	function is invoked and then the multicast lists are loaded. Finally
1186  *	the device is moved into the up state and a %NETDEV_UP message is
1187  *	sent to the netdev notifier chain.
1188  *
1189  *	Calling this function on an active interface is a nop. On a failure
1190  *	a negative errno code is returned.
1191  */
1192 int dev_open(struct net_device *dev)
1193 {
1194 	int ret;
1195 
1196 	if (dev->flags & IFF_UP)
1197 		return 0;
1198 
1199 	ret = __dev_open(dev);
1200 	if (ret < 0)
1201 		return ret;
1202 
1203 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1204 	call_netdevice_notifiers(NETDEV_UP, dev);
1205 
1206 	return ret;
1207 }
1208 EXPORT_SYMBOL(dev_open);
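/*
 * Illustrative sketch (not part of the original file): bringing an
 * interface up and back down from process context.  Both calls require
 * the RTNL lock, which the sketch takes itself.
 */
static int example_cycle_device(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_open(dev);
	if (!err)
		dev_close(dev);
	rtnl_unlock();
	return err;
}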
1209 
1210 static int __dev_close_many(struct list_head *head)
1211 {
1212 	struct net_device *dev;
1213 
1214 	ASSERT_RTNL();
1215 	might_sleep();
1216 
1217 	list_for_each_entry(dev, head, unreg_list) {
1218 		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1219 
1220 		clear_bit(__LINK_STATE_START, &dev->state);
1221 
1222 		/* Synchronize to scheduled poll. We cannot touch poll list, it
1223 		 * can be even on different cpu. So just clear netif_running().
1224 		 *
1225 		 * dev->stop() will invoke napi_disable() on all of its
1226 		 * napi_struct instances on this device.
1227 		 */
1228 		smp_mb__after_clear_bit(); /* Commit netif_running(). */
1229 	}
1230 
1231 	dev_deactivate_many(head);
1232 
1233 	list_for_each_entry(dev, head, unreg_list) {
1234 		const struct net_device_ops *ops = dev->netdev_ops;
1235 
1236 		/*
1237 		 *	Call the device-specific close. This cannot fail and
1238 		 *	is only done if the device is UP.
1239 		 *
1240 		 *	We allow it to be called even after a DETACH hot-plug
1241 		 *	event.
1242 		 */
1243 		if (ops->ndo_stop)
1244 			ops->ndo_stop(dev);
1245 
1246 		dev->flags &= ~IFF_UP;
1247 		net_dmaengine_put();
1248 	}
1249 
1250 	return 0;
1251 }
1252 
1253 static int __dev_close(struct net_device *dev)
1254 {
1255 	int retval;
1256 	LIST_HEAD(single);
1257 
1258 	list_add(&dev->unreg_list, &single);
1259 	retval = __dev_close_many(&single);
1260 	list_del(&single);
1261 	return retval;
1262 }
1263 
1264 static int dev_close_many(struct list_head *head)
1265 {
1266 	struct net_device *dev, *tmp;
1267 	LIST_HEAD(tmp_list);
1268 
1269 	list_for_each_entry_safe(dev, tmp, head, unreg_list)
1270 		if (!(dev->flags & IFF_UP))
1271 			list_move(&dev->unreg_list, &tmp_list);
1272 
1273 	__dev_close_many(head);
1274 
1275 	list_for_each_entry(dev, head, unreg_list) {
1276 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1277 		call_netdevice_notifiers(NETDEV_DOWN, dev);
1278 	}
1279 
1280 	/* rollback_registered_many needs the complete original list */
1281 	list_splice(&tmp_list, head);
1282 	return 0;
1283 }
1284 
1285 /**
1286  *	dev_close - shutdown an interface.
1287  *	@dev: device to shutdown
1288  *
1289  *	This function moves an active device into down state. A
1290  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1291  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1292  *	chain.
1293  */
1294 int dev_close(struct net_device *dev)
1295 {
1296 	if (dev->flags & IFF_UP) {
1297 		LIST_HEAD(single);
1298 
1299 		list_add(&dev->unreg_list, &single);
1300 		dev_close_many(&single);
1301 		list_del(&single);
1302 	}
1303 	return 0;
1304 }
1305 EXPORT_SYMBOL(dev_close);
1306 
1307 
1308 /**
1309  *	dev_disable_lro - disable Large Receive Offload on a device
1310  *	@dev: device
1311  *
1312  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1313  *	called under RTNL.  This is needed if received packets may be
1314  *	forwarded to another interface.
1315  */
1316 void dev_disable_lro(struct net_device *dev)
1317 {
1318 	/*
1319 	 * If we're trying to disable lro on a vlan device
1320 	 * use the underlying physical device instead
1321 	 */
1322 	if (is_vlan_dev(dev))
1323 		dev = vlan_dev_real_dev(dev);
1324 
1325 	dev->wanted_features &= ~NETIF_F_LRO;
1326 	netdev_update_features(dev);
1327 
1328 	if (unlikely(dev->features & NETIF_F_LRO))
1329 		netdev_WARN(dev, "failed to disable LRO!\n");
1330 }
1331 EXPORT_SYMBOL(dev_disable_lro);
1332 
1333 
1334 static int dev_boot_phase = 1;
1335 
1336 /**
1337  *	register_netdevice_notifier - register a network notifier block
1338  *	@nb: notifier
1339  *
1340  *	Register a notifier to be called when network device events occur.
1341  *	The notifier passed is linked into the kernel structures and must
1342  *	not be reused until it has been unregistered. A negative errno code
1343  *	is returned on a failure.
1344  *
1345  * 	When registered, all registration and up events are replayed
1346  *	to the new notifier to allow the notifier to have a race-free
1347  *	view of the network device list.
1348  */
1349 
1350 int register_netdevice_notifier(struct notifier_block *nb)
1351 {
1352 	struct net_device *dev;
1353 	struct net_device *last;
1354 	struct net *net;
1355 	int err;
1356 
1357 	rtnl_lock();
1358 	err = raw_notifier_chain_register(&netdev_chain, nb);
1359 	if (err)
1360 		goto unlock;
1361 	if (dev_boot_phase)
1362 		goto unlock;
1363 	for_each_net(net) {
1364 		for_each_netdev(net, dev) {
1365 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1366 			err = notifier_to_errno(err);
1367 			if (err)
1368 				goto rollback;
1369 
1370 			if (!(dev->flags & IFF_UP))
1371 				continue;
1372 
1373 			nb->notifier_call(nb, NETDEV_UP, dev);
1374 		}
1375 	}
1376 
1377 unlock:
1378 	rtnl_unlock();
1379 	return err;
1380 
1381 rollback:
1382 	last = dev;
1383 	for_each_net(net) {
1384 		for_each_netdev(net, dev) {
1385 			if (dev == last)
1386 				goto outroll;
1387 
1388 			if (dev->flags & IFF_UP) {
1389 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1390 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1391 			}
1392 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1393 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1394 		}
1395 	}
1396 
1397 outroll:
1398 	raw_notifier_chain_unregister(&netdev_chain, nb);
1399 	goto unlock;
1400 }
1401 EXPORT_SYMBOL(register_netdevice_notifier);
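/*
 * Illustrative sketch (not part of the original file): a minimal notifier
 * that logs devices coming up and going down.  All names are made up.
 * Note that in this kernel the notifier's data pointer is the
 * struct net_device itself.
 */
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		pr_info("%s is up\n", dev->name);
		break;
	case NETDEV_DOWN:
		pr_info("%s is down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_notifier = {
	.notifier_call = example_netdev_event,
};

/* Pair register_netdevice_notifier(&example_netdev_notifier) with
 * unregister_netdevice_notifier(&example_netdev_notifier).
 */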
1402 
1403 /**
1404  *	unregister_netdevice_notifier - unregister a network notifier block
1405  *	@nb: notifier
1406  *
1407  *	Unregister a notifier previously registered by
1408  *	register_netdevice_notifier(). The notifier is unlinked from the
1409  *	kernel structures and may then be reused. A negative errno code
1410  *	is returned on a failure.
1411  *
1412  * 	After unregistering, unregister and down device events are synthesized
1413  *	for all devices on the device list and sent to the removed notifier,
1414  *	removing the need for special-case cleanup code.
1415  */
1416 
1417 int unregister_netdevice_notifier(struct notifier_block *nb)
1418 {
1419 	struct net_device *dev;
1420 	struct net *net;
1421 	int err;
1422 
1423 	rtnl_lock();
1424 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1425 	if (err)
1426 		goto unlock;
1427 
1428 	for_each_net(net) {
1429 		for_each_netdev(net, dev) {
1430 			if (dev->flags & IFF_UP) {
1431 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1432 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1433 			}
1434 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1435 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1436 		}
1437 	}
1438 unlock:
1439 	rtnl_unlock();
1440 	return err;
1441 }
1442 EXPORT_SYMBOL(unregister_netdevice_notifier);
1443 
1444 /**
1445  *	call_netdevice_notifiers - call all network notifier blocks
1446  *      @val: value passed unmodified to notifier function
1447  *      @dev: net_device pointer passed unmodified to notifier function
1448  *
1449  *	Call all network notifier blocks.  Parameters and return value
1450  *	are as for raw_notifier_call_chain().
1451  */
1452 
1453 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1454 {
1455 	ASSERT_RTNL();
1456 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1457 }
1458 EXPORT_SYMBOL(call_netdevice_notifiers);
1459 
1460 static struct static_key netstamp_needed __read_mostly;
1461 #ifdef HAVE_JUMP_LABEL
1462 /* We are not allowed to call static_key_slow_dec() from irq context
1463  * If net_disable_timestamp() is called from irq context, defer the
1464  * static_key_slow_dec() calls.
1465  */
1466 static atomic_t netstamp_needed_deferred;
1467 #endif
1468 
1469 void net_enable_timestamp(void)
1470 {
1471 #ifdef HAVE_JUMP_LABEL
1472 	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1473 
1474 	if (deferred) {
1475 		while (--deferred)
1476 			static_key_slow_dec(&netstamp_needed);
1477 		return;
1478 	}
1479 #endif
1480 	WARN_ON(in_interrupt());
1481 	static_key_slow_inc(&netstamp_needed);
1482 }
1483 EXPORT_SYMBOL(net_enable_timestamp);
1484 
1485 void net_disable_timestamp(void)
1486 {
1487 #ifdef HAVE_JUMP_LABEL
1488 	if (in_interrupt()) {
1489 		atomic_inc(&netstamp_needed_deferred);
1490 		return;
1491 	}
1492 #endif
1493 	static_key_slow_dec(&netstamp_needed);
1494 }
1495 EXPORT_SYMBOL(net_disable_timestamp);
1496 
1497 static inline void net_timestamp_set(struct sk_buff *skb)
1498 {
1499 	skb->tstamp.tv64 = 0;
1500 	if (static_key_false(&netstamp_needed))
1501 		__net_timestamp(skb);
1502 }
1503 
1504 #define net_timestamp_check(COND, SKB)			\
1505 	if (static_key_false(&netstamp_needed)) {		\
1506 		if ((COND) && !(SKB)->tstamp.tv64)	\
1507 			__net_timestamp(SKB);		\
1508 	}						\
1509 
1510 static int net_hwtstamp_validate(struct ifreq *ifr)
1511 {
1512 	struct hwtstamp_config cfg;
1513 	enum hwtstamp_tx_types tx_type;
1514 	enum hwtstamp_rx_filters rx_filter;
1515 	int tx_type_valid = 0;
1516 	int rx_filter_valid = 0;
1517 
1518 	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
1519 		return -EFAULT;
1520 
1521 	if (cfg.flags) /* reserved for future extensions */
1522 		return -EINVAL;
1523 
1524 	tx_type = cfg.tx_type;
1525 	rx_filter = cfg.rx_filter;
1526 
1527 	switch (tx_type) {
1528 	case HWTSTAMP_TX_OFF:
1529 	case HWTSTAMP_TX_ON:
1530 	case HWTSTAMP_TX_ONESTEP_SYNC:
1531 		tx_type_valid = 1;
1532 		break;
1533 	}
1534 
1535 	switch (rx_filter) {
1536 	case HWTSTAMP_FILTER_NONE:
1537 	case HWTSTAMP_FILTER_ALL:
1538 	case HWTSTAMP_FILTER_SOME:
1539 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
1540 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
1541 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
1542 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
1543 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
1544 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
1545 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
1546 	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
1547 	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
1548 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
1549 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
1550 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
1551 		rx_filter_valid = 1;
1552 		break;
1553 	}
1554 
1555 	if (!tx_type_valid || !rx_filter_valid)
1556 		return -ERANGE;
1557 
1558 	return 0;
1559 }
1560 
1561 static inline bool is_skb_forwardable(struct net_device *dev,
1562 				      struct sk_buff *skb)
1563 {
1564 	unsigned int len;
1565 
1566 	if (!(dev->flags & IFF_UP))
1567 		return false;
1568 
1569 	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1570 	if (skb->len <= len)
1571 		return true;
1572 
1573 	/* if TSO is enabled, we don't care about the length as the packet
1574 	 * could be forwarded without being segmented before
1575 	 */
1576 	if (skb_is_gso(skb))
1577 		return true;
1578 
1579 	return false;
1580 }
1581 
1582 /**
1583  * dev_forward_skb - loopback an skb to another netif
1584  *
1585  * @dev: destination network device
1586  * @skb: buffer to forward
1587  *
1588  * return values:
1589  *	NET_RX_SUCCESS	(no congestion)
1590  *	NET_RX_DROP     (packet was dropped, but freed)
1591  *
1592  * dev_forward_skb can be used for injecting an skb from the
1593  * start_xmit function of one device into the receive queue
1594  * of another device.
1595  *
1596  * The receiving device may be in another namespace, so
1597  * we have to clear all information in the skb that could
1598  * impact namespace isolation.
1599  */
1600 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1601 {
1602 	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1603 		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1604 			atomic_long_inc(&dev->rx_dropped);
1605 			kfree_skb(skb);
1606 			return NET_RX_DROP;
1607 		}
1608 	}
1609 
1610 	skb_orphan(skb);
1611 	nf_reset(skb);
1612 
1613 	if (unlikely(!is_skb_forwardable(dev, skb))) {
1614 		atomic_long_inc(&dev->rx_dropped);
1615 		kfree_skb(skb);
1616 		return NET_RX_DROP;
1617 	}
1618 	skb->skb_iif = 0;
1619 	skb->dev = dev;
1620 	skb_dst_drop(skb);
1621 	skb->tstamp.tv64 = 0;
1622 	skb->pkt_type = PACKET_HOST;
1623 	skb->protocol = eth_type_trans(skb, dev);
1624 	skb->mark = 0;
1625 	secpath_reset(skb);
1626 	nf_reset(skb);
1627 	return netif_rx(skb);
1628 }
1629 EXPORT_SYMBOL_GPL(dev_forward_skb);
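/*
 * Illustrative sketch (not part of the original file): the transmit side
 * of a veth-like device pair handing frames straight to its peer's receive
 * path.  The private structure and its "peer" field are assumptions made
 * for the example.
 */
struct example_pair_priv {
	struct net_device *peer;
};

static netdev_tx_t example_pair_xmit(struct sk_buff *skb,
				     struct net_device *dev)
{
	struct example_pair_priv *priv = netdev_priv(dev);

	if (dev_forward_skb(priv->peer, skb) == NET_RX_SUCCESS)
		dev->stats.tx_packets++;
	else
		dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}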
1630 
1631 static inline int deliver_skb(struct sk_buff *skb,
1632 			      struct packet_type *pt_prev,
1633 			      struct net_device *orig_dev)
1634 {
1635 	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1636 		return -ENOMEM;
1637 	atomic_inc(&skb->users);
1638 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1639 }
1640 
1641 /*
1642  *	Support routine. Sends outgoing frames to any network
1643  *	taps currently in use.
1644  */
1645 
1646 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1647 {
1648 	struct packet_type *ptype;
1649 	struct sk_buff *skb2 = NULL;
1650 	struct packet_type *pt_prev = NULL;
1651 
1652 	rcu_read_lock();
1653 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1654 		/* Never send packets back to the socket
1655 		 * they originated from - MvS (miquels@drinkel.ow.org)
1656 		 */
1657 		if ((ptype->dev == dev || !ptype->dev) &&
1658 		    (ptype->af_packet_priv == NULL ||
1659 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1660 			if (pt_prev) {
1661 				deliver_skb(skb2, pt_prev, skb->dev);
1662 				pt_prev = ptype;
1663 				continue;
1664 			}
1665 
1666 			skb2 = skb_clone(skb, GFP_ATOMIC);
1667 			if (!skb2)
1668 				break;
1669 
1670 			net_timestamp_set(skb2);
1671 
1672 			/* The network header should be correctly
1673 			   set by the sender, so the reset below is
1674 			   just protection against buggy protocols.
1675 			 */
1676 			skb_reset_mac_header(skb2);
1677 
1678 			if (skb_network_header(skb2) < skb2->data ||
1679 			    skb2->network_header > skb2->tail) {
1680 				net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1681 						     ntohs(skb2->protocol),
1682 						     dev->name);
1683 				skb_reset_network_header(skb2);
1684 			}
1685 
1686 			skb2->transport_header = skb2->network_header;
1687 			skb2->pkt_type = PACKET_OUTGOING;
1688 			pt_prev = ptype;
1689 		}
1690 	}
1691 	if (pt_prev)
1692 		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1693 	rcu_read_unlock();
1694 }
1695 
1696 /**
1697  * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1698  * @dev: Network device
1699  * @txq: number of queues available
1700  *
1701  * If real_num_tx_queues is changed the tc mappings may no longer be
1702  * valid. To resolve this, verify that each tc mapping remains valid and,
1703  * if it does not, zero the mapping. With no priorities mapping to an
1704  * offset/count pair, that pair will no longer be used. In the worst case,
1705  * when TC0 is invalid, nothing can be done, so priority mappings are
1706  * disabled entirely. It is expected that drivers will fix this mapping
1707  * if they can before calling netif_set_real_num_tx_queues.
1708  */
1709 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1710 {
1711 	int i;
1712 	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1713 
1714 	/* If TC0 is invalidated disable TC mapping */
1715 	if (tc->offset + tc->count > txq) {
1716 		pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1717 		dev->num_tc = 0;
1718 		return;
1719 	}
1720 
1721 	/* Invalidated prio to tc mappings set to TC0 */
1722 	for (i = 1; i < TC_BITMASK + 1; i++) {
1723 		int q = netdev_get_prio_tc_map(dev, i);
1724 
1725 		tc = &dev->tc_to_txq[q];
1726 		if (tc->offset + tc->count > txq) {
1727 			pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1728 				i, q);
1729 			netdev_set_prio_tc_map(dev, i, 0);
1730 		}
1731 	}
1732 }
1733 
1734 /*
1735  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1736  * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
1737  */
1738 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1739 {
1740 	int rc;
1741 
1742 	if (txq < 1 || txq > dev->num_tx_queues)
1743 		return -EINVAL;
1744 
1745 	if (dev->reg_state == NETREG_REGISTERED ||
1746 	    dev->reg_state == NETREG_UNREGISTERING) {
1747 		ASSERT_RTNL();
1748 
1749 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
1750 						  txq);
1751 		if (rc)
1752 			return rc;
1753 
1754 		if (dev->num_tc)
1755 			netif_setup_tc(dev, txq);
1756 
1757 		if (txq < dev->real_num_tx_queues)
1758 			qdisc_reset_all_tx_gt(dev, txq);
1759 	}
1760 
1761 	dev->real_num_tx_queues = txq;
1762 	return 0;
1763 }
1764 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
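/*
 * Illustrative sketch (not part of the original file): a driver shrinking
 * its number of active tx queues from its own reconfiguration path.  The
 * caller here takes RTNL itself, as required once the device is registered.
 */
static int example_set_active_tx_queues(struct net_device *dev,
					unsigned int active)
{
	int err;

	rtnl_lock();
	err = netif_set_real_num_tx_queues(dev, active);
	rtnl_unlock();
	return err;
}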
1765 
1766 #ifdef CONFIG_RPS
1767 /**
1768  *	netif_set_real_num_rx_queues - set actual number of RX queues used
1769  *	@dev: Network device
1770  *	@rxq: Actual number of RX queues
1771  *
1772  *	This must be called either with the rtnl_lock held or before
1773  *	registration of the net device.  Returns 0 on success, or a
1774  *	negative error code.  If called before registration, it always
1775  *	succeeds.
1776  */
1777 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1778 {
1779 	int rc;
1780 
1781 	if (rxq < 1 || rxq > dev->num_rx_queues)
1782 		return -EINVAL;
1783 
1784 	if (dev->reg_state == NETREG_REGISTERED) {
1785 		ASSERT_RTNL();
1786 
1787 		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1788 						  rxq);
1789 		if (rc)
1790 			return rc;
1791 	}
1792 
1793 	dev->real_num_rx_queues = rxq;
1794 	return 0;
1795 }
1796 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1797 #endif
1798 
1799 /**
1800  * netif_get_num_default_rss_queues - default number of RSS queues
1801  *
1802  * This routine should set an upper limit on the number of RSS queues
1803  * used by default by multiqueue devices.
1804  */
1805 int netif_get_num_default_rss_queues(void)
1806 {
1807 	return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
1808 }
1809 EXPORT_SYMBOL(netif_get_num_default_rss_queues);
1810 
1811 static inline void __netif_reschedule(struct Qdisc *q)
1812 {
1813 	struct softnet_data *sd;
1814 	unsigned long flags;
1815 
1816 	local_irq_save(flags);
1817 	sd = &__get_cpu_var(softnet_data);
1818 	q->next_sched = NULL;
1819 	*sd->output_queue_tailp = q;
1820 	sd->output_queue_tailp = &q->next_sched;
1821 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1822 	local_irq_restore(flags);
1823 }
1824 
1825 void __netif_schedule(struct Qdisc *q)
1826 {
1827 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1828 		__netif_reschedule(q);
1829 }
1830 EXPORT_SYMBOL(__netif_schedule);
1831 
1832 void dev_kfree_skb_irq(struct sk_buff *skb)
1833 {
1834 	if (atomic_dec_and_test(&skb->users)) {
1835 		struct softnet_data *sd;
1836 		unsigned long flags;
1837 
1838 		local_irq_save(flags);
1839 		sd = &__get_cpu_var(softnet_data);
1840 		skb->next = sd->completion_queue;
1841 		sd->completion_queue = skb;
1842 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1843 		local_irq_restore(flags);
1844 	}
1845 }
1846 EXPORT_SYMBOL(dev_kfree_skb_irq);
1847 
1848 void dev_kfree_skb_any(struct sk_buff *skb)
1849 {
1850 	if (in_irq() || irqs_disabled())
1851 		dev_kfree_skb_irq(skb);
1852 	else
1853 		dev_kfree_skb(skb);
1854 }
1855 EXPORT_SYMBOL(dev_kfree_skb_any);
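
/*
 * Illustrative usage (not part of this file): a hypothetical TX completion
 * handler that may run in hard-irq context frees transmitted skbs with
 * dev_kfree_skb_any() (or dev_kfree_skb_irq() if it is known to run with
 * IRQs disabled), never with plain dev_kfree_skb():
 *
 *	while ((skb = fetch_completed_skb(priv)) != NULL)
 *		dev_kfree_skb_any(skb);
 *
 * fetch_completed_skb() stands in for the driver's own completion-ring walk.
 */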
1856 
1857 
1858 /**
1859  * netif_device_detach - mark device as removed
1860  * @dev: network device
1861  *
1862  * Mark the device as removed from the system and therefore no longer available.
1863  */
1864 void netif_device_detach(struct net_device *dev)
1865 {
1866 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1867 	    netif_running(dev)) {
1868 		netif_tx_stop_all_queues(dev);
1869 	}
1870 }
1871 EXPORT_SYMBOL(netif_device_detach);
1872 
1873 /**
1874  * netif_device_attach - mark device as attached
1875  * @dev: network device
1876  *
1877  * Mark the device as attached to the system and restart it if needed.
1878  */
1879 void netif_device_attach(struct net_device *dev)
1880 {
1881 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1882 	    netif_running(dev)) {
1883 		netif_tx_wake_all_queues(dev);
1884 		__netdev_watchdog_up(dev);
1885 	}
1886 }
1887 EXPORT_SYMBOL(netif_device_attach);
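
/*
 * Illustrative usage (not part of this file): a hypothetical driver's
 * power-management hooks typically pair these two helpers:
 *
 *	static int foo_suspend(struct device *d)
 *	{
 *		netif_device_detach(foo_to_netdev(d));
 *		return 0;
 *	}
 *
 *	static int foo_resume(struct device *d)
 *	{
 *		netif_device_attach(foo_to_netdev(d));
 *		return 0;
 *	}
 *
 * foo_to_netdev() stands in for however the driver reaches its net_device.
 */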
1888 
1889 static void skb_warn_bad_offload(const struct sk_buff *skb)
1890 {
1891 	static const netdev_features_t null_features = 0;
1892 	struct net_device *dev = skb->dev;
1893 	const char *driver = "";
1894 
1895 	if (dev && dev->dev.parent)
1896 		driver = dev_driver_string(dev->dev.parent);
1897 
1898 	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
1899 	     "gso_type=%d ip_summed=%d\n",
1900 	     driver, dev ? &dev->features : &null_features,
1901 	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
1902 	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
1903 	     skb_shinfo(skb)->gso_type, skb->ip_summed);
1904 }
1905 
1906 /*
1907  * Invalidate hardware checksum when packet is to be mangled, and
1908  * complete checksum manually on outgoing path.
1909  */
1910 int skb_checksum_help(struct sk_buff *skb)
1911 {
1912 	__wsum csum;
1913 	int ret = 0, offset;
1914 
1915 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1916 		goto out_set_summed;
1917 
1918 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1919 		skb_warn_bad_offload(skb);
1920 		return -EINVAL;
1921 	}
1922 
1923 	offset = skb_checksum_start_offset(skb);
1924 	BUG_ON(offset >= skb_headlen(skb));
1925 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1926 
1927 	offset += skb->csum_offset;
1928 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1929 
1930 	if (skb_cloned(skb) &&
1931 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1932 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1933 		if (ret)
1934 			goto out;
1935 	}
1936 
1937 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1938 out_set_summed:
1939 	skb->ip_summed = CHECKSUM_NONE;
1940 out:
1941 	return ret;
1942 }
1943 EXPORT_SYMBOL(skb_checksum_help);
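
/*
 * Illustrative usage (not part of this file): a hypothetical driver whose
 * hardware cannot checksum a given protocol can fall back to software in
 * its ndo_start_xmit():
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    !foo_hw_can_csum(skb) && skb_checksum_help(skb))
 *		goto drop;
 *
 * foo_hw_can_csum() is a stand-in for the driver's own capability test;
 * the core already does the equivalent in dev_hard_start_xmit() below.
 */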
1944 
1945 /**
1946  *	skb_gso_segment - Perform segmentation on skb.
1947  *	@skb: buffer to segment
1948  *	@features: features for the output path (see dev->features)
1949  *
1950  *	This function segments the given skb and returns a list of segments.
1951  *
1952  *	It may return NULL if the skb requires no segmentation.  This is
1953  *	only possible when GSO is used for verifying header integrity.
1954  */
1955 struct sk_buff *skb_gso_segment(struct sk_buff *skb,
1956 	netdev_features_t features)
1957 {
1958 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1959 	struct packet_type *ptype;
1960 	__be16 type = skb->protocol;
1961 	int vlan_depth = ETH_HLEN;
1962 	int err;
1963 
1964 	while (type == htons(ETH_P_8021Q)) {
1965 		struct vlan_hdr *vh;
1966 
1967 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
1968 			return ERR_PTR(-EINVAL);
1969 
1970 		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
1971 		type = vh->h_vlan_encapsulated_proto;
1972 		vlan_depth += VLAN_HLEN;
1973 	}
1974 
1975 	skb_reset_mac_header(skb);
1976 	skb->mac_len = skb->network_header - skb->mac_header;
1977 	__skb_pull(skb, skb->mac_len);
1978 
1979 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1980 		skb_warn_bad_offload(skb);
1981 
1982 		if (skb_header_cloned(skb) &&
1983 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1984 			return ERR_PTR(err);
1985 	}
1986 
1987 	rcu_read_lock();
1988 	list_for_each_entry_rcu(ptype,
1989 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1990 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1991 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1992 				err = ptype->gso_send_check(skb);
1993 				segs = ERR_PTR(err);
1994 				if (err || skb_gso_ok(skb, features))
1995 					break;
1996 				__skb_push(skb, (skb->data -
1997 						 skb_network_header(skb)));
1998 			}
1999 			segs = ptype->gso_segment(skb, features);
2000 			break;
2001 		}
2002 	}
2003 	rcu_read_unlock();
2004 
2005 	__skb_push(skb, skb->data - skb_mac_header(skb));
2006 
2007 	return segs;
2008 }
2009 EXPORT_SYMBOL(skb_gso_segment);
2010 
2011 /* Take action when hardware reception checksum errors are detected. */
2012 #ifdef CONFIG_BUG
2013 void netdev_rx_csum_fault(struct net_device *dev)
2014 {
2015 	if (net_ratelimit()) {
2016 		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2017 		dump_stack();
2018 	}
2019 }
2020 EXPORT_SYMBOL(netdev_rx_csum_fault);
2021 #endif
2022 
2023 /* Actually, we should eliminate this check as soon as we know that:
2024  * 1. An IOMMU is present and allows all of the memory to be mapped.
2025  * 2. No high memory really exists on this machine.
2026  */
2027 
2028 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2029 {
2030 #ifdef CONFIG_HIGHMEM
2031 	int i;
2032 	if (!(dev->features & NETIF_F_HIGHDMA)) {
2033 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2034 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2035 			if (PageHighMem(skb_frag_page(frag)))
2036 				return 1;
2037 		}
2038 	}
2039 
2040 	if (PCI_DMA_BUS_IS_PHYS) {
2041 		struct device *pdev = dev->dev.parent;
2042 
2043 		if (!pdev)
2044 			return 0;
2045 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2046 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2047 			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2048 			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2049 				return 1;
2050 		}
2051 	}
2052 #endif
2053 	return 0;
2054 }
2055 
2056 struct dev_gso_cb {
2057 	void (*destructor)(struct sk_buff *skb);
2058 };
2059 
2060 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2061 
2062 static void dev_gso_skb_destructor(struct sk_buff *skb)
2063 {
2064 	struct dev_gso_cb *cb;
2065 
2066 	do {
2067 		struct sk_buff *nskb = skb->next;
2068 
2069 		skb->next = nskb->next;
2070 		nskb->next = NULL;
2071 		kfree_skb(nskb);
2072 	} while (skb->next);
2073 
2074 	cb = DEV_GSO_CB(skb);
2075 	if (cb->destructor)
2076 		cb->destructor(skb);
2077 }
2078 
2079 /**
2080  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
2081  *	@skb: buffer to segment
2082  *	@features: device features as applicable to this skb
2083  *
2084  *	This function segments the given skb and stores the list of segments
2085  *	in skb->next.
2086  */
2087 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2088 {
2089 	struct sk_buff *segs;
2090 
2091 	segs = skb_gso_segment(skb, features);
2092 
2093 	/* Verifying header integrity only. */
2094 	if (!segs)
2095 		return 0;
2096 
2097 	if (IS_ERR(segs))
2098 		return PTR_ERR(segs);
2099 
2100 	skb->next = segs;
2101 	DEV_GSO_CB(skb)->destructor = skb->destructor;
2102 	skb->destructor = dev_gso_skb_destructor;
2103 
2104 	return 0;
2105 }
2106 
2107 static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2108 {
2109 	return ((features & NETIF_F_GEN_CSUM) ||
2110 		((features & NETIF_F_V4_CSUM) &&
2111 		 protocol == htons(ETH_P_IP)) ||
2112 		((features & NETIF_F_V6_CSUM) &&
2113 		 protocol == htons(ETH_P_IPV6)) ||
2114 		((features & NETIF_F_FCOE_CRC) &&
2115 		 protocol == htons(ETH_P_FCOE)));
2116 }
2117 
2118 static netdev_features_t harmonize_features(struct sk_buff *skb,
2119 	__be16 protocol, netdev_features_t features)
2120 {
2121 	if (!can_checksum_protocol(features, protocol)) {
2122 		features &= ~NETIF_F_ALL_CSUM;
2123 		features &= ~NETIF_F_SG;
2124 	} else if (illegal_highdma(skb->dev, skb)) {
2125 		features &= ~NETIF_F_SG;
2126 	}
2127 
2128 	return features;
2129 }
2130 
2131 netdev_features_t netif_skb_features(struct sk_buff *skb)
2132 {
2133 	__be16 protocol = skb->protocol;
2134 	netdev_features_t features = skb->dev->features;
2135 
2136 	if (protocol == htons(ETH_P_8021Q)) {
2137 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2138 		protocol = veh->h_vlan_encapsulated_proto;
2139 	} else if (!vlan_tx_tag_present(skb)) {
2140 		return harmonize_features(skb, protocol, features);
2141 	}
2142 
2143 	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
2144 
2145 	if (protocol != htons(ETH_P_8021Q)) {
2146 		return harmonize_features(skb, protocol, features);
2147 	} else {
2148 		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2149 				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
2150 		return harmonize_features(skb, protocol, features);
2151 	}
2152 }
2153 EXPORT_SYMBOL(netif_skb_features);
2154 
2155 /*
2156  * Returns true if either:
2157  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
2158  *	2. skb is fragmented and the device does not support SG, or if
2159  *	   at least one of the fragments is in highmem and the device
2160  *	   does not support DMA from it.
2161  */
2162 static inline int skb_needs_linearize(struct sk_buff *skb,
2163 				      int features)
2164 {
2165 	return skb_is_nonlinear(skb) &&
2166 			((skb_has_frag_list(skb) &&
2167 				!(features & NETIF_F_FRAGLIST)) ||
2168 			(skb_shinfo(skb)->nr_frags &&
2169 				!(features & NETIF_F_SG)));
2170 }
2171 
2172 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2173 			struct netdev_queue *txq)
2174 {
2175 	const struct net_device_ops *ops = dev->netdev_ops;
2176 	int rc = NETDEV_TX_OK;
2177 	unsigned int skb_len;
2178 
2179 	if (likely(!skb->next)) {
2180 		netdev_features_t features;
2181 
2182 		/*
2183 		 * If the device doesn't need skb->dst, release it right now
2184 		 * while it's hot in this CPU's cache.
2185 		 */
2186 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2187 			skb_dst_drop(skb);
2188 
2189 		if (!list_empty(&ptype_all))
2190 			dev_queue_xmit_nit(skb, dev);
2191 
2192 		features = netif_skb_features(skb);
2193 
2194 		if (vlan_tx_tag_present(skb) &&
2195 		    !(features & NETIF_F_HW_VLAN_TX)) {
2196 			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2197 			if (unlikely(!skb))
2198 				goto out;
2199 
2200 			skb->vlan_tci = 0;
2201 		}
2202 
2203 		if (netif_needs_gso(skb, features)) {
2204 			if (unlikely(dev_gso_segment(skb, features)))
2205 				goto out_kfree_skb;
2206 			if (skb->next)
2207 				goto gso;
2208 		} else {
2209 			if (skb_needs_linearize(skb, features) &&
2210 			    __skb_linearize(skb))
2211 				goto out_kfree_skb;
2212 
2213 			/* If packet is not checksummed and device does not
2214 			 * support checksumming for this protocol, complete
2215 			 * checksumming here.
2216 			 */
2217 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
2218 				skb_set_transport_header(skb,
2219 					skb_checksum_start_offset(skb));
2220 				if (!(features & NETIF_F_ALL_CSUM) &&
2221 				     skb_checksum_help(skb))
2222 					goto out_kfree_skb;
2223 			}
2224 		}
2225 
2226 		skb_len = skb->len;
2227 		rc = ops->ndo_start_xmit(skb, dev);
2228 		trace_net_dev_xmit(skb, rc, dev, skb_len);
2229 		if (rc == NETDEV_TX_OK)
2230 			txq_trans_update(txq);
2231 		return rc;
2232 	}
2233 
2234 gso:
2235 	do {
2236 		struct sk_buff *nskb = skb->next;
2237 
2238 		skb->next = nskb->next;
2239 		nskb->next = NULL;
2240 
2241 		/*
2242 		 * If the device doesn't need nskb->dst, release it right now
2243 		 * while it's hot in this CPU's cache.
2244 		 */
2245 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2246 			skb_dst_drop(nskb);
2247 
2248 		skb_len = nskb->len;
2249 		rc = ops->ndo_start_xmit(nskb, dev);
2250 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
2251 		if (unlikely(rc != NETDEV_TX_OK)) {
2252 			if (rc & ~NETDEV_TX_MASK)
2253 				goto out_kfree_gso_skb;
2254 			nskb->next = skb->next;
2255 			skb->next = nskb;
2256 			return rc;
2257 		}
2258 		txq_trans_update(txq);
2259 		if (unlikely(netif_xmit_stopped(txq) && skb->next))
2260 			return NETDEV_TX_BUSY;
2261 	} while (skb->next);
2262 
2263 out_kfree_gso_skb:
2264 	if (likely(skb->next == NULL))
2265 		skb->destructor = DEV_GSO_CB(skb)->destructor;
2266 out_kfree_skb:
2267 	kfree_skb(skb);
2268 out:
2269 	return rc;
2270 }
2271 
2272 static u32 hashrnd __read_mostly;
2273 
2274 /*
2275  * Returns a Tx hash based on the given packet descriptor and the number of
2276  * Tx queues to be used as a distribution range.
2277  */
2278 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2279 		  unsigned int num_tx_queues)
2280 {
2281 	u32 hash;
2282 	u16 qoffset = 0;
2283 	u16 qcount = num_tx_queues;
2284 
2285 	if (skb_rx_queue_recorded(skb)) {
2286 		hash = skb_get_rx_queue(skb);
2287 		while (unlikely(hash >= num_tx_queues))
2288 			hash -= num_tx_queues;
2289 		return hash;
2290 	}
2291 
2292 	if (dev->num_tc) {
2293 		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2294 		qoffset = dev->tc_to_txq[tc].offset;
2295 		qcount = dev->tc_to_txq[tc].count;
2296 	}
2297 
2298 	if (skb->sk && skb->sk->sk_hash)
2299 		hash = skb->sk->sk_hash;
2300 	else
2301 		hash = (__force u16) skb->protocol;
2302 	hash = jhash_1word(hash, hashrnd);
2303 
2304 	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
2305 }
2306 EXPORT_SYMBOL(__skb_tx_hash);
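
/*
 * Note on the final mapping above: ((u64)hash * qcount) >> 32 scales a
 * 32-bit hash into the range [0, qcount) without a division.  For example,
 * with qcount = 4 a hash of 0x80000000 maps to queue 2 and 0xffffffff maps
 * to queue 3, before qoffset is added.
 */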
2307 
2308 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2309 {
2310 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
2311 		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
2312 				     dev->name, queue_index,
2313 				     dev->real_num_tx_queues);
2314 		return 0;
2315 	}
2316 	return queue_index;
2317 }
2318 
2319 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
2320 {
2321 #ifdef CONFIG_XPS
2322 	struct xps_dev_maps *dev_maps;
2323 	struct xps_map *map;
2324 	int queue_index = -1;
2325 
2326 	rcu_read_lock();
2327 	dev_maps = rcu_dereference(dev->xps_maps);
2328 	if (dev_maps) {
2329 		map = rcu_dereference(
2330 		    dev_maps->cpu_map[raw_smp_processor_id()]);
2331 		if (map) {
2332 			if (map->len == 1)
2333 				queue_index = map->queues[0];
2334 			else {
2335 				u32 hash;
2336 				if (skb->sk && skb->sk->sk_hash)
2337 					hash = skb->sk->sk_hash;
2338 				else
2339 					hash = (__force u16) skb->protocol ^
2340 					    skb->rxhash;
2341 				hash = jhash_1word(hash, hashrnd);
2342 				queue_index = map->queues[
2343 				    ((u64)hash * map->len) >> 32];
2344 			}
2345 			if (unlikely(queue_index >= dev->real_num_tx_queues))
2346 				queue_index = -1;
2347 		}
2348 	}
2349 	rcu_read_unlock();
2350 
2351 	return queue_index;
2352 #else
2353 	return -1;
2354 #endif
2355 }
2356 
2357 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2358 					struct sk_buff *skb)
2359 {
2360 	int queue_index;
2361 	const struct net_device_ops *ops = dev->netdev_ops;
2362 
2363 	if (dev->real_num_tx_queues == 1)
2364 		queue_index = 0;
2365 	else if (ops->ndo_select_queue) {
2366 		queue_index = ops->ndo_select_queue(dev, skb);
2367 		queue_index = dev_cap_txqueue(dev, queue_index);
2368 	} else {
2369 		struct sock *sk = skb->sk;
2370 		queue_index = sk_tx_queue_get(sk);
2371 
2372 		if (queue_index < 0 || skb->ooo_okay ||
2373 		    queue_index >= dev->real_num_tx_queues) {
2374 			int old_index = queue_index;
2375 
2376 			queue_index = get_xps_queue(dev, skb);
2377 			if (queue_index < 0)
2378 				queue_index = skb_tx_hash(dev, skb);
2379 
2380 			if (queue_index != old_index && sk) {
2381 				struct dst_entry *dst =
2382 				    rcu_dereference_check(sk->sk_dst_cache, 1);
2383 
2384 				if (dst && skb_dst(skb) == dst)
2385 					sk_tx_queue_set(sk, queue_index);
2386 			}
2387 		}
2388 	}
2389 
2390 	skb_set_queue_mapping(skb, queue_index);
2391 	return netdev_get_tx_queue(dev, queue_index);
2392 }
2393 
2394 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2395 				 struct net_device *dev,
2396 				 struct netdev_queue *txq)
2397 {
2398 	spinlock_t *root_lock = qdisc_lock(q);
2399 	bool contended;
2400 	int rc;
2401 
2402 	qdisc_skb_cb(skb)->pkt_len = skb->len;
2403 	qdisc_calculate_pkt_len(skb, q);
2404 	/*
2405 	 * Heuristic to force contended enqueues to serialize on a
2406 	 * separate lock before trying to get the qdisc main lock.
2407 	 * This permits the __QDISC_STATE_RUNNING owner to get the lock more often
2408 	 * and dequeue packets faster.
2409 	 */
2410 	contended = qdisc_is_running(q);
2411 	if (unlikely(contended))
2412 		spin_lock(&q->busylock);
2413 
2414 	spin_lock(root_lock);
2415 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2416 		kfree_skb(skb);
2417 		rc = NET_XMIT_DROP;
2418 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2419 		   qdisc_run_begin(q)) {
2420 		/*
2421 		 * This is a work-conserving queue; there are no old skbs
2422 		 * waiting to be sent out; and the qdisc is not running -
2423 		 * xmit the skb directly.
2424 		 */
2425 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2426 			skb_dst_force(skb);
2427 
2428 		qdisc_bstats_update(q, skb);
2429 
2430 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2431 			if (unlikely(contended)) {
2432 				spin_unlock(&q->busylock);
2433 				contended = false;
2434 			}
2435 			__qdisc_run(q);
2436 		} else
2437 			qdisc_run_end(q);
2438 
2439 		rc = NET_XMIT_SUCCESS;
2440 	} else {
2441 		skb_dst_force(skb);
2442 		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2443 		if (qdisc_run_begin(q)) {
2444 			if (unlikely(contended)) {
2445 				spin_unlock(&q->busylock);
2446 				contended = false;
2447 			}
2448 			__qdisc_run(q);
2449 		}
2450 	}
2451 	spin_unlock(root_lock);
2452 	if (unlikely(contended))
2453 		spin_unlock(&q->busylock);
2454 	return rc;
2455 }
2456 
2457 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2458 static void skb_update_prio(struct sk_buff *skb)
2459 {
2460 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2461 
2462 	if (!skb->priority && skb->sk && map) {
2463 		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
2464 
2465 		if (prioidx < map->priomap_len)
2466 			skb->priority = map->priomap[prioidx];
2467 	}
2468 }
2469 #else
2470 #define skb_update_prio(skb)
2471 #endif
2472 
2473 static DEFINE_PER_CPU(int, xmit_recursion);
2474 #define RECURSION_LIMIT 10
2475 
2476 /**
2477  *	dev_loopback_xmit - loop back @skb
2478  *	@skb: buffer to transmit
2479  */
2480 int dev_loopback_xmit(struct sk_buff *skb)
2481 {
2482 	skb_reset_mac_header(skb);
2483 	__skb_pull(skb, skb_network_offset(skb));
2484 	skb->pkt_type = PACKET_LOOPBACK;
2485 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2486 	WARN_ON(!skb_dst(skb));
2487 	skb_dst_force(skb);
2488 	netif_rx_ni(skb);
2489 	return 0;
2490 }
2491 EXPORT_SYMBOL(dev_loopback_xmit);
2492 
2493 /**
2494  *	dev_queue_xmit - transmit a buffer
2495  *	@skb: buffer to transmit
2496  *
2497  *	Queue a buffer for transmission to a network device. The caller must
2498  *	have set the device and priority and built the buffer before calling
2499  *	this function. The function can be called from an interrupt.
2500  *
2501  *	A negative errno code is returned on a failure. A success does not
2502  *	guarantee the frame will be transmitted as it may be dropped due
2503  *	to congestion or traffic shaping.
2504  *
2505  * -----------------------------------------------------------------------------------
2506  *      I notice this method can also return errors from the queue disciplines,
2507  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
2508  *      be positive.
2509  *
2510  *      Regardless of the return value, the skb is consumed, so it is currently
2511  *      difficult to retry a send to this method.  (You can bump the ref count
2512  *      before sending to hold a reference for retry if you are careful.)
2513  *
2514  *      When calling this method, interrupts MUST be enabled.  This is because
2515  *      the BH enable code must have IRQs enabled so that it will not deadlock.
2516  *          --BLG
2517  */
2518 int dev_queue_xmit(struct sk_buff *skb)
2519 {
2520 	struct net_device *dev = skb->dev;
2521 	struct netdev_queue *txq;
2522 	struct Qdisc *q;
2523 	int rc = -ENOMEM;
2524 
2525 	/* Disable soft irqs for various locks below. Also
2526 	 * stops preemption for RCU.
2527 	 */
2528 	rcu_read_lock_bh();
2529 
2530 	skb_update_prio(skb);
2531 
2532 	txq = dev_pick_tx(dev, skb);
2533 	q = rcu_dereference_bh(txq->qdisc);
2534 
2535 #ifdef CONFIG_NET_CLS_ACT
2536 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2537 #endif
2538 	trace_net_dev_queue(skb);
2539 	if (q->enqueue) {
2540 		rc = __dev_xmit_skb(skb, q, dev, txq);
2541 		goto out;
2542 	}
2543 
2544 	/* The device has no queue. Common case for software devices:
2545 	   loopback, all sorts of tunnels...
2546 
2547 	   Really, it is unlikely that netif_tx_lock protection is necessary
2548 	   here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
2549 	   counters.)
2550 	   However, it is possible that they rely on the protection
2551 	   we provide here.
2552 
2553 	   Check this and shoot the lock. It is not prone to deadlocks.
2554 	   Alternatively, shoot the noqueue qdisc; that is even simpler 8)
2555 	 */
2556 	if (dev->flags & IFF_UP) {
2557 		int cpu = smp_processor_id(); /* ok because BHs are off */
2558 
2559 		if (txq->xmit_lock_owner != cpu) {
2560 
2561 			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2562 				goto recursion_alert;
2563 
2564 			HARD_TX_LOCK(dev, txq, cpu);
2565 
2566 			if (!netif_xmit_stopped(txq)) {
2567 				__this_cpu_inc(xmit_recursion);
2568 				rc = dev_hard_start_xmit(skb, dev, txq);
2569 				__this_cpu_dec(xmit_recursion);
2570 				if (dev_xmit_complete(rc)) {
2571 					HARD_TX_UNLOCK(dev, txq);
2572 					goto out;
2573 				}
2574 			}
2575 			HARD_TX_UNLOCK(dev, txq);
2576 			net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2577 					     dev->name);
2578 		} else {
2579 			/* Recursion is detected! It is possible,
2580 			 * unfortunately
2581 			 */
2582 recursion_alert:
2583 			net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2584 					     dev->name);
2585 		}
2586 	}
2587 
2588 	rc = -ENETDOWN;
2589 	rcu_read_unlock_bh();
2590 
2591 	kfree_skb(skb);
2592 	return rc;
2593 out:
2594 	rcu_read_unlock_bh();
2595 	return rc;
2596 }
2597 EXPORT_SYMBOL(dev_queue_xmit);
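
/*
 * Illustrative usage (not part of this file): a hypothetical caller that
 * has built a complete frame hands it to the device like this:
 *
 *	skb->dev = dev;
 *	skb->priority = prio;
 *	rc = dev_queue_xmit(skb);
 *
 * The skb is consumed whatever the outcome, so the caller must not touch
 * it afterwards (see the note above about retrying).
 */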
2598 
2599 
2600 /*=======================================================================
2601 			Receiver routines
2602   =======================================================================*/
2603 
2604 int netdev_max_backlog __read_mostly = 1000;
2605 int netdev_tstamp_prequeue __read_mostly = 1;
2606 int netdev_budget __read_mostly = 300;
2607 int weight_p __read_mostly = 64;            /* old backlog weight */
2608 
2609 /* Called with irq disabled */
2610 static inline void ____napi_schedule(struct softnet_data *sd,
2611 				     struct napi_struct *napi)
2612 {
2613 	list_add_tail(&napi->poll_list, &sd->poll_list);
2614 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2615 }
2616 
2617 /*
2618  * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2619  * and src/dst port numbers.  Sets rxhash in skb to a non-zero hash value
2620  * on success; zero indicates no valid hash.  Also, sets l4_rxhash in skb
2621  * if hash is a canonical 4-tuple hash over transport ports.
2622  */
2623 void __skb_get_rxhash(struct sk_buff *skb)
2624 {
2625 	struct flow_keys keys;
2626 	u32 hash;
2627 
2628 	if (!skb_flow_dissect(skb, &keys))
2629 		return;
2630 
2631 	if (keys.ports) {
2632 		if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
2633 			swap(keys.port16[0], keys.port16[1]);
2634 		skb->l4_rxhash = 1;
2635 	}
2636 
2637 	/* get a consistent hash (same value on both flow directions) */
2638 	if ((__force u32)keys.dst < (__force u32)keys.src)
2639 		swap(keys.dst, keys.src);
2640 
2641 	hash = jhash_3words((__force u32)keys.dst,
2642 			    (__force u32)keys.src,
2643 			    (__force u32)keys.ports, hashrnd);
2644 	if (!hash)
2645 		hash = 1;
2646 
2647 	skb->rxhash = hash;
2648 }
2649 EXPORT_SYMBOL(__skb_get_rxhash);
2650 
2651 #ifdef CONFIG_RPS
2652 
2653 /* One global table that all flow-based protocols share. */
2654 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2655 EXPORT_SYMBOL(rps_sock_flow_table);
2656 
2657 struct static_key rps_needed __read_mostly;
2658 
2659 static struct rps_dev_flow *
2660 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2661 	    struct rps_dev_flow *rflow, u16 next_cpu)
2662 {
2663 	if (next_cpu != RPS_NO_CPU) {
2664 #ifdef CONFIG_RFS_ACCEL
2665 		struct netdev_rx_queue *rxqueue;
2666 		struct rps_dev_flow_table *flow_table;
2667 		struct rps_dev_flow *old_rflow;
2668 		u32 flow_id;
2669 		u16 rxq_index;
2670 		int rc;
2671 
2672 		/* Should we steer this flow to a different hardware queue? */
2673 		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2674 		    !(dev->features & NETIF_F_NTUPLE))
2675 			goto out;
2676 		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2677 		if (rxq_index == skb_get_rx_queue(skb))
2678 			goto out;
2679 
2680 		rxqueue = dev->_rx + rxq_index;
2681 		flow_table = rcu_dereference(rxqueue->rps_flow_table);
2682 		if (!flow_table)
2683 			goto out;
2684 		flow_id = skb->rxhash & flow_table->mask;
2685 		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2686 							rxq_index, flow_id);
2687 		if (rc < 0)
2688 			goto out;
2689 		old_rflow = rflow;
2690 		rflow = &flow_table->flows[flow_id];
2691 		rflow->filter = rc;
2692 		if (old_rflow->filter == rflow->filter)
2693 			old_rflow->filter = RPS_NO_FILTER;
2694 	out:
2695 #endif
2696 		rflow->last_qtail =
2697 			per_cpu(softnet_data, next_cpu).input_queue_head;
2698 	}
2699 
2700 	rflow->cpu = next_cpu;
2701 	return rflow;
2702 }
2703 
2704 /*
2705  * get_rps_cpu is called from netif_receive_skb and returns the target
2706  * CPU from the RPS map of the receiving queue for a given skb.
2707  * rcu_read_lock must be held on entry.
2708  */
2709 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2710 		       struct rps_dev_flow **rflowp)
2711 {
2712 	struct netdev_rx_queue *rxqueue;
2713 	struct rps_map *map;
2714 	struct rps_dev_flow_table *flow_table;
2715 	struct rps_sock_flow_table *sock_flow_table;
2716 	int cpu = -1;
2717 	u16 tcpu;
2718 
2719 	if (skb_rx_queue_recorded(skb)) {
2720 		u16 index = skb_get_rx_queue(skb);
2721 		if (unlikely(index >= dev->real_num_rx_queues)) {
2722 			WARN_ONCE(dev->real_num_rx_queues > 1,
2723 				  "%s received packet on queue %u, but number "
2724 				  "of RX queues is %u\n",
2725 				  dev->name, index, dev->real_num_rx_queues);
2726 			goto done;
2727 		}
2728 		rxqueue = dev->_rx + index;
2729 	} else
2730 		rxqueue = dev->_rx;
2731 
2732 	map = rcu_dereference(rxqueue->rps_map);
2733 	if (map) {
2734 		if (map->len == 1 &&
2735 		    !rcu_access_pointer(rxqueue->rps_flow_table)) {
2736 			tcpu = map->cpus[0];
2737 			if (cpu_online(tcpu))
2738 				cpu = tcpu;
2739 			goto done;
2740 		}
2741 	} else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
2742 		goto done;
2743 	}
2744 
2745 	skb_reset_network_header(skb);
2746 	if (!skb_get_rxhash(skb))
2747 		goto done;
2748 
2749 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2750 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
2751 	if (flow_table && sock_flow_table) {
2752 		u16 next_cpu;
2753 		struct rps_dev_flow *rflow;
2754 
2755 		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2756 		tcpu = rflow->cpu;
2757 
2758 		next_cpu = sock_flow_table->ents[skb->rxhash &
2759 		    sock_flow_table->mask];
2760 
2761 		/*
2762 		 * If the desired CPU (where last recvmsg was done) is
2763 		 * different from current CPU (one in the rx-queue flow
2764 		 * table entry), switch if one of the following holds:
2765 		 *   - Current CPU is unset (equal to RPS_NO_CPU).
2766 		 *   - Current CPU is offline.
2767 		 *   - The current CPU's queue tail has advanced beyond the
2768 		 *     last packet that was enqueued using this table entry.
2769 		 *     This guarantees that all previous packets for the flow
2770 		 *     have been dequeued, thus preserving in order delivery.
2771 		 */
2772 		if (unlikely(tcpu != next_cpu) &&
2773 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2774 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2775 		      rflow->last_qtail)) >= 0))
2776 			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
2777 
2778 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2779 			*rflowp = rflow;
2780 			cpu = tcpu;
2781 			goto done;
2782 		}
2783 	}
2784 
2785 	if (map) {
2786 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2787 
2788 		if (cpu_online(tcpu)) {
2789 			cpu = tcpu;
2790 			goto done;
2791 		}
2792 	}
2793 
2794 done:
2795 	return cpu;
2796 }
2797 
2798 #ifdef CONFIG_RFS_ACCEL
2799 
2800 /**
2801  * rps_may_expire_flow - check whether an RFS hardware filter may be removed
2802  * @dev: Device on which the filter was set
2803  * @rxq_index: RX queue index
2804  * @flow_id: Flow ID passed to ndo_rx_flow_steer()
2805  * @filter_id: Filter ID returned by ndo_rx_flow_steer()
2806  *
2807  * Drivers that implement ndo_rx_flow_steer() should periodically call
2808  * this function for each installed filter and remove the filters for
2809  * which it returns %true.
2810  */
2811 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2812 			 u32 flow_id, u16 filter_id)
2813 {
2814 	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2815 	struct rps_dev_flow_table *flow_table;
2816 	struct rps_dev_flow *rflow;
2817 	bool expire = true;
2818 	int cpu;
2819 
2820 	rcu_read_lock();
2821 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2822 	if (flow_table && flow_id <= flow_table->mask) {
2823 		rflow = &flow_table->flows[flow_id];
2824 		cpu = ACCESS_ONCE(rflow->cpu);
2825 		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2826 		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2827 			   rflow->last_qtail) <
2828 		     (int)(10 * flow_table->mask)))
2829 			expire = false;
2830 	}
2831 	rcu_read_unlock();
2832 	return expire;
2833 }
2834 EXPORT_SYMBOL(rps_may_expire_flow);
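
/*
 * Illustrative usage (not part of this file): a hypothetical driver that
 * installed filters via ndo_rx_flow_steer() scans them periodically and
 * removes the stale ones:
 *
 *	if (rps_may_expire_flow(netdev, rxq_index, flow_id, filter_id))
 *		foo_remove_hw_filter(priv, filter_id);
 *
 * foo_remove_hw_filter() stands in for the driver's own teardown routine.
 */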
2835 
2836 #endif /* CONFIG_RFS_ACCEL */
2837 
2838 /* Called from hardirq (IPI) context */
2839 static void rps_trigger_softirq(void *data)
2840 {
2841 	struct softnet_data *sd = data;
2842 
2843 	____napi_schedule(sd, &sd->backlog);
2844 	sd->received_rps++;
2845 }
2846 
2847 #endif /* CONFIG_RPS */
2848 
2849 /*
2850  * Check if this softnet_data structure belongs to another CPU.
2851  * If yes, queue it to our IPI list and return 1;
2852  * if no, return 0.
2853  */
2854 static int rps_ipi_queued(struct softnet_data *sd)
2855 {
2856 #ifdef CONFIG_RPS
2857 	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2858 
2859 	if (sd != mysd) {
2860 		sd->rps_ipi_next = mysd->rps_ipi_list;
2861 		mysd->rps_ipi_list = sd;
2862 
2863 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2864 		return 1;
2865 	}
2866 #endif /* CONFIG_RPS */
2867 	return 0;
2868 }
2869 
2870 /*
2871  * enqueue_to_backlog is called to queue an skb to a per-CPU backlog
2872  * queue (may be a remote CPU queue).
2873  */
2874 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2875 			      unsigned int *qtail)
2876 {
2877 	struct softnet_data *sd;
2878 	unsigned long flags;
2879 
2880 	sd = &per_cpu(softnet_data, cpu);
2881 
2882 	local_irq_save(flags);
2883 
2884 	rps_lock(sd);
2885 	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2886 		if (skb_queue_len(&sd->input_pkt_queue)) {
2887 enqueue:
2888 			__skb_queue_tail(&sd->input_pkt_queue, skb);
2889 			input_queue_tail_incr_save(sd, qtail);
2890 			rps_unlock(sd);
2891 			local_irq_restore(flags);
2892 			return NET_RX_SUCCESS;
2893 		}
2894 
2895 		/* Schedule NAPI for the backlog device.
2896 		 * We can use a non-atomic operation since we own the queue lock.
2897 		 */
2898 		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2899 			if (!rps_ipi_queued(sd))
2900 				____napi_schedule(sd, &sd->backlog);
2901 		}
2902 		goto enqueue;
2903 	}
2904 
2905 	sd->dropped++;
2906 	rps_unlock(sd);
2907 
2908 	local_irq_restore(flags);
2909 
2910 	atomic_long_inc(&skb->dev->rx_dropped);
2911 	kfree_skb(skb);
2912 	return NET_RX_DROP;
2913 }
2914 
2915 /**
2916  *	netif_rx	-	post buffer to the network code
2917  *	@skb: buffer to post
2918  *
2919  *	This function receives a packet from a device driver and queues it for
2920  *	the upper (protocol) levels to process.  It always succeeds. The buffer
2921  *	may be dropped during processing for congestion control or by the
2922  *	protocol layers.
2923  *
2924  *	return values:
2925  *	NET_RX_SUCCESS	(no congestion)
2926  *	NET_RX_DROP     (packet was dropped)
2927  *
2928  */
2929 
2930 int netif_rx(struct sk_buff *skb)
2931 {
2932 	int ret;
2933 
2934 	/* if netpoll wants it, pretend we never saw it */
2935 	if (netpoll_rx(skb))
2936 		return NET_RX_DROP;
2937 
2938 	net_timestamp_check(netdev_tstamp_prequeue, skb);
2939 
2940 	trace_netif_rx(skb);
2941 #ifdef CONFIG_RPS
2942 	if (static_key_false(&rps_needed)) {
2943 		struct rps_dev_flow voidflow, *rflow = &voidflow;
2944 		int cpu;
2945 
2946 		preempt_disable();
2947 		rcu_read_lock();
2948 
2949 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
2950 		if (cpu < 0)
2951 			cpu = smp_processor_id();
2952 
2953 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2954 
2955 		rcu_read_unlock();
2956 		preempt_enable();
2957 	} else
2958 #endif
2959 	{
2960 		unsigned int qtail;
2961 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2962 		put_cpu();
2963 	}
2964 	return ret;
2965 }
2966 EXPORT_SYMBOL(netif_rx);
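
/*
 * Illustrative usage (not part of this file): a hypothetical non-NAPI
 * driver's receive interrupt hands each frame to the stack with netif_rx():
 *
 *	skb->protocol = eth_type_trans(skb, netdev);
 *	netif_rx(skb);
 *
 * NAPI drivers use netif_receive_skb()/napi_gro_receive() from their poll
 * routine instead (see below).
 */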
2967 
2968 int netif_rx_ni(struct sk_buff *skb)
2969 {
2970 	int err;
2971 
2972 	preempt_disable();
2973 	err = netif_rx(skb);
2974 	if (local_softirq_pending())
2975 		do_softirq();
2976 	preempt_enable();
2977 
2978 	return err;
2979 }
2980 EXPORT_SYMBOL(netif_rx_ni);
2981 
2982 static void net_tx_action(struct softirq_action *h)
2983 {
2984 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2985 
2986 	if (sd->completion_queue) {
2987 		struct sk_buff *clist;
2988 
2989 		local_irq_disable();
2990 		clist = sd->completion_queue;
2991 		sd->completion_queue = NULL;
2992 		local_irq_enable();
2993 
2994 		while (clist) {
2995 			struct sk_buff *skb = clist;
2996 			clist = clist->next;
2997 
2998 			WARN_ON(atomic_read(&skb->users));
2999 			trace_kfree_skb(skb, net_tx_action);
3000 			__kfree_skb(skb);
3001 		}
3002 	}
3003 
3004 	if (sd->output_queue) {
3005 		struct Qdisc *head;
3006 
3007 		local_irq_disable();
3008 		head = sd->output_queue;
3009 		sd->output_queue = NULL;
3010 		sd->output_queue_tailp = &sd->output_queue;
3011 		local_irq_enable();
3012 
3013 		while (head) {
3014 			struct Qdisc *q = head;
3015 			spinlock_t *root_lock;
3016 
3017 			head = head->next_sched;
3018 
3019 			root_lock = qdisc_lock(q);
3020 			if (spin_trylock(root_lock)) {
3021 				smp_mb__before_clear_bit();
3022 				clear_bit(__QDISC_STATE_SCHED,
3023 					  &q->state);
3024 				qdisc_run(q);
3025 				spin_unlock(root_lock);
3026 			} else {
3027 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
3028 					      &q->state)) {
3029 					__netif_reschedule(q);
3030 				} else {
3031 					smp_mb__before_clear_bit();
3032 					clear_bit(__QDISC_STATE_SCHED,
3033 						  &q->state);
3034 				}
3035 			}
3036 		}
3037 	}
3038 }
3039 
3040 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3041     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3042 /* This hook is defined here for ATM LANE */
3043 int (*br_fdb_test_addr_hook)(struct net_device *dev,
3044 			     unsigned char *addr) __read_mostly;
3045 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3046 #endif
3047 
3048 #ifdef CONFIG_NET_CLS_ACT
3049 /* TODO: Maybe we should just force sch_ingress to be compiled in
3050  * when CONFIG_NET_CLS_ACT is?  Otherwise we pay for some useless
3051  * instructions (a compare and two extra stores) right now if we don't
3052  * have it enabled but do have CONFIG_NET_CLS_ACT.
3053  * NOTE: This doesn't stop any functionality; if you don't have
3054  * the ingress scheduler, you just can't add policies on ingress.
3055  *
3056  */
3057 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3058 {
3059 	struct net_device *dev = skb->dev;
3060 	u32 ttl = G_TC_RTTL(skb->tc_verd);
3061 	int result = TC_ACT_OK;
3062 	struct Qdisc *q;
3063 
3064 	if (unlikely(MAX_RED_LOOP < ttl++)) {
3065 		net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3066 				     skb->skb_iif, dev->ifindex);
3067 		return TC_ACT_SHOT;
3068 	}
3069 
3070 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3071 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3072 
3073 	q = rxq->qdisc;
3074 	if (q != &noop_qdisc) {
3075 		spin_lock(qdisc_lock(q));
3076 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3077 			result = qdisc_enqueue_root(skb, q);
3078 		spin_unlock(qdisc_lock(q));
3079 	}
3080 
3081 	return result;
3082 }
3083 
3084 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3085 					 struct packet_type **pt_prev,
3086 					 int *ret, struct net_device *orig_dev)
3087 {
3088 	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3089 
3090 	if (!rxq || rxq->qdisc == &noop_qdisc)
3091 		goto out;
3092 
3093 	if (*pt_prev) {
3094 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
3095 		*pt_prev = NULL;
3096 	}
3097 
3098 	switch (ing_filter(skb, rxq)) {
3099 	case TC_ACT_SHOT:
3100 	case TC_ACT_STOLEN:
3101 		kfree_skb(skb);
3102 		return NULL;
3103 	}
3104 
3105 out:
3106 	skb->tc_verd = 0;
3107 	return skb;
3108 }
3109 #endif
3110 
3111 /**
3112  *	netdev_rx_handler_register - register receive handler
3113  *	@dev: device to register a handler for
3114  *	@rx_handler: receive handler to register
3115  *	@rx_handler_data: data pointer that is used by rx handler
3116  *
3117  *	Register a receive handler for a device. This handler will then be
3118  *	called from __netif_receive_skb. A negative errno code is returned
3119  *	on a failure.
3120  *
3121  *	The caller must hold the rtnl_mutex.
3122  *
3123  *	For a general description of rx_handler, see enum rx_handler_result.
3124  */
3125 int netdev_rx_handler_register(struct net_device *dev,
3126 			       rx_handler_func_t *rx_handler,
3127 			       void *rx_handler_data)
3128 {
3129 	ASSERT_RTNL();
3130 
3131 	if (dev->rx_handler)
3132 		return -EBUSY;
3133 
3134 	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3135 	rcu_assign_pointer(dev->rx_handler, rx_handler);
3136 
3137 	return 0;
3138 }
3139 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
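
/*
 * Illustrative usage (not part of this file): a hypothetical upper device
 * (in the style of bridge or macvlan) claims a port's traffic under RTNL:
 *
 *	err = netdev_rx_handler_register(port_dev, foo_handle_frame, port);
 *	...
 *	netdev_rx_handler_unregister(port_dev);
 *
 * foo_handle_frame() is a stand-in rx_handler_func_t that returns one of
 * the rx_handler_result codes consumed by __netif_receive_skb() below.
 */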
3140 
3141 /**
3142  *	netdev_rx_handler_unregister - unregister receive handler
3143  *	@dev: device to unregister a handler from
3144  *
3145  *	Unregister a receive handler from a device.
3146  *
3147  *	The caller must hold the rtnl_mutex.
3148  */
3149 void netdev_rx_handler_unregister(struct net_device *dev)
3150 {
3151 
3152 	ASSERT_RTNL();
3153 	RCU_INIT_POINTER(dev->rx_handler, NULL);
3154 	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3155 }
3156 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3157 
3158 static int __netif_receive_skb(struct sk_buff *skb)
3159 {
3160 	struct packet_type *ptype, *pt_prev;
3161 	rx_handler_func_t *rx_handler;
3162 	struct net_device *orig_dev;
3163 	struct net_device *null_or_dev;
3164 	bool deliver_exact = false;
3165 	int ret = NET_RX_DROP;
3166 	__be16 type;
3167 
3168 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
3169 
3170 	trace_netif_receive_skb(skb);
3171 
3172 	/* if we've gotten here through NAPI, check netpoll */
3173 	if (netpoll_receive_skb(skb))
3174 		return NET_RX_DROP;
3175 
3176 	orig_dev = skb->dev;
3177 
3178 	skb_reset_network_header(skb);
3179 	skb_reset_transport_header(skb);
3180 	skb_reset_mac_len(skb);
3181 
3182 	pt_prev = NULL;
3183 
3184 	rcu_read_lock();
3185 
3186 another_round:
3187 	skb->skb_iif = skb->dev->ifindex;
3188 
3189 	__this_cpu_inc(softnet_data.processed);
3190 
3191 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3192 		skb = vlan_untag(skb);
3193 		if (unlikely(!skb))
3194 			goto out;
3195 	}
3196 
3197 #ifdef CONFIG_NET_CLS_ACT
3198 	if (skb->tc_verd & TC_NCLS) {
3199 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3200 		goto ncls;
3201 	}
3202 #endif
3203 
3204 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
3205 		if (!ptype->dev || ptype->dev == skb->dev) {
3206 			if (pt_prev)
3207 				ret = deliver_skb(skb, pt_prev, orig_dev);
3208 			pt_prev = ptype;
3209 		}
3210 	}
3211 
3212 #ifdef CONFIG_NET_CLS_ACT
3213 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3214 	if (!skb)
3215 		goto out;
3216 ncls:
3217 #endif
3218 
3219 	rx_handler = rcu_dereference(skb->dev->rx_handler);
3220 	if (vlan_tx_tag_present(skb)) {
3221 		if (pt_prev) {
3222 			ret = deliver_skb(skb, pt_prev, orig_dev);
3223 			pt_prev = NULL;
3224 		}
3225 		if (vlan_do_receive(&skb, !rx_handler))
3226 			goto another_round;
3227 		else if (unlikely(!skb))
3228 			goto out;
3229 	}
3230 
3231 	if (rx_handler) {
3232 		if (pt_prev) {
3233 			ret = deliver_skb(skb, pt_prev, orig_dev);
3234 			pt_prev = NULL;
3235 		}
3236 		switch (rx_handler(&skb)) {
3237 		case RX_HANDLER_CONSUMED:
3238 			goto out;
3239 		case RX_HANDLER_ANOTHER:
3240 			goto another_round;
3241 		case RX_HANDLER_EXACT:
3242 			deliver_exact = true;
3243 		case RX_HANDLER_PASS:
3244 			break;
3245 		default:
3246 			BUG();
3247 		}
3248 	}
3249 
3250 	/* deliver only exact match when indicated */
3251 	null_or_dev = deliver_exact ? skb->dev : NULL;
3252 
3253 	type = skb->protocol;
3254 	list_for_each_entry_rcu(ptype,
3255 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3256 		if (ptype->type == type &&
3257 		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3258 		     ptype->dev == orig_dev)) {
3259 			if (pt_prev)
3260 				ret = deliver_skb(skb, pt_prev, orig_dev);
3261 			pt_prev = ptype;
3262 		}
3263 	}
3264 
3265 	if (pt_prev) {
3266 		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3267 			ret = -ENOMEM;
3268 		else
3269 			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3270 	} else {
3271 		atomic_long_inc(&skb->dev->rx_dropped);
3272 		kfree_skb(skb);
3273 		/* Jamal, now you will not be able to escape explaining
3274 		 * to me how you were going to use this. :-)
3275 		 */
3276 		ret = NET_RX_DROP;
3277 	}
3278 
3279 out:
3280 	rcu_read_unlock();
3281 	return ret;
3282 }
3283 
3284 /**
3285  *	netif_receive_skb - process receive buffer from network
3286  *	@skb: buffer to process
3287  *
3288  *	netif_receive_skb() is the main receive data processing function.
3289  *	It always succeeds. The buffer may be dropped during processing
3290  *	for congestion control or by the protocol layers.
3291  *
3292  *	This function may only be called from softirq context and interrupts
3293  *	should be enabled.
3294  *
3295  *	Return values (usually ignored):
3296  *	NET_RX_SUCCESS: no congestion
3297  *	NET_RX_DROP: packet was dropped
3298  */
3299 int netif_receive_skb(struct sk_buff *skb)
3300 {
3301 	net_timestamp_check(netdev_tstamp_prequeue, skb);
3302 
3303 	if (skb_defer_rx_timestamp(skb))
3304 		return NET_RX_SUCCESS;
3305 
3306 #ifdef CONFIG_RPS
3307 	if (static_key_false(&rps_needed)) {
3308 		struct rps_dev_flow voidflow, *rflow = &voidflow;
3309 		int cpu, ret;
3310 
3311 		rcu_read_lock();
3312 
3313 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
3314 
3315 		if (cpu >= 0) {
3316 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3317 			rcu_read_unlock();
3318 			return ret;
3319 		}
3320 		rcu_read_unlock();
3321 	}
3322 #endif
3323 	return __netif_receive_skb(skb);
3324 }
3325 EXPORT_SYMBOL(netif_receive_skb);
3326 
3327 /* Network device is going away; flush any packets still pending.
3328  * Called with irqs disabled.
3329  */
3330 static void flush_backlog(void *arg)
3331 {
3332 	struct net_device *dev = arg;
3333 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3334 	struct sk_buff *skb, *tmp;
3335 
3336 	rps_lock(sd);
3337 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3338 		if (skb->dev == dev) {
3339 			__skb_unlink(skb, &sd->input_pkt_queue);
3340 			kfree_skb(skb);
3341 			input_queue_head_incr(sd);
3342 		}
3343 	}
3344 	rps_unlock(sd);
3345 
3346 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3347 		if (skb->dev == dev) {
3348 			__skb_unlink(skb, &sd->process_queue);
3349 			kfree_skb(skb);
3350 			input_queue_head_incr(sd);
3351 		}
3352 	}
3353 }
3354 
3355 static int napi_gro_complete(struct sk_buff *skb)
3356 {
3357 	struct packet_type *ptype;
3358 	__be16 type = skb->protocol;
3359 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3360 	int err = -ENOENT;
3361 
3362 	if (NAPI_GRO_CB(skb)->count == 1) {
3363 		skb_shinfo(skb)->gso_size = 0;
3364 		goto out;
3365 	}
3366 
3367 	rcu_read_lock();
3368 	list_for_each_entry_rcu(ptype, head, list) {
3369 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3370 			continue;
3371 
3372 		err = ptype->gro_complete(skb);
3373 		break;
3374 	}
3375 	rcu_read_unlock();
3376 
3377 	if (err) {
3378 		WARN_ON(&ptype->list == head);
3379 		kfree_skb(skb);
3380 		return NET_RX_SUCCESS;
3381 	}
3382 
3383 out:
3384 	return netif_receive_skb(skb);
3385 }
3386 
3387 inline void napi_gro_flush(struct napi_struct *napi)
3388 {
3389 	struct sk_buff *skb, *next;
3390 
3391 	for (skb = napi->gro_list; skb; skb = next) {
3392 		next = skb->next;
3393 		skb->next = NULL;
3394 		napi_gro_complete(skb);
3395 	}
3396 
3397 	napi->gro_count = 0;
3398 	napi->gro_list = NULL;
3399 }
3400 EXPORT_SYMBOL(napi_gro_flush);
3401 
3402 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3403 {
3404 	struct sk_buff **pp = NULL;
3405 	struct packet_type *ptype;
3406 	__be16 type = skb->protocol;
3407 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3408 	int same_flow;
3409 	int mac_len;
3410 	enum gro_result ret;
3411 
3412 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3413 		goto normal;
3414 
3415 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
3416 		goto normal;
3417 
3418 	rcu_read_lock();
3419 	list_for_each_entry_rcu(ptype, head, list) {
3420 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3421 			continue;
3422 
3423 		skb_set_network_header(skb, skb_gro_offset(skb));
3424 		mac_len = skb->network_header - skb->mac_header;
3425 		skb->mac_len = mac_len;
3426 		NAPI_GRO_CB(skb)->same_flow = 0;
3427 		NAPI_GRO_CB(skb)->flush = 0;
3428 		NAPI_GRO_CB(skb)->free = 0;
3429 
3430 		pp = ptype->gro_receive(&napi->gro_list, skb);
3431 		break;
3432 	}
3433 	rcu_read_unlock();
3434 
3435 	if (&ptype->list == head)
3436 		goto normal;
3437 
3438 	same_flow = NAPI_GRO_CB(skb)->same_flow;
3439 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3440 
3441 	if (pp) {
3442 		struct sk_buff *nskb = *pp;
3443 
3444 		*pp = nskb->next;
3445 		nskb->next = NULL;
3446 		napi_gro_complete(nskb);
3447 		napi->gro_count--;
3448 	}
3449 
3450 	if (same_flow)
3451 		goto ok;
3452 
3453 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3454 		goto normal;
3455 
3456 	napi->gro_count++;
3457 	NAPI_GRO_CB(skb)->count = 1;
3458 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3459 	skb->next = napi->gro_list;
3460 	napi->gro_list = skb;
3461 	ret = GRO_HELD;
3462 
3463 pull:
3464 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
3465 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
3466 
3467 		BUG_ON(skb->end - skb->tail < grow);
3468 
3469 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3470 
3471 		skb->tail += grow;
3472 		skb->data_len -= grow;
3473 
3474 		skb_shinfo(skb)->frags[0].page_offset += grow;
3475 		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3476 
3477 		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3478 			skb_frag_unref(skb, 0);
3479 			memmove(skb_shinfo(skb)->frags,
3480 				skb_shinfo(skb)->frags + 1,
3481 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3482 		}
3483 	}
3484 
3485 ok:
3486 	return ret;
3487 
3488 normal:
3489 	ret = GRO_NORMAL;
3490 	goto pull;
3491 }
3492 EXPORT_SYMBOL(dev_gro_receive);
3493 
3494 static inline gro_result_t
3495 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3496 {
3497 	struct sk_buff *p;
3498 	unsigned int maclen = skb->dev->hard_header_len;
3499 
3500 	for (p = napi->gro_list; p; p = p->next) {
3501 		unsigned long diffs;
3502 
3503 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3504 		diffs |= p->vlan_tci ^ skb->vlan_tci;
3505 		if (maclen == ETH_HLEN)
3506 			diffs |= compare_ether_header(skb_mac_header(p),
3507 						      skb_gro_mac_header(skb));
3508 		else if (!diffs)
3509 			diffs = memcmp(skb_mac_header(p),
3510 				       skb_gro_mac_header(skb),
3511 				       maclen);
3512 		NAPI_GRO_CB(p)->same_flow = !diffs;
3513 		NAPI_GRO_CB(p)->flush = 0;
3514 	}
3515 
3516 	return dev_gro_receive(napi, skb);
3517 }
3518 
3519 gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3520 {
3521 	switch (ret) {
3522 	case GRO_NORMAL:
3523 		if (netif_receive_skb(skb))
3524 			ret = GRO_DROP;
3525 		break;
3526 
3527 	case GRO_DROP:
3528 		kfree_skb(skb);
3529 		break;
3530 
3531 	case GRO_MERGED_FREE:
3532 		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3533 			kmem_cache_free(skbuff_head_cache, skb);
3534 		else
3535 			__kfree_skb(skb);
3536 		break;
3537 
3538 	case GRO_HELD:
3539 	case GRO_MERGED:
3540 		break;
3541 	}
3542 
3543 	return ret;
3544 }
3545 EXPORT_SYMBOL(napi_skb_finish);
3546 
3547 void skb_gro_reset_offset(struct sk_buff *skb)
3548 {
3549 	NAPI_GRO_CB(skb)->data_offset = 0;
3550 	NAPI_GRO_CB(skb)->frag0 = NULL;
3551 	NAPI_GRO_CB(skb)->frag0_len = 0;
3552 
3553 	if (skb->mac_header == skb->tail &&
3554 	    !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
3555 		NAPI_GRO_CB(skb)->frag0 =
3556 			skb_frag_address(&skb_shinfo(skb)->frags[0]);
3557 		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
3558 	}
3559 }
3560 EXPORT_SYMBOL(skb_gro_reset_offset);
3561 
3562 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3563 {
3564 	skb_gro_reset_offset(skb);
3565 
3566 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3567 }
3568 EXPORT_SYMBOL(napi_gro_receive);
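
/*
 * Illustrative usage (not part of this file): a hypothetical NAPI driver's
 * poll routine feeds received frames through GRO:
 *
 *	skb->protocol = eth_type_trans(skb, netdev);
 *	napi_gro_receive(&priv->napi, skb);
 *
 * Frames that cannot be merged fall through to netif_receive_skb() via
 * napi_skb_finish() above.
 */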
3569 
3570 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3571 {
3572 	__skb_pull(skb, skb_headlen(skb));
3573 	/* restore the reserve we had after netdev_alloc_skb_ip_align() */
3574 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3575 	skb->vlan_tci = 0;
3576 	skb->dev = napi->dev;
3577 	skb->skb_iif = 0;
3578 
3579 	napi->skb = skb;
3580 }
3581 
3582 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3583 {
3584 	struct sk_buff *skb = napi->skb;
3585 
3586 	if (!skb) {
3587 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3588 		if (skb)
3589 			napi->skb = skb;
3590 	}
3591 	return skb;
3592 }
3593 EXPORT_SYMBOL(napi_get_frags);
3594 
3595 gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3596 			       gro_result_t ret)
3597 {
3598 	switch (ret) {
3599 	case GRO_NORMAL:
3600 	case GRO_HELD:
3601 		skb->protocol = eth_type_trans(skb, skb->dev);
3602 
3603 		if (ret == GRO_HELD)
3604 			skb_gro_pull(skb, -ETH_HLEN);
3605 		else if (netif_receive_skb(skb))
3606 			ret = GRO_DROP;
3607 		break;
3608 
3609 	case GRO_DROP:
3610 	case GRO_MERGED_FREE:
3611 		napi_reuse_skb(napi, skb);
3612 		break;
3613 
3614 	case GRO_MERGED:
3615 		break;
3616 	}
3617 
3618 	return ret;
3619 }
3620 EXPORT_SYMBOL(napi_frags_finish);
3621 
3622 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3623 {
3624 	struct sk_buff *skb = napi->skb;
3625 	struct ethhdr *eth;
3626 	unsigned int hlen;
3627 	unsigned int off;
3628 
3629 	napi->skb = NULL;
3630 
3631 	skb_reset_mac_header(skb);
3632 	skb_gro_reset_offset(skb);
3633 
3634 	off = skb_gro_offset(skb);
3635 	hlen = off + sizeof(*eth);
3636 	eth = skb_gro_header_fast(skb, off);
3637 	if (skb_gro_header_hard(skb, hlen)) {
3638 		eth = skb_gro_header_slow(skb, hlen, off);
3639 		if (unlikely(!eth)) {
3640 			napi_reuse_skb(napi, skb);
3641 			skb = NULL;
3642 			goto out;
3643 		}
3644 	}
3645 
3646 	skb_gro_pull(skb, sizeof(*eth));
3647 
3648 	/*
3649 	 * This works because the only protocols we care about don't require
3650 	 * special handling.  We'll fix it up properly at the end.
3651 	 */
3652 	skb->protocol = eth->h_proto;
3653 
3654 out:
3655 	return skb;
3656 }
3657 
3658 gro_result_t napi_gro_frags(struct napi_struct *napi)
3659 {
3660 	struct sk_buff *skb = napi_frags_skb(napi);
3661 
3662 	if (!skb)
3663 		return GRO_DROP;
3664 
3665 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3666 }
3667 EXPORT_SYMBOL(napi_gro_frags);
3668 
3669 /*
3670  * net_rps_action sends any pending IPI's for rps.
3671  * Note: called with local irq disabled, but exits with local irq enabled.
3672  */
3673 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3674 {
3675 #ifdef CONFIG_RPS
3676 	struct softnet_data *remsd = sd->rps_ipi_list;
3677 
3678 	if (remsd) {
3679 		sd->rps_ipi_list = NULL;
3680 
3681 		local_irq_enable();
3682 
3683 		/* Send pending IPI's to kick RPS processing on remote cpus. */
3684 		while (remsd) {
3685 			struct softnet_data *next = remsd->rps_ipi_next;
3686 
3687 			if (cpu_online(remsd->cpu))
3688 				__smp_call_function_single(remsd->cpu,
3689 							   &remsd->csd, 0);
3690 			remsd = next;
3691 		}
3692 	} else
3693 #endif
3694 		local_irq_enable();
3695 }
3696 
3697 static int process_backlog(struct napi_struct *napi, int quota)
3698 {
3699 	int work = 0;
3700 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
3701 
3702 #ifdef CONFIG_RPS
3703 	/* Check if we have pending IPIs; it's better to send them now
3704 	 * rather than waiting for net_rx_action() to end.
3705 	 */
3706 	if (sd->rps_ipi_list) {
3707 		local_irq_disable();
3708 		net_rps_action_and_irq_enable(sd);
3709 	}
3710 #endif
3711 	napi->weight = weight_p;
3712 	local_irq_disable();
3713 	while (work < quota) {
3714 		struct sk_buff *skb;
3715 		unsigned int qlen;
3716 
3717 		while ((skb = __skb_dequeue(&sd->process_queue))) {
3718 			local_irq_enable();
3719 			__netif_receive_skb(skb);
3720 			local_irq_disable();
3721 			input_queue_head_incr(sd);
3722 			if (++work >= quota) {
3723 				local_irq_enable();
3724 				return work;
3725 			}
3726 		}
3727 
3728 		rps_lock(sd);
3729 		qlen = skb_queue_len(&sd->input_pkt_queue);
3730 		if (qlen)
3731 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
3732 						   &sd->process_queue);
3733 
3734 		if (qlen < quota - work) {
3735 			/*
3736 			 * Inline a custom version of __napi_complete().
3737 			 * Only the current CPU owns and manipulates this napi,
3738 			 * and NAPI_STATE_SCHED is the only possible flag set on backlog,
3739 			 * so we can use a plain write instead of clear_bit()
3740 			 * and we don't need an smp_mb() memory barrier.
3741 			 */
3742 			list_del(&napi->poll_list);
3743 			napi->state = 0;
3744 
3745 			quota = work + qlen;
3746 		}
3747 		rps_unlock(sd);
3748 	}
3749 	local_irq_enable();
3750 
3751 	return work;
3752 }
3753 
3754 /**
3755  * __napi_schedule - schedule for receive
3756  * @n: entry to schedule
3757  *
3758  * The entry's receive function will be scheduled to run
3759  */
3760 void __napi_schedule(struct napi_struct *n)
3761 {
3762 	unsigned long flags;
3763 
3764 	local_irq_save(flags);
3765 	____napi_schedule(&__get_cpu_var(softnet_data), n);
3766 	local_irq_restore(flags);
3767 }
3768 EXPORT_SYMBOL(__napi_schedule);
3769 
3770 void __napi_complete(struct napi_struct *n)
3771 {
3772 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3773 	BUG_ON(n->gro_list);
3774 
3775 	list_del(&n->poll_list);
3776 	smp_mb__before_clear_bit();
3777 	clear_bit(NAPI_STATE_SCHED, &n->state);
3778 }
3779 EXPORT_SYMBOL(__napi_complete);
3780 
3781 void napi_complete(struct napi_struct *n)
3782 {
3783 	unsigned long flags;
3784 
3785 	/*
3786 	 * don't let napi dequeue from the cpu poll list
3787 	 * just in case it's running on a different cpu
3788 	 */
3789 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3790 		return;
3791 
3792 	napi_gro_flush(n);
3793 	local_irq_save(flags);
3794 	__napi_complete(n);
3795 	local_irq_restore(flags);
3796 }
3797 EXPORT_SYMBOL(napi_complete);
3798 
3799 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3800 		    int (*poll)(struct napi_struct *, int), int weight)
3801 {
3802 	INIT_LIST_HEAD(&napi->poll_list);
3803 	napi->gro_count = 0;
3804 	napi->gro_list = NULL;
3805 	napi->skb = NULL;
3806 	napi->poll = poll;
3807 	napi->weight = weight;
3808 	list_add(&napi->dev_list, &dev->napi_list);
3809 	napi->dev = dev;
3810 #ifdef CONFIG_NETPOLL
3811 	spin_lock_init(&napi->poll_lock);
3812 	napi->poll_owner = -1;
3813 #endif
3814 	set_bit(NAPI_STATE_SCHED, &napi->state);
3815 }
3816 EXPORT_SYMBOL(netif_napi_add);
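
/*
 * A typical driver pairs netif_napi_add() at probe time with napi_schedule()
 * in its interrupt handler and napi_complete() in its poll callback.  A
 * minimal sketch (not part of this file; the foo_* names are hypothetical):
 *
 *	static int foo_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
 *		int done = foo_clean_rx_ring(priv, budget);
 *
 *		if (done < budget) {
 *			napi_complete(napi);
 *			foo_enable_rx_irq(priv);
 *		}
 *		return done;
 *	}
 *
 *	static irqreturn_t foo_intr(int irq, void *data)
 *	{
 *		struct foo_priv *priv = data;
 *
 *		foo_disable_rx_irq(priv);
 *		napi_schedule(&priv->napi);
 *		return IRQ_HANDLED;
 *	}
 *
 *	At probe time:
 *		netif_napi_add(netdev, &priv->napi, foo_poll, 64);
 *		napi_enable(&priv->napi);
 */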
3817 
3818 void netif_napi_del(struct napi_struct *napi)
3819 {
3820 	struct sk_buff *skb, *next;
3821 
3822 	list_del_init(&napi->dev_list);
3823 	napi_free_frags(napi);
3824 
3825 	for (skb = napi->gro_list; skb; skb = next) {
3826 		next = skb->next;
3827 		skb->next = NULL;
3828 		kfree_skb(skb);
3829 	}
3830 
3831 	napi->gro_list = NULL;
3832 	napi->gro_count = 0;
3833 }
3834 EXPORT_SYMBOL(netif_napi_del);
3835 
3836 static void net_rx_action(struct softirq_action *h)
3837 {
3838 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3839 	unsigned long time_limit = jiffies + 2;
3840 	int budget = netdev_budget;
3841 	void *have;
3842 
3843 	local_irq_disable();
3844 
3845 	while (!list_empty(&sd->poll_list)) {
3846 		struct napi_struct *n;
3847 		int work, weight;
3848 
3849 		/* If the softirq window is exhausted then punt.
3850 		 * Allow this to run for 2 jiffies, which allows
3851 		 * an average latency of 1.5/HZ.
3852 		 */
3853 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3854 			goto softnet_break;
3855 
3856 		local_irq_enable();
3857 
3858 		/* Even though interrupts have been re-enabled, this
3859 		 * access is safe because interrupts can only add new
3860 		 * entries to the tail of this list, and only ->poll()
3861 		 * calls can remove this head entry from the list.
3862 		 */
3863 		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3864 
3865 		have = netpoll_poll_lock(n);
3866 
3867 		weight = n->weight;
3868 
3869 		/* This NAPI_STATE_SCHED test is for avoiding a race
3870 		 * with netpoll's poll_napi().  Only the entity which
3871 		 * obtains the lock and sees NAPI_STATE_SCHED set will
3872 		 * actually make the ->poll() call.  Therefore we avoid
3873 		 * accidentally calling ->poll() when NAPI is not scheduled.
3874 		 */
3875 		work = 0;
3876 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3877 			work = n->poll(n, weight);
3878 			trace_napi_poll(n);
3879 		}
3880 
3881 		WARN_ON_ONCE(work > weight);
3882 
3883 		budget -= work;
3884 
3885 		local_irq_disable();
3886 
3887 		/* Drivers must not modify the NAPI state if they
3888 		 * consume the entire weight.  In such cases this code
3889 		 * still "owns" the NAPI instance and therefore can
3890 		 * move the instance around on the list at-will.
3891 		 */
3892 		if (unlikely(work == weight)) {
3893 			if (unlikely(napi_disable_pending(n))) {
3894 				local_irq_enable();
3895 				napi_complete(n);
3896 				local_irq_disable();
3897 			} else
3898 				list_move_tail(&n->poll_list, &sd->poll_list);
3899 		}
3900 
3901 		netpoll_poll_unlock(have);
3902 	}
3903 out:
3904 	net_rps_action_and_irq_enable(sd);
3905 
3906 #ifdef CONFIG_NET_DMA
3907 	/*
3908 	 * There may not be any more sk_buffs coming right now, so push
3909 	 * any pending DMA copies to hardware
3910 	 */
3911 	dma_issue_pending_all();
3912 #endif
3913 
3914 	return;
3915 
3916 softnet_break:
3917 	sd->time_squeeze++;
3918 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
3919 	goto out;
3920 }
3921 
3922 static gifconf_func_t *gifconf_list[NPROTO];
3923 
3924 /**
3925  *	register_gifconf	-	register a SIOCGIF handler
3926  *	@family: Address family
3927  *	@gifconf: Function handler
3928  *
3929  *	Register protocol dependent address dumping routines. The handler
3930  *	that is passed must not be freed or reused until it has been replaced
3931  *	by another handler.
3932  */
3933 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3934 {
3935 	if (family >= NPROTO)
3936 		return -EINVAL;
3937 	gifconf_list[family] = gifconf;
3938 	return 0;
3939 }
3940 EXPORT_SYMBOL(register_gifconf);
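
/*
 * For example, IPv4 registers its SIOCGIFCONF handler during boot roughly as
 * follows (sketch only; the real call lives in net/ipv4/devinet.c):
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 */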
3941 
3942 
3943 /*
3944  *	Map an interface index to its name (SIOCGIFNAME)
3945  */
3946 
3947 /*
3948  *	We need this ioctl for efficient implementation of the
3949  *	if_indextoname() function required by the IPv6 API.  Without
3950  *	it, we would have to search all the interfaces to find a
3951  *	match.  --pb
3952  */
3953 
3954 static int dev_ifname(struct net *net, struct ifreq __user *arg)
3955 {
3956 	struct net_device *dev;
3957 	struct ifreq ifr;
3958 
3959 	/*
3960 	 *	Fetch the caller's info block.
3961 	 */
3962 
3963 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3964 		return -EFAULT;
3965 
3966 	rcu_read_lock();
3967 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
3968 	if (!dev) {
3969 		rcu_read_unlock();
3970 		return -ENODEV;
3971 	}
3972 
3973 	strcpy(ifr.ifr_name, dev->name);
3974 	rcu_read_unlock();
3975 
3976 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3977 		return -EFAULT;
3978 	return 0;
3979 }
3980 
3981 /*
3982  *	Perform a SIOCGIFCONF call. This structure will change
3983  *	size eventually, and there is nothing I can do about it.
3984  *	Thus we will need a 'compatibility mode'.
3985  */
3986 
3987 static int dev_ifconf(struct net *net, char __user *arg)
3988 {
3989 	struct ifconf ifc;
3990 	struct net_device *dev;
3991 	char __user *pos;
3992 	int len;
3993 	int total;
3994 	int i;
3995 
3996 	/*
3997 	 *	Fetch the caller's info block.
3998 	 */
3999 
4000 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
4001 		return -EFAULT;
4002 
4003 	pos = ifc.ifc_buf;
4004 	len = ifc.ifc_len;
4005 
4006 	/*
4007 	 *	Loop over the interfaces, and write an info block for each.
4008 	 */
4009 
4010 	total = 0;
4011 	for_each_netdev(net, dev) {
4012 		for (i = 0; i < NPROTO; i++) {
4013 			if (gifconf_list[i]) {
4014 				int done;
4015 				if (!pos)
4016 					done = gifconf_list[i](dev, NULL, 0);
4017 				else
4018 					done = gifconf_list[i](dev, pos + total,
4019 							       len - total);
4020 				if (done < 0)
4021 					return -EFAULT;
4022 				total += done;
4023 			}
4024 		}
4025 	}
4026 
4027 	/*
4028 	 *	All done.  Write the updated control block back to the caller.
4029 	 */
4030 	ifc.ifc_len = total;
4031 
4032 	/*
4033 	 * 	Both BSD and Solaris return 0 here, so we do too.
4034 	 */
4035 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4036 }
4037 
4038 #ifdef CONFIG_PROC_FS
4039 
4040 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
4041 
4042 #define get_bucket(x) ((x) >> BUCKET_SPACE)
4043 #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4044 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4045 
4046 static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
4047 {
4048 	struct net *net = seq_file_net(seq);
4049 	struct net_device *dev;
4050 	struct hlist_node *p;
4051 	struct hlist_head *h;
4052 	unsigned int count = 0, offset = get_offset(*pos);
4053 
4054 	h = &net->dev_name_head[get_bucket(*pos)];
4055 	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
4056 		if (++count == offset)
4057 			return dev;
4058 	}
4059 
4060 	return NULL;
4061 }
4062 
4063 static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
4064 {
4065 	struct net_device *dev;
4066 	unsigned int bucket;
4067 
4068 	do {
4069 		dev = dev_from_same_bucket(seq, pos);
4070 		if (dev)
4071 			return dev;
4072 
4073 		bucket = get_bucket(*pos) + 1;
4074 		*pos = set_bucket_offset(bucket, 1);
4075 	} while (bucket < NETDEV_HASHENTRIES);
4076 
4077 	return NULL;
4078 }
4079 
4080 /*
4081  *	This is invoked by the /proc filesystem handler to display a device
4082  *	in detail.
4083  */
4084 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
4085 	__acquires(RCU)
4086 {
4087 	rcu_read_lock();
4088 	if (!*pos)
4089 		return SEQ_START_TOKEN;
4090 
4091 	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
4092 		return NULL;
4093 
4094 	return dev_from_bucket(seq, pos);
4095 }
4096 
4097 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4098 {
4099 	++*pos;
4100 	return dev_from_bucket(seq, pos);
4101 }
4102 
4103 void dev_seq_stop(struct seq_file *seq, void *v)
4104 	__releases(RCU)
4105 {
4106 	rcu_read_unlock();
4107 }
4108 
4109 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4110 {
4111 	struct rtnl_link_stats64 temp;
4112 	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4113 
4114 	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
4115 		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4116 		   dev->name, stats->rx_bytes, stats->rx_packets,
4117 		   stats->rx_errors,
4118 		   stats->rx_dropped + stats->rx_missed_errors,
4119 		   stats->rx_fifo_errors,
4120 		   stats->rx_length_errors + stats->rx_over_errors +
4121 		    stats->rx_crc_errors + stats->rx_frame_errors,
4122 		   stats->rx_compressed, stats->multicast,
4123 		   stats->tx_bytes, stats->tx_packets,
4124 		   stats->tx_errors, stats->tx_dropped,
4125 		   stats->tx_fifo_errors, stats->collisions,
4126 		   stats->tx_carrier_errors +
4127 		    stats->tx_aborted_errors +
4128 		    stats->tx_window_errors +
4129 		    stats->tx_heartbeat_errors,
4130 		   stats->tx_compressed);
4131 }
4132 
4133 /*
4134  *	Called from the PROCfs module. This now uses the new arbitrarily sized
4135  *	/proc/net interface to create /proc/net/dev.
4136  */
4137 static int dev_seq_show(struct seq_file *seq, void *v)
4138 {
4139 	if (v == SEQ_START_TOKEN)
4140 		seq_puts(seq, "Inter-|   Receive                            "
4141 			      "                    |  Transmit\n"
4142 			      " face |bytes    packets errs drop fifo frame "
4143 			      "compressed multicast|bytes    packets errs "
4144 			      "drop fifo colls carrier compressed\n");
4145 	else
4146 		dev_seq_printf_stats(seq, v);
4147 	return 0;
4148 }
4149 
4150 static struct softnet_data *softnet_get_online(loff_t *pos)
4151 {
4152 	struct softnet_data *sd = NULL;
4153 
4154 	while (*pos < nr_cpu_ids)
4155 		if (cpu_online(*pos)) {
4156 			sd = &per_cpu(softnet_data, *pos);
4157 			break;
4158 		} else
4159 			++*pos;
4160 	return sd;
4161 }
4162 
4163 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4164 {
4165 	return softnet_get_online(pos);
4166 }
4167 
4168 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4169 {
4170 	++*pos;
4171 	return softnet_get_online(pos);
4172 }
4173 
4174 static void softnet_seq_stop(struct seq_file *seq, void *v)
4175 {
4176 }
4177 
4178 static int softnet_seq_show(struct seq_file *seq, void *v)
4179 {
4180 	struct softnet_data *sd = v;
4181 
4182 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
4183 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
4184 		   0, 0, 0, 0, /* was fastroute */
4185 		   sd->cpu_collision, sd->received_rps);
4186 	return 0;
4187 }
4188 
4189 static const struct seq_operations dev_seq_ops = {
4190 	.start = dev_seq_start,
4191 	.next  = dev_seq_next,
4192 	.stop  = dev_seq_stop,
4193 	.show  = dev_seq_show,
4194 };
4195 
4196 static int dev_seq_open(struct inode *inode, struct file *file)
4197 {
4198 	return seq_open_net(inode, file, &dev_seq_ops,
4199 			    sizeof(struct seq_net_private));
4200 }
4201 
4202 static const struct file_operations dev_seq_fops = {
4203 	.owner	 = THIS_MODULE,
4204 	.open    = dev_seq_open,
4205 	.read    = seq_read,
4206 	.llseek  = seq_lseek,
4207 	.release = seq_release_net,
4208 };
4209 
4210 static const struct seq_operations softnet_seq_ops = {
4211 	.start = softnet_seq_start,
4212 	.next  = softnet_seq_next,
4213 	.stop  = softnet_seq_stop,
4214 	.show  = softnet_seq_show,
4215 };
4216 
4217 static int softnet_seq_open(struct inode *inode, struct file *file)
4218 {
4219 	return seq_open(file, &softnet_seq_ops);
4220 }
4221 
4222 static const struct file_operations softnet_seq_fops = {
4223 	.owner	 = THIS_MODULE,
4224 	.open    = softnet_seq_open,
4225 	.read    = seq_read,
4226 	.llseek  = seq_lseek,
4227 	.release = seq_release,
4228 };
4229 
4230 static void *ptype_get_idx(loff_t pos)
4231 {
4232 	struct packet_type *pt = NULL;
4233 	loff_t i = 0;
4234 	int t;
4235 
4236 	list_for_each_entry_rcu(pt, &ptype_all, list) {
4237 		if (i == pos)
4238 			return pt;
4239 		++i;
4240 	}
4241 
4242 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
4243 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
4244 			if (i == pos)
4245 				return pt;
4246 			++i;
4247 		}
4248 	}
4249 	return NULL;
4250 }
4251 
4252 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
4253 	__acquires(RCU)
4254 {
4255 	rcu_read_lock();
4256 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4257 }
4258 
4259 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4260 {
4261 	struct packet_type *pt;
4262 	struct list_head *nxt;
4263 	int hash;
4264 
4265 	++*pos;
4266 	if (v == SEQ_START_TOKEN)
4267 		return ptype_get_idx(0);
4268 
4269 	pt = v;
4270 	nxt = pt->list.next;
4271 	if (pt->type == htons(ETH_P_ALL)) {
4272 		if (nxt != &ptype_all)
4273 			goto found;
4274 		hash = 0;
4275 		nxt = ptype_base[0].next;
4276 	} else
4277 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4278 
4279 	while (nxt == &ptype_base[hash]) {
4280 		if (++hash >= PTYPE_HASH_SIZE)
4281 			return NULL;
4282 		nxt = ptype_base[hash].next;
4283 	}
4284 found:
4285 	return list_entry(nxt, struct packet_type, list);
4286 }
4287 
4288 static void ptype_seq_stop(struct seq_file *seq, void *v)
4289 	__releases(RCU)
4290 {
4291 	rcu_read_unlock();
4292 }
4293 
4294 static int ptype_seq_show(struct seq_file *seq, void *v)
4295 {
4296 	struct packet_type *pt = v;
4297 
4298 	if (v == SEQ_START_TOKEN)
4299 		seq_puts(seq, "Type Device      Function\n");
4300 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
4301 		if (pt->type == htons(ETH_P_ALL))
4302 			seq_puts(seq, "ALL ");
4303 		else
4304 			seq_printf(seq, "%04x", ntohs(pt->type));
4305 
4306 		seq_printf(seq, " %-8s %pF\n",
4307 			   pt->dev ? pt->dev->name : "", pt->func);
4308 	}
4309 
4310 	return 0;
4311 }
4312 
4313 static const struct seq_operations ptype_seq_ops = {
4314 	.start = ptype_seq_start,
4315 	.next  = ptype_seq_next,
4316 	.stop  = ptype_seq_stop,
4317 	.show  = ptype_seq_show,
4318 };
4319 
4320 static int ptype_seq_open(struct inode *inode, struct file *file)
4321 {
4322 	return seq_open_net(inode, file, &ptype_seq_ops,
4323 			sizeof(struct seq_net_private));
4324 }
4325 
4326 static const struct file_operations ptype_seq_fops = {
4327 	.owner	 = THIS_MODULE,
4328 	.open    = ptype_seq_open,
4329 	.read    = seq_read,
4330 	.llseek  = seq_lseek,
4331 	.release = seq_release_net,
4332 };
4333 
4334 
4335 static int __net_init dev_proc_net_init(struct net *net)
4336 {
4337 	int rc = -ENOMEM;
4338 
4339 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4340 		goto out;
4341 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4342 		goto out_dev;
4343 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4344 		goto out_softnet;
4345 
4346 	if (wext_proc_init(net))
4347 		goto out_ptype;
4348 	rc = 0;
4349 out:
4350 	return rc;
4351 out_ptype:
4352 	proc_net_remove(net, "ptype");
4353 out_softnet:
4354 	proc_net_remove(net, "softnet_stat");
4355 out_dev:
4356 	proc_net_remove(net, "dev");
4357 	goto out;
4358 }
4359 
4360 static void __net_exit dev_proc_net_exit(struct net *net)
4361 {
4362 	wext_proc_exit(net);
4363 
4364 	proc_net_remove(net, "ptype");
4365 	proc_net_remove(net, "softnet_stat");
4366 	proc_net_remove(net, "dev");
4367 }
4368 
4369 static struct pernet_operations __net_initdata dev_proc_ops = {
4370 	.init = dev_proc_net_init,
4371 	.exit = dev_proc_net_exit,
4372 };
4373 
4374 static int __init dev_proc_init(void)
4375 {
4376 	return register_pernet_subsys(&dev_proc_ops);
4377 }
4378 #else
4379 #define dev_proc_init() 0
4380 #endif	/* CONFIG_PROC_FS */
4381 
4382 
4383 /**
4384  *	netdev_set_master	-	set up master pointer
4385  *	@slave: slave device
4386  *	@master: new master device
4387  *
4388  *	Changes the master device of the slave. Pass %NULL to break the
4389  *	bonding. The caller must hold the RTNL semaphore. On a failure
4390  *	a negative errno code is returned. On success the reference counts
4391  *	are adjusted and the function returns zero.
4392  */
4393 int netdev_set_master(struct net_device *slave, struct net_device *master)
4394 {
4395 	struct net_device *old = slave->master;
4396 
4397 	ASSERT_RTNL();
4398 
4399 	if (master) {
4400 		if (old)
4401 			return -EBUSY;
4402 		dev_hold(master);
4403 	}
4404 
4405 	slave->master = master;
4406 
4407 	if (old)
4408 		dev_put(old);
4409 	return 0;
4410 }
4411 EXPORT_SYMBOL(netdev_set_master);
4412 
4413 /**
4414  *	netdev_set_bond_master	-	set up bonding master/slave pair
4415  *	@slave: slave device
4416  *	@master: new master device
4417  *
4418  *	Changes the master device of the slave. Pass %NULL to break the
4419  *	bonding. The caller must hold the RTNL semaphore. On a failure
4420  *	a negative errno code is returned. On success %RTM_NEWLINK is sent
4421  *	to the routing socket and the function returns zero.
4422  */
4423 int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4424 {
4425 	int err;
4426 
4427 	ASSERT_RTNL();
4428 
4429 	err = netdev_set_master(slave, master);
4430 	if (err)
4431 		return err;
4432 	if (master)
4433 		slave->flags |= IFF_SLAVE;
4434 	else
4435 		slave->flags &= ~IFF_SLAVE;
4436 
4437 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4438 	return 0;
4439 }
4440 EXPORT_SYMBOL(netdev_set_bond_master);
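
/*
 * A bonding-style driver would typically enslave and later release a device
 * under rtnl_lock() along these lines (sketch only; bond_dev and slave_dev
 * are hypothetical):
 *
 *	err = netdev_set_bond_master(slave_dev, bond_dev);
 *	if (err)
 *		return err;
 *
 *	and later, to release the slave:
 *
 *	netdev_set_bond_master(slave_dev, NULL);
 */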
4441 
4442 static void dev_change_rx_flags(struct net_device *dev, int flags)
4443 {
4444 	const struct net_device_ops *ops = dev->netdev_ops;
4445 
4446 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4447 		ops->ndo_change_rx_flags(dev, flags);
4448 }
4449 
4450 static int __dev_set_promiscuity(struct net_device *dev, int inc)
4451 {
4452 	unsigned int old_flags = dev->flags;
4453 	uid_t uid;
4454 	gid_t gid;
4455 
4456 	ASSERT_RTNL();
4457 
4458 	dev->flags |= IFF_PROMISC;
4459 	dev->promiscuity += inc;
4460 	if (dev->promiscuity == 0) {
4461 		/*
4462 		 * Avoid overflow.
4463 		 * If inc causes overflow, leave promiscuity untouched and return an error.
4464 		 */
4465 		if (inc < 0)
4466 			dev->flags &= ~IFF_PROMISC;
4467 		else {
4468 			dev->promiscuity -= inc;
4469 			pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
4470 				dev->name);
4471 			return -EOVERFLOW;
4472 		}
4473 	}
4474 	if (dev->flags != old_flags) {
4475 		pr_info("device %s %s promiscuous mode\n",
4476 			dev->name,
4477 			dev->flags & IFF_PROMISC ? "entered" : "left");
4478 		if (audit_enabled) {
4479 			current_uid_gid(&uid, &gid);
4480 			audit_log(current->audit_context, GFP_ATOMIC,
4481 				AUDIT_ANOM_PROMISCUOUS,
4482 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4483 				dev->name, (dev->flags & IFF_PROMISC),
4484 				(old_flags & IFF_PROMISC),
4485 				audit_get_loginuid(current),
4486 				uid, gid,
4487 				audit_get_sessionid(current));
4488 		}
4489 
4490 		dev_change_rx_flags(dev, IFF_PROMISC);
4491 	}
4492 	return 0;
4493 }
4494 
4495 /**
4496  *	dev_set_promiscuity	- update promiscuity count on a device
4497  *	@dev: device
4498  *	@inc: modifier
4499  *
4500  *	Add or remove promiscuity from a device. While the count in the device
4501  *	remains above zero the interface remains promiscuous. Once it hits zero
4502  *	the device reverts to normal filtering operation. A negative inc
4503  *	value is used to drop promiscuity on the device.
4504  *	Return 0 if successful or a negative errno code on error.
4505  */
4506 int dev_set_promiscuity(struct net_device *dev, int inc)
4507 {
4508 	unsigned int old_flags = dev->flags;
4509 	int err;
4510 
4511 	err = __dev_set_promiscuity(dev, inc);
4512 	if (err < 0)
4513 		return err;
4514 	if (dev->flags != old_flags)
4515 		dev_set_rx_mode(dev);
4516 	return err;
4517 }
4518 EXPORT_SYMBOL(dev_set_promiscuity);
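
/*
 * Because promiscuity is a counter, independent users simply balance their
 * increments and decrements under rtnl_lock(), e.g. (sketch only):
 *
 *	dev_set_promiscuity(dev, 1);
 *	...
 *	dev_set_promiscuity(dev, -1);
 *
 * so the device only leaves promiscuous mode once every such user has
 * dropped its reference.
 */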
4519 
4520 /**
4521  *	dev_set_allmulti	- update allmulti count on a device
4522  *	@dev: device
4523  *	@inc: modifier
4524  *
4525  *	Add or remove reception of all multicast frames on a device. While the
4526  *	count in the device remains above zero the interface remains listening
4527  *	for all multicast frames. Once it hits zero the device reverts to normal
4528  *	filtering operation. A negative @inc value is used to drop the counter
4529  *	when releasing a resource needing all multicasts.
4530  *	Return 0 if successful or a negative errno code on error.
4531  */
4532 
4533 int dev_set_allmulti(struct net_device *dev, int inc)
4534 {
4535 	unsigned int old_flags = dev->flags;
4536 
4537 	ASSERT_RTNL();
4538 
4539 	dev->flags |= IFF_ALLMULTI;
4540 	dev->allmulti += inc;
4541 	if (dev->allmulti == 0) {
4542 		/*
4543 		 * Avoid overflow.
4544 		 * If inc causes overflow, leave allmulti untouched and return an error.
4545 		 */
4546 		if (inc < 0)
4547 			dev->flags &= ~IFF_ALLMULTI;
4548 		else {
4549 			dev->allmulti -= inc;
4550 			pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
4551 				dev->name);
4552 			return -EOVERFLOW;
4553 		}
4554 	}
4555 	if (dev->flags ^ old_flags) {
4556 		dev_change_rx_flags(dev, IFF_ALLMULTI);
4557 		dev_set_rx_mode(dev);
4558 	}
4559 	return 0;
4560 }
4561 EXPORT_SYMBOL(dev_set_allmulti);
4562 
4563 /*
4564  *	Upload unicast and multicast address lists to device and
4565  *	configure RX filtering. When the device doesn't support unicast
4566  *	filtering it is put in promiscuous mode while unicast addresses
4567  *	are present.
4568  */
4569 void __dev_set_rx_mode(struct net_device *dev)
4570 {
4571 	const struct net_device_ops *ops = dev->netdev_ops;
4572 
4573 	/* dev_open will call this function so the list will stay sane. */
4574 	if (!(dev->flags&IFF_UP))
4575 		return;
4576 
4577 	if (!netif_device_present(dev))
4578 		return;
4579 
4580 	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
4581 		/* Unicast addresses changes may only happen under the rtnl,
4582 		 * therefore calling __dev_set_promiscuity here is safe.
4583 		 */
4584 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4585 			__dev_set_promiscuity(dev, 1);
4586 			dev->uc_promisc = true;
4587 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4588 			__dev_set_promiscuity(dev, -1);
4589 			dev->uc_promisc = false;
4590 		}
4591 	}
4592 
4593 	if (ops->ndo_set_rx_mode)
4594 		ops->ndo_set_rx_mode(dev);
4595 }
4596 
4597 void dev_set_rx_mode(struct net_device *dev)
4598 {
4599 	netif_addr_lock_bh(dev);
4600 	__dev_set_rx_mode(dev);
4601 	netif_addr_unlock_bh(dev);
4602 }
4603 
4604 /**
4605  *	dev_get_flags - get flags reported to userspace
4606  *	@dev: device
4607  *
4608  *	Get the combination of flag bits exported through APIs to userspace.
4609  */
4610 unsigned int dev_get_flags(const struct net_device *dev)
4611 {
4612 	unsigned int flags;
4613 
4614 	flags = (dev->flags & ~(IFF_PROMISC |
4615 				IFF_ALLMULTI |
4616 				IFF_RUNNING |
4617 				IFF_LOWER_UP |
4618 				IFF_DORMANT)) |
4619 		(dev->gflags & (IFF_PROMISC |
4620 				IFF_ALLMULTI));
4621 
4622 	if (netif_running(dev)) {
4623 		if (netif_oper_up(dev))
4624 			flags |= IFF_RUNNING;
4625 		if (netif_carrier_ok(dev))
4626 			flags |= IFF_LOWER_UP;
4627 		if (netif_dormant(dev))
4628 			flags |= IFF_DORMANT;
4629 	}
4630 
4631 	return flags;
4632 }
4633 EXPORT_SYMBOL(dev_get_flags);
4634 
4635 int __dev_change_flags(struct net_device *dev, unsigned int flags)
4636 {
4637 	unsigned int old_flags = dev->flags;
4638 	int ret;
4639 
4640 	ASSERT_RTNL();
4641 
4642 	/*
4643 	 *	Set the flags on our device.
4644 	 */
4645 
4646 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4647 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4648 			       IFF_AUTOMEDIA)) |
4649 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4650 				    IFF_ALLMULTI));
4651 
4652 	/*
4653 	 *	Load in the correct multicast list now the flags have changed.
4654 	 */
4655 
4656 	if ((old_flags ^ flags) & IFF_MULTICAST)
4657 		dev_change_rx_flags(dev, IFF_MULTICAST);
4658 
4659 	dev_set_rx_mode(dev);
4660 
4661 	/*
4662 	 *	Have we downed the interface? We handle IFF_UP ourselves
4663 	 *	according to user attempts to set it, rather than blindly
4664 	 *	setting it.
4665 	 */
4666 
4667 	ret = 0;
4668 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4669 		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4670 
4671 		if (!ret)
4672 			dev_set_rx_mode(dev);
4673 	}
4674 
4675 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4676 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4677 
4678 		dev->gflags ^= IFF_PROMISC;
4679 		dev_set_promiscuity(dev, inc);
4680 	}
4681 
4682 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4683 	   is important. Some (broken) drivers set IFF_PROMISC when
4684 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4685 	 */
4686 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4687 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4688 
4689 		dev->gflags ^= IFF_ALLMULTI;
4690 		dev_set_allmulti(dev, inc);
4691 	}
4692 
4693 	return ret;
4694 }
4695 
4696 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4697 {
4698 	unsigned int changes = dev->flags ^ old_flags;
4699 
4700 	if (changes & IFF_UP) {
4701 		if (dev->flags & IFF_UP)
4702 			call_netdevice_notifiers(NETDEV_UP, dev);
4703 		else
4704 			call_netdevice_notifiers(NETDEV_DOWN, dev);
4705 	}
4706 
4707 	if (dev->flags & IFF_UP &&
4708 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4709 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4710 }
4711 
4712 /**
4713  *	dev_change_flags - change device settings
4714  *	@dev: device
4715  *	@flags: device state flags
4716  *
4717  *	Change settings on a device based on the supplied state flags. The
4718  *	flags are in the userspace-exported format.
4719  */
4720 int dev_change_flags(struct net_device *dev, unsigned int flags)
4721 {
4722 	int ret;
4723 	unsigned int changes, old_flags = dev->flags;
4724 
4725 	ret = __dev_change_flags(dev, flags);
4726 	if (ret < 0)
4727 		return ret;
4728 
4729 	changes = old_flags ^ dev->flags;
4730 	if (changes)
4731 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4732 
4733 	__dev_notify_flags(dev, old_flags);
4734 	return ret;
4735 }
4736 EXPORT_SYMBOL(dev_change_flags);
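
/*
 * For instance, kernel code bringing an interface up administratively (the
 * in-kernel equivalent of SIOCSIFFLAGS setting IFF_UP) does, under
 * rtnl_lock(), something like (sketch only):
 *
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 */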
4737 
4738 /**
4739  *	dev_set_mtu - Change maximum transfer unit
4740  *	@dev: device
4741  *	@new_mtu: new transfer unit
4742  *
4743  *	Change the maximum transfer size of the network device.
4744  */
4745 int dev_set_mtu(struct net_device *dev, int new_mtu)
4746 {
4747 	const struct net_device_ops *ops = dev->netdev_ops;
4748 	int err;
4749 
4750 	if (new_mtu == dev->mtu)
4751 		return 0;
4752 
4753 	/*	MTU must be positive.	 */
4754 	if (new_mtu < 0)
4755 		return -EINVAL;
4756 
4757 	if (!netif_device_present(dev))
4758 		return -ENODEV;
4759 
4760 	err = 0;
4761 	if (ops->ndo_change_mtu)
4762 		err = ops->ndo_change_mtu(dev, new_mtu);
4763 	else
4764 		dev->mtu = new_mtu;
4765 
4766 	if (!err && dev->flags & IFF_UP)
4767 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4768 	return err;
4769 }
4770 EXPORT_SYMBOL(dev_set_mtu);
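
/*
 * Callers normally hold the RTNL lock, so an in-kernel MTU change looks
 * roughly like (sketch only):
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, new_mtu);
 *	rtnl_unlock();
 */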
4771 
4772 /**
4773  *	dev_set_group - Change group this device belongs to
4774  *	@dev: device
4775  *	@new_group: group this device should belong to
4776  */
4777 void dev_set_group(struct net_device *dev, int new_group)
4778 {
4779 	dev->group = new_group;
4780 }
4781 EXPORT_SYMBOL(dev_set_group);
4782 
4783 /**
4784  *	dev_set_mac_address - Change Media Access Control Address
4785  *	@dev: device
4786  *	@sa: new address
4787  *
4788  *	Change the hardware (MAC) address of the device
4789  */
4790 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4791 {
4792 	const struct net_device_ops *ops = dev->netdev_ops;
4793 	int err;
4794 
4795 	if (!ops->ndo_set_mac_address)
4796 		return -EOPNOTSUPP;
4797 	if (sa->sa_family != dev->type)
4798 		return -EINVAL;
4799 	if (!netif_device_present(dev))
4800 		return -ENODEV;
4801 	err = ops->ndo_set_mac_address(dev, sa);
4802 	if (!err)
4803 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4804 	return err;
4805 }
4806 EXPORT_SYMBOL(dev_set_mac_address);
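
/*
 * The new address is passed as a struct sockaddr whose sa_family must match
 * dev->type.  Changing an Ethernet MAC under rtnl_lock() might look like
 * this (sketch only; new_mac is a hypothetical ETH_ALEN byte array):
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, ETH_ALEN);
 *	err = dev_set_mac_address(dev, &sa);
 */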
4807 
4808 /*
4809  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4810  */
4811 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4812 {
4813 	int err;
4814 	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4815 
4816 	if (!dev)
4817 		return -ENODEV;
4818 
4819 	switch (cmd) {
4820 	case SIOCGIFFLAGS:	/* Get interface flags */
4821 		ifr->ifr_flags = (short) dev_get_flags(dev);
4822 		return 0;
4823 
4824 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4825 				   (currently unused) */
4826 		ifr->ifr_metric = 0;
4827 		return 0;
4828 
4829 	case SIOCGIFMTU:	/* Get the MTU of a device */
4830 		ifr->ifr_mtu = dev->mtu;
4831 		return 0;
4832 
4833 	case SIOCGIFHWADDR:
4834 		if (!dev->addr_len)
4835 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4836 		else
4837 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4838 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4839 		ifr->ifr_hwaddr.sa_family = dev->type;
4840 		return 0;
4841 
4842 	case SIOCGIFSLAVE:
4843 		err = -EINVAL;
4844 		break;
4845 
4846 	case SIOCGIFMAP:
4847 		ifr->ifr_map.mem_start = dev->mem_start;
4848 		ifr->ifr_map.mem_end   = dev->mem_end;
4849 		ifr->ifr_map.base_addr = dev->base_addr;
4850 		ifr->ifr_map.irq       = dev->irq;
4851 		ifr->ifr_map.dma       = dev->dma;
4852 		ifr->ifr_map.port      = dev->if_port;
4853 		return 0;
4854 
4855 	case SIOCGIFINDEX:
4856 		ifr->ifr_ifindex = dev->ifindex;
4857 		return 0;
4858 
4859 	case SIOCGIFTXQLEN:
4860 		ifr->ifr_qlen = dev->tx_queue_len;
4861 		return 0;
4862 
4863 	default:
4864 		/* dev_ioctl() should ensure this case
4865 		 * is never reached
4866 		 */
4867 		WARN_ON(1);
4868 		err = -ENOTTY;
4869 		break;
4870 
4871 	}
4872 	return err;
4873 }
4874 
4875 /*
4876  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4877  */
4878 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4879 {
4880 	int err;
4881 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4882 	const struct net_device_ops *ops;
4883 
4884 	if (!dev)
4885 		return -ENODEV;
4886 
4887 	ops = dev->netdev_ops;
4888 
4889 	switch (cmd) {
4890 	case SIOCSIFFLAGS:	/* Set interface flags */
4891 		return dev_change_flags(dev, ifr->ifr_flags);
4892 
4893 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4894 				   (currently unused) */
4895 		return -EOPNOTSUPP;
4896 
4897 	case SIOCSIFMTU:	/* Set the MTU of a device */
4898 		return dev_set_mtu(dev, ifr->ifr_mtu);
4899 
4900 	case SIOCSIFHWADDR:
4901 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4902 
4903 	case SIOCSIFHWBROADCAST:
4904 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4905 			return -EINVAL;
4906 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4907 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4908 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4909 		return 0;
4910 
4911 	case SIOCSIFMAP:
4912 		if (ops->ndo_set_config) {
4913 			if (!netif_device_present(dev))
4914 				return -ENODEV;
4915 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4916 		}
4917 		return -EOPNOTSUPP;
4918 
4919 	case SIOCADDMULTI:
4920 		if (!ops->ndo_set_rx_mode ||
4921 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4922 			return -EINVAL;
4923 		if (!netif_device_present(dev))
4924 			return -ENODEV;
4925 		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4926 
4927 	case SIOCDELMULTI:
4928 		if (!ops->ndo_set_rx_mode ||
4929 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4930 			return -EINVAL;
4931 		if (!netif_device_present(dev))
4932 			return -ENODEV;
4933 		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4934 
4935 	case SIOCSIFTXQLEN:
4936 		if (ifr->ifr_qlen < 0)
4937 			return -EINVAL;
4938 		dev->tx_queue_len = ifr->ifr_qlen;
4939 		return 0;
4940 
4941 	case SIOCSIFNAME:
4942 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4943 		return dev_change_name(dev, ifr->ifr_newname);
4944 
4945 	case SIOCSHWTSTAMP:
4946 		err = net_hwtstamp_validate(ifr);
4947 		if (err)
4948 			return err;
4949 		/* fall through */
4950 
4951 	/*
4952 	 *	Unknown or private ioctl
4953 	 */
4954 	default:
4955 		if ((cmd >= SIOCDEVPRIVATE &&
4956 		    cmd <= SIOCDEVPRIVATE + 15) ||
4957 		    cmd == SIOCBONDENSLAVE ||
4958 		    cmd == SIOCBONDRELEASE ||
4959 		    cmd == SIOCBONDSETHWADDR ||
4960 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4961 		    cmd == SIOCBONDINFOQUERY ||
4962 		    cmd == SIOCBONDCHANGEACTIVE ||
4963 		    cmd == SIOCGMIIPHY ||
4964 		    cmd == SIOCGMIIREG ||
4965 		    cmd == SIOCSMIIREG ||
4966 		    cmd == SIOCBRADDIF ||
4967 		    cmd == SIOCBRDELIF ||
4968 		    cmd == SIOCSHWTSTAMP ||
4969 		    cmd == SIOCWANDEV) {
4970 			err = -EOPNOTSUPP;
4971 			if (ops->ndo_do_ioctl) {
4972 				if (netif_device_present(dev))
4973 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4974 				else
4975 					err = -ENODEV;
4976 			}
4977 		} else
4978 			err = -EINVAL;
4979 
4980 	}
4981 	return err;
4982 }
4983 
4984 /*
4985  *	This function handles all "interface"-type I/O control requests. The actual
4986  *	'doing' part of this is dev_ifsioc above.
4987  */
4988 
4989 /**
4990  *	dev_ioctl	-	network device ioctl
4991  *	@net: the applicable net namespace
4992  *	@cmd: command to issue
4993  *	@arg: pointer to a struct ifreq in user space
4994  *
4995  *	Issue ioctl functions to devices. This is normally called by the
4996  *	user space syscall interfaces but can sometimes be useful for
4997  *	other purposes. The return value is the return from the syscall if
4998  *	positive or a negative errno code on error.
4999  */
5000 
5001 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
5002 {
5003 	struct ifreq ifr;
5004 	int ret;
5005 	char *colon;
5006 
5007 	/* One special case: SIOCGIFCONF takes an ifconf argument
5008 	   and requires a shared lock, because it sleeps writing
5009 	   to user space.
5010 	 */
5011 
5012 	if (cmd == SIOCGIFCONF) {
5013 		rtnl_lock();
5014 		ret = dev_ifconf(net, (char __user *) arg);
5015 		rtnl_unlock();
5016 		return ret;
5017 	}
5018 	if (cmd == SIOCGIFNAME)
5019 		return dev_ifname(net, (struct ifreq __user *)arg);
5020 
5021 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5022 		return -EFAULT;
5023 
5024 	ifr.ifr_name[IFNAMSIZ-1] = 0;
5025 
5026 	colon = strchr(ifr.ifr_name, ':');
5027 	if (colon)
5028 		*colon = 0;
5029 
5030 	/*
5031 	 *	See which interface the caller is talking about.
5032 	 */
5033 
5034 	switch (cmd) {
5035 	/*
5036 	 *	These ioctl calls:
5037 	 *	- can be done by all.
5038 	 *	- atomic and do not require locking.
5039 	 *	- return a value
5040 	 */
5041 	case SIOCGIFFLAGS:
5042 	case SIOCGIFMETRIC:
5043 	case SIOCGIFMTU:
5044 	case SIOCGIFHWADDR:
5045 	case SIOCGIFSLAVE:
5046 	case SIOCGIFMAP:
5047 	case SIOCGIFINDEX:
5048 	case SIOCGIFTXQLEN:
5049 		dev_load(net, ifr.ifr_name);
5050 		rcu_read_lock();
5051 		ret = dev_ifsioc_locked(net, &ifr, cmd);
5052 		rcu_read_unlock();
5053 		if (!ret) {
5054 			if (colon)
5055 				*colon = ':';
5056 			if (copy_to_user(arg, &ifr,
5057 					 sizeof(struct ifreq)))
5058 				ret = -EFAULT;
5059 		}
5060 		return ret;
5061 
5062 	case SIOCETHTOOL:
5063 		dev_load(net, ifr.ifr_name);
5064 		rtnl_lock();
5065 		ret = dev_ethtool(net, &ifr);
5066 		rtnl_unlock();
5067 		if (!ret) {
5068 			if (colon)
5069 				*colon = ':';
5070 			if (copy_to_user(arg, &ifr,
5071 					 sizeof(struct ifreq)))
5072 				ret = -EFAULT;
5073 		}
5074 		return ret;
5075 
5076 	/*
5077 	 *	These ioctl calls:
5078 	 *	- require superuser power.
5079 	 *	- require strict serialization.
5080 	 *	- return a value
5081 	 */
5082 	case SIOCGMIIPHY:
5083 	case SIOCGMIIREG:
5084 	case SIOCSIFNAME:
5085 		if (!capable(CAP_NET_ADMIN))
5086 			return -EPERM;
5087 		dev_load(net, ifr.ifr_name);
5088 		rtnl_lock();
5089 		ret = dev_ifsioc(net, &ifr, cmd);
5090 		rtnl_unlock();
5091 		if (!ret) {
5092 			if (colon)
5093 				*colon = ':';
5094 			if (copy_to_user(arg, &ifr,
5095 					 sizeof(struct ifreq)))
5096 				ret = -EFAULT;
5097 		}
5098 		return ret;
5099 
5100 	/*
5101 	 *	These ioctl calls:
5102 	 *	- require superuser power.
5103 	 *	- require strict serialization.
5104 	 *	- do not return a value
5105 	 */
5106 	case SIOCSIFFLAGS:
5107 	case SIOCSIFMETRIC:
5108 	case SIOCSIFMTU:
5109 	case SIOCSIFMAP:
5110 	case SIOCSIFHWADDR:
5111 	case SIOCSIFSLAVE:
5112 	case SIOCADDMULTI:
5113 	case SIOCDELMULTI:
5114 	case SIOCSIFHWBROADCAST:
5115 	case SIOCSIFTXQLEN:
5116 	case SIOCSMIIREG:
5117 	case SIOCBONDENSLAVE:
5118 	case SIOCBONDRELEASE:
5119 	case SIOCBONDSETHWADDR:
5120 	case SIOCBONDCHANGEACTIVE:
5121 	case SIOCBRADDIF:
5122 	case SIOCBRDELIF:
5123 	case SIOCSHWTSTAMP:
5124 		if (!capable(CAP_NET_ADMIN))
5125 			return -EPERM;
5126 		/* fall through */
5127 	case SIOCBONDSLAVEINFOQUERY:
5128 	case SIOCBONDINFOQUERY:
5129 		dev_load(net, ifr.ifr_name);
5130 		rtnl_lock();
5131 		ret = dev_ifsioc(net, &ifr, cmd);
5132 		rtnl_unlock();
5133 		return ret;
5134 
5135 	case SIOCGIFMEM:
5136 		/* Get the per device memory space. We can add this but
5137 		 * currently do not support it */
5138 	case SIOCSIFMEM:
5139 		/* Set the per device memory buffer space.
5140 		 * Not applicable in our case */
5141 	case SIOCSIFLINK:
5142 		return -ENOTTY;
5143 
5144 	/*
5145 	 *	Unknown or private ioctl.
5146 	 */
5147 	default:
5148 		if (cmd == SIOCWANDEV ||
5149 		    (cmd >= SIOCDEVPRIVATE &&
5150 		     cmd <= SIOCDEVPRIVATE + 15)) {
5151 			dev_load(net, ifr.ifr_name);
5152 			rtnl_lock();
5153 			ret = dev_ifsioc(net, &ifr, cmd);
5154 			rtnl_unlock();
5155 			if (!ret && copy_to_user(arg, &ifr,
5156 						 sizeof(struct ifreq)))
5157 				ret = -EFAULT;
5158 			return ret;
5159 		}
5160 		/* Take care of Wireless Extensions */
5161 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5162 			return wext_handle_ioctl(net, &ifr, cmd, arg);
5163 		return -ENOTTY;
5164 	}
5165 }
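
/*
 * From user space these requests arrive via ioctl() on any socket; for
 * example, reading the flags of eth0 (sketch only, error handling omitted):
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ioctl(fd, SIOCGIFFLAGS, &ifr);
 *	if (ifr.ifr_flags & IFF_UP)
 *		...
 */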
5166 
5167 
5168 /**
5169  *	dev_new_index	-	allocate an ifindex
5170  *	@net: the applicable net namespace
5171  *
5172  *	Returns a suitable unique value for a new device interface
5173  *	number.  The caller must hold the rtnl semaphore or the
5174  *	dev_base_lock to be sure it remains unique.
5175  */
5176 static int dev_new_index(struct net *net)
5177 {
5178 	static int ifindex;
5179 	for (;;) {
5180 		if (++ifindex <= 0)
5181 			ifindex = 1;
5182 		if (!__dev_get_by_index(net, ifindex))
5183 			return ifindex;
5184 	}
5185 }
5186 
5187 /* Delayed registration/unregisteration */
5188 static LIST_HEAD(net_todo_list);
5189 
5190 static void net_set_todo(struct net_device *dev)
5191 {
5192 	list_add_tail(&dev->todo_list, &net_todo_list);
5193 }
5194 
5195 static void rollback_registered_many(struct list_head *head)
5196 {
5197 	struct net_device *dev, *tmp;
5198 
5199 	BUG_ON(dev_boot_phase);
5200 	ASSERT_RTNL();
5201 
5202 	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5203 		/* Some devices call this without ever having registered,
5204 		 * in order to unwind initialization. Remove those
5205 		 * devices and proceed with the remaining.
5206 		 */
5207 		if (dev->reg_state == NETREG_UNINITIALIZED) {
5208 			pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5209 				 dev->name, dev);
5210 
5211 			WARN_ON(1);
5212 			list_del(&dev->unreg_list);
5213 			continue;
5214 		}
5215 		dev->dismantle = true;
5216 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
5217 	}
5218 
5219 	/* If device is running, close it first. */
5220 	dev_close_many(head);
5221 
5222 	list_for_each_entry(dev, head, unreg_list) {
5223 		/* And unlink it from device chain. */
5224 		unlist_netdevice(dev);
5225 
5226 		dev->reg_state = NETREG_UNREGISTERING;
5227 	}
5228 
5229 	synchronize_net();
5230 
5231 	list_for_each_entry(dev, head, unreg_list) {
5232 		/* Shutdown queueing discipline. */
5233 		dev_shutdown(dev);
5234 
5235 
5236 		/* Notify protocols that we are about to destroy
5237 		   this device. They should clean up all of their state.
5238 		*/
5239 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5240 
5241 		if (!dev->rtnl_link_ops ||
5242 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5243 			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
5244 
5245 		/*
5246 		 *	Flush the unicast and multicast chains
5247 		 */
5248 		dev_uc_flush(dev);
5249 		dev_mc_flush(dev);
5250 
5251 		if (dev->netdev_ops->ndo_uninit)
5252 			dev->netdev_ops->ndo_uninit(dev);
5253 
5254 		/* Notifier chain MUST detach us from master device. */
5255 		WARN_ON(dev->master);
5256 
5257 		/* Remove entries from kobject tree */
5258 		netdev_unregister_kobject(dev);
5259 	}
5260 
5261 	/* Process any work delayed until the end of the batch */
5262 	dev = list_first_entry(head, struct net_device, unreg_list);
5263 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5264 
5265 	synchronize_net();
5266 
5267 	list_for_each_entry(dev, head, unreg_list)
5268 		dev_put(dev);
5269 }
5270 
5271 static void rollback_registered(struct net_device *dev)
5272 {
5273 	LIST_HEAD(single);
5274 
5275 	list_add(&dev->unreg_list, &single);
5276 	rollback_registered_many(&single);
5277 	list_del(&single);
5278 }
5279 
5280 static netdev_features_t netdev_fix_features(struct net_device *dev,
5281 	netdev_features_t features)
5282 {
5283 	/* Fix illegal checksum combinations */
5284 	if ((features & NETIF_F_HW_CSUM) &&
5285 	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5286 		netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5287 		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5288 	}
5289 
5290 	/* Fix illegal SG+CSUM combinations. */
5291 	if ((features & NETIF_F_SG) &&
5292 	    !(features & NETIF_F_ALL_CSUM)) {
5293 		netdev_dbg(dev,
5294 			"Dropping NETIF_F_SG since no checksum feature.\n");
5295 		features &= ~NETIF_F_SG;
5296 	}
5297 
5298 	/* TSO requires that SG is present as well. */
5299 	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5300 		netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5301 		features &= ~NETIF_F_ALL_TSO;
5302 	}
5303 
5304 	/* TSO ECN requires that TSO is present as well. */
5305 	if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5306 		features &= ~NETIF_F_TSO_ECN;
5307 
5308 	/* Software GSO depends on SG. */
5309 	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5310 		netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5311 		features &= ~NETIF_F_GSO;
5312 	}
5313 
5314 	/* UFO needs SG and checksumming */
5315 	if (features & NETIF_F_UFO) {
5316 		/* maybe split UFO into V4 and V6? */
5317 		if (!((features & NETIF_F_GEN_CSUM) ||
5318 		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5319 			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5320 			netdev_dbg(dev,
5321 				"Dropping NETIF_F_UFO since no checksum offload features.\n");
5322 			features &= ~NETIF_F_UFO;
5323 		}
5324 
5325 		if (!(features & NETIF_F_SG)) {
5326 			netdev_dbg(dev,
5327 				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5328 			features &= ~NETIF_F_UFO;
5329 		}
5330 	}
5331 
5332 	return features;
5333 }
5334 
5335 int __netdev_update_features(struct net_device *dev)
5336 {
5337 	netdev_features_t features;
5338 	int err = 0;
5339 
5340 	ASSERT_RTNL();
5341 
5342 	features = netdev_get_wanted_features(dev);
5343 
5344 	if (dev->netdev_ops->ndo_fix_features)
5345 		features = dev->netdev_ops->ndo_fix_features(dev, features);
5346 
5347 	/* driver might be less strict about feature dependencies */
5348 	features = netdev_fix_features(dev, features);
5349 
5350 	if (dev->features == features)
5351 		return 0;
5352 
5353 	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5354 		&dev->features, &features);
5355 
5356 	if (dev->netdev_ops->ndo_set_features)
5357 		err = dev->netdev_ops->ndo_set_features(dev, features);
5358 
5359 	if (unlikely(err < 0)) {
5360 		netdev_err(dev,
5361 			"set_features() failed (%d); wanted %pNF, left %pNF\n",
5362 			err, &features, &dev->features);
5363 		return -1;
5364 	}
5365 
5366 	if (!err)
5367 		dev->features = features;
5368 
5369 	return 1;
5370 }
5371 
5372 /**
5373  *	netdev_update_features - recalculate device features
5374  *	@dev: the device to check
5375  *
5376  *	Recalculate dev->features set and send notifications if it
5377  *	has changed. Should be called after driver or hardware dependent
5378  *	conditions might have changed that influence the features.
5379  */
5380 void netdev_update_features(struct net_device *dev)
5381 {
5382 	if (__netdev_update_features(dev))
5383 		netdev_features_change(dev);
5384 }
5385 EXPORT_SYMBOL(netdev_update_features);
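
/*
 * A driver whose offload capabilities depend on runtime state (link mode,
 * firmware configuration, ...) typically adjusts what ndo_fix_features()
 * will report and then, under rtnl_lock(), calls (sketch only):
 *
 *	netdev_update_features(dev);
 *
 * so that dev->features is recomputed and user space is notified if
 * anything changed.
 */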
5386 
5387 /**
5388  *	netdev_change_features - recalculate device features
5389  *	@dev: the device to check
5390  *
5391  *	Recalculate dev->features set and send notifications even
5392  *	if they have not changed. Should be called instead of
5393  *	netdev_update_features() if also dev->vlan_features might
5394  *	have changed to allow the changes to be propagated to stacked
5395  *	VLAN devices.
5396  */
5397 void netdev_change_features(struct net_device *dev)
5398 {
5399 	__netdev_update_features(dev);
5400 	netdev_features_change(dev);
5401 }
5402 EXPORT_SYMBOL(netdev_change_features);
5403 
5404 /**
5405  *	netif_stacked_transfer_operstate -	transfer operstate
5406  *	@rootdev: the root or lower level device to transfer state from
5407  *	@dev: the device to transfer operstate to
5408  *
5409  *	Transfer operational state from root to device. This is normally
5410  *	called when a stacking relationship exists between the root
5411  *	device and the device (a leaf device).
5412  */
5413 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5414 					struct net_device *dev)
5415 {
5416 	if (rootdev->operstate == IF_OPER_DORMANT)
5417 		netif_dormant_on(dev);
5418 	else
5419 		netif_dormant_off(dev);
5420 
5421 	if (netif_carrier_ok(rootdev)) {
5422 		if (!netif_carrier_ok(dev))
5423 			netif_carrier_on(dev);
5424 	} else {
5425 		if (netif_carrier_ok(dev))
5426 			netif_carrier_off(dev);
5427 	}
5428 }
5429 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
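
/*
 * For example, stacked-device code such as VLAN propagates carrier and
 * dormant state from the real device to each virtual device in its
 * NETDEV_CHANGE handling, roughly (sketch only):
 *
 *	netif_stacked_transfer_operstate(real_dev, vlan_dev);
 */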
5430 
5431 #ifdef CONFIG_RPS
5432 static int netif_alloc_rx_queues(struct net_device *dev)
5433 {
5434 	unsigned int i, count = dev->num_rx_queues;
5435 	struct netdev_rx_queue *rx;
5436 
5437 	BUG_ON(count < 1);
5438 
5439 	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5440 	if (!rx) {
5441 		pr_err("netdev: Unable to allocate %u rx queues\n", count);
5442 		return -ENOMEM;
5443 	}
5444 	dev->_rx = rx;
5445 
5446 	for (i = 0; i < count; i++)
5447 		rx[i].dev = dev;
5448 	return 0;
5449 }
5450 #endif
5451 
5452 static void netdev_init_one_queue(struct net_device *dev,
5453 				  struct netdev_queue *queue, void *_unused)
5454 {
5455 	/* Initialize queue lock */
5456 	spin_lock_init(&queue->_xmit_lock);
5457 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5458 	queue->xmit_lock_owner = -1;
5459 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
5460 	queue->dev = dev;
5461 #ifdef CONFIG_BQL
5462 	dql_init(&queue->dql, HZ);
5463 #endif
5464 }
5465 
5466 static int netif_alloc_netdev_queues(struct net_device *dev)
5467 {
5468 	unsigned int count = dev->num_tx_queues;
5469 	struct netdev_queue *tx;
5470 
5471 	BUG_ON(count < 1);
5472 
5473 	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5474 	if (!tx) {
5475 		pr_err("netdev: Unable to allocate %u tx queues\n", count);
5476 		return -ENOMEM;
5477 	}
5478 	dev->_tx = tx;
5479 
5480 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5481 	spin_lock_init(&dev->tx_global_lock);
5482 
5483 	return 0;
5484 }
5485 
5486 /**
5487  *	register_netdevice	- register a network device
5488  *	@dev: device to register
5489  *
5490  *	Take a completed network device structure and add it to the kernel
5491  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5492  *	chain. 0 is returned on success. A negative errno code is returned
5493  *	on a failure to set up the device, or if the name is a duplicate.
5494  *
5495  *	Callers must hold the rtnl semaphore. You may want
5496  *	register_netdev() instead of this.
5497  *
5498  *	BUGS:
5499  *	The locking appears insufficient to guarantee two parallel registers
5500  *	will not get the same name.
5501  */
5502 
5503 int register_netdevice(struct net_device *dev)
5504 {
5505 	int ret;
5506 	struct net *net = dev_net(dev);
5507 
5508 	BUG_ON(dev_boot_phase);
5509 	ASSERT_RTNL();
5510 
5511 	might_sleep();
5512 
5513 	/* When net_device's are persistent, this will be fatal. */
5514 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5515 	BUG_ON(!net);
5516 
5517 	spin_lock_init(&dev->addr_list_lock);
5518 	netdev_set_addr_lockdep_class(dev);
5519 
5520 	dev->iflink = -1;
5521 
5522 	ret = dev_get_valid_name(dev, dev->name);
5523 	if (ret < 0)
5524 		goto out;
5525 
5526 	/* Init, if this function is available */
5527 	if (dev->netdev_ops->ndo_init) {
5528 		ret = dev->netdev_ops->ndo_init(dev);
5529 		if (ret) {
5530 			if (ret > 0)
5531 				ret = -EIO;
5532 			goto out;
5533 		}
5534 	}
5535 
5536 	dev->ifindex = dev_new_index(net);
5537 	if (dev->iflink == -1)
5538 		dev->iflink = dev->ifindex;
5539 
5540 	/* Transfer changeable features to wanted_features and enable
5541 	 * software offloads (GSO and GRO).
5542 	 */
5543 	dev->hw_features |= NETIF_F_SOFT_FEATURES;
5544 	dev->features |= NETIF_F_SOFT_FEATURES;
5545 	dev->wanted_features = dev->features & dev->hw_features;
5546 
5547 	/* Turn on no cache copy if HW is doing checksum */
5548 	if (!(dev->flags & IFF_LOOPBACK)) {
5549 		dev->hw_features |= NETIF_F_NOCACHE_COPY;
5550 		if (dev->features & NETIF_F_ALL_CSUM) {
5551 			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5552 			dev->features |= NETIF_F_NOCACHE_COPY;
5553 		}
5554 	}
5555 
5556 	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
5557 	 */
5558 	dev->vlan_features |= NETIF_F_HIGHDMA;
5559 
5560 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5561 	ret = notifier_to_errno(ret);
5562 	if (ret)
5563 		goto err_uninit;
5564 
5565 	ret = netdev_register_kobject(dev);
5566 	if (ret)
5567 		goto err_uninit;
5568 	dev->reg_state = NETREG_REGISTERED;
5569 
5570 	__netdev_update_features(dev);
5571 
5572 	/*
5573 	 *	Default initial state at registration is that the
5574 	 *	device is present.
5575 	 */
5576 
5577 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5578 
5579 	dev_init_scheduler(dev);
5580 	dev_hold(dev);
5581 	list_netdevice(dev);
5582 
5583 	/* Notify protocols that a new device appeared. */
5584 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5585 	ret = notifier_to_errno(ret);
5586 	if (ret) {
5587 		rollback_registered(dev);
5588 		dev->reg_state = NETREG_UNREGISTERED;
5589 	}
5590 	/*
5591 	 *	Prevent userspace races by waiting until the network
5592 	 *	device is fully setup before sending notifications.
5593 	 */
5594 	if (!dev->rtnl_link_ops ||
5595 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5596 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5597 
5598 out:
5599 	return ret;
5600 
5601 err_uninit:
5602 	if (dev->netdev_ops->ndo_uninit)
5603 		dev->netdev_ops->ndo_uninit(dev);
5604 	goto out;
5605 }
5606 EXPORT_SYMBOL(register_netdevice);
5607 
5608 /**
5609  *	init_dummy_netdev	- init a dummy network device for NAPI
5610  *	@dev: device to init
5611  *
5612  *	This takes a network device structure and initializes the minimum
5613  *	number of fields so it can be used to schedule NAPI polls without
5614  *	registering a full blown interface. This is to be used by drivers
5615  *	that need to tie several hardware interfaces to a single NAPI
5616  *	poll scheduler due to HW limitations.
5617  */
5618 int init_dummy_netdev(struct net_device *dev)
5619 {
5620 	/* Clear everything. Note we don't initialize spinlocks
5621 	 * as they aren't supposed to be taken by any of the
5622 	 * NAPI code and this dummy netdev is supposed to be
5623 	 * only ever used for NAPI polls
5624 	 */
5625 	memset(dev, 0, sizeof(struct net_device));
5626 
5627 	/* make sure we BUG if trying to hit standard
5628 	 * register/unregister code path
5629 	 */
5630 	dev->reg_state = NETREG_DUMMY;
5631 
5632 	/* NAPI wants this */
5633 	INIT_LIST_HEAD(&dev->napi_list);
5634 
5635 	/* a dummy interface is started by default */
5636 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5637 	set_bit(__LINK_STATE_START, &dev->state);
5638 
5639 	/* Note: We don't allocate pcpu_refcnt for dummy devices,
5640 	 * because users of this 'device' don't need to change
5641 	 * its refcount.
5642 	 */
5643 
5644 	return 0;
5645 }
5646 EXPORT_SYMBOL_GPL(init_dummy_netdev);
5647 
5648 
5649 /**
5650  *	register_netdev	- register a network device
5651  *	@dev: device to register
5652  *
5653  *	Take a completed network device structure and add it to the kernel
5654  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5655  *	chain. 0 is returned on success. A negative errno code is returned
5656  *	on a failure to set up the device, or if the name is a duplicate.
5657  *
5658  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
5659  *	and expands the device name if you passed a format string to
5660  *	alloc_netdev.
5661  */
5662 int register_netdev(struct net_device *dev)
5663 {
5664 	int err;
5665 
5666 	rtnl_lock();
5667 	err = register_netdevice(dev);
5668 	rtnl_unlock();
5669 	return err;
5670 }
5671 EXPORT_SYMBOL(register_netdev);
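
/*
 * A typical Ethernet driver probe path allocates, fills in and registers its
 * device roughly like this (sketch only; the foo_* names are hypothetical):
 *
 *	struct net_device *netdev = alloc_etherdev(sizeof(struct foo_priv));
 *
 *	if (!netdev)
 *		return -ENOMEM;
 *	netdev->netdev_ops = &foo_netdev_ops;
 *	...
 *	err = register_netdev(netdev);
 *	if (err)
 *		free_netdev(netdev);
 */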
5672 
5673 int netdev_refcnt_read(const struct net_device *dev)
5674 {
5675 	int i, refcnt = 0;
5676 
5677 	for_each_possible_cpu(i)
5678 		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5679 	return refcnt;
5680 }
5681 EXPORT_SYMBOL(netdev_refcnt_read);
5682 
5683 /**
5684  * netdev_wait_allrefs - wait until all references are gone.
5685  *
5686  * This is called when unregistering network devices.
5687  *
5688  * Any protocol or device that holds a reference should register
5689  * for netdevice notification, and cleanup and put back the
5690  * reference if they receive an UNREGISTER event.
5691  * We can get stuck here if buggy protocols don't correctly
5692  * call dev_put.
5693  */
5694 static void netdev_wait_allrefs(struct net_device *dev)
5695 {
5696 	unsigned long rebroadcast_time, warning_time;
5697 	int refcnt;
5698 
5699 	linkwatch_forget_dev(dev);
5700 
5701 	rebroadcast_time = warning_time = jiffies;
5702 	refcnt = netdev_refcnt_read(dev);
5703 
5704 	while (refcnt != 0) {
5705 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5706 			rtnl_lock();
5707 
5708 			/* Rebroadcast unregister notification */
5709 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5710 			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5711 			 * should have already handled it the first time */
5712 
5713 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5714 				     &dev->state)) {
5715 				/* We must not have linkwatch events
5716 				 * pending on unregister. If this
5717 				 * happens, we simply run the queue
5718 				 * unscheduled, resulting in a noop
5719 				 * for this device.
5720 				 */
5721 				linkwatch_run_queue();
5722 			}
5723 
5724 			__rtnl_unlock();
5725 
5726 			rebroadcast_time = jiffies;
5727 		}
5728 
5729 		msleep(250);
5730 
5731 		refcnt = netdev_refcnt_read(dev);
5732 
5733 		if (time_after(jiffies, warning_time + 10 * HZ)) {
5734 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
5735 				 dev->name, refcnt);
5736 			warning_time = jiffies;
5737 		}
5738 	}
5739 }
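
/*
 * Example (sketch): the kind of notifier a reference holder is expected
 * to register (with register_netdevice_notifier()) so that
 * netdev_wait_allrefs() can make progress.  my_saved_dev is the pointer
 * from the dev_hold() sketch above; in this kernel the notifier's ptr
 * argument is the struct net_device itself.
 */
static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER && dev == my_saved_dev) {
		/* drop our reference so the refcount can reach zero */
		dev_put(my_saved_dev);
		my_saved_dev = NULL;
	}
	return NOTIFY_DONE;
}

static struct notifier_block my_netdev_notifier = {
	.notifier_call = my_netdev_event,
};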
5740 
5741 /* The sequence is:
5742  *
5743  *	rtnl_lock();
5744  *	...
5745  *	register_netdevice(x1);
5746  *	register_netdevice(x2);
5747  *	...
5748  *	unregister_netdevice(y1);
5749  *	unregister_netdevice(y2);
5750  *      ...
5751  *	rtnl_unlock();
5752  *	free_netdev(y1);
5753  *	free_netdev(y2);
5754  *
5755  * We are invoked by rtnl_unlock().
5756  * This allows us to deal with problems:
5757  * 1) We can delete sysfs objects which invoke hotplug
5758  *    without deadlocking with linkwatch via keventd.
5759  * 2) Since we run with the RTNL semaphore not held, we can sleep
5760  *    safely in order to wait for the netdev refcnt to drop to zero.
5761  *
5762  * We must not return until all unregister events added during
5763  * the interval the lock was held have been completed.
5764  */
5765 void netdev_run_todo(void)
5766 {
5767 	struct list_head list;
5768 
5769 	/* Snapshot list, allow later requests */
5770 	list_replace_init(&net_todo_list, &list);
5771 
5772 	__rtnl_unlock();
5773 
5774 	/* Wait for rcu callbacks to finish before attempting to drain
5775 	 * the device list.  This usually avoids a 250ms wait.
5776 	 */
5777 	if (!list_empty(&list))
5778 		rcu_barrier();
5779 
5780 	while (!list_empty(&list)) {
5781 		struct net_device *dev
5782 			= list_first_entry(&list, struct net_device, todo_list);
5783 		list_del(&dev->todo_list);
5784 
5785 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5786 			pr_err("network todo '%s' but state %d\n",
5787 			       dev->name, dev->reg_state);
5788 			dump_stack();
5789 			continue;
5790 		}
5791 
5792 		dev->reg_state = NETREG_UNREGISTERED;
5793 
5794 		on_each_cpu(flush_backlog, dev, 1);
5795 
5796 		netdev_wait_allrefs(dev);
5797 
5798 		/* paranoia */
5799 		BUG_ON(netdev_refcnt_read(dev));
5800 		WARN_ON(rcu_access_pointer(dev->ip_ptr));
5801 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
5802 		WARN_ON(dev->dn_ptr);
5803 
5804 		if (dev->destructor)
5805 			dev->destructor(dev);
5806 
5807 		/* Free network device */
5808 		kobject_put(&dev->dev.kobj);
5809 	}
5810 }
5811 
5812 /* Convert net_device_stats to rtnl_link_stats64.  They have the same
5813  * fields in the same order, with only the type differing.
5814  */
5815 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
5816 			     const struct net_device_stats *netdev_stats)
5817 {
5818 #if BITS_PER_LONG == 64
5819 	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
5820 	memcpy(stats64, netdev_stats, sizeof(*stats64));
5821 #else
5822 	size_t i, n = sizeof(*stats64) / sizeof(u64);
5823 	const unsigned long *src = (const unsigned long *)netdev_stats;
5824 	u64 *dst = (u64 *)stats64;
5825 
5826 	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
5827 		     sizeof(*stats64) / sizeof(u64));
5828 	for (i = 0; i < n; i++)
5829 		dst[i] = src[i];
5830 #endif
5831 }
5832 EXPORT_SYMBOL(netdev_stats_to_stats64);
5833 
5834 /**
5835  *	dev_get_stats	- get network device statistics
5836  *	@dev: device to get statistics from
5837  *	@storage: place to store stats
5838  *
5839  *	Get network statistics from device. Return @storage.
5840  *	The device driver may provide its own method by setting
5841  *	dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
5842  *	otherwise the internal statistics structure is used.
5843  */
5844 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5845 					struct rtnl_link_stats64 *storage)
5846 {
5847 	const struct net_device_ops *ops = dev->netdev_ops;
5848 
5849 	if (ops->ndo_get_stats64) {
5850 		memset(storage, 0, sizeof(*storage));
5851 		ops->ndo_get_stats64(dev, storage);
5852 	} else if (ops->ndo_get_stats) {
5853 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
5854 	} else {
5855 		netdev_stats_to_stats64(storage, &dev->stats);
5856 	}
5857 	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
5858 	return storage;
5859 }
5860 EXPORT_SYMBOL(dev_get_stats);
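
/*
 * Example (sketch): reading statistics from any device, independent of
 * which of the three methods its driver implements.
 */
static void my_print_rx_packets(struct net_device *dev)
{
	struct rtnl_link_stats64 stats;

	dev_get_stats(dev, &stats);	/* fills and returns &stats */
	netdev_info(dev, "rx_packets: %llu\n",
		    (unsigned long long)stats.rx_packets);
}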
5861 
5862 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
5863 {
5864 	struct netdev_queue *queue = dev_ingress_queue(dev);
5865 
5866 #ifdef CONFIG_NET_CLS_ACT
5867 	if (queue)
5868 		return queue;
5869 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
5870 	if (!queue)
5871 		return NULL;
5872 	netdev_init_one_queue(dev, queue, NULL);
5873 	queue->qdisc = &noop_qdisc;
5874 	queue->qdisc_sleeping = &noop_qdisc;
5875 	rcu_assign_pointer(dev->ingress_queue, queue);
5876 #endif
5877 	return queue;
5878 }
5879 
5880 /**
5881  *	alloc_netdev_mqs - allocate network device
5882  *	@sizeof_priv:	size of private data to allocate space for
5883  *	@name:		device name format string
5884  *	@setup:		callback to initialize device
5885  *	@txqs:		the number of TX subqueues to allocate
5886  *	@rxqs:		the number of RX subqueues to allocate
5887  *
5888  *	Allocates a struct net_device with private data area for driver use
5889  *	and performs basic initialization.  Also allocates subqueue structs
5890  *	for each queue on the device.
5891  */
5892 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5893 		void (*setup)(struct net_device *),
5894 		unsigned int txqs, unsigned int rxqs)
5895 {
5896 	struct net_device *dev;
5897 	size_t alloc_size;
5898 	struct net_device *p;
5899 
5900 	BUG_ON(strlen(name) >= sizeof(dev->name));
5901 
5902 	if (txqs < 1) {
5903 		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
5904 		return NULL;
5905 	}
5906 
5907 #ifdef CONFIG_RPS
5908 	if (rxqs < 1) {
5909 		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
5910 		return NULL;
5911 	}
5912 #endif
5913 
5914 	alloc_size = sizeof(struct net_device);
5915 	if (sizeof_priv) {
5916 		/* ensure 32-byte alignment of private area */
5917 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5918 		alloc_size += sizeof_priv;
5919 	}
5920 	/* ensure 32-byte alignment of whole construct */
5921 	alloc_size += NETDEV_ALIGN - 1;
5922 
5923 	p = kzalloc(alloc_size, GFP_KERNEL);
5924 	if (!p) {
5925 		pr_err("alloc_netdev: Unable to allocate device\n");
5926 		return NULL;
5927 	}
5928 
5929 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5930 	dev->padded = (char *)dev - (char *)p;
5931 
5932 	dev->pcpu_refcnt = alloc_percpu(int);
5933 	if (!dev->pcpu_refcnt)
5934 		goto free_p;
5935 
5936 	if (dev_addr_init(dev))
5937 		goto free_pcpu;
5938 
5939 	dev_mc_init(dev);
5940 	dev_uc_init(dev);
5941 
5942 	dev_net_set(dev, &init_net);
5943 
5944 	dev->gso_max_size = GSO_MAX_SIZE;
5945 
5946 	INIT_LIST_HEAD(&dev->napi_list);
5947 	INIT_LIST_HEAD(&dev->unreg_list);
5948 	INIT_LIST_HEAD(&dev->link_watch_list);
5949 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5950 	setup(dev);
5951 
5952 	dev->num_tx_queues = txqs;
5953 	dev->real_num_tx_queues = txqs;
5954 	if (netif_alloc_netdev_queues(dev))
5955 		goto free_all;
5956 
5957 #ifdef CONFIG_RPS
5958 	dev->num_rx_queues = rxqs;
5959 	dev->real_num_rx_queues = rxqs;
5960 	if (netif_alloc_rx_queues(dev))
5961 		goto free_all;
5962 #endif
5963 
5964 	strcpy(dev->name, name);
5965 	dev->group = INIT_NETDEV_GROUP;
5966 	return dev;
5967 
5968 free_all:
5969 	free_netdev(dev);
5970 	return NULL;
5971 
5972 free_pcpu:
5973 	free_percpu(dev->pcpu_refcnt);
5974 	kfree(dev->_tx);
5975 #ifdef CONFIG_RPS
5976 	kfree(dev->_rx);
5977 #endif
5978 
5979 free_p:
5980 	kfree(p);
5981 	return NULL;
5982 }
5983 EXPORT_SYMBOL(alloc_netdev_mqs);
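
/*
 * Example (sketch): an Ethernet-style device with four TX and four RX
 * queues.  ether_setup() is the standard setup callback from
 * <linux/etherdevice.h>; the "myeth%d" format string lets
 * register_netdev() pick a free unit number.  sizeof(struct my_priv)
 * reuses the hypothetical private struct from the probe sketch above.
 */
static struct net_device *my_alloc(void)
{
	return alloc_netdev_mqs(sizeof(struct my_priv), "myeth%d",
				ether_setup, 4, 4);
}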
5984 
5985 /**
5986  *	free_netdev - free network device
5987  *	@dev: device
5988  *
5989  *	This function does the last stage of destroying an allocated device
5990  * 	interface. The reference to the device object is released.
5991  *	If this is the last reference then it will be freed.
5992  */
5993 void free_netdev(struct net_device *dev)
5994 {
5995 	struct napi_struct *p, *n;
5996 
5997 	release_net(dev_net(dev));
5998 
5999 	kfree(dev->_tx);
6000 #ifdef CONFIG_RPS
6001 	kfree(dev->_rx);
6002 #endif
6003 
6004 	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
6005 
6006 	/* Flush device addresses */
6007 	dev_addr_flush(dev);
6008 
6009 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6010 		netif_napi_del(p);
6011 
6012 	free_percpu(dev->pcpu_refcnt);
6013 	dev->pcpu_refcnt = NULL;
6014 
6015 	/*  Compatibility with error handling in drivers */
6016 	if (dev->reg_state == NETREG_UNINITIALIZED) {
6017 		kfree((char *)dev - dev->padded);
6018 		return;
6019 	}
6020 
6021 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6022 	dev->reg_state = NETREG_RELEASED;
6023 
6024 	/* will free via device release */
6025 	put_device(&dev->dev);
6026 }
6027 EXPORT_SYMBOL(free_netdev);
6028 
6029 /**
6030  *	synchronize_net -  Synchronize with packet receive processing
6031  *
6032  *	Wait for packets currently being received to be done.
6033  *	Does not block later packets from starting.
6034  */
6035 void synchronize_net(void)
6036 {
6037 	might_sleep();
6038 	if (rtnl_is_locked())
6039 		synchronize_rcu_expedited();
6040 	else
6041 		synchronize_rcu();
6042 }
6043 EXPORT_SYMBOL(synchronize_net);
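
/*
 * Example (sketch): the unpublish-then-free pattern synchronize_net()
 * exists for.  struct my_handler and my_remove_handler() are
 * hypothetical; the assumption is that the packet path walks the list
 * under rcu_read_lock().
 */
struct my_handler {
	struct list_head list;
};

static void my_remove_handler(struct my_handler *h)
{
	list_del_rcu(&h->list);	/* unpublish from the RCU-protected list */
	synchronize_net();	/* wait for in-flight receive paths */
	kfree(h);		/* nothing can still be using it now */
}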
6044 
6045 /**
6046  *	unregister_netdevice_queue - remove device from the kernel
6047  *	@dev: device
6048  *	@head: list
6049  *
6050  *	This function shuts down a device interface and removes it
6051  *	from the kernel tables.
6052  *	If @head is not NULL, the device is queued to be unregistered later.
6053  *
6054  *	Callers must hold the rtnl semaphore.  You may want
6055  *	unregister_netdev() instead of this.
6056  */
6057 
6058 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
6059 {
6060 	ASSERT_RTNL();
6061 
6062 	if (head) {
6063 		list_move_tail(&dev->unreg_list, head);
6064 	} else {
6065 		rollback_registered(dev);
6066 		/* Finish processing unregister after unlock */
6067 		net_set_todo(dev);
6068 	}
6069 }
6070 EXPORT_SYMBOL(unregister_netdevice_queue);
6071 
6072 /**
6073  *	unregister_netdevice_many - unregister many devices
6074  *	@head: list of devices
6075  */
6076 void unregister_netdevice_many(struct list_head *head)
6077 {
6078 	struct net_device *dev;
6079 
6080 	if (!list_empty(head)) {
6081 		rollback_registered_many(head);
6082 		list_for_each_entry(dev, head, unreg_list)
6083 			net_set_todo(dev);
6084 	}
6085 }
6086 EXPORT_SYMBOL(unregister_netdevice_many);
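
/*
 * Example (sketch): tearing down several devices with a single RTNL
 * hold and a single RCU grace period instead of one per device.
 * my_destroy_all() and my_devs[] are hypothetical.
 */
static void my_destroy_all(struct net_device **my_devs, int n)
{
	LIST_HEAD(kill_list);
	int i;

	rtnl_lock();
	for (i = 0; i < n; i++)
		unregister_netdevice_queue(my_devs[i], &kill_list);
	unregister_netdevice_many(&kill_list);
	rtnl_unlock();		/* runs netdev_run_todo() */

	for (i = 0; i < n; i++)
		free_netdev(my_devs[i]);
}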
6087 
6088 /**
6089  *	unregister_netdev - remove device from the kernel
6090  *	@dev: device
6091  *
6092  *	This function shuts down a device interface and removes it
6093  *	from the kernel tables.
6094  *
6095  *	This is just a wrapper for unregister_netdevice that takes
6096  *	the rtnl semaphore.  In general you want to use this and not
6097  *	unregister_netdevice.
6098  */
6099 void unregister_netdev(struct net_device *dev)
6100 {
6101 	rtnl_lock();
6102 	unregister_netdevice(dev);
6103 	rtnl_unlock();
6104 }
6105 EXPORT_SYMBOL(unregister_netdev);
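
/*
 * Example (sketch): the normal driver remove path, the counterpart of
 * the probe sketch next to register_netdev() above.
 */
static void my_remove(struct net_device *dev)
{
	unregister_netdev(dev);	/* takes the RTNL, waits for references */
	free_netdev(dev);	/* reg_state is NETREG_UNREGISTERED here */
}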
6106 
6107 /**
6108  *	dev_change_net_namespace - move device to a different network namespace
6109  *	@dev: device
6110  *	@net: network namespace
6111  *	@pat: If not NULL name pattern to try if the current device name
6112  *	      is already taken in the destination network namespace.
6113  *
6114  *	This function shuts down a device interface and moves it
6115  *	to a new network namespace. On success 0 is returned, on
6116  *	a failure a negative errno code is returned.
6117  *
6118  *	Callers must hold the rtnl semaphore.
6119  */
6120 
6121 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6122 {
6123 	int err;
6124 
6125 	ASSERT_RTNL();
6126 
6127 	/* Don't allow namespace local devices to be moved. */
6128 	err = -EINVAL;
6129 	if (dev->features & NETIF_F_NETNS_LOCAL)
6130 		goto out;
6131 
6132 	/* Ensure the device has been registered */
6133 	err = -EINVAL;
6134 	if (dev->reg_state != NETREG_REGISTERED)
6135 		goto out;
6136 
6137 	/* Get out if there is nothing to do */
6138 	err = 0;
6139 	if (net_eq(dev_net(dev), net))
6140 		goto out;
6141 
6142 	/* Pick the destination device name, and ensure
6143 	 * we can use it in the destination network namespace.
6144 	 */
6145 	err = -EEXIST;
6146 	if (__dev_get_by_name(net, dev->name)) {
6147 		/* We get here if we can't use the current device name */
6148 		if (!pat)
6149 			goto out;
6150 		if (dev_get_valid_name(dev, pat) < 0)
6151 			goto out;
6152 	}
6153 
6154 	/*
6155 	 * And now a mini version of register_netdevice and unregister_netdevice.
6156 	 */
6157 
6158 	/* If device is running close it first. */
6159 	dev_close(dev);
6160 
6161 	/* And unlink it from device chain */
6162 	err = -ENODEV;
6163 	unlist_netdevice(dev);
6164 
6165 	synchronize_net();
6166 
6167 	/* Shutdown queueing discipline. */
6168 	dev_shutdown(dev);
6169 
6170 	/* Notify protocols that we are about to destroy
6171 	   this device. They should clean up all of their state.
6172 
6173 	   Note that dev->reg_state stays at NETREG_REGISTERED.
6174 	   This is intentional: that way 8021q and macvlan know
6175 	   the device is just moving and can keep their slaves up.
6176 	*/
6177 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6178 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
6179 	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
6180 
6181 	/*
6182 	 *	Flush the unicast and multicast chains
6183 	 */
6184 	dev_uc_flush(dev);
6185 	dev_mc_flush(dev);
6186 
6187 	/* Actually switch the network namespace */
6188 	dev_net_set(dev, net);
6189 
6190 	/* If there is an ifindex conflict assign a new one */
6191 	if (__dev_get_by_index(net, dev->ifindex)) {
6192 		int iflink = (dev->iflink == dev->ifindex);
6193 		dev->ifindex = dev_new_index(net);
6194 		if (iflink)
6195 			dev->iflink = dev->ifindex;
6196 	}
6197 
6198 	/* Fixup kobjects */
6199 	err = device_rename(&dev->dev, dev->name);
6200 	WARN_ON(err);
6201 
6202 	/* Add the device back in the hashes */
6203 	list_netdevice(dev);
6204 
6205 	/* Notify protocols that a new device appeared. */
6206 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
6207 
6208 	/*
6209 	 *	Prevent userspace races by waiting until the network
6210 	 *	device is fully setup before sending notifications.
6211 	 */
6212 	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
6213 
6214 	synchronize_net();
6215 	err = 0;
6216 out:
6217 	return err;
6218 }
6219 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
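
/*
 * Example (sketch): moving a device into another namespace, falling
 * back to a "moved%d" name if its current name is already taken there.
 * my_move() and the pattern string are hypothetical; the caller must
 * hold the RTNL.
 */
static int my_move(struct net_device *dev, struct net *net)
{
	ASSERT_RTNL();
	return dev_change_net_namespace(dev, net, "moved%d");
}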
6220 
6221 static int dev_cpu_callback(struct notifier_block *nfb,
6222 			    unsigned long action,
6223 			    void *ocpu)
6224 {
6225 	struct sk_buff **list_skb;
6226 	struct sk_buff *skb;
6227 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
6228 	struct softnet_data *sd, *oldsd;
6229 
6230 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
6231 		return NOTIFY_OK;
6232 
6233 	local_irq_disable();
6234 	cpu = smp_processor_id();
6235 	sd = &per_cpu(softnet_data, cpu);
6236 	oldsd = &per_cpu(softnet_data, oldcpu);
6237 
6238 	/* Find end of our completion_queue. */
6239 	list_skb = &sd->completion_queue;
6240 	while (*list_skb)
6241 		list_skb = &(*list_skb)->next;
6242 	/* Append completion queue from offline CPU. */
6243 	*list_skb = oldsd->completion_queue;
6244 	oldsd->completion_queue = NULL;
6245 
6246 	/* Append output queue from offline CPU. */
6247 	if (oldsd->output_queue) {
6248 		*sd->output_queue_tailp = oldsd->output_queue;
6249 		sd->output_queue_tailp = oldsd->output_queue_tailp;
6250 		oldsd->output_queue = NULL;
6251 		oldsd->output_queue_tailp = &oldsd->output_queue;
6252 	}
6253 	/* Append NAPI poll list from offline CPU. */
6254 	if (!list_empty(&oldsd->poll_list)) {
6255 		list_splice_init(&oldsd->poll_list, &sd->poll_list);
6256 		raise_softirq_irqoff(NET_RX_SOFTIRQ);
6257 	}
6258 
6259 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
6260 	local_irq_enable();
6261 
6262 	/* Process offline CPU's input_pkt_queue */
6263 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6264 		netif_rx(skb);
6265 		input_queue_head_incr(oldsd);
6266 	}
6267 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6268 		netif_rx(skb);
6269 		input_queue_head_incr(oldsd);
6270 	}
6271 
6272 	return NOTIFY_OK;
6273 }
6274 
6275 
6276 /**
6277  *	netdev_increment_features - increment feature set by one
6278  *	@all: current feature set
6279  *	@one: new feature set
6280  *	@mask: mask feature set
6281  *
6282  *	Computes a new feature set after adding a device with feature set
6283  *	@one to the master device with current feature set @all.  Will not
6284  *	enable anything that is off in @mask. Returns the new feature set.
6285  */
6286 netdev_features_t netdev_increment_features(netdev_features_t all,
6287 	netdev_features_t one, netdev_features_t mask)
6288 {
6289 	if (mask & NETIF_F_GEN_CSUM)
6290 		mask |= NETIF_F_ALL_CSUM;
6291 	mask |= NETIF_F_VLAN_CHALLENGED;
6292 
6293 	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6294 	all &= one | ~NETIF_F_ALL_FOR_ALL;
6295 
6296 	/* If one device supports hw checksumming, set for all. */
6297 	if (all & NETIF_F_GEN_CSUM)
6298 		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
6299 
6300 	return all;
6301 }
6302 EXPORT_SYMBOL(netdev_increment_features);
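
/*
 * Example (sketch): how a bridge/bond style master would recompute its
 * feature set from its slaves.  my_compute_features() is hypothetical;
 * the NETIF_F_ONE_FOR_ALL bits start cleared so that a single slave
 * lacking a feature clears it for the master.
 */
static netdev_features_t my_compute_features(struct net_device **slaves,
					     int n, netdev_features_t mask)
{
	netdev_features_t features = mask & ~NETIF_F_ONE_FOR_ALL;
	int i;

	for (i = 0; i < n; i++)
		features = netdev_increment_features(features,
						     slaves[i]->features,
						     mask);
	return features;
}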
6303 
6304 static struct hlist_head *netdev_create_hash(void)
6305 {
6306 	int i;
6307 	struct hlist_head *hash;
6308 
6309 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
6310 	if (hash != NULL)
6311 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
6312 			INIT_HLIST_HEAD(&hash[i]);
6313 
6314 	return hash;
6315 }
6316 
6317 /* Initialize per network namespace state */
6318 static int __net_init netdev_init(struct net *net)
6319 {
6320 	if (net != &init_net)
6321 		INIT_LIST_HEAD(&net->dev_base_head);
6322 
6323 	net->dev_name_head = netdev_create_hash();
6324 	if (net->dev_name_head == NULL)
6325 		goto err_name;
6326 
6327 	net->dev_index_head = netdev_create_hash();
6328 	if (net->dev_index_head == NULL)
6329 		goto err_idx;
6330 
6331 	return 0;
6332 
6333 err_idx:
6334 	kfree(net->dev_name_head);
6335 err_name:
6336 	return -ENOMEM;
6337 }
6338 
6339 /**
6340  *	netdev_drivername - network driver for the device
6341  *	@dev: network device
6342  *
6343  *	Determine network driver for device.
6344  */
6345 const char *netdev_drivername(const struct net_device *dev)
6346 {
6347 	const struct device_driver *driver;
6348 	const struct device *parent;
6349 	const char *empty = "";
6350 
6351 	parent = dev->dev.parent;
6352 	if (!parent)
6353 		return empty;
6354 
6355 	driver = parent->driver;
6356 	if (driver && driver->name)
6357 		return driver->name;
6358 	return empty;
6359 }
6360 
6361 int __netdev_printk(const char *level, const struct net_device *dev,
6362 			   struct va_format *vaf)
6363 {
6364 	int r;
6365 
6366 	if (dev && dev->dev.parent)
6367 		r = dev_printk(level, dev->dev.parent, "%s: %pV",
6368 			       netdev_name(dev), vaf);
6369 	else if (dev)
6370 		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6371 	else
6372 		r = printk("%s(NULL net_device): %pV", level, vaf);
6373 
6374 	return r;
6375 }
6376 EXPORT_SYMBOL(__netdev_printk);
6377 
6378 int netdev_printk(const char *level, const struct net_device *dev,
6379 		  const char *format, ...)
6380 {
6381 	struct va_format vaf;
6382 	va_list args;
6383 	int r;
6384 
6385 	va_start(args, format);
6386 
6387 	vaf.fmt = format;
6388 	vaf.va = &args;
6389 
6390 	r = __netdev_printk(level, dev, &vaf);
6391 	va_end(args);
6392 
6393 	return r;
6394 }
6395 EXPORT_SYMBOL(netdev_printk);
6396 
6397 #define define_netdev_printk_level(func, level)			\
6398 int func(const struct net_device *dev, const char *fmt, ...)	\
6399 {								\
6400 	int r;							\
6401 	struct va_format vaf;					\
6402 	va_list args;						\
6403 								\
6404 	va_start(args, fmt);					\
6405 								\
6406 	vaf.fmt = fmt;						\
6407 	vaf.va = &args;						\
6408 								\
6409 	r = __netdev_printk(level, dev, &vaf);			\
6410 	va_end(args);						\
6411 								\
6412 	return r;						\
6413 }								\
6414 EXPORT_SYMBOL(func);
6415 
6416 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
6417 define_netdev_printk_level(netdev_alert, KERN_ALERT);
6418 define_netdev_printk_level(netdev_crit, KERN_CRIT);
6419 define_netdev_printk_level(netdev_err, KERN_ERR);
6420 define_netdev_printk_level(netdev_warn, KERN_WARNING);
6421 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
6422 define_netdev_printk_level(netdev_info, KERN_INFO);
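
/*
 * Example (sketch): the level helpers generated above are used like
 * printk() but automatically prefix the driver and device name.
 * my_report_link() is hypothetical.
 */
static void my_report_link(struct net_device *dev, bool up)
{
	if (up)
		netdev_info(dev, "link is up\n");
	else
		netdev_warn(dev, "link is down\n");
}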
6423 
6424 static void __net_exit netdev_exit(struct net *net)
6425 {
6426 	kfree(net->dev_name_head);
6427 	kfree(net->dev_index_head);
6428 }
6429 
6430 static struct pernet_operations __net_initdata netdev_net_ops = {
6431 	.init = netdev_init,
6432 	.exit = netdev_exit,
6433 };
6434 
6435 static void __net_exit default_device_exit(struct net *net)
6436 {
6437 	struct net_device *dev, *aux;
6438 	/*
6439 	 * Push all migratable network devices back to the
6440 	 * initial network namespace
6441 	 */
6442 	rtnl_lock();
6443 	for_each_netdev_safe(net, dev, aux) {
6444 		int err;
6445 		char fb_name[IFNAMSIZ];
6446 
6447 		/* Ignore unmovable devices (e.g. the loopback device) */
6448 		if (dev->features & NETIF_F_NETNS_LOCAL)
6449 			continue;
6450 
6451 		/* Leave virtual devices for the generic cleanup */
6452 		if (dev->rtnl_link_ops)
6453 			continue;
6454 
6455 		/* Push remaining network devices to init_net */
6456 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6457 		err = dev_change_net_namespace(dev, &init_net, fb_name);
6458 		if (err) {
6459 			pr_emerg("%s: failed to move %s to init_net: %d\n",
6460 				 __func__, dev->name, err);
6461 			BUG();
6462 		}
6463 	}
6464 	rtnl_unlock();
6465 }
6466 
6467 static void __net_exit default_device_exit_batch(struct list_head *net_list)
6468 {
6469 	/* At exit, all network devices must be removed from their network
6470 	 * namespace.  Do this in the reverse order of registration.
6471 	 * Do this across as many network namespaces as possible to
6472 	 * improve batching efficiency.
6473 	 */
6474 	struct net_device *dev;
6475 	struct net *net;
6476 	LIST_HEAD(dev_kill_list);
6477 
6478 	rtnl_lock();
6479 	list_for_each_entry(net, net_list, exit_list) {
6480 		for_each_netdev_reverse(net, dev) {
6481 			if (dev->rtnl_link_ops)
6482 				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
6483 			else
6484 				unregister_netdevice_queue(dev, &dev_kill_list);
6485 		}
6486 	}
6487 	unregister_netdevice_many(&dev_kill_list);
6488 	list_del(&dev_kill_list);
6489 	rtnl_unlock();
6490 }
6491 
6492 static struct pernet_operations __net_initdata default_device_ops = {
6493 	.exit = default_device_exit,
6494 	.exit_batch = default_device_exit_batch,
6495 };
6496 
6497 /*
6498  *	Initialize the DEV module. At boot time this walks the device list and
6499  *	unhooks any devices that fail to initialise (normally hardware not
6500  *	present) and leaves us with a valid list of present and active devices.
6501  *
6502  */
6503 
6504 /*
6505  *       This is called single-threaded during boot, so no need
6506  *       to take the rtnl semaphore.
6507  */
6508 static int __init net_dev_init(void)
6509 {
6510 	int i, rc = -ENOMEM;
6511 
6512 	BUG_ON(!dev_boot_phase);
6513 
6514 	if (dev_proc_init())
6515 		goto out;
6516 
6517 	if (netdev_kobject_init())
6518 		goto out;
6519 
6520 	INIT_LIST_HEAD(&ptype_all);
6521 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
6522 		INIT_LIST_HEAD(&ptype_base[i]);
6523 
6524 	if (register_pernet_subsys(&netdev_net_ops))
6525 		goto out;
6526 
6527 	/*
6528 	 *	Initialise the packet receive queues.
6529 	 */
6530 
6531 	for_each_possible_cpu(i) {
6532 		struct softnet_data *sd = &per_cpu(softnet_data, i);
6533 
6534 		memset(sd, 0, sizeof(*sd));
6535 		skb_queue_head_init(&sd->input_pkt_queue);
6536 		skb_queue_head_init(&sd->process_queue);
6537 		sd->completion_queue = NULL;
6538 		INIT_LIST_HEAD(&sd->poll_list);
6539 		sd->output_queue = NULL;
6540 		sd->output_queue_tailp = &sd->output_queue;
6541 #ifdef CONFIG_RPS
6542 		sd->csd.func = rps_trigger_softirq;
6543 		sd->csd.info = sd;
6544 		sd->csd.flags = 0;
6545 		sd->cpu = i;
6546 #endif
6547 
6548 		sd->backlog.poll = process_backlog;
6549 		sd->backlog.weight = weight_p;
6550 		sd->backlog.gro_list = NULL;
6551 		sd->backlog.gro_count = 0;
6552 	}
6553 
6554 	dev_boot_phase = 0;
6555 
6556 	/* The loopback device is special: if any other network device
6557 	 * is present in a network namespace, the loopback device must
6558 	 * be present too.  Since we now dynamically allocate and free
6559 	 * the loopback device, ensure this invariant is maintained by
6560 	 * keeping the loopback device as the first device on the
6561 	 * list of network devices.  That is, the loopback device must be
6562 	 * the first device that appears and the last network device
6563 	 * that disappears.
6564 	 */
6565 	if (register_pernet_device(&loopback_net_ops))
6566 		goto out;
6567 
6568 	if (register_pernet_device(&default_device_ops))
6569 		goto out;
6570 
6571 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6572 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6573 
6574 	hotcpu_notifier(dev_cpu_callback, 0);
6575 	dst_init();
6576 	dev_mcast_init();
6577 	rc = 0;
6578 out:
6579 	return rc;
6580 }
6581 
6582 subsys_initcall(net_dev_init);
6583 
6584 static int __init initialize_hashrnd(void)
6585 {
6586 	get_random_bytes(&hashrnd, sizeof(hashrnd));
6587 	return 0;
6588 }
6589 
6590 late_initcall_sync(initialize_hashrnd);
6591 
6592