xref: /linux/net/core/dev.c (revision a508da6cc0093171833efb8376b00473f24221b9)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <linux/bitops.h>
77 #include <linux/capability.h>
78 #include <linux/cpu.h>
79 #include <linux/types.h>
80 #include <linux/kernel.h>
81 #include <linux/hash.h>
82 #include <linux/slab.h>
83 #include <linux/sched.h>
84 #include <linux/mutex.h>
85 #include <linux/string.h>
86 #include <linux/mm.h>
87 #include <linux/socket.h>
88 #include <linux/sockios.h>
89 #include <linux/errno.h>
90 #include <linux/interrupt.h>
91 #include <linux/if_ether.h>
92 #include <linux/netdevice.h>
93 #include <linux/etherdevice.h>
94 #include <linux/ethtool.h>
95 #include <linux/notifier.h>
96 #include <linux/skbuff.h>
97 #include <net/net_namespace.h>
98 #include <net/sock.h>
99 #include <linux/rtnetlink.h>
100 #include <linux/proc_fs.h>
101 #include <linux/seq_file.h>
102 #include <linux/stat.h>
103 #include <net/dst.h>
104 #include <net/pkt_sched.h>
105 #include <net/checksum.h>
106 #include <net/xfrm.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 #include <trace/events/napi.h>
130 #include <trace/events/net.h>
131 #include <trace/events/skb.h>
132 #include <linux/pci.h>
133 #include <linux/inetdevice.h>
134 #include <linux/cpu_rmap.h>
135 #include <linux/net_tstamp.h>
136 #include <linux/static_key.h>
137 #include <net/flow_keys.h>
138 
139 #include "net-sysfs.h"
140 
141 /* Instead of increasing this, you should create a hash table. */
142 #define MAX_GRO_SKBS 8
143 
144 /* This should be increased if a protocol with a bigger head is added. */
145 #define GRO_MAX_HEAD (MAX_HEADER + 128)
146 
147 /*
148  *	The list of packet types we will receive (as opposed to discard)
149  *	and the routines to invoke.
150  *
151  *	Why 16. Because with 16 the only overlap we get on a hash of the
152  *	low nibble of the protocol value is RARP/SNAP/X.25.
153  *
154  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
155  *             sure which should go first, but I bet it won't make much
156  *             difference if we are running VLANs.  The good news is that
157  *             this protocol won't be in the list unless compiled in, so
158  *             the average user (w/out VLANs) will not be adversely affected.
159  *             --BLG
160  *
161  *		0800	IP
162  *		8100    802.1Q VLAN
163  *		0001	802.3
164  *		0002	AX.25
165  *		0004	802.2
166  *		8035	RARP
167  *		0005	SNAP
168  *		0805	X.25
169  *		0806	ARP
170  *		8137	IPX
171  *		0009	Localtalk
172  *		86DD	IPv6
173  */
174 
175 #define PTYPE_HASH_SIZE	(16)
176 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
177 
178 static DEFINE_SPINLOCK(ptype_lock);
179 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
180 static struct list_head ptype_all __read_mostly;	/* Taps */
181 
182 /*
183  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
184  * semaphore.
185  *
186  * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
187  *
188  * Writers must hold the rtnl semaphore while they loop through the
189  * dev_base_head list, and hold dev_base_lock for writing when they do the
190  * actual updates.  This allows pure readers to access the list even
191  * while a writer is preparing to update it.
192  *
193  * To put it another way, dev_base_lock is held for writing only to
194  * protect against pure readers; the rtnl semaphore provides the
195  * protection against other writers.
196  *
197  * See, for example usages, register_netdevice() and
198  * unregister_netdevice(), which must be called with the rtnl
199  * semaphore held.
200  */
201 DEFINE_RWLOCK(dev_base_lock);
202 EXPORT_SYMBOL(dev_base_lock);
203 
204 static inline void dev_base_seq_inc(struct net *net)
205 {
206 	while (++net->dev_base_seq == 0);
207 }
208 
209 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
210 {
211 	unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
212 
213 	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
214 }
215 
216 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
217 {
218 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
219 }
220 
221 static inline void rps_lock(struct softnet_data *sd)
222 {
223 #ifdef CONFIG_RPS
224 	spin_lock(&sd->input_pkt_queue.lock);
225 #endif
226 }
227 
228 static inline void rps_unlock(struct softnet_data *sd)
229 {
230 #ifdef CONFIG_RPS
231 	spin_unlock(&sd->input_pkt_queue.lock);
232 #endif
233 }
234 
235 /* Device list insertion */
236 static int list_netdevice(struct net_device *dev)
237 {
238 	struct net *net = dev_net(dev);
239 
240 	ASSERT_RTNL();
241 
242 	write_lock_bh(&dev_base_lock);
243 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
244 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
245 	hlist_add_head_rcu(&dev->index_hlist,
246 			   dev_index_hash(net, dev->ifindex));
247 	write_unlock_bh(&dev_base_lock);
248 
249 	dev_base_seq_inc(net);
250 
251 	return 0;
252 }
253 
254 /* Device list removal
255  * caller must respect a RCU grace period before freeing/reusing dev
256  */
257 static void unlist_netdevice(struct net_device *dev)
258 {
259 	ASSERT_RTNL();
260 
261 	/* Unlink dev from the device chain */
262 	write_lock_bh(&dev_base_lock);
263 	list_del_rcu(&dev->dev_list);
264 	hlist_del_rcu(&dev->name_hlist);
265 	hlist_del_rcu(&dev->index_hlist);
266 	write_unlock_bh(&dev_base_lock);
267 
268 	dev_base_seq_inc(dev_net(dev));
269 }
270 
271 /*
272  *	Our notifier list
273  */
274 
275 static RAW_NOTIFIER_HEAD(netdev_chain);
276 
277 /*
278  *	Device drivers call our routines to queue packets here. We empty the
279  *	queue in the local softnet handler.
280  */
281 
282 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
283 EXPORT_PER_CPU_SYMBOL(softnet_data);
284 
285 #ifdef CONFIG_LOCKDEP
286 /*
287  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
288  * according to dev->type
289  */
290 static const unsigned short netdev_lock_type[] =
291 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
292 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
293 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
294 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
295 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
296 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
297 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
298 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
299 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
300 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
301 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
302 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
303 	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
304 	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
305 	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
306 
307 static const char *const netdev_lock_name[] =
308 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
309 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
310 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
311 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
312 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
313 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
314 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
315 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
316 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
317 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
318 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
319 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
320 	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
321 	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
322 	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
323 
324 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
325 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
326 
327 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
328 {
329 	int i;
330 
331 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
332 		if (netdev_lock_type[i] == dev_type)
333 			return i;
334 	/* the last key is used by default */
335 	return ARRAY_SIZE(netdev_lock_type) - 1;
336 }
337 
338 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
339 						 unsigned short dev_type)
340 {
341 	int i;
342 
343 	i = netdev_lock_pos(dev_type);
344 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
345 				   netdev_lock_name[i]);
346 }
347 
348 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
349 {
350 	int i;
351 
352 	i = netdev_lock_pos(dev->type);
353 	lockdep_set_class_and_name(&dev->addr_list_lock,
354 				   &netdev_addr_lock_key[i],
355 				   netdev_lock_name[i]);
356 }
357 #else
358 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
359 						 unsigned short dev_type)
360 {
361 }
362 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
363 {
364 }
365 #endif
366 
367 /*******************************************************************************
368 
369 		Protocol management and registration routines
370 
371 *******************************************************************************/
372 
373 /*
374  *	Add a protocol ID to the list. Now that the input handler is
375  *	smarter we can dispense with all the messy stuff that used to be
376  *	here.
377  *
378  *	BEWARE!!! Protocol handlers, mangling input packets,
379  *	MUST BE last in hash buckets and checking protocol handlers
380  *	MUST start from promiscuous ptype_all chain in net_bh.
381  *	It is true now, do not change it.
382  *	Explanation follows: if protocol handler, mangling packet, will
383  *	be the first on list, it is not able to sense, that packet
384  *	is cloned and should be copied-on-write, so that it will
385  *	change it and subsequent readers will get broken packet.
386  *							--ANK (980803)
387  */
388 
389 static inline struct list_head *ptype_head(const struct packet_type *pt)
390 {
391 	if (pt->type == htons(ETH_P_ALL))
392 		return &ptype_all;
393 	else
394 		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
395 }
396 
397 /**
398  *	dev_add_pack - add packet handler
399  *	@pt: packet type declaration
400  *
401  *	Add a protocol handler to the networking stack. The passed &packet_type
402  *	is linked into kernel lists and may not be freed until it has been
403  *	removed from the kernel lists.
404  *
405  *	This call does not sleep therefore it can not
406  *	guarantee all CPU's that are in middle of receiving packets
407  *	will see the new packet type (until the next received packet).
408  */
409 
410 void dev_add_pack(struct packet_type *pt)
411 {
412 	struct list_head *head = ptype_head(pt);
413 
414 	spin_lock(&ptype_lock);
415 	list_add_rcu(&pt->list, head);
416 	spin_unlock(&ptype_lock);
417 }
418 EXPORT_SYMBOL(dev_add_pack);
419 
420 /**
421  *	__dev_remove_pack	 - remove packet handler
422  *	@pt: packet type declaration
423  *
424  *	Remove a protocol handler that was previously added to the kernel
425  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
426  *	from the kernel lists and can be freed or reused once this function
427  *	returns.
428  *
429  *      The packet type might still be in use by receivers
430  *	and must not be freed until after all the CPU's have gone
431  *	through a quiescent state.
432  */
433 void __dev_remove_pack(struct packet_type *pt)
434 {
435 	struct list_head *head = ptype_head(pt);
436 	struct packet_type *pt1;
437 
438 	spin_lock(&ptype_lock);
439 
440 	list_for_each_entry(pt1, head, list) {
441 		if (pt == pt1) {
442 			list_del_rcu(&pt->list);
443 			goto out;
444 		}
445 	}
446 
447 	pr_warn("dev_remove_pack: %p not found\n", pt);
448 out:
449 	spin_unlock(&ptype_lock);
450 }
451 EXPORT_SYMBOL(__dev_remove_pack);
452 
453 /**
454  *	dev_remove_pack	 - remove packet handler
455  *	@pt: packet type declaration
456  *
457  *	Remove a protocol handler that was previously added to the kernel
458  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
459  *	from the kernel lists and can be freed or reused once this function
460  *	returns.
461  *
462  *	This call sleeps to guarantee that no CPU is looking at the packet
463  *	type after return.
464  */
465 void dev_remove_pack(struct packet_type *pt)
466 {
467 	__dev_remove_pack(pt);
468 
469 	synchronize_net();
470 }
471 EXPORT_SYMBOL(dev_remove_pack);
472 
473 /******************************************************************************
474 
475 		      Device Boot-time Settings Routines
476 
477 *******************************************************************************/
478 
479 /* Boot time configuration table */
480 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
481 
482 /**
483  *	netdev_boot_setup_add	- add new setup entry
484  *	@name: name of the device
485  *	@map: configured settings for the device
486  *
487  *	Adds new setup entry to the dev_boot_setup list.  The function
488  *	returns 0 on error and 1 on success.  This is a generic routine to
489  *	all netdevices.
490  */
491 static int netdev_boot_setup_add(char *name, struct ifmap *map)
492 {
493 	struct netdev_boot_setup *s;
494 	int i;
495 
496 	s = dev_boot_setup;
497 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
498 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
499 			memset(s[i].name, 0, sizeof(s[i].name));
500 			strlcpy(s[i].name, name, IFNAMSIZ);
501 			memcpy(&s[i].map, map, sizeof(s[i].map));
502 			break;
503 		}
504 	}
505 
506 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
507 }
508 
509 /**
510  *	netdev_boot_setup_check	- check boot time settings
511  *	@dev: the netdevice
512  *
513  * 	Check boot time settings for the device.
514  *	The found settings are set for the device to be used
515  *	later in the device probing.
516  *	Returns 0 if no settings found, 1 if they are.
517  */
518 int netdev_boot_setup_check(struct net_device *dev)
519 {
520 	struct netdev_boot_setup *s = dev_boot_setup;
521 	int i;
522 
523 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
524 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
525 		    !strcmp(dev->name, s[i].name)) {
526 			dev->irq 	= s[i].map.irq;
527 			dev->base_addr 	= s[i].map.base_addr;
528 			dev->mem_start 	= s[i].map.mem_start;
529 			dev->mem_end 	= s[i].map.mem_end;
530 			return 1;
531 		}
532 	}
533 	return 0;
534 }
535 EXPORT_SYMBOL(netdev_boot_setup_check);
536 
537 
538 /**
539  *	netdev_boot_base	- get address from boot time settings
540  *	@prefix: prefix for network device
541  *	@unit: id for network device
542  *
543  * 	Check boot time settings for the base address of device.
544  *	The found settings are set for the device to be used
545  *	later in the device probing.
546  *	Returns 0 if no settings found.
547  */
548 unsigned long netdev_boot_base(const char *prefix, int unit)
549 {
550 	const struct netdev_boot_setup *s = dev_boot_setup;
551 	char name[IFNAMSIZ];
552 	int i;
553 
554 	sprintf(name, "%s%d", prefix, unit);
555 
556 	/*
557 	 * If device already registered then return base of 1
558 	 * to indicate not to probe for this interface
559 	 */
560 	if (__dev_get_by_name(&init_net, name))
561 		return 1;
562 
563 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
564 		if (!strcmp(name, s[i].name))
565 			return s[i].map.base_addr;
566 	return 0;
567 }
568 
569 /*
570  * Saves at boot time configured settings for any netdevice.
571  */
572 int __init netdev_boot_setup(char *str)
573 {
574 	int ints[5];
575 	struct ifmap map;
576 
577 	str = get_options(str, ARRAY_SIZE(ints), ints);
578 	if (!str || !*str)
579 		return 0;
580 
581 	/* Save settings */
582 	memset(&map, 0, sizeof(map));
583 	if (ints[0] > 0)
584 		map.irq = ints[1];
585 	if (ints[0] > 1)
586 		map.base_addr = ints[2];
587 	if (ints[0] > 2)
588 		map.mem_start = ints[3];
589 	if (ints[0] > 3)
590 		map.mem_end = ints[4];
591 
592 	/* Add new entry to the list */
593 	return netdev_boot_setup_add(str, &map);
594 }
595 
596 __setup("netdev=", netdev_boot_setup);
597 
598 /*******************************************************************************
599 
600 			    Device Interface Subroutines
601 
602 *******************************************************************************/
603 
604 /**
605  *	__dev_get_by_name	- find a device by its name
606  *	@net: the applicable net namespace
607  *	@name: name to find
608  *
609  *	Find an interface by name. Must be called under RTNL semaphore
610  *	or @dev_base_lock. If the name is found a pointer to the device
611  *	is returned. If the name is not found then %NULL is returned. The
612  *	reference counters are not incremented so the caller must be
613  *	careful with locks.
614  */
615 
616 struct net_device *__dev_get_by_name(struct net *net, const char *name)
617 {
618 	struct hlist_node *p;
619 	struct net_device *dev;
620 	struct hlist_head *head = dev_name_hash(net, name);
621 
622 	hlist_for_each_entry(dev, p, head, name_hlist)
623 		if (!strncmp(dev->name, name, IFNAMSIZ))
624 			return dev;
625 
626 	return NULL;
627 }
628 EXPORT_SYMBOL(__dev_get_by_name);
629 
630 /**
631  *	dev_get_by_name_rcu	- find a device by its name
632  *	@net: the applicable net namespace
633  *	@name: name to find
634  *
635  *	Find an interface by name.
636  *	If the name is found a pointer to the device is returned.
637  * 	If the name is not found then %NULL is returned.
638  *	The reference counters are not incremented so the caller must be
639  *	careful with locks. The caller must hold RCU lock.
640  */
641 
642 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
643 {
644 	struct hlist_node *p;
645 	struct net_device *dev;
646 	struct hlist_head *head = dev_name_hash(net, name);
647 
648 	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
649 		if (!strncmp(dev->name, name, IFNAMSIZ))
650 			return dev;
651 
652 	return NULL;
653 }
654 EXPORT_SYMBOL(dev_get_by_name_rcu);
655 
656 /**
657  *	dev_get_by_name		- find a device by its name
658  *	@net: the applicable net namespace
659  *	@name: name to find
660  *
661  *	Find an interface by name. This can be called from any
662  *	context and does its own locking. The returned handle has
663  *	the usage count incremented and the caller must use dev_put() to
664  *	release it when it is no longer needed. %NULL is returned if no
665  *	matching device is found.
666  */
667 
668 struct net_device *dev_get_by_name(struct net *net, const char *name)
669 {
670 	struct net_device *dev;
671 
672 	rcu_read_lock();
673 	dev = dev_get_by_name_rcu(net, name);
674 	if (dev)
675 		dev_hold(dev);
676 	rcu_read_unlock();
677 	return dev;
678 }
679 EXPORT_SYMBOL(dev_get_by_name);
680 
681 /**
682  *	__dev_get_by_index - find a device by its ifindex
683  *	@net: the applicable net namespace
684  *	@ifindex: index of device
685  *
686  *	Search for an interface by index. Returns %NULL if the device
687  *	is not found or a pointer to the device. The device has not
688  *	had its reference counter increased so the caller must be careful
689  *	about locking. The caller must hold either the RTNL semaphore
690  *	or @dev_base_lock.
691  */
692 
693 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
694 {
695 	struct hlist_node *p;
696 	struct net_device *dev;
697 	struct hlist_head *head = dev_index_hash(net, ifindex);
698 
699 	hlist_for_each_entry(dev, p, head, index_hlist)
700 		if (dev->ifindex == ifindex)
701 			return dev;
702 
703 	return NULL;
704 }
705 EXPORT_SYMBOL(__dev_get_by_index);
706 
707 /**
708  *	dev_get_by_index_rcu - find a device by its ifindex
709  *	@net: the applicable net namespace
710  *	@ifindex: index of device
711  *
712  *	Search for an interface by index. Returns %NULL if the device
713  *	is not found or a pointer to the device. The device has not
714  *	had its reference counter increased so the caller must be careful
715  *	about locking. The caller must hold RCU lock.
716  */
717 
718 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
719 {
720 	struct hlist_node *p;
721 	struct net_device *dev;
722 	struct hlist_head *head = dev_index_hash(net, ifindex);
723 
724 	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
725 		if (dev->ifindex == ifindex)
726 			return dev;
727 
728 	return NULL;
729 }
730 EXPORT_SYMBOL(dev_get_by_index_rcu);
731 
732 
733 /**
734  *	dev_get_by_index - find a device by its ifindex
735  *	@net: the applicable net namespace
736  *	@ifindex: index of device
737  *
738  *	Search for an interface by index. Returns NULL if the device
739  *	is not found or a pointer to the device. The device returned has
740  *	had a reference added and the pointer is safe until the user calls
741  *	dev_put to indicate they have finished with it.
742  */
743 
744 struct net_device *dev_get_by_index(struct net *net, int ifindex)
745 {
746 	struct net_device *dev;
747 
748 	rcu_read_lock();
749 	dev = dev_get_by_index_rcu(net, ifindex);
750 	if (dev)
751 		dev_hold(dev);
752 	rcu_read_unlock();
753 	return dev;
754 }
755 EXPORT_SYMBOL(dev_get_by_index);
756 
757 /**
758  *	dev_getbyhwaddr_rcu - find a device by its hardware address
759  *	@net: the applicable net namespace
760  *	@type: media type of device
761  *	@ha: hardware address
762  *
763  *	Search for an interface by MAC address. Returns NULL if the device
764  *	is not found or a pointer to the device.
765  *	The caller must hold RCU or RTNL.
766  *	The returned device has not had its ref count increased
767  *	and the caller must therefore be careful about locking
768  *
769  */
770 
771 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
772 				       const char *ha)
773 {
774 	struct net_device *dev;
775 
776 	for_each_netdev_rcu(net, dev)
777 		if (dev->type == type &&
778 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
779 			return dev;
780 
781 	return NULL;
782 }
783 EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
784 
785 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
786 {
787 	struct net_device *dev;
788 
789 	ASSERT_RTNL();
790 	for_each_netdev(net, dev)
791 		if (dev->type == type)
792 			return dev;
793 
794 	return NULL;
795 }
796 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
797 
798 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
799 {
800 	struct net_device *dev, *ret = NULL;
801 
802 	rcu_read_lock();
803 	for_each_netdev_rcu(net, dev)
804 		if (dev->type == type) {
805 			dev_hold(dev);
806 			ret = dev;
807 			break;
808 		}
809 	rcu_read_unlock();
810 	return ret;
811 }
812 EXPORT_SYMBOL(dev_getfirstbyhwtype);
813 
814 /**
815  *	dev_get_by_flags_rcu - find any device with given flags
816  *	@net: the applicable net namespace
817  *	@if_flags: IFF_* values
818  *	@mask: bitmask of bits in if_flags to check
819  *
820  *	Search for any interface with the given flags. Returns NULL if a device
821  *	is not found or a pointer to the device. Must be called inside
822  *	rcu_read_lock(), and result refcount is unchanged.
823  */
824 
825 struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
826 				    unsigned short mask)
827 {
828 	struct net_device *dev, *ret;
829 
830 	ret = NULL;
831 	for_each_netdev_rcu(net, dev) {
832 		if (((dev->flags ^ if_flags) & mask) == 0) {
833 			ret = dev;
834 			break;
835 		}
836 	}
837 	return ret;
838 }
839 EXPORT_SYMBOL(dev_get_by_flags_rcu);
840 
841 /**
842  *	dev_valid_name - check if name is okay for network device
843  *	@name: name string
844  *
845  *	Network device names need to be valid file names to
846  *	to allow sysfs to work.  We also disallow any kind of
847  *	whitespace.
848  */
849 bool dev_valid_name(const char *name)
850 {
851 	if (*name == '\0')
852 		return false;
853 	if (strlen(name) >= IFNAMSIZ)
854 		return false;
855 	if (!strcmp(name, ".") || !strcmp(name, ".."))
856 		return false;
857 
858 	while (*name) {
859 		if (*name == '/' || isspace(*name))
860 			return false;
861 		name++;
862 	}
863 	return true;
864 }
865 EXPORT_SYMBOL(dev_valid_name);
866 
867 /**
868  *	__dev_alloc_name - allocate a name for a device
869  *	@net: network namespace to allocate the device name in
870  *	@name: name format string
871  *	@buf:  scratch buffer and result name string
872  *
873  *	Passed a format string - eg "lt%d" it will try and find a suitable
874  *	id. It scans list of devices to build up a free map, then chooses
875  *	the first empty slot. The caller must hold the dev_base or rtnl lock
876  *	while allocating the name and adding the device in order to avoid
877  *	duplicates.
878  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
879  *	Returns the number of the unit assigned or a negative errno code.
880  */
881 
882 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
883 {
884 	int i = 0;
885 	const char *p;
886 	const int max_netdevices = 8*PAGE_SIZE;
887 	unsigned long *inuse;
888 	struct net_device *d;
889 
890 	p = strnchr(name, IFNAMSIZ-1, '%');
891 	if (p) {
892 		/*
893 		 * Verify the string as this thing may have come from
894 		 * the user.  There must be either one "%d" and no other "%"
895 		 * characters.
896 		 */
897 		if (p[1] != 'd' || strchr(p + 2, '%'))
898 			return -EINVAL;
899 
900 		/* Use one page as a bit array of possible slots */
901 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
902 		if (!inuse)
903 			return -ENOMEM;
904 
905 		for_each_netdev(net, d) {
906 			if (!sscanf(d->name, name, &i))
907 				continue;
908 			if (i < 0 || i >= max_netdevices)
909 				continue;
910 
911 			/*  avoid cases where sscanf is not exact inverse of printf */
912 			snprintf(buf, IFNAMSIZ, name, i);
913 			if (!strncmp(buf, d->name, IFNAMSIZ))
914 				set_bit(i, inuse);
915 		}
916 
917 		i = find_first_zero_bit(inuse, max_netdevices);
918 		free_page((unsigned long) inuse);
919 	}
920 
921 	if (buf != name)
922 		snprintf(buf, IFNAMSIZ, name, i);
923 	if (!__dev_get_by_name(net, buf))
924 		return i;
925 
926 	/* It is possible to run out of possible slots
927 	 * when the name is long and there isn't enough space left
928 	 * for the digits, or if all bits are used.
929 	 */
930 	return -ENFILE;
931 }
932 
933 /**
934  *	dev_alloc_name - allocate a name for a device
935  *	@dev: device
936  *	@name: name format string
937  *
938  *	Passed a format string - eg "lt%d" it will try and find a suitable
939  *	id. It scans list of devices to build up a free map, then chooses
940  *	the first empty slot. The caller must hold the dev_base or rtnl lock
941  *	while allocating the name and adding the device in order to avoid
942  *	duplicates.
943  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
944  *	Returns the number of the unit assigned or a negative errno code.
945  */
946 
947 int dev_alloc_name(struct net_device *dev, const char *name)
948 {
949 	char buf[IFNAMSIZ];
950 	struct net *net;
951 	int ret;
952 
953 	BUG_ON(!dev_net(dev));
954 	net = dev_net(dev);
955 	ret = __dev_alloc_name(net, name, buf);
956 	if (ret >= 0)
957 		strlcpy(dev->name, buf, IFNAMSIZ);
958 	return ret;
959 }
960 EXPORT_SYMBOL(dev_alloc_name);
961 
962 static int dev_get_valid_name(struct net_device *dev, const char *name)
963 {
964 	struct net *net;
965 
966 	BUG_ON(!dev_net(dev));
967 	net = dev_net(dev);
968 
969 	if (!dev_valid_name(name))
970 		return -EINVAL;
971 
972 	if (strchr(name, '%'))
973 		return dev_alloc_name(dev, name);
974 	else if (__dev_get_by_name(net, name))
975 		return -EEXIST;
976 	else if (dev->name != name)
977 		strlcpy(dev->name, name, IFNAMSIZ);
978 
979 	return 0;
980 }
981 
982 /**
983  *	dev_change_name - change name of a device
984  *	@dev: device
985  *	@newname: name (or format string) must be at least IFNAMSIZ
986  *
987  *	Change name of a device, can pass format strings "eth%d".
988  *	for wildcarding.
989  */
990 int dev_change_name(struct net_device *dev, const char *newname)
991 {
992 	char oldname[IFNAMSIZ];
993 	int err = 0;
994 	int ret;
995 	struct net *net;
996 
997 	ASSERT_RTNL();
998 	BUG_ON(!dev_net(dev));
999 
1000 	net = dev_net(dev);
1001 	if (dev->flags & IFF_UP)
1002 		return -EBUSY;
1003 
1004 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
1005 		return 0;
1006 
1007 	memcpy(oldname, dev->name, IFNAMSIZ);
1008 
1009 	err = dev_get_valid_name(dev, newname);
1010 	if (err < 0)
1011 		return err;
1012 
1013 rollback:
1014 	ret = device_rename(&dev->dev, dev->name);
1015 	if (ret) {
1016 		memcpy(dev->name, oldname, IFNAMSIZ);
1017 		return ret;
1018 	}
1019 
1020 	write_lock_bh(&dev_base_lock);
1021 	hlist_del_rcu(&dev->name_hlist);
1022 	write_unlock_bh(&dev_base_lock);
1023 
1024 	synchronize_rcu();
1025 
1026 	write_lock_bh(&dev_base_lock);
1027 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1028 	write_unlock_bh(&dev_base_lock);
1029 
1030 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1031 	ret = notifier_to_errno(ret);
1032 
1033 	if (ret) {
1034 		/* err >= 0 after dev_alloc_name() or stores the first errno */
1035 		if (err >= 0) {
1036 			err = ret;
1037 			memcpy(dev->name, oldname, IFNAMSIZ);
1038 			goto rollback;
1039 		} else {
1040 			pr_err("%s: name change rollback failed: %d\n",
1041 			       dev->name, ret);
1042 		}
1043 	}
1044 
1045 	return err;
1046 }
1047 
1048 /**
1049  *	dev_set_alias - change ifalias of a device
1050  *	@dev: device
1051  *	@alias: name up to IFALIASZ
1052  *	@len: limit of bytes to copy from info
1053  *
1054  *	Set ifalias for a device,
1055  */
1056 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1057 {
1058 	ASSERT_RTNL();
1059 
1060 	if (len >= IFALIASZ)
1061 		return -EINVAL;
1062 
1063 	if (!len) {
1064 		if (dev->ifalias) {
1065 			kfree(dev->ifalias);
1066 			dev->ifalias = NULL;
1067 		}
1068 		return 0;
1069 	}
1070 
1071 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1072 	if (!dev->ifalias)
1073 		return -ENOMEM;
1074 
1075 	strlcpy(dev->ifalias, alias, len+1);
1076 	return len;
1077 }
1078 
1079 
1080 /**
1081  *	netdev_features_change - device changes features
1082  *	@dev: device to cause notification
1083  *
1084  *	Called to indicate a device has changed features.
1085  */
1086 void netdev_features_change(struct net_device *dev)
1087 {
1088 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1089 }
1090 EXPORT_SYMBOL(netdev_features_change);
1091 
1092 /**
1093  *	netdev_state_change - device changes state
1094  *	@dev: device to cause notification
1095  *
1096  *	Called to indicate a device has changed state. This function calls
1097  *	the notifier chains for netdev_chain and sends a NEWLINK message
1098  *	to the routing socket.
1099  */
1100 void netdev_state_change(struct net_device *dev)
1101 {
1102 	if (dev->flags & IFF_UP) {
1103 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1104 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1105 	}
1106 }
1107 EXPORT_SYMBOL(netdev_state_change);
1108 
1109 int netdev_bonding_change(struct net_device *dev, unsigned long event)
1110 {
1111 	return call_netdevice_notifiers(event, dev);
1112 }
1113 EXPORT_SYMBOL(netdev_bonding_change);
1114 
1115 /**
1116  *	dev_load 	- load a network module
1117  *	@net: the applicable net namespace
1118  *	@name: name of interface
1119  *
1120  *	If a network interface is not present and the process has suitable
1121  *	privileges this function loads the module. If module loading is not
1122  *	available in this kernel then it becomes a nop.
1123  */
1124 
1125 void dev_load(struct net *net, const char *name)
1126 {
1127 	struct net_device *dev;
1128 	int no_module;
1129 
1130 	rcu_read_lock();
1131 	dev = dev_get_by_name_rcu(net, name);
1132 	rcu_read_unlock();
1133 
1134 	no_module = !dev;
1135 	if (no_module && capable(CAP_NET_ADMIN))
1136 		no_module = request_module("netdev-%s", name);
1137 	if (no_module && capable(CAP_SYS_MODULE)) {
1138 		if (!request_module("%s", name))
1139 			pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated).  Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
1140 			       name);
1141 	}
1142 }
1143 EXPORT_SYMBOL(dev_load);
1144 
1145 static int __dev_open(struct net_device *dev)
1146 {
1147 	const struct net_device_ops *ops = dev->netdev_ops;
1148 	int ret;
1149 
1150 	ASSERT_RTNL();
1151 
1152 	if (!netif_device_present(dev))
1153 		return -ENODEV;
1154 
1155 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1156 	ret = notifier_to_errno(ret);
1157 	if (ret)
1158 		return ret;
1159 
1160 	set_bit(__LINK_STATE_START, &dev->state);
1161 
1162 	if (ops->ndo_validate_addr)
1163 		ret = ops->ndo_validate_addr(dev);
1164 
1165 	if (!ret && ops->ndo_open)
1166 		ret = ops->ndo_open(dev);
1167 
1168 	if (ret)
1169 		clear_bit(__LINK_STATE_START, &dev->state);
1170 	else {
1171 		dev->flags |= IFF_UP;
1172 		net_dmaengine_get();
1173 		dev_set_rx_mode(dev);
1174 		dev_activate(dev);
1175 	}
1176 
1177 	return ret;
1178 }
1179 
1180 /**
1181  *	dev_open	- prepare an interface for use.
1182  *	@dev:	device to open
1183  *
1184  *	Takes a device from down to up state. The device's private open
1185  *	function is invoked and then the multicast lists are loaded. Finally
1186  *	the device is moved into the up state and a %NETDEV_UP message is
1187  *	sent to the netdev notifier chain.
1188  *
1189  *	Calling this function on an active interface is a nop. On a failure
1190  *	a negative errno code is returned.
1191  */
1192 int dev_open(struct net_device *dev)
1193 {
1194 	int ret;
1195 
1196 	if (dev->flags & IFF_UP)
1197 		return 0;
1198 
1199 	ret = __dev_open(dev);
1200 	if (ret < 0)
1201 		return ret;
1202 
1203 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1204 	call_netdevice_notifiers(NETDEV_UP, dev);
1205 
1206 	return ret;
1207 }
1208 EXPORT_SYMBOL(dev_open);
1209 
1210 static int __dev_close_many(struct list_head *head)
1211 {
1212 	struct net_device *dev;
1213 
1214 	ASSERT_RTNL();
1215 	might_sleep();
1216 
1217 	list_for_each_entry(dev, head, unreg_list) {
1218 		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1219 
1220 		clear_bit(__LINK_STATE_START, &dev->state);
1221 
1222 		/* Synchronize to scheduled poll. We cannot touch poll list, it
1223 		 * can be even on different cpu. So just clear netif_running().
1224 		 *
1225 		 * dev->stop() will invoke napi_disable() on all of it's
1226 		 * napi_struct instances on this device.
1227 		 */
1228 		smp_mb__after_clear_bit(); /* Commit netif_running(). */
1229 	}
1230 
1231 	dev_deactivate_many(head);
1232 
1233 	list_for_each_entry(dev, head, unreg_list) {
1234 		const struct net_device_ops *ops = dev->netdev_ops;
1235 
1236 		/*
1237 		 *	Call the device specific close. This cannot fail.
1238 		 *	Only if device is UP
1239 		 *
1240 		 *	We allow it to be called even after a DETACH hot-plug
1241 		 *	event.
1242 		 */
1243 		if (ops->ndo_stop)
1244 			ops->ndo_stop(dev);
1245 
1246 		dev->flags &= ~IFF_UP;
1247 		net_dmaengine_put();
1248 	}
1249 
1250 	return 0;
1251 }
1252 
1253 static int __dev_close(struct net_device *dev)
1254 {
1255 	int retval;
1256 	LIST_HEAD(single);
1257 
1258 	list_add(&dev->unreg_list, &single);
1259 	retval = __dev_close_many(&single);
1260 	list_del(&single);
1261 	return retval;
1262 }
1263 
1264 static int dev_close_many(struct list_head *head)
1265 {
1266 	struct net_device *dev, *tmp;
1267 	LIST_HEAD(tmp_list);
1268 
1269 	list_for_each_entry_safe(dev, tmp, head, unreg_list)
1270 		if (!(dev->flags & IFF_UP))
1271 			list_move(&dev->unreg_list, &tmp_list);
1272 
1273 	__dev_close_many(head);
1274 
1275 	list_for_each_entry(dev, head, unreg_list) {
1276 		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1277 		call_netdevice_notifiers(NETDEV_DOWN, dev);
1278 	}
1279 
1280 	/* rollback_registered_many needs the complete original list */
1281 	list_splice(&tmp_list, head);
1282 	return 0;
1283 }
1284 
1285 /**
1286  *	dev_close - shutdown an interface.
1287  *	@dev: device to shutdown
1288  *
1289  *	This function moves an active device into down state. A
1290  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1291  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1292  *	chain.
1293  */
1294 int dev_close(struct net_device *dev)
1295 {
1296 	if (dev->flags & IFF_UP) {
1297 		LIST_HEAD(single);
1298 
1299 		list_add(&dev->unreg_list, &single);
1300 		dev_close_many(&single);
1301 		list_del(&single);
1302 	}
1303 	return 0;
1304 }
1305 EXPORT_SYMBOL(dev_close);
1306 
1307 
1308 /**
1309  *	dev_disable_lro - disable Large Receive Offload on a device
1310  *	@dev: device
1311  *
1312  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1313  *	called under RTNL.  This is needed if received packets may be
1314  *	forwarded to another interface.
1315  */
1316 void dev_disable_lro(struct net_device *dev)
1317 {
1318 	/*
1319 	 * If we're trying to disable lro on a vlan device
1320 	 * use the underlying physical device instead
1321 	 */
1322 	if (is_vlan_dev(dev))
1323 		dev = vlan_dev_real_dev(dev);
1324 
1325 	dev->wanted_features &= ~NETIF_F_LRO;
1326 	netdev_update_features(dev);
1327 
1328 	if (unlikely(dev->features & NETIF_F_LRO))
1329 		netdev_WARN(dev, "failed to disable LRO!\n");
1330 }
1331 EXPORT_SYMBOL(dev_disable_lro);
1332 
1333 
1334 static int dev_boot_phase = 1;
1335 
1336 /**
1337  *	register_netdevice_notifier - register a network notifier block
1338  *	@nb: notifier
1339  *
1340  *	Register a notifier to be called when network device events occur.
1341  *	The notifier passed is linked into the kernel structures and must
1342  *	not be reused until it has been unregistered. A negative errno code
1343  *	is returned on a failure.
1344  *
1345  * 	When registered all registration and up events are replayed
1346  *	to the new notifier to allow device to have a race free
1347  *	view of the network device list.
1348  */
1349 
1350 int register_netdevice_notifier(struct notifier_block *nb)
1351 {
1352 	struct net_device *dev;
1353 	struct net_device *last;
1354 	struct net *net;
1355 	int err;
1356 
1357 	rtnl_lock();
1358 	err = raw_notifier_chain_register(&netdev_chain, nb);
1359 	if (err)
1360 		goto unlock;
1361 	if (dev_boot_phase)
1362 		goto unlock;
1363 	for_each_net(net) {
1364 		for_each_netdev(net, dev) {
1365 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1366 			err = notifier_to_errno(err);
1367 			if (err)
1368 				goto rollback;
1369 
1370 			if (!(dev->flags & IFF_UP))
1371 				continue;
1372 
1373 			nb->notifier_call(nb, NETDEV_UP, dev);
1374 		}
1375 	}
1376 
1377 unlock:
1378 	rtnl_unlock();
1379 	return err;
1380 
1381 rollback:
1382 	last = dev;
1383 	for_each_net(net) {
1384 		for_each_netdev(net, dev) {
1385 			if (dev == last)
1386 				goto outroll;
1387 
1388 			if (dev->flags & IFF_UP) {
1389 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1390 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1391 			}
1392 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1393 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1394 		}
1395 	}
1396 
1397 outroll:
1398 	raw_notifier_chain_unregister(&netdev_chain, nb);
1399 	goto unlock;
1400 }
1401 EXPORT_SYMBOL(register_netdevice_notifier);
1402 
1403 /**
1404  *	unregister_netdevice_notifier - unregister a network notifier block
1405  *	@nb: notifier
1406  *
1407  *	Unregister a notifier previously registered by
1408  *	register_netdevice_notifier(). The notifier is unlinked into the
1409  *	kernel structures and may then be reused. A negative errno code
1410  *	is returned on a failure.
1411  *
1412  * 	After unregistering unregister and down device events are synthesized
1413  *	for all devices on the device list to the removed notifier to remove
1414  *	the need for special case cleanup code.
1415  */
1416 
1417 int unregister_netdevice_notifier(struct notifier_block *nb)
1418 {
1419 	struct net_device *dev;
1420 	struct net *net;
1421 	int err;
1422 
1423 	rtnl_lock();
1424 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1425 	if (err)
1426 		goto unlock;
1427 
1428 	for_each_net(net) {
1429 		for_each_netdev(net, dev) {
1430 			if (dev->flags & IFF_UP) {
1431 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1432 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1433 			}
1434 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1435 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1436 		}
1437 	}
1438 unlock:
1439 	rtnl_unlock();
1440 	return err;
1441 }
1442 EXPORT_SYMBOL(unregister_netdevice_notifier);
1443 
1444 /**
1445  *	call_netdevice_notifiers - call all network notifier blocks
1446  *      @val: value passed unmodified to notifier function
1447  *      @dev: net_device pointer passed unmodified to notifier function
1448  *
1449  *	Call all network notifier blocks.  Parameters and return value
1450  *	are as for raw_notifier_call_chain().
1451  */
1452 
1453 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1454 {
1455 	ASSERT_RTNL();
1456 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1457 }
1458 EXPORT_SYMBOL(call_netdevice_notifiers);
1459 
1460 static struct static_key netstamp_needed __read_mostly;
1461 #ifdef HAVE_JUMP_LABEL
1462 /* We are not allowed to call static_key_slow_dec() from irq context
1463  * If net_disable_timestamp() is called from irq context, defer the
1464  * static_key_slow_dec() calls.
1465  */
1466 static atomic_t netstamp_needed_deferred;
1467 #endif
1468 
1469 void net_enable_timestamp(void)
1470 {
1471 #ifdef HAVE_JUMP_LABEL
1472 	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1473 
1474 	if (deferred) {
1475 		while (--deferred)
1476 			static_key_slow_dec(&netstamp_needed);
1477 		return;
1478 	}
1479 #endif
1480 	WARN_ON(in_interrupt());
1481 	static_key_slow_inc(&netstamp_needed);
1482 }
1483 EXPORT_SYMBOL(net_enable_timestamp);
1484 
1485 void net_disable_timestamp(void)
1486 {
1487 #ifdef HAVE_JUMP_LABEL
1488 	if (in_interrupt()) {
1489 		atomic_inc(&netstamp_needed_deferred);
1490 		return;
1491 	}
1492 #endif
1493 	static_key_slow_dec(&netstamp_needed);
1494 }
1495 EXPORT_SYMBOL(net_disable_timestamp);
1496 
1497 static inline void net_timestamp_set(struct sk_buff *skb)
1498 {
1499 	skb->tstamp.tv64 = 0;
1500 	if (static_key_false(&netstamp_needed))
1501 		__net_timestamp(skb);
1502 }
1503 
1504 #define net_timestamp_check(COND, SKB)			\
1505 	if (static_key_false(&netstamp_needed)) {		\
1506 		if ((COND) && !(SKB)->tstamp.tv64)	\
1507 			__net_timestamp(SKB);		\
1508 	}						\
1509 
1510 static int net_hwtstamp_validate(struct ifreq *ifr)
1511 {
1512 	struct hwtstamp_config cfg;
1513 	enum hwtstamp_tx_types tx_type;
1514 	enum hwtstamp_rx_filters rx_filter;
1515 	int tx_type_valid = 0;
1516 	int rx_filter_valid = 0;
1517 
1518 	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
1519 		return -EFAULT;
1520 
1521 	if (cfg.flags) /* reserved for future extensions */
1522 		return -EINVAL;
1523 
1524 	tx_type = cfg.tx_type;
1525 	rx_filter = cfg.rx_filter;
1526 
1527 	switch (tx_type) {
1528 	case HWTSTAMP_TX_OFF:
1529 	case HWTSTAMP_TX_ON:
1530 	case HWTSTAMP_TX_ONESTEP_SYNC:
1531 		tx_type_valid = 1;
1532 		break;
1533 	}
1534 
1535 	switch (rx_filter) {
1536 	case HWTSTAMP_FILTER_NONE:
1537 	case HWTSTAMP_FILTER_ALL:
1538 	case HWTSTAMP_FILTER_SOME:
1539 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
1540 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
1541 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
1542 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
1543 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
1544 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
1545 	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
1546 	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
1547 	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
1548 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
1549 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
1550 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
1551 		rx_filter_valid = 1;
1552 		break;
1553 	}
1554 
1555 	if (!tx_type_valid || !rx_filter_valid)
1556 		return -ERANGE;
1557 
1558 	return 0;
1559 }
1560 
1561 static inline bool is_skb_forwardable(struct net_device *dev,
1562 				      struct sk_buff *skb)
1563 {
1564 	unsigned int len;
1565 
1566 	if (!(dev->flags & IFF_UP))
1567 		return false;
1568 
1569 	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1570 	if (skb->len <= len)
1571 		return true;
1572 
1573 	/* if TSO is enabled, we don't care about the length as the packet
1574 	 * could be forwarded without being segmented before
1575 	 */
1576 	if (skb_is_gso(skb))
1577 		return true;
1578 
1579 	return false;
1580 }
1581 
1582 /**
1583  * dev_forward_skb - loopback an skb to another netif
1584  *
1585  * @dev: destination network device
1586  * @skb: buffer to forward
1587  *
1588  * return values:
1589  *	NET_RX_SUCCESS	(no congestion)
1590  *	NET_RX_DROP     (packet was dropped, but freed)
1591  *
1592  * dev_forward_skb can be used for injecting an skb from the
1593  * start_xmit function of one device into the receive queue
1594  * of another device.
1595  *
1596  * The receiving device may be in another namespace, so
1597  * we have to clear all information in the skb that could
1598  * impact namespace isolation.
1599  */
1600 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1601 {
1602 	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1603 		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1604 			atomic_long_inc(&dev->rx_dropped);
1605 			kfree_skb(skb);
1606 			return NET_RX_DROP;
1607 		}
1608 	}
1609 
1610 	skb_orphan(skb);
1611 	nf_reset(skb);
1612 
1613 	if (unlikely(!is_skb_forwardable(dev, skb))) {
1614 		atomic_long_inc(&dev->rx_dropped);
1615 		kfree_skb(skb);
1616 		return NET_RX_DROP;
1617 	}
1618 	skb->skb_iif = 0;
1619 	skb->dev = dev;
1620 	skb_dst_drop(skb);
1621 	skb->tstamp.tv64 = 0;
1622 	skb->pkt_type = PACKET_HOST;
1623 	skb->protocol = eth_type_trans(skb, dev);
1624 	skb->mark = 0;
1625 	secpath_reset(skb);
1626 	nf_reset(skb);
1627 	return netif_rx(skb);
1628 }
1629 EXPORT_SYMBOL_GPL(dev_forward_skb);
1630 
1631 static inline int deliver_skb(struct sk_buff *skb,
1632 			      struct packet_type *pt_prev,
1633 			      struct net_device *orig_dev)
1634 {
1635 	atomic_inc(&skb->users);
1636 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1637 }
1638 
1639 /*
1640  *	Support routine. Sends outgoing frames to any network
1641  *	taps currently in use.
1642  */
1643 
1644 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1645 {
1646 	struct packet_type *ptype;
1647 	struct sk_buff *skb2 = NULL;
1648 	struct packet_type *pt_prev = NULL;
1649 
1650 	rcu_read_lock();
1651 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1652 		/* Never send packets back to the socket
1653 		 * they originated from - MvS (miquels@drinkel.ow.org)
1654 		 */
1655 		if ((ptype->dev == dev || !ptype->dev) &&
1656 		    (ptype->af_packet_priv == NULL ||
1657 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1658 			if (pt_prev) {
1659 				deliver_skb(skb2, pt_prev, skb->dev);
1660 				pt_prev = ptype;
1661 				continue;
1662 			}
1663 
1664 			skb2 = skb_clone(skb, GFP_ATOMIC);
1665 			if (!skb2)
1666 				break;
1667 
1668 			net_timestamp_set(skb2);
1669 
1670 			/* skb->nh should be correctly
1671 			   set by sender, so that the second statement is
1672 			   just protection against buggy protocols.
1673 			 */
1674 			skb_reset_mac_header(skb2);
1675 
1676 			if (skb_network_header(skb2) < skb2->data ||
1677 			    skb2->network_header > skb2->tail) {
1678 				net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1679 						     ntohs(skb2->protocol),
1680 						     dev->name);
1681 				skb_reset_network_header(skb2);
1682 			}
1683 
1684 			skb2->transport_header = skb2->network_header;
1685 			skb2->pkt_type = PACKET_OUTGOING;
1686 			pt_prev = ptype;
1687 		}
1688 	}
1689 	if (pt_prev)
1690 		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1691 	rcu_read_unlock();
1692 }
1693 
1694 /* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
1695  * @dev: Network device
1696  * @txq: number of queues available
1697  *
1698  * If real_num_tx_queues is changed the tc mappings may no longer be
1699  * valid. To resolve this verify the tc mapping remains valid and if
1700  * not NULL the mapping. With no priorities mapping to this
1701  * offset/count pair it will no longer be used. In the worst case TC0
1702  * is invalid nothing can be done so disable priority mappings. If is
1703  * expected that drivers will fix this mapping if they can before
1704  * calling netif_set_real_num_tx_queues.
1705  */
1706 static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1707 {
1708 	int i;
1709 	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1710 
1711 	/* If TC0 is invalidated disable TC mapping */
1712 	if (tc->offset + tc->count > txq) {
1713 		pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1714 		dev->num_tc = 0;
1715 		return;
1716 	}
1717 
1718 	/* Invalidated prio to tc mappings set to TC0 */
1719 	for (i = 1; i < TC_BITMASK + 1; i++) {
1720 		int q = netdev_get_prio_tc_map(dev, i);
1721 
1722 		tc = &dev->tc_to_txq[q];
1723 		if (tc->offset + tc->count > txq) {
1724 			pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1725 				i, q);
1726 			netdev_set_prio_tc_map(dev, i, 0);
1727 		}
1728 	}
1729 }
1730 
1731 /*
1732  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1733  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
1734  */
1735 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1736 {
1737 	int rc;
1738 
1739 	if (txq < 1 || txq > dev->num_tx_queues)
1740 		return -EINVAL;
1741 
1742 	if (dev->reg_state == NETREG_REGISTERED ||
1743 	    dev->reg_state == NETREG_UNREGISTERING) {
1744 		ASSERT_RTNL();
1745 
1746 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
1747 						  txq);
1748 		if (rc)
1749 			return rc;
1750 
1751 		if (dev->num_tc)
1752 			netif_setup_tc(dev, txq);
1753 
1754 		if (txq < dev->real_num_tx_queues)
1755 			qdisc_reset_all_tx_gt(dev, txq);
1756 	}
1757 
1758 	dev->real_num_tx_queues = txq;
1759 	return 0;
1760 }
1761 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1762 
1763 #ifdef CONFIG_RPS
1764 /**
1765  *	netif_set_real_num_rx_queues - set actual number of RX queues used
1766  *	@dev: Network device
1767  *	@rxq: Actual number of RX queues
1768  *
1769  *	This must be called either with the rtnl_lock held or before
1770  *	registration of the net device.  Returns 0 on success, or a
1771  *	negative error code.  If called before registration, it always
1772  *	succeeds.
1773  */
1774 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1775 {
1776 	int rc;
1777 
1778 	if (rxq < 1 || rxq > dev->num_rx_queues)
1779 		return -EINVAL;
1780 
1781 	if (dev->reg_state == NETREG_REGISTERED) {
1782 		ASSERT_RTNL();
1783 
1784 		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1785 						  rxq);
1786 		if (rc)
1787 			return rc;
1788 	}
1789 
1790 	dev->real_num_rx_queues = rxq;
1791 	return 0;
1792 }
1793 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1794 #endif
1795 
1796 static inline void __netif_reschedule(struct Qdisc *q)
1797 {
1798 	struct softnet_data *sd;
1799 	unsigned long flags;
1800 
1801 	local_irq_save(flags);
1802 	sd = &__get_cpu_var(softnet_data);
1803 	q->next_sched = NULL;
1804 	*sd->output_queue_tailp = q;
1805 	sd->output_queue_tailp = &q->next_sched;
1806 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1807 	local_irq_restore(flags);
1808 }
1809 
1810 void __netif_schedule(struct Qdisc *q)
1811 {
1812 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1813 		__netif_reschedule(q);
1814 }
1815 EXPORT_SYMBOL(__netif_schedule);
1816 
1817 void dev_kfree_skb_irq(struct sk_buff *skb)
1818 {
1819 	if (atomic_dec_and_test(&skb->users)) {
1820 		struct softnet_data *sd;
1821 		unsigned long flags;
1822 
1823 		local_irq_save(flags);
1824 		sd = &__get_cpu_var(softnet_data);
1825 		skb->next = sd->completion_queue;
1826 		sd->completion_queue = skb;
1827 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1828 		local_irq_restore(flags);
1829 	}
1830 }
1831 EXPORT_SYMBOL(dev_kfree_skb_irq);
1832 
1833 void dev_kfree_skb_any(struct sk_buff *skb)
1834 {
1835 	if (in_irq() || irqs_disabled())
1836 		dev_kfree_skb_irq(skb);
1837 	else
1838 		dev_kfree_skb(skb);
1839 }
1840 EXPORT_SYMBOL(dev_kfree_skb_any);
1841 
1842 
1843 /**
1844  * netif_device_detach - mark device as removed
1845  * @dev: network device
1846  *
1847  * Mark device as removed from system and therefore no longer available.
1848  */
1849 void netif_device_detach(struct net_device *dev)
1850 {
1851 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1852 	    netif_running(dev)) {
1853 		netif_tx_stop_all_queues(dev);
1854 	}
1855 }
1856 EXPORT_SYMBOL(netif_device_detach);
1857 
1858 /**
1859  * netif_device_attach - mark device as attached
1860  * @dev: network device
1861  *
1862  * Mark device as attached from system and restart if needed.
1863  */
1864 void netif_device_attach(struct net_device *dev)
1865 {
1866 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1867 	    netif_running(dev)) {
1868 		netif_tx_wake_all_queues(dev);
1869 		__netdev_watchdog_up(dev);
1870 	}
1871 }
1872 EXPORT_SYMBOL(netif_device_attach);
1873 
1874 static void skb_warn_bad_offload(const struct sk_buff *skb)
1875 {
1876 	static const netdev_features_t null_features = 0;
1877 	struct net_device *dev = skb->dev;
1878 	const char *driver = "";
1879 
1880 	if (dev && dev->dev.parent)
1881 		driver = dev_driver_string(dev->dev.parent);
1882 
1883 	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
1884 	     "gso_type=%d ip_summed=%d\n",
1885 	     driver, dev ? &dev->features : &null_features,
1886 	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
1887 	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
1888 	     skb_shinfo(skb)->gso_type, skb->ip_summed);
1889 }
1890 
1891 /*
1892  * Invalidate hardware checksum when packet is to be mangled, and
1893  * complete checksum manually on outgoing path.
1894  */
1895 int skb_checksum_help(struct sk_buff *skb)
1896 {
1897 	__wsum csum;
1898 	int ret = 0, offset;
1899 
1900 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1901 		goto out_set_summed;
1902 
1903 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1904 		skb_warn_bad_offload(skb);
1905 		return -EINVAL;
1906 	}
1907 
1908 	offset = skb_checksum_start_offset(skb);
1909 	BUG_ON(offset >= skb_headlen(skb));
1910 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1911 
1912 	offset += skb->csum_offset;
1913 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1914 
1915 	if (skb_cloned(skb) &&
1916 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1917 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1918 		if (ret)
1919 			goto out;
1920 	}
1921 
1922 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1923 out_set_summed:
1924 	skb->ip_summed = CHECKSUM_NONE;
1925 out:
1926 	return ret;
1927 }
1928 EXPORT_SYMBOL(skb_checksum_help);
1929 
1930 /**
1931  *	skb_gso_segment - Perform segmentation on skb.
1932  *	@skb: buffer to segment
1933  *	@features: features for the output path (see dev->features)
1934  *
1935  *	This function segments the given skb and returns a list of segments.
1936  *
1937  *	It may return NULL if the skb requires no segmentation.  This is
1938  *	only possible when GSO is used for verifying header integrity.
1939  */
1940 struct sk_buff *skb_gso_segment(struct sk_buff *skb,
1941 	netdev_features_t features)
1942 {
1943 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1944 	struct packet_type *ptype;
1945 	__be16 type = skb->protocol;
1946 	int vlan_depth = ETH_HLEN;
1947 	int err;
1948 
1949 	while (type == htons(ETH_P_8021Q)) {
1950 		struct vlan_hdr *vh;
1951 
1952 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
1953 			return ERR_PTR(-EINVAL);
1954 
1955 		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
1956 		type = vh->h_vlan_encapsulated_proto;
1957 		vlan_depth += VLAN_HLEN;
1958 	}
1959 
1960 	skb_reset_mac_header(skb);
1961 	skb->mac_len = skb->network_header - skb->mac_header;
1962 	__skb_pull(skb, skb->mac_len);
1963 
1964 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1965 		skb_warn_bad_offload(skb);
1966 
1967 		if (skb_header_cloned(skb) &&
1968 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1969 			return ERR_PTR(err);
1970 	}
1971 
1972 	rcu_read_lock();
1973 	list_for_each_entry_rcu(ptype,
1974 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1975 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1976 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1977 				err = ptype->gso_send_check(skb);
1978 				segs = ERR_PTR(err);
1979 				if (err || skb_gso_ok(skb, features))
1980 					break;
1981 				__skb_push(skb, (skb->data -
1982 						 skb_network_header(skb)));
1983 			}
1984 			segs = ptype->gso_segment(skb, features);
1985 			break;
1986 		}
1987 	}
1988 	rcu_read_unlock();
1989 
1990 	__skb_push(skb, skb->data - skb_mac_header(skb));
1991 
1992 	return segs;
1993 }
1994 EXPORT_SYMBOL(skb_gso_segment);
1995 
1996 /* Take action when hardware reception checksum errors are detected. */
1997 #ifdef CONFIG_BUG
1998 void netdev_rx_csum_fault(struct net_device *dev)
1999 {
2000 	if (net_ratelimit()) {
2001 		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2002 		dump_stack();
2003 	}
2004 }
2005 EXPORT_SYMBOL(netdev_rx_csum_fault);
2006 #endif
2007 
2008 /* Actually, we should eliminate this check as soon as we know, that:
2009  * 1. IOMMU is present and allows to map all the memory.
2010  * 2. No high memory really exists on this machine.
2011  */
2012 
2013 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2014 {
2015 #ifdef CONFIG_HIGHMEM
2016 	int i;
2017 	if (!(dev->features & NETIF_F_HIGHDMA)) {
2018 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2019 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2020 			if (PageHighMem(skb_frag_page(frag)))
2021 				return 1;
2022 		}
2023 	}
2024 
2025 	if (PCI_DMA_BUS_IS_PHYS) {
2026 		struct device *pdev = dev->dev.parent;
2027 
2028 		if (!pdev)
2029 			return 0;
2030 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2031 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2032 			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2033 			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2034 				return 1;
2035 		}
2036 	}
2037 #endif
2038 	return 0;
2039 }
2040 
2041 struct dev_gso_cb {
2042 	void (*destructor)(struct sk_buff *skb);
2043 };
2044 
2045 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2046 
2047 static void dev_gso_skb_destructor(struct sk_buff *skb)
2048 {
2049 	struct dev_gso_cb *cb;
2050 
2051 	do {
2052 		struct sk_buff *nskb = skb->next;
2053 
2054 		skb->next = nskb->next;
2055 		nskb->next = NULL;
2056 		kfree_skb(nskb);
2057 	} while (skb->next);
2058 
2059 	cb = DEV_GSO_CB(skb);
2060 	if (cb->destructor)
2061 		cb->destructor(skb);
2062 }
2063 
2064 /**
2065  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
2066  *	@skb: buffer to segment
2067  *	@features: device features as applicable to this skb
2068  *
2069  *	This function segments the given skb and stores the list of segments
2070  *	in skb->next.
2071  */
2072 static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2073 {
2074 	struct sk_buff *segs;
2075 
2076 	segs = skb_gso_segment(skb, features);
2077 
2078 	/* Verifying header integrity only. */
2079 	if (!segs)
2080 		return 0;
2081 
2082 	if (IS_ERR(segs))
2083 		return PTR_ERR(segs);
2084 
2085 	skb->next = segs;
2086 	DEV_GSO_CB(skb)->destructor = skb->destructor;
2087 	skb->destructor = dev_gso_skb_destructor;
2088 
2089 	return 0;
2090 }
2091 
2092 /*
2093  * Try to orphan skb early, right before transmission by the device.
2094  * We cannot orphan skb if tx timestamp is requested or the sk-reference
2095  * is needed on driver level for other reasons, e.g. see net/can/raw.c
2096  */
2097 static inline void skb_orphan_try(struct sk_buff *skb)
2098 {
2099 	struct sock *sk = skb->sk;
2100 
2101 	if (sk && !skb_shinfo(skb)->tx_flags) {
2102 		/* skb_tx_hash() wont be able to get sk.
2103 		 * We copy sk_hash into skb->rxhash
2104 		 */
2105 		if (!skb->rxhash)
2106 			skb->rxhash = sk->sk_hash;
2107 		skb_orphan(skb);
2108 	}
2109 }
2110 
2111 static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2112 {
2113 	return ((features & NETIF_F_GEN_CSUM) ||
2114 		((features & NETIF_F_V4_CSUM) &&
2115 		 protocol == htons(ETH_P_IP)) ||
2116 		((features & NETIF_F_V6_CSUM) &&
2117 		 protocol == htons(ETH_P_IPV6)) ||
2118 		((features & NETIF_F_FCOE_CRC) &&
2119 		 protocol == htons(ETH_P_FCOE)));
2120 }
2121 
2122 static netdev_features_t harmonize_features(struct sk_buff *skb,
2123 	__be16 protocol, netdev_features_t features)
2124 {
2125 	if (!can_checksum_protocol(features, protocol)) {
2126 		features &= ~NETIF_F_ALL_CSUM;
2127 		features &= ~NETIF_F_SG;
2128 	} else if (illegal_highdma(skb->dev, skb)) {
2129 		features &= ~NETIF_F_SG;
2130 	}
2131 
2132 	return features;
2133 }
2134 
2135 netdev_features_t netif_skb_features(struct sk_buff *skb)
2136 {
2137 	__be16 protocol = skb->protocol;
2138 	netdev_features_t features = skb->dev->features;
2139 
2140 	if (protocol == htons(ETH_P_8021Q)) {
2141 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2142 		protocol = veh->h_vlan_encapsulated_proto;
2143 	} else if (!vlan_tx_tag_present(skb)) {
2144 		return harmonize_features(skb, protocol, features);
2145 	}
2146 
2147 	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
2148 
2149 	if (protocol != htons(ETH_P_8021Q)) {
2150 		return harmonize_features(skb, protocol, features);
2151 	} else {
2152 		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2153 				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
2154 		return harmonize_features(skb, protocol, features);
2155 	}
2156 }
2157 EXPORT_SYMBOL(netif_skb_features);
2158 
2159 /*
2160  * Returns true if either:
2161  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
2162  *	2. skb is fragmented and the device does not support SG, or if
2163  *	   at least one of fragments is in highmem and device does not
2164  *	   support DMA from it.
2165  */
2166 static inline int skb_needs_linearize(struct sk_buff *skb,
2167 				      int features)
2168 {
2169 	return skb_is_nonlinear(skb) &&
2170 			((skb_has_frag_list(skb) &&
2171 				!(features & NETIF_F_FRAGLIST)) ||
2172 			(skb_shinfo(skb)->nr_frags &&
2173 				!(features & NETIF_F_SG)));
2174 }
2175 
2176 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2177 			struct netdev_queue *txq)
2178 {
2179 	const struct net_device_ops *ops = dev->netdev_ops;
2180 	int rc = NETDEV_TX_OK;
2181 	unsigned int skb_len;
2182 
2183 	if (likely(!skb->next)) {
2184 		netdev_features_t features;
2185 
2186 		/*
2187 		 * If device doesn't need skb->dst, release it right now while
2188 		 * its hot in this cpu cache
2189 		 */
2190 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2191 			skb_dst_drop(skb);
2192 
2193 		if (!list_empty(&ptype_all))
2194 			dev_queue_xmit_nit(skb, dev);
2195 
2196 		skb_orphan_try(skb);
2197 
2198 		features = netif_skb_features(skb);
2199 
2200 		if (vlan_tx_tag_present(skb) &&
2201 		    !(features & NETIF_F_HW_VLAN_TX)) {
2202 			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2203 			if (unlikely(!skb))
2204 				goto out;
2205 
2206 			skb->vlan_tci = 0;
2207 		}
2208 
2209 		if (netif_needs_gso(skb, features)) {
2210 			if (unlikely(dev_gso_segment(skb, features)))
2211 				goto out_kfree_skb;
2212 			if (skb->next)
2213 				goto gso;
2214 		} else {
2215 			if (skb_needs_linearize(skb, features) &&
2216 			    __skb_linearize(skb))
2217 				goto out_kfree_skb;
2218 
2219 			/* If packet is not checksummed and device does not
2220 			 * support checksumming for this protocol, complete
2221 			 * checksumming here.
2222 			 */
2223 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
2224 				skb_set_transport_header(skb,
2225 					skb_checksum_start_offset(skb));
2226 				if (!(features & NETIF_F_ALL_CSUM) &&
2227 				     skb_checksum_help(skb))
2228 					goto out_kfree_skb;
2229 			}
2230 		}
2231 
2232 		skb_len = skb->len;
2233 		rc = ops->ndo_start_xmit(skb, dev);
2234 		trace_net_dev_xmit(skb, rc, dev, skb_len);
2235 		if (rc == NETDEV_TX_OK)
2236 			txq_trans_update(txq);
2237 		return rc;
2238 	}
2239 
2240 gso:
2241 	do {
2242 		struct sk_buff *nskb = skb->next;
2243 
2244 		skb->next = nskb->next;
2245 		nskb->next = NULL;
2246 
2247 		/*
2248 		 * If device doesn't need nskb->dst, release it right now while
2249 		 * its hot in this cpu cache
2250 		 */
2251 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2252 			skb_dst_drop(nskb);
2253 
2254 		skb_len = nskb->len;
2255 		rc = ops->ndo_start_xmit(nskb, dev);
2256 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
2257 		if (unlikely(rc != NETDEV_TX_OK)) {
2258 			if (rc & ~NETDEV_TX_MASK)
2259 				goto out_kfree_gso_skb;
2260 			nskb->next = skb->next;
2261 			skb->next = nskb;
2262 			return rc;
2263 		}
2264 		txq_trans_update(txq);
2265 		if (unlikely(netif_xmit_stopped(txq) && skb->next))
2266 			return NETDEV_TX_BUSY;
2267 	} while (skb->next);
2268 
2269 out_kfree_gso_skb:
2270 	if (likely(skb->next == NULL))
2271 		skb->destructor = DEV_GSO_CB(skb)->destructor;
2272 out_kfree_skb:
2273 	kfree_skb(skb);
2274 out:
2275 	return rc;
2276 }
2277 
2278 static u32 hashrnd __read_mostly;
2279 
2280 /*
2281  * Returns a Tx hash based on the given packet descriptor a Tx queues' number
2282  * to be used as a distribution range.
2283  */
2284 u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
2285 		  unsigned int num_tx_queues)
2286 {
2287 	u32 hash;
2288 	u16 qoffset = 0;
2289 	u16 qcount = num_tx_queues;
2290 
2291 	if (skb_rx_queue_recorded(skb)) {
2292 		hash = skb_get_rx_queue(skb);
2293 		while (unlikely(hash >= num_tx_queues))
2294 			hash -= num_tx_queues;
2295 		return hash;
2296 	}
2297 
2298 	if (dev->num_tc) {
2299 		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2300 		qoffset = dev->tc_to_txq[tc].offset;
2301 		qcount = dev->tc_to_txq[tc].count;
2302 	}
2303 
2304 	if (skb->sk && skb->sk->sk_hash)
2305 		hash = skb->sk->sk_hash;
2306 	else
2307 		hash = (__force u16) skb->protocol ^ skb->rxhash;
2308 	hash = jhash_1word(hash, hashrnd);
2309 
2310 	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
2311 }
2312 EXPORT_SYMBOL(__skb_tx_hash);
2313 
2314 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
2315 {
2316 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
2317 		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
2318 				     dev->name, queue_index,
2319 				     dev->real_num_tx_queues);
2320 		return 0;
2321 	}
2322 	return queue_index;
2323 }
2324 
2325 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
2326 {
2327 #ifdef CONFIG_XPS
2328 	struct xps_dev_maps *dev_maps;
2329 	struct xps_map *map;
2330 	int queue_index = -1;
2331 
2332 	rcu_read_lock();
2333 	dev_maps = rcu_dereference(dev->xps_maps);
2334 	if (dev_maps) {
2335 		map = rcu_dereference(
2336 		    dev_maps->cpu_map[raw_smp_processor_id()]);
2337 		if (map) {
2338 			if (map->len == 1)
2339 				queue_index = map->queues[0];
2340 			else {
2341 				u32 hash;
2342 				if (skb->sk && skb->sk->sk_hash)
2343 					hash = skb->sk->sk_hash;
2344 				else
2345 					hash = (__force u16) skb->protocol ^
2346 					    skb->rxhash;
2347 				hash = jhash_1word(hash, hashrnd);
2348 				queue_index = map->queues[
2349 				    ((u64)hash * map->len) >> 32];
2350 			}
2351 			if (unlikely(queue_index >= dev->real_num_tx_queues))
2352 				queue_index = -1;
2353 		}
2354 	}
2355 	rcu_read_unlock();
2356 
2357 	return queue_index;
2358 #else
2359 	return -1;
2360 #endif
2361 }
2362 
2363 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2364 					struct sk_buff *skb)
2365 {
2366 	int queue_index;
2367 	const struct net_device_ops *ops = dev->netdev_ops;
2368 
2369 	if (dev->real_num_tx_queues == 1)
2370 		queue_index = 0;
2371 	else if (ops->ndo_select_queue) {
2372 		queue_index = ops->ndo_select_queue(dev, skb);
2373 		queue_index = dev_cap_txqueue(dev, queue_index);
2374 	} else {
2375 		struct sock *sk = skb->sk;
2376 		queue_index = sk_tx_queue_get(sk);
2377 
2378 		if (queue_index < 0 || skb->ooo_okay ||
2379 		    queue_index >= dev->real_num_tx_queues) {
2380 			int old_index = queue_index;
2381 
2382 			queue_index = get_xps_queue(dev, skb);
2383 			if (queue_index < 0)
2384 				queue_index = skb_tx_hash(dev, skb);
2385 
2386 			if (queue_index != old_index && sk) {
2387 				struct dst_entry *dst =
2388 				    rcu_dereference_check(sk->sk_dst_cache, 1);
2389 
2390 				if (dst && skb_dst(skb) == dst)
2391 					sk_tx_queue_set(sk, queue_index);
2392 			}
2393 		}
2394 	}
2395 
2396 	skb_set_queue_mapping(skb, queue_index);
2397 	return netdev_get_tx_queue(dev, queue_index);
2398 }
2399 
2400 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2401 				 struct net_device *dev,
2402 				 struct netdev_queue *txq)
2403 {
2404 	spinlock_t *root_lock = qdisc_lock(q);
2405 	bool contended;
2406 	int rc;
2407 
2408 	qdisc_skb_cb(skb)->pkt_len = skb->len;
2409 	qdisc_calculate_pkt_len(skb, q);
2410 	/*
2411 	 * Heuristic to force contended enqueues to serialize on a
2412 	 * separate lock before trying to get qdisc main lock.
2413 	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2414 	 * and dequeue packets faster.
2415 	 */
2416 	contended = qdisc_is_running(q);
2417 	if (unlikely(contended))
2418 		spin_lock(&q->busylock);
2419 
2420 	spin_lock(root_lock);
2421 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2422 		kfree_skb(skb);
2423 		rc = NET_XMIT_DROP;
2424 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2425 		   qdisc_run_begin(q)) {
2426 		/*
2427 		 * This is a work-conserving queue; there are no old skbs
2428 		 * waiting to be sent out; and the qdisc is not running -
2429 		 * xmit the skb directly.
2430 		 */
2431 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2432 			skb_dst_force(skb);
2433 
2434 		qdisc_bstats_update(q, skb);
2435 
2436 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2437 			if (unlikely(contended)) {
2438 				spin_unlock(&q->busylock);
2439 				contended = false;
2440 			}
2441 			__qdisc_run(q);
2442 		} else
2443 			qdisc_run_end(q);
2444 
2445 		rc = NET_XMIT_SUCCESS;
2446 	} else {
2447 		skb_dst_force(skb);
2448 		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2449 		if (qdisc_run_begin(q)) {
2450 			if (unlikely(contended)) {
2451 				spin_unlock(&q->busylock);
2452 				contended = false;
2453 			}
2454 			__qdisc_run(q);
2455 		}
2456 	}
2457 	spin_unlock(root_lock);
2458 	if (unlikely(contended))
2459 		spin_unlock(&q->busylock);
2460 	return rc;
2461 }
2462 
2463 #if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
2464 static void skb_update_prio(struct sk_buff *skb)
2465 {
2466 	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2467 
2468 	if ((!skb->priority) && (skb->sk) && map)
2469 		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
2470 }
2471 #else
2472 #define skb_update_prio(skb)
2473 #endif
2474 
2475 static DEFINE_PER_CPU(int, xmit_recursion);
2476 #define RECURSION_LIMIT 10
2477 
2478 /**
2479  *	dev_queue_xmit - transmit a buffer
2480  *	@skb: buffer to transmit
2481  *
2482  *	Queue a buffer for transmission to a network device. The caller must
2483  *	have set the device and priority and built the buffer before calling
2484  *	this function. The function can be called from an interrupt.
2485  *
2486  *	A negative errno code is returned on a failure. A success does not
2487  *	guarantee the frame will be transmitted as it may be dropped due
2488  *	to congestion or traffic shaping.
2489  *
2490  * -----------------------------------------------------------------------------------
2491  *      I notice this method can also return errors from the queue disciplines,
2492  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
2493  *      be positive.
2494  *
2495  *      Regardless of the return value, the skb is consumed, so it is currently
2496  *      difficult to retry a send to this method.  (You can bump the ref count
2497  *      before sending to hold a reference for retry if you are careful.)
2498  *
2499  *      When calling this method, interrupts MUST be enabled.  This is because
2500  *      the BH enable code must have IRQs enabled so that it will not deadlock.
2501  *          --BLG
2502  */
2503 int dev_queue_xmit(struct sk_buff *skb)
2504 {
2505 	struct net_device *dev = skb->dev;
2506 	struct netdev_queue *txq;
2507 	struct Qdisc *q;
2508 	int rc = -ENOMEM;
2509 
2510 	/* Disable soft irqs for various locks below. Also
2511 	 * stops preemption for RCU.
2512 	 */
2513 	rcu_read_lock_bh();
2514 
2515 	skb_update_prio(skb);
2516 
2517 	txq = dev_pick_tx(dev, skb);
2518 	q = rcu_dereference_bh(txq->qdisc);
2519 
2520 #ifdef CONFIG_NET_CLS_ACT
2521 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2522 #endif
2523 	trace_net_dev_queue(skb);
2524 	if (q->enqueue) {
2525 		rc = __dev_xmit_skb(skb, q, dev, txq);
2526 		goto out;
2527 	}
2528 
2529 	/* The device has no queue. Common case for software devices:
2530 	   loopback, all the sorts of tunnels...
2531 
2532 	   Really, it is unlikely that netif_tx_lock protection is necessary
2533 	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
2534 	   counters.)
2535 	   However, it is possible, that they rely on protection
2536 	   made by us here.
2537 
2538 	   Check this and shot the lock. It is not prone from deadlocks.
2539 	   Either shot noqueue qdisc, it is even simpler 8)
2540 	 */
2541 	if (dev->flags & IFF_UP) {
2542 		int cpu = smp_processor_id(); /* ok because BHs are off */
2543 
2544 		if (txq->xmit_lock_owner != cpu) {
2545 
2546 			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2547 				goto recursion_alert;
2548 
2549 			HARD_TX_LOCK(dev, txq, cpu);
2550 
2551 			if (!netif_xmit_stopped(txq)) {
2552 				__this_cpu_inc(xmit_recursion);
2553 				rc = dev_hard_start_xmit(skb, dev, txq);
2554 				__this_cpu_dec(xmit_recursion);
2555 				if (dev_xmit_complete(rc)) {
2556 					HARD_TX_UNLOCK(dev, txq);
2557 					goto out;
2558 				}
2559 			}
2560 			HARD_TX_UNLOCK(dev, txq);
2561 			net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2562 					     dev->name);
2563 		} else {
2564 			/* Recursion is detected! It is possible,
2565 			 * unfortunately
2566 			 */
2567 recursion_alert:
2568 			net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2569 					     dev->name);
2570 		}
2571 	}
2572 
2573 	rc = -ENETDOWN;
2574 	rcu_read_unlock_bh();
2575 
2576 	kfree_skb(skb);
2577 	return rc;
2578 out:
2579 	rcu_read_unlock_bh();
2580 	return rc;
2581 }
2582 EXPORT_SYMBOL(dev_queue_xmit);
2583 
2584 
2585 /*=======================================================================
2586 			Receiver routines
2587   =======================================================================*/
2588 
2589 int netdev_max_backlog __read_mostly = 1000;
2590 int netdev_tstamp_prequeue __read_mostly = 1;
2591 int netdev_budget __read_mostly = 300;
2592 int weight_p __read_mostly = 64;            /* old backlog weight */
2593 
2594 /* Called with irq disabled */
2595 static inline void ____napi_schedule(struct softnet_data *sd,
2596 				     struct napi_struct *napi)
2597 {
2598 	list_add_tail(&napi->poll_list, &sd->poll_list);
2599 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2600 }
2601 
2602 /*
2603  * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
2604  * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
2605  * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
2606  * if hash is a canonical 4-tuple hash over transport ports.
2607  */
2608 void __skb_get_rxhash(struct sk_buff *skb)
2609 {
2610 	struct flow_keys keys;
2611 	u32 hash;
2612 
2613 	if (!skb_flow_dissect(skb, &keys))
2614 		return;
2615 
2616 	if (keys.ports) {
2617 		if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
2618 			swap(keys.port16[0], keys.port16[1]);
2619 		skb->l4_rxhash = 1;
2620 	}
2621 
2622 	/* get a consistent hash (same value on both flow directions) */
2623 	if ((__force u32)keys.dst < (__force u32)keys.src)
2624 		swap(keys.dst, keys.src);
2625 
2626 	hash = jhash_3words((__force u32)keys.dst,
2627 			    (__force u32)keys.src,
2628 			    (__force u32)keys.ports, hashrnd);
2629 	if (!hash)
2630 		hash = 1;
2631 
2632 	skb->rxhash = hash;
2633 }
2634 EXPORT_SYMBOL(__skb_get_rxhash);
2635 
2636 #ifdef CONFIG_RPS
2637 
2638 /* One global table that all flow-based protocols share. */
2639 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2640 EXPORT_SYMBOL(rps_sock_flow_table);
2641 
2642 struct static_key rps_needed __read_mostly;
2643 
2644 static struct rps_dev_flow *
2645 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2646 	    struct rps_dev_flow *rflow, u16 next_cpu)
2647 {
2648 	if (next_cpu != RPS_NO_CPU) {
2649 #ifdef CONFIG_RFS_ACCEL
2650 		struct netdev_rx_queue *rxqueue;
2651 		struct rps_dev_flow_table *flow_table;
2652 		struct rps_dev_flow *old_rflow;
2653 		u32 flow_id;
2654 		u16 rxq_index;
2655 		int rc;
2656 
2657 		/* Should we steer this flow to a different hardware queue? */
2658 		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2659 		    !(dev->features & NETIF_F_NTUPLE))
2660 			goto out;
2661 		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2662 		if (rxq_index == skb_get_rx_queue(skb))
2663 			goto out;
2664 
2665 		rxqueue = dev->_rx + rxq_index;
2666 		flow_table = rcu_dereference(rxqueue->rps_flow_table);
2667 		if (!flow_table)
2668 			goto out;
2669 		flow_id = skb->rxhash & flow_table->mask;
2670 		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2671 							rxq_index, flow_id);
2672 		if (rc < 0)
2673 			goto out;
2674 		old_rflow = rflow;
2675 		rflow = &flow_table->flows[flow_id];
2676 		rflow->filter = rc;
2677 		if (old_rflow->filter == rflow->filter)
2678 			old_rflow->filter = RPS_NO_FILTER;
2679 	out:
2680 #endif
2681 		rflow->last_qtail =
2682 			per_cpu(softnet_data, next_cpu).input_queue_head;
2683 	}
2684 
2685 	rflow->cpu = next_cpu;
2686 	return rflow;
2687 }
2688 
2689 /*
2690  * get_rps_cpu is called from netif_receive_skb and returns the target
2691  * CPU from the RPS map of the receiving queue for a given skb.
2692  * rcu_read_lock must be held on entry.
2693  */
2694 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2695 		       struct rps_dev_flow **rflowp)
2696 {
2697 	struct netdev_rx_queue *rxqueue;
2698 	struct rps_map *map;
2699 	struct rps_dev_flow_table *flow_table;
2700 	struct rps_sock_flow_table *sock_flow_table;
2701 	int cpu = -1;
2702 	u16 tcpu;
2703 
2704 	if (skb_rx_queue_recorded(skb)) {
2705 		u16 index = skb_get_rx_queue(skb);
2706 		if (unlikely(index >= dev->real_num_rx_queues)) {
2707 			WARN_ONCE(dev->real_num_rx_queues > 1,
2708 				  "%s received packet on queue %u, but number "
2709 				  "of RX queues is %u\n",
2710 				  dev->name, index, dev->real_num_rx_queues);
2711 			goto done;
2712 		}
2713 		rxqueue = dev->_rx + index;
2714 	} else
2715 		rxqueue = dev->_rx;
2716 
2717 	map = rcu_dereference(rxqueue->rps_map);
2718 	if (map) {
2719 		if (map->len == 1 &&
2720 		    !rcu_access_pointer(rxqueue->rps_flow_table)) {
2721 			tcpu = map->cpus[0];
2722 			if (cpu_online(tcpu))
2723 				cpu = tcpu;
2724 			goto done;
2725 		}
2726 	} else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
2727 		goto done;
2728 	}
2729 
2730 	skb_reset_network_header(skb);
2731 	if (!skb_get_rxhash(skb))
2732 		goto done;
2733 
2734 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2735 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
2736 	if (flow_table && sock_flow_table) {
2737 		u16 next_cpu;
2738 		struct rps_dev_flow *rflow;
2739 
2740 		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2741 		tcpu = rflow->cpu;
2742 
2743 		next_cpu = sock_flow_table->ents[skb->rxhash &
2744 		    sock_flow_table->mask];
2745 
2746 		/*
2747 		 * If the desired CPU (where last recvmsg was done) is
2748 		 * different from current CPU (one in the rx-queue flow
2749 		 * table entry), switch if one of the following holds:
2750 		 *   - Current CPU is unset (equal to RPS_NO_CPU).
2751 		 *   - Current CPU is offline.
2752 		 *   - The current CPU's queue tail has advanced beyond the
2753 		 *     last packet that was enqueued using this table entry.
2754 		 *     This guarantees that all previous packets for the flow
2755 		 *     have been dequeued, thus preserving in order delivery.
2756 		 */
2757 		if (unlikely(tcpu != next_cpu) &&
2758 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2759 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2760 		      rflow->last_qtail)) >= 0))
2761 			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
2762 
2763 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2764 			*rflowp = rflow;
2765 			cpu = tcpu;
2766 			goto done;
2767 		}
2768 	}
2769 
2770 	if (map) {
2771 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2772 
2773 		if (cpu_online(tcpu)) {
2774 			cpu = tcpu;
2775 			goto done;
2776 		}
2777 	}
2778 
2779 done:
2780 	return cpu;
2781 }
2782 
2783 #ifdef CONFIG_RFS_ACCEL
2784 
2785 /**
2786  * rps_may_expire_flow - check whether an RFS hardware filter may be removed
2787  * @dev: Device on which the filter was set
2788  * @rxq_index: RX queue index
2789  * @flow_id: Flow ID passed to ndo_rx_flow_steer()
2790  * @filter_id: Filter ID returned by ndo_rx_flow_steer()
2791  *
2792  * Drivers that implement ndo_rx_flow_steer() should periodically call
2793  * this function for each installed filter and remove the filters for
2794  * which it returns %true.
2795  */
2796 bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2797 			 u32 flow_id, u16 filter_id)
2798 {
2799 	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2800 	struct rps_dev_flow_table *flow_table;
2801 	struct rps_dev_flow *rflow;
2802 	bool expire = true;
2803 	int cpu;
2804 
2805 	rcu_read_lock();
2806 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
2807 	if (flow_table && flow_id <= flow_table->mask) {
2808 		rflow = &flow_table->flows[flow_id];
2809 		cpu = ACCESS_ONCE(rflow->cpu);
2810 		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2811 		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2812 			   rflow->last_qtail) <
2813 		     (int)(10 * flow_table->mask)))
2814 			expire = false;
2815 	}
2816 	rcu_read_unlock();
2817 	return expire;
2818 }
2819 EXPORT_SYMBOL(rps_may_expire_flow);
2820 
2821 #endif /* CONFIG_RFS_ACCEL */
2822 
2823 /* Called from hardirq (IPI) context */
2824 static void rps_trigger_softirq(void *data)
2825 {
2826 	struct softnet_data *sd = data;
2827 
2828 	____napi_schedule(sd, &sd->backlog);
2829 	sd->received_rps++;
2830 }
2831 
2832 #endif /* CONFIG_RPS */
2833 
2834 /*
2835  * Check if this softnet_data structure is another cpu one
2836  * If yes, queue it to our IPI list and return 1
2837  * If no, return 0
2838  */
2839 static int rps_ipi_queued(struct softnet_data *sd)
2840 {
2841 #ifdef CONFIG_RPS
2842 	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2843 
2844 	if (sd != mysd) {
2845 		sd->rps_ipi_next = mysd->rps_ipi_list;
2846 		mysd->rps_ipi_list = sd;
2847 
2848 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2849 		return 1;
2850 	}
2851 #endif /* CONFIG_RPS */
2852 	return 0;
2853 }
2854 
2855 /*
2856  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2857  * queue (may be a remote CPU queue).
2858  */
2859 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2860 			      unsigned int *qtail)
2861 {
2862 	struct softnet_data *sd;
2863 	unsigned long flags;
2864 
2865 	sd = &per_cpu(softnet_data, cpu);
2866 
2867 	local_irq_save(flags);
2868 
2869 	rps_lock(sd);
2870 	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2871 		if (skb_queue_len(&sd->input_pkt_queue)) {
2872 enqueue:
2873 			__skb_queue_tail(&sd->input_pkt_queue, skb);
2874 			input_queue_tail_incr_save(sd, qtail);
2875 			rps_unlock(sd);
2876 			local_irq_restore(flags);
2877 			return NET_RX_SUCCESS;
2878 		}
2879 
2880 		/* Schedule NAPI for backlog device
2881 		 * We can use non atomic operation since we own the queue lock
2882 		 */
2883 		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2884 			if (!rps_ipi_queued(sd))
2885 				____napi_schedule(sd, &sd->backlog);
2886 		}
2887 		goto enqueue;
2888 	}
2889 
2890 	sd->dropped++;
2891 	rps_unlock(sd);
2892 
2893 	local_irq_restore(flags);
2894 
2895 	atomic_long_inc(&skb->dev->rx_dropped);
2896 	kfree_skb(skb);
2897 	return NET_RX_DROP;
2898 }
2899 
2900 /**
2901  *	netif_rx	-	post buffer to the network code
2902  *	@skb: buffer to post
2903  *
2904  *	This function receives a packet from a device driver and queues it for
2905  *	the upper (protocol) levels to process.  It always succeeds. The buffer
2906  *	may be dropped during processing for congestion control or by the
2907  *	protocol layers.
2908  *
2909  *	return values:
2910  *	NET_RX_SUCCESS	(no congestion)
2911  *	NET_RX_DROP     (packet was dropped)
2912  *
2913  */
2914 
2915 int netif_rx(struct sk_buff *skb)
2916 {
2917 	int ret;
2918 
2919 	/* if netpoll wants it, pretend we never saw it */
2920 	if (netpoll_rx(skb))
2921 		return NET_RX_DROP;
2922 
2923 	net_timestamp_check(netdev_tstamp_prequeue, skb);
2924 
2925 	trace_netif_rx(skb);
2926 #ifdef CONFIG_RPS
2927 	if (static_key_false(&rps_needed)) {
2928 		struct rps_dev_flow voidflow, *rflow = &voidflow;
2929 		int cpu;
2930 
2931 		preempt_disable();
2932 		rcu_read_lock();
2933 
2934 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
2935 		if (cpu < 0)
2936 			cpu = smp_processor_id();
2937 
2938 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2939 
2940 		rcu_read_unlock();
2941 		preempt_enable();
2942 	} else
2943 #endif
2944 	{
2945 		unsigned int qtail;
2946 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2947 		put_cpu();
2948 	}
2949 	return ret;
2950 }
2951 EXPORT_SYMBOL(netif_rx);
2952 
2953 int netif_rx_ni(struct sk_buff *skb)
2954 {
2955 	int err;
2956 
2957 	preempt_disable();
2958 	err = netif_rx(skb);
2959 	if (local_softirq_pending())
2960 		do_softirq();
2961 	preempt_enable();
2962 
2963 	return err;
2964 }
2965 EXPORT_SYMBOL(netif_rx_ni);
2966 
2967 static void net_tx_action(struct softirq_action *h)
2968 {
2969 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2970 
2971 	if (sd->completion_queue) {
2972 		struct sk_buff *clist;
2973 
2974 		local_irq_disable();
2975 		clist = sd->completion_queue;
2976 		sd->completion_queue = NULL;
2977 		local_irq_enable();
2978 
2979 		while (clist) {
2980 			struct sk_buff *skb = clist;
2981 			clist = clist->next;
2982 
2983 			WARN_ON(atomic_read(&skb->users));
2984 			trace_kfree_skb(skb, net_tx_action);
2985 			__kfree_skb(skb);
2986 		}
2987 	}
2988 
2989 	if (sd->output_queue) {
2990 		struct Qdisc *head;
2991 
2992 		local_irq_disable();
2993 		head = sd->output_queue;
2994 		sd->output_queue = NULL;
2995 		sd->output_queue_tailp = &sd->output_queue;
2996 		local_irq_enable();
2997 
2998 		while (head) {
2999 			struct Qdisc *q = head;
3000 			spinlock_t *root_lock;
3001 
3002 			head = head->next_sched;
3003 
3004 			root_lock = qdisc_lock(q);
3005 			if (spin_trylock(root_lock)) {
3006 				smp_mb__before_clear_bit();
3007 				clear_bit(__QDISC_STATE_SCHED,
3008 					  &q->state);
3009 				qdisc_run(q);
3010 				spin_unlock(root_lock);
3011 			} else {
3012 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
3013 					      &q->state)) {
3014 					__netif_reschedule(q);
3015 				} else {
3016 					smp_mb__before_clear_bit();
3017 					clear_bit(__QDISC_STATE_SCHED,
3018 						  &q->state);
3019 				}
3020 			}
3021 		}
3022 	}
3023 }
3024 
3025 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3026     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3027 /* This hook is defined here for ATM LANE */
3028 int (*br_fdb_test_addr_hook)(struct net_device *dev,
3029 			     unsigned char *addr) __read_mostly;
3030 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3031 #endif
3032 
3033 #ifdef CONFIG_NET_CLS_ACT
3034 /* TODO: Maybe we should just force sch_ingress to be compiled in
3035  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
3036  * a compare and 2 stores extra right now if we dont have it on
3037  * but have CONFIG_NET_CLS_ACT
3038  * NOTE: This doesn't stop any functionality; if you dont have
3039  * the ingress scheduler, you just can't add policies on ingress.
3040  *
3041  */
3042 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3043 {
3044 	struct net_device *dev = skb->dev;
3045 	u32 ttl = G_TC_RTTL(skb->tc_verd);
3046 	int result = TC_ACT_OK;
3047 	struct Qdisc *q;
3048 
3049 	if (unlikely(MAX_RED_LOOP < ttl++)) {
3050 		net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3051 				     skb->skb_iif, dev->ifindex);
3052 		return TC_ACT_SHOT;
3053 	}
3054 
3055 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3056 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3057 
3058 	q = rxq->qdisc;
3059 	if (q != &noop_qdisc) {
3060 		spin_lock(qdisc_lock(q));
3061 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3062 			result = qdisc_enqueue_root(skb, q);
3063 		spin_unlock(qdisc_lock(q));
3064 	}
3065 
3066 	return result;
3067 }
3068 
3069 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3070 					 struct packet_type **pt_prev,
3071 					 int *ret, struct net_device *orig_dev)
3072 {
3073 	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3074 
3075 	if (!rxq || rxq->qdisc == &noop_qdisc)
3076 		goto out;
3077 
3078 	if (*pt_prev) {
3079 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
3080 		*pt_prev = NULL;
3081 	}
3082 
3083 	switch (ing_filter(skb, rxq)) {
3084 	case TC_ACT_SHOT:
3085 	case TC_ACT_STOLEN:
3086 		kfree_skb(skb);
3087 		return NULL;
3088 	}
3089 
3090 out:
3091 	skb->tc_verd = 0;
3092 	return skb;
3093 }
3094 #endif
3095 
3096 /**
3097  *	netdev_rx_handler_register - register receive handler
3098  *	@dev: device to register a handler for
3099  *	@rx_handler: receive handler to register
3100  *	@rx_handler_data: data pointer that is used by rx handler
3101  *
3102  *	Register a receive hander for a device. This handler will then be
3103  *	called from __netif_receive_skb. A negative errno code is returned
3104  *	on a failure.
3105  *
3106  *	The caller must hold the rtnl_mutex.
3107  *
3108  *	For a general description of rx_handler, see enum rx_handler_result.
3109  */
3110 int netdev_rx_handler_register(struct net_device *dev,
3111 			       rx_handler_func_t *rx_handler,
3112 			       void *rx_handler_data)
3113 {
3114 	ASSERT_RTNL();
3115 
3116 	if (dev->rx_handler)
3117 		return -EBUSY;
3118 
3119 	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3120 	rcu_assign_pointer(dev->rx_handler, rx_handler);
3121 
3122 	return 0;
3123 }
3124 EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3125 
3126 /**
3127  *	netdev_rx_handler_unregister - unregister receive handler
3128  *	@dev: device to unregister a handler from
3129  *
3130  *	Unregister a receive hander from a device.
3131  *
3132  *	The caller must hold the rtnl_mutex.
3133  */
3134 void netdev_rx_handler_unregister(struct net_device *dev)
3135 {
3136 
3137 	ASSERT_RTNL();
3138 	RCU_INIT_POINTER(dev->rx_handler, NULL);
3139 	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3140 }
3141 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3142 
3143 static int __netif_receive_skb(struct sk_buff *skb)
3144 {
3145 	struct packet_type *ptype, *pt_prev;
3146 	rx_handler_func_t *rx_handler;
3147 	struct net_device *orig_dev;
3148 	struct net_device *null_or_dev;
3149 	bool deliver_exact = false;
3150 	int ret = NET_RX_DROP;
3151 	__be16 type;
3152 
3153 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
3154 
3155 	trace_netif_receive_skb(skb);
3156 
3157 	/* if we've gotten here through NAPI, check netpoll */
3158 	if (netpoll_receive_skb(skb))
3159 		return NET_RX_DROP;
3160 
3161 	if (!skb->skb_iif)
3162 		skb->skb_iif = skb->dev->ifindex;
3163 	orig_dev = skb->dev;
3164 
3165 	skb_reset_network_header(skb);
3166 	skb_reset_transport_header(skb);
3167 	skb_reset_mac_len(skb);
3168 
3169 	pt_prev = NULL;
3170 
3171 	rcu_read_lock();
3172 
3173 another_round:
3174 
3175 	__this_cpu_inc(softnet_data.processed);
3176 
3177 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3178 		skb = vlan_untag(skb);
3179 		if (unlikely(!skb))
3180 			goto out;
3181 	}
3182 
3183 #ifdef CONFIG_NET_CLS_ACT
3184 	if (skb->tc_verd & TC_NCLS) {
3185 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3186 		goto ncls;
3187 	}
3188 #endif
3189 
3190 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
3191 		if (!ptype->dev || ptype->dev == skb->dev) {
3192 			if (pt_prev)
3193 				ret = deliver_skb(skb, pt_prev, orig_dev);
3194 			pt_prev = ptype;
3195 		}
3196 	}
3197 
3198 #ifdef CONFIG_NET_CLS_ACT
3199 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3200 	if (!skb)
3201 		goto out;
3202 ncls:
3203 #endif
3204 
3205 	rx_handler = rcu_dereference(skb->dev->rx_handler);
3206 	if (vlan_tx_tag_present(skb)) {
3207 		if (pt_prev) {
3208 			ret = deliver_skb(skb, pt_prev, orig_dev);
3209 			pt_prev = NULL;
3210 		}
3211 		if (vlan_do_receive(&skb, !rx_handler))
3212 			goto another_round;
3213 		else if (unlikely(!skb))
3214 			goto out;
3215 	}
3216 
3217 	if (rx_handler) {
3218 		if (pt_prev) {
3219 			ret = deliver_skb(skb, pt_prev, orig_dev);
3220 			pt_prev = NULL;
3221 		}
3222 		switch (rx_handler(&skb)) {
3223 		case RX_HANDLER_CONSUMED:
3224 			goto out;
3225 		case RX_HANDLER_ANOTHER:
3226 			goto another_round;
3227 		case RX_HANDLER_EXACT:
3228 			deliver_exact = true;
3229 		case RX_HANDLER_PASS:
3230 			break;
3231 		default:
3232 			BUG();
3233 		}
3234 	}
3235 
3236 	/* deliver only exact match when indicated */
3237 	null_or_dev = deliver_exact ? skb->dev : NULL;
3238 
3239 	type = skb->protocol;
3240 	list_for_each_entry_rcu(ptype,
3241 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3242 		if (ptype->type == type &&
3243 		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3244 		     ptype->dev == orig_dev)) {
3245 			if (pt_prev)
3246 				ret = deliver_skb(skb, pt_prev, orig_dev);
3247 			pt_prev = ptype;
3248 		}
3249 	}
3250 
3251 	if (pt_prev) {
3252 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3253 	} else {
3254 		atomic_long_inc(&skb->dev->rx_dropped);
3255 		kfree_skb(skb);
3256 		/* Jamal, now you will not able to escape explaining
3257 		 * me how you were going to use this. :-)
3258 		 */
3259 		ret = NET_RX_DROP;
3260 	}
3261 
3262 out:
3263 	rcu_read_unlock();
3264 	return ret;
3265 }
3266 
3267 /**
3268  *	netif_receive_skb - process receive buffer from network
3269  *	@skb: buffer to process
3270  *
3271  *	netif_receive_skb() is the main receive data processing function.
3272  *	It always succeeds. The buffer may be dropped during processing
3273  *	for congestion control or by the protocol layers.
3274  *
3275  *	This function may only be called from softirq context and interrupts
3276  *	should be enabled.
3277  *
3278  *	Return values (usually ignored):
3279  *	NET_RX_SUCCESS: no congestion
3280  *	NET_RX_DROP: packet was dropped
3281  */
3282 int netif_receive_skb(struct sk_buff *skb)
3283 {
3284 	net_timestamp_check(netdev_tstamp_prequeue, skb);
3285 
3286 	if (skb_defer_rx_timestamp(skb))
3287 		return NET_RX_SUCCESS;
3288 
3289 #ifdef CONFIG_RPS
3290 	if (static_key_false(&rps_needed)) {
3291 		struct rps_dev_flow voidflow, *rflow = &voidflow;
3292 		int cpu, ret;
3293 
3294 		rcu_read_lock();
3295 
3296 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
3297 
3298 		if (cpu >= 0) {
3299 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3300 			rcu_read_unlock();
3301 			return ret;
3302 		}
3303 		rcu_read_unlock();
3304 	}
3305 #endif
3306 	return __netif_receive_skb(skb);
3307 }
3308 EXPORT_SYMBOL(netif_receive_skb);
3309 
3310 /* Network device is going away, flush any packets still pending
3311  * Called with irqs disabled.
3312  */
3313 static void flush_backlog(void *arg)
3314 {
3315 	struct net_device *dev = arg;
3316 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3317 	struct sk_buff *skb, *tmp;
3318 
3319 	rps_lock(sd);
3320 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3321 		if (skb->dev == dev) {
3322 			__skb_unlink(skb, &sd->input_pkt_queue);
3323 			kfree_skb(skb);
3324 			input_queue_head_incr(sd);
3325 		}
3326 	}
3327 	rps_unlock(sd);
3328 
3329 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3330 		if (skb->dev == dev) {
3331 			__skb_unlink(skb, &sd->process_queue);
3332 			kfree_skb(skb);
3333 			input_queue_head_incr(sd);
3334 		}
3335 	}
3336 }
3337 
3338 static int napi_gro_complete(struct sk_buff *skb)
3339 {
3340 	struct packet_type *ptype;
3341 	__be16 type = skb->protocol;
3342 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3343 	int err = -ENOENT;
3344 
3345 	if (NAPI_GRO_CB(skb)->count == 1) {
3346 		skb_shinfo(skb)->gso_size = 0;
3347 		goto out;
3348 	}
3349 
3350 	rcu_read_lock();
3351 	list_for_each_entry_rcu(ptype, head, list) {
3352 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3353 			continue;
3354 
3355 		err = ptype->gro_complete(skb);
3356 		break;
3357 	}
3358 	rcu_read_unlock();
3359 
3360 	if (err) {
3361 		WARN_ON(&ptype->list == head);
3362 		kfree_skb(skb);
3363 		return NET_RX_SUCCESS;
3364 	}
3365 
3366 out:
3367 	return netif_receive_skb(skb);
3368 }
3369 
3370 inline void napi_gro_flush(struct napi_struct *napi)
3371 {
3372 	struct sk_buff *skb, *next;
3373 
3374 	for (skb = napi->gro_list; skb; skb = next) {
3375 		next = skb->next;
3376 		skb->next = NULL;
3377 		napi_gro_complete(skb);
3378 	}
3379 
3380 	napi->gro_count = 0;
3381 	napi->gro_list = NULL;
3382 }
3383 EXPORT_SYMBOL(napi_gro_flush);
3384 
3385 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3386 {
3387 	struct sk_buff **pp = NULL;
3388 	struct packet_type *ptype;
3389 	__be16 type = skb->protocol;
3390 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3391 	int same_flow;
3392 	int mac_len;
3393 	enum gro_result ret;
3394 
3395 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3396 		goto normal;
3397 
3398 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
3399 		goto normal;
3400 
3401 	rcu_read_lock();
3402 	list_for_each_entry_rcu(ptype, head, list) {
3403 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3404 			continue;
3405 
3406 		skb_set_network_header(skb, skb_gro_offset(skb));
3407 		mac_len = skb->network_header - skb->mac_header;
3408 		skb->mac_len = mac_len;
3409 		NAPI_GRO_CB(skb)->same_flow = 0;
3410 		NAPI_GRO_CB(skb)->flush = 0;
3411 		NAPI_GRO_CB(skb)->free = 0;
3412 
3413 		pp = ptype->gro_receive(&napi->gro_list, skb);
3414 		break;
3415 	}
3416 	rcu_read_unlock();
3417 
3418 	if (&ptype->list == head)
3419 		goto normal;
3420 
3421 	same_flow = NAPI_GRO_CB(skb)->same_flow;
3422 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3423 
3424 	if (pp) {
3425 		struct sk_buff *nskb = *pp;
3426 
3427 		*pp = nskb->next;
3428 		nskb->next = NULL;
3429 		napi_gro_complete(nskb);
3430 		napi->gro_count--;
3431 	}
3432 
3433 	if (same_flow)
3434 		goto ok;
3435 
3436 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3437 		goto normal;
3438 
3439 	napi->gro_count++;
3440 	NAPI_GRO_CB(skb)->count = 1;
3441 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3442 	skb->next = napi->gro_list;
3443 	napi->gro_list = skb;
3444 	ret = GRO_HELD;
3445 
3446 pull:
3447 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
3448 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
3449 
3450 		BUG_ON(skb->end - skb->tail < grow);
3451 
3452 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3453 
3454 		skb->tail += grow;
3455 		skb->data_len -= grow;
3456 
3457 		skb_shinfo(skb)->frags[0].page_offset += grow;
3458 		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3459 
3460 		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3461 			skb_frag_unref(skb, 0);
3462 			memmove(skb_shinfo(skb)->frags,
3463 				skb_shinfo(skb)->frags + 1,
3464 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3465 		}
3466 	}
3467 
3468 ok:
3469 	return ret;
3470 
3471 normal:
3472 	ret = GRO_NORMAL;
3473 	goto pull;
3474 }
3475 EXPORT_SYMBOL(dev_gro_receive);
3476 
3477 static inline gro_result_t
3478 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3479 {
3480 	struct sk_buff *p;
3481 	unsigned int maclen = skb->dev->hard_header_len;
3482 
3483 	for (p = napi->gro_list; p; p = p->next) {
3484 		unsigned long diffs;
3485 
3486 		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3487 		diffs |= p->vlan_tci ^ skb->vlan_tci;
3488 		if (maclen == ETH_HLEN)
3489 			diffs |= compare_ether_header(skb_mac_header(p),
3490 						      skb_gro_mac_header(skb));
3491 		else if (!diffs)
3492 			diffs = memcmp(skb_mac_header(p),
3493 				       skb_gro_mac_header(skb),
3494 				       maclen);
3495 		NAPI_GRO_CB(p)->same_flow = !diffs;
3496 		NAPI_GRO_CB(p)->flush = 0;
3497 	}
3498 
3499 	return dev_gro_receive(napi, skb);
3500 }
3501 
3502 gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3503 {
3504 	switch (ret) {
3505 	case GRO_NORMAL:
3506 		if (netif_receive_skb(skb))
3507 			ret = GRO_DROP;
3508 		break;
3509 
3510 	case GRO_DROP:
3511 		kfree_skb(skb);
3512 		break;
3513 
3514 	case GRO_MERGED_FREE:
3515 		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3516 			kmem_cache_free(skbuff_head_cache, skb);
3517 		else
3518 			__kfree_skb(skb);
3519 		break;
3520 
3521 	case GRO_HELD:
3522 	case GRO_MERGED:
3523 		break;
3524 	}
3525 
3526 	return ret;
3527 }
3528 EXPORT_SYMBOL(napi_skb_finish);
3529 
3530 void skb_gro_reset_offset(struct sk_buff *skb)
3531 {
3532 	NAPI_GRO_CB(skb)->data_offset = 0;
3533 	NAPI_GRO_CB(skb)->frag0 = NULL;
3534 	NAPI_GRO_CB(skb)->frag0_len = 0;
3535 
3536 	if (skb->mac_header == skb->tail &&
3537 	    !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
3538 		NAPI_GRO_CB(skb)->frag0 =
3539 			skb_frag_address(&skb_shinfo(skb)->frags[0]);
3540 		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
3541 	}
3542 }
3543 EXPORT_SYMBOL(skb_gro_reset_offset);
3544 
3545 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3546 {
3547 	skb_gro_reset_offset(skb);
3548 
3549 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3550 }
3551 EXPORT_SYMBOL(napi_gro_receive);
3552 
3553 static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3554 {
3555 	__skb_pull(skb, skb_headlen(skb));
3556 	/* restore the reserve we had after netdev_alloc_skb_ip_align() */
3557 	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3558 	skb->vlan_tci = 0;
3559 	skb->dev = napi->dev;
3560 	skb->skb_iif = 0;
3561 
3562 	napi->skb = skb;
3563 }
3564 
3565 struct sk_buff *napi_get_frags(struct napi_struct *napi)
3566 {
3567 	struct sk_buff *skb = napi->skb;
3568 
3569 	if (!skb) {
3570 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3571 		if (skb)
3572 			napi->skb = skb;
3573 	}
3574 	return skb;
3575 }
3576 EXPORT_SYMBOL(napi_get_frags);
3577 
3578 gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3579 			       gro_result_t ret)
3580 {
3581 	switch (ret) {
3582 	case GRO_NORMAL:
3583 	case GRO_HELD:
3584 		skb->protocol = eth_type_trans(skb, skb->dev);
3585 
3586 		if (ret == GRO_HELD)
3587 			skb_gro_pull(skb, -ETH_HLEN);
3588 		else if (netif_receive_skb(skb))
3589 			ret = GRO_DROP;
3590 		break;
3591 
3592 	case GRO_DROP:
3593 	case GRO_MERGED_FREE:
3594 		napi_reuse_skb(napi, skb);
3595 		break;
3596 
3597 	case GRO_MERGED:
3598 		break;
3599 	}
3600 
3601 	return ret;
3602 }
3603 EXPORT_SYMBOL(napi_frags_finish);
3604 
3605 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3606 {
3607 	struct sk_buff *skb = napi->skb;
3608 	struct ethhdr *eth;
3609 	unsigned int hlen;
3610 	unsigned int off;
3611 
3612 	napi->skb = NULL;
3613 
3614 	skb_reset_mac_header(skb);
3615 	skb_gro_reset_offset(skb);
3616 
3617 	off = skb_gro_offset(skb);
3618 	hlen = off + sizeof(*eth);
3619 	eth = skb_gro_header_fast(skb, off);
3620 	if (skb_gro_header_hard(skb, hlen)) {
3621 		eth = skb_gro_header_slow(skb, hlen, off);
3622 		if (unlikely(!eth)) {
3623 			napi_reuse_skb(napi, skb);
3624 			skb = NULL;
3625 			goto out;
3626 		}
3627 	}
3628 
3629 	skb_gro_pull(skb, sizeof(*eth));
3630 
3631 	/*
3632 	 * This works because the only protocols we care about don't require
3633 	 * special handling.  We'll fix it up properly at the end.
3634 	 */
3635 	skb->protocol = eth->h_proto;
3636 
3637 out:
3638 	return skb;
3639 }
3640 EXPORT_SYMBOL(napi_frags_skb);
3641 
3642 gro_result_t napi_gro_frags(struct napi_struct *napi)
3643 {
3644 	struct sk_buff *skb = napi_frags_skb(napi);
3645 
3646 	if (!skb)
3647 		return GRO_DROP;
3648 
3649 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3650 }
3651 EXPORT_SYMBOL(napi_gro_frags);
3652 
3653 /*
3654  * net_rps_action sends any pending IPI's for rps.
3655  * Note: called with local irq disabled, but exits with local irq enabled.
3656  */
3657 static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3658 {
3659 #ifdef CONFIG_RPS
3660 	struct softnet_data *remsd = sd->rps_ipi_list;
3661 
3662 	if (remsd) {
3663 		sd->rps_ipi_list = NULL;
3664 
3665 		local_irq_enable();
3666 
3667 		/* Send pending IPI's to kick RPS processing on remote cpus. */
3668 		while (remsd) {
3669 			struct softnet_data *next = remsd->rps_ipi_next;
3670 
3671 			if (cpu_online(remsd->cpu))
3672 				__smp_call_function_single(remsd->cpu,
3673 							   &remsd->csd, 0);
3674 			remsd = next;
3675 		}
3676 	} else
3677 #endif
3678 		local_irq_enable();
3679 }
3680 
3681 static int process_backlog(struct napi_struct *napi, int quota)
3682 {
3683 	int work = 0;
3684 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
3685 
3686 #ifdef CONFIG_RPS
3687 	/* Check if we have pending ipi, its better to send them now,
3688 	 * not waiting net_rx_action() end.
3689 	 */
3690 	if (sd->rps_ipi_list) {
3691 		local_irq_disable();
3692 		net_rps_action_and_irq_enable(sd);
3693 	}
3694 #endif
3695 	napi->weight = weight_p;
3696 	local_irq_disable();
3697 	while (work < quota) {
3698 		struct sk_buff *skb;
3699 		unsigned int qlen;
3700 
3701 		while ((skb = __skb_dequeue(&sd->process_queue))) {
3702 			local_irq_enable();
3703 			__netif_receive_skb(skb);
3704 			local_irq_disable();
3705 			input_queue_head_incr(sd);
3706 			if (++work >= quota) {
3707 				local_irq_enable();
3708 				return work;
3709 			}
3710 		}
3711 
3712 		rps_lock(sd);
3713 		qlen = skb_queue_len(&sd->input_pkt_queue);
3714 		if (qlen)
3715 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
3716 						   &sd->process_queue);
3717 
3718 		if (qlen < quota - work) {
3719 			/*
3720 			 * Inline a custom version of __napi_complete().
3721 			 * only current cpu owns and manipulates this napi,
3722 			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
3723 			 * we can use a plain write instead of clear_bit(),
3724 			 * and we dont need an smp_mb() memory barrier.
3725 			 */
3726 			list_del(&napi->poll_list);
3727 			napi->state = 0;
3728 
3729 			quota = work + qlen;
3730 		}
3731 		rps_unlock(sd);
3732 	}
3733 	local_irq_enable();
3734 
3735 	return work;
3736 }
3737 
3738 /**
3739  * __napi_schedule - schedule for receive
3740  * @n: entry to schedule
3741  *
3742  * The entry's receive function will be scheduled to run
3743  */
3744 void __napi_schedule(struct napi_struct *n)
3745 {
3746 	unsigned long flags;
3747 
3748 	local_irq_save(flags);
3749 	____napi_schedule(&__get_cpu_var(softnet_data), n);
3750 	local_irq_restore(flags);
3751 }
3752 EXPORT_SYMBOL(__napi_schedule);
3753 
3754 void __napi_complete(struct napi_struct *n)
3755 {
3756 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3757 	BUG_ON(n->gro_list);
3758 
3759 	list_del(&n->poll_list);
3760 	smp_mb__before_clear_bit();
3761 	clear_bit(NAPI_STATE_SCHED, &n->state);
3762 }
3763 EXPORT_SYMBOL(__napi_complete);
3764 
3765 void napi_complete(struct napi_struct *n)
3766 {
3767 	unsigned long flags;
3768 
3769 	/*
3770 	 * don't let napi dequeue from the cpu poll list
3771 	 * just in case its running on a different cpu
3772 	 */
3773 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3774 		return;
3775 
3776 	napi_gro_flush(n);
3777 	local_irq_save(flags);
3778 	__napi_complete(n);
3779 	local_irq_restore(flags);
3780 }
3781 EXPORT_SYMBOL(napi_complete);
3782 
3783 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3784 		    int (*poll)(struct napi_struct *, int), int weight)
3785 {
3786 	INIT_LIST_HEAD(&napi->poll_list);
3787 	napi->gro_count = 0;
3788 	napi->gro_list = NULL;
3789 	napi->skb = NULL;
3790 	napi->poll = poll;
3791 	napi->weight = weight;
3792 	list_add(&napi->dev_list, &dev->napi_list);
3793 	napi->dev = dev;
3794 #ifdef CONFIG_NETPOLL
3795 	spin_lock_init(&napi->poll_lock);
3796 	napi->poll_owner = -1;
3797 #endif
3798 	set_bit(NAPI_STATE_SCHED, &napi->state);
3799 }
3800 EXPORT_SYMBOL(netif_napi_add);
3801 
3802 void netif_napi_del(struct napi_struct *napi)
3803 {
3804 	struct sk_buff *skb, *next;
3805 
3806 	list_del_init(&napi->dev_list);
3807 	napi_free_frags(napi);
3808 
3809 	for (skb = napi->gro_list; skb; skb = next) {
3810 		next = skb->next;
3811 		skb->next = NULL;
3812 		kfree_skb(skb);
3813 	}
3814 
3815 	napi->gro_list = NULL;
3816 	napi->gro_count = 0;
3817 }
3818 EXPORT_SYMBOL(netif_napi_del);
3819 
3820 static void net_rx_action(struct softirq_action *h)
3821 {
3822 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
3823 	unsigned long time_limit = jiffies + 2;
3824 	int budget = netdev_budget;
3825 	void *have;
3826 
3827 	local_irq_disable();
3828 
3829 	while (!list_empty(&sd->poll_list)) {
3830 		struct napi_struct *n;
3831 		int work, weight;
3832 
3833 		/* If softirq window is exhuasted then punt.
3834 		 * Allow this to run for 2 jiffies since which will allow
3835 		 * an average latency of 1.5/HZ.
3836 		 */
3837 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3838 			goto softnet_break;
3839 
3840 		local_irq_enable();
3841 
3842 		/* Even though interrupts have been re-enabled, this
3843 		 * access is safe because interrupts can only add new
3844 		 * entries to the tail of this list, and only ->poll()
3845 		 * calls can remove this head entry from the list.
3846 		 */
3847 		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3848 
3849 		have = netpoll_poll_lock(n);
3850 
3851 		weight = n->weight;
3852 
3853 		/* This NAPI_STATE_SCHED test is for avoiding a race
3854 		 * with netpoll's poll_napi().  Only the entity which
3855 		 * obtains the lock and sees NAPI_STATE_SCHED set will
3856 		 * actually make the ->poll() call.  Therefore we avoid
3857 		 * accidentally calling ->poll() when NAPI is not scheduled.
3858 		 */
3859 		work = 0;
3860 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3861 			work = n->poll(n, weight);
3862 			trace_napi_poll(n);
3863 		}
3864 
3865 		WARN_ON_ONCE(work > weight);
3866 
3867 		budget -= work;
3868 
3869 		local_irq_disable();
3870 
3871 		/* Drivers must not modify the NAPI state if they
3872 		 * consume the entire weight.  In such cases this code
3873 		 * still "owns" the NAPI instance and therefore can
3874 		 * move the instance around on the list at-will.
3875 		 */
3876 		if (unlikely(work == weight)) {
3877 			if (unlikely(napi_disable_pending(n))) {
3878 				local_irq_enable();
3879 				napi_complete(n);
3880 				local_irq_disable();
3881 			} else
3882 				list_move_tail(&n->poll_list, &sd->poll_list);
3883 		}
3884 
3885 		netpoll_poll_unlock(have);
3886 	}
3887 out:
3888 	net_rps_action_and_irq_enable(sd);
3889 
3890 #ifdef CONFIG_NET_DMA
3891 	/*
3892 	 * There may not be any more sk_buffs coming right now, so push
3893 	 * any pending DMA copies to hardware
3894 	 */
3895 	dma_issue_pending_all();
3896 #endif
3897 
3898 	return;
3899 
3900 softnet_break:
3901 	sd->time_squeeze++;
3902 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
3903 	goto out;
3904 }
3905 
3906 static gifconf_func_t *gifconf_list[NPROTO];
3907 
3908 /**
3909  *	register_gifconf	-	register a SIOCGIF handler
3910  *	@family: Address family
3911  *	@gifconf: Function handler
3912  *
3913  *	Register protocol dependent address dumping routines. The handler
3914  *	that is passed must not be freed or reused until it has been replaced
3915  *	by another handler.
3916  */
3917 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3918 {
3919 	if (family >= NPROTO)
3920 		return -EINVAL;
3921 	gifconf_list[family] = gifconf;
3922 	return 0;
3923 }
3924 EXPORT_SYMBOL(register_gifconf);
3925 
3926 
3927 /*
3928  *	Map an interface index to its name (SIOCGIFNAME)
3929  */
3930 
3931 /*
3932  *	We need this ioctl for efficient implementation of the
3933  *	if_indextoname() function required by the IPv6 API.  Without
3934  *	it, we would have to search all the interfaces to find a
3935  *	match.  --pb
3936  */
3937 
3938 static int dev_ifname(struct net *net, struct ifreq __user *arg)
3939 {
3940 	struct net_device *dev;
3941 	struct ifreq ifr;
3942 
3943 	/*
3944 	 *	Fetch the caller's info block.
3945 	 */
3946 
3947 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3948 		return -EFAULT;
3949 
3950 	rcu_read_lock();
3951 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
3952 	if (!dev) {
3953 		rcu_read_unlock();
3954 		return -ENODEV;
3955 	}
3956 
3957 	strcpy(ifr.ifr_name, dev->name);
3958 	rcu_read_unlock();
3959 
3960 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3961 		return -EFAULT;
3962 	return 0;
3963 }
3964 
3965 /*
3966  *	Perform a SIOCGIFCONF call. This structure will change
3967  *	size eventually, and there is nothing I can do about it.
3968  *	Thus we will need a 'compatibility mode'.
3969  */
3970 
3971 static int dev_ifconf(struct net *net, char __user *arg)
3972 {
3973 	struct ifconf ifc;
3974 	struct net_device *dev;
3975 	char __user *pos;
3976 	int len;
3977 	int total;
3978 	int i;
3979 
3980 	/*
3981 	 *	Fetch the caller's info block.
3982 	 */
3983 
3984 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3985 		return -EFAULT;
3986 
3987 	pos = ifc.ifc_buf;
3988 	len = ifc.ifc_len;
3989 
3990 	/*
3991 	 *	Loop over the interfaces, and write an info block for each.
3992 	 */
3993 
3994 	total = 0;
3995 	for_each_netdev(net, dev) {
3996 		for (i = 0; i < NPROTO; i++) {
3997 			if (gifconf_list[i]) {
3998 				int done;
3999 				if (!pos)
4000 					done = gifconf_list[i](dev, NULL, 0);
4001 				else
4002 					done = gifconf_list[i](dev, pos + total,
4003 							       len - total);
4004 				if (done < 0)
4005 					return -EFAULT;
4006 				total += done;
4007 			}
4008 		}
4009 	}
4010 
4011 	/*
4012 	 *	All done.  Write the updated control block back to the caller.
4013 	 */
4014 	ifc.ifc_len = total;
4015 
4016 	/*
4017 	 * 	Both BSD and Solaris return 0 here, so we do too.
4018 	 */
4019 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4020 }
4021 
4022 #ifdef CONFIG_PROC_FS
4023 
4024 #define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
4025 
4026 #define get_bucket(x) ((x) >> BUCKET_SPACE)
4027 #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4028 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4029 
4030 static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
4031 {
4032 	struct net *net = seq_file_net(seq);
4033 	struct net_device *dev;
4034 	struct hlist_node *p;
4035 	struct hlist_head *h;
4036 	unsigned int count = 0, offset = get_offset(*pos);
4037 
4038 	h = &net->dev_name_head[get_bucket(*pos)];
4039 	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
4040 		if (++count == offset)
4041 			return dev;
4042 	}
4043 
4044 	return NULL;
4045 }
4046 
4047 static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
4048 {
4049 	struct net_device *dev;
4050 	unsigned int bucket;
4051 
4052 	do {
4053 		dev = dev_from_same_bucket(seq, pos);
4054 		if (dev)
4055 			return dev;
4056 
4057 		bucket = get_bucket(*pos) + 1;
4058 		*pos = set_bucket_offset(bucket, 1);
4059 	} while (bucket < NETDEV_HASHENTRIES);
4060 
4061 	return NULL;
4062 }
4063 
4064 /*
4065  *	This is invoked by the /proc filesystem handler to display a device
4066  *	in detail.
4067  */
4068 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
4069 	__acquires(RCU)
4070 {
4071 	rcu_read_lock();
4072 	if (!*pos)
4073 		return SEQ_START_TOKEN;
4074 
4075 	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
4076 		return NULL;
4077 
4078 	return dev_from_bucket(seq, pos);
4079 }
4080 
4081 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4082 {
4083 	++*pos;
4084 	return dev_from_bucket(seq, pos);
4085 }
4086 
4087 void dev_seq_stop(struct seq_file *seq, void *v)
4088 	__releases(RCU)
4089 {
4090 	rcu_read_unlock();
4091 }
4092 
4093 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4094 {
4095 	struct rtnl_link_stats64 temp;
4096 	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4097 
4098 	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
4099 		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4100 		   dev->name, stats->rx_bytes, stats->rx_packets,
4101 		   stats->rx_errors,
4102 		   stats->rx_dropped + stats->rx_missed_errors,
4103 		   stats->rx_fifo_errors,
4104 		   stats->rx_length_errors + stats->rx_over_errors +
4105 		    stats->rx_crc_errors + stats->rx_frame_errors,
4106 		   stats->rx_compressed, stats->multicast,
4107 		   stats->tx_bytes, stats->tx_packets,
4108 		   stats->tx_errors, stats->tx_dropped,
4109 		   stats->tx_fifo_errors, stats->collisions,
4110 		   stats->tx_carrier_errors +
4111 		    stats->tx_aborted_errors +
4112 		    stats->tx_window_errors +
4113 		    stats->tx_heartbeat_errors,
4114 		   stats->tx_compressed);
4115 }
4116 
4117 /*
4118  *	Called from the PROCfs module. This now uses the new arbitrary sized
4119  *	/proc/net interface to create /proc/net/dev
4120  */
4121 static int dev_seq_show(struct seq_file *seq, void *v)
4122 {
4123 	if (v == SEQ_START_TOKEN)
4124 		seq_puts(seq, "Inter-|   Receive                            "
4125 			      "                    |  Transmit\n"
4126 			      " face |bytes    packets errs drop fifo frame "
4127 			      "compressed multicast|bytes    packets errs "
4128 			      "drop fifo colls carrier compressed\n");
4129 	else
4130 		dev_seq_printf_stats(seq, v);
4131 	return 0;
4132 }
4133 
4134 static struct softnet_data *softnet_get_online(loff_t *pos)
4135 {
4136 	struct softnet_data *sd = NULL;
4137 
4138 	while (*pos < nr_cpu_ids)
4139 		if (cpu_online(*pos)) {
4140 			sd = &per_cpu(softnet_data, *pos);
4141 			break;
4142 		} else
4143 			++*pos;
4144 	return sd;
4145 }
4146 
4147 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4148 {
4149 	return softnet_get_online(pos);
4150 }
4151 
4152 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4153 {
4154 	++*pos;
4155 	return softnet_get_online(pos);
4156 }
4157 
4158 static void softnet_seq_stop(struct seq_file *seq, void *v)
4159 {
4160 }
4161 
4162 static int softnet_seq_show(struct seq_file *seq, void *v)
4163 {
4164 	struct softnet_data *sd = v;
4165 
4166 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
4167 		   sd->processed, sd->dropped, sd->time_squeeze, 0,
4168 		   0, 0, 0, 0, /* was fastroute */
4169 		   sd->cpu_collision, sd->received_rps);
4170 	return 0;
4171 }
4172 
4173 static const struct seq_operations dev_seq_ops = {
4174 	.start = dev_seq_start,
4175 	.next  = dev_seq_next,
4176 	.stop  = dev_seq_stop,
4177 	.show  = dev_seq_show,
4178 };
4179 
4180 static int dev_seq_open(struct inode *inode, struct file *file)
4181 {
4182 	return seq_open_net(inode, file, &dev_seq_ops,
4183 			    sizeof(struct seq_net_private));
4184 }
4185 
4186 static const struct file_operations dev_seq_fops = {
4187 	.owner	 = THIS_MODULE,
4188 	.open    = dev_seq_open,
4189 	.read    = seq_read,
4190 	.llseek  = seq_lseek,
4191 	.release = seq_release_net,
4192 };
4193 
4194 static const struct seq_operations softnet_seq_ops = {
4195 	.start = softnet_seq_start,
4196 	.next  = softnet_seq_next,
4197 	.stop  = softnet_seq_stop,
4198 	.show  = softnet_seq_show,
4199 };
4200 
4201 static int softnet_seq_open(struct inode *inode, struct file *file)
4202 {
4203 	return seq_open(file, &softnet_seq_ops);
4204 }
4205 
4206 static const struct file_operations softnet_seq_fops = {
4207 	.owner	 = THIS_MODULE,
4208 	.open    = softnet_seq_open,
4209 	.read    = seq_read,
4210 	.llseek  = seq_lseek,
4211 	.release = seq_release,
4212 };
4213 
4214 static void *ptype_get_idx(loff_t pos)
4215 {
4216 	struct packet_type *pt = NULL;
4217 	loff_t i = 0;
4218 	int t;
4219 
4220 	list_for_each_entry_rcu(pt, &ptype_all, list) {
4221 		if (i == pos)
4222 			return pt;
4223 		++i;
4224 	}
4225 
4226 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
4227 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
4228 			if (i == pos)
4229 				return pt;
4230 			++i;
4231 		}
4232 	}
4233 	return NULL;
4234 }
4235 
4236 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
4237 	__acquires(RCU)
4238 {
4239 	rcu_read_lock();
4240 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4241 }
4242 
4243 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4244 {
4245 	struct packet_type *pt;
4246 	struct list_head *nxt;
4247 	int hash;
4248 
4249 	++*pos;
4250 	if (v == SEQ_START_TOKEN)
4251 		return ptype_get_idx(0);
4252 
4253 	pt = v;
4254 	nxt = pt->list.next;
4255 	if (pt->type == htons(ETH_P_ALL)) {
4256 		if (nxt != &ptype_all)
4257 			goto found;
4258 		hash = 0;
4259 		nxt = ptype_base[0].next;
4260 	} else
4261 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4262 
4263 	while (nxt == &ptype_base[hash]) {
4264 		if (++hash >= PTYPE_HASH_SIZE)
4265 			return NULL;
4266 		nxt = ptype_base[hash].next;
4267 	}
4268 found:
4269 	return list_entry(nxt, struct packet_type, list);
4270 }
4271 
4272 static void ptype_seq_stop(struct seq_file *seq, void *v)
4273 	__releases(RCU)
4274 {
4275 	rcu_read_unlock();
4276 }
4277 
4278 static int ptype_seq_show(struct seq_file *seq, void *v)
4279 {
4280 	struct packet_type *pt = v;
4281 
4282 	if (v == SEQ_START_TOKEN)
4283 		seq_puts(seq, "Type Device      Function\n");
4284 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
4285 		if (pt->type == htons(ETH_P_ALL))
4286 			seq_puts(seq, "ALL ");
4287 		else
4288 			seq_printf(seq, "%04x", ntohs(pt->type));
4289 
4290 		seq_printf(seq, " %-8s %pF\n",
4291 			   pt->dev ? pt->dev->name : "", pt->func);
4292 	}
4293 
4294 	return 0;
4295 }
4296 
4297 static const struct seq_operations ptype_seq_ops = {
4298 	.start = ptype_seq_start,
4299 	.next  = ptype_seq_next,
4300 	.stop  = ptype_seq_stop,
4301 	.show  = ptype_seq_show,
4302 };
4303 
4304 static int ptype_seq_open(struct inode *inode, struct file *file)
4305 {
4306 	return seq_open_net(inode, file, &ptype_seq_ops,
4307 			sizeof(struct seq_net_private));
4308 }
4309 
4310 static const struct file_operations ptype_seq_fops = {
4311 	.owner	 = THIS_MODULE,
4312 	.open    = ptype_seq_open,
4313 	.read    = seq_read,
4314 	.llseek  = seq_lseek,
4315 	.release = seq_release_net,
4316 };
4317 
4318 
4319 static int __net_init dev_proc_net_init(struct net *net)
4320 {
4321 	int rc = -ENOMEM;
4322 
4323 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4324 		goto out;
4325 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4326 		goto out_dev;
4327 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4328 		goto out_softnet;
4329 
4330 	if (wext_proc_init(net))
4331 		goto out_ptype;
4332 	rc = 0;
4333 out:
4334 	return rc;
4335 out_ptype:
4336 	proc_net_remove(net, "ptype");
4337 out_softnet:
4338 	proc_net_remove(net, "softnet_stat");
4339 out_dev:
4340 	proc_net_remove(net, "dev");
4341 	goto out;
4342 }
4343 
4344 static void __net_exit dev_proc_net_exit(struct net *net)
4345 {
4346 	wext_proc_exit(net);
4347 
4348 	proc_net_remove(net, "ptype");
4349 	proc_net_remove(net, "softnet_stat");
4350 	proc_net_remove(net, "dev");
4351 }
4352 
4353 static struct pernet_operations __net_initdata dev_proc_ops = {
4354 	.init = dev_proc_net_init,
4355 	.exit = dev_proc_net_exit,
4356 };
4357 
4358 static int __init dev_proc_init(void)
4359 {
4360 	return register_pernet_subsys(&dev_proc_ops);
4361 }
4362 #else
4363 #define dev_proc_init() 0
4364 #endif	/* CONFIG_PROC_FS */
4365 
4366 
4367 /**
4368  *	netdev_set_master	-	set up master pointer
4369  *	@slave: slave device
4370  *	@master: new master device
4371  *
4372  *	Changes the master device of the slave. Pass %NULL to break the
4373  *	bonding. The caller must hold the RTNL semaphore. On a failure
4374  *	a negative errno code is returned. On success the reference counts
4375  *	are adjusted and the function returns zero.
4376  */
4377 int netdev_set_master(struct net_device *slave, struct net_device *master)
4378 {
4379 	struct net_device *old = slave->master;
4380 
4381 	ASSERT_RTNL();
4382 
4383 	if (master) {
4384 		if (old)
4385 			return -EBUSY;
4386 		dev_hold(master);
4387 	}
4388 
4389 	slave->master = master;
4390 
4391 	if (old)
4392 		dev_put(old);
4393 	return 0;
4394 }
4395 EXPORT_SYMBOL(netdev_set_master);
4396 
4397 /**
4398  *	netdev_set_bond_master	-	set up bonding master/slave pair
4399  *	@slave: slave device
4400  *	@master: new master device
4401  *
4402  *	Changes the master device of the slave. Pass %NULL to break the
4403  *	bonding. The caller must hold the RTNL semaphore. On a failure
4404  *	a negative errno code is returned. On success %RTM_NEWLINK is sent
4405  *	to the routing socket and the function returns zero.
4406  */
4407 int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4408 {
4409 	int err;
4410 
4411 	ASSERT_RTNL();
4412 
4413 	err = netdev_set_master(slave, master);
4414 	if (err)
4415 		return err;
4416 	if (master)
4417 		slave->flags |= IFF_SLAVE;
4418 	else
4419 		slave->flags &= ~IFF_SLAVE;
4420 
4421 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4422 	return 0;
4423 }
4424 EXPORT_SYMBOL(netdev_set_bond_master);
4425 
4426 static void dev_change_rx_flags(struct net_device *dev, int flags)
4427 {
4428 	const struct net_device_ops *ops = dev->netdev_ops;
4429 
4430 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4431 		ops->ndo_change_rx_flags(dev, flags);
4432 }
4433 
4434 static int __dev_set_promiscuity(struct net_device *dev, int inc)
4435 {
4436 	unsigned int old_flags = dev->flags;
4437 	uid_t uid;
4438 	gid_t gid;
4439 
4440 	ASSERT_RTNL();
4441 
4442 	dev->flags |= IFF_PROMISC;
4443 	dev->promiscuity += inc;
4444 	if (dev->promiscuity == 0) {
4445 		/*
4446 		 * Avoid overflow.
4447 		 * If inc causes overflow, untouch promisc and return error.
4448 		 */
4449 		if (inc < 0)
4450 			dev->flags &= ~IFF_PROMISC;
4451 		else {
4452 			dev->promiscuity -= inc;
4453 			pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
4454 				dev->name);
4455 			return -EOVERFLOW;
4456 		}
4457 	}
4458 	if (dev->flags != old_flags) {
4459 		pr_info("device %s %s promiscuous mode\n",
4460 			dev->name,
4461 			dev->flags & IFF_PROMISC ? "entered" : "left");
4462 		if (audit_enabled) {
4463 			current_uid_gid(&uid, &gid);
4464 			audit_log(current->audit_context, GFP_ATOMIC,
4465 				AUDIT_ANOM_PROMISCUOUS,
4466 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4467 				dev->name, (dev->flags & IFF_PROMISC),
4468 				(old_flags & IFF_PROMISC),
4469 				audit_get_loginuid(current),
4470 				uid, gid,
4471 				audit_get_sessionid(current));
4472 		}
4473 
4474 		dev_change_rx_flags(dev, IFF_PROMISC);
4475 	}
4476 	return 0;
4477 }
4478 
4479 /**
4480  *	dev_set_promiscuity	- update promiscuity count on a device
4481  *	@dev: device
4482  *	@inc: modifier
4483  *
4484  *	Add or remove promiscuity from a device. While the count in the device
4485  *	remains above zero the interface remains promiscuous. Once it hits zero
4486  *	the device reverts back to normal filtering operation. A negative inc
4487  *	value is used to drop promiscuity on the device.
4488  *	Return 0 if successful or a negative errno code on error.
4489  */
4490 int dev_set_promiscuity(struct net_device *dev, int inc)
4491 {
4492 	unsigned int old_flags = dev->flags;
4493 	int err;
4494 
4495 	err = __dev_set_promiscuity(dev, inc);
4496 	if (err < 0)
4497 		return err;
4498 	if (dev->flags != old_flags)
4499 		dev_set_rx_mode(dev);
4500 	return err;
4501 }
4502 EXPORT_SYMBOL(dev_set_promiscuity);
4503 
4504 /**
4505  *	dev_set_allmulti	- update allmulti count on a device
4506  *	@dev: device
4507  *	@inc: modifier
4508  *
4509  *	Add or remove reception of all multicast frames to a device. While the
4510  *	count in the device remains above zero the interface remains listening
4511  *	to all interfaces. Once it hits zero the device reverts back to normal
4512  *	filtering operation. A negative @inc value is used to drop the counter
4513  *	when releasing a resource needing all multicasts.
4514  *	Return 0 if successful or a negative errno code on error.
4515  */
4516 
4517 int dev_set_allmulti(struct net_device *dev, int inc)
4518 {
4519 	unsigned int old_flags = dev->flags;
4520 
4521 	ASSERT_RTNL();
4522 
4523 	dev->flags |= IFF_ALLMULTI;
4524 	dev->allmulti += inc;
4525 	if (dev->allmulti == 0) {
4526 		/*
4527 		 * Avoid overflow.
4528 		 * If inc causes overflow, untouch allmulti and return error.
4529 		 */
4530 		if (inc < 0)
4531 			dev->flags &= ~IFF_ALLMULTI;
4532 		else {
4533 			dev->allmulti -= inc;
4534 			pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
4535 				dev->name);
4536 			return -EOVERFLOW;
4537 		}
4538 	}
4539 	if (dev->flags ^ old_flags) {
4540 		dev_change_rx_flags(dev, IFF_ALLMULTI);
4541 		dev_set_rx_mode(dev);
4542 	}
4543 	return 0;
4544 }
4545 EXPORT_SYMBOL(dev_set_allmulti);
4546 
4547 /*
4548  *	Upload unicast and multicast address lists to device and
4549  *	configure RX filtering. When the device doesn't support unicast
4550  *	filtering it is put in promiscuous mode while unicast addresses
4551  *	are present.
4552  */
4553 void __dev_set_rx_mode(struct net_device *dev)
4554 {
4555 	const struct net_device_ops *ops = dev->netdev_ops;
4556 
4557 	/* dev_open will call this function so the list will stay sane. */
4558 	if (!(dev->flags&IFF_UP))
4559 		return;
4560 
4561 	if (!netif_device_present(dev))
4562 		return;
4563 
4564 	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
4565 		/* Unicast addresses changes may only happen under the rtnl,
4566 		 * therefore calling __dev_set_promiscuity here is safe.
4567 		 */
4568 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4569 			__dev_set_promiscuity(dev, 1);
4570 			dev->uc_promisc = true;
4571 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4572 			__dev_set_promiscuity(dev, -1);
4573 			dev->uc_promisc = false;
4574 		}
4575 	}
4576 
4577 	if (ops->ndo_set_rx_mode)
4578 		ops->ndo_set_rx_mode(dev);
4579 }
4580 
4581 void dev_set_rx_mode(struct net_device *dev)
4582 {
4583 	netif_addr_lock_bh(dev);
4584 	__dev_set_rx_mode(dev);
4585 	netif_addr_unlock_bh(dev);
4586 }
4587 
4588 /**
4589  *	dev_get_flags - get flags reported to userspace
4590  *	@dev: device
4591  *
4592  *	Get the combination of flag bits exported through APIs to userspace.
4593  */
4594 unsigned int dev_get_flags(const struct net_device *dev)
4595 {
4596 	unsigned int flags;
4597 
4598 	flags = (dev->flags & ~(IFF_PROMISC |
4599 				IFF_ALLMULTI |
4600 				IFF_RUNNING |
4601 				IFF_LOWER_UP |
4602 				IFF_DORMANT)) |
4603 		(dev->gflags & (IFF_PROMISC |
4604 				IFF_ALLMULTI));
4605 
4606 	if (netif_running(dev)) {
4607 		if (netif_oper_up(dev))
4608 			flags |= IFF_RUNNING;
4609 		if (netif_carrier_ok(dev))
4610 			flags |= IFF_LOWER_UP;
4611 		if (netif_dormant(dev))
4612 			flags |= IFF_DORMANT;
4613 	}
4614 
4615 	return flags;
4616 }
4617 EXPORT_SYMBOL(dev_get_flags);
4618 
4619 int __dev_change_flags(struct net_device *dev, unsigned int flags)
4620 {
4621 	unsigned int old_flags = dev->flags;
4622 	int ret;
4623 
4624 	ASSERT_RTNL();
4625 
4626 	/*
4627 	 *	Set the flags on our device.
4628 	 */
4629 
4630 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4631 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4632 			       IFF_AUTOMEDIA)) |
4633 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4634 				    IFF_ALLMULTI));
4635 
4636 	/*
4637 	 *	Load in the correct multicast list now the flags have changed.
4638 	 */
4639 
4640 	if ((old_flags ^ flags) & IFF_MULTICAST)
4641 		dev_change_rx_flags(dev, IFF_MULTICAST);
4642 
4643 	dev_set_rx_mode(dev);
4644 
4645 	/*
4646 	 *	Have we downed the interface. We handle IFF_UP ourselves
4647 	 *	according to user attempts to set it, rather than blindly
4648 	 *	setting it.
4649 	 */
4650 
4651 	ret = 0;
4652 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4653 		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4654 
4655 		if (!ret)
4656 			dev_set_rx_mode(dev);
4657 	}
4658 
4659 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4660 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4661 
4662 		dev->gflags ^= IFF_PROMISC;
4663 		dev_set_promiscuity(dev, inc);
4664 	}
4665 
4666 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4667 	   is important. Some (broken) drivers set IFF_PROMISC, when
4668 	   IFF_ALLMULTI is requested not asking us and not reporting.
4669 	 */
4670 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4671 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4672 
4673 		dev->gflags ^= IFF_ALLMULTI;
4674 		dev_set_allmulti(dev, inc);
4675 	}
4676 
4677 	return ret;
4678 }
4679 
4680 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4681 {
4682 	unsigned int changes = dev->flags ^ old_flags;
4683 
4684 	if (changes & IFF_UP) {
4685 		if (dev->flags & IFF_UP)
4686 			call_netdevice_notifiers(NETDEV_UP, dev);
4687 		else
4688 			call_netdevice_notifiers(NETDEV_DOWN, dev);
4689 	}
4690 
4691 	if (dev->flags & IFF_UP &&
4692 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4693 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4694 }
4695 
4696 /**
4697  *	dev_change_flags - change device settings
4698  *	@dev: device
4699  *	@flags: device state flags
4700  *
4701  *	Change settings on device based state flags. The flags are
4702  *	in the userspace exported format.
4703  */
4704 int dev_change_flags(struct net_device *dev, unsigned int flags)
4705 {
4706 	int ret;
4707 	unsigned int changes, old_flags = dev->flags;
4708 
4709 	ret = __dev_change_flags(dev, flags);
4710 	if (ret < 0)
4711 		return ret;
4712 
4713 	changes = old_flags ^ dev->flags;
4714 	if (changes)
4715 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4716 
4717 	__dev_notify_flags(dev, old_flags);
4718 	return ret;
4719 }
4720 EXPORT_SYMBOL(dev_change_flags);
4721 
4722 /**
4723  *	dev_set_mtu - Change maximum transfer unit
4724  *	@dev: device
4725  *	@new_mtu: new transfer unit
4726  *
4727  *	Change the maximum transfer size of the network device.
4728  */
4729 int dev_set_mtu(struct net_device *dev, int new_mtu)
4730 {
4731 	const struct net_device_ops *ops = dev->netdev_ops;
4732 	int err;
4733 
4734 	if (new_mtu == dev->mtu)
4735 		return 0;
4736 
4737 	/*	MTU must be positive.	 */
4738 	if (new_mtu < 0)
4739 		return -EINVAL;
4740 
4741 	if (!netif_device_present(dev))
4742 		return -ENODEV;
4743 
4744 	err = 0;
4745 	if (ops->ndo_change_mtu)
4746 		err = ops->ndo_change_mtu(dev, new_mtu);
4747 	else
4748 		dev->mtu = new_mtu;
4749 
4750 	if (!err && dev->flags & IFF_UP)
4751 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4752 	return err;
4753 }
4754 EXPORT_SYMBOL(dev_set_mtu);
4755 
4756 /**
4757  *	dev_set_group - Change group this device belongs to
4758  *	@dev: device
4759  *	@new_group: group this device should belong to
4760  */
4761 void dev_set_group(struct net_device *dev, int new_group)
4762 {
4763 	dev->group = new_group;
4764 }
4765 EXPORT_SYMBOL(dev_set_group);
4766 
4767 /**
4768  *	dev_set_mac_address - Change Media Access Control Address
4769  *	@dev: device
4770  *	@sa: new address
4771  *
4772  *	Change the hardware (MAC) address of the device
4773  */
4774 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4775 {
4776 	const struct net_device_ops *ops = dev->netdev_ops;
4777 	int err;
4778 
4779 	if (!ops->ndo_set_mac_address)
4780 		return -EOPNOTSUPP;
4781 	if (sa->sa_family != dev->type)
4782 		return -EINVAL;
4783 	if (!netif_device_present(dev))
4784 		return -ENODEV;
4785 	err = ops->ndo_set_mac_address(dev, sa);
4786 	if (!err)
4787 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4788 	return err;
4789 }
4790 EXPORT_SYMBOL(dev_set_mac_address);
4791 
4792 /*
4793  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4794  */
4795 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4796 {
4797 	int err;
4798 	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4799 
4800 	if (!dev)
4801 		return -ENODEV;
4802 
4803 	switch (cmd) {
4804 	case SIOCGIFFLAGS:	/* Get interface flags */
4805 		ifr->ifr_flags = (short) dev_get_flags(dev);
4806 		return 0;
4807 
4808 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4809 				   (currently unused) */
4810 		ifr->ifr_metric = 0;
4811 		return 0;
4812 
4813 	case SIOCGIFMTU:	/* Get the MTU of a device */
4814 		ifr->ifr_mtu = dev->mtu;
4815 		return 0;
4816 
4817 	case SIOCGIFHWADDR:
4818 		if (!dev->addr_len)
4819 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4820 		else
4821 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4822 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4823 		ifr->ifr_hwaddr.sa_family = dev->type;
4824 		return 0;
4825 
4826 	case SIOCGIFSLAVE:
4827 		err = -EINVAL;
4828 		break;
4829 
4830 	case SIOCGIFMAP:
4831 		ifr->ifr_map.mem_start = dev->mem_start;
4832 		ifr->ifr_map.mem_end   = dev->mem_end;
4833 		ifr->ifr_map.base_addr = dev->base_addr;
4834 		ifr->ifr_map.irq       = dev->irq;
4835 		ifr->ifr_map.dma       = dev->dma;
4836 		ifr->ifr_map.port      = dev->if_port;
4837 		return 0;
4838 
4839 	case SIOCGIFINDEX:
4840 		ifr->ifr_ifindex = dev->ifindex;
4841 		return 0;
4842 
4843 	case SIOCGIFTXQLEN:
4844 		ifr->ifr_qlen = dev->tx_queue_len;
4845 		return 0;
4846 
4847 	default:
4848 		/* dev_ioctl() should ensure this case
4849 		 * is never reached
4850 		 */
4851 		WARN_ON(1);
4852 		err = -ENOTTY;
4853 		break;
4854 
4855 	}
4856 	return err;
4857 }
4858 
4859 /*
4860  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4861  */
4862 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4863 {
4864 	int err;
4865 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4866 	const struct net_device_ops *ops;
4867 
4868 	if (!dev)
4869 		return -ENODEV;
4870 
4871 	ops = dev->netdev_ops;
4872 
4873 	switch (cmd) {
4874 	case SIOCSIFFLAGS:	/* Set interface flags */
4875 		return dev_change_flags(dev, ifr->ifr_flags);
4876 
4877 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4878 				   (currently unused) */
4879 		return -EOPNOTSUPP;
4880 
4881 	case SIOCSIFMTU:	/* Set the MTU of a device */
4882 		return dev_set_mtu(dev, ifr->ifr_mtu);
4883 
4884 	case SIOCSIFHWADDR:
4885 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4886 
4887 	case SIOCSIFHWBROADCAST:
4888 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4889 			return -EINVAL;
4890 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4891 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4892 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4893 		return 0;
4894 
4895 	case SIOCSIFMAP:
4896 		if (ops->ndo_set_config) {
4897 			if (!netif_device_present(dev))
4898 				return -ENODEV;
4899 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4900 		}
4901 		return -EOPNOTSUPP;
4902 
4903 	case SIOCADDMULTI:
4904 		if (!ops->ndo_set_rx_mode ||
4905 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4906 			return -EINVAL;
4907 		if (!netif_device_present(dev))
4908 			return -ENODEV;
4909 		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4910 
4911 	case SIOCDELMULTI:
4912 		if (!ops->ndo_set_rx_mode ||
4913 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4914 			return -EINVAL;
4915 		if (!netif_device_present(dev))
4916 			return -ENODEV;
4917 		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4918 
4919 	case SIOCSIFTXQLEN:
4920 		if (ifr->ifr_qlen < 0)
4921 			return -EINVAL;
4922 		dev->tx_queue_len = ifr->ifr_qlen;
4923 		return 0;
4924 
4925 	case SIOCSIFNAME:
4926 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4927 		return dev_change_name(dev, ifr->ifr_newname);
4928 
4929 	case SIOCSHWTSTAMP:
4930 		err = net_hwtstamp_validate(ifr);
4931 		if (err)
4932 			return err;
4933 		/* fall through */
4934 
4935 	/*
4936 	 *	Unknown or private ioctl
4937 	 */
4938 	default:
4939 		if ((cmd >= SIOCDEVPRIVATE &&
4940 		    cmd <= SIOCDEVPRIVATE + 15) ||
4941 		    cmd == SIOCBONDENSLAVE ||
4942 		    cmd == SIOCBONDRELEASE ||
4943 		    cmd == SIOCBONDSETHWADDR ||
4944 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4945 		    cmd == SIOCBONDINFOQUERY ||
4946 		    cmd == SIOCBONDCHANGEACTIVE ||
4947 		    cmd == SIOCGMIIPHY ||
4948 		    cmd == SIOCGMIIREG ||
4949 		    cmd == SIOCSMIIREG ||
4950 		    cmd == SIOCBRADDIF ||
4951 		    cmd == SIOCBRDELIF ||
4952 		    cmd == SIOCSHWTSTAMP ||
4953 		    cmd == SIOCWANDEV) {
4954 			err = -EOPNOTSUPP;
4955 			if (ops->ndo_do_ioctl) {
4956 				if (netif_device_present(dev))
4957 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4958 				else
4959 					err = -ENODEV;
4960 			}
4961 		} else
4962 			err = -EINVAL;
4963 
4964 	}
4965 	return err;
4966 }
4967 
4968 /*
4969  *	This function handles all "interface"-type I/O control requests. The actual
4970  *	'doing' part of this is dev_ifsioc above.
4971  */
4972 
4973 /**
4974  *	dev_ioctl	-	network device ioctl
4975  *	@net: the applicable net namespace
4976  *	@cmd: command to issue
4977  *	@arg: pointer to a struct ifreq in user space
4978  *
4979  *	Issue ioctl functions to devices. This is normally called by the
4980  *	user space syscall interfaces but can sometimes be useful for
4981  *	other purposes. The return value is the return from the syscall if
4982  *	positive or a negative errno code on error.
4983  */
4984 
4985 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4986 {
4987 	struct ifreq ifr;
4988 	int ret;
4989 	char *colon;
4990 
4991 	/* One special case: SIOCGIFCONF takes ifconf argument
4992 	   and requires shared lock, because it sleeps writing
4993 	   to user space.
4994 	 */
4995 
4996 	if (cmd == SIOCGIFCONF) {
4997 		rtnl_lock();
4998 		ret = dev_ifconf(net, (char __user *) arg);
4999 		rtnl_unlock();
5000 		return ret;
5001 	}
5002 	if (cmd == SIOCGIFNAME)
5003 		return dev_ifname(net, (struct ifreq __user *)arg);
5004 
5005 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5006 		return -EFAULT;
5007 
5008 	ifr.ifr_name[IFNAMSIZ-1] = 0;
5009 
5010 	colon = strchr(ifr.ifr_name, ':');
5011 	if (colon)
5012 		*colon = 0;
5013 
5014 	/*
5015 	 *	See which interface the caller is talking about.
5016 	 */
5017 
5018 	switch (cmd) {
5019 	/*
5020 	 *	These ioctl calls:
5021 	 *	- can be done by all.
5022 	 *	- atomic and do not require locking.
5023 	 *	- return a value
5024 	 */
5025 	case SIOCGIFFLAGS:
5026 	case SIOCGIFMETRIC:
5027 	case SIOCGIFMTU:
5028 	case SIOCGIFHWADDR:
5029 	case SIOCGIFSLAVE:
5030 	case SIOCGIFMAP:
5031 	case SIOCGIFINDEX:
5032 	case SIOCGIFTXQLEN:
5033 		dev_load(net, ifr.ifr_name);
5034 		rcu_read_lock();
5035 		ret = dev_ifsioc_locked(net, &ifr, cmd);
5036 		rcu_read_unlock();
5037 		if (!ret) {
5038 			if (colon)
5039 				*colon = ':';
5040 			if (copy_to_user(arg, &ifr,
5041 					 sizeof(struct ifreq)))
5042 				ret = -EFAULT;
5043 		}
5044 		return ret;
5045 
5046 	case SIOCETHTOOL:
5047 		dev_load(net, ifr.ifr_name);
5048 		rtnl_lock();
5049 		ret = dev_ethtool(net, &ifr);
5050 		rtnl_unlock();
5051 		if (!ret) {
5052 			if (colon)
5053 				*colon = ':';
5054 			if (copy_to_user(arg, &ifr,
5055 					 sizeof(struct ifreq)))
5056 				ret = -EFAULT;
5057 		}
5058 		return ret;
5059 
5060 	/*
5061 	 *	These ioctl calls:
5062 	 *	- require superuser power.
5063 	 *	- require strict serialization.
5064 	 *	- return a value
5065 	 */
5066 	case SIOCGMIIPHY:
5067 	case SIOCGMIIREG:
5068 	case SIOCSIFNAME:
5069 		if (!capable(CAP_NET_ADMIN))
5070 			return -EPERM;
5071 		dev_load(net, ifr.ifr_name);
5072 		rtnl_lock();
5073 		ret = dev_ifsioc(net, &ifr, cmd);
5074 		rtnl_unlock();
5075 		if (!ret) {
5076 			if (colon)
5077 				*colon = ':';
5078 			if (copy_to_user(arg, &ifr,
5079 					 sizeof(struct ifreq)))
5080 				ret = -EFAULT;
5081 		}
5082 		return ret;
5083 
5084 	/*
5085 	 *	These ioctl calls:
5086 	 *	- require superuser power.
5087 	 *	- require strict serialization.
5088 	 *	- do not return a value
5089 	 */
5090 	case SIOCSIFFLAGS:
5091 	case SIOCSIFMETRIC:
5092 	case SIOCSIFMTU:
5093 	case SIOCSIFMAP:
5094 	case SIOCSIFHWADDR:
5095 	case SIOCSIFSLAVE:
5096 	case SIOCADDMULTI:
5097 	case SIOCDELMULTI:
5098 	case SIOCSIFHWBROADCAST:
5099 	case SIOCSIFTXQLEN:
5100 	case SIOCSMIIREG:
5101 	case SIOCBONDENSLAVE:
5102 	case SIOCBONDRELEASE:
5103 	case SIOCBONDSETHWADDR:
5104 	case SIOCBONDCHANGEACTIVE:
5105 	case SIOCBRADDIF:
5106 	case SIOCBRDELIF:
5107 	case SIOCSHWTSTAMP:
5108 		if (!capable(CAP_NET_ADMIN))
5109 			return -EPERM;
5110 		/* fall through */
5111 	case SIOCBONDSLAVEINFOQUERY:
5112 	case SIOCBONDINFOQUERY:
5113 		dev_load(net, ifr.ifr_name);
5114 		rtnl_lock();
5115 		ret = dev_ifsioc(net, &ifr, cmd);
5116 		rtnl_unlock();
5117 		return ret;
5118 
5119 	case SIOCGIFMEM:
5120 		/* Get the per device memory space. We can add this but
5121 		 * currently do not support it */
5122 	case SIOCSIFMEM:
5123 		/* Set the per device memory buffer space.
5124 		 * Not applicable in our case */
5125 	case SIOCSIFLINK:
5126 		return -ENOTTY;
5127 
5128 	/*
5129 	 *	Unknown or private ioctl.
5130 	 */
5131 	default:
5132 		if (cmd == SIOCWANDEV ||
5133 		    (cmd >= SIOCDEVPRIVATE &&
5134 		     cmd <= SIOCDEVPRIVATE + 15)) {
5135 			dev_load(net, ifr.ifr_name);
5136 			rtnl_lock();
5137 			ret = dev_ifsioc(net, &ifr, cmd);
5138 			rtnl_unlock();
5139 			if (!ret && copy_to_user(arg, &ifr,
5140 						 sizeof(struct ifreq)))
5141 				ret = -EFAULT;
5142 			return ret;
5143 		}
5144 		/* Take care of Wireless Extensions */
5145 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5146 			return wext_handle_ioctl(net, &ifr, cmd, arg);
5147 		return -ENOTTY;
5148 	}
5149 }
5150 
5151 
5152 /**
5153  *	dev_new_index	-	allocate an ifindex
5154  *	@net: the applicable net namespace
5155  *
5156  *	Returns a suitable unique value for a new device interface
5157  *	number.  The caller must hold the rtnl semaphore or the
5158  *	dev_base_lock to be sure it remains unique.
5159  */
5160 static int dev_new_index(struct net *net)
5161 {
5162 	static int ifindex;
5163 	for (;;) {
5164 		if (++ifindex <= 0)
5165 			ifindex = 1;
5166 		if (!__dev_get_by_index(net, ifindex))
5167 			return ifindex;
5168 	}
5169 }
5170 
5171 /* Delayed registration/unregisteration */
5172 static LIST_HEAD(net_todo_list);
5173 
5174 static void net_set_todo(struct net_device *dev)
5175 {
5176 	list_add_tail(&dev->todo_list, &net_todo_list);
5177 }
5178 
5179 static void rollback_registered_many(struct list_head *head)
5180 {
5181 	struct net_device *dev, *tmp;
5182 
5183 	BUG_ON(dev_boot_phase);
5184 	ASSERT_RTNL();
5185 
5186 	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5187 		/* Some devices call without registering
5188 		 * for initialization unwind. Remove those
5189 		 * devices and proceed with the remaining.
5190 		 */
5191 		if (dev->reg_state == NETREG_UNINITIALIZED) {
5192 			pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5193 				 dev->name, dev);
5194 
5195 			WARN_ON(1);
5196 			list_del(&dev->unreg_list);
5197 			continue;
5198 		}
5199 		dev->dismantle = true;
5200 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
5201 	}
5202 
5203 	/* If device is running, close it first. */
5204 	dev_close_many(head);
5205 
5206 	list_for_each_entry(dev, head, unreg_list) {
5207 		/* And unlink it from device chain. */
5208 		unlist_netdevice(dev);
5209 
5210 		dev->reg_state = NETREG_UNREGISTERING;
5211 	}
5212 
5213 	synchronize_net();
5214 
5215 	list_for_each_entry(dev, head, unreg_list) {
5216 		/* Shutdown queueing discipline. */
5217 		dev_shutdown(dev);
5218 
5219 
5220 		/* Notify protocols, that we are about to destroy
5221 		   this device. They should clean all the things.
5222 		*/
5223 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5224 
5225 		if (!dev->rtnl_link_ops ||
5226 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5227 			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
5228 
5229 		/*
5230 		 *	Flush the unicast and multicast chains
5231 		 */
5232 		dev_uc_flush(dev);
5233 		dev_mc_flush(dev);
5234 
5235 		if (dev->netdev_ops->ndo_uninit)
5236 			dev->netdev_ops->ndo_uninit(dev);
5237 
5238 		/* Notifier chain MUST detach us from master device. */
5239 		WARN_ON(dev->master);
5240 
5241 		/* Remove entries from kobject tree */
5242 		netdev_unregister_kobject(dev);
5243 	}
5244 
5245 	/* Process any work delayed until the end of the batch */
5246 	dev = list_first_entry(head, struct net_device, unreg_list);
5247 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5248 
5249 	synchronize_net();
5250 
5251 	list_for_each_entry(dev, head, unreg_list)
5252 		dev_put(dev);
5253 }
5254 
5255 static void rollback_registered(struct net_device *dev)
5256 {
5257 	LIST_HEAD(single);
5258 
5259 	list_add(&dev->unreg_list, &single);
5260 	rollback_registered_many(&single);
5261 	list_del(&single);
5262 }
5263 
5264 static netdev_features_t netdev_fix_features(struct net_device *dev,
5265 	netdev_features_t features)
5266 {
5267 	/* Fix illegal checksum combinations */
5268 	if ((features & NETIF_F_HW_CSUM) &&
5269 	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5270 		netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5271 		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5272 	}
5273 
5274 	/* Fix illegal SG+CSUM combinations. */
5275 	if ((features & NETIF_F_SG) &&
5276 	    !(features & NETIF_F_ALL_CSUM)) {
5277 		netdev_dbg(dev,
5278 			"Dropping NETIF_F_SG since no checksum feature.\n");
5279 		features &= ~NETIF_F_SG;
5280 	}
5281 
5282 	/* TSO requires that SG is present as well. */
5283 	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5284 		netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5285 		features &= ~NETIF_F_ALL_TSO;
5286 	}
5287 
5288 	/* TSO ECN requires that TSO is present as well. */
5289 	if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5290 		features &= ~NETIF_F_TSO_ECN;
5291 
5292 	/* Software GSO depends on SG. */
5293 	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5294 		netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5295 		features &= ~NETIF_F_GSO;
5296 	}
5297 
5298 	/* UFO needs SG and checksumming */
5299 	if (features & NETIF_F_UFO) {
5300 		/* maybe split UFO into V4 and V6? */
5301 		if (!((features & NETIF_F_GEN_CSUM) ||
5302 		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5303 			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5304 			netdev_dbg(dev,
5305 				"Dropping NETIF_F_UFO since no checksum offload features.\n");
5306 			features &= ~NETIF_F_UFO;
5307 		}
5308 
5309 		if (!(features & NETIF_F_SG)) {
5310 			netdev_dbg(dev,
5311 				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5312 			features &= ~NETIF_F_UFO;
5313 		}
5314 	}
5315 
5316 	return features;
5317 }
5318 
5319 int __netdev_update_features(struct net_device *dev)
5320 {
5321 	netdev_features_t features;
5322 	int err = 0;
5323 
5324 	ASSERT_RTNL();
5325 
5326 	features = netdev_get_wanted_features(dev);
5327 
5328 	if (dev->netdev_ops->ndo_fix_features)
5329 		features = dev->netdev_ops->ndo_fix_features(dev, features);
5330 
5331 	/* driver might be less strict about feature dependencies */
5332 	features = netdev_fix_features(dev, features);
5333 
5334 	if (dev->features == features)
5335 		return 0;
5336 
5337 	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5338 		&dev->features, &features);
5339 
5340 	if (dev->netdev_ops->ndo_set_features)
5341 		err = dev->netdev_ops->ndo_set_features(dev, features);
5342 
5343 	if (unlikely(err < 0)) {
5344 		netdev_err(dev,
5345 			"set_features() failed (%d); wanted %pNF, left %pNF\n",
5346 			err, &features, &dev->features);
5347 		return -1;
5348 	}
5349 
5350 	if (!err)
5351 		dev->features = features;
5352 
5353 	return 1;
5354 }
5355 
5356 /**
5357  *	netdev_update_features - recalculate device features
5358  *	@dev: the device to check
5359  *
5360  *	Recalculate dev->features set and send notifications if it
5361  *	has changed. Should be called after driver or hardware dependent
5362  *	conditions might have changed that influence the features.
5363  */
5364 void netdev_update_features(struct net_device *dev)
5365 {
5366 	if (__netdev_update_features(dev))
5367 		netdev_features_change(dev);
5368 }
5369 EXPORT_SYMBOL(netdev_update_features);
5370 
5371 /**
5372  *	netdev_change_features - recalculate device features
5373  *	@dev: the device to check
5374  *
5375  *	Recalculate dev->features set and send notifications even
5376  *	if they have not changed. Should be called instead of
5377  *	netdev_update_features() if also dev->vlan_features might
5378  *	have changed to allow the changes to be propagated to stacked
5379  *	VLAN devices.
5380  */
5381 void netdev_change_features(struct net_device *dev)
5382 {
5383 	__netdev_update_features(dev);
5384 	netdev_features_change(dev);
5385 }
5386 EXPORT_SYMBOL(netdev_change_features);
5387 
5388 /**
5389  *	netif_stacked_transfer_operstate -	transfer operstate
5390  *	@rootdev: the root or lower level device to transfer state from
5391  *	@dev: the device to transfer operstate to
5392  *
5393  *	Transfer operational state from root to device. This is normally
5394  *	called when a stacking relationship exists between the root
5395  *	device and the device(a leaf device).
5396  */
5397 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5398 					struct net_device *dev)
5399 {
5400 	if (rootdev->operstate == IF_OPER_DORMANT)
5401 		netif_dormant_on(dev);
5402 	else
5403 		netif_dormant_off(dev);
5404 
5405 	if (netif_carrier_ok(rootdev)) {
5406 		if (!netif_carrier_ok(dev))
5407 			netif_carrier_on(dev);
5408 	} else {
5409 		if (netif_carrier_ok(dev))
5410 			netif_carrier_off(dev);
5411 	}
5412 }
5413 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5414 
5415 #ifdef CONFIG_RPS
5416 static int netif_alloc_rx_queues(struct net_device *dev)
5417 {
5418 	unsigned int i, count = dev->num_rx_queues;
5419 	struct netdev_rx_queue *rx;
5420 
5421 	BUG_ON(count < 1);
5422 
5423 	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5424 	if (!rx) {
5425 		pr_err("netdev: Unable to allocate %u rx queues\n", count);
5426 		return -ENOMEM;
5427 	}
5428 	dev->_rx = rx;
5429 
5430 	for (i = 0; i < count; i++)
5431 		rx[i].dev = dev;
5432 	return 0;
5433 }
5434 #endif
5435 
5436 static void netdev_init_one_queue(struct net_device *dev,
5437 				  struct netdev_queue *queue, void *_unused)
5438 {
5439 	/* Initialize queue lock */
5440 	spin_lock_init(&queue->_xmit_lock);
5441 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5442 	queue->xmit_lock_owner = -1;
5443 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
5444 	queue->dev = dev;
5445 #ifdef CONFIG_BQL
5446 	dql_init(&queue->dql, HZ);
5447 #endif
5448 }
5449 
5450 static int netif_alloc_netdev_queues(struct net_device *dev)
5451 {
5452 	unsigned int count = dev->num_tx_queues;
5453 	struct netdev_queue *tx;
5454 
5455 	BUG_ON(count < 1);
5456 
5457 	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5458 	if (!tx) {
5459 		pr_err("netdev: Unable to allocate %u tx queues\n", count);
5460 		return -ENOMEM;
5461 	}
5462 	dev->_tx = tx;
5463 
5464 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5465 	spin_lock_init(&dev->tx_global_lock);
5466 
5467 	return 0;
5468 }
5469 
5470 /**
5471  *	register_netdevice	- register a network device
5472  *	@dev: device to register
5473  *
5474  *	Take a completed network device structure and add it to the kernel
5475  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5476  *	chain. 0 is returned on success. A negative errno code is returned
5477  *	on a failure to set up the device, or if the name is a duplicate.
5478  *
5479  *	Callers must hold the rtnl semaphore. You may want
5480  *	register_netdev() instead of this.
5481  *
5482  *	BUGS:
5483  *	The locking appears insufficient to guarantee two parallel registers
5484  *	will not get the same name.
5485  */
5486 
5487 int register_netdevice(struct net_device *dev)
5488 {
5489 	int ret;
5490 	struct net *net = dev_net(dev);
5491 
5492 	BUG_ON(dev_boot_phase);
5493 	ASSERT_RTNL();
5494 
5495 	might_sleep();
5496 
5497 	/* When net_device's are persistent, this will be fatal. */
5498 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5499 	BUG_ON(!net);
5500 
5501 	spin_lock_init(&dev->addr_list_lock);
5502 	netdev_set_addr_lockdep_class(dev);
5503 
5504 	dev->iflink = -1;
5505 
5506 	ret = dev_get_valid_name(dev, dev->name);
5507 	if (ret < 0)
5508 		goto out;
5509 
5510 	/* Init, if this function is available */
5511 	if (dev->netdev_ops->ndo_init) {
5512 		ret = dev->netdev_ops->ndo_init(dev);
5513 		if (ret) {
5514 			if (ret > 0)
5515 				ret = -EIO;
5516 			goto out;
5517 		}
5518 	}
5519 
5520 	dev->ifindex = dev_new_index(net);
5521 	if (dev->iflink == -1)
5522 		dev->iflink = dev->ifindex;
5523 
5524 	/* Transfer changeable features to wanted_features and enable
5525 	 * software offloads (GSO and GRO).
5526 	 */
5527 	dev->hw_features |= NETIF_F_SOFT_FEATURES;
5528 	dev->features |= NETIF_F_SOFT_FEATURES;
5529 	dev->wanted_features = dev->features & dev->hw_features;
5530 
5531 	/* Turn on no cache copy if HW is doing checksum */
5532 	if (!(dev->flags & IFF_LOOPBACK)) {
5533 		dev->hw_features |= NETIF_F_NOCACHE_COPY;
5534 		if (dev->features & NETIF_F_ALL_CSUM) {
5535 			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5536 			dev->features |= NETIF_F_NOCACHE_COPY;
5537 		}
5538 	}
5539 
5540 	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
5541 	 */
5542 	dev->vlan_features |= NETIF_F_HIGHDMA;
5543 
5544 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5545 	ret = notifier_to_errno(ret);
5546 	if (ret)
5547 		goto err_uninit;
5548 
5549 	ret = netdev_register_kobject(dev);
5550 	if (ret)
5551 		goto err_uninit;
5552 	dev->reg_state = NETREG_REGISTERED;
5553 
5554 	__netdev_update_features(dev);
5555 
5556 	/*
5557 	 *	Default initial state at registry is that the
5558 	 *	device is present.
5559 	 */
5560 
5561 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5562 
5563 	dev_init_scheduler(dev);
5564 	dev_hold(dev);
5565 	list_netdevice(dev);
5566 
5567 	/* Notify protocols, that a new device appeared. */
5568 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5569 	ret = notifier_to_errno(ret);
5570 	if (ret) {
5571 		rollback_registered(dev);
5572 		dev->reg_state = NETREG_UNREGISTERED;
5573 	}
5574 	/*
5575 	 *	Prevent userspace races by waiting until the network
5576 	 *	device is fully setup before sending notifications.
5577 	 */
5578 	if (!dev->rtnl_link_ops ||
5579 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5580 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5581 
5582 out:
5583 	return ret;
5584 
5585 err_uninit:
5586 	if (dev->netdev_ops->ndo_uninit)
5587 		dev->netdev_ops->ndo_uninit(dev);
5588 	goto out;
5589 }
5590 EXPORT_SYMBOL(register_netdevice);
5591 
5592 /**
5593  *	init_dummy_netdev	- init a dummy network device for NAPI
5594  *	@dev: device to init
5595  *
5596  *	This takes a network device structure and initialize the minimum
5597  *	amount of fields so it can be used to schedule NAPI polls without
5598  *	registering a full blown interface. This is to be used by drivers
5599  *	that need to tie several hardware interfaces to a single NAPI
5600  *	poll scheduler due to HW limitations.
5601  */
5602 int init_dummy_netdev(struct net_device *dev)
5603 {
5604 	/* Clear everything. Note we don't initialize spinlocks
5605 	 * are they aren't supposed to be taken by any of the
5606 	 * NAPI code and this dummy netdev is supposed to be
5607 	 * only ever used for NAPI polls
5608 	 */
5609 	memset(dev, 0, sizeof(struct net_device));
5610 
5611 	/* make sure we BUG if trying to hit standard
5612 	 * register/unregister code path
5613 	 */
5614 	dev->reg_state = NETREG_DUMMY;
5615 
5616 	/* NAPI wants this */
5617 	INIT_LIST_HEAD(&dev->napi_list);
5618 
5619 	/* a dummy interface is started by default */
5620 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5621 	set_bit(__LINK_STATE_START, &dev->state);
5622 
5623 	/* Note : We dont allocate pcpu_refcnt for dummy devices,
5624 	 * because users of this 'device' dont need to change
5625 	 * its refcount.
5626 	 */
5627 
5628 	return 0;
5629 }
5630 EXPORT_SYMBOL_GPL(init_dummy_netdev);
5631 
5632 
5633 /**
5634  *	register_netdev	- register a network device
5635  *	@dev: device to register
5636  *
5637  *	Take a completed network device structure and add it to the kernel
5638  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5639  *	chain. 0 is returned on success. A negative errno code is returned
5640  *	on a failure to set up the device, or if the name is a duplicate.
5641  *
5642  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
5643  *	and expands the device name if you passed a format string to
5644  *	alloc_netdev.
5645  */
5646 int register_netdev(struct net_device *dev)
5647 {
5648 	int err;
5649 
5650 	rtnl_lock();
5651 	err = register_netdevice(dev);
5652 	rtnl_unlock();
5653 	return err;
5654 }
5655 EXPORT_SYMBOL(register_netdev);
5656 
5657 int netdev_refcnt_read(const struct net_device *dev)
5658 {
5659 	int i, refcnt = 0;
5660 
5661 	for_each_possible_cpu(i)
5662 		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5663 	return refcnt;
5664 }
5665 EXPORT_SYMBOL(netdev_refcnt_read);
5666 
5667 /*
5668  * netdev_wait_allrefs - wait until all references are gone.
5669  *
5670  * This is called when unregistering network devices.
5671  *
5672  * Any protocol or device that holds a reference should register
5673  * for netdevice notification, and cleanup and put back the
5674  * reference if they receive an UNREGISTER event.
5675  * We can get stuck here if buggy protocols don't correctly
5676  * call dev_put.
5677  */
5678 static void netdev_wait_allrefs(struct net_device *dev)
5679 {
5680 	unsigned long rebroadcast_time, warning_time;
5681 	int refcnt;
5682 
5683 	linkwatch_forget_dev(dev);
5684 
5685 	rebroadcast_time = warning_time = jiffies;
5686 	refcnt = netdev_refcnt_read(dev);
5687 
5688 	while (refcnt != 0) {
5689 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5690 			rtnl_lock();
5691 
5692 			/* Rebroadcast unregister notification */
5693 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5694 			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5695 			 * should have already handle it the first time */
5696 
5697 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5698 				     &dev->state)) {
5699 				/* We must not have linkwatch events
5700 				 * pending on unregister. If this
5701 				 * happens, we simply run the queue
5702 				 * unscheduled, resulting in a noop
5703 				 * for this device.
5704 				 */
5705 				linkwatch_run_queue();
5706 			}
5707 
5708 			__rtnl_unlock();
5709 
5710 			rebroadcast_time = jiffies;
5711 		}
5712 
5713 		msleep(250);
5714 
5715 		refcnt = netdev_refcnt_read(dev);
5716 
5717 		if (time_after(jiffies, warning_time + 10 * HZ)) {
5718 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
5719 				 dev->name, refcnt);
5720 			warning_time = jiffies;
5721 		}
5722 	}
5723 }
5724 
5725 /* The sequence is:
5726  *
5727  *	rtnl_lock();
5728  *	...
5729  *	register_netdevice(x1);
5730  *	register_netdevice(x2);
5731  *	...
5732  *	unregister_netdevice(y1);
5733  *	unregister_netdevice(y2);
5734  *      ...
5735  *	rtnl_unlock();
5736  *	free_netdev(y1);
5737  *	free_netdev(y2);
5738  *
5739  * We are invoked by rtnl_unlock().
5740  * This allows us to deal with problems:
5741  * 1) We can delete sysfs objects which invoke hotplug
5742  *    without deadlocking with linkwatch via keventd.
5743  * 2) Since we run with the RTNL semaphore not held, we can sleep
5744  *    safely in order to wait for the netdev refcnt to drop to zero.
5745  *
5746  * We must not return until all unregister events added during
5747  * the interval the lock was held have been completed.
5748  */
5749 void netdev_run_todo(void)
5750 {
5751 	struct list_head list;
5752 
5753 	/* Snapshot list, allow later requests */
5754 	list_replace_init(&net_todo_list, &list);
5755 
5756 	__rtnl_unlock();
5757 
5758 	/* Wait for rcu callbacks to finish before attempting to drain
5759 	 * the device list.  This usually avoids a 250ms wait.
5760 	 */
5761 	if (!list_empty(&list))
5762 		rcu_barrier();
5763 
5764 	while (!list_empty(&list)) {
5765 		struct net_device *dev
5766 			= list_first_entry(&list, struct net_device, todo_list);
5767 		list_del(&dev->todo_list);
5768 
5769 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5770 			pr_err("network todo '%s' but state %d\n",
5771 			       dev->name, dev->reg_state);
5772 			dump_stack();
5773 			continue;
5774 		}
5775 
5776 		dev->reg_state = NETREG_UNREGISTERED;
5777 
5778 		on_each_cpu(flush_backlog, dev, 1);
5779 
5780 		netdev_wait_allrefs(dev);
5781 
5782 		/* paranoia */
5783 		BUG_ON(netdev_refcnt_read(dev));
5784 		WARN_ON(rcu_access_pointer(dev->ip_ptr));
5785 		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
5786 		WARN_ON(dev->dn_ptr);
5787 
5788 		if (dev->destructor)
5789 			dev->destructor(dev);
5790 
5791 		/* Free network device */
5792 		kobject_put(&dev->dev.kobj);
5793 	}
5794 }
5795 
5796 /* Convert net_device_stats to rtnl_link_stats64.  They have the same
5797  * fields in the same order, with only the type differing.
5798  */
5799 void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
5800 			     const struct net_device_stats *netdev_stats)
5801 {
5802 #if BITS_PER_LONG == 64
5803 	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
5804 	memcpy(stats64, netdev_stats, sizeof(*stats64));
5805 #else
5806 	size_t i, n = sizeof(*stats64) / sizeof(u64);
5807 	const unsigned long *src = (const unsigned long *)netdev_stats;
5808 	u64 *dst = (u64 *)stats64;
5809 
5810 	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
5811 		     sizeof(*stats64) / sizeof(u64));
5812 	for (i = 0; i < n; i++)
5813 		dst[i] = src[i];
5814 #endif
5815 }
5816 EXPORT_SYMBOL(netdev_stats_to_stats64);
5817 
5818 /**
5819  *	dev_get_stats	- get network device statistics
5820  *	@dev: device to get statistics from
5821  *	@storage: place to store stats
5822  *
5823  *	Get network statistics from device. Return @storage.
5824  *	The device driver may provide its own method by setting
5825  *	dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
5826  *	otherwise the internal statistics structure is used.
5827  */
5828 struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5829 					struct rtnl_link_stats64 *storage)
5830 {
5831 	const struct net_device_ops *ops = dev->netdev_ops;
5832 
5833 	if (ops->ndo_get_stats64) {
5834 		memset(storage, 0, sizeof(*storage));
5835 		ops->ndo_get_stats64(dev, storage);
5836 	} else if (ops->ndo_get_stats) {
5837 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
5838 	} else {
5839 		netdev_stats_to_stats64(storage, &dev->stats);
5840 	}
5841 	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
5842 	return storage;
5843 }
5844 EXPORT_SYMBOL(dev_get_stats);
5845 
5846 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
5847 {
5848 	struct netdev_queue *queue = dev_ingress_queue(dev);
5849 
5850 #ifdef CONFIG_NET_CLS_ACT
5851 	if (queue)
5852 		return queue;
5853 	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
5854 	if (!queue)
5855 		return NULL;
5856 	netdev_init_one_queue(dev, queue, NULL);
5857 	queue->qdisc = &noop_qdisc;
5858 	queue->qdisc_sleeping = &noop_qdisc;
5859 	rcu_assign_pointer(dev->ingress_queue, queue);
5860 #endif
5861 	return queue;
5862 }
5863 
5864 /**
5865  *	alloc_netdev_mqs - allocate network device
5866  *	@sizeof_priv:	size of private data to allocate space for
5867  *	@name:		device name format string
5868  *	@setup:		callback to initialize device
5869  *	@txqs:		the number of TX subqueues to allocate
5870  *	@rxqs:		the number of RX subqueues to allocate
5871  *
5872  *	Allocates a struct net_device with private data area for driver use
5873  *	and performs basic initialization.  Also allocates subquue structs
5874  *	for each queue on the device.
5875  */
5876 struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
5877 		void (*setup)(struct net_device *),
5878 		unsigned int txqs, unsigned int rxqs)
5879 {
5880 	struct net_device *dev;
5881 	size_t alloc_size;
5882 	struct net_device *p;
5883 
5884 	BUG_ON(strlen(name) >= sizeof(dev->name));
5885 
5886 	if (txqs < 1) {
5887 		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
5888 		return NULL;
5889 	}
5890 
5891 #ifdef CONFIG_RPS
5892 	if (rxqs < 1) {
5893 		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
5894 		return NULL;
5895 	}
5896 #endif
5897 
5898 	alloc_size = sizeof(struct net_device);
5899 	if (sizeof_priv) {
5900 		/* ensure 32-byte alignment of private area */
5901 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5902 		alloc_size += sizeof_priv;
5903 	}
5904 	/* ensure 32-byte alignment of whole construct */
5905 	alloc_size += NETDEV_ALIGN - 1;
5906 
5907 	p = kzalloc(alloc_size, GFP_KERNEL);
5908 	if (!p) {
5909 		pr_err("alloc_netdev: Unable to allocate device\n");
5910 		return NULL;
5911 	}
5912 
5913 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5914 	dev->padded = (char *)dev - (char *)p;
5915 
5916 	dev->pcpu_refcnt = alloc_percpu(int);
5917 	if (!dev->pcpu_refcnt)
5918 		goto free_p;
5919 
5920 	if (dev_addr_init(dev))
5921 		goto free_pcpu;
5922 
5923 	dev_mc_init(dev);
5924 	dev_uc_init(dev);
5925 
5926 	dev_net_set(dev, &init_net);
5927 
5928 	dev->gso_max_size = GSO_MAX_SIZE;
5929 
5930 	INIT_LIST_HEAD(&dev->napi_list);
5931 	INIT_LIST_HEAD(&dev->unreg_list);
5932 	INIT_LIST_HEAD(&dev->link_watch_list);
5933 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5934 	setup(dev);
5935 
5936 	dev->num_tx_queues = txqs;
5937 	dev->real_num_tx_queues = txqs;
5938 	if (netif_alloc_netdev_queues(dev))
5939 		goto free_all;
5940 
5941 #ifdef CONFIG_RPS
5942 	dev->num_rx_queues = rxqs;
5943 	dev->real_num_rx_queues = rxqs;
5944 	if (netif_alloc_rx_queues(dev))
5945 		goto free_all;
5946 #endif
5947 
5948 	strcpy(dev->name, name);
5949 	dev->group = INIT_NETDEV_GROUP;
5950 	return dev;
5951 
5952 free_all:
5953 	free_netdev(dev);
5954 	return NULL;
5955 
5956 free_pcpu:
5957 	free_percpu(dev->pcpu_refcnt);
5958 	kfree(dev->_tx);
5959 #ifdef CONFIG_RPS
5960 	kfree(dev->_rx);
5961 #endif
5962 
5963 free_p:
5964 	kfree(p);
5965 	return NULL;
5966 }
5967 EXPORT_SYMBOL(alloc_netdev_mqs);
5968 
5969 /**
5970  *	free_netdev - free network device
5971  *	@dev: device
5972  *
5973  *	This function does the last stage of destroying an allocated device
5974  * 	interface. The reference to the device object is released.
5975  *	If this is the last reference then it will be freed.
5976  */
5977 void free_netdev(struct net_device *dev)
5978 {
5979 	struct napi_struct *p, *n;
5980 
5981 	release_net(dev_net(dev));
5982 
5983 	kfree(dev->_tx);
5984 #ifdef CONFIG_RPS
5985 	kfree(dev->_rx);
5986 #endif
5987 
5988 	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
5989 
5990 	/* Flush device addresses */
5991 	dev_addr_flush(dev);
5992 
5993 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5994 		netif_napi_del(p);
5995 
5996 	free_percpu(dev->pcpu_refcnt);
5997 	dev->pcpu_refcnt = NULL;
5998 
5999 	/*  Compatibility with error handling in drivers */
6000 	if (dev->reg_state == NETREG_UNINITIALIZED) {
6001 		kfree((char *)dev - dev->padded);
6002 		return;
6003 	}
6004 
6005 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6006 	dev->reg_state = NETREG_RELEASED;
6007 
6008 	/* will free via device release */
6009 	put_device(&dev->dev);
6010 }
6011 EXPORT_SYMBOL(free_netdev);
6012 
6013 /**
6014  *	synchronize_net -  Synchronize with packet receive processing
6015  *
6016  *	Wait for packets currently being received to be done.
6017  *	Does not block later packets from starting.
6018  */
6019 void synchronize_net(void)
6020 {
6021 	might_sleep();
6022 	if (rtnl_is_locked())
6023 		synchronize_rcu_expedited();
6024 	else
6025 		synchronize_rcu();
6026 }
6027 EXPORT_SYMBOL(synchronize_net);
6028 
6029 /**
6030  *	unregister_netdevice_queue - remove device from the kernel
6031  *	@dev: device
6032  *	@head: list
6033  *
6034  *	This function shuts down a device interface and removes it
6035  *	from the kernel tables.
6036  *	If head not NULL, device is queued to be unregistered later.
6037  *
6038  *	Callers must hold the rtnl semaphore.  You may want
6039  *	unregister_netdev() instead of this.
6040  */
6041 
6042 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
6043 {
6044 	ASSERT_RTNL();
6045 
6046 	if (head) {
6047 		list_move_tail(&dev->unreg_list, head);
6048 	} else {
6049 		rollback_registered(dev);
6050 		/* Finish processing unregister after unlock */
6051 		net_set_todo(dev);
6052 	}
6053 }
6054 EXPORT_SYMBOL(unregister_netdevice_queue);
6055 
6056 /**
6057  *	unregister_netdevice_many - unregister many devices
6058  *	@head: list of devices
6059  */
6060 void unregister_netdevice_many(struct list_head *head)
6061 {
6062 	struct net_device *dev;
6063 
6064 	if (!list_empty(head)) {
6065 		rollback_registered_many(head);
6066 		list_for_each_entry(dev, head, unreg_list)
6067 			net_set_todo(dev);
6068 	}
6069 }
6070 EXPORT_SYMBOL(unregister_netdevice_many);
6071 
6072 /**
6073  *	unregister_netdev - remove device from the kernel
6074  *	@dev: device
6075  *
6076  *	This function shuts down a device interface and removes it
6077  *	from the kernel tables.
6078  *
6079  *	This is just a wrapper for unregister_netdevice that takes
6080  *	the rtnl semaphore.  In general you want to use this and not
6081  *	unregister_netdevice.
6082  */
6083 void unregister_netdev(struct net_device *dev)
6084 {
6085 	rtnl_lock();
6086 	unregister_netdevice(dev);
6087 	rtnl_unlock();
6088 }
6089 EXPORT_SYMBOL(unregister_netdev);
6090 
6091 /**
6092  *	dev_change_net_namespace - move device to different nethost namespace
6093  *	@dev: device
6094  *	@net: network namespace
6095  *	@pat: If not NULL name pattern to try if the current device name
6096  *	      is already taken in the destination network namespace.
6097  *
6098  *	This function shuts down a device interface and moves it
6099  *	to a new network namespace. On success 0 is returned, on
6100  *	a failure a netagive errno code is returned.
6101  *
6102  *	Callers must hold the rtnl semaphore.
6103  */
6104 
6105 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6106 {
6107 	int err;
6108 
6109 	ASSERT_RTNL();
6110 
6111 	/* Don't allow namespace local devices to be moved. */
6112 	err = -EINVAL;
6113 	if (dev->features & NETIF_F_NETNS_LOCAL)
6114 		goto out;
6115 
6116 	/* Ensure the device has been registrered */
6117 	err = -EINVAL;
6118 	if (dev->reg_state != NETREG_REGISTERED)
6119 		goto out;
6120 
6121 	/* Get out if there is nothing todo */
6122 	err = 0;
6123 	if (net_eq(dev_net(dev), net))
6124 		goto out;
6125 
6126 	/* Pick the destination device name, and ensure
6127 	 * we can use it in the destination network namespace.
6128 	 */
6129 	err = -EEXIST;
6130 	if (__dev_get_by_name(net, dev->name)) {
6131 		/* We get here if we can't use the current device name */
6132 		if (!pat)
6133 			goto out;
6134 		if (dev_get_valid_name(dev, pat) < 0)
6135 			goto out;
6136 	}
6137 
6138 	/*
6139 	 * And now a mini version of register_netdevice unregister_netdevice.
6140 	 */
6141 
6142 	/* If device is running close it first. */
6143 	dev_close(dev);
6144 
6145 	/* And unlink it from device chain */
6146 	err = -ENODEV;
6147 	unlist_netdevice(dev);
6148 
6149 	synchronize_net();
6150 
6151 	/* Shutdown queueing discipline. */
6152 	dev_shutdown(dev);
6153 
6154 	/* Notify protocols, that we are about to destroy
6155 	   this device. They should clean all the things.
6156 
6157 	   Note that dev->reg_state stays at NETREG_REGISTERED.
6158 	   This is wanted because this way 8021q and macvlan know
6159 	   the device is just moving and can keep their slaves up.
6160 	*/
6161 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6162 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
6163 	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
6164 
6165 	/*
6166 	 *	Flush the unicast and multicast chains
6167 	 */
6168 	dev_uc_flush(dev);
6169 	dev_mc_flush(dev);
6170 
6171 	/* Actually switch the network namespace */
6172 	dev_net_set(dev, net);
6173 
6174 	/* If there is an ifindex conflict assign a new one */
6175 	if (__dev_get_by_index(net, dev->ifindex)) {
6176 		int iflink = (dev->iflink == dev->ifindex);
6177 		dev->ifindex = dev_new_index(net);
6178 		if (iflink)
6179 			dev->iflink = dev->ifindex;
6180 	}
6181 
6182 	/* Fixup kobjects */
6183 	err = device_rename(&dev->dev, dev->name);
6184 	WARN_ON(err);
6185 
6186 	/* Add the device back in the hashes */
6187 	list_netdevice(dev);
6188 
6189 	/* Notify protocols, that a new device appeared. */
6190 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
6191 
6192 	/*
6193 	 *	Prevent userspace races by waiting until the network
6194 	 *	device is fully setup before sending notifications.
6195 	 */
6196 	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
6197 
6198 	synchronize_net();
6199 	err = 0;
6200 out:
6201 	return err;
6202 }
6203 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
6204 
6205 static int dev_cpu_callback(struct notifier_block *nfb,
6206 			    unsigned long action,
6207 			    void *ocpu)
6208 {
6209 	struct sk_buff **list_skb;
6210 	struct sk_buff *skb;
6211 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
6212 	struct softnet_data *sd, *oldsd;
6213 
6214 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
6215 		return NOTIFY_OK;
6216 
6217 	local_irq_disable();
6218 	cpu = smp_processor_id();
6219 	sd = &per_cpu(softnet_data, cpu);
6220 	oldsd = &per_cpu(softnet_data, oldcpu);
6221 
6222 	/* Find end of our completion_queue. */
6223 	list_skb = &sd->completion_queue;
6224 	while (*list_skb)
6225 		list_skb = &(*list_skb)->next;
6226 	/* Append completion queue from offline CPU. */
6227 	*list_skb = oldsd->completion_queue;
6228 	oldsd->completion_queue = NULL;
6229 
6230 	/* Append output queue from offline CPU. */
6231 	if (oldsd->output_queue) {
6232 		*sd->output_queue_tailp = oldsd->output_queue;
6233 		sd->output_queue_tailp = oldsd->output_queue_tailp;
6234 		oldsd->output_queue = NULL;
6235 		oldsd->output_queue_tailp = &oldsd->output_queue;
6236 	}
6237 	/* Append NAPI poll list from offline CPU. */
6238 	if (!list_empty(&oldsd->poll_list)) {
6239 		list_splice_init(&oldsd->poll_list, &sd->poll_list);
6240 		raise_softirq_irqoff(NET_RX_SOFTIRQ);
6241 	}
6242 
6243 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
6244 	local_irq_enable();
6245 
6246 	/* Process offline CPU's input_pkt_queue */
6247 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
6248 		netif_rx(skb);
6249 		input_queue_head_incr(oldsd);
6250 	}
6251 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
6252 		netif_rx(skb);
6253 		input_queue_head_incr(oldsd);
6254 	}
6255 
6256 	return NOTIFY_OK;
6257 }
6258 
6259 
6260 /**
6261  *	netdev_increment_features - increment feature set by one
6262  *	@all: current feature set
6263  *	@one: new feature set
6264  *	@mask: mask feature set
6265  *
6266  *	Computes a new feature set after adding a device with feature set
6267  *	@one to the master device with current feature set @all.  Will not
6268  *	enable anything that is off in @mask. Returns the new feature set.
6269  */
6270 netdev_features_t netdev_increment_features(netdev_features_t all,
6271 	netdev_features_t one, netdev_features_t mask)
6272 {
6273 	if (mask & NETIF_F_GEN_CSUM)
6274 		mask |= NETIF_F_ALL_CSUM;
6275 	mask |= NETIF_F_VLAN_CHALLENGED;
6276 
6277 	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
6278 	all &= one | ~NETIF_F_ALL_FOR_ALL;
6279 
6280 	/* If one device supports hw checksumming, set for all. */
6281 	if (all & NETIF_F_GEN_CSUM)
6282 		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
6283 
6284 	return all;
6285 }
6286 EXPORT_SYMBOL(netdev_increment_features);
6287 
6288 static struct hlist_head *netdev_create_hash(void)
6289 {
6290 	int i;
6291 	struct hlist_head *hash;
6292 
6293 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
6294 	if (hash != NULL)
6295 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
6296 			INIT_HLIST_HEAD(&hash[i]);
6297 
6298 	return hash;
6299 }
6300 
6301 /* Initialize per network namespace state */
6302 static int __net_init netdev_init(struct net *net)
6303 {
6304 	INIT_LIST_HEAD(&net->dev_base_head);
6305 
6306 	net->dev_name_head = netdev_create_hash();
6307 	if (net->dev_name_head == NULL)
6308 		goto err_name;
6309 
6310 	net->dev_index_head = netdev_create_hash();
6311 	if (net->dev_index_head == NULL)
6312 		goto err_idx;
6313 
6314 	return 0;
6315 
6316 err_idx:
6317 	kfree(net->dev_name_head);
6318 err_name:
6319 	return -ENOMEM;
6320 }
6321 
6322 /**
6323  *	netdev_drivername - network driver for the device
6324  *	@dev: network device
6325  *
6326  *	Determine network driver for device.
6327  */
6328 const char *netdev_drivername(const struct net_device *dev)
6329 {
6330 	const struct device_driver *driver;
6331 	const struct device *parent;
6332 	const char *empty = "";
6333 
6334 	parent = dev->dev.parent;
6335 	if (!parent)
6336 		return empty;
6337 
6338 	driver = parent->driver;
6339 	if (driver && driver->name)
6340 		return driver->name;
6341 	return empty;
6342 }
6343 
6344 int __netdev_printk(const char *level, const struct net_device *dev,
6345 			   struct va_format *vaf)
6346 {
6347 	int r;
6348 
6349 	if (dev && dev->dev.parent)
6350 		r = dev_printk(level, dev->dev.parent, "%s: %pV",
6351 			       netdev_name(dev), vaf);
6352 	else if (dev)
6353 		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
6354 	else
6355 		r = printk("%s(NULL net_device): %pV", level, vaf);
6356 
6357 	return r;
6358 }
6359 EXPORT_SYMBOL(__netdev_printk);
6360 
6361 int netdev_printk(const char *level, const struct net_device *dev,
6362 		  const char *format, ...)
6363 {
6364 	struct va_format vaf;
6365 	va_list args;
6366 	int r;
6367 
6368 	va_start(args, format);
6369 
6370 	vaf.fmt = format;
6371 	vaf.va = &args;
6372 
6373 	r = __netdev_printk(level, dev, &vaf);
6374 	va_end(args);
6375 
6376 	return r;
6377 }
6378 EXPORT_SYMBOL(netdev_printk);
6379 
6380 #define define_netdev_printk_level(func, level)			\
6381 int func(const struct net_device *dev, const char *fmt, ...)	\
6382 {								\
6383 	int r;							\
6384 	struct va_format vaf;					\
6385 	va_list args;						\
6386 								\
6387 	va_start(args, fmt);					\
6388 								\
6389 	vaf.fmt = fmt;						\
6390 	vaf.va = &args;						\
6391 								\
6392 	r = __netdev_printk(level, dev, &vaf);			\
6393 	va_end(args);						\
6394 								\
6395 	return r;						\
6396 }								\
6397 EXPORT_SYMBOL(func);
6398 
6399 define_netdev_printk_level(netdev_emerg, KERN_EMERG);
6400 define_netdev_printk_level(netdev_alert, KERN_ALERT);
6401 define_netdev_printk_level(netdev_crit, KERN_CRIT);
6402 define_netdev_printk_level(netdev_err, KERN_ERR);
6403 define_netdev_printk_level(netdev_warn, KERN_WARNING);
6404 define_netdev_printk_level(netdev_notice, KERN_NOTICE);
6405 define_netdev_printk_level(netdev_info, KERN_INFO);
6406 
6407 static void __net_exit netdev_exit(struct net *net)
6408 {
6409 	kfree(net->dev_name_head);
6410 	kfree(net->dev_index_head);
6411 }
6412 
6413 static struct pernet_operations __net_initdata netdev_net_ops = {
6414 	.init = netdev_init,
6415 	.exit = netdev_exit,
6416 };
6417 
6418 static void __net_exit default_device_exit(struct net *net)
6419 {
6420 	struct net_device *dev, *aux;
6421 	/*
6422 	 * Push all migratable network devices back to the
6423 	 * initial network namespace
6424 	 */
6425 	rtnl_lock();
6426 	for_each_netdev_safe(net, dev, aux) {
6427 		int err;
6428 		char fb_name[IFNAMSIZ];
6429 
6430 		/* Ignore unmoveable devices (i.e. loopback) */
6431 		if (dev->features & NETIF_F_NETNS_LOCAL)
6432 			continue;
6433 
6434 		/* Leave virtual devices for the generic cleanup */
6435 		if (dev->rtnl_link_ops)
6436 			continue;
6437 
6438 		/* Push remaining network devices to init_net */
6439 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6440 		err = dev_change_net_namespace(dev, &init_net, fb_name);
6441 		if (err) {
6442 			pr_emerg("%s: failed to move %s to init_net: %d\n",
6443 				 __func__, dev->name, err);
6444 			BUG();
6445 		}
6446 	}
6447 	rtnl_unlock();
6448 }
6449 
6450 static void __net_exit default_device_exit_batch(struct list_head *net_list)
6451 {
6452 	/* At exit all network devices most be removed from a network
6453 	 * namespace.  Do this in the reverse order of registration.
6454 	 * Do this across as many network namespaces as possible to
6455 	 * improve batching efficiency.
6456 	 */
6457 	struct net_device *dev;
6458 	struct net *net;
6459 	LIST_HEAD(dev_kill_list);
6460 
6461 	rtnl_lock();
6462 	list_for_each_entry(net, net_list, exit_list) {
6463 		for_each_netdev_reverse(net, dev) {
6464 			if (dev->rtnl_link_ops)
6465 				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
6466 			else
6467 				unregister_netdevice_queue(dev, &dev_kill_list);
6468 		}
6469 	}
6470 	unregister_netdevice_many(&dev_kill_list);
6471 	list_del(&dev_kill_list);
6472 	rtnl_unlock();
6473 }
6474 
6475 static struct pernet_operations __net_initdata default_device_ops = {
6476 	.exit = default_device_exit,
6477 	.exit_batch = default_device_exit_batch,
6478 };
6479 
6480 /*
6481  *	Initialize the DEV module. At boot time this walks the device list and
6482  *	unhooks any devices that fail to initialise (normally hardware not
6483  *	present) and leaves us with a valid list of present and active devices.
6484  *
6485  */
6486 
6487 /*
6488  *       This is called single threaded during boot, so no need
6489  *       to take the rtnl semaphore.
6490  */
6491 static int __init net_dev_init(void)
6492 {
6493 	int i, rc = -ENOMEM;
6494 
6495 	BUG_ON(!dev_boot_phase);
6496 
6497 	if (dev_proc_init())
6498 		goto out;
6499 
6500 	if (netdev_kobject_init())
6501 		goto out;
6502 
6503 	INIT_LIST_HEAD(&ptype_all);
6504 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
6505 		INIT_LIST_HEAD(&ptype_base[i]);
6506 
6507 	if (register_pernet_subsys(&netdev_net_ops))
6508 		goto out;
6509 
6510 	/*
6511 	 *	Initialise the packet receive queues.
6512 	 */
6513 
6514 	for_each_possible_cpu(i) {
6515 		struct softnet_data *sd = &per_cpu(softnet_data, i);
6516 
6517 		memset(sd, 0, sizeof(*sd));
6518 		skb_queue_head_init(&sd->input_pkt_queue);
6519 		skb_queue_head_init(&sd->process_queue);
6520 		sd->completion_queue = NULL;
6521 		INIT_LIST_HEAD(&sd->poll_list);
6522 		sd->output_queue = NULL;
6523 		sd->output_queue_tailp = &sd->output_queue;
6524 #ifdef CONFIG_RPS
6525 		sd->csd.func = rps_trigger_softirq;
6526 		sd->csd.info = sd;
6527 		sd->csd.flags = 0;
6528 		sd->cpu = i;
6529 #endif
6530 
6531 		sd->backlog.poll = process_backlog;
6532 		sd->backlog.weight = weight_p;
6533 		sd->backlog.gro_list = NULL;
6534 		sd->backlog.gro_count = 0;
6535 	}
6536 
6537 	dev_boot_phase = 0;
6538 
6539 	/* The loopback device is special if any other network devices
6540 	 * is present in a network namespace the loopback device must
6541 	 * be present. Since we now dynamically allocate and free the
6542 	 * loopback device ensure this invariant is maintained by
6543 	 * keeping the loopback device as the first device on the
6544 	 * list of network devices.  Ensuring the loopback devices
6545 	 * is the first device that appears and the last network device
6546 	 * that disappears.
6547 	 */
6548 	if (register_pernet_device(&loopback_net_ops))
6549 		goto out;
6550 
6551 	if (register_pernet_device(&default_device_ops))
6552 		goto out;
6553 
6554 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6555 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6556 
6557 	hotcpu_notifier(dev_cpu_callback, 0);
6558 	dst_init();
6559 	dev_mcast_init();
6560 	rc = 0;
6561 out:
6562 	return rc;
6563 }
6564 
6565 subsys_initcall(net_dev_init);
6566 
6567 static int __init initialize_hashrnd(void)
6568 {
6569 	get_random_bytes(&hashrnd, sizeof(hashrnd));
6570 	return 0;
6571 }
6572 
6573 late_initcall_sync(initialize_hashrnd);
6574 
6575