xref: /linux/net/core/dev.c (revision a33f32244d8550da8b4a26e277ce07d5c6d158b5)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/hash.h>
83 #include <linux/slab.h>
84 #include <linux/sched.h>
85 #include <linux/mutex.h>
86 #include <linux/string.h>
87 #include <linux/mm.h>
88 #include <linux/socket.h>
89 #include <linux/sockios.h>
90 #include <linux/errno.h>
91 #include <linux/interrupt.h>
92 #include <linux/if_ether.h>
93 #include <linux/netdevice.h>
94 #include <linux/etherdevice.h>
95 #include <linux/ethtool.h>
96 #include <linux/notifier.h>
97 #include <linux/skbuff.h>
98 #include <net/net_namespace.h>
99 #include <net/sock.h>
100 #include <linux/rtnetlink.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/stat.h>
104 #include <linux/if_bridge.h>
105 #include <linux/if_macvlan.h>
106 #include <net/dst.h>
107 #include <net/pkt_sched.h>
108 #include <net/checksum.h>
109 #include <net/xfrm.h>
110 #include <linux/highmem.h>
111 #include <linux/init.h>
112 #include <linux/kmod.h>
113 #include <linux/module.h>
114 #include <linux/netpoll.h>
115 #include <linux/rcupdate.h>
116 #include <linux/delay.h>
117 #include <net/wext.h>
118 #include <net/iw_handler.h>
119 #include <asm/current.h>
120 #include <linux/audit.h>
121 #include <linux/dmaengine.h>
122 #include <linux/err.h>
123 #include <linux/ctype.h>
124 #include <linux/if_arp.h>
125 #include <linux/if_vlan.h>
126 #include <linux/ip.h>
127 #include <net/ip.h>
128 #include <linux/ipv6.h>
129 #include <linux/in.h>
130 #include <linux/jhash.h>
131 #include <linux/random.h>
132 #include <trace/events/napi.h>
133 
134 #include "net-sysfs.h"
135 
136 /* Instead of increasing this, you should create a hash table. */
137 #define MAX_GRO_SKBS 8
138 
139 /* This should be increased if a protocol with a bigger head is added. */
140 #define GRO_MAX_HEAD (MAX_HEADER + 128)
141 
142 /*
143  *	The list of packet types we will receive (as opposed to discard)
144  *	and the routines to invoke.
145  *
146  *	Why 16?  Because with 16 the only overlap we get on a hash of the
147  *	low nibble of the protocol value is RARP/SNAP/X.25.
148  *
149  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
150  *             sure which should go first, but I bet it won't make much
151  *             difference if we are running VLANs.  The good news is that
152  *             this protocol won't be in the list unless compiled in, so
153  *             the average user (w/out VLANs) will not be adversely affected.
154  *             --BLG
155  *
156  *		0800	IP
157  *		8100    802.1Q VLAN
158  *		0001	802.3
159  *		0002	AX.25
160  *		0004	802.2
161  *		8035	RARP
162  *		0005	SNAP
163  *		0805	X.25
164  *		0806	ARP
165  *		8137	IPX
166  *		0009	Localtalk
167  *		86DD	IPv6
168  */
169 
170 #define PTYPE_HASH_SIZE	(16)
171 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
172 
173 static DEFINE_SPINLOCK(ptype_lock);
174 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
175 static struct list_head ptype_all __read_mostly;	/* Taps */
176 
177 /*
178  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
179  * semaphore.
180  *
181  * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
182  *
183  * Writers must hold the rtnl semaphore while they loop through the
184  * dev_base_head list, and hold dev_base_lock for writing when they do the
185  * actual updates.  This allows pure readers to access the list even
186  * while a writer is preparing to update it.
187  *
188  * To put it another way, dev_base_lock is held for writing only to
189  * protect against pure readers; the rtnl semaphore provides the
190  * protection against other writers.
191  *
192  * See, for example usages, register_netdevice() and
193  * unregister_netdevice(), which must be called with the rtnl
194  * semaphore held.
195  */
196 DEFINE_RWLOCK(dev_base_lock);
197 EXPORT_SYMBOL(dev_base_lock);
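
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * two ways a pure reader could walk the device list under the locking
 * rules described above.  The example_* function names are hypothetical.
 *
 *	static int example_count_devices_rcu(struct net *net)
 *	{
 *		struct net_device *dev;
 *		int count = 0;
 *
 *		rcu_read_lock();
 *		for_each_netdev_rcu(net, dev)
 *			count++;
 *		rcu_read_unlock();
 *		return count;
 *	}
 *
 *	static int example_count_devices_locked(struct net *net)
 *	{
 *		struct net_device *dev;
 *		int count = 0;
 *
 *		read_lock(&dev_base_lock);
 *		for_each_netdev(net, dev)
 *			count++;
 *		read_unlock(&dev_base_lock);
 *		return count;
 *	}
 */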
198 
199 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
200 {
201 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
202 	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
203 }
204 
205 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
206 {
207 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
208 }
209 
210 /* Device list insertion */
211 static int list_netdevice(struct net_device *dev)
212 {
213 	struct net *net = dev_net(dev);
214 
215 	ASSERT_RTNL();
216 
217 	write_lock_bh(&dev_base_lock);
218 	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
219 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
220 	hlist_add_head_rcu(&dev->index_hlist,
221 			   dev_index_hash(net, dev->ifindex));
222 	write_unlock_bh(&dev_base_lock);
223 	return 0;
224 }
225 
226 /* Device list removal
227  * caller must respect a RCU grace period before freeing/reusing dev
228  */
229 static void unlist_netdevice(struct net_device *dev)
230 {
231 	ASSERT_RTNL();
232 
233 	/* Unlink dev from the device chain */
234 	write_lock_bh(&dev_base_lock);
235 	list_del_rcu(&dev->dev_list);
236 	hlist_del_rcu(&dev->name_hlist);
237 	hlist_del_rcu(&dev->index_hlist);
238 	write_unlock_bh(&dev_base_lock);
239 }
240 
241 /*
242  *	Our notifier list
243  */
244 
245 static RAW_NOTIFIER_HEAD(netdev_chain);
246 
247 /*
248  *	Device drivers call our routines to queue packets here. We empty the
249  *	queue in the local softnet handler.
250  */
251 
252 DEFINE_PER_CPU(struct softnet_data, softnet_data);
253 EXPORT_PER_CPU_SYMBOL(softnet_data);
254 
255 #ifdef CONFIG_LOCKDEP
256 /*
257  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
258  * according to dev->type
259  */
260 static const unsigned short netdev_lock_type[] =
261 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
262 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
263 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
264 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
265 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
266 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
267 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
268 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
269 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
270 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
271 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
272 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
273 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
274 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
275 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
276 	 ARPHRD_VOID, ARPHRD_NONE};
277 
278 static const char *const netdev_lock_name[] =
279 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
280 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
281 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
282 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
283 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
284 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
285 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
286 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
287 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
288 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
289 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
290 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
291 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
292 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
293 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
294 	 "_xmit_VOID", "_xmit_NONE"};
295 
296 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
297 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
298 
299 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
300 {
301 	int i;
302 
303 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
304 		if (netdev_lock_type[i] == dev_type)
305 			return i;
306 	/* the last key is used by default */
307 	return ARRAY_SIZE(netdev_lock_type) - 1;
308 }
309 
310 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
311 						 unsigned short dev_type)
312 {
313 	int i;
314 
315 	i = netdev_lock_pos(dev_type);
316 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
317 				   netdev_lock_name[i]);
318 }
319 
320 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
321 {
322 	int i;
323 
324 	i = netdev_lock_pos(dev->type);
325 	lockdep_set_class_and_name(&dev->addr_list_lock,
326 				   &netdev_addr_lock_key[i],
327 				   netdev_lock_name[i]);
328 }
329 #else
330 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
331 						 unsigned short dev_type)
332 {
333 }
334 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
335 {
336 }
337 #endif
338 
339 /*******************************************************************************
340 
341 		Protocol management and registration routines
342 
343 *******************************************************************************/
344 
345 /*
346  *	Add a protocol ID to the list. Now that the input handler is
347  *	smarter we can dispense with all the messy stuff that used to be
348  *	here.
349  *
350  *	BEWARE!!! Protocol handlers that mangle input packets
351  *	MUST BE last in the hash buckets, and checking protocol handlers
352  *	MUST start from the promiscuous ptype_all chain in net_bh.
353  *	That is true now; do not change it.
354  *	Explanation: if a packet-mangling protocol handler were
355  *	first on the list, it could not tell that the packet
356  *	is cloned and should be copied-on-write, so it would
357  *	modify it in place and subsequent readers would see a broken packet.
358  *							--ANK (980803)
359  */
360 
361 /**
362  *	dev_add_pack - add packet handler
363  *	@pt: packet type declaration
364  *
365  *	Add a protocol handler to the networking stack. The passed &packet_type
366  *	is linked into kernel lists and may not be freed until it has been
367  *	removed from the kernel lists.
368  *
369  *	This call does not sleep, therefore it cannot
370  *	guarantee that all CPUs that are in the middle of receiving packets
371  *	will see the new packet type (until the next received packet).
372  */
373 
374 void dev_add_pack(struct packet_type *pt)
375 {
376 	int hash;
377 
378 	spin_lock_bh(&ptype_lock);
379 	if (pt->type == htons(ETH_P_ALL))
380 		list_add_rcu(&pt->list, &ptype_all);
381 	else {
382 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
383 		list_add_rcu(&pt->list, &ptype_base[hash]);
384 	}
385 	spin_unlock_bh(&ptype_lock);
386 }
387 EXPORT_SYMBOL(dev_add_pack);
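
/*
 * Illustrative sketch (editor's addition): how a protocol module might
 * register a receive handler with dev_add_pack().  ETH_P_FOO and the
 * foo_* names are hypothetical; module init/exit boilerplate is omitted.
 *
 *	static int foo_rcv(struct sk_buff *skb, struct net_device *dev,
 *			   struct packet_type *pt, struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type foo_packet_type __read_mostly = {
 *		.type	= cpu_to_be16(ETH_P_FOO),
 *		.func	= foo_rcv,
 *	};
 *
 * With .dev left NULL the handler sees matching packets from every device.
 * Register with dev_add_pack(&foo_packet_type) and tear down with
 * dev_remove_pack(&foo_packet_type); a type of htons(ETH_P_ALL) would
 * instead put the handler on the ptype_all tap list.
 */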
388 
389 /**
390  *	__dev_remove_pack	 - remove packet handler
391  *	@pt: packet type declaration
392  *
393  *	Remove a protocol handler that was previously added to the kernel
394  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
395  *	from the kernel lists and can be freed or reused once this function
396  *	returns.
397  *
398  *      The packet type might still be in use by receivers
399  *	and must not be freed until after all the CPUs have gone
400  *	through a quiescent state.
401  */
402 void __dev_remove_pack(struct packet_type *pt)
403 {
404 	struct list_head *head;
405 	struct packet_type *pt1;
406 
407 	spin_lock_bh(&ptype_lock);
408 
409 	if (pt->type == htons(ETH_P_ALL))
410 		head = &ptype_all;
411 	else
412 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
413 
414 	list_for_each_entry(pt1, head, list) {
415 		if (pt == pt1) {
416 			list_del_rcu(&pt->list);
417 			goto out;
418 		}
419 	}
420 
421 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
422 out:
423 	spin_unlock_bh(&ptype_lock);
424 }
425 EXPORT_SYMBOL(__dev_remove_pack);
426 
427 /**
428  *	dev_remove_pack	 - remove packet handler
429  *	@pt: packet type declaration
430  *
431  *	Remove a protocol handler that was previously added to the kernel
432  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
433  *	from the kernel lists and can be freed or reused once this function
434  *	returns.
435  *
436  *	This call sleeps to guarantee that no CPU is looking at the packet
437  *	type after return.
438  */
439 void dev_remove_pack(struct packet_type *pt)
440 {
441 	__dev_remove_pack(pt);
442 
443 	synchronize_net();
444 }
445 EXPORT_SYMBOL(dev_remove_pack);
446 
447 /******************************************************************************
448 
449 		      Device Boot-time Settings Routines
450 
451 *******************************************************************************/
452 
453 /* Boot time configuration table */
454 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
455 
456 /**
457  *	netdev_boot_setup_add	- add new setup entry
458  *	@name: name of the device
459  *	@map: configured settings for the device
460  *
461  *	Adds a new setup entry to the dev_boot_setup list.  The function
462  *	returns 0 on error and 1 on success.  This is a generic routine for
463  *	all netdevices.
464  */
465 static int netdev_boot_setup_add(char *name, struct ifmap *map)
466 {
467 	struct netdev_boot_setup *s;
468 	int i;
469 
470 	s = dev_boot_setup;
471 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
472 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
473 			memset(s[i].name, 0, sizeof(s[i].name));
474 			strlcpy(s[i].name, name, IFNAMSIZ);
475 			memcpy(&s[i].map, map, sizeof(s[i].map));
476 			break;
477 		}
478 	}
479 
480 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
481 }
482 
483 /**
484  *	netdev_boot_setup_check	- check boot time settings
485  *	@dev: the netdevice
486  *
487  *	Check the boot time settings for the device.
488  *	Any settings found are applied to the device so they can be used
489  *	later during device probing.
490  *	Returns 0 if no settings were found, 1 if they were.
491  */
492 int netdev_boot_setup_check(struct net_device *dev)
493 {
494 	struct netdev_boot_setup *s = dev_boot_setup;
495 	int i;
496 
497 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
498 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
499 		    !strcmp(dev->name, s[i].name)) {
500 			dev->irq 	= s[i].map.irq;
501 			dev->base_addr 	= s[i].map.base_addr;
502 			dev->mem_start 	= s[i].map.mem_start;
503 			dev->mem_end 	= s[i].map.mem_end;
504 			return 1;
505 		}
506 	}
507 	return 0;
508 }
509 EXPORT_SYMBOL(netdev_boot_setup_check);
510 
511 
512 /**
513  *	netdev_boot_base	- get address from boot time settings
514  *	@prefix: prefix for network device
515  *	@unit: id for network device
516  *
517  *	Check the boot time settings for the base address of the device.
518  *	Returns the configured base address, 1 if a device with that name
519  *	is already registered (so it should not be probed again), or 0 if
520  *	no settings were found.
521  */
522 unsigned long netdev_boot_base(const char *prefix, int unit)
523 {
524 	const struct netdev_boot_setup *s = dev_boot_setup;
525 	char name[IFNAMSIZ];
526 	int i;
527 
528 	sprintf(name, "%s%d", prefix, unit);
529 
530 	/*
531 	 * If the device is already registered then return a base of 1
532 	 * to indicate that this interface should not be probed
533 	 */
534 	if (__dev_get_by_name(&init_net, name))
535 		return 1;
536 
537 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
538 		if (!strcmp(name, s[i].name))
539 			return s[i].map.base_addr;
540 	return 0;
541 }
542 
543 /*
544  * Saves the settings configured at boot time for any netdevice.
545  */
546 int __init netdev_boot_setup(char *str)
547 {
548 	int ints[5];
549 	struct ifmap map;
550 
551 	str = get_options(str, ARRAY_SIZE(ints), ints);
552 	if (!str || !*str)
553 		return 0;
554 
555 	/* Save settings */
556 	memset(&map, 0, sizeof(map));
557 	if (ints[0] > 0)
558 		map.irq = ints[1];
559 	if (ints[0] > 1)
560 		map.base_addr = ints[2];
561 	if (ints[0] > 2)
562 		map.mem_start = ints[3];
563 	if (ints[0] > 3)
564 		map.mem_end = ints[4];
565 
566 	/* Add new entry to the list */
567 	return netdev_boot_setup_add(str, &map);
568 }
569 
570 __setup("netdev=", netdev_boot_setup);
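
/*
 * Editor's note: the parser above reads up to four integers followed by a
 * device name, so a kernel command line entry takes the form
 *
 *	netdev=<irq>,<base_addr>,<mem_start>,<mem_end>,<name>
 *
 * For example "netdev=9,0x300,0,0,eth0" records IRQ 9 and I/O base 0x300
 * for eth0, which netdev_boot_setup_check() applies when the device is
 * probed later.
 */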
571 
572 /*******************************************************************************
573 
574 			    Device Interface Subroutines
575 
576 *******************************************************************************/
577 
578 /**
579  *	__dev_get_by_name	- find a device by its name
580  *	@net: the applicable net namespace
581  *	@name: name to find
582  *
583  *	Find an interface by name. Must be called under the RTNL semaphore
584  *	or @dev_base_lock. If the name is found a pointer to the device
585  *	is returned. If the name is not found then %NULL is returned. The
586  *	reference counters are not incremented so the caller must be
587  *	careful with locks.
588  */
589 
590 struct net_device *__dev_get_by_name(struct net *net, const char *name)
591 {
592 	struct hlist_node *p;
593 	struct net_device *dev;
594 	struct hlist_head *head = dev_name_hash(net, name);
595 
596 	hlist_for_each_entry(dev, p, head, name_hlist)
597 		if (!strncmp(dev->name, name, IFNAMSIZ))
598 			return dev;
599 
600 	return NULL;
601 }
602 EXPORT_SYMBOL(__dev_get_by_name);
603 
604 /**
605  *	dev_get_by_name_rcu	- find a device by its name
606  *	@net: the applicable net namespace
607  *	@name: name to find
608  *
609  *	Find an interface by name.
610  *	If the name is found a pointer to the device is returned.
611  * 	If the name is not found then %NULL is returned.
612  *	The reference counters are not incremented so the caller must be
613  *	careful with locks. The caller must hold RCU lock.
614  */
615 
616 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
617 {
618 	struct hlist_node *p;
619 	struct net_device *dev;
620 	struct hlist_head *head = dev_name_hash(net, name);
621 
622 	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
623 		if (!strncmp(dev->name, name, IFNAMSIZ))
624 			return dev;
625 
626 	return NULL;
627 }
628 EXPORT_SYMBOL(dev_get_by_name_rcu);
629 
630 /**
631  *	dev_get_by_name		- find a device by its name
632  *	@net: the applicable net namespace
633  *	@name: name to find
634  *
635  *	Find an interface by name. This can be called from any
636  *	context and does its own locking. The returned handle has
637  *	the usage count incremented and the caller must use dev_put() to
638  *	release it when it is no longer needed. %NULL is returned if no
639  *	matching device is found.
640  */
641 
642 struct net_device *dev_get_by_name(struct net *net, const char *name)
643 {
644 	struct net_device *dev;
645 
646 	rcu_read_lock();
647 	dev = dev_get_by_name_rcu(net, name);
648 	if (dev)
649 		dev_hold(dev);
650 	rcu_read_unlock();
651 	return dev;
652 }
653 EXPORT_SYMBOL(dev_get_by_name);
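
/*
 * Illustrative sketch (editor's addition): picking between the lookup
 * variants above.  dev_get_by_index()/dev_get_by_index_rcu() below follow
 * exactly the same pattern; "eth0" is only an example name.
 *
 * From any context, taking a reference that must be dropped with dev_put():
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *	if (dev) {
 *		printk(KERN_DEBUG "found %s\n", dev->name);
 *		dev_put(dev);
 *	}
 *
 * Without taking a reference, valid only inside the RCU read-side section:
 *
 *	rcu_read_lock();
 *	dev = dev_get_by_name_rcu(&init_net, "eth0");
 *	if (dev)
 *		printk(KERN_DEBUG "found %s\n", dev->name);
 *	rcu_read_unlock();
 */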
654 
655 /**
656  *	__dev_get_by_index - find a device by its ifindex
657  *	@net: the applicable net namespace
658  *	@ifindex: index of device
659  *
660  *	Search for an interface by index. Returns %NULL if the device
661  *	is not found or a pointer to the device. The device has not
662  *	had its reference counter increased so the caller must be careful
663  *	about locking. The caller must hold either the RTNL semaphore
664  *	or @dev_base_lock.
665  */
666 
667 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
668 {
669 	struct hlist_node *p;
670 	struct net_device *dev;
671 	struct hlist_head *head = dev_index_hash(net, ifindex);
672 
673 	hlist_for_each_entry(dev, p, head, index_hlist)
674 		if (dev->ifindex == ifindex)
675 			return dev;
676 
677 	return NULL;
678 }
679 EXPORT_SYMBOL(__dev_get_by_index);
680 
681 /**
682  *	dev_get_by_index_rcu - find a device by its ifindex
683  *	@net: the applicable net namespace
684  *	@ifindex: index of device
685  *
686  *	Search for an interface by index. Returns %NULL if the device
687  *	is not found or a pointer to the device. The device has not
688  *	had its reference counter increased so the caller must be careful
689  *	about locking. The caller must hold RCU lock.
690  */
691 
692 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
693 {
694 	struct hlist_node *p;
695 	struct net_device *dev;
696 	struct hlist_head *head = dev_index_hash(net, ifindex);
697 
698 	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
699 		if (dev->ifindex == ifindex)
700 			return dev;
701 
702 	return NULL;
703 }
704 EXPORT_SYMBOL(dev_get_by_index_rcu);
705 
706 
707 /**
708  *	dev_get_by_index - find a device by its ifindex
709  *	@net: the applicable net namespace
710  *	@ifindex: index of device
711  *
712  *	Search for an interface by index. Returns NULL if the device
713  *	is not found or a pointer to the device. The device returned has
714  *	had a reference added and the pointer is safe until the user calls
715  *	dev_put to indicate they have finished with it.
716  */
717 
718 struct net_device *dev_get_by_index(struct net *net, int ifindex)
719 {
720 	struct net_device *dev;
721 
722 	rcu_read_lock();
723 	dev = dev_get_by_index_rcu(net, ifindex);
724 	if (dev)
725 		dev_hold(dev);
726 	rcu_read_unlock();
727 	return dev;
728 }
729 EXPORT_SYMBOL(dev_get_by_index);
730 
731 /**
732  *	dev_getbyhwaddr - find a device by its hardware address
733  *	@net: the applicable net namespace
734  *	@type: media type of device
735  *	@ha: hardware address
736  *
737  *	Search for an interface by MAC address. Returns NULL if the device
738  *	is not found or a pointer to the device. The caller must hold the
739  *	rtnl semaphore. The returned device has not had its ref count increased
740  *	and the caller must therefore be careful about locking
741  *
742  *	BUGS:
743  *	If the API was consistent this would be __dev_get_by_hwaddr
744  */
745 
746 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
747 {
748 	struct net_device *dev;
749 
750 	ASSERT_RTNL();
751 
752 	for_each_netdev(net, dev)
753 		if (dev->type == type &&
754 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
755 			return dev;
756 
757 	return NULL;
758 }
759 EXPORT_SYMBOL(dev_getbyhwaddr);
760 
761 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
762 {
763 	struct net_device *dev;
764 
765 	ASSERT_RTNL();
766 	for_each_netdev(net, dev)
767 		if (dev->type == type)
768 			return dev;
769 
770 	return NULL;
771 }
772 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
773 
774 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
775 {
776 	struct net_device *dev;
777 
778 	rtnl_lock();
779 	dev = __dev_getfirstbyhwtype(net, type);
780 	if (dev)
781 		dev_hold(dev);
782 	rtnl_unlock();
783 	return dev;
784 }
785 EXPORT_SYMBOL(dev_getfirstbyhwtype);
786 
787 /**
788  *	dev_get_by_flags - find any device with given flags
789  *	@net: the applicable net namespace
790  *	@if_flags: IFF_* values
791  *	@mask: bitmask of bits in if_flags to check
792  *
793  *	Search for any interface with the given flags. Returns NULL if a device
794  *	is not found or a pointer to the device. The device returned has
795  *	had a reference added and the pointer is safe until the user calls
796  *	dev_put to indicate they have finished with it.
797  */
798 
799 struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
800 				    unsigned short mask)
801 {
802 	struct net_device *dev, *ret;
803 
804 	ret = NULL;
805 	rcu_read_lock();
806 	for_each_netdev_rcu(net, dev) {
807 		if (((dev->flags ^ if_flags) & mask) == 0) {
808 			dev_hold(dev);
809 			ret = dev;
810 			break;
811 		}
812 	}
813 	rcu_read_unlock();
814 	return ret;
815 }
816 EXPORT_SYMBOL(dev_get_by_flags);
817 
818 /**
819  *	dev_valid_name - check if name is okay for network device
820  *	@name: name string
821  *
822  *	Network device names need to be valid file names
823  *	to allow sysfs to work.  We also disallow any kind of
824  *	whitespace.
825  */
826 int dev_valid_name(const char *name)
827 {
828 	if (*name == '\0')
829 		return 0;
830 	if (strlen(name) >= IFNAMSIZ)
831 		return 0;
832 	if (!strcmp(name, ".") || !strcmp(name, ".."))
833 		return 0;
834 
835 	while (*name) {
836 		if (*name == '/' || isspace(*name))
837 			return 0;
838 		name++;
839 	}
840 	return 1;
841 }
842 EXPORT_SYMBOL(dev_valid_name);
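
/*
 * Editor's note, examples of the checks above:
 *
 *	dev_valid_name("eth0")    returns 1 (acceptable)
 *	dev_valid_name("")        returns 0 (empty)
 *	dev_valid_name("..")      returns 0 (reserved directory name)
 *	dev_valid_name("my/dev")  returns 0 (contains '/')
 *	dev_valid_name("my dev")  returns 0 (contains whitespace)
 */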
843 
844 /**
845  *	__dev_alloc_name - allocate a name for a device
846  *	@net: network namespace to allocate the device name in
847  *	@name: name format string
848  *	@buf:  scratch buffer and result name string
849  *
850  *	Passed a format string - eg "lt%d" - it will try to find a suitable
851  *	id. It scans the list of devices to build up a free map, then chooses
852  *	the first empty slot. The caller must hold the dev_base or rtnl lock
853  *	while allocating the name and adding the device in order to avoid
854  *	duplicates.
855  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
856  *	Returns the number of the unit assigned or a negative errno code.
857  */
858 
859 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
860 {
861 	int i = 0;
862 	const char *p;
863 	const int max_netdevices = 8*PAGE_SIZE;
864 	unsigned long *inuse;
865 	struct net_device *d;
866 
867 	p = strnchr(name, IFNAMSIZ-1, '%');
868 	if (p) {
869 		/*
870 		 * Verify the string as this thing may have come from
871 		 * the user.  There must be exactly one "%d" and no other "%"
872 		 * characters.
873 		 */
874 		if (p[1] != 'd' || strchr(p + 2, '%'))
875 			return -EINVAL;
876 
877 		/* Use one page as a bit array of possible slots */
878 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
879 		if (!inuse)
880 			return -ENOMEM;
881 
882 		for_each_netdev(net, d) {
883 			if (!sscanf(d->name, name, &i))
884 				continue;
885 			if (i < 0 || i >= max_netdevices)
886 				continue;
887 
888 			/* avoid cases where sscanf is not an exact inverse of printf */
889 			snprintf(buf, IFNAMSIZ, name, i);
890 			if (!strncmp(buf, d->name, IFNAMSIZ))
891 				set_bit(i, inuse);
892 		}
893 
894 		i = find_first_zero_bit(inuse, max_netdevices);
895 		free_page((unsigned long) inuse);
896 	}
897 
898 	if (buf != name)
899 		snprintf(buf, IFNAMSIZ, name, i);
900 	if (!__dev_get_by_name(net, buf))
901 		return i;
902 
903 	/* It is possible to run out of possible slots
904 	 * when the name is long and there isn't enough space left
905 	 * for the digits, or if all bits are used.
906 	 */
907 	return -ENFILE;
908 }
909 
910 /**
911  *	dev_alloc_name - allocate a name for a device
912  *	@dev: device
913  *	@name: name format string
914  *
915  *	Passed a format string - eg "lt%d" - it will try to find a suitable
916  *	id. It scans the list of devices to build up a free map, then chooses
917  *	the first empty slot. The caller must hold the dev_base or rtnl lock
918  *	while allocating the name and adding the device in order to avoid
919  *	duplicates.
920  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
921  *	Returns the number of the unit assigned or a negative errno code.
922  */
923 
924 int dev_alloc_name(struct net_device *dev, const char *name)
925 {
926 	char buf[IFNAMSIZ];
927 	struct net *net;
928 	int ret;
929 
930 	BUG_ON(!dev_net(dev));
931 	net = dev_net(dev);
932 	ret = __dev_alloc_name(net, name, buf);
933 	if (ret >= 0)
934 		strlcpy(dev->name, buf, IFNAMSIZ);
935 	return ret;
936 }
937 EXPORT_SYMBOL(dev_alloc_name);
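
/*
 * Illustrative sketch (editor's addition): a driver requesting the next
 * free "foo%d" name before registration, under rtnl_lock() as required by
 * the comment above.  The "foo%d" template and the error label are
 * hypothetical; the call turns the template into "foo0", "foo1", ...
 *
 *	err = dev_alloc_name(dev, "foo%d");
 *	if (err < 0)
 *		goto out_free_netdev;
 *	err = register_netdevice(dev);
 */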
938 
939 static int dev_get_valid_name(struct net *net, const char *name, char *buf,
940 			      bool fmt)
941 {
942 	if (!dev_valid_name(name))
943 		return -EINVAL;
944 
945 	if (fmt && strchr(name, '%'))
946 		return __dev_alloc_name(net, name, buf);
947 	else if (__dev_get_by_name(net, name))
948 		return -EEXIST;
949 	else if (buf != name)
950 		strlcpy(buf, name, IFNAMSIZ);
951 
952 	return 0;
953 }
954 
955 /**
956  *	dev_change_name - change name of a device
957  *	@dev: device
958  *	@newname: name (or format string) must be at least IFNAMSIZ
959  *
960  *	Change the name of a device. A format string such as "eth%d" can
961  *	be passed for wildcarding.
962  */
963 int dev_change_name(struct net_device *dev, const char *newname)
964 {
965 	char oldname[IFNAMSIZ];
966 	int err = 0;
967 	int ret;
968 	struct net *net;
969 
970 	ASSERT_RTNL();
971 	BUG_ON(!dev_net(dev));
972 
973 	net = dev_net(dev);
974 	if (dev->flags & IFF_UP)
975 		return -EBUSY;
976 
977 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
978 		return 0;
979 
980 	memcpy(oldname, dev->name, IFNAMSIZ);
981 
982 	err = dev_get_valid_name(net, newname, dev->name, 1);
983 	if (err < 0)
984 		return err;
985 
986 rollback:
987 	/* For now only devices in the initial network namespace
988 	 * are in sysfs.
989 	 */
990 	if (net_eq(net, &init_net)) {
991 		ret = device_rename(&dev->dev, dev->name);
992 		if (ret) {
993 			memcpy(dev->name, oldname, IFNAMSIZ);
994 			return ret;
995 		}
996 	}
997 
998 	write_lock_bh(&dev_base_lock);
999 	hlist_del(&dev->name_hlist);
1000 	write_unlock_bh(&dev_base_lock);
1001 
1002 	synchronize_rcu();
1003 
1004 	write_lock_bh(&dev_base_lock);
1005 	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1006 	write_unlock_bh(&dev_base_lock);
1007 
1008 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1009 	ret = notifier_to_errno(ret);
1010 
1011 	if (ret) {
1012 		/* err >= 0 after dev_alloc_name() or stores the first errno */
1013 		if (err >= 0) {
1014 			err = ret;
1015 			memcpy(dev->name, oldname, IFNAMSIZ);
1016 			goto rollback;
1017 		} else {
1018 			printk(KERN_ERR
1019 			       "%s: name change rollback failed: %d.\n",
1020 			       dev->name, ret);
1021 		}
1022 	}
1023 
1024 	return err;
1025 }
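
/*
 * Illustrative sketch (editor's addition): renaming a device that is down,
 * under the rtnl semaphore as asserted above.  The names are examples; a
 * "%d" template is also accepted, as the kernel-doc notes.
 *
 *	rtnl_lock();
 *	dev = __dev_get_by_name(&init_net, "eth0");
 *	if (dev && !(dev->flags & IFF_UP))
 *		err = dev_change_name(dev, "lan0");
 *	rtnl_unlock();
 */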
1026 
1027 /**
1028  *	dev_set_alias - change ifalias of a device
1029  *	@dev: device
1030  *	@alias: name up to IFALIASZ
1031  *	@len: limit of bytes to copy from info
1032  *
1033  *	Set the ifalias for a device.
1034  */
1035 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1036 {
1037 	ASSERT_RTNL();
1038 
1039 	if (len >= IFALIASZ)
1040 		return -EINVAL;
1041 
1042 	if (!len) {
1043 		if (dev->ifalias) {
1044 			kfree(dev->ifalias);
1045 			dev->ifalias = NULL;
1046 		}
1047 		return 0;
1048 	}
1049 
1050 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1051 	if (!dev->ifalias)
1052 		return -ENOMEM;
1053 
1054 	strlcpy(dev->ifalias, alias, len+1);
1055 	return len;
1056 }
1057 
1058 
1059 /**
1060  *	netdev_features_change - device changes features
1061  *	@dev: device to cause notification
1062  *
1063  *	Called to indicate a device has changed features.
1064  */
1065 void netdev_features_change(struct net_device *dev)
1066 {
1067 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1068 }
1069 EXPORT_SYMBOL(netdev_features_change);
1070 
1071 /**
1072  *	netdev_state_change - device changes state
1073  *	@dev: device to cause notification
1074  *
1075  *	Called to indicate a device has changed state. This function calls
1076  *	the notifier chains for netdev_chain and sends a NEWLINK message
1077  *	to the routing socket.
1078  */
1079 void netdev_state_change(struct net_device *dev)
1080 {
1081 	if (dev->flags & IFF_UP) {
1082 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1083 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1084 	}
1085 }
1086 EXPORT_SYMBOL(netdev_state_change);
1087 
1088 void netdev_bonding_change(struct net_device *dev, unsigned long event)
1089 {
1090 	call_netdevice_notifiers(event, dev);
1091 }
1092 EXPORT_SYMBOL(netdev_bonding_change);
1093 
1094 /**
1095  *	dev_load 	- load a network module
1096  *	@net: the applicable net namespace
1097  *	@name: name of interface
1098  *
1099  *	If a network interface is not present and the process has suitable
1100  *	privileges this function loads the module. If module loading is not
1101  *	available in this kernel then it becomes a nop.
1102  */
1103 
1104 void dev_load(struct net *net, const char *name)
1105 {
1106 	struct net_device *dev;
1107 
1108 	rcu_read_lock();
1109 	dev = dev_get_by_name_rcu(net, name);
1110 	rcu_read_unlock();
1111 
1112 	if (!dev && capable(CAP_NET_ADMIN))
1113 		request_module("%s", name);
1114 }
1115 EXPORT_SYMBOL(dev_load);
1116 
1117 static int __dev_open(struct net_device *dev)
1118 {
1119 	const struct net_device_ops *ops = dev->netdev_ops;
1120 	int ret;
1121 
1122 	ASSERT_RTNL();
1123 
1124 	/*
1125 	 *	Is it even present?
1126 	 */
1127 	if (!netif_device_present(dev))
1128 		return -ENODEV;
1129 
1130 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1131 	ret = notifier_to_errno(ret);
1132 	if (ret)
1133 		return ret;
1134 
1135 	/*
1136 	 *	Call device private open method
1137 	 */
1138 	set_bit(__LINK_STATE_START, &dev->state);
1139 
1140 	if (ops->ndo_validate_addr)
1141 		ret = ops->ndo_validate_addr(dev);
1142 
1143 	if (!ret && ops->ndo_open)
1144 		ret = ops->ndo_open(dev);
1145 
1146 	/*
1147 	 *	If the open call succeeded then:
1148 	 */
1149 
1150 	if (ret)
1151 		clear_bit(__LINK_STATE_START, &dev->state);
1152 	else {
1153 		/*
1154 		 *	Set the flags.
1155 		 */
1156 		dev->flags |= IFF_UP;
1157 
1158 		/*
1159 		 *	Enable NET_DMA
1160 		 */
1161 		net_dmaengine_get();
1162 
1163 		/*
1164 		 *	Initialize multicasting status
1165 		 */
1166 		dev_set_rx_mode(dev);
1167 
1168 		/*
1169 		 *	Wakeup transmit queue engine
1170 		 */
1171 		dev_activate(dev);
1172 	}
1173 
1174 	return ret;
1175 }
1176 
1177 /**
1178  *	dev_open	- prepare an interface for use.
1179  *	@dev:	device to open
1180  *
1181  *	Takes a device from down to up state. The device's private open
1182  *	function is invoked and then the multicast lists are loaded. Finally
1183  *	the device is moved into the up state and a %NETDEV_UP message is
1184  *	sent to the netdev notifier chain.
1185  *
1186  *	Calling this function on an active interface is a nop. On a failure
1187  *	a negative errno code is returned.
1188  */
1189 int dev_open(struct net_device *dev)
1190 {
1191 	int ret;
1192 
1193 	/*
1194 	 *	Is it already up?
1195 	 */
1196 	if (dev->flags & IFF_UP)
1197 		return 0;
1198 
1199 	/*
1200 	 *	Open device
1201 	 */
1202 	ret = __dev_open(dev);
1203 	if (ret < 0)
1204 		return ret;
1205 
1206 	/*
1207 	 *	... and announce new interface.
1208 	 */
1209 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1210 	call_netdevice_notifiers(NETDEV_UP, dev);
1211 
1212 	return ret;
1213 }
1214 EXPORT_SYMBOL(dev_open);
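
/*
 * Illustrative sketch (editor's addition): bringing an interface up from
 * kernel code.  dev_open() must run under the rtnl semaphore (__dev_open()
 * asserts it); "eth0" is only an example name.
 *
 *	rtnl_lock();
 *	dev = __dev_get_by_name(&init_net, "eth0");
 *	if (dev)
 *		err = dev_open(dev);
 *	rtnl_unlock();
 *
 * Calling it on an interface that is already IFF_UP simply returns 0.
 */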
1215 
1216 static int __dev_close(struct net_device *dev)
1217 {
1218 	const struct net_device_ops *ops = dev->netdev_ops;
1219 
1220 	ASSERT_RTNL();
1221 	might_sleep();
1222 
1223 	/*
1224 	 *	Tell people we are going down, so that they can
1225 	 *	prepare for it while the device is still operating.
1226 	 */
1227 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1228 
1229 	clear_bit(__LINK_STATE_START, &dev->state);
1230 
1231 	/* Synchronize with any scheduled poll. We cannot touch the poll list;
1232 	 * it may even be on a different cpu. So just clear netif_running().
1233 	 *
1234 	 * dev->stop() will invoke napi_disable() on all of its
1235 	 * napi_struct instances on this device.
1236 	 */
1237 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1238 
1239 	dev_deactivate(dev);
1240 
1241 	/*
1242 	 *	Call the device specific close. This cannot fail.
1243 	 *	Only if device is UP
1244 	 *
1245 	 *	We allow it to be called even after a DETACH hot-plug
1246 	 *	event.
1247 	 */
1248 	if (ops->ndo_stop)
1249 		ops->ndo_stop(dev);
1250 
1251 	/*
1252 	 *	Device is now down.
1253 	 */
1254 
1255 	dev->flags &= ~IFF_UP;
1256 
1257 	/*
1258 	 *	Shutdown NET_DMA
1259 	 */
1260 	net_dmaengine_put();
1261 
1262 	return 0;
1263 }
1264 
1265 /**
1266  *	dev_close - shutdown an interface.
1267  *	@dev: device to shutdown
1268  *
1269  *	This function moves an active device into down state. A
1270  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1271  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1272  *	chain.
1273  */
1274 int dev_close(struct net_device *dev)
1275 {
1276 	if (!(dev->flags & IFF_UP))
1277 		return 0;
1278 
1279 	__dev_close(dev);
1280 
1281 	/*
1282 	 * Tell people we are down
1283 	 */
1284 	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
1285 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1286 
1287 	return 0;
1288 }
1289 EXPORT_SYMBOL(dev_close);
1290 
1291 
1292 /**
1293  *	dev_disable_lro - disable Large Receive Offload on a device
1294  *	@dev: device
1295  *
1296  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1297  *	called under RTNL.  This is needed if received packets may be
1298  *	forwarded to another interface.
1299  */
1300 void dev_disable_lro(struct net_device *dev)
1301 {
1302 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1303 	    dev->ethtool_ops->set_flags) {
1304 		u32 flags = dev->ethtool_ops->get_flags(dev);
1305 		if (flags & ETH_FLAG_LRO) {
1306 			flags &= ~ETH_FLAG_LRO;
1307 			dev->ethtool_ops->set_flags(dev, flags);
1308 		}
1309 	}
1310 	WARN_ON(dev->features & NETIF_F_LRO);
1311 }
1312 EXPORT_SYMBOL(dev_disable_lro);
1313 
1314 
1315 static int dev_boot_phase = 1;
1316 
1317 /*
1318  *	Device change register/unregister. These are not inline or static
1319  *	as we export them to the world.
1320  */
1321 
1322 /**
1323  *	register_netdevice_notifier - register a network notifier block
1324  *	@nb: notifier
1325  *
1326  *	Register a notifier to be called when network device events occur.
1327  *	The notifier passed is linked into the kernel structures and must
1328  *	not be reused until it has been unregistered. A negative errno code
1329  *	is returned on a failure.
1330  *
1331  *	When registered, all registration and up events are replayed
1332  *	to the new notifier to give it a race-free
1333  *	view of the network device list.
1334  */
1335 
1336 int register_netdevice_notifier(struct notifier_block *nb)
1337 {
1338 	struct net_device *dev;
1339 	struct net_device *last;
1340 	struct net *net;
1341 	int err;
1342 
1343 	rtnl_lock();
1344 	err = raw_notifier_chain_register(&netdev_chain, nb);
1345 	if (err)
1346 		goto unlock;
1347 	if (dev_boot_phase)
1348 		goto unlock;
1349 	for_each_net(net) {
1350 		for_each_netdev(net, dev) {
1351 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1352 			err = notifier_to_errno(err);
1353 			if (err)
1354 				goto rollback;
1355 
1356 			if (!(dev->flags & IFF_UP))
1357 				continue;
1358 
1359 			nb->notifier_call(nb, NETDEV_UP, dev);
1360 		}
1361 	}
1362 
1363 unlock:
1364 	rtnl_unlock();
1365 	return err;
1366 
1367 rollback:
1368 	last = dev;
1369 	for_each_net(net) {
1370 		for_each_netdev(net, dev) {
1371 			if (dev == last)
1372 				break;
1373 
1374 			if (dev->flags & IFF_UP) {
1375 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1376 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1377 			}
1378 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1379 			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
1380 		}
1381 	}
1382 
1383 	raw_notifier_chain_unregister(&netdev_chain, nb);
1384 	goto unlock;
1385 }
1386 EXPORT_SYMBOL(register_netdevice_notifier);
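
/*
 * Illustrative sketch (editor's addition): a minimal notifier of the kind
 * described above.  The example_* names are hypothetical; note that the
 * data pointer passed to the callback is the struct net_device itself.
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		switch (event) {
 *		case NETDEV_UP:
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *			break;
 *		case NETDEV_GOING_DOWN:
 *			printk(KERN_INFO "%s is going down\n", dev->name);
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_netdev_notifier = {
 *		.notifier_call = example_netdev_event,
 *	};
 *
 * Register with register_netdevice_notifier(&example_netdev_notifier) and
 * remove it later with unregister_netdevice_notifier().
 */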
1387 
1388 /**
1389  *	unregister_netdevice_notifier - unregister a network notifier block
1390  *	@nb: notifier
1391  *
1392  *	Unregister a notifier previously registered by
1393  *	register_netdevice_notifier(). The notifier is unlinked from the
1394  *	kernel structures and may then be reused. A negative errno code
1395  *	is returned on a failure.
1396  */
1397 
1398 int unregister_netdevice_notifier(struct notifier_block *nb)
1399 {
1400 	int err;
1401 
1402 	rtnl_lock();
1403 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1404 	rtnl_unlock();
1405 	return err;
1406 }
1407 EXPORT_SYMBOL(unregister_netdevice_notifier);
1408 
1409 /**
1410  *	call_netdevice_notifiers - call all network notifier blocks
1411  *      @val: value passed unmodified to notifier function
1412  *      @dev: net_device pointer passed unmodified to notifier function
1413  *
1414  *	Call all network notifier blocks.  Parameters and return value
1415  *	are as for raw_notifier_call_chain().
1416  */
1417 
1418 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1419 {
1420 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1421 }
1422 
1423 /* When > 0 there are consumers of rx skb time stamps */
1424 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1425 
1426 void net_enable_timestamp(void)
1427 {
1428 	atomic_inc(&netstamp_needed);
1429 }
1430 EXPORT_SYMBOL(net_enable_timestamp);
1431 
1432 void net_disable_timestamp(void)
1433 {
1434 	atomic_dec(&netstamp_needed);
1435 }
1436 EXPORT_SYMBOL(net_disable_timestamp);
1437 
1438 static inline void net_timestamp(struct sk_buff *skb)
1439 {
1440 	if (atomic_read(&netstamp_needed))
1441 		__net_timestamp(skb);
1442 	else
1443 		skb->tstamp.tv64 = 0;
1444 }
1445 
1446 /**
1447  * dev_forward_skb - loopback an skb to another netif
1448  *
1449  * @dev: destination network device
1450  * @skb: buffer to forward
1451  *
1452  * return values:
1453  *	NET_RX_SUCCESS	(no congestion)
1454  *	NET_RX_DROP     (packet was dropped)
1455  *
1456  * dev_forward_skb can be used for injecting an skb from the
1457  * start_xmit function of one device into the receive queue
1458  * of another device.
1459  *
1460  * The receiving device may be in another namespace, so
1461  * we have to clear all information in the skb that could
1462  * impact namespace isolation.
1463  */
1464 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1465 {
1466 	skb_orphan(skb);
1467 
1468 	if (!(dev->flags & IFF_UP))
1469 		return NET_RX_DROP;
1470 
1471 	if (skb->len > (dev->mtu + dev->hard_header_len))
1472 		return NET_RX_DROP;
1473 
1474 	skb_set_dev(skb, dev);
1475 	skb->tstamp.tv64 = 0;
1476 	skb->pkt_type = PACKET_HOST;
1477 	skb->protocol = eth_type_trans(skb, dev);
1478 	return netif_rx(skb);
1479 }
1480 EXPORT_SYMBOL_GPL(dev_forward_skb);
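
/*
 * Illustrative sketch (editor's addition): the pair-device use described
 * above - handing a frame from one device's transmit routine straight to
 * its peer's receive path.  example_get_peer() and the stats handling are
 * hypothetical.
 *
 *	static netdev_tx_t example_pair_xmit(struct sk_buff *skb,
 *					     struct net_device *dev)
 *	{
 *		struct net_device *peer = example_get_peer(dev);
 *
 *		if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS)
 *			dev->stats.tx_packets++;
 *		else
 *			dev->stats.tx_dropped++;
 *		return NETDEV_TX_OK;
 *	}
 */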
1481 
1482 /*
1483  *	Support routine. Sends outgoing frames to any network
1484  *	taps currently in use.
1485  */
1486 
1487 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1488 {
1489 	struct packet_type *ptype;
1490 
1491 #ifdef CONFIG_NET_CLS_ACT
1492 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1493 		net_timestamp(skb);
1494 #else
1495 	net_timestamp(skb);
1496 #endif
1497 
1498 	rcu_read_lock();
1499 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1500 		/* Never send packets back to the socket
1501 		 * they originated from - MvS (miquels@drinkel.ow.org)
1502 		 */
1503 		if ((ptype->dev == dev || !ptype->dev) &&
1504 		    (ptype->af_packet_priv == NULL ||
1505 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1506 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1507 			if (!skb2)
1508 				break;
1509 
1510 			/* The network header should be correctly
1511 			   set by the sender, so the check below is
1512 			   just protection against buggy protocols.
1513 			 */
1514 			skb_reset_mac_header(skb2);
1515 
1516 			if (skb_network_header(skb2) < skb2->data ||
1517 			    skb2->network_header > skb2->tail) {
1518 				if (net_ratelimit())
1519 					printk(KERN_CRIT "protocol %04x is "
1520 					       "buggy, dev %s\n",
1521 					       skb2->protocol, dev->name);
1522 				skb_reset_network_header(skb2);
1523 			}
1524 
1525 			skb2->transport_header = skb2->network_header;
1526 			skb2->pkt_type = PACKET_OUTGOING;
1527 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1528 		}
1529 	}
1530 	rcu_read_unlock();
1531 }
1532 
1533 
1534 static inline void __netif_reschedule(struct Qdisc *q)
1535 {
1536 	struct softnet_data *sd;
1537 	unsigned long flags;
1538 
1539 	local_irq_save(flags);
1540 	sd = &__get_cpu_var(softnet_data);
1541 	q->next_sched = sd->output_queue;
1542 	sd->output_queue = q;
1543 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1544 	local_irq_restore(flags);
1545 }
1546 
1547 void __netif_schedule(struct Qdisc *q)
1548 {
1549 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1550 		__netif_reschedule(q);
1551 }
1552 EXPORT_SYMBOL(__netif_schedule);
1553 
1554 void dev_kfree_skb_irq(struct sk_buff *skb)
1555 {
1556 	if (atomic_dec_and_test(&skb->users)) {
1557 		struct softnet_data *sd;
1558 		unsigned long flags;
1559 
1560 		local_irq_save(flags);
1561 		sd = &__get_cpu_var(softnet_data);
1562 		skb->next = sd->completion_queue;
1563 		sd->completion_queue = skb;
1564 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1565 		local_irq_restore(flags);
1566 	}
1567 }
1568 EXPORT_SYMBOL(dev_kfree_skb_irq);
1569 
1570 void dev_kfree_skb_any(struct sk_buff *skb)
1571 {
1572 	if (in_irq() || irqs_disabled())
1573 		dev_kfree_skb_irq(skb);
1574 	else
1575 		dev_kfree_skb(skb);
1576 }
1577 EXPORT_SYMBOL(dev_kfree_skb_any);
1578 
1579 
1580 /**
1581  * netif_device_detach - mark device as removed
1582  * @dev: network device
1583  *
1584  * Mark device as removed from system and therefore no longer available.
1585  */
1586 void netif_device_detach(struct net_device *dev)
1587 {
1588 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1589 	    netif_running(dev)) {
1590 		netif_tx_stop_all_queues(dev);
1591 	}
1592 }
1593 EXPORT_SYMBOL(netif_device_detach);
1594 
1595 /**
1596  * netif_device_attach - mark device as attached
1597  * @dev: network device
1598  *
1599  * Mark device as attached from system and restart if needed.
1600  */
1601 void netif_device_attach(struct net_device *dev)
1602 {
1603 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1604 	    netif_running(dev)) {
1605 		netif_tx_wake_all_queues(dev);
1606 		__netdev_watchdog_up(dev);
1607 	}
1608 }
1609 EXPORT_SYMBOL(netif_device_attach);
1610 
1611 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1612 {
1613 	return ((features & NETIF_F_GEN_CSUM) ||
1614 		((features & NETIF_F_IP_CSUM) &&
1615 		 protocol == htons(ETH_P_IP)) ||
1616 		((features & NETIF_F_IPV6_CSUM) &&
1617 		 protocol == htons(ETH_P_IPV6)) ||
1618 		((features & NETIF_F_FCOE_CRC) &&
1619 		 protocol == htons(ETH_P_FCOE)));
1620 }
1621 
1622 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1623 {
1624 	if (can_checksum_protocol(dev->features, skb->protocol))
1625 		return true;
1626 
1627 	if (skb->protocol == htons(ETH_P_8021Q)) {
1628 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1629 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1630 					  veh->h_vlan_encapsulated_proto))
1631 			return true;
1632 	}
1633 
1634 	return false;
1635 }
1636 
1637 /**
1638  * skb_dev_set -- assign a new device to a buffer
1639  * @skb: buffer for the new device
1640  * @dev: network device
1641  *
1642  * If an skb is owned by a device already, we have to reset
1643  * all data private to the namespace a device belongs to
1644  * before assigning it a new device.
1645  */
1646 #ifdef CONFIG_NET_NS
1647 void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
1648 {
1649 	skb_dst_drop(skb);
1650 	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
1651 		secpath_reset(skb);
1652 		nf_reset(skb);
1653 		skb_init_secmark(skb);
1654 		skb->mark = 0;
1655 		skb->priority = 0;
1656 		skb->nf_trace = 0;
1657 		skb->ipvs_property = 0;
1658 #ifdef CONFIG_NET_SCHED
1659 		skb->tc_index = 0;
1660 #endif
1661 	}
1662 	skb->dev = dev;
1663 }
1664 EXPORT_SYMBOL(skb_set_dev);
1665 #endif /* CONFIG_NET_NS */
1666 
1667 /*
1668  * Invalidate hardware checksum when packet is to be mangled, and
1669  * complete checksum manually on outgoing path.
1670  */
1671 int skb_checksum_help(struct sk_buff *skb)
1672 {
1673 	__wsum csum;
1674 	int ret = 0, offset;
1675 
1676 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1677 		goto out_set_summed;
1678 
1679 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1680 		/* Let GSO fix up the checksum. */
1681 		goto out_set_summed;
1682 	}
1683 
1684 	offset = skb->csum_start - skb_headroom(skb);
1685 	BUG_ON(offset >= skb_headlen(skb));
1686 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1687 
1688 	offset += skb->csum_offset;
1689 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1690 
1691 	if (skb_cloned(skb) &&
1692 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1693 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1694 		if (ret)
1695 			goto out;
1696 	}
1697 
1698 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1699 out_set_summed:
1700 	skb->ip_summed = CHECKSUM_NONE;
1701 out:
1702 	return ret;
1703 }
1704 EXPORT_SYMBOL(skb_checksum_help);
1705 
1706 /**
1707  *	skb_gso_segment - Perform segmentation on skb.
1708  *	@skb: buffer to segment
1709  *	@features: features for the output path (see dev->features)
1710  *
1711  *	This function segments the given skb and returns a list of segments.
1712  *
1713  *	It may return NULL if the skb requires no segmentation.  This is
1714  *	only possible when GSO is used for verifying header integrity.
1715  */
1716 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1717 {
1718 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1719 	struct packet_type *ptype;
1720 	__be16 type = skb->protocol;
1721 	int err;
1722 
1723 	skb_reset_mac_header(skb);
1724 	skb->mac_len = skb->network_header - skb->mac_header;
1725 	__skb_pull(skb, skb->mac_len);
1726 
1727 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1728 		struct net_device *dev = skb->dev;
1729 		struct ethtool_drvinfo info = {};
1730 
1731 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1732 			dev->ethtool_ops->get_drvinfo(dev, &info);
1733 
1734 		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1735 			"ip_summed=%d",
1736 		     info.driver, dev ? dev->features : 0L,
1737 		     skb->sk ? skb->sk->sk_route_caps : 0L,
1738 		     skb->len, skb->data_len, skb->ip_summed);
1739 
1740 		if (skb_header_cloned(skb) &&
1741 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1742 			return ERR_PTR(err);
1743 	}
1744 
1745 	rcu_read_lock();
1746 	list_for_each_entry_rcu(ptype,
1747 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1748 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1749 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1750 				err = ptype->gso_send_check(skb);
1751 				segs = ERR_PTR(err);
1752 				if (err || skb_gso_ok(skb, features))
1753 					break;
1754 				__skb_push(skb, (skb->data -
1755 						 skb_network_header(skb)));
1756 			}
1757 			segs = ptype->gso_segment(skb, features);
1758 			break;
1759 		}
1760 	}
1761 	rcu_read_unlock();
1762 
1763 	__skb_push(skb, skb->data - skb_mac_header(skb));
1764 
1765 	return segs;
1766 }
1767 EXPORT_SYMBOL(skb_gso_segment);
1768 
1769 /* Take action when hardware reception checksum errors are detected. */
1770 #ifdef CONFIG_BUG
1771 void netdev_rx_csum_fault(struct net_device *dev)
1772 {
1773 	if (net_ratelimit()) {
1774 		printk(KERN_ERR "%s: hw csum failure.\n",
1775 			dev ? dev->name : "<unknown>");
1776 		dump_stack();
1777 	}
1778 }
1779 EXPORT_SYMBOL(netdev_rx_csum_fault);
1780 #endif
1781 
1782 /* Actually, we should eliminate this check as soon as we know that:
1783  * 1. An IOMMU is present and is able to map all the memory.
1784  * 2. No high memory really exists on this machine.
1785  */
1786 
1787 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1788 {
1789 #ifdef CONFIG_HIGHMEM
1790 	int i;
1791 
1792 	if (dev->features & NETIF_F_HIGHDMA)
1793 		return 0;
1794 
1795 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1796 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1797 			return 1;
1798 
1799 #endif
1800 	return 0;
1801 }
1802 
1803 struct dev_gso_cb {
1804 	void (*destructor)(struct sk_buff *skb);
1805 };
1806 
1807 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1808 
1809 static void dev_gso_skb_destructor(struct sk_buff *skb)
1810 {
1811 	struct dev_gso_cb *cb;
1812 
1813 	do {
1814 		struct sk_buff *nskb = skb->next;
1815 
1816 		skb->next = nskb->next;
1817 		nskb->next = NULL;
1818 		kfree_skb(nskb);
1819 	} while (skb->next);
1820 
1821 	cb = DEV_GSO_CB(skb);
1822 	if (cb->destructor)
1823 		cb->destructor(skb);
1824 }
1825 
1826 /**
1827  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1828  *	@skb: buffer to segment
1829  *
1830  *	This function segments the given skb and stores the list of segments
1831  *	in skb->next.
1832  */
1833 static int dev_gso_segment(struct sk_buff *skb)
1834 {
1835 	struct net_device *dev = skb->dev;
1836 	struct sk_buff *segs;
1837 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1838 					 NETIF_F_SG : 0);
1839 
1840 	segs = skb_gso_segment(skb, features);
1841 
1842 	/* Verifying header integrity only. */
1843 	if (!segs)
1844 		return 0;
1845 
1846 	if (IS_ERR(segs))
1847 		return PTR_ERR(segs);
1848 
1849 	skb->next = segs;
1850 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1851 	skb->destructor = dev_gso_skb_destructor;
1852 
1853 	return 0;
1854 }
1855 
1856 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1857 			struct netdev_queue *txq)
1858 {
1859 	const struct net_device_ops *ops = dev->netdev_ops;
1860 	int rc = NETDEV_TX_OK;
1861 
1862 	if (likely(!skb->next)) {
1863 		if (!list_empty(&ptype_all))
1864 			dev_queue_xmit_nit(skb, dev);
1865 
1866 		if (netif_needs_gso(dev, skb)) {
1867 			if (unlikely(dev_gso_segment(skb)))
1868 				goto out_kfree_skb;
1869 			if (skb->next)
1870 				goto gso;
1871 		}
1872 
1873 		/*
1874 		 * If the device doesn't need skb->dst, release it right now
1875 		 * while it's still hot in this CPU's cache.
1876 		 */
1877 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1878 			skb_dst_drop(skb);
1879 
1880 		rc = ops->ndo_start_xmit(skb, dev);
1881 		if (rc == NETDEV_TX_OK)
1882 			txq_trans_update(txq);
1883 		/*
1884 		 * TODO: if skb_orphan() was called by
1885 		 * dev->hard_start_xmit() (for example, the unmodified
1886 		 * igb driver does that; bnx2 doesn't), then
1887 		 * skb_tx_software_timestamp() will be unable to send
1888 		 * back the time stamp.
1889 		 *
1890 		 * How can this be prevented? Always create another
1891 		 * reference to the socket before calling
1892 		 * dev->hard_start_xmit()? Prevent skb_orphan() from doing
1893 		 * anything in dev->hard_start_xmit() by clearing the skb
1894 		 * destructor before the call and restoring it afterwards,
1895 		 * then doing the skb_orphan() ourselves?
1896 		 */
1897 		return rc;
1898 	}
1899 
1900 gso:
1901 	do {
1902 		struct sk_buff *nskb = skb->next;
1903 
1904 		skb->next = nskb->next;
1905 		nskb->next = NULL;
1906 
1907 		/*
1908 		 * If the device doesn't need nskb->dst, release it right now
1909 		 * while it's still hot in this CPU's cache.
1910 		 */
1911 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1912 			skb_dst_drop(nskb);
1913 
1914 		rc = ops->ndo_start_xmit(nskb, dev);
1915 		if (unlikely(rc != NETDEV_TX_OK)) {
1916 			if (rc & ~NETDEV_TX_MASK)
1917 				goto out_kfree_gso_skb;
1918 			nskb->next = skb->next;
1919 			skb->next = nskb;
1920 			return rc;
1921 		}
1922 		txq_trans_update(txq);
1923 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1924 			return NETDEV_TX_BUSY;
1925 	} while (skb->next);
1926 
1927 out_kfree_gso_skb:
1928 	if (likely(skb->next == NULL))
1929 		skb->destructor = DEV_GSO_CB(skb)->destructor;
1930 out_kfree_skb:
1931 	kfree_skb(skb);
1932 	return rc;
1933 }
1934 
1935 static u32 skb_tx_hashrnd;
1936 
1937 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1938 {
1939 	u32 hash;
1940 
1941 	if (skb_rx_queue_recorded(skb)) {
1942 		hash = skb_get_rx_queue(skb);
1943 		while (unlikely(hash >= dev->real_num_tx_queues))
1944 			hash -= dev->real_num_tx_queues;
1945 		return hash;
1946 	}
1947 
1948 	if (skb->sk && skb->sk->sk_hash)
1949 		hash = skb->sk->sk_hash;
1950 	else
1951 		hash = skb->protocol;
1952 
1953 	hash = jhash_1word(hash, skb_tx_hashrnd);
1954 
1955 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1956 }
1957 EXPORT_SYMBOL(skb_tx_hash);
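
/*
 * Illustrative sketch (hypothetical helper, not from the original sources):
 * the final scaling step above maps a 32-bit hash uniformly onto
 * [0, real_num_tx_queues) without a modulo; the top 32 bits of the 64-bit
 * product select the queue index.
 */
static inline u16 example_scale_hash_to_queue(u32 hash, unsigned int nr_queues)
{
	/* e.g. hash = 0xc0000000, nr_queues = 4  =>  returns 3 */
	return (u16) (((u64) hash * nr_queues) >> 32);
}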
1958 
1959 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1960 {
1961 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1962 		if (net_ratelimit()) {
1963 			WARN(1, "%s selects TX queue %d, but "
1964 			     "real number of TX queues is %d\n",
1965 			     dev->name, queue_index,
1966 			     dev->real_num_tx_queues);
1967 		}
1968 		return 0;
1969 	}
1970 	return queue_index;
1971 }
1972 
1973 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1974 					struct sk_buff *skb)
1975 {
1976 	u16 queue_index;
1977 	struct sock *sk = skb->sk;
1978 
1979 	if (sk_tx_queue_recorded(sk)) {
1980 		queue_index = sk_tx_queue_get(sk);
1981 	} else {
1982 		const struct net_device_ops *ops = dev->netdev_ops;
1983 
1984 		if (ops->ndo_select_queue) {
1985 			queue_index = ops->ndo_select_queue(dev, skb);
1986 			queue_index = dev_cap_txqueue(dev, queue_index);
1987 		} else {
1988 			queue_index = 0;
1989 			if (dev->real_num_tx_queues > 1)
1990 				queue_index = skb_tx_hash(dev, skb);
1991 
1992 			if (sk) {
1993 				struct dst_entry *dst = rcu_dereference(sk->sk_dst_cache);
1994 
1995 				if (dst && skb_dst(skb) == dst)
1996 					sk_tx_queue_set(sk, queue_index);
1997 			}
1998 		}
1999 	}
2000 
2001 	skb_set_queue_mapping(skb, queue_index);
2002 	return netdev_get_tx_queue(dev, queue_index);
2003 }
2004 
2005 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2006 				 struct net_device *dev,
2007 				 struct netdev_queue *txq)
2008 {
2009 	spinlock_t *root_lock = qdisc_lock(q);
2010 	int rc;
2011 
2012 	spin_lock(root_lock);
2013 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2014 		kfree_skb(skb);
2015 		rc = NET_XMIT_DROP;
2016 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2017 		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
2018 		/*
2019 		 * This is a work-conserving queue; there are no old skbs
2020 		 * waiting to be sent out; and the qdisc is not running -
2021 		 * xmit the skb directly.
2022 		 */
2023 		__qdisc_update_bstats(q, skb->len);
2024 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
2025 			__qdisc_run(q);
2026 		else
2027 			clear_bit(__QDISC_STATE_RUNNING, &q->state);
2028 
2029 		rc = NET_XMIT_SUCCESS;
2030 	} else {
2031 		rc = qdisc_enqueue_root(skb, q);
2032 		qdisc_run(q);
2033 	}
2034 	spin_unlock(root_lock);
2035 
2036 	return rc;
2037 }
2038 
2039 /*
2040  * Returns true if either:
2041  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
2042  *	2. skb is fragmented and the device does not support SG, or if
2043  *	   at least one of the fragments is in high memory and the device
2044  *	   does not support DMA from it.
2045  */
2046 static inline int skb_needs_linearize(struct sk_buff *skb,
2047 				      struct net_device *dev)
2048 {
2049 	return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
2050 	       (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
2051 					      illegal_highdma(dev, skb)));
2052 }
2053 
2054 /**
2055  *	dev_queue_xmit - transmit a buffer
2056  *	@skb: buffer to transmit
2057  *
2058  *	Queue a buffer for transmission to a network device. The caller must
2059  *	have set the device and priority and built the buffer before calling
2060  *	this function. The function can be called from an interrupt.
2061  *
2062  *	A negative errno code is returned on a failure. A success does not
2063  *	guarantee the frame will be transmitted as it may be dropped due
2064  *	to congestion or traffic shaping.
2065  *
2066  * -----------------------------------------------------------------------------------
2067  *      I notice this method can also return errors from the queue disciplines,
2068  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
2069  *      be positive.
2070  *
2071  *      Regardless of the return value, the skb is consumed, so it is currently
2072  *      difficult to retry a send to this method.  (You can bump the ref count
2073  *      before sending to hold a reference for retry if you are careful.)
2074  *
2075  *      When calling this method, interrupts MUST be enabled.  This is because
2076  *      the BH enable code must have IRQs enabled so that it will not deadlock.
2077  *          --BLG
2078  */
2079 int dev_queue_xmit(struct sk_buff *skb)
2080 {
2081 	struct net_device *dev = skb->dev;
2082 	struct netdev_queue *txq;
2083 	struct Qdisc *q;
2084 	int rc = -ENOMEM;
2085 
2086 	/* GSO will handle the following emulations directly. */
2087 	if (netif_needs_gso(dev, skb))
2088 		goto gso;
2089 
2090 	/* Convert a paged skb to linear, if required */
2091 	if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
2092 		goto out_kfree_skb;
2093 
2094 	/* If packet is not checksummed and device does not support
2095 	 * checksumming for this protocol, complete checksumming here.
2096 	 */
2097 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2098 		skb_set_transport_header(skb, skb->csum_start -
2099 					      skb_headroom(skb));
2100 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
2101 			goto out_kfree_skb;
2102 	}
2103 
2104 gso:
2105 	/* Disable soft irqs for various locks below. Also
2106 	 * stops preemption for RCU.
2107 	 */
2108 	rcu_read_lock_bh();
2109 
2110 	txq = dev_pick_tx(dev, skb);
2111 	q = rcu_dereference_bh(txq->qdisc);
2112 
2113 #ifdef CONFIG_NET_CLS_ACT
2114 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2115 #endif
2116 	if (q->enqueue) {
2117 		rc = __dev_xmit_skb(skb, q, dev, txq);
2118 		goto out;
2119 	}
2120 
2121 	/* The device has no queue. This is the common case for software
2122 	   devices: loopback, all sorts of tunnels...
2123 
2124 	   Really, it is unlikely that netif_tx_lock protection is necessary
2125 	   here.  (e.g. loopback and IP tunnels are clean, ignoring the
2126 	   statistics counters.)
2127 	   However, it is possible that they rely on the protection
2128 	   made by us here.
2129 
2130 	   Check this and take the lock; it is not prone to deadlocks.
2131 	   Alternatively, shoot the noqueue qdisc; it is even simpler 8)
2132 	 */
2133 	if (dev->flags & IFF_UP) {
2134 		int cpu = smp_processor_id(); /* ok because BHs are off */
2135 
2136 		if (txq->xmit_lock_owner != cpu) {
2137 
2138 			HARD_TX_LOCK(dev, txq, cpu);
2139 
2140 			if (!netif_tx_queue_stopped(txq)) {
2141 				rc = dev_hard_start_xmit(skb, dev, txq);
2142 				if (dev_xmit_complete(rc)) {
2143 					HARD_TX_UNLOCK(dev, txq);
2144 					goto out;
2145 				}
2146 			}
2147 			HARD_TX_UNLOCK(dev, txq);
2148 			if (net_ratelimit())
2149 				printk(KERN_CRIT "Virtual device %s asks to "
2150 				       "queue packet!\n", dev->name);
2151 		} else {
2152 			/* Recursion detected! It is possible,
2153 			 * unfortunately. */
2154 			if (net_ratelimit())
2155 				printk(KERN_CRIT "Dead loop on virtual device "
2156 				       "%s, fix it urgently!\n", dev->name);
2157 		}
2158 	}
2159 
2160 	rc = -ENETDOWN;
2161 	rcu_read_unlock_bh();
2162 
2163 out_kfree_skb:
2164 	kfree_skb(skb);
2165 	return rc;
2166 out:
2167 	rcu_read_unlock_bh();
2168 	return rc;
2169 }
2170 EXPORT_SYMBOL(dev_queue_xmit);
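
/*
 * Minimal usage sketch (hypothetical caller, not taken from any real driver
 * or protocol): hand a fully built skb to dev_queue_xmit().  The caller must
 * have set skb->dev and built all headers, and interrupts must be enabled,
 * as the comment above explains.
 */
static int example_xmit_one(struct net_device *dev, struct sk_buff *skb)
{
	skb->dev = dev;
	/* dev_queue_xmit() consumes the skb whatever the outcome is */
	return dev_queue_xmit(skb);
}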
2171 
2172 
2173 /*=======================================================================
2174 			Receiver routines
2175   =======================================================================*/
2176 
2177 int netdev_max_backlog __read_mostly = 1000;
2178 int netdev_budget __read_mostly = 300;
2179 int weight_p __read_mostly = 64;            /* old backlog weight */
2180 
2181 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2182 
2183 
2184 /**
2185  *	netif_rx	-	post buffer to the network code
2186  *	@skb: buffer to post
2187  *
2188  *	This function receives a packet from a device driver and queues it for
2189  *	the upper (protocol) levels to process.  It always succeeds. The buffer
2190  *	may be dropped during processing for congestion control or by the
2191  *	protocol layers.
2192  *
2193  *	return values:
2194  *	NET_RX_SUCCESS	(no congestion)
2195  *	NET_RX_DROP     (packet was dropped)
2196  *
2197  */
2198 
2199 int netif_rx(struct sk_buff *skb)
2200 {
2201 	struct softnet_data *queue;
2202 	unsigned long flags;
2203 
2204 	/* if netpoll wants it, pretend we never saw it */
2205 	if (netpoll_rx(skb))
2206 		return NET_RX_DROP;
2207 
2208 	if (!skb->tstamp.tv64)
2209 		net_timestamp(skb);
2210 
2211 	/*
2212 	 * The code is arranged so that the path is shortest when
2213 	 * the CPU is congested but still operating.
2214 	 */
2215 	local_irq_save(flags);
2216 	queue = &__get_cpu_var(softnet_data);
2217 
2218 	__get_cpu_var(netdev_rx_stat).total++;
2219 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2220 		if (queue->input_pkt_queue.qlen) {
2221 enqueue:
2222 			__skb_queue_tail(&queue->input_pkt_queue, skb);
2223 			local_irq_restore(flags);
2224 			return NET_RX_SUCCESS;
2225 		}
2226 
2227 		napi_schedule(&queue->backlog);
2228 		goto enqueue;
2229 	}
2230 
2231 	__get_cpu_var(netdev_rx_stat).dropped++;
2232 	local_irq_restore(flags);
2233 
2234 	kfree_skb(skb);
2235 	return NET_RX_DROP;
2236 }
2237 EXPORT_SYMBOL(netif_rx);
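
/*
 * Illustrative sketch of the classic non-NAPI receive path (hypothetical
 * driver code, not from this file): copy the frame into a freshly allocated
 * skb from the interrupt handler, set the protocol and hand it to
 * netif_rx(), which queues it on the per-CPU backlog.
 */
static void example_isr_rx(struct net_device *dev, const void *frame,
			   unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb_ip_align(dev, len);

	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}
	memcpy(skb_put(skb, len), frame, len);
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);
}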
2238 
2239 int netif_rx_ni(struct sk_buff *skb)
2240 {
2241 	int err;
2242 
2243 	preempt_disable();
2244 	err = netif_rx(skb);
2245 	if (local_softirq_pending())
2246 		do_softirq();
2247 	preempt_enable();
2248 
2249 	return err;
2250 }
2251 EXPORT_SYMBOL(netif_rx_ni);
2252 
2253 static void net_tx_action(struct softirq_action *h)
2254 {
2255 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2256 
2257 	if (sd->completion_queue) {
2258 		struct sk_buff *clist;
2259 
2260 		local_irq_disable();
2261 		clist = sd->completion_queue;
2262 		sd->completion_queue = NULL;
2263 		local_irq_enable();
2264 
2265 		while (clist) {
2266 			struct sk_buff *skb = clist;
2267 			clist = clist->next;
2268 
2269 			WARN_ON(atomic_read(&skb->users));
2270 			__kfree_skb(skb);
2271 		}
2272 	}
2273 
2274 	if (sd->output_queue) {
2275 		struct Qdisc *head;
2276 
2277 		local_irq_disable();
2278 		head = sd->output_queue;
2279 		sd->output_queue = NULL;
2280 		local_irq_enable();
2281 
2282 		while (head) {
2283 			struct Qdisc *q = head;
2284 			spinlock_t *root_lock;
2285 
2286 			head = head->next_sched;
2287 
2288 			root_lock = qdisc_lock(q);
2289 			if (spin_trylock(root_lock)) {
2290 				smp_mb__before_clear_bit();
2291 				clear_bit(__QDISC_STATE_SCHED,
2292 					  &q->state);
2293 				qdisc_run(q);
2294 				spin_unlock(root_lock);
2295 			} else {
2296 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2297 					      &q->state)) {
2298 					__netif_reschedule(q);
2299 				} else {
2300 					smp_mb__before_clear_bit();
2301 					clear_bit(__QDISC_STATE_SCHED,
2302 						  &q->state);
2303 				}
2304 			}
2305 		}
2306 	}
2307 }
2308 
2309 static inline int deliver_skb(struct sk_buff *skb,
2310 			      struct packet_type *pt_prev,
2311 			      struct net_device *orig_dev)
2312 {
2313 	atomic_inc(&skb->users);
2314 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2315 }
2316 
2317 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2318 
2319 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2320 /* This hook is defined here for ATM LANE */
2321 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2322 			     unsigned char *addr) __read_mostly;
2323 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2324 #endif
2325 
2326 /*
2327  * If the bridge module is loaded, call the bridging hook.
2328  * Returns NULL if the packet was consumed.
2329  */
2330 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2331 					struct sk_buff *skb) __read_mostly;
2332 EXPORT_SYMBOL_GPL(br_handle_frame_hook);
2333 
2334 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2335 					    struct packet_type **pt_prev, int *ret,
2336 					    struct net_device *orig_dev)
2337 {
2338 	struct net_bridge_port *port;
2339 
2340 	if (skb->pkt_type == PACKET_LOOPBACK ||
2341 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2342 		return skb;
2343 
2344 	if (*pt_prev) {
2345 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2346 		*pt_prev = NULL;
2347 	}
2348 
2349 	return br_handle_frame_hook(port, skb);
2350 }
2351 #else
2352 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2353 #endif
2354 
2355 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2356 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2357 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2358 
2359 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2360 					     struct packet_type **pt_prev,
2361 					     int *ret,
2362 					     struct net_device *orig_dev)
2363 {
2364 	if (skb->dev->macvlan_port == NULL)
2365 		return skb;
2366 
2367 	if (*pt_prev) {
2368 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2369 		*pt_prev = NULL;
2370 	}
2371 	return macvlan_handle_frame_hook(skb);
2372 }
2373 #else
2374 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2375 #endif
2376 
2377 #ifdef CONFIG_NET_CLS_ACT
2378 /* TODO: Maybe we should just force sch_ingress to be compiled in
2379  * when CONFIG_NET_CLS_ACT is?  Otherwise we execute a few useless
2380  * instructions (a compare and two extra stores) when it is not
2381  * enabled but CONFIG_NET_CLS_ACT is.
2382  * NOTE: This doesn't remove any functionality; if you don't have
2383  * the ingress scheduler, you just can't add policies on ingress.
2384  *
2385  */
2386 static int ing_filter(struct sk_buff *skb)
2387 {
2388 	struct net_device *dev = skb->dev;
2389 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2390 	struct netdev_queue *rxq;
2391 	int result = TC_ACT_OK;
2392 	struct Qdisc *q;
2393 
2394 	if (MAX_RED_LOOP < ttl++) {
2395 		printk(KERN_WARNING
2396 		       "Redir loop detected, dropping packet (%d->%d)\n",
2397 		       skb->skb_iif, dev->ifindex);
2398 		return TC_ACT_SHOT;
2399 	}
2400 
2401 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2402 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2403 
2404 	rxq = &dev->rx_queue;
2405 
2406 	q = rxq->qdisc;
2407 	if (q != &noop_qdisc) {
2408 		spin_lock(qdisc_lock(q));
2409 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2410 			result = qdisc_enqueue_root(skb, q);
2411 		spin_unlock(qdisc_lock(q));
2412 	}
2413 
2414 	return result;
2415 }
2416 
2417 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2418 					 struct packet_type **pt_prev,
2419 					 int *ret, struct net_device *orig_dev)
2420 {
2421 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2422 		goto out;
2423 
2424 	if (*pt_prev) {
2425 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2426 		*pt_prev = NULL;
2427 	} else {
2428 		/* Huh? Why does turning on AF_PACKET affect this? */
2429 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2430 	}
2431 
2432 	switch (ing_filter(skb)) {
2433 	case TC_ACT_SHOT:
2434 	case TC_ACT_STOLEN:
2435 		kfree_skb(skb);
2436 		return NULL;
2437 	}
2438 
2439 out:
2440 	skb->tc_verd = 0;
2441 	return skb;
2442 }
2443 #endif
2444 
2445 /*
2446  * 	netif_nit_deliver - deliver received packets to network taps
2447  * 	@skb: buffer
2448  *
2449  * 	This function is used to deliver incoming packets to network
2450  * 	taps. It should be used when the normal netif_receive_skb path
2451  * 	is bypassed, for example because of VLAN acceleration.
2452  */
2453 void netif_nit_deliver(struct sk_buff *skb)
2454 {
2455 	struct packet_type *ptype;
2456 
2457 	if (list_empty(&ptype_all))
2458 		return;
2459 
2460 	skb_reset_network_header(skb);
2461 	skb_reset_transport_header(skb);
2462 	skb->mac_len = skb->network_header - skb->mac_header;
2463 
2464 	rcu_read_lock();
2465 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2466 		if (!ptype->dev || ptype->dev == skb->dev)
2467 			deliver_skb(skb, ptype, skb->dev);
2468 	}
2469 	rcu_read_unlock();
2470 }
2471 
2472 /**
2473  *	netif_receive_skb - process receive buffer from network
2474  *	@skb: buffer to process
2475  *
2476  *	netif_receive_skb() is the main receive data processing function.
2477  *	It always succeeds. The buffer may be dropped during processing
2478  *	for congestion control or by the protocol layers.
2479  *
2480  *	This function may only be called from softirq context and interrupts
2481  *	should be enabled.
2482  *
2483  *	Return values (usually ignored):
2484  *	NET_RX_SUCCESS: no congestion
2485  *	NET_RX_DROP: packet was dropped
2486  */
2487 int netif_receive_skb(struct sk_buff *skb)
2488 {
2489 	struct packet_type *ptype, *pt_prev;
2490 	struct net_device *orig_dev;
2491 	struct net_device *master;
2492 	struct net_device *null_or_orig;
2493 	struct net_device *null_or_bond;
2494 	int ret = NET_RX_DROP;
2495 	__be16 type;
2496 
2497 	if (!skb->tstamp.tv64)
2498 		net_timestamp(skb);
2499 
2500 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2501 		return NET_RX_SUCCESS;
2502 
2503 	/* if we've gotten here through NAPI, check netpoll */
2504 	if (netpoll_receive_skb(skb))
2505 		return NET_RX_DROP;
2506 
2507 	if (!skb->skb_iif)
2508 		skb->skb_iif = skb->dev->ifindex;
2509 
2510 	null_or_orig = NULL;
2511 	orig_dev = skb->dev;
2512 	master = ACCESS_ONCE(orig_dev->master);
2513 	if (master) {
2514 		if (skb_bond_should_drop(skb, master))
2515 			null_or_orig = orig_dev; /* deliver only exact match */
2516 		else
2517 			skb->dev = master;
2518 	}
2519 
2520 	__get_cpu_var(netdev_rx_stat).total++;
2521 
2522 	skb_reset_network_header(skb);
2523 	skb_reset_transport_header(skb);
2524 	skb->mac_len = skb->network_header - skb->mac_header;
2525 
2526 	pt_prev = NULL;
2527 
2528 	rcu_read_lock();
2529 
2530 #ifdef CONFIG_NET_CLS_ACT
2531 	if (skb->tc_verd & TC_NCLS) {
2532 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2533 		goto ncls;
2534 	}
2535 #endif
2536 
2537 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2538 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2539 		    ptype->dev == orig_dev) {
2540 			if (pt_prev)
2541 				ret = deliver_skb(skb, pt_prev, orig_dev);
2542 			pt_prev = ptype;
2543 		}
2544 	}
2545 
2546 #ifdef CONFIG_NET_CLS_ACT
2547 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2548 	if (!skb)
2549 		goto out;
2550 ncls:
2551 #endif
2552 
2553 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2554 	if (!skb)
2555 		goto out;
2556 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2557 	if (!skb)
2558 		goto out;
2559 
2560 	/*
2561 	 * Make sure frames received on VLAN interfaces stacked on
2562 	 * bonding interfaces still make their way to any base bonding
2563 	 * device that may have registered for a specific ptype.  The
2564 	 * handler may have to adjust skb->dev and orig_dev.
2565 	 */
2566 	null_or_bond = NULL;
2567 	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2568 	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
2569 		null_or_bond = vlan_dev_real_dev(skb->dev);
2570 	}
2571 
2572 	type = skb->protocol;
2573 	list_for_each_entry_rcu(ptype,
2574 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2575 		if (ptype->type == type && (ptype->dev == null_or_orig ||
2576 		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
2577 		     ptype->dev == null_or_bond)) {
2578 			if (pt_prev)
2579 				ret = deliver_skb(skb, pt_prev, orig_dev);
2580 			pt_prev = ptype;
2581 		}
2582 	}
2583 
2584 	if (pt_prev) {
2585 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2586 	} else {
2587 		kfree_skb(skb);
2588 		/* Jamal, now you will not be able to escape explaining
2589 		 * to me how you were going to use this. :-)
2590 		 */
2591 		ret = NET_RX_DROP;
2592 	}
2593 
2594 out:
2595 	rcu_read_unlock();
2596 	return ret;
2597 }
2598 EXPORT_SYMBOL(netif_receive_skb);
2599 
2600 /* Network device is going away, flush any packets still pending  */
2601 static void flush_backlog(void *arg)
2602 {
2603 	struct net_device *dev = arg;
2604 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2605 	struct sk_buff *skb, *tmp;
2606 
2607 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2608 		if (skb->dev == dev) {
2609 			__skb_unlink(skb, &queue->input_pkt_queue);
2610 			kfree_skb(skb);
2611 		}
2612 }
2613 
2614 static int napi_gro_complete(struct sk_buff *skb)
2615 {
2616 	struct packet_type *ptype;
2617 	__be16 type = skb->protocol;
2618 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2619 	int err = -ENOENT;
2620 
2621 	if (NAPI_GRO_CB(skb)->count == 1) {
2622 		skb_shinfo(skb)->gso_size = 0;
2623 		goto out;
2624 	}
2625 
2626 	rcu_read_lock();
2627 	list_for_each_entry_rcu(ptype, head, list) {
2628 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2629 			continue;
2630 
2631 		err = ptype->gro_complete(skb);
2632 		break;
2633 	}
2634 	rcu_read_unlock();
2635 
2636 	if (err) {
2637 		WARN_ON(&ptype->list == head);
2638 		kfree_skb(skb);
2639 		return NET_RX_SUCCESS;
2640 	}
2641 
2642 out:
2643 	return netif_receive_skb(skb);
2644 }
2645 
2646 static void napi_gro_flush(struct napi_struct *napi)
2647 {
2648 	struct sk_buff *skb, *next;
2649 
2650 	for (skb = napi->gro_list; skb; skb = next) {
2651 		next = skb->next;
2652 		skb->next = NULL;
2653 		napi_gro_complete(skb);
2654 	}
2655 
2656 	napi->gro_count = 0;
2657 	napi->gro_list = NULL;
2658 }
2659 
2660 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2661 {
2662 	struct sk_buff **pp = NULL;
2663 	struct packet_type *ptype;
2664 	__be16 type = skb->protocol;
2665 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2666 	int same_flow;
2667 	int mac_len;
2668 	enum gro_result ret;
2669 
2670 	if (!(skb->dev->features & NETIF_F_GRO))
2671 		goto normal;
2672 
2673 	if (skb_is_gso(skb) || skb_has_frags(skb))
2674 		goto normal;
2675 
2676 	rcu_read_lock();
2677 	list_for_each_entry_rcu(ptype, head, list) {
2678 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2679 			continue;
2680 
2681 		skb_set_network_header(skb, skb_gro_offset(skb));
2682 		mac_len = skb->network_header - skb->mac_header;
2683 		skb->mac_len = mac_len;
2684 		NAPI_GRO_CB(skb)->same_flow = 0;
2685 		NAPI_GRO_CB(skb)->flush = 0;
2686 		NAPI_GRO_CB(skb)->free = 0;
2687 
2688 		pp = ptype->gro_receive(&napi->gro_list, skb);
2689 		break;
2690 	}
2691 	rcu_read_unlock();
2692 
2693 	if (&ptype->list == head)
2694 		goto normal;
2695 
2696 	same_flow = NAPI_GRO_CB(skb)->same_flow;
2697 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2698 
2699 	if (pp) {
2700 		struct sk_buff *nskb = *pp;
2701 
2702 		*pp = nskb->next;
2703 		nskb->next = NULL;
2704 		napi_gro_complete(nskb);
2705 		napi->gro_count--;
2706 	}
2707 
2708 	if (same_flow)
2709 		goto ok;
2710 
2711 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2712 		goto normal;
2713 
2714 	napi->gro_count++;
2715 	NAPI_GRO_CB(skb)->count = 1;
2716 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2717 	skb->next = napi->gro_list;
2718 	napi->gro_list = skb;
2719 	ret = GRO_HELD;
2720 
2721 pull:
2722 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
2723 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
2724 
2725 		BUG_ON(skb->end - skb->tail < grow);
2726 
2727 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2728 
2729 		skb->tail += grow;
2730 		skb->data_len -= grow;
2731 
2732 		skb_shinfo(skb)->frags[0].page_offset += grow;
2733 		skb_shinfo(skb)->frags[0].size -= grow;
2734 
2735 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2736 			put_page(skb_shinfo(skb)->frags[0].page);
2737 			memmove(skb_shinfo(skb)->frags,
2738 				skb_shinfo(skb)->frags + 1,
2739 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
2740 		}
2741 	}
2742 
2743 ok:
2744 	return ret;
2745 
2746 normal:
2747 	ret = GRO_NORMAL;
2748 	goto pull;
2749 }
2750 EXPORT_SYMBOL(dev_gro_receive);
2751 
2752 static gro_result_t
2753 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2754 {
2755 	struct sk_buff *p;
2756 
2757 	if (netpoll_rx_on(skb))
2758 		return GRO_NORMAL;
2759 
2760 	for (p = napi->gro_list; p; p = p->next) {
2761 		NAPI_GRO_CB(p)->same_flow =
2762 			(p->dev == skb->dev) &&
2763 			!compare_ether_header(skb_mac_header(p),
2764 					      skb_gro_mac_header(skb));
2765 		NAPI_GRO_CB(p)->flush = 0;
2766 	}
2767 
2768 	return dev_gro_receive(napi, skb);
2769 }
2770 
2771 gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
2772 {
2773 	switch (ret) {
2774 	case GRO_NORMAL:
2775 		if (netif_receive_skb(skb))
2776 			ret = GRO_DROP;
2777 		break;
2778 
2779 	case GRO_DROP:
2780 	case GRO_MERGED_FREE:
2781 		kfree_skb(skb);
2782 		break;
2783 
2784 	case GRO_HELD:
2785 	case GRO_MERGED:
2786 		break;
2787 	}
2788 
2789 	return ret;
2790 }
2791 EXPORT_SYMBOL(napi_skb_finish);
2792 
2793 void skb_gro_reset_offset(struct sk_buff *skb)
2794 {
2795 	NAPI_GRO_CB(skb)->data_offset = 0;
2796 	NAPI_GRO_CB(skb)->frag0 = NULL;
2797 	NAPI_GRO_CB(skb)->frag0_len = 0;
2798 
2799 	if (skb->mac_header == skb->tail &&
2800 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
2801 		NAPI_GRO_CB(skb)->frag0 =
2802 			page_address(skb_shinfo(skb)->frags[0].page) +
2803 			skb_shinfo(skb)->frags[0].page_offset;
2804 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2805 	}
2806 }
2807 EXPORT_SYMBOL(skb_gro_reset_offset);
2808 
2809 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2810 {
2811 	skb_gro_reset_offset(skb);
2812 
2813 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
2814 }
2815 EXPORT_SYMBOL(napi_gro_receive);
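
/*
 * Illustrative sketch of a hypothetical NAPI poll routine feeding frames
 * into GRO.  example_fetch_rx_skb() is a made-up stand-in for a driver's
 * real RX descriptor-ring processing and simply returns NULL here.
 */
static struct sk_buff *example_fetch_rx_skb(struct napi_struct *napi)
{
	/* a real driver would pull the next completed frame off its ring */
	return NULL;
}

static int example_poll(struct napi_struct *napi, int budget)
{
	int work = 0;
	struct sk_buff *skb;

	while (work < budget && (skb = example_fetch_rx_skb(napi)) != NULL) {
		napi_gro_receive(napi, skb);	/* may merge, hold or deliver */
		work++;
	}
	if (work < budget)
		napi_complete(napi);	/* done: re-enable device interrupts */
	return work;
}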
2816 
2817 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2818 {
2819 	__skb_pull(skb, skb_headlen(skb));
2820 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2821 
2822 	napi->skb = skb;
2823 }
2824 EXPORT_SYMBOL(napi_reuse_skb);
2825 
2826 struct sk_buff *napi_get_frags(struct napi_struct *napi)
2827 {
2828 	struct sk_buff *skb = napi->skb;
2829 
2830 	if (!skb) {
2831 		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
2832 		if (skb)
2833 			napi->skb = skb;
2834 	}
2835 	return skb;
2836 }
2837 EXPORT_SYMBOL(napi_get_frags);
2838 
2839 gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
2840 			       gro_result_t ret)
2841 {
2842 	switch (ret) {
2843 	case GRO_NORMAL:
2844 	case GRO_HELD:
2845 		skb->protocol = eth_type_trans(skb, skb->dev);
2846 
2847 		if (ret == GRO_HELD)
2848 			skb_gro_pull(skb, -ETH_HLEN);
2849 		else if (netif_receive_skb(skb))
2850 			ret = GRO_DROP;
2851 		break;
2852 
2853 	case GRO_DROP:
2854 	case GRO_MERGED_FREE:
2855 		napi_reuse_skb(napi, skb);
2856 		break;
2857 
2858 	case GRO_MERGED:
2859 		break;
2860 	}
2861 
2862 	return ret;
2863 }
2864 EXPORT_SYMBOL(napi_frags_finish);
2865 
2866 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2867 {
2868 	struct sk_buff *skb = napi->skb;
2869 	struct ethhdr *eth;
2870 	unsigned int hlen;
2871 	unsigned int off;
2872 
2873 	napi->skb = NULL;
2874 
2875 	skb_reset_mac_header(skb);
2876 	skb_gro_reset_offset(skb);
2877 
2878 	off = skb_gro_offset(skb);
2879 	hlen = off + sizeof(*eth);
2880 	eth = skb_gro_header_fast(skb, off);
2881 	if (skb_gro_header_hard(skb, hlen)) {
2882 		eth = skb_gro_header_slow(skb, hlen, off);
2883 		if (unlikely(!eth)) {
2884 			napi_reuse_skb(napi, skb);
2885 			skb = NULL;
2886 			goto out;
2887 		}
2888 	}
2889 
2890 	skb_gro_pull(skb, sizeof(*eth));
2891 
2892 	/*
2893 	 * This works because the only protocols we care about don't require
2894 	 * special handling.  We'll fix it up properly at the end.
2895 	 */
2896 	skb->protocol = eth->h_proto;
2897 
2898 out:
2899 	return skb;
2900 }
2901 EXPORT_SYMBOL(napi_frags_skb);
2902 
2903 gro_result_t napi_gro_frags(struct napi_struct *napi)
2904 {
2905 	struct sk_buff *skb = napi_frags_skb(napi);
2906 
2907 	if (!skb)
2908 		return GRO_DROP;
2909 
2910 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2911 }
2912 EXPORT_SYMBOL(napi_gro_frags);
2913 
2914 static int process_backlog(struct napi_struct *napi, int quota)
2915 {
2916 	int work = 0;
2917 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2918 	unsigned long start_time = jiffies;
2919 
2920 	napi->weight = weight_p;
2921 	do {
2922 		struct sk_buff *skb;
2923 
2924 		local_irq_disable();
2925 		skb = __skb_dequeue(&queue->input_pkt_queue);
2926 		if (!skb) {
2927 			__napi_complete(napi);
2928 			local_irq_enable();
2929 			break;
2930 		}
2931 		local_irq_enable();
2932 
2933 		netif_receive_skb(skb);
2934 	} while (++work < quota && jiffies == start_time);
2935 
2936 	return work;
2937 }
2938 
2939 /**
2940  * __napi_schedule - schedule for receive
2941  * @n: entry to schedule
2942  *
2943  * The entry's receive function will be scheduled to run
2944  */
2945 void __napi_schedule(struct napi_struct *n)
2946 {
2947 	unsigned long flags;
2948 
2949 	local_irq_save(flags);
2950 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2951 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2952 	local_irq_restore(flags);
2953 }
2954 EXPORT_SYMBOL(__napi_schedule);
2955 
2956 void __napi_complete(struct napi_struct *n)
2957 {
2958 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2959 	BUG_ON(n->gro_list);
2960 
2961 	list_del(&n->poll_list);
2962 	smp_mb__before_clear_bit();
2963 	clear_bit(NAPI_STATE_SCHED, &n->state);
2964 }
2965 EXPORT_SYMBOL(__napi_complete);
2966 
2967 void napi_complete(struct napi_struct *n)
2968 {
2969 	unsigned long flags;
2970 
2971 	/*
2972 	 * Don't let NAPI dequeue from the CPU poll list,
2973 	 * just in case it's running on a different CPU.
2974 	 */
2975 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2976 		return;
2977 
2978 	napi_gro_flush(n);
2979 	local_irq_save(flags);
2980 	__napi_complete(n);
2981 	local_irq_restore(flags);
2982 }
2983 EXPORT_SYMBOL(napi_complete);
2984 
2985 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2986 		    int (*poll)(struct napi_struct *, int), int weight)
2987 {
2988 	INIT_LIST_HEAD(&napi->poll_list);
2989 	napi->gro_count = 0;
2990 	napi->gro_list = NULL;
2991 	napi->skb = NULL;
2992 	napi->poll = poll;
2993 	napi->weight = weight;
2994 	list_add(&napi->dev_list, &dev->napi_list);
2995 	napi->dev = dev;
2996 #ifdef CONFIG_NETPOLL
2997 	spin_lock_init(&napi->poll_lock);
2998 	napi->poll_owner = -1;
2999 #endif
3000 	set_bit(NAPI_STATE_SCHED, &napi->state);
3001 }
3002 EXPORT_SYMBOL(netif_napi_add);
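
/*
 * Illustrative registration sketch (hypothetical driver): a NAPI context is
 * usually added at probe time and enabled from ndo_open.  example_poll is
 * the made-up poll routine sketched after napi_gro_receive() above; the
 * weight of 64 matches the usual default.
 */
struct example_rx_priv {
	struct napi_struct napi;
};

static void example_setup_napi(struct net_device *dev,
			       struct example_rx_priv *priv)
{
	netif_napi_add(dev, &priv->napi, example_poll, 64);
	napi_enable(&priv->napi);	/* a real driver does this in ndo_open */
}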
3003 
3004 void netif_napi_del(struct napi_struct *napi)
3005 {
3006 	struct sk_buff *skb, *next;
3007 
3008 	list_del_init(&napi->dev_list);
3009 	napi_free_frags(napi);
3010 
3011 	for (skb = napi->gro_list; skb; skb = next) {
3012 		next = skb->next;
3013 		skb->next = NULL;
3014 		kfree_skb(skb);
3015 	}
3016 
3017 	napi->gro_list = NULL;
3018 	napi->gro_count = 0;
3019 }
3020 EXPORT_SYMBOL(netif_napi_del);
3021 
3022 
3023 static void net_rx_action(struct softirq_action *h)
3024 {
3025 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
3026 	unsigned long time_limit = jiffies + 2;
3027 	int budget = netdev_budget;
3028 	void *have;
3029 
3030 	local_irq_disable();
3031 
3032 	while (!list_empty(list)) {
3033 		struct napi_struct *n;
3034 		int work, weight;
3035 
3036 		/* If the softirq window is exhausted then punt.
3037 		 * Allow this to run for 2 jiffies, which allows
3038 		 * an average latency of 1.5/HZ.
3039 		 */
3040 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3041 			goto softnet_break;
3042 
3043 		local_irq_enable();
3044 
3045 		/* Even though interrupts have been re-enabled, this
3046 		 * access is safe because interrupts can only add new
3047 		 * entries to the tail of this list, and only ->poll()
3048 		 * calls can remove this head entry from the list.
3049 		 */
3050 		n = list_first_entry(list, struct napi_struct, poll_list);
3051 
3052 		have = netpoll_poll_lock(n);
3053 
3054 		weight = n->weight;
3055 
3056 		/* This NAPI_STATE_SCHED test is for avoiding a race
3057 		 * with netpoll's poll_napi().  Only the entity which
3058 		 * obtains the lock and sees NAPI_STATE_SCHED set will
3059 		 * actually make the ->poll() call.  Therefore we avoid
3060 		 * accidentally calling ->poll() when NAPI is not scheduled.
3061 		 */
3062 		work = 0;
3063 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3064 			work = n->poll(n, weight);
3065 			trace_napi_poll(n);
3066 		}
3067 
3068 		WARN_ON_ONCE(work > weight);
3069 
3070 		budget -= work;
3071 
3072 		local_irq_disable();
3073 
3074 		/* Drivers must not modify the NAPI state if they
3075 		 * consume the entire weight.  In such cases this code
3076 		 * still "owns" the NAPI instance and therefore can
3077 		 * move the instance around on the list at-will.
3078 		 */
3079 		if (unlikely(work == weight)) {
3080 			if (unlikely(napi_disable_pending(n))) {
3081 				local_irq_enable();
3082 				napi_complete(n);
3083 				local_irq_disable();
3084 			} else
3085 				list_move_tail(&n->poll_list, list);
3086 		}
3087 
3088 		netpoll_poll_unlock(have);
3089 	}
3090 out:
3091 	local_irq_enable();
3092 
3093 #ifdef CONFIG_NET_DMA
3094 	/*
3095 	 * There may not be any more sk_buffs coming right now, so push
3096 	 * any pending DMA copies to hardware
3097 	 */
3098 	dma_issue_pending_all();
3099 #endif
3100 
3101 	return;
3102 
3103 softnet_break:
3104 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
3105 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
3106 	goto out;
3107 }
3108 
3109 static gifconf_func_t *gifconf_list[NPROTO];
3110 
3111 /**
3112  *	register_gifconf	-	register a SIOCGIF handler
3113  *	@family: Address family
3114  *	@gifconf: Function handler
3115  *
3116  *	Register protocol dependent address dumping routines. The handler
3117  *	that is passed must not be freed or reused until it has been replaced
3118  *	by another handler.
3119  */
3120 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3121 {
3122 	if (family >= NPROTO)
3123 		return -EINVAL;
3124 	gifconf_list[family] = gifconf;
3125 	return 0;
3126 }
3127 EXPORT_SYMBOL(register_gifconf);
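
/*
 * Illustrative sketch (hypothetical handler and family): the shape of a
 * SIOCGIFCONF dump routine.  A protocol registers one handler per address
 * family at init time; when @buf is NULL only the space that would be
 * needed is reported.
 */
static int example_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* this example family has no per-device addresses to report */
	return 0;
}

static int __init example_gifconf_register(void)
{
	/* AF_UNSPEC is only a placeholder for a real address family here */
	return register_gifconf(AF_UNSPEC, example_gifconf);
}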
3128 
3129 
3130 /*
3131  *	Map an interface index to its name (SIOCGIFNAME)
3132  */
3133 
3134 /*
3135  *	We need this ioctl for efficient implementation of the
3136  *	if_indextoname() function required by the IPv6 API.  Without
3137  *	it, we would have to search all the interfaces to find a
3138  *	match.  --pb
3139  */
3140 
3141 static int dev_ifname(struct net *net, struct ifreq __user *arg)
3142 {
3143 	struct net_device *dev;
3144 	struct ifreq ifr;
3145 
3146 	/*
3147 	 *	Fetch the caller's info block.
3148 	 */
3149 
3150 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3151 		return -EFAULT;
3152 
3153 	rcu_read_lock();
3154 	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
3155 	if (!dev) {
3156 		rcu_read_unlock();
3157 		return -ENODEV;
3158 	}
3159 
3160 	strcpy(ifr.ifr_name, dev->name);
3161 	rcu_read_unlock();
3162 
3163 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3164 		return -EFAULT;
3165 	return 0;
3166 }
3167 
3168 /*
3169  *	Perform a SIOCGIFCONF call. This structure will change
3170  *	size eventually, and there is nothing I can do about it.
3171  *	Thus we will need a 'compatibility mode'.
3172  */
3173 
3174 static int dev_ifconf(struct net *net, char __user *arg)
3175 {
3176 	struct ifconf ifc;
3177 	struct net_device *dev;
3178 	char __user *pos;
3179 	int len;
3180 	int total;
3181 	int i;
3182 
3183 	/*
3184 	 *	Fetch the caller's info block.
3185 	 */
3186 
3187 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3188 		return -EFAULT;
3189 
3190 	pos = ifc.ifc_buf;
3191 	len = ifc.ifc_len;
3192 
3193 	/*
3194 	 *	Loop over the interfaces, and write an info block for each.
3195 	 */
3196 
3197 	total = 0;
3198 	for_each_netdev(net, dev) {
3199 		for (i = 0; i < NPROTO; i++) {
3200 			if (gifconf_list[i]) {
3201 				int done;
3202 				if (!pos)
3203 					done = gifconf_list[i](dev, NULL, 0);
3204 				else
3205 					done = gifconf_list[i](dev, pos + total,
3206 							       len - total);
3207 				if (done < 0)
3208 					return -EFAULT;
3209 				total += done;
3210 			}
3211 		}
3212 	}
3213 
3214 	/*
3215 	 *	All done.  Write the updated control block back to the caller.
3216 	 */
3217 	ifc.ifc_len = total;
3218 
3219 	/*
3220 	 * 	Both BSD and Solaris return 0 here, so we do too.
3221 	 */
3222 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3223 }
3224 
3225 #ifdef CONFIG_PROC_FS
3226 /*
3227  *	This is invoked by the /proc filesystem handler to display a device
3228  *	in detail.
3229  */
3230 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3231 	__acquires(RCU)
3232 {
3233 	struct net *net = seq_file_net(seq);
3234 	loff_t off;
3235 	struct net_device *dev;
3236 
3237 	rcu_read_lock();
3238 	if (!*pos)
3239 		return SEQ_START_TOKEN;
3240 
3241 	off = 1;
3242 	for_each_netdev_rcu(net, dev)
3243 		if (off++ == *pos)
3244 			return dev;
3245 
3246 	return NULL;
3247 }
3248 
3249 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3250 {
3251 	struct net_device *dev = (v == SEQ_START_TOKEN) ?
3252 				  first_net_device(seq_file_net(seq)) :
3253 				  next_net_device((struct net_device *)v);
3254 
3255 	++*pos;
3256 	return rcu_dereference(dev);
3257 }
3258 
3259 void dev_seq_stop(struct seq_file *seq, void *v)
3260 	__releases(RCU)
3261 {
3262 	rcu_read_unlock();
3263 }
3264 
3265 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3266 {
3267 	const struct net_device_stats *stats = dev_get_stats(dev);
3268 
3269 	seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3270 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3271 		   dev->name, stats->rx_bytes, stats->rx_packets,
3272 		   stats->rx_errors,
3273 		   stats->rx_dropped + stats->rx_missed_errors,
3274 		   stats->rx_fifo_errors,
3275 		   stats->rx_length_errors + stats->rx_over_errors +
3276 		    stats->rx_crc_errors + stats->rx_frame_errors,
3277 		   stats->rx_compressed, stats->multicast,
3278 		   stats->tx_bytes, stats->tx_packets,
3279 		   stats->tx_errors, stats->tx_dropped,
3280 		   stats->tx_fifo_errors, stats->collisions,
3281 		   stats->tx_carrier_errors +
3282 		    stats->tx_aborted_errors +
3283 		    stats->tx_window_errors +
3284 		    stats->tx_heartbeat_errors,
3285 		   stats->tx_compressed);
3286 }
3287 
3288 /*
3289  *	Called from the PROCfs module. This now uses the new arbitrary-sized
3290  *	/proc/net interface to create /proc/net/dev.
3291  */
3292 static int dev_seq_show(struct seq_file *seq, void *v)
3293 {
3294 	if (v == SEQ_START_TOKEN)
3295 		seq_puts(seq, "Inter-|   Receive                            "
3296 			      "                    |  Transmit\n"
3297 			      " face |bytes    packets errs drop fifo frame "
3298 			      "compressed multicast|bytes    packets errs "
3299 			      "drop fifo colls carrier compressed\n");
3300 	else
3301 		dev_seq_printf_stats(seq, v);
3302 	return 0;
3303 }
3304 
3305 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3306 {
3307 	struct netif_rx_stats *rc = NULL;
3308 
3309 	while (*pos < nr_cpu_ids)
3310 		if (cpu_online(*pos)) {
3311 			rc = &per_cpu(netdev_rx_stat, *pos);
3312 			break;
3313 		} else
3314 			++*pos;
3315 	return rc;
3316 }
3317 
3318 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3319 {
3320 	return softnet_get_online(pos);
3321 }
3322 
3323 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3324 {
3325 	++*pos;
3326 	return softnet_get_online(pos);
3327 }
3328 
3329 static void softnet_seq_stop(struct seq_file *seq, void *v)
3330 {
3331 }
3332 
3333 static int softnet_seq_show(struct seq_file *seq, void *v)
3334 {
3335 	struct netif_rx_stats *s = v;
3336 
3337 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3338 		   s->total, s->dropped, s->time_squeeze, 0,
3339 		   0, 0, 0, 0, /* was fastroute */
3340 		   s->cpu_collision);
3341 	return 0;
3342 }
3343 
3344 static const struct seq_operations dev_seq_ops = {
3345 	.start = dev_seq_start,
3346 	.next  = dev_seq_next,
3347 	.stop  = dev_seq_stop,
3348 	.show  = dev_seq_show,
3349 };
3350 
3351 static int dev_seq_open(struct inode *inode, struct file *file)
3352 {
3353 	return seq_open_net(inode, file, &dev_seq_ops,
3354 			    sizeof(struct seq_net_private));
3355 }
3356 
3357 static const struct file_operations dev_seq_fops = {
3358 	.owner	 = THIS_MODULE,
3359 	.open    = dev_seq_open,
3360 	.read    = seq_read,
3361 	.llseek  = seq_lseek,
3362 	.release = seq_release_net,
3363 };
3364 
3365 static const struct seq_operations softnet_seq_ops = {
3366 	.start = softnet_seq_start,
3367 	.next  = softnet_seq_next,
3368 	.stop  = softnet_seq_stop,
3369 	.show  = softnet_seq_show,
3370 };
3371 
3372 static int softnet_seq_open(struct inode *inode, struct file *file)
3373 {
3374 	return seq_open(file, &softnet_seq_ops);
3375 }
3376 
3377 static const struct file_operations softnet_seq_fops = {
3378 	.owner	 = THIS_MODULE,
3379 	.open    = softnet_seq_open,
3380 	.read    = seq_read,
3381 	.llseek  = seq_lseek,
3382 	.release = seq_release,
3383 };
3384 
3385 static void *ptype_get_idx(loff_t pos)
3386 {
3387 	struct packet_type *pt = NULL;
3388 	loff_t i = 0;
3389 	int t;
3390 
3391 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3392 		if (i == pos)
3393 			return pt;
3394 		++i;
3395 	}
3396 
3397 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3398 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3399 			if (i == pos)
3400 				return pt;
3401 			++i;
3402 		}
3403 	}
3404 	return NULL;
3405 }
3406 
3407 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3408 	__acquires(RCU)
3409 {
3410 	rcu_read_lock();
3411 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3412 }
3413 
3414 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3415 {
3416 	struct packet_type *pt;
3417 	struct list_head *nxt;
3418 	int hash;
3419 
3420 	++*pos;
3421 	if (v == SEQ_START_TOKEN)
3422 		return ptype_get_idx(0);
3423 
3424 	pt = v;
3425 	nxt = pt->list.next;
3426 	if (pt->type == htons(ETH_P_ALL)) {
3427 		if (nxt != &ptype_all)
3428 			goto found;
3429 		hash = 0;
3430 		nxt = ptype_base[0].next;
3431 	} else
3432 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3433 
3434 	while (nxt == &ptype_base[hash]) {
3435 		if (++hash >= PTYPE_HASH_SIZE)
3436 			return NULL;
3437 		nxt = ptype_base[hash].next;
3438 	}
3439 found:
3440 	return list_entry(nxt, struct packet_type, list);
3441 }
3442 
3443 static void ptype_seq_stop(struct seq_file *seq, void *v)
3444 	__releases(RCU)
3445 {
3446 	rcu_read_unlock();
3447 }
3448 
3449 static int ptype_seq_show(struct seq_file *seq, void *v)
3450 {
3451 	struct packet_type *pt = v;
3452 
3453 	if (v == SEQ_START_TOKEN)
3454 		seq_puts(seq, "Type Device      Function\n");
3455 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3456 		if (pt->type == htons(ETH_P_ALL))
3457 			seq_puts(seq, "ALL ");
3458 		else
3459 			seq_printf(seq, "%04x", ntohs(pt->type));
3460 
3461 		seq_printf(seq, " %-8s %pF\n",
3462 			   pt->dev ? pt->dev->name : "", pt->func);
3463 	}
3464 
3465 	return 0;
3466 }
3467 
3468 static const struct seq_operations ptype_seq_ops = {
3469 	.start = ptype_seq_start,
3470 	.next  = ptype_seq_next,
3471 	.stop  = ptype_seq_stop,
3472 	.show  = ptype_seq_show,
3473 };
3474 
3475 static int ptype_seq_open(struct inode *inode, struct file *file)
3476 {
3477 	return seq_open_net(inode, file, &ptype_seq_ops,
3478 			sizeof(struct seq_net_private));
3479 }
3480 
3481 static const struct file_operations ptype_seq_fops = {
3482 	.owner	 = THIS_MODULE,
3483 	.open    = ptype_seq_open,
3484 	.read    = seq_read,
3485 	.llseek  = seq_lseek,
3486 	.release = seq_release_net,
3487 };
3488 
3489 
3490 static int __net_init dev_proc_net_init(struct net *net)
3491 {
3492 	int rc = -ENOMEM;
3493 
3494 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3495 		goto out;
3496 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3497 		goto out_dev;
3498 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3499 		goto out_softnet;
3500 
3501 	if (wext_proc_init(net))
3502 		goto out_ptype;
3503 	rc = 0;
3504 out:
3505 	return rc;
3506 out_ptype:
3507 	proc_net_remove(net, "ptype");
3508 out_softnet:
3509 	proc_net_remove(net, "softnet_stat");
3510 out_dev:
3511 	proc_net_remove(net, "dev");
3512 	goto out;
3513 }
3514 
3515 static void __net_exit dev_proc_net_exit(struct net *net)
3516 {
3517 	wext_proc_exit(net);
3518 
3519 	proc_net_remove(net, "ptype");
3520 	proc_net_remove(net, "softnet_stat");
3521 	proc_net_remove(net, "dev");
3522 }
3523 
3524 static struct pernet_operations __net_initdata dev_proc_ops = {
3525 	.init = dev_proc_net_init,
3526 	.exit = dev_proc_net_exit,
3527 };
3528 
3529 static int __init dev_proc_init(void)
3530 {
3531 	return register_pernet_subsys(&dev_proc_ops);
3532 }
3533 #else
3534 #define dev_proc_init() 0
3535 #endif	/* CONFIG_PROC_FS */
3536 
3537 
3538 /**
3539  *	netdev_set_master	-	set up master/slave pair
3540  *	@slave: slave device
3541  *	@master: new master device
3542  *
3543  *	Changes the master device of the slave. Pass %NULL to break the
3544  *	bonding. The caller must hold the RTNL semaphore. On a failure
3545  *	a negative errno code is returned. On success the reference counts
3546  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3547  *	function returns zero.
3548  */
3549 int netdev_set_master(struct net_device *slave, struct net_device *master)
3550 {
3551 	struct net_device *old = slave->master;
3552 
3553 	ASSERT_RTNL();
3554 
3555 	if (master) {
3556 		if (old)
3557 			return -EBUSY;
3558 		dev_hold(master);
3559 	}
3560 
3561 	slave->master = master;
3562 
3563 	synchronize_net();
3564 
3565 	if (old)
3566 		dev_put(old);
3567 
3568 	if (master)
3569 		slave->flags |= IFF_SLAVE;
3570 	else
3571 		slave->flags &= ~IFF_SLAVE;
3572 
3573 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3574 	return 0;
3575 }
3576 EXPORT_SYMBOL(netdev_set_master);
3577 
3578 static void dev_change_rx_flags(struct net_device *dev, int flags)
3579 {
3580 	const struct net_device_ops *ops = dev->netdev_ops;
3581 
3582 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3583 		ops->ndo_change_rx_flags(dev, flags);
3584 }
3585 
3586 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3587 {
3588 	unsigned short old_flags = dev->flags;
3589 	uid_t uid;
3590 	gid_t gid;
3591 
3592 	ASSERT_RTNL();
3593 
3594 	dev->flags |= IFF_PROMISC;
3595 	dev->promiscuity += inc;
3596 	if (dev->promiscuity == 0) {
3597 		/*
3598 		 * Avoid overflow: if inc causes an overflow, leave
3599 		 * promiscuity untouched and return an error.
3600 		 */
3601 		if (inc < 0)
3602 			dev->flags &= ~IFF_PROMISC;
3603 		else {
3604 			dev->promiscuity -= inc;
3605 			printk(KERN_WARNING "%s: promiscuity counter overflowed, "
3606 				"setting promiscuity failed; the promiscuity "
3607 				"feature of the device might be broken.\n", dev->name);
3608 			return -EOVERFLOW;
3609 		}
3610 	}
3611 	if (dev->flags != old_flags) {
3612 		printk(KERN_INFO "device %s %s promiscuous mode\n",
3613 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3614 							       "left");
3615 		if (audit_enabled) {
3616 			current_uid_gid(&uid, &gid);
3617 			audit_log(current->audit_context, GFP_ATOMIC,
3618 				AUDIT_ANOM_PROMISCUOUS,
3619 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3620 				dev->name, (dev->flags & IFF_PROMISC),
3621 				(old_flags & IFF_PROMISC),
3622 				audit_get_loginuid(current),
3623 				uid, gid,
3624 				audit_get_sessionid(current));
3625 		}
3626 
3627 		dev_change_rx_flags(dev, IFF_PROMISC);
3628 	}
3629 	return 0;
3630 }
3631 
3632 /**
3633  *	dev_set_promiscuity	- update promiscuity count on a device
3634  *	@dev: device
3635  *	@inc: modifier
3636  *
3637  *	Add or remove promiscuity from a device. While the count in the device
3638  *	remains above zero the interface remains promiscuous. Once it hits zero
3639  *	the device reverts to normal filtering operation. A negative inc
3640  *	value is used to drop promiscuity on the device.
3641  *	Return 0 if successful or a negative errno code on error.
3642  */
3643 int dev_set_promiscuity(struct net_device *dev, int inc)
3644 {
3645 	unsigned short old_flags = dev->flags;
3646 	int err;
3647 
3648 	err = __dev_set_promiscuity(dev, inc);
3649 	if (err < 0)
3650 		return err;
3651 	if (dev->flags != old_flags)
3652 		dev_set_rx_mode(dev);
3653 	return err;
3654 }
3655 EXPORT_SYMBOL(dev_set_promiscuity);
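
/*
 * Illustrative usage sketch (hypothetical caller): bump the promiscuity
 * count under RTNL and drop it again on teardown; the function names below
 * are made up.
 */
static int example_enable_promisc(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);	/* take one promiscuity reference */
	rtnl_unlock();
	return err;
}

static void example_disable_promisc(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);		/* drop our reference again */
	rtnl_unlock();
}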
3656 
3657 /**
3658  *	dev_set_allmulti	- update allmulti count on a device
3659  *	@dev: device
3660  *	@inc: modifier
3661  *
3662  *	Add or remove reception of all multicast frames to a device. While the
3663  *	count in the device remains above zero the interface remains listening
3664  *	to all multicast frames. Once it hits zero the device reverts to normal
3665  *	filtering operation. A negative @inc value is used to drop the counter
3666  *	when releasing a resource needing all multicasts.
3667  *	Return 0 if successful or a negative errno code on error.
3668  */
3669 
3670 int dev_set_allmulti(struct net_device *dev, int inc)
3671 {
3672 	unsigned short old_flags = dev->flags;
3673 
3674 	ASSERT_RTNL();
3675 
3676 	dev->flags |= IFF_ALLMULTI;
3677 	dev->allmulti += inc;
3678 	if (dev->allmulti == 0) {
3679 		/*
3680 		 * Avoid overflow: if inc causes an overflow, leave
3681 		 * allmulti untouched and return an error.
3682 		 */
3683 		if (inc < 0)
3684 			dev->flags &= ~IFF_ALLMULTI;
3685 		else {
3686 			dev->allmulti -= inc;
3687 			printk(KERN_WARNING "%s: allmulti counter overflowed, "
3688 				"setting allmulti failed; the allmulti feature "
3689 				"of the device might be broken.\n", dev->name);
3690 			return -EOVERFLOW;
3691 		}
3692 	}
3693 	if (dev->flags ^ old_flags) {
3694 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3695 		dev_set_rx_mode(dev);
3696 	}
3697 	return 0;
3698 }
3699 EXPORT_SYMBOL(dev_set_allmulti);
3700 
3701 /*
3702  *	Upload unicast and multicast address lists to device and
3703  *	configure RX filtering. When the device doesn't support unicast
3704  *	filtering it is put in promiscuous mode while unicast addresses
3705  *	are present.
3706  */
3707 void __dev_set_rx_mode(struct net_device *dev)
3708 {
3709 	const struct net_device_ops *ops = dev->netdev_ops;
3710 
3711 	/* dev_open will call this function so the list will stay sane. */
3712 	if (!(dev->flags&IFF_UP))
3713 		return;
3714 
3715 	if (!netif_device_present(dev))
3716 		return;
3717 
3718 	if (ops->ndo_set_rx_mode)
3719 		ops->ndo_set_rx_mode(dev);
3720 	else {
3721 		/* Unicast address changes may only happen under the rtnl,
3722 		 * therefore calling __dev_set_promiscuity here is safe.
3723 		 */
3724 		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
3725 			__dev_set_promiscuity(dev, 1);
3726 			dev->uc_promisc = 1;
3727 		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
3728 			__dev_set_promiscuity(dev, -1);
3729 			dev->uc_promisc = 0;
3730 		}
3731 
3732 		if (ops->ndo_set_multicast_list)
3733 			ops->ndo_set_multicast_list(dev);
3734 	}
3735 }
3736 
3737 void dev_set_rx_mode(struct net_device *dev)
3738 {
3739 	netif_addr_lock_bh(dev);
3740 	__dev_set_rx_mode(dev);
3741 	netif_addr_unlock_bh(dev);
3742 }
3743 
3744 /* Hardware address list handling functions */
3745 
3746 static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3747 			 int addr_len, unsigned char addr_type)
3748 {
3749 	struct netdev_hw_addr *ha;
3750 	int alloc_size;
3751 
3752 	if (addr_len > MAX_ADDR_LEN)
3753 		return -EINVAL;
3754 
3755 	list_for_each_entry(ha, &list->list, list) {
3756 		if (!memcmp(ha->addr, addr, addr_len) &&
3757 		    ha->type == addr_type) {
3758 			ha->refcount++;
3759 			return 0;
3760 		}
3761 	}
3762 
3763 
3764 	alloc_size = sizeof(*ha);
3765 	if (alloc_size < L1_CACHE_BYTES)
3766 		alloc_size = L1_CACHE_BYTES;
3767 	ha = kmalloc(alloc_size, GFP_ATOMIC);
3768 	if (!ha)
3769 		return -ENOMEM;
3770 	memcpy(ha->addr, addr, addr_len);
3771 	ha->type = addr_type;
3772 	ha->refcount = 1;
3773 	ha->synced = false;
3774 	list_add_tail_rcu(&ha->list, &list->list);
3775 	list->count++;
3776 	return 0;
3777 }
3778 
3779 static void ha_rcu_free(struct rcu_head *head)
3780 {
3781 	struct netdev_hw_addr *ha;
3782 
3783 	ha = container_of(head, struct netdev_hw_addr, rcu_head);
3784 	kfree(ha);
3785 }
3786 
3787 static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3788 			 int addr_len, unsigned char addr_type)
3789 {
3790 	struct netdev_hw_addr *ha;
3791 
3792 	list_for_each_entry(ha, &list->list, list) {
3793 		if (!memcmp(ha->addr, addr, addr_len) &&
3794 		    (ha->type == addr_type || !addr_type)) {
3795 			if (--ha->refcount)
3796 				return 0;
3797 			list_del_rcu(&ha->list);
3798 			call_rcu(&ha->rcu_head, ha_rcu_free);
3799 			list->count--;
3800 			return 0;
3801 		}
3802 	}
3803 	return -ENOENT;
3804 }
3805 
3806 static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3807 				  struct netdev_hw_addr_list *from_list,
3808 				  int addr_len,
3809 				  unsigned char addr_type)
3810 {
3811 	int err;
3812 	struct netdev_hw_addr *ha, *ha2;
3813 	unsigned char type;
3814 
3815 	list_for_each_entry(ha, &from_list->list, list) {
3816 		type = addr_type ? addr_type : ha->type;
3817 		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3818 		if (err)
3819 			goto unroll;
3820 	}
3821 	return 0;
3822 
3823 unroll:
3824 	list_for_each_entry(ha2, &from_list->list, list) {
3825 		if (ha2 == ha)
3826 			break;
3827 		type = addr_type ? addr_type : ha2->type;
3828 		__hw_addr_del(to_list, ha2->addr, addr_len, type);
3829 	}
3830 	return err;
3831 }
3832 
3833 static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3834 				   struct netdev_hw_addr_list *from_list,
3835 				   int addr_len,
3836 				   unsigned char addr_type)
3837 {
3838 	struct netdev_hw_addr *ha;
3839 	unsigned char type;
3840 
3841 	list_for_each_entry(ha, &from_list->list, list) {
3842 		type = addr_type ? addr_type : ha->type;
3843 		__hw_addr_del(to_list, ha->addr, addr_len, type);
3844 	}
3845 }
3846 
3847 static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3848 			  struct netdev_hw_addr_list *from_list,
3849 			  int addr_len)
3850 {
3851 	int err = 0;
3852 	struct netdev_hw_addr *ha, *tmp;
3853 
3854 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3855 		if (!ha->synced) {
3856 			err = __hw_addr_add(to_list, ha->addr,
3857 					    addr_len, ha->type);
3858 			if (err)
3859 				break;
3860 			ha->synced = true;
3861 			ha->refcount++;
3862 		} else if (ha->refcount == 1) {
3863 			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3864 			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3865 		}
3866 	}
3867 	return err;
3868 }
3869 
3870 static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3871 			     struct netdev_hw_addr_list *from_list,
3872 			     int addr_len)
3873 {
3874 	struct netdev_hw_addr *ha, *tmp;
3875 
3876 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3877 		if (ha->synced) {
3878 			__hw_addr_del(to_list, ha->addr,
3879 				      addr_len, ha->type);
3880 			ha->synced = false;
3881 			__hw_addr_del(from_list, ha->addr,
3882 				      addr_len, ha->type);
3883 		}
3884 	}
3885 }
3886 
3887 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3888 {
3889 	struct netdev_hw_addr *ha, *tmp;
3890 
3891 	list_for_each_entry_safe(ha, tmp, &list->list, list) {
3892 		list_del_rcu(&ha->list);
3893 		call_rcu(&ha->rcu_head, ha_rcu_free);
3894 	}
3895 	list->count = 0;
3896 }
3897 
3898 static void __hw_addr_init(struct netdev_hw_addr_list *list)
3899 {
3900 	INIT_LIST_HEAD(&list->list);
3901 	list->count = 0;
3902 }
3903 
3904 /* Device addresses handling functions */
3905 
3906 static void dev_addr_flush(struct net_device *dev)
3907 {
3908 	/* rtnl_mutex must be held here */
3909 
3910 	__hw_addr_flush(&dev->dev_addrs);
3911 	dev->dev_addr = NULL;
3912 }
3913 
3914 static int dev_addr_init(struct net_device *dev)
3915 {
3916 	unsigned char addr[MAX_ADDR_LEN];
3917 	struct netdev_hw_addr *ha;
3918 	int err;
3919 
3920 	/* rtnl_mutex must be held here */
3921 
3922 	__hw_addr_init(&dev->dev_addrs);
3923 	memset(addr, 0, sizeof(addr));
3924 	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3925 			    NETDEV_HW_ADDR_T_LAN);
3926 	if (!err) {
3927 		/*
3928 		 * Get the first (previously created) address from the list
3929 		 * and set dev_addr pointer to this location.
3930 		 */
3931 		ha = list_first_entry(&dev->dev_addrs.list,
3932 				      struct netdev_hw_addr, list);
3933 		dev->dev_addr = ha->addr;
3934 	}
3935 	return err;
3936 }
3937 
3938 /**
3939  *	dev_addr_add	- Add a device address
3940  *	@dev: device
3941  *	@addr: address to add
3942  *	@addr_type: address type
3943  *
3944  *	Add a device address to the device or increase the reference count if
3945  *	it already exists.
3946  *
3947  *	The caller must hold the rtnl_mutex.
3948  */
3949 int dev_addr_add(struct net_device *dev, unsigned char *addr,
3950 		 unsigned char addr_type)
3951 {
3952 	int err;
3953 
3954 	ASSERT_RTNL();
3955 
3956 	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3957 	if (!err)
3958 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3959 	return err;
3960 }
3961 EXPORT_SYMBOL(dev_addr_add);
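
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * a driver that owns a second hardware address - for example a SAN MAC -
 * could publish it with dev_addr_add() while holding the rtnl.  The
 * "example_" names below are invented for illustration.
 *
 *	static int example_publish_san_mac(struct net_device *netdev,
 *					   unsigned char *san_mac)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = dev_addr_add(netdev, san_mac, NETDEV_HW_ADDR_T_SAN);
 *		rtnl_unlock();
 *		return err;
 *	}
 */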
3962 
3963 /**
3964  *	dev_addr_del	- Release a device address.
3965  *	@dev: device
3966  *	@addr: address to delete
3967  *	@addr_type: address type
3968  *
3969  *	Release reference to a device address and remove it from the device
3970  *	if the reference count drops to zero.
3971  *
3972  *	The caller must hold the rtnl_mutex.
3973  */
3974 int dev_addr_del(struct net_device *dev, unsigned char *addr,
3975 		 unsigned char addr_type)
3976 {
3977 	int err;
3978 	struct netdev_hw_addr *ha;
3979 
3980 	ASSERT_RTNL();
3981 
3982 	/*
3983 	 * We cannot remove the first address from the list because
3984 	 * dev->dev_addr points to it.
3985 	 */
3986 	ha = list_first_entry(&dev->dev_addrs.list,
3987 			      struct netdev_hw_addr, list);
3988 	if (ha->addr == dev->dev_addr && ha->refcount == 1)
3989 		return -ENOENT;
3990 
3991 	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3992 			    addr_type);
3993 	if (!err)
3994 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3995 	return err;
3996 }
3997 EXPORT_SYMBOL(dev_addr_del);
3998 
3999 /**
4000  *	dev_addr_add_multiple	- Add device addresses from another device
4001  *	@to_dev: device to which addresses will be added
4002  *	@from_dev: device from which addresses will be added
4003  *	@addr_type: address type - 0 means type will be used from from_dev
4004  *
4005  *	Add the device addresses of one device to another.
4006  *
4007  *	The caller must hold the rtnl_mutex.
4008  */
4009 int dev_addr_add_multiple(struct net_device *to_dev,
4010 			  struct net_device *from_dev,
4011 			  unsigned char addr_type)
4012 {
4013 	int err;
4014 
4015 	ASSERT_RTNL();
4016 
4017 	if (from_dev->addr_len != to_dev->addr_len)
4018 		return -EINVAL;
4019 	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4020 				     to_dev->addr_len, addr_type);
4021 	if (!err)
4022 		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4023 	return err;
4024 }
4025 EXPORT_SYMBOL(dev_addr_add_multiple);
4026 
4027 /**
4028  *	dev_addr_del_multiple	- Delete device addresses by another device
4029  *	@to_dev: device where the addresses will be deleted
4030  *	@from_dev: device supplying the addresses to be deleted
4031  *	@addr_type: address type - 0 means type will be used from from_dev
4032  *
4033  *	Deletes the addresses in to_dev that appear in from_dev's address list.
4034  *
4035  *	The caller must hold the rtnl_mutex.
4036  */
4037 int dev_addr_del_multiple(struct net_device *to_dev,
4038 			  struct net_device *from_dev,
4039 			  unsigned char addr_type)
4040 {
4041 	ASSERT_RTNL();
4042 
4043 	if (from_dev->addr_len != to_dev->addr_len)
4044 		return -EINVAL;
4045 	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4046 			       to_dev->addr_len, addr_type);
4047 	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4048 	return 0;
4049 }
4050 EXPORT_SYMBOL(dev_addr_del_multiple);
4051 
4052 /* multicast addresses handling functions */
4053 
4054 int __dev_addr_delete(struct dev_addr_list **list, int *count,
4055 		      void *addr, int alen, int glbl)
4056 {
4057 	struct dev_addr_list *da;
4058 
4059 	for (; (da = *list) != NULL; list = &da->next) {
4060 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4061 		    alen == da->da_addrlen) {
4062 			if (glbl) {
4063 				int old_glbl = da->da_gusers;
4064 				da->da_gusers = 0;
4065 				if (old_glbl == 0)
4066 					break;
4067 			}
4068 			if (--da->da_users)
4069 				return 0;
4070 
4071 			*list = da->next;
4072 			kfree(da);
4073 			(*count)--;
4074 			return 0;
4075 		}
4076 	}
4077 	return -ENOENT;
4078 }
4079 
4080 int __dev_addr_add(struct dev_addr_list **list, int *count,
4081 		   void *addr, int alen, int glbl)
4082 {
4083 	struct dev_addr_list *da;
4084 
4085 	for (da = *list; da != NULL; da = da->next) {
4086 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4087 		    da->da_addrlen == alen) {
4088 			if (glbl) {
4089 				int old_glbl = da->da_gusers;
4090 				da->da_gusers = 1;
4091 				if (old_glbl)
4092 					return 0;
4093 			}
4094 			da->da_users++;
4095 			return 0;
4096 		}
4097 	}
4098 
4099 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
4100 	if (da == NULL)
4101 		return -ENOMEM;
4102 	memcpy(da->da_addr, addr, alen);
4103 	da->da_addrlen = alen;
4104 	da->da_users = 1;
4105 	da->da_gusers = glbl ? 1 : 0;
4106 	da->next = *list;
4107 	*list = da;
4108 	(*count)++;
4109 	return 0;
4110 }
4111 
4112 /**
4113  *	dev_unicast_delete	- Release secondary unicast address.
4114  *	@dev: device
4115  *	@addr: address to delete
4116  *
4117  *	Release reference to a secondary unicast address and remove it
4118  *	from the device if the reference count drops to zero.
4119  *
4120  * 	The caller must hold the rtnl_mutex.
4121  */
4122 int dev_unicast_delete(struct net_device *dev, void *addr)
4123 {
4124 	int err;
4125 
4126 	ASSERT_RTNL();
4127 
4128 	netif_addr_lock_bh(dev);
4129 	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4130 			    NETDEV_HW_ADDR_T_UNICAST);
4131 	if (!err)
4132 		__dev_set_rx_mode(dev);
4133 	netif_addr_unlock_bh(dev);
4134 	return err;
4135 }
4136 EXPORT_SYMBOL(dev_unicast_delete);
4137 
4138 /**
4139  *	dev_unicast_add		- add a secondary unicast address
4140  *	@dev: device
4141  *	@addr: address to add
4142  *
4143  *	Add a secondary unicast address to the device or increase
4144  *	the reference count if it already exists.
4145  *
4146  *	The caller must hold the rtnl_mutex.
4147  */
4148 int dev_unicast_add(struct net_device *dev, void *addr)
4149 {
4150 	int err;
4151 
4152 	ASSERT_RTNL();
4153 
4154 	netif_addr_lock_bh(dev);
4155 	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4156 			    NETDEV_HW_ADDR_T_UNICAST);
4157 	if (!err)
4158 		__dev_set_rx_mode(dev);
4159 	netif_addr_unlock_bh(dev);
4160 	return err;
4161 }
4162 EXPORT_SYMBOL(dev_unicast_add);
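
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * code that wants this device to accept frames for one more unicast
 * address pairs dev_unicast_add() with dev_unicast_delete(), both under
 * the rtnl.  The "example_" names below are invented for illustration.
 *
 *	static int example_listen_on(struct net_device *dev, unsigned char *mac)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = dev_unicast_add(dev, mac);
 *		rtnl_unlock();
 *		return err;
 *	}
 *
 *	static void example_stop_listening(struct net_device *dev,
 *					   unsigned char *mac)
 *	{
 *		rtnl_lock();
 *		dev_unicast_delete(dev, mac);
 *		rtnl_unlock();
 *	}
 */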
4163 
4164 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4165 		    struct dev_addr_list **from, int *from_count)
4166 {
4167 	struct dev_addr_list *da, *next;
4168 	int err = 0;
4169 
4170 	da = *from;
4171 	while (da != NULL) {
4172 		next = da->next;
4173 		if (!da->da_synced) {
4174 			err = __dev_addr_add(to, to_count,
4175 					     da->da_addr, da->da_addrlen, 0);
4176 			if (err < 0)
4177 				break;
4178 			da->da_synced = 1;
4179 			da->da_users++;
4180 		} else if (da->da_users == 1) {
4181 			__dev_addr_delete(to, to_count,
4182 					  da->da_addr, da->da_addrlen, 0);
4183 			__dev_addr_delete(from, from_count,
4184 					  da->da_addr, da->da_addrlen, 0);
4185 		}
4186 		da = next;
4187 	}
4188 	return err;
4189 }
4190 EXPORT_SYMBOL_GPL(__dev_addr_sync);
4191 
4192 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4193 		       struct dev_addr_list **from, int *from_count)
4194 {
4195 	struct dev_addr_list *da, *next;
4196 
4197 	da = *from;
4198 	while (da != NULL) {
4199 		next = da->next;
4200 		if (da->da_synced) {
4201 			__dev_addr_delete(to, to_count,
4202 					  da->da_addr, da->da_addrlen, 0);
4203 			da->da_synced = 0;
4204 			__dev_addr_delete(from, from_count,
4205 					  da->da_addr, da->da_addrlen, 0);
4206 		}
4207 		da = next;
4208 	}
4209 }
4210 EXPORT_SYMBOL_GPL(__dev_addr_unsync);
4211 
4212 /**
4213  *	dev_unicast_sync - Synchronize device's unicast list to another device
4214  *	@to: destination device
4215  *	@from: source device
4216  *
4217  *	Add newly added addresses to the destination device and release
4218  *	addresses that have no users left. The source device must be
4219  *	locked by netif_addr_lock_bh.
4220  *
4221  *	This function is intended to be called from the dev->set_rx_mode
4222  *	function of layered software devices.
4223  */
4224 int dev_unicast_sync(struct net_device *to, struct net_device *from)
4225 {
4226 	int err = 0;
4227 
4228 	if (to->addr_len != from->addr_len)
4229 		return -EINVAL;
4230 
4231 	netif_addr_lock_bh(to);
4232 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4233 	if (!err)
4234 		__dev_set_rx_mode(to);
4235 	netif_addr_unlock_bh(to);
4236 	return err;
4237 }
4238 EXPORT_SYMBOL(dev_unicast_sync);
4239 
4240 /**
4241  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
4242  *	@to: destination device
4243  *	@from: source device
4244  *
4245  *	Remove all addresses that were added to the destination device by
4246  *	dev_unicast_sync(). This function is intended to be called from the
4247  *	dev->stop function of layered software devices.
4248  */
4249 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4250 {
4251 	if (to->addr_len != from->addr_len)
4252 		return;
4253 
4254 	netif_addr_lock_bh(from);
4255 	netif_addr_lock(to);
4256 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4257 	__dev_set_rx_mode(to);
4258 	netif_addr_unlock(to);
4259 	netif_addr_unlock_bh(from);
4260 }
4261 EXPORT_SYMBOL(dev_unicast_unsync);
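
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * a stacked software device (a VLAN-like upper device on top of a real
 * lower device) would typically call dev_unicast_sync() from its
 * ndo_set_rx_mode and dev_unicast_unsync() from its ndo_stop, so the
 * lower device's unicast filter follows the upper device's list.  The
 * example_priv structure and "example_" helpers are invented for
 * illustration.
 *
 *	struct example_priv {
 *		struct net_device *lower_dev;
 *	};
 *
 *	static void example_set_rx_mode(struct net_device *upper)
 *	{
 *		struct example_priv *p = netdev_priv(upper);
 *
 *		dev_unicast_sync(p->lower_dev, upper);
 *	}
 *
 *	static int example_stop(struct net_device *upper)
 *	{
 *		struct example_priv *p = netdev_priv(upper);
 *
 *		dev_unicast_unsync(p->lower_dev, upper);
 *		return 0;
 *	}
 */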
4262 
4263 static void dev_unicast_flush(struct net_device *dev)
4264 {
4265 	netif_addr_lock_bh(dev);
4266 	__hw_addr_flush(&dev->uc);
4267 	netif_addr_unlock_bh(dev);
4268 }
4269 
4270 static void dev_unicast_init(struct net_device *dev)
4271 {
4272 	__hw_addr_init(&dev->uc);
4273 }
4274 
4275 
4276 static void __dev_addr_discard(struct dev_addr_list **list)
4277 {
4278 	struct dev_addr_list *tmp;
4279 
4280 	while (*list != NULL) {
4281 		tmp = *list;
4282 		*list = tmp->next;
4283 		if (tmp->da_users > tmp->da_gusers)
4284 			printk(KERN_ERR "__dev_addr_discard: address leakage! "
4285 			       "da_users=%d\n", tmp->da_users);
4286 		kfree(tmp);
4287 	}
4288 }
4289 
4290 static void dev_addr_discard(struct net_device *dev)
4291 {
4292 	netif_addr_lock_bh(dev);
4293 
4294 	__dev_addr_discard(&dev->mc_list);
4295 	netdev_mc_count(dev) = 0;
4296 
4297 	netif_addr_unlock_bh(dev);
4298 }
4299 
4300 /**
4301  *	dev_get_flags - get flags reported to userspace
4302  *	@dev: device
4303  *
4304  *	Get the combination of flag bits exported through APIs to userspace.
4305  */
4306 unsigned dev_get_flags(const struct net_device *dev)
4307 {
4308 	unsigned flags;
4309 
4310 	flags = (dev->flags & ~(IFF_PROMISC |
4311 				IFF_ALLMULTI |
4312 				IFF_RUNNING |
4313 				IFF_LOWER_UP |
4314 				IFF_DORMANT)) |
4315 		(dev->gflags & (IFF_PROMISC |
4316 				IFF_ALLMULTI));
4317 
4318 	if (netif_running(dev)) {
4319 		if (netif_oper_up(dev))
4320 			flags |= IFF_RUNNING;
4321 		if (netif_carrier_ok(dev))
4322 			flags |= IFF_LOWER_UP;
4323 		if (netif_dormant(dev))
4324 			flags |= IFF_DORMANT;
4325 	}
4326 
4327 	return flags;
4328 }
4329 EXPORT_SYMBOL(dev_get_flags);
4330 
4331 int __dev_change_flags(struct net_device *dev, unsigned int flags)
4332 {
4333 	int old_flags = dev->flags;
4334 	int ret;
4335 
4336 	ASSERT_RTNL();
4337 
4338 	/*
4339 	 *	Set the flags on our device.
4340 	 */
4341 
4342 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4343 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4344 			       IFF_AUTOMEDIA)) |
4345 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4346 				    IFF_ALLMULTI));
4347 
4348 	/*
4349 	 *	Load in the correct multicast list now the flags have changed.
4350 	 */
4351 
4352 	if ((old_flags ^ flags) & IFF_MULTICAST)
4353 		dev_change_rx_flags(dev, IFF_MULTICAST);
4354 
4355 	dev_set_rx_mode(dev);
4356 
4357 	/*
4358 	 *	Have we downed the interface? We handle IFF_UP ourselves
4359 	 *	according to user attempts to set it, rather than blindly
4360 	 *	setting it.
4361 	 */
4362 
4363 	ret = 0;
4364 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4365 		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4366 
4367 		if (!ret)
4368 			dev_set_rx_mode(dev);
4369 	}
4370 
4371 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4372 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4373 
4374 		dev->gflags ^= IFF_PROMISC;
4375 		dev_set_promiscuity(dev, inc);
4376 	}
4377 
4378 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4379 	   is important. Some (broken) drivers set IFF_PROMISC when
4380 	   IFF_ALLMULTI is requested, without asking us and without reporting.
4381 	 */
4382 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4383 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4384 
4385 		dev->gflags ^= IFF_ALLMULTI;
4386 		dev_set_allmulti(dev, inc);
4387 	}
4388 
4389 	return ret;
4390 }
4391 
4392 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4393 {
4394 	unsigned int changes = dev->flags ^ old_flags;
4395 
4396 	if (changes & IFF_UP) {
4397 		if (dev->flags & IFF_UP)
4398 			call_netdevice_notifiers(NETDEV_UP, dev);
4399 		else
4400 			call_netdevice_notifiers(NETDEV_DOWN, dev);
4401 	}
4402 
4403 	if (dev->flags & IFF_UP &&
4404 	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4405 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4406 }
4407 
4408 /**
4409  *	dev_change_flags - change device settings
4410  *	@dev: device
4411  *	@flags: device state flags
4412  *
4413  *	Change settings on a device based on the given state flags. The flags are
4414  *	in the userspace exported format.
4415  */
4416 int dev_change_flags(struct net_device *dev, unsigned flags)
4417 {
4418 	int ret, changes;
4419 	int old_flags = dev->flags;
4420 
4421 	ret = __dev_change_flags(dev, flags);
4422 	if (ret < 0)
4423 		return ret;
4424 
4425 	changes = old_flags ^ dev->flags;
4426 	if (changes)
4427 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4428 
4429 	__dev_notify_flags(dev, old_flags);
4430 	return ret;
4431 }
4432 EXPORT_SYMBOL(dev_change_flags);
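
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * in-kernel code that needs to bring an interface administratively up can
 * set IFF_UP through dev_change_flags() while holding the rtnl; the flags
 * argument is in the userspace-exported format.  The "example_" name is
 * invented for illustration.
 *
 *	static int example_bring_up(struct net_device *dev)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = dev_change_flags(dev, dev->flags | IFF_UP);
 *		rtnl_unlock();
 *		return err;
 *	}
 */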
4433 
4434 /**
4435  *	dev_set_mtu - Change maximum transfer unit
4436  *	@dev: device
4437  *	@new_mtu: new transfer unit
4438  *
4439  *	Change the maximum transfer size of the network device.
4440  */
4441 int dev_set_mtu(struct net_device *dev, int new_mtu)
4442 {
4443 	const struct net_device_ops *ops = dev->netdev_ops;
4444 	int err;
4445 
4446 	if (new_mtu == dev->mtu)
4447 		return 0;
4448 
4449 	/*	MTU must not be negative.	 */
4450 	if (new_mtu < 0)
4451 		return -EINVAL;
4452 
4453 	if (!netif_device_present(dev))
4454 		return -ENODEV;
4455 
4456 	err = 0;
4457 	if (ops->ndo_change_mtu)
4458 		err = ops->ndo_change_mtu(dev, new_mtu);
4459 	else
4460 		dev->mtu = new_mtu;
4461 
4462 	if (!err && dev->flags & IFF_UP)
4463 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4464 	return err;
4465 }
4466 EXPORT_SYMBOL(dev_set_mtu);
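
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * callers change the MTU through dev_set_mtu() under the rtnl instead of
 * writing dev->mtu directly, so that ndo_change_mtu and the
 * NETDEV_CHANGEMTU notification are honoured.  The "example_" name is
 * invented for illustration.
 *
 *	static int example_set_jumbo_mtu(struct net_device *dev)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = dev_set_mtu(dev, 9000);
 *		rtnl_unlock();
 *		return err;
 *	}
 */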
4467 
4468 /**
4469  *	dev_set_mac_address - Change Media Access Control Address
4470  *	@dev: device
4471  *	@sa: new address
4472  *
4473  *	Change the hardware (MAC) address of the device
4474  */
4475 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4476 {
4477 	const struct net_device_ops *ops = dev->netdev_ops;
4478 	int err;
4479 
4480 	if (!ops->ndo_set_mac_address)
4481 		return -EOPNOTSUPP;
4482 	if (sa->sa_family != dev->type)
4483 		return -EINVAL;
4484 	if (!netif_device_present(dev))
4485 		return -ENODEV;
4486 	err = ops->ndo_set_mac_address(dev, sa);
4487 	if (!err)
4488 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4489 	return err;
4490 }
4491 EXPORT_SYMBOL(dev_set_mac_address);
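
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * the new address is passed as a struct sockaddr whose sa_family must
 * match dev->type (e.g. ARPHRD_ETHER).  A hypothetical in-kernel caller,
 * using an invented "example_" name, could look like this:
 *
 *	static int example_set_mac(struct net_device *dev,
 *				   const unsigned char *mac)
 *	{
 *		struct sockaddr sa;
 *		int err;
 *
 *		sa.sa_family = dev->type;
 *		memcpy(sa.sa_data, mac, dev->addr_len);
 *
 *		rtnl_lock();
 *		err = dev_set_mac_address(dev, &sa);
 *		rtnl_unlock();
 *		return err;
 *	}
 */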
4492 
4493 /*
4494  *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
4495  */
4496 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4497 {
4498 	int err;
4499 	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4500 
4501 	if (!dev)
4502 		return -ENODEV;
4503 
4504 	switch (cmd) {
4505 	case SIOCGIFFLAGS:	/* Get interface flags */
4506 		ifr->ifr_flags = (short) dev_get_flags(dev);
4507 		return 0;
4508 
4509 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4510 				   (currently unused) */
4511 		ifr->ifr_metric = 0;
4512 		return 0;
4513 
4514 	case SIOCGIFMTU:	/* Get the MTU of a device */
4515 		ifr->ifr_mtu = dev->mtu;
4516 		return 0;
4517 
4518 	case SIOCGIFHWADDR:
4519 		if (!dev->addr_len)
4520 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4521 		else
4522 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4523 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4524 		ifr->ifr_hwaddr.sa_family = dev->type;
4525 		return 0;
4526 
4527 	case SIOCGIFSLAVE:
4528 		err = -EINVAL;
4529 		break;
4530 
4531 	case SIOCGIFMAP:
4532 		ifr->ifr_map.mem_start = dev->mem_start;
4533 		ifr->ifr_map.mem_end   = dev->mem_end;
4534 		ifr->ifr_map.base_addr = dev->base_addr;
4535 		ifr->ifr_map.irq       = dev->irq;
4536 		ifr->ifr_map.dma       = dev->dma;
4537 		ifr->ifr_map.port      = dev->if_port;
4538 		return 0;
4539 
4540 	case SIOCGIFINDEX:
4541 		ifr->ifr_ifindex = dev->ifindex;
4542 		return 0;
4543 
4544 	case SIOCGIFTXQLEN:
4545 		ifr->ifr_qlen = dev->tx_queue_len;
4546 		return 0;
4547 
4548 	default:
4549 		/* dev_ioctl() should ensure this case
4550 		 * is never reached
4551 		 */
4552 		WARN_ON(1);
4553 		err = -EINVAL;
4554 		break;
4555 
4556 	}
4557 	return err;
4558 }
4559 
4560 /*
4561  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4562  */
4563 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4564 {
4565 	int err;
4566 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4567 	const struct net_device_ops *ops;
4568 
4569 	if (!dev)
4570 		return -ENODEV;
4571 
4572 	ops = dev->netdev_ops;
4573 
4574 	switch (cmd) {
4575 	case SIOCSIFFLAGS:	/* Set interface flags */
4576 		return dev_change_flags(dev, ifr->ifr_flags);
4577 
4578 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4579 				   (currently unused) */
4580 		return -EOPNOTSUPP;
4581 
4582 	case SIOCSIFMTU:	/* Set the MTU of a device */
4583 		return dev_set_mtu(dev, ifr->ifr_mtu);
4584 
4585 	case SIOCSIFHWADDR:
4586 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4587 
4588 	case SIOCSIFHWBROADCAST:
4589 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4590 			return -EINVAL;
4591 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4592 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4593 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4594 		return 0;
4595 
4596 	case SIOCSIFMAP:
4597 		if (ops->ndo_set_config) {
4598 			if (!netif_device_present(dev))
4599 				return -ENODEV;
4600 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4601 		}
4602 		return -EOPNOTSUPP;
4603 
4604 	case SIOCADDMULTI:
4605 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4606 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4607 			return -EINVAL;
4608 		if (!netif_device_present(dev))
4609 			return -ENODEV;
4610 		return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4611 				  dev->addr_len, 1);
4612 
4613 	case SIOCDELMULTI:
4614 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4615 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4616 			return -EINVAL;
4617 		if (!netif_device_present(dev))
4618 			return -ENODEV;
4619 		return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4620 				     dev->addr_len, 1);
4621 
4622 	case SIOCSIFTXQLEN:
4623 		if (ifr->ifr_qlen < 0)
4624 			return -EINVAL;
4625 		dev->tx_queue_len = ifr->ifr_qlen;
4626 		return 0;
4627 
4628 	case SIOCSIFNAME:
4629 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4630 		return dev_change_name(dev, ifr->ifr_newname);
4631 
4632 	/*
4633 	 *	Unknown or private ioctl
4634 	 */
4635 	default:
4636 		if ((cmd >= SIOCDEVPRIVATE &&
4637 		    cmd <= SIOCDEVPRIVATE + 15) ||
4638 		    cmd == SIOCBONDENSLAVE ||
4639 		    cmd == SIOCBONDRELEASE ||
4640 		    cmd == SIOCBONDSETHWADDR ||
4641 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4642 		    cmd == SIOCBONDINFOQUERY ||
4643 		    cmd == SIOCBONDCHANGEACTIVE ||
4644 		    cmd == SIOCGMIIPHY ||
4645 		    cmd == SIOCGMIIREG ||
4646 		    cmd == SIOCSMIIREG ||
4647 		    cmd == SIOCBRADDIF ||
4648 		    cmd == SIOCBRDELIF ||
4649 		    cmd == SIOCSHWTSTAMP ||
4650 		    cmd == SIOCWANDEV) {
4651 			err = -EOPNOTSUPP;
4652 			if (ops->ndo_do_ioctl) {
4653 				if (netif_device_present(dev))
4654 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4655 				else
4656 					err = -ENODEV;
4657 			}
4658 		} else
4659 			err = -EINVAL;
4660 
4661 	}
4662 	return err;
4663 }
4664 
4665 /*
4666  *	This function handles all "interface"-type I/O control requests. The actual
4667  *	'doing' part of this is dev_ifsioc above.
4668  */
4669 
4670 /**
4671  *	dev_ioctl	-	network device ioctl
4672  *	@net: the applicable net namespace
4673  *	@cmd: command to issue
4674  *	@arg: pointer to a struct ifreq in user space
4675  *
4676  *	Issue ioctl functions to devices. This is normally called by the
4677  *	user space syscall interfaces but can sometimes be useful for
4678  *	other purposes. The return value is the return from the syscall if
4679  *	positive or a negative errno code on error.
4680  */
4681 
4682 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4683 {
4684 	struct ifreq ifr;
4685 	int ret;
4686 	char *colon;
4687 
4688 	/* One special case: SIOCGIFCONF takes ifconf argument
4689 	   and requires shared lock, because it sleeps writing
4690 	   to user space.
4691 	 */
4692 
4693 	if (cmd == SIOCGIFCONF) {
4694 		rtnl_lock();
4695 		ret = dev_ifconf(net, (char __user *) arg);
4696 		rtnl_unlock();
4697 		return ret;
4698 	}
4699 	if (cmd == SIOCGIFNAME)
4700 		return dev_ifname(net, (struct ifreq __user *)arg);
4701 
4702 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4703 		return -EFAULT;
4704 
4705 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4706 
4707 	colon = strchr(ifr.ifr_name, ':');
4708 	if (colon)
4709 		*colon = 0;
4710 
4711 	/*
4712 	 *	See which interface the caller is talking about.
4713 	 */
4714 
4715 	switch (cmd) {
4716 	/*
4717 	 *	These ioctl calls:
4718 	 *	- can be done by all.
4719 	 *	- atomic and do not require locking.
4720 	 *	- return a value
4721 	 */
4722 	case SIOCGIFFLAGS:
4723 	case SIOCGIFMETRIC:
4724 	case SIOCGIFMTU:
4725 	case SIOCGIFHWADDR:
4726 	case SIOCGIFSLAVE:
4727 	case SIOCGIFMAP:
4728 	case SIOCGIFINDEX:
4729 	case SIOCGIFTXQLEN:
4730 		dev_load(net, ifr.ifr_name);
4731 		rcu_read_lock();
4732 		ret = dev_ifsioc_locked(net, &ifr, cmd);
4733 		rcu_read_unlock();
4734 		if (!ret) {
4735 			if (colon)
4736 				*colon = ':';
4737 			if (copy_to_user(arg, &ifr,
4738 					 sizeof(struct ifreq)))
4739 				ret = -EFAULT;
4740 		}
4741 		return ret;
4742 
4743 	case SIOCETHTOOL:
4744 		dev_load(net, ifr.ifr_name);
4745 		rtnl_lock();
4746 		ret = dev_ethtool(net, &ifr);
4747 		rtnl_unlock();
4748 		if (!ret) {
4749 			if (colon)
4750 				*colon = ':';
4751 			if (copy_to_user(arg, &ifr,
4752 					 sizeof(struct ifreq)))
4753 				ret = -EFAULT;
4754 		}
4755 		return ret;
4756 
4757 	/*
4758 	 *	These ioctl calls:
4759 	 *	- require superuser power.
4760 	 *	- require strict serialization.
4761 	 *	- return a value
4762 	 */
4763 	case SIOCGMIIPHY:
4764 	case SIOCGMIIREG:
4765 	case SIOCSIFNAME:
4766 		if (!capable(CAP_NET_ADMIN))
4767 			return -EPERM;
4768 		dev_load(net, ifr.ifr_name);
4769 		rtnl_lock();
4770 		ret = dev_ifsioc(net, &ifr, cmd);
4771 		rtnl_unlock();
4772 		if (!ret) {
4773 			if (colon)
4774 				*colon = ':';
4775 			if (copy_to_user(arg, &ifr,
4776 					 sizeof(struct ifreq)))
4777 				ret = -EFAULT;
4778 		}
4779 		return ret;
4780 
4781 	/*
4782 	 *	These ioctl calls:
4783 	 *	- require superuser power.
4784 	 *	- require strict serialization.
4785 	 *	- do not return a value
4786 	 */
4787 	case SIOCSIFFLAGS:
4788 	case SIOCSIFMETRIC:
4789 	case SIOCSIFMTU:
4790 	case SIOCSIFMAP:
4791 	case SIOCSIFHWADDR:
4792 	case SIOCSIFSLAVE:
4793 	case SIOCADDMULTI:
4794 	case SIOCDELMULTI:
4795 	case SIOCSIFHWBROADCAST:
4796 	case SIOCSIFTXQLEN:
4797 	case SIOCSMIIREG:
4798 	case SIOCBONDENSLAVE:
4799 	case SIOCBONDRELEASE:
4800 	case SIOCBONDSETHWADDR:
4801 	case SIOCBONDCHANGEACTIVE:
4802 	case SIOCBRADDIF:
4803 	case SIOCBRDELIF:
4804 	case SIOCSHWTSTAMP:
4805 		if (!capable(CAP_NET_ADMIN))
4806 			return -EPERM;
4807 		/* fall through */
4808 	case SIOCBONDSLAVEINFOQUERY:
4809 	case SIOCBONDINFOQUERY:
4810 		dev_load(net, ifr.ifr_name);
4811 		rtnl_lock();
4812 		ret = dev_ifsioc(net, &ifr, cmd);
4813 		rtnl_unlock();
4814 		return ret;
4815 
4816 	case SIOCGIFMEM:
4817 		/* Get the per device memory space. We can add this but
4818 		 * currently do not support it */
4819 	case SIOCSIFMEM:
4820 		/* Set the per device memory buffer space.
4821 		 * Not applicable in our case */
4822 	case SIOCSIFLINK:
4823 		return -EINVAL;
4824 
4825 	/*
4826 	 *	Unknown or private ioctl.
4827 	 */
4828 	default:
4829 		if (cmd == SIOCWANDEV ||
4830 		    (cmd >= SIOCDEVPRIVATE &&
4831 		     cmd <= SIOCDEVPRIVATE + 15)) {
4832 			dev_load(net, ifr.ifr_name);
4833 			rtnl_lock();
4834 			ret = dev_ifsioc(net, &ifr, cmd);
4835 			rtnl_unlock();
4836 			if (!ret && copy_to_user(arg, &ifr,
4837 						 sizeof(struct ifreq)))
4838 				ret = -EFAULT;
4839 			return ret;
4840 		}
4841 		/* Take care of Wireless Extensions */
4842 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4843 			return wext_handle_ioctl(net, &ifr, cmd, arg);
4844 		return -EINVAL;
4845 	}
4846 }
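
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * from user space these requests arrive through any socket with a
 * struct ifreq argument.  A minimal program querying the MTU of "eth0"
 * might look like the following (error handling abbreviated):
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *
 *	int main(void)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *			printf("mtu %d\n", ifr.ifr_mtu);
 *		close(fd);
 *		return 0;
 *	}
 */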
4847 
4848 
4849 /**
4850  *	dev_new_index	-	allocate an ifindex
4851  *	@net: the applicable net namespace
4852  *
4853  *	Returns a suitable unique value for a new device interface
4854  *	number.  The caller must hold the rtnl semaphore or the
4855  *	dev_base_lock to be sure it remains unique.
4856  */
4857 static int dev_new_index(struct net *net)
4858 {
4859 	static int ifindex;
4860 	for (;;) {
4861 		if (++ifindex <= 0)
4862 			ifindex = 1;
4863 		if (!__dev_get_by_index(net, ifindex))
4864 			return ifindex;
4865 	}
4866 }
4867 
4868 /* Delayed registration/unregisteration */
4869 static LIST_HEAD(net_todo_list);
4870 
4871 static void net_set_todo(struct net_device *dev)
4872 {
4873 	list_add_tail(&dev->todo_list, &net_todo_list);
4874 }
4875 
4876 static void rollback_registered_many(struct list_head *head)
4877 {
4878 	struct net_device *dev, *tmp;
4879 
4880 	BUG_ON(dev_boot_phase);
4881 	ASSERT_RTNL();
4882 
4883 	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
4884 		/* Some devices call without registering
4885 		 * for initialization unwind. Remove those
4886 		 * devices and proceed with the remaining.
4887 		 */
4888 		if (dev->reg_state == NETREG_UNINITIALIZED) {
4889 			pr_debug("unregister_netdevice: device %s/%p never "
4890 				 "was registered\n", dev->name, dev);
4891 
4892 			WARN_ON(1);
4893 			list_del(&dev->unreg_list);
4894 			continue;
4895 		}
4896 
4897 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
4898 
4899 		/* If device is running, close it first. */
4900 		dev_close(dev);
4901 
4902 		/* And unlink it from device chain. */
4903 		unlist_netdevice(dev);
4904 
4905 		dev->reg_state = NETREG_UNREGISTERING;
4906 	}
4907 
4908 	synchronize_net();
4909 
4910 	list_for_each_entry(dev, head, unreg_list) {
4911 		/* Shutdown queueing discipline. */
4912 		dev_shutdown(dev);
4913 
4914 
4915 		/* Notify protocols, that we are about to destroy
4916 		/* Notify protocols that we are about to destroy
4917 		*/
4918 		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4919 
4920 		if (!dev->rtnl_link_ops ||
4921 		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
4922 			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
4923 
4924 		/*
4925 		 *	Flush the unicast and multicast chains
4926 		 */
4927 		dev_unicast_flush(dev);
4928 		dev_addr_discard(dev);
4929 
4930 		if (dev->netdev_ops->ndo_uninit)
4931 			dev->netdev_ops->ndo_uninit(dev);
4932 
4933 		/* Notifier chain MUST detach us from master device. */
4934 		WARN_ON(dev->master);
4935 
4936 		/* Remove entries from kobject tree */
4937 		netdev_unregister_kobject(dev);
4938 	}
4939 
4940 	/* Process any work delayed until the end of the batch */
4941 	dev = list_first_entry(head, struct net_device, unreg_list);
4942 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
4943 
4944 	synchronize_net();
4945 
4946 	list_for_each_entry(dev, head, unreg_list)
4947 		dev_put(dev);
4948 }
4949 
4950 static void rollback_registered(struct net_device *dev)
4951 {
4952 	LIST_HEAD(single);
4953 
4954 	list_add(&dev->unreg_list, &single);
4955 	rollback_registered_many(&single);
4956 }
4957 
4958 static void __netdev_init_queue_locks_one(struct net_device *dev,
4959 					  struct netdev_queue *dev_queue,
4960 					  void *_unused)
4961 {
4962 	spin_lock_init(&dev_queue->_xmit_lock);
4963 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4964 	dev_queue->xmit_lock_owner = -1;
4965 }
4966 
4967 static void netdev_init_queue_locks(struct net_device *dev)
4968 {
4969 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4970 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4971 }
4972 
4973 unsigned long netdev_fix_features(unsigned long features, const char *name)
4974 {
4975 	/* Fix illegal SG+CSUM combinations. */
4976 	if ((features & NETIF_F_SG) &&
4977 	    !(features & NETIF_F_ALL_CSUM)) {
4978 		if (name)
4979 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4980 			       "checksum feature.\n", name);
4981 		features &= ~NETIF_F_SG;
4982 	}
4983 
4984 	/* TSO requires that SG is present as well. */
4985 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4986 		if (name)
4987 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4988 			       "SG feature.\n", name);
4989 		features &= ~NETIF_F_TSO;
4990 	}
4991 
4992 	if (features & NETIF_F_UFO) {
4993 		if (!(features & NETIF_F_GEN_CSUM)) {
4994 			if (name)
4995 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4996 				       "since no NETIF_F_HW_CSUM feature.\n",
4997 				       name);
4998 			features &= ~NETIF_F_UFO;
4999 		}
5000 
5001 		if (!(features & NETIF_F_SG)) {
5002 			if (name)
5003 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
5004 				       "since no NETIF_F_SG feature.\n", name);
5005 			features &= ~NETIF_F_UFO;
5006 		}
5007 	}
5008 
5009 	return features;
5010 }
5011 EXPORT_SYMBOL(netdev_fix_features);
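
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * a driver can run the feature set it would like to advertise through
 * netdev_fix_features() before registration; illegal combinations are
 * dropped with a notice.  In the sketch below NETIF_F_SG is requested
 * without any checksum feature, so both NETIF_F_SG and the dependent
 * NETIF_F_TSO will be stripped.  The "example_" name is invented for
 * illustration.
 *
 *	static void example_setup_features(struct net_device *dev)
 *	{
 *		unsigned long wanted = NETIF_F_SG | NETIF_F_TSO;
 *
 *		dev->features = netdev_fix_features(wanted, dev->name);
 *	}
 */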
5012 
5013 /**
5014  *	netif_stacked_transfer_operstate -	transfer operstate
5015  *	@rootdev: the root or lower level device to transfer state from
5016  *	@dev: the device to transfer operstate to
5017  *
5018  *	Transfer operational state from root to device. This is normally
5019  *	called when a stacking relationship exists between the root
5020  *	device and the device (a leaf device).
5021  */
5022 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5023 					struct net_device *dev)
5024 {
5025 	if (rootdev->operstate == IF_OPER_DORMANT)
5026 		netif_dormant_on(dev);
5027 	else
5028 		netif_dormant_off(dev);
5029 
5030 	if (netif_carrier_ok(rootdev)) {
5031 		if (!netif_carrier_ok(dev))
5032 			netif_carrier_on(dev);
5033 	} else {
5034 		if (netif_carrier_ok(dev))
5035 			netif_carrier_off(dev);
5036 	}
5037 }
5038 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5039 
5040 /**
5041  *	register_netdevice	- register a network device
5042  *	@dev: device to register
5043  *
5044  *	Take a completed network device structure and add it to the kernel
5045  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5046  *	chain. 0 is returned on success. A negative errno code is returned
5047  *	on a failure to set up the device, or if the name is a duplicate.
5048  *
5049  *	Callers must hold the rtnl semaphore. You may want
5050  *	register_netdev() instead of this.
5051  *
5052  *	BUGS:
5053  *	The locking appears insufficient to guarantee two parallel registers
5054  *	will not get the same name.
5055  */
5056 
5057 int register_netdevice(struct net_device *dev)
5058 {
5059 	int ret;
5060 	struct net *net = dev_net(dev);
5061 
5062 	BUG_ON(dev_boot_phase);
5063 	ASSERT_RTNL();
5064 
5065 	might_sleep();
5066 
5067 	/* When net_device's are persistent, this will be fatal. */
5068 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5069 	BUG_ON(!net);
5070 
5071 	spin_lock_init(&dev->addr_list_lock);
5072 	netdev_set_addr_lockdep_class(dev);
5073 	netdev_init_queue_locks(dev);
5074 
5075 	dev->iflink = -1;
5076 
5077 	/* Init, if this function is available */
5078 	if (dev->netdev_ops->ndo_init) {
5079 		ret = dev->netdev_ops->ndo_init(dev);
5080 		if (ret) {
5081 			if (ret > 0)
5082 				ret = -EIO;
5083 			goto out;
5084 		}
5085 	}
5086 
5087 	ret = dev_get_valid_name(net, dev->name, dev->name, 0);
5088 	if (ret)
5089 		goto err_uninit;
5090 
5091 	dev->ifindex = dev_new_index(net);
5092 	if (dev->iflink == -1)
5093 		dev->iflink = dev->ifindex;
5094 
5095 	/* Fix illegal checksum combinations */
5096 	if ((dev->features & NETIF_F_HW_CSUM) &&
5097 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5098 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
5099 		       dev->name);
5100 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5101 	}
5102 
5103 	if ((dev->features & NETIF_F_NO_CSUM) &&
5104 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5105 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
5106 		       dev->name);
5107 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5108 	}
5109 
5110 	dev->features = netdev_fix_features(dev->features, dev->name);
5111 
5112 	/* Enable software GSO if SG is supported. */
5113 	if (dev->features & NETIF_F_SG)
5114 		dev->features |= NETIF_F_GSO;
5115 
5116 	netdev_initialize_kobject(dev);
5117 
5118 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5119 	ret = notifier_to_errno(ret);
5120 	if (ret)
5121 		goto err_uninit;
5122 
5123 	ret = netdev_register_kobject(dev);
5124 	if (ret)
5125 		goto err_uninit;
5126 	dev->reg_state = NETREG_REGISTERED;
5127 
5128 	/*
5129 	 *	Default initial state at registry is that the
5130 	 *	device is present.
5131 	 */
5132 
5133 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5134 
5135 	dev_init_scheduler(dev);
5136 	dev_hold(dev);
5137 	list_netdevice(dev);
5138 
5139 	/* Notify protocols that a new device appeared. */
5140 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5141 	ret = notifier_to_errno(ret);
5142 	if (ret) {
5143 		rollback_registered(dev);
5144 		dev->reg_state = NETREG_UNREGISTERED;
5145 	}
5146 	/*
5147 	 *	Prevent userspace races by waiting until the network
5148 	 *	device is fully setup before sending notifications.
5149 	 */
5150 	if (!dev->rtnl_link_ops ||
5151 	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5152 		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5153 
5154 out:
5155 	return ret;
5156 
5157 err_uninit:
5158 	if (dev->netdev_ops->ndo_uninit)
5159 		dev->netdev_ops->ndo_uninit(dev);
5160 	goto out;
5161 }
5162 EXPORT_SYMBOL(register_netdevice);
5163 
5164 /**
5165  *	init_dummy_netdev	- init a dummy network device for NAPI
5166  *	@dev: device to init
5167  *
5168  *	This takes a network device structure and initializes the minimum
5169  *	set of fields so it can be used to schedule NAPI polls without
5170  *	registering a full blown interface. This is to be used by drivers
5171  *	that need to tie several hardware interfaces to a single NAPI
5172  *	poll scheduler due to HW limitations.
5173  */
5174 int init_dummy_netdev(struct net_device *dev)
5175 {
5176 	/* Clear everything. Note we don't initialize spinlocks
5177 	 * as they aren't supposed to be taken by any of the
5178 	 * NAPI code and this dummy netdev is supposed to be
5179 	 * only ever used for NAPI polls
5180 	 */
5181 	memset(dev, 0, sizeof(struct net_device));
5182 
5183 	/* make sure we BUG if trying to hit standard
5184 	 * register/unregister code path
5185 	 */
5186 	dev->reg_state = NETREG_DUMMY;
5187 
5188 	/* initialize the ref count */
5189 	atomic_set(&dev->refcnt, 1);
5190 
5191 	/* NAPI wants this */
5192 	INIT_LIST_HEAD(&dev->napi_list);
5193 
5194 	/* a dummy interface is started by default */
5195 	set_bit(__LINK_STATE_PRESENT, &dev->state);
5196 	set_bit(__LINK_STATE_START, &dev->state);
5197 
5198 	return 0;
5199 }
5200 EXPORT_SYMBOL_GPL(init_dummy_netdev);
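
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * a driver whose hardware cannot give every registered interface its own
 * NAPI context can hang a shared napi_struct off a dummy netdev.  The
 * "example_" names, including the example_rx_clean() helper that would
 * drain the hardware ring, are invented for illustration.
 *
 *	struct example_adapter {
 *		struct net_device dummy_dev;
 *		struct napi_struct napi;
 *	};
 *
 *	static int example_poll(struct napi_struct *napi, int budget)
 *	{
 *		int done = example_rx_clean(napi, budget);
 *
 *		if (done < budget)
 *			napi_complete(napi);
 *		return done;
 *	}
 *
 *	static void example_napi_setup(struct example_adapter *adapter)
 *	{
 *		init_dummy_netdev(&adapter->dummy_dev);
 *		netif_napi_add(&adapter->dummy_dev, &adapter->napi,
 *			       example_poll, 64);
 *	}
 */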
5201 
5202 
5203 /**
5204  *	register_netdev	- register a network device
5205  *	@dev: device to register
5206  *
5207  *	Take a completed network device structure and add it to the kernel
5208  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5209  *	chain. 0 is returned on success. A negative errno code is returned
5210  *	on a failure to set up the device, or if the name is a duplicate.
5211  *
5212  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
5213  *	and expands the device name if you passed a format string to
5214  *	alloc_netdev.
5215  */
5216 int register_netdev(struct net_device *dev)
5217 {
5218 	int err;
5219 
5220 	rtnl_lock();
5221 
5222 	/*
5223 	 * If the name is a format string the caller wants us to do a
5224 	 * name allocation.
5225 	 */
5226 	if (strchr(dev->name, '%')) {
5227 		err = dev_alloc_name(dev, dev->name);
5228 		if (err < 0)
5229 			goto out;
5230 	}
5231 
5232 	err = register_netdevice(dev);
5233 out:
5234 	rtnl_unlock();
5235 	return err;
5236 }
5237 EXPORT_SYMBOL(register_netdev);
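
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * a typical Ethernet driver probe path allocates the netdevice (here via
 * alloc_etherdev(), which uses the "eth%d" name template), fills in its
 * ops and hardware address, and only then calls register_netdev(),
 * releasing the structure with free_netdev() on failure.  The "example_"
 * names, including the ndo callbacks and private struct that are not
 * shown, are invented for illustration.
 *
 *	static const struct net_device_ops example_netdev_ops = {
 *		.ndo_open	= example_open,
 *		.ndo_stop	= example_stop,
 *		.ndo_start_xmit	= example_start_xmit,
 *	};
 *
 *	static int example_probe(void)
 *	{
 *		struct net_device *dev;
 *		int err;
 *
 *		dev = alloc_etherdev(sizeof(struct example_priv));
 *		if (!dev)
 *			return -ENOMEM;
 *
 *		dev->netdev_ops = &example_netdev_ops;
 *		random_ether_addr(dev->dev_addr);
 *
 *		err = register_netdev(dev);
 *		if (err)
 *			free_netdev(dev);
 *		return err;
 *	}
 */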
5238 
5239 /*
5240  * netdev_wait_allrefs - wait until all references are gone.
5241  *
5242  * This is called when unregistering network devices.
5243  *
5244  * Any protocol or device that holds a reference should register
5245  * for netdevice notification, and cleanup and put back the
5246  * reference if they receive an UNREGISTER event.
5247  * We can get stuck here if buggy protocols don't correctly
5248  * call dev_put.
5249  */
5250 static void netdev_wait_allrefs(struct net_device *dev)
5251 {
5252 	unsigned long rebroadcast_time, warning_time;
5253 
5254 	linkwatch_forget_dev(dev);
5255 
5256 	rebroadcast_time = warning_time = jiffies;
5257 	while (atomic_read(&dev->refcnt) != 0) {
5258 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5259 			rtnl_lock();
5260 
5261 			/* Rebroadcast unregister notification */
5262 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5263 			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
5264 			 * should have already handled it the first time */
5265 
5266 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5267 				     &dev->state)) {
5268 				/* We must not have linkwatch events
5269 				 * pending on unregister. If this
5270 				 * happens, we simply run the queue
5271 				 * unscheduled, resulting in a noop
5272 				 * for this device.
5273 				 */
5274 				linkwatch_run_queue();
5275 			}
5276 
5277 			__rtnl_unlock();
5278 
5279 			rebroadcast_time = jiffies;
5280 		}
5281 
5282 		msleep(250);
5283 
5284 		if (time_after(jiffies, warning_time + 10 * HZ)) {
5285 			printk(KERN_EMERG "unregister_netdevice: "
5286 			       "waiting for %s to become free. Usage "
5287 			       "count = %d\n",
5288 			       dev->name, atomic_read(&dev->refcnt));
5289 			warning_time = jiffies;
5290 		}
5291 	}
5292 }
5293 
5294 /* The sequence is:
5295  *
5296  *	rtnl_lock();
5297  *	...
5298  *	register_netdevice(x1);
5299  *	register_netdevice(x2);
5300  *	...
5301  *	unregister_netdevice(y1);
5302  *	unregister_netdevice(y2);
5303  *      ...
5304  *	rtnl_unlock();
5305  *	free_netdev(y1);
5306  *	free_netdev(y2);
5307  *
5308  * We are invoked by rtnl_unlock().
5309  * This allows us to deal with problems:
5310  * 1) We can delete sysfs objects which invoke hotplug
5311  *    without deadlocking with linkwatch via keventd.
5312  * 2) Since we run with the RTNL semaphore not held, we can sleep
5313  *    safely in order to wait for the netdev refcnt to drop to zero.
5314  *
5315  * We must not return until all unregister events added during
5316  * the interval the lock was held have been completed.
5317  */
5318 void netdev_run_todo(void)
5319 {
5320 	struct list_head list;
5321 
5322 	/* Snapshot list, allow later requests */
5323 	list_replace_init(&net_todo_list, &list);
5324 
5325 	__rtnl_unlock();
5326 
5327 	while (!list_empty(&list)) {
5328 		struct net_device *dev
5329 			= list_first_entry(&list, struct net_device, todo_list);
5330 		list_del(&dev->todo_list);
5331 
5332 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5333 			printk(KERN_ERR "network todo '%s' but state %d\n",
5334 			       dev->name, dev->reg_state);
5335 			dump_stack();
5336 			continue;
5337 		}
5338 
5339 		dev->reg_state = NETREG_UNREGISTERED;
5340 
5341 		on_each_cpu(flush_backlog, dev, 1);
5342 
5343 		netdev_wait_allrefs(dev);
5344 
5345 		/* paranoia */
5346 		BUG_ON(atomic_read(&dev->refcnt));
5347 		WARN_ON(dev->ip_ptr);
5348 		WARN_ON(dev->ip6_ptr);
5349 		WARN_ON(dev->dn_ptr);
5350 
5351 		if (dev->destructor)
5352 			dev->destructor(dev);
5353 
5354 		/* Free network device */
5355 		kobject_put(&dev->dev.kobj);
5356 	}
5357 }
5358 
5359 /**
5360  *	dev_txq_stats_fold - fold tx_queues stats
5361  *	@dev: device to get statistics from
5362  *	@stats: struct net_device_stats to hold results
5363  */
5364 void dev_txq_stats_fold(const struct net_device *dev,
5365 			struct net_device_stats *stats)
5366 {
5367 	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5368 	unsigned int i;
5369 	struct netdev_queue *txq;
5370 
5371 	for (i = 0; i < dev->num_tx_queues; i++) {
5372 		txq = netdev_get_tx_queue(dev, i);
5373 		tx_bytes   += txq->tx_bytes;
5374 		tx_packets += txq->tx_packets;
5375 		tx_dropped += txq->tx_dropped;
5376 	}
5377 	if (tx_bytes || tx_packets || tx_dropped) {
5378 		stats->tx_bytes   = tx_bytes;
5379 		stats->tx_packets = tx_packets;
5380 		stats->tx_dropped = tx_dropped;
5381 	}
5382 }
5383 EXPORT_SYMBOL(dev_txq_stats_fold);
5384 
5385 /**
5386  *	dev_get_stats	- get network device statistics
5387  *	@dev: device to get statistics from
5388  *
5389  *	Get network statistics from device. The device driver may provide
5390  *	its own method by setting dev->netdev_ops->get_stats; otherwise
5391  *	the internal statistics structure is used.
5392  */
5393 const struct net_device_stats *dev_get_stats(struct net_device *dev)
5394 {
5395 	const struct net_device_ops *ops = dev->netdev_ops;
5396 
5397 	if (ops->ndo_get_stats)
5398 		return ops->ndo_get_stats(dev);
5399 
5400 	dev_txq_stats_fold(dev, &dev->stats);
5401 	return &dev->stats;
5402 }
5403 EXPORT_SYMBOL(dev_get_stats);
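
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * a driver that wants to fold hardware counters into its statistics can
 * provide ndo_get_stats and update dev->stats there; drivers without the
 * hook simply get the default behaviour above.  The "example_" names,
 * including the example_read_rx_drops() accessor that would return a
 * cumulative hardware drop count, are invented for illustration.
 *
 *	static struct net_device_stats *example_get_stats(struct net_device *dev)
 *	{
 *		struct example_priv *p = netdev_priv(dev);
 *
 *		dev->stats.rx_dropped = example_read_rx_drops(p);
 *		return &dev->stats;
 *	}
 */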
5404 
5405 static void netdev_init_one_queue(struct net_device *dev,
5406 				  struct netdev_queue *queue,
5407 				  void *_unused)
5408 {
5409 	queue->dev = dev;
5410 }
5411 
5412 static void netdev_init_queues(struct net_device *dev)
5413 {
5414 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5415 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5416 	spin_lock_init(&dev->tx_global_lock);
5417 }
5418 
5419 /**
5420  *	alloc_netdev_mq - allocate network device
5421  *	@sizeof_priv:	size of private data to allocate space for
5422  *	@name:		device name format string
5423  *	@setup:		callback to initialize device
5424  *	@queue_count:	the number of subqueues to allocate
5425  *
5426  *	Allocates a struct net_device with private data area for driver use
5427  *	and performs basic initialization.  Also allocates subqueue structs
5428  *	for each queue on the device at the end of the netdevice.
5429  */
5430 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5431 		void (*setup)(struct net_device *), unsigned int queue_count)
5432 {
5433 	struct netdev_queue *tx;
5434 	struct net_device *dev;
5435 	size_t alloc_size;
5436 	struct net_device *p;
5437 
5438 	BUG_ON(strlen(name) >= sizeof(dev->name));
5439 
5440 	alloc_size = sizeof(struct net_device);
5441 	if (sizeof_priv) {
5442 		/* ensure 32-byte alignment of private area */
5443 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5444 		alloc_size += sizeof_priv;
5445 	}
5446 	/* ensure 32-byte alignment of whole construct */
5447 	alloc_size += NETDEV_ALIGN - 1;
5448 
5449 	p = kzalloc(alloc_size, GFP_KERNEL);
5450 	if (!p) {
5451 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5452 		return NULL;
5453 	}
5454 
5455 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5456 	if (!tx) {
5457 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5458 		       "tx qdiscs.\n");
5459 		goto free_p;
5460 	}
5461 
5462 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5463 	dev->padded = (char *)dev - (char *)p;
5464 
5465 	if (dev_addr_init(dev))
5466 		goto free_tx;
5467 
5468 	dev_unicast_init(dev);
5469 
5470 	dev_net_set(dev, &init_net);
5471 
5472 	dev->_tx = tx;
5473 	dev->num_tx_queues = queue_count;
5474 	dev->real_num_tx_queues = queue_count;
5475 
5476 	dev->gso_max_size = GSO_MAX_SIZE;
5477 
5478 	netdev_init_queues(dev);
5479 
5480 	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
5481 	dev->ethtool_ntuple_list.count = 0;
5482 	INIT_LIST_HEAD(&dev->napi_list);
5483 	INIT_LIST_HEAD(&dev->unreg_list);
5484 	INIT_LIST_HEAD(&dev->link_watch_list);
5485 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5486 	setup(dev);
5487 	strcpy(dev->name, name);
5488 	return dev;
5489 
5490 free_tx:
5491 	kfree(tx);
5492 
5493 free_p:
5494 	kfree(p);
5495 	return NULL;
5496 }
5497 EXPORT_SYMBOL(alloc_netdev_mq);
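
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * a multiqueue driver passes its private-area size, a name template and a
 * setup callback; the callback is where link-layer defaults (supplied here
 * by ether_setup()) and driver defaults are applied before registration.
 * The "example_" names are invented for illustration.
 *
 *	static void example_setup(struct net_device *dev)
 *	{
 *		ether_setup(dev);
 *		dev->tx_queue_len = 1000;
 *	}
 *
 *	static struct net_device *example_alloc(void)
 *	{
 *		return alloc_netdev_mq(sizeof(struct example_priv), "example%d",
 *				       example_setup, 8);
 *	}
 */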
5498 
5499 /**
5500  *	free_netdev - free network device
5501  *	@dev: device
5502  *
5503  *	This function does the last stage of destroying an allocated device
5504  * 	interface. The reference to the device object is released.
5505  *	If this is the last reference then it will be freed.
5506  */
5507 void free_netdev(struct net_device *dev)
5508 {
5509 	struct napi_struct *p, *n;
5510 
5511 	release_net(dev_net(dev));
5512 
5513 	kfree(dev->_tx);
5514 
5515 	/* Flush device addresses */
5516 	dev_addr_flush(dev);
5517 
5518 	/* Clear ethtool n-tuple list */
5519 	ethtool_ntuple_flush(dev);
5520 
5521 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5522 		netif_napi_del(p);
5523 
5524 	/*  Compatibility with error handling in drivers */
5525 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5526 		kfree((char *)dev - dev->padded);
5527 		return;
5528 	}
5529 
5530 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5531 	dev->reg_state = NETREG_RELEASED;
5532 
5533 	/* will free via device release */
5534 	put_device(&dev->dev);
5535 }
5536 EXPORT_SYMBOL(free_netdev);
5537 
5538 /**
5539  *	synchronize_net -  Synchronize with packet receive processing
5540  *
5541  *	Wait for packets currently being received to be done.
5542  *	Does not block later packets from starting.
5543  */
5544 void synchronize_net(void)
5545 {
5546 	might_sleep();
5547 	synchronize_rcu();
5548 }
5549 EXPORT_SYMBOL(synchronize_net);
5550 
5551 /**
5552  *	unregister_netdevice_queue - remove device from the kernel
5553  *	@dev: device
5554  *	@head: list
5555  *
5556  *	This function shuts down a device interface and removes it
5557  *	from the kernel tables.
5558  *	If head is not NULL, the device is queued to be unregistered later.
5559  *
5560  *	Callers must hold the rtnl semaphore.  You may want
5561  *	unregister_netdev() instead of this.
5562  */
5563 
5564 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
5565 {
5566 	ASSERT_RTNL();
5567 
5568 	if (head) {
5569 		list_move_tail(&dev->unreg_list, head);
5570 	} else {
5571 		rollback_registered(dev);
5572 		/* Finish processing unregister after unlock */
5573 		net_set_todo(dev);
5574 	}
5575 }
5576 EXPORT_SYMBOL(unregister_netdevice_queue);
5577 
5578 /**
5579  *	unregister_netdevice_many - unregister many devices
5580  *	@head: list of devices
5581  */
5582 void unregister_netdevice_many(struct list_head *head)
5583 {
5584 	struct net_device *dev;
5585 
5586 	if (!list_empty(head)) {
5587 		rollback_registered_many(head);
5588 		list_for_each_entry(dev, head, unreg_list)
5589 			net_set_todo(dev);
5590 	}
5591 }
5592 EXPORT_SYMBOL(unregister_netdevice_many);
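
/*
 * Usage sketch (editorial illustration, not part of the original file):
 * tearing down several devices in one batch is cheaper than unregistering
 * them one by one, because the notifier and synchronize_net() work is
 * shared.  A caller queues each device with unregister_netdevice_queue()
 * and then flushes the list once.  The "example_" name is invented for
 * illustration.
 *
 *	static void example_destroy_all(struct net_device *devs[], int n)
 *	{
 *		LIST_HEAD(list);
 *		int i;
 *
 *		rtnl_lock();
 *		for (i = 0; i < n; i++)
 *			unregister_netdevice_queue(devs[i], &list);
 *		unregister_netdevice_many(&list);
 *		rtnl_unlock();
 *	}
 */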
5593 
5594 /**
5595  *	unregister_netdev - remove device from the kernel
5596  *	@dev: device
5597  *
5598  *	This function shuts down a device interface and removes it
5599  *	from the kernel tables.
5600  *
5601  *	This is just a wrapper for unregister_netdevice that takes
5602  *	the rtnl semaphore.  In general you want to use this and not
5603  *	unregister_netdevice.
5604  */
5605 void unregister_netdev(struct net_device *dev)
5606 {
5607 	rtnl_lock();
5608 	unregister_netdevice(dev);
5609 	rtnl_unlock();
5610 }
5611 EXPORT_SYMBOL(unregister_netdev);
5612 
5613 /**
5614  *	dev_change_net_namespace - move device to a different network namespace
5615  *	@dev: device
5616  *	@net: network namespace
5617  *	@pat: If not NULL name pattern to try if the current device name
5618  *	      is already taken in the destination network namespace.
5619  *
5620  *	This function shuts down a device interface and moves it
5621  *	to a new network namespace. On success 0 is returned, on
5622  *	a failure a negative errno code is returned.
5623  *
5624  *	Callers must hold the rtnl semaphore.
5625  */
5626 
5627 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5628 {
5629 	int err;
5630 
5631 	ASSERT_RTNL();
5632 
5633 	/* Don't allow namespace local devices to be moved. */
5634 	err = -EINVAL;
5635 	if (dev->features & NETIF_F_NETNS_LOCAL)
5636 		goto out;
5637 
5638 #ifdef CONFIG_SYSFS
5639 	/* Don't allow real devices to be moved when sysfs
5640 	 * is enabled.
5641 	 */
5642 	err = -EINVAL;
5643 	if (dev->dev.parent)
5644 		goto out;
5645 #endif
5646 
5647 	/* Ensure the device has been registered */
5648 	err = -EINVAL;
5649 	if (dev->reg_state != NETREG_REGISTERED)
5650 		goto out;
5651 
5652 	/* Get out if there is nothing to do */
5653 	err = 0;
5654 	if (net_eq(dev_net(dev), net))
5655 		goto out;
5656 
5657 	/* Pick the destination device name, and ensure
5658 	 * we can use it in the destination network namespace.
5659 	 */
5660 	err = -EEXIST;
5661 	if (__dev_get_by_name(net, dev->name)) {
5662 		/* We get here if we can't use the current device name */
5663 		if (!pat)
5664 			goto out;
5665 		if (dev_get_valid_name(net, pat, dev->name, 1))
5666 			goto out;
5667 	}
5668 
5669 	/*
5670 	 * And now a mini version of register_netdevice and unregister_netdevice.
5671 	 */
5672 
5673 	/* If device is running close it first. */
5674 	dev_close(dev);
5675 
5676 	/* And unlink it from device chain */
5677 	err = -ENODEV;
5678 	unlist_netdevice(dev);
5679 
5680 	synchronize_net();
5681 
5682 	/* Shutdown queueing discipline. */
5683 	dev_shutdown(dev);
5684 
5685 	/* Notify protocols that we are about to destroy
5686 	   this device. They should clean all the things.
5687 	*/
5688 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5689 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5690 
5691 	/*
5692 	 *	Flush the unicast and multicast chains
5693 	 */
5694 	dev_unicast_flush(dev);
5695 	dev_addr_discard(dev);
5696 
5697 	netdev_unregister_kobject(dev);
5698 
5699 	/* Actually switch the network namespace */
5700 	dev_net_set(dev, net);
5701 
5702 	/* If there is an ifindex conflict assign a new one */
5703 	if (__dev_get_by_index(net, dev->ifindex)) {
5704 		int iflink = (dev->iflink == dev->ifindex);
5705 		dev->ifindex = dev_new_index(net);
5706 		if (iflink)
5707 			dev->iflink = dev->ifindex;
5708 	}
5709 
5710 	/* Fixup kobjects */
5711 	err = netdev_register_kobject(dev);
5712 	WARN_ON(err);
5713 
5714 	/* Add the device back in the hashes */
5715 	list_netdevice(dev);
5716 
5717 	/* Notify protocols that a new device appeared. */
5718 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5719 
5720 	/*
5721 	 *	Prevent userspace races by waiting until the network
5722 	 *	device is fully setup before sending notifications.
5723 	 */
5724 	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5725 
5726 	synchronize_net();
5727 	err = 0;
5728 out:
5729 	return err;
5730 }
5731 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
5732 
5733 static int dev_cpu_callback(struct notifier_block *nfb,
5734 			    unsigned long action,
5735 			    void *ocpu)
5736 {
5737 	struct sk_buff **list_skb;
5738 	struct Qdisc **list_net;
5739 	struct sk_buff *skb;
5740 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5741 	struct softnet_data *sd, *oldsd;
5742 
5743 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5744 		return NOTIFY_OK;
5745 
5746 	local_irq_disable();
5747 	cpu = smp_processor_id();
5748 	sd = &per_cpu(softnet_data, cpu);
5749 	oldsd = &per_cpu(softnet_data, oldcpu);
5750 
5751 	/* Find end of our completion_queue. */
5752 	list_skb = &sd->completion_queue;
5753 	while (*list_skb)
5754 		list_skb = &(*list_skb)->next;
5755 	/* Append completion queue from offline CPU. */
5756 	*list_skb = oldsd->completion_queue;
5757 	oldsd->completion_queue = NULL;
5758 
5759 	/* Find end of our output_queue. */
5760 	list_net = &sd->output_queue;
5761 	while (*list_net)
5762 		list_net = &(*list_net)->next_sched;
5763 	/* Append output queue from offline CPU. */
5764 	*list_net = oldsd->output_queue;
5765 	oldsd->output_queue = NULL;
5766 
5767 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5768 	local_irq_enable();
5769 
5770 	/* Process offline CPU's input_pkt_queue */
5771 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5772 		netif_rx(skb);
5773 
5774 	return NOTIFY_OK;
5775 }
5776 
5777 
5778 /**
5779  *	netdev_increment_features - increment feature set by one
5780  *	@all: current feature set
5781  *	@one: new feature set
5782  *	@mask: mask feature set
5783  *
5784  *	Computes a new feature set after adding a device with feature set
5785  *	@one to the master device with current feature set @all.  Will not
5786  *	enable anything that is off in @mask. Returns the new feature set.
5787  */
5788 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5789 					unsigned long mask)
5790 {
5791 	/* If the new device needs checksumming, downgrade to it. */
5792 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5793 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5794 	else if (mask & NETIF_F_ALL_CSUM) {
5795 		/* If one device supports v4/v6 checksumming, set for all. */
5796 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5797 		    !(all & NETIF_F_GEN_CSUM)) {
5798 			all &= ~NETIF_F_ALL_CSUM;
5799 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5800 		}
5801 
5802 		/* If one device supports hw checksumming, set for all. */
5803 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5804 			all &= ~NETIF_F_ALL_CSUM;
5805 			all |= NETIF_F_HW_CSUM;
5806 		}
5807 	}
5808 
5809 	one |= NETIF_F_ALL_CSUM;
5810 
5811 	one |= all & NETIF_F_ONE_FOR_ALL;
5812 	all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
5813 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5814 
5815 	return all;
5816 }
5817 EXPORT_SYMBOL(netdev_increment_features);
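
/*
 * Illustrative use only (not part of this file): a driver that aggregates
 * several lower devices would typically fold their feature sets together
 * roughly as sketched below.  The names "master", "port", "port_list" and
 * "feature_mask" are hypothetical placeholders for the aggregating device
 * and its ports, not identifiers defined here:
 *
 *	unsigned long features = master->feature_mask;
 *
 *	features &= ~NETIF_F_ONE_FOR_ALL;
 *	list_for_each_entry(port, &master->port_list, list)
 *		features = netdev_increment_features(features,
 *						     port->dev->features,
 *						     master->feature_mask);
 *	master->dev->features = features;
 */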
5818 
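/*
 * Allocate one hash table of NETDEV_HASHENTRIES buckets and initialise every
 * bucket.  Used below for the per-namespace name and ifindex lookup tables.
 * Returns NULL if the allocation fails.
 */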
5819 static struct hlist_head *netdev_create_hash(void)
5820 {
5821 	int i;
5822 	struct hlist_head *hash;
5823 
5824 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5825 	if (hash != NULL)
5826 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5827 			INIT_HLIST_HEAD(&hash[i]);
5828 
5829 	return hash;
5830 }
5831 
5832 /* Initialize per network namespace state */
5833 static int __net_init netdev_init(struct net *net)
5834 {
5835 	INIT_LIST_HEAD(&net->dev_base_head);
5836 
5837 	net->dev_name_head = netdev_create_hash();
5838 	if (net->dev_name_head == NULL)
5839 		goto err_name;
5840 
5841 	net->dev_index_head = netdev_create_hash();
5842 	if (net->dev_index_head == NULL)
5843 		goto err_idx;
5844 
5845 	return 0;
5846 
5847 err_idx:
5848 	kfree(net->dev_name_head);
5849 err_name:
5850 	return -ENOMEM;
5851 }
5852 
5853 /**
5854  *	netdev_drivername - network driver for the device
5855  *	@dev: network device
5856  *	@buffer: buffer for resulting name
5857  *	@len: size of buffer
5858  *
5859  *	Determine network driver for device.
5860  */
5861 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5862 {
5863 	const struct device_driver *driver;
5864 	const struct device *parent;
5865 
5866 	if (len <= 0 || !buffer)
5867 		return buffer;
5868 	buffer[0] = 0;
5869 
5870 	parent = dev->dev.parent;
5871 
5872 	if (!parent)
5873 		return buffer;
5874 
5875 	driver = parent->driver;
5876 	if (driver && driver->name)
5877 		strlcpy(buffer, driver->name, len);
5878 	return buffer;
5879 }
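
/*
 * Example (illustrative only; "drivername" is just a local buffer in the
 * caller): a diagnostic path could report the driver behind a device with
 *
 *	char drivername[64];
 *
 *	printk(KERN_WARNING "%s (%s): transmit timed out\n", dev->name,
 *	       netdev_drivername(dev, drivername, sizeof(drivername)));
 */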
5880 
5881 static void __net_exit netdev_exit(struct net *net)
5882 {
5883 	kfree(net->dev_name_head);
5884 	kfree(net->dev_index_head);
5885 }
5886 
5887 static struct pernet_operations __net_initdata netdev_net_ops = {
5888 	.init = netdev_init,
5889 	.exit = netdev_exit,
5890 };
5891 
5892 static void __net_exit default_device_exit(struct net *net)
5893 {
5894 	struct net_device *dev, *aux;
5895 	/*
5896 	 * Push all migratable network devices back to the
5897 	 * initial network namespace
5898 	 */
5899 	rtnl_lock();
5900 	for_each_netdev_safe(net, dev, aux) {
5901 		int err;
5902 		char fb_name[IFNAMSIZ];
5903 
5904 		/* Ignore unmovable devices (e.g. loopback) */
5905 		if (dev->features & NETIF_F_NETNS_LOCAL)
5906 			continue;
5907 
5908 		/* Leave virtual devices for the generic cleanup */
5909 		if (dev->rtnl_link_ops)
5910 			continue;
5911 
5912 		/* Push remaining network devices to init_net */
5913 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5914 		err = dev_change_net_namespace(dev, &init_net, fb_name);
5915 		if (err) {
5916 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5917 				__func__, dev->name, err);
5918 			BUG();
5919 		}
5920 	}
5921 	rtnl_unlock();
5922 }
5923 
5924 static void __net_exit default_device_exit_batch(struct list_head *net_list)
5925 {
5926 	/* At exit, all network devices must be removed from the network
5927 	 * namespace.  Do this in the reverse order of registration.
5928 	 * Do this across as many network namespaces as possible to
5929 	 * improve batching efficiency.
5930 	 */
5931 	struct net_device *dev;
5932 	struct net *net;
5933 	LIST_HEAD(dev_kill_list);
5934 
5935 	rtnl_lock();
5936 	list_for_each_entry(net, net_list, exit_list) {
5937 		for_each_netdev_reverse(net, dev) {
5938 			if (dev->rtnl_link_ops)
5939 				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
5940 			else
5941 				unregister_netdevice_queue(dev, &dev_kill_list);
5942 		}
5943 	}
5944 	unregister_netdevice_many(&dev_kill_list);
5945 	rtnl_unlock();
5946 }
5947 
5948 static struct pernet_operations __net_initdata default_device_ops = {
5949 	.exit = default_device_exit,
5950 	.exit_batch = default_device_exit_batch,
5951 };
5952 
5953 /*
5954  *	Initialize the DEV module. At boot time this walks the device list and
5955  *	unhooks any devices that fail to initialise (normally hardware not
5956  *	present) and leaves us with a valid list of present and active devices.
5957  *
5958  */
5959 
5960 /*
5961  *       This is called single-threaded during boot, so no need
5962  *       to take the rtnl semaphore.
5963  */
5964 static int __init net_dev_init(void)
5965 {
5966 	int i, rc = -ENOMEM;
5967 
5968 	BUG_ON(!dev_boot_phase);
5969 
5970 	if (dev_proc_init())
5971 		goto out;
5972 
5973 	if (netdev_kobject_init())
5974 		goto out;
5975 
5976 	INIT_LIST_HEAD(&ptype_all);
5977 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
5978 		INIT_LIST_HEAD(&ptype_base[i]);
5979 
5980 	if (register_pernet_subsys(&netdev_net_ops))
5981 		goto out;
5982 
5983 	/*
5984 	 *	Initialise the packet receive queues.
5985 	 */
5986 
5987 	for_each_possible_cpu(i) {
5988 		struct softnet_data *queue;
5989 
5990 		queue = &per_cpu(softnet_data, i);
5991 		skb_queue_head_init(&queue->input_pkt_queue);
5992 		queue->completion_queue = NULL;
5993 		INIT_LIST_HEAD(&queue->poll_list);
5994 
5995 		queue->backlog.poll = process_backlog;
5996 		queue->backlog.weight = weight_p;
5997 		queue->backlog.gro_list = NULL;
5998 		queue->backlog.gro_count = 0;
5999 	}
6000 
6001 	dev_boot_phase = 0;
6002 
6003 	/* The loopback device is special: if any other network device
6004 	 * is present in a network namespace, the loopback device must
6005 	 * be present too. Since we now dynamically allocate and free the
6006 	 * loopback device, ensure this invariant is maintained by
6007 	 * keeping the loopback device as the first device on the
6008 	 * list of network devices, so that the loopback device
6009 	 * is the first device that appears and the last network device
6010 	 * that disappears.
6011 	 */
6012 	if (register_pernet_device(&loopback_net_ops))
6013 		goto out;
6014 
6015 	if (register_pernet_device(&default_device_ops))
6016 		goto out;
6017 
6018 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6019 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6020 
6021 	hotcpu_notifier(dev_cpu_callback, 0);
6022 	dst_init();
6023 	dev_mcast_init();
6024 	rc = 0;
6025 out:
6026 	return rc;
6027 }
6028 
6029 subsys_initcall(net_dev_init);
6030 
6031 static int __init initialize_hashrnd(void)
6032 {
6033 	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
6034 	return 0;
6035 }
6036 
6037 late_initcall_sync(initialize_hashrnd);
6038 
6039