xref: /linux/net/core/dev.c (revision dfc349402de8e95f6a42e8341e9ea193b718eee3)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 #include <trace/events/napi.h>
130 
131 #include "net-sysfs.h"
132 
133 /* Instead of increasing this, you should create a hash table. */
134 #define MAX_GRO_SKBS 8
135 
136 /* This should be increased if a protocol with a bigger head is added. */
137 #define GRO_MAX_HEAD (MAX_HEADER + 128)
138 
139 /*
140  *	The list of packet types we will receive (as opposed to discard)
141  *	and the routines to invoke.
142  *
143  *	Why 16?  Because with 16 the only overlap we get on a hash of the
144  *	low nibble of the protocol value is RARP/SNAP/X.25.
145  *
146  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
147  *             sure which should go first, but I bet it won't make much
148  *             difference if we are running VLANs.  The good news is that
149  *             this protocol won't be in the list unless compiled in, so
150  *             the average user (w/out VLANs) will not be adversely affected.
151  *             --BLG
152  *
153  *		0800	IP
154  *		8100    802.1Q VLAN
155  *		0001	802.3
156  *		0002	AX.25
157  *		0004	802.2
158  *		8035	RARP
159  *		0005	SNAP
160  *		0805	X.25
161  *		0806	ARP
162  *		8137	IPX
163  *		0009	Localtalk
164  *		86DD	IPv6
165  */
166 
167 #define PTYPE_HASH_SIZE	(16)
168 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
169 
170 static DEFINE_SPINLOCK(ptype_lock);
171 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
172 static struct list_head ptype_all __read_mostly;	/* Taps */
173 
174 /*
175  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
176  * semaphore.
177  *
178  * Pure readers hold dev_base_lock for reading.
179  *
180  * Writers must hold the rtnl semaphore while they loop through the
181  * dev_base_head list, and hold dev_base_lock for writing when they do the
182  * actual updates.  This allows pure readers to access the list even
183  * while a writer is preparing to update it.
184  *
185  * To put it another way, dev_base_lock is held for writing only to
186  * protect against pure readers; the rtnl semaphore provides the
187  * protection against other writers.
188  *
189  * For example usage, see register_netdevice() and
190  * unregister_netdevice(), which must be called with the rtnl
191  * semaphore held.
192  */
193 DEFINE_RWLOCK(dev_base_lock);
194 EXPORT_SYMBOL(dev_base_lock);
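/*
 * Illustrative sketch (not part of the original file): a pure reader
 * walking the device list under dev_base_lock, following the locking
 * rules documented above.  The helper name is hypothetical.
 */
static inline int example_count_netdevs(struct net *net)
{
	struct net_device *dev;
	int count = 0;

	read_lock(&dev_base_lock);
	for_each_netdev(net, dev)
		count++;
	read_unlock(&dev_base_lock);

	return count;
}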
195 
196 #define NETDEV_HASHBITS	8
197 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
198 
199 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
200 {
201 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
202 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
203 }
204 
205 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
206 {
207 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
208 }
209 
210 /* Device list insertion */
211 static int list_netdevice(struct net_device *dev)
212 {
213 	struct net *net = dev_net(dev);
214 
215 	ASSERT_RTNL();
216 
217 	write_lock_bh(&dev_base_lock);
218 	list_add_tail(&dev->dev_list, &net->dev_base_head);
219 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
220 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
221 	write_unlock_bh(&dev_base_lock);
222 	return 0;
223 }
224 
225 /* Device list removal */
226 static void unlist_netdevice(struct net_device *dev)
227 {
228 	ASSERT_RTNL();
229 
230 	/* Unlink dev from the device chain */
231 	write_lock_bh(&dev_base_lock);
232 	list_del(&dev->dev_list);
233 	hlist_del(&dev->name_hlist);
234 	hlist_del(&dev->index_hlist);
235 	write_unlock_bh(&dev_base_lock);
236 }
237 
238 /*
239  *	Our notifier list
240  */
241 
242 static RAW_NOTIFIER_HEAD(netdev_chain);
243 
244 /*
245  *	Device drivers call our routines to queue packets here. We empty the
246  *	queue in the local softnet handler.
247  */
248 
249 DEFINE_PER_CPU(struct softnet_data, softnet_data);
250 EXPORT_PER_CPU_SYMBOL(softnet_data);
251 
252 #ifdef CONFIG_LOCKDEP
253 /*
254  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
255  * according to dev->type
256  */
257 static const unsigned short netdev_lock_type[] =
258 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
259 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
260 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
261 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
262 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
263 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
264 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
265 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
266 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
267 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
268 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
269 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
270 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
271 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
272 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
273 	 ARPHRD_VOID, ARPHRD_NONE};
274 
275 static const char *const netdev_lock_name[] =
276 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
277 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
278 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
279 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
280 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
281 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
282 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
283 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
284 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
285 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
286 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
287 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
288 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
289 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
290 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
291 	 "_xmit_VOID", "_xmit_NONE"};
292 
293 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
294 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
295 
296 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
297 {
298 	int i;
299 
300 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
301 		if (netdev_lock_type[i] == dev_type)
302 			return i;
303 	/* the last key is used by default */
304 	return ARRAY_SIZE(netdev_lock_type) - 1;
305 }
306 
307 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
308 						 unsigned short dev_type)
309 {
310 	int i;
311 
312 	i = netdev_lock_pos(dev_type);
313 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
314 				   netdev_lock_name[i]);
315 }
316 
317 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
318 {
319 	int i;
320 
321 	i = netdev_lock_pos(dev->type);
322 	lockdep_set_class_and_name(&dev->addr_list_lock,
323 				   &netdev_addr_lock_key[i],
324 				   netdev_lock_name[i]);
325 }
326 #else
327 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
328 						 unsigned short dev_type)
329 {
330 }
331 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
332 {
333 }
334 #endif
335 
336 /*******************************************************************************
337 
338 		Protocol management and registration routines
339 
340 *******************************************************************************/
341 
342 /*
343  *	Add a protocol ID to the list. Now that the input handler is
344  *	smarter we can dispense with all the messy stuff that used to be
345  *	here.
346  *
347  *	BEWARE!!! Protocol handlers that mangle input packets
348  *	MUST BE last in the hash buckets, and the protocol handler checks
349  *	MUST start from the promiscuous ptype_all chain in net_bh.
350  *	This is true now; do not change it.
351  *	Explanation follows: if a protocol handler that mangles packets
352  *	were first on the list, it could not sense that the packet
353  *	is cloned and must be copied-on-write, so it would
354  *	modify it and subsequent readers would get a broken packet.
355  *							--ANK (980803)
356  */
357 
358 /**
359  *	dev_add_pack - add packet handler
360  *	@pt: packet type declaration
361  *
362  *	Add a protocol handler to the networking stack. The passed &packet_type
363  *	is linked into kernel lists and may not be freed until it has been
364  *	removed from the kernel lists.
365  *
366  *	This call does not sleep, therefore it cannot
367  *	guarantee that all CPUs that are in the middle of receiving packets
368  *	will see the new packet type (until the next received packet).
369  */
370 
371 void dev_add_pack(struct packet_type *pt)
372 {
373 	int hash;
374 
375 	spin_lock_bh(&ptype_lock);
376 	if (pt->type == htons(ETH_P_ALL))
377 		list_add_rcu(&pt->list, &ptype_all);
378 	else {
379 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
380 		list_add_rcu(&pt->list, &ptype_base[hash]);
381 	}
382 	spin_unlock_bh(&ptype_lock);
383 }
384 EXPORT_SYMBOL(dev_add_pack);
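/*
 * Usage sketch (hypothetical example, not from this file): a module
 * registering a handler for IPv4 frames.  The handler and variable
 * names are assumptions for illustration only.
 */
static int example_pt_rcv(struct sk_buff *skb, struct net_device *dev,
			  struct packet_type *pt, struct net_device *orig_dev)
{
	/* The skb may be shared with other handlers; clone it before
	 * modifying, or treat it as read-only.  Always consume it. */
	kfree_skb(skb);
	return 0;
}

static struct packet_type example_pt __read_mostly = {
	.type	= __constant_htons(ETH_P_IP),
	.func	= example_pt_rcv,
};

/*
 * dev_add_pack(&example_pt) at module init;
 * dev_remove_pack(&example_pt) before the handler code can go away.
 */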
385 
386 /**
387  *	__dev_remove_pack	 - remove packet handler
388  *	@pt: packet type declaration
389  *
390  *	Remove a protocol handler that was previously added to the kernel
391  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
392  *	from the kernel lists and can be freed or reused once this function
393  *	returns.
394  *
395  *      The packet type might still be in use by receivers
396  *	and must not be freed until after all the CPUs have gone
397  *	through a quiescent state.
398  */
399 void __dev_remove_pack(struct packet_type *pt)
400 {
401 	struct list_head *head;
402 	struct packet_type *pt1;
403 
404 	spin_lock_bh(&ptype_lock);
405 
406 	if (pt->type == htons(ETH_P_ALL))
407 		head = &ptype_all;
408 	else
409 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
410 
411 	list_for_each_entry(pt1, head, list) {
412 		if (pt == pt1) {
413 			list_del_rcu(&pt->list);
414 			goto out;
415 		}
416 	}
417 
418 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
419 out:
420 	spin_unlock_bh(&ptype_lock);
421 }
422 EXPORT_SYMBOL(__dev_remove_pack);
423 
424 /**
425  *	dev_remove_pack	 - remove packet handler
426  *	@pt: packet type declaration
427  *
428  *	Remove a protocol handler that was previously added to the kernel
429  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
430  *	from the kernel lists and can be freed or reused once this function
431  *	returns.
432  *
433  *	This call sleeps to guarantee that no CPU is looking at the packet
434  *	type after return.
435  */
436 void dev_remove_pack(struct packet_type *pt)
437 {
438 	__dev_remove_pack(pt);
439 
440 	synchronize_net();
441 }
442 EXPORT_SYMBOL(dev_remove_pack);
443 
444 /******************************************************************************
445 
446 		      Device Boot-time Settings Routines
447 
448 *******************************************************************************/
449 
450 /* Boot time configuration table */
451 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
452 
453 /**
454  *	netdev_boot_setup_add	- add new setup entry
455  *	@name: name of the device
456  *	@map: configured settings for the device
457  *
458  *	Adds a new setup entry to the dev_boot_setup list.  The function
459  *	returns 0 on error and 1 on success.  This is a generic routine
460  *	for all netdevices.
461  */
462 static int netdev_boot_setup_add(char *name, struct ifmap *map)
463 {
464 	struct netdev_boot_setup *s;
465 	int i;
466 
467 	s = dev_boot_setup;
468 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
469 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
470 			memset(s[i].name, 0, sizeof(s[i].name));
471 			strlcpy(s[i].name, name, IFNAMSIZ);
472 			memcpy(&s[i].map, map, sizeof(s[i].map));
473 			break;
474 		}
475 	}
476 
477 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
478 }
479 
480 /**
481  *	netdev_boot_setup_check	- check boot time settings
482  *	@dev: the netdevice
483  *
484  * 	Check boot time settings for the device.
485  *	Any settings found are applied to the device and used
486  *	later during device probing.
487  *	Returns 0 if no settings are found, 1 if they are.
488  */
489 int netdev_boot_setup_check(struct net_device *dev)
490 {
491 	struct netdev_boot_setup *s = dev_boot_setup;
492 	int i;
493 
494 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
495 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
496 		    !strcmp(dev->name, s[i].name)) {
497 			dev->irq 	= s[i].map.irq;
498 			dev->base_addr 	= s[i].map.base_addr;
499 			dev->mem_start 	= s[i].map.mem_start;
500 			dev->mem_end 	= s[i].map.mem_end;
501 			return 1;
502 		}
503 	}
504 	return 0;
505 }
506 EXPORT_SYMBOL(netdev_boot_setup_check);
507 
508 
509 /**
510  *	netdev_boot_base	- get address from boot time settings
511  *	@prefix: prefix for network device
512  *	@unit: id for network device
513  *
514  * 	Check boot time settings for the base address of the device.
515  *	Any settings found are recorded for the device to be used
516  *	later during device probing.
517  *	Returns 0 if no settings are found.
518  */
519 unsigned long netdev_boot_base(const char *prefix, int unit)
520 {
521 	const struct netdev_boot_setup *s = dev_boot_setup;
522 	char name[IFNAMSIZ];
523 	int i;
524 
525 	sprintf(name, "%s%d", prefix, unit);
526 
527 	/*
528 	 * If device already registered then return base of 1
529 	 * to indicate not to probe for this interface
530 	 */
531 	if (__dev_get_by_name(&init_net, name))
532 		return 1;
533 
534 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
535 		if (!strcmp(name, s[i].name))
536 			return s[i].map.base_addr;
537 	return 0;
538 }
539 
540 /*
541  * Saves at boot time configured settings for any netdevice.
542  */
543 int __init netdev_boot_setup(char *str)
544 {
545 	int ints[5];
546 	struct ifmap map;
547 
548 	str = get_options(str, ARRAY_SIZE(ints), ints);
549 	if (!str || !*str)
550 		return 0;
551 
552 	/* Save settings */
553 	memset(&map, 0, sizeof(map));
554 	if (ints[0] > 0)
555 		map.irq = ints[1];
556 	if (ints[0] > 1)
557 		map.base_addr = ints[2];
558 	if (ints[0] > 2)
559 		map.mem_start = ints[3];
560 	if (ints[0] > 3)
561 		map.mem_end = ints[4];
562 
563 	/* Add new entry to the list */
564 	return netdev_boot_setup_add(str, &map);
565 }
566 
567 __setup("netdev=", netdev_boot_setup);
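/*
 * Example (assumed illustration of the parser above): booting with
 *
 *	netdev=5,0x340,0,0,eth0
 *
 * records irq=5, base_addr=0x340, mem_start=0 and mem_end=0 for "eth0",
 * to be picked up later by netdev_boot_setup_check().
 */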
568 
569 /*******************************************************************************
570 
571 			    Device Interface Subroutines
572 
573 *******************************************************************************/
574 
575 /**
576  *	__dev_get_by_name	- find a device by its name
577  *	@net: the applicable net namespace
578  *	@name: name to find
579  *
580  *	Find an interface by name. Must be called under RTNL semaphore
581  *	or @dev_base_lock. If the name is found a pointer to the device
582  *	is returned. If the name is not found then %NULL is returned. The
583  *	reference counters are not incremented so the caller must be
584  *	careful with locks.
585  */
586 
587 struct net_device *__dev_get_by_name(struct net *net, const char *name)
588 {
589 	struct hlist_node *p;
590 
591 	hlist_for_each(p, dev_name_hash(net, name)) {
592 		struct net_device *dev
593 			= hlist_entry(p, struct net_device, name_hlist);
594 		if (!strncmp(dev->name, name, IFNAMSIZ))
595 			return dev;
596 	}
597 	return NULL;
598 }
599 EXPORT_SYMBOL(__dev_get_by_name);
600 
601 /**
602  *	dev_get_by_name		- find a device by its name
603  *	@net: the applicable net namespace
604  *	@name: name to find
605  *
606  *	Find an interface by name. This can be called from any
607  *	context and does its own locking. The returned handle has
608  *	the usage count incremented and the caller must use dev_put() to
609  *	release it when it is no longer needed. %NULL is returned if no
610  *	matching device is found.
611  */
612 
613 struct net_device *dev_get_by_name(struct net *net, const char *name)
614 {
615 	struct net_device *dev;
616 
617 	read_lock(&dev_base_lock);
618 	dev = __dev_get_by_name(net, name);
619 	if (dev)
620 		dev_hold(dev);
621 	read_unlock(&dev_base_lock);
622 	return dev;
623 }
624 EXPORT_SYMBOL(dev_get_by_name);
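/*
 * Usage sketch (hypothetical helper, for illustration): the reference
 * taken by dev_get_by_name() must be dropped with dev_put() when the
 * caller is done with the device.
 */
static inline int example_get_mtu(struct net *net, const char *name)
{
	struct net_device *dev;
	int mtu;

	dev = dev_get_by_name(net, name);
	if (!dev)
		return -ENODEV;
	mtu = dev->mtu;
	dev_put(dev);

	return mtu;
}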
625 
626 /**
627  *	__dev_get_by_index - find a device by its ifindex
628  *	@net: the applicable net namespace
629  *	@ifindex: index of device
630  *
631  *	Search for an interface by index. Returns %NULL if the device
632  *	is not found or a pointer to the device. The device has not
633  *	had its reference counter increased so the caller must be careful
634  *	about locking. The caller must hold either the RTNL semaphore
635  *	or @dev_base_lock.
636  */
637 
638 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
639 {
640 	struct hlist_node *p;
641 
642 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
643 		struct net_device *dev
644 			= hlist_entry(p, struct net_device, index_hlist);
645 		if (dev->ifindex == ifindex)
646 			return dev;
647 	}
648 	return NULL;
649 }
650 EXPORT_SYMBOL(__dev_get_by_index);
651 
652 
653 /**
654  *	dev_get_by_index - find a device by its ifindex
655  *	@net: the applicable net namespace
656  *	@ifindex: index of device
657  *
658  *	Search for an interface by index. Returns NULL if the device
659  *	is not found or a pointer to the device. The device returned has
660  *	had a reference added and the pointer is safe until the user calls
661  *	dev_put to indicate they have finished with it.
662  */
663 
664 struct net_device *dev_get_by_index(struct net *net, int ifindex)
665 {
666 	struct net_device *dev;
667 
668 	read_lock(&dev_base_lock);
669 	dev = __dev_get_by_index(net, ifindex);
670 	if (dev)
671 		dev_hold(dev);
672 	read_unlock(&dev_base_lock);
673 	return dev;
674 }
675 EXPORT_SYMBOL(dev_get_by_index);
676 
677 /**
678  *	dev_getbyhwaddr - find a device by its hardware address
679  *	@net: the applicable net namespace
680  *	@type: media type of device
681  *	@ha: hardware address
682  *
683  *	Search for an interface by MAC address. Returns NULL if the device
684  *	is not found or a pointer to the device. The caller must hold the
685  *	rtnl semaphore. The returned device has not had its ref count increased
686  *	and the caller must therefore be careful about locking
687  *
688  *	BUGS:
689  *	If the API was consistent this would be __dev_get_by_hwaddr
690  */
691 
692 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
693 {
694 	struct net_device *dev;
695 
696 	ASSERT_RTNL();
697 
698 	for_each_netdev(net, dev)
699 		if (dev->type == type &&
700 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
701 			return dev;
702 
703 	return NULL;
704 }
705 EXPORT_SYMBOL(dev_getbyhwaddr);
706 
707 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
708 {
709 	struct net_device *dev;
710 
711 	ASSERT_RTNL();
712 	for_each_netdev(net, dev)
713 		if (dev->type == type)
714 			return dev;
715 
716 	return NULL;
717 }
718 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
719 
720 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
721 {
722 	struct net_device *dev;
723 
724 	rtnl_lock();
725 	dev = __dev_getfirstbyhwtype(net, type);
726 	if (dev)
727 		dev_hold(dev);
728 	rtnl_unlock();
729 	return dev;
730 }
731 EXPORT_SYMBOL(dev_getfirstbyhwtype);
732 
733 /**
734  *	dev_get_by_flags - find any device with given flags
735  *	@net: the applicable net namespace
736  *	@if_flags: IFF_* values
737  *	@mask: bitmask of bits in if_flags to check
738  *
739  *	Search for any interface with the given flags. Returns NULL if a device
740  *	is not found or a pointer to the device. The device returned has
741  *	had a reference added and the pointer is safe until the user calls
742  *	dev_put to indicate they have finished with it.
743  */
744 
745 struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
746 				    unsigned short mask)
747 {
748 	struct net_device *dev, *ret;
749 
750 	ret = NULL;
751 	read_lock(&dev_base_lock);
752 	for_each_netdev(net, dev) {
753 		if (((dev->flags ^ if_flags) & mask) == 0) {
754 			dev_hold(dev);
755 			ret = dev;
756 			break;
757 		}
758 	}
759 	read_unlock(&dev_base_lock);
760 	return ret;
761 }
762 EXPORT_SYMBOL(dev_get_by_flags);
763 
764 /**
765  *	dev_valid_name - check if name is okay for network device
766  *	@name: name string
767  *
768  *	Network device names need to be valid file names to
769  *	allow sysfs to work.  We also disallow any kind of
770  *	whitespace.
771  */
772 int dev_valid_name(const char *name)
773 {
774 	if (*name == '\0')
775 		return 0;
776 	if (strlen(name) >= IFNAMSIZ)
777 		return 0;
778 	if (!strcmp(name, ".") || !strcmp(name, ".."))
779 		return 0;
780 
781 	while (*name) {
782 		if (*name == '/' || isspace(*name))
783 			return 0;
784 		name++;
785 	}
786 	return 1;
787 }
788 EXPORT_SYMBOL(dev_valid_name);
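/*
 * Illustrative examples (not from this file): "eth0", "bond0.42" and
 * "wlan%d" are accepted by dev_valid_name(); "", ".", "..", "a/b",
 * "eth 0" and any name of IFNAMSIZ or more characters are rejected.
 */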
789 
790 /**
791  *	__dev_alloc_name - allocate a name for a device
792  *	@net: network namespace to allocate the device name in
793  *	@name: name format string
794  *	@buf:  scratch buffer and result name string
795  *
796  *	Passed a format string - eg "lt%d" it will try and find a suitable
797  *	id. It scans list of devices to build up a free map, then chooses
798  *	the first empty slot. The caller must hold the dev_base or rtnl lock
799  *	while allocating the name and adding the device in order to avoid
800  *	duplicates.
801  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
802  *	Returns the number of the unit assigned or a negative errno code.
803  */
804 
805 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
806 {
807 	int i = 0;
808 	const char *p;
809 	const int max_netdevices = 8*PAGE_SIZE;
810 	unsigned long *inuse;
811 	struct net_device *d;
812 
813 	p = strnchr(name, IFNAMSIZ-1, '%');
814 	if (p) {
815 		/*
816 		 * Verify the string as this thing may have come from
817 		 * the user.  There must be either one "%d" and no other "%"
818 		 * characters.
819 		 */
820 		if (p[1] != 'd' || strchr(p + 2, '%'))
821 			return -EINVAL;
822 
823 		/* Use one page as a bit array of possible slots */
824 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
825 		if (!inuse)
826 			return -ENOMEM;
827 
828 		for_each_netdev(net, d) {
829 			if (!sscanf(d->name, name, &i))
830 				continue;
831 			if (i < 0 || i >= max_netdevices)
832 				continue;
833 
834 			/*  avoid cases where sscanf is not exact inverse of printf */
835 			snprintf(buf, IFNAMSIZ, name, i);
836 			if (!strncmp(buf, d->name, IFNAMSIZ))
837 				set_bit(i, inuse);
838 		}
839 
840 		i = find_first_zero_bit(inuse, max_netdevices);
841 		free_page((unsigned long) inuse);
842 	}
843 
844 	snprintf(buf, IFNAMSIZ, name, i);
845 	if (!__dev_get_by_name(net, buf))
846 		return i;
847 
848 	/* It is possible to run out of possible slots
849 	 * when the name is long and there isn't enough space left
850 	 * for the digits, or if all bits are used.
851 	 */
852 	return -ENFILE;
853 }
854 
855 /**
856  *	dev_alloc_name - allocate a name for a device
857  *	@dev: device
858  *	@name: name format string
859  *
860  *	Passed a format string - eg "lt%d" it will try and find a suitable
861  *	id. It scans list of devices to build up a free map, then chooses
862  *	the first empty slot. The caller must hold the dev_base or rtnl lock
863  *	while allocating the name and adding the device in order to avoid
864  *	duplicates.
865  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
866  *	Returns the number of the unit assigned or a negative errno code.
867  */
868 
869 int dev_alloc_name(struct net_device *dev, const char *name)
870 {
871 	char buf[IFNAMSIZ];
872 	struct net *net;
873 	int ret;
874 
875 	BUG_ON(!dev_net(dev));
876 	net = dev_net(dev);
877 	ret = __dev_alloc_name(net, name, buf);
878 	if (ret >= 0)
879 		strlcpy(dev->name, buf, IFNAMSIZ);
880 	return ret;
881 }
882 EXPORT_SYMBOL(dev_alloc_name);
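/*
 * Usage sketch (hypothetical helper): asking for the next free "eth%d"
 * slot before registering a device.  The caller is assumed to hold the
 * rtnl lock, as documented above.
 */
static inline int example_pick_name(struct net_device *dev)
{
	int unit;

	unit = dev_alloc_name(dev, "eth%d");	/* fills dev->name, e.g. "eth2" */
	if (unit < 0)
		return unit;			/* -EINVAL or -ENFILE */

	return 0;
}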
883 
884 
885 /**
886  *	dev_change_name - change name of a device
887  *	@dev: device
888  *	@newname: name (or format string) must be at least IFNAMSIZ
889  *
890  *	Change the name of a device; a format string such as "eth%d"
891  *	can be passed for wildcarding.
892  */
893 int dev_change_name(struct net_device *dev, const char *newname)
894 {
895 	char oldname[IFNAMSIZ];
896 	int err = 0;
897 	int ret;
898 	struct net *net;
899 
900 	ASSERT_RTNL();
901 	BUG_ON(!dev_net(dev));
902 
903 	net = dev_net(dev);
904 	if (dev->flags & IFF_UP)
905 		return -EBUSY;
906 
907 	if (!dev_valid_name(newname))
908 		return -EINVAL;
909 
910 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
911 		return 0;
912 
913 	memcpy(oldname, dev->name, IFNAMSIZ);
914 
915 	if (strchr(newname, '%')) {
916 		err = dev_alloc_name(dev, newname);
917 		if (err < 0)
918 			return err;
919 	} else if (__dev_get_by_name(net, newname))
920 		return -EEXIST;
921 	else
922 		strlcpy(dev->name, newname, IFNAMSIZ);
923 
924 rollback:
925 	/* For now only devices in the initial network namespace
926 	 * are in sysfs.
927 	 */
928 	if (net == &init_net) {
929 		ret = device_rename(&dev->dev, dev->name);
930 		if (ret) {
931 			memcpy(dev->name, oldname, IFNAMSIZ);
932 			return ret;
933 		}
934 	}
935 
936 	write_lock_bh(&dev_base_lock);
937 	hlist_del(&dev->name_hlist);
938 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
939 	write_unlock_bh(&dev_base_lock);
940 
941 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
942 	ret = notifier_to_errno(ret);
943 
944 	if (ret) {
945 		/* err >= 0 after dev_alloc_name() or stores the first errno */
946 		if (err >= 0) {
947 			err = ret;
948 			memcpy(dev->name, oldname, IFNAMSIZ);
949 			goto rollback;
950 		} else {
951 			printk(KERN_ERR
952 			       "%s: name change rollback failed: %d.\n",
953 			       dev->name, ret);
954 		}
955 	}
956 
957 	return err;
958 }
959 
960 /**
961  *	dev_set_alias - change ifalias of a device
962  *	@dev: device
963  *	@alias: name up to IFALIASZ
964  *	@len: limit of bytes to copy from @alias
965  *
966  *	Set ifalias for a device.
967  */
968 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
969 {
970 	ASSERT_RTNL();
971 
972 	if (len >= IFALIASZ)
973 		return -EINVAL;
974 
975 	if (!len) {
976 		if (dev->ifalias) {
977 			kfree(dev->ifalias);
978 			dev->ifalias = NULL;
979 		}
980 		return 0;
981 	}
982 
983 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
984 	if (!dev->ifalias)
985 		return -ENOMEM;
986 
987 	strlcpy(dev->ifalias, alias, len+1);
988 	return len;
989 }
990 
991 
992 /**
993  *	netdev_features_change - device changes features
994  *	@dev: device to cause notification
995  *
996  *	Called to indicate a device has changed features.
997  */
998 void netdev_features_change(struct net_device *dev)
999 {
1000 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1001 }
1002 EXPORT_SYMBOL(netdev_features_change);
1003 
1004 /**
1005  *	netdev_state_change - device changes state
1006  *	@dev: device to cause notification
1007  *
1008  *	Called to indicate a device has changed state. This function calls
1009  *	the notifier chains for netdev_chain and sends a NEWLINK message
1010  *	to the routing socket.
1011  */
1012 void netdev_state_change(struct net_device *dev)
1013 {
1014 	if (dev->flags & IFF_UP) {
1015 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1016 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1017 	}
1018 }
1019 EXPORT_SYMBOL(netdev_state_change);
1020 
1021 void netdev_bonding_change(struct net_device *dev, unsigned long event)
1022 {
1023 	call_netdevice_notifiers(event, dev);
1024 }
1025 EXPORT_SYMBOL(netdev_bonding_change);
1026 
1027 /**
1028  *	dev_load 	- load a network module
1029  *	@net: the applicable net namespace
1030  *	@name: name of interface
1031  *
1032  *	If a network interface is not present and the process has suitable
1033  *	privileges this function loads the module. If module loading is not
1034  *	privileges, this function loads the module. If module loading is not
1035  */
1036 
1037 void dev_load(struct net *net, const char *name)
1038 {
1039 	struct net_device *dev;
1040 
1041 	read_lock(&dev_base_lock);
1042 	dev = __dev_get_by_name(net, name);
1043 	read_unlock(&dev_base_lock);
1044 
1045 	if (!dev && capable(CAP_NET_ADMIN))
1046 		request_module("%s", name);
1047 }
1048 EXPORT_SYMBOL(dev_load);
1049 
1050 /**
1051  *	dev_open	- prepare an interface for use.
1052  *	@dev:	device to open
1053  *
1054  *	Takes a device from down to up state. The device's private open
1055  *	function is invoked and then the multicast lists are loaded. Finally
1056  *	the device is moved into the up state and a %NETDEV_UP message is
1057  *	sent to the netdev notifier chain.
1058  *
1059  *	Calling this function on an active interface is a nop. On a failure
1060  *	a negative errno code is returned.
1061  */
1062 int dev_open(struct net_device *dev)
1063 {
1064 	const struct net_device_ops *ops = dev->netdev_ops;
1065 	int ret;
1066 
1067 	ASSERT_RTNL();
1068 
1069 	/*
1070 	 *	Is it already up?
1071 	 */
1072 
1073 	if (dev->flags & IFF_UP)
1074 		return 0;
1075 
1076 	/*
1077 	 *	Is it even present?
1078 	 */
1079 	if (!netif_device_present(dev))
1080 		return -ENODEV;
1081 
1082 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1083 	ret = notifier_to_errno(ret);
1084 	if (ret)
1085 		return ret;
1086 
1087 	/*
1088 	 *	Call device private open method
1089 	 */
1090 	set_bit(__LINK_STATE_START, &dev->state);
1091 
1092 	if (ops->ndo_validate_addr)
1093 		ret = ops->ndo_validate_addr(dev);
1094 
1095 	if (!ret && ops->ndo_open)
1096 		ret = ops->ndo_open(dev);
1097 
1098 	/*
1099 	 *	If it went open OK then:
1100 	 */
1101 
1102 	if (ret)
1103 		clear_bit(__LINK_STATE_START, &dev->state);
1104 	else {
1105 		/*
1106 		 *	Set the flags.
1107 		 */
1108 		dev->flags |= IFF_UP;
1109 
1110 		/*
1111 		 *	Enable NET_DMA
1112 		 */
1113 		net_dmaengine_get();
1114 
1115 		/*
1116 		 *	Initialize multicasting status
1117 		 */
1118 		dev_set_rx_mode(dev);
1119 
1120 		/*
1121 		 *	Wakeup transmit queue engine
1122 		 */
1123 		dev_activate(dev);
1124 
1125 		/*
1126 		 *	... and announce new interface.
1127 		 */
1128 		call_netdevice_notifiers(NETDEV_UP, dev);
1129 	}
1130 
1131 	return ret;
1132 }
1133 EXPORT_SYMBOL(dev_open);
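/*
 * Usage sketch (hypothetical): bringing an interface up from process
 * context.  dev_open() and dev_close() assume the rtnl semaphore is
 * held by the caller.
 */
static inline int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_open(dev);
	rtnl_unlock();

	return err;
}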
1134 
1135 /**
1136  *	dev_close - shutdown an interface.
1137  *	@dev: device to shutdown
1138  *
1139  *	This function moves an active device into down state. A
1140  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1141  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1142  *	chain.
1143  */
1144 int dev_close(struct net_device *dev)
1145 {
1146 	const struct net_device_ops *ops = dev->netdev_ops;
1147 	ASSERT_RTNL();
1148 
1149 	might_sleep();
1150 
1151 	if (!(dev->flags & IFF_UP))
1152 		return 0;
1153 
1154 	/*
1155 	 *	Tell people we are going down, so that they can
1156 	 *	prepare for it while the device is still operating.
1157 	 */
1158 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1159 
1160 	clear_bit(__LINK_STATE_START, &dev->state);
1161 
1162 	/* Synchronize to scheduled poll. We cannot touch poll list,
1163 	 * it may even be on a different CPU. So just clear netif_running().
1164 	 *
1165 	 * dev->stop() will invoke napi_disable() on all of its
1166 	 * napi_struct instances on this device.
1167 	 */
1168 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1169 
1170 	dev_deactivate(dev);
1171 
1172 	/*
1173 	 *	Call the device specific close. This cannot fail.
1174 	 *	Only if device is UP
1175 	 *
1176 	 *	We allow it to be called even after a DETACH hot-plug
1177 	 *	event.
1178 	 */
1179 	if (ops->ndo_stop)
1180 		ops->ndo_stop(dev);
1181 
1182 	/*
1183 	 *	Device is now down.
1184 	 */
1185 
1186 	dev->flags &= ~IFF_UP;
1187 
1188 	/*
1189 	 * Tell people we are down
1190 	 */
1191 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1192 
1193 	/*
1194 	 *	Shutdown NET_DMA
1195 	 */
1196 	net_dmaengine_put();
1197 
1198 	return 0;
1199 }
1200 EXPORT_SYMBOL(dev_close);
1201 
1202 
1203 /**
1204  *	dev_disable_lro - disable Large Receive Offload on a device
1205  *	@dev: device
1206  *
1207  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1208  *	called under RTNL.  This is needed if received packets may be
1209  *	forwarded to another interface.
1210  */
1211 void dev_disable_lro(struct net_device *dev)
1212 {
1213 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1214 	    dev->ethtool_ops->set_flags) {
1215 		u32 flags = dev->ethtool_ops->get_flags(dev);
1216 		if (flags & ETH_FLAG_LRO) {
1217 			flags &= ~ETH_FLAG_LRO;
1218 			dev->ethtool_ops->set_flags(dev, flags);
1219 		}
1220 	}
1221 	WARN_ON(dev->features & NETIF_F_LRO);
1222 }
1223 EXPORT_SYMBOL(dev_disable_lro);
1224 
1225 
1226 static int dev_boot_phase = 1;
1227 
1228 /*
1229  *	Device change register/unregister. These are not inline or static
1230  *	as we export them to the world.
1231  */
1232 
1233 /**
1234  *	register_netdevice_notifier - register a network notifier block
1235  *	@nb: notifier
1236  *
1237  *	Register a notifier to be called when network device events occur.
1238  *	The notifier passed is linked into the kernel structures and must
1239  *	not be reused until it has been unregistered. A negative errno code
1240  *	is returned on a failure.
1241  *
1242  * 	When registered, all registration and up events are replayed
1243  *	to the new notifier to allow the caller to have a race-free
1244  *	view of the network device list.
1245  */
1246 
1247 int register_netdevice_notifier(struct notifier_block *nb)
1248 {
1249 	struct net_device *dev;
1250 	struct net_device *last;
1251 	struct net *net;
1252 	int err;
1253 
1254 	rtnl_lock();
1255 	err = raw_notifier_chain_register(&netdev_chain, nb);
1256 	if (err)
1257 		goto unlock;
1258 	if (dev_boot_phase)
1259 		goto unlock;
1260 	for_each_net(net) {
1261 		for_each_netdev(net, dev) {
1262 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1263 			err = notifier_to_errno(err);
1264 			if (err)
1265 				goto rollback;
1266 
1267 			if (!(dev->flags & IFF_UP))
1268 				continue;
1269 
1270 			nb->notifier_call(nb, NETDEV_UP, dev);
1271 		}
1272 	}
1273 
1274 unlock:
1275 	rtnl_unlock();
1276 	return err;
1277 
1278 rollback:
1279 	last = dev;
1280 	for_each_net(net) {
1281 		for_each_netdev(net, dev) {
1282 			if (dev == last)
1283 				break;
1284 
1285 			if (dev->flags & IFF_UP) {
1286 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1287 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1288 			}
1289 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1290 		}
1291 	}
1292 
1293 	raw_notifier_chain_unregister(&netdev_chain, nb);
1294 	goto unlock;
1295 }
1296 EXPORT_SYMBOL(register_netdevice_notifier);
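/*
 * Usage sketch (hypothetical example): a subsystem watching devices come
 * and go.  The callback and notifier_block names are illustrative; in
 * this kernel the notifier's data pointer is the struct net_device.
 */
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		printk(KERN_DEBUG "%s is up\n", dev->name);
		break;
	case NETDEV_GOING_DOWN:
	case NETDEV_DOWN:
		/* stop using dev */
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_nb = {
	.notifier_call = example_netdev_event,
};

/*
 * register_netdevice_notifier(&example_netdev_nb);
 *	...
 * unregister_netdevice_notifier(&example_netdev_nb);
 */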
1297 
1298 /**
1299  *	unregister_netdevice_notifier - unregister a network notifier block
1300  *	@nb: notifier
1301  *
1302  *	Unregister a notifier previously registered by
1303  *	register_netdevice_notifier(). The notifier is unlinked from the
1304  *	kernel structures and may then be reused. A negative errno code
1305  *	is returned on a failure.
1306  */
1307 
1308 int unregister_netdevice_notifier(struct notifier_block *nb)
1309 {
1310 	int err;
1311 
1312 	rtnl_lock();
1313 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1314 	rtnl_unlock();
1315 	return err;
1316 }
1317 EXPORT_SYMBOL(unregister_netdevice_notifier);
1318 
1319 /**
1320  *	call_netdevice_notifiers - call all network notifier blocks
1321  *      @val: value passed unmodified to notifier function
1322  *      @dev: net_device pointer passed unmodified to notifier function
1323  *
1324  *	Call all network notifier blocks.  Parameters and return value
1325  *	are as for raw_notifier_call_chain().
1326  */
1327 
1328 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1329 {
1330 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1331 }
1332 
1333 /* When > 0 there are consumers of rx skb time stamps */
1334 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1335 
1336 void net_enable_timestamp(void)
1337 {
1338 	atomic_inc(&netstamp_needed);
1339 }
1340 EXPORT_SYMBOL(net_enable_timestamp);
1341 
1342 void net_disable_timestamp(void)
1343 {
1344 	atomic_dec(&netstamp_needed);
1345 }
1346 EXPORT_SYMBOL(net_disable_timestamp);
1347 
1348 static inline void net_timestamp(struct sk_buff *skb)
1349 {
1350 	if (atomic_read(&netstamp_needed))
1351 		__net_timestamp(skb);
1352 	else
1353 		skb->tstamp.tv64 = 0;
1354 }
1355 
1356 /*
1357  *	Support routine. Sends outgoing frames to any network
1358  *	taps currently in use.
1359  */
1360 
1361 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1362 {
1363 	struct packet_type *ptype;
1364 
1365 #ifdef CONFIG_NET_CLS_ACT
1366 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1367 		net_timestamp(skb);
1368 #else
1369 	net_timestamp(skb);
1370 #endif
1371 
1372 	rcu_read_lock();
1373 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1374 		/* Never send packets back to the socket
1375 		 * they originated from - MvS (miquels@drinkel.ow.org)
1376 		 */
1377 		if ((ptype->dev == dev || !ptype->dev) &&
1378 		    (ptype->af_packet_priv == NULL ||
1379 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1380 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1381 			if (!skb2)
1382 				break;
1383 
1384 			/* skb->nh should be correctly
1385 			   set by the sender, so the check below is
1386 			   just protection against buggy protocols.
1387 			 */
1388 			skb_reset_mac_header(skb2);
1389 
1390 			if (skb_network_header(skb2) < skb2->data ||
1391 			    skb2->network_header > skb2->tail) {
1392 				if (net_ratelimit())
1393 					printk(KERN_CRIT "protocol %04x is "
1394 					       "buggy, dev %s\n",
1395 					       skb2->protocol, dev->name);
1396 				skb_reset_network_header(skb2);
1397 			}
1398 
1399 			skb2->transport_header = skb2->network_header;
1400 			skb2->pkt_type = PACKET_OUTGOING;
1401 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1402 		}
1403 	}
1404 	rcu_read_unlock();
1405 }
1406 
1407 
1408 static inline void __netif_reschedule(struct Qdisc *q)
1409 {
1410 	struct softnet_data *sd;
1411 	unsigned long flags;
1412 
1413 	local_irq_save(flags);
1414 	sd = &__get_cpu_var(softnet_data);
1415 	q->next_sched = sd->output_queue;
1416 	sd->output_queue = q;
1417 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1418 	local_irq_restore(flags);
1419 }
1420 
1421 void __netif_schedule(struct Qdisc *q)
1422 {
1423 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1424 		__netif_reschedule(q);
1425 }
1426 EXPORT_SYMBOL(__netif_schedule);
1427 
1428 void dev_kfree_skb_irq(struct sk_buff *skb)
1429 {
1430 	if (atomic_dec_and_test(&skb->users)) {
1431 		struct softnet_data *sd;
1432 		unsigned long flags;
1433 
1434 		local_irq_save(flags);
1435 		sd = &__get_cpu_var(softnet_data);
1436 		skb->next = sd->completion_queue;
1437 		sd->completion_queue = skb;
1438 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1439 		local_irq_restore(flags);
1440 	}
1441 }
1442 EXPORT_SYMBOL(dev_kfree_skb_irq);
1443 
1444 void dev_kfree_skb_any(struct sk_buff *skb)
1445 {
1446 	if (in_irq() || irqs_disabled())
1447 		dev_kfree_skb_irq(skb);
1448 	else
1449 		dev_kfree_skb(skb);
1450 }
1451 EXPORT_SYMBOL(dev_kfree_skb_any);
1452 
1453 
1454 /**
1455  * netif_device_detach - mark device as removed
1456  * @dev: network device
1457  *
1458  * Mark device as removed from the system and therefore no longer available.
1459  */
1460 void netif_device_detach(struct net_device *dev)
1461 {
1462 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1463 	    netif_running(dev)) {
1464 		netif_tx_stop_all_queues(dev);
1465 	}
1466 }
1467 EXPORT_SYMBOL(netif_device_detach);
1468 
1469 /**
1470  * netif_device_attach - mark device as attached
1471  * @dev: network device
1472  *
1473  * Mark device as attached to the system and restart it if needed.
1474  */
1475 void netif_device_attach(struct net_device *dev)
1476 {
1477 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1478 	    netif_running(dev)) {
1479 		netif_tx_wake_all_queues(dev);
1480 		__netdev_watchdog_up(dev);
1481 	}
1482 }
1483 EXPORT_SYMBOL(netif_device_attach);
1484 
1485 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1486 {
1487 	return ((features & NETIF_F_GEN_CSUM) ||
1488 		((features & NETIF_F_IP_CSUM) &&
1489 		 protocol == htons(ETH_P_IP)) ||
1490 		((features & NETIF_F_IPV6_CSUM) &&
1491 		 protocol == htons(ETH_P_IPV6)) ||
1492 		((features & NETIF_F_FCOE_CRC) &&
1493 		 protocol == htons(ETH_P_FCOE)));
1494 }
1495 
1496 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1497 {
1498 	if (can_checksum_protocol(dev->features, skb->protocol))
1499 		return true;
1500 
1501 	if (skb->protocol == htons(ETH_P_8021Q)) {
1502 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1503 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1504 					  veh->h_vlan_encapsulated_proto))
1505 			return true;
1506 	}
1507 
1508 	return false;
1509 }
1510 
1511 /*
1512  * Invalidate hardware checksum when packet is to be mangled, and
1513  * complete checksum manually on outgoing path.
1514  */
1515 int skb_checksum_help(struct sk_buff *skb)
1516 {
1517 	__wsum csum;
1518 	int ret = 0, offset;
1519 
1520 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1521 		goto out_set_summed;
1522 
1523 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1524 		/* Let GSO fix up the checksum. */
1525 		goto out_set_summed;
1526 	}
1527 
1528 	offset = skb->csum_start - skb_headroom(skb);
1529 	BUG_ON(offset >= skb_headlen(skb));
1530 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1531 
1532 	offset += skb->csum_offset;
1533 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1534 
1535 	if (skb_cloned(skb) &&
1536 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1537 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1538 		if (ret)
1539 			goto out;
1540 	}
1541 
1542 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1543 out_set_summed:
1544 	skb->ip_summed = CHECKSUM_NONE;
1545 out:
1546 	return ret;
1547 }
1548 EXPORT_SYMBOL(skb_checksum_help);
1549 
1550 /**
1551  *	skb_gso_segment - Perform segmentation on skb.
1552  *	@skb: buffer to segment
1553  *	@features: features for the output path (see dev->features)
1554  *
1555  *	This function segments the given skb and returns a list of segments.
1556  *
1557  *	It may return NULL if the skb requires no segmentation.  This is
1558  *	only possible when GSO is used for verifying header integrity.
1559  */
1560 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1561 {
1562 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1563 	struct packet_type *ptype;
1564 	__be16 type = skb->protocol;
1565 	int err;
1566 
1567 	skb_reset_mac_header(skb);
1568 	skb->mac_len = skb->network_header - skb->mac_header;
1569 	__skb_pull(skb, skb->mac_len);
1570 
1571 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1572 		struct net_device *dev = skb->dev;
1573 		struct ethtool_drvinfo info = {};
1574 
1575 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1576 			dev->ethtool_ops->get_drvinfo(dev, &info);
1577 
1578 		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1579 			"ip_summed=%d",
1580 		     info.driver, dev ? dev->features : 0L,
1581 		     skb->sk ? skb->sk->sk_route_caps : 0L,
1582 		     skb->len, skb->data_len, skb->ip_summed);
1583 
1584 		if (skb_header_cloned(skb) &&
1585 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1586 			return ERR_PTR(err);
1587 	}
1588 
1589 	rcu_read_lock();
1590 	list_for_each_entry_rcu(ptype,
1591 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1592 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1593 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1594 				err = ptype->gso_send_check(skb);
1595 				segs = ERR_PTR(err);
1596 				if (err || skb_gso_ok(skb, features))
1597 					break;
1598 				__skb_push(skb, (skb->data -
1599 						 skb_network_header(skb)));
1600 			}
1601 			segs = ptype->gso_segment(skb, features);
1602 			break;
1603 		}
1604 	}
1605 	rcu_read_unlock();
1606 
1607 	__skb_push(skb, skb->data - skb_mac_header(skb));
1608 
1609 	return segs;
1610 }
1611 EXPORT_SYMBOL(skb_gso_segment);
1612 
1613 /* Take action when hardware reception checksum errors are detected. */
1614 #ifdef CONFIG_BUG
1615 void netdev_rx_csum_fault(struct net_device *dev)
1616 {
1617 	if (net_ratelimit()) {
1618 		printk(KERN_ERR "%s: hw csum failure.\n",
1619 			dev ? dev->name : "<unknown>");
1620 		dump_stack();
1621 	}
1622 }
1623 EXPORT_SYMBOL(netdev_rx_csum_fault);
1624 #endif
1625 
1626 /* Actually, we should eliminate this check as soon as we know that:
1627  * 1. An IOMMU is present and can map all the memory.
1628  * 2. No high memory really exists on this machine.
1629  */
1630 
1631 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1632 {
1633 #ifdef CONFIG_HIGHMEM
1634 	int i;
1635 
1636 	if (dev->features & NETIF_F_HIGHDMA)
1637 		return 0;
1638 
1639 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1640 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1641 			return 1;
1642 
1643 #endif
1644 	return 0;
1645 }
1646 
1647 struct dev_gso_cb {
1648 	void (*destructor)(struct sk_buff *skb);
1649 };
1650 
1651 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1652 
1653 static void dev_gso_skb_destructor(struct sk_buff *skb)
1654 {
1655 	struct dev_gso_cb *cb;
1656 
1657 	do {
1658 		struct sk_buff *nskb = skb->next;
1659 
1660 		skb->next = nskb->next;
1661 		nskb->next = NULL;
1662 		kfree_skb(nskb);
1663 	} while (skb->next);
1664 
1665 	cb = DEV_GSO_CB(skb);
1666 	if (cb->destructor)
1667 		cb->destructor(skb);
1668 }
1669 
1670 /**
1671  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1672  *	@skb: buffer to segment
1673  *
1674  *	This function segments the given skb and stores the list of segments
1675  *	in skb->next.
1676  */
1677 static int dev_gso_segment(struct sk_buff *skb)
1678 {
1679 	struct net_device *dev = skb->dev;
1680 	struct sk_buff *segs;
1681 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1682 					 NETIF_F_SG : 0);
1683 
1684 	segs = skb_gso_segment(skb, features);
1685 
1686 	/* Verifying header integrity only. */
1687 	if (!segs)
1688 		return 0;
1689 
1690 	if (IS_ERR(segs))
1691 		return PTR_ERR(segs);
1692 
1693 	skb->next = segs;
1694 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1695 	skb->destructor = dev_gso_skb_destructor;
1696 
1697 	return 0;
1698 }
1699 
1700 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1701 			struct netdev_queue *txq)
1702 {
1703 	const struct net_device_ops *ops = dev->netdev_ops;
1704 	int rc;
1705 
1706 	if (likely(!skb->next)) {
1707 		if (!list_empty(&ptype_all))
1708 			dev_queue_xmit_nit(skb, dev);
1709 
1710 		if (netif_needs_gso(dev, skb)) {
1711 			if (unlikely(dev_gso_segment(skb)))
1712 				goto out_kfree_skb;
1713 			if (skb->next)
1714 				goto gso;
1715 		}
1716 
1717 		/*
1718 		 * If the device doesn't need skb->dst, release it right now while
1719 		 * it's hot in this CPU's cache.
1720 		 */
1721 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1722 			skb_dst_drop(skb);
1723 
1724 		rc = ops->ndo_start_xmit(skb, dev);
1725 		if (rc == NETDEV_TX_OK)
1726 			txq_trans_update(txq);
1727 		/*
1728 		 * TODO: if skb_orphan() was called by
1729 		 * dev->hard_start_xmit() (for example, the unmodified
1730 		 * igb driver does that; bnx2 doesn't), then
1731 		 * skb_tx_software_timestamp() will be unable to send
1732 		 * back the time stamp.
1733 		 *
1734 		 * How can this be prevented? Always create another
1735 		 * reference to the socket before calling
1736 		 * dev->hard_start_xmit()? Prevent that skb_orphan()
1737 		 * does anything in dev->hard_start_xmit() by clearing
1738 		 * the skb destructor before the call and restoring it
1739 		 * afterwards, then doing the skb_orphan() ourselves?
1740 		 */
1741 		return rc;
1742 	}
1743 
1744 gso:
1745 	do {
1746 		struct sk_buff *nskb = skb->next;
1747 
1748 		skb->next = nskb->next;
1749 		nskb->next = NULL;
1750 		rc = ops->ndo_start_xmit(nskb, dev);
1751 		if (unlikely(rc != NETDEV_TX_OK)) {
1752 			nskb->next = skb->next;
1753 			skb->next = nskb;
1754 			return rc;
1755 		}
1756 		txq_trans_update(txq);
1757 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1758 			return NETDEV_TX_BUSY;
1759 	} while (skb->next);
1760 
1761 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1762 
1763 out_kfree_skb:
1764 	kfree_skb(skb);
1765 	return NETDEV_TX_OK;
1766 }
1767 
1768 static u32 skb_tx_hashrnd;
1769 
1770 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1771 {
1772 	u32 hash;
1773 
1774 	if (skb_rx_queue_recorded(skb)) {
1775 		hash = skb_get_rx_queue(skb);
1776 		while (unlikely(hash >= dev->real_num_tx_queues))
1777 			hash -= dev->real_num_tx_queues;
1778 		return hash;
1779 	}
1780 
1781 	if (skb->sk && skb->sk->sk_hash)
1782 		hash = skb->sk->sk_hash;
1783 	else
1784 		hash = skb->protocol;
1785 
1786 	hash = jhash_1word(hash, skb_tx_hashrnd);
1787 
1788 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1789 }
1790 EXPORT_SYMBOL(skb_tx_hash);
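/*
 * Note (worked example): the final line above maps the 32-bit jhash
 * value uniformly onto [0, real_num_tx_queues) without a modulo; with
 * 4 queues, a hash of 0x80000000 selects queue 2:
 *
 *	((u64)0x80000000 * 4) >> 32 == 2
 */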
1791 
1792 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1793 					struct sk_buff *skb)
1794 {
1795 	const struct net_device_ops *ops = dev->netdev_ops;
1796 	u16 queue_index = 0;
1797 
1798 	if (ops->ndo_select_queue)
1799 		queue_index = ops->ndo_select_queue(dev, skb);
1800 	else if (dev->real_num_tx_queues > 1)
1801 		queue_index = skb_tx_hash(dev, skb);
1802 
1803 	skb_set_queue_mapping(skb, queue_index);
1804 	return netdev_get_tx_queue(dev, queue_index);
1805 }
1806 
1807 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
1808 				 struct net_device *dev,
1809 				 struct netdev_queue *txq)
1810 {
1811 	spinlock_t *root_lock = qdisc_lock(q);
1812 	int rc;
1813 
1814 	spin_lock(root_lock);
1815 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1816 		kfree_skb(skb);
1817 		rc = NET_XMIT_DROP;
1818 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
1819 		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
1820 		/*
1821 		 * This is a work-conserving queue; there are no old skbs
1822 		 * waiting to be sent out; and the qdisc is not running -
1823 		 * xmit the skb directly.
1824 		 */
1825 		__qdisc_update_bstats(q, skb->len);
1826 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
1827 			__qdisc_run(q);
1828 		else
1829 			clear_bit(__QDISC_STATE_RUNNING, &q->state);
1830 
1831 		rc = NET_XMIT_SUCCESS;
1832 	} else {
1833 		rc = qdisc_enqueue_root(skb, q);
1834 		qdisc_run(q);
1835 	}
1836 	spin_unlock(root_lock);
1837 
1838 	return rc;
1839 }
1840 
1841 /**
1842  *	dev_queue_xmit - transmit a buffer
1843  *	@skb: buffer to transmit
1844  *
1845  *	Queue a buffer for transmission to a network device. The caller must
1846  *	have set the device and priority and built the buffer before calling
1847  *	this function. The function can be called from an interrupt.
1848  *
1849  *	A negative errno code is returned on a failure. A success does not
1850  *	guarantee the frame will be transmitted as it may be dropped due
1851  *	to congestion or traffic shaping.
1852  *
1853  * -----------------------------------------------------------------------------------
1854  *      I notice this method can also return errors from the queue disciplines,
1855  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1856  *      be positive.
1857  *
1858  *      Regardless of the return value, the skb is consumed, so it is currently
1859  *      difficult to retry a send to this method.  (You can bump the ref count
1860  *      before sending to hold a reference for retry if you are careful.)
1861  *
1862  *      When calling this method, interrupts MUST be enabled.  This is because
1863  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1864  *          --BLG
1865  */
1866 int dev_queue_xmit(struct sk_buff *skb)
1867 {
1868 	struct net_device *dev = skb->dev;
1869 	struct netdev_queue *txq;
1870 	struct Qdisc *q;
1871 	int rc = -ENOMEM;
1872 
1873 	/* GSO will handle the following emulations directly. */
1874 	if (netif_needs_gso(dev, skb))
1875 		goto gso;
1876 
1877 	if (skb_has_frags(skb) &&
1878 	    !(dev->features & NETIF_F_FRAGLIST) &&
1879 	    __skb_linearize(skb))
1880 		goto out_kfree_skb;
1881 
1882 	/* Fragmented skb is linearized if device does not support SG,
1883 	 * or if at least one of the fragments is in highmem and the device
1884 	 * does not support DMA from it.
1885 	 */
1886 	if (skb_shinfo(skb)->nr_frags &&
1887 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1888 	    __skb_linearize(skb))
1889 		goto out_kfree_skb;
1890 
1891 	/* If packet is not checksummed and device does not support
1892 	 * checksumming for this protocol, complete checksumming here.
1893 	 */
1894 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1895 		skb_set_transport_header(skb, skb->csum_start -
1896 					      skb_headroom(skb));
1897 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1898 			goto out_kfree_skb;
1899 	}
1900 
1901 gso:
1902 	/* Disable soft irqs for various locks below. Also
1903 	 * stops preemption for RCU.
1904 	 */
1905 	rcu_read_lock_bh();
1906 
1907 	txq = dev_pick_tx(dev, skb);
1908 	q = rcu_dereference(txq->qdisc);
1909 
1910 #ifdef CONFIG_NET_CLS_ACT
1911 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1912 #endif
1913 	if (q->enqueue) {
1914 		rc = __dev_xmit_skb(skb, q, dev, txq);
1915 		goto out;
1916 	}
1917 
1918 	/* The device has no queue. Common case for software devices:
1919 	   loopback, all the sorts of tunnels...
1920 
1921 	   Really, it is unlikely that netif_tx_lock protection is necessary
1922 	   here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
1923 	   counters.)
1924 	   However, it is possible that they rely on the protection
1925 	   we provide here.
1926 
1927 	   Check this and, if safe, drop the lock; it is not prone to deadlocks.
1928 	   Or give such devices a noqueue qdisc instead, which is even simpler 8)
1929 	 */
1930 	if (dev->flags & IFF_UP) {
1931 		int cpu = smp_processor_id(); /* ok because BHs are off */
1932 
1933 		if (txq->xmit_lock_owner != cpu) {
1934 
1935 			HARD_TX_LOCK(dev, txq, cpu);
1936 
1937 			if (!netif_tx_queue_stopped(txq)) {
1938 				rc = NET_XMIT_SUCCESS;
1939 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1940 					HARD_TX_UNLOCK(dev, txq);
1941 					goto out;
1942 				}
1943 			}
1944 			HARD_TX_UNLOCK(dev, txq);
1945 			if (net_ratelimit())
1946 				printk(KERN_CRIT "Virtual device %s asks to "
1947 				       "queue packet!\n", dev->name);
1948 		} else {
1949 			/* Recursion has been detected! Unfortunately,
1950 			 * it is possible. */
1951 			if (net_ratelimit())
1952 				printk(KERN_CRIT "Dead loop on virtual device "
1953 				       "%s, fix it urgently!\n", dev->name);
1954 		}
1955 	}
1956 
1957 	rc = -ENETDOWN;
1958 	rcu_read_unlock_bh();
1959 
1960 out_kfree_skb:
1961 	kfree_skb(skb);
1962 	return rc;
1963 out:
1964 	rcu_read_unlock_bh();
1965 	return rc;
1966 }
1967 EXPORT_SYMBOL(dev_queue_xmit);
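/*
 * Illustrative sketch (not from the original source): a caller that has
 * built an skb typically fills in the device and priority before handing
 * the buffer off, e.g.:
 *
 *	skb->dev = dev;
 *	skb->priority = TC_PRIO_CONTROL;
 *	dev_queue_xmit(skb);
 *
 * The skb is consumed whatever the return value, so it must not be
 * touched afterwards unless an extra reference was taken beforehand.
 */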
1968 
1969 
1970 /*=======================================================================
1971 			Receiver routines
1972   =======================================================================*/
1973 
1974 int netdev_max_backlog __read_mostly = 1000;
1975 int netdev_budget __read_mostly = 300;
1976 int weight_p __read_mostly = 64;            /* old backlog weight */
1977 
1978 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1979 
1980 
1981 /**
1982  *	netif_rx	-	post buffer to the network code
1983  *	@skb: buffer to post
1984  *
1985  *	This function receives a packet from a device driver and queues it for
1986  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1987  *	may be dropped during processing for congestion control or by the
1988  *	protocol layers.
1989  *
1990  *	return values:
1991  *	NET_RX_SUCCESS	(no congestion)
1992  *	NET_RX_DROP     (packet was dropped)
1993  *
1994  */
1995 
1996 int netif_rx(struct sk_buff *skb)
1997 {
1998 	struct softnet_data *queue;
1999 	unsigned long flags;
2000 
2001 	/* if netpoll wants it, pretend we never saw it */
2002 	if (netpoll_rx(skb))
2003 		return NET_RX_DROP;
2004 
2005 	if (!skb->tstamp.tv64)
2006 		net_timestamp(skb);
2007 
2008 	/*
2009 	 * The code is arranged so that the path is shortest when the
2010 	 * CPU is congested but still operating.
2011 	 */
2012 	local_irq_save(flags);
2013 	queue = &__get_cpu_var(softnet_data);
2014 
2015 	__get_cpu_var(netdev_rx_stat).total++;
2016 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2017 		if (queue->input_pkt_queue.qlen) {
2018 enqueue:
2019 			__skb_queue_tail(&queue->input_pkt_queue, skb);
2020 			local_irq_restore(flags);
2021 			return NET_RX_SUCCESS;
2022 		}
2023 
2024 		napi_schedule(&queue->backlog);
2025 		goto enqueue;
2026 	}
2027 
2028 	__get_cpu_var(netdev_rx_stat).dropped++;
2029 	local_irq_restore(flags);
2030 
2031 	kfree_skb(skb);
2032 	return NET_RX_DROP;
2033 }
2034 EXPORT_SYMBOL(netif_rx);
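/*
 * Illustrative sketch (not from the original source): a non-NAPI driver
 * usually calls netif_rx() from its interrupt handler once the frame has
 * been copied into an skb, e.g.:
 *
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 *
 * The return value is normally ignored; the stack owns the skb from this
 * point on.
 */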
2035 
2036 int netif_rx_ni(struct sk_buff *skb)
2037 {
2038 	int err;
2039 
2040 	preempt_disable();
2041 	err = netif_rx(skb);
2042 	if (local_softirq_pending())
2043 		do_softirq();
2044 	preempt_enable();
2045 
2046 	return err;
2047 }
2048 EXPORT_SYMBOL(netif_rx_ni);
2049 
2050 static void net_tx_action(struct softirq_action *h)
2051 {
2052 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2053 
2054 	if (sd->completion_queue) {
2055 		struct sk_buff *clist;
2056 
2057 		local_irq_disable();
2058 		clist = sd->completion_queue;
2059 		sd->completion_queue = NULL;
2060 		local_irq_enable();
2061 
2062 		while (clist) {
2063 			struct sk_buff *skb = clist;
2064 			clist = clist->next;
2065 
2066 			WARN_ON(atomic_read(&skb->users));
2067 			__kfree_skb(skb);
2068 		}
2069 	}
2070 
2071 	if (sd->output_queue) {
2072 		struct Qdisc *head;
2073 
2074 		local_irq_disable();
2075 		head = sd->output_queue;
2076 		sd->output_queue = NULL;
2077 		local_irq_enable();
2078 
2079 		while (head) {
2080 			struct Qdisc *q = head;
2081 			spinlock_t *root_lock;
2082 
2083 			head = head->next_sched;
2084 
2085 			root_lock = qdisc_lock(q);
2086 			if (spin_trylock(root_lock)) {
2087 				smp_mb__before_clear_bit();
2088 				clear_bit(__QDISC_STATE_SCHED,
2089 					  &q->state);
2090 				qdisc_run(q);
2091 				spin_unlock(root_lock);
2092 			} else {
2093 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2094 					      &q->state)) {
2095 					__netif_reschedule(q);
2096 				} else {
2097 					smp_mb__before_clear_bit();
2098 					clear_bit(__QDISC_STATE_SCHED,
2099 						  &q->state);
2100 				}
2101 			}
2102 		}
2103 	}
2104 }
2105 
2106 static inline int deliver_skb(struct sk_buff *skb,
2107 			      struct packet_type *pt_prev,
2108 			      struct net_device *orig_dev)
2109 {
2110 	atomic_inc(&skb->users);
2111 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2112 }
2113 
2114 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2115 
2116 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2117 /* This hook is defined here for ATM LANE */
2118 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2119 			     unsigned char *addr) __read_mostly;
2120 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2121 #endif
2122 
2123 /*
2124  * If the bridge module is loaded, call the bridging hook.
2125  * Returns NULL if the packet was consumed.
2126  */
2127 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2128 					struct sk_buff *skb) __read_mostly;
2129 EXPORT_SYMBOL_GPL(br_handle_frame_hook);
2130 
2131 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2132 					    struct packet_type **pt_prev, int *ret,
2133 					    struct net_device *orig_dev)
2134 {
2135 	struct net_bridge_port *port;
2136 
2137 	if (skb->pkt_type == PACKET_LOOPBACK ||
2138 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2139 		return skb;
2140 
2141 	if (*pt_prev) {
2142 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2143 		*pt_prev = NULL;
2144 	}
2145 
2146 	return br_handle_frame_hook(port, skb);
2147 }
2148 #else
2149 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2150 #endif
2151 
2152 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2153 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2154 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2155 
2156 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2157 					     struct packet_type **pt_prev,
2158 					     int *ret,
2159 					     struct net_device *orig_dev)
2160 {
2161 	if (skb->dev->macvlan_port == NULL)
2162 		return skb;
2163 
2164 	if (*pt_prev) {
2165 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2166 		*pt_prev = NULL;
2167 	}
2168 	return macvlan_handle_frame_hook(skb);
2169 }
2170 #else
2171 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2172 #endif
2173 
2174 #ifdef CONFIG_NET_CLS_ACT
2175 /* TODO: Maybe we should just force sch_ingress to be compiled in
2176  * whenever CONFIG_NET_CLS_ACT is; otherwise we currently pay for a
2177  * useless compare and two extra stores when CONFIG_NET_CLS_ACT is
2178  * enabled but the ingress scheduler is not loaded.
2179  * NOTE: This doesn't remove any functionality; if you don't have
2180  * the ingress scheduler, you just can't add policies on ingress.
2181  *
2182  */
2183 static int ing_filter(struct sk_buff *skb)
2184 {
2185 	struct net_device *dev = skb->dev;
2186 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2187 	struct netdev_queue *rxq;
2188 	int result = TC_ACT_OK;
2189 	struct Qdisc *q;
2190 
2191 	if (MAX_RED_LOOP < ttl++) {
2192 		printk(KERN_WARNING
2193 		       "Redir loop detected Dropping packet (%d->%d)\n",
2194 		       skb->iif, dev->ifindex);
2195 		return TC_ACT_SHOT;
2196 	}
2197 
2198 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2199 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2200 
2201 	rxq = &dev->rx_queue;
2202 
2203 	q = rxq->qdisc;
2204 	if (q != &noop_qdisc) {
2205 		spin_lock(qdisc_lock(q));
2206 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2207 			result = qdisc_enqueue_root(skb, q);
2208 		spin_unlock(qdisc_lock(q));
2209 	}
2210 
2211 	return result;
2212 }
2213 
2214 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2215 					 struct packet_type **pt_prev,
2216 					 int *ret, struct net_device *orig_dev)
2217 {
2218 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2219 		goto out;
2220 
2221 	if (*pt_prev) {
2222 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2223 		*pt_prev = NULL;
2224 	} else {
2225 		/* Huh? Why does turning on AF_PACKET affect this? */
2226 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2227 	}
2228 
2229 	switch (ing_filter(skb)) {
2230 	case TC_ACT_SHOT:
2231 	case TC_ACT_STOLEN:
2232 		kfree_skb(skb);
2233 		return NULL;
2234 	}
2235 
2236 out:
2237 	skb->tc_verd = 0;
2238 	return skb;
2239 }
2240 #endif
2241 
2242 /**
2243  * 	netif_nit_deliver - deliver received packets to network taps
2244  * 	@skb: buffer
2245  *
2246  * 	This function is used to deliver incoming packets to network
2247  * 	taps. It should be used when the normal netif_receive_skb path
2248  * 	is bypassed, for example because of VLAN acceleration.
2249  */
2250 void netif_nit_deliver(struct sk_buff *skb)
2251 {
2252 	struct packet_type *ptype;
2253 
2254 	if (list_empty(&ptype_all))
2255 		return;
2256 
2257 	skb_reset_network_header(skb);
2258 	skb_reset_transport_header(skb);
2259 	skb->mac_len = skb->network_header - skb->mac_header;
2260 
2261 	rcu_read_lock();
2262 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2263 		if (!ptype->dev || ptype->dev == skb->dev)
2264 			deliver_skb(skb, ptype, skb->dev);
2265 	}
2266 	rcu_read_unlock();
2267 }
2268 
2269 /**
2270  *	netif_receive_skb - process receive buffer from network
2271  *	@skb: buffer to process
2272  *
2273  *	netif_receive_skb() is the main receive data processing function.
2274  *	It always succeeds. The buffer may be dropped during processing
2275  *	for congestion control or by the protocol layers.
2276  *
2277  *	This function may only be called from softirq context and interrupts
2278  *	should be enabled.
2279  *
2280  *	Return values (usually ignored):
2281  *	NET_RX_SUCCESS: no congestion
2282  *	NET_RX_DROP: packet was dropped
2283  */
2284 int netif_receive_skb(struct sk_buff *skb)
2285 {
2286 	struct packet_type *ptype, *pt_prev;
2287 	struct net_device *orig_dev;
2288 	struct net_device *null_or_orig;
2289 	int ret = NET_RX_DROP;
2290 	__be16 type;
2291 
2292 	if (!skb->tstamp.tv64)
2293 		net_timestamp(skb);
2294 
2295 	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2296 		return NET_RX_SUCCESS;
2297 
2298 	/* if we've gotten here through NAPI, check netpoll */
2299 	if (netpoll_receive_skb(skb))
2300 		return NET_RX_DROP;
2301 
2302 	if (!skb->iif)
2303 		skb->iif = skb->dev->ifindex;
2304 
2305 	null_or_orig = NULL;
2306 	orig_dev = skb->dev;
2307 	if (orig_dev->master) {
2308 		if (skb_bond_should_drop(skb))
2309 			null_or_orig = orig_dev; /* deliver only exact match */
2310 		else
2311 			skb->dev = orig_dev->master;
2312 	}
2313 
2314 	__get_cpu_var(netdev_rx_stat).total++;
2315 
2316 	skb_reset_network_header(skb);
2317 	skb_reset_transport_header(skb);
2318 	skb->mac_len = skb->network_header - skb->mac_header;
2319 
2320 	pt_prev = NULL;
2321 
2322 	rcu_read_lock();
2323 
2324 #ifdef CONFIG_NET_CLS_ACT
2325 	if (skb->tc_verd & TC_NCLS) {
2326 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2327 		goto ncls;
2328 	}
2329 #endif
2330 
2331 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2332 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2333 		    ptype->dev == orig_dev) {
2334 			if (pt_prev)
2335 				ret = deliver_skb(skb, pt_prev, orig_dev);
2336 			pt_prev = ptype;
2337 		}
2338 	}
2339 
2340 #ifdef CONFIG_NET_CLS_ACT
2341 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2342 	if (!skb)
2343 		goto out;
2344 ncls:
2345 #endif
2346 
2347 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2348 	if (!skb)
2349 		goto out;
2350 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2351 	if (!skb)
2352 		goto out;
2353 
2354 	type = skb->protocol;
2355 	list_for_each_entry_rcu(ptype,
2356 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2357 		if (ptype->type == type &&
2358 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2359 		     ptype->dev == orig_dev)) {
2360 			if (pt_prev)
2361 				ret = deliver_skb(skb, pt_prev, orig_dev);
2362 			pt_prev = ptype;
2363 		}
2364 	}
2365 
2366 	if (pt_prev) {
2367 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2368 	} else {
2369 		kfree_skb(skb);
2370 		/* Jamal, now you will not be able to escape explaining
2371 		 * to me how you were going to use this. :-)
2372 		 */
2373 		ret = NET_RX_DROP;
2374 	}
2375 
2376 out:
2377 	rcu_read_unlock();
2378 	return ret;
2379 }
2380 EXPORT_SYMBOL(netif_receive_skb);
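/*
 * Illustrative sketch (not from the original source): a NAPI driver calls
 * netif_receive_skb() from its ->poll() handler for each received frame,
 * e.g.:
 *
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_receive_skb(skb);
 *
 * in contrast to netif_rx(), which queues the skb to the per-CPU backlog
 * for later softirq processing.
 */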
2381 
2382 /* Network device is going away, flush any packets still pending  */
2383 static void flush_backlog(void *arg)
2384 {
2385 	struct net_device *dev = arg;
2386 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2387 	struct sk_buff *skb, *tmp;
2388 
2389 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2390 		if (skb->dev == dev) {
2391 			__skb_unlink(skb, &queue->input_pkt_queue);
2392 			kfree_skb(skb);
2393 		}
2394 }
2395 
2396 static int napi_gro_complete(struct sk_buff *skb)
2397 {
2398 	struct packet_type *ptype;
2399 	__be16 type = skb->protocol;
2400 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2401 	int err = -ENOENT;
2402 
2403 	if (NAPI_GRO_CB(skb)->count == 1) {
2404 		skb_shinfo(skb)->gso_size = 0;
2405 		goto out;
2406 	}
2407 
2408 	rcu_read_lock();
2409 	list_for_each_entry_rcu(ptype, head, list) {
2410 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2411 			continue;
2412 
2413 		err = ptype->gro_complete(skb);
2414 		break;
2415 	}
2416 	rcu_read_unlock();
2417 
2418 	if (err) {
2419 		WARN_ON(&ptype->list == head);
2420 		kfree_skb(skb);
2421 		return NET_RX_SUCCESS;
2422 	}
2423 
2424 out:
2425 	return netif_receive_skb(skb);
2426 }
2427 
2428 void napi_gro_flush(struct napi_struct *napi)
2429 {
2430 	struct sk_buff *skb, *next;
2431 
2432 	for (skb = napi->gro_list; skb; skb = next) {
2433 		next = skb->next;
2434 		skb->next = NULL;
2435 		napi_gro_complete(skb);
2436 	}
2437 
2438 	napi->gro_count = 0;
2439 	napi->gro_list = NULL;
2440 }
2441 EXPORT_SYMBOL(napi_gro_flush);
2442 
2443 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2444 {
2445 	struct sk_buff **pp = NULL;
2446 	struct packet_type *ptype;
2447 	__be16 type = skb->protocol;
2448 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2449 	int same_flow;
2450 	int mac_len;
2451 	int ret;
2452 
2453 	if (!(skb->dev->features & NETIF_F_GRO))
2454 		goto normal;
2455 
2456 	if (skb_is_gso(skb) || skb_has_frags(skb))
2457 		goto normal;
2458 
2459 	rcu_read_lock();
2460 	list_for_each_entry_rcu(ptype, head, list) {
2461 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2462 			continue;
2463 
2464 		skb_set_network_header(skb, skb_gro_offset(skb));
2465 		mac_len = skb->network_header - skb->mac_header;
2466 		skb->mac_len = mac_len;
2467 		NAPI_GRO_CB(skb)->same_flow = 0;
2468 		NAPI_GRO_CB(skb)->flush = 0;
2469 		NAPI_GRO_CB(skb)->free = 0;
2470 
2471 		pp = ptype->gro_receive(&napi->gro_list, skb);
2472 		break;
2473 	}
2474 	rcu_read_unlock();
2475 
2476 	if (&ptype->list == head)
2477 		goto normal;
2478 
2479 	same_flow = NAPI_GRO_CB(skb)->same_flow;
2480 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2481 
2482 	if (pp) {
2483 		struct sk_buff *nskb = *pp;
2484 
2485 		*pp = nskb->next;
2486 		nskb->next = NULL;
2487 		napi_gro_complete(nskb);
2488 		napi->gro_count--;
2489 	}
2490 
2491 	if (same_flow)
2492 		goto ok;
2493 
2494 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2495 		goto normal;
2496 
2497 	napi->gro_count++;
2498 	NAPI_GRO_CB(skb)->count = 1;
2499 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2500 	skb->next = napi->gro_list;
2501 	napi->gro_list = skb;
2502 	ret = GRO_HELD;
2503 
2504 pull:
2505 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
2506 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
2507 
2508 		BUG_ON(skb->end - skb->tail < grow);
2509 
2510 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2511 
2512 		skb->tail += grow;
2513 		skb->data_len -= grow;
2514 
2515 		skb_shinfo(skb)->frags[0].page_offset += grow;
2516 		skb_shinfo(skb)->frags[0].size -= grow;
2517 
2518 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2519 			put_page(skb_shinfo(skb)->frags[0].page);
2520 			memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1,
2521 				--skb_shinfo(skb)->nr_frags *
2522 				sizeof(skb_shinfo(skb)->frags[0]));
2523 		}
2524 	}
2525 
2526 ok:
2527 	return ret;
2528 
2529 normal:
2530 	ret = GRO_NORMAL;
2531 	goto pull;
2532 }
2533 EXPORT_SYMBOL(dev_gro_receive);
2534 
2535 static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2536 {
2537 	struct sk_buff *p;
2538 
2539 	if (netpoll_rx_on(skb))
2540 		return GRO_NORMAL;
2541 
2542 	for (p = napi->gro_list; p; p = p->next) {
2543 		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2544 			&& !compare_ether_header(skb_mac_header(p),
2545 						 skb_gro_mac_header(skb));
2546 		NAPI_GRO_CB(p)->flush = 0;
2547 	}
2548 
2549 	return dev_gro_receive(napi, skb);
2550 }
2551 
2552 int napi_skb_finish(int ret, struct sk_buff *skb)
2553 {
2554 	int err = NET_RX_SUCCESS;
2555 
2556 	switch (ret) {
2557 	case GRO_NORMAL:
2558 		return netif_receive_skb(skb);
2559 
2560 	case GRO_DROP:
2561 		err = NET_RX_DROP;
2562 		/* fall through */
2563 
2564 	case GRO_MERGED_FREE:
2565 		kfree_skb(skb);
2566 		break;
2567 	}
2568 
2569 	return err;
2570 }
2571 EXPORT_SYMBOL(napi_skb_finish);
2572 
2573 void skb_gro_reset_offset(struct sk_buff *skb)
2574 {
2575 	NAPI_GRO_CB(skb)->data_offset = 0;
2576 	NAPI_GRO_CB(skb)->frag0 = NULL;
2577 	NAPI_GRO_CB(skb)->frag0_len = 0;
2578 
2579 	if (skb->mac_header == skb->tail &&
2580 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
2581 		NAPI_GRO_CB(skb)->frag0 =
2582 			page_address(skb_shinfo(skb)->frags[0].page) +
2583 			skb_shinfo(skb)->frags[0].page_offset;
2584 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2585 	}
2586 }
2587 EXPORT_SYMBOL(skb_gro_reset_offset);
2588 
2589 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2590 {
2591 	skb_gro_reset_offset(skb);
2592 
2593 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
2594 }
2595 EXPORT_SYMBOL(napi_gro_receive);
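/*
 * Illustrative sketch (not from the original source): a GRO-capable driver
 * simply substitutes napi_gro_receive() for netif_receive_skb() in its
 * ->poll() handler, e.g.:
 *
 *	skb->protocol = eth_type_trans(skb, netdev);
 *	napi_gro_receive(&priv->napi, skb);
 *
 * Frames that cannot be merged fall through to netif_receive_skb() via
 * the GRO_NORMAL case in napi_skb_finish() above.
 */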
2596 
2597 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2598 {
2599 	__skb_pull(skb, skb_headlen(skb));
2600 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2601 
2602 	napi->skb = skb;
2603 }
2604 EXPORT_SYMBOL(napi_reuse_skb);
2605 
2606 struct sk_buff *napi_get_frags(struct napi_struct *napi)
2607 {
2608 	struct net_device *dev = napi->dev;
2609 	struct sk_buff *skb = napi->skb;
2610 
2611 	if (!skb) {
2612 		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2613 		if (!skb)
2614 			goto out;
2615 
2616 		skb_reserve(skb, NET_IP_ALIGN);
2617 
2618 		napi->skb = skb;
2619 	}
2620 
2621 out:
2622 	return skb;
2623 }
2624 EXPORT_SYMBOL(napi_get_frags);
2625 
2626 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2627 {
2628 	int err = NET_RX_SUCCESS;
2629 
2630 	switch (ret) {
2631 	case GRO_NORMAL:
2632 	case GRO_HELD:
2633 		skb->protocol = eth_type_trans(skb, napi->dev);
2634 
2635 		if (ret == GRO_NORMAL)
2636 			return netif_receive_skb(skb);
2637 
2638 		skb_gro_pull(skb, -ETH_HLEN);
2639 		break;
2640 
2641 	case GRO_DROP:
2642 		err = NET_RX_DROP;
2643 		/* fall through */
2644 
2645 	case GRO_MERGED_FREE:
2646 		napi_reuse_skb(napi, skb);
2647 		break;
2648 	}
2649 
2650 	return err;
2651 }
2652 EXPORT_SYMBOL(napi_frags_finish);
2653 
2654 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2655 {
2656 	struct sk_buff *skb = napi->skb;
2657 	struct ethhdr *eth;
2658 	unsigned int hlen;
2659 	unsigned int off;
2660 
2661 	napi->skb = NULL;
2662 
2663 	skb_reset_mac_header(skb);
2664 	skb_gro_reset_offset(skb);
2665 
2666 	off = skb_gro_offset(skb);
2667 	hlen = off + sizeof(*eth);
2668 	eth = skb_gro_header_fast(skb, off);
2669 	if (skb_gro_header_hard(skb, hlen)) {
2670 		eth = skb_gro_header_slow(skb, hlen, off);
2671 		if (unlikely(!eth)) {
2672 			napi_reuse_skb(napi, skb);
2673 			skb = NULL;
2674 			goto out;
2675 		}
2676 	}
2677 
2678 	skb_gro_pull(skb, sizeof(*eth));
2679 
2680 	/*
2681 	 * This works because the only protocols we care about don't require
2682 	 * special handling.  We'll fix it up properly at the end.
2683 	 */
2684 	skb->protocol = eth->h_proto;
2685 
2686 out:
2687 	return skb;
2688 }
2689 EXPORT_SYMBOL(napi_frags_skb);
2690 
2691 int napi_gro_frags(struct napi_struct *napi)
2692 {
2693 	struct sk_buff *skb = napi_frags_skb(napi);
2694 
2695 	if (!skb)
2696 		return NET_RX_DROP;
2697 
2698 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2699 }
2700 EXPORT_SYMBOL(napi_gro_frags);
2701 
2702 static int process_backlog(struct napi_struct *napi, int quota)
2703 {
2704 	int work = 0;
2705 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2706 	unsigned long start_time = jiffies;
2707 
2708 	napi->weight = weight_p;
2709 	do {
2710 		struct sk_buff *skb;
2711 
2712 		local_irq_disable();
2713 		skb = __skb_dequeue(&queue->input_pkt_queue);
2714 		if (!skb) {
2715 			__napi_complete(napi);
2716 			local_irq_enable();
2717 			break;
2718 		}
2719 		local_irq_enable();
2720 
2721 		netif_receive_skb(skb);
2722 	} while (++work < quota && jiffies == start_time);
2723 
2724 	return work;
2725 }
2726 
2727 /**
2728  * __napi_schedule - schedule for receive
2729  * @n: entry to schedule
2730  *
2731  * The entry's receive function will be scheduled to run
2732  */
2733 void __napi_schedule(struct napi_struct *n)
2734 {
2735 	unsigned long flags;
2736 
2737 	local_irq_save(flags);
2738 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2739 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2740 	local_irq_restore(flags);
2741 }
2742 EXPORT_SYMBOL(__napi_schedule);
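/*
 * Illustrative sketch (not from the original source): drivers normally go
 * through napi_schedule(), which tests and sets NAPI_STATE_SCHED before
 * calling __napi_schedule(), e.g. from an interrupt handler:
 *
 *	if (napi_schedule_prep(&priv->napi)) {
 *		disable_device_irqs(priv);	/* hypothetical helper */
 *		__napi_schedule(&priv->napi);
 *	}
 */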
2743 
2744 void __napi_complete(struct napi_struct *n)
2745 {
2746 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2747 	BUG_ON(n->gro_list);
2748 
2749 	list_del(&n->poll_list);
2750 	smp_mb__before_clear_bit();
2751 	clear_bit(NAPI_STATE_SCHED, &n->state);
2752 }
2753 EXPORT_SYMBOL(__napi_complete);
2754 
2755 void napi_complete(struct napi_struct *n)
2756 {
2757 	unsigned long flags;
2758 
2759 	/*
2760 	 * don't let napi dequeue from the cpu poll list
2761 	 * just in case it's running on a different cpu
2762 	 */
2763 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2764 		return;
2765 
2766 	napi_gro_flush(n);
2767 	local_irq_save(flags);
2768 	__napi_complete(n);
2769 	local_irq_restore(flags);
2770 }
2771 EXPORT_SYMBOL(napi_complete);
2772 
2773 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2774 		    int (*poll)(struct napi_struct *, int), int weight)
2775 {
2776 	INIT_LIST_HEAD(&napi->poll_list);
2777 	napi->gro_count = 0;
2778 	napi->gro_list = NULL;
2779 	napi->skb = NULL;
2780 	napi->poll = poll;
2781 	napi->weight = weight;
2782 	list_add(&napi->dev_list, &dev->napi_list);
2783 	napi->dev = dev;
2784 #ifdef CONFIG_NETPOLL
2785 	spin_lock_init(&napi->poll_lock);
2786 	napi->poll_owner = -1;
2787 #endif
2788 	set_bit(NAPI_STATE_SCHED, &napi->state);
2789 }
2790 EXPORT_SYMBOL(netif_napi_add);
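/*
 * Illustrative sketch (not from the original source): a driver typically
 * registers its poll function at probe time and enables it in ndo_open:
 *
 *	netif_napi_add(netdev, &priv->napi, my_driver_poll, 64);
 *	...
 *	napi_enable(&priv->napi);
 *
 * where my_driver_poll() is a hypothetical int (*)(struct napi_struct *, int)
 * callback and 64 is a commonly used weight.
 */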
2791 
2792 void netif_napi_del(struct napi_struct *napi)
2793 {
2794 	struct sk_buff *skb, *next;
2795 
2796 	list_del_init(&napi->dev_list);
2797 	napi_free_frags(napi);
2798 
2799 	for (skb = napi->gro_list; skb; skb = next) {
2800 		next = skb->next;
2801 		skb->next = NULL;
2802 		kfree_skb(skb);
2803 	}
2804 
2805 	napi->gro_list = NULL;
2806 	napi->gro_count = 0;
2807 }
2808 EXPORT_SYMBOL(netif_napi_del);
2809 
2810 
2811 static void net_rx_action(struct softirq_action *h)
2812 {
2813 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2814 	unsigned long time_limit = jiffies + 2;
2815 	int budget = netdev_budget;
2816 	void *have;
2817 
2818 	local_irq_disable();
2819 
2820 	while (!list_empty(list)) {
2821 		struct napi_struct *n;
2822 		int work, weight;
2823 
2824 		/* If the softirq window is exhausted then punt.
2825 		 * Allow this to run for 2 jiffies, which allows
2826 		 * an average latency of 1.5/HZ.
2827 		 */
2828 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2829 			goto softnet_break;
2830 
2831 		local_irq_enable();
2832 
2833 		/* Even though interrupts have been re-enabled, this
2834 		 * access is safe because interrupts can only add new
2835 		 * entries to the tail of this list, and only ->poll()
2836 		 * calls can remove this head entry from the list.
2837 		 */
2838 		n = list_entry(list->next, struct napi_struct, poll_list);
2839 
2840 		have = netpoll_poll_lock(n);
2841 
2842 		weight = n->weight;
2843 
2844 		/* This NAPI_STATE_SCHED test is for avoiding a race
2845 		 * with netpoll's poll_napi().  Only the entity which
2846 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2847 		 * actually make the ->poll() call.  Therefore we avoid
2848 		 * accidentally calling ->poll() when NAPI is not scheduled.
2849 		 */
2850 		work = 0;
2851 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
2852 			work = n->poll(n, weight);
2853 			trace_napi_poll(n);
2854 		}
2855 
2856 		WARN_ON_ONCE(work > weight);
2857 
2858 		budget -= work;
2859 
2860 		local_irq_disable();
2861 
2862 		/* Drivers must not modify the NAPI state if they
2863 		 * consume the entire weight.  In such cases this code
2864 		 * still "owns" the NAPI instance and therefore can
2865 		 * move the instance around on the list at-will.
2866 		 */
2867 		if (unlikely(work == weight)) {
2868 			if (unlikely(napi_disable_pending(n))) {
2869 				local_irq_enable();
2870 				napi_complete(n);
2871 				local_irq_disable();
2872 			} else
2873 				list_move_tail(&n->poll_list, list);
2874 		}
2875 
2876 		netpoll_poll_unlock(have);
2877 	}
2878 out:
2879 	local_irq_enable();
2880 
2881 #ifdef CONFIG_NET_DMA
2882 	/*
2883 	 * There may not be any more sk_buffs coming right now, so push
2884 	 * any pending DMA copies to hardware
2885 	 */
2886 	dma_issue_pending_all();
2887 #endif
2888 
2889 	return;
2890 
2891 softnet_break:
2892 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2893 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2894 	goto out;
2895 }
2896 
2897 static gifconf_func_t *gifconf_list[NPROTO];
2898 
2899 /**
2900  *	register_gifconf	-	register a SIOCGIF handler
2901  *	@family: Address family
2902  *	@gifconf: Function handler
2903  *
2904  *	Register protocol dependent address dumping routines. The handler
2905  *	that is passed must not be freed or reused until it has been replaced
2906  *	by another handler.
2907  */
2908 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
2909 {
2910 	if (family >= NPROTO)
2911 		return -EINVAL;
2912 	gifconf_list[family] = gifconf;
2913 	return 0;
2914 }
2915 EXPORT_SYMBOL(register_gifconf);
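/*
 * Illustrative sketch (not from the original source): address families
 * register their SIOCGIFCONF helpers at init time; IPv4, for example, does
 * roughly:
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 *
 * so that dev_ifconf() below can ask each family to dump its addresses.
 */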
2916 
2917 
2918 /*
2919  *	Map an interface index to its name (SIOCGIFNAME)
2920  */
2921 
2922 /*
2923  *	We need this ioctl for efficient implementation of the
2924  *	if_indextoname() function required by the IPv6 API.  Without
2925  *	it, we would have to search all the interfaces to find a
2926  *	match.  --pb
2927  */
2928 
2929 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2930 {
2931 	struct net_device *dev;
2932 	struct ifreq ifr;
2933 
2934 	/*
2935 	 *	Fetch the caller's info block.
2936 	 */
2937 
2938 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2939 		return -EFAULT;
2940 
2941 	read_lock(&dev_base_lock);
2942 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2943 	if (!dev) {
2944 		read_unlock(&dev_base_lock);
2945 		return -ENODEV;
2946 	}
2947 
2948 	strcpy(ifr.ifr_name, dev->name);
2949 	read_unlock(&dev_base_lock);
2950 
2951 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2952 		return -EFAULT;
2953 	return 0;
2954 }
2955 
2956 /*
2957  *	Perform a SIOCGIFCONF call. This structure will change
2958  *	size eventually, and there is nothing I can do about it.
2959  *	Thus we will need a 'compatibility mode'.
2960  */
2961 
2962 static int dev_ifconf(struct net *net, char __user *arg)
2963 {
2964 	struct ifconf ifc;
2965 	struct net_device *dev;
2966 	char __user *pos;
2967 	int len;
2968 	int total;
2969 	int i;
2970 
2971 	/*
2972 	 *	Fetch the caller's info block.
2973 	 */
2974 
2975 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2976 		return -EFAULT;
2977 
2978 	pos = ifc.ifc_buf;
2979 	len = ifc.ifc_len;
2980 
2981 	/*
2982 	 *	Loop over the interfaces, and write an info block for each.
2983 	 */
2984 
2985 	total = 0;
2986 	for_each_netdev(net, dev) {
2987 		for (i = 0; i < NPROTO; i++) {
2988 			if (gifconf_list[i]) {
2989 				int done;
2990 				if (!pos)
2991 					done = gifconf_list[i](dev, NULL, 0);
2992 				else
2993 					done = gifconf_list[i](dev, pos + total,
2994 							       len - total);
2995 				if (done < 0)
2996 					return -EFAULT;
2997 				total += done;
2998 			}
2999 		}
3000 	}
3001 
3002 	/*
3003 	 *	All done.  Write the updated control block back to the caller.
3004 	 */
3005 	ifc.ifc_len = total;
3006 
3007 	/*
3008 	 * 	Both BSD and Solaris return 0 here, so we do too.
3009 	 */
3010 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3011 }
3012 
3013 #ifdef CONFIG_PROC_FS
3014 /*
3015  *	This is invoked by the /proc filesystem handler to display a device
3016  *	in detail.
3017  */
3018 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3019 	__acquires(dev_base_lock)
3020 {
3021 	struct net *net = seq_file_net(seq);
3022 	loff_t off;
3023 	struct net_device *dev;
3024 
3025 	read_lock(&dev_base_lock);
3026 	if (!*pos)
3027 		return SEQ_START_TOKEN;
3028 
3029 	off = 1;
3030 	for_each_netdev(net, dev)
3031 		if (off++ == *pos)
3032 			return dev;
3033 
3034 	return NULL;
3035 }
3036 
3037 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3038 {
3039 	struct net *net = seq_file_net(seq);
3040 	++*pos;
3041 	return v == SEQ_START_TOKEN ?
3042 		first_net_device(net) : next_net_device((struct net_device *)v);
3043 }
3044 
3045 void dev_seq_stop(struct seq_file *seq, void *v)
3046 	__releases(dev_base_lock)
3047 {
3048 	read_unlock(&dev_base_lock);
3049 }
3050 
3051 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3052 {
3053 	const struct net_device_stats *stats = dev_get_stats(dev);
3054 
3055 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3056 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3057 		   dev->name, stats->rx_bytes, stats->rx_packets,
3058 		   stats->rx_errors,
3059 		   stats->rx_dropped + stats->rx_missed_errors,
3060 		   stats->rx_fifo_errors,
3061 		   stats->rx_length_errors + stats->rx_over_errors +
3062 		    stats->rx_crc_errors + stats->rx_frame_errors,
3063 		   stats->rx_compressed, stats->multicast,
3064 		   stats->tx_bytes, stats->tx_packets,
3065 		   stats->tx_errors, stats->tx_dropped,
3066 		   stats->tx_fifo_errors, stats->collisions,
3067 		   stats->tx_carrier_errors +
3068 		    stats->tx_aborted_errors +
3069 		    stats->tx_window_errors +
3070 		    stats->tx_heartbeat_errors,
3071 		   stats->tx_compressed);
3072 }
3073 
3074 /*
3075  *	Called from the PROCfs module. This now uses the new arbitrary sized
3076  *	/proc/net interface to create /proc/net/dev
3077  */
3078 static int dev_seq_show(struct seq_file *seq, void *v)
3079 {
3080 	if (v == SEQ_START_TOKEN)
3081 		seq_puts(seq, "Inter-|   Receive                            "
3082 			      "                    |  Transmit\n"
3083 			      " face |bytes    packets errs drop fifo frame "
3084 			      "compressed multicast|bytes    packets errs "
3085 			      "drop fifo colls carrier compressed\n");
3086 	else
3087 		dev_seq_printf_stats(seq, v);
3088 	return 0;
3089 }
3090 
3091 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3092 {
3093 	struct netif_rx_stats *rc = NULL;
3094 
3095 	while (*pos < nr_cpu_ids)
3096 		if (cpu_online(*pos)) {
3097 			rc = &per_cpu(netdev_rx_stat, *pos);
3098 			break;
3099 		} else
3100 			++*pos;
3101 	return rc;
3102 }
3103 
3104 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3105 {
3106 	return softnet_get_online(pos);
3107 }
3108 
3109 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3110 {
3111 	++*pos;
3112 	return softnet_get_online(pos);
3113 }
3114 
3115 static void softnet_seq_stop(struct seq_file *seq, void *v)
3116 {
3117 }
3118 
3119 static int softnet_seq_show(struct seq_file *seq, void *v)
3120 {
3121 	struct netif_rx_stats *s = v;
3122 
3123 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3124 		   s->total, s->dropped, s->time_squeeze, 0,
3125 		   0, 0, 0, 0, /* was fastroute */
3126 		   s->cpu_collision);
3127 	return 0;
3128 }
3129 
3130 static const struct seq_operations dev_seq_ops = {
3131 	.start = dev_seq_start,
3132 	.next  = dev_seq_next,
3133 	.stop  = dev_seq_stop,
3134 	.show  = dev_seq_show,
3135 };
3136 
3137 static int dev_seq_open(struct inode *inode, struct file *file)
3138 {
3139 	return seq_open_net(inode, file, &dev_seq_ops,
3140 			    sizeof(struct seq_net_private));
3141 }
3142 
3143 static const struct file_operations dev_seq_fops = {
3144 	.owner	 = THIS_MODULE,
3145 	.open    = dev_seq_open,
3146 	.read    = seq_read,
3147 	.llseek  = seq_lseek,
3148 	.release = seq_release_net,
3149 };
3150 
3151 static const struct seq_operations softnet_seq_ops = {
3152 	.start = softnet_seq_start,
3153 	.next  = softnet_seq_next,
3154 	.stop  = softnet_seq_stop,
3155 	.show  = softnet_seq_show,
3156 };
3157 
3158 static int softnet_seq_open(struct inode *inode, struct file *file)
3159 {
3160 	return seq_open(file, &softnet_seq_ops);
3161 }
3162 
3163 static const struct file_operations softnet_seq_fops = {
3164 	.owner	 = THIS_MODULE,
3165 	.open    = softnet_seq_open,
3166 	.read    = seq_read,
3167 	.llseek  = seq_lseek,
3168 	.release = seq_release,
3169 };
3170 
3171 static void *ptype_get_idx(loff_t pos)
3172 {
3173 	struct packet_type *pt = NULL;
3174 	loff_t i = 0;
3175 	int t;
3176 
3177 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3178 		if (i == pos)
3179 			return pt;
3180 		++i;
3181 	}
3182 
3183 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3184 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3185 			if (i == pos)
3186 				return pt;
3187 			++i;
3188 		}
3189 	}
3190 	return NULL;
3191 }
3192 
3193 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3194 	__acquires(RCU)
3195 {
3196 	rcu_read_lock();
3197 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3198 }
3199 
3200 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3201 {
3202 	struct packet_type *pt;
3203 	struct list_head *nxt;
3204 	int hash;
3205 
3206 	++*pos;
3207 	if (v == SEQ_START_TOKEN)
3208 		return ptype_get_idx(0);
3209 
3210 	pt = v;
3211 	nxt = pt->list.next;
3212 	if (pt->type == htons(ETH_P_ALL)) {
3213 		if (nxt != &ptype_all)
3214 			goto found;
3215 		hash = 0;
3216 		nxt = ptype_base[0].next;
3217 	} else
3218 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3219 
3220 	while (nxt == &ptype_base[hash]) {
3221 		if (++hash >= PTYPE_HASH_SIZE)
3222 			return NULL;
3223 		nxt = ptype_base[hash].next;
3224 	}
3225 found:
3226 	return list_entry(nxt, struct packet_type, list);
3227 }
3228 
3229 static void ptype_seq_stop(struct seq_file *seq, void *v)
3230 	__releases(RCU)
3231 {
3232 	rcu_read_unlock();
3233 }
3234 
3235 static int ptype_seq_show(struct seq_file *seq, void *v)
3236 {
3237 	struct packet_type *pt = v;
3238 
3239 	if (v == SEQ_START_TOKEN)
3240 		seq_puts(seq, "Type Device      Function\n");
3241 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3242 		if (pt->type == htons(ETH_P_ALL))
3243 			seq_puts(seq, "ALL ");
3244 		else
3245 			seq_printf(seq, "%04x", ntohs(pt->type));
3246 
3247 		seq_printf(seq, " %-8s %pF\n",
3248 			   pt->dev ? pt->dev->name : "", pt->func);
3249 	}
3250 
3251 	return 0;
3252 }
3253 
3254 static const struct seq_operations ptype_seq_ops = {
3255 	.start = ptype_seq_start,
3256 	.next  = ptype_seq_next,
3257 	.stop  = ptype_seq_stop,
3258 	.show  = ptype_seq_show,
3259 };
3260 
3261 static int ptype_seq_open(struct inode *inode, struct file *file)
3262 {
3263 	return seq_open_net(inode, file, &ptype_seq_ops,
3264 			sizeof(struct seq_net_private));
3265 }
3266 
3267 static const struct file_operations ptype_seq_fops = {
3268 	.owner	 = THIS_MODULE,
3269 	.open    = ptype_seq_open,
3270 	.read    = seq_read,
3271 	.llseek  = seq_lseek,
3272 	.release = seq_release_net,
3273 };
3274 
3275 
3276 static int __net_init dev_proc_net_init(struct net *net)
3277 {
3278 	int rc = -ENOMEM;
3279 
3280 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3281 		goto out;
3282 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3283 		goto out_dev;
3284 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3285 		goto out_softnet;
3286 
3287 	if (wext_proc_init(net))
3288 		goto out_ptype;
3289 	rc = 0;
3290 out:
3291 	return rc;
3292 out_ptype:
3293 	proc_net_remove(net, "ptype");
3294 out_softnet:
3295 	proc_net_remove(net, "softnet_stat");
3296 out_dev:
3297 	proc_net_remove(net, "dev");
3298 	goto out;
3299 }
3300 
3301 static void __net_exit dev_proc_net_exit(struct net *net)
3302 {
3303 	wext_proc_exit(net);
3304 
3305 	proc_net_remove(net, "ptype");
3306 	proc_net_remove(net, "softnet_stat");
3307 	proc_net_remove(net, "dev");
3308 }
3309 
3310 static struct pernet_operations __net_initdata dev_proc_ops = {
3311 	.init = dev_proc_net_init,
3312 	.exit = dev_proc_net_exit,
3313 };
3314 
3315 static int __init dev_proc_init(void)
3316 {
3317 	return register_pernet_subsys(&dev_proc_ops);
3318 }
3319 #else
3320 #define dev_proc_init() 0
3321 #endif	/* CONFIG_PROC_FS */
3322 
3323 
3324 /**
3325  *	netdev_set_master	-	set up master/slave pair
3326  *	@slave: slave device
3327  *	@master: new master device
3328  *
3329  *	Changes the master device of the slave. Pass %NULL to break the
3330  *	bonding. The caller must hold the RTNL semaphore. On a failure
3331  *	a negative errno code is returned. On success the reference counts
3332  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3333  *	function returns zero.
3334  */
3335 int netdev_set_master(struct net_device *slave, struct net_device *master)
3336 {
3337 	struct net_device *old = slave->master;
3338 
3339 	ASSERT_RTNL();
3340 
3341 	if (master) {
3342 		if (old)
3343 			return -EBUSY;
3344 		dev_hold(master);
3345 	}
3346 
3347 	slave->master = master;
3348 
3349 	synchronize_net();
3350 
3351 	if (old)
3352 		dev_put(old);
3353 
3354 	if (master)
3355 		slave->flags |= IFF_SLAVE;
3356 	else
3357 		slave->flags &= ~IFF_SLAVE;
3358 
3359 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3360 	return 0;
3361 }
3362 EXPORT_SYMBOL(netdev_set_master);
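/*
 * Illustrative sketch (not from the original source): bonding-style drivers
 * use this in pairs under the RTNL, e.g.:
 *
 *	err = netdev_set_master(slave_dev, bond_dev);	/* enslave */
 *	...
 *	netdev_set_master(slave_dev, NULL);		/* release */
 */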
3363 
3364 static void dev_change_rx_flags(struct net_device *dev, int flags)
3365 {
3366 	const struct net_device_ops *ops = dev->netdev_ops;
3367 
3368 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3369 		ops->ndo_change_rx_flags(dev, flags);
3370 }
3371 
3372 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3373 {
3374 	unsigned short old_flags = dev->flags;
3375 	uid_t uid;
3376 	gid_t gid;
3377 
3378 	ASSERT_RTNL();
3379 
3380 	dev->flags |= IFF_PROMISC;
3381 	dev->promiscuity += inc;
3382 	if (dev->promiscuity == 0) {
3383 		/*
3384 		 * Avoid overflow.
3385 		 * If inc causes an overflow, leave promisc untouched and return an error.
3386 		 */
3387 		if (inc < 0)
3388 			dev->flags &= ~IFF_PROMISC;
3389 		else {
3390 			dev->promiscuity -= inc;
3391 			printk(KERN_WARNING "%s: promiscuity touches roof, "
3392 				"set promiscuity failed, promiscuity feature "
3393 				"of device might be broken.\n", dev->name);
3394 			return -EOVERFLOW;
3395 		}
3396 	}
3397 	if (dev->flags != old_flags) {
3398 		printk(KERN_INFO "device %s %s promiscuous mode\n",
3399 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3400 							       "left");
3401 		if (audit_enabled) {
3402 			current_uid_gid(&uid, &gid);
3403 			audit_log(current->audit_context, GFP_ATOMIC,
3404 				AUDIT_ANOM_PROMISCUOUS,
3405 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3406 				dev->name, (dev->flags & IFF_PROMISC),
3407 				(old_flags & IFF_PROMISC),
3408 				audit_get_loginuid(current),
3409 				uid, gid,
3410 				audit_get_sessionid(current));
3411 		}
3412 
3413 		dev_change_rx_flags(dev, IFF_PROMISC);
3414 	}
3415 	return 0;
3416 }
3417 
3418 /**
3419  *	dev_set_promiscuity	- update promiscuity count on a device
3420  *	@dev: device
3421  *	@inc: modifier
3422  *
3423  *	Add or remove promiscuity from a device. While the count in the device
3424  *	remains above zero the interface remains promiscuous. Once it hits zero
3425  *	the device reverts to normal filtering operation. A negative inc
3426  *	value is used to drop promiscuity on the device.
3427  *	Return 0 if successful or a negative errno code on error.
3428  */
3429 int dev_set_promiscuity(struct net_device *dev, int inc)
3430 {
3431 	unsigned short old_flags = dev->flags;
3432 	int err;
3433 
3434 	err = __dev_set_promiscuity(dev, inc);
3435 	if (err < 0)
3436 		return err;
3437 	if (dev->flags != old_flags)
3438 		dev_set_rx_mode(dev);
3439 	return err;
3440 }
3441 EXPORT_SYMBOL(dev_set_promiscuity);
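/*
 * Illustrative sketch (not from the original source): packet capture code
 * bumps the counter while a tap is active and drops it again when done:
 *
 *	dev_set_promiscuity(dev, 1);	/* start capturing */
 *	...
 *	dev_set_promiscuity(dev, -1);	/* stop capturing */
 *
 * The interface leaves promiscuous mode only once every such user has
 * released its reference.
 */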
3442 
3443 /**
3444  *	dev_set_allmulti	- update allmulti count on a device
3445  *	@dev: device
3446  *	@inc: modifier
3447  *
3448  *	Add or remove reception of all multicast frames to a device. While the
3449  *	count in the device remains above zero the interface remains listening
3450  *	to all multicast frames. Once it hits zero the device reverts to normal
3451  *	filtering operation. A negative @inc value is used to drop the counter
3452  *	when releasing a resource needing all multicasts.
3453  *	Return 0 if successful or a negative errno code on error.
3454  */
3455 
3456 int dev_set_allmulti(struct net_device *dev, int inc)
3457 {
3458 	unsigned short old_flags = dev->flags;
3459 
3460 	ASSERT_RTNL();
3461 
3462 	dev->flags |= IFF_ALLMULTI;
3463 	dev->allmulti += inc;
3464 	if (dev->allmulti == 0) {
3465 		/*
3466 		 * Avoid overflow.
3467 		 * If inc causes an overflow, leave allmulti untouched and return an error.
3468 		 */
3469 		if (inc < 0)
3470 			dev->flags &= ~IFF_ALLMULTI;
3471 		else {
3472 			dev->allmulti -= inc;
3473 			printk(KERN_WARNING "%s: allmulti touches roof, "
3474 				"set allmulti failed, allmulti feature of "
3475 				"device might be broken.\n", dev->name);
3476 			return -EOVERFLOW;
3477 		}
3478 	}
3479 	if (dev->flags ^ old_flags) {
3480 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3481 		dev_set_rx_mode(dev);
3482 	}
3483 	return 0;
3484 }
3485 EXPORT_SYMBOL(dev_set_allmulti);
3486 
3487 /*
3488  *	Upload unicast and multicast address lists to device and
3489  *	configure RX filtering. When the device doesn't support unicast
3490  *	filtering it is put in promiscuous mode while unicast addresses
3491  *	are present.
3492  */
3493 void __dev_set_rx_mode(struct net_device *dev)
3494 {
3495 	const struct net_device_ops *ops = dev->netdev_ops;
3496 
3497 	/* dev_open will call this function so the list will stay sane. */
3498 	if (!(dev->flags&IFF_UP))
3499 		return;
3500 
3501 	if (!netif_device_present(dev))
3502 		return;
3503 
3504 	if (ops->ndo_set_rx_mode)
3505 		ops->ndo_set_rx_mode(dev);
3506 	else {
3507 		/* Unicast address changes may only happen under the rtnl,
3508 		 * therefore calling __dev_set_promiscuity here is safe.
3509 		 */
3510 		if (dev->uc.count > 0 && !dev->uc_promisc) {
3511 			__dev_set_promiscuity(dev, 1);
3512 			dev->uc_promisc = 1;
3513 		} else if (dev->uc.count == 0 && dev->uc_promisc) {
3514 			__dev_set_promiscuity(dev, -1);
3515 			dev->uc_promisc = 0;
3516 		}
3517 
3518 		if (ops->ndo_set_multicast_list)
3519 			ops->ndo_set_multicast_list(dev);
3520 	}
3521 }
3522 
3523 void dev_set_rx_mode(struct net_device *dev)
3524 {
3525 	netif_addr_lock_bh(dev);
3526 	__dev_set_rx_mode(dev);
3527 	netif_addr_unlock_bh(dev);
3528 }
3529 
3530 /* hw addresses list handling functions */
3531 
3532 static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3533 			 int addr_len, unsigned char addr_type)
3534 {
3535 	struct netdev_hw_addr *ha;
3536 	int alloc_size;
3537 
3538 	if (addr_len > MAX_ADDR_LEN)
3539 		return -EINVAL;
3540 
3541 	list_for_each_entry(ha, &list->list, list) {
3542 		if (!memcmp(ha->addr, addr, addr_len) &&
3543 		    ha->type == addr_type) {
3544 			ha->refcount++;
3545 			return 0;
3546 		}
3547 	}
3548 
3549 
3550 	alloc_size = sizeof(*ha);
3551 	if (alloc_size < L1_CACHE_BYTES)
3552 		alloc_size = L1_CACHE_BYTES;
3553 	ha = kmalloc(alloc_size, GFP_ATOMIC);
3554 	if (!ha)
3555 		return -ENOMEM;
3556 	memcpy(ha->addr, addr, addr_len);
3557 	ha->type = addr_type;
3558 	ha->refcount = 1;
3559 	ha->synced = false;
3560 	list_add_tail_rcu(&ha->list, &list->list);
3561 	list->count++;
3562 	return 0;
3563 }
3564 
3565 static void ha_rcu_free(struct rcu_head *head)
3566 {
3567 	struct netdev_hw_addr *ha;
3568 
3569 	ha = container_of(head, struct netdev_hw_addr, rcu_head);
3570 	kfree(ha);
3571 }
3572 
3573 static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3574 			 int addr_len, unsigned char addr_type)
3575 {
3576 	struct netdev_hw_addr *ha;
3577 
3578 	list_for_each_entry(ha, &list->list, list) {
3579 		if (!memcmp(ha->addr, addr, addr_len) &&
3580 		    (ha->type == addr_type || !addr_type)) {
3581 			if (--ha->refcount)
3582 				return 0;
3583 			list_del_rcu(&ha->list);
3584 			call_rcu(&ha->rcu_head, ha_rcu_free);
3585 			list->count--;
3586 			return 0;
3587 		}
3588 	}
3589 	return -ENOENT;
3590 }
3591 
3592 static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3593 				  struct netdev_hw_addr_list *from_list,
3594 				  int addr_len,
3595 				  unsigned char addr_type)
3596 {
3597 	int err;
3598 	struct netdev_hw_addr *ha, *ha2;
3599 	unsigned char type;
3600 
3601 	list_for_each_entry(ha, &from_list->list, list) {
3602 		type = addr_type ? addr_type : ha->type;
3603 		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3604 		if (err)
3605 			goto unroll;
3606 	}
3607 	return 0;
3608 
3609 unroll:
3610 	list_for_each_entry(ha2, &from_list->list, list) {
3611 		if (ha2 == ha)
3612 			break;
3613 		type = addr_type ? addr_type : ha2->type;
3614 		__hw_addr_del(to_list, ha2->addr, addr_len, type);
3615 	}
3616 	return err;
3617 }
3618 
3619 static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3620 				   struct netdev_hw_addr_list *from_list,
3621 				   int addr_len,
3622 				   unsigned char addr_type)
3623 {
3624 	struct netdev_hw_addr *ha;
3625 	unsigned char type;
3626 
3627 	list_for_each_entry(ha, &from_list->list, list) {
3628 		type = addr_type ? addr_type : ha->type;
3629 		__hw_addr_del(to_list, ha->addr, addr_len, type);
3630 	}
3631 }
3632 
3633 static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3634 			  struct netdev_hw_addr_list *from_list,
3635 			  int addr_len)
3636 {
3637 	int err = 0;
3638 	struct netdev_hw_addr *ha, *tmp;
3639 
3640 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3641 		if (!ha->synced) {
3642 			err = __hw_addr_add(to_list, ha->addr,
3643 					    addr_len, ha->type);
3644 			if (err)
3645 				break;
3646 			ha->synced = true;
3647 			ha->refcount++;
3648 		} else if (ha->refcount == 1) {
3649 			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3650 			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3651 		}
3652 	}
3653 	return err;
3654 }
3655 
3656 static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3657 			     struct netdev_hw_addr_list *from_list,
3658 			     int addr_len)
3659 {
3660 	struct netdev_hw_addr *ha, *tmp;
3661 
3662 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3663 		if (ha->synced) {
3664 			__hw_addr_del(to_list, ha->addr,
3665 				      addr_len, ha->type);
3666 			ha->synced = false;
3667 			__hw_addr_del(from_list, ha->addr,
3668 				      addr_len, ha->type);
3669 		}
3670 	}
3671 }
3672 
3673 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3674 {
3675 	struct netdev_hw_addr *ha, *tmp;
3676 
3677 	list_for_each_entry_safe(ha, tmp, &list->list, list) {
3678 		list_del_rcu(&ha->list);
3679 		call_rcu(&ha->rcu_head, ha_rcu_free);
3680 	}
3681 	list->count = 0;
3682 }
3683 
3684 static void __hw_addr_init(struct netdev_hw_addr_list *list)
3685 {
3686 	INIT_LIST_HEAD(&list->list);
3687 	list->count = 0;
3688 }
3689 
3690 /* Device addresses handling functions */
3691 
3692 static void dev_addr_flush(struct net_device *dev)
3693 {
3694 	/* rtnl_mutex must be held here */
3695 
3696 	__hw_addr_flush(&dev->dev_addrs);
3697 	dev->dev_addr = NULL;
3698 }
3699 
3700 static int dev_addr_init(struct net_device *dev)
3701 {
3702 	unsigned char addr[MAX_ADDR_LEN];
3703 	struct netdev_hw_addr *ha;
3704 	int err;
3705 
3706 	/* rtnl_mutex must be held here */
3707 
3708 	__hw_addr_init(&dev->dev_addrs);
3709 	memset(addr, 0, sizeof(addr));
3710 	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3711 			    NETDEV_HW_ADDR_T_LAN);
3712 	if (!err) {
3713 		/*
3714 		 * Get the first (previously created) address from the list
3715 		 * and set dev_addr pointer to this location.
3716 		 */
3717 		ha = list_first_entry(&dev->dev_addrs.list,
3718 				      struct netdev_hw_addr, list);
3719 		dev->dev_addr = ha->addr;
3720 	}
3721 	return err;
3722 }
3723 
3724 /**
3725  *	dev_addr_add	- Add a device address
3726  *	@dev: device
3727  *	@addr: address to add
3728  *	@addr_type: address type
3729  *
3730  *	Add a device address to the device or increase the reference count if
3731  *	it already exists.
3732  *
3733  *	The caller must hold the rtnl_mutex.
3734  */
3735 int dev_addr_add(struct net_device *dev, unsigned char *addr,
3736 		 unsigned char addr_type)
3737 {
3738 	int err;
3739 
3740 	ASSERT_RTNL();
3741 
3742 	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3743 	if (!err)
3744 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3745 	return err;
3746 }
3747 EXPORT_SYMBOL(dev_addr_add);
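/*
 * Illustrative sketch (not from the original source): a driver exposing an
 * additional hardware address could add it under the RTNL like so:
 *
 *	rtnl_lock();
 *	err = dev_addr_add(dev, secondary_addr, NETDEV_HW_ADDR_T_SAN);
 *	rtnl_unlock();
 *
 * where secondary_addr is assumed to be a dev->addr_len byte buffer and
 * NETDEV_HW_ADDR_T_SAN one of the netdev_hw_addr address types.
 */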
3748 
3749 /**
3750  *	dev_addr_del	- Release a device address.
3751  *	@dev: device
3752  *	@addr: address to delete
3753  *	@addr_type: address type
3754  *
3755  *	Release reference to a device address and remove it from the device
3756  *	if the reference count drops to zero.
3757  *
3758  *	The caller must hold the rtnl_mutex.
3759  */
3760 int dev_addr_del(struct net_device *dev, unsigned char *addr,
3761 		 unsigned char addr_type)
3762 {
3763 	int err;
3764 	struct netdev_hw_addr *ha;
3765 
3766 	ASSERT_RTNL();
3767 
3768 	/*
3769 	 * We cannot remove the first address from the list because
3770 	 * dev->dev_addr points to that.
3771 	 */
3772 	ha = list_first_entry(&dev->dev_addrs.list,
3773 			      struct netdev_hw_addr, list);
3774 	if (ha->addr == dev->dev_addr && ha->refcount == 1)
3775 		return -ENOENT;
3776 
3777 	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3778 			    addr_type);
3779 	if (!err)
3780 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3781 	return err;
3782 }
3783 EXPORT_SYMBOL(dev_addr_del);
3784 
3785 /**
3786  *	dev_addr_add_multiple	- Add device addresses from another device
3787  *	@to_dev: device to which addresses will be added
3788  *	@from_dev: device from which addresses will be added
3789  *	@addr_type: address type - 0 means type will be used from from_dev
3790  *
3791  *	Add the device addresses of one device to another.
3792  *
3793  *	The caller must hold the rtnl_mutex.
3794  */
3795 int dev_addr_add_multiple(struct net_device *to_dev,
3796 			  struct net_device *from_dev,
3797 			  unsigned char addr_type)
3798 {
3799 	int err;
3800 
3801 	ASSERT_RTNL();
3802 
3803 	if (from_dev->addr_len != to_dev->addr_len)
3804 		return -EINVAL;
3805 	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3806 				     to_dev->addr_len, addr_type);
3807 	if (!err)
3808 		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3809 	return err;
3810 }
3811 EXPORT_SYMBOL(dev_addr_add_multiple);
3812 
3813 /**
3814  *	dev_addr_del_multiple	- Delete device addresses by another device
3815  *	@to_dev: device from which the addresses will be deleted
3816  *	@from_dev: device whose addresses are to be deleted from to_dev
3817  *	@addr_type: address type - 0 means type will be used from from_dev
3818  *
3819  *	Deletes the addresses in the "to" device that are listed in the "from" device.
3820  *
3821  *	The caller must hold the rtnl_mutex.
3822  */
3823 int dev_addr_del_multiple(struct net_device *to_dev,
3824 			  struct net_device *from_dev,
3825 			  unsigned char addr_type)
3826 {
3827 	ASSERT_RTNL();
3828 
3829 	if (from_dev->addr_len != to_dev->addr_len)
3830 		return -EINVAL;
3831 	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3832 			       to_dev->addr_len, addr_type);
3833 	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3834 	return 0;
3835 }
3836 EXPORT_SYMBOL(dev_addr_del_multiple);
3837 
3838 /* multicast addresses handling functions */
3839 
3840 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3841 		      void *addr, int alen, int glbl)
3842 {
3843 	struct dev_addr_list *da;
3844 
3845 	for (; (da = *list) != NULL; list = &da->next) {
3846 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3847 		    alen == da->da_addrlen) {
3848 			if (glbl) {
3849 				int old_glbl = da->da_gusers;
3850 				da->da_gusers = 0;
3851 				if (old_glbl == 0)
3852 					break;
3853 			}
3854 			if (--da->da_users)
3855 				return 0;
3856 
3857 			*list = da->next;
3858 			kfree(da);
3859 			(*count)--;
3860 			return 0;
3861 		}
3862 	}
3863 	return -ENOENT;
3864 }
3865 
3866 int __dev_addr_add(struct dev_addr_list **list, int *count,
3867 		   void *addr, int alen, int glbl)
3868 {
3869 	struct dev_addr_list *da;
3870 
3871 	for (da = *list; da != NULL; da = da->next) {
3872 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3873 		    da->da_addrlen == alen) {
3874 			if (glbl) {
3875 				int old_glbl = da->da_gusers;
3876 				da->da_gusers = 1;
3877 				if (old_glbl)
3878 					return 0;
3879 			}
3880 			da->da_users++;
3881 			return 0;
3882 		}
3883 	}
3884 
3885 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3886 	if (da == NULL)
3887 		return -ENOMEM;
3888 	memcpy(da->da_addr, addr, alen);
3889 	da->da_addrlen = alen;
3890 	da->da_users = 1;
3891 	da->da_gusers = glbl ? 1 : 0;
3892 	da->next = *list;
3893 	*list = da;
3894 	(*count)++;
3895 	return 0;
3896 }
3897 
3898 /**
3899  *	dev_unicast_delete	- Release secondary unicast address.
3900  *	@dev: device
3901  *	@addr: address to delete
3902  *
3903  *	Release reference to a secondary unicast address and remove it
3904  *	from the device if the reference count drops to zero.
3905  *
3906  * 	The caller must hold the rtnl_mutex.
3907  */
3908 int dev_unicast_delete(struct net_device *dev, void *addr)
3909 {
3910 	int err;
3911 
3912 	ASSERT_RTNL();
3913 
3914 	netif_addr_lock_bh(dev);
3915 	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
3916 			    NETDEV_HW_ADDR_T_UNICAST);
3917 	if (!err)
3918 		__dev_set_rx_mode(dev);
3919 	netif_addr_unlock_bh(dev);
3920 	return err;
3921 }
3922 EXPORT_SYMBOL(dev_unicast_delete);
3923 
3924 /**
3925  *	dev_unicast_add		- add a secondary unicast address
3926  *	@dev: device
3927  *	@addr: address to add
3928  *
3929  *	Add a secondary unicast address to the device or increase
3930  *	the reference count if it already exists.
3931  *
3932  *	The caller must hold the rtnl_mutex.
3933  */
3934 int dev_unicast_add(struct net_device *dev, void *addr)
3935 {
3936 	int err;
3937 
3938 	ASSERT_RTNL();
3939 
3940 	netif_addr_lock_bh(dev);
3941 	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
3942 			    NETDEV_HW_ADDR_T_UNICAST);
3943 	if (!err)
3944 		__dev_set_rx_mode(dev);
3945 	netif_addr_unlock_bh(dev);
3946 	return err;
3947 }
3948 EXPORT_SYMBOL(dev_unicast_add);
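
/*
 * Minimal usage sketch: a stacked driver (macvlan-style) that needs its
 * lower device to accept frames for an extra unicast address could pin
 * that address under RTNL as shown; it would drop the reference again
 * with dev_unicast_delete() on teardown.  Names are hypothetical.
 */
#if 0
static int example_pin_lower_unicast(struct net_device *lower,
				     unsigned char *addr)
{
	ASSERT_RTNL();
	return dev_unicast_add(lower, addr);
}
#endif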
3949 
3950 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3951 		    struct dev_addr_list **from, int *from_count)
3952 {
3953 	struct dev_addr_list *da, *next;
3954 	int err = 0;
3955 
3956 	da = *from;
3957 	while (da != NULL) {
3958 		next = da->next;
3959 		if (!da->da_synced) {
3960 			err = __dev_addr_add(to, to_count,
3961 					     da->da_addr, da->da_addrlen, 0);
3962 			if (err < 0)
3963 				break;
3964 			da->da_synced = 1;
3965 			da->da_users++;
3966 		} else if (da->da_users == 1) {
3967 			__dev_addr_delete(to, to_count,
3968 					  da->da_addr, da->da_addrlen, 0);
3969 			__dev_addr_delete(from, from_count,
3970 					  da->da_addr, da->da_addrlen, 0);
3971 		}
3972 		da = next;
3973 	}
3974 	return err;
3975 }
3976 EXPORT_SYMBOL_GPL(__dev_addr_sync);
3977 
3978 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3979 		       struct dev_addr_list **from, int *from_count)
3980 {
3981 	struct dev_addr_list *da, *next;
3982 
3983 	da = *from;
3984 	while (da != NULL) {
3985 		next = da->next;
3986 		if (da->da_synced) {
3987 			__dev_addr_delete(to, to_count,
3988 					  da->da_addr, da->da_addrlen, 0);
3989 			da->da_synced = 0;
3990 			__dev_addr_delete(from, from_count,
3991 					  da->da_addr, da->da_addrlen, 0);
3992 		}
3993 		da = next;
3994 	}
3995 }
3996 EXPORT_SYMBOL_GPL(__dev_addr_unsync);
3997 
3998 /**
3999  *	dev_unicast_sync - Synchronize device's unicast list to another device
4000  *	@to: destination device
4001  *	@from: source device
4002  *
4003  *	Add newly added addresses to the destination device and release
4004  *	addresses that have no users left. The source device must be
4005  *	locked by netif_addr_lock_bh.
4006  *
4007  *	This function is intended to be called from the dev->set_rx_mode
4008  *	function of layered software devices.
4009  */
4010 int dev_unicast_sync(struct net_device *to, struct net_device *from)
4011 {
4012 	int err = 0;
4013 
4014 	if (to->addr_len != from->addr_len)
4015 		return -EINVAL;
4016 
4017 	netif_addr_lock_bh(to);
4018 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4019 	if (!err)
4020 		__dev_set_rx_mode(to);
4021 	netif_addr_unlock_bh(to);
4022 	return err;
4023 }
4024 EXPORT_SYMBOL(dev_unicast_sync);
4025 
4026 /**
4027  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
4028  *	@to: destination device
4029  *	@from: source device
4030  *
4031  *	Remove all addresses that were added to the destination device by
4032  *	dev_unicast_sync(). This function is intended to be called from the
4033  *	dev->stop function of layered software devices.
4034  */
4035 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4036 {
4037 	if (to->addr_len != from->addr_len)
4038 		return;
4039 
4040 	netif_addr_lock_bh(from);
4041 	netif_addr_lock(to);
4042 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4043 	__dev_set_rx_mode(to);
4044 	netif_addr_unlock(to);
4045 	netif_addr_unlock_bh(from);
4046 }
4047 EXPORT_SYMBOL(dev_unicast_unsync);
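
/*
 * Minimal usage sketch of the sync/unsync pair: a layered device
 * (vlan/bond style) would typically propagate its unicast list to the
 * lower device from its ndo_set_rx_mode handler and remove the entries
 * again in ndo_stop.  The private structure and its "lowerdev" member
 * are hypothetical.
 */
#if 0
struct example_priv {
	struct net_device *lowerdev;
};

static void example_set_rx_mode(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	dev_unicast_sync(priv->lowerdev, dev);
}

static int example_stop(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	dev_unicast_unsync(priv->lowerdev, dev);
	return 0;
}
#endif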
4048 
4049 static void dev_unicast_flush(struct net_device *dev)
4050 {
4051 	netif_addr_lock_bh(dev);
4052 	__hw_addr_flush(&dev->uc);
4053 	netif_addr_unlock_bh(dev);
4054 }
4055 
4056 static void dev_unicast_init(struct net_device *dev)
4057 {
4058 	__hw_addr_init(&dev->uc);
4059 }
4060 
4061 
4062 static void __dev_addr_discard(struct dev_addr_list **list)
4063 {
4064 	struct dev_addr_list *tmp;
4065 
4066 	while (*list != NULL) {
4067 		tmp = *list;
4068 		*list = tmp->next;
4069 		if (tmp->da_users > tmp->da_gusers)
4070 			printk("__dev_addr_discard: address leakage! "
4071 			       "da_users=%d\n", tmp->da_users);
4072 		kfree(tmp);
4073 	}
4074 }
4075 
4076 static void dev_addr_discard(struct net_device *dev)
4077 {
4078 	netif_addr_lock_bh(dev);
4079 
4080 	__dev_addr_discard(&dev->mc_list);
4081 	dev->mc_count = 0;
4082 
4083 	netif_addr_unlock_bh(dev);
4084 }
4085 
4086 /**
4087  *	dev_get_flags - get flags reported to userspace
4088  *	@dev: device
4089  *
4090  *	Get the combination of flag bits exported through APIs to userspace.
4091  */
4092 unsigned dev_get_flags(const struct net_device *dev)
4093 {
4094 	unsigned flags;
4095 
4096 	flags = (dev->flags & ~(IFF_PROMISC |
4097 				IFF_ALLMULTI |
4098 				IFF_RUNNING |
4099 				IFF_LOWER_UP |
4100 				IFF_DORMANT)) |
4101 		(dev->gflags & (IFF_PROMISC |
4102 				IFF_ALLMULTI));
4103 
4104 	if (netif_running(dev)) {
4105 		if (netif_oper_up(dev))
4106 			flags |= IFF_RUNNING;
4107 		if (netif_carrier_ok(dev))
4108 			flags |= IFF_LOWER_UP;
4109 		if (netif_dormant(dev))
4110 			flags |= IFF_DORMANT;
4111 	}
4112 
4113 	return flags;
4114 }
4115 EXPORT_SYMBOL(dev_get_flags);
4116 
4117 /**
4118  *	dev_change_flags - change device settings
4119  *	@dev: device
4120  *	@flags: device state flags
4121  *
4122  *	Change settings on a device based on the state flags. The flags are
4123  *	in the userspace exported format.
4124  */
4125 int dev_change_flags(struct net_device *dev, unsigned flags)
4126 {
4127 	int ret, changes;
4128 	int old_flags = dev->flags;
4129 
4130 	ASSERT_RTNL();
4131 
4132 	/*
4133 	 *	Set the flags on our device.
4134 	 */
4135 
4136 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4137 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4138 			       IFF_AUTOMEDIA)) |
4139 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4140 				    IFF_ALLMULTI));
4141 
4142 	/*
4143 	 *	Load in the correct multicast list now the flags have changed.
4144 	 */
4145 
4146 	if ((old_flags ^ flags) & IFF_MULTICAST)
4147 		dev_change_rx_flags(dev, IFF_MULTICAST);
4148 
4149 	dev_set_rx_mode(dev);
4150 
4151 	/*
4152 	 *	Have we downed the interface? We handle IFF_UP ourselves
4153 	 *	according to user attempts to set it, rather than blindly
4154 	 *	setting it.
4155 	 */
4156 
4157 	ret = 0;
4158 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4159 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
4160 
4161 		if (!ret)
4162 			dev_set_rx_mode(dev);
4163 	}
4164 
4165 	if (dev->flags & IFF_UP &&
4166 	    ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
4167 					  IFF_VOLATILE)))
4168 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4169 
4170 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4171 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4172 
4173 		dev->gflags ^= IFF_PROMISC;
4174 		dev_set_promiscuity(dev, inc);
4175 	}
4176 
4177 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4178 	   is important. Some (broken) drivers set IFF_PROMISC when
4179 	   IFF_ALLMULTI is requested, without asking us and without reporting it.
4180 	 */
4181 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4182 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4183 
4184 		dev->gflags ^= IFF_ALLMULTI;
4185 		dev_set_allmulti(dev, inc);
4186 	}
4187 
4188 	/* Exclude state transition flags, already notified */
4189 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4190 	if (changes)
4191 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4192 
4193 	return ret;
4194 }
4195 EXPORT_SYMBOL(dev_change_flags);
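
/*
 * Minimal usage sketch: flipping a single user-visible flag from code
 * that holds the RTNL lock, mirroring what the SIOCSIFFLAGS path below
 * does.  (In-kernel users that only need promiscuity usually call
 * dev_set_promiscuity() directly.)  The helper name is hypothetical.
 */
#if 0
static int example_enable_promisc(struct net_device *dev)
{
	ASSERT_RTNL();
	return dev_change_flags(dev, dev_get_flags(dev) | IFF_PROMISC);
}
#endif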
4196 
4197 /**
4198  *	dev_set_mtu - Change maximum transfer unit
4199  *	@dev: device
4200  *	@new_mtu: new transfer unit
4201  *
4202  *	Change the maximum transfer size of the network device.
4203  */
4204 int dev_set_mtu(struct net_device *dev, int new_mtu)
4205 {
4206 	const struct net_device_ops *ops = dev->netdev_ops;
4207 	int err;
4208 
4209 	if (new_mtu == dev->mtu)
4210 		return 0;
4211 
4212 	/*	MTU must be positive.	 */
4213 	if (new_mtu < 0)
4214 		return -EINVAL;
4215 
4216 	if (!netif_device_present(dev))
4217 		return -ENODEV;
4218 
4219 	err = 0;
4220 	if (ops->ndo_change_mtu)
4221 		err = ops->ndo_change_mtu(dev, new_mtu);
4222 	else
4223 		dev->mtu = new_mtu;
4224 
4225 	if (!err && dev->flags & IFF_UP)
4226 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4227 	return err;
4228 }
4229 EXPORT_SYMBOL(dev_set_mtu);
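
/*
 * Minimal usage sketch: switching a device to a jumbo MTU from a caller
 * that, like the ioctl path below, runs under rtnl_lock().  The MTU
 * value and helper name are illustrative; the driver's ndo_change_mtu
 * may still reject values it cannot support.
 */
#if 0
static int example_enable_jumbo_frames(struct net_device *dev)
{
	ASSERT_RTNL();
	return dev_set_mtu(dev, 9000);
}
#endif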
4230 
4231 /**
4232  *	dev_set_mac_address - Change Media Access Control Address
4233  *	@dev: device
4234  *	@sa: new address
4235  *
4236  *	Change the hardware (MAC) address of the device
4237  */
4238 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4239 {
4240 	const struct net_device_ops *ops = dev->netdev_ops;
4241 	int err;
4242 
4243 	if (!ops->ndo_set_mac_address)
4244 		return -EOPNOTSUPP;
4245 	if (sa->sa_family != dev->type)
4246 		return -EINVAL;
4247 	if (!netif_device_present(dev))
4248 		return -ENODEV;
4249 	err = ops->ndo_set_mac_address(dev, sa);
4250 	if (!err)
4251 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4252 	return err;
4253 }
4254 EXPORT_SYMBOL(dev_set_mac_address);
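
/*
 * Minimal usage sketch: building the struct sockaddr that
 * dev_set_mac_address() expects, just as the SIOCSIFHWADDR branch below
 * does with the user-supplied ifr_hwaddr.  The address bytes are
 * hypothetical (a locally administered MAC).
 */
#if 0
static int example_set_mac(struct net_device *dev)
{
	struct sockaddr sa;
	unsigned char new_mac[ETH_ALEN] = { 0x02, 0x11, 0x22, 0x33, 0x44, 0x55 };

	sa.sa_family = dev->type;
	memcpy(sa.sa_data, new_mac, ETH_ALEN);

	ASSERT_RTNL();
	return dev_set_mac_address(dev, &sa);
}
#endif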
4255 
4256 /*
4257  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
4258  */
4259 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4260 {
4261 	int err;
4262 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4263 
4264 	if (!dev)
4265 		return -ENODEV;
4266 
4267 	switch (cmd) {
4268 	case SIOCGIFFLAGS:	/* Get interface flags */
4269 		ifr->ifr_flags = (short) dev_get_flags(dev);
4270 		return 0;
4271 
4272 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4273 				   (currently unused) */
4274 		ifr->ifr_metric = 0;
4275 		return 0;
4276 
4277 	case SIOCGIFMTU:	/* Get the MTU of a device */
4278 		ifr->ifr_mtu = dev->mtu;
4279 		return 0;
4280 
4281 	case SIOCGIFHWADDR:
4282 		if (!dev->addr_len)
4283 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4284 		else
4285 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4286 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4287 		ifr->ifr_hwaddr.sa_family = dev->type;
4288 		return 0;
4289 
4290 	case SIOCGIFSLAVE:
4291 		err = -EINVAL;
4292 		break;
4293 
4294 	case SIOCGIFMAP:
4295 		ifr->ifr_map.mem_start = dev->mem_start;
4296 		ifr->ifr_map.mem_end   = dev->mem_end;
4297 		ifr->ifr_map.base_addr = dev->base_addr;
4298 		ifr->ifr_map.irq       = dev->irq;
4299 		ifr->ifr_map.dma       = dev->dma;
4300 		ifr->ifr_map.port      = dev->if_port;
4301 		return 0;
4302 
4303 	case SIOCGIFINDEX:
4304 		ifr->ifr_ifindex = dev->ifindex;
4305 		return 0;
4306 
4307 	case SIOCGIFTXQLEN:
4308 		ifr->ifr_qlen = dev->tx_queue_len;
4309 		return 0;
4310 
4311 	default:
4312 		/* dev_ioctl() should ensure this case
4313 		 * is never reached
4314 		 */
4315 		WARN_ON(1);
4316 		err = -EINVAL;
4317 		break;
4318 
4319 	}
4320 	return err;
4321 }
4322 
4323 /*
4324  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4325  */
4326 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4327 {
4328 	int err;
4329 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4330 	const struct net_device_ops *ops;
4331 
4332 	if (!dev)
4333 		return -ENODEV;
4334 
4335 	ops = dev->netdev_ops;
4336 
4337 	switch (cmd) {
4338 	case SIOCSIFFLAGS:	/* Set interface flags */
4339 		return dev_change_flags(dev, ifr->ifr_flags);
4340 
4341 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4342 				   (currently unused) */
4343 		return -EOPNOTSUPP;
4344 
4345 	case SIOCSIFMTU:	/* Set the MTU of a device */
4346 		return dev_set_mtu(dev, ifr->ifr_mtu);
4347 
4348 	case SIOCSIFHWADDR:
4349 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4350 
4351 	case SIOCSIFHWBROADCAST:
4352 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4353 			return -EINVAL;
4354 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4355 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4356 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4357 		return 0;
4358 
4359 	case SIOCSIFMAP:
4360 		if (ops->ndo_set_config) {
4361 			if (!netif_device_present(dev))
4362 				return -ENODEV;
4363 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4364 		}
4365 		return -EOPNOTSUPP;
4366 
4367 	case SIOCADDMULTI:
4368 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4369 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4370 			return -EINVAL;
4371 		if (!netif_device_present(dev))
4372 			return -ENODEV;
4373 		return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4374 				  dev->addr_len, 1);
4375 
4376 	case SIOCDELMULTI:
4377 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4378 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4379 			return -EINVAL;
4380 		if (!netif_device_present(dev))
4381 			return -ENODEV;
4382 		return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4383 				     dev->addr_len, 1);
4384 
4385 	case SIOCSIFTXQLEN:
4386 		if (ifr->ifr_qlen < 0)
4387 			return -EINVAL;
4388 		dev->tx_queue_len = ifr->ifr_qlen;
4389 		return 0;
4390 
4391 	case SIOCSIFNAME:
4392 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4393 		return dev_change_name(dev, ifr->ifr_newname);
4394 
4395 	/*
4396 	 *	Unknown or private ioctl
4397 	 */
4398 	default:
4399 		if ((cmd >= SIOCDEVPRIVATE &&
4400 		    cmd <= SIOCDEVPRIVATE + 15) ||
4401 		    cmd == SIOCBONDENSLAVE ||
4402 		    cmd == SIOCBONDRELEASE ||
4403 		    cmd == SIOCBONDSETHWADDR ||
4404 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4405 		    cmd == SIOCBONDINFOQUERY ||
4406 		    cmd == SIOCBONDCHANGEACTIVE ||
4407 		    cmd == SIOCGMIIPHY ||
4408 		    cmd == SIOCGMIIREG ||
4409 		    cmd == SIOCSMIIREG ||
4410 		    cmd == SIOCBRADDIF ||
4411 		    cmd == SIOCBRDELIF ||
4412 		    cmd == SIOCSHWTSTAMP ||
4413 		    cmd == SIOCWANDEV) {
4414 			err = -EOPNOTSUPP;
4415 			if (ops->ndo_do_ioctl) {
4416 				if (netif_device_present(dev))
4417 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4418 				else
4419 					err = -ENODEV;
4420 			}
4421 		} else
4422 			err = -EINVAL;
4423 
4424 	}
4425 	return err;
4426 }
4427 
4428 /*
4429  *	This function handles all "interface"-type I/O control requests. The actual
4430  *	'doing' part of this is dev_ifsioc above.
4431  */
4432 
4433 /**
4434  *	dev_ioctl	-	network device ioctl
4435  *	@net: the applicable net namespace
4436  *	@cmd: command to issue
4437  *	@arg: pointer to a struct ifreq in user space
4438  *
4439  *	Issue ioctl functions to devices. This is normally called by the
4440  *	user space syscall interfaces but can sometimes be useful for
4441  *	other purposes. The return value is the return from the syscall if
4442  *	positive or a negative errno code on error.
4443  */
4444 
4445 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4446 {
4447 	struct ifreq ifr;
4448 	int ret;
4449 	char *colon;
4450 
4451 	/* One special case: SIOCGIFCONF takes ifconf argument
4452 	   and requires shared lock, because it sleeps writing
4453 	   to user space.
4454 	 */
4455 
4456 	if (cmd == SIOCGIFCONF) {
4457 		rtnl_lock();
4458 		ret = dev_ifconf(net, (char __user *) arg);
4459 		rtnl_unlock();
4460 		return ret;
4461 	}
4462 	if (cmd == SIOCGIFNAME)
4463 		return dev_ifname(net, (struct ifreq __user *)arg);
4464 
4465 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4466 		return -EFAULT;
4467 
4468 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4469 
4470 	colon = strchr(ifr.ifr_name, ':');
4471 	if (colon)
4472 		*colon = 0;
4473 
4474 	/*
4475 	 *	See which interface the caller is talking about.
4476 	 */
4477 
4478 	switch (cmd) {
4479 	/*
4480 	 *	These ioctl calls:
4481 	 *	- can be done by all.
4482 	 *	- atomic and do not require locking.
4483 	 *	- return a value
4484 	 */
4485 	case SIOCGIFFLAGS:
4486 	case SIOCGIFMETRIC:
4487 	case SIOCGIFMTU:
4488 	case SIOCGIFHWADDR:
4489 	case SIOCGIFSLAVE:
4490 	case SIOCGIFMAP:
4491 	case SIOCGIFINDEX:
4492 	case SIOCGIFTXQLEN:
4493 		dev_load(net, ifr.ifr_name);
4494 		read_lock(&dev_base_lock);
4495 		ret = dev_ifsioc_locked(net, &ifr, cmd);
4496 		read_unlock(&dev_base_lock);
4497 		if (!ret) {
4498 			if (colon)
4499 				*colon = ':';
4500 			if (copy_to_user(arg, &ifr,
4501 					 sizeof(struct ifreq)))
4502 				ret = -EFAULT;
4503 		}
4504 		return ret;
4505 
4506 	case SIOCETHTOOL:
4507 		dev_load(net, ifr.ifr_name);
4508 		rtnl_lock();
4509 		ret = dev_ethtool(net, &ifr);
4510 		rtnl_unlock();
4511 		if (!ret) {
4512 			if (colon)
4513 				*colon = ':';
4514 			if (copy_to_user(arg, &ifr,
4515 					 sizeof(struct ifreq)))
4516 				ret = -EFAULT;
4517 		}
4518 		return ret;
4519 
4520 	/*
4521 	 *	These ioctl calls:
4522 	 *	- require superuser power.
4523 	 *	- require strict serialization.
4524 	 *	- return a value
4525 	 */
4526 	case SIOCGMIIPHY:
4527 	case SIOCGMIIREG:
4528 	case SIOCSIFNAME:
4529 		if (!capable(CAP_NET_ADMIN))
4530 			return -EPERM;
4531 		dev_load(net, ifr.ifr_name);
4532 		rtnl_lock();
4533 		ret = dev_ifsioc(net, &ifr, cmd);
4534 		rtnl_unlock();
4535 		if (!ret) {
4536 			if (colon)
4537 				*colon = ':';
4538 			if (copy_to_user(arg, &ifr,
4539 					 sizeof(struct ifreq)))
4540 				ret = -EFAULT;
4541 		}
4542 		return ret;
4543 
4544 	/*
4545 	 *	These ioctl calls:
4546 	 *	- require superuser power.
4547 	 *	- require strict serialization.
4548 	 *	- do not return a value
4549 	 */
4550 	case SIOCSIFFLAGS:
4551 	case SIOCSIFMETRIC:
4552 	case SIOCSIFMTU:
4553 	case SIOCSIFMAP:
4554 	case SIOCSIFHWADDR:
4555 	case SIOCSIFSLAVE:
4556 	case SIOCADDMULTI:
4557 	case SIOCDELMULTI:
4558 	case SIOCSIFHWBROADCAST:
4559 	case SIOCSIFTXQLEN:
4560 	case SIOCSMIIREG:
4561 	case SIOCBONDENSLAVE:
4562 	case SIOCBONDRELEASE:
4563 	case SIOCBONDSETHWADDR:
4564 	case SIOCBONDCHANGEACTIVE:
4565 	case SIOCBRADDIF:
4566 	case SIOCBRDELIF:
4567 	case SIOCSHWTSTAMP:
4568 		if (!capable(CAP_NET_ADMIN))
4569 			return -EPERM;
4570 		/* fall through */
4571 	case SIOCBONDSLAVEINFOQUERY:
4572 	case SIOCBONDINFOQUERY:
4573 		dev_load(net, ifr.ifr_name);
4574 		rtnl_lock();
4575 		ret = dev_ifsioc(net, &ifr, cmd);
4576 		rtnl_unlock();
4577 		return ret;
4578 
4579 	case SIOCGIFMEM:
4580 		/* Get the per device memory space. We can add this but
4581 		 * currently do not support it */
4582 	case SIOCSIFMEM:
4583 		/* Set the per device memory buffer space.
4584 		 * Not applicable in our case */
4585 	case SIOCSIFLINK:
4586 		return -EINVAL;
4587 
4588 	/*
4589 	 *	Unknown or private ioctl.
4590 	 */
4591 	default:
4592 		if (cmd == SIOCWANDEV ||
4593 		    (cmd >= SIOCDEVPRIVATE &&
4594 		     cmd <= SIOCDEVPRIVATE + 15)) {
4595 			dev_load(net, ifr.ifr_name);
4596 			rtnl_lock();
4597 			ret = dev_ifsioc(net, &ifr, cmd);
4598 			rtnl_unlock();
4599 			if (!ret && copy_to_user(arg, &ifr,
4600 						 sizeof(struct ifreq)))
4601 				ret = -EFAULT;
4602 			return ret;
4603 		}
4604 		/* Take care of Wireless Extensions */
4605 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4606 			return wext_handle_ioctl(net, &ifr, cmd, arg);
4607 		return -EINVAL;
4608 	}
4609 }
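
/*
 * For reference, a minimal sketch of the userspace side that the
 * SIOCGIFFLAGS branch above ends up servicing (ordinary application
 * code, not kernel code; the interface name is hypothetical and error
 * handling is omitted).
 */
#if 0
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
		printf("eth0 flags: 0x%x\n", (unsigned)ifr.ifr_flags);
	close(fd);
	return 0;
}
#endif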
4610 
4611 
4612 /**
4613  *	dev_new_index	-	allocate an ifindex
4614  *	@net: the applicable net namespace
4615  *
4616  *	Returns a suitable unique value for a new device interface
4617  *	number.  The caller must hold the rtnl semaphore or the
4618  *	dev_base_lock to be sure it remains unique.
4619  */
4620 static int dev_new_index(struct net *net)
4621 {
4622 	static int ifindex;
4623 	for (;;) {
4624 		if (++ifindex <= 0)
4625 			ifindex = 1;
4626 		if (!__dev_get_by_index(net, ifindex))
4627 			return ifindex;
4628 	}
4629 }
4630 
4631 /* Delayed registration/unregisteration */
4632 static LIST_HEAD(net_todo_list);
4633 
4634 static void net_set_todo(struct net_device *dev)
4635 {
4636 	list_add_tail(&dev->todo_list, &net_todo_list);
4637 }
4638 
4639 static void rollback_registered(struct net_device *dev)
4640 {
4641 	BUG_ON(dev_boot_phase);
4642 	ASSERT_RTNL();
4643 
4644 	/* Some devices call this without ever registering, as part of initialization unwind. */
4645 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4646 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4647 				  "was registered\n", dev->name, dev);
4648 
4649 		WARN_ON(1);
4650 		return;
4651 	}
4652 
4653 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
4654 
4655 	/* If device is running, close it first. */
4656 	dev_close(dev);
4657 
4658 	/* And unlink it from device chain. */
4659 	unlist_netdevice(dev);
4660 
4661 	dev->reg_state = NETREG_UNREGISTERING;
4662 
4663 	synchronize_net();
4664 
4665 	/* Shutdown queueing discipline. */
4666 	dev_shutdown(dev);
4667 
4668 
4669 	/* Notify protocols, that we are about to destroy
4670 	   this device. They should clean all the things.
4671 	*/
4672 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4673 
4674 	/*
4675 	 *	Flush the unicast and multicast chains
4676 	 */
4677 	dev_unicast_flush(dev);
4678 	dev_addr_discard(dev);
4679 
4680 	if (dev->netdev_ops->ndo_uninit)
4681 		dev->netdev_ops->ndo_uninit(dev);
4682 
4683 	/* Notifier chain MUST detach us from master device. */
4684 	WARN_ON(dev->master);
4685 
4686 	/* Remove entries from kobject tree */
4687 	netdev_unregister_kobject(dev);
4688 
4689 	synchronize_net();
4690 
4691 	dev_put(dev);
4692 }
4693 
4694 static void __netdev_init_queue_locks_one(struct net_device *dev,
4695 					  struct netdev_queue *dev_queue,
4696 					  void *_unused)
4697 {
4698 	spin_lock_init(&dev_queue->_xmit_lock);
4699 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4700 	dev_queue->xmit_lock_owner = -1;
4701 }
4702 
4703 static void netdev_init_queue_locks(struct net_device *dev)
4704 {
4705 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4706 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4707 }
4708 
4709 unsigned long netdev_fix_features(unsigned long features, const char *name)
4710 {
4711 	/* Fix illegal SG+CSUM combinations. */
4712 	if ((features & NETIF_F_SG) &&
4713 	    !(features & NETIF_F_ALL_CSUM)) {
4714 		if (name)
4715 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4716 			       "checksum feature.\n", name);
4717 		features &= ~NETIF_F_SG;
4718 	}
4719 
4720 	/* TSO requires that SG is present as well. */
4721 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4722 		if (name)
4723 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4724 			       "SG feature.\n", name);
4725 		features &= ~NETIF_F_TSO;
4726 	}
4727 
4728 	if (features & NETIF_F_UFO) {
4729 		if (!(features & NETIF_F_GEN_CSUM)) {
4730 			if (name)
4731 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4732 				       "since no NETIF_F_HW_CSUM feature.\n",
4733 				       name);
4734 			features &= ~NETIF_F_UFO;
4735 		}
4736 
4737 		if (!(features & NETIF_F_SG)) {
4738 			if (name)
4739 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4740 				       "since no NETIF_F_SG feature.\n", name);
4741 			features &= ~NETIF_F_UFO;
4742 		}
4743 	}
4744 
4745 	return features;
4746 }
4747 EXPORT_SYMBOL(netdev_fix_features);
4748 
4749 /**
4750  *	register_netdevice	- register a network device
4751  *	@dev: device to register
4752  *
4753  *	Take a completed network device structure and add it to the kernel
4754  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4755  *	chain. 0 is returned on success. A negative errno code is returned
4756  *	on a failure to set up the device, or if the name is a duplicate.
4757  *
4758  *	Callers must hold the rtnl semaphore. You may want
4759  *	register_netdev() instead of this.
4760  *
4761  *	BUGS:
4762  *	The locking appears insufficient to guarantee two parallel registers
4763  *	will not get the same name.
4764  */
4765 
4766 int register_netdevice(struct net_device *dev)
4767 {
4768 	struct hlist_head *head;
4769 	struct hlist_node *p;
4770 	int ret;
4771 	struct net *net = dev_net(dev);
4772 
4773 	BUG_ON(dev_boot_phase);
4774 	ASSERT_RTNL();
4775 
4776 	might_sleep();
4777 
4778 	/* When net_device's are persistent, this will be fatal. */
4779 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4780 	BUG_ON(!net);
4781 
4782 	spin_lock_init(&dev->addr_list_lock);
4783 	netdev_set_addr_lockdep_class(dev);
4784 	netdev_init_queue_locks(dev);
4785 
4786 	dev->iflink = -1;
4787 
4788 	/* Init, if this function is available */
4789 	if (dev->netdev_ops->ndo_init) {
4790 		ret = dev->netdev_ops->ndo_init(dev);
4791 		if (ret) {
4792 			if (ret > 0)
4793 				ret = -EIO;
4794 			goto out;
4795 		}
4796 	}
4797 
4798 	if (!dev_valid_name(dev->name)) {
4799 		ret = -EINVAL;
4800 		goto err_uninit;
4801 	}
4802 
4803 	dev->ifindex = dev_new_index(net);
4804 	if (dev->iflink == -1)
4805 		dev->iflink = dev->ifindex;
4806 
4807 	/* Check for existence of name */
4808 	head = dev_name_hash(net, dev->name);
4809 	hlist_for_each(p, head) {
4810 		struct net_device *d
4811 			= hlist_entry(p, struct net_device, name_hlist);
4812 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4813 			ret = -EEXIST;
4814 			goto err_uninit;
4815 		}
4816 	}
4817 
4818 	/* Fix illegal checksum combinations */
4819 	if ((dev->features & NETIF_F_HW_CSUM) &&
4820 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4821 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4822 		       dev->name);
4823 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4824 	}
4825 
4826 	if ((dev->features & NETIF_F_NO_CSUM) &&
4827 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4828 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4829 		       dev->name);
4830 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4831 	}
4832 
4833 	dev->features = netdev_fix_features(dev->features, dev->name);
4834 
4835 	/* Enable software GSO if SG is supported. */
4836 	if (dev->features & NETIF_F_SG)
4837 		dev->features |= NETIF_F_GSO;
4838 
4839 	netdev_initialize_kobject(dev);
4840 	ret = netdev_register_kobject(dev);
4841 	if (ret)
4842 		goto err_uninit;
4843 	dev->reg_state = NETREG_REGISTERED;
4844 
4845 	/*
4846 	 *	Default initial state at registry is that the
4847 	 *	device is present.
4848 	 */
4849 
4850 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4851 
4852 	dev_init_scheduler(dev);
4853 	dev_hold(dev);
4854 	list_netdevice(dev);
4855 
4856 	/* Notify protocols, that a new device appeared. */
4857 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4858 	ret = notifier_to_errno(ret);
4859 	if (ret) {
4860 		rollback_registered(dev);
4861 		dev->reg_state = NETREG_UNREGISTERED;
4862 	}
4863 
4864 out:
4865 	return ret;
4866 
4867 err_uninit:
4868 	if (dev->netdev_ops->ndo_uninit)
4869 		dev->netdev_ops->ndo_uninit(dev);
4870 	goto out;
4871 }
4872 EXPORT_SYMBOL(register_netdevice);
4873 
4874 /**
4875  *	init_dummy_netdev	- init a dummy network device for NAPI
4876  *	@dev: device to init
4877  *
4878  *	This takes a network device structure and initializes the minimum
4879  *	number of fields so it can be used to schedule NAPI polls without
4880  *	registering a full blown interface. This is to be used by drivers
4881  *	that need to tie several hardware interfaces to a single NAPI
4882  *	poll scheduler due to HW limitations.
4883  */
4884 int init_dummy_netdev(struct net_device *dev)
4885 {
4886 	/* Clear everything. Note we don't initialize spinlocks
4887 	 * as they aren't supposed to be taken by any of the
4888 	 * NAPI code and this dummy netdev is supposed to be
4889 	 * only ever used for NAPI polls
4890 	 */
4891 	memset(dev, 0, sizeof(struct net_device));
4892 
4893 	/* make sure we BUG if trying to hit standard
4894 	 * register/unregister code path
4895 	 */
4896 	dev->reg_state = NETREG_DUMMY;
4897 
4898 	/* initialize the ref count */
4899 	atomic_set(&dev->refcnt, 1);
4900 
4901 	/* NAPI wants this */
4902 	INIT_LIST_HEAD(&dev->napi_list);
4903 
4904 	/* a dummy interface is started by default */
4905 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4906 	set_bit(__LINK_STATE_START, &dev->state);
4907 
4908 	return 0;
4909 }
4910 EXPORT_SYMBOL_GPL(init_dummy_netdev);
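
/*
 * Minimal usage sketch: a driver that multiplexes several hardware
 * functions onto one NAPI context can embed a dummy netdev purely as a
 * NAPI anchor.  The structure, poll routine and weight below are
 * hypothetical.
 */
#if 0
struct example_adapter {
	struct net_device napi_dev;	/* dummy, never registered */
	struct napi_struct napi;
};

static int example_poll(struct napi_struct *napi, int budget)
{
	/* a real driver would process up to @budget RX packets here */
	napi_complete(napi);
	return 0;
}

static void example_setup_napi(struct example_adapter *adapter)
{
	init_dummy_netdev(&adapter->napi_dev);
	netif_napi_add(&adapter->napi_dev, &adapter->napi, example_poll, 64);
}
#endif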
4911 
4912 
4913 /**
4914  *	register_netdev	- register a network device
4915  *	@dev: device to register
4916  *
4917  *	Take a completed network device structure and add it to the kernel
4918  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4919  *	chain. 0 is returned on success. A negative errno code is returned
4920  *	on a failure to set up the device, or if the name is a duplicate.
4921  *
4922  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4923  *	and expands the device name if you passed a format string to
4924  *	alloc_netdev.
4925  */
4926 int register_netdev(struct net_device *dev)
4927 {
4928 	int err;
4929 
4930 	rtnl_lock();
4931 
4932 	/*
4933 	 * If the name is a format string the caller wants us to do a
4934 	 * name allocation.
4935 	 */
4936 	if (strchr(dev->name, '%')) {
4937 		err = dev_alloc_name(dev, dev->name);
4938 		if (err < 0)
4939 			goto out;
4940 	}
4941 
4942 	err = register_netdevice(dev);
4943 out:
4944 	rtnl_unlock();
4945 	return err;
4946 }
4947 EXPORT_SYMBOL(register_netdev);
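
/*
 * Minimal lifecycle sketch tying register_netdev() to the teardown
 * helpers further below: a toy module could allocate an Ethernet-style
 * device, register it, and undo both steps on exit.  All names are
 * hypothetical, and a real driver would fill in at least .ndo_open and
 * .ndo_start_xmit.
 */
#if 0
static struct net_device *example_dev;

static const struct net_device_ops example_netdev_ops = {
	/* .ndo_open, .ndo_stop, .ndo_start_xmit, ... */
};

static int __init example_init(void)
{
	int err;

	example_dev = alloc_netdev(0, "example%d", ether_setup);
	if (!example_dev)
		return -ENOMEM;
	example_dev->netdev_ops = &example_netdev_ops;

	err = register_netdev(example_dev);	/* expands the "%d" via dev_alloc_name() */
	if (err)
		free_netdev(example_dev);
	return err;
}

static void __exit example_exit(void)
{
	unregister_netdev(example_dev);
	free_netdev(example_dev);
}
#endif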
4948 
4949 /*
4950  * netdev_wait_allrefs - wait until all references are gone.
4951  *
4952  * This is called when unregistering network devices.
4953  *
4954  * Any protocol or device that holds a reference should register
4955  * for netdevice notification, and cleanup and put back the
4956  * reference if they receive an UNREGISTER event.
4957  * We can get stuck here if buggy protocols don't correctly
4958  * call dev_put.
4959  */
4960 static void netdev_wait_allrefs(struct net_device *dev)
4961 {
4962 	unsigned long rebroadcast_time, warning_time;
4963 
4964 	rebroadcast_time = warning_time = jiffies;
4965 	while (atomic_read(&dev->refcnt) != 0) {
4966 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4967 			rtnl_lock();
4968 
4969 			/* Rebroadcast unregister notification */
4970 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4971 
4972 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4973 				     &dev->state)) {
4974 				/* We must not have linkwatch events
4975 				 * pending on unregister. If this
4976 				 * happens, we simply run the queue
4977 				 * unscheduled, resulting in a noop
4978 				 * for this device.
4979 				 */
4980 				linkwatch_run_queue();
4981 			}
4982 
4983 			__rtnl_unlock();
4984 
4985 			rebroadcast_time = jiffies;
4986 		}
4987 
4988 		msleep(250);
4989 
4990 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4991 			printk(KERN_EMERG "unregister_netdevice: "
4992 			       "waiting for %s to become free. Usage "
4993 			       "count = %d\n",
4994 			       dev->name, atomic_read(&dev->refcnt));
4995 			warning_time = jiffies;
4996 		}
4997 	}
4998 }
4999 
5000 /* The sequence is:
5001  *
5002  *	rtnl_lock();
5003  *	...
5004  *	register_netdevice(x1);
5005  *	register_netdevice(x2);
5006  *	...
5007  *	unregister_netdevice(y1);
5008  *	unregister_netdevice(y2);
5009  *      ...
5010  *	rtnl_unlock();
5011  *	free_netdev(y1);
5012  *	free_netdev(y2);
5013  *
5014  * We are invoked by rtnl_unlock().
5015  * This allows us to deal with problems:
5016  * 1) We can delete sysfs objects which invoke hotplug
5017  *    without deadlocking with linkwatch via keventd.
5018  * 2) Since we run with the RTNL semaphore not held, we can sleep
5019  *    safely in order to wait for the netdev refcnt to drop to zero.
5020  *
5021  * We must not return until all unregister events added during
5022  * the interval the lock was held have been completed.
5023  */
5024 void netdev_run_todo(void)
5025 {
5026 	struct list_head list;
5027 
5028 	/* Snapshot list, allow later requests */
5029 	list_replace_init(&net_todo_list, &list);
5030 
5031 	__rtnl_unlock();
5032 
5033 	while (!list_empty(&list)) {
5034 		struct net_device *dev
5035 			= list_entry(list.next, struct net_device, todo_list);
5036 		list_del(&dev->todo_list);
5037 
5038 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5039 			printk(KERN_ERR "network todo '%s' but state %d\n",
5040 			       dev->name, dev->reg_state);
5041 			dump_stack();
5042 			continue;
5043 		}
5044 
5045 		dev->reg_state = NETREG_UNREGISTERED;
5046 
5047 		on_each_cpu(flush_backlog, dev, 1);
5048 
5049 		netdev_wait_allrefs(dev);
5050 
5051 		/* paranoia */
5052 		BUG_ON(atomic_read(&dev->refcnt));
5053 		WARN_ON(dev->ip_ptr);
5054 		WARN_ON(dev->ip6_ptr);
5055 		WARN_ON(dev->dn_ptr);
5056 
5057 		if (dev->destructor)
5058 			dev->destructor(dev);
5059 
5060 		/* Free network device */
5061 		kobject_put(&dev->dev.kobj);
5062 	}
5063 }
5064 
5065 /**
5066  *	dev_get_stats	- get network device statistics
5067  *	@dev: device to get statistics from
5068  *
5069  *	Get network statistics from device. The device driver may provide
5070  *	its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
5071  *	the internal statistics structure is used.
5072  */
5073 const struct net_device_stats *dev_get_stats(struct net_device *dev)
5074 {
5075 	const struct net_device_ops *ops = dev->netdev_ops;
5076 
5077 	if (ops->ndo_get_stats)
5078 		return ops->ndo_get_stats(dev);
5079 	else {
5080 		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5081 		struct net_device_stats *stats = &dev->stats;
5082 		unsigned int i;
5083 		struct netdev_queue *txq;
5084 
5085 		for (i = 0; i < dev->num_tx_queues; i++) {
5086 			txq = netdev_get_tx_queue(dev, i);
5087 			tx_bytes   += txq->tx_bytes;
5088 			tx_packets += txq->tx_packets;
5089 			tx_dropped += txq->tx_dropped;
5090 		}
5091 		if (tx_bytes || tx_packets || tx_dropped) {
5092 			stats->tx_bytes   = tx_bytes;
5093 			stats->tx_packets = tx_packets;
5094 			stats->tx_dropped = tx_dropped;
5095 		}
5096 		return stats;
5097 	}
5098 }
5099 EXPORT_SYMBOL(dev_get_stats);
5100 
5101 static void netdev_init_one_queue(struct net_device *dev,
5102 				  struct netdev_queue *queue,
5103 				  void *_unused)
5104 {
5105 	queue->dev = dev;
5106 }
5107 
5108 static void netdev_init_queues(struct net_device *dev)
5109 {
5110 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5111 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5112 	spin_lock_init(&dev->tx_global_lock);
5113 }
5114 
5115 /**
5116  *	alloc_netdev_mq - allocate network device
5117  *	@sizeof_priv:	size of private data to allocate space for
5118  *	@name:		device name format string
5119  *	@setup:		callback to initialize device
5120  *	@queue_count:	the number of subqueues to allocate
5121  *
5122  *	Allocates a struct net_device with private data area for driver use
5123  *	and performs basic initialization.  Also allocates subqueue structs
5124  *	for each queue on the device at the end of the netdevice.
5125  */
5126 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5127 		void (*setup)(struct net_device *), unsigned int queue_count)
5128 {
5129 	struct netdev_queue *tx;
5130 	struct net_device *dev;
5131 	size_t alloc_size;
5132 	struct net_device *p;
5133 
5134 	BUG_ON(strlen(name) >= sizeof(dev->name));
5135 
5136 	alloc_size = sizeof(struct net_device);
5137 	if (sizeof_priv) {
5138 		/* ensure 32-byte alignment of private area */
5139 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5140 		alloc_size += sizeof_priv;
5141 	}
5142 	/* ensure 32-byte alignment of whole construct */
5143 	alloc_size += NETDEV_ALIGN - 1;
5144 
5145 	p = kzalloc(alloc_size, GFP_KERNEL);
5146 	if (!p) {
5147 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5148 		return NULL;
5149 	}
5150 
5151 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5152 	if (!tx) {
5153 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5154 		       "tx qdiscs.\n");
5155 		goto free_p;
5156 	}
5157 
5158 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5159 	dev->padded = (char *)dev - (char *)p;
5160 
5161 	if (dev_addr_init(dev))
5162 		goto free_tx;
5163 
5164 	dev_unicast_init(dev);
5165 
5166 	dev_net_set(dev, &init_net);
5167 
5168 	dev->_tx = tx;
5169 	dev->num_tx_queues = queue_count;
5170 	dev->real_num_tx_queues = queue_count;
5171 
5172 	dev->gso_max_size = GSO_MAX_SIZE;
5173 
5174 	netdev_init_queues(dev);
5175 
5176 	INIT_LIST_HEAD(&dev->napi_list);
5177 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5178 	setup(dev);
5179 	strcpy(dev->name, name);
5180 	return dev;
5181 
5182 free_tx:
5183 	kfree(tx);
5184 
5185 free_p:
5186 	kfree(p);
5187 	return NULL;
5188 }
5189 EXPORT_SYMBOL(alloc_netdev_mq);
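
/*
 * Minimal usage sketch: a driver with, say, eight hardware TX queues
 * sizes its private area and queue count here and then registers as
 * usual.  The private structure and name template are hypothetical.
 */
#if 0
struct example_mq_priv {
	void __iomem *regs;
};

static struct net_device *example_alloc_mq_device(void)
{
	return alloc_netdev_mq(sizeof(struct example_mq_priv), "mq%d",
			       ether_setup, 8);
}
#endif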
5190 
5191 /**
5192  *	free_netdev - free network device
5193  *	@dev: device
5194  *
5195  *	This function does the last stage of destroying an allocated device
5196  * 	interface. The reference to the device object is released.
5197  *	If this is the last reference then it will be freed.
5198  */
5199 void free_netdev(struct net_device *dev)
5200 {
5201 	struct napi_struct *p, *n;
5202 
5203 	release_net(dev_net(dev));
5204 
5205 	kfree(dev->_tx);
5206 
5207 	/* Flush device addresses */
5208 	dev_addr_flush(dev);
5209 
5210 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5211 		netif_napi_del(p);
5212 
5213 	/*  Compatibility with error handling in drivers */
5214 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5215 		kfree((char *)dev - dev->padded);
5216 		return;
5217 	}
5218 
5219 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5220 	dev->reg_state = NETREG_RELEASED;
5221 
5222 	/* will free via device release */
5223 	put_device(&dev->dev);
5224 }
5225 EXPORT_SYMBOL(free_netdev);
5226 
5227 /**
5228  *	synchronize_net -  Synchronize with packet receive processing
5229  *
5230  *	Wait for packets currently being received to be done.
5231  *	Does not block later packets from starting.
5232  */
5233 void synchronize_net(void)
5234 {
5235 	might_sleep();
5236 	synchronize_rcu();
5237 }
5238 EXPORT_SYMBOL(synchronize_net);
5239 
5240 /**
5241  *	unregister_netdevice - remove device from the kernel
5242  *	@dev: device
5243  *
5244  *	This function shuts down a device interface and removes it
5245  *	from the kernel tables.
5246  *
5247  *	Callers must hold the rtnl semaphore.  You may want
5248  *	unregister_netdev() instead of this.
5249  */
5250 
5251 void unregister_netdevice(struct net_device *dev)
5252 {
5253 	ASSERT_RTNL();
5254 
5255 	rollback_registered(dev);
5256 	/* Finish processing unregister after unlock */
5257 	net_set_todo(dev);
5258 }
5259 EXPORT_SYMBOL(unregister_netdevice);
5260 
5261 /**
5262  *	unregister_netdev - remove device from the kernel
5263  *	@dev: device
5264  *
5265  *	This function shuts down a device interface and removes it
5266  *	from the kernel tables.
5267  *
5268  *	This is just a wrapper for unregister_netdevice that takes
5269  *	the rtnl semaphore.  In general you want to use this and not
5270  *	unregister_netdevice.
5271  */
5272 void unregister_netdev(struct net_device *dev)
5273 {
5274 	rtnl_lock();
5275 	unregister_netdevice(dev);
5276 	rtnl_unlock();
5277 }
5278 EXPORT_SYMBOL(unregister_netdev);
5279 
5280 /**
5281  *	dev_change_net_namespace - move device to a different network namespace
5282  *	@dev: device
5283  *	@net: network namespace
5284  *	@pat: If not NULL name pattern to try if the current device name
5285  *	      is already taken in the destination network namespace.
5286  *
5287  *	This function shuts down a device interface and moves it
5288  *	to a new network namespace. On success 0 is returned, on
5289  *	a failure a negative errno code is returned.
5290  *
5291  *	Callers must hold the rtnl semaphore.
5292  */
5293 
5294 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5295 {
5296 	char buf[IFNAMSIZ];
5297 	const char *destname;
5298 	int err;
5299 
5300 	ASSERT_RTNL();
5301 
5302 	/* Don't allow namespace local devices to be moved. */
5303 	err = -EINVAL;
5304 	if (dev->features & NETIF_F_NETNS_LOCAL)
5305 		goto out;
5306 
5307 #ifdef CONFIG_SYSFS
5308 	/* Don't allow real devices to be moved when sysfs
5309 	 * is enabled.
5310 	 */
5311 	err = -EINVAL;
5312 	if (dev->dev.parent)
5313 		goto out;
5314 #endif
5315 
5316 	/* Ensure the device has been registered */
5317 	err = -EINVAL;
5318 	if (dev->reg_state != NETREG_REGISTERED)
5319 		goto out;
5320 
5321 	/* Get out if there is nothing to do */
5322 	err = 0;
5323 	if (net_eq(dev_net(dev), net))
5324 		goto out;
5325 
5326 	/* Pick the destination device name, and ensure
5327 	 * we can use it in the destination network namespace.
5328 	 */
5329 	err = -EEXIST;
5330 	destname = dev->name;
5331 	if (__dev_get_by_name(net, destname)) {
5332 		/* We get here if we can't use the current device name */
5333 		if (!pat)
5334 			goto out;
5335 		if (!dev_valid_name(pat))
5336 			goto out;
5337 		if (strchr(pat, '%')) {
5338 			if (__dev_alloc_name(net, pat, buf) < 0)
5339 				goto out;
5340 			destname = buf;
5341 		} else
5342 			destname = pat;
5343 		if (__dev_get_by_name(net, destname))
5344 			goto out;
5345 	}
5346 
5347 	/*
5348 	 * And now a mini version of register_netdevice and unregister_netdevice.
5349 	 */
5350 
5351 	/* If device is running close it first. */
5352 	dev_close(dev);
5353 
5354 	/* And unlink it from device chain */
5355 	err = -ENODEV;
5356 	unlist_netdevice(dev);
5357 
5358 	synchronize_net();
5359 
5360 	/* Shutdown queueing discipline. */
5361 	dev_shutdown(dev);
5362 
5363 	/* Notify protocols, that we are about to destroy
5364 	   this device. They should clean all the things.
5365 	*/
5366 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5367 
5368 	/*
5369 	 *	Flush the unicast and multicast chains
5370 	 */
5371 	dev_unicast_flush(dev);
5372 	dev_addr_discard(dev);
5373 
5374 	netdev_unregister_kobject(dev);
5375 
5376 	/* Actually switch the network namespace */
5377 	dev_net_set(dev, net);
5378 
5379 	/* Assign the new device name */
5380 	if (destname != dev->name)
5381 		strcpy(dev->name, destname);
5382 
5383 	/* If there is an ifindex conflict assign a new one */
5384 	if (__dev_get_by_index(net, dev->ifindex)) {
5385 		int iflink = (dev->iflink == dev->ifindex);
5386 		dev->ifindex = dev_new_index(net);
5387 		if (iflink)
5388 			dev->iflink = dev->ifindex;
5389 	}
5390 
5391 	/* Fixup kobjects */
5392 	err = netdev_register_kobject(dev);
5393 	WARN_ON(err);
5394 
5395 	/* Add the device back in the hashes */
5396 	list_netdevice(dev);
5397 
5398 	/* Notify protocols, that a new device appeared. */
5399 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5400 
5401 	synchronize_net();
5402 	err = 0;
5403 out:
5404 	return err;
5405 }
5406 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
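
/*
 * Minimal usage sketch: moving a device into the namespace of a given
 * process, roughly what the rtnetlink IFLA_NET_NS_PID handling does.
 * The helper name and fallback name pattern are hypothetical; the
 * caller must hold the RTNL lock.
 */
#if 0
static int example_move_dev_to_pid_ns(struct net_device *dev, pid_t pid)
{
	struct net *net = get_net_ns_by_pid(pid);
	int err;

	if (IS_ERR(net))
		return PTR_ERR(net);

	ASSERT_RTNL();
	err = dev_change_net_namespace(dev, net, "moved%d");
	put_net(net);
	return err;
}
#endif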
5407 
5408 static int dev_cpu_callback(struct notifier_block *nfb,
5409 			    unsigned long action,
5410 			    void *ocpu)
5411 {
5412 	struct sk_buff **list_skb;
5413 	struct Qdisc **list_net;
5414 	struct sk_buff *skb;
5415 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5416 	struct softnet_data *sd, *oldsd;
5417 
5418 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5419 		return NOTIFY_OK;
5420 
5421 	local_irq_disable();
5422 	cpu = smp_processor_id();
5423 	sd = &per_cpu(softnet_data, cpu);
5424 	oldsd = &per_cpu(softnet_data, oldcpu);
5425 
5426 	/* Find end of our completion_queue. */
5427 	list_skb = &sd->completion_queue;
5428 	while (*list_skb)
5429 		list_skb = &(*list_skb)->next;
5430 	/* Append completion queue from offline CPU. */
5431 	*list_skb = oldsd->completion_queue;
5432 	oldsd->completion_queue = NULL;
5433 
5434 	/* Find end of our output_queue. */
5435 	list_net = &sd->output_queue;
5436 	while (*list_net)
5437 		list_net = &(*list_net)->next_sched;
5438 	/* Append output queue from offline CPU. */
5439 	*list_net = oldsd->output_queue;
5440 	oldsd->output_queue = NULL;
5441 
5442 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5443 	local_irq_enable();
5444 
5445 	/* Process offline CPU's input_pkt_queue */
5446 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5447 		netif_rx(skb);
5448 
5449 	return NOTIFY_OK;
5450 }
5451 
5452 
5453 /**
5454  *	netdev_increment_features - increment feature set by one
5455  *	@all: current feature set
5456  *	@one: new feature set
5457  *	@mask: mask feature set
5458  *
5459  *	Computes a new feature set after adding a device with feature set
5460  *	@one to the master device with current feature set @all.  Will not
5461  *	enable anything that is off in @mask. Returns the new feature set.
5462  */
5463 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5464 					unsigned long mask)
5465 {
5466 	/* If device needs checksumming, downgrade to it. */
5467 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5468 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5469 	else if (mask & NETIF_F_ALL_CSUM) {
5470 		/* If one device supports v4/v6 checksumming, set for all. */
5471 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5472 		    !(all & NETIF_F_GEN_CSUM)) {
5473 			all &= ~NETIF_F_ALL_CSUM;
5474 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5475 		}
5476 
5477 		/* If one device supports hw checksumming, set for all. */
5478 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5479 			all &= ~NETIF_F_ALL_CSUM;
5480 			all |= NETIF_F_HW_CSUM;
5481 		}
5482 	}
5483 
5484 	one |= NETIF_F_ALL_CSUM;
5485 
5486 	one |= all & NETIF_F_ONE_FOR_ALL;
5487 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5488 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5489 
5490 	return all;
5491 }
5492 EXPORT_SYMBOL(netdev_increment_features);
5493 
5494 static struct hlist_head *netdev_create_hash(void)
5495 {
5496 	int i;
5497 	struct hlist_head *hash;
5498 
5499 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5500 	if (hash != NULL)
5501 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5502 			INIT_HLIST_HEAD(&hash[i]);
5503 
5504 	return hash;
5505 }
5506 
5507 /* Initialize per network namespace state */
5508 static int __net_init netdev_init(struct net *net)
5509 {
5510 	INIT_LIST_HEAD(&net->dev_base_head);
5511 
5512 	net->dev_name_head = netdev_create_hash();
5513 	if (net->dev_name_head == NULL)
5514 		goto err_name;
5515 
5516 	net->dev_index_head = netdev_create_hash();
5517 	if (net->dev_index_head == NULL)
5518 		goto err_idx;
5519 
5520 	return 0;
5521 
5522 err_idx:
5523 	kfree(net->dev_name_head);
5524 err_name:
5525 	return -ENOMEM;
5526 }
5527 
5528 /**
5529  *	netdev_drivername - network driver for the device
5530  *	@dev: network device
5531  *	@buffer: buffer for resulting name
5532  *	@len: size of buffer
5533  *
5534  *	Determine network driver for device.
5535  */
5536 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5537 {
5538 	const struct device_driver *driver;
5539 	const struct device *parent;
5540 
5541 	if (len <= 0 || !buffer)
5542 		return buffer;
5543 	buffer[0] = 0;
5544 
5545 	parent = dev->dev.parent;
5546 
5547 	if (!parent)
5548 		return buffer;
5549 
5550 	driver = parent->driver;
5551 	if (driver && driver->name)
5552 		strlcpy(buffer, driver->name, len);
5553 	return buffer;
5554 }
5555 
5556 static void __net_exit netdev_exit(struct net *net)
5557 {
5558 	kfree(net->dev_name_head);
5559 	kfree(net->dev_index_head);
5560 }
5561 
5562 static struct pernet_operations __net_initdata netdev_net_ops = {
5563 	.init = netdev_init,
5564 	.exit = netdev_exit,
5565 };
5566 
5567 static void __net_exit default_device_exit(struct net *net)
5568 {
5569 	struct net_device *dev;
5570 	/*
5571 	 * Push all migratable network devices back to the
5572 	 * initial network namespace
5573 	 */
5574 	rtnl_lock();
5575 restart:
5576 	for_each_netdev(net, dev) {
5577 		int err;
5578 		char fb_name[IFNAMSIZ];
5579 
5580 		/* Ignore unmoveable devices (i.e. loopback) */
5581 		if (dev->features & NETIF_F_NETNS_LOCAL)
5582 			continue;
5583 
5584 		/* Delete virtual devices */
5585 		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
5586 			dev->rtnl_link_ops->dellink(dev);
5587 			goto restart;
5588 		}
5589 
5590 		/* Push remaining network devices to init_net */
5591 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5592 		err = dev_change_net_namespace(dev, &init_net, fb_name);
5593 		if (err) {
5594 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5595 				__func__, dev->name, err);
5596 			BUG();
5597 		}
5598 		goto restart;
5599 	}
5600 	rtnl_unlock();
5601 }
5602 
5603 static struct pernet_operations __net_initdata default_device_ops = {
5604 	.exit = default_device_exit,
5605 };
5606 
5607 /*
5608  *	Initialize the DEV module. At boot time this walks the device list and
5609  *	unhooks any devices that fail to initialise (normally hardware not
5610  *	present) and leaves us with a valid list of present and active devices.
5611  *
5612  */
5613 
5614 /*
5615  *       This is called single threaded during boot, so no need
5616  *       to take the rtnl semaphore.
5617  */
5618 static int __init net_dev_init(void)
5619 {
5620 	int i, rc = -ENOMEM;
5621 
5622 	BUG_ON(!dev_boot_phase);
5623 
5624 	if (dev_proc_init())
5625 		goto out;
5626 
5627 	if (netdev_kobject_init())
5628 		goto out;
5629 
5630 	INIT_LIST_HEAD(&ptype_all);
5631 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
5632 		INIT_LIST_HEAD(&ptype_base[i]);
5633 
5634 	if (register_pernet_subsys(&netdev_net_ops))
5635 		goto out;
5636 
5637 	/*
5638 	 *	Initialise the packet receive queues.
5639 	 */
5640 
5641 	for_each_possible_cpu(i) {
5642 		struct softnet_data *queue;
5643 
5644 		queue = &per_cpu(softnet_data, i);
5645 		skb_queue_head_init(&queue->input_pkt_queue);
5646 		queue->completion_queue = NULL;
5647 		INIT_LIST_HEAD(&queue->poll_list);
5648 
5649 		queue->backlog.poll = process_backlog;
5650 		queue->backlog.weight = weight_p;
5651 		queue->backlog.gro_list = NULL;
5652 		queue->backlog.gro_count = 0;
5653 	}
5654 
5655 	dev_boot_phase = 0;
5656 
5657 	/* The loopback device is special: if any other network device
5658 	 * is present in a network namespace, the loopback device must
5659 	 * be present as well. Since we now dynamically allocate and free
5660 	 * the loopback device, ensure this invariant is maintained by
5661 	 * keeping the loopback device as the first device on the
5662 	 * list of network devices. This guarantees the loopback device
5663 	 * is the first device that appears and the last network device
5664 	 * that disappears.
5665 	 */
5666 	if (register_pernet_device(&loopback_net_ops))
5667 		goto out;
5668 
5669 	if (register_pernet_device(&default_device_ops))
5670 		goto out;
5671 
5672 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5673 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5674 
5675 	hotcpu_notifier(dev_cpu_callback, 0);
5676 	dst_init();
5677 	dev_mcast_init();
5678 	rc = 0;
5679 out:
5680 	return rc;
5681 }
5682 
5683 subsys_initcall(net_dev_init);
5684 
5685 static int __init initialize_hashrnd(void)
5686 {
5687 	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5688 	return 0;
5689 }
5690 
5691 late_initcall_sync(initialize_hashrnd);
5692 
5693