xref: /linux/net/core/dev.c (revision bcefe12eff5dca6fdfa94ed85e5bee66380d5cd9)
1 /*
2  * 	NET3	Protocol independent device support routines.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  *	Derived from the non IP parts of dev.c 1.0.19
10  * 		Authors:	Ross Biro
11  *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *				Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *	Additional Authors:
15  *		Florian la Roche <rzsfl@rz.uni-sb.de>
16  *		Alan Cox <gw4pts@gw4pts.ampr.org>
17  *		David Hinds <dahinds@users.sourceforge.net>
18  *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19  *		Adam Sulmicki <adam@cfar.umd.edu>
20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
21  *
22  *	Changes:
23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
24  *              			to 2 if register_netdev gets called
25  *              			before net_dev_init & also removed a
26  *              			few lines of code in the process.
27  *		Alan Cox	:	device private ioctl copies fields back.
28  *		Alan Cox	:	Transmit queue code does relevant
29  *					stunts to keep the queue safe.
30  *		Alan Cox	:	Fixed double lock.
31  *		Alan Cox	:	Fixed promisc NULL pointer trap
32  *		????????	:	Support the full private ioctl range
33  *		Alan Cox	:	Moved ioctl permission check into
34  *					drivers
35  *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
36  *		Alan Cox	:	100 backlog just doesn't cut it when
37  *					you start doing multicast video 8)
38  *		Alan Cox	:	Rewrote net_bh and list manager.
39  *		Alan Cox	: 	Fix ETH_P_ALL echoback lengths.
40  *		Alan Cox	:	Took out transmit every packet pass
41  *					Saved a few bytes in the ioctl handler
42  *		Alan Cox	:	Network driver sets packet type before
43  *					calling netif_rx. Saves a function
44  *					call a packet.
45  *		Alan Cox	:	Hashed net_bh()
46  *		Richard Kooijman:	Timestamp fixes.
47  *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
48  *		Alan Cox	:	Device lock protection.
49  *		Alan Cox	: 	Fixed nasty side effect of device close
50  *					changes.
51  *		Rudi Cilibrasi	:	Pass the right thing to
52  *					set_mac_address()
53  *		Dave Miller	:	32bit quantity for the device lock to
54  *					make it work out on a Sparc.
55  *		Bjorn Ekwall	:	Added KERNELD hack.
56  *		Alan Cox	:	Cleaned up the backlog initialise.
57  *		Craig Metz	:	SIOCGIFCONF fix if space for under
58  *					1 device.
59  *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
60  *					is no device open function.
61  *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
62  *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
63  *		Cyrus Durgin	:	Cleaned for KMOD
64  *		Adam Sulmicki   :	Bug Fix : Network Device Unload
65  *					A network device unload needs to purge
66  *					the backlog queue.
67  *	Paul Rusty Russell	:	SIOCSIFNAME
68  *              Pekka Riikonen  :	Netdev boot-time settings code
69  *              Andrew Morton   :       Make unregister_netdevice wait
70  *              			indefinitely on dev->refcnt
71  * 		J Hadi Salim	:	- Backlog queue sampling
72  *				        - netif_rx() feedback
73  */
74 
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/mutex.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/ethtool.h>
94 #include <linux/notifier.h>
95 #include <linux/skbuff.h>
96 #include <net/net_namespace.h>
97 #include <net/sock.h>
98 #include <linux/rtnetlink.h>
99 #include <linux/proc_fs.h>
100 #include <linux/seq_file.h>
101 #include <linux/stat.h>
102 #include <linux/if_bridge.h>
103 #include <linux/if_macvlan.h>
104 #include <net/dst.h>
105 #include <net/pkt_sched.h>
106 #include <net/checksum.h>
107 #include <linux/highmem.h>
108 #include <linux/init.h>
109 #include <linux/kmod.h>
110 #include <linux/module.h>
111 #include <linux/netpoll.h>
112 #include <linux/rcupdate.h>
113 #include <linux/delay.h>
114 #include <net/wext.h>
115 #include <net/iw_handler.h>
116 #include <asm/current.h>
117 #include <linux/audit.h>
118 #include <linux/dmaengine.h>
119 #include <linux/err.h>
120 #include <linux/ctype.h>
121 #include <linux/if_arp.h>
122 #include <linux/if_vlan.h>
123 #include <linux/ip.h>
124 #include <net/ip.h>
125 #include <linux/ipv6.h>
126 #include <linux/in.h>
127 #include <linux/jhash.h>
128 #include <linux/random.h>
129 #include <trace/events/napi.h>
130 
131 #include "net-sysfs.h"
132 
133 /* Instead of increasing this, you should create a hash table. */
134 #define MAX_GRO_SKBS 8
135 
136 /* This should be increased if a protocol with a bigger head is added. */
137 #define GRO_MAX_HEAD (MAX_HEADER + 128)
138 
139 /*
140  *	The list of packet types we will receive (as opposed to discard)
141  *	and the routines to invoke.
142  *
143  *	Why 16? Because with 16 the only overlap we get on a hash of the
144  *	low nibble of the protocol value is RARP/SNAP/X.25.
145  *
146  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
147  *             sure which should go first, but I bet it won't make much
148  *             difference if we are running VLANs.  The good news is that
149  *             this protocol won't be in the list unless compiled in, so
150  *             the average user (w/out VLANs) will not be adversely affected.
151  *             --BLG
152  *
153  *		0800	IP
154  *		8100    802.1Q VLAN
155  *		0001	802.3
156  *		0002	AX.25
157  *		0004	802.2
158  *		8035	RARP
159  *		0005	SNAP
160  *		0805	X.25
161  *		0806	ARP
162  *		8137	IPX
163  *		0009	Localtalk
164  *		86DD	IPv6
165  */
166 
167 #define PTYPE_HASH_SIZE	(16)
168 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
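
/*
 * Worked example of the bucket computation used by dev_add_pack() below
 * (hash = ntohs(type) & PTYPE_HASH_MASK):
 *
 *	ETH_P_IP   0x0800 & 0x0F = 0
 *	ETH_P_RARP 0x8035 & 0x0F = 5
 *	ETH_P_SNAP 0x0005 & 0x0F = 5
 *	ETH_P_X25  0x0805 & 0x0F = 5
 *
 * which is the RARP/SNAP/X.25 overlap mentioned above.
 */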
169 
170 static DEFINE_SPINLOCK(ptype_lock);
171 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
172 static struct list_head ptype_all __read_mostly;	/* Taps */
173 
174 /*
175  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
176  * semaphore.
177  *
178  * Pure readers hold dev_base_lock for reading.
179  *
180  * Writers must hold the rtnl semaphore while they loop through the
181  * dev_base_head list, and hold dev_base_lock for writing when they do the
182  * actual updates.  This allows pure readers to access the list even
183  * while a writer is preparing to update it.
184  *
185  * To put it another way, dev_base_lock is held for writing only to
186  * protect against pure readers; the rtnl semaphore provides the
187  * protection against other writers.
188  *
189  * See, for example usages, register_netdevice() and
190  * unregister_netdevice(), which must be called with the rtnl
191  * semaphore held.
192  */
193 DEFINE_RWLOCK(dev_base_lock);
194 EXPORT_SYMBOL(dev_base_lock);
195 
196 #define NETDEV_HASHBITS	8
197 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
198 
199 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
200 {
201 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
202 	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
203 }
204 
205 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
206 {
207 	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
208 }
209 
210 /* Device list insertion */
211 static int list_netdevice(struct net_device *dev)
212 {
213 	struct net *net = dev_net(dev);
214 
215 	ASSERT_RTNL();
216 
217 	write_lock_bh(&dev_base_lock);
218 	list_add_tail(&dev->dev_list, &net->dev_base_head);
219 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
220 	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
221 	write_unlock_bh(&dev_base_lock);
222 	return 0;
223 }
224 
225 /* Device list removal */
226 static void unlist_netdevice(struct net_device *dev)
227 {
228 	ASSERT_RTNL();
229 
230 	/* Unlink dev from the device chain */
231 	write_lock_bh(&dev_base_lock);
232 	list_del(&dev->dev_list);
233 	hlist_del(&dev->name_hlist);
234 	hlist_del(&dev->index_hlist);
235 	write_unlock_bh(&dev_base_lock);
236 }
237 
238 /*
239  *	Our notifier list
240  */
241 
242 static RAW_NOTIFIER_HEAD(netdev_chain);
243 
244 /*
245  *	Device drivers call our routines to queue packets here. We empty the
246  *	queue in the local softnet handler.
247  */
248 
249 DEFINE_PER_CPU(struct softnet_data, softnet_data);
250 EXPORT_PER_CPU_SYMBOL(softnet_data);
251 
252 #ifdef CONFIG_LOCKDEP
253 /*
254  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
255  * according to dev->type
256  */
257 static const unsigned short netdev_lock_type[] =
258 	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
259 	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
260 	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
261 	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
262 	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
263 	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
264 	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
265 	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
266 	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
267 	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
268 	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
269 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
270 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
271 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
272 	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
273 	 ARPHRD_VOID, ARPHRD_NONE};
274 
275 static const char *const netdev_lock_name[] =
276 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
277 	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
278 	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
279 	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
280 	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
281 	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
282 	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
283 	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
284 	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
285 	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
286 	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
287 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
288 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
289 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
290 	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
291 	 "_xmit_VOID", "_xmit_NONE"};
292 
293 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
294 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
295 
296 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
297 {
298 	int i;
299 
300 	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
301 		if (netdev_lock_type[i] == dev_type)
302 			return i;
303 	/* the last key is used by default */
304 	return ARRAY_SIZE(netdev_lock_type) - 1;
305 }
306 
307 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
308 						 unsigned short dev_type)
309 {
310 	int i;
311 
312 	i = netdev_lock_pos(dev_type);
313 	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
314 				   netdev_lock_name[i]);
315 }
316 
317 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
318 {
319 	int i;
320 
321 	i = netdev_lock_pos(dev->type);
322 	lockdep_set_class_and_name(&dev->addr_list_lock,
323 				   &netdev_addr_lock_key[i],
324 				   netdev_lock_name[i]);
325 }
326 #else
327 static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
328 						 unsigned short dev_type)
329 {
330 }
331 static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
332 {
333 }
334 #endif
335 
336 /*******************************************************************************
337 
338 		Protocol management and registration routines
339 
340 *******************************************************************************/
341 
342 /*
343  *	Add a protocol ID to the list. Now that the input handler is
344  *	smarter we can dispense with all the messy stuff that used to be
345  *	here.
346  *
347  *	BEWARE!!! Protocol handlers, mangling input packets,
348  *	MUST BE last in hash buckets and checking protocol handlers
349  *	MUST start from promiscuous ptype_all chain in net_bh.
350  *	It is true now, do not change it.
351  *	Explanation follows: if protocol handler, mangling packet, will
352  *	be the first on list, it is not able to sense, that packet
353  *	is cloned and should be copied-on-write, so that it will
354  *	change it and subsequent readers will get broken packet.
355  *							--ANK (980803)
356  */
357 
358 /**
359  *	dev_add_pack - add packet handler
360  *	@pt: packet type declaration
361  *
362  *	Add a protocol handler to the networking stack. The passed &packet_type
363  *	is linked into kernel lists and may not be freed until it has been
364  *	removed from the kernel lists.
365  *
366  *	This call does not sleep and therefore cannot guarantee that
367  *	all CPUs currently in the middle of receiving packets will see
368  *	the new packet type until the next packet is received.
369  */
370 
371 void dev_add_pack(struct packet_type *pt)
372 {
373 	int hash;
374 
375 	spin_lock_bh(&ptype_lock);
376 	if (pt->type == htons(ETH_P_ALL))
377 		list_add_rcu(&pt->list, &ptype_all);
378 	else {
379 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
380 		list_add_rcu(&pt->list, &ptype_base[hash]);
381 	}
382 	spin_unlock_bh(&ptype_lock);
383 }
384 EXPORT_SYMBOL(dev_add_pack);
385 
386 /**
387  *	__dev_remove_pack	 - remove packet handler
388  *	@pt: packet type declaration
389  *
390  *	Remove a protocol handler that was previously added to the kernel
391  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
392  *	from the kernel lists and can be freed or reused once this function
393  *	returns.
394  *
395  *      The packet type might still be in use by receivers
396  *	and must not be freed until after all the CPUs have gone
397  *	through a quiescent state.
398  */
399 void __dev_remove_pack(struct packet_type *pt)
400 {
401 	struct list_head *head;
402 	struct packet_type *pt1;
403 
404 	spin_lock_bh(&ptype_lock);
405 
406 	if (pt->type == htons(ETH_P_ALL))
407 		head = &ptype_all;
408 	else
409 		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
410 
411 	list_for_each_entry(pt1, head, list) {
412 		if (pt == pt1) {
413 			list_del_rcu(&pt->list);
414 			goto out;
415 		}
416 	}
417 
418 	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
419 out:
420 	spin_unlock_bh(&ptype_lock);
421 }
422 EXPORT_SYMBOL(__dev_remove_pack);
423 
424 /**
425  *	dev_remove_pack	 - remove packet handler
426  *	@pt: packet type declaration
427  *
428  *	Remove a protocol handler that was previously added to the kernel
429  *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
430  *	from the kernel lists and can be freed or reused once this function
431  *	returns.
432  *
433  *	This call sleeps to guarantee that no CPU is looking at the packet
434  *	type after return.
435  */
436 void dev_remove_pack(struct packet_type *pt)
437 {
438 	__dev_remove_pack(pt);
439 
440 	synchronize_net();
441 }
442 EXPORT_SYMBOL(dev_remove_pack);
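
/*
 * Illustrative sketch (not part of this file): a minimal tap registered on
 * the ptype_all chain so it sees every frame.  The handler my_tap_rcv and
 * the structure name are hypothetical.
 *
 *	static int my_tap_rcv(struct sk_buff *skb, struct net_device *dev,
 *			      struct packet_type *pt,
 *			      struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type my_tap __read_mostly = {
 *		.type	= cpu_to_be16(ETH_P_ALL),
 *		.func	= my_tap_rcv,
 *	};
 *
 *	dev_add_pack(&my_tap);
 *	...
 *	dev_remove_pack(&my_tap);	(waits for readers via synchronize_net())
 */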
443 
444 /******************************************************************************
445 
446 		      Device Boot-time Settings Routines
447 
448 *******************************************************************************/
449 
450 /* Boot time configuration table */
451 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
452 
453 /**
454  *	netdev_boot_setup_add	- add new setup entry
455  *	@name: name of the device
456  *	@map: configured settings for the device
457  *
458  *	Adds a new setup entry to the dev_boot_setup list.  The function
459  *	returns 0 on error and 1 on success.  This is a generic routine for
460  *	all netdevices.
461  */
462 static int netdev_boot_setup_add(char *name, struct ifmap *map)
463 {
464 	struct netdev_boot_setup *s;
465 	int i;
466 
467 	s = dev_boot_setup;
468 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
469 		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
470 			memset(s[i].name, 0, sizeof(s[i].name));
471 			strlcpy(s[i].name, name, IFNAMSIZ);
472 			memcpy(&s[i].map, map, sizeof(s[i].map));
473 			break;
474 		}
475 	}
476 
477 	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
478 }
479 
480 /**
481  *	netdev_boot_setup_check	- check boot time settings
482  *	@dev: the netdevice
483  *
484  * 	Check boot time settings for the device.
485  *	Any settings found are applied to the device for use
486  *	later in device probing.
487  *	Returns 0 if no settings are found, 1 if they are.
488  */
489 int netdev_boot_setup_check(struct net_device *dev)
490 {
491 	struct netdev_boot_setup *s = dev_boot_setup;
492 	int i;
493 
494 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
495 		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
496 		    !strcmp(dev->name, s[i].name)) {
497 			dev->irq 	= s[i].map.irq;
498 			dev->base_addr 	= s[i].map.base_addr;
499 			dev->mem_start 	= s[i].map.mem_start;
500 			dev->mem_end 	= s[i].map.mem_end;
501 			return 1;
502 		}
503 	}
504 	return 0;
505 }
506 EXPORT_SYMBOL(netdev_boot_setup_check);
507 
508 
509 /**
510  *	netdev_boot_base	- get address from boot time settings
511  *	@prefix: prefix for network device
512  *	@unit: id for network device
513  *
514  * 	Check boot time settings for the base address of the device.
515  *	Any settings found are applied to the device for use
516  *	later in device probing.
517  *	Returns 0 if no settings are found.
518  */
519 unsigned long netdev_boot_base(const char *prefix, int unit)
520 {
521 	const struct netdev_boot_setup *s = dev_boot_setup;
522 	char name[IFNAMSIZ];
523 	int i;
524 
525 	sprintf(name, "%s%d", prefix, unit);
526 
527 	/*
528 	 * If device already registered then return base of 1
529 	 * to indicate not to probe for this interface
530 	 */
531 	if (__dev_get_by_name(&init_net, name))
532 		return 1;
533 
534 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
535 		if (!strcmp(name, s[i].name))
536 			return s[i].map.base_addr;
537 	return 0;
538 }
539 
540 /*
541  * Saves the settings configured at boot time for any netdevice.
542  */
543 int __init netdev_boot_setup(char *str)
544 {
545 	int ints[5];
546 	struct ifmap map;
547 
548 	str = get_options(str, ARRAY_SIZE(ints), ints);
549 	if (!str || !*str)
550 		return 0;
551 
552 	/* Save settings */
553 	memset(&map, 0, sizeof(map));
554 	if (ints[0] > 0)
555 		map.irq = ints[1];
556 	if (ints[0] > 1)
557 		map.base_addr = ints[2];
558 	if (ints[0] > 2)
559 		map.mem_start = ints[3];
560 	if (ints[0] > 3)
561 		map.mem_end = ints[4];
562 
563 	/* Add new entry to the list */
564 	return netdev_boot_setup_add(str, &map);
565 }
566 
567 __setup("netdev=", netdev_boot_setup);
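
/*
 * For example, booting with
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * stores irq=9 and base_addr=0x300 under the name "eth0";
 * netdev_boot_setup_check() later copies those values into the device
 * when a driver probes for it.
 */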
568 
569 /*******************************************************************************
570 
571 			    Device Interface Subroutines
572 
573 *******************************************************************************/
574 
575 /**
576  *	__dev_get_by_name	- find a device by its name
577  *	@net: the applicable net namespace
578  *	@name: name to find
579  *
580  *	Find an interface by name. Must be called under RTNL semaphore
581  *	or @dev_base_lock. If the name is found a pointer to the device
582  *	is returned. If the name is not found then %NULL is returned. The
583  *	reference counters are not incremented so the caller must be
584  *	careful with locks.
585  */
586 
587 struct net_device *__dev_get_by_name(struct net *net, const char *name)
588 {
589 	struct hlist_node *p;
590 
591 	hlist_for_each(p, dev_name_hash(net, name)) {
592 		struct net_device *dev
593 			= hlist_entry(p, struct net_device, name_hlist);
594 		if (!strncmp(dev->name, name, IFNAMSIZ))
595 			return dev;
596 	}
597 	return NULL;
598 }
599 EXPORT_SYMBOL(__dev_get_by_name);
600 
601 /**
602  *	dev_get_by_name		- find a device by its name
603  *	@net: the applicable net namespace
604  *	@name: name to find
605  *
606  *	Find an interface by name. This can be called from any
607  *	context and does its own locking. The returned handle has
608  *	the usage count incremented and the caller must use dev_put() to
609  *	release it when it is no longer needed. %NULL is returned if no
610  *	matching device is found.
611  */
612 
613 struct net_device *dev_get_by_name(struct net *net, const char *name)
614 {
615 	struct net_device *dev;
616 
617 	read_lock(&dev_base_lock);
618 	dev = __dev_get_by_name(net, name);
619 	if (dev)
620 		dev_hold(dev);
621 	read_unlock(&dev_base_lock);
622 	return dev;
623 }
624 EXPORT_SYMBOL(dev_get_by_name);
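
/*
 * Typical use from process context (sketch; the name "eth0" and the body
 * are only illustrative):
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */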
625 
626 /**
627  *	__dev_get_by_index - find a device by its ifindex
628  *	@net: the applicable net namespace
629  *	@ifindex: index of device
630  *
631  *	Search for an interface by index. Returns a pointer to the device,
632  *	or %NULL if it is not found. The device has not
633  *	had its reference counter increased so the caller must be careful
634  *	about locking. The caller must hold either the RTNL semaphore
635  *	or @dev_base_lock.
636  */
637 
638 struct net_device *__dev_get_by_index(struct net *net, int ifindex)
639 {
640 	struct hlist_node *p;
641 
642 	hlist_for_each(p, dev_index_hash(net, ifindex)) {
643 		struct net_device *dev
644 			= hlist_entry(p, struct net_device, index_hlist);
645 		if (dev->ifindex == ifindex)
646 			return dev;
647 	}
648 	return NULL;
649 }
650 EXPORT_SYMBOL(__dev_get_by_index);
651 
652 
653 /**
654  *	dev_get_by_index - find a device by its ifindex
655  *	@net: the applicable net namespace
656  *	@ifindex: index of device
657  *
658  *	Search for an interface by index. Returns a pointer to the device,
659  *	or NULL if it is not found. The device returned has
660  *	had a reference added and the pointer is safe until the user calls
661  *	dev_put to indicate they have finished with it.
662  */
663 
664 struct net_device *dev_get_by_index(struct net *net, int ifindex)
665 {
666 	struct net_device *dev;
667 
668 	read_lock(&dev_base_lock);
669 	dev = __dev_get_by_index(net, ifindex);
670 	if (dev)
671 		dev_hold(dev);
672 	read_unlock(&dev_base_lock);
673 	return dev;
674 }
675 EXPORT_SYMBOL(dev_get_by_index);
676 
677 /**
678  *	dev_getbyhwaddr - find a device by its hardware address
679  *	@net: the applicable net namespace
680  *	@type: media type of device
681  *	@ha: hardware address
682  *
683  *	Search for an interface by MAC address. Returns a pointer to the
684  *	device, or NULL if it is not found. The caller must hold the
685  *	rtnl semaphore. The returned device has not had its ref count increased
686  *	and the caller must therefore be careful about locking.
687  *
688  *	BUGS:
689  *	If the API was consistent this would be __dev_get_by_hwaddr
690  */
691 
692 struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
693 {
694 	struct net_device *dev;
695 
696 	ASSERT_RTNL();
697 
698 	for_each_netdev(net, dev)
699 		if (dev->type == type &&
700 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
701 			return dev;
702 
703 	return NULL;
704 }
705 EXPORT_SYMBOL(dev_getbyhwaddr);
706 
707 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
708 {
709 	struct net_device *dev;
710 
711 	ASSERT_RTNL();
712 	for_each_netdev(net, dev)
713 		if (dev->type == type)
714 			return dev;
715 
716 	return NULL;
717 }
718 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
719 
720 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
721 {
722 	struct net_device *dev;
723 
724 	rtnl_lock();
725 	dev = __dev_getfirstbyhwtype(net, type);
726 	if (dev)
727 		dev_hold(dev);
728 	rtnl_unlock();
729 	return dev;
730 }
731 EXPORT_SYMBOL(dev_getfirstbyhwtype);
732 
733 /**
734  *	dev_get_by_flags - find any device with given flags
735  *	@net: the applicable net namespace
736  *	@if_flags: IFF_* values
737  *	@mask: bitmask of bits in if_flags to check
738  *
739  *	Search for any interface with the given flags. Returns a pointer to
740  *	the device, or NULL if none is found. The device returned has
741  *	had a reference added and the pointer is safe until the user calls
742  *	dev_put to indicate they have finished with it.
743  */
744 
745 struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
746 				    unsigned short mask)
747 {
748 	struct net_device *dev, *ret;
749 
750 	ret = NULL;
751 	read_lock(&dev_base_lock);
752 	for_each_netdev(net, dev) {
753 		if (((dev->flags ^ if_flags) & mask) == 0) {
754 			dev_hold(dev);
755 			ret = dev;
756 			break;
757 		}
758 	}
759 	read_unlock(&dev_base_lock);
760 	return ret;
761 }
762 EXPORT_SYMBOL(dev_get_by_flags);
763 
764 /**
765  *	dev_valid_name - check if name is okay for network device
766  *	@name: name string
767  *
768  *	Network device names need to be valid file names to
769  *	allow sysfs to work.  We also disallow any kind of
770  *	whitespace.
771  */
772 int dev_valid_name(const char *name)
773 {
774 	if (*name == '\0')
775 		return 0;
776 	if (strlen(name) >= IFNAMSIZ)
777 		return 0;
778 	if (!strcmp(name, ".") || !strcmp(name, ".."))
779 		return 0;
780 
781 	while (*name) {
782 		if (*name == '/' || isspace(*name))
783 			return 0;
784 		name++;
785 	}
786 	return 1;
787 }
788 EXPORT_SYMBOL(dev_valid_name);
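
/*
 * Per the checks above: "eth0" and "dummy1" are valid, while "", ".", "..",
 * a name containing '/' or whitespace, or a name of IFNAMSIZ characters or
 * more is rejected.
 */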
789 
790 /**
791  *	__dev_alloc_name - allocate a name for a device
792  *	@net: network namespace to allocate the device name in
793  *	@name: name format string
794  *	@buf:  scratch buffer and result name string
795  *
796  *	Passed a format string - eg "lt%d" - it will try to find a suitable
797  *	id. It scans the list of devices to build up a free map, then chooses
798  *	the first empty slot. The caller must hold the dev_base or rtnl lock
799  *	while allocating the name and adding the device in order to avoid
800  *	duplicates.
801  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
802  *	Returns the number of the unit assigned or a negative errno code.
803  */
804 
805 static int __dev_alloc_name(struct net *net, const char *name, char *buf)
806 {
807 	int i = 0;
808 	const char *p;
809 	const int max_netdevices = 8*PAGE_SIZE;
810 	unsigned long *inuse;
811 	struct net_device *d;
812 
813 	p = strnchr(name, IFNAMSIZ-1, '%');
814 	if (p) {
815 		/*
816 		 * Verify the string as this thing may have come from
817 		 * the user.  There must be either one "%d" and no other "%"
818 		 * characters.
819 		 */
820 		if (p[1] != 'd' || strchr(p + 2, '%'))
821 			return -EINVAL;
822 
823 		/* Use one page as a bit array of possible slots */
824 		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
825 		if (!inuse)
826 			return -ENOMEM;
827 
828 		for_each_netdev(net, d) {
829 			if (!sscanf(d->name, name, &i))
830 				continue;
831 			if (i < 0 || i >= max_netdevices)
832 				continue;
833 
834 			/*  avoid cases where sscanf is not exact inverse of printf */
835 			snprintf(buf, IFNAMSIZ, name, i);
836 			if (!strncmp(buf, d->name, IFNAMSIZ))
837 				set_bit(i, inuse);
838 		}
839 
840 		i = find_first_zero_bit(inuse, max_netdevices);
841 		free_page((unsigned long) inuse);
842 	}
843 
844 	snprintf(buf, IFNAMSIZ, name, i);
845 	if (!__dev_get_by_name(net, buf))
846 		return i;
847 
848 	/* It is possible to run out of possible slots
849 	 * when the name is long and there isn't enough space left
850 	 * for the digits, or if all bits are used.
851 	 */
852 	return -ENFILE;
853 }
854 
855 /**
856  *	dev_alloc_name - allocate a name for a device
857  *	@dev: device
858  *	@name: name format string
859  *
860  *	Passed a format string - eg "lt%d" - it will try to find a suitable
861  *	id. It scans the list of devices to build up a free map, then chooses
862  *	the first empty slot. The caller must hold the dev_base or rtnl lock
863  *	while allocating the name and adding the device in order to avoid
864  *	duplicates.
865  *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
866  *	Returns the number of the unit assigned or a negative errno code.
867  */
868 
869 int dev_alloc_name(struct net_device *dev, const char *name)
870 {
871 	char buf[IFNAMSIZ];
872 	struct net *net;
873 	int ret;
874 
875 	BUG_ON(!dev_net(dev));
876 	net = dev_net(dev);
877 	ret = __dev_alloc_name(net, name, buf);
878 	if (ret >= 0)
879 		strlcpy(dev->name, buf, IFNAMSIZ);
880 	return ret;
881 }
882 EXPORT_SYMBOL(dev_alloc_name);
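
/*
 * Sketch of typical driver use (names illustrative): ask for the first free
 * "eth%d" slot before registering the device.
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *	if (err < 0)
 *		goto out;
 *	(dev->name is now e.g. "eth2" and err holds the unit number)
 */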
883 
884 
885 /**
886  *	dev_change_name - change name of a device
887  *	@dev: device
888  *	@newname: name (or format string) must be at least IFNAMSIZ
889  *
890  *	Change the name of a device. A format string such as "eth%d"
891  *	can be passed for wildcarding.
892  */
893 int dev_change_name(struct net_device *dev, const char *newname)
894 {
895 	char oldname[IFNAMSIZ];
896 	int err = 0;
897 	int ret;
898 	struct net *net;
899 
900 	ASSERT_RTNL();
901 	BUG_ON(!dev_net(dev));
902 
903 	net = dev_net(dev);
904 	if (dev->flags & IFF_UP)
905 		return -EBUSY;
906 
907 	if (!dev_valid_name(newname))
908 		return -EINVAL;
909 
910 	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
911 		return 0;
912 
913 	memcpy(oldname, dev->name, IFNAMSIZ);
914 
915 	if (strchr(newname, '%')) {
916 		err = dev_alloc_name(dev, newname);
917 		if (err < 0)
918 			return err;
919 	} else if (__dev_get_by_name(net, newname))
920 		return -EEXIST;
921 	else
922 		strlcpy(dev->name, newname, IFNAMSIZ);
923 
924 rollback:
925 	/* For now only devices in the initial network namespace
926 	 * are in sysfs.
927 	 */
928 	if (net == &init_net) {
929 		ret = device_rename(&dev->dev, dev->name);
930 		if (ret) {
931 			memcpy(dev->name, oldname, IFNAMSIZ);
932 			return ret;
933 		}
934 	}
935 
936 	write_lock_bh(&dev_base_lock);
937 	hlist_del(&dev->name_hlist);
938 	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
939 	write_unlock_bh(&dev_base_lock);
940 
941 	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
942 	ret = notifier_to_errno(ret);
943 
944 	if (ret) {
945 		if (err) {
946 			printk(KERN_ERR
947 			       "%s: name change rollback failed: %d.\n",
948 			       dev->name, ret);
949 		} else {
950 			err = ret;
951 			memcpy(dev->name, oldname, IFNAMSIZ);
952 			goto rollback;
953 		}
954 	}
955 
956 	return err;
957 }
958 
959 /**
960  *	dev_set_alias - change ifalias of a device
961  *	@dev: device
962  *	@alias: name up to IFALIASZ
963  *	@len: limit of bytes to copy from @alias
964  *
965  *	Set ifalias for a device.
966  */
967 int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
968 {
969 	ASSERT_RTNL();
970 
971 	if (len >= IFALIASZ)
972 		return -EINVAL;
973 
974 	if (!len) {
975 		if (dev->ifalias) {
976 			kfree(dev->ifalias);
977 			dev->ifalias = NULL;
978 		}
979 		return 0;
980 	}
981 
982 	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
983 	if (!dev->ifalias)
984 		return -ENOMEM;
985 
986 	strlcpy(dev->ifalias, alias, len+1);
987 	return len;
988 }
989 
990 
991 /**
992  *	netdev_features_change - device changes features
993  *	@dev: device to cause notification
994  *
995  *	Called to indicate a device has changed features.
996  */
997 void netdev_features_change(struct net_device *dev)
998 {
999 	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1000 }
1001 EXPORT_SYMBOL(netdev_features_change);
1002 
1003 /**
1004  *	netdev_state_change - device changes state
1005  *	@dev: device to cause notification
1006  *
1007  *	Called to indicate a device has changed state. This function calls
1008  *	the notifier chains for netdev_chain and sends a NEWLINK message
1009  *	to the routing socket.
1010  */
1011 void netdev_state_change(struct net_device *dev)
1012 {
1013 	if (dev->flags & IFF_UP) {
1014 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
1015 		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1016 	}
1017 }
1018 EXPORT_SYMBOL(netdev_state_change);
1019 
1020 void netdev_bonding_change(struct net_device *dev, unsigned long event)
1021 {
1022 	call_netdevice_notifiers(event, dev);
1023 }
1024 EXPORT_SYMBOL(netdev_bonding_change);
1025 
1026 /**
1027  *	dev_load 	- load a network module
1028  *	@net: the applicable net namespace
1029  *	@name: name of interface
1030  *
1031  *	If a network interface is not present and the process has suitable
1032  *	privileges this function loads the module. If module loading is not
1033  *	available in this kernel then it becomes a nop.
1034  */
1035 
1036 void dev_load(struct net *net, const char *name)
1037 {
1038 	struct net_device *dev;
1039 
1040 	read_lock(&dev_base_lock);
1041 	dev = __dev_get_by_name(net, name);
1042 	read_unlock(&dev_base_lock);
1043 
1044 	if (!dev && capable(CAP_NET_ADMIN))
1045 		request_module("%s", name);
1046 }
1047 EXPORT_SYMBOL(dev_load);
1048 
1049 /**
1050  *	dev_open	- prepare an interface for use.
1051  *	@dev:	device to open
1052  *
1053  *	Takes a device from down to up state. The device's private open
1054  *	function is invoked and then the multicast lists are loaded. Finally
1055  *	the device is moved into the up state and a %NETDEV_UP message is
1056  *	sent to the netdev notifier chain.
1057  *
1058  *	Calling this function on an active interface is a nop. On a failure
1059  *	a negative errno code is returned.
1060  */
1061 int dev_open(struct net_device *dev)
1062 {
1063 	const struct net_device_ops *ops = dev->netdev_ops;
1064 	int ret;
1065 
1066 	ASSERT_RTNL();
1067 
1068 	/*
1069 	 *	Is it already up?
1070 	 */
1071 
1072 	if (dev->flags & IFF_UP)
1073 		return 0;
1074 
1075 	/*
1076 	 *	Is it even present?
1077 	 */
1078 	if (!netif_device_present(dev))
1079 		return -ENODEV;
1080 
1081 	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1082 	ret = notifier_to_errno(ret);
1083 	if (ret)
1084 		return ret;
1085 
1086 	/*
1087 	 *	Call device private open method
1088 	 */
1089 	set_bit(__LINK_STATE_START, &dev->state);
1090 
1091 	if (ops->ndo_validate_addr)
1092 		ret = ops->ndo_validate_addr(dev);
1093 
1094 	if (!ret && ops->ndo_open)
1095 		ret = ops->ndo_open(dev);
1096 
1097 	/*
1098 	 *	If it went open OK then:
1099 	 */
1100 
1101 	if (ret)
1102 		clear_bit(__LINK_STATE_START, &dev->state);
1103 	else {
1104 		/*
1105 		 *	Set the flags.
1106 		 */
1107 		dev->flags |= IFF_UP;
1108 
1109 		/*
1110 		 *	Enable NET_DMA
1111 		 */
1112 		net_dmaengine_get();
1113 
1114 		/*
1115 		 *	Initialize multicasting status
1116 		 */
1117 		dev_set_rx_mode(dev);
1118 
1119 		/*
1120 		 *	Wakeup transmit queue engine
1121 		 */
1122 		dev_activate(dev);
1123 
1124 		/*
1125 		 *	... and announce new interface.
1126 		 */
1127 		call_netdevice_notifiers(NETDEV_UP, dev);
1128 	}
1129 
1130 	return ret;
1131 }
1132 EXPORT_SYMBOL(dev_open);
1133 
1134 /**
1135  *	dev_close - shutdown an interface.
1136  *	@dev: device to shutdown
1137  *
1138  *	This function moves an active device into down state. A
1139  *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1140  *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1141  *	chain.
1142  */
1143 int dev_close(struct net_device *dev)
1144 {
1145 	const struct net_device_ops *ops = dev->netdev_ops;
1146 	ASSERT_RTNL();
1147 
1148 	might_sleep();
1149 
1150 	if (!(dev->flags & IFF_UP))
1151 		return 0;
1152 
1153 	/*
1154 	 *	Tell people we are going down, so that they can
1155 	 *	prepare for it while the device is still operating.
1156 	 */
1157 	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1158 
1159 	clear_bit(__LINK_STATE_START, &dev->state);
1160 
1161 	/* Synchronize to the scheduled poll. We cannot touch the poll list;
1162 	 * it can even be on a different cpu. So just clear netif_running().
1163 	 *
1164 	 * dev->stop() will invoke napi_disable() on all of its
1165 	 * napi_struct instances on this device.
1166 	 */
1167 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
1168 
1169 	dev_deactivate(dev);
1170 
1171 	/*
1172 	 *	Call the device specific close. This cannot fail.
1173 	 *	Only if device is UP
1174 	 *
1175 	 *	We allow it to be called even after a DETACH hot-plug
1176 	 *	event.
1177 	 */
1178 	if (ops->ndo_stop)
1179 		ops->ndo_stop(dev);
1180 
1181 	/*
1182 	 *	Device is now down.
1183 	 */
1184 
1185 	dev->flags &= ~IFF_UP;
1186 
1187 	/*
1188 	 * Tell people we are down
1189 	 */
1190 	call_netdevice_notifiers(NETDEV_DOWN, dev);
1191 
1192 	/*
1193 	 *	Shutdown NET_DMA
1194 	 */
1195 	net_dmaengine_put();
1196 
1197 	return 0;
1198 }
1199 EXPORT_SYMBOL(dev_close);
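
/*
 * Both dev_open() and dev_close() must be called with the RTNL semaphore
 * held.  A minimal sketch of bringing an interface up from kernel code:
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */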
1200 
1201 
1202 /**
1203  *	dev_disable_lro - disable Large Receive Offload on a device
1204  *	@dev: device
1205  *
1206  *	Disable Large Receive Offload (LRO) on a net device.  Must be
1207  *	called under RTNL.  This is needed if received packets may be
1208  *	forwarded to another interface.
1209  */
1210 void dev_disable_lro(struct net_device *dev)
1211 {
1212 	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1213 	    dev->ethtool_ops->set_flags) {
1214 		u32 flags = dev->ethtool_ops->get_flags(dev);
1215 		if (flags & ETH_FLAG_LRO) {
1216 			flags &= ~ETH_FLAG_LRO;
1217 			dev->ethtool_ops->set_flags(dev, flags);
1218 		}
1219 	}
1220 	WARN_ON(dev->features & NETIF_F_LRO);
1221 }
1222 EXPORT_SYMBOL(dev_disable_lro);
1223 
1224 
1225 static int dev_boot_phase = 1;
1226 
1227 /*
1228  *	Device change register/unregister. These are not inline or static
1229  *	as we export them to the world.
1230  */
1231 
1232 /**
1233  *	register_netdevice_notifier - register a network notifier block
1234  *	@nb: notifier
1235  *
1236  *	Register a notifier to be called when network device events occur.
1237  *	The notifier passed is linked into the kernel structures and must
1238  *	not be reused until it has been unregistered. A negative errno code
1239  *	is returned on a failure.
1240  *
1241  * 	When registered, all registration and up events are replayed
1242  *	to the new notifier to give the caller a race-free
1243  *	view of the network device list.
1244  */
1245 
1246 int register_netdevice_notifier(struct notifier_block *nb)
1247 {
1248 	struct net_device *dev;
1249 	struct net_device *last;
1250 	struct net *net;
1251 	int err;
1252 
1253 	rtnl_lock();
1254 	err = raw_notifier_chain_register(&netdev_chain, nb);
1255 	if (err)
1256 		goto unlock;
1257 	if (dev_boot_phase)
1258 		goto unlock;
1259 	for_each_net(net) {
1260 		for_each_netdev(net, dev) {
1261 			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1262 			err = notifier_to_errno(err);
1263 			if (err)
1264 				goto rollback;
1265 
1266 			if (!(dev->flags & IFF_UP))
1267 				continue;
1268 
1269 			nb->notifier_call(nb, NETDEV_UP, dev);
1270 		}
1271 	}
1272 
1273 unlock:
1274 	rtnl_unlock();
1275 	return err;
1276 
1277 rollback:
1278 	last = dev;
1279 	for_each_net(net) {
1280 		for_each_netdev(net, dev) {
1281 			if (dev == last)
1282 				break;
1283 
1284 			if (dev->flags & IFF_UP) {
1285 				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1286 				nb->notifier_call(nb, NETDEV_DOWN, dev);
1287 			}
1288 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1289 		}
1290 	}
1291 
1292 	raw_notifier_chain_unregister(&netdev_chain, nb);
1293 	goto unlock;
1294 }
1295 EXPORT_SYMBOL(register_netdevice_notifier);
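
/*
 * Illustrative sketch of a notifier user (my_netdev_event and my_netdev_nb
 * are hypothetical).  In this kernel the notifier data pointer is the
 * struct net_device itself.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		switch (event) {
 *		case NETDEV_UP:
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *			break;
 *		case NETDEV_DOWN:
 *			printk(KERN_INFO "%s is down\n", dev->name);
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_netdev_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_netdev_nb);
 */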
1296 
1297 /**
1298  *	unregister_netdevice_notifier - unregister a network notifier block
1299  *	@nb: notifier
1300  *
1301  *	Unregister a notifier previously registered by
1302  *	register_netdevice_notifier(). The notifier is unlinked from the
1303  *	kernel structures and may then be reused. A negative errno code
1304  *	is returned on a failure.
1305  */
1306 
1307 int unregister_netdevice_notifier(struct notifier_block *nb)
1308 {
1309 	int err;
1310 
1311 	rtnl_lock();
1312 	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1313 	rtnl_unlock();
1314 	return err;
1315 }
1316 EXPORT_SYMBOL(unregister_netdevice_notifier);
1317 
1318 /**
1319  *	call_netdevice_notifiers - call all network notifier blocks
1320  *      @val: value passed unmodified to notifier function
1321  *      @dev: net_device pointer passed unmodified to notifier function
1322  *
1323  *	Call all network notifier blocks.  Parameters and return value
1324  *	are as for raw_notifier_call_chain().
1325  */
1326 
1327 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1328 {
1329 	return raw_notifier_call_chain(&netdev_chain, val, dev);
1330 }
1331 
1332 /* When > 0 there are consumers of rx skb time stamps */
1333 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1334 
1335 void net_enable_timestamp(void)
1336 {
1337 	atomic_inc(&netstamp_needed);
1338 }
1339 EXPORT_SYMBOL(net_enable_timestamp);
1340 
1341 void net_disable_timestamp(void)
1342 {
1343 	atomic_dec(&netstamp_needed);
1344 }
1345 EXPORT_SYMBOL(net_disable_timestamp);
1346 
1347 static inline void net_timestamp(struct sk_buff *skb)
1348 {
1349 	if (atomic_read(&netstamp_needed))
1350 		__net_timestamp(skb);
1351 	else
1352 		skb->tstamp.tv64 = 0;
1353 }
1354 
1355 /*
1356  *	Support routine. Sends outgoing frames to any network
1357  *	taps currently in use.
1358  */
1359 
1360 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1361 {
1362 	struct packet_type *ptype;
1363 
1364 #ifdef CONFIG_NET_CLS_ACT
1365 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1366 		net_timestamp(skb);
1367 #else
1368 	net_timestamp(skb);
1369 #endif
1370 
1371 	rcu_read_lock();
1372 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
1373 		/* Never send packets back to the socket
1374 		 * they originated from - MvS (miquels@drinkel.ow.org)
1375 		 */
1376 		if ((ptype->dev == dev || !ptype->dev) &&
1377 		    (ptype->af_packet_priv == NULL ||
1378 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
1379 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1380 			if (!skb2)
1381 				break;
1382 
1383 			/* skb->nh should be correctly
1384 			   set by the sender, so that the second statement is
1385 			   just protection against buggy protocols.
1386 			 */
1387 			skb_reset_mac_header(skb2);
1388 
1389 			if (skb_network_header(skb2) < skb2->data ||
1390 			    skb2->network_header > skb2->tail) {
1391 				if (net_ratelimit())
1392 					printk(KERN_CRIT "protocol %04x is "
1393 					       "buggy, dev %s\n",
1394 					       skb2->protocol, dev->name);
1395 				skb_reset_network_header(skb2);
1396 			}
1397 
1398 			skb2->transport_header = skb2->network_header;
1399 			skb2->pkt_type = PACKET_OUTGOING;
1400 			ptype->func(skb2, skb->dev, ptype, skb->dev);
1401 		}
1402 	}
1403 	rcu_read_unlock();
1404 }
1405 
1406 
1407 static inline void __netif_reschedule(struct Qdisc *q)
1408 {
1409 	struct softnet_data *sd;
1410 	unsigned long flags;
1411 
1412 	local_irq_save(flags);
1413 	sd = &__get_cpu_var(softnet_data);
1414 	q->next_sched = sd->output_queue;
1415 	sd->output_queue = q;
1416 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
1417 	local_irq_restore(flags);
1418 }
1419 
1420 void __netif_schedule(struct Qdisc *q)
1421 {
1422 	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1423 		__netif_reschedule(q);
1424 }
1425 EXPORT_SYMBOL(__netif_schedule);
1426 
1427 void dev_kfree_skb_irq(struct sk_buff *skb)
1428 {
1429 	if (atomic_dec_and_test(&skb->users)) {
1430 		struct softnet_data *sd;
1431 		unsigned long flags;
1432 
1433 		local_irq_save(flags);
1434 		sd = &__get_cpu_var(softnet_data);
1435 		skb->next = sd->completion_queue;
1436 		sd->completion_queue = skb;
1437 		raise_softirq_irqoff(NET_TX_SOFTIRQ);
1438 		local_irq_restore(flags);
1439 	}
1440 }
1441 EXPORT_SYMBOL(dev_kfree_skb_irq);
1442 
1443 void dev_kfree_skb_any(struct sk_buff *skb)
1444 {
1445 	if (in_irq() || irqs_disabled())
1446 		dev_kfree_skb_irq(skb);
1447 	else
1448 		dev_kfree_skb(skb);
1449 }
1450 EXPORT_SYMBOL(dev_kfree_skb_any);
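
/*
 * Typical driver use (sketch): a TX-completion interrupt handler must not
 * call dev_kfree_skb() directly, so it uses the _irq variant; code that can
 * run in either context uses the _any variant.
 *
 *	dev_kfree_skb_irq(skb);		(from hard-IRQ context)
 *	dev_kfree_skb_any(skb);		(when the calling context is unknown)
 */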
1451 
1452 
1453 /**
1454  * netif_device_detach - mark device as removed
1455  * @dev: network device
1456  *
1457  * Mark the device as removed from the system and therefore no longer available.
1458  */
1459 void netif_device_detach(struct net_device *dev)
1460 {
1461 	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1462 	    netif_running(dev)) {
1463 		netif_tx_stop_all_queues(dev);
1464 	}
1465 }
1466 EXPORT_SYMBOL(netif_device_detach);
1467 
1468 /**
1469  * netif_device_attach - mark device as attached
1470  * @dev: network device
1471  *
1472  * Mark the device as attached to the system and restart it if needed.
1473  */
1474 void netif_device_attach(struct net_device *dev)
1475 {
1476 	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1477 	    netif_running(dev)) {
1478 		netif_tx_wake_all_queues(dev);
1479 		__netdev_watchdog_up(dev);
1480 	}
1481 }
1482 EXPORT_SYMBOL(netif_device_attach);
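
/*
 * Illustrative sketch of the usual suspend/resume pairing in a PCI network
 * driver (my_suspend/my_resume are hypothetical):
 *
 *	static int my_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(dev);
 *		... stop the hardware ...
 *		return 0;
 *	}
 *
 *	static int my_resume(struct pci_dev *pdev)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		... restart the hardware ...
 *		netif_device_attach(dev);
 *		return 0;
 *	}
 */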
1483 
1484 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1485 {
1486 	return ((features & NETIF_F_GEN_CSUM) ||
1487 		((features & NETIF_F_IP_CSUM) &&
1488 		 protocol == htons(ETH_P_IP)) ||
1489 		((features & NETIF_F_IPV6_CSUM) &&
1490 		 protocol == htons(ETH_P_IPV6)) ||
1491 		((features & NETIF_F_FCOE_CRC) &&
1492 		 protocol == htons(ETH_P_FCOE)));
1493 }
1494 
1495 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1496 {
1497 	if (can_checksum_protocol(dev->features, skb->protocol))
1498 		return true;
1499 
1500 	if (skb->protocol == htons(ETH_P_8021Q)) {
1501 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1502 		if (can_checksum_protocol(dev->features & dev->vlan_features,
1503 					  veh->h_vlan_encapsulated_proto))
1504 			return true;
1505 	}
1506 
1507 	return false;
1508 }
1509 
1510 /*
1511  * Invalidate hardware checksum when packet is to be mangled, and
1512  * complete checksum manually on outgoing path.
1513  */
1514 int skb_checksum_help(struct sk_buff *skb)
1515 {
1516 	__wsum csum;
1517 	int ret = 0, offset;
1518 
1519 	if (skb->ip_summed == CHECKSUM_COMPLETE)
1520 		goto out_set_summed;
1521 
1522 	if (unlikely(skb_shinfo(skb)->gso_size)) {
1523 		/* Let GSO fix up the checksum. */
1524 		goto out_set_summed;
1525 	}
1526 
1527 	offset = skb->csum_start - skb_headroom(skb);
1528 	BUG_ON(offset >= skb_headlen(skb));
1529 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
1530 
1531 	offset += skb->csum_offset;
1532 	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1533 
1534 	if (skb_cloned(skb) &&
1535 	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1536 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1537 		if (ret)
1538 			goto out;
1539 	}
1540 
1541 	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
1542 out_set_summed:
1543 	skb->ip_summed = CHECKSUM_NONE;
1544 out:
1545 	return ret;
1546 }
1547 EXPORT_SYMBOL(skb_checksum_help);
1548 
1549 /**
1550  *	skb_gso_segment - Perform segmentation on skb.
1551  *	@skb: buffer to segment
1552  *	@features: features for the output path (see dev->features)
1553  *
1554  *	This function segments the given skb and returns a list of segments.
1555  *
1556  *	It may return NULL if the skb requires no segmentation.  This is
1557  *	only possible when GSO is used for verifying header integrity.
1558  */
1559 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1560 {
1561 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1562 	struct packet_type *ptype;
1563 	__be16 type = skb->protocol;
1564 	int err;
1565 
1566 	skb_reset_mac_header(skb);
1567 	skb->mac_len = skb->network_header - skb->mac_header;
1568 	__skb_pull(skb, skb->mac_len);
1569 
1570 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1571 		struct net_device *dev = skb->dev;
1572 		struct ethtool_drvinfo info = {};
1573 
1574 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1575 			dev->ethtool_ops->get_drvinfo(dev, &info);
1576 
1577 		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1578 			"ip_summed=%d",
1579 		     info.driver, dev ? dev->features : 0L,
1580 		     skb->sk ? skb->sk->sk_route_caps : 0L,
1581 		     skb->len, skb->data_len, skb->ip_summed);
1582 
1583 		if (skb_header_cloned(skb) &&
1584 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1585 			return ERR_PTR(err);
1586 	}
1587 
1588 	rcu_read_lock();
1589 	list_for_each_entry_rcu(ptype,
1590 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1591 		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1592 			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1593 				err = ptype->gso_send_check(skb);
1594 				segs = ERR_PTR(err);
1595 				if (err || skb_gso_ok(skb, features))
1596 					break;
1597 				__skb_push(skb, (skb->data -
1598 						 skb_network_header(skb)));
1599 			}
1600 			segs = ptype->gso_segment(skb, features);
1601 			break;
1602 		}
1603 	}
1604 	rcu_read_unlock();
1605 
1606 	__skb_push(skb, skb->data - skb_mac_header(skb));
1607 
1608 	return segs;
1609 }
1610 EXPORT_SYMBOL(skb_gso_segment);
1611 
1612 /* Take action when hardware reception checksum errors are detected. */
1613 #ifdef CONFIG_BUG
1614 void netdev_rx_csum_fault(struct net_device *dev)
1615 {
1616 	if (net_ratelimit()) {
1617 		printk(KERN_ERR "%s: hw csum failure.\n",
1618 			dev ? dev->name : "<unknown>");
1619 		dump_stack();
1620 	}
1621 }
1622 EXPORT_SYMBOL(netdev_rx_csum_fault);
1623 #endif
1624 
1625 /* Actually, we should eliminate this check as soon as we know that:
1626  * 1. An IOMMU is present and allows all the memory to be mapped.
1627  * 2. No high memory really exists on this machine.
1628  */
1629 
1630 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1631 {
1632 #ifdef CONFIG_HIGHMEM
1633 	int i;
1634 
1635 	if (dev->features & NETIF_F_HIGHDMA)
1636 		return 0;
1637 
1638 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1639 		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1640 			return 1;
1641 
1642 #endif
1643 	return 0;
1644 }
1645 
1646 struct dev_gso_cb {
1647 	void (*destructor)(struct sk_buff *skb);
1648 };
1649 
1650 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1651 
1652 static void dev_gso_skb_destructor(struct sk_buff *skb)
1653 {
1654 	struct dev_gso_cb *cb;
1655 
1656 	do {
1657 		struct sk_buff *nskb = skb->next;
1658 
1659 		skb->next = nskb->next;
1660 		nskb->next = NULL;
1661 		kfree_skb(nskb);
1662 	} while (skb->next);
1663 
1664 	cb = DEV_GSO_CB(skb);
1665 	if (cb->destructor)
1666 		cb->destructor(skb);
1667 }
1668 
1669 /**
1670  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
1671  *	@skb: buffer to segment
1672  *
1673  *	This function segments the given skb and stores the list of segments
1674  *	in skb->next.
1675  */
1676 static int dev_gso_segment(struct sk_buff *skb)
1677 {
1678 	struct net_device *dev = skb->dev;
1679 	struct sk_buff *segs;
1680 	int features = dev->features & ~(illegal_highdma(dev, skb) ?
1681 					 NETIF_F_SG : 0);
1682 
1683 	segs = skb_gso_segment(skb, features);
1684 
1685 	/* Verifying header integrity only. */
1686 	if (!segs)
1687 		return 0;
1688 
1689 	if (IS_ERR(segs))
1690 		return PTR_ERR(segs);
1691 
1692 	skb->next = segs;
1693 	DEV_GSO_CB(skb)->destructor = skb->destructor;
1694 	skb->destructor = dev_gso_skb_destructor;
1695 
1696 	return 0;
1697 }
1698 
1699 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1700 			struct netdev_queue *txq)
1701 {
1702 	const struct net_device_ops *ops = dev->netdev_ops;
1703 	int rc;
1704 
1705 	if (likely(!skb->next)) {
1706 		if (!list_empty(&ptype_all))
1707 			dev_queue_xmit_nit(skb, dev);
1708 
1709 		if (netif_needs_gso(dev, skb)) {
1710 			if (unlikely(dev_gso_segment(skb)))
1711 				goto out_kfree_skb;
1712 			if (skb->next)
1713 				goto gso;
1714 		}
1715 
1716 		/*
1717 		 * If the device doesn't need skb->dst, release it right now while
1718 		 * it's hot in this cpu's cache
1719 		 */
1720 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1721 			skb_dst_drop(skb);
1722 
1723 		rc = ops->ndo_start_xmit(skb, dev);
1724 		if (rc == NETDEV_TX_OK)
1725 			txq_trans_update(txq);
1726 		/*
1727 		 * TODO: if skb_orphan() was called by
1728 		 * dev->hard_start_xmit() (for example, the unmodified
1729 		 * igb driver does that; bnx2 doesn't), then
1730 		 * skb_tx_software_timestamp() will be unable to send
1731 		 * back the time stamp.
1732 		 *
1733 		 * How can this be prevented? Always create another
1734 		 * reference to the socket before calling
1735 		 * dev->hard_start_xmit()? Prevent that skb_orphan()
1736 		 * does anything in dev->hard_start_xmit() by clearing
1737 		 * the skb destructor before the call and restoring it
1738 		 * afterwards, then doing the skb_orphan() ourselves?
1739 		 */
1740 		return rc;
1741 	}
1742 
1743 gso:
1744 	do {
1745 		struct sk_buff *nskb = skb->next;
1746 
1747 		skb->next = nskb->next;
1748 		nskb->next = NULL;
1749 		rc = ops->ndo_start_xmit(nskb, dev);
1750 		if (unlikely(rc != NETDEV_TX_OK)) {
1751 			nskb->next = skb->next;
1752 			skb->next = nskb;
1753 			return rc;
1754 		}
1755 		txq_trans_update(txq);
1756 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1757 			return NETDEV_TX_BUSY;
1758 	} while (skb->next);
1759 
1760 	skb->destructor = DEV_GSO_CB(skb)->destructor;
1761 
1762 out_kfree_skb:
1763 	kfree_skb(skb);
1764 	return NETDEV_TX_OK;
1765 }
1766 
1767 static u32 skb_tx_hashrnd;
1768 
1769 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1770 {
1771 	u32 hash;
1772 
1773 	if (skb_rx_queue_recorded(skb)) {
1774 		hash = skb_get_rx_queue(skb);
1775 		while (unlikely(hash >= dev->real_num_tx_queues))
1776 			hash -= dev->real_num_tx_queues;
1777 		return hash;
1778 	}
1779 
1780 	if (skb->sk && skb->sk->sk_hash)
1781 		hash = skb->sk->sk_hash;
1782 	else
1783 		hash = skb->protocol;
1784 
1785 	hash = jhash_1word(hash, skb_tx_hashrnd);
1786 
1787 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1788 }
1789 EXPORT_SYMBOL(skb_tx_hash);
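
/*
 * The final line above scales the 32-bit hash onto [0, real_num_tx_queues)
 * without a modulo: for example, with 4 TX queues a hash of 0x40000000 maps
 * to queue ((u64)0x40000000 * 4) >> 32 = 1, and 0xC0000000 maps to queue 3,
 * so packets spread evenly across the queues.
 */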
1790 
1791 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1792 					struct sk_buff *skb)
1793 {
1794 	const struct net_device_ops *ops = dev->netdev_ops;
1795 	u16 queue_index = 0;
1796 
1797 	if (ops->ndo_select_queue)
1798 		queue_index = ops->ndo_select_queue(dev, skb);
1799 	else if (dev->real_num_tx_queues > 1)
1800 		queue_index = skb_tx_hash(dev, skb);
1801 
1802 	skb_set_queue_mapping(skb, queue_index);
1803 	return netdev_get_tx_queue(dev, queue_index);
1804 }
1805 
1806 static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
1807 				 struct net_device *dev,
1808 				 struct netdev_queue *txq)
1809 {
1810 	spinlock_t *root_lock = qdisc_lock(q);
1811 	int rc;
1812 
1813 	spin_lock(root_lock);
1814 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1815 		kfree_skb(skb);
1816 		rc = NET_XMIT_DROP;
1817 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
1818 		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
1819 		/*
1820 		 * This is a work-conserving queue; there are no old skbs
1821 		 * waiting to be sent out; and the qdisc is not running -
1822 		 * xmit the skb directly.
1823 		 */
1824 		__qdisc_update_bstats(q, skb->len);
1825 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
1826 			__qdisc_run(q);
1827 		else
1828 			clear_bit(__QDISC_STATE_RUNNING, &q->state);
1829 
1830 		rc = NET_XMIT_SUCCESS;
1831 	} else {
1832 		rc = qdisc_enqueue_root(skb, q);
1833 		qdisc_run(q);
1834 	}
1835 	spin_unlock(root_lock);
1836 
1837 	return rc;
1838 }
1839 
1840 /**
1841  *	dev_queue_xmit - transmit a buffer
1842  *	@skb: buffer to transmit
1843  *
1844  *	Queue a buffer for transmission to a network device. The caller must
1845  *	have set the device and priority and built the buffer before calling
1846  *	this function. The function can be called from an interrupt.
1847  *
1848  *	A negative errno code is returned on a failure. A success does not
1849  *	guarantee the frame will be transmitted as it may be dropped due
1850  *	to congestion or traffic shaping.
1851  *
1852  * -----------------------------------------------------------------------------------
1853  *      I notice this method can also return errors from the queue disciplines,
1854  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1855  *      be positive.
1856  *
1857  *      Regardless of the return value, the skb is consumed, so it is currently
1858  *      difficult to retry a send to this method.  (You can bump the ref count
1859  *      before sending to hold a reference for retry if you are careful.)
1860  *
1861  *      When calling this method, interrupts MUST be enabled.  This is because
1862  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1863  *          --BLG
1864  */
1865 int dev_queue_xmit(struct sk_buff *skb)
1866 {
1867 	struct net_device *dev = skb->dev;
1868 	struct netdev_queue *txq;
1869 	struct Qdisc *q;
1870 	int rc = -ENOMEM;
1871 
1872 	/* GSO will handle the following emulations directly. */
1873 	if (netif_needs_gso(dev, skb))
1874 		goto gso;
1875 
1876 	if (skb_has_frags(skb) &&
1877 	    !(dev->features & NETIF_F_FRAGLIST) &&
1878 	    __skb_linearize(skb))
1879 		goto out_kfree_skb;
1880 
1881 	/* A fragmented skb is linearized if the device does not support SG,
1882 	 * or if at least one of the fragments is in highmem and the device
1883 	 * does not support DMA from it.
1884 	 */
1885 	if (skb_shinfo(skb)->nr_frags &&
1886 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1887 	    __skb_linearize(skb))
1888 		goto out_kfree_skb;
1889 
1890 	/* If packet is not checksummed and device does not support
1891 	 * checksumming for this protocol, complete checksumming here.
1892 	 */
1893 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
1894 		skb_set_transport_header(skb, skb->csum_start -
1895 					      skb_headroom(skb));
1896 		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1897 			goto out_kfree_skb;
1898 	}
1899 
1900 gso:
1901 	/* Disable soft irqs for various locks below. Also
1902 	 * stops preemption for RCU.
1903 	 */
1904 	rcu_read_lock_bh();
1905 
1906 	txq = dev_pick_tx(dev, skb);
1907 	q = rcu_dereference(txq->qdisc);
1908 
1909 #ifdef CONFIG_NET_CLS_ACT
1910 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1911 #endif
1912 	if (q->enqueue) {
1913 		rc = __dev_xmit_skb(skb, q, dev, txq);
1914 		goto out;
1915 	}
1916 
1917 	/* The device has no queue. Common case for software devices:
1918 	   loopback, all sorts of tunnels...
1919 
1920 	   Really, it is unlikely that netif_tx_lock protection is necessary
1921 	   here.  (Loopback and IP tunnels, for example, are clean, ignoring
1922 	   statistics counters.)
1923 	   However, it is possible that they rely on the protection
1924 	   made by us here.
1925 
1926 	   Check this and take the lock; it is not prone to deadlocks.
1927 	   Or take the noqueue qdisc path, which is even simpler 8)
1928 	 */
1929 	if (dev->flags & IFF_UP) {
1930 		int cpu = smp_processor_id(); /* ok because BHs are off */
1931 
1932 		if (txq->xmit_lock_owner != cpu) {
1933 
1934 			HARD_TX_LOCK(dev, txq, cpu);
1935 
1936 			if (!netif_tx_queue_stopped(txq)) {
1937 				rc = NET_XMIT_SUCCESS;
1938 				if (!dev_hard_start_xmit(skb, dev, txq)) {
1939 					HARD_TX_UNLOCK(dev, txq);
1940 					goto out;
1941 				}
1942 			}
1943 			HARD_TX_UNLOCK(dev, txq);
1944 			if (net_ratelimit())
1945 				printk(KERN_CRIT "Virtual device %s asks to "
1946 				       "queue packet!\n", dev->name);
1947 		} else {
1948 			/* Recursion is detected! It is possible,
1949 			 * unfortunately */
1950 			if (net_ratelimit())
1951 				printk(KERN_CRIT "Dead loop on virtual device "
1952 				       "%s, fix it urgently!\n", dev->name);
1953 		}
1954 	}
1955 
1956 	rc = -ENETDOWN;
1957 	rcu_read_unlock_bh();
1958 
1959 out_kfree_skb:
1960 	kfree_skb(skb);
1961 	return rc;
1962 out:
1963 	rcu_read_unlock_bh();
1964 	return rc;
1965 }
1966 EXPORT_SYMBOL(dev_queue_xmit);
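/*
 * Illustrative sketch (not part of this file): a typical caller points
 * skb->dev at the outgoing device, builds the payload and lets
 * dev_queue_xmit() pick the tx queue and qdisc.  The helper name
 * example_send_frame() and the 802.3 protocol value are assumptions made
 * up for this example; dev_queue_xmit() consumes the skb and may return
 * positive NET_XMIT_* codes as noted above.
 *
 *	static int example_send_frame(struct net_device *dev,
 *				      const void *data, unsigned int len)
 *	{
 *		struct sk_buff *skb;
 *
 *		skb = alloc_skb(len + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
 *		if (!skb)
 *			return -ENOMEM;
 *
 *		skb_reserve(skb, LL_RESERVED_SPACE(dev));
 *		memcpy(skb_put(skb, len), data, len);
 *		skb->dev = dev;
 *		skb->protocol = htons(ETH_P_802_3);
 *
 *		return dev_queue_xmit(skb);
 *	}
 */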
1967 
1968 
1969 /*=======================================================================
1970 			Receiver routines
1971   =======================================================================*/
1972 
1973 int netdev_max_backlog __read_mostly = 1000;
1974 int netdev_budget __read_mostly = 300;
1975 int weight_p __read_mostly = 64;            /* old backlog weight */
1976 
1977 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1978 
1979 
1980 /**
1981  *	netif_rx	-	post buffer to the network code
1982  *	@skb: buffer to post
1983  *
1984  *	This function receives a packet from a device driver and queues it for
1985  *	the upper (protocol) levels to process.  It always succeeds. The buffer
1986  *	may be dropped during processing for congestion control or by the
1987  *	protocol layers.
1988  *
1989  *	Return values:
1990  *	NET_RX_SUCCESS	(no congestion)
1991  *	NET_RX_DROP     (packet was dropped)
1992  *
1993  */
1994 
1995 int netif_rx(struct sk_buff *skb)
1996 {
1997 	struct softnet_data *queue;
1998 	unsigned long flags;
1999 
2000 	/* if netpoll wants it, pretend we never saw it */
2001 	if (netpoll_rx(skb))
2002 		return NET_RX_DROP;
2003 
2004 	if (!skb->tstamp.tv64)
2005 		net_timestamp(skb);
2006 
2007 	/*
2008 	 * The code is rearranged so that the path is shortest when
2009 	 * the CPU is congested but still operating.
2010 	 */
2011 	local_irq_save(flags);
2012 	queue = &__get_cpu_var(softnet_data);
2013 
2014 	__get_cpu_var(netdev_rx_stat).total++;
2015 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2016 		if (queue->input_pkt_queue.qlen) {
2017 enqueue:
2018 			__skb_queue_tail(&queue->input_pkt_queue, skb);
2019 			local_irq_restore(flags);
2020 			return NET_RX_SUCCESS;
2021 		}
2022 
2023 		napi_schedule(&queue->backlog);
2024 		goto enqueue;
2025 	}
2026 
2027 	__get_cpu_var(netdev_rx_stat).dropped++;
2028 	local_irq_restore(flags);
2029 
2030 	kfree_skb(skb);
2031 	return NET_RX_DROP;
2032 }
2033 EXPORT_SYMBOL(netif_rx);
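/*
 * Illustrative sketch (not part of this file): a non-NAPI driver's
 * receive interrupt typically copies the frame into a fresh skb and
 * hands it to netif_rx() from hard-irq context; netif_rx() queues it on
 * the per-CPU backlog processed by process_backlog() below.  The
 * example_rx_irq() name and buffer parameters are assumptions made up
 * for this example.
 *
 *	static void example_rx_irq(struct net_device *dev,
 *				   const void *buf, unsigned int len)
 *	{
 *		struct sk_buff *skb = dev_alloc_skb(len + NET_IP_ALIGN);
 *
 *		if (!skb) {
 *			dev->stats.rx_dropped++;
 *			return;
 *		}
 *		skb_reserve(skb, NET_IP_ALIGN);
 *		memcpy(skb_put(skb, len), buf, len);
 *		skb->protocol = eth_type_trans(skb, dev);
 *		netif_rx(skb);
 *	}
 */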
2034 
2035 int netif_rx_ni(struct sk_buff *skb)
2036 {
2037 	int err;
2038 
2039 	preempt_disable();
2040 	err = netif_rx(skb);
2041 	if (local_softirq_pending())
2042 		do_softirq();
2043 	preempt_enable();
2044 
2045 	return err;
2046 }
2047 EXPORT_SYMBOL(netif_rx_ni);
2048 
2049 static void net_tx_action(struct softirq_action *h)
2050 {
2051 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
2052 
2053 	if (sd->completion_queue) {
2054 		struct sk_buff *clist;
2055 
2056 		local_irq_disable();
2057 		clist = sd->completion_queue;
2058 		sd->completion_queue = NULL;
2059 		local_irq_enable();
2060 
2061 		while (clist) {
2062 			struct sk_buff *skb = clist;
2063 			clist = clist->next;
2064 
2065 			WARN_ON(atomic_read(&skb->users));
2066 			__kfree_skb(skb);
2067 		}
2068 	}
2069 
2070 	if (sd->output_queue) {
2071 		struct Qdisc *head;
2072 
2073 		local_irq_disable();
2074 		head = sd->output_queue;
2075 		sd->output_queue = NULL;
2076 		local_irq_enable();
2077 
2078 		while (head) {
2079 			struct Qdisc *q = head;
2080 			spinlock_t *root_lock;
2081 
2082 			head = head->next_sched;
2083 
2084 			root_lock = qdisc_lock(q);
2085 			if (spin_trylock(root_lock)) {
2086 				smp_mb__before_clear_bit();
2087 				clear_bit(__QDISC_STATE_SCHED,
2088 					  &q->state);
2089 				qdisc_run(q);
2090 				spin_unlock(root_lock);
2091 			} else {
2092 				if (!test_bit(__QDISC_STATE_DEACTIVATED,
2093 					      &q->state)) {
2094 					__netif_reschedule(q);
2095 				} else {
2096 					smp_mb__before_clear_bit();
2097 					clear_bit(__QDISC_STATE_SCHED,
2098 						  &q->state);
2099 				}
2100 			}
2101 		}
2102 	}
2103 }
2104 
2105 static inline int deliver_skb(struct sk_buff *skb,
2106 			      struct packet_type *pt_prev,
2107 			      struct net_device *orig_dev)
2108 {
2109 	atomic_inc(&skb->users);
2110 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2111 }
2112 
2113 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2114 
2115 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2116 /* This hook is defined here for ATM LANE */
2117 int (*br_fdb_test_addr_hook)(struct net_device *dev,
2118 			     unsigned char *addr) __read_mostly;
2119 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2120 #endif
2121 
2122 /*
2123  * If the bridge module is loaded, call the bridging hook.
2124  * Returns NULL if the packet was consumed.
2125  */
2126 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2127 					struct sk_buff *skb) __read_mostly;
2128 EXPORT_SYMBOL_GPL(br_handle_frame_hook);
2129 
2130 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2131 					    struct packet_type **pt_prev, int *ret,
2132 					    struct net_device *orig_dev)
2133 {
2134 	struct net_bridge_port *port;
2135 
2136 	if (skb->pkt_type == PACKET_LOOPBACK ||
2137 	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
2138 		return skb;
2139 
2140 	if (*pt_prev) {
2141 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2142 		*pt_prev = NULL;
2143 	}
2144 
2145 	return br_handle_frame_hook(port, skb);
2146 }
2147 #else
2148 #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
2149 #endif
2150 
2151 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2152 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2153 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2154 
2155 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2156 					     struct packet_type **pt_prev,
2157 					     int *ret,
2158 					     struct net_device *orig_dev)
2159 {
2160 	if (skb->dev->macvlan_port == NULL)
2161 		return skb;
2162 
2163 	if (*pt_prev) {
2164 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2165 		*pt_prev = NULL;
2166 	}
2167 	return macvlan_handle_frame_hook(skb);
2168 }
2169 #else
2170 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
2171 #endif
2172 
2173 #ifdef CONFIG_NET_CLS_ACT
2174 /* TODO: Maybe we should just force sch_ingress to be compiled in
2175  * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
2176  * instructions (a compare and two extra stores) when it is not
2177  * enabled but CONFIG_NET_CLS_ACT is.
2178  * NOTE: This doesn't stop any functionality; if you don't have
2179  * the ingress scheduler, you just can't add policies on ingress.
2180  *
2181  */
2182 static int ing_filter(struct sk_buff *skb)
2183 {
2184 	struct net_device *dev = skb->dev;
2185 	u32 ttl = G_TC_RTTL(skb->tc_verd);
2186 	struct netdev_queue *rxq;
2187 	int result = TC_ACT_OK;
2188 	struct Qdisc *q;
2189 
2190 	if (MAX_RED_LOOP < ttl++) {
2191 		printk(KERN_WARNING
2192 		       "Redir loop detected Dropping packet (%d->%d)\n",
2193 		       skb->iif, dev->ifindex);
2194 		return TC_ACT_SHOT;
2195 	}
2196 
2197 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2198 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2199 
2200 	rxq = &dev->rx_queue;
2201 
2202 	q = rxq->qdisc;
2203 	if (q != &noop_qdisc) {
2204 		spin_lock(qdisc_lock(q));
2205 		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2206 			result = qdisc_enqueue_root(skb, q);
2207 		spin_unlock(qdisc_lock(q));
2208 	}
2209 
2210 	return result;
2211 }
2212 
2213 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2214 					 struct packet_type **pt_prev,
2215 					 int *ret, struct net_device *orig_dev)
2216 {
2217 	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
2218 		goto out;
2219 
2220 	if (*pt_prev) {
2221 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
2222 		*pt_prev = NULL;
2223 	} else {
2224 		/* Huh? Why does turning on AF_PACKET affect this? */
2225 		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2226 	}
2227 
2228 	switch (ing_filter(skb)) {
2229 	case TC_ACT_SHOT:
2230 	case TC_ACT_STOLEN:
2231 		kfree_skb(skb);
2232 		return NULL;
2233 	}
2234 
2235 out:
2236 	skb->tc_verd = 0;
2237 	return skb;
2238 }
2239 #endif
2240 
2241 /*
2242  * 	netif_nit_deliver - deliver received packets to network taps
2243  * 	@skb: buffer
2244  *
2245  * 	This function is used to deliver incoming packets to network
2246  * 	taps. It should be used when the normal netif_receive_skb path
2247  * 	is bypassed, for example because of VLAN acceleration.
2248  */
2249 void netif_nit_deliver(struct sk_buff *skb)
2250 {
2251 	struct packet_type *ptype;
2252 
2253 	if (list_empty(&ptype_all))
2254 		return;
2255 
2256 	skb_reset_network_header(skb);
2257 	skb_reset_transport_header(skb);
2258 	skb->mac_len = skb->network_header - skb->mac_header;
2259 
2260 	rcu_read_lock();
2261 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2262 		if (!ptype->dev || ptype->dev == skb->dev)
2263 			deliver_skb(skb, ptype, skb->dev);
2264 	}
2265 	rcu_read_unlock();
2266 }
2267 
2268 /**
2269  *	netif_receive_skb - process receive buffer from network
2270  *	@skb: buffer to process
2271  *
2272  *	netif_receive_skb() is the main receive data processing function.
2273  *	It always succeeds. The buffer may be dropped during processing
2274  *	for congestion control or by the protocol layers.
2275  *
2276  *	This function may only be called from softirq context and interrupts
2277  *	should be enabled.
2278  *
2279  *	Return values (usually ignored):
2280  *	NET_RX_SUCCESS: no congestion
2281  *	NET_RX_DROP: packet was dropped
2282  */
2283 int netif_receive_skb(struct sk_buff *skb)
2284 {
2285 	struct packet_type *ptype, *pt_prev;
2286 	struct net_device *orig_dev;
2287 	struct net_device *null_or_orig;
2288 	int ret = NET_RX_DROP;
2289 	__be16 type;
2290 
2291 	if (!skb->tstamp.tv64)
2292 		net_timestamp(skb);
2293 
2294 	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2295 		return NET_RX_SUCCESS;
2296 
2297 	/* if we've gotten here through NAPI, check netpoll */
2298 	if (netpoll_receive_skb(skb))
2299 		return NET_RX_DROP;
2300 
2301 	if (!skb->iif)
2302 		skb->iif = skb->dev->ifindex;
2303 
2304 	null_or_orig = NULL;
2305 	orig_dev = skb->dev;
2306 	if (orig_dev->master) {
2307 		if (skb_bond_should_drop(skb))
2308 			null_or_orig = orig_dev; /* deliver only exact match */
2309 		else
2310 			skb->dev = orig_dev->master;
2311 	}
2312 
2313 	__get_cpu_var(netdev_rx_stat).total++;
2314 
2315 	skb_reset_network_header(skb);
2316 	skb_reset_transport_header(skb);
2317 	skb->mac_len = skb->network_header - skb->mac_header;
2318 
2319 	pt_prev = NULL;
2320 
2321 	rcu_read_lock();
2322 
2323 #ifdef CONFIG_NET_CLS_ACT
2324 	if (skb->tc_verd & TC_NCLS) {
2325 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2326 		goto ncls;
2327 	}
2328 #endif
2329 
2330 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
2331 		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2332 		    ptype->dev == orig_dev) {
2333 			if (pt_prev)
2334 				ret = deliver_skb(skb, pt_prev, orig_dev);
2335 			pt_prev = ptype;
2336 		}
2337 	}
2338 
2339 #ifdef CONFIG_NET_CLS_ACT
2340 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2341 	if (!skb)
2342 		goto out;
2343 ncls:
2344 #endif
2345 
2346 	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2347 	if (!skb)
2348 		goto out;
2349 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2350 	if (!skb)
2351 		goto out;
2352 
2353 	type = skb->protocol;
2354 	list_for_each_entry_rcu(ptype,
2355 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2356 		if (ptype->type == type &&
2357 		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2358 		     ptype->dev == orig_dev)) {
2359 			if (pt_prev)
2360 				ret = deliver_skb(skb, pt_prev, orig_dev);
2361 			pt_prev = ptype;
2362 		}
2363 	}
2364 
2365 	if (pt_prev) {
2366 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2367 	} else {
2368 		kfree_skb(skb);
2369 		/* Jamal, now you will not be able to escape explaining
2370 		 * to me how you were going to use this. :-)
2371 		 */
2372 		ret = NET_RX_DROP;
2373 	}
2374 
2375 out:
2376 	rcu_read_unlock();
2377 	return ret;
2378 }
2379 EXPORT_SYMBOL(netif_receive_skb);
2380 
2381 /* Network device is going away, flush any packets still pending  */
2382 static void flush_backlog(void *arg)
2383 {
2384 	struct net_device *dev = arg;
2385 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2386 	struct sk_buff *skb, *tmp;
2387 
2388 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2389 		if (skb->dev == dev) {
2390 			__skb_unlink(skb, &queue->input_pkt_queue);
2391 			kfree_skb(skb);
2392 		}
2393 }
2394 
2395 static int napi_gro_complete(struct sk_buff *skb)
2396 {
2397 	struct packet_type *ptype;
2398 	__be16 type = skb->protocol;
2399 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2400 	int err = -ENOENT;
2401 
2402 	if (NAPI_GRO_CB(skb)->count == 1) {
2403 		skb_shinfo(skb)->gso_size = 0;
2404 		goto out;
2405 	}
2406 
2407 	rcu_read_lock();
2408 	list_for_each_entry_rcu(ptype, head, list) {
2409 		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2410 			continue;
2411 
2412 		err = ptype->gro_complete(skb);
2413 		break;
2414 	}
2415 	rcu_read_unlock();
2416 
2417 	if (err) {
2418 		WARN_ON(&ptype->list == head);
2419 		kfree_skb(skb);
2420 		return NET_RX_SUCCESS;
2421 	}
2422 
2423 out:
2424 	return netif_receive_skb(skb);
2425 }
2426 
2427 void napi_gro_flush(struct napi_struct *napi)
2428 {
2429 	struct sk_buff *skb, *next;
2430 
2431 	for (skb = napi->gro_list; skb; skb = next) {
2432 		next = skb->next;
2433 		skb->next = NULL;
2434 		napi_gro_complete(skb);
2435 	}
2436 
2437 	napi->gro_count = 0;
2438 	napi->gro_list = NULL;
2439 }
2440 EXPORT_SYMBOL(napi_gro_flush);
2441 
2442 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2443 {
2444 	struct sk_buff **pp = NULL;
2445 	struct packet_type *ptype;
2446 	__be16 type = skb->protocol;
2447 	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2448 	int same_flow;
2449 	int mac_len;
2450 	int ret;
2451 
2452 	if (!(skb->dev->features & NETIF_F_GRO))
2453 		goto normal;
2454 
2455 	if (skb_is_gso(skb) || skb_has_frags(skb))
2456 		goto normal;
2457 
2458 	rcu_read_lock();
2459 	list_for_each_entry_rcu(ptype, head, list) {
2460 		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2461 			continue;
2462 
2463 		skb_set_network_header(skb, skb_gro_offset(skb));
2464 		mac_len = skb->network_header - skb->mac_header;
2465 		skb->mac_len = mac_len;
2466 		NAPI_GRO_CB(skb)->same_flow = 0;
2467 		NAPI_GRO_CB(skb)->flush = 0;
2468 		NAPI_GRO_CB(skb)->free = 0;
2469 
2470 		pp = ptype->gro_receive(&napi->gro_list, skb);
2471 		break;
2472 	}
2473 	rcu_read_unlock();
2474 
2475 	if (&ptype->list == head)
2476 		goto normal;
2477 
2478 	same_flow = NAPI_GRO_CB(skb)->same_flow;
2479 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2480 
2481 	if (pp) {
2482 		struct sk_buff *nskb = *pp;
2483 
2484 		*pp = nskb->next;
2485 		nskb->next = NULL;
2486 		napi_gro_complete(nskb);
2487 		napi->gro_count--;
2488 	}
2489 
2490 	if (same_flow)
2491 		goto ok;
2492 
2493 	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2494 		goto normal;
2495 
2496 	napi->gro_count++;
2497 	NAPI_GRO_CB(skb)->count = 1;
2498 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2499 	skb->next = napi->gro_list;
2500 	napi->gro_list = skb;
2501 	ret = GRO_HELD;
2502 
2503 pull:
2504 	if (skb_headlen(skb) < skb_gro_offset(skb)) {
2505 		int grow = skb_gro_offset(skb) - skb_headlen(skb);
2506 
2507 		BUG_ON(skb->end - skb->tail < grow);
2508 
2509 		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2510 
2511 		skb->tail += grow;
2512 		skb->data_len -= grow;
2513 
2514 		skb_shinfo(skb)->frags[0].page_offset += grow;
2515 		skb_shinfo(skb)->frags[0].size -= grow;
2516 
2517 		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2518 			put_page(skb_shinfo(skb)->frags[0].page);
2519 			memmove(skb_shinfo(skb)->frags,
2520 				skb_shinfo(skb)->frags + 1,
2521 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
2522 		}
2523 	}
2524 
2525 ok:
2526 	return ret;
2527 
2528 normal:
2529 	ret = GRO_NORMAL;
2530 	goto pull;
2531 }
2532 EXPORT_SYMBOL(dev_gro_receive);
2533 
2534 static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2535 {
2536 	struct sk_buff *p;
2537 
2538 	if (netpoll_rx_on(skb))
2539 		return GRO_NORMAL;
2540 
2541 	for (p = napi->gro_list; p; p = p->next) {
2542 		NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2543 			&& !compare_ether_header(skb_mac_header(p),
2544 						 skb_gro_mac_header(skb));
2545 		NAPI_GRO_CB(p)->flush = 0;
2546 	}
2547 
2548 	return dev_gro_receive(napi, skb);
2549 }
2550 
2551 int napi_skb_finish(int ret, struct sk_buff *skb)
2552 {
2553 	int err = NET_RX_SUCCESS;
2554 
2555 	switch (ret) {
2556 	case GRO_NORMAL:
2557 		return netif_receive_skb(skb);
2558 
2559 	case GRO_DROP:
2560 		err = NET_RX_DROP;
2561 		/* fall through */
2562 
2563 	case GRO_MERGED_FREE:
2564 		kfree_skb(skb);
2565 		break;
2566 	}
2567 
2568 	return err;
2569 }
2570 EXPORT_SYMBOL(napi_skb_finish);
2571 
2572 void skb_gro_reset_offset(struct sk_buff *skb)
2573 {
2574 	NAPI_GRO_CB(skb)->data_offset = 0;
2575 	NAPI_GRO_CB(skb)->frag0 = NULL;
2576 	NAPI_GRO_CB(skb)->frag0_len = 0;
2577 
2578 	if (skb->mac_header == skb->tail &&
2579 	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
2580 		NAPI_GRO_CB(skb)->frag0 =
2581 			page_address(skb_shinfo(skb)->frags[0].page) +
2582 			skb_shinfo(skb)->frags[0].page_offset;
2583 		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2584 	}
2585 }
2586 EXPORT_SYMBOL(skb_gro_reset_offset);
2587 
2588 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2589 {
2590 	skb_gro_reset_offset(skb);
2591 
2592 	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
2593 }
2594 EXPORT_SYMBOL(napi_gro_receive);
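/*
 * Note for driver authors (informal, based on the code above): inside a
 * NAPI ->poll() routine, passing received skbs to napi_gro_receive()
 * instead of netif_receive_skb() lets the stack merge packets of the
 * same flow before they reach the protocol layers, e.g.
 *
 *	napi_gro_receive(&priv->napi, skb);
 *
 * where priv->napi is the napi_struct registered with netif_napi_add()
 * (the priv layout is an assumption made for this example).
 */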
2595 
2596 void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2597 {
2598 	__skb_pull(skb, skb_headlen(skb));
2599 	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2600 
2601 	napi->skb = skb;
2602 }
2603 EXPORT_SYMBOL(napi_reuse_skb);
2604 
2605 struct sk_buff *napi_get_frags(struct napi_struct *napi)
2606 {
2607 	struct net_device *dev = napi->dev;
2608 	struct sk_buff *skb = napi->skb;
2609 
2610 	if (!skb) {
2611 		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
2612 		if (!skb)
2613 			goto out;
2614 
2615 		skb_reserve(skb, NET_IP_ALIGN);
2616 
2617 		napi->skb = skb;
2618 	}
2619 
2620 out:
2621 	return skb;
2622 }
2623 EXPORT_SYMBOL(napi_get_frags);
2624 
2625 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2626 {
2627 	int err = NET_RX_SUCCESS;
2628 
2629 	switch (ret) {
2630 	case GRO_NORMAL:
2631 	case GRO_HELD:
2632 		skb->protocol = eth_type_trans(skb, napi->dev);
2633 
2634 		if (ret == GRO_NORMAL)
2635 			return netif_receive_skb(skb);
2636 
2637 		skb_gro_pull(skb, -ETH_HLEN);
2638 		break;
2639 
2640 	case GRO_DROP:
2641 		err = NET_RX_DROP;
2642 		/* fall through */
2643 
2644 	case GRO_MERGED_FREE:
2645 		napi_reuse_skb(napi, skb);
2646 		break;
2647 	}
2648 
2649 	return err;
2650 }
2651 EXPORT_SYMBOL(napi_frags_finish);
2652 
2653 struct sk_buff *napi_frags_skb(struct napi_struct *napi)
2654 {
2655 	struct sk_buff *skb = napi->skb;
2656 	struct ethhdr *eth;
2657 	unsigned int hlen;
2658 	unsigned int off;
2659 
2660 	napi->skb = NULL;
2661 
2662 	skb_reset_mac_header(skb);
2663 	skb_gro_reset_offset(skb);
2664 
2665 	off = skb_gro_offset(skb);
2666 	hlen = off + sizeof(*eth);
2667 	eth = skb_gro_header_fast(skb, off);
2668 	if (skb_gro_header_hard(skb, hlen)) {
2669 		eth = skb_gro_header_slow(skb, hlen, off);
2670 		if (unlikely(!eth)) {
2671 			napi_reuse_skb(napi, skb);
2672 			skb = NULL;
2673 			goto out;
2674 		}
2675 	}
2676 
2677 	skb_gro_pull(skb, sizeof(*eth));
2678 
2679 	/*
2680 	 * This works because the only protocols we care about don't require
2681 	 * special handling.  We'll fix it up properly at the end.
2682 	 */
2683 	skb->protocol = eth->h_proto;
2684 
2685 out:
2686 	return skb;
2687 }
2688 EXPORT_SYMBOL(napi_frags_skb);
2689 
2690 int napi_gro_frags(struct napi_struct *napi)
2691 {
2692 	struct sk_buff *skb = napi_frags_skb(napi);
2693 
2694 	if (!skb)
2695 		return NET_RX_DROP;
2696 
2697 	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2698 }
2699 EXPORT_SYMBOL(napi_gro_frags);
2700 
2701 static int process_backlog(struct napi_struct *napi, int quota)
2702 {
2703 	int work = 0;
2704 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
2705 	unsigned long start_time = jiffies;
2706 
2707 	napi->weight = weight_p;
2708 	do {
2709 		struct sk_buff *skb;
2710 
2711 		local_irq_disable();
2712 		skb = __skb_dequeue(&queue->input_pkt_queue);
2713 		if (!skb) {
2714 			__napi_complete(napi);
2715 			local_irq_enable();
2716 			break;
2717 		}
2718 		local_irq_enable();
2719 
2720 		netif_receive_skb(skb);
2721 	} while (++work < quota && jiffies == start_time);
2722 
2723 	return work;
2724 }
2725 
2726 /**
2727  * __napi_schedule - schedule for receive
2728  * @n: entry to schedule
2729  *
2730  * The entry's receive function will be scheduled to run
2731  */
2732 void __napi_schedule(struct napi_struct *n)
2733 {
2734 	unsigned long flags;
2735 
2736 	local_irq_save(flags);
2737 	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2738 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2739 	local_irq_restore(flags);
2740 }
2741 EXPORT_SYMBOL(__napi_schedule);
2742 
2743 void __napi_complete(struct napi_struct *n)
2744 {
2745 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2746 	BUG_ON(n->gro_list);
2747 
2748 	list_del(&n->poll_list);
2749 	smp_mb__before_clear_bit();
2750 	clear_bit(NAPI_STATE_SCHED, &n->state);
2751 }
2752 EXPORT_SYMBOL(__napi_complete);
2753 
2754 void napi_complete(struct napi_struct *n)
2755 {
2756 	unsigned long flags;
2757 
2758 	/*
2759 	 * Don't let NAPI dequeue from the CPU poll list
2760 	 * just in case it's running on a different CPU.
2761 	 */
2762 	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2763 		return;
2764 
2765 	napi_gro_flush(n);
2766 	local_irq_save(flags);
2767 	__napi_complete(n);
2768 	local_irq_restore(flags);
2769 }
2770 EXPORT_SYMBOL(napi_complete);
2771 
2772 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2773 		    int (*poll)(struct napi_struct *, int), int weight)
2774 {
2775 	INIT_LIST_HEAD(&napi->poll_list);
2776 	napi->gro_count = 0;
2777 	napi->gro_list = NULL;
2778 	napi->skb = NULL;
2779 	napi->poll = poll;
2780 	napi->weight = weight;
2781 	list_add(&napi->dev_list, &dev->napi_list);
2782 	napi->dev = dev;
2783 #ifdef CONFIG_NETPOLL
2784 	spin_lock_init(&napi->poll_lock);
2785 	napi->poll_owner = -1;
2786 #endif
2787 	set_bit(NAPI_STATE_SCHED, &napi->state);
2788 }
2789 EXPORT_SYMBOL(netif_napi_add);
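/*
 * Illustrative sketch (not part of this file) of the usual NAPI driver
 * pattern built around netif_napi_add(), napi_schedule() and
 * napi_complete().  The example_* helpers and the priv layout are
 * assumptions made up for this example; error handling is omitted.
 *
 *	static int example_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = 0;
 *
 *		while (work < budget && example_rx_pending(napi->dev)) {
 *			struct sk_buff *skb = example_rx_one(napi->dev);
 *
 *			netif_receive_skb(skb);
 *			work++;
 *		}
 *
 *		if (work < budget) {
 *			napi_complete(napi);
 *			example_enable_rx_irq(napi->dev);
 *		}
 *		return work;
 *	}
 *
 * The driver registers the poll routine once in its probe path with
 * netif_napi_add(dev, &priv->napi, example_poll, 64), enables it from
 * ndo_open with napi_enable(&priv->napi), and its rx interrupt handler
 * masks further rx interrupts and calls napi_schedule(&priv->napi).
 */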
2790 
2791 void netif_napi_del(struct napi_struct *napi)
2792 {
2793 	struct sk_buff *skb, *next;
2794 
2795 	list_del_init(&napi->dev_list);
2796 	napi_free_frags(napi);
2797 
2798 	for (skb = napi->gro_list; skb; skb = next) {
2799 		next = skb->next;
2800 		skb->next = NULL;
2801 		kfree_skb(skb);
2802 	}
2803 
2804 	napi->gro_list = NULL;
2805 	napi->gro_count = 0;
2806 }
2807 EXPORT_SYMBOL(netif_napi_del);
2808 
2809 
2810 static void net_rx_action(struct softirq_action *h)
2811 {
2812 	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2813 	unsigned long time_limit = jiffies + 2;
2814 	int budget = netdev_budget;
2815 	void *have;
2816 
2817 	local_irq_disable();
2818 
2819 	while (!list_empty(list)) {
2820 		struct napi_struct *n;
2821 		int work, weight;
2822 
2823 		/* If the softirq window is exhausted then punt.
2824 		 * Allow this to run for 2 jiffies, which allows
2825 		 * an average latency of 1.5/HZ.
2826 		 */
2827 		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
2828 			goto softnet_break;
2829 
2830 		local_irq_enable();
2831 
2832 		/* Even though interrupts have been re-enabled, this
2833 		 * access is safe because interrupts can only add new
2834 		 * entries to the tail of this list, and only ->poll()
2835 		 * calls can remove this head entry from the list.
2836 		 */
2837 		n = list_entry(list->next, struct napi_struct, poll_list);
2838 
2839 		have = netpoll_poll_lock(n);
2840 
2841 		weight = n->weight;
2842 
2843 		/* This NAPI_STATE_SCHED test is for avoiding a race
2844 		 * with netpoll's poll_napi().  Only the entity which
2845 		 * obtains the lock and sees NAPI_STATE_SCHED set will
2846 		 * actually make the ->poll() call.  Therefore we avoid
2847 		 * accidentally calling ->poll() when NAPI is not scheduled.
2848 		 */
2849 		work = 0;
2850 		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
2851 			work = n->poll(n, weight);
2852 			trace_napi_poll(n);
2853 		}
2854 
2855 		WARN_ON_ONCE(work > weight);
2856 
2857 		budget -= work;
2858 
2859 		local_irq_disable();
2860 
2861 		/* Drivers must not modify the NAPI state if they
2862 		 * consume the entire weight.  In such cases this code
2863 		 * still "owns" the NAPI instance and therefore can
2864 		 * move the instance around on the list at-will.
2865 		 */
2866 		if (unlikely(work == weight)) {
2867 			if (unlikely(napi_disable_pending(n))) {
2868 				local_irq_enable();
2869 				napi_complete(n);
2870 				local_irq_disable();
2871 			} else
2872 				list_move_tail(&n->poll_list, list);
2873 		}
2874 
2875 		netpoll_poll_unlock(have);
2876 	}
2877 out:
2878 	local_irq_enable();
2879 
2880 #ifdef CONFIG_NET_DMA
2881 	/*
2882 	 * There may not be any more sk_buffs coming right now, so push
2883 	 * any pending DMA copies to hardware
2884 	 */
2885 	dma_issue_pending_all();
2886 #endif
2887 
2888 	return;
2889 
2890 softnet_break:
2891 	__get_cpu_var(netdev_rx_stat).time_squeeze++;
2892 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
2893 	goto out;
2894 }
2895 
2896 static gifconf_func_t *gifconf_list[NPROTO];
2897 
2898 /**
2899  *	register_gifconf	-	register a SIOCGIF handler
2900  *	@family: Address family
2901  *	@gifconf: Function handler
2902  *
2903  *	Register protocol dependent address dumping routines. The handler
2904  *	that is passed must not be freed or reused until it has been replaced
2905  *	by another handler.
2906  */
2907 int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
2908 {
2909 	if (family >= NPROTO)
2910 		return -EINVAL;
2911 	gifconf_list[family] = gifconf;
2912 	return 0;
2913 }
2914 EXPORT_SYMBOL(register_gifconf);
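/*
 * Illustrative sketch (not part of this file): an address family
 * registers its SIOCGIFCONF helper once at init time; IPv4, for
 * instance, registers inet_gifconf() for PF_INET.  The trivial
 * example_gifconf() handler (reporting no addresses) and the PF_EXAMPLE
 * family constant are made up for this example.
 *
 *	static int example_gifconf(struct net_device *dev,
 *				   char __user *buf, int len)
 *	{
 *		return 0;
 *	}
 *
 *	register_gifconf(PF_EXAMPLE, example_gifconf);
 */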
2915 
2916 
2917 /*
2918  *	Map an interface index to its name (SIOCGIFNAME)
2919  */
2920 
2921 /*
2922  *	We need this ioctl for efficient implementation of the
2923  *	if_indextoname() function required by the IPv6 API.  Without
2924  *	it, we would have to search all the interfaces to find a
2925  *	match.  --pb
2926  */
2927 
2928 static int dev_ifname(struct net *net, struct ifreq __user *arg)
2929 {
2930 	struct net_device *dev;
2931 	struct ifreq ifr;
2932 
2933 	/*
2934 	 *	Fetch the caller's info block.
2935 	 */
2936 
2937 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2938 		return -EFAULT;
2939 
2940 	read_lock(&dev_base_lock);
2941 	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2942 	if (!dev) {
2943 		read_unlock(&dev_base_lock);
2944 		return -ENODEV;
2945 	}
2946 
2947 	strcpy(ifr.ifr_name, dev->name);
2948 	read_unlock(&dev_base_lock);
2949 
2950 	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2951 		return -EFAULT;
2952 	return 0;
2953 }
2954 
2955 /*
2956  *	Perform a SIOCGIFCONF call. This structure will change
2957  *	size eventually, and there is nothing I can do about it.
2958  *	Thus we will need a 'compatibility mode'.
2959  */
2960 
2961 static int dev_ifconf(struct net *net, char __user *arg)
2962 {
2963 	struct ifconf ifc;
2964 	struct net_device *dev;
2965 	char __user *pos;
2966 	int len;
2967 	int total;
2968 	int i;
2969 
2970 	/*
2971 	 *	Fetch the caller's info block.
2972 	 */
2973 
2974 	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2975 		return -EFAULT;
2976 
2977 	pos = ifc.ifc_buf;
2978 	len = ifc.ifc_len;
2979 
2980 	/*
2981 	 *	Loop over the interfaces, and write an info block for each.
2982 	 */
2983 
2984 	total = 0;
2985 	for_each_netdev(net, dev) {
2986 		for (i = 0; i < NPROTO; i++) {
2987 			if (gifconf_list[i]) {
2988 				int done;
2989 				if (!pos)
2990 					done = gifconf_list[i](dev, NULL, 0);
2991 				else
2992 					done = gifconf_list[i](dev, pos + total,
2993 							       len - total);
2994 				if (done < 0)
2995 					return -EFAULT;
2996 				total += done;
2997 			}
2998 		}
2999 	}
3000 
3001 	/*
3002 	 *	All done.  Write the updated control block back to the caller.
3003 	 */
3004 	ifc.ifc_len = total;
3005 
3006 	/*
3007 	 * 	Both BSD and Solaris return 0 here, so we do too.
3008 	 */
3009 	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3010 }
3011 
3012 #ifdef CONFIG_PROC_FS
3013 /*
3014  *	This is invoked by the /proc filesystem handler to display a device
3015  *	in detail.
3016  */
3017 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3018 	__acquires(dev_base_lock)
3019 {
3020 	struct net *net = seq_file_net(seq);
3021 	loff_t off;
3022 	struct net_device *dev;
3023 
3024 	read_lock(&dev_base_lock);
3025 	if (!*pos)
3026 		return SEQ_START_TOKEN;
3027 
3028 	off = 1;
3029 	for_each_netdev(net, dev)
3030 		if (off++ == *pos)
3031 			return dev;
3032 
3033 	return NULL;
3034 }
3035 
3036 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3037 {
3038 	struct net *net = seq_file_net(seq);
3039 	++*pos;
3040 	return v == SEQ_START_TOKEN ?
3041 		first_net_device(net) : next_net_device((struct net_device *)v);
3042 }
3043 
3044 void dev_seq_stop(struct seq_file *seq, void *v)
3045 	__releases(dev_base_lock)
3046 {
3047 	read_unlock(&dev_base_lock);
3048 }
3049 
3050 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3051 {
3052 	const struct net_device_stats *stats = dev_get_stats(dev);
3053 
3054 	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3055 		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3056 		   dev->name, stats->rx_bytes, stats->rx_packets,
3057 		   stats->rx_errors,
3058 		   stats->rx_dropped + stats->rx_missed_errors,
3059 		   stats->rx_fifo_errors,
3060 		   stats->rx_length_errors + stats->rx_over_errors +
3061 		    stats->rx_crc_errors + stats->rx_frame_errors,
3062 		   stats->rx_compressed, stats->multicast,
3063 		   stats->tx_bytes, stats->tx_packets,
3064 		   stats->tx_errors, stats->tx_dropped,
3065 		   stats->tx_fifo_errors, stats->collisions,
3066 		   stats->tx_carrier_errors +
3067 		    stats->tx_aborted_errors +
3068 		    stats->tx_window_errors +
3069 		    stats->tx_heartbeat_errors,
3070 		   stats->tx_compressed);
3071 }
3072 
3073 /*
3074  *	Called from the PROCfs module. This now uses the new arbitrary-sized
3075  *	/proc/net interface to create /proc/net/dev.
3076  */
3077 static int dev_seq_show(struct seq_file *seq, void *v)
3078 {
3079 	if (v == SEQ_START_TOKEN)
3080 		seq_puts(seq, "Inter-|   Receive                            "
3081 			      "                    |  Transmit\n"
3082 			      " face |bytes    packets errs drop fifo frame "
3083 			      "compressed multicast|bytes    packets errs "
3084 			      "drop fifo colls carrier compressed\n");
3085 	else
3086 		dev_seq_printf_stats(seq, v);
3087 	return 0;
3088 }
3089 
3090 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3091 {
3092 	struct netif_rx_stats *rc = NULL;
3093 
3094 	while (*pos < nr_cpu_ids)
3095 		if (cpu_online(*pos)) {
3096 			rc = &per_cpu(netdev_rx_stat, *pos);
3097 			break;
3098 		} else
3099 			++*pos;
3100 	return rc;
3101 }
3102 
3103 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3104 {
3105 	return softnet_get_online(pos);
3106 }
3107 
3108 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3109 {
3110 	++*pos;
3111 	return softnet_get_online(pos);
3112 }
3113 
3114 static void softnet_seq_stop(struct seq_file *seq, void *v)
3115 {
3116 }
3117 
3118 static int softnet_seq_show(struct seq_file *seq, void *v)
3119 {
3120 	struct netif_rx_stats *s = v;
3121 
3122 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3123 		   s->total, s->dropped, s->time_squeeze, 0,
3124 		   0, 0, 0, 0, /* was fastroute */
3125 		   s->cpu_collision);
3126 	return 0;
3127 }
3128 
3129 static const struct seq_operations dev_seq_ops = {
3130 	.start = dev_seq_start,
3131 	.next  = dev_seq_next,
3132 	.stop  = dev_seq_stop,
3133 	.show  = dev_seq_show,
3134 };
3135 
3136 static int dev_seq_open(struct inode *inode, struct file *file)
3137 {
3138 	return seq_open_net(inode, file, &dev_seq_ops,
3139 			    sizeof(struct seq_net_private));
3140 }
3141 
3142 static const struct file_operations dev_seq_fops = {
3143 	.owner	 = THIS_MODULE,
3144 	.open    = dev_seq_open,
3145 	.read    = seq_read,
3146 	.llseek  = seq_lseek,
3147 	.release = seq_release_net,
3148 };
3149 
3150 static const struct seq_operations softnet_seq_ops = {
3151 	.start = softnet_seq_start,
3152 	.next  = softnet_seq_next,
3153 	.stop  = softnet_seq_stop,
3154 	.show  = softnet_seq_show,
3155 };
3156 
3157 static int softnet_seq_open(struct inode *inode, struct file *file)
3158 {
3159 	return seq_open(file, &softnet_seq_ops);
3160 }
3161 
3162 static const struct file_operations softnet_seq_fops = {
3163 	.owner	 = THIS_MODULE,
3164 	.open    = softnet_seq_open,
3165 	.read    = seq_read,
3166 	.llseek  = seq_lseek,
3167 	.release = seq_release,
3168 };
3169 
3170 static void *ptype_get_idx(loff_t pos)
3171 {
3172 	struct packet_type *pt = NULL;
3173 	loff_t i = 0;
3174 	int t;
3175 
3176 	list_for_each_entry_rcu(pt, &ptype_all, list) {
3177 		if (i == pos)
3178 			return pt;
3179 		++i;
3180 	}
3181 
3182 	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3183 		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3184 			if (i == pos)
3185 				return pt;
3186 			++i;
3187 		}
3188 	}
3189 	return NULL;
3190 }
3191 
3192 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3193 	__acquires(RCU)
3194 {
3195 	rcu_read_lock();
3196 	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3197 }
3198 
3199 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3200 {
3201 	struct packet_type *pt;
3202 	struct list_head *nxt;
3203 	int hash;
3204 
3205 	++*pos;
3206 	if (v == SEQ_START_TOKEN)
3207 		return ptype_get_idx(0);
3208 
3209 	pt = v;
3210 	nxt = pt->list.next;
3211 	if (pt->type == htons(ETH_P_ALL)) {
3212 		if (nxt != &ptype_all)
3213 			goto found;
3214 		hash = 0;
3215 		nxt = ptype_base[0].next;
3216 	} else
3217 		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3218 
3219 	while (nxt == &ptype_base[hash]) {
3220 		if (++hash >= PTYPE_HASH_SIZE)
3221 			return NULL;
3222 		nxt = ptype_base[hash].next;
3223 	}
3224 found:
3225 	return list_entry(nxt, struct packet_type, list);
3226 }
3227 
3228 static void ptype_seq_stop(struct seq_file *seq, void *v)
3229 	__releases(RCU)
3230 {
3231 	rcu_read_unlock();
3232 }
3233 
3234 static int ptype_seq_show(struct seq_file *seq, void *v)
3235 {
3236 	struct packet_type *pt = v;
3237 
3238 	if (v == SEQ_START_TOKEN)
3239 		seq_puts(seq, "Type Device      Function\n");
3240 	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
3241 		if (pt->type == htons(ETH_P_ALL))
3242 			seq_puts(seq, "ALL ");
3243 		else
3244 			seq_printf(seq, "%04x", ntohs(pt->type));
3245 
3246 		seq_printf(seq, " %-8s %pF\n",
3247 			   pt->dev ? pt->dev->name : "", pt->func);
3248 	}
3249 
3250 	return 0;
3251 }
3252 
3253 static const struct seq_operations ptype_seq_ops = {
3254 	.start = ptype_seq_start,
3255 	.next  = ptype_seq_next,
3256 	.stop  = ptype_seq_stop,
3257 	.show  = ptype_seq_show,
3258 };
3259 
3260 static int ptype_seq_open(struct inode *inode, struct file *file)
3261 {
3262 	return seq_open_net(inode, file, &ptype_seq_ops,
3263 			sizeof(struct seq_net_private));
3264 }
3265 
3266 static const struct file_operations ptype_seq_fops = {
3267 	.owner	 = THIS_MODULE,
3268 	.open    = ptype_seq_open,
3269 	.read    = seq_read,
3270 	.llseek  = seq_lseek,
3271 	.release = seq_release_net,
3272 };
3273 
3274 
3275 static int __net_init dev_proc_net_init(struct net *net)
3276 {
3277 	int rc = -ENOMEM;
3278 
3279 	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
3280 		goto out;
3281 	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
3282 		goto out_dev;
3283 	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
3284 		goto out_softnet;
3285 
3286 	if (wext_proc_init(net))
3287 		goto out_ptype;
3288 	rc = 0;
3289 out:
3290 	return rc;
3291 out_ptype:
3292 	proc_net_remove(net, "ptype");
3293 out_softnet:
3294 	proc_net_remove(net, "softnet_stat");
3295 out_dev:
3296 	proc_net_remove(net, "dev");
3297 	goto out;
3298 }
3299 
3300 static void __net_exit dev_proc_net_exit(struct net *net)
3301 {
3302 	wext_proc_exit(net);
3303 
3304 	proc_net_remove(net, "ptype");
3305 	proc_net_remove(net, "softnet_stat");
3306 	proc_net_remove(net, "dev");
3307 }
3308 
3309 static struct pernet_operations __net_initdata dev_proc_ops = {
3310 	.init = dev_proc_net_init,
3311 	.exit = dev_proc_net_exit,
3312 };
3313 
3314 static int __init dev_proc_init(void)
3315 {
3316 	return register_pernet_subsys(&dev_proc_ops);
3317 }
3318 #else
3319 #define dev_proc_init() 0
3320 #endif	/* CONFIG_PROC_FS */
3321 
3322 
3323 /**
3324  *	netdev_set_master	-	set up master/slave pair
3325  *	@slave: slave device
3326  *	@master: new master device
3327  *
3328  *	Changes the master device of the slave. Pass %NULL to break the
3329  *	bonding. The caller must hold the RTNL semaphore. On a failure
3330  *	a negative errno code is returned. On success the reference counts
3331  *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3332  *	function returns zero.
3333  */
3334 int netdev_set_master(struct net_device *slave, struct net_device *master)
3335 {
3336 	struct net_device *old = slave->master;
3337 
3338 	ASSERT_RTNL();
3339 
3340 	if (master) {
3341 		if (old)
3342 			return -EBUSY;
3343 		dev_hold(master);
3344 	}
3345 
3346 	slave->master = master;
3347 
3348 	synchronize_net();
3349 
3350 	if (old)
3351 		dev_put(old);
3352 
3353 	if (master)
3354 		slave->flags |= IFF_SLAVE;
3355 	else
3356 		slave->flags &= ~IFF_SLAVE;
3357 
3358 	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3359 	return 0;
3360 }
3361 EXPORT_SYMBOL(netdev_set_master);
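/*
 * Illustrative sketch (not part of this file): a bonding-style driver
 * enslaves a device and later releases it; both calls run under
 * rtnl_lock() as required above.  The example_* function names are made
 * up for this example.
 *
 *	static int example_enslave(struct net_device *bond_dev,
 *				   struct net_device *slave_dev)
 *	{
 *		return netdev_set_master(slave_dev, bond_dev);
 *	}
 *
 *	static void example_release(struct net_device *slave_dev)
 *	{
 *		netdev_set_master(slave_dev, NULL);
 *	}
 */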
3362 
3363 static void dev_change_rx_flags(struct net_device *dev, int flags)
3364 {
3365 	const struct net_device_ops *ops = dev->netdev_ops;
3366 
3367 	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3368 		ops->ndo_change_rx_flags(dev, flags);
3369 }
3370 
3371 static int __dev_set_promiscuity(struct net_device *dev, int inc)
3372 {
3373 	unsigned short old_flags = dev->flags;
3374 	uid_t uid;
3375 	gid_t gid;
3376 
3377 	ASSERT_RTNL();
3378 
3379 	dev->flags |= IFF_PROMISC;
3380 	dev->promiscuity += inc;
3381 	if (dev->promiscuity == 0) {
3382 		/*
3383 		 * Avoid overflow.
3384 		 * If inc causes overflow, leave promisc untouched and return an error.
3385 		 */
3386 		if (inc < 0)
3387 			dev->flags &= ~IFF_PROMISC;
3388 		else {
3389 			dev->promiscuity -= inc;
3390 			printk(KERN_WARNING "%s: promiscuity touches roof, "
3391 				"set promiscuity failed, promiscuity feature "
3392 				"of device might be broken.\n", dev->name);
3393 			return -EOVERFLOW;
3394 		}
3395 	}
3396 	if (dev->flags != old_flags) {
3397 		printk(KERN_INFO "device %s %s promiscuous mode\n",
3398 		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3399 							       "left");
3400 		if (audit_enabled) {
3401 			current_uid_gid(&uid, &gid);
3402 			audit_log(current->audit_context, GFP_ATOMIC,
3403 				AUDIT_ANOM_PROMISCUOUS,
3404 				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3405 				dev->name, (dev->flags & IFF_PROMISC),
3406 				(old_flags & IFF_PROMISC),
3407 				audit_get_loginuid(current),
3408 				uid, gid,
3409 				audit_get_sessionid(current));
3410 		}
3411 
3412 		dev_change_rx_flags(dev, IFF_PROMISC);
3413 	}
3414 	return 0;
3415 }
3416 
3417 /**
3418  *	dev_set_promiscuity	- update promiscuity count on a device
3419  *	@dev: device
3420  *	@inc: modifier
3421  *
3422  *	Add or remove promiscuity from a device. While the count in the device
3423  *	remains above zero the interface remains promiscuous. Once it hits zero
3424  *	the device reverts to normal filtering operation. A negative inc
3425  *	value is used to drop promiscuity on the device.
3426  *	Return 0 if successful or a negative errno code on error.
3427  */
3428 int dev_set_promiscuity(struct net_device *dev, int inc)
3429 {
3430 	unsigned short old_flags = dev->flags;
3431 	int err;
3432 
3433 	err = __dev_set_promiscuity(dev, inc);
3434 	if (err < 0)
3435 		return err;
3436 	if (dev->flags != old_flags)
3437 		dev_set_rx_mode(dev);
3438 	return err;
3439 }
3440 EXPORT_SYMBOL(dev_set_promiscuity);
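/*
 * Illustrative sketch (not part of this file): a capture-style user
 * takes one promiscuity reference while it is active and drops it when
 * done; the device stays promiscuous as long as any such reference is
 * held.  The example_* names are made up for this example.
 *
 *	static int example_capture_start(struct net_device *dev)
 *	{
 *		int err;
 *
 *		rtnl_lock();
 *		err = dev_set_promiscuity(dev, 1);
 *		rtnl_unlock();
 *		return err;
 *	}
 *
 *	static void example_capture_stop(struct net_device *dev)
 *	{
 *		rtnl_lock();
 *		dev_set_promiscuity(dev, -1);
 *		rtnl_unlock();
 *	}
 */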
3441 
3442 /**
3443  *	dev_set_allmulti	- update allmulti count on a device
3444  *	@dev: device
3445  *	@inc: modifier
3446  *
3447  *	Add or remove reception of all multicast frames to a device. While the
3448  *	count in the device remains above zero the interface remains listening
3449  *	to all multicast frames. Once it hits zero the device reverts to normal
3450  *	filtering operation. A negative @inc value is used to drop the counter
3451  *	when releasing a resource needing all multicasts.
3452  *	Return 0 if successful or a negative errno code on error.
3453  */
3454 
3455 int dev_set_allmulti(struct net_device *dev, int inc)
3456 {
3457 	unsigned short old_flags = dev->flags;
3458 
3459 	ASSERT_RTNL();
3460 
3461 	dev->flags |= IFF_ALLMULTI;
3462 	dev->allmulti += inc;
3463 	if (dev->allmulti == 0) {
3464 		/*
3465 		 * Avoid overflow.
3466 		 * If inc causes overflow, leave allmulti untouched and return an error.
3467 		 */
3468 		if (inc < 0)
3469 			dev->flags &= ~IFF_ALLMULTI;
3470 		else {
3471 			dev->allmulti -= inc;
3472 			printk(KERN_WARNING "%s: allmulti touches roof, "
3473 				"set allmulti failed, allmulti feature of "
3474 				"device might be broken.\n", dev->name);
3475 			return -EOVERFLOW;
3476 		}
3477 	}
3478 	if (dev->flags ^ old_flags) {
3479 		dev_change_rx_flags(dev, IFF_ALLMULTI);
3480 		dev_set_rx_mode(dev);
3481 	}
3482 	return 0;
3483 }
3484 EXPORT_SYMBOL(dev_set_allmulti);
3485 
3486 /*
3487  *	Upload unicast and multicast address lists to device and
3488  *	configure RX filtering. When the device doesn't support unicast
3489  *	filtering it is put in promiscuous mode while unicast addresses
3490  *	are present.
3491  */
3492 void __dev_set_rx_mode(struct net_device *dev)
3493 {
3494 	const struct net_device_ops *ops = dev->netdev_ops;
3495 
3496 	/* dev_open will call this function so the list will stay sane. */
3497 	if (!(dev->flags&IFF_UP))
3498 		return;
3499 
3500 	if (!netif_device_present(dev))
3501 		return;
3502 
3503 	if (ops->ndo_set_rx_mode)
3504 		ops->ndo_set_rx_mode(dev);
3505 	else {
3506 		/* Unicast address changes may only happen under the rtnl,
3507 		 * therefore calling __dev_set_promiscuity here is safe.
3508 		 */
3509 		if (dev->uc.count > 0 && !dev->uc_promisc) {
3510 			__dev_set_promiscuity(dev, 1);
3511 			dev->uc_promisc = 1;
3512 		} else if (dev->uc.count == 0 && dev->uc_promisc) {
3513 			__dev_set_promiscuity(dev, -1);
3514 			dev->uc_promisc = 0;
3515 		}
3516 
3517 		if (ops->ndo_set_multicast_list)
3518 			ops->ndo_set_multicast_list(dev);
3519 	}
3520 }
3521 
3522 void dev_set_rx_mode(struct net_device *dev)
3523 {
3524 	netif_addr_lock_bh(dev);
3525 	__dev_set_rx_mode(dev);
3526 	netif_addr_unlock_bh(dev);
3527 }
3528 
3529 /* hw addresses list handling functions */
3530 
3531 static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3532 			 int addr_len, unsigned char addr_type)
3533 {
3534 	struct netdev_hw_addr *ha;
3535 	int alloc_size;
3536 
3537 	if (addr_len > MAX_ADDR_LEN)
3538 		return -EINVAL;
3539 
3540 	list_for_each_entry(ha, &list->list, list) {
3541 		if (!memcmp(ha->addr, addr, addr_len) &&
3542 		    ha->type == addr_type) {
3543 			ha->refcount++;
3544 			return 0;
3545 		}
3546 	}
3547 
3548 
3549 	alloc_size = sizeof(*ha);
3550 	if (alloc_size < L1_CACHE_BYTES)
3551 		alloc_size = L1_CACHE_BYTES;
3552 	ha = kmalloc(alloc_size, GFP_ATOMIC);
3553 	if (!ha)
3554 		return -ENOMEM;
3555 	memcpy(ha->addr, addr, addr_len);
3556 	ha->type = addr_type;
3557 	ha->refcount = 1;
3558 	ha->synced = false;
3559 	list_add_tail_rcu(&ha->list, &list->list);
3560 	list->count++;
3561 	return 0;
3562 }
3563 
3564 static void ha_rcu_free(struct rcu_head *head)
3565 {
3566 	struct netdev_hw_addr *ha;
3567 
3568 	ha = container_of(head, struct netdev_hw_addr, rcu_head);
3569 	kfree(ha);
3570 }
3571 
3572 static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3573 			 int addr_len, unsigned char addr_type)
3574 {
3575 	struct netdev_hw_addr *ha;
3576 
3577 	list_for_each_entry(ha, &list->list, list) {
3578 		if (!memcmp(ha->addr, addr, addr_len) &&
3579 		    (ha->type == addr_type || !addr_type)) {
3580 			if (--ha->refcount)
3581 				return 0;
3582 			list_del_rcu(&ha->list);
3583 			call_rcu(&ha->rcu_head, ha_rcu_free);
3584 			list->count--;
3585 			return 0;
3586 		}
3587 	}
3588 	return -ENOENT;
3589 }
3590 
3591 static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3592 				  struct netdev_hw_addr_list *from_list,
3593 				  int addr_len,
3594 				  unsigned char addr_type)
3595 {
3596 	int err;
3597 	struct netdev_hw_addr *ha, *ha2;
3598 	unsigned char type;
3599 
3600 	list_for_each_entry(ha, &from_list->list, list) {
3601 		type = addr_type ? addr_type : ha->type;
3602 		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3603 		if (err)
3604 			goto unroll;
3605 	}
3606 	return 0;
3607 
3608 unroll:
3609 	list_for_each_entry(ha2, &from_list->list, list) {
3610 		if (ha2 == ha)
3611 			break;
3612 		type = addr_type ? addr_type : ha2->type;
3613 		__hw_addr_del(to_list, ha2->addr, addr_len, type);
3614 	}
3615 	return err;
3616 }
3617 
3618 static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3619 				   struct netdev_hw_addr_list *from_list,
3620 				   int addr_len,
3621 				   unsigned char addr_type)
3622 {
3623 	struct netdev_hw_addr *ha;
3624 	unsigned char type;
3625 
3626 	list_for_each_entry(ha, &from_list->list, list) {
3627 		type = addr_type ? addr_type : ha->type;
3628 		__hw_addr_del(to_list, ha->addr, addr_len, type);
3629 	}
3630 }
3631 
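/*
 * Sync addresses from from_list into to_list: entries not yet synced are
 * added to to_list and gain an extra reference; entries whose only
 * remaining reference is the sync itself are removed from both lists.
 */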
3632 static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3633 			  struct netdev_hw_addr_list *from_list,
3634 			  int addr_len)
3635 {
3636 	int err = 0;
3637 	struct netdev_hw_addr *ha, *tmp;
3638 
3639 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3640 		if (!ha->synced) {
3641 			err = __hw_addr_add(to_list, ha->addr,
3642 					    addr_len, ha->type);
3643 			if (err)
3644 				break;
3645 			ha->synced = true;
3646 			ha->refcount++;
3647 		} else if (ha->refcount == 1) {
3648 			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3649 			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3650 		}
3651 	}
3652 	return err;
3653 }
3654 
3655 static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3656 			     struct netdev_hw_addr_list *from_list,
3657 			     int addr_len)
3658 {
3659 	struct netdev_hw_addr *ha, *tmp;
3660 
3661 	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3662 		if (ha->synced) {
3663 			__hw_addr_del(to_list, ha->addr,
3664 				      addr_len, ha->type);
3665 			ha->synced = false;
3666 			__hw_addr_del(from_list, ha->addr,
3667 				      addr_len, ha->type);
3668 		}
3669 	}
3670 }
3671 
3672 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3673 {
3674 	struct netdev_hw_addr *ha, *tmp;
3675 
3676 	list_for_each_entry_safe(ha, tmp, &list->list, list) {
3677 		list_del_rcu(&ha->list);
3678 		call_rcu(&ha->rcu_head, ha_rcu_free);
3679 	}
3680 	list->count = 0;
3681 }
3682 
3683 static void __hw_addr_init(struct netdev_hw_addr_list *list)
3684 {
3685 	INIT_LIST_HEAD(&list->list);
3686 	list->count = 0;
3687 }
3688 
3689 /* Device addresses handling functions */
3690 
3691 static void dev_addr_flush(struct net_device *dev)
3692 {
3693 	/* rtnl_mutex must be held here */
3694 
3695 	__hw_addr_flush(&dev->dev_addrs);
3696 	dev->dev_addr = NULL;
3697 }
3698 
3699 static int dev_addr_init(struct net_device *dev)
3700 {
3701 	unsigned char addr[MAX_ADDR_LEN];
3702 	struct netdev_hw_addr *ha;
3703 	int err;
3704 
3705 	/* rtnl_mutex must be held here */
3706 
3707 	__hw_addr_init(&dev->dev_addrs);
3708 	memset(addr, 0, sizeof(addr));
3709 	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3710 			    NETDEV_HW_ADDR_T_LAN);
3711 	if (!err) {
3712 		/*
3713 		 * Get the first (previously created) address from the list
3714 		 * and set dev_addr pointer to this location.
3715 		 */
3716 		ha = list_first_entry(&dev->dev_addrs.list,
3717 				      struct netdev_hw_addr, list);
3718 		dev->dev_addr = ha->addr;
3719 	}
3720 	return err;
3721 }
3722 
3723 /**
3724  *	dev_addr_add	- Add a device address
3725  *	@dev: device
3726  *	@addr: address to add
3727  *	@addr_type: address type
3728  *
3729  *	Add a device address to the device or increase the reference count if
3730  *	it already exists.
3731  *
3732  *	The caller must hold the rtnl_mutex.
3733  */
3734 int dev_addr_add(struct net_device *dev, unsigned char *addr,
3735 		 unsigned char addr_type)
3736 {
3737 	int err;
3738 
3739 	ASSERT_RTNL();
3740 
3741 	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3742 	if (!err)
3743 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3744 	return err;
3745 }
3746 EXPORT_SYMBOL(dev_addr_add);
3747 
3748 /**
3749  *	dev_addr_del	- Release a device address.
3750  *	@dev: device
3751  *	@addr: address to delete
3752  *	@addr_type: address type
3753  *
3754  *	Release reference to a device address and remove it from the device
3755  *	if the reference count drops to zero.
3756  *
3757  *	The caller must hold the rtnl_mutex.
3758  */
3759 int dev_addr_del(struct net_device *dev, unsigned char *addr,
3760 		 unsigned char addr_type)
3761 {
3762 	int err;
3763 	struct netdev_hw_addr *ha;
3764 
3765 	ASSERT_RTNL();
3766 
3767 	/*
3768 	 * We cannot remove the first address from the list because
3769 	 * dev->dev_addr points to it.
3770 	 */
3771 	ha = list_first_entry(&dev->dev_addrs.list,
3772 			      struct netdev_hw_addr, list);
3773 	if (ha->addr == dev->dev_addr && ha->refcount == 1)
3774 		return -ENOENT;
3775 
3776 	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3777 			    addr_type);
3778 	if (!err)
3779 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3780 	return err;
3781 }
3782 EXPORT_SYMBOL(dev_addr_del);
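/*
 * Illustrative sketch (not part of this file): adding and later removing
 * a secondary hardware address under the RTNL.  The locally administered
 * address value is made up for this example.
 *
 *	static unsigned char example_addr[ETH_ALEN] = {
 *		0x02, 0x00, 0x00, 0x00, 0x00, 0x01
 *	};
 *
 *	rtnl_lock();
 *	dev_addr_add(dev, example_addr, NETDEV_HW_ADDR_T_LAN);
 *	rtnl_unlock();
 *
 *	rtnl_lock();
 *	dev_addr_del(dev, example_addr, NETDEV_HW_ADDR_T_LAN);
 *	rtnl_unlock();
 */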
3783 
3784 /**
3785  *	dev_addr_add_multiple	- Add device addresses from another device
3786  *	@to_dev: device to which addresses will be added
3787  *	@from_dev: device from which addresses will be added
3788  *	@addr_type: address type - 0 means type will be used from from_dev
3789  *
3790  *	Add device addresses of the one device to another.
3791  **
3792  *	The caller must hold the rtnl_mutex.
3793  */
3794 int dev_addr_add_multiple(struct net_device *to_dev,
3795 			  struct net_device *from_dev,
3796 			  unsigned char addr_type)
3797 {
3798 	int err;
3799 
3800 	ASSERT_RTNL();
3801 
3802 	if (from_dev->addr_len != to_dev->addr_len)
3803 		return -EINVAL;
3804 	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3805 				     to_dev->addr_len, addr_type);
3806 	if (!err)
3807 		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3808 	return err;
3809 }
3810 EXPORT_SYMBOL(dev_addr_add_multiple);
3811 
3812 /**
3813  *	dev_addr_del_multiple	- Delete device addresses by another device
3814  *	@to_dev: device where the addresses will be deleted
3815  *	@from_dev: device whose addresses will be deleted from @to_dev
3816  *	@addr_type: address type - 0 means type will be used from from_dev
3817  *
3818  *	Deletes the addresses in @to_dev that are listed in @from_dev.
3819  *
3820  *	The caller must hold the rtnl_mutex.
3821  */
3822 int dev_addr_del_multiple(struct net_device *to_dev,
3823 			  struct net_device *from_dev,
3824 			  unsigned char addr_type)
3825 {
3826 	ASSERT_RTNL();
3827 
3828 	if (from_dev->addr_len != to_dev->addr_len)
3829 		return -EINVAL;
3830 	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
3831 			       to_dev->addr_len, addr_type);
3832 	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3833 	return 0;
3834 }
3835 EXPORT_SYMBOL(dev_addr_del_multiple);
3836 
3837 /* multicast addresses handling functions */
3838 
3839 int __dev_addr_delete(struct dev_addr_list **list, int *count,
3840 		      void *addr, int alen, int glbl)
3841 {
3842 	struct dev_addr_list *da;
3843 
3844 	for (; (da = *list) != NULL; list = &da->next) {
3845 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3846 		    alen == da->da_addrlen) {
3847 			if (glbl) {
3848 				int old_glbl = da->da_gusers;
3849 				da->da_gusers = 0;
3850 				if (old_glbl == 0)
3851 					break;
3852 			}
3853 			if (--da->da_users)
3854 				return 0;
3855 
3856 			*list = da->next;
3857 			kfree(da);
3858 			(*count)--;
3859 			return 0;
3860 		}
3861 	}
3862 	return -ENOENT;
3863 }
3864 
3865 int __dev_addr_add(struct dev_addr_list **list, int *count,
3866 		   void *addr, int alen, int glbl)
3867 {
3868 	struct dev_addr_list *da;
3869 
3870 	for (da = *list; da != NULL; da = da->next) {
3871 		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3872 		    da->da_addrlen == alen) {
3873 			if (glbl) {
3874 				int old_glbl = da->da_gusers;
3875 				da->da_gusers = 1;
3876 				if (old_glbl)
3877 					return 0;
3878 			}
3879 			da->da_users++;
3880 			return 0;
3881 		}
3882 	}
3883 
3884 	da = kzalloc(sizeof(*da), GFP_ATOMIC);
3885 	if (da == NULL)
3886 		return -ENOMEM;
3887 	memcpy(da->da_addr, addr, alen);
3888 	da->da_addrlen = alen;
3889 	da->da_users = 1;
3890 	da->da_gusers = glbl ? 1 : 0;
3891 	da->next = *list;
3892 	*list = da;
3893 	(*count)++;
3894 	return 0;
3895 }
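
/*
 * Illustrative sketch (not part of the original file): the two helpers above
 * reference-count each entry, so adding the same address twice and deleting
 * it once leaves it on the list. The list head, counter and multicast MAC
 * are made up for the example.
 *
 *	struct dev_addr_list *mc_list = NULL;
 *	int mc_count = 0;
 *	unsigned char mac[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
 *
 *	__dev_addr_add(&mc_list, &mc_count, mac, ETH_ALEN, 0);
 *	__dev_addr_add(&mc_list, &mc_count, mac, ETH_ALEN, 0);
 *		(the second add only bumps da_users to 2)
 *	__dev_addr_delete(&mc_list, &mc_count, mac, ETH_ALEN, 0);
 *		(entry is still on the list, da_users back to 1)
 *	__dev_addr_delete(&mc_list, &mc_count, mac, ETH_ALEN, 0);
 *		(now the entry is unlinked and freed)
 */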
3896 
3897 /**
3898  *	dev_unicast_delete	- Release secondary unicast address.
3899  *	@dev: device
3900  *	@addr: address to delete
3901  *
3902  *	Release reference to a secondary unicast address and remove it
3903  *	from the device if the reference count drops to zero.
3904  *
3905  * 	The caller must hold the rtnl_mutex.
3906  */
3907 int dev_unicast_delete(struct net_device *dev, void *addr)
3908 {
3909 	int err;
3910 
3911 	ASSERT_RTNL();
3912 
3913 	netif_addr_lock_bh(dev);
3914 	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
3915 			    NETDEV_HW_ADDR_T_UNICAST);
3916 	if (!err)
3917 		__dev_set_rx_mode(dev);
3918 	netif_addr_unlock_bh(dev);
3919 	return err;
3920 }
3921 EXPORT_SYMBOL(dev_unicast_delete);
3922 
3923 /**
3924  *	dev_unicast_add		- add a secondary unicast address
3925  *	@dev: device
3926  *	@addr: address to add
3927  *
3928  *	Add a secondary unicast address to the device or increase
3929  *	the reference count if it already exists.
3930  *
3931  *	The caller must hold the rtnl_mutex.
3932  */
3933 int dev_unicast_add(struct net_device *dev, void *addr)
3934 {
3935 	int err;
3936 
3937 	ASSERT_RTNL();
3938 
3939 	netif_addr_lock_bh(dev);
3940 	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
3941 			    NETDEV_HW_ADDR_T_UNICAST);
3942 	if (!err)
3943 		__dev_set_rx_mode(dev);
3944 	netif_addr_unlock_bh(dev);
3945 	return err;
3946 }
3947 EXPORT_SYMBOL(dev_unicast_add);
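
/*
 * Illustrative sketch (not part of the original file): a virtual device
 * stacked on a lower device (macvlan-style) can ask the lower device to
 * accept one extra unicast address and give it back on teardown. The
 * lowerdev/vdev variables are placeholders; RTNL is held as required.
 *
 *	err = dev_unicast_add(lowerdev, vdev->dev_addr);
 *	if (err)
 *		goto fail;
 *	...
 *	dev_unicast_delete(lowerdev, vdev->dev_addr);	// on close
 */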
3948 
3949 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3950 		    struct dev_addr_list **from, int *from_count)
3951 {
3952 	struct dev_addr_list *da, *next;
3953 	int err = 0;
3954 
3955 	da = *from;
3956 	while (da != NULL) {
3957 		next = da->next;
3958 		if (!da->da_synced) {
3959 			err = __dev_addr_add(to, to_count,
3960 					     da->da_addr, da->da_addrlen, 0);
3961 			if (err < 0)
3962 				break;
3963 			da->da_synced = 1;
3964 			da->da_users++;
3965 		} else if (da->da_users == 1) {
3966 			__dev_addr_delete(to, to_count,
3967 					  da->da_addr, da->da_addrlen, 0);
3968 			__dev_addr_delete(from, from_count,
3969 					  da->da_addr, da->da_addrlen, 0);
3970 		}
3971 		da = next;
3972 	}
3973 	return err;
3974 }
3975 EXPORT_SYMBOL_GPL(__dev_addr_sync);
3976 
3977 void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3978 		       struct dev_addr_list **from, int *from_count)
3979 {
3980 	struct dev_addr_list *da, *next;
3981 
3982 	da = *from;
3983 	while (da != NULL) {
3984 		next = da->next;
3985 		if (da->da_synced) {
3986 			__dev_addr_delete(to, to_count,
3987 					  da->da_addr, da->da_addrlen, 0);
3988 			da->da_synced = 0;
3989 			__dev_addr_delete(from, from_count,
3990 					  da->da_addr, da->da_addrlen, 0);
3991 		}
3992 		da = next;
3993 	}
3994 }
3995 EXPORT_SYMBOL_GPL(__dev_addr_unsync);
3996 
3997 /**
3998  *	dev_unicast_sync - Synchronize device's unicast list to another device
3999  *	@to: destination device
4000  *	@from: source device
4001  *
4002  *	Add newly added addresses to the destination device and release
4003  *	addresses that have no users left. The source device must be
4004  *	locked by netif_addr_lock_bh.
4005  *
4006  *	This function is intended to be called from the dev->set_rx_mode
4007  *	function of layered software devices.
4008  */
4009 int dev_unicast_sync(struct net_device *to, struct net_device *from)
4010 {
4011 	int err = 0;
4012 
4013 	if (to->addr_len != from->addr_len)
4014 		return -EINVAL;
4015 
4016 	netif_addr_lock_bh(to);
4017 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4018 	if (!err)
4019 		__dev_set_rx_mode(to);
4020 	netif_addr_unlock_bh(to);
4021 	return err;
4022 }
4023 EXPORT_SYMBOL(dev_unicast_sync);
4024 
4025 /**
4026  *	dev_unicast_unsync - Remove synchronized addresses from the destination device
4027  *	@to: destination device
4028  *	@from: source device
4029  *
4030  *	Remove all addresses that were added to the destination device by
4031  *	dev_unicast_sync(). This function is intended to be called from the
4032  *	dev->stop function of layered software devices.
4033  */
4034 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4035 {
4036 	if (to->addr_len != from->addr_len)
4037 		return;
4038 
4039 	netif_addr_lock_bh(from);
4040 	netif_addr_lock(to);
4041 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4042 	__dev_set_rx_mode(to);
4043 	netif_addr_unlock(to);
4044 	netif_addr_unlock_bh(from);
4045 }
4046 EXPORT_SYMBOL(dev_unicast_unsync);
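
/*
 * Illustrative sketch (not part of the original file): a layered device
 * (VLAN- or bridge-port-like) typically wires the two helpers above into its
 * ndo_set_rx_mode and ndo_stop callbacks. lower_dev() and the vdev argument
 * are hypothetical names for this example.
 *
 *	static void example_set_rx_mode(struct net_device *vdev)
 *	{
 *		dev_unicast_sync(lower_dev(vdev), vdev);
 *	}
 *
 *	static int example_stop(struct net_device *vdev)
 *	{
 *		dev_unicast_unsync(lower_dev(vdev), vdev);
 *		return 0;
 *	}
 */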
4047 
4048 static void dev_unicast_flush(struct net_device *dev)
4049 {
4050 	netif_addr_lock_bh(dev);
4051 	__hw_addr_flush(&dev->uc);
4052 	netif_addr_unlock_bh(dev);
4053 }
4054 
4055 static void dev_unicast_init(struct net_device *dev)
4056 {
4057 	__hw_addr_init(&dev->uc);
4058 }
4059 
4060 
4061 static void __dev_addr_discard(struct dev_addr_list **list)
4062 {
4063 	struct dev_addr_list *tmp;
4064 
4065 	while (*list != NULL) {
4066 		tmp = *list;
4067 		*list = tmp->next;
4068 		if (tmp->da_users > tmp->da_gusers)
4069 			printk(KERN_WARNING "__dev_addr_discard: address leakage! "
4070 			       "da_users=%d\n", tmp->da_users);
4071 		kfree(tmp);
4072 	}
4073 }
4074 
4075 static void dev_addr_discard(struct net_device *dev)
4076 {
4077 	netif_addr_lock_bh(dev);
4078 
4079 	__dev_addr_discard(&dev->mc_list);
4080 	dev->mc_count = 0;
4081 
4082 	netif_addr_unlock_bh(dev);
4083 }
4084 
4085 /**
4086  *	dev_get_flags - get flags reported to userspace
4087  *	@dev: device
4088  *
4089  *	Get the combination of flag bits exported through APIs to userspace.
4090  */
4091 unsigned dev_get_flags(const struct net_device *dev)
4092 {
4093 	unsigned flags;
4094 
4095 	flags = (dev->flags & ~(IFF_PROMISC |
4096 				IFF_ALLMULTI |
4097 				IFF_RUNNING |
4098 				IFF_LOWER_UP |
4099 				IFF_DORMANT)) |
4100 		(dev->gflags & (IFF_PROMISC |
4101 				IFF_ALLMULTI));
4102 
4103 	if (netif_running(dev)) {
4104 		if (netif_oper_up(dev))
4105 			flags |= IFF_RUNNING;
4106 		if (netif_carrier_ok(dev))
4107 			flags |= IFF_LOWER_UP;
4108 		if (netif_dormant(dev))
4109 			flags |= IFF_DORMANT;
4110 	}
4111 
4112 	return flags;
4113 }
4114 EXPORT_SYMBOL(dev_get_flags);
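
/*
 * Illustrative sketch (not part of the original file): code that wants the
 * same view of an interface that userspace gets from SIOCGIFFLAGS should go
 * through dev_get_flags() rather than reading dev->flags directly:
 *
 *	unsigned flags = dev_get_flags(dev);
 *
 *	if ((flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
 *		;	// administratively up and operationally running
 */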
4115 
4116 /**
4117  *	dev_change_flags - change device settings
4118  *	@dev: device
4119  *	@flags: device state flags
4120  *
4121  *	Change settings on device based state flags. The flags are
4122  *	in the userspace exported format.
4123  */
4124 int dev_change_flags(struct net_device *dev, unsigned flags)
4125 {
4126 	int ret, changes;
4127 	int old_flags = dev->flags;
4128 
4129 	ASSERT_RTNL();
4130 
4131 	/*
4132 	 *	Set the flags on our device.
4133 	 */
4134 
4135 	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4136 			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4137 			       IFF_AUTOMEDIA)) |
4138 		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4139 				    IFF_ALLMULTI));
4140 
4141 	/*
4142 	 *	Load in the correct multicast list now the flags have changed.
4143 	 */
4144 
4145 	if ((old_flags ^ flags) & IFF_MULTICAST)
4146 		dev_change_rx_flags(dev, IFF_MULTICAST);
4147 
4148 	dev_set_rx_mode(dev);
4149 
4150 	/*
4151 	 *	Have we downed the interface? We handle IFF_UP ourselves
4152 	 *	according to user attempts to set it, rather than blindly
4153 	 *	setting it.
4154 	 */
4155 
4156 	ret = 0;
4157 	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
4158 		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
4159 
4160 		if (!ret)
4161 			dev_set_rx_mode(dev);
4162 	}
4163 
4164 	if (dev->flags & IFF_UP &&
4165 	    ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
4166 					  IFF_VOLATILE)))
4167 		call_netdevice_notifiers(NETDEV_CHANGE, dev);
4168 
4169 	if ((flags ^ dev->gflags) & IFF_PROMISC) {
4170 		int inc = (flags & IFF_PROMISC) ? 1 : -1;
4171 
4172 		dev->gflags ^= IFF_PROMISC;
4173 		dev_set_promiscuity(dev, inc);
4174 	}
4175 
4176 	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4177 	   is important. Some (broken) drivers set IFF_PROMISC when
4178 	   IFF_ALLMULTI is requested, without asking us and without reporting back.
4179 	 */
4180 	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4181 		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4182 
4183 		dev->gflags ^= IFF_ALLMULTI;
4184 		dev_set_allmulti(dev, inc);
4185 	}
4186 
4187 	/* Exclude state transition flags, already notified */
4188 	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4189 	if (changes)
4190 		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4191 
4192 	return ret;
4193 }
4194 EXPORT_SYMBOL(dev_change_flags);
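
/*
 * Illustrative sketch (not part of the original file): bringing an interface
 * up from kernel code with userspace-format flags, under RTNL. Toggling
 * IFF_UP here is only an example; any combination accepted by SIOCSIFFLAGS
 * works the same way.
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
 *	rtnl_unlock();
 */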
4195 
4196 /**
4197  *	dev_set_mtu - Change maximum transfer unit
4198  *	@dev: device
4199  *	@new_mtu: new transfer unit
4200  *
4201  *	Change the maximum transfer size of the network device.
4202  */
4203 int dev_set_mtu(struct net_device *dev, int new_mtu)
4204 {
4205 	const struct net_device_ops *ops = dev->netdev_ops;
4206 	int err;
4207 
4208 	if (new_mtu == dev->mtu)
4209 		return 0;
4210 
4211 	/*	MTU must be positive.	 */
4212 	if (new_mtu < 0)
4213 		return -EINVAL;
4214 
4215 	if (!netif_device_present(dev))
4216 		return -ENODEV;
4217 
4218 	err = 0;
4219 	if (ops->ndo_change_mtu)
4220 		err = ops->ndo_change_mtu(dev, new_mtu);
4221 	else
4222 		dev->mtu = new_mtu;
4223 
4224 	if (!err && dev->flags & IFF_UP)
4225 		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4226 	return err;
4227 }
4228 EXPORT_SYMBOL(dev_set_mtu);
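
/*
 * Illustrative sketch (not part of the original file): a tunnel or other
 * stacked driver that must shrink its MTU when the lower device's changes
 * could do the following; lower_dev and overhead are placeholders.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, lower_dev->mtu - overhead);
 *	rtnl_unlock();
 */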
4229 
4230 /**
4231  *	dev_set_mac_address - Change Media Access Control Address
4232  *	@dev: device
4233  *	@sa: new address
4234  *
4235  *	Change the hardware (MAC) address of the device
4236  */
4237 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4238 {
4239 	const struct net_device_ops *ops = dev->netdev_ops;
4240 	int err;
4241 
4242 	if (!ops->ndo_set_mac_address)
4243 		return -EOPNOTSUPP;
4244 	if (sa->sa_family != dev->type)
4245 		return -EINVAL;
4246 	if (!netif_device_present(dev))
4247 		return -ENODEV;
4248 	err = ops->ndo_set_mac_address(dev, sa);
4249 	if (!err)
4250 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4251 	return err;
4252 }
4253 EXPORT_SYMBOL(dev_set_mac_address);
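
/*
 * Illustrative sketch (not part of the original file): setting a new MAC
 * from within the kernel mirrors what the SIOCSIFHWADDR path below does.
 * The address bytes are made up for the example.
 *
 *	struct sockaddr sa;
 *	static const unsigned char new_mac[ETH_ALEN] = {
 *		0x02, 0x11, 0x22, 0x33, 0x44, 0x55 };
 *
 *	sa.sa_family = dev->type;		// e.g. ARPHRD_ETHER
 *	memcpy(sa.sa_data, new_mac, ETH_ALEN);
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */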
4254 
4255 /*
4256  *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
4257  */
4258 static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4259 {
4260 	int err;
4261 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4262 
4263 	if (!dev)
4264 		return -ENODEV;
4265 
4266 	switch (cmd) {
4267 	case SIOCGIFFLAGS:	/* Get interface flags */
4268 		ifr->ifr_flags = (short) dev_get_flags(dev);
4269 		return 0;
4270 
4271 	case SIOCGIFMETRIC:	/* Get the metric on the interface
4272 				   (currently unused) */
4273 		ifr->ifr_metric = 0;
4274 		return 0;
4275 
4276 	case SIOCGIFMTU:	/* Get the MTU of a device */
4277 		ifr->ifr_mtu = dev->mtu;
4278 		return 0;
4279 
4280 	case SIOCGIFHWADDR:
4281 		if (!dev->addr_len)
4282 			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4283 		else
4284 			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4285 			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4286 		ifr->ifr_hwaddr.sa_family = dev->type;
4287 		return 0;
4288 
4289 	case SIOCGIFSLAVE:
4290 		err = -EINVAL;
4291 		break;
4292 
4293 	case SIOCGIFMAP:
4294 		ifr->ifr_map.mem_start = dev->mem_start;
4295 		ifr->ifr_map.mem_end   = dev->mem_end;
4296 		ifr->ifr_map.base_addr = dev->base_addr;
4297 		ifr->ifr_map.irq       = dev->irq;
4298 		ifr->ifr_map.dma       = dev->dma;
4299 		ifr->ifr_map.port      = dev->if_port;
4300 		return 0;
4301 
4302 	case SIOCGIFINDEX:
4303 		ifr->ifr_ifindex = dev->ifindex;
4304 		return 0;
4305 
4306 	case SIOCGIFTXQLEN:
4307 		ifr->ifr_qlen = dev->tx_queue_len;
4308 		return 0;
4309 
4310 	default:
4311 		/* dev_ioctl() should ensure this case
4312 		 * is never reached
4313 		 */
4314 		WARN_ON(1);
4315 		err = -EINVAL;
4316 		break;
4317 
4318 	}
4319 	return err;
4320 }
4321 
4322 /*
4323  *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
4324  */
4325 static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4326 {
4327 	int err;
4328 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4329 	const struct net_device_ops *ops;
4330 
4331 	if (!dev)
4332 		return -ENODEV;
4333 
4334 	ops = dev->netdev_ops;
4335 
4336 	switch (cmd) {
4337 	case SIOCSIFFLAGS:	/* Set interface flags */
4338 		return dev_change_flags(dev, ifr->ifr_flags);
4339 
4340 	case SIOCSIFMETRIC:	/* Set the metric on the interface
4341 				   (currently unused) */
4342 		return -EOPNOTSUPP;
4343 
4344 	case SIOCSIFMTU:	/* Set the MTU of a device */
4345 		return dev_set_mtu(dev, ifr->ifr_mtu);
4346 
4347 	case SIOCSIFHWADDR:
4348 		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4349 
4350 	case SIOCSIFHWBROADCAST:
4351 		if (ifr->ifr_hwaddr.sa_family != dev->type)
4352 			return -EINVAL;
4353 		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4354 		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4355 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4356 		return 0;
4357 
4358 	case SIOCSIFMAP:
4359 		if (ops->ndo_set_config) {
4360 			if (!netif_device_present(dev))
4361 				return -ENODEV;
4362 			return ops->ndo_set_config(dev, &ifr->ifr_map);
4363 		}
4364 		return -EOPNOTSUPP;
4365 
4366 	case SIOCADDMULTI:
4367 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4368 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4369 			return -EINVAL;
4370 		if (!netif_device_present(dev))
4371 			return -ENODEV;
4372 		return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4373 				  dev->addr_len, 1);
4374 
4375 	case SIOCDELMULTI:
4376 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4377 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4378 			return -EINVAL;
4379 		if (!netif_device_present(dev))
4380 			return -ENODEV;
4381 		return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4382 				     dev->addr_len, 1);
4383 
4384 	case SIOCSIFTXQLEN:
4385 		if (ifr->ifr_qlen < 0)
4386 			return -EINVAL;
4387 		dev->tx_queue_len = ifr->ifr_qlen;
4388 		return 0;
4389 
4390 	case SIOCSIFNAME:
4391 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4392 		return dev_change_name(dev, ifr->ifr_newname);
4393 
4394 	/*
4395 	 *	Unknown or private ioctl
4396 	 */
4397 	default:
4398 		if ((cmd >= SIOCDEVPRIVATE &&
4399 		    cmd <= SIOCDEVPRIVATE + 15) ||
4400 		    cmd == SIOCBONDENSLAVE ||
4401 		    cmd == SIOCBONDRELEASE ||
4402 		    cmd == SIOCBONDSETHWADDR ||
4403 		    cmd == SIOCBONDSLAVEINFOQUERY ||
4404 		    cmd == SIOCBONDINFOQUERY ||
4405 		    cmd == SIOCBONDCHANGEACTIVE ||
4406 		    cmd == SIOCGMIIPHY ||
4407 		    cmd == SIOCGMIIREG ||
4408 		    cmd == SIOCSMIIREG ||
4409 		    cmd == SIOCBRADDIF ||
4410 		    cmd == SIOCBRDELIF ||
4411 		    cmd == SIOCSHWTSTAMP ||
4412 		    cmd == SIOCWANDEV) {
4413 			err = -EOPNOTSUPP;
4414 			if (ops->ndo_do_ioctl) {
4415 				if (netif_device_present(dev))
4416 					err = ops->ndo_do_ioctl(dev, ifr, cmd);
4417 				else
4418 					err = -ENODEV;
4419 			}
4420 		} else
4421 			err = -EINVAL;
4422 
4423 	}
4424 	return err;
4425 }
4426 
4427 /*
4428  *	This function handles all "interface"-type I/O control requests. The actual
4429  *	'doing' part of this is dev_ifsioc above.
4430  */
4431 
4432 /**
4433  *	dev_ioctl	-	network device ioctl
4434  *	@net: the applicable net namespace
4435  *	@cmd: command to issue
4436  *	@arg: pointer to a struct ifreq in user space
4437  *
4438  *	Issue ioctl functions to devices. This is normally called by the
4439  *	user space syscall interfaces but can sometimes be useful for
4440  *	other purposes. The return value is the return from the syscall if
4441  *	positive or a negative errno code on error.
4442  */
4443 
4444 int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4445 {
4446 	struct ifreq ifr;
4447 	int ret;
4448 	char *colon;
4449 
4450 	/* One special case: SIOCGIFCONF takes ifconf argument
4451 	   and requires shared lock, because it sleeps writing
4452 	   to user space.
4453 	 */
4454 
4455 	if (cmd == SIOCGIFCONF) {
4456 		rtnl_lock();
4457 		ret = dev_ifconf(net, (char __user *) arg);
4458 		rtnl_unlock();
4459 		return ret;
4460 	}
4461 	if (cmd == SIOCGIFNAME)
4462 		return dev_ifname(net, (struct ifreq __user *)arg);
4463 
4464 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4465 		return -EFAULT;
4466 
4467 	ifr.ifr_name[IFNAMSIZ-1] = 0;
4468 
4469 	colon = strchr(ifr.ifr_name, ':');
4470 	if (colon)
4471 		*colon = 0;
4472 
4473 	/*
4474 	 *	See which interface the caller is talking about.
4475 	 */
4476 
4477 	switch (cmd) {
4478 	/*
4479 	 *	These ioctl calls:
4480 	 *	- can be done by all.
4481 	 *	- atomic and do not require locking.
4482 	 *	- return a value
4483 	 */
4484 	case SIOCGIFFLAGS:
4485 	case SIOCGIFMETRIC:
4486 	case SIOCGIFMTU:
4487 	case SIOCGIFHWADDR:
4488 	case SIOCGIFSLAVE:
4489 	case SIOCGIFMAP:
4490 	case SIOCGIFINDEX:
4491 	case SIOCGIFTXQLEN:
4492 		dev_load(net, ifr.ifr_name);
4493 		read_lock(&dev_base_lock);
4494 		ret = dev_ifsioc_locked(net, &ifr, cmd);
4495 		read_unlock(&dev_base_lock);
4496 		if (!ret) {
4497 			if (colon)
4498 				*colon = ':';
4499 			if (copy_to_user(arg, &ifr,
4500 					 sizeof(struct ifreq)))
4501 				ret = -EFAULT;
4502 		}
4503 		return ret;
4504 
4505 	case SIOCETHTOOL:
4506 		dev_load(net, ifr.ifr_name);
4507 		rtnl_lock();
4508 		ret = dev_ethtool(net, &ifr);
4509 		rtnl_unlock();
4510 		if (!ret) {
4511 			if (colon)
4512 				*colon = ':';
4513 			if (copy_to_user(arg, &ifr,
4514 					 sizeof(struct ifreq)))
4515 				ret = -EFAULT;
4516 		}
4517 		return ret;
4518 
4519 	/*
4520 	 *	These ioctl calls:
4521 	 *	- require superuser power.
4522 	 *	- require strict serialization.
4523 	 *	- return a value
4524 	 */
4525 	case SIOCGMIIPHY:
4526 	case SIOCGMIIREG:
4527 	case SIOCSIFNAME:
4528 		if (!capable(CAP_NET_ADMIN))
4529 			return -EPERM;
4530 		dev_load(net, ifr.ifr_name);
4531 		rtnl_lock();
4532 		ret = dev_ifsioc(net, &ifr, cmd);
4533 		rtnl_unlock();
4534 		if (!ret) {
4535 			if (colon)
4536 				*colon = ':';
4537 			if (copy_to_user(arg, &ifr,
4538 					 sizeof(struct ifreq)))
4539 				ret = -EFAULT;
4540 		}
4541 		return ret;
4542 
4543 	/*
4544 	 *	These ioctl calls:
4545 	 *	- require superuser power.
4546 	 *	- require strict serialization.
4547 	 *	- do not return a value
4548 	 */
4549 	case SIOCSIFFLAGS:
4550 	case SIOCSIFMETRIC:
4551 	case SIOCSIFMTU:
4552 	case SIOCSIFMAP:
4553 	case SIOCSIFHWADDR:
4554 	case SIOCSIFSLAVE:
4555 	case SIOCADDMULTI:
4556 	case SIOCDELMULTI:
4557 	case SIOCSIFHWBROADCAST:
4558 	case SIOCSIFTXQLEN:
4559 	case SIOCSMIIREG:
4560 	case SIOCBONDENSLAVE:
4561 	case SIOCBONDRELEASE:
4562 	case SIOCBONDSETHWADDR:
4563 	case SIOCBONDCHANGEACTIVE:
4564 	case SIOCBRADDIF:
4565 	case SIOCBRDELIF:
4566 	case SIOCSHWTSTAMP:
4567 		if (!capable(CAP_NET_ADMIN))
4568 			return -EPERM;
4569 		/* fall through */
4570 	case SIOCBONDSLAVEINFOQUERY:
4571 	case SIOCBONDINFOQUERY:
4572 		dev_load(net, ifr.ifr_name);
4573 		rtnl_lock();
4574 		ret = dev_ifsioc(net, &ifr, cmd);
4575 		rtnl_unlock();
4576 		return ret;
4577 
4578 	case SIOCGIFMEM:
4579 		/* Get the per device memory space. We can add this but
4580 		 * currently do not support it */
4581 	case SIOCSIFMEM:
4582 		/* Set the per device memory buffer space.
4583 		 * Not applicable in our case */
4584 	case SIOCSIFLINK:
4585 		return -EINVAL;
4586 
4587 	/*
4588 	 *	Unknown or private ioctl.
4589 	 */
4590 	default:
4591 		if (cmd == SIOCWANDEV ||
4592 		    (cmd >= SIOCDEVPRIVATE &&
4593 		     cmd <= SIOCDEVPRIVATE + 15)) {
4594 			dev_load(net, ifr.ifr_name);
4595 			rtnl_lock();
4596 			ret = dev_ifsioc(net, &ifr, cmd);
4597 			rtnl_unlock();
4598 			if (!ret && copy_to_user(arg, &ifr,
4599 						 sizeof(struct ifreq)))
4600 				ret = -EFAULT;
4601 			return ret;
4602 		}
4603 		/* Take care of Wireless Extensions */
4604 		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4605 			return wext_handle_ioctl(net, &ifr, cmd, arg);
4606 		return -EINVAL;
4607 	}
4608 }
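
/*
 * Illustrative sketch (not part of the original file): the userspace side of
 * the request path handled by dev_ioctl() above. A minimal program querying
 * an MTU via SIOCGIFMTU; the "eth0" name is an assumption.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *
 *	int main(void)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *			printf("mtu=%d\n", ifr.ifr_mtu);
 *		close(fd);
 *		return 0;
 *	}
 */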
4609 
4610 
4611 /**
4612  *	dev_new_index	-	allocate an ifindex
4613  *	@net: the applicable net namespace
4614  *
4615  *	Returns a suitable unique value for a new device interface
4616  *	number.  The caller must hold the rtnl semaphore or the
4617  *	dev_base_lock to be sure it remains unique.
4618  */
4619 static int dev_new_index(struct net *net)
4620 {
4621 	static int ifindex;
4622 	for (;;) {
4623 		if (++ifindex <= 0)
4624 			ifindex = 1;
4625 		if (!__dev_get_by_index(net, ifindex))
4626 			return ifindex;
4627 	}
4628 }
4629 
4630 /* Delayed registration/unregisteration */
4631 static LIST_HEAD(net_todo_list);
4632 
4633 static void net_set_todo(struct net_device *dev)
4634 {
4635 	list_add_tail(&dev->todo_list, &net_todo_list);
4636 }
4637 
4638 static void rollback_registered(struct net_device *dev)
4639 {
4640 	BUG_ON(dev_boot_phase);
4641 	ASSERT_RTNL();
4642 
4643 	/* Some devices call this without having registered, to unwind a failed initialization. */
4644 	if (dev->reg_state == NETREG_UNINITIALIZED) {
4645 		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
4646 				  "was registered\n", dev->name, dev);
4647 
4648 		WARN_ON(1);
4649 		return;
4650 	}
4651 
4652 	BUG_ON(dev->reg_state != NETREG_REGISTERED);
4653 
4654 	/* If device is running, close it first. */
4655 	dev_close(dev);
4656 
4657 	/* And unlink it from device chain. */
4658 	unlist_netdevice(dev);
4659 
4660 	dev->reg_state = NETREG_UNREGISTERING;
4661 
4662 	synchronize_net();
4663 
4664 	/* Shutdown queueing discipline. */
4665 	dev_shutdown(dev);
4666 
4667 
4668 	/* Notify protocols, that we are about to destroy
4669 	   this device. They should clean all the things.
4670 	*/
4671 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4672 
4673 	/*
4674 	 *	Flush the unicast and multicast chains
4675 	 */
4676 	dev_unicast_flush(dev);
4677 	dev_addr_discard(dev);
4678 
4679 	if (dev->netdev_ops->ndo_uninit)
4680 		dev->netdev_ops->ndo_uninit(dev);
4681 
4682 	/* Notifier chain MUST detach us from master device. */
4683 	WARN_ON(dev->master);
4684 
4685 	/* Remove entries from kobject tree */
4686 	netdev_unregister_kobject(dev);
4687 
4688 	synchronize_net();
4689 
4690 	dev_put(dev);
4691 }
4692 
4693 static void __netdev_init_queue_locks_one(struct net_device *dev,
4694 					  struct netdev_queue *dev_queue,
4695 					  void *_unused)
4696 {
4697 	spin_lock_init(&dev_queue->_xmit_lock);
4698 	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4699 	dev_queue->xmit_lock_owner = -1;
4700 }
4701 
4702 static void netdev_init_queue_locks(struct net_device *dev)
4703 {
4704 	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4705 	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4706 }
4707 
4708 unsigned long netdev_fix_features(unsigned long features, const char *name)
4709 {
4710 	/* Fix illegal SG+CSUM combinations. */
4711 	if ((features & NETIF_F_SG) &&
4712 	    !(features & NETIF_F_ALL_CSUM)) {
4713 		if (name)
4714 			printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4715 			       "checksum feature.\n", name);
4716 		features &= ~NETIF_F_SG;
4717 	}
4718 
4719 	/* TSO requires that SG is present as well. */
4720 	if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4721 		if (name)
4722 			printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4723 			       "SG feature.\n", name);
4724 		features &= ~NETIF_F_TSO;
4725 	}
4726 
4727 	if (features & NETIF_F_UFO) {
4728 		if (!(features & NETIF_F_GEN_CSUM)) {
4729 			if (name)
4730 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4731 				       "since no NETIF_F_HW_CSUM feature.\n",
4732 				       name);
4733 			features &= ~NETIF_F_UFO;
4734 		}
4735 
4736 		if (!(features & NETIF_F_SG)) {
4737 			if (name)
4738 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4739 				       "since no NETIF_F_SG feature.\n", name);
4740 			features &= ~NETIF_F_UFO;
4741 		}
4742 	}
4743 
4744 	return features;
4745 }
4746 EXPORT_SYMBOL(netdev_fix_features);
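
/*
 * Illustrative sketch (not part of the original file): a driver that builds
 * its feature word from hardware capabilities can sanity-check it the same
 * way register_netdevice() does below. The starting features are made up.
 *
 *	dev->features = NETIF_F_SG | NETIF_F_TSO;	// hypothetical caps
 *	dev->features = netdev_fix_features(dev->features, dev->name);
 *	// SG is dropped because no checksum feature was advertised,
 *	// and TSO is then dropped because SG is gone.
 */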
4747 
4748 /**
4749  *	register_netdevice	- register a network device
4750  *	@dev: device to register
4751  *
4752  *	Take a completed network device structure and add it to the kernel
4753  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4754  *	chain. 0 is returned on success. A negative errno code is returned
4755  *	on a failure to set up the device, or if the name is a duplicate.
4756  *
4757  *	Callers must hold the rtnl semaphore. You may want
4758  *	register_netdev() instead of this.
4759  *
4760  *	BUGS:
4761  *	The locking appears insufficient to guarantee two parallel registers
4762  *	will not get the same name.
4763  */
4764 
4765 int register_netdevice(struct net_device *dev)
4766 {
4767 	struct hlist_head *head;
4768 	struct hlist_node *p;
4769 	int ret;
4770 	struct net *net = dev_net(dev);
4771 
4772 	BUG_ON(dev_boot_phase);
4773 	ASSERT_RTNL();
4774 
4775 	might_sleep();
4776 
4777 	/* When net_device's are persistent, this will be fatal. */
4778 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
4779 	BUG_ON(!net);
4780 
4781 	spin_lock_init(&dev->addr_list_lock);
4782 	netdev_set_addr_lockdep_class(dev);
4783 	netdev_init_queue_locks(dev);
4784 
4785 	dev->iflink = -1;
4786 
4787 	/* Init, if this function is available */
4788 	if (dev->netdev_ops->ndo_init) {
4789 		ret = dev->netdev_ops->ndo_init(dev);
4790 		if (ret) {
4791 			if (ret > 0)
4792 				ret = -EIO;
4793 			goto out;
4794 		}
4795 	}
4796 
4797 	if (!dev_valid_name(dev->name)) {
4798 		ret = -EINVAL;
4799 		goto err_uninit;
4800 	}
4801 
4802 	dev->ifindex = dev_new_index(net);
4803 	if (dev->iflink == -1)
4804 		dev->iflink = dev->ifindex;
4805 
4806 	/* Check for existence of name */
4807 	head = dev_name_hash(net, dev->name);
4808 	hlist_for_each(p, head) {
4809 		struct net_device *d
4810 			= hlist_entry(p, struct net_device, name_hlist);
4811 		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4812 			ret = -EEXIST;
4813 			goto err_uninit;
4814 		}
4815 	}
4816 
4817 	/* Fix illegal checksum combinations */
4818 	if ((dev->features & NETIF_F_HW_CSUM) &&
4819 	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4820 		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4821 		       dev->name);
4822 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4823 	}
4824 
4825 	if ((dev->features & NETIF_F_NO_CSUM) &&
4826 	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4827 		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4828 		       dev->name);
4829 		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4830 	}
4831 
4832 	dev->features = netdev_fix_features(dev->features, dev->name);
4833 
4834 	/* Enable software GSO if SG is supported. */
4835 	if (dev->features & NETIF_F_SG)
4836 		dev->features |= NETIF_F_GSO;
4837 
4838 	netdev_initialize_kobject(dev);
4839 	ret = netdev_register_kobject(dev);
4840 	if (ret)
4841 		goto err_uninit;
4842 	dev->reg_state = NETREG_REGISTERED;
4843 
4844 	/*
4845 	 *	Default initial state at registry is that the
4846 	 *	device is present.
4847 	 */
4848 
4849 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4850 
4851 	dev_init_scheduler(dev);
4852 	dev_hold(dev);
4853 	list_netdevice(dev);
4854 
4855 	/* Notify protocols, that a new device appeared. */
4856 	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
4857 	ret = notifier_to_errno(ret);
4858 	if (ret) {
4859 		rollback_registered(dev);
4860 		dev->reg_state = NETREG_UNREGISTERED;
4861 	}
4862 
4863 out:
4864 	return ret;
4865 
4866 err_uninit:
4867 	if (dev->netdev_ops->ndo_uninit)
4868 		dev->netdev_ops->ndo_uninit(dev);
4869 	goto out;
4870 }
4871 EXPORT_SYMBOL(register_netdevice);
4872 
4873 /**
4874  *	init_dummy_netdev	- init a dummy network device for NAPI
4875  *	@dev: device to init
4876  *
4877  *	This takes a network device structure and initializes the minimum
4878  *	number of fields so it can be used to schedule NAPI polls without
4879  *	registering a full blown interface. This is to be used by drivers
4880  *	that need to tie several hardware interfaces to a single NAPI
4881  *	poll scheduler due to HW limitations.
4882  */
4883 int init_dummy_netdev(struct net_device *dev)
4884 {
4885 	/* Clear everything. Note we don't initialize spinlocks
4886 	 * as they aren't supposed to be taken by any of the
4887 	 * NAPI code and this dummy netdev is supposed to be
4888 	 * only ever used for NAPI polls
4889 	 */
4890 	memset(dev, 0, sizeof(struct net_device));
4891 
4892 	/* make sure we BUG if trying to hit standard
4893 	 * register/unregister code path
4894 	 */
4895 	dev->reg_state = NETREG_DUMMY;
4896 
4897 	/* initialize the ref count */
4898 	atomic_set(&dev->refcnt, 1);
4899 
4900 	/* NAPI wants this */
4901 	INIT_LIST_HEAD(&dev->napi_list);
4902 
4903 	/* a dummy interface is started by default */
4904 	set_bit(__LINK_STATE_PRESENT, &dev->state);
4905 	set_bit(__LINK_STATE_START, &dev->state);
4906 
4907 	return 0;
4908 }
4909 EXPORT_SYMBOL_GPL(init_dummy_netdev);
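
/*
 * Illustrative sketch (not part of the original file): a driver that must
 * hang several NAPI contexts off a single piece of hardware can host them on
 * a dummy netdev. The adapter structure and example_poll() are hypothetical.
 *
 *	struct example_adapter {
 *		struct net_device dummy_dev;
 *		struct napi_struct napi;
 *	};
 *
 *	init_dummy_netdev(&adapter->dummy_dev);
 *	netif_napi_add(&adapter->dummy_dev, &adapter->napi, example_poll, 64);
 *	napi_enable(&adapter->napi);
 */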
4910 
4911 
4912 /**
4913  *	register_netdev	- register a network device
4914  *	@dev: device to register
4915  *
4916  *	Take a completed network device structure and add it to the kernel
4917  *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4918  *	chain. 0 is returned on success. A negative errno code is returned
4919  *	on a failure to set up the device, or if the name is a duplicate.
4920  *
4921  *	This is a wrapper around register_netdevice that takes the rtnl semaphore
4922  *	and expands the device name if you passed a format string to
4923  *	alloc_netdev.
4924  */
4925 int register_netdev(struct net_device *dev)
4926 {
4927 	int err;
4928 
4929 	rtnl_lock();
4930 
4931 	/*
4932 	 * If the name is a format string the caller wants us to do a
4933 	 * name allocation.
4934 	 */
4935 	if (strchr(dev->name, '%')) {
4936 		err = dev_alloc_name(dev, dev->name);
4937 		if (err < 0)
4938 			goto out;
4939 	}
4940 
4941 	err = register_netdevice(dev);
4942 out:
4943 	rtnl_unlock();
4944 	return err;
4945 }
4946 EXPORT_SYMBOL(register_netdev);
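
/*
 * Illustrative sketch (not part of the original file): the usual probe-time
 * sequence built from the helpers in this file. ether_setup() and the
 * "eth%d" template come from the Ethernet helpers; example_priv,
 * example_netdev_ops and the use of random_ether_addr() are assumptions.
 *
 *	dev = alloc_netdev_mq(sizeof(struct example_priv), "eth%d",
 *			      ether_setup, 1);
 *	if (!dev)
 *		return -ENOMEM;
 *	dev->netdev_ops = &example_netdev_ops;
 *	random_ether_addr(dev->dev_addr);
 *
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */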
4947 
4948 /*
4949  * netdev_wait_allrefs - wait until all references are gone.
4950  *
4951  * This is called when unregistering network devices.
4952  *
4953  * Any protocol or device that holds a reference should register
4954  * for netdevice notification, and cleanup and put back the
4955  * reference if they receive an UNREGISTER event.
4956  * We can get stuck here if buggy protocols don't correctly
4957  * call dev_put.
4958  */
4959 static void netdev_wait_allrefs(struct net_device *dev)
4960 {
4961 	unsigned long rebroadcast_time, warning_time;
4962 
4963 	rebroadcast_time = warning_time = jiffies;
4964 	while (atomic_read(&dev->refcnt) != 0) {
4965 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
4966 			rtnl_lock();
4967 
4968 			/* Rebroadcast unregister notification */
4969 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4970 
4971 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4972 				     &dev->state)) {
4973 				/* We must not have linkwatch events
4974 				 * pending on unregister. If this
4975 				 * happens, we simply run the queue
4976 				 * unscheduled, resulting in a noop
4977 				 * for this device.
4978 				 */
4979 				linkwatch_run_queue();
4980 			}
4981 
4982 			__rtnl_unlock();
4983 
4984 			rebroadcast_time = jiffies;
4985 		}
4986 
4987 		msleep(250);
4988 
4989 		if (time_after(jiffies, warning_time + 10 * HZ)) {
4990 			printk(KERN_EMERG "unregister_netdevice: "
4991 			       "waiting for %s to become free. Usage "
4992 			       "count = %d\n",
4993 			       dev->name, atomic_read(&dev->refcnt));
4994 			warning_time = jiffies;
4995 		}
4996 	}
4997 }
4998 
4999 /* The sequence is:
5000  *
5001  *	rtnl_lock();
5002  *	...
5003  *	register_netdevice(x1);
5004  *	register_netdevice(x2);
5005  *	...
5006  *	unregister_netdevice(y1);
5007  *	unregister_netdevice(y2);
5008  *      ...
5009  *	rtnl_unlock();
5010  *	free_netdev(y1);
5011  *	free_netdev(y2);
5012  *
5013  * We are invoked by rtnl_unlock().
5014  * This allows us to deal with problems:
5015  * 1) We can delete sysfs objects which invoke hotplug
5016  *    without deadlocking with linkwatch via keventd.
5017  * 2) Since we run with the RTNL semaphore not held, we can sleep
5018  *    safely in order to wait for the netdev refcnt to drop to zero.
5019  *
5020  * We must not return until all unregister events added during
5021  * the interval the lock was held have been completed.
5022  */
5023 void netdev_run_todo(void)
5024 {
5025 	struct list_head list;
5026 
5027 	/* Snapshot list, allow later requests */
5028 	list_replace_init(&net_todo_list, &list);
5029 
5030 	__rtnl_unlock();
5031 
5032 	while (!list_empty(&list)) {
5033 		struct net_device *dev
5034 			= list_entry(list.next, struct net_device, todo_list);
5035 		list_del(&dev->todo_list);
5036 
5037 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5038 			printk(KERN_ERR "network todo '%s' but state %d\n",
5039 			       dev->name, dev->reg_state);
5040 			dump_stack();
5041 			continue;
5042 		}
5043 
5044 		dev->reg_state = NETREG_UNREGISTERED;
5045 
5046 		on_each_cpu(flush_backlog, dev, 1);
5047 
5048 		netdev_wait_allrefs(dev);
5049 
5050 		/* paranoia */
5051 		BUG_ON(atomic_read(&dev->refcnt));
5052 		WARN_ON(dev->ip_ptr);
5053 		WARN_ON(dev->ip6_ptr);
5054 		WARN_ON(dev->dn_ptr);
5055 
5056 		if (dev->destructor)
5057 			dev->destructor(dev);
5058 
5059 		/* Free network device */
5060 		kobject_put(&dev->dev.kobj);
5061 	}
5062 }
5063 
5064 /**
5065  *	dev_get_stats	- get network device statistics
5066  *	@dev: device to get statistics from
5067  *
5068  *	Get network statistics from device. The device driver may provide
5069  *	its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
5070  *	the internal statistics structure is used.
5071  */
5072 const struct net_device_stats *dev_get_stats(struct net_device *dev)
5073 {
5074 	const struct net_device_ops *ops = dev->netdev_ops;
5075 
5076 	if (ops->ndo_get_stats)
5077 		return ops->ndo_get_stats(dev);
5078 	else {
5079 		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5080 		struct net_device_stats *stats = &dev->stats;
5081 		unsigned int i;
5082 		struct netdev_queue *txq;
5083 
5084 		for (i = 0; i < dev->num_tx_queues; i++) {
5085 			txq = netdev_get_tx_queue(dev, i);
5086 			tx_bytes   += txq->tx_bytes;
5087 			tx_packets += txq->tx_packets;
5088 			tx_dropped += txq->tx_dropped;
5089 		}
5090 		if (tx_bytes || tx_packets || tx_dropped) {
5091 			stats->tx_bytes   = tx_bytes;
5092 			stats->tx_packets = tx_packets;
5093 			stats->tx_dropped = tx_dropped;
5094 		}
5095 		return stats;
5096 	}
5097 }
5098 EXPORT_SYMBOL(dev_get_stats);
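
/*
 * Illustrative sketch (not part of the original file): consumers such as the
 * procfs and rtnetlink code read counters through this accessor rather than
 * poking dev->stats directly:
 *
 *	const struct net_device_stats *stats = dev_get_stats(dev);
 *
 *	printk(KERN_DEBUG "%s: %lu rx, %lu tx packets\n",
 *	       dev->name, stats->rx_packets, stats->tx_packets);
 */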
5099 
5100 static void netdev_init_one_queue(struct net_device *dev,
5101 				  struct netdev_queue *queue,
5102 				  void *_unused)
5103 {
5104 	queue->dev = dev;
5105 }
5106 
5107 static void netdev_init_queues(struct net_device *dev)
5108 {
5109 	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5110 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5111 	spin_lock_init(&dev->tx_global_lock);
5112 }
5113 
5114 /**
5115  *	alloc_netdev_mq - allocate network device
5116  *	@sizeof_priv:	size of private data to allocate space for
5117  *	@name:		device name format string
5118  *	@setup:		callback to initialize device
5119  *	@queue_count:	the number of subqueues to allocate
5120  *
5121  *	Allocates a struct net_device with private data area for driver use
5122  *	and performs basic initialization.  Also allocates subqueue structs
5123  *	for each queue on the device at the end of the netdevice.
5124  */
5125 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5126 		void (*setup)(struct net_device *), unsigned int queue_count)
5127 {
5128 	struct netdev_queue *tx;
5129 	struct net_device *dev;
5130 	size_t alloc_size;
5131 	struct net_device *p;
5132 
5133 	BUG_ON(strlen(name) >= sizeof(dev->name));
5134 
5135 	alloc_size = sizeof(struct net_device);
5136 	if (sizeof_priv) {
5137 		/* ensure 32-byte alignment of private area */
5138 		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
5139 		alloc_size += sizeof_priv;
5140 	}
5141 	/* ensure 32-byte alignment of whole construct */
5142 	alloc_size += NETDEV_ALIGN - 1;
5143 
5144 	p = kzalloc(alloc_size, GFP_KERNEL);
5145 	if (!p) {
5146 		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5147 		return NULL;
5148 	}
5149 
5150 	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5151 	if (!tx) {
5152 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
5153 		       "tx qdiscs.\n");
5154 		goto free_p;
5155 	}
5156 
5157 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
5158 	dev->padded = (char *)dev - (char *)p;
5159 
5160 	if (dev_addr_init(dev))
5161 		goto free_tx;
5162 
5163 	dev_unicast_init(dev);
5164 
5165 	dev_net_set(dev, &init_net);
5166 
5167 	dev->_tx = tx;
5168 	dev->num_tx_queues = queue_count;
5169 	dev->real_num_tx_queues = queue_count;
5170 
5171 	dev->gso_max_size = GSO_MAX_SIZE;
5172 
5173 	netdev_init_queues(dev);
5174 
5175 	INIT_LIST_HEAD(&dev->napi_list);
5176 	dev->priv_flags = IFF_XMIT_DST_RELEASE;
5177 	setup(dev);
5178 	strcpy(dev->name, name);
5179 	return dev;
5180 
5181 free_tx:
5182 	kfree(tx);
5183 
5184 free_p:
5185 	kfree(p);
5186 	return NULL;
5187 }
5188 EXPORT_SYMBOL(alloc_netdev_mq);
5189 
5190 /**
5191  *	free_netdev - free network device
5192  *	@dev: device
5193  *
5194  *	This function does the last stage of destroying an allocated device
5195  * 	interface. The reference to the device object is released.
5196  *	If this is the last reference then it will be freed.
5197  */
5198 void free_netdev(struct net_device *dev)
5199 {
5200 	struct napi_struct *p, *n;
5201 
5202 	release_net(dev_net(dev));
5203 
5204 	kfree(dev->_tx);
5205 
5206 	/* Flush device addresses */
5207 	dev_addr_flush(dev);
5208 
5209 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5210 		netif_napi_del(p);
5211 
5212 	/*  Compatibility with error handling in drivers */
5213 	if (dev->reg_state == NETREG_UNINITIALIZED) {
5214 		kfree((char *)dev - dev->padded);
5215 		return;
5216 	}
5217 
5218 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5219 	dev->reg_state = NETREG_RELEASED;
5220 
5221 	/* will free via device release */
5222 	put_device(&dev->dev);
5223 }
5224 EXPORT_SYMBOL(free_netdev);
5225 
5226 /**
5227  *	synchronize_net -  Synchronize with packet receive processing
5228  *
5229  *	Wait for packets currently being received to be done.
5230  *	Does not block later packets from starting.
5231  */
5232 void synchronize_net(void)
5233 {
5234 	might_sleep();
5235 	synchronize_rcu();
5236 }
5237 EXPORT_SYMBOL(synchronize_net);
5238 
5239 /**
5240  *	unregister_netdevice - remove device from the kernel
5241  *	@dev: device
5242  *
5243  *	This function shuts down a device interface and removes it
5244  *	from the kernel tables.
5245  *
5246  *	Callers must hold the rtnl semaphore.  You may want
5247  *	unregister_netdev() instead of this.
5248  */
5249 
5250 void unregister_netdevice(struct net_device *dev)
5251 {
5252 	ASSERT_RTNL();
5253 
5254 	rollback_registered(dev);
5255 	/* Finish processing unregister after unlock */
5256 	net_set_todo(dev);
5257 }
5258 EXPORT_SYMBOL(unregister_netdevice);
5259 
5260 /**
5261  *	unregister_netdev - remove device from the kernel
5262  *	@dev: device
5263  *
5264  *	This function shuts down a device interface and removes it
5265  *	from the kernel tables.
5266  *
5267  *	This is just a wrapper for unregister_netdevice that takes
5268  *	the rtnl semaphore.  In general you want to use this and not
5269  *	unregister_netdevice.
5270  */
5271 void unregister_netdev(struct net_device *dev)
5272 {
5273 	rtnl_lock();
5274 	unregister_netdevice(dev);
5275 	rtnl_unlock();
5276 }
5277 EXPORT_SYMBOL(unregister_netdev);
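
/*
 * Illustrative sketch (not part of the original file): the teardown that
 * matches the registration sequence shown earlier; unregister first, then
 * free once netdev_run_todo() has finished with the device. The priv
 * structure is a placeholder.
 *
 *	static void example_remove(struct example_priv *priv)
 *	{
 *		unregister_netdev(priv->dev);	// takes and releases RTNL
 *		free_netdev(priv->dev);
 *	}
 */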
5278 
5279 /**
5280  *	dev_change_net_namespace - move device to a different network namespace
5281  *	@dev: device
5282  *	@net: network namespace
5283  *	@pat: If not NULL name pattern to try if the current device name
5284  *	      is already taken in the destination network namespace.
5285  *
5286  *	This function shuts down a device interface and moves it
5287  *	to a new network namespace. On success 0 is returned, on
5288  *	a failure a netagive errno code is returned.
5289  *
5290  *	Callers must hold the rtnl semaphore.
5291  */
5292 
5293 int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5294 {
5295 	char buf[IFNAMSIZ];
5296 	const char *destname;
5297 	int err;
5298 
5299 	ASSERT_RTNL();
5300 
5301 	/* Don't allow namespace local devices to be moved. */
5302 	err = -EINVAL;
5303 	if (dev->features & NETIF_F_NETNS_LOCAL)
5304 		goto out;
5305 
5306 #ifdef CONFIG_SYSFS
5307 	/* Don't allow real devices to be moved when sysfs
5308 	 * is enabled.
5309 	 */
5310 	err = -EINVAL;
5311 	if (dev->dev.parent)
5312 		goto out;
5313 #endif
5314 
5315 	/* Ensure the device has been registered */
5316 	err = -EINVAL;
5317 	if (dev->reg_state != NETREG_REGISTERED)
5318 		goto out;
5319 
5320 	/* Get out if there is nothing to do */
5321 	err = 0;
5322 	if (net_eq(dev_net(dev), net))
5323 		goto out;
5324 
5325 	/* Pick the destination device name, and ensure
5326 	 * we can use it in the destination network namespace.
5327 	 */
5328 	err = -EEXIST;
5329 	destname = dev->name;
5330 	if (__dev_get_by_name(net, destname)) {
5331 		/* We get here if we can't use the current device name */
5332 		if (!pat)
5333 			goto out;
5334 		if (!dev_valid_name(pat))
5335 			goto out;
5336 		if (strchr(pat, '%')) {
5337 			if (__dev_alloc_name(net, pat, buf) < 0)
5338 				goto out;
5339 			destname = buf;
5340 		} else
5341 			destname = pat;
5342 		if (__dev_get_by_name(net, destname))
5343 			goto out;
5344 	}
5345 
5346 	/*
5347 	 * And now a mini version of register_netdevice and unregister_netdevice.
5348 	 */
5349 
5350 	/* If device is running close it first. */
5351 	dev_close(dev);
5352 
5353 	/* And unlink it from device chain */
5354 	err = -ENODEV;
5355 	unlist_netdevice(dev);
5356 
5357 	synchronize_net();
5358 
5359 	/* Shutdown queueing discipline. */
5360 	dev_shutdown(dev);
5361 
5362 	/* Notify protocols, that we are about to destroy
5363 	   this device. They should clean all the things.
5364 	*/
5365 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5366 
5367 	/*
5368 	 *	Flush the unicast and multicast chains
5369 	 */
5370 	dev_unicast_flush(dev);
5371 	dev_addr_discard(dev);
5372 
5373 	netdev_unregister_kobject(dev);
5374 
5375 	/* Actually switch the network namespace */
5376 	dev_net_set(dev, net);
5377 
5378 	/* Assign the new device name */
5379 	if (destname != dev->name)
5380 		strcpy(dev->name, destname);
5381 
5382 	/* If there is an ifindex conflict assign a new one */
5383 	if (__dev_get_by_index(net, dev->ifindex)) {
5384 		int iflink = (dev->iflink == dev->ifindex);
5385 		dev->ifindex = dev_new_index(net);
5386 		if (iflink)
5387 			dev->iflink = dev->ifindex;
5388 	}
5389 
5390 	/* Fixup kobjects */
5391 	err = netdev_register_kobject(dev);
5392 	WARN_ON(err);
5393 
5394 	/* Add the device back in the hashes */
5395 	list_netdevice(dev);
5396 
5397 	/* Notify protocols, that a new device appeared. */
5398 	call_netdevice_notifiers(NETDEV_REGISTER, dev);
5399 
5400 	synchronize_net();
5401 	err = 0;
5402 out:
5403 	return err;
5404 }
5405 EXPORT_SYMBOL_GPL(dev_change_net_namespace);
5406 
5407 static int dev_cpu_callback(struct notifier_block *nfb,
5408 			    unsigned long action,
5409 			    void *ocpu)
5410 {
5411 	struct sk_buff **list_skb;
5412 	struct Qdisc **list_net;
5413 	struct sk_buff *skb;
5414 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
5415 	struct softnet_data *sd, *oldsd;
5416 
5417 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5418 		return NOTIFY_OK;
5419 
5420 	local_irq_disable();
5421 	cpu = smp_processor_id();
5422 	sd = &per_cpu(softnet_data, cpu);
5423 	oldsd = &per_cpu(softnet_data, oldcpu);
5424 
5425 	/* Find end of our completion_queue. */
5426 	list_skb = &sd->completion_queue;
5427 	while (*list_skb)
5428 		list_skb = &(*list_skb)->next;
5429 	/* Append completion queue from offline CPU. */
5430 	*list_skb = oldsd->completion_queue;
5431 	oldsd->completion_queue = NULL;
5432 
5433 	/* Find end of our output_queue. */
5434 	list_net = &sd->output_queue;
5435 	while (*list_net)
5436 		list_net = &(*list_net)->next_sched;
5437 	/* Append output queue from offline CPU. */
5438 	*list_net = oldsd->output_queue;
5439 	oldsd->output_queue = NULL;
5440 
5441 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
5442 	local_irq_enable();
5443 
5444 	/* Process offline CPU's input_pkt_queue */
5445 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5446 		netif_rx(skb);
5447 
5448 	return NOTIFY_OK;
5449 }
5450 
5451 
5452 /**
5453  *	netdev_increment_features - increment feature set by one
5454  *	@all: current feature set
5455  *	@one: new feature set
5456  *	@mask: mask feature set
5457  *
5458  *	Computes a new feature set after adding a device with feature set
5459  *	@one to the master device with current feature set @all.  Will not
5460  *	enable anything that is off in @mask. Returns the new feature set.
5461  */
5462 unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5463 					unsigned long mask)
5464 {
5465 	/* If device needs checksumming, downgrade to it. */
5466 	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5467 		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5468 	else if (mask & NETIF_F_ALL_CSUM) {
5469 		/* If one device supports v4/v6 checksumming, set for all. */
5470 		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5471 		    !(all & NETIF_F_GEN_CSUM)) {
5472 			all &= ~NETIF_F_ALL_CSUM;
5473 			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5474 		}
5475 
5476 		/* If one device supports hw checksumming, set for all. */
5477 		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5478 			all &= ~NETIF_F_ALL_CSUM;
5479 			all |= NETIF_F_HW_CSUM;
5480 		}
5481 	}
5482 
5483 	one |= NETIF_F_ALL_CSUM;
5484 
5485 	one |= all & NETIF_F_ONE_FOR_ALL;
5486 	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5487 	all |= one & mask & NETIF_F_ONE_FOR_ALL;
5488 
5489 	return all;
5490 }
5491 EXPORT_SYMBOL(netdev_increment_features);
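
/*
 * Illustrative sketch (not part of the original file): a bonding-style
 * master could fold each slave's feature word into its own with the helper
 * above; the slave list, the starting value and the mask are placeholders.
 *
 *	unsigned long features = NETIF_F_ONE_FOR_ALL;
 *
 *	list_for_each_entry(slave, &master->slave_list, list)
 *		features = netdev_increment_features(features,
 *						     slave->dev->features,
 *						     NETIF_F_ONE_FOR_ALL);
 *	master_dev->features = features;
 */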
5492 
5493 static struct hlist_head *netdev_create_hash(void)
5494 {
5495 	int i;
5496 	struct hlist_head *hash;
5497 
5498 	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5499 	if (hash != NULL)
5500 		for (i = 0; i < NETDEV_HASHENTRIES; i++)
5501 			INIT_HLIST_HEAD(&hash[i]);
5502 
5503 	return hash;
5504 }
5505 
5506 /* Initialize per network namespace state */
5507 static int __net_init netdev_init(struct net *net)
5508 {
5509 	INIT_LIST_HEAD(&net->dev_base_head);
5510 
5511 	net->dev_name_head = netdev_create_hash();
5512 	if (net->dev_name_head == NULL)
5513 		goto err_name;
5514 
5515 	net->dev_index_head = netdev_create_hash();
5516 	if (net->dev_index_head == NULL)
5517 		goto err_idx;
5518 
5519 	return 0;
5520 
5521 err_idx:
5522 	kfree(net->dev_name_head);
5523 err_name:
5524 	return -ENOMEM;
5525 }
5526 
5527 /**
5528  *	netdev_drivername - network driver for the device
5529  *	@dev: network device
5530  *	@buffer: buffer for resulting name
5531  *	@len: size of buffer
5532  *
5533  *	Determine network driver for device.
5534  */
5535 char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5536 {
5537 	const struct device_driver *driver;
5538 	const struct device *parent;
5539 
5540 	if (len <= 0 || !buffer)
5541 		return buffer;
5542 	buffer[0] = 0;
5543 
5544 	parent = dev->dev.parent;
5545 
5546 	if (!parent)
5547 		return buffer;
5548 
5549 	driver = parent->driver;
5550 	if (driver && driver->name)
5551 		strlcpy(buffer, driver->name, len);
5552 	return buffer;
5553 }
5554 
5555 static void __net_exit netdev_exit(struct net *net)
5556 {
5557 	kfree(net->dev_name_head);
5558 	kfree(net->dev_index_head);
5559 }
5560 
5561 static struct pernet_operations __net_initdata netdev_net_ops = {
5562 	.init = netdev_init,
5563 	.exit = netdev_exit,
5564 };
5565 
5566 static void __net_exit default_device_exit(struct net *net)
5567 {
5568 	struct net_device *dev;
5569 	/*
5570 	 * Push all migratable network devices back to the
5571 	 * initial network namespace
5572 	 */
5573 	rtnl_lock();
5574 restart:
5575 	for_each_netdev(net, dev) {
5576 		int err;
5577 		char fb_name[IFNAMSIZ];
5578 
5579 		/* Ignore unmoveable devices (i.e. loopback) */
5580 		if (dev->features & NETIF_F_NETNS_LOCAL)
5581 			continue;
5582 
5583 		/* Delete virtual devices */
5584 		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
5585 			dev->rtnl_link_ops->dellink(dev);
5586 			goto restart;
5587 		}
5588 
5589 		/* Push remaining network devices to init_net */
5590 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5591 		err = dev_change_net_namespace(dev, &init_net, fb_name);
5592 		if (err) {
5593 			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
5594 				__func__, dev->name, err);
5595 			BUG();
5596 		}
5597 		goto restart;
5598 	}
5599 	rtnl_unlock();
5600 }
5601 
5602 static struct pernet_operations __net_initdata default_device_ops = {
5603 	.exit = default_device_exit,
5604 };
5605 
5606 /*
5607  *	Initialize the DEV module. At boot time this walks the device list and
5608  *	unhooks any devices that fail to initialise (normally hardware not
5609  *	present) and leaves us with a valid list of present and active devices.
5610  *
5611  */
5612 
5613 /*
5614  *       This is called single threaded during boot, so no need
5615  *       to take the rtnl semaphore.
5616  */
5617 static int __init net_dev_init(void)
5618 {
5619 	int i, rc = -ENOMEM;
5620 
5621 	BUG_ON(!dev_boot_phase);
5622 
5623 	if (dev_proc_init())
5624 		goto out;
5625 
5626 	if (netdev_kobject_init())
5627 		goto out;
5628 
5629 	INIT_LIST_HEAD(&ptype_all);
5630 	for (i = 0; i < PTYPE_HASH_SIZE; i++)
5631 		INIT_LIST_HEAD(&ptype_base[i]);
5632 
5633 	if (register_pernet_subsys(&netdev_net_ops))
5634 		goto out;
5635 
5636 	/*
5637 	 *	Initialise the packet receive queues.
5638 	 */
5639 
5640 	for_each_possible_cpu(i) {
5641 		struct softnet_data *queue;
5642 
5643 		queue = &per_cpu(softnet_data, i);
5644 		skb_queue_head_init(&queue->input_pkt_queue);
5645 		queue->completion_queue = NULL;
5646 		INIT_LIST_HEAD(&queue->poll_list);
5647 
5648 		queue->backlog.poll = process_backlog;
5649 		queue->backlog.weight = weight_p;
5650 		queue->backlog.gro_list = NULL;
5651 		queue->backlog.gro_count = 0;
5652 	}
5653 
5654 	dev_boot_phase = 0;
5655 
5656 	/* The loopback device is special: if any other network device
5657 	 * is present in a network namespace, the loopback device must
5658 	 * be present too. Since we now dynamically allocate and free the
5659 	 * loopback device, ensure this invariant is maintained by
5660 	 * keeping the loopback device as the first device on the
5661 	 * list of network devices, so that it is the first device
5662 	 * that appears and the last network device that
5663 	 * disappears.
5664 	 */
5665 	if (register_pernet_device(&loopback_net_ops))
5666 		goto out;
5667 
5668 	if (register_pernet_device(&default_device_ops))
5669 		goto out;
5670 
5671 	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5672 	open_softirq(NET_RX_SOFTIRQ, net_rx_action);
5673 
5674 	hotcpu_notifier(dev_cpu_callback, 0);
5675 	dst_init();
5676 	dev_mcast_init();
5677 	rc = 0;
5678 out:
5679 	return rc;
5680 }
5681 
5682 subsys_initcall(net_dev_init);
5683 
5684 static int __init initialize_hashrnd(void)
5685 {
5686 	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5687 	return 0;
5688 }
5689 
5690 late_initcall_sync(initialize_hashrnd);
5691 
5692