xref: /linux/net/packet/af_packet.c (revision f3449bf31d352f70c80a7993c272a7854ae98086)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		PACKET - implements raw packet sockets.
7  *
8  * Authors:	Ross Biro
9  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
11  *
12  * Fixes:
13  *		Alan Cox	:	verify_area() now used correctly
14  *		Alan Cox	:	new skbuff lists, look ma no backlogs!
15  *		Alan Cox	:	tidied skbuff lists.
16  *		Alan Cox	:	Now uses generic datagram routines I
17  *					added. Also fixed the peek/read crash
18  *					from all old Linux datagram code.
19  *		Alan Cox	:	Uses the improved datagram code.
20  *		Alan Cox	:	Added NULL's for socket options.
21  *		Alan Cox	:	Re-commented the code.
22  *		Alan Cox	:	Use new kernel side addressing
23  *		Rob Janssen	:	Correct MTU usage.
24  *		Dave Platt	:	Counter leaks caused by incorrect
25  *					interrupt locking and some slightly
26  *					dubious gcc output. Can you read
27  *					compiler: it said _VOLATILE_
28  *	Richard Kooijman	:	Timestamp fixes.
29  *		Alan Cox	:	New buffers. Use sk->mac.raw.
30  *		Alan Cox	:	sendmsg/recvmsg support.
31  *		Alan Cox	:	Protocol setting support
32  *	Alexey Kuznetsov	:	Untied from IPv4 stack.
33  *	Cyrus Durgin		:	Fixed kerneld for kmod.
34  *	Michal Ostrowski        :       Module initialization cleanup.
35  *         Ulises Alonso        :       Frame number limit removal and
36  *                                      packet_set_ring memory leak.
37  *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
38  *					The convention is that longer addresses
39  *					will simply extend the hardware address
40  *					byte arrays at the end of sockaddr_ll
41  *					and packet_mreq.
42  *		Johann Baudy	:	Added TX RING.
43  *
44  *		This program is free software; you can redistribute it and/or
45  *		modify it under the terms of the GNU General Public License
46  *		as published by the Free Software Foundation; either version
47  *		2 of the License, or (at your option) any later version.
48  *
49  */
50 
51 #include <linux/types.h>
52 #include <linux/mm.h>
53 #include <linux/capability.h>
54 #include <linux/fcntl.h>
55 #include <linux/socket.h>
56 #include <linux/in.h>
57 #include <linux/inet.h>
58 #include <linux/netdevice.h>
59 #include <linux/if_packet.h>
60 #include <linux/wireless.h>
61 #include <linux/kernel.h>
62 #include <linux/kmod.h>
63 #include <linux/slab.h>
64 #include <net/net_namespace.h>
65 #include <net/ip.h>
66 #include <net/protocol.h>
67 #include <linux/skbuff.h>
68 #include <net/sock.h>
69 #include <linux/errno.h>
70 #include <linux/timer.h>
71 #include <asm/system.h>
72 #include <asm/uaccess.h>
73 #include <asm/ioctls.h>
74 #include <asm/page.h>
75 #include <asm/cacheflush.h>
76 #include <asm/io.h>
77 #include <linux/proc_fs.h>
78 #include <linux/seq_file.h>
79 #include <linux/poll.h>
80 #include <linux/module.h>
81 #include <linux/init.h>
82 #include <linux/mutex.h>
83 #include <linux/if_vlan.h>
84 #include <linux/virtio_net.h>
85 #include <linux/errqueue.h>
86 #include <linux/net_tstamp.h>
87 
88 #ifdef CONFIG_INET
89 #include <net/inet_common.h>
90 #endif
91 
92 /*
93    Assumptions:
94    - if the device has no dev->hard_header routine, it adds and removes the ll
95      header inside itself. In this case the ll header is invisible outside the
96      device, but higher levels should still reserve dev->hard_header_len.
97      Some devices are clever enough to reallocate the skb when the header
98      does not fit into the reserved space (tunnel); other ones are silly
99      (PPP).
100    - a packet socket receives packets with the ll header already pulled,
101      so SOCK_RAW should push it back.
102 
103 On receive:
104 -----------
105 
106 Incoming, dev->hard_header!=NULL
107    mac_header -> ll header
108    data       -> data
109 
110 Outgoing, dev->hard_header!=NULL
111    mac_header -> ll header
112    data       -> ll header
113 
114 Incoming, dev->hard_header==NULL
115    mac_header -> UNKNOWN position. It very likely points to the ll
116 		 header.  PPP does this, which is wrong, because it introduces
117 		 asymmetry between the rx and tx paths.
118    data       -> data
119 
120 Outgoing, dev->hard_header==NULL
121    mac_header -> data. ll header is still not built!
122    data       -> data
123 
124 Summary
125   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.
126 
127 
128 On transmit:
129 ------------
130 
131 dev->hard_header != NULL
132    mac_header -> ll header
133    data       -> ll header
134 
135 dev->hard_header == NULL (ll header is added by device, we cannot control it)
136    mac_header -> data
137    data       -> data
138 
139    We should set nh.raw on output to the correct position;
140    the packet classifier depends on it.
141  */
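/*
 * Illustrative userspace view (a hedged sketch, not part of this file):
 * a SOCK_RAW packet socket hands frames to the application starting at the
 * ll header, while SOCK_DGRAM starts at the network header, matching the
 * mac_header/data layout described above.  Opening such a socket might
 * look like:
 *
 *	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *
 * followed by a bind() with a struct sockaddr_ll naming the interface;
 * error handling is omitted.  See packet_create() and packet_bind() below
 * for the kernel side of those calls.
 */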
142 
143 /* Private packet socket structures. */
144 
145 struct packet_mclist {
146 	struct packet_mclist	*next;
147 	int			ifindex;
148 	int			count;
149 	unsigned short		type;
150 	unsigned short		alen;
151 	unsigned char		addr[MAX_ADDR_LEN];
152 };
153 /* identical to struct packet_mreq except it has
154  * a longer address field.
155  */
156 struct packet_mreq_max {
157 	int		mr_ifindex;
158 	unsigned short	mr_type;
159 	unsigned short	mr_alen;
160 	unsigned char	mr_address[MAX_ADDR_LEN];
161 };
162 
163 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
164 		int closing, int tx_ring);
165 
166 struct packet_ring_buffer {
167 	char			**pg_vec;
168 	unsigned int		head;
169 	unsigned int		frames_per_block;
170 	unsigned int		frame_size;
171 	unsigned int		frame_max;
172 
173 	unsigned int		pg_vec_order;
174 	unsigned int		pg_vec_pages;
175 	unsigned int		pg_vec_len;
176 
177 	atomic_t		pending;
178 };
179 
180 struct packet_sock;
181 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
182 
183 static void packet_flush_mclist(struct sock *sk);
184 
185 struct packet_sock {
186 	/* struct sock has to be the first member of packet_sock */
187 	struct sock		sk;
188 	struct tpacket_stats	stats;
189 	struct packet_ring_buffer	rx_ring;
190 	struct packet_ring_buffer	tx_ring;
191 	int			copy_thresh;
192 	spinlock_t		bind_lock;
193 	struct mutex		pg_vec_lock;
194 	unsigned int		running:1,	/* prot_hook is attached*/
195 				auxdata:1,
196 				origdev:1,
197 				has_vnet_hdr:1;
198 	int			ifindex;	/* bound device		*/
199 	__be16			num;
200 	struct packet_mclist	*mclist;
201 	atomic_t		mapped;
202 	enum tpacket_versions	tp_version;
203 	unsigned int		tp_hdrlen;
204 	unsigned int		tp_reserve;
205 	unsigned int		tp_loss:1;
206 	unsigned int		tp_tstamp;
207 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
208 };
209 
210 struct packet_skb_cb {
211 	unsigned int origlen;
212 	union {
213 		struct sockaddr_pkt pkt;
214 		struct sockaddr_ll ll;
215 	} sa;
216 };
217 
218 #define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
219 
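/*
 * Write the status word of a ring frame.  The status field lives at a
 * version-dependent offset (tpacket_hdr vs tpacket2_hdr); the dcache flush
 * and the memory barrier pair with __packet_get_status() so the handoff of
 * a frame between the kernel and the mmap()ed userspace ring is ordered.
 */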
220 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
221 {
222 	union {
223 		struct tpacket_hdr *h1;
224 		struct tpacket2_hdr *h2;
225 		void *raw;
226 	} h;
227 
228 	h.raw = frame;
229 	switch (po->tp_version) {
230 	case TPACKET_V1:
231 		h.h1->tp_status = status;
232 		flush_dcache_page(virt_to_page(&h.h1->tp_status));
233 		break;
234 	case TPACKET_V2:
235 		h.h2->tp_status = status;
236 		flush_dcache_page(virt_to_page(&h.h2->tp_status));
237 		break;
238 	default:
239 		pr_err("TPACKET version not supported\n");
240 		BUG();
241 	}
242 
243 	smp_wmb();
244 }
245 
246 static int __packet_get_status(struct packet_sock *po, void *frame)
247 {
248 	union {
249 		struct tpacket_hdr *h1;
250 		struct tpacket2_hdr *h2;
251 		void *raw;
252 	} h;
253 
254 	smp_rmb();
255 
256 	h.raw = frame;
257 	switch (po->tp_version) {
258 	case TPACKET_V1:
259 		flush_dcache_page(virt_to_page(&h.h1->tp_status));
260 		return h.h1->tp_status;
261 	case TPACKET_V2:
262 		flush_dcache_page(virt_to_page(&h.h2->tp_status));
263 		return h.h2->tp_status;
264 	default:
265 		pr_err("TPACKET version not supported\n");
266 		BUG();
267 		return 0;
268 	}
269 }
270 
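/*
 * Translate a frame index into its address inside pg_vec: the index selects
 * a block (position / frames_per_block) and a frame within that block
 * (position % frames_per_block).  The frame is returned only if its current
 * status matches the one the caller expects, NULL otherwise.
 */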
271 static void *packet_lookup_frame(struct packet_sock *po,
272 		struct packet_ring_buffer *rb,
273 		unsigned int position,
274 		int status)
275 {
276 	unsigned int pg_vec_pos, frame_offset;
277 	union {
278 		struct tpacket_hdr *h1;
279 		struct tpacket2_hdr *h2;
280 		void *raw;
281 	} h;
282 
283 	pg_vec_pos = position / rb->frames_per_block;
284 	frame_offset = position % rb->frames_per_block;
285 
286 	h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
287 
288 	if (status != __packet_get_status(po, h.raw))
289 		return NULL;
290 
291 	return h.raw;
292 }
293 
294 static inline void *packet_current_frame(struct packet_sock *po,
295 		struct packet_ring_buffer *rb,
296 		int status)
297 {
298 	return packet_lookup_frame(po, rb, rb->head, status);
299 }
300 
301 static inline void *packet_previous_frame(struct packet_sock *po,
302 		struct packet_ring_buffer *rb,
303 		int status)
304 {
305 	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
306 	return packet_lookup_frame(po, rb, previous, status);
307 }
308 
309 static inline void packet_increment_head(struct packet_ring_buffer *buff)
310 {
311 	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
312 }
313 
314 static inline struct packet_sock *pkt_sk(struct sock *sk)
315 {
316 	return (struct packet_sock *)sk;
317 }
318 
319 static void packet_sock_destruct(struct sock *sk)
320 {
321 	skb_queue_purge(&sk->sk_error_queue);
322 
323 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
324 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
325 
326 	if (!sock_flag(sk, SOCK_DEAD)) {
327 		pr_err("Attempt to release alive packet socket: %p\n", sk);
328 		return;
329 	}
330 
331 	sk_refcnt_debug_dec(sk);
332 }
333 
334 
335 static const struct proto_ops packet_ops;
336 
337 static const struct proto_ops packet_ops_spkt;
338 
339 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
340 			   struct packet_type *pt, struct net_device *orig_dev)
341 {
342 	struct sock *sk;
343 	struct sockaddr_pkt *spkt;
344 
345 	/*
346 	 *	When we registered the protocol we saved the socket in the data
347 	 *	field for just this event.
348 	 */
349 
350 	sk = pt->af_packet_priv;
351 
352 	/*
353 	 *	Yank back the headers [hope the device set this
354 	 *	right or kerboom...]
355 	 *
356 	 *	Incoming packets have the ll header pulled,
357 	 *	so push it back.
358 	 *
359 	 *	For outgoing ones skb->data == skb_mac_header(skb),
360 	 *	so this procedure is a no-op.
361 	 */
362 
363 	if (skb->pkt_type == PACKET_LOOPBACK)
364 		goto out;
365 
366 	if (!net_eq(dev_net(dev), sock_net(sk)))
367 		goto out;
368 
369 	skb = skb_share_check(skb, GFP_ATOMIC);
370 	if (skb == NULL)
371 		goto oom;
372 
373 	/* drop any routing info */
374 	skb_dst_drop(skb);
375 
376 	/* drop conntrack reference */
377 	nf_reset(skb);
378 
379 	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
380 
381 	skb_push(skb, skb->data - skb_mac_header(skb));
382 
383 	/*
384 	 *	The SOCK_PACKET socket receives _all_ frames.
385 	 */
386 
387 	spkt->spkt_family = dev->type;
388 	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
389 	spkt->spkt_protocol = skb->protocol;
390 
391 	/*
392 	 *	Charge the memory to the socket. This is done specifically
393 	 *	to prevent sockets from using up all the memory.
394 	 */
395 
396 	if (sock_queue_rcv_skb(sk, skb) == 0)
397 		return 0;
398 
399 out:
400 	kfree_skb(skb);
401 oom:
402 	return 0;
403 }
404 
405 
406 /*
407  *	Output a raw packet to a device layer. This bypasses all the other
408  *	protocol layers and you must therefore supply it with a complete frame
409  */
410 
411 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
412 			       struct msghdr *msg, size_t len)
413 {
414 	struct sock *sk = sock->sk;
415 	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
416 	struct sk_buff *skb = NULL;
417 	struct net_device *dev;
418 	__be16 proto = 0;
419 	int err;
420 
421 	/*
422 	 *	Get and verify the address.
423 	 */
424 
425 	if (saddr) {
426 		if (msg->msg_namelen < sizeof(struct sockaddr))
427 			return -EINVAL;
428 		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
429 			proto = saddr->spkt_protocol;
430 	} else
431 		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */
432 
433 	/*
434 	 *	Find the device first to size check it
435 	 */
436 
437 	saddr->spkt_device[13] = 0;
438 retry:
439 	rcu_read_lock();
440 	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
441 	err = -ENODEV;
442 	if (dev == NULL)
443 		goto out_unlock;
444 
445 	err = -ENETDOWN;
446 	if (!(dev->flags & IFF_UP))
447 		goto out_unlock;
448 
449 	/*
450 	 * You may not queue a frame bigger than the mtu. This is the lowest level
451 	 * raw protocol and you must do your own fragmentation at this level.
452 	 */
453 
454 	err = -EMSGSIZE;
455 	if (len > dev->mtu + dev->hard_header_len)
456 		goto out_unlock;
457 
458 	if (!skb) {
459 		size_t reserved = LL_RESERVED_SPACE(dev);
460 		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
461 
462 		rcu_read_unlock();
463 		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
464 		if (skb == NULL)
465 			return -ENOBUFS;
466 		/* FIXME: Save some space for broken drivers that write a hard
467 		 * header at transmission time by themselves. PPP is the notable
468 		 * one here. This should really be fixed at the driver level.
469 		 */
470 		skb_reserve(skb, reserved);
471 		skb_reset_network_header(skb);
472 
473 		/* Try to align data part correctly */
474 		if (hhlen) {
475 			skb->data -= hhlen;
476 			skb->tail -= hhlen;
477 			if (len < hhlen)
478 				skb_reset_network_header(skb);
479 		}
480 		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
481 		if (err)
482 			goto out_free;
483 		goto retry;
484 	}
485 
486 
487 	skb->protocol = proto;
488 	skb->dev = dev;
489 	skb->priority = sk->sk_priority;
490 	skb->mark = sk->sk_mark;
491 	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
492 	if (err < 0)
493 		goto out_unlock;
494 
495 	dev_queue_xmit(skb);
496 	rcu_read_unlock();
497 	return len;
498 
499 out_unlock:
500 	rcu_read_unlock();
501 out_free:
502 	kfree_skb(skb);
503 	return err;
504 }
505 
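/*
 * Run the socket's attached BPF filter (if any) against the skb and return
 * the number of bytes the filter wants to keep; 0 means drop.  The receive
 * paths below call this with the full packet length as the default.
 */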
506 static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
507 				      unsigned int res)
508 {
509 	struct sk_filter *filter;
510 
511 	rcu_read_lock_bh();
512 	filter = rcu_dereference_bh(sk->sk_filter);
513 	if (filter != NULL)
514 		res = sk_run_filter(skb, filter->insns, filter->len);
515 	rcu_read_unlock_bh();
516 
517 	return res;
518 }
519 
520 /*
521    This function does lazy skb cloning in the hope that most packets
522    are discarded by BPF.
523 
524    Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
525    and skb->cb are mangled. It works because (and until) packets
526    falling here are owned by the current CPU. Output packets are cloned
527    by dev_queue_xmit_nit(), input packets are processed by net_bh
528    sequentially, so if we return the skb to its original state on exit,
529    we will not harm anyone.
530  */
531 
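/*
 * Receive path for sockets without an RX ring: run the filter, clone the
 * skb if it is shared, stash the originating sockaddr_ll in skb->cb, trim
 * to the snap length and queue the skb on sk_receive_queue for recvmsg().
 */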
532 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
533 		      struct packet_type *pt, struct net_device *orig_dev)
534 {
535 	struct sock *sk;
536 	struct sockaddr_ll *sll;
537 	struct packet_sock *po;
538 	u8 *skb_head = skb->data;
539 	int skb_len = skb->len;
540 	unsigned int snaplen, res;
541 
542 	if (skb->pkt_type == PACKET_LOOPBACK)
543 		goto drop;
544 
545 	sk = pt->af_packet_priv;
546 	po = pkt_sk(sk);
547 
548 	if (!net_eq(dev_net(dev), sock_net(sk)))
549 		goto drop;
550 
551 	skb->dev = dev;
552 
553 	if (dev->header_ops) {
554 		/* The device has an explicit notion of ll header,
555 		   exported to higher levels.
556 
557 		   Otherwise, the device hides the details of its frame
558 		   structure, so the corresponding packet header is
559 		   never delivered to the user.
560 		 */
561 		if (sk->sk_type != SOCK_DGRAM)
562 			skb_push(skb, skb->data - skb_mac_header(skb));
563 		else if (skb->pkt_type == PACKET_OUTGOING) {
564 			/* Special case: outgoing packets have ll header at head */
565 			skb_pull(skb, skb_network_offset(skb));
566 		}
567 	}
568 
569 	snaplen = skb->len;
570 
571 	res = run_filter(skb, sk, snaplen);
572 	if (!res)
573 		goto drop_n_restore;
574 	if (snaplen > res)
575 		snaplen = res;
576 
577 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
578 	    (unsigned)sk->sk_rcvbuf)
579 		goto drop_n_acct;
580 
581 	if (skb_shared(skb)) {
582 		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
583 		if (nskb == NULL)
584 			goto drop_n_acct;
585 
586 		if (skb_head != skb->data) {
587 			skb->data = skb_head;
588 			skb->len = skb_len;
589 		}
590 		kfree_skb(skb);
591 		skb = nskb;
592 	}
593 
594 	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
595 		     sizeof(skb->cb));
596 
597 	sll = &PACKET_SKB_CB(skb)->sa.ll;
598 	sll->sll_family = AF_PACKET;
599 	sll->sll_hatype = dev->type;
600 	sll->sll_protocol = skb->protocol;
601 	sll->sll_pkttype = skb->pkt_type;
602 	if (unlikely(po->origdev))
603 		sll->sll_ifindex = orig_dev->ifindex;
604 	else
605 		sll->sll_ifindex = dev->ifindex;
606 
607 	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
608 
609 	PACKET_SKB_CB(skb)->origlen = skb->len;
610 
611 	if (pskb_trim(skb, snaplen))
612 		goto drop_n_acct;
613 
614 	skb_set_owner_r(skb, sk);
615 	skb->dev = NULL;
616 	skb_dst_drop(skb);
617 
618 	/* drop conntrack reference */
619 	nf_reset(skb);
620 
621 	spin_lock(&sk->sk_receive_queue.lock);
622 	po->stats.tp_packets++;
623 	skb->dropcount = atomic_read(&sk->sk_drops);
624 	__skb_queue_tail(&sk->sk_receive_queue, skb);
625 	spin_unlock(&sk->sk_receive_queue.lock);
626 	sk->sk_data_ready(sk, skb->len);
627 	return 0;
628 
629 drop_n_acct:
630 	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
631 
632 drop_n_restore:
633 	if (skb_head != skb->data && skb_shared(skb)) {
634 		skb->data = skb_head;
635 		skb->len = skb_len;
636 	}
637 drop:
638 	consume_skb(skb);
639 	return 0;
640 }
641 
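/*
 * Receive path for sockets with a PACKET_RX_RING: grab the next frame that
 * is still TP_STATUS_KERNEL, copy up to snaplen bytes of the packet into it
 * at macoff, fill in the version-specific tpacket header and the trailing
 * sockaddr_ll, and finally flip the frame to TP_STATUS_USER so the mmap()ed
 * consumer can pick it up.  If no frame is free the packet is counted as a
 * drop, and TP_STATUS_LOSING is reported on later frames until the drop
 * count is read.
 */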
642 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
643 		       struct packet_type *pt, struct net_device *orig_dev)
644 {
645 	struct sock *sk;
646 	struct packet_sock *po;
647 	struct sockaddr_ll *sll;
648 	union {
649 		struct tpacket_hdr *h1;
650 		struct tpacket2_hdr *h2;
651 		void *raw;
652 	} h;
653 	u8 *skb_head = skb->data;
654 	int skb_len = skb->len;
655 	unsigned int snaplen, res;
656 	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
657 	unsigned short macoff, netoff, hdrlen;
658 	struct sk_buff *copy_skb = NULL;
659 	struct timeval tv;
660 	struct timespec ts;
661 	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
662 
663 	if (skb->pkt_type == PACKET_LOOPBACK)
664 		goto drop;
665 
666 	sk = pt->af_packet_priv;
667 	po = pkt_sk(sk);
668 
669 	if (!net_eq(dev_net(dev), sock_net(sk)))
670 		goto drop;
671 
672 	if (dev->header_ops) {
673 		if (sk->sk_type != SOCK_DGRAM)
674 			skb_push(skb, skb->data - skb_mac_header(skb));
675 		else if (skb->pkt_type == PACKET_OUTGOING) {
676 			/* Special case: outgoing packets have ll header at head */
677 			skb_pull(skb, skb_network_offset(skb));
678 		}
679 	}
680 
681 	if (skb->ip_summed == CHECKSUM_PARTIAL)
682 		status |= TP_STATUS_CSUMNOTREADY;
683 
684 	snaplen = skb->len;
685 
686 	res = run_filter(skb, sk, snaplen);
687 	if (!res)
688 		goto drop_n_restore;
689 	if (snaplen > res)
690 		snaplen = res;
691 
692 	if (sk->sk_type == SOCK_DGRAM) {
693 		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
694 				  po->tp_reserve;
695 	} else {
696 		unsigned maclen = skb_network_offset(skb);
697 		netoff = TPACKET_ALIGN(po->tp_hdrlen +
698 				       (maclen < 16 ? 16 : maclen)) +
699 			po->tp_reserve;
700 		macoff = netoff - maclen;
701 	}
702 
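	/*
	 * macoff/netoff are the offsets, within the ring frame, at which the
	 * ll header and the network header will be placed: for SOCK_DGRAM
	 * there is no ll header and the two coincide, while for SOCK_RAW
	 * netoff is pushed out far enough to leave room for the ll header
	 * in front.
	 */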
703 	if (macoff + snaplen > po->rx_ring.frame_size) {
704 		if (po->copy_thresh &&
705 		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
706 		    (unsigned)sk->sk_rcvbuf) {
707 			if (skb_shared(skb)) {
708 				copy_skb = skb_clone(skb, GFP_ATOMIC);
709 			} else {
710 				copy_skb = skb_get(skb);
711 				skb_head = skb->data;
712 			}
713 			if (copy_skb)
714 				skb_set_owner_r(copy_skb, sk);
715 		}
716 		snaplen = po->rx_ring.frame_size - macoff;
717 		if ((int)snaplen < 0)
718 			snaplen = 0;
719 	}
720 
721 	spin_lock(&sk->sk_receive_queue.lock);
722 	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
723 	if (!h.raw)
724 		goto ring_is_full;
725 	packet_increment_head(&po->rx_ring);
726 	po->stats.tp_packets++;
727 	if (copy_skb) {
728 		status |= TP_STATUS_COPY;
729 		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
730 	}
731 	if (!po->stats.tp_drops)
732 		status &= ~TP_STATUS_LOSING;
733 	spin_unlock(&sk->sk_receive_queue.lock);
734 
735 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
736 
737 	switch (po->tp_version) {
738 	case TPACKET_V1:
739 		h.h1->tp_len = skb->len;
740 		h.h1->tp_snaplen = snaplen;
741 		h.h1->tp_mac = macoff;
742 		h.h1->tp_net = netoff;
743 		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
744 				&& shhwtstamps->syststamp.tv64)
745 			tv = ktime_to_timeval(shhwtstamps->syststamp);
746 		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
747 				&& shhwtstamps->hwtstamp.tv64)
748 			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
749 		else if (skb->tstamp.tv64)
750 			tv = ktime_to_timeval(skb->tstamp);
751 		else
752 			do_gettimeofday(&tv);
753 		h.h1->tp_sec = tv.tv_sec;
754 		h.h1->tp_usec = tv.tv_usec;
755 		hdrlen = sizeof(*h.h1);
756 		break;
757 	case TPACKET_V2:
758 		h.h2->tp_len = skb->len;
759 		h.h2->tp_snaplen = snaplen;
760 		h.h2->tp_mac = macoff;
761 		h.h2->tp_net = netoff;
762 		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
763 				&& shhwtstamps->syststamp.tv64)
764 			ts = ktime_to_timespec(shhwtstamps->syststamp);
765 		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
766 				&& shhwtstamps->hwtstamp.tv64)
767 			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
768 		else if (skb->tstamp.tv64)
769 			ts = ktime_to_timespec(skb->tstamp);
770 		else
771 			getnstimeofday(&ts);
772 		h.h2->tp_sec = ts.tv_sec;
773 		h.h2->tp_nsec = ts.tv_nsec;
774 		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
775 		hdrlen = sizeof(*h.h2);
776 		break;
777 	default:
778 		BUG();
779 	}
780 
781 	sll = h.raw + TPACKET_ALIGN(hdrlen);
782 	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
783 	sll->sll_family = AF_PACKET;
784 	sll->sll_hatype = dev->type;
785 	sll->sll_protocol = skb->protocol;
786 	sll->sll_pkttype = skb->pkt_type;
787 	if (unlikely(po->origdev))
788 		sll->sll_ifindex = orig_dev->ifindex;
789 	else
790 		sll->sll_ifindex = dev->ifindex;
791 
792 	__packet_set_status(po, h.raw, status);
793 	smp_mb();
794 	{
795 		struct page *p_start, *p_end;
796 		u8 *h_end = h.raw + macoff + snaplen - 1;
797 
798 		p_start = virt_to_page(h.raw);
799 		p_end = virt_to_page(h_end);
800 		while (p_start <= p_end) {
801 			flush_dcache_page(p_start);
802 			p_start++;
803 		}
804 	}
805 
806 	sk->sk_data_ready(sk, 0);
807 
808 drop_n_restore:
809 	if (skb_head != skb->data && skb_shared(skb)) {
810 		skb->data = skb_head;
811 		skb->len = skb_len;
812 	}
813 drop:
814 	kfree_skb(skb);
815 	return 0;
816 
817 ring_is_full:
818 	po->stats.tp_drops++;
819 	spin_unlock(&sk->sk_receive_queue.lock);
820 
821 	sk->sk_data_ready(sk, 0);
822 	kfree_skb(copy_skb);
823 	goto drop_n_restore;
824 }
825 
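/*
 * Destructor for skbs built from TX-ring frames: once the driver has
 * consumed the data, hand the frame back to userspace by setting
 * TP_STATUS_AVAILABLE and drop it from the ring's pending count.
 */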
826 static void tpacket_destruct_skb(struct sk_buff *skb)
827 {
828 	struct packet_sock *po = pkt_sk(skb->sk);
829 	void *ph;
830 
831 	BUG_ON(skb == NULL);
832 
833 	if (likely(po->tx_ring.pg_vec)) {
834 		ph = skb_shinfo(skb)->destructor_arg;
835 		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
836 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
837 		atomic_dec(&po->tx_ring.pending);
838 		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
839 	}
840 
841 	sock_wfree(skb);
842 }
843 
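/*
 * Build an skb from one TX-ring frame: the ll header part (for SOCK_RAW)
 * is copied into the skb head, and the rest of the frame is attached as
 * page fragments so the payload itself is not copied.  tp_len is validated
 * against size_max; the return value is the frame length or a negative
 * error.
 */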
844 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
845 		void *frame, struct net_device *dev, int size_max,
846 		__be16 proto, unsigned char *addr)
847 {
848 	union {
849 		struct tpacket_hdr *h1;
850 		struct tpacket2_hdr *h2;
851 		void *raw;
852 	} ph;
853 	int to_write, offset, len, tp_len, nr_frags, len_max;
854 	struct socket *sock = po->sk.sk_socket;
855 	struct page *page;
856 	void *data;
857 	int err;
858 
859 	ph.raw = frame;
860 
861 	skb->protocol = proto;
862 	skb->dev = dev;
863 	skb->priority = po->sk.sk_priority;
864 	skb->mark = po->sk.sk_mark;
865 	skb_shinfo(skb)->destructor_arg = ph.raw;
866 
867 	switch (po->tp_version) {
868 	case TPACKET_V2:
869 		tp_len = ph.h2->tp_len;
870 		break;
871 	default:
872 		tp_len = ph.h1->tp_len;
873 		break;
874 	}
875 	if (unlikely(tp_len > size_max)) {
876 		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
877 		return -EMSGSIZE;
878 	}
879 
880 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
881 	skb_reset_network_header(skb);
882 
883 	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
884 	to_write = tp_len;
885 
886 	if (sock->type == SOCK_DGRAM) {
887 		err = dev_hard_header(skb, dev, ntohs(proto), addr,
888 				NULL, tp_len);
889 		if (unlikely(err < 0))
890 			return -EINVAL;
891 	} else if (dev->hard_header_len) {
892 		/* net device doesn't like empty head */
893 		if (unlikely(tp_len <= dev->hard_header_len)) {
894 			pr_err("packet size is too short (%d < %d)\n",
895 			       tp_len, dev->hard_header_len);
896 			return -EINVAL;
897 		}
898 
899 		skb_push(skb, dev->hard_header_len);
900 		err = skb_store_bits(skb, 0, data,
901 				dev->hard_header_len);
902 		if (unlikely(err))
903 			return err;
904 
905 		data += dev->hard_header_len;
906 		to_write -= dev->hard_header_len;
907 	}
908 
909 	err = -EFAULT;
910 	page = virt_to_page(data);
911 	offset = offset_in_page(data);
912 	len_max = PAGE_SIZE - offset;
913 	len = ((to_write > len_max) ? len_max : to_write);
914 
915 	skb->data_len = to_write;
916 	skb->len += to_write;
917 	skb->truesize += to_write;
918 	atomic_add(to_write, &po->sk.sk_wmem_alloc);
919 
920 	while (likely(to_write)) {
921 		nr_frags = skb_shinfo(skb)->nr_frags;
922 
923 		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
924 			pr_err("Packet exceed the number of skb frags(%lu)\n",
925 			       MAX_SKB_FRAGS);
926 			return -EFAULT;
927 		}
928 
929 		flush_dcache_page(page);
930 		get_page(page);
931 		skb_fill_page_desc(skb,
932 				nr_frags,
933 				page++, offset, len);
934 		to_write -= len;
935 		offset = 0;
936 		len_max = PAGE_SIZE;
937 		len = ((to_write > len_max) ? len_max : to_write);
938 	}
939 
940 	return tp_len;
941 }
942 
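/*
 * Transmit loop for the PACKET_TX_RING: walk the ring for frames marked
 * TP_STATUS_SEND_REQUEST, turn each one into an skb with tpacket_fill_skb()
 * and pass it to dev_queue_xmit().  Frames are flipped to TP_STATUS_SENDING
 * while in flight and released again by tpacket_destruct_skb(); with
 * PACKET_LOSS set, malformed frames are returned to userspace as
 * TP_STATUS_AVAILABLE instead of aborting the whole send.
 */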
943 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
944 {
945 	struct socket *sock;
946 	struct sk_buff *skb;
947 	struct net_device *dev;
948 	__be16 proto;
949 	int ifindex, err, reserve = 0;
950 	void *ph;
951 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
952 	int tp_len, size_max;
953 	unsigned char *addr;
954 	int len_sum = 0;
955 	int status = 0;
956 
957 	sock = po->sk.sk_socket;
958 
959 	mutex_lock(&po->pg_vec_lock);
960 
961 	err = -EBUSY;
962 	if (saddr == NULL) {
963 		ifindex	= po->ifindex;
964 		proto	= po->num;
965 		addr	= NULL;
966 	} else {
967 		err = -EINVAL;
968 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
969 			goto out;
970 		if (msg->msg_namelen < (saddr->sll_halen
971 					+ offsetof(struct sockaddr_ll,
972 						sll_addr)))
973 			goto out;
974 		ifindex	= saddr->sll_ifindex;
975 		proto	= saddr->sll_protocol;
976 		addr	= saddr->sll_addr;
977 	}
978 
979 	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
980 	err = -ENXIO;
981 	if (unlikely(dev == NULL))
982 		goto out;
983 
984 	reserve = dev->hard_header_len;
985 
986 	err = -ENETDOWN;
987 	if (unlikely(!(dev->flags & IFF_UP)))
988 		goto out_put;
989 
990 	size_max = po->tx_ring.frame_size
991 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
992 
993 	if (size_max > dev->mtu + reserve)
994 		size_max = dev->mtu + reserve;
995 
996 	do {
997 		ph = packet_current_frame(po, &po->tx_ring,
998 				TP_STATUS_SEND_REQUEST);
999 
1000 		if (unlikely(ph == NULL)) {
1001 			schedule();
1002 			continue;
1003 		}
1004 
1005 		status = TP_STATUS_SEND_REQUEST;
1006 		skb = sock_alloc_send_skb(&po->sk,
1007 				LL_ALLOCATED_SPACE(dev)
1008 				+ sizeof(struct sockaddr_ll),
1009 				0, &err);
1010 
1011 		if (unlikely(skb == NULL))
1012 			goto out_status;
1013 
1014 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
1015 				addr);
1016 
1017 		if (unlikely(tp_len < 0)) {
1018 			if (po->tp_loss) {
1019 				__packet_set_status(po, ph,
1020 						TP_STATUS_AVAILABLE);
1021 				packet_increment_head(&po->tx_ring);
1022 				kfree_skb(skb);
1023 				continue;
1024 			} else {
1025 				status = TP_STATUS_WRONG_FORMAT;
1026 				err = tp_len;
1027 				goto out_status;
1028 			}
1029 		}
1030 
1031 		skb->destructor = tpacket_destruct_skb;
1032 		__packet_set_status(po, ph, TP_STATUS_SENDING);
1033 		atomic_inc(&po->tx_ring.pending);
1034 
1035 		status = TP_STATUS_SEND_REQUEST;
1036 		err = dev_queue_xmit(skb);
1037 		if (unlikely(err > 0)) {
1038 			err = net_xmit_errno(err);
1039 			if (err && __packet_get_status(po, ph) ==
1040 				   TP_STATUS_AVAILABLE) {
1041 				/* skb was destructed already */
1042 				skb = NULL;
1043 				goto out_status;
1044 			}
1045 			/*
1046 			 * skb was dropped but not destructed yet;
1047 			 * let's treat it like congestion or err < 0
1048 			 */
1049 			err = 0;
1050 		}
1051 		packet_increment_head(&po->tx_ring);
1052 		len_sum += tp_len;
1053 	} while (likely((ph != NULL) ||
1054 			((!(msg->msg_flags & MSG_DONTWAIT)) &&
1055 			 (atomic_read(&po->tx_ring.pending))))
1056 		);
1057 
1058 	err = len_sum;
1059 	goto out_put;
1060 
1061 out_status:
1062 	__packet_set_status(po, ph, status);
1063 	kfree_skb(skb);
1064 out_put:
1065 	dev_put(dev);
1066 out:
1067 	mutex_unlock(&po->pg_vec_lock);
1068 	return err;
1069 }
1070 
1071 static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1072 					       size_t reserve, size_t len,
1073 					       size_t linear, int noblock,
1074 					       int *err)
1075 {
1076 	struct sk_buff *skb;
1077 
1078 	/* Under a page?  Don't bother with paged skb. */
1079 	if (prepad + len < PAGE_SIZE || !linear)
1080 		linear = len;
1081 
1082 	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1083 				   err);
1084 	if (!skb)
1085 		return NULL;
1086 
1087 	skb_reserve(skb, reserve);
1088 	skb_put(skb, linear);
1089 	skb->data_len = len - linear;
1090 	skb->len += len - linear;
1091 
1092 	return skb;
1093 }
1094 
1095 static int packet_snd(struct socket *sock,
1096 			  struct msghdr *msg, size_t len)
1097 {
1098 	struct sock *sk = sock->sk;
1099 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
1100 	struct sk_buff *skb;
1101 	struct net_device *dev;
1102 	__be16 proto;
1103 	unsigned char *addr;
1104 	int ifindex, err, reserve = 0;
1105 	struct virtio_net_hdr vnet_hdr = { 0 };
1106 	int offset = 0;
1107 	int vnet_hdr_len;
1108 	struct packet_sock *po = pkt_sk(sk);
1109 	unsigned short gso_type = 0;
1110 
1111 	/*
1112 	 *	Get and verify the address.
1113 	 */
1114 
1115 	if (saddr == NULL) {
1116 		ifindex	= po->ifindex;
1117 		proto	= po->num;
1118 		addr	= NULL;
1119 	} else {
1120 		err = -EINVAL;
1121 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
1122 			goto out;
1123 		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
1124 			goto out;
1125 		ifindex	= saddr->sll_ifindex;
1126 		proto	= saddr->sll_protocol;
1127 		addr	= saddr->sll_addr;
1128 	}
1129 
1130 
1131 	dev = dev_get_by_index(sock_net(sk), ifindex);
1132 	err = -ENXIO;
1133 	if (dev == NULL)
1134 		goto out_unlock;
1135 	if (sock->type == SOCK_RAW)
1136 		reserve = dev->hard_header_len;
1137 
1138 	err = -ENETDOWN;
1139 	if (!(dev->flags & IFF_UP))
1140 		goto out_unlock;
1141 
1142 	if (po->has_vnet_hdr) {
1143 		vnet_hdr_len = sizeof(vnet_hdr);
1144 
1145 		err = -EINVAL;
1146 		if (len < vnet_hdr_len)
1147 			goto out_unlock;
1148 
1149 		len -= vnet_hdr_len;
1150 
1151 		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
1152 				       vnet_hdr_len);
1153 		if (err < 0)
1154 			goto out_unlock;
1155 
1156 		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
1157 		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
1158 		      vnet_hdr.hdr_len))
1159 			vnet_hdr.hdr_len = vnet_hdr.csum_start +
1160 						 vnet_hdr.csum_offset + 2;
1161 
1162 		err = -EINVAL;
1163 		if (vnet_hdr.hdr_len > len)
1164 			goto out_unlock;
1165 
1166 		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1167 			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1168 			case VIRTIO_NET_HDR_GSO_TCPV4:
1169 				gso_type = SKB_GSO_TCPV4;
1170 				break;
1171 			case VIRTIO_NET_HDR_GSO_TCPV6:
1172 				gso_type = SKB_GSO_TCPV6;
1173 				break;
1174 			case VIRTIO_NET_HDR_GSO_UDP:
1175 				gso_type = SKB_GSO_UDP;
1176 				break;
1177 			default:
1178 				goto out_unlock;
1179 			}
1180 
1181 			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
1182 				gso_type |= SKB_GSO_TCP_ECN;
1183 
1184 			if (vnet_hdr.gso_size == 0)
1185 				goto out_unlock;
1186 
1187 		}
1188 	}
1189 
1190 	err = -EMSGSIZE;
1191 	if (!gso_type && (len > dev->mtu+reserve))
1192 		goto out_unlock;
1193 
1194 	err = -ENOBUFS;
1195 	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
1196 			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
1197 			       msg->msg_flags & MSG_DONTWAIT, &err);
1198 	if (skb == NULL)
1199 		goto out_unlock;
1200 
1201 	skb_set_network_header(skb, reserve);
1202 
1203 	err = -EINVAL;
1204 	if (sock->type == SOCK_DGRAM &&
1205 	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
1206 		goto out_free;
1207 
1208 	/* Returns -EFAULT on error */
1209 	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1210 	if (err)
1211 		goto out_free;
1212 	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1213 	if (err < 0)
1214 		goto out_free;
1215 
1216 	skb->protocol = proto;
1217 	skb->dev = dev;
1218 	skb->priority = sk->sk_priority;
1219 	skb->mark = sk->sk_mark;
1220 
1221 	if (po->has_vnet_hdr) {
1222 		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1223 			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
1224 						  vnet_hdr.csum_offset)) {
1225 				err = -EINVAL;
1226 				goto out_free;
1227 			}
1228 		}
1229 
1230 		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
1231 		skb_shinfo(skb)->gso_type = gso_type;
1232 
1233 		/* Header must be checked, and gso_segs computed. */
1234 		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1235 		skb_shinfo(skb)->gso_segs = 0;
1236 
1237 		len += vnet_hdr_len;
1238 	}
1239 
1240 	/*
1241 	 *	Now send it
1242 	 */
1243 
1244 	err = dev_queue_xmit(skb);
1245 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
1246 		goto out_unlock;
1247 
1248 	dev_put(dev);
1249 
1250 	return len;
1251 
1252 out_free:
1253 	kfree_skb(skb);
1254 out_unlock:
1255 	if (dev)
1256 		dev_put(dev);
1257 out:
1258 	return err;
1259 }
1260 
1261 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1262 		struct msghdr *msg, size_t len)
1263 {
1264 	struct sock *sk = sock->sk;
1265 	struct packet_sock *po = pkt_sk(sk);
1266 	if (po->tx_ring.pg_vec)
1267 		return tpacket_snd(po, msg);
1268 	else
1269 		return packet_snd(sock, msg, len);
1270 }
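/*
 * Illustrative userspace sketch of driving the TX ring (hedged, not part of
 * this file): configure the ring with setsockopt(fd, SOL_PACKET,
 * PACKET_TX_RING, &req, sizeof(req)), mmap() the socket, write a frame into
 * the next slot, set its tp_status to TP_STATUS_SEND_REQUEST and kick the
 * kernel with send(fd, NULL, 0, 0); tpacket_snd() above then picks the
 * frame up and flips it back to TP_STATUS_AVAILABLE once the driver is
 * done with it.
 */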
1271 
1272 /*
1273  *	Close a PACKET socket. This is fairly simple. We immediately go
1274  *	to 'closed' state and remove our protocol entry in the device list.
1275  */
1276 
1277 static int packet_release(struct socket *sock)
1278 {
1279 	struct sock *sk = sock->sk;
1280 	struct packet_sock *po;
1281 	struct net *net;
1282 	struct tpacket_req req;
1283 
1284 	if (!sk)
1285 		return 0;
1286 
1287 	net = sock_net(sk);
1288 	po = pkt_sk(sk);
1289 
1290 	spin_lock_bh(&net->packet.sklist_lock);
1291 	sk_del_node_init_rcu(sk);
1292 	sock_prot_inuse_add(net, sk->sk_prot, -1);
1293 	spin_unlock_bh(&net->packet.sklist_lock);
1294 
1295 	spin_lock(&po->bind_lock);
1296 	if (po->running) {
1297 		/*
1298 		 * Remove from protocol table
1299 		 */
1300 		po->running = 0;
1301 		po->num = 0;
1302 		__dev_remove_pack(&po->prot_hook);
1303 		__sock_put(sk);
1304 	}
1305 	spin_unlock(&po->bind_lock);
1306 
1307 	packet_flush_mclist(sk);
1308 
1309 	memset(&req, 0, sizeof(req));
1310 
1311 	if (po->rx_ring.pg_vec)
1312 		packet_set_ring(sk, &req, 1, 0);
1313 
1314 	if (po->tx_ring.pg_vec)
1315 		packet_set_ring(sk, &req, 1, 1);
1316 
1317 	synchronize_net();
1318 	/*
1319 	 *	Now the socket is dead. No more input will appear.
1320 	 */
1321 	sock_orphan(sk);
1322 	sock->sk = NULL;
1323 
1324 	/* Purge queues */
1325 
1326 	skb_queue_purge(&sk->sk_receive_queue);
1327 	sk_refcnt_debug_release(sk);
1328 
1329 	sock_put(sk);
1330 	return 0;
1331 }
1332 
1333 /*
1334  *	Attach a packet hook.
1335  */
1336 
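/*
 * Bind the socket's prot_hook to (dev, protocol): any existing hook is
 * detached first, and the new one is registered only when protocol is
 * non-zero.  Binding to a device that is down succeeds but flags ENETDOWN
 * on the socket so the owner learns about it on the next operation.
 */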
1337 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
1338 {
1339 	struct packet_sock *po = pkt_sk(sk);
1340 	/*
1341 	 *	Detach an existing hook if present.
1342 	 */
1343 
1344 	lock_sock(sk);
1345 
1346 	spin_lock(&po->bind_lock);
1347 	if (po->running) {
1348 		__sock_put(sk);
1349 		po->running = 0;
1350 		po->num = 0;
1351 		spin_unlock(&po->bind_lock);
1352 		dev_remove_pack(&po->prot_hook);
1353 		spin_lock(&po->bind_lock);
1354 	}
1355 
1356 	po->num = protocol;
1357 	po->prot_hook.type = protocol;
1358 	po->prot_hook.dev = dev;
1359 
1360 	po->ifindex = dev ? dev->ifindex : 0;
1361 
1362 	if (protocol == 0)
1363 		goto out_unlock;
1364 
1365 	if (!dev || (dev->flags & IFF_UP)) {
1366 		dev_add_pack(&po->prot_hook);
1367 		sock_hold(sk);
1368 		po->running = 1;
1369 	} else {
1370 		sk->sk_err = ENETDOWN;
1371 		if (!sock_flag(sk, SOCK_DEAD))
1372 			sk->sk_error_report(sk);
1373 	}
1374 
1375 out_unlock:
1376 	spin_unlock(&po->bind_lock);
1377 	release_sock(sk);
1378 	return 0;
1379 }
1380 
1381 /*
1382  *	Bind a packet socket to a device
1383  */
1384 
1385 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1386 			    int addr_len)
1387 {
1388 	struct sock *sk = sock->sk;
1389 	char name[15];
1390 	struct net_device *dev;
1391 	int err = -ENODEV;
1392 
1393 	/*
1394 	 *	Check legality
1395 	 */
1396 
1397 	if (addr_len != sizeof(struct sockaddr))
1398 		return -EINVAL;
1399 	strlcpy(name, uaddr->sa_data, sizeof(name));
1400 
1401 	dev = dev_get_by_name(sock_net(sk), name);
1402 	if (dev) {
1403 		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1404 		dev_put(dev);
1405 	}
1406 	return err;
1407 }
1408 
1409 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1410 {
1411 	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1412 	struct sock *sk = sock->sk;
1413 	struct net_device *dev = NULL;
1414 	int err;
1415 
1416 
1417 	/*
1418 	 *	Check legality
1419 	 */
1420 
1421 	if (addr_len < sizeof(struct sockaddr_ll))
1422 		return -EINVAL;
1423 	if (sll->sll_family != AF_PACKET)
1424 		return -EINVAL;
1425 
1426 	if (sll->sll_ifindex) {
1427 		err = -ENODEV;
1428 		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
1429 		if (dev == NULL)
1430 			goto out;
1431 	}
1432 	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1433 	if (dev)
1434 		dev_put(dev);
1435 
1436 out:
1437 	return err;
1438 }
1439 
1440 static struct proto packet_proto = {
1441 	.name	  = "PACKET",
1442 	.owner	  = THIS_MODULE,
1443 	.obj_size = sizeof(struct packet_sock),
1444 };
1445 
1446 /*
1447  *	Create a packet of type SOCK_PACKET.
1448  */
1449 
1450 static int packet_create(struct net *net, struct socket *sock, int protocol,
1451 			 int kern)
1452 {
1453 	struct sock *sk;
1454 	struct packet_sock *po;
1455 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
1456 	int err;
1457 
1458 	if (!capable(CAP_NET_RAW))
1459 		return -EPERM;
1460 	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1461 	    sock->type != SOCK_PACKET)
1462 		return -ESOCKTNOSUPPORT;
1463 
1464 	sock->state = SS_UNCONNECTED;
1465 
1466 	err = -ENOBUFS;
1467 	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
1468 	if (sk == NULL)
1469 		goto out;
1470 
1471 	sock->ops = &packet_ops;
1472 	if (sock->type == SOCK_PACKET)
1473 		sock->ops = &packet_ops_spkt;
1474 
1475 	sock_init_data(sock, sk);
1476 
1477 	po = pkt_sk(sk);
1478 	sk->sk_family = PF_PACKET;
1479 	po->num = proto;
1480 
1481 	sk->sk_destruct = packet_sock_destruct;
1482 	sk_refcnt_debug_inc(sk);
1483 
1484 	/*
1485 	 *	Attach a protocol block
1486 	 */
1487 
1488 	spin_lock_init(&po->bind_lock);
1489 	mutex_init(&po->pg_vec_lock);
1490 	po->prot_hook.func = packet_rcv;
1491 
1492 	if (sock->type == SOCK_PACKET)
1493 		po->prot_hook.func = packet_rcv_spkt;
1494 
1495 	po->prot_hook.af_packet_priv = sk;
1496 
1497 	if (proto) {
1498 		po->prot_hook.type = proto;
1499 		dev_add_pack(&po->prot_hook);
1500 		sock_hold(sk);
1501 		po->running = 1;
1502 	}
1503 
1504 	spin_lock_bh(&net->packet.sklist_lock);
1505 	sk_add_node_rcu(sk, &net->packet.sklist);
1506 	sock_prot_inuse_add(net, &packet_proto, 1);
1507 	spin_unlock_bh(&net->packet.sklist_lock);
1508 
1509 	return 0;
1510 out:
1511 	return err;
1512 }
1513 
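/*
 * MSG_ERRQUEUE receive: dequeue one skb from the socket error queue (used
 * here for TX timestamps), copy its payload to the caller and deliver the
 * extended error information as a PACKET_TX_TIMESTAMP control message.
 */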
1514 static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
1515 {
1516 	struct sock_exterr_skb *serr;
1517 	struct sk_buff *skb, *skb2;
1518 	int copied, err;
1519 
1520 	err = -EAGAIN;
1521 	skb = skb_dequeue(&sk->sk_error_queue);
1522 	if (skb == NULL)
1523 		goto out;
1524 
1525 	copied = skb->len;
1526 	if (copied > len) {
1527 		msg->msg_flags |= MSG_TRUNC;
1528 		copied = len;
1529 	}
1530 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1531 	if (err)
1532 		goto out_free_skb;
1533 
1534 	sock_recv_timestamp(msg, sk, skb);
1535 
1536 	serr = SKB_EXT_ERR(skb);
1537 	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
1538 		 sizeof(serr->ee), &serr->ee);
1539 
1540 	msg->msg_flags |= MSG_ERRQUEUE;
1541 	err = copied;
1542 
1543 	/* Reset and regenerate socket error */
1544 	spin_lock_bh(&sk->sk_error_queue.lock);
1545 	sk->sk_err = 0;
1546 	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
1547 		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
1548 		spin_unlock_bh(&sk->sk_error_queue.lock);
1549 		sk->sk_error_report(sk);
1550 	} else
1551 		spin_unlock_bh(&sk->sk_error_queue.lock);
1552 
1553 out_free_skb:
1554 	kfree_skb(skb);
1555 out:
1556 	return err;
1557 }
1558 
1559 /*
1560  *	Pull a packet from our receive queue and hand it to the user.
1561  *	If necessary we block.
1562  */
1563 
1564 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1565 			  struct msghdr *msg, size_t len, int flags)
1566 {
1567 	struct sock *sk = sock->sk;
1568 	struct sk_buff *skb;
1569 	int copied, err;
1570 	struct sockaddr_ll *sll;
1571 	int vnet_hdr_len = 0;
1572 
1573 	err = -EINVAL;
1574 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
1575 		goto out;
1576 
1577 #if 0
1578 	/* What error should we return now? EUNATTACH? */
1579 	if (pkt_sk(sk)->ifindex < 0)
1580 		return -ENODEV;
1581 #endif
1582 
1583 	if (flags & MSG_ERRQUEUE) {
1584 		err = packet_recv_error(sk, msg, len);
1585 		goto out;
1586 	}
1587 
1588 	/*
1589 	 *	Call the generic datagram receiver. This handles all sorts
1590 	 *	of horrible races and re-entrancy so we can forget about it
1591 	 *	in the protocol layers.
1592 	 *
1593 	 *	Now it will return ENETDOWN if the device has just gone down,
1594 	 *	but then it will block.
1595 	 */
1596 
1597 	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
1598 
1599 	/*
1600 	 *	An error occurred so return it. Because skb_recv_datagram()
1601 	 *	handles the blocking, we don't need to see or worry about
1602 	 *	blocking retries.
1603 	 */
1604 
1605 	if (skb == NULL)
1606 		goto out;
1607 
1608 	if (pkt_sk(sk)->has_vnet_hdr) {
1609 		struct virtio_net_hdr vnet_hdr = { 0 };
1610 
1611 		err = -EINVAL;
1612 		vnet_hdr_len = sizeof(vnet_hdr);
1613 		if (len < vnet_hdr_len)
1614 			goto out_free;
1615 
1616 		len -= vnet_hdr_len;
1617 
1618 		if (skb_is_gso(skb)) {
1619 			struct skb_shared_info *sinfo = skb_shinfo(skb);
1620 
1621 			/* This is a hint as to how much should be linear. */
1622 			vnet_hdr.hdr_len = skb_headlen(skb);
1623 			vnet_hdr.gso_size = sinfo->gso_size;
1624 			if (sinfo->gso_type & SKB_GSO_TCPV4)
1625 				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1626 			else if (sinfo->gso_type & SKB_GSO_TCPV6)
1627 				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1628 			else if (sinfo->gso_type & SKB_GSO_UDP)
1629 				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
1630 			else if (sinfo->gso_type & SKB_GSO_FCOE)
1631 				goto out_free;
1632 			else
1633 				BUG();
1634 			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1635 				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1636 		} else
1637 			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
1638 
1639 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
1640 			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1641 			vnet_hdr.csum_start = skb->csum_start -
1642 							skb_headroom(skb);
1643 			vnet_hdr.csum_offset = skb->csum_offset;
1644 		} /* else everything is zero */
1645 
1646 		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
1647 				     vnet_hdr_len);
1648 		if (err < 0)
1649 			goto out_free;
1650 	}
1651 
1652 	/*
1653 	 *	If the address length field is there to be filled in, we fill
1654 	 *	it in now.
1655 	 */
1656 
1657 	sll = &PACKET_SKB_CB(skb)->sa.ll;
1658 	if (sock->type == SOCK_PACKET)
1659 		msg->msg_namelen = sizeof(struct sockaddr_pkt);
1660 	else
1661 		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1662 
1663 	/*
1664 	 *	You lose any data beyond the buffer you gave. If it worries a
1665 	 *	user program they can ask the device for its MTU anyway.
1666 	 */
1667 
1668 	copied = skb->len;
1669 	if (copied > len) {
1670 		copied = len;
1671 		msg->msg_flags |= MSG_TRUNC;
1672 	}
1673 
1674 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1675 	if (err)
1676 		goto out_free;
1677 
1678 	sock_recv_ts_and_drops(msg, sk, skb);
1679 
1680 	if (msg->msg_name)
1681 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1682 		       msg->msg_namelen);
1683 
1684 	if (pkt_sk(sk)->auxdata) {
1685 		struct tpacket_auxdata aux;
1686 
1687 		aux.tp_status = TP_STATUS_USER;
1688 		if (skb->ip_summed == CHECKSUM_PARTIAL)
1689 			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1690 		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1691 		aux.tp_snaplen = skb->len;
1692 		aux.tp_mac = 0;
1693 		aux.tp_net = skb_network_offset(skb);
1694 		aux.tp_vlan_tci = vlan_tx_tag_get(skb);
1695 
1696 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1697 	}
1698 
1699 	/*
1700 	 *	Free or return the buffer as appropriate. Again this
1701 	 *	hides all the races and re-entrancy issues from us.
1702 	 */
1703 	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
1704 
1705 out_free:
1706 	skb_free_datagram(sk, skb);
1707 out:
1708 	return err;
1709 }
1710 
1711 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1712 			       int *uaddr_len, int peer)
1713 {
1714 	struct net_device *dev;
1715 	struct sock *sk	= sock->sk;
1716 
1717 	if (peer)
1718 		return -EOPNOTSUPP;
1719 
1720 	uaddr->sa_family = AF_PACKET;
1721 	rcu_read_lock();
1722 	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1723 	if (dev)
1724 		strncpy(uaddr->sa_data, dev->name, 14);
1725 	else
1726 		memset(uaddr->sa_data, 0, 14);
1727 	rcu_read_unlock();
1728 	*uaddr_len = sizeof(*uaddr);
1729 
1730 	return 0;
1731 }
1732 
1733 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1734 			  int *uaddr_len, int peer)
1735 {
1736 	struct net_device *dev;
1737 	struct sock *sk = sock->sk;
1738 	struct packet_sock *po = pkt_sk(sk);
1739 	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
1740 
1741 	if (peer)
1742 		return -EOPNOTSUPP;
1743 
1744 	sll->sll_family = AF_PACKET;
1745 	sll->sll_ifindex = po->ifindex;
1746 	sll->sll_protocol = po->num;
1747 	sll->sll_pkttype = 0;
1748 	rcu_read_lock();
1749 	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
1750 	if (dev) {
1751 		sll->sll_hatype = dev->type;
1752 		sll->sll_halen = dev->addr_len;
1753 		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1754 	} else {
1755 		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
1756 		sll->sll_halen = 0;
1757 	}
1758 	rcu_read_unlock();
1759 	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1760 
1761 	return 0;
1762 }
1763 
1764 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1765 			 int what)
1766 {
1767 	switch (i->type) {
1768 	case PACKET_MR_MULTICAST:
1769 		if (i->alen != dev->addr_len)
1770 			return -EINVAL;
1771 		if (what > 0)
1772 			return dev_mc_add(dev, i->addr);
1773 		else
1774 			return dev_mc_del(dev, i->addr);
1775 		break;
1776 	case PACKET_MR_PROMISC:
1777 		return dev_set_promiscuity(dev, what);
1778 		break;
1779 	case PACKET_MR_ALLMULTI:
1780 		return dev_set_allmulti(dev, what);
1781 		break;
1782 	case PACKET_MR_UNICAST:
1783 		if (i->alen != dev->addr_len)
1784 			return -EINVAL;
1785 		if (what > 0)
1786 			return dev_uc_add(dev, i->addr);
1787 		else
1788 			return dev_uc_del(dev, i->addr);
1789 		break;
1790 	default:
1791 		break;
1792 	}
1793 	return 0;
1794 }
1795 
1796 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1797 {
1798 	for ( ; i; i = i->next) {
1799 		if (i->ifindex == dev->ifindex)
1800 			packet_dev_mc(dev, i, what);
1801 	}
1802 }
1803 
1804 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1805 {
1806 	struct packet_sock *po = pkt_sk(sk);
1807 	struct packet_mclist *ml, *i;
1808 	struct net_device *dev;
1809 	int err;
1810 
1811 	rtnl_lock();
1812 
1813 	err = -ENODEV;
1814 	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
1815 	if (!dev)
1816 		goto done;
1817 
1818 	err = -EINVAL;
1819 	if (mreq->mr_alen > dev->addr_len)
1820 		goto done;
1821 
1822 	err = -ENOBUFS;
1823 	i = kmalloc(sizeof(*i), GFP_KERNEL);
1824 	if (i == NULL)
1825 		goto done;
1826 
1827 	err = 0;
1828 	for (ml = po->mclist; ml; ml = ml->next) {
1829 		if (ml->ifindex == mreq->mr_ifindex &&
1830 		    ml->type == mreq->mr_type &&
1831 		    ml->alen == mreq->mr_alen &&
1832 		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1833 			ml->count++;
1834 			/* Free the new element ... */
1835 			kfree(i);
1836 			goto done;
1837 		}
1838 	}
1839 
1840 	i->type = mreq->mr_type;
1841 	i->ifindex = mreq->mr_ifindex;
1842 	i->alen = mreq->mr_alen;
1843 	memcpy(i->addr, mreq->mr_address, i->alen);
1844 	i->count = 1;
1845 	i->next = po->mclist;
1846 	po->mclist = i;
1847 	err = packet_dev_mc(dev, i, 1);
1848 	if (err) {
1849 		po->mclist = i->next;
1850 		kfree(i);
1851 	}
1852 
1853 done:
1854 	rtnl_unlock();
1855 	return err;
1856 }
1857 
1858 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1859 {
1860 	struct packet_mclist *ml, **mlp;
1861 
1862 	rtnl_lock();
1863 
1864 	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1865 		if (ml->ifindex == mreq->mr_ifindex &&
1866 		    ml->type == mreq->mr_type &&
1867 		    ml->alen == mreq->mr_alen &&
1868 		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1869 			if (--ml->count == 0) {
1870 				struct net_device *dev;
1871 				*mlp = ml->next;
1872 				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1873 				if (dev)
1874 					packet_dev_mc(dev, ml, -1);
1875 				kfree(ml);
1876 			}
1877 			rtnl_unlock();
1878 			return 0;
1879 		}
1880 	}
1881 	rtnl_unlock();
1882 	return -EADDRNOTAVAIL;
1883 }
1884 
1885 static void packet_flush_mclist(struct sock *sk)
1886 {
1887 	struct packet_sock *po = pkt_sk(sk);
1888 	struct packet_mclist *ml;
1889 
1890 	if (!po->mclist)
1891 		return;
1892 
1893 	rtnl_lock();
1894 	while ((ml = po->mclist) != NULL) {
1895 		struct net_device *dev;
1896 
1897 		po->mclist = ml->next;
1898 		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1899 		if (dev != NULL)
1900 			packet_dev_mc(dev, ml, -1);
1901 		kfree(ml);
1902 	}
1903 	rtnl_unlock();
1904 }
1905 
1906 static int
1907 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
1908 {
1909 	struct sock *sk = sock->sk;
1910 	struct packet_sock *po = pkt_sk(sk);
1911 	int ret;
1912 
1913 	if (level != SOL_PACKET)
1914 		return -ENOPROTOOPT;
1915 
1916 	switch (optname) {
1917 	case PACKET_ADD_MEMBERSHIP:
1918 	case PACKET_DROP_MEMBERSHIP:
1919 	{
1920 		struct packet_mreq_max mreq;
1921 		int len = optlen;
1922 		memset(&mreq, 0, sizeof(mreq));
1923 		if (len < sizeof(struct packet_mreq))
1924 			return -EINVAL;
1925 		if (len > sizeof(mreq))
1926 			len = sizeof(mreq);
1927 		if (copy_from_user(&mreq, optval, len))
1928 			return -EFAULT;
1929 		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1930 			return -EINVAL;
1931 		if (optname == PACKET_ADD_MEMBERSHIP)
1932 			ret = packet_mc_add(sk, &mreq);
1933 		else
1934 			ret = packet_mc_drop(sk, &mreq);
1935 		return ret;
1936 	}
1937 
1938 	case PACKET_RX_RING:
1939 	case PACKET_TX_RING:
1940 	{
1941 		struct tpacket_req req;
1942 
1943 		if (optlen < sizeof(req))
1944 			return -EINVAL;
1945 		if (pkt_sk(sk)->has_vnet_hdr)
1946 			return -EINVAL;
1947 		if (copy_from_user(&req, optval, sizeof(req)))
1948 			return -EFAULT;
1949 		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
1950 	}
1951 	case PACKET_COPY_THRESH:
1952 	{
1953 		int val;
1954 
1955 		if (optlen != sizeof(val))
1956 			return -EINVAL;
1957 		if (copy_from_user(&val, optval, sizeof(val)))
1958 			return -EFAULT;
1959 
1960 		pkt_sk(sk)->copy_thresh = val;
1961 		return 0;
1962 	}
1963 	case PACKET_VERSION:
1964 	{
1965 		int val;
1966 
1967 		if (optlen != sizeof(val))
1968 			return -EINVAL;
1969 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1970 			return -EBUSY;
1971 		if (copy_from_user(&val, optval, sizeof(val)))
1972 			return -EFAULT;
1973 		switch (val) {
1974 		case TPACKET_V1:
1975 		case TPACKET_V2:
1976 			po->tp_version = val;
1977 			return 0;
1978 		default:
1979 			return -EINVAL;
1980 		}
1981 	}
1982 	case PACKET_RESERVE:
1983 	{
1984 		unsigned int val;
1985 
1986 		if (optlen != sizeof(val))
1987 			return -EINVAL;
1988 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1989 			return -EBUSY;
1990 		if (copy_from_user(&val, optval, sizeof(val)))
1991 			return -EFAULT;
1992 		po->tp_reserve = val;
1993 		return 0;
1994 	}
1995 	case PACKET_LOSS:
1996 	{
1997 		unsigned int val;
1998 
1999 		if (optlen != sizeof(val))
2000 			return -EINVAL;
2001 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2002 			return -EBUSY;
2003 		if (copy_from_user(&val, optval, sizeof(val)))
2004 			return -EFAULT;
2005 		po->tp_loss = !!val;
2006 		return 0;
2007 	}
2008 	case PACKET_AUXDATA:
2009 	{
2010 		int val;
2011 
2012 		if (optlen < sizeof(val))
2013 			return -EINVAL;
2014 		if (copy_from_user(&val, optval, sizeof(val)))
2015 			return -EFAULT;
2016 
2017 		po->auxdata = !!val;
2018 		return 0;
2019 	}
2020 	case PACKET_ORIGDEV:
2021 	{
2022 		int val;
2023 
2024 		if (optlen < sizeof(val))
2025 			return -EINVAL;
2026 		if (copy_from_user(&val, optval, sizeof(val)))
2027 			return -EFAULT;
2028 
2029 		po->origdev = !!val;
2030 		return 0;
2031 	}
2032 	case PACKET_VNET_HDR:
2033 	{
2034 		int val;
2035 
2036 		if (sock->type != SOCK_RAW)
2037 			return -EINVAL;
2038 		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2039 			return -EBUSY;
2040 		if (optlen < sizeof(val))
2041 			return -EINVAL;
2042 		if (copy_from_user(&val, optval, sizeof(val)))
2043 			return -EFAULT;
2044 
2045 		po->has_vnet_hdr = !!val;
2046 		return 0;
2047 	}
2048 	case PACKET_TIMESTAMP:
2049 	{
2050 		int val;
2051 
2052 		if (optlen != sizeof(val))
2053 			return -EINVAL;
2054 		if (copy_from_user(&val, optval, sizeof(val)))
2055 			return -EFAULT;
2056 
2057 		po->tp_tstamp = val;
2058 		return 0;
2059 	}
2060 	default:
2061 		return -ENOPROTOOPT;
2062 	}
2063 }
2064 
2065 static int packet_getsockopt(struct socket *sock, int level, int optname,
2066 			     char __user *optval, int __user *optlen)
2067 {
2068 	int len;
2069 	int val;
2070 	struct sock *sk = sock->sk;
2071 	struct packet_sock *po = pkt_sk(sk);
2072 	void *data;
2073 	struct tpacket_stats st;
2074 
2075 	if (level != SOL_PACKET)
2076 		return -ENOPROTOOPT;
2077 
2078 	if (get_user(len, optlen))
2079 		return -EFAULT;
2080 
2081 	if (len < 0)
2082 		return -EINVAL;
2083 
2084 	switch (optname) {
2085 	case PACKET_STATISTICS:
2086 		if (len > sizeof(struct tpacket_stats))
2087 			len = sizeof(struct tpacket_stats);
2088 		spin_lock_bh(&sk->sk_receive_queue.lock);
2089 		st = po->stats;
2090 		memset(&po->stats, 0, sizeof(st));
2091 		spin_unlock_bh(&sk->sk_receive_queue.lock);
2092 		st.tp_packets += st.tp_drops;
2093 
2094 		data = &st;
2095 		break;
2096 	case PACKET_AUXDATA:
2097 		if (len > sizeof(int))
2098 			len = sizeof(int);
2099 		val = po->auxdata;
2100 
2101 		data = &val;
2102 		break;
2103 	case PACKET_ORIGDEV:
2104 		if (len > sizeof(int))
2105 			len = sizeof(int);
2106 		val = po->origdev;
2107 
2108 		data = &val;
2109 		break;
2110 	case PACKET_VNET_HDR:
2111 		if (len > sizeof(int))
2112 			len = sizeof(int);
2113 		val = po->has_vnet_hdr;
2114 
2115 		data = &val;
2116 		break;
2117 	case PACKET_VERSION:
2118 		if (len > sizeof(int))
2119 			len = sizeof(int);
2120 		val = po->tp_version;
2121 		data = &val;
2122 		break;
2123 	case PACKET_HDRLEN:
2124 		if (len > sizeof(int))
2125 			len = sizeof(int);
		if (len < sizeof(int))
			return -EINVAL;
2126 		if (copy_from_user(&val, optval, len))
2127 			return -EFAULT;
2128 		switch (val) {
2129 		case TPACKET_V1:
2130 			val = sizeof(struct tpacket_hdr);
2131 			break;
2132 		case TPACKET_V2:
2133 			val = sizeof(struct tpacket2_hdr);
2134 			break;
2135 		default:
2136 			return -EINVAL;
2137 		}
2138 		data = &val;
2139 		break;
2140 	case PACKET_RESERVE:
2141 		if (len > sizeof(unsigned int))
2142 			len = sizeof(unsigned int);
2143 		val = po->tp_reserve;
2144 		data = &val;
2145 		break;
2146 	case PACKET_LOSS:
2147 		if (len > sizeof(unsigned int))
2148 			len = sizeof(unsigned int);
2149 		val = po->tp_loss;
2150 		data = &val;
2151 		break;
2152 	case PACKET_TIMESTAMP:
2153 		if (len > sizeof(int))
2154 			len = sizeof(int);
2155 		val = po->tp_tstamp;
2156 		data = &val;
2157 		break;
2158 	default:
2159 		return -ENOPROTOOPT;
2160 	}
2161 
2162 	if (put_user(len, optlen))
2163 		return -EFAULT;
2164 	if (copy_to_user(optval, data, len))
2165 		return -EFAULT;
2166 	return 0;
2167 }
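
/*
 * Usage note for packet_getsockopt() above (a hypothetical userspace
 * sketch, not part of this file, with fd a PF_PACKET socket):
 * PACKET_HDRLEN is a query-by-value option -- the caller passes the
 * TPACKET version in optval and gets the matching header length back.
 * PACKET_STATISTICS resets the counters on each read, and tp_packets
 * already includes tp_drops (see the += above).
 *
 *	int val = TPACKET_V2;
 *	socklen_t len = sizeof(val);
 *	struct tpacket_stats st;
 *
 *	getsockopt(fd, SOL_PACKET, PACKET_HDRLEN, &val, &len);
 *	// val now holds sizeof(struct tpacket2_hdr)
 *
 *	len = sizeof(st);
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 */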
2168 
2169 
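/*
 * Netdevice notifier: keeps packet sockets consistent with device state.
 * On NETDEV_UNREGISTER the multicast references taken via
 * PACKET_ADD_MEMBERSHIP are dropped and the socket is unbound from the
 * vanishing device; on NETDEV_DOWN the protocol hook is removed and the
 * socket is woken with ENETDOWN; on NETDEV_UP the hook is re-added for
 * sockets that were bound to this device.
 */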
2170 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
2171 {
2172 	struct sock *sk;
2173 	struct hlist_node *node;
2174 	struct net_device *dev = data;
2175 	struct net *net = dev_net(dev);
2176 
2177 	rcu_read_lock();
2178 	sk_for_each_rcu(sk, node, &net->packet.sklist) {
2179 		struct packet_sock *po = pkt_sk(sk);
2180 
2181 		switch (msg) {
2182 		case NETDEV_UNREGISTER:
2183 			if (po->mclist)
2184 				packet_dev_mclist(dev, po->mclist, -1);
2185 			/* fallthrough */
2186 
2187 		case NETDEV_DOWN:
2188 			if (dev->ifindex == po->ifindex) {
2189 				spin_lock(&po->bind_lock);
2190 				if (po->running) {
2191 					__dev_remove_pack(&po->prot_hook);
2192 					__sock_put(sk);
2193 					po->running = 0;
2194 					sk->sk_err = ENETDOWN;
2195 					if (!sock_flag(sk, SOCK_DEAD))
2196 						sk->sk_error_report(sk);
2197 				}
2198 				if (msg == NETDEV_UNREGISTER) {
2199 					po->ifindex = -1;
2200 					po->prot_hook.dev = NULL;
2201 				}
2202 				spin_unlock(&po->bind_lock);
2203 			}
2204 			break;
2205 		case NETDEV_UP:
2206 			if (dev->ifindex == po->ifindex) {
2207 				spin_lock(&po->bind_lock);
2208 				if (po->num && !po->running) {
2209 					dev_add_pack(&po->prot_hook);
2210 					sock_hold(sk);
2211 					po->running = 1;
2212 				}
2213 				spin_unlock(&po->bind_lock);
2214 			}
2215 			break;
2216 		}
2217 	}
2218 	rcu_read_unlock();
2219 	return NOTIFY_DONE;
2220 }
2221 
2222 
2223 static int packet_ioctl(struct socket *sock, unsigned int cmd,
2224 			unsigned long arg)
2225 {
2226 	struct sock *sk = sock->sk;
2227 
2228 	switch (cmd) {
2229 	case SIOCOUTQ:
2230 	{
2231 		int amount = sk_wmem_alloc_get(sk);
2232 
2233 		return put_user(amount, (int __user *)arg);
2234 	}
2235 	case SIOCINQ:
2236 	{
2237 		struct sk_buff *skb;
2238 		int amount = 0;
2239 
2240 		spin_lock_bh(&sk->sk_receive_queue.lock);
2241 		skb = skb_peek(&sk->sk_receive_queue);
2242 		if (skb)
2243 			amount = skb->len;
2244 		spin_unlock_bh(&sk->sk_receive_queue.lock);
2245 		return put_user(amount, (int __user *)arg);
2246 	}
2247 	case SIOCGSTAMP:
2248 		return sock_get_timestamp(sk, (struct timeval __user *)arg);
2249 	case SIOCGSTAMPNS:
2250 		return sock_get_timestampns(sk, (struct timespec __user *)arg);
2251 
2252 #ifdef CONFIG_INET
2253 	case SIOCADDRT:
2254 	case SIOCDELRT:
2255 	case SIOCDARP:
2256 	case SIOCGARP:
2257 	case SIOCSARP:
2258 	case SIOCGIFADDR:
2259 	case SIOCSIFADDR:
2260 	case SIOCGIFBRDADDR:
2261 	case SIOCSIFBRDADDR:
2262 	case SIOCGIFNETMASK:
2263 	case SIOCSIFNETMASK:
2264 	case SIOCGIFDSTADDR:
2265 	case SIOCSIFDSTADDR:
2266 	case SIOCSIFFLAGS:
2267 		return inet_dgram_ops.ioctl(sock, cmd, arg);
2268 #endif
2269 
2270 	default:
2271 		return -ENOIOCTLCMD;
2272 	}
2273 	return 0;
2274 }
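
/*
 * Usage note for packet_ioctl() above (a hypothetical userspace sketch,
 * not part of this file, with fd a PF_PACKET socket): SIOCINQ reports
 * the length of the *next* queued packet, not the whole receive-queue
 * backlog, while SIOCOUTQ reports the send-buffer memory still held by
 * queued or in-flight packets:
 *
 *	int next_len, unsent;
 *
 *	ioctl(fd, SIOCINQ, &next_len);
 *	ioctl(fd, SIOCOUTQ, &unsent);
 */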
2275 
2276 static unsigned int packet_poll(struct file *file, struct socket *sock,
2277 				poll_table *wait)
2278 {
2279 	struct sock *sk = sock->sk;
2280 	struct packet_sock *po = pkt_sk(sk);
2281 	unsigned int mask = datagram_poll(file, sock, wait);
2282 
2283 	spin_lock_bh(&sk->sk_receive_queue.lock);
2284 	if (po->rx_ring.pg_vec) {
2285 		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
2286 			mask |= POLLIN | POLLRDNORM;
2287 	}
2288 	spin_unlock_bh(&sk->sk_receive_queue.lock);
2289 	spin_lock_bh(&sk->sk_write_queue.lock);
2290 	if (po->tx_ring.pg_vec) {
2291 		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2292 			mask |= POLLOUT | POLLWRNORM;
2293 	}
2294 	spin_unlock_bh(&sk->sk_write_queue.lock);
2295 	return mask;
2296 }
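
/*
 * Usage note for packet_poll() above (a hypothetical userspace sketch,
 * not part of this file): with an RX ring mapped, poll() is only a
 * wakeup mechanism; the caller still walks the ring, consuming frames
 * whose tp_status has TP_STATUS_USER set and handing each slot back
 * with TP_STATUS_KERNEL:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	poll(&pfd, 1, -1);
 *	// hdr is a struct tpacket2_hdr * into the mmap()ed RX ring
 *	while (hdr->tp_status & TP_STATUS_USER) {
 *		// ...consume the frame...
 *		hdr->tp_status = TP_STATUS_KERNEL;
 *		// ...advance hdr to the next frame...
 *	}
 */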
2297 
2298 
2299 /* Dirty? Well, I still have not found a better way to account
2300  * for user mmaps.
2301  */
2302 
2303 static void packet_mm_open(struct vm_area_struct *vma)
2304 {
2305 	struct file *file = vma->vm_file;
2306 	struct socket *sock = file->private_data;
2307 	struct sock *sk = sock->sk;
2308 
2309 	if (sk)
2310 		atomic_inc(&pkt_sk(sk)->mapped);
2311 }
2312 
2313 static void packet_mm_close(struct vm_area_struct *vma)
2314 {
2315 	struct file *file = vma->vm_file;
2316 	struct socket *sock = file->private_data;
2317 	struct sock *sk = sock->sk;
2318 
2319 	if (sk)
2320 		atomic_dec(&pkt_sk(sk)->mapped);
2321 }
2322 
2323 static const struct vm_operations_struct packet_mmap_ops = {
2324 	.open	=	packet_mm_open,
2325 	.close	=	packet_mm_close,
2326 };
2327 
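/*
 * The ring memory is a vector of tp_block_nr pointers, each to a
 * physically contiguous block of 2^order pages (order is derived from
 * tp_block_size).  Blocks are allocated zeroed, as compound pages, and
 * without allocation-failure warnings, since high-order allocations may
 * legitimately fail.
 */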
2328 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
2329 {
2330 	int i;
2331 
2332 	for (i = 0; i < len; i++) {
2333 		if (likely(pg_vec[i]))
2334 			free_pages((unsigned long) pg_vec[i], order);
2335 	}
2336 	kfree(pg_vec);
2337 }
2338 
2339 static inline char *alloc_one_pg_vec_page(unsigned long order)
2340 {
2341 	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2342 
2343 	return (char *) __get_free_pages(gfp_flags, order);
2344 }
2345 
2346 static char **alloc_pg_vec(struct tpacket_req *req, int order)
2347 {
2348 	unsigned int block_nr = req->tp_block_nr;
2349 	char **pg_vec;
2350 	int i;
2351 
2352 	pg_vec = kcalloc(block_nr, sizeof(char *), GFP_KERNEL);
2353 	if (unlikely(!pg_vec))
2354 		goto out;
2355 
2356 	for (i = 0; i < block_nr; i++) {
2357 		pg_vec[i] = alloc_one_pg_vec_page(order);
2358 		if (unlikely(!pg_vec[i]))
2359 			goto out_free_pgvec;
2360 	}
2361 
2362 out:
2363 	return pg_vec;
2364 
2365 out_free_pgvec:
2366 	free_pg_vec(pg_vec, order, block_nr);
2367 	pg_vec = NULL;
2368 	goto out;
2369 }
2370 
2371 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2372 		int closing, int tx_ring)
2373 {
2374 	char **pg_vec = NULL;
2375 	struct packet_sock *po = pkt_sk(sk);
2376 	int was_running, order = 0;
2377 	struct packet_ring_buffer *rb;
2378 	struct sk_buff_head *rb_queue;
2379 	__be16 num;
2380 	int err;
2381 
2382 	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2383 	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2384 
2385 	err = -EBUSY;
2386 	if (!closing) {
2387 		if (atomic_read(&po->mapped))
2388 			goto out;
2389 		if (atomic_read(&rb->pending))
2390 			goto out;
2391 	}
2392 
2393 	if (req->tp_block_nr) {
2394 		/* Sanity tests and some calculations */
2395 		err = -EBUSY;
2396 		if (unlikely(rb->pg_vec))
2397 			goto out;
2398 
2399 		switch (po->tp_version) {
2400 		case TPACKET_V1:
2401 			po->tp_hdrlen = TPACKET_HDRLEN;
2402 			break;
2403 		case TPACKET_V2:
2404 			po->tp_hdrlen = TPACKET2_HDRLEN;
2405 			break;
2406 		}
2407 
2408 		err = -EINVAL;
2409 		if (unlikely((int)req->tp_block_size <= 0))
2410 			goto out;
2411 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
2412 			goto out;
2413 		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
2414 					po->tp_reserve))
2415 			goto out;
2416 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
2417 			goto out;
2418 
2419 		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2420 		if (unlikely(rb->frames_per_block <= 0))
2421 			goto out;
2422 		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2423 					req->tp_frame_nr))
2424 			goto out;
2425 
2426 		err = -ENOMEM;
2427 		order = get_order(req->tp_block_size);
2428 		pg_vec = alloc_pg_vec(req, order);
2429 		if (unlikely(!pg_vec))
2430 			goto out;
2431 	}
2432 	/* Tear-down request: no blocks means no frames either */
2433 	else {
2434 		err = -EINVAL;
2435 		if (unlikely(req->tp_frame_nr))
2436 			goto out;
2437 	}
2438 
2439 	lock_sock(sk);
2440 
2441 	/* Detach socket from network */
2442 	spin_lock(&po->bind_lock);
2443 	was_running = po->running;
2444 	num = po->num;
2445 	if (was_running) {
2446 		__dev_remove_pack(&po->prot_hook);
2447 		po->num = 0;
2448 		po->running = 0;
2449 		__sock_put(sk);
2450 	}
2451 	spin_unlock(&po->bind_lock);
2452 
2453 	synchronize_net();
2454 
2455 	err = -EBUSY;
2456 	mutex_lock(&po->pg_vec_lock);
2457 	if (closing || atomic_read(&po->mapped) == 0) {
2458 		err = 0;
2459 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
2460 		spin_lock_bh(&rb_queue->lock);
2461 		pg_vec = XC(rb->pg_vec, pg_vec);
2462 		rb->frame_max = (req->tp_frame_nr - 1);
2463 		rb->head = 0;
2464 		rb->frame_size = req->tp_frame_size;
2465 		spin_unlock_bh(&rb_queue->lock);
2466 
2467 		order = XC(rb->pg_vec_order, order);
2468 		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
2469 
2470 		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2471 		po->prot_hook.func = (po->rx_ring.pg_vec) ?
2472 						tpacket_rcv : packet_rcv;
2473 		skb_queue_purge(rb_queue);
2474 #undef XC
2475 		if (atomic_read(&po->mapped))
2476 			pr_err("packet_mmap: vma is busy: %d\n",
2477 			       atomic_read(&po->mapped));
2478 	}
2479 	mutex_unlock(&po->pg_vec_lock);
2480 
2481 	spin_lock(&po->bind_lock);
2482 	if (was_running && !po->running) {
2483 		sock_hold(sk);
2484 		po->running = 1;
2485 		po->num = num;
2486 		dev_add_pack(&po->prot_hook);
2487 	}
2488 	spin_unlock(&po->bind_lock);
2489 
2490 	release_sock(sk);
2491 
2492 	if (pg_vec)
2493 		free_pg_vec(pg_vec, order, req->tp_block_nr);
2494 out:
2495 	return err;
2496 }
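
/*
 * Ring geometry (a hypothetical userspace sketch, not part of this
 * file): packet_set_ring() above requires tp_block_size to be a
 * positive multiple of PAGE_SIZE, tp_frame_size to be
 * TPACKET_ALIGNMENT-aligned and at least tp_hdrlen + tp_reserve, and
 * tp_frame_nr to equal frames-per-block times tp_block_nr.  Assuming a
 * 4 KiB PAGE_SIZE:
 *
 *	struct tpacket_req req = {
 *		.tp_block_size	= 1 << 12,	// one page per block
 *		.tp_frame_size	= 1 << 11,	// two frames per block
 *		.tp_block_nr	= 64,
 *		.tp_frame_nr	= 2 * 64,
 *	};
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *
 * Passing an all-zero tpacket_req tears the ring down again (once it is
 * no longer mmap()ed).
 */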
2497 
2498 static int packet_mmap(struct file *file, struct socket *sock,
2499 		struct vm_area_struct *vma)
2500 {
2501 	struct sock *sk = sock->sk;
2502 	struct packet_sock *po = pkt_sk(sk);
2503 	unsigned long size, expected_size;
2504 	struct packet_ring_buffer *rb;
2505 	unsigned long start;
2506 	int err = -EINVAL;
2507 	int i;
2508 
2509 	if (vma->vm_pgoff)
2510 		return -EINVAL;
2511 
2512 	mutex_lock(&po->pg_vec_lock);
2513 
2514 	expected_size = 0;
2515 	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2516 		if (rb->pg_vec) {
2517 			expected_size += rb->pg_vec_len
2518 						* rb->pg_vec_pages
2519 						* PAGE_SIZE;
2520 		}
2521 	}
2522 
2523 	if (expected_size == 0)
2524 		goto out;
2525 
2526 	size = vma->vm_end - vma->vm_start;
2527 	if (size != expected_size)
2528 		goto out;
2529 
2530 	start = vma->vm_start;
2531 	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2532 		if (rb->pg_vec == NULL)
2533 			continue;
2534 
2535 		for (i = 0; i < rb->pg_vec_len; i++) {
2536 			struct page *page = virt_to_page(rb->pg_vec[i]);
2537 			int pg_num;
2538 
2539 			for (pg_num = 0; pg_num < rb->pg_vec_pages;
2540 					pg_num++, page++) {
2541 				err = vm_insert_page(vma, start, page);
2542 				if (unlikely(err))
2543 					goto out;
2544 				start += PAGE_SIZE;
2545 			}
2546 		}
2547 	}
2548 
2549 	atomic_inc(&po->mapped);
2550 	vma->vm_ops = &packet_mmap_ops;
2551 	err = 0;
2552 
2553 out:
2554 	mutex_unlock(&po->pg_vec_lock);
2555 	return err;
2556 }
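
/*
 * Usage note for packet_mmap() above (a hypothetical userspace sketch,
 * not part of this file; rx_req/tx_req are the tpacket_req structs used
 * to create the rings): both rings are exposed through a single mapping
 * at offset 0 whose length must be exactly the sum of the RX and TX
 * ring sizes, with the RX pages first and the TX pages immediately
 * after:
 *
 *	size_t rx_size = rx_req.tp_block_size * rx_req.tp_block_nr;
 *	size_t tx_size = tx_req.tp_block_size * tx_req.tp_block_nr;
 *	void *ring = mmap(NULL, rx_size + tx_size,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */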
2557 
2558 static const struct proto_ops packet_ops_spkt = {
2559 	.family =	PF_PACKET,
2560 	.owner =	THIS_MODULE,
2561 	.release =	packet_release,
2562 	.bind =		packet_bind_spkt,
2563 	.connect =	sock_no_connect,
2564 	.socketpair =	sock_no_socketpair,
2565 	.accept =	sock_no_accept,
2566 	.getname =	packet_getname_spkt,
2567 	.poll =		datagram_poll,
2568 	.ioctl =	packet_ioctl,
2569 	.listen =	sock_no_listen,
2570 	.shutdown =	sock_no_shutdown,
2571 	.setsockopt =	sock_no_setsockopt,
2572 	.getsockopt =	sock_no_getsockopt,
2573 	.sendmsg =	packet_sendmsg_spkt,
2574 	.recvmsg =	packet_recvmsg,
2575 	.mmap =		sock_no_mmap,
2576 	.sendpage =	sock_no_sendpage,
2577 };
2578 
2579 static const struct proto_ops packet_ops = {
2580 	.family =	PF_PACKET,
2581 	.owner =	THIS_MODULE,
2582 	.release =	packet_release,
2583 	.bind =		packet_bind,
2584 	.connect =	sock_no_connect,
2585 	.socketpair =	sock_no_socketpair,
2586 	.accept =	sock_no_accept,
2587 	.getname =	packet_getname,
2588 	.poll =		packet_poll,
2589 	.ioctl =	packet_ioctl,
2590 	.listen =	sock_no_listen,
2591 	.shutdown =	sock_no_shutdown,
2592 	.setsockopt =	packet_setsockopt,
2593 	.getsockopt =	packet_getsockopt,
2594 	.sendmsg =	packet_sendmsg,
2595 	.recvmsg =	packet_recvmsg,
2596 	.mmap =		packet_mmap,
2597 	.sendpage =	sock_no_sendpage,
2598 };
2599 
2600 static const struct net_proto_family packet_family_ops = {
2601 	.family =	PF_PACKET,
2602 	.create =	packet_create,
2603 	.owner	=	THIS_MODULE,
2604 };
2605 
2606 static struct notifier_block packet_netdev_notifier = {
2607 	.notifier_call =	packet_notifier,
2608 };
2609 
2610 #ifdef CONFIG_PROC_FS
2611 
2612 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2613 	__acquires(RCU)
2614 {
2615 	struct net *net = seq_file_net(seq);
2616 
2617 	rcu_read_lock();
2618 	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
2619 }
2620 
2621 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2622 {
2623 	struct net *net = seq_file_net(seq);
2624 	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
2625 }
2626 
2627 static void packet_seq_stop(struct seq_file *seq, void *v)
2628 	__releases(RCU)
2629 {
2630 	rcu_read_unlock();
2631 }
2632 
2633 static int packet_seq_show(struct seq_file *seq, void *v)
2634 {
2635 	if (v == SEQ_START_TOKEN)
2636 		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
2637 	else {
2638 		struct sock *s = sk_entry(v);
2639 		const struct packet_sock *po = pkt_sk(s);
2640 
2641 		seq_printf(seq,
2642 			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
2643 			   s,
2644 			   atomic_read(&s->sk_refcnt),
2645 			   s->sk_type,
2646 			   ntohs(po->num),
2647 			   po->ifindex,
2648 			   po->running,
2649 			   atomic_read(&s->sk_rmem_alloc),
2650 			   sock_i_uid(s),
2651 			   sock_i_ino(s));
2652 	}
2653 
2654 	return 0;
2655 }
2656 
2657 static const struct seq_operations packet_seq_ops = {
2658 	.start	= packet_seq_start,
2659 	.next	= packet_seq_next,
2660 	.stop	= packet_seq_stop,
2661 	.show	= packet_seq_show,
2662 };
2663 
2664 static int packet_seq_open(struct inode *inode, struct file *file)
2665 {
2666 	return seq_open_net(inode, file, &packet_seq_ops,
2667 			    sizeof(struct seq_net_private));
2668 }
2669 
2670 static const struct file_operations packet_seq_fops = {
2671 	.owner		= THIS_MODULE,
2672 	.open		= packet_seq_open,
2673 	.read		= seq_read,
2674 	.llseek		= seq_lseek,
2675 	.release	= seq_release_net,
2676 };
2677 
2678 #endif
2679 
2680 static int __net_init packet_net_init(struct net *net)
2681 {
2682 	spin_lock_init(&net->packet.sklist_lock);
2683 	INIT_HLIST_HEAD(&net->packet.sklist);
2684 
2685 	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2686 		return -ENOMEM;
2687 
2688 	return 0;
2689 }
2690 
2691 static void __net_exit packet_net_exit(struct net *net)
2692 {
2693 	proc_net_remove(net, "packet");
2694 }
2695 
2696 static struct pernet_operations packet_net_ops = {
2697 	.init = packet_net_init,
2698 	.exit = packet_net_exit,
2699 };
2700 
2701 
2702 static void __exit packet_exit(void)
2703 {
2704 	unregister_netdevice_notifier(&packet_netdev_notifier);
2705 	unregister_pernet_subsys(&packet_net_ops);
2706 	sock_unregister(PF_PACKET);
2707 	proto_unregister(&packet_proto);
2708 }
2709 
2710 static int __init packet_init(void)
2711 {
2712 	int rc = proto_register(&packet_proto, 0);
2713 
2714 	if (rc != 0)
2715 		goto out;
2716 
2717 	sock_register(&packet_family_ops);
2718 	register_pernet_subsys(&packet_net_ops);
2719 	register_netdevice_notifier(&packet_netdev_notifier);
2720 out:
2721 	return rc;
2722 }
2723 
2724 module_init(packet_init);
2725 module_exit(packet_exit);
2726 MODULE_LICENSE("GPL");
2727 MODULE_ALIAS_NETPROTO(PF_PACKET);
2728