xref: /linux/net/packet/af_packet.c (revision 13abf8130139c2ccd4962a7e5a8902be5e6cb5a7)
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Version:	$Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *	Alexey Kuznetsov	:	Untied from IPv4 stack.
 *	Cyrus Durgin		:	Fixed kerneld for kmod.
 *	Michal Ostrowski	:	Module initialization cleanup.
 *	Ulises Alonso		:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#define CONFIG_SOCK_PACKET	1

/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe it is the really correct
   solution: reentrant, safe and fault tolerant.

   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping a
   reference count and a global flag, so that the real status is
   (gflag|(count != 0)). This way we can keep the obsolete, faulty
   interface without harming its clever users.
 */
#define CONFIG_PACKET_MULTICAST	1
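
/*
 * A minimal sketch of the rule above (illustrative only, not part of
 * this file): the effective status a driver should honour is the OR of
 * the global flag and the reference count, e.g.
 *
 *	int effective_promisc(unsigned int gflags, int count)
 *	{
 *		return (gflags & IFF_PROMISC) || (count != 0);
 *	}
 *
 * where gflags and count stand in for the per-device flag and the
 * reference count kept by the stack; the helper name is hypothetical.
 */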

/*
   Assumptions:
   - if a device has no dev->hard_header routine, it adds and removes the
     ll header internally. In this case the ll header is invisible outside
     of the device, but higher levels should still reserve
     dev->hard_header_len. Some devices are clever enough to reallocate
     the skb when the header does not fit into the reserved space
     (tunnel); others are not (PPP).
   - a packet socket receives packets with the ll header pulled,
     so SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> data

Outgoing, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> ll header

Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It very likely points to the ll header.
              PPP does this, which is wrong, because it introduces
	      asymmetry between the rx and tx paths.
   data    -> data

Outgoing, dev->hard_header==NULL
   mac.raw -> data. ll header is still not built!
   data    -> data

Summary:
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac.raw -> ll header
   data    -> ll header

dev->hard_header == NULL (ll header is added by the device, we cannot control it)
   mac.raw -> data
   data    -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
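
/*
 * Illustrative user-space sketch of the two receive conventions above
 * (not part of the kernel build; error handling elided, headers
 * <sys/socket.h>, <linux/if_packet.h> and <linux/if_ether.h> assumed):
 *
 *	int raw   = socket(PF_PACKET, SOCK_RAW,   htons(ETH_P_ALL));
 *	int dgram = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));
 *	char buf[2048];
 *
 *	recv(raw, buf, sizeof(buf), 0);
 *		- buf starts at the ll (e.g. Ethernet) header
 *	recv(dgram, buf, sizeof(buf), 0);
 *		- buf starts at the network-layer packet; the ll address
 *		  is reported separately via struct sockaddr_ll
 */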

/* List of all packet sockets. */
static HLIST_HEAD(packet_sklist);
static DEFINE_RWLOCK(packet_sklist_lock);

static atomic_t packet_socks_nr;


/* Private packet socket structures. */

#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist
{
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[8];
};
#endif
#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
#ifdef CONFIG_PACKET_MMAP
	char *			*pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;
	int			copy_thresh;
#endif
	struct packet_type	prot_hook;
	spinlock_t		bind_lock;
	char			running;	/* prot_hook is attached */
	int			ifindex;	/* bound device		*/
	unsigned short		num;
#ifdef CONFIG_PACKET_MULTICAST
	struct packet_mclist	*mclist;
#endif
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;
	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;
#endif
};

#ifdef CONFIG_PACKET_MMAP

static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
{
	unsigned int pg_vec_pos, frame_offset;
	char *frame;

	pg_vec_pos = position / po->frames_per_block;
	frame_offset = position % po->frames_per_block;

	frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);

	return frame;
}
#endif
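
/*
 * Worked example of the lookup above (a sketch with illustrative
 * numbers, not taken from this file): with tp_frame_size = 2048 and
 * tp_block_size = 8192, frames_per_block is 4, so frame 9 resolves to
 * pg_vec_pos = 9 / 4 = 2 and frame_offset = 9 % 4 = 1, i.e. byte
 * offset 1 * 2048 inside the block at pg_vec[2].
 */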

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
	printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
}


static struct proto_ops packet_ops;

#ifdef CONFIG_SOCK_PACKET
static struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have the ll header pulled;
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb->mac.raw,
	 *	so this procedure is a no-op.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto oom;

	/* drop any routing info */
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = (struct sockaddr_pkt*)skb->cb;

	skb_push(skb, skb->data - skb->mac.raw);

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets from using up all the memory.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 *	Output a raw packet to the device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame.
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned short proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr)
	{
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return(-EINVAL);
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	}
	else
		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	/*
	 *	You may not queue a frame bigger than the mtu. This is the lowest level
	 *	raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

	/*
	 *	If the write buffer is full, then tough. At this level the user gets to
	 *	deal with the problem - do your own algorithmic backoffs. That's far
	 *	more flexible.
	 */

	if (skb == NULL)
		goto out_unlock;

	/*
	 *	Fill it in
	 */

	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb->nh.raw = skb->data;

	/* Try to align data part correctly */
	if (dev->hard_header) {
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
		if (len < dev->hard_header_len)
			skb->nh.raw = skb->data;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	if (err)
		goto out_free;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_free;

	/*
	 *	Now send it
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
	return err;
}
#endif

static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
{
	struct sk_filter *filter;

	bh_lock_sock(sk);
	filter = sk->sk_filter;
	/*
	 * Our caller already checked that filter != NULL but we need to
	 * verify that under bh_lock_sock() to be safe
	 */
	if (likely(filter != NULL))
		res = sk_run_filter(skb, filter->insns, filter->len);
	bh_unlock_sock(sk);

	return res;
}

/*
   This function does lazy skb cloning in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so if we return the skb to its original state on exit,
   we will not harm anyone.
 */
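
/*
 * A minimal sketch of the invariant just described (illustrative; the
 * real code is in packet_rcv() below): the original data pointer and
 * length are saved on entry and put back before the skb is released,
 * so other holders of a shared skb never observe the mangling:
 *
 *	u8 *skb_head = skb->data;
 *	int skb_len  = skb->len;
 *	... mangle skb->data / skb->len for our own delivery ...
 *	if (skb_head != skb->data && skb_shared(skb)) {
 *		skb->data = skb_head;
 *		skb->len  = skb_len;
 *	}
 */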

static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned snaplen;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides the details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

	snaplen = skb->len;

	if (sk->sk_filter) {
		unsigned res = run_filter(skb, sk, snaplen);
		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	sll = (struct sockaddr_ll*)skb->cb;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned snaplen;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff;
	struct sk_buff *copy_skb = NULL;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev->hard_header) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
			if (skb->ip_summed == CHECKSUM_HW)
				status |= TP_STATUS_CSUMNOTREADY;
		}
	}

	snaplen = skb->len;

	if (sk->sk_filter) {
		unsigned res = run_filter(skb, sk, snaplen);
		if (res == 0)
			goto drop_n_restore;
		if (snaplen > res)
			snaplen = res;
	}

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb->nh.raw - skb->data;
		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}
	if (snaplen > skb->len - skb->data_len)
		snaplen = skb->len - skb->data_len;

	spin_lock(&sk->sk_receive_queue.lock);
	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);

	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->frame_max ? po->head+1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	memcpy((u8*)h + macoff, skb->data, snaplen);

	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;
	h->tp_mac = macoff;
	h->tp_net = netoff;
	if (skb->tstamp.off_sec == 0) {
		__net_timestamp(skb);
		sock_enable_timestamp(sk);
	}
	h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec;
	h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;

	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;

	h->tp_status = status;
	mb();
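	/* Descriptive note: the barrier above makes the frame contents
	 * and the tp_status store visible before user space is woken up
	 * below; a reader that observes tp_status != TP_STATUS_KERNEL
	 * may then consume the frame. */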

	{
		struct page *p_start, *p_end;
		u8 *h_end = (u8 *)h + macoff + snaplen - 1;

		p_start = virt_to_page(h);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}

#endif


static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned short proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		struct packet_sock *po = pkt_sk(sk);

		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -EMSGSIZE;
	if (len > dev->mtu + reserve)
		goto out_unlock;

	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
				msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb->nh.raw = skb->data;

	if (dev->hard_header) {
		int res;
		err = -EINVAL;
		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
		if (sock->type != SOCK_DGRAM) {
			skb->tail = skb->data;
			skb->len = 0;
		} else if (res < 0)
			goto out_free;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_free;

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;

	if (!sk)
		return 0;

	po = pkt_sk(sk);

	write_lock_bh(&packet_sklist_lock);
	sk_del_node_init(sk);
	write_unlock_bh(&packet_sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		__sock_put(sk);
	}

#ifdef CONFIG_PACKET_MULTICAST
	packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
	if (po->pg_vec) {
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
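		/* dev_remove_pack() can block while it synchronizes with
		 * concurrent receivers, so call it with bind_lock dropped. */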
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (dev) {
		if (dev->flags&IFF_UP) {
			dev_add_pack(&po->prot_hook);
			sock_hold(sk);
			po->running = 1;
		} else {
			sk->sk_err = ENETDOWN;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_error_report(sk);
		}
	} else {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

#ifdef CONFIG_SOCK_PACKET

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}
#endif

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet socket.
 */

static int packet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct packet_sock *po;
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
	    && sock->type != SOCK_PACKET
#endif
	    )
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;
#endif
	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = protocol;

	sk->sk_destruct = packet_sock_destruct;
	atomic_inc(&packet_socks_nr);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	po->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;
#endif
	po->prot_hook.af_packet_priv = sk;

	if (protocol) {
		po->prot_hook.type = protocol;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	write_lock_bh(&packet_sklist_lock);
	sk_add_node(sk, &packet_sklist);
	write_unlock_bh(&packet_sklist_lock);
	return(0);
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sizeof(struct sockaddr_ll);

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	It will return ENETDOWN if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred, so return it. Because skb_recv_datagram()
	 *	handles the blocking, we don't need to worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	/*
	 *	You lose any data beyond the buffer you gave. If it worries
	 *	a user program, it can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len)
	{
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}

#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk	= sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	dev = dev_get_by_index(pkt_sk(sk)->ifindex);
	if (dev) {
		strlcpy(uaddr->sa_data, dev->name, 15);
		dev_put(dev);
	} else
		memset(uaddr->sa_data, 0, 14);
	*uaddr_len = sizeof(*uaddr);

	return 0;
}
#endif

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	dev = dev_get_by_index(po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	*uaddr_len = sizeof(*sll);

	return 0;
}

#ifdef CONFIG_PACKET_MULTICAST
static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (what > 0)
			dev_mc_add(dev, i->addr, i->alen, 0);
		else
			dev_mc_delete(dev, i->addr, i->alen, 0);
		break;
	case PACKET_MR_PROMISC:
		dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		dev_set_allmulti(dev, what);
		break;
	default:
		break;
	}
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	packet_dev_mc(dev, i, +1);

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = dev_get_by_index(ml->ifindex);
				if (dev) {
					packet_dev_mc(dev, ml, -1);
					dev_put(dev);
				}
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
			packet_dev_mc(dev, ml, -1);
			dev_put(dev);
		}
		kfree(ml);
	}
	rtnl_unlock();
}
#endif

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch(optname)	{
#ifdef CONFIG_PACKET_MULTICAST
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq mreq;
		if (optlen < sizeof(mreq))
			return -EINVAL;
		if (copy_from_user(&mreq, optval, sizeof(mreq)))
			return -EFAULT;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}
#endif
#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
#endif
	default:
		return -ENOPROTOOPT;
	}
}
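
/*
 * Illustrative user-space sketch of driving PACKET_RX_RING above (not
 * part of the kernel build; fd is an AF_PACKET socket, error handling
 * elided, 4 KiB pages assumed).  The sizes satisfy the sanity checks in
 * packet_set_ring(): the block size is a multiple of PAGE_SIZE and
 * tp_frame_nr == frames_per_block * tp_block_nr:
 *
 *	struct tpacket_req req = {
 *		.tp_block_size	= 8192,
 *		.tp_block_nr	= 64,
 *		.tp_frame_size	= 2048,
 *		.tp_frame_nr	= 64 * (8192 / 2048),
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	char *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 */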

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch(optname)	{
	case PACKET_STATISTICS:
	{
		struct tpacket_stats st;

		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		if (copy_to_user(optval, &st, len))
			return -EFAULT;
		break;
	}
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = (struct net_device*)data;

	read_lock(&packet_sklist_lock);
	sk_for_each(sk, node, &packet_sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
#ifdef CONFIG_PACKET_MULTICAST
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */
#endif
		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&packet_sklist_lock);
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch(cmd) {
		case SIOCOUTQ:
		{
			int amount = atomic_read(&sk->sk_wmem_alloc);
			return put_user(amount, (int __user *)arg);
		}
		case SIOCINQ:
		{
			struct sk_buff *skb;
			int amount = 0;

			spin_lock_bh(&sk->sk_receive_queue.lock);
			skb = skb_peek(&sk->sk_receive_queue);
			if (skb)
				amount = skb->len;
			spin_unlock_bh(&sk->sk_receive_queue.lock);
			return put_user(amount, (int __user *)arg);
		}
		case SIOCGSTAMP:
			return sock_get_timestamp(sk, (struct timeval __user *)arg);

#ifdef CONFIG_INET
		case SIOCADDRT:
		case SIOCDELRT:
		case SIOCDARP:
		case SIOCGARP:
		case SIOCSARP:
		case SIOCGIFADDR:
		case SIOCSIFADDR:
		case SIOCGIFBRDADDR:
		case SIOCSIFBRDADDR:
		case SIOCGIFNETMASK:
		case SIOCSIFNETMASK:
		case SIOCGIFDSTADDR:
		case SIOCSIFDSTADDR:
		case SIOCSIFFLAGS:
			return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

		default:
			return dev_ioctl(cmd, (void __user *)arg);
	}
	return 0;
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

static unsigned int packet_poll(struct file * file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->pg_vec) {
		unsigned last = po->head ? po->head-1 : po->frame_max;
		struct tpacket_hdr *h;

		h = (struct tpacket_hdr *)packet_lookup_frame(po, last);

		if (h->tp_status)
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	return mask;
}


/* Dirty? Well, I still have not learned a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_dentry->d_inode;
	struct socket *sock = SOCKET_I(inode);
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct inode *inode = file->f_dentry->d_inode;
	struct socket *sock = SOCKET_I(inode);
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
	.open =		packet_mm_open,
	.close =	packet_mm_close,
};

static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
{
	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
}

static void free_pg_vec(char **pg_vec, unsigned order, unsigned len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (pg_vec[i]) {
			struct page *page, *pend;

			pend = pg_vec_endpage(pg_vec[i], order);
			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
				ClearPageReserved(page);
			free_pages((unsigned long)pg_vec[i], order);
		}
	}
	kfree(pg_vec);
}


static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, num, order = 0;
	int err = 0;

	if (req->tp_block_nr) {
		int i, l;

		/* Sanity tests and some calculations */

		if (po->pg_vec)
			return -EBUSY;

		if ((int)req->tp_block_size <= 0)
			return -EINVAL;
		if (req->tp_block_size&(PAGE_SIZE-1))
			return -EINVAL;
		if (req->tp_frame_size < TPACKET_HDRLEN)
			return -EINVAL;
		if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
			return -EINVAL;

		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (po->frames_per_block <= 0)
			return -EINVAL;
		if (po->frames_per_block*req->tp_block_nr != req->tp_frame_nr)
			return -EINVAL;
		/* OK! */

		/* Allocate page vector */
		while ((PAGE_SIZE<<order) < req->tp_block_size)
			order++;

		err = -ENOMEM;

		pg_vec = kmalloc(req->tp_block_nr*sizeof(char *), GFP_KERNEL);
		if (pg_vec == NULL)
			goto out;
		memset(pg_vec, 0, req->tp_block_nr*sizeof(char *));

		for (i = 0; i < req->tp_block_nr; i++) {
			struct page *page, *pend;
			pg_vec[i] = (char *)__get_free_pages(GFP_KERNEL, order);
			if (!pg_vec[i])
				goto out_free_pgvec;

			pend = pg_vec_endpage(pg_vec[i], order);
			for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
				SetPageReserved(page);
		}
		/* Page vector is allocated */

		l = 0;
		for (i = 0; i < req->tp_block_nr; i++) {
			char *ptr = pg_vec[i];
			struct tpacket_hdr *header;
			int k;

			for (k = 0; k < po->frames_per_block; k++) {

				header = (struct tpacket_hdr*)ptr;
				header->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		if (req->tp_frame_nr)
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
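		/* XC() is a GCC statement-expression swap: it stores b
		 * into a and evaluates to a's old value, so the lines
		 * below exchange the socket's ring state with the local
		 * copies while the receive-queue lock is held. */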

		spin_lock_bh(&sk->sk_receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		po->frame_max = req->tp_frame_nr-1;
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->sk_receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->sk_receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
	}

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

out_free_pgvec:
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}

static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	if (po->pg_vec == NULL)
		goto out;
	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
		goto out;

	atomic_inc(&po->mapped);
	start = vma->vm_start;
	err = -EAGAIN;
	for (i = 0; i < po->pg_vec_len; i++) {
		if (remap_pfn_range(vma, start,
				     __pa(po->pg_vec[i]) >> PAGE_SHIFT,
				     po->pg_vec_pages*PAGE_SIZE,
				     vma->vm_page_prot))
			goto out;
		start += po->pg_vec_pages*PAGE_SIZE;
	}
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
#endif


#ifdef CONFIG_SOCK_PACKET
static struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
#endif

static struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(loff_t off)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &packet_sklist) {
		if (!off--)
			return s;
	}
	return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&packet_sklist_lock);
	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return (v == SEQ_START_TOKEN)
		? sk_head(&packet_sklist)
		: sk_next((struct sock*)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&packet_sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &packet_seq_ops);
}

static struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

#endif

static void __exit packet_exit(void)
{
	proc_net_remove("packet");
	unregister_netdevice_notifier(&packet_netdev_notifier);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
	proc_net_fops_create("packet", 0, &packet_seq_fops);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);