xref: /linux/net/packet/af_packet.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		PACKET - implements raw packet sockets.
7  *
8  * Version:	$Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
13  *
14  * Fixes:
15  *		Alan Cox	:	verify_area() now used correctly
16  *		Alan Cox	:	new skbuff lists, look ma no backlogs!
17  *		Alan Cox	:	tidied skbuff lists.
18  *		Alan Cox	:	Now uses generic datagram routines I
19  *					added. Also fixed the peek/read crash
20  *					from all old Linux datagram code.
21  *		Alan Cox	:	Uses the improved datagram code.
22  *		Alan Cox	:	Added NULL's for socket options.
23  *		Alan Cox	:	Re-commented the code.
24  *		Alan Cox	:	Use new kernel side addressing
25  *		Rob Janssen	:	Correct MTU usage.
26  *		Dave Platt	:	Counter leaks caused by incorrect
27  *					interrupt locking and some slightly
28  *					dubious gcc output. Can you read
29  *					compiler: it said _VOLATILE_
30  *	Richard Kooijman	:	Timestamp fixes.
31  *		Alan Cox	:	New buffers. Use sk->mac.raw.
32  *		Alan Cox	:	sendmsg/recvmsg support.
33  *		Alan Cox	:	Protocol setting support
34  *	Alexey Kuznetsov	:	Untied from IPv4 stack.
35  *	Cyrus Durgin		:	Fixed kerneld for kmod.
36  *	Michal Ostrowski        :       Module initialization cleanup.
37  *         Ulises Alonso        :       Frame number limit removal and
38  *                                      packet_set_ring memory leak.
39  *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
40  *					The convention is that longer addresses
41  *					will simply extend the hardware address
42  *					byte arrays at the end of sockaddr_ll
43  *					and packet_mreq.
44  *
45  *		This program is free software; you can redistribute it and/or
46  *		modify it under the terms of the GNU General Public License
47  *		as published by the Free Software Foundation; either version
48  *		2 of the License, or (at your option) any later version.
49  *
50  */
51 
52 #include <linux/config.h>
53 #include <linux/types.h>
54 #include <linux/sched.h>
55 #include <linux/mm.h>
56 #include <linux/capability.h>
57 #include <linux/fcntl.h>
58 #include <linux/socket.h>
59 #include <linux/in.h>
60 #include <linux/inet.h>
61 #include <linux/netdevice.h>
62 #include <linux/if_packet.h>
63 #include <linux/wireless.h>
64 #include <linux/kmod.h>
65 #include <net/ip.h>
66 #include <net/protocol.h>
67 #include <linux/skbuff.h>
68 #include <net/sock.h>
69 #include <linux/errno.h>
70 #include <linux/timer.h>
71 #include <asm/system.h>
72 #include <asm/uaccess.h>
73 #include <asm/ioctls.h>
74 #include <asm/page.h>
75 #include <asm/io.h>
76 #include <linux/proc_fs.h>
77 #include <linux/seq_file.h>
78 #include <linux/poll.h>
79 #include <linux/module.h>
80 #include <linux/init.h>
81 
82 #ifdef CONFIG_INET
83 #include <net/inet_common.h>
84 #endif
85 
86 #define CONFIG_SOCK_PACKET	1
87 
88 /*
89    Proposed replacement for SIOC{ADD,DEL}MULTI and
90    IFF_PROMISC, IFF_ALLMULTI flags.
91 
 92    It is more expensive, but I believe
 93    it is really the correct solution: reentrant, safe and fault tolerant.
 94 
 95    IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping a
 96    reference count and a global flag, so that the real status is
 97    (gflag|(count != 0)); this lets the obsolete, faulty interface be
 98    used without harming clever users.
99  */
100 #define CONFIG_PACKET_MULTICAST	1
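/*
 * A minimal userspace sketch of the interface described above
 * (illustrative only; error handling is omitted and ifname is the
 * caller's device name).  Promiscuous mode is requested per socket via
 * PACKET_ADD_MEMBERSHIP; the kernel keeps the reference count, so
 * overlapping users do not disturb each other.  The count is dropped
 * again by PACKET_DROP_MEMBERSHIP or when the socket is closed.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *	#include <net/if.h>
 *
 *	static int enable_promisc(int fd, const char *ifname)
 *	{
 *		struct packet_mreq mreq;
 *
 *		memset(&mreq, 0, sizeof(mreq));
 *		mreq.mr_ifindex = if_nametoindex(ifname);
 *		mreq.mr_type = PACKET_MR_PROMISC;
 *		return setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *				  &mreq, sizeof(mreq));
 *	}
 */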
101 
102 /*
103    Assumptions:
104    - if a device has no dev->hard_header routine, it adds and removes the
105      ll header itself. In this case the ll header is invisible outside
106      the device, but higher levels should still reserve dev->hard_header_len.
107      Some devices are clever enough to reallocate the skb when the header
108      does not fit into the reserved space (tunnels); others are silly
109      (PPP).
110    - a packet socket receives packets with the ll header already pulled,
111      so SOCK_RAW should push it back.
112 
113 On receive:
114 -----------
115 
116 Incoming, dev->hard_header!=NULL
117    mac.raw -> ll header
118    data    -> data
119 
120 Outgoing, dev->hard_header!=NULL
121    mac.raw -> ll header
122    data    -> ll header
123 
124 Incoming, dev->hard_header==NULL
125    mac.raw -> UNKNOWN position. Very likely it points to the ll header.
126               PPP does this, which is wrong because it introduces
127               asymmetry between the rx and tx paths.
128    data    -> data
129 
130 Outgoing, dev->hard_header==NULL
131    mac.raw -> data. ll header is still not built!
132    data    -> data
133 
134 Summary
135   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.
136 
137 
138 On transmit:
139 ------------
140 
141 dev->hard_header != NULL
142    mac.raw -> ll header
143    data    -> ll header
144 
145 dev->hard_header == NULL (ll header is added by device, we cannot control it)
146    mac.raw -> data
147    data -> data
148 
149    We should set nh.raw on output to the correct position;
150    the packet classifier depends on it.
151  */
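/*
 * A consequence of the SOCK_RAW rule above, sketched from userspace
 * (illustrative only: "eth0", dst_mac, src_mac and payload_len are
 * placeholders, and error handling is omitted).  A SOCK_RAW sender must
 * supply the complete frame, ll header included; a SOCK_DGRAM sender
 * supplies only the payload and lets the device build the header.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <arpa/inet.h>
 *	#include <linux/if_packet.h>
 *	#include <linux/if_ether.h>
 *	#include <net/if.h>
 *
 *	unsigned char frame[ETH_FRAME_LEN];
 *	struct ethhdr *eh = (struct ethhdr *)frame;
 *	struct sockaddr_ll sll;
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *
 *	memcpy(eh->h_dest, dst_mac, ETH_ALEN);
 *	memcpy(eh->h_source, src_mac, ETH_ALEN);
 *	eh->h_proto = htons(ETH_P_IP);
 *
 *	memset(&sll, 0, sizeof(sll));
 *	sll.sll_family = AF_PACKET;
 *	sll.sll_ifindex = if_nametoindex("eth0");
 *	sendto(fd, frame, ETH_HLEN + payload_len, 0,
 *	       (struct sockaddr *)&sll, sizeof(sll));
 */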
152 
153 /* List of all packet sockets. */
154 static HLIST_HEAD(packet_sklist);
155 static DEFINE_RWLOCK(packet_sklist_lock);
156 
157 static atomic_t packet_socks_nr;
158 
159 
160 /* Private packet socket structures. */
161 
162 #ifdef CONFIG_PACKET_MULTICAST
163 struct packet_mclist
164 {
165 	struct packet_mclist	*next;
166 	int			ifindex;
167 	int			count;
168 	unsigned short		type;
169 	unsigned short		alen;
170 	unsigned char		addr[MAX_ADDR_LEN];
171 };
172 /* identical to struct packet_mreq except it has
173  * a longer address field.
174  */
175 struct packet_mreq_max
176 {
177 	int		mr_ifindex;
178 	unsigned short	mr_type;
179 	unsigned short	mr_alen;
180 	unsigned char	mr_address[MAX_ADDR_LEN];
181 };
182 #endif
183 #ifdef CONFIG_PACKET_MMAP
184 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
185 #endif
186 
187 static void packet_flush_mclist(struct sock *sk);
188 
189 struct packet_sock {
190 	/* struct sock has to be the first member of packet_sock */
191 	struct sock		sk;
192 	struct tpacket_stats	stats;
193 #ifdef CONFIG_PACKET_MMAP
194 	char *			*pg_vec;
195 	unsigned int		head;
196 	unsigned int            frames_per_block;
197 	unsigned int		frame_size;
198 	unsigned int		frame_max;
199 	int			copy_thresh;
200 #endif
201 	struct packet_type	prot_hook;
202 	spinlock_t		bind_lock;
203 	char			running;	/* prot_hook is attached*/
204 	int			ifindex;	/* bound device		*/
205 	unsigned short		num;
206 #ifdef CONFIG_PACKET_MULTICAST
207 	struct packet_mclist	*mclist;
208 #endif
209 #ifdef CONFIG_PACKET_MMAP
210 	atomic_t		mapped;
211 	unsigned int            pg_vec_order;
212 	unsigned int		pg_vec_pages;
213 	unsigned int		pg_vec_len;
214 #endif
215 };
216 
217 #ifdef CONFIG_PACKET_MMAP
218 
219 static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
220 {
221 	unsigned int pg_vec_pos, frame_offset;
222 	char *frame;
223 
224 	pg_vec_pos = position / po->frames_per_block;
225 	frame_offset = position % po->frames_per_block;
226 
227 	frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
228 
229 	return frame;
230 }
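/*
 * Worked example with illustrative numbers: for tp_block_size 8192 and
 * tp_frame_size 2048, frames_per_block is 4.  Frame number 6 then lives
 * in block 6 / 4 = 1, at byte offset (6 % 4) * 2048 = 4096 within that
 * block, i.e. at pg_vec[1] + 4096.
 */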
231 #endif
232 
233 static inline struct packet_sock *pkt_sk(struct sock *sk)
234 {
235 	return (struct packet_sock *)sk;
236 }
237 
238 static void packet_sock_destruct(struct sock *sk)
239 {
240 	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
241 	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
242 
243 	if (!sock_flag(sk, SOCK_DEAD)) {
244 		printk("Attempt to release alive packet socket: %p\n", sk);
245 		return;
246 	}
247 
248 	atomic_dec(&packet_socks_nr);
249 #ifdef PACKET_REFCNT_DEBUG
250 	printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
251 #endif
252 }
253 
254 
255 static const struct proto_ops packet_ops;
256 
257 #ifdef CONFIG_SOCK_PACKET
258 static const struct proto_ops packet_ops_spkt;
259 
260 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
261 {
262 	struct sock *sk;
263 	struct sockaddr_pkt *spkt;
264 
265 	/*
266 	 *	When we registered the protocol we saved the socket in the data
267 	 *	field for just this event.
268 	 */
269 
270 	sk = pt->af_packet_priv;
271 
272 	/*
273 	 *	Yank back the headers [hope the device set this
274 	 *	right or kerboom...]
275 	 *
276 	 *	Incoming packets have the ll header pulled;
277 	 *	push it back.
278 	 *
279 	 *	For outgoing ones skb->data == skb->mac.raw,
280 	 *	so this procedure is a no-op.
281 	 */
282 
283 	if (skb->pkt_type == PACKET_LOOPBACK)
284 		goto out;
285 
286 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
287 		goto oom;
288 
289 	/* drop any routing info */
290 	dst_release(skb->dst);
291 	skb->dst = NULL;
292 
293 	/* drop conntrack reference */
294 	nf_reset(skb);
295 
296 	spkt = (struct sockaddr_pkt*)skb->cb;
297 
298 	skb_push(skb, skb->data-skb->mac.raw);
299 
300 	/*
301 	 *	The SOCK_PACKET socket receives _all_ frames.
302 	 */
303 
304 	spkt->spkt_family = dev->type;
305 	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
306 	spkt->spkt_protocol = skb->protocol;
307 
308 	/*
309 	 *	Charge the memory to the socket. This is done specifically
310 	 *	to prevent sockets from using up all the memory.
311 	 */
312 
313 	if (sock_queue_rcv_skb(sk,skb) == 0)
314 		return 0;
315 
316 out:
317 	kfree_skb(skb);
318 oom:
319 	return 0;
320 }
321 
322 
323 /*
324  *	Output a raw packet to a device layer. This bypasses all the other
325  *	protocol layers and you must therefore supply it with a complete frame.
326  */
327 
328 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
329 			       struct msghdr *msg, size_t len)
330 {
331 	struct sock *sk = sock->sk;
332 	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
333 	struct sk_buff *skb;
334 	struct net_device *dev;
335 	unsigned short proto=0;
336 	int err;
337 
338 	/*
339 	 *	Get and verify the address.
340 	 */
341 
342 	if (saddr)
343 	{
344 		if (msg->msg_namelen < sizeof(struct sockaddr))
345 			return(-EINVAL);
346 		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
347 			proto=saddr->spkt_protocol;
348 	}
349 	else
350 		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */
351 
352 	/*
353 	 *	Find the device first to size check it
354 	 */
355 
356 	saddr->spkt_device[13] = 0;
357 	dev = dev_get_by_name(saddr->spkt_device);
358 	err = -ENODEV;
359 	if (dev == NULL)
360 		goto out_unlock;
361 
362 	/*
363 	 *	You may not queue a frame bigger than the MTU. This is the lowest-level
364 	 *	raw protocol and you must do your own fragmentation at this level.
365 	 */
366 
367 	err = -EMSGSIZE;
368 	if (len > dev->mtu + dev->hard_header_len)
369 		goto out_unlock;
370 
371 	err = -ENOBUFS;
372 	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
373 
374 	/*
375 	 *	If the write buffer is full, then tough. At this level the user gets to
376 	 *	deal with the problem - do your own algorithmic backoffs. That's far
377 	 *	more flexible.
378 	 */
379 
380 	if (skb == NULL)
381 		goto out_unlock;
382 
383 	/*
384 	 *	Fill it in
385 	 */
386 
387 	/* FIXME: Save some space for broken drivers that write a
388 	 * hard header at transmission time by themselves. PPP is the
389 	 * notable one here. This should really be fixed at the driver level.
390 	 */
391 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
392 	skb->nh.raw = skb->data;
393 
394 	/* Try to align data part correctly */
395 	if (dev->hard_header) {
396 		skb->data -= dev->hard_header_len;
397 		skb->tail -= dev->hard_header_len;
398 		if (len < dev->hard_header_len)
399 			skb->nh.raw = skb->data;
400 	}
401 
402 	/* Returns -EFAULT on error */
403 	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
404 	skb->protocol = proto;
405 	skb->dev = dev;
406 	skb->priority = sk->sk_priority;
407 	if (err)
408 		goto out_free;
409 
410 	err = -ENETDOWN;
411 	if (!(dev->flags & IFF_UP))
412 		goto out_free;
413 
414 	/*
415 	 *	Now send it
416 	 */
417 
418 	dev_queue_xmit(skb);
419 	dev_put(dev);
420 	return(len);
421 
422 out_free:
423 	kfree_skb(skb);
424 out_unlock:
425 	if (dev)
426 		dev_put(dev);
427 	return err;
428 }
429 #endif
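/*
 * For reference, a sketch of the obsolete SOCK_PACKET transmit path as
 * seen from userspace (illustrative only; "eth0" and the frame buffer
 * are placeholders).  The destination is named with sockaddr_pkt, and,
 * as above, the buffer must hold a complete frame:
 *
 *	int fd = socket(AF_INET, SOCK_PACKET, htons(ETH_P_ALL));
 *	struct sockaddr_pkt spkt;
 *
 *	memset(&spkt, 0, sizeof(spkt));
 *	strncpy((char *)spkt.spkt_device, "eth0",
 *		sizeof(spkt.spkt_device) - 1);
 *	spkt.spkt_protocol = htons(ETH_P_IP);
 *	sendto(fd, frame, frame_len, 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 */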
430 
431 static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res)
432 {
433 	struct sk_filter *filter;
434 
435 	bh_lock_sock(sk);
436 	filter = sk->sk_filter;
437 	/*
438 	 * Our caller already checked that filter != NULL but we need to
439 	 * verify that under bh_lock_sock() to be safe
440 	 */
441 	if (likely(filter != NULL))
442 		res = sk_run_filter(skb, filter->insns, filter->len);
443 	bh_unlock_sock(sk);
444 
445 	return res;
446 }
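/*
 * The filters run here are installed from userspace with
 * SO_ATTACH_FILTER.  A minimal sketch (illustrative only; fd is an
 * already created packet socket): a one-instruction classic BPF program
 * whose return value, as in run_filter() above, becomes the snap
 * length; zero drops the packet and a smaller value truncates it.
 *
 *	#include <sys/socket.h>
 *	#include <linux/filter.h>
 *
 *	struct sock_filter insns[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 96 },
 *	};
 *	struct sock_fprog prog = {
 *		.len = 1,
 *		.filter = insns,
 *	};
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */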
447 
448 /*
449    This function does lazy skb cloning in the hope that most packets
450    are discarded by BPF.
451 
452    Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
453    and skb->cb are mangled. It works because (and as long as) packets
454    arriving here are owned by the current CPU. Output packets are cloned
455    by dev_queue_xmit_nit(), and input packets are processed by net_bh
456    sequentially, so if we return the skb to its original state on exit,
457    we will not harm anyone.
458  */
459 
460 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
461 {
462 	struct sock *sk;
463 	struct sockaddr_ll *sll;
464 	struct packet_sock *po;
465 	u8 * skb_head = skb->data;
466 	int skb_len = skb->len;
467 	unsigned snaplen;
468 
469 	if (skb->pkt_type == PACKET_LOOPBACK)
470 		goto drop;
471 
472 	sk = pt->af_packet_priv;
473 	po = pkt_sk(sk);
474 
475 	skb->dev = dev;
476 
477 	if (dev->hard_header) {
478 		/* The device has an explicit notion of ll header,
479 		   exported to higher levels.
480 
481 		   Otherwise, the device hides the details of its frame
482 		   structure, so the corresponding packet head is
483 		   never delivered to the user.
484 		 */
485 		if (sk->sk_type != SOCK_DGRAM)
486 			skb_push(skb, skb->data - skb->mac.raw);
487 		else if (skb->pkt_type == PACKET_OUTGOING) {
488 			/* Special case: outgoing packets have ll header at head */
489 			skb_pull(skb, skb->nh.raw - skb->data);
490 		}
491 	}
492 
493 	snaplen = skb->len;
494 
495 	if (sk->sk_filter) {
496 		unsigned res = run_filter(skb, sk, snaplen);
497 		if (res == 0)
498 			goto drop_n_restore;
499 		if (snaplen > res)
500 			snaplen = res;
501 	}
502 
503 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
504 	    (unsigned)sk->sk_rcvbuf)
505 		goto drop_n_acct;
506 
507 	if (skb_shared(skb)) {
508 		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
509 		if (nskb == NULL)
510 			goto drop_n_acct;
511 
512 		if (skb_head != skb->data) {
513 			skb->data = skb_head;
514 			skb->len = skb_len;
515 		}
516 		kfree_skb(skb);
517 		skb = nskb;
518 	}
519 
520 	sll = (struct sockaddr_ll*)skb->cb;
521 	sll->sll_family = AF_PACKET;
522 	sll->sll_hatype = dev->type;
523 	sll->sll_protocol = skb->protocol;
524 	sll->sll_pkttype = skb->pkt_type;
525 	sll->sll_ifindex = dev->ifindex;
526 	sll->sll_halen = 0;
527 
528 	if (dev->hard_header_parse)
529 		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
530 
531 	if (pskb_trim(skb, snaplen))
532 		goto drop_n_acct;
533 
534 	skb_set_owner_r(skb, sk);
535 	skb->dev = NULL;
536 	dst_release(skb->dst);
537 	skb->dst = NULL;
538 
539 	/* drop conntrack reference */
540 	nf_reset(skb);
541 
542 	spin_lock(&sk->sk_receive_queue.lock);
543 	po->stats.tp_packets++;
544 	__skb_queue_tail(&sk->sk_receive_queue, skb);
545 	spin_unlock(&sk->sk_receive_queue.lock);
546 	sk->sk_data_ready(sk, skb->len);
547 	return 0;
548 
549 drop_n_acct:
550 	spin_lock(&sk->sk_receive_queue.lock);
551 	po->stats.tp_drops++;
552 	spin_unlock(&sk->sk_receive_queue.lock);
553 
554 drop_n_restore:
555 	if (skb_head != skb->data && skb_shared(skb)) {
556 		skb->data = skb_head;
557 		skb->len = skb_len;
558 	}
559 drop:
560 	kfree_skb(skb);
561 	return 0;
562 }
563 
564 #ifdef CONFIG_PACKET_MMAP
565 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
566 {
567 	struct sock *sk;
568 	struct packet_sock *po;
569 	struct sockaddr_ll *sll;
570 	struct tpacket_hdr *h;
571 	u8 * skb_head = skb->data;
572 	int skb_len = skb->len;
573 	unsigned snaplen;
574 	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
575 	unsigned short macoff, netoff;
576 	struct sk_buff *copy_skb = NULL;
577 
578 	if (skb->pkt_type == PACKET_LOOPBACK)
579 		goto drop;
580 
581 	sk = pt->af_packet_priv;
582 	po = pkt_sk(sk);
583 
584 	if (dev->hard_header) {
585 		if (sk->sk_type != SOCK_DGRAM)
586 			skb_push(skb, skb->data - skb->mac.raw);
587 		else if (skb->pkt_type == PACKET_OUTGOING) {
588 			/* Special case: outgoing packets have ll header at head */
589 			skb_pull(skb, skb->nh.raw - skb->data);
590 			if (skb->ip_summed == CHECKSUM_HW)
591 				status |= TP_STATUS_CSUMNOTREADY;
592 		}
593 	}
594 
595 	snaplen = skb->len;
596 
597 	if (sk->sk_filter) {
598 		unsigned res = run_filter(skb, sk, snaplen);
599 		if (res == 0)
600 			goto drop_n_restore;
601 		if (snaplen > res)
602 			snaplen = res;
603 	}
604 
605 	if (sk->sk_type == SOCK_DGRAM) {
606 		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
607 	} else {
608 		unsigned maclen = skb->nh.raw - skb->data;
609 		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
610 		macoff = netoff - maclen;
611 	}
612 
613 	if (macoff + snaplen > po->frame_size) {
614 		if (po->copy_thresh &&
615 		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
616 		    (unsigned)sk->sk_rcvbuf) {
617 			if (skb_shared(skb)) {
618 				copy_skb = skb_clone(skb, GFP_ATOMIC);
619 			} else {
620 				copy_skb = skb_get(skb);
621 				skb_head = skb->data;
622 			}
623 			if (copy_skb)
624 				skb_set_owner_r(copy_skb, sk);
625 		}
626 		snaplen = po->frame_size - macoff;
627 		if ((int)snaplen < 0)
628 			snaplen = 0;
629 	}
630 	if (snaplen > skb->len-skb->data_len)
631 		snaplen = skb->len-skb->data_len;
632 
633 	spin_lock(&sk->sk_receive_queue.lock);
634 	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
635 
636 	if (h->tp_status)
637 		goto ring_is_full;
638 	po->head = po->head != po->frame_max ? po->head+1 : 0;
639 	po->stats.tp_packets++;
640 	if (copy_skb) {
641 		status |= TP_STATUS_COPY;
642 		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
643 	}
644 	if (!po->stats.tp_drops)
645 		status &= ~TP_STATUS_LOSING;
646 	spin_unlock(&sk->sk_receive_queue.lock);
647 
648 	memcpy((u8*)h + macoff, skb->data, snaplen);
649 
650 	h->tp_len = skb->len;
651 	h->tp_snaplen = snaplen;
652 	h->tp_mac = macoff;
653 	h->tp_net = netoff;
654 	if (skb->tstamp.off_sec == 0) {
655 		__net_timestamp(skb);
656 		sock_enable_timestamp(sk);
657 	}
658 	h->tp_sec = skb->tstamp.off_sec;
659 	h->tp_usec = skb->tstamp.off_usec;
660 
661 	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
662 	sll->sll_halen = 0;
663 	if (dev->hard_header_parse)
664 		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
665 	sll->sll_family = AF_PACKET;
666 	sll->sll_hatype = dev->type;
667 	sll->sll_protocol = skb->protocol;
668 	sll->sll_pkttype = skb->pkt_type;
669 	sll->sll_ifindex = dev->ifindex;
670 
671 	h->tp_status = status;
672 	mb();
673 
674 	{
675 		struct page *p_start, *p_end;
676 		u8 *h_end = (u8 *)h + macoff + snaplen - 1;
677 
678 		p_start = virt_to_page(h);
679 		p_end = virt_to_page(h_end);
680 		while (p_start <= p_end) {
681 			flush_dcache_page(p_start);
682 			p_start++;
683 		}
684 	}
685 
686 	sk->sk_data_ready(sk, 0);
687 
688 drop_n_restore:
689 	if (skb_head != skb->data && skb_shared(skb)) {
690 		skb->data = skb_head;
691 		skb->len = skb_len;
692 	}
693 drop:
694 	kfree_skb(skb);
695 	return 0;
696 
697 ring_is_full:
698 	po->stats.tp_drops++;
699 	spin_unlock(&sk->sk_receive_queue.lock);
700 
701 	sk->sk_data_ready(sk, 0);
702 	if (copy_skb)
703 		kfree_skb(copy_skb);
704 	goto drop_n_restore;
705 }
706 
707 #endif
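/*
 * Userspace view of the ring that tpacket_rcv() fills, as a sketch
 * (illustrative only: fd is an already bound packet socket, the sizes
 * are examples, and error handling is omitted).  The request must
 * satisfy the constraints checked in packet_set_ring(): tp_block_size
 * is a multiple of PAGE_SIZE, tp_frame_size is TPACKET_ALIGNMENT
 * aligned, and tp_frame_nr == tp_block_nr * (tp_block_size /
 * tp_frame_size).
 *
 *	#include <poll.h>
 *	#include <sys/mman.h>
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *
 *	struct tpacket_req req = {
 *		.tp_block_size	= 4096,
 *		.tp_block_nr	= 64,
 *		.tp_frame_size	= 2048,
 *		.tp_frame_nr	= 128,
 *	};
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	unsigned int i = 0;
 *	char *ring;
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	ring = mmap(NULL, req.tp_block_nr * req.tp_block_size,
 *		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	for (;;) {
 *		struct tpacket_hdr *h = (struct tpacket_hdr *)
 *			(ring + i * req.tp_frame_size);
 *
 *		while (!(h->tp_status & TP_STATUS_USER))
 *			poll(&pfd, 1, -1);
 *		handle_packet((char *)h + h->tp_mac, h->tp_snaplen);
 *		h->tp_status = TP_STATUS_KERNEL;
 *		i = (i + 1) % req.tp_frame_nr;
 *	}
 *
 * handle_packet() is a hypothetical consumer.  Frame i sits at a fixed
 * offset only because tp_frame_size divides tp_block_size exactly here;
 * in general frames never cross block boundaries.
 */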
708 
709 
710 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
711 			  struct msghdr *msg, size_t len)
712 {
713 	struct sock *sk = sock->sk;
714 	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
715 	struct sk_buff *skb;
716 	struct net_device *dev;
717 	unsigned short proto;
718 	unsigned char *addr;
719 	int ifindex, err, reserve = 0;
720 
721 	/*
722 	 *	Get and verify the address.
723 	 */
724 
725 	if (saddr == NULL) {
726 		struct packet_sock *po = pkt_sk(sk);
727 
728 		ifindex	= po->ifindex;
729 		proto	= po->num;
730 		addr	= NULL;
731 	} else {
732 		err = -EINVAL;
733 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
734 			goto out;
735 		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
736 			goto out;
737 		ifindex	= saddr->sll_ifindex;
738 		proto	= saddr->sll_protocol;
739 		addr	= saddr->sll_addr;
740 	}
741 
742 
743 	dev = dev_get_by_index(ifindex);
744 	err = -ENXIO;
745 	if (dev == NULL)
746 		goto out_unlock;
747 	if (sock->type == SOCK_RAW)
748 		reserve = dev->hard_header_len;
749 
750 	err = -EMSGSIZE;
751 	if (len > dev->mtu+reserve)
752 		goto out_unlock;
753 
754 	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
755 				msg->msg_flags & MSG_DONTWAIT, &err);
756 	if (skb==NULL)
757 		goto out_unlock;
758 
759 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
760 	skb->nh.raw = skb->data;
761 
762 	if (dev->hard_header) {
763 		int res;
764 		err = -EINVAL;
765 		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
766 		if (sock->type != SOCK_DGRAM) {
767 			skb->tail = skb->data;
768 			skb->len = 0;
769 		} else if (res < 0)
770 			goto out_free;
771 	}
772 
773 	/* Returns -EFAULT on error */
774 	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
775 	if (err)
776 		goto out_free;
777 
778 	skb->protocol = proto;
779 	skb->dev = dev;
780 	skb->priority = sk->sk_priority;
781 
782 	err = -ENETDOWN;
783 	if (!(dev->flags & IFF_UP))
784 		goto out_free;
785 
786 	/*
787 	 *	Now send it
788 	 */
789 
790 	err = dev_queue_xmit(skb);
791 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
792 		goto out_unlock;
793 
794 	dev_put(dev);
795 
796 	return(len);
797 
798 out_free:
799 	kfree_skb(skb);
800 out_unlock:
801 	if (dev)
802 		dev_put(dev);
803 out:
804 	return err;
805 }
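/*
 * The SOCK_DGRAM branch above is the one where the kernel builds the ll
 * header from sll_addr.  A userspace sketch (illustrative only; "eth0",
 * dst_mac and payload_len are placeholders), sending a payload with no
 * ll header of its own:
 *
 *	int fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
 *	struct sockaddr_ll sll;
 *
 *	memset(&sll, 0, sizeof(sll));
 *	sll.sll_family	 = AF_PACKET;
 *	sll.sll_protocol = htons(ETH_P_IP);
 *	sll.sll_ifindex	 = if_nametoindex("eth0");
 *	sll.sll_halen	 = ETH_ALEN;
 *	memcpy(sll.sll_addr, dst_mac, ETH_ALEN);
 *	sendto(fd, payload, payload_len, 0,
 *	       (struct sockaddr *)&sll, sizeof(sll));
 */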
806 
807 /*
808  *	Close a PACKET socket. This is fairly simple. We immediately go
809  *	to 'closed' state and remove our protocol entry in the device list.
810  */
811 
812 static int packet_release(struct socket *sock)
813 {
814 	struct sock *sk = sock->sk;
815 	struct packet_sock *po;
816 
817 	if (!sk)
818 		return 0;
819 
820 	po = pkt_sk(sk);
821 
822 	write_lock_bh(&packet_sklist_lock);
823 	sk_del_node_init(sk);
824 	write_unlock_bh(&packet_sklist_lock);
825 
826 	/*
827 	 *	Unhook packet receive handler.
828 	 */
829 
830 	if (po->running) {
831 		/*
832 		 *	Remove the protocol hook
833 		 */
834 		dev_remove_pack(&po->prot_hook);
835 		po->running = 0;
836 		po->num = 0;
837 		__sock_put(sk);
838 	}
839 
840 #ifdef CONFIG_PACKET_MULTICAST
841 	packet_flush_mclist(sk);
842 #endif
843 
844 #ifdef CONFIG_PACKET_MMAP
845 	if (po->pg_vec) {
846 		struct tpacket_req req;
847 		memset(&req, 0, sizeof(req));
848 		packet_set_ring(sk, &req, 1);
849 	}
850 #endif
851 
852 	/*
853 	 *	Now the socket is dead. No more input will appear.
854 	 */
855 
856 	sock_orphan(sk);
857 	sock->sk = NULL;
858 
859 	/* Purge queues */
860 
861 	skb_queue_purge(&sk->sk_receive_queue);
862 
863 	sock_put(sk);
864 	return 0;
865 }
866 
867 /*
868  *	Attach a packet hook.
869  */
870 
871 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
872 {
873 	struct packet_sock *po = pkt_sk(sk);
874 	/*
875 	 *	Detach an existing hook if present.
876 	 */
877 
878 	lock_sock(sk);
879 
880 	spin_lock(&po->bind_lock);
881 	if (po->running) {
882 		__sock_put(sk);
883 		po->running = 0;
884 		po->num = 0;
885 		spin_unlock(&po->bind_lock);
886 		dev_remove_pack(&po->prot_hook);
887 		spin_lock(&po->bind_lock);
888 	}
889 
890 	po->num = protocol;
891 	po->prot_hook.type = protocol;
892 	po->prot_hook.dev = dev;
893 
894 	po->ifindex = dev ? dev->ifindex : 0;
895 
896 	if (protocol == 0)
897 		goto out_unlock;
898 
899 	if (dev) {
900 		if (dev->flags&IFF_UP) {
901 			dev_add_pack(&po->prot_hook);
902 			sock_hold(sk);
903 			po->running = 1;
904 		} else {
905 			sk->sk_err = ENETDOWN;
906 			if (!sock_flag(sk, SOCK_DEAD))
907 				sk->sk_error_report(sk);
908 		}
909 	} else {
910 		dev_add_pack(&po->prot_hook);
911 		sock_hold(sk);
912 		po->running = 1;
913 	}
914 
915 out_unlock:
916 	spin_unlock(&po->bind_lock);
917 	release_sock(sk);
918 	return 0;
919 }
920 
921 /*
922  *	Bind a packet socket to a device
923  */
924 
925 #ifdef CONFIG_SOCK_PACKET
926 
927 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
928 {
929 	struct sock *sk=sock->sk;
930 	char name[15];
931 	struct net_device *dev;
932 	int err = -ENODEV;
933 
934 	/*
935 	 *	Check legality
936 	 */
937 
938 	if (addr_len != sizeof(struct sockaddr))
939 		return -EINVAL;
940 	strlcpy(name,uaddr->sa_data,sizeof(name));
941 
942 	dev = dev_get_by_name(name);
943 	if (dev) {
944 		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
945 		dev_put(dev);
946 	}
947 	return err;
948 }
949 #endif
950 
951 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
952 {
953 	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
954 	struct sock *sk=sock->sk;
955 	struct net_device *dev = NULL;
956 	int err;
957 
958 
959 	/*
960 	 *	Check legality
961 	 */
962 
963 	if (addr_len < sizeof(struct sockaddr_ll))
964 		return -EINVAL;
965 	if (sll->sll_family != AF_PACKET)
966 		return -EINVAL;
967 
968 	if (sll->sll_ifindex) {
969 		err = -ENODEV;
970 		dev = dev_get_by_index(sll->sll_ifindex);
971 		if (dev == NULL)
972 			goto out;
973 	}
974 	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
975 	if (dev)
976 		dev_put(dev);
977 
978 out:
979 	return err;
980 }
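/*
 * A sketch of the matching userspace call (illustrative only; "eth0" is
 * a placeholder): binding restricts an AF_PACKET socket to one device
 * and, via sll_protocol, to one ethertype (ETH_P_ALL for everything).
 *
 *	struct sockaddr_ll sll;
 *
 *	memset(&sll, 0, sizeof(sll));
 *	sll.sll_family	 = AF_PACKET;
 *	sll.sll_protocol = htons(ETH_P_ALL);
 *	sll.sll_ifindex	 = if_nametoindex("eth0");
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */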
981 
982 static struct proto packet_proto = {
983 	.name	  = "PACKET",
984 	.owner	  = THIS_MODULE,
985 	.obj_size = sizeof(struct packet_sock),
986 };
987 
988 /*
989  *	Create a packet of type SOCK_PACKET.
990  */
991 
992 static int packet_create(struct socket *sock, int protocol)
993 {
994 	struct sock *sk;
995 	struct packet_sock *po;
996 	int err;
997 
998 	if (!capable(CAP_NET_RAW))
999 		return -EPERM;
1000 	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
1001 #ifdef CONFIG_SOCK_PACKET
1002 	    && sock->type != SOCK_PACKET
1003 #endif
1004 	    )
1005 		return -ESOCKTNOSUPPORT;
1006 
1007 	sock->state = SS_UNCONNECTED;
1008 
1009 	err = -ENOBUFS;
1010 	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
1011 	if (sk == NULL)
1012 		goto out;
1013 
1014 	sock->ops = &packet_ops;
1015 #ifdef CONFIG_SOCK_PACKET
1016 	if (sock->type == SOCK_PACKET)
1017 		sock->ops = &packet_ops_spkt;
1018 #endif
1019 	sock_init_data(sock, sk);
1020 
1021 	po = pkt_sk(sk);
1022 	sk->sk_family = PF_PACKET;
1023 	po->num = protocol;
1024 
1025 	sk->sk_destruct = packet_sock_destruct;
1026 	atomic_inc(&packet_socks_nr);
1027 
1028 	/*
1029 	 *	Attach a protocol block
1030 	 */
1031 
1032 	spin_lock_init(&po->bind_lock);
1033 	po->prot_hook.func = packet_rcv;
1034 #ifdef CONFIG_SOCK_PACKET
1035 	if (sock->type == SOCK_PACKET)
1036 		po->prot_hook.func = packet_rcv_spkt;
1037 #endif
1038 	po->prot_hook.af_packet_priv = sk;
1039 
1040 	if (protocol) {
1041 		po->prot_hook.type = protocol;
1042 		dev_add_pack(&po->prot_hook);
1043 		sock_hold(sk);
1044 		po->running = 1;
1045 	}
1046 
1047 	write_lock_bh(&packet_sklist_lock);
1048 	sk_add_node(sk, &packet_sklist);
1049 	write_unlock_bh(&packet_sklist_lock);
1050 	return(0);
1051 out:
1052 	return err;
1053 }
1054 
1055 /*
1056  *	Pull a packet from our receive queue and hand it to the user.
1057  *	If necessary we block.
1058  */
1059 
1060 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1061 			  struct msghdr *msg, size_t len, int flags)
1062 {
1063 	struct sock *sk = sock->sk;
1064 	struct sk_buff *skb;
1065 	int copied, err;
1066 	struct sockaddr_ll *sll;
1067 
1068 	err = -EINVAL;
1069 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1070 		goto out;
1071 
1072 #if 0
1073 	/* What error should we return now? EUNATTACH? */
1074 	if (pkt_sk(sk)->ifindex < 0)
1075 		return -ENODEV;
1076 #endif
1077 
1078 	/*
1079 	 *	Call the generic datagram receiver. This handles all sorts
1080 	 *	of horrible races and re-entrancy so we can forget about it
1081 	 *	in the protocol layers.
1082 	 *
1083 	 *	Now it will return ENETDOWN if the device has just gone down,
1084 	 *	but then it will block.
1085 	 */
1086 
1087 	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1088 
1089 	/*
1090 	 *	An error occurred, so return it. Because skb_recv_datagram()
1091 	 *	handles the blocking, we don't need to see or worry about
1092 	 *	blocking retries.
1093 	 */
1094 
1095 	if (skb == NULL)
1096 		goto out;
1097 
1098 	/*
1099 	 *	If the address length field is there to be filled in, we fill
1100 	 *	it in now.
1101 	 */
1102 
1103 	sll = (struct sockaddr_ll*)skb->cb;
1104 	if (sock->type == SOCK_PACKET)
1105 		msg->msg_namelen = sizeof(struct sockaddr_pkt);
1106 	else
1107 		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1108 
1109 	/*
1110 	 *	You lose any data beyond the buffer you gave. If it worries a
1111 	 *	user program, it can ask the device for its MTU anyway.
1112 	 */
1113 
1114 	copied = skb->len;
1115 	if (copied > len)
1116 	{
1117 		copied=len;
1118 		msg->msg_flags|=MSG_TRUNC;
1119 	}
1120 
1121 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1122 	if (err)
1123 		goto out_free;
1124 
1125 	sock_recv_timestamp(msg, sk, skb);
1126 
1127 	if (msg->msg_name)
1128 		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1129 
1130 	/*
1131 	 *	Free or return the buffer as appropriate. Again this
1132 	 *	hides all the races and re-entrancy issues from us.
1133 	 */
1134 	err = (flags&MSG_TRUNC) ? skb->len : copied;
1135 
1136 out_free:
1137 	skb_free_datagram(sk, skb);
1138 out:
1139 	return err;
1140 }
1141 
1142 #ifdef CONFIG_SOCK_PACKET
1143 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1144 			       int *uaddr_len, int peer)
1145 {
1146 	struct net_device *dev;
1147 	struct sock *sk	= sock->sk;
1148 
1149 	if (peer)
1150 		return -EOPNOTSUPP;
1151 
1152 	uaddr->sa_family = AF_PACKET;
1153 	dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1154 	if (dev) {
1155 		strlcpy(uaddr->sa_data, dev->name, 15);
1156 		dev_put(dev);
1157 	} else
1158 		memset(uaddr->sa_data, 0, 14);
1159 	*uaddr_len = sizeof(*uaddr);
1160 
1161 	return 0;
1162 }
1163 #endif
1164 
1165 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1166 			  int *uaddr_len, int peer)
1167 {
1168 	struct net_device *dev;
1169 	struct sock *sk = sock->sk;
1170 	struct packet_sock *po = pkt_sk(sk);
1171 	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1172 
1173 	if (peer)
1174 		return -EOPNOTSUPP;
1175 
1176 	sll->sll_family = AF_PACKET;
1177 	sll->sll_ifindex = po->ifindex;
1178 	sll->sll_protocol = po->num;
1179 	dev = dev_get_by_index(po->ifindex);
1180 	if (dev) {
1181 		sll->sll_hatype = dev->type;
1182 		sll->sll_halen = dev->addr_len;
1183 		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1184 		dev_put(dev);
1185 	} else {
1186 		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
1187 		sll->sll_halen = 0;
1188 	}
1189 	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1190 
1191 	return 0;
1192 }
1193 
1194 #ifdef CONFIG_PACKET_MULTICAST
1195 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1196 {
1197 	switch (i->type) {
1198 	case PACKET_MR_MULTICAST:
1199 		if (what > 0)
1200 			dev_mc_add(dev, i->addr, i->alen, 0);
1201 		else
1202 			dev_mc_delete(dev, i->addr, i->alen, 0);
1203 		break;
1204 	case PACKET_MR_PROMISC:
1205 		dev_set_promiscuity(dev, what);
1206 		break;
1207 	case PACKET_MR_ALLMULTI:
1208 		dev_set_allmulti(dev, what);
1209 		break;
1210 	default:;
1211 	}
1212 }
1213 
1214 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1215 {
1216 	for ( ; i; i=i->next) {
1217 		if (i->ifindex == dev->ifindex)
1218 			packet_dev_mc(dev, i, what);
1219 	}
1220 }
1221 
1222 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1223 {
1224 	struct packet_sock *po = pkt_sk(sk);
1225 	struct packet_mclist *ml, *i;
1226 	struct net_device *dev;
1227 	int err;
1228 
1229 	rtnl_lock();
1230 
1231 	err = -ENODEV;
1232 	dev = __dev_get_by_index(mreq->mr_ifindex);
1233 	if (!dev)
1234 		goto done;
1235 
1236 	err = -EINVAL;
1237 	if (mreq->mr_alen > dev->addr_len)
1238 		goto done;
1239 
1240 	err = -ENOBUFS;
1241 	i = kmalloc(sizeof(*i), GFP_KERNEL);
1242 	if (i == NULL)
1243 		goto done;
1244 
1245 	err = 0;
1246 	for (ml = po->mclist; ml; ml = ml->next) {
1247 		if (ml->ifindex == mreq->mr_ifindex &&
1248 		    ml->type == mreq->mr_type &&
1249 		    ml->alen == mreq->mr_alen &&
1250 		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1251 			ml->count++;
1252 			/* Free the new element ... */
1253 			kfree(i);
1254 			goto done;
1255 		}
1256 	}
1257 
1258 	i->type = mreq->mr_type;
1259 	i->ifindex = mreq->mr_ifindex;
1260 	i->alen = mreq->mr_alen;
1261 	memcpy(i->addr, mreq->mr_address, i->alen);
1262 	i->count = 1;
1263 	i->next = po->mclist;
1264 	po->mclist = i;
1265 	packet_dev_mc(dev, i, +1);
1266 
1267 done:
1268 	rtnl_unlock();
1269 	return err;
1270 }
1271 
1272 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1273 {
1274 	struct packet_mclist *ml, **mlp;
1275 
1276 	rtnl_lock();
1277 
1278 	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1279 		if (ml->ifindex == mreq->mr_ifindex &&
1280 		    ml->type == mreq->mr_type &&
1281 		    ml->alen == mreq->mr_alen &&
1282 		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1283 			if (--ml->count == 0) {
1284 				struct net_device *dev;
1285 				*mlp = ml->next;
1286 				dev = dev_get_by_index(ml->ifindex);
1287 				if (dev) {
1288 					packet_dev_mc(dev, ml, -1);
1289 					dev_put(dev);
1290 				}
1291 				kfree(ml);
1292 			}
1293 			rtnl_unlock();
1294 			return 0;
1295 		}
1296 	}
1297 	rtnl_unlock();
1298 	return -EADDRNOTAVAIL;
1299 }
1300 
1301 static void packet_flush_mclist(struct sock *sk)
1302 {
1303 	struct packet_sock *po = pkt_sk(sk);
1304 	struct packet_mclist *ml;
1305 
1306 	if (!po->mclist)
1307 		return;
1308 
1309 	rtnl_lock();
1310 	while ((ml = po->mclist) != NULL) {
1311 		struct net_device *dev;
1312 
1313 		po->mclist = ml->next;
1314 		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1315 			packet_dev_mc(dev, ml, -1);
1316 			dev_put(dev);
1317 		}
1318 		kfree(ml);
1319 	}
1320 	rtnl_unlock();
1321 }
1322 #endif
1323 
1324 static int
1325 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1326 {
1327 	struct sock *sk = sock->sk;
1328 	int ret;
1329 
1330 	if (level != SOL_PACKET)
1331 		return -ENOPROTOOPT;
1332 
1333 	switch(optname)	{
1334 #ifdef CONFIG_PACKET_MULTICAST
1335 	case PACKET_ADD_MEMBERSHIP:
1336 	case PACKET_DROP_MEMBERSHIP:
1337 	{
1338 		struct packet_mreq_max mreq;
1339 		int len = optlen;
1340 		memset(&mreq, 0, sizeof(mreq));
1341 		if (len < sizeof(struct packet_mreq))
1342 			return -EINVAL;
1343 		if (len > sizeof(mreq))
1344 			len = sizeof(mreq);
1345 		if (copy_from_user(&mreq,optval,len))
1346 			return -EFAULT;
1347 		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1348 			return -EINVAL;
1349 		if (optname == PACKET_ADD_MEMBERSHIP)
1350 			ret = packet_mc_add(sk, &mreq);
1351 		else
1352 			ret = packet_mc_drop(sk, &mreq);
1353 		return ret;
1354 	}
1355 #endif
1356 #ifdef CONFIG_PACKET_MMAP
1357 	case PACKET_RX_RING:
1358 	{
1359 		struct tpacket_req req;
1360 
1361 		if (optlen<sizeof(req))
1362 			return -EINVAL;
1363 		if (copy_from_user(&req,optval,sizeof(req)))
1364 			return -EFAULT;
1365 		return packet_set_ring(sk, &req, 0);
1366 	}
1367 	case PACKET_COPY_THRESH:
1368 	{
1369 		int val;
1370 
1371 		if (optlen!=sizeof(val))
1372 			return -EINVAL;
1373 		if (copy_from_user(&val,optval,sizeof(val)))
1374 			return -EFAULT;
1375 
1376 		pkt_sk(sk)->copy_thresh = val;
1377 		return 0;
1378 	}
1379 #endif
1380 	default:
1381 		return -ENOPROTOOPT;
1382 	}
1383 }
1384 
1385 static int packet_getsockopt(struct socket *sock, int level, int optname,
1386 			     char __user *optval, int __user *optlen)
1387 {
1388 	int len;
1389 	struct sock *sk = sock->sk;
1390 	struct packet_sock *po = pkt_sk(sk);
1391 
1392 	if (level != SOL_PACKET)
1393 		return -ENOPROTOOPT;
1394 
1395 	if (get_user(len, optlen))
1396 		return -EFAULT;
1397 
1398 	if (len < 0)
1399 		return -EINVAL;
1400 
1401 	switch(optname)	{
1402 	case PACKET_STATISTICS:
1403 	{
1404 		struct tpacket_stats st;
1405 
1406 		if (len > sizeof(struct tpacket_stats))
1407 			len = sizeof(struct tpacket_stats);
1408 		spin_lock_bh(&sk->sk_receive_queue.lock);
1409 		st = po->stats;
1410 		memset(&po->stats, 0, sizeof(st));
1411 		spin_unlock_bh(&sk->sk_receive_queue.lock);
1412 		st.tp_packets += st.tp_drops;
1413 
1414 		if (copy_to_user(optval, &st, len))
1415 			return -EFAULT;
1416 		break;
1417 	}
1418 	default:
1419 		return -ENOPROTOOPT;
1420 	}
1421 
1422 	if (put_user(len, optlen))
1423 		return -EFAULT;
1424 	return 0;
1425 }
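/*
 * Userspace reads these counters like so (a sketch; note from the code
 * above that tp_packets includes tp_drops and that both counters are
 * reset by the read):
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <linux/if_packet.h>
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 *	printf("%u packets, %u dropped\n", st.tp_packets, st.tp_drops);
 */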
1426 
1427 
1428 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1429 {
1430 	struct sock *sk;
1431 	struct hlist_node *node;
1432 	struct net_device *dev = (struct net_device*)data;
1433 
1434 	read_lock(&packet_sklist_lock);
1435 	sk_for_each(sk, node, &packet_sklist) {
1436 		struct packet_sock *po = pkt_sk(sk);
1437 
1438 		switch (msg) {
1439 		case NETDEV_UNREGISTER:
1440 #ifdef CONFIG_PACKET_MULTICAST
1441 			if (po->mclist)
1442 				packet_dev_mclist(dev, po->mclist, -1);
1443 			/* fallthrough */
1444 #endif
1445 		case NETDEV_DOWN:
1446 			if (dev->ifindex == po->ifindex) {
1447 				spin_lock(&po->bind_lock);
1448 				if (po->running) {
1449 					__dev_remove_pack(&po->prot_hook);
1450 					__sock_put(sk);
1451 					po->running = 0;
1452 					sk->sk_err = ENETDOWN;
1453 					if (!sock_flag(sk, SOCK_DEAD))
1454 						sk->sk_error_report(sk);
1455 				}
1456 				if (msg == NETDEV_UNREGISTER) {
1457 					po->ifindex = -1;
1458 					po->prot_hook.dev = NULL;
1459 				}
1460 				spin_unlock(&po->bind_lock);
1461 			}
1462 			break;
1463 		case NETDEV_UP:
1464 			spin_lock(&po->bind_lock);
1465 			if (dev->ifindex == po->ifindex && po->num &&
1466 			    !po->running) {
1467 				dev_add_pack(&po->prot_hook);
1468 				sock_hold(sk);
1469 				po->running = 1;
1470 			}
1471 			spin_unlock(&po->bind_lock);
1472 			break;
1473 		}
1474 	}
1475 	read_unlock(&packet_sklist_lock);
1476 	return NOTIFY_DONE;
1477 }
1478 
1479 
1480 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1481 			unsigned long arg)
1482 {
1483 	struct sock *sk = sock->sk;
1484 
1485 	switch(cmd) {
1486 		case SIOCOUTQ:
1487 		{
1488 			int amount = atomic_read(&sk->sk_wmem_alloc);
1489 			return put_user(amount, (int __user *)arg);
1490 		}
1491 		case SIOCINQ:
1492 		{
1493 			struct sk_buff *skb;
1494 			int amount = 0;
1495 
1496 			spin_lock_bh(&sk->sk_receive_queue.lock);
1497 			skb = skb_peek(&sk->sk_receive_queue);
1498 			if (skb)
1499 				amount = skb->len;
1500 			spin_unlock_bh(&sk->sk_receive_queue.lock);
1501 			return put_user(amount, (int __user *)arg);
1502 		}
1503 		case SIOCGSTAMP:
1504 			return sock_get_timestamp(sk, (struct timeval __user *)arg);
1505 
1506 #ifdef CONFIG_INET
1507 		case SIOCADDRT:
1508 		case SIOCDELRT:
1509 		case SIOCDARP:
1510 		case SIOCGARP:
1511 		case SIOCSARP:
1512 		case SIOCGIFADDR:
1513 		case SIOCSIFADDR:
1514 		case SIOCGIFBRDADDR:
1515 		case SIOCSIFBRDADDR:
1516 		case SIOCGIFNETMASK:
1517 		case SIOCSIFNETMASK:
1518 		case SIOCGIFDSTADDR:
1519 		case SIOCSIFDSTADDR:
1520 		case SIOCSIFFLAGS:
1521 			return inet_dgram_ops.ioctl(sock, cmd, arg);
1522 #endif
1523 
1524 		default:
1525 			return -ENOIOCTLCMD;
1526 	}
1527 	return 0;
1528 }
1529 
1530 #ifndef CONFIG_PACKET_MMAP
1531 #define packet_mmap sock_no_mmap
1532 #define packet_poll datagram_poll
1533 #else
1534 
1535 static unsigned int packet_poll(struct file * file, struct socket *sock,
1536 				poll_table *wait)
1537 {
1538 	struct sock *sk = sock->sk;
1539 	struct packet_sock *po = pkt_sk(sk);
1540 	unsigned int mask = datagram_poll(file, sock, wait);
1541 
1542 	spin_lock_bh(&sk->sk_receive_queue.lock);
1543 	if (po->pg_vec) {
1544 		unsigned last = po->head ? po->head-1 : po->frame_max;
1545 		struct tpacket_hdr *h;
1546 
1547 		h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
1548 
1549 		if (h->tp_status)
1550 			mask |= POLLIN | POLLRDNORM;
1551 	}
1552 	spin_unlock_bh(&sk->sk_receive_queue.lock);
1553 	return mask;
1554 }
1555 
1556 
1557 /* Dirty? Well, I still have not learned a better way to account
1558  * for user mmaps.
1559  */
1560 
1561 static void packet_mm_open(struct vm_area_struct *vma)
1562 {
1563 	struct file *file = vma->vm_file;
1564 	struct socket * sock = file->private_data;
1565 	struct sock *sk = sock->sk;
1566 
1567 	if (sk)
1568 		atomic_inc(&pkt_sk(sk)->mapped);
1569 }
1570 
1571 static void packet_mm_close(struct vm_area_struct *vma)
1572 {
1573 	struct file *file = vma->vm_file;
1574 	struct socket * sock = file->private_data;
1575 	struct sock *sk = sock->sk;
1576 
1577 	if (sk)
1578 		atomic_dec(&pkt_sk(sk)->mapped);
1579 }
1580 
1581 static struct vm_operations_struct packet_mmap_ops = {
1582 	.open =	packet_mm_open,
1583 	.close =	packet_mm_close,
1584 };
1585 
1586 static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1587 {
1588 	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1589 }
1590 
1591 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1592 {
1593 	int i;
1594 
1595 	for (i = 0; i < len; i++) {
1596 		if (likely(pg_vec[i]))
1597 			free_pages((unsigned long) pg_vec[i], order);
1598 	}
1599 	kfree(pg_vec);
1600 }
1601 
1602 static inline char *alloc_one_pg_vec_page(unsigned long order)
1603 {
1604 	return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1605 					 order);
1606 }
1607 
1608 static char **alloc_pg_vec(struct tpacket_req *req, int order)
1609 {
1610 	unsigned int block_nr = req->tp_block_nr;
1611 	char **pg_vec;
1612 	int i;
1613 
1614 	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1615 	if (unlikely(!pg_vec))
1616 		goto out;
1617 
1618 	for (i = 0; i < block_nr; i++) {
1619 		pg_vec[i] = alloc_one_pg_vec_page(order);
1620 		if (unlikely(!pg_vec[i]))
1621 			goto out_free_pgvec;
1622 	}
1623 
1624 out:
1625 	return pg_vec;
1626 
1627 out_free_pgvec:
1628 	free_pg_vec(pg_vec, order, block_nr);
1629 	pg_vec = NULL;
1630 	goto out;
1631 }
1632 
1633 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1634 {
1635 	char **pg_vec = NULL;
1636 	struct packet_sock *po = pkt_sk(sk);
1637 	int was_running, num, order = 0;
1638 	int err = 0;
1639 
1640 	if (req->tp_block_nr) {
1641 		int i, l;
1642 
1643 		/* Sanity tests and some calculations */
1644 
1645 		if (unlikely(po->pg_vec))
1646 			return -EBUSY;
1647 
1648 		if (unlikely((int)req->tp_block_size <= 0))
1649 			return -EINVAL;
1650 		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1651 			return -EINVAL;
1652 		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
1653 			return -EINVAL;
1654 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1655 			return -EINVAL;
1656 
1657 		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
1658 		if (unlikely(po->frames_per_block <= 0))
1659 			return -EINVAL;
1660 		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1661 			     req->tp_frame_nr))
1662 			return -EINVAL;
1663 
1664 		err = -ENOMEM;
1665 		order = get_order(req->tp_block_size);
1666 		pg_vec = alloc_pg_vec(req, order);
1667 		if (unlikely(!pg_vec))
1668 			goto out;
1669 
1670 		l = 0;
1671 		for (i = 0; i < req->tp_block_nr; i++) {
1672 			char *ptr = pg_vec[i];
1673 			struct tpacket_hdr *header;
1674 			int k;
1675 
1676 			for (k = 0; k < po->frames_per_block; k++) {
1677 				header = (struct tpacket_hdr *) ptr;
1678 				header->tp_status = TP_STATUS_KERNEL;
1679 				ptr += req->tp_frame_size;
1680 			}
1681 		}
1682 		/* Done */
1683 	} else {
1684 		if (unlikely(req->tp_frame_nr))
1685 			return -EINVAL;
1686 	}
1687 
1688 	lock_sock(sk);
1689 
1690 	/* Detach socket from network */
1691 	spin_lock(&po->bind_lock);
1692 	was_running = po->running;
1693 	num = po->num;
1694 	if (was_running) {
1695 		__dev_remove_pack(&po->prot_hook);
1696 		po->num = 0;
1697 		po->running = 0;
1698 		__sock_put(sk);
1699 	}
1700 	spin_unlock(&po->bind_lock);
1701 
1702 	synchronize_net();
1703 
1704 	err = -EBUSY;
1705 	if (closing || atomic_read(&po->mapped) == 0) {
1706 		err = 0;
1707 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1708 
1709 		spin_lock_bh(&sk->sk_receive_queue.lock);
1710 		pg_vec = XC(po->pg_vec, pg_vec);
1711 		po->frame_max = (req->tp_frame_nr - 1);
1712 		po->head = 0;
1713 		po->frame_size = req->tp_frame_size;
1714 		spin_unlock_bh(&sk->sk_receive_queue.lock);
1715 
1716 		order = XC(po->pg_vec_order, order);
1717 		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1718 
1719 		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1720 		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1721 		skb_queue_purge(&sk->sk_receive_queue);
1722 #undef XC
1723 		if (atomic_read(&po->mapped))
1724 			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1725 	}
1726 
1727 	spin_lock(&po->bind_lock);
1728 	if (was_running && !po->running) {
1729 		sock_hold(sk);
1730 		po->running = 1;
1731 		po->num = num;
1732 		dev_add_pack(&po->prot_hook);
1733 	}
1734 	spin_unlock(&po->bind_lock);
1735 
1736 	release_sock(sk);
1737 
1738 	if (pg_vec)
1739 		free_pg_vec(pg_vec, order, req->tp_block_nr);
1740 out:
1741 	return err;
1742 }
1743 
1744 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1745 {
1746 	struct sock *sk = sock->sk;
1747 	struct packet_sock *po = pkt_sk(sk);
1748 	unsigned long size;
1749 	unsigned long start;
1750 	int err = -EINVAL;
1751 	int i;
1752 
1753 	if (vma->vm_pgoff)
1754 		return -EINVAL;
1755 
1756 	size = vma->vm_end - vma->vm_start;
1757 
1758 	lock_sock(sk);
1759 	if (po->pg_vec == NULL)
1760 		goto out;
1761 	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1762 		goto out;
1763 
1764 	start = vma->vm_start;
1765 	for (i = 0; i < po->pg_vec_len; i++) {
1766 		struct page *page = virt_to_page(po->pg_vec[i]);
1767 		int pg_num;
1768 
1769 		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1770 			err = vm_insert_page(vma, start, page);
1771 			if (unlikely(err))
1772 				goto out;
1773 			start += PAGE_SIZE;
1774 		}
1775 	}
1776 	atomic_inc(&po->mapped);
1777 	vma->vm_ops = &packet_mmap_ops;
1778 	err = 0;
1779 
1780 out:
1781 	release_sock(sk);
1782 	return err;
1783 }
1784 #endif
1785 
1786 
1787 #ifdef CONFIG_SOCK_PACKET
1788 static const struct proto_ops packet_ops_spkt = {
1789 	.family =	PF_PACKET,
1790 	.owner =	THIS_MODULE,
1791 	.release =	packet_release,
1792 	.bind =		packet_bind_spkt,
1793 	.connect =	sock_no_connect,
1794 	.socketpair =	sock_no_socketpair,
1795 	.accept =	sock_no_accept,
1796 	.getname =	packet_getname_spkt,
1797 	.poll =		datagram_poll,
1798 	.ioctl =	packet_ioctl,
1799 	.listen =	sock_no_listen,
1800 	.shutdown =	sock_no_shutdown,
1801 	.setsockopt =	sock_no_setsockopt,
1802 	.getsockopt =	sock_no_getsockopt,
1803 	.sendmsg =	packet_sendmsg_spkt,
1804 	.recvmsg =	packet_recvmsg,
1805 	.mmap =		sock_no_mmap,
1806 	.sendpage =	sock_no_sendpage,
1807 };
1808 #endif
1809 
1810 static const struct proto_ops packet_ops = {
1811 	.family =	PF_PACKET,
1812 	.owner =	THIS_MODULE,
1813 	.release =	packet_release,
1814 	.bind =		packet_bind,
1815 	.connect =	sock_no_connect,
1816 	.socketpair =	sock_no_socketpair,
1817 	.accept =	sock_no_accept,
1818 	.getname =	packet_getname,
1819 	.poll =		packet_poll,
1820 	.ioctl =	packet_ioctl,
1821 	.listen =	sock_no_listen,
1822 	.shutdown =	sock_no_shutdown,
1823 	.setsockopt =	packet_setsockopt,
1824 	.getsockopt =	packet_getsockopt,
1825 	.sendmsg =	packet_sendmsg,
1826 	.recvmsg =	packet_recvmsg,
1827 	.mmap =		packet_mmap,
1828 	.sendpage =	sock_no_sendpage,
1829 };
1830 
1831 static struct net_proto_family packet_family_ops = {
1832 	.family =	PF_PACKET,
1833 	.create =	packet_create,
1834 	.owner	=	THIS_MODULE,
1835 };
1836 
1837 static struct notifier_block packet_netdev_notifier = {
1838 	.notifier_call = packet_notifier,
1839 };
1840 
1841 #ifdef CONFIG_PROC_FS
1842 static inline struct sock *packet_seq_idx(loff_t off)
1843 {
1844 	struct sock *s;
1845 	struct hlist_node *node;
1846 
1847 	sk_for_each(s, node, &packet_sklist) {
1848 		if (!off--)
1849 			return s;
1850 	}
1851 	return NULL;
1852 }
1853 
1854 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1855 {
1856 	read_lock(&packet_sklist_lock);
1857 	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1858 }
1859 
1860 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1861 {
1862 	++*pos;
1863 	return  (v == SEQ_START_TOKEN)
1864 		? sk_head(&packet_sklist)
1865 		: sk_next((struct sock*)v) ;
1866 }
1867 
1868 static void packet_seq_stop(struct seq_file *seq, void *v)
1869 {
1870 	read_unlock(&packet_sklist_lock);
1871 }
1872 
1873 static int packet_seq_show(struct seq_file *seq, void *v)
1874 {
1875 	if (v == SEQ_START_TOKEN)
1876 		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1877 	else {
1878 		struct sock *s = v;
1879 		const struct packet_sock *po = pkt_sk(s);
1880 
1881 		seq_printf(seq,
1882 			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
1883 			   s,
1884 			   atomic_read(&s->sk_refcnt),
1885 			   s->sk_type,
1886 			   ntohs(po->num),
1887 			   po->ifindex,
1888 			   po->running,
1889 			   atomic_read(&s->sk_rmem_alloc),
1890 			   sock_i_uid(s),
1891 			   sock_i_ino(s) );
1892 	}
1893 
1894 	return 0;
1895 }
1896 
1897 static struct seq_operations packet_seq_ops = {
1898 	.start	= packet_seq_start,
1899 	.next	= packet_seq_next,
1900 	.stop	= packet_seq_stop,
1901 	.show	= packet_seq_show,
1902 };
1903 
1904 static int packet_seq_open(struct inode *inode, struct file *file)
1905 {
1906 	return seq_open(file, &packet_seq_ops);
1907 }
1908 
1909 static struct file_operations packet_seq_fops = {
1910 	.owner		= THIS_MODULE,
1911 	.open		= packet_seq_open,
1912 	.read		= seq_read,
1913 	.llseek		= seq_lseek,
1914 	.release	= seq_release,
1915 };
1916 
1917 #endif
1918 
1919 static void __exit packet_exit(void)
1920 {
1921 	proc_net_remove("packet");
1922 	unregister_netdevice_notifier(&packet_netdev_notifier);
1923 	sock_unregister(PF_PACKET);
1924 	proto_unregister(&packet_proto);
1925 }
1926 
1927 static int __init packet_init(void)
1928 {
1929 	int rc = proto_register(&packet_proto, 0);
1930 
1931 	if (rc != 0)
1932 		goto out;
1933 
1934 	sock_register(&packet_family_ops);
1935 	register_netdevice_notifier(&packet_netdev_notifier);
1936 	proc_net_fops_create("packet", 0, &packet_seq_fops);
1937 out:
1938 	return rc;
1939 }
1940 
1941 module_init(packet_init);
1942 module_exit(packet_exit);
1943 MODULE_LICENSE("GPL");
1944 MODULE_ALIAS_NETPROTO(PF_PACKET);
1945