/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

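/*
 * Worked example (illustrative only; sizeof(struct sk_buff) varies by
 * platform and kernel version): with a 240-byte sk_buff, the per-packet
 * overhead is 240 + 256 = 496 bytes, so SK_WMEM_MAX and SK_RMEM_MAX come
 * out to 496 * 256 = 126976 bytes, roughly 124 KiB per socket.
 */
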
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

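/*
 * Worked example (illustrative only, assuming HZ == 1000): a timeval of
 * { .tv_sec = 1, .tv_usec = 500000 } becomes 1 * 1000 +
 * (500000 + 999) / 1000 = 1500 jiffies, while a zero timeval selects
 * MAX_SCHEDULE_TIMEOUT, i.e. "block forever".
 */
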
static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}

int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = 0;
	int skb_len;

	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
	   the number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf) {
		err = -ENOMEM;
		goto out;
	}

	/* It would deadlock if sock_queue_rcv_skb were used with the
	   socket lock held! We assume that users of this function are
	   lock free.
	*/
	err = sk_filter(sk, skb, 1);
	if (err)
		goto out;

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	skb_queue_tail(&sk->sk_receive_queue, skb);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
out:
	return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

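/*
 * Illustrative sketch (not part of the original file): how a protocol's
 * receive path might hand an skb to sock_queue_rcv_skb() and drop it on
 * failure. The function name example_protocol_rcv is hypothetical.
 */
#if 0
static int example_protocol_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* On error the skb has not been queued, so we still own it. */
	if (sock_queue_rcv_skb(sk, skb) < 0) {
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	return NET_RX_SUCCESS;
}
#endif
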
int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		rc = sk->sk_backlog_rcv(sk, skb);
	else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk->sk_dst_cache;

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk->sk_dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

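/*
 * Illustrative sketch (not part of the original file): a transmit path
 * might validate its cached route with sk_dst_check() before use and
 * fall back to a fresh lookup when the cache is stale. The helper
 * example_route_lookup is hypothetical; the returned dst carries a
 * reference that the caller must eventually dst_release().
 */
#if 0
static struct dst_entry *example_output_route(struct sock *sk)
{
	struct dst_entry *dst = sk_dst_check(sk, 0);

	if (dst == NULL)			/* cache empty or obsolete */
		dst = example_route_lookup(sk);	/* hypothetical helper */
	return dst;
}
#endif
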
/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	if (optname == SO_DONTLINGER) {
		lock_sock(sk);
		sock_reset_flag(sk, SOCK_LINGER);
		release_sock(sk);
		return 0;
	}
#endif

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
		case SO_DEBUG:
			if (val && !capable(CAP_NET_ADMIN))
				ret = -EACCES;
			else if (valbool)
				sock_set_flag(sk, SOCK_DBG);
			else
				sock_reset_flag(sk, SOCK_DBG);
			break;
		case SO_REUSEADDR:
			sk->sk_reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			if (valbool)
				sock_set_flag(sk, SOCK_LOCALROUTE);
			else
				sock_reset_flag(sk, SOCK_LOCALROUTE);
			break;
		case SO_BROADCAST:
			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
			break;
		case SO_SNDBUF:
			/* Don't error on this. BSD doesn't, and if you think
			   about it this is right: otherwise apps would have
			   to play 'guess the biggest size' games. RCVBUF and
			   SNDBUF are treated in BSD as hints. */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;
set_sndbuf:
			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sk_sndbuf = val * 2;

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->sk_write_space(sk);
			break;

		case SO_SNDBUFFORCE:
			if (!capable(CAP_NET_ADMIN)) {
				ret = -EPERM;
				break;
			}
			goto set_sndbuf;

		case SO_RCVBUF:
			/* Don't error on this. BSD doesn't, and if you think
			   about it this is right: otherwise apps would have
			   to play 'guess the biggest size' games. RCVBUF and
			   SNDBUF are treated in BSD as hints. */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;
set_rcvbuf:
			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			/*
			 * We double it on the way in to account for
			 * "struct sk_buff" etc. overhead.   Applications
			 * assume that the SO_RCVBUF setting they make will
			 * allow that much actual data to be received on that
			 * socket.
			 *
			 * Applications are unaware that "struct sk_buff" and
			 * other overheads allocate from the receive buffer
			 * during socket buffer allocation.
			 *
			 * And after considering the possible alternatives,
			 * returning the value we actually used in getsockopt
			 * is the most desirable behavior.
			 */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->sk_rcvbuf = val * 2;
			break;

		case SO_RCVBUFFORCE:
			if (!capable(CAP_NET_ADMIN)) {
				ret = -EPERM;
				break;
			}
			goto set_rcvbuf;

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->sk_protocol == IPPROTO_TCP)
				tcp_set_keepalive(sk, valbool);
#endif
			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
			break;

		case SO_OOBINLINE:
			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
			break;

		case SO_NO_CHECK:
			sk->sk_no_check = valbool;
			break;

		case SO_PRIORITY:
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->sk_priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if (optlen < sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling, optval, sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if (!ling.l_onoff)
				sock_reset_flag(sk, SOCK_LINGER);
			else {
#if (BITS_PER_LONG == 32)
				if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
				sock_set_flag(sk, SOCK_LINGER);
			}
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("setsockopt");
			break;

		case SO_PASSCRED:
			if (valbool)
				set_bit(SOCK_PASSCRED, &sock->flags);
			else
				clear_bit(SOCK_PASSCRED, &sock->flags);
			break;

		case SO_TIMESTAMP:
			if (valbool) {
				sock_set_flag(sk, SOCK_RCVTSTAMP);
				sock_enable_timestamp(sk);
			} else
				sock_reset_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			sk->sk_rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->sk_bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ - 1)
					optlen = IFNAMSIZ - 1;
				memset(devname, 0, sizeof(devname));
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->sk_bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->sk_bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
#endif

		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			spin_lock_bh(&sk->sk_lock.slock);
			filter = sk->sk_filter;
			if (filter) {
				sk->sk_filter = NULL;
				spin_unlock_bh(&sk->sk_lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->sk_lock.slock);
			ret = -ENONET;
			break;

		case SO_PASSSEC:
			if (valbool)
				set_bit(SOCK_PASSSEC, &sock->flags);
			else
				clear_bit(SOCK_PASSSEC, &sock->flags);
			break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
		default:
			ret = -ENOPROTOOPT;
			break;
	}
	release_sock(sk);
	return ret;
}

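/*
 * Illustrative user-space sketch (not part of the original file) showing
 * the SO_RCVBUF doubling described above: the kernel stores twice the
 * requested value to cover struct sk_buff overhead, and getsockopt()
 * reports the value actually in effect.
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int val = 65536, out;
	socklen_t len = sizeof(out);

	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
	printf("%d\n", out);	/* prints 131072, subject to sysctl caps */
	return 0;
}
#endif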

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
		case SO_DEBUG:
			v.val = sock_flag(sk, SOCK_DBG);
			break;

		case SO_DONTROUTE:
			v.val = sock_flag(sk, SOCK_LOCALROUTE);
			break;

		case SO_BROADCAST:
			v.val = !!sock_flag(sk, SOCK_BROADCAST);
			break;

		case SO_SNDBUF:
			v.val = sk->sk_sndbuf;
			break;

		case SO_RCVBUF:
			v.val = sk->sk_rcvbuf;
			break;

		case SO_REUSEADDR:
			v.val = sk->sk_reuse;
			break;

		case SO_KEEPALIVE:
			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
			break;

		case SO_TYPE:
			v.val = sk->sk_type;
			break;

		case SO_ERROR:
			v.val = -sock_error(sk);
			if (v.val == 0)
				v.val = xchg(&sk->sk_err_soft, 0);
			break;

		case SO_OOBINLINE:
			v.val = !!sock_flag(sk, SOCK_URGINLINE);
			break;

		case SO_NO_CHECK:
			v.val = sk->sk_no_check;
			break;

		case SO_PRIORITY:
			v.val = sk->sk_priority;
			break;

		case SO_LINGER:
			lv		= sizeof(v.ling);
			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
			v.ling.l_linger	= sk->sk_lingertime / HZ;
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("getsockopt");
			break;

		case SO_TIMESTAMP:
			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_SNDTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_RCVLOWAT:
			v.val = sk->sk_rcvlowat;
			break;

		case SO_SNDLOWAT:
			v.val = 1;
			break;

		case SO_PASSCRED:
			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
			break;

		case SO_PEERCRED:
			if (len > sizeof(sk->sk_peercred))
				len = sizeof(sk->sk_peercred);
			if (copy_to_user(optval, &sk->sk_peercred, len))
				return -EFAULT;
			goto lenout;

		case SO_PEERNAME:
		{
			char address[128];

			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
				return -ENOTCONN;
			if (lv < len)
				return -EINVAL;
			if (copy_to_user(optval, address, len))
				return -EFAULT;
			goto lenout;
		}

		/* Dubious BSD thing... Probably nobody even uses it, but
		 * the UNIX standard wants it for whatever reason... -DaveM
		 */
		case SO_ACCEPTCONN:
			v.val = sk->sk_state == TCP_LISTEN;
			break;

		case SO_PASSSEC:
			v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
			break;

		case SO_PEERSEC:
			return security_socket_getpeersec_stream(sock, optval, optlen, len);

		default:
			return -ENOPROTOOPT;
	}
	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
		      struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	kmem_cache_t *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free;
	}
	return sk;

out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

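/*
 * Illustrative sketch (not part of the original file): a protocol's
 * create routine might allocate and initialise its sock like this,
 * freeing it with sk_free() on any later failure. The names
 * example_proto and example_create are hypothetical.
 */
#if 0
static int example_create(struct socket *sock)
{
	struct sock *sk = sk_alloc(PF_INET, GFP_KERNEL, &example_proto, 1);

	if (sk == NULL)
		return -ENOBUFS;
	sock_init_data(sock, sk);	/* generic field setup, see below */
	return 0;
}
#endif
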
void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = sk->sk_filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->sk_filter = NULL;
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

	if (newsk != NULL) {
		struct sk_filter *filter;

		memcpy(newsk, sk, sk->sk_prot->obj_size);

		/* SANITY */
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);

		atomic_set(&newsk->sk_rmem_alloc, 0);
		atomic_set(&newsk->sk_wmem_alloc, 0);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		rwlock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = newsk->sk_filter;
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still a raw copy of the parent, so invalidate
			 * the destructor and do a plain sk_free() */
			newsk->sk_destruct = NULL;
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		newsk->sk_socket = NULL;
		newsk->sk_sleep	 = NULL;

		if (newsk->sk_prot->sockets_allocated)
			atomic_inc(newsk->sk_prot->sockets_allocated);
	}
out:
	return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}
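
/*
 * For scale (illustrative arithmetic, assuming 4 KiB pages): 4096 pages
 * is 16 MiB of RAM, below which the limits shrink to 32767 bytes, and
 * 131072 pages is 512 MiB, above which the maxima grow to 131071 bytes.
 */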

/*
 *	Simple resource managers for sockets.
 */

/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}

int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

968  * Allocate a skb from the socket's send buffer.
969  */
970 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
971 			     gfp_t priority)
972 {
973 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
974 		struct sk_buff * skb = alloc_skb(size, priority);
975 		if (skb) {
976 			skb_set_owner_w(skb, sk);
977 			return skb;
978 		}
979 	}
980 	return NULL;
981 }
982 
983 /*
984  * Allocate a skb from the socket's receive buffer.
985  */
986 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
987 			     gfp_t priority)
988 {
989 	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
990 		struct sk_buff *skb = alloc_skb(size, priority);
991 		if (skb) {
992 			skb_set_owner_r(skb, sk);
993 			return skb;
994 		}
995 	}
996 	return NULL;
997 }
998 
/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
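
/*
 * Illustrative sketch (not part of the original file): option memory is
 * charged against sk->sk_omem_alloc, so every sock_kmalloc() must be
 * paired with a sock_kfree_s() of the same size. The helper name
 * example_set_opt is hypothetical.
 */
#if 0
static int example_set_opt(struct sock *sk, int size)
{
	void *opt = sock_kmalloc(sk, size, GFP_KERNEL);

	if (opt == NULL)
		return -ENOBUFS;
	/* ... use opt ... */
	sock_kfree_s(sk, opt, size);	/* uncharges exactly 'size' bytes */
	return 0;
}
#endif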

/* This is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}

/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, sk->sk_allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

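/*
 * Illustrative sketch (not part of the original file): a datagram
 * sendmsg() implementation might obtain a send buffer like this,
 * blocking (subject to the socket's send timeout) until write memory is
 * available. The headroom of 16 bytes is a hypothetical choice.
 */
#if 0
static struct sk_buff *example_make_skb(struct sock *sk, size_t len,
					int noblock, int *err)
{
	struct sk_buff *skb = sock_alloc_send_skb(sk, len + 16, noblock, err);

	if (skb != NULL)
		skb_reserve(skb, 16);	/* room for a hypothetical header */
	return skb;
}
#endif
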
static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					  TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under the lock,
 * hence we may omit checks after joining the wait queue.
 * We check the receive queue before schedule() only as an optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);

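/*
 * Illustrative sketch (not part of the original file): a blocking
 * recvmsg() loop might use sk_wait_data() like this, re-checking the
 * receive queue after every wakeup. It assumes the caller holds the
 * socket lock, as sk_wait_data() expects; error handling is elided.
 */
#if 0
static struct sk_buff *example_wait_for_data(struct sock *sk, long *timeo)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) == NULL) {
		if (!*timeo || signal_pending(current))
			break;
		sk_wait_data(sk, timeo);
	}
	return skb;
}
#endif
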
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = { .msg_flags = flags };
	struct kvec iov;
	char *kaddr = kmap(page);

	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);

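/*
 * Illustrative sketch (not part of the original file): sk_reset_timer()
 * takes a socket reference only when it arms a previously idle timer,
 * and sk_stop_timer() drops it only when it actually deletes a pending
 * one, so arm/stop pairs keep sk->sk_refcnt balanced. The function
 * names are hypothetical; the timer handler itself (not shown) is
 * responsible for dropping the reference when it fires.
 */
#if 0
static void example_arm_retransmit(struct sock *sk)
{
	/* holds a ref on sk until the timer fires or is stopped */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ);
}

static void example_cancel_retransmit(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);	/* releases that ref */
}
#endif
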
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec	= -1L;
	sk->sk_stamp.tv_usec	= -1L;

	atomic_set(&sk->sk_refcnt, 1);
}

void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock_bh(&(sk->sk_lock.slock));
}

EXPORT_SYMBOL(lock_sock);

void fastcall release_sock(struct sock *sk)
{
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&(sk->sk_lock.wq)))
		wake_up(&(sk->sk_lock.wq));
	spin_unlock_bh(&(sk->sk_lock.slock));
}
EXPORT_SYMBOL(release_sock);

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on a socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network stack still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did the hash table lookup before we unhashed
	 * the socket. They will reach the receive queue and be purged by
	 * the socket destructor.
	 *
	 * Also, we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
	char *request_sock_slab_name = NULL;
	char *timewait_sock_slab_name;
	int rc = -ENOBUFS;

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			static const char mask[] = "request_sock_%s";

			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
			if (request_sock_slab_name == NULL)
				goto out_free_sock_slab;

			sprintf(request_sock_slab_name, mask, prot->name);
			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL, NULL);

			if (prot->rsk_prot->slab == NULL) {
				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
				       prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			static const char mask[] = "tw_sock_%s";

			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);

			if (timewait_sock_slab_name == NULL)
				goto out_free_request_sock_slab;

			sprintf(timewait_sock_slab_name, mask, prot->name);
			prot->twsk_prot->twsk_slab =
				kmem_cache_create(timewait_sock_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0, SLAB_HWCACHE_ALIGN,
						  NULL, NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	write_unlock(&proto_list_lock);
	rc = 0;
out:
	return rc;
out_free_timewait_sock_slab_name:
	kfree(timewait_sock_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	kfree(request_sock_slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
	goto out;
}

EXPORT_SYMBOL(proto_register);

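/*
 * Illustrative sketch (not part of the original file): a protocol module
 * would register its struct proto at init time and unregister it on
 * exit. The example_proto definition is hypothetical and far from
 * complete.
 */
#if 0
static struct proto example_proto = {
	.name	  = "EXAMPLE",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct sock),
};

static int __init example_init(void)
{
	return proto_register(&example_proto, 1);	/* 1 => create a slab */
}

static void __exit example_exit(void)
{
	proto_unregister(&example_proto);
}
#endif
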
void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);
	list_del(&prot->node);
	write_unlock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		const char *name = kmem_cache_name(prot->rsk_prot->slab);

		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);

		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* CONFIG_PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
EXPORT_SYMBOL(sysctl_optmem_max);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif
1884