xref: /linux/net/core/sock.c (revision d8327c784b51b57dac2c26cfad87dce0d68dfd98)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Generic socket support routines. Memory allocators, socket lock/release
7  *		handler for protocols to use and generic option handler.
8  *
9  *
10  * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11  *
12  * Authors:	Ross Biro
13  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *		Florian La Roche, <flla@stud.uni-sb.de>
15  *		Alan Cox, <A.Cox@swansea.ac.uk>
16  *
17  * Fixes:
18  *		Alan Cox	: 	Numerous verify_area() problems
19  *		Alan Cox	:	Connecting on a connecting socket
20  *					now returns an error for tcp.
21  *		Alan Cox	:	sock->protocol is set correctly,
22  *					and is no longer sometimes left as 0.
23  *		Alan Cox	:	connect handles icmp errors on a
24  *					connect properly. Unfortunately there
25  *					is a restart syscall nasty there. I
26  *					can't match BSD without hacking the C
27  *					library. Ideas urgently sought!
28  *		Alan Cox	:	Disallow bind() to addresses that are
29  *					not ours - especially broadcast ones!!
30  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
31  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
32  *					instead they leave that for the DESTROY timer.
33  *		Alan Cox	:	Clean up error flag in accept
34  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
35  *					was buggy. Put a remove_sock() in the handler
36  *					for memory when we hit 0. Also altered the timer
37  *					code. The ACK stuff can wait and needs major
38  *					TCP layer surgery.
39  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
40  *					and fixed timer/inet_bh race.
41  *		Alan Cox	:	Added zapped flag for TCP
42  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
43  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
45  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
46  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
48  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
49  *	Pauline Middelink	:	identd support
50  *		Alan Cox	:	Fixed connect() taking signals I think.
51  *		Alan Cox	:	SO_LINGER supported
52  *		Alan Cox	:	Error reporting fixes
53  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
54  *		Alan Cox	:	inet sockets don't set sk->type!
55  *		Alan Cox	:	Split socket option code
56  *		Alan Cox	:	Callbacks
57  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
58  *		Alex		:	Removed restriction on inet fioctl
59  *		Alan Cox	:	Splitting INET from NET core
60  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
61  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
62  *		Alan Cox	:	Split IP from generic code
63  *		Alan Cox	:	New kfree_skbmem()
64  *		Alan Cox	:	Make SO_DEBUG superuser only.
65  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
66  *					(compatibility fix)
67  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
68  *		Alan Cox	:	Allocator for a socket is settable.
69  *		Alan Cox	:	SO_ERROR includes soft errors.
70  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
71  *		Alan Cox	: 	Generic socket allocation to make hooks
72  *					easier (suggested by Craig Metz).
73  *		Michael Pall	:	SO_ERROR returns positive errno again
74  *              Steve Whitehouse:       Added default destructor to free
75  *                                      protocol private data.
76  *              Steve Whitehouse:       Added various other default routines
77  *                                      common to several socket families.
78  *              Chris Evans     :       Call suser() check last on F_SETOWN
79  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
81  *		Andi Kleen	:	Fix write_space callback
82  *		Chris Evans	:	Security fixes - signedness again
83  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
84  *
85  * To Fix:
86  *
87  *
88  *		This program is free software; you can redistribute it and/or
89  *		modify it under the terms of the GNU General Public License
90  *		as published by the Free Software Foundation; either version
91  *		2 of the License, or (at your option) any later version.
92  */
93 
94 #include <linux/capability.h>
95 #include <linux/config.h>
96 #include <linux/errno.h>
97 #include <linux/types.h>
98 #include <linux/socket.h>
99 #include <linux/in.h>
100 #include <linux/kernel.h>
101 #include <linux/module.h>
102 #include <linux/proc_fs.h>
103 #include <linux/seq_file.h>
104 #include <linux/sched.h>
105 #include <linux/timer.h>
106 #include <linux/string.h>
107 #include <linux/sockios.h>
108 #include <linux/net.h>
109 #include <linux/mm.h>
110 #include <linux/slab.h>
111 #include <linux/interrupt.h>
112 #include <linux/poll.h>
113 #include <linux/tcp.h>
114 #include <linux/init.h>
115 
116 #include <asm/uaccess.h>
117 #include <asm/system.h>
118 
119 #include <linux/netdevice.h>
120 #include <net/protocol.h>
121 #include <linux/skbuff.h>
122 #include <net/request_sock.h>
123 #include <net/sock.h>
124 #include <net/xfrm.h>
125 #include <linux/ipsec.h>
126 
127 #include <linux/filter.h>
128 
129 #ifdef CONFIG_INET
130 #include <net/tcp.h>
131 #endif
132 
133 /* Take into consideration the size of the struct sk_buff overhead in the
134  * determination of these values, since that is non-constant across
135  * platforms.  This makes socket queueing behavior and performance
136  * not depend upon such differences.
137  */
138 #define _SK_MEM_PACKETS		256
139 #define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
140 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
141 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
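
/*
 * Illustrative arithmetic (not from this file): if sizeof(struct sk_buff)
 * were, say, 240 bytes on a given platform, then
 *
 *	_SK_MEM_OVERHEAD = 240 + 256 = 496 bytes per packet
 *	SK_WMEM_MAX	 = 496 * 256 = 126976 bytes (~124 KiB)
 *
 * so the defaults always budget for 256 queued packets, regardless of how
 * large struct sk_buff happens to be on the architecture.
 */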
142 
143 /* Run time adjustable parameters. */
144 __u32 sysctl_wmem_max = SK_WMEM_MAX;
145 __u32 sysctl_rmem_max = SK_RMEM_MAX;
146 __u32 sysctl_wmem_default = SK_WMEM_MAX;
147 __u32 sysctl_rmem_default = SK_RMEM_MAX;
148 
149 /* Maximal space eaten by iovec or ancillary data plus some space */
150 int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
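
/*
 * These knobs are exported through sysctl as net.core.wmem_max,
 * net.core.rmem_max, net.core.wmem_default, net.core.rmem_default and
 * net.core.optmem_max (i.e. under /proc/sys/net/core/), so administrators
 * can raise the buffer ceilings at run time without rebuilding the kernel.
 */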
151 
152 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
153 {
154 	struct timeval tv;
155 
156 	if (optlen < sizeof(tv))
157 		return -EINVAL;
158 	if (copy_from_user(&tv, optval, sizeof(tv)))
159 		return -EFAULT;
160 
161 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
162 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
163 		return 0;
164 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
165 		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
166 	return 0;
167 }
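
/*
 * A minimal user-space sketch (not part of this file, assuming the usual
 * <sys/socket.h> and <sys/time.h> headers) of what feeds this helper:
 * SO_RCVTIMEO/SO_SNDTIMEO take a struct timeval, and {0, 0} means "wait
 * forever" (MAX_SCHEDULE_TIMEOUT above).
 *
 *	struct timeval tv = { .tv_sec = 3, .tv_usec = 500000 };
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
 *		perror("setsockopt(SO_RCVTIMEO)");
 *
 * The microseconds are converted to jiffies with a round-up, so a 3.5s
 * timeout never expires early.
 */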
168 
169 static void sock_warn_obsolete_bsdism(const char *name)
170 {
171 	static int warned;
172 	static char warncomm[TASK_COMM_LEN];
173 	if (strcmp(warncomm, current->comm) && warned < 5) {
174 		strcpy(warncomm,  current->comm);
175 		printk(KERN_WARNING "process `%s' is using obsolete "
176 		       "%s SO_BSDCOMPAT\n", warncomm, name);
177 		warned++;
178 	}
179 }
180 
181 static void sock_disable_timestamp(struct sock *sk)
182 {
183 	if (sock_flag(sk, SOCK_TIMESTAMP)) {
184 		sock_reset_flag(sk, SOCK_TIMESTAMP);
185 		net_disable_timestamp();
186 	}
187 }
188 
189 
190 /*
191  *	This is meant for all protocols to use and covers goings on
192  *	at the socket level. Everything here is generic.
193  */
194 
195 int sock_setsockopt(struct socket *sock, int level, int optname,
196 		    char __user *optval, int optlen)
197 {
198 	struct sock *sk=sock->sk;
199 	struct sk_filter *filter;
200 	int val;
201 	int valbool;
202 	struct linger ling;
203 	int ret = 0;
204 
205 	/*
206 	 *	Options without arguments
207 	 */
208 
209 #ifdef SO_DONTLINGER		/* Compatibility item... */
210 	if (optname == SO_DONTLINGER) {
211 		lock_sock(sk);
212 		sock_reset_flag(sk, SOCK_LINGER);
213 		release_sock(sk);
214 		return 0;
215 	}
216 #endif
217 
218   	if(optlen<sizeof(int))
219   		return(-EINVAL);
220 
221 	if (get_user(val, (int __user *)optval))
222 		return -EFAULT;
223 
224   	valbool = val?1:0;
225 
226 	lock_sock(sk);
227 
228   	switch(optname)
229   	{
230 		case SO_DEBUG:
231 			if(val && !capable(CAP_NET_ADMIN))
232 			{
233 				ret = -EACCES;
234 			}
235 			else if (valbool)
236 				sock_set_flag(sk, SOCK_DBG);
237 			else
238 				sock_reset_flag(sk, SOCK_DBG);
239 			break;
240 		case SO_REUSEADDR:
241 			sk->sk_reuse = valbool;
242 			break;
243 		case SO_TYPE:
244 		case SO_ERROR:
245 			ret = -ENOPROTOOPT;
246 		  	break;
247 		case SO_DONTROUTE:
248 			if (valbool)
249 				sock_set_flag(sk, SOCK_LOCALROUTE);
250 			else
251 				sock_reset_flag(sk, SOCK_LOCALROUTE);
252 			break;
253 		case SO_BROADCAST:
254 			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
255 			break;
256 		case SO_SNDBUF:
257 			/* Don't return an error on this; BSD doesn't, and
258 			   if you think about it this is right. Otherwise
259 			   apps have to play 'guess the biggest size' games.
260 			   RCVBUF/SNDBUF are treated in BSD as hints. */
261 
262 			if (val > sysctl_wmem_max)
263 				val = sysctl_wmem_max;
264 set_sndbuf:
265 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
266 			if ((val * 2) < SOCK_MIN_SNDBUF)
267 				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
268 			else
269 				sk->sk_sndbuf = val * 2;
270 
271 			/*
272 			 *	Wake up sending tasks if we
273 			 *	upped the value.
274 			 */
275 			sk->sk_write_space(sk);
276 			break;
277 
278 		case SO_SNDBUFFORCE:
279 			if (!capable(CAP_NET_ADMIN)) {
280 				ret = -EPERM;
281 				break;
282 			}
283 			goto set_sndbuf;
284 
285 		case SO_RCVBUF:
286 			/* Don't return an error on this; BSD doesn't, and
287 			   if you think about it this is right. Otherwise
288 			   apps have to play 'guess the biggest size' games.
289 			   RCVBUF/SNDBUF are treated in BSD as hints. */
290 
291 			if (val > sysctl_rmem_max)
292 				val = sysctl_rmem_max;
293 set_rcvbuf:
294 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
295 			/* FIXME: is this lower bound the right one? */
296 			if ((val * 2) < SOCK_MIN_RCVBUF)
297 				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
298 			else
299 				sk->sk_rcvbuf = val * 2;
300 			break;
301 
302 		case SO_RCVBUFFORCE:
303 			if (!capable(CAP_NET_ADMIN)) {
304 				ret = -EPERM;
305 				break;
306 			}
307 			goto set_rcvbuf;
308 
309 		case SO_KEEPALIVE:
310 #ifdef CONFIG_INET
311 			if (sk->sk_protocol == IPPROTO_TCP)
312 				tcp_set_keepalive(sk, valbool);
313 #endif
314 			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
315 			break;
316 
317 	 	case SO_OOBINLINE:
318 			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
319 			break;
320 
321 	 	case SO_NO_CHECK:
322 			sk->sk_no_check = valbool;
323 			break;
324 
325 		case SO_PRIORITY:
326 			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
327 				sk->sk_priority = val;
328 			else
329 				ret = -EPERM;
330 			break;
331 
332 		case SO_LINGER:
333 			if(optlen<sizeof(ling)) {
334 				ret = -EINVAL;	/* 1003.1g */
335 				break;
336 			}
337 			if (copy_from_user(&ling,optval,sizeof(ling))) {
338 				ret = -EFAULT;
339 				break;
340 			}
341 			if (!ling.l_onoff)
342 				sock_reset_flag(sk, SOCK_LINGER);
343 			else {
344 #if (BITS_PER_LONG == 32)
345 				if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
346 					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
347 				else
348 #endif
349 					sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
350 				sock_set_flag(sk, SOCK_LINGER);
351 			}
352 			break;
353 
354 		case SO_BSDCOMPAT:
355 			sock_warn_obsolete_bsdism("setsockopt");
356 			break;
357 
358 		case SO_PASSCRED:
359 			if (valbool)
360 				set_bit(SOCK_PASSCRED, &sock->flags);
361 			else
362 				clear_bit(SOCK_PASSCRED, &sock->flags);
363 			break;
364 
365 		case SO_TIMESTAMP:
366 			if (valbool)  {
367 				sock_set_flag(sk, SOCK_RCVTSTAMP);
368 				sock_enable_timestamp(sk);
369 			} else
370 				sock_reset_flag(sk, SOCK_RCVTSTAMP);
371 			break;
372 
373 		case SO_RCVLOWAT:
374 			if (val < 0)
375 				val = INT_MAX;
376 			sk->sk_rcvlowat = val ? : 1;
377 			break;
378 
379 		case SO_RCVTIMEO:
380 			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
381 			break;
382 
383 		case SO_SNDTIMEO:
384 			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
385 			break;
386 
387 #ifdef CONFIG_NETDEVICES
388 		case SO_BINDTODEVICE:
389 		{
390 			char devname[IFNAMSIZ];
391 
392 			/* Sorry... */
393 			if (!capable(CAP_NET_RAW)) {
394 				ret = -EPERM;
395 				break;
396 			}
397 
398 			/* Bind this socket to a particular device like "eth0",
399 			 * as specified in the passed interface name. If the
400 			 * name is "" or the option length is zero the socket
401 			 * is not bound.
402 			 */
403 
404 			if (!valbool) {
405 				sk->sk_bound_dev_if = 0;
406 			} else {
407 				if (optlen > IFNAMSIZ)
408 					optlen = IFNAMSIZ;
409 				if (copy_from_user(devname, optval, optlen)) {
410 					ret = -EFAULT;
411 					break;
412 				}
413 
414 				/* Remove any cached route for this socket. */
415 				sk_dst_reset(sk);
416 
417 				if (devname[0] == '\0') {
418 					sk->sk_bound_dev_if = 0;
419 				} else {
420 					struct net_device *dev = dev_get_by_name(devname);
421 					if (!dev) {
422 						ret = -ENODEV;
423 						break;
424 					}
425 					sk->sk_bound_dev_if = dev->ifindex;
426 					dev_put(dev);
427 				}
428 			}
429 			break;
430 		}
431 #endif
432 
433 
434 		case SO_ATTACH_FILTER:
435 			ret = -EINVAL;
436 			if (optlen == sizeof(struct sock_fprog)) {
437 				struct sock_fprog fprog;
438 
439 				ret = -EFAULT;
440 				if (copy_from_user(&fprog, optval, sizeof(fprog)))
441 					break;
442 
443 				ret = sk_attach_filter(&fprog, sk);
444 			}
445 			break;
446 
447 		case SO_DETACH_FILTER:
448 			spin_lock_bh(&sk->sk_lock.slock);
449 			filter = sk->sk_filter;
450 			if (filter) {
451 				sk->sk_filter = NULL;
452 				spin_unlock_bh(&sk->sk_lock.slock);
453 				sk_filter_release(sk, filter);
454 				break;
455 			}
456 			spin_unlock_bh(&sk->sk_lock.slock);
457 			ret = -ENONET;
458 			break;
459 
460 		/* We implement the SO_SNDLOWAT etc to
461 		   not be settable (1003.1g 5.3) */
462 		default:
463 		  	ret = -ENOPROTOOPT;
464 			break;
465   	}
466 	release_sock(sk);
467 	return ret;
468 }
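
/*
 * Hedged user-space sketch (assumes <sys/socket.h>, a valid fd, and a
 * requested size below net.core.wmem_max): the value passed to SO_SNDBUF
 * is doubled above to leave room for sk_buff overhead, so reading the
 * option back returns twice what was set.
 *
 *	int snd = 16384, got;
 *	socklen_t len = sizeof(got);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd));
 *	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &got, &len);
 *	// got is now 32768, not 16384
 */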
469 
470 
471 int sock_getsockopt(struct socket *sock, int level, int optname,
472 		    char __user *optval, int __user *optlen)
473 {
474 	struct sock *sk = sock->sk;
475 
476 	union
477 	{
478   		int val;
479   		struct linger ling;
480 		struct timeval tm;
481 	} v;
482 
483 	unsigned int lv = sizeof(int);
484 	int len;
485 
486   	if(get_user(len,optlen))
487   		return -EFAULT;
488 	if(len < 0)
489 		return -EINVAL;
490 
491   	switch(optname)
492   	{
493 		case SO_DEBUG:
494 			v.val = sock_flag(sk, SOCK_DBG);
495 			break;
496 
497 		case SO_DONTROUTE:
498 			v.val = sock_flag(sk, SOCK_LOCALROUTE);
499 			break;
500 
501 		case SO_BROADCAST:
502 			v.val = !!sock_flag(sk, SOCK_BROADCAST);
503 			break;
504 
505 		case SO_SNDBUF:
506 			v.val = sk->sk_sndbuf;
507 			break;
508 
509 		case SO_RCVBUF:
510 			v.val = sk->sk_rcvbuf;
511 			break;
512 
513 		case SO_REUSEADDR:
514 			v.val = sk->sk_reuse;
515 			break;
516 
517 		case SO_KEEPALIVE:
518 			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
519 			break;
520 
521 		case SO_TYPE:
522 			v.val = sk->sk_type;
523 			break;
524 
525 		case SO_ERROR:
526 			v.val = -sock_error(sk);
527 			if(v.val==0)
528 				v.val = xchg(&sk->sk_err_soft, 0);
529 			break;
530 
531 		case SO_OOBINLINE:
532 			v.val = !!sock_flag(sk, SOCK_URGINLINE);
533 			break;
534 
535 		case SO_NO_CHECK:
536 			v.val = sk->sk_no_check;
537 			break;
538 
539 		case SO_PRIORITY:
540 			v.val = sk->sk_priority;
541 			break;
542 
543 		case SO_LINGER:
544 			lv		= sizeof(v.ling);
545 			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
546  			v.ling.l_linger	= sk->sk_lingertime / HZ;
547 			break;
548 
549 		case SO_BSDCOMPAT:
550 			sock_warn_obsolete_bsdism("getsockopt");
551 			break;
552 
553 		case SO_TIMESTAMP:
554 			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
555 			break;
556 
557 		case SO_RCVTIMEO:
558 			lv=sizeof(struct timeval);
559 			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
560 				v.tm.tv_sec = 0;
561 				v.tm.tv_usec = 0;
562 			} else {
563 				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
564 				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
565 			}
566 			break;
567 
568 		case SO_SNDTIMEO:
569 			lv=sizeof(struct timeval);
570 			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
571 				v.tm.tv_sec = 0;
572 				v.tm.tv_usec = 0;
573 			} else {
574 				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
575 				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
576 			}
577 			break;
578 
579 		case SO_RCVLOWAT:
580 			v.val = sk->sk_rcvlowat;
581 			break;
582 
583 		case SO_SNDLOWAT:
584 			v.val=1;
585 			break;
586 
587 		case SO_PASSCRED:
588 			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
589 			break;
590 
591 		case SO_PEERCRED:
592 			if (len > sizeof(sk->sk_peercred))
593 				len = sizeof(sk->sk_peercred);
594 			if (copy_to_user(optval, &sk->sk_peercred, len))
595 				return -EFAULT;
596 			goto lenout;
597 
598 		case SO_PEERNAME:
599 		{
600 			char address[128];
601 
602 			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
603 				return -ENOTCONN;
604 			if (lv < len)
605 				return -EINVAL;
606 			if (copy_to_user(optval, address, len))
607 				return -EFAULT;
608 			goto lenout;
609 		}
610 
611 		/* Dubious BSD thing... Probably nobody even uses it, but
612 		 * the UNIX standard wants it for whatever reason... -DaveM
613 		 */
614 		case SO_ACCEPTCONN:
615 			v.val = sk->sk_state == TCP_LISTEN;
616 			break;
617 
618 		case SO_PEERSEC:
619 			return security_socket_getpeersec(sock, optval, optlen, len);
620 
621 		default:
622 			return(-ENOPROTOOPT);
623 	}
624 	if (len > lv)
625 		len = lv;
626 	if (copy_to_user(optval, &v, len))
627 		return -EFAULT;
628 lenout:
629   	if (put_user(len, optlen))
630   		return -EFAULT;
631   	return 0;
632 }
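
/*
 * Hedged sketch of the usual SO_ERROR idiom (user space, not this file):
 * after a non-blocking connect() reports writability, the pending error
 * is fetched (and cleared, see sock_error() and the xchg() above) like so:
 *
 *	int err = 0;
 *	socklen_t len = sizeof(err);
 *
 *	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err)
 *		fprintf(stderr, "connect failed: %s\n", strerror(err));
 */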
633 
634 /**
635  *	sk_alloc - All socket objects are allocated here
636  *	@family: protocol family
637  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
638  *	@prot: struct proto associated with this new sock instance
639  *	@zero_it: if we should zero the newly allocated sock
640  */
641 struct sock *sk_alloc(int family, gfp_t priority,
642 		      struct proto *prot, int zero_it)
643 {
644 	struct sock *sk = NULL;
645 	kmem_cache_t *slab = prot->slab;
646 
647 	if (slab != NULL)
648 		sk = kmem_cache_alloc(slab, priority);
649 	else
650 		sk = kmalloc(prot->obj_size, priority);
651 
652 	if (sk) {
653 		if (zero_it) {
654 			memset(sk, 0, prot->obj_size);
655 			sk->sk_family = family;
656 			/*
657 			 * See comment in struct sock definition to understand
658 			 * why we need sk_prot_creator -acme
659 			 */
660 			sk->sk_prot = sk->sk_prot_creator = prot;
661 			sock_lock_init(sk);
662 		}
663 
664 		if (security_sk_alloc(sk, family, priority))
665 			goto out_free;
666 
667 		if (!try_module_get(prot->owner))
668 			goto out_free;
669 	}
670 	return sk;
671 
672 out_free:
673 	if (slab != NULL)
674 		kmem_cache_free(slab, sk);
675 	else
676 		kfree(sk);
677 	return NULL;
678 }
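
/*
 * Minimal kernel-side sketch, under stated assumptions: "my_proto" is a
 * hypothetical struct proto previously registered with proto_register(),
 * whose obj_size covers a protocol sock that embeds struct sock first,
 * and "sock" is the struct socket being set up.
 *
 *	struct sock *sk = sk_alloc(PF_INET, GFP_KERNEL, &my_proto, 1);
 *
 *	if (sk == NULL)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 *	...
 *	sk_free(sk);	// releases the slab object and module reference
 */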
679 
680 void sk_free(struct sock *sk)
681 {
682 	struct sk_filter *filter;
683 	struct module *owner = sk->sk_prot_creator->owner;
684 
685 	if (sk->sk_destruct)
686 		sk->sk_destruct(sk);
687 
688 	filter = sk->sk_filter;
689 	if (filter) {
690 		sk_filter_release(sk, filter);
691 		sk->sk_filter = NULL;
692 	}
693 
694 	sock_disable_timestamp(sk);
695 
696 	if (atomic_read(&sk->sk_omem_alloc))
697 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
698 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
699 
700 	security_sk_free(sk);
701 	if (sk->sk_prot_creator->slab != NULL)
702 		kmem_cache_free(sk->sk_prot_creator->slab, sk);
703 	else
704 		kfree(sk);
705 	module_put(owner);
706 }
707 
708 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
709 {
710 	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
711 
712 	if (newsk != NULL) {
713 		struct sk_filter *filter;
714 
715 		memcpy(newsk, sk, sk->sk_prot->obj_size);
716 
717 		/* SANITY */
718 		sk_node_init(&newsk->sk_node);
719 		sock_lock_init(newsk);
720 		bh_lock_sock(newsk);
721 
722 		atomic_set(&newsk->sk_rmem_alloc, 0);
723 		atomic_set(&newsk->sk_wmem_alloc, 0);
724 		atomic_set(&newsk->sk_omem_alloc, 0);
725 		skb_queue_head_init(&newsk->sk_receive_queue);
726 		skb_queue_head_init(&newsk->sk_write_queue);
727 
728 		rwlock_init(&newsk->sk_dst_lock);
729 		rwlock_init(&newsk->sk_callback_lock);
730 
731 		newsk->sk_dst_cache	= NULL;
732 		newsk->sk_wmem_queued	= 0;
733 		newsk->sk_forward_alloc = 0;
734 		newsk->sk_send_head	= NULL;
735 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
736 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
737 
738 		sock_reset_flag(newsk, SOCK_DONE);
739 		skb_queue_head_init(&newsk->sk_error_queue);
740 
741 		filter = newsk->sk_filter;
742 		if (filter != NULL)
743 			sk_filter_charge(newsk, filter);
744 
745 		if (unlikely(xfrm_sk_clone_policy(newsk))) {
746 			/* It is still a raw copy of the parent, so invalidate
747 			 * the destructor and do a plain sk_free() */
748 			newsk->sk_destruct = NULL;
749 			sk_free(newsk);
750 			newsk = NULL;
751 			goto out;
752 		}
753 
754 		newsk->sk_err	   = 0;
755 		newsk->sk_priority = 0;
756 		atomic_set(&newsk->sk_refcnt, 2);
757 
758 		/*
759 		 * Increment the counter in the same struct proto as the master
760 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
761 		 * is the same as sk->sk_prot->socks, as this field was copied
762 		 * with memcpy).
763 		 *
764 		 * This _changes_ the previous behaviour, where
765 		 * tcp_create_openreq_child always incremented the
766 		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
767 		 * to be taken into account in all callers. -acme
768 		 */
769 		sk_refcnt_debug_inc(newsk);
770 		newsk->sk_socket = NULL;
771 		newsk->sk_sleep	 = NULL;
772 
773 		if (newsk->sk_prot->sockets_allocated)
774 			atomic_inc(newsk->sk_prot->sockets_allocated);
775 	}
776 out:
777 	return newsk;
778 }
779 
780 EXPORT_SYMBOL_GPL(sk_clone);
781 
782 void __init sk_init(void)
783 {
784 	if (num_physpages <= 4096) {
785 		sysctl_wmem_max = 32767;
786 		sysctl_rmem_max = 32767;
787 		sysctl_wmem_default = 32767;
788 		sysctl_rmem_default = 32767;
789 	} else if (num_physpages >= 131072) {
790 		sysctl_wmem_max = 131071;
791 		sysctl_rmem_max = 131071;
792 	}
793 }
794 
795 /*
796  *	Simple resource managers for sockets.
797  */
798 
799 
800 /*
801  * Write buffer destructor automatically called from kfree_skb.
802  */
803 void sock_wfree(struct sk_buff *skb)
804 {
805 	struct sock *sk = skb->sk;
806 
807 	/* In case it might be waiting for more memory. */
808 	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
809 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
810 		sk->sk_write_space(sk);
811 	sock_put(sk);
812 }
813 
814 /*
815  * Read buffer destructor automatically called from kfree_skb.
816  */
817 void sock_rfree(struct sk_buff *skb)
818 {
819 	struct sock *sk = skb->sk;
820 
821 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
822 }
823 
824 
825 int sock_i_uid(struct sock *sk)
826 {
827 	int uid;
828 
829 	read_lock(&sk->sk_callback_lock);
830 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
831 	read_unlock(&sk->sk_callback_lock);
832 	return uid;
833 }
834 
835 unsigned long sock_i_ino(struct sock *sk)
836 {
837 	unsigned long ino;
838 
839 	read_lock(&sk->sk_callback_lock);
840 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
841 	read_unlock(&sk->sk_callback_lock);
842 	return ino;
843 }
844 
845 /*
846  * Allocate a skb from the socket's send buffer.
847  */
848 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
849 			     gfp_t priority)
850 {
851 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
852 		struct sk_buff * skb = alloc_skb(size, priority);
853 		if (skb) {
854 			skb_set_owner_w(skb, sk);
855 			return skb;
856 		}
857 	}
858 	return NULL;
859 }
860 
861 /*
862  * Allocate a skb from the socket's receive buffer.
863  */
864 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
865 			     gfp_t priority)
866 {
867 	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
868 		struct sk_buff *skb = alloc_skb(size, priority);
869 		if (skb) {
870 			skb_set_owner_r(skb, sk);
871 			return skb;
872 		}
873 	}
874 	return NULL;
875 }
876 
877 /*
878  * Allocate a memory block from the socket's option memory buffer.
879  */
880 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
881 {
882 	if ((unsigned)size <= sysctl_optmem_max &&
883 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
884 		void *mem;
885 		/* First do the add, to avoid the race in case
886 		 * kmalloc sleeps.
887 		 */
888 		atomic_add(size, &sk->sk_omem_alloc);
889 		mem = kmalloc(size, priority);
890 		if (mem)
891 			return mem;
892 		atomic_sub(size, &sk->sk_omem_alloc);
893 	}
894 	return NULL;
895 }
896 
897 /*
898  * Free an option memory block.
899  */
900 void sock_kfree_s(struct sock *sk, void *mem, int size)
901 {
902 	kfree(mem);
903 	atomic_sub(size, &sk->sk_omem_alloc);
904 }
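
/*
 * Sketch of the intended pairing ("my_opts" is a hypothetical per-socket
 * option blob; the same size must be passed to both calls so that the
 * sk_omem_alloc accounting above balances):
 *
 *	struct my_opts *opts = sock_kmalloc(sk, sizeof(*opts), GFP_KERNEL);
 *
 *	if (opts == NULL)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opts, sizeof(*opts));
 */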
905 
906 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
907    I think these locks should be removed for datagram sockets.
908  */
909 static long sock_wait_for_wmem(struct sock * sk, long timeo)
910 {
911 	DEFINE_WAIT(wait);
912 
913 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
914 	for (;;) {
915 		if (!timeo)
916 			break;
917 		if (signal_pending(current))
918 			break;
919 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
920 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
921 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
922 			break;
923 		if (sk->sk_shutdown & SEND_SHUTDOWN)
924 			break;
925 		if (sk->sk_err)
926 			break;
927 		timeo = schedule_timeout(timeo);
928 	}
929 	finish_wait(sk->sk_sleep, &wait);
930 	return timeo;
931 }
932 
933 
934 /*
935  *	Generic send/receive buffer handlers
936  */
937 
938 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
939 					    unsigned long header_len,
940 					    unsigned long data_len,
941 					    int noblock, int *errcode)
942 {
943 	struct sk_buff *skb;
944 	gfp_t gfp_mask;
945 	long timeo;
946 	int err;
947 
948 	gfp_mask = sk->sk_allocation;
949 	if (gfp_mask & __GFP_WAIT)
950 		gfp_mask |= __GFP_REPEAT;
951 
952 	timeo = sock_sndtimeo(sk, noblock);
953 	while (1) {
954 		err = sock_error(sk);
955 		if (err != 0)
956 			goto failure;
957 
958 		err = -EPIPE;
959 		if (sk->sk_shutdown & SEND_SHUTDOWN)
960 			goto failure;
961 
962 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
963 			skb = alloc_skb(header_len, sk->sk_allocation);
964 			if (skb) {
965 				int npages;
966 				int i;
967 
968 				/* No pages, we're done... */
969 				if (!data_len)
970 					break;
971 
972 				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
973 				skb->truesize += data_len;
974 				skb_shinfo(skb)->nr_frags = npages;
975 				for (i = 0; i < npages; i++) {
976 					struct page *page;
977 					skb_frag_t *frag;
978 
979 					page = alloc_pages(sk->sk_allocation, 0);
980 					if (!page) {
981 						err = -ENOBUFS;
982 						skb_shinfo(skb)->nr_frags = i;
983 						kfree_skb(skb);
984 						goto failure;
985 					}
986 
987 					frag = &skb_shinfo(skb)->frags[i];
988 					frag->page = page;
989 					frag->page_offset = 0;
990 					frag->size = (data_len >= PAGE_SIZE ?
991 						      PAGE_SIZE :
992 						      data_len);
993 					data_len -= PAGE_SIZE;
994 				}
995 
996 				/* Full success... */
997 				break;
998 			}
999 			err = -ENOBUFS;
1000 			goto failure;
1001 		}
1002 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1003 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1004 		err = -EAGAIN;
1005 		if (!timeo)
1006 			goto failure;
1007 		if (signal_pending(current))
1008 			goto interrupted;
1009 		timeo = sock_wait_for_wmem(sk, timeo);
1010 	}
1011 
1012 	skb_set_owner_w(skb, sk);
1013 	return skb;
1014 
1015 interrupted:
1016 	err = sock_intr_errno(timeo);
1017 failure:
1018 	*errcode = err;
1019 	return NULL;
1020 }
1021 
1022 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1023 				    int noblock, int *errcode)
1024 {
1025 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1026 }
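
/*
 * Hedged sketch of a datagram send path built on the helper above
 * ("hdr_len" and the error label are illustrative, not from this file):
 *
 *	struct sk_buff *skb;
 *	int err;
 *
 *	skb = sock_alloc_send_skb(sk, len + hdr_len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (skb == NULL)
 *		goto out_err;	// err already holds a negative errno
 */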
1027 
1028 static void __lock_sock(struct sock *sk)
1029 {
1030 	DEFINE_WAIT(wait);
1031 
1032 	for(;;) {
1033 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1034 					TASK_UNINTERRUPTIBLE);
1035 		spin_unlock_bh(&sk->sk_lock.slock);
1036 		schedule();
1037 		spin_lock_bh(&sk->sk_lock.slock);
1038 		if(!sock_owned_by_user(sk))
1039 			break;
1040 	}
1041 	finish_wait(&sk->sk_lock.wq, &wait);
1042 }
1043 
1044 static void __release_sock(struct sock *sk)
1045 {
1046 	struct sk_buff *skb = sk->sk_backlog.head;
1047 
1048 	do {
1049 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1050 		bh_unlock_sock(sk);
1051 
1052 		do {
1053 			struct sk_buff *next = skb->next;
1054 
1055 			skb->next = NULL;
1056 			sk->sk_backlog_rcv(sk, skb);
1057 
1058 			/*
1059 			 * We are in process context here with softirqs
1060 			 * disabled, use cond_resched_softirq() to preempt.
1061 			 * This is safe to do because we've taken the backlog
1062 			 * queue private:
1063 			 */
1064 			cond_resched_softirq();
1065 
1066 			skb = next;
1067 		} while (skb != NULL);
1068 
1069 		bh_lock_sock(sk);
1070 	} while((skb = sk->sk_backlog.head) != NULL);
1071 }
1072 
1073 /**
1074  * sk_wait_data - wait for data to arrive at sk_receive_queue
1075  * @sk:    sock to wait on
1076  * @timeo: for how long
1077  *
1078  * Now socket state including sk->sk_err is changed only under lock,
1079  * hence we may omit checks after joining wait queue.
1080  * We check receive queue before schedule() only as optimization;
1081  * it is very likely that release_sock() added new data.
1082  */
1083 int sk_wait_data(struct sock *sk, long *timeo)
1084 {
1085 	int rc;
1086 	DEFINE_WAIT(wait);
1087 
1088 	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1089 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1090 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1091 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1092 	finish_wait(sk->sk_sleep, &wait);
1093 	return rc;
1094 }
1095 
1096 EXPORT_SYMBOL(sk_wait_data);
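
/*
 * Hedged sketch of the canonical caller (a datagram recvmsg path; the
 * socket must already be locked, since sk_wait_event() drops and
 * reacquires the lock around schedule_timeout()):
 *
 *	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo)
 *			return -EAGAIN;
 *		if (signal_pending(current))
 *			return sock_intr_errno(timeo);
 *		sk_wait_data(sk, &timeo);
 *	}
 */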
1097 
1098 /*
1099  * Set of default routines for initialising struct proto_ops when
1100  * the protocol does not support a particular function. In certain
1101  * cases where it makes no sense for a protocol to have a "do nothing"
1102  * function, some default processing is provided.
1103  */
1104 
1105 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1106 {
1107 	return -EOPNOTSUPP;
1108 }
1109 
1110 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1111 		    int len, int flags)
1112 {
1113 	return -EOPNOTSUPP;
1114 }
1115 
1116 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1117 {
1118 	return -EOPNOTSUPP;
1119 }
1120 
1121 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1122 {
1123 	return -EOPNOTSUPP;
1124 }
1125 
1126 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1127 		    int *len, int peer)
1128 {
1129 	return -EOPNOTSUPP;
1130 }
1131 
1132 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1133 {
1134 	return 0;
1135 }
1136 
1137 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1138 {
1139 	return -EOPNOTSUPP;
1140 }
1141 
1142 int sock_no_listen(struct socket *sock, int backlog)
1143 {
1144 	return -EOPNOTSUPP;
1145 }
1146 
1147 int sock_no_shutdown(struct socket *sock, int how)
1148 {
1149 	return -EOPNOTSUPP;
1150 }
1151 
1152 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1153 		    char __user *optval, int optlen)
1154 {
1155 	return -EOPNOTSUPP;
1156 }
1157 
1158 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1159 		    char __user *optval, int __user *optlen)
1160 {
1161 	return -EOPNOTSUPP;
1162 }
1163 
1164 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1165 		    size_t len)
1166 {
1167 	return -EOPNOTSUPP;
1168 }
1169 
1170 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1171 		    size_t len, int flags)
1172 {
1173 	return -EOPNOTSUPP;
1174 }
1175 
1176 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1177 {
1178 	/* Mirror missing mmap method error code */
1179 	return -ENODEV;
1180 }
1181 
1182 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1183 {
1184 	ssize_t res;
1185 	struct msghdr msg = {.msg_flags = flags};
1186 	struct kvec iov;
1187 	char *kaddr = kmap(page);
1188 	iov.iov_base = kaddr + offset;
1189 	iov.iov_len = size;
1190 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1191 	kunmap(page);
1192 	return res;
1193 }
1194 
1195 /*
1196  *	Default Socket Callbacks
1197  */
1198 
1199 static void sock_def_wakeup(struct sock *sk)
1200 {
1201 	read_lock(&sk->sk_callback_lock);
1202 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1203 		wake_up_interruptible_all(sk->sk_sleep);
1204 	read_unlock(&sk->sk_callback_lock);
1205 }
1206 
1207 static void sock_def_error_report(struct sock *sk)
1208 {
1209 	read_lock(&sk->sk_callback_lock);
1210 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1211 		wake_up_interruptible(sk->sk_sleep);
1212 	sk_wake_async(sk,0,POLL_ERR);
1213 	read_unlock(&sk->sk_callback_lock);
1214 }
1215 
1216 static void sock_def_readable(struct sock *sk, int len)
1217 {
1218 	read_lock(&sk->sk_callback_lock);
1219 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1220 		wake_up_interruptible(sk->sk_sleep);
1221 	sk_wake_async(sk,1,POLL_IN);
1222 	read_unlock(&sk->sk_callback_lock);
1223 }
1224 
1225 static void sock_def_write_space(struct sock *sk)
1226 {
1227 	read_lock(&sk->sk_callback_lock);
1228 
1229 	/* Do not wake up a writer until he can make "significant"
1230 	 * progress.  --DaveM
1231 	 */
1232 	if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1233 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1234 			wake_up_interruptible(sk->sk_sleep);
1235 
1236 		/* Should agree with poll, otherwise some programs break */
1237 		if (sock_writeable(sk))
1238 			sk_wake_async(sk, 2, POLL_OUT);
1239 	}
1240 
1241 	read_unlock(&sk->sk_callback_lock);
1242 }
1243 
1244 static void sock_def_destruct(struct sock *sk)
1245 {
1246 	kfree(sk->sk_protinfo);
1247 }
1248 
1249 void sk_send_sigurg(struct sock *sk)
1250 {
1251 	if (sk->sk_socket && sk->sk_socket->file)
1252 		if (send_sigurg(&sk->sk_socket->file->f_owner))
1253 			sk_wake_async(sk, 3, POLL_PRI);
1254 }
1255 
1256 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1257 		    unsigned long expires)
1258 {
1259 	if (!mod_timer(timer, expires))
1260 		sock_hold(sk);
1261 }
1262 
1263 EXPORT_SYMBOL(sk_reset_timer);
1264 
1265 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1266 {
1267 	if (timer_pending(timer) && del_timer(timer))
1268 		__sock_put(sk);
1269 }
1270 
1271 EXPORT_SYMBOL(sk_stop_timer);
1272 
1273 void sock_init_data(struct socket *sock, struct sock *sk)
1274 {
1275 	skb_queue_head_init(&sk->sk_receive_queue);
1276 	skb_queue_head_init(&sk->sk_write_queue);
1277 	skb_queue_head_init(&sk->sk_error_queue);
1278 
1279 	sk->sk_send_head	=	NULL;
1280 
1281 	init_timer(&sk->sk_timer);
1282 
1283 	sk->sk_allocation	=	GFP_KERNEL;
1284 	sk->sk_rcvbuf		=	sysctl_rmem_default;
1285 	sk->sk_sndbuf		=	sysctl_wmem_default;
1286 	sk->sk_state		=	TCP_CLOSE;
1287 	sk->sk_socket		=	sock;
1288 
1289 	sock_set_flag(sk, SOCK_ZAPPED);
1290 
1291 	if(sock)
1292 	{
1293 		sk->sk_type	=	sock->type;
1294 		sk->sk_sleep	=	&sock->wait;
1295 		sock->sk	=	sk;
1296 	} else
1297 		sk->sk_sleep	=	NULL;
1298 
1299 	rwlock_init(&sk->sk_dst_lock);
1300 	rwlock_init(&sk->sk_callback_lock);
1301 
1302 	sk->sk_state_change	=	sock_def_wakeup;
1303 	sk->sk_data_ready	=	sock_def_readable;
1304 	sk->sk_write_space	=	sock_def_write_space;
1305 	sk->sk_error_report	=	sock_def_error_report;
1306 	sk->sk_destruct		=	sock_def_destruct;
1307 
1308 	sk->sk_sndmsg_page	=	NULL;
1309 	sk->sk_sndmsg_off	=	0;
1310 
1311 	sk->sk_peercred.pid 	=	0;
1312 	sk->sk_peercred.uid	=	-1;
1313 	sk->sk_peercred.gid	=	-1;
1314 	sk->sk_write_pending	=	0;
1315 	sk->sk_rcvlowat		=	1;
1316 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
1317 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
1318 
1319 	sk->sk_stamp.tv_sec     = -1L;
1320 	sk->sk_stamp.tv_usec    = -1L;
1321 
1322 	atomic_set(&sk->sk_refcnt, 1);
1323 }
1324 
1325 void fastcall lock_sock(struct sock *sk)
1326 {
1327 	might_sleep();
1328 	spin_lock_bh(&(sk->sk_lock.slock));
1329 	if (sk->sk_lock.owner)
1330 		__lock_sock(sk);
1331 	sk->sk_lock.owner = (void *)1;
1332 	spin_unlock_bh(&(sk->sk_lock.slock));
1333 }
1334 
1335 EXPORT_SYMBOL(lock_sock);
1336 
1337 void fastcall release_sock(struct sock *sk)
1338 {
1339 	spin_lock_bh(&(sk->sk_lock.slock));
1340 	if (sk->sk_backlog.tail)
1341 		__release_sock(sk);
1342 	sk->sk_lock.owner = NULL;
1343 	if (waitqueue_active(&(sk->sk_lock.wq)))
1344 		wake_up(&(sk->sk_lock.wq));
1345 	spin_unlock_bh(&(sk->sk_lock.slock));
1346 }
1347 EXPORT_SYMBOL(release_sock);
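
/*
 * Usage sketch: any process-context code that touches protocol state
 * brackets it with this pair; packets arriving in softirq context while
 * the lock is owned are queued on sk_backlog and replayed by
 * __release_sock() above.
 *
 *	lock_sock(sk);
 *	// ... modify sk state safely ...
 *	release_sock(sk);
 */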
1348 
1349 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1350 {
1351 	if (!sock_flag(sk, SOCK_TIMESTAMP))
1352 		sock_enable_timestamp(sk);
1353 	if (sk->sk_stamp.tv_sec == -1)
1354 		return -ENOENT;
1355 	if (sk->sk_stamp.tv_sec == 0)
1356 		do_gettimeofday(&sk->sk_stamp);
1357 	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
1358 		-EFAULT : 0;
1359 }
1360 EXPORT_SYMBOL(sock_get_timestamp);
1361 
1362 void sock_enable_timestamp(struct sock *sk)
1363 {
1364 	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1365 		sock_set_flag(sk, SOCK_TIMESTAMP);
1366 		net_enable_timestamp();
1367 	}
1368 }
1369 EXPORT_SYMBOL(sock_enable_timestamp);
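
/*
 * User-space sketch (not this file; assumes <sys/socket.h> and that "msg"
 * was prepared with a msg_control buffer): enabling SO_TIMESTAMP makes
 * recvmsg() attach a struct timeval as an SCM_TIMESTAMP control message
 * on each received datagram.
 *
 *	int on = 1;
 *	struct timeval tv;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on));
 *	// after recvmsg(fd, &msg, 0):
 *	for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c;
 *	     c = CMSG_NXTHDR(&msg, c))
 *		if (c->cmsg_level == SOL_SOCKET &&
 *		    c->cmsg_type == SCM_TIMESTAMP)
 *			memcpy(&tv, CMSG_DATA(c), sizeof(tv));
 */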
1370 
1371 /*
1372  *	Get a socket option on a socket.
1373  *
1374  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
1375  *	asynchronous errors should be reported by getsockopt. We assume
1376  *	this means if you specify SO_ERROR (otherwise what's the point of it).
1377  */
1378 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1379 			   char __user *optval, int __user *optlen)
1380 {
1381 	struct sock *sk = sock->sk;
1382 
1383 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1384 }
1385 
1386 EXPORT_SYMBOL(sock_common_getsockopt);
1387 
1388 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1389 			struct msghdr *msg, size_t size, int flags)
1390 {
1391 	struct sock *sk = sock->sk;
1392 	int addr_len = 0;
1393 	int err;
1394 
1395 	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1396 				   flags & ~MSG_DONTWAIT, &addr_len);
1397 	if (err >= 0)
1398 		msg->msg_namelen = addr_len;
1399 	return err;
1400 }
1401 
1402 EXPORT_SYMBOL(sock_common_recvmsg);
1403 
1404 /*
1405  *	Set socket options on an inet socket.
1406  */
1407 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1408 			   char __user *optval, int optlen)
1409 {
1410 	struct sock *sk = sock->sk;
1411 
1412 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1413 }
1414 
1415 EXPORT_SYMBOL(sock_common_setsockopt);
1416 
1417 void sk_common_release(struct sock *sk)
1418 {
1419 	if (sk->sk_prot->destroy)
1420 		sk->sk_prot->destroy(sk);
1421 
1422 	/*
1423 	 * Observation: when sock_common_release is called, processes have
1424 	 * no access to the socket, but the network stack still does.
1425 	 * Step one, detach it from networking:
1426 	 *
1427 	 * A. Remove from hash tables.
1428 	 */
1429 
1430 	sk->sk_prot->unhash(sk);
1431 
1432 	/*
1433 	 * At this point the socket cannot receive new packets, but packets
1434 	 * may still be in flight: some CPU may have done its hash table
1435 	 * lookup before we unhashed the socket. Those packets will reach the
1436 	 * receive queue and be purged by the socket destructor.
1437 	 *
1438 	 * We also still have packets pending on the receive queue, and
1439 	 * probably our own packets waiting in device queues. sock_destroy
1440 	 * drains the receive queue, but transmitted packets delay socket
1441 	 * destruction until the last reference is released.
1442 	 */
1443 
1444 	sock_orphan(sk);
1445 
1446 	xfrm_sk_free_policy(sk);
1447 
1448 	sk_refcnt_debug_release(sk);
1449 	sock_put(sk);
1450 }
1451 
1452 EXPORT_SYMBOL(sk_common_release);
1453 
1454 static DEFINE_RWLOCK(proto_list_lock);
1455 static LIST_HEAD(proto_list);
1456 
1457 int proto_register(struct proto *prot, int alloc_slab)
1458 {
1459 	char *request_sock_slab_name = NULL;
1460 	char *timewait_sock_slab_name;
1461 	int rc = -ENOBUFS;
1462 
1463 	if (alloc_slab) {
1464 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1465 					       SLAB_HWCACHE_ALIGN, NULL, NULL);
1466 
1467 		if (prot->slab == NULL) {
1468 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1469 			       prot->name);
1470 			goto out;
1471 		}
1472 
1473 		if (prot->rsk_prot != NULL) {
1474 			static const char mask[] = "request_sock_%s";
1475 
1476 			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1477 			if (request_sock_slab_name == NULL)
1478 				goto out_free_sock_slab;
1479 
1480 			sprintf(request_sock_slab_name, mask, prot->name);
1481 			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1482 								 prot->rsk_prot->obj_size, 0,
1483 								 SLAB_HWCACHE_ALIGN, NULL, NULL);
1484 
1485 			if (prot->rsk_prot->slab == NULL) {
1486 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1487 				       prot->name);
1488 				goto out_free_request_sock_slab_name;
1489 			}
1490 		}
1491 
1492 		if (prot->twsk_prot != NULL) {
1493 			static const char mask[] = "tw_sock_%s";
1494 
1495 			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1496 
1497 			if (timewait_sock_slab_name == NULL)
1498 				goto out_free_request_sock_slab;
1499 
1500 			sprintf(timewait_sock_slab_name, mask, prot->name);
1501 			prot->twsk_prot->twsk_slab =
1502 				kmem_cache_create(timewait_sock_slab_name,
1503 						  prot->twsk_prot->twsk_obj_size,
1504 						  0, SLAB_HWCACHE_ALIGN,
1505 						  NULL, NULL);
1506 			if (prot->twsk_prot->twsk_slab == NULL)
1507 				goto out_free_timewait_sock_slab_name;
1508 		}
1509 	}
1510 
1511 	write_lock(&proto_list_lock);
1512 	list_add(&prot->node, &proto_list);
1513 	write_unlock(&proto_list_lock);
1514 	rc = 0;
1515 out:
1516 	return rc;
1517 out_free_timewait_sock_slab_name:
1518 	kfree(timewait_sock_slab_name);
1519 out_free_request_sock_slab:
1520 	if (prot->rsk_prot && prot->rsk_prot->slab) {
1521 		kmem_cache_destroy(prot->rsk_prot->slab);
1522 		prot->rsk_prot->slab = NULL;
1523 	}
1524 out_free_request_sock_slab_name:
1525 	kfree(request_sock_slab_name);
1526 out_free_sock_slab:
1527 	kmem_cache_destroy(prot->slab);
1528 	prot->slab = NULL;
1529 	goto out;
1530 }
1531 
1532 EXPORT_SYMBOL(proto_register);
1533 
1534 void proto_unregister(struct proto *prot)
1535 {
1536 	write_lock(&proto_list_lock);
1537 	list_del(&prot->node);
1538 	write_unlock(&proto_list_lock);
1539 
1540 	if (prot->slab != NULL) {
1541 		kmem_cache_destroy(prot->slab);
1542 		prot->slab = NULL;
1543 	}
1544 
1545 	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1546 		const char *name = kmem_cache_name(prot->rsk_prot->slab);
1547 
1548 		kmem_cache_destroy(prot->rsk_prot->slab);
1549 		kfree(name);
1550 		prot->rsk_prot->slab = NULL;
1551 	}
1552 
1553 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1554 		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1555 
1556 		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1557 		kfree(name);
1558 		prot->twsk_prot->twsk_slab = NULL;
1559 	}
1560 }
1561 
1562 EXPORT_SYMBOL(proto_unregister);
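
/*
 * Registration sketch under stated assumptions ("my_proto" and "my_sock"
 * are hypothetical): a protocol module registers its struct proto once at
 * init, optionally with a dedicated slab cache, and unregisters on exit.
 *
 *	static struct proto my_proto = {
 *		.name	  = "MYPROTO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct my_sock),	// embeds struct sock
 *	};
 *
 *	int err = proto_register(&my_proto, 1);	// 1 => allocate a slab
 *	...
 *	proto_unregister(&my_proto);
 */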
1563 
1564 #ifdef CONFIG_PROC_FS
1565 static inline struct proto *__proto_head(void)
1566 {
1567 	return list_entry(proto_list.next, struct proto, node);
1568 }
1569 
1570 static inline struct proto *proto_head(void)
1571 {
1572 	return list_empty(&proto_list) ? NULL : __proto_head();
1573 }
1574 
1575 static inline struct proto *proto_next(struct proto *proto)
1576 {
1577 	return proto->node.next == &proto_list ? NULL :
1578 		list_entry(proto->node.next, struct proto, node);
1579 }
1580 
1581 static inline struct proto *proto_get_idx(loff_t pos)
1582 {
1583 	struct proto *proto;
1584 	loff_t i = 0;
1585 
1586 	list_for_each_entry(proto, &proto_list, node)
1587 		if (i++ == pos)
1588 			goto out;
1589 
1590 	proto = NULL;
1591 out:
1592 	return proto;
1593 }
1594 
1595 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1596 {
1597 	read_lock(&proto_list_lock);
1598 	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
1599 }
1600 
1601 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1602 {
1603 	++*pos;
1604 	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
1605 }
1606 
1607 static void proto_seq_stop(struct seq_file *seq, void *v)
1608 {
1609 	read_unlock(&proto_list_lock);
1610 }
1611 
1612 static char proto_method_implemented(const void *method)
1613 {
1614 	return method == NULL ? 'n' : 'y';
1615 }
1616 
1617 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1618 {
1619 	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1620 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1621 		   proto->name,
1622 		   proto->obj_size,
1623 		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1624 		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1625 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1626 		   proto->max_header,
1627 		   proto->slab == NULL ? "no" : "yes",
1628 		   module_name(proto->owner),
1629 		   proto_method_implemented(proto->close),
1630 		   proto_method_implemented(proto->connect),
1631 		   proto_method_implemented(proto->disconnect),
1632 		   proto_method_implemented(proto->accept),
1633 		   proto_method_implemented(proto->ioctl),
1634 		   proto_method_implemented(proto->init),
1635 		   proto_method_implemented(proto->destroy),
1636 		   proto_method_implemented(proto->shutdown),
1637 		   proto_method_implemented(proto->setsockopt),
1638 		   proto_method_implemented(proto->getsockopt),
1639 		   proto_method_implemented(proto->sendmsg),
1640 		   proto_method_implemented(proto->recvmsg),
1641 		   proto_method_implemented(proto->sendpage),
1642 		   proto_method_implemented(proto->bind),
1643 		   proto_method_implemented(proto->backlog_rcv),
1644 		   proto_method_implemented(proto->hash),
1645 		   proto_method_implemented(proto->unhash),
1646 		   proto_method_implemented(proto->get_port),
1647 		   proto_method_implemented(proto->enter_memory_pressure));
1648 }
1649 
1650 static int proto_seq_show(struct seq_file *seq, void *v)
1651 {
1652 	if (v == SEQ_START_TOKEN)
1653 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1654 			   "protocol",
1655 			   "size",
1656 			   "sockets",
1657 			   "memory",
1658 			   "press",
1659 			   "maxhdr",
1660 			   "slab",
1661 			   "module",
1662 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1663 	else
1664 		proto_seq_printf(seq, v);
1665 	return 0;
1666 }
1667 
1668 static struct seq_operations proto_seq_ops = {
1669 	.start  = proto_seq_start,
1670 	.next   = proto_seq_next,
1671 	.stop   = proto_seq_stop,
1672 	.show   = proto_seq_show,
1673 };
1674 
1675 static int proto_seq_open(struct inode *inode, struct file *file)
1676 {
1677 	return seq_open(file, &proto_seq_ops);
1678 }
1679 
1680 static struct file_operations proto_seq_fops = {
1681 	.owner		= THIS_MODULE,
1682 	.open		= proto_seq_open,
1683 	.read		= seq_read,
1684 	.llseek		= seq_lseek,
1685 	.release	= seq_release,
1686 };
1687 
1688 static int __init proto_init(void)
1689 {
1690 	/* register /proc/net/protocols */
1691 	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1692 }
1693 
1694 subsys_initcall(proto_init);
1695 
1696 #endif /* PROC_FS */
1697 
1698 EXPORT_SYMBOL(sk_alloc);
1699 EXPORT_SYMBOL(sk_free);
1700 EXPORT_SYMBOL(sk_send_sigurg);
1701 EXPORT_SYMBOL(sock_alloc_send_skb);
1702 EXPORT_SYMBOL(sock_init_data);
1703 EXPORT_SYMBOL(sock_kfree_s);
1704 EXPORT_SYMBOL(sock_kmalloc);
1705 EXPORT_SYMBOL(sock_no_accept);
1706 EXPORT_SYMBOL(sock_no_bind);
1707 EXPORT_SYMBOL(sock_no_connect);
1708 EXPORT_SYMBOL(sock_no_getname);
1709 EXPORT_SYMBOL(sock_no_getsockopt);
1710 EXPORT_SYMBOL(sock_no_ioctl);
1711 EXPORT_SYMBOL(sock_no_listen);
1712 EXPORT_SYMBOL(sock_no_mmap);
1713 EXPORT_SYMBOL(sock_no_poll);
1714 EXPORT_SYMBOL(sock_no_recvmsg);
1715 EXPORT_SYMBOL(sock_no_sendmsg);
1716 EXPORT_SYMBOL(sock_no_sendpage);
1717 EXPORT_SYMBOL(sock_no_setsockopt);
1718 EXPORT_SYMBOL(sock_no_shutdown);
1719 EXPORT_SYMBOL(sock_no_socketpair);
1720 EXPORT_SYMBOL(sock_rfree);
1721 EXPORT_SYMBOL(sock_setsockopt);
1722 EXPORT_SYMBOL(sock_wfree);
1723 EXPORT_SYMBOL(sock_wmalloc);
1724 EXPORT_SYMBOL(sock_i_uid);
1725 EXPORT_SYMBOL(sock_i_ino);
1726 EXPORT_SYMBOL(sysctl_optmem_max);
1727 #ifdef CONFIG_SYSCTL
1728 EXPORT_SYMBOL(sysctl_rmem_max);
1729 EXPORT_SYMBOL(sysctl_wmem_max);
1730 #endif
1731