/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly,
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo	:	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
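
/*
 * Worked example (added commentary, hypothetical figures): if
 * sizeof(struct sk_buff) were 256 bytes on a given platform, then
 * _SK_MEM_OVERHEAD = 256 + 256 = 512 bytes per packet, and both
 * SK_WMEM_MAX and SK_RMEM_MAX would come out to 512 * 256 = 131072
 * bytes (128 KiB). A platform with a larger sk_buff gets a larger
 * default, which is the platform-independence the comment above is
 * after.
 */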

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long) * (2 * UIO_MAXIOV + 512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
		*timeo_p = tv.tv_sec * HZ +
			   (tv.tv_usec + (1000000 / HZ - 1)) / (1000000 / HZ);
	return 0;
}
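
/*
 * Illustrative example (added commentary, not in the original file):
 * with HZ == 100, a user-supplied timeval of { .tv_sec = 2,
 * .tv_usec = 500000 } converts to 2 * 100 + (500000 + 9999) / 10000 =
 * 200 + 50 = 250 jiffies. The rounding term performs a ceiling
 * division, so any fraction of a tick counts as a whole tick. A zero
 * timeval selects MAX_SCHEDULE_TIMEOUT, i.e. "block forever".
 */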

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	switch (optname) {
		case SO_DONTLINGER:
			sock_reset_flag(sk, SOCK_LINGER);
			return 0;
	}
#endif

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
		case SO_DEBUG:
			if (val && !capable(CAP_NET_ADMIN)) {
				ret = -EACCES;
			} else if (valbool)
				sock_set_flag(sk, SOCK_DBG);
			else
				sock_reset_flag(sk, SOCK_DBG);
			break;
		case SO_REUSEADDR:
			sk->sk_reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			if (valbool)
				sock_set_flag(sk, SOCK_LOCALROUTE);
			else
				sock_reset_flag(sk, SOCK_LOCALROUTE);
			break;
		case SO_BROADCAST:
			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
			break;
		case SO_SNDBUF:
			/* Don't error on this; BSD doesn't, and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints. */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;

			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sk_sndbuf = val * 2;

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->sk_write_space(sk);
			break;

		case SO_RCVBUF:
			/* Don't error on this; BSD doesn't, and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints. */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;

			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->sk_rcvbuf = val * 2;
			break;
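
		/*
		 * Note (added commentary): in both SO_SNDBUF and SO_RCVBUF
		 * above the user-supplied value is doubled before being
		 * stored. The extra headroom is commonly understood to
		 * cover struct sk_buff and other bookkeeping overhead
		 * (cf. socket(7)), so roughly `val' bytes of actual
		 * payload fit in the buffer; getsockopt() reports the
		 * doubled figure back.
		 */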

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->sk_protocol == IPPROTO_TCP)
				tcp_set_keepalive(sk, valbool);
#endif
			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
			break;

		case SO_OOBINLINE:
			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
			break;

		case SO_NO_CHECK:
			sk->sk_no_check = valbool;
			break;

		case SO_PRIORITY:
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->sk_priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if (optlen < sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling, optval, sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if (!ling.l_onoff)
				sock_reset_flag(sk, SOCK_LINGER);
			else {
#if (BITS_PER_LONG == 32)
				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT / HZ)
					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->sk_lingertime = ling.l_linger * HZ;
				sock_set_flag(sk, SOCK_LINGER);
			}
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("setsockopt");
			break;

		case SO_PASSCRED:
			if (valbool)
				set_bit(SOCK_PASSCRED, &sock->flags);
			else
				clear_bit(SOCK_PASSCRED, &sock->flags);
			break;

		case SO_TIMESTAMP:
			if (valbool) {
				sock_set_flag(sk, SOCK_RCVTSTAMP);
				sock_enable_timestamp(sk);
			} else
				sock_reset_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			sk->sk_rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->sk_bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ - 1)
					optlen = IFNAMSIZ - 1;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}
				/* The user buffer need not be NUL-terminated;
				 * terminate it ourselves before using it as a
				 * C string.
				 */
				devname[optlen] = '\0';

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->sk_bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->sk_bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
#endif

		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			spin_lock_bh(&sk->sk_lock.slock);
			filter = sk->sk_filter;
			if (filter) {
				sk->sk_filter = NULL;
				spin_unlock_bh(&sk->sk_lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->sk_lock.slock);
			ret = -ENONET;
			break;

		/* We implement SO_SNDLOWAT etc. as not settable
		   (1003.1g 5.3). */
		default:
			ret = -ENOPROTOOPT;
			break;
	}
	release_sock(sk);
	return ret;
}
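
/*
 * Illustrative userspace usage of the SO_BINDTODEVICE case above
 * (added commentary; "eth0" is a hypothetical device name):
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", 5);
 *
 * Passing an empty name (or a zero option length) unbinds the socket;
 * CAP_NET_RAW is required either way.
 */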

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
		case SO_DEBUG:
			v.val = sock_flag(sk, SOCK_DBG);
			break;

		case SO_DONTROUTE:
			v.val = sock_flag(sk, SOCK_LOCALROUTE);
			break;

		case SO_BROADCAST:
			v.val = !!sock_flag(sk, SOCK_BROADCAST);
			break;

		case SO_SNDBUF:
			v.val = sk->sk_sndbuf;
			break;

		case SO_RCVBUF:
			v.val = sk->sk_rcvbuf;
			break;

		case SO_REUSEADDR:
			v.val = sk->sk_reuse;
			break;

		case SO_KEEPALIVE:
			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
			break;

		case SO_TYPE:
			v.val = sk->sk_type;
			break;

		case SO_ERROR:
			v.val = -sock_error(sk);
			if (v.val == 0)
				v.val = xchg(&sk->sk_err_soft, 0);
			break;

		case SO_OOBINLINE:
			v.val = !!sock_flag(sk, SOCK_URGINLINE);
			break;

		case SO_NO_CHECK:
			v.val = sk->sk_no_check;
			break;

		case SO_PRIORITY:
			v.val = sk->sk_priority;
			break;

		case SO_LINGER:
			lv		= sizeof(v.ling);
			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
			v.ling.l_linger	= sk->sk_lingertime / HZ;
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("getsockopt");
			break;

		case SO_TIMESTAMP:
			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_SNDTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_RCVLOWAT:
			v.val = sk->sk_rcvlowat;
			break;

		case SO_SNDLOWAT:
			v.val = 1;
			break;

		case SO_PASSCRED:
			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
			break;

		case SO_PEERCRED:
			if (len > sizeof(sk->sk_peercred))
				len = sizeof(sk->sk_peercred);
			if (copy_to_user(optval, &sk->sk_peercred, len))
				return -EFAULT;
			goto lenout;

		case SO_PEERNAME:
		{
			char address[128];

			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
				return -ENOTCONN;
			if (lv < len)
				return -EINVAL;
			if (copy_to_user(optval, address, len))
				return -EFAULT;
			goto lenout;
		}

		/* Dubious BSD thing... Probably nobody even uses it, but
		 * the UNIX standard wants it for whatever reason... -DaveM
		 */
		case SO_ACCEPTCONN:
			v.val = sk->sk_state == TCP_LISTEN;
			break;

		case SO_PEERSEC:
			return security_socket_getpeersec(sock, optval, optlen, len);

		default:
			return -ENOPROTOOPT;
	}
	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
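
/*
 * Illustrative example (added commentary): after a failed non-blocking
 * connect(), an application typically retrieves and clears the pending
 * error via the SO_ERROR case above:
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *
 * err then holds the positive errno (e.g. ECONNREFUSED), and the
 * pending error on the socket is cleared as a side effect.
 */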

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	kmem_cache_t *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority)) {
			if (slab != NULL)
				kmem_cache_free(slab, sk);
			else
				kfree(sk);
			sk = NULL;
		} else
			__module_get(prot->owner);
	}
	return sk;
}
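
/*
 * Sketch of a typical caller (added commentary; "my_proto" is
 * hypothetical): a protocol's create routine allocates a zeroed sock
 * tied to its struct proto, then fills in the generic fields via
 * sock_init_data():
 *
 *	struct sock *sk = sk_alloc(PF_INET, GFP_KERNEL, &my_proto, 1);
 *	if (!sk)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 */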

void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = sk->sk_filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->sk_filter = NULL;
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */

/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}

int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate an skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     int priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate an skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     int priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
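
/*
 * Illustrative pairing (added commentary): sock_kmalloc() charges the
 * allocation against sk->sk_omem_alloc up front, so every successful
 * call must eventually be matched by sock_kfree_s() with the same size
 * to keep the accounting balanced, e.g.:
 *
 *	struct foo *opts = sock_kmalloc(sk, sizeof(*opts), GFP_KERNEL);
 *	if (!opts)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opts, sizeof(*opts));
 *
 * ("struct foo" is a hypothetical per-option structure.)
 */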

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}

/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	unsigned int gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, sk->sk_allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
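
/*
 * Illustrative usage (added commentary): a datagram sendmsg()
 * implementation typically does
 *
 *	skb = sock_alloc_send_skb(sk, len + hdr_len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		goto out;	// err is -EAGAIN, -EPIPE, ...
 *
 * blocking (subject to sk->sk_sndtimeo) until sk_wmem_alloc drops
 * below sk_sndbuf. "hdr_len" here is a hypothetical header reservation.
 */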

static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					  TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Socket state, including sk->sk_err, is changed only under the socket
 * lock, hence we may omit checks after joining the wait queue.
 * We check the receive queue before schedule() only as an optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);
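
/*
 * Illustrative caller pattern (added commentary): a blocking recvmsg()
 * loop built on sk_wait_data(), run under lock_sock():
 *
 *	while (skb_queue_empty(&sk->sk_receive_queue)) {
 *		if (!timeo)
 *			return -EAGAIN;
 *		if (signal_pending(current))
 *			return sock_intr_errno(timeo);
 *		sk_wait_data(sk, &timeo);
 *	}
 */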

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset,
			 size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = { .msg_flags = flags };
	struct kvec iov;
	char *kaddr = kmap(page);

	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
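
/*
 * Illustrative use of the defaults above (added commentary;
 * "my_dgram_ops" and the my_* handlers are hypothetical): a protocol
 * fills the proto_ops slots it does not support with the sock_no_*
 * stubs instead of leaving them NULL:
 *
 *	static struct proto_ops my_dgram_ops = {
 *		.family		= PF_INET,
 *		.owner		= THIS_MODULE,
 *		.bind		= my_bind,
 *		.sendmsg	= my_sendmsg,
 *		.recvmsg	= my_recvmsg,
 *		.accept		= sock_no_accept,
 *		.listen		= sock_no_listen,
 *		.socketpair	= sock_no_socketpair,
 *		.mmap		= sock_no_mmap,
 *		.sendpage	= sock_no_sendpage,
 *	};
 */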

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	if (sk->sk_protinfo)
		kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);
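
/*
 * Illustrative note on the refcounting above (added commentary):
 * mod_timer() returns 0 when the timer was not already pending, so
 * sk_reset_timer() takes a sock reference exactly once per armed
 * timer, and sk_stop_timer() drops it only if it actually deleted a
 * pending timer. A protocol timer might be driven as:
 *
 *	sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ);  // arm, holds sk
 *	...
 *	sk_stop_timer(sk, &sk->sk_timer);                 // cancel, puts sk
 */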

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec	=	-1L;
	sk->sk_stamp.tv_usec	=	-1L;

	atomic_set(&sk->sk_refcnt, 1);
}
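
/*
 * Illustrative follow-up (added commentary): after sock_init_data() a
 * protocol may override the default callbacks installed above, e.g.
 *
 *	sock_init_data(sock, sk);
 *	sk->sk_data_ready = my_data_ready;	// hypothetical handler
 *	sk->sk_destruct   = my_destruct;
 *
 * Any slot left alone keeps the sock_def_* behaviour.
 */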

void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock_bh(&sk->sk_lock.slock);
}

EXPORT_SYMBOL(lock_sock);

void fastcall release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
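
/*
 * Illustrative usage pattern (added commentary): process context takes
 * the socket lock around any state it shares with the softirq receive
 * path; packets arriving meanwhile are queued on sk->sk_backlog and
 * replayed by __release_sock() on the way out:
 *
 *	lock_sock(sk);
 *	// ... modify socket state ...
 *	release_sock(sk);
 */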

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);
1273 
1274 /*
1275  *	Get a socket option on an socket.
1276  *
1277  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
1278  *	asynchronous errors should be reported by getsockopt. We assume
1279  *	this means if you specify SO_ERROR (otherwise whats the point of it).
1280  */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and will be purged by
	 * the socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

#ifdef INET_REFCNT_DEBUG
	if (atomic_read(&sk->sk_refcnt) != 1)
		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
		       sk, atomic_read(&sk->sk_refcnt));
#endif
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
	int rc = -ENOBUFS;

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	write_unlock(&proto_list_lock);
	rc = 0;
out:
	return rc;
}

EXPORT_SYMBOL(proto_register);
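
/*
 * Illustrative registration sequence (added commentary; "my_proto" is
 * hypothetical): a protocol module pairs the two calls in its init and
 * exit paths, requesting a dedicated slab for its socks:
 *
 *	static int __init my_init(void)
 *	{
 *		return proto_register(&my_proto, 1);
 *	}
 *
 *	static void __exit my_exit(void)
 *	{
 *		proto_unregister(&my_proto);
 *	}
 */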

void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	list_del(&prot->node);
	write_unlock(&proto_list_lock);
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* CONFIG_PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_optmem_max);
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif