/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly,
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max = SK_WMEM_MAX;
__u32 sysctl_rmem_max = SK_RMEM_MAX;
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;
/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);

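/*
 * Convert a user supplied struct timeval into a timeout in jiffies.
 * A zero timeval means "no timeout" and is stored as
 * MAX_SCHEDULE_TIMEOUT; values too large to represent in jiffies are
 * clamped the same way.
 */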
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;

	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

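/*
 * Turn packet timestamping back off for this socket, dropping our
 * reference on the global timestamp count if we held one.
 */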
static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}


/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	switch (optname) {
		case SO_DONTLINGER:
			sock_reset_flag(sk, SOCK_LINGER);
			return 0;
	}
#endif

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
		case SO_DEBUG:
			if (val && !capable(CAP_NET_ADMIN))
				ret = -EACCES;
			else if (valbool)
				sock_set_flag(sk, SOCK_DBG);
			else
				sock_reset_flag(sk, SOCK_DBG);
			break;
		case SO_REUSEADDR:
			sk->sk_reuse = valbool;
			break;
		case SO_TYPE:
		case SO_ERROR:
			ret = -ENOPROTOOPT;
			break;
		case SO_DONTROUTE:
			if (valbool)
				sock_set_flag(sk, SOCK_LOCALROUTE);
			else
				sock_reset_flag(sk, SOCK_LOCALROUTE);
			break;
		case SO_BROADCAST:
			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
			break;
		case SO_SNDBUF:
			/* Don't error on this. BSD doesn't, and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints. */

			if (val > sysctl_wmem_max)
				val = sysctl_wmem_max;

			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			if ((val * 2) < SOCK_MIN_SNDBUF)
				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
			else
				sk->sk_sndbuf = val * 2;

			/*
			 *	Wake up sending tasks if we
			 *	upped the value.
			 */
			sk->sk_write_space(sk);
			break;

		case SO_RCVBUF:
			/* Don't error on this. BSD doesn't, and if you think
			   about it this is right. Otherwise apps have to
			   play 'guess the biggest size' games. RCVBUF/SNDBUF
			   are treated in BSD as hints. */

			if (val > sysctl_rmem_max)
				val = sysctl_rmem_max;

			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			/* FIXME: is this lower bound the right one? */
			if ((val * 2) < SOCK_MIN_RCVBUF)
				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
			else
				sk->sk_rcvbuf = val * 2;
			break;

		case SO_KEEPALIVE:
#ifdef CONFIG_INET
			if (sk->sk_protocol == IPPROTO_TCP)
				tcp_set_keepalive(sk, valbool);
#endif
			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
			break;

		case SO_OOBINLINE:
			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
			break;

		case SO_NO_CHECK:
			sk->sk_no_check = valbool;
			break;

		case SO_PRIORITY:
			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
				sk->sk_priority = val;
			else
				ret = -EPERM;
			break;

		case SO_LINGER:
			if (optlen < sizeof(ling)) {
				ret = -EINVAL;	/* 1003.1g */
				break;
			}
			if (copy_from_user(&ling, optval, sizeof(ling))) {
				ret = -EFAULT;
				break;
			}
			if (!ling.l_onoff)
				sock_reset_flag(sk, SOCK_LINGER);
			else {
#if (BITS_PER_LONG == 32)
				if (ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
				else
#endif
					sk->sk_lingertime = ling.l_linger * HZ;
				sock_set_flag(sk, SOCK_LINGER);
			}
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("setsockopt");
			break;

		case SO_PASSCRED:
			if (valbool)
				set_bit(SOCK_PASSCRED, &sock->flags);
			else
				clear_bit(SOCK_PASSCRED, &sock->flags);
			break;

		case SO_TIMESTAMP:
			if (valbool) {
				sock_set_flag(sk, SOCK_RCVTSTAMP);
				sock_enable_timestamp(sk);
			} else
				sock_reset_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVLOWAT:
			if (val < 0)
				val = INT_MAX;
			sk->sk_rcvlowat = val ? : 1;
			break;

		case SO_RCVTIMEO:
			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
			break;

		case SO_SNDTIMEO:
			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
			break;

#ifdef CONFIG_NETDEVICES
		case SO_BINDTODEVICE:
		{
			char devname[IFNAMSIZ];

			/* Sorry... */
			if (!capable(CAP_NET_RAW)) {
				ret = -EPERM;
				break;
			}

			/* Bind this socket to a particular device like "eth0",
			 * as specified in the passed interface name. If the
			 * name is "" or the option length is zero the socket
			 * is not bound.
			 */

			if (!valbool) {
				sk->sk_bound_dev_if = 0;
			} else {
				if (optlen > IFNAMSIZ)
					optlen = IFNAMSIZ;
				if (copy_from_user(devname, optval, optlen)) {
					ret = -EFAULT;
					break;
				}

				/* Remove any cached route for this socket. */
				sk_dst_reset(sk);

				if (devname[0] == '\0') {
					sk->sk_bound_dev_if = 0;
				} else {
					struct net_device *dev = dev_get_by_name(devname);
					if (!dev) {
						ret = -ENODEV;
						break;
					}
					sk->sk_bound_dev_if = dev->ifindex;
					dev_put(dev);
				}
			}
			break;
		}
#endif


		case SO_ATTACH_FILTER:
			ret = -EINVAL;
			if (optlen == sizeof(struct sock_fprog)) {
				struct sock_fprog fprog;

				ret = -EFAULT;
				if (copy_from_user(&fprog, optval, sizeof(fprog)))
					break;

				ret = sk_attach_filter(&fprog, sk);
			}
			break;

		case SO_DETACH_FILTER:
			spin_lock_bh(&sk->sk_lock.slock);
			filter = sk->sk_filter;
			if (filter) {
				sk->sk_filter = NULL;
				spin_unlock_bh(&sk->sk_lock.slock);
				sk_filter_release(sk, filter);
				break;
			}
			spin_unlock_bh(&sk->sk_lock.slock);
			ret = -ENONET;
			break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
		default:
			ret = -ENOPROTOOPT;
			break;
	}
	release_sock(sk);
	return ret;
}


int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
		case SO_DEBUG:
			v.val = sock_flag(sk, SOCK_DBG);
			break;

		case SO_DONTROUTE:
			v.val = sock_flag(sk, SOCK_LOCALROUTE);
			break;

		case SO_BROADCAST:
			v.val = !!sock_flag(sk, SOCK_BROADCAST);
			break;

		case SO_SNDBUF:
			v.val = sk->sk_sndbuf;
			break;

		case SO_RCVBUF:
			v.val = sk->sk_rcvbuf;
			break;

		case SO_REUSEADDR:
			v.val = sk->sk_reuse;
			break;

		case SO_KEEPALIVE:
			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
			break;

		case SO_TYPE:
			v.val = sk->sk_type;
			break;

		case SO_ERROR:
			v.val = -sock_error(sk);
			if (v.val == 0)
				v.val = xchg(&sk->sk_err_soft, 0);
			break;

		case SO_OOBINLINE:
			v.val = !!sock_flag(sk, SOCK_URGINLINE);
			break;

		case SO_NO_CHECK:
			v.val = sk->sk_no_check;
			break;

		case SO_PRIORITY:
			v.val = sk->sk_priority;
			break;

		case SO_LINGER:
			lv		= sizeof(v.ling);
			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
			v.ling.l_linger	= sk->sk_lingertime / HZ;
			break;

		case SO_BSDCOMPAT:
			sock_warn_obsolete_bsdism("getsockopt");
			break;

		case SO_TIMESTAMP:
			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
			break;

		case SO_RCVTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_SNDTIMEO:
			lv = sizeof(struct timeval);
			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
				v.tm.tv_sec = 0;
				v.tm.tv_usec = 0;
			} else {
				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
			}
			break;

		case SO_RCVLOWAT:
			v.val = sk->sk_rcvlowat;
			break;

		case SO_SNDLOWAT:
			v.val = 1;
			break;

		case SO_PASSCRED:
			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
			break;

		case SO_PEERCRED:
			if (len > sizeof(sk->sk_peercred))
				len = sizeof(sk->sk_peercred);
			if (copy_to_user(optval, &sk->sk_peercred, len))
				return -EFAULT;
			goto lenout;

		case SO_PEERNAME:
		{
			char address[128];

			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
				return -ENOTCONN;
			if (lv < len)
				return -EINVAL;
			if (copy_to_user(optval, address, len))
				return -EFAULT;
			goto lenout;
		}

		/* Dubious BSD thing... Probably nobody even uses it, but
		 * the UNIX standard wants it for whatever reason... -DaveM
		 */
		case SO_ACCEPTCONN:
			v.val = sk->sk_state == TCP_LISTEN;
			break;

		case SO_PEERSEC:
			return security_socket_getpeersec(sock, optval, optlen, len);

		default:
			return -ENOPROTOOPT;
	}
	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	kmem_cache_t *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority)) {
			if (slab != NULL)
				kmem_cache_free(slab, sk);
			else
				kfree(sk);
			sk = NULL;
		} else
			__module_get(prot->owner);
	}
	return sk;
}

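/**
 *	sk_free - free a sock that sk_alloc() allocated
 *	@sk: the sock to free
 *
 *	Runs the destructor, detaches any socket filter, disables
 *	timestamping and releases the security state before handing the
 *	memory back to the slab (or kmalloc) it came from.
 */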
void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = sk->sk_filter;
	if (filter) {
		sk_filter_release(sk, filter);
		sk->sk_filter = NULL;
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

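/*
 * Scale the socket buffer sysctls to the amount of memory in the
 * machine: roughly, boxes with <= 16MB (assuming 4K pages) get small
 * limits, boxes with >= 512MB get larger send/receive maximums.
 */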
void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


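/*
 * sock_i_uid/sock_i_ino report the owning inode's uid/inode number.
 * sk_callback_lock keeps sk->sk_socket stable against a concurrent
 * sock_orphan(); an already orphaned sock reports 0.
 */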
int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, int priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

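/*
 * Allocate an skb for sending: header_len bytes of linear space plus
 * data_len bytes spread over page fragments. Blocks (subject to the
 * socket's send timeout) until enough write buffer space is available.
 */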
static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	unsigned int gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, sk->sk_allocation);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

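/*
 * Wait (uninterruptibly) until the process-context lock owner drops
 * the socket lock, then claim it. Called with the spinlock held; it
 * is dropped across the schedule and retaken afterwards.
 */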
static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

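/*
 * Run the packets that piled up on the backlog queue while the socket
 * lock was owned by user context. The queue is taken private first,
 * so new arrivals keep landing on a fresh backlog.
 */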
static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = { .msg_flags = flags };
	struct kvec iov;
	char *kaddr = kmap(page);

	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	/* kfree(NULL) is a no-op, so no NULL check is needed. */
	kfree(sk->sk_protinfo);
}

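/*
 * Deliver SIGURG to the owner the socket's file has registered (via
 * F_SETOWN/FIOSETOWN) and kick POLL_PRI async waiters when urgent
 * data arrives.
 */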
void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

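/*
 * Arm (or re-arm) a timer that holds a reference on the sock:
 * mod_timer() returns 0 when the timer was not already pending, and
 * only then do we need to take an extra reference.
 */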
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

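/*
 * Stop a pending timer and drop the sock reference it held; a timer
 * that already fired (or was never armed) leaves the refcount alone.
 */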
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);

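/*
 * Initialise the generic parts of a freshly allocated sock: queues,
 * default buffer sizes, the default callbacks above, timeouts and the
 * link between the struct socket and the struct sock.
 */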
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	sk->sk_peercred.pid	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp.tv_sec	=	-1L;
	sk->sk_stamp.tv_usec	=	-1L;

	atomic_set(&sk->sk_refcnt, 1);
}

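/*
 * Acquire the socket lock in process context; may sleep. While it is
 * held, sk_lock.owner marks the lock as user-held, so softirq receive
 * processing queues packets on the backlog instead of touching the
 * socket.
 */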
void fastcall lock_sock(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock_bh(&(sk->sk_lock.slock));
}

EXPORT_SYMBOL(lock_sock);

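/*
 * Release the socket lock: first run any packets that softirq context
 * queued on the backlog while we held the lock, then wake up a task
 * sleeping in __lock_sock().
 */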
void fastcall release_sock(struct sock *sk)
{
	spin_lock_bh(&(sk->sk_lock.slock));
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&(sk->sk_lock.wq)))
		wake_up(&(sk->sk_lock.wq));
	spin_unlock_bh(&(sk->sk_lock.slock));
}
EXPORT_SYMBOL(release_sock);

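/*
 * Report the timestamp of the last packet received on @sk to user
 * space, enabling timestamping on first use. A socket that has not
 * seen a packet since timestamping was enabled reports -ENOENT.
 */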
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	if (sk->sk_stamp.tv_sec == -1)
		return -ENOENT;
	if (sk->sk_stamp.tv_sec == 0)
		do_gettimeofday(&sk->sk_stamp);
	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
		-EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and will be purged by
	 * the socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

#ifdef INET_REFCNT_DEBUG
	if (atomic_read(&sk->sk_refcnt) != 1)
		printk(KERN_DEBUG "Destruction of the socket %p delayed, c=%d\n",
		       sk, atomic_read(&sk->sk_refcnt));
#endif
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

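/**
 *	proto_register - add a protocol to the networking core
 *	@prot: the protocol being registered
 *	@alloc_slab: if true, create slab caches for the protocol's socks
 *		     (and, when @prot->rsk_prot is set, its request socks)
 *
 *	A protocol typically sets up a static struct proto and registers
 *	it at module init time, e.g. (sketch only; "my_proto" and
 *	"struct my_sock" are placeholders, not real kernel symbols):
 *
 *		static struct proto my_proto = {
 *			.name	  = "MYPROTO",
 *			.owner	  = THIS_MODULE,
 *			.obj_size = sizeof(struct my_sock),
 *		};
 *
 *		err = proto_register(&my_proto, 1);
 *
 *	and undoes it with proto_unregister() at module exit.
 */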
int proto_register(struct proto *prot, int alloc_slab)
{
	char *request_sock_slab_name;
	int rc = -ENOBUFS;

	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					       SLAB_HWCACHE_ALIGN, NULL, NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			static const char mask[] = "request_sock_%s";

			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
			if (request_sock_slab_name == NULL)
				goto out_free_sock_slab;

			sprintf(request_sock_slab_name, mask, prot->name);
			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL, NULL);

			if (prot->rsk_prot->slab == NULL) {
				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
				       prot->name);
				goto out_free_request_sock_slab_name;
			}
		}
	}

	write_lock(&proto_list_lock);
	list_add(&prot->node, &proto_list);
	write_unlock(&proto_list_lock);
	rc = 0;
out:
	return rc;
out_free_request_sock_slab_name:
	kfree(request_sock_slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
	goto out;
}

EXPORT_SYMBOL(proto_register);

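/*
 * Remove a protocol registered with proto_register() and destroy the
 * slab caches (and the request_sock cache name) it allocated.
 */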
void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		const char *name = kmem_cache_name(prot->rsk_prot->slab);

		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(name);
		prot->rsk_prot->slab = NULL;
	}

	list_del(&prot->node);
	write_unlock(&proto_list_lock);
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static inline struct proto *__proto_head(void)
{
	return list_entry(proto_list.next, struct proto, node);
}

static inline struct proto *proto_head(void)
{
	return list_empty(&proto_list) ? NULL : __proto_head();
}

static inline struct proto *proto_next(struct proto *proto)
{
	return proto->node.next == &proto_list ? NULL :
		list_entry(proto->node.next, struct proto, node);
}

static inline struct proto *proto_get_idx(loff_t pos)
{
	struct proto *proto;
	loff_t i = 0;

	list_for_each_entry(proto, &proto_list, node)
		if (i++ == pos)
			goto out;

	proto = NULL;
out:
	return proto;
}

static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, v);
	return 0;
}

static struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* CONFIG_PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_optmem_max);
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif