xref: /linux/net/core/sock.c (revision ccea15f45eb0ab12d658f88b5d4be005cb2bb1a7)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Generic socket support routines. Memory allocators, socket lock/release
7  *		handler for protocols to use and generic option handler.
8  *
9  *
10  * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11  *
12  * Authors:	Ross Biro
13  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *		Florian La Roche, <flla@stud.uni-sb.de>
15  *		Alan Cox, <A.Cox@swansea.ac.uk>
16  *
17  * Fixes:
18  *		Alan Cox	: 	Numerous verify_area() problems
19  *		Alan Cox	:	Connecting on a connecting socket
20  *					now returns an error for tcp.
21  *		Alan Cox	:	sock->protocol is set correctly.
22  *					and is not sometimes left as 0.
23  *		Alan Cox	:	connect handles icmp errors on a
24  *					connect properly. Unfortunately there
25  *					is a restart syscall nasty there. I
26  *					can't match BSD without hacking the C
27  *					library. Ideas urgently sought!
28  *		Alan Cox	:	Disallow bind() to addresses that are
29  *					not ours - especially broadcast ones!!
30  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
31  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
32  *					instead they leave that for the DESTROY timer.
33  *		Alan Cox	:	Clean up error flag in accept
34  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
35  *					was buggy. Put a remove_sock() in the handler
36  *					for memory when we hit 0. Also altered the timer
37  *					code. The ACK stuff can wait and needs major
38  *					TCP layer surgery.
39  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
40  *					and fixed timer/inet_bh race.
41  *		Alan Cox	:	Added zapped flag for TCP
42  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
43  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
45  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
46  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
48  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
49  *	Pauline Middelink	:	identd support
50  *		Alan Cox	:	Fixed connect() taking signals I think.
51  *		Alan Cox	:	SO_LINGER supported
52  *		Alan Cox	:	Error reporting fixes
53  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
54  *		Alan Cox	:	inet sockets don't set sk->type!
55  *		Alan Cox	:	Split socket option code
56  *		Alan Cox	:	Callbacks
57  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
58  *		Alex		:	Removed restriction on inet fioctl
59  *		Alan Cox	:	Splitting INET from NET core
60  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
61  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
62  *		Alan Cox	:	Split IP from generic code
63  *		Alan Cox	:	New kfree_skbmem()
64  *		Alan Cox	:	Make SO_DEBUG superuser only.
65  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
66  *					(compatibility fix)
67  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
68  *		Alan Cox	:	Allocator for a socket is settable.
69  *		Alan Cox	:	SO_ERROR includes soft errors.
70  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
71  *		Alan Cox	: 	Generic socket allocation to make hooks
72  *					easier (suggested by Craig Metz).
73  *		Michael Pall	:	SO_ERROR returns positive errno again
74  *              Steve Whitehouse:       Added default destructor to free
75  *                                      protocol private data.
76  *              Steve Whitehouse:       Added various other default routines
77  *                                      common to several socket families.
78  *              Chris Evans     :       Call suser() check last on F_SETOWN
79  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
81  *		Andi Kleen	:	Fix write_space callback
82  *		Chris Evans	:	Security fixes - signedness again
83  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
84  *
85  * To Fix:
86  *
87  *
88  *		This program is free software; you can redistribute it and/or
89  *		modify it under the terms of the GNU General Public License
90  *		as published by the Free Software Foundation; either version
91  *		2 of the License, or (at your option) any later version.
92  */
93 
94 #include <linux/capability.h>
95 #include <linux/config.h>
96 #include <linux/errno.h>
97 #include <linux/types.h>
98 #include <linux/socket.h>
99 #include <linux/in.h>
100 #include <linux/kernel.h>
101 #include <linux/module.h>
102 #include <linux/proc_fs.h>
103 #include <linux/seq_file.h>
104 #include <linux/sched.h>
105 #include <linux/timer.h>
106 #include <linux/string.h>
107 #include <linux/sockios.h>
108 #include <linux/net.h>
109 #include <linux/mm.h>
110 #include <linux/slab.h>
111 #include <linux/interrupt.h>
112 #include <linux/poll.h>
113 #include <linux/tcp.h>
114 #include <linux/init.h>
115 
116 #include <asm/uaccess.h>
117 #include <asm/system.h>
118 
119 #include <linux/netdevice.h>
120 #include <net/protocol.h>
121 #include <linux/skbuff.h>
122 #include <net/request_sock.h>
123 #include <net/sock.h>
124 #include <net/xfrm.h>
125 #include <linux/ipsec.h>
126 
127 #include <linux/filter.h>
128 
129 #ifdef CONFIG_INET
130 #include <net/tcp.h>
131 #endif
132 
133 /* Take into consideration the size of the struct sk_buff overhead in the
134  * determination of these values, since that is non-constant across
135  * platforms.  This makes socket queueing behavior and performance
136  * not depend upon such differences.
137  */
138 #define _SK_MEM_PACKETS		256
139 #define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
140 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
141 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
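/*
 * Rough illustration (numbers assumed, not from this file): with a
 * sizeof(struct sk_buff) of about 200 bytes, _SK_MEM_OVERHEAD comes to
 * roughly 456 bytes, so SK_WMEM_MAX and SK_RMEM_MAX work out to about
 * 114 KB, i.e. accounting headroom for 256 such worst-case packets.
 */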
142 
143 /* Run time adjustable parameters. */
144 __u32 sysctl_wmem_max = SK_WMEM_MAX;
145 __u32 sysctl_rmem_max = SK_RMEM_MAX;
146 __u32 sysctl_wmem_default = SK_WMEM_MAX;
147 __u32 sysctl_rmem_default = SK_RMEM_MAX;
148 
149  * Maximal space eaten by an iovec or ancillary data plus some space */
150 int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
151 
152 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
153 {
154 	struct timeval tv;
155 
156 	if (optlen < sizeof(tv))
157 		return -EINVAL;
158 	if (copy_from_user(&tv, optval, sizeof(tv)))
159 		return -EFAULT;
160 
161 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
162 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
163 		return 0;
164 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
165 		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
166 	return 0;
167 }
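/*
 * Worked example (HZ assumed to be 1000; it is configuration dependent):
 * tv = { .tv_sec = 2, .tv_usec = 500000 } gives
 * *timeo_p = 2*1000 + (500000 + 999)/1000 = 2500 jiffies.
 * The (1000000/HZ - 1) term rounds leftover microseconds up, so any
 * non-zero tv_usec contributes at least one jiffy.
 */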
168 
169 static void sock_warn_obsolete_bsdism(const char *name)
170 {
171 	static int warned;
172 	static char warncomm[TASK_COMM_LEN];
173 	if (strcmp(warncomm, current->comm) && warned < 5) {
174 		strcpy(warncomm,  current->comm);
175 		printk(KERN_WARNING "process `%s' is using obsolete "
176 		       "%s SO_BSDCOMPAT\n", warncomm, name);
177 		warned++;
178 	}
179 }
180 
181 static void sock_disable_timestamp(struct sock *sk)
182 {
183 	if (sock_flag(sk, SOCK_TIMESTAMP)) {
184 		sock_reset_flag(sk, SOCK_TIMESTAMP);
185 		net_disable_timestamp();
186 	}
187 }
188 
189 
190 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
191 {
192 	int err = 0;
193 	int skb_len;
194 
195 	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces the
196 	   number of warnings when compiling with -W. --ANK
197 	 */
198 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
199 	    (unsigned)sk->sk_rcvbuf) {
200 		err = -ENOMEM;
201 		goto out;
202 	}
203 
204 	/* It would be a deadlock if sock_queue_rcv_skb were called
205 	   with the socket lock held! We assume that callers of this
206 	   function do not hold it.
207 	*/
208 	err = sk_filter(sk, skb, 1);
209 	if (err)
210 		goto out;
211 
212 	skb->dev = NULL;
213 	skb_set_owner_r(skb, sk);
214 
215 	/* Cache the SKB length before we tack it onto the receive
216 	 * queue.  Once it is added it no longer belongs to us and
217 	 * may be freed by other threads of control pulling packets
218 	 * from the queue.
219 	 */
220 	skb_len = skb->len;
221 
222 	skb_queue_tail(&sk->sk_receive_queue, skb);
223 
224 	if (!sock_flag(sk, SOCK_DEAD))
225 		sk->sk_data_ready(sk, skb_len);
226 out:
227 	return err;
228 }
229 EXPORT_SYMBOL(sock_queue_rcv_skb);
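/*
 * Usage sketch (illustrative, not taken from this file): a datagram
 * protocol's receive path typically calls this from softirq context
 * without the socket lock held, e.g.
 *
 *	if (sock_queue_rcv_skb(sk, skb) < 0) {
 *		kfree_skb(skb);		/* on failure the skb is still ours */
 *		/* ... bump drop statistics ... */
 *	}
 *
 * On success the skb belongs to the receive queue and is charged to
 * sk_rmem_alloc via skb_set_owner_r() above.
 */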
230 
231 int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
232 {
233 	int rc = NET_RX_SUCCESS;
234 
235 	if (sk_filter(sk, skb, 0))
236 		goto discard_and_relse;
237 
238 	skb->dev = NULL;
239 
240 	bh_lock_sock(sk);
241 	if (!sock_owned_by_user(sk))
242 		rc = sk->sk_backlog_rcv(sk, skb);
243 	else
244 		sk_add_backlog(sk, skb);
245 	bh_unlock_sock(sk);
246 out:
247 	sock_put(sk);
248 	return rc;
249 discard_and_relse:
250 	kfree_skb(skb);
251 	goto out;
252 }
253 EXPORT_SYMBOL(sk_receive_skb);
254 
255 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
256 {
257 	struct dst_entry *dst = sk->sk_dst_cache;
258 
259 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
260 		sk->sk_dst_cache = NULL;
261 		dst_release(dst);
262 		return NULL;
263 	}
264 
265 	return dst;
266 }
267 EXPORT_SYMBOL(__sk_dst_check);
268 
269 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
270 {
271 	struct dst_entry *dst = sk_dst_get(sk);
272 
273 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
274 		sk_dst_reset(sk);
275 		dst_release(dst);
276 		return NULL;
277 	}
278 
279 	return dst;
280 }
281 EXPORT_SYMBOL(sk_dst_check);
282 
283 /*
284  *	This is meant for all protocols to use and covers goings on
285  *	at the socket level. Everything here is generic.
286  */
287 
288 int sock_setsockopt(struct socket *sock, int level, int optname,
289 		    char __user *optval, int optlen)
290 {
291 	struct sock *sk=sock->sk;
292 	struct sk_filter *filter;
293 	int val;
294 	int valbool;
295 	struct linger ling;
296 	int ret = 0;
297 
298 	/*
299 	 *	Options without arguments
300 	 */
301 
302 #ifdef SO_DONTLINGER		/* Compatibility item... */
303 	if (optname == SO_DONTLINGER) {
304 		lock_sock(sk);
305 		sock_reset_flag(sk, SOCK_LINGER);
306 		release_sock(sk);
307 		return 0;
308 	}
309 #endif
310 
311   	if(optlen<sizeof(int))
312   		return(-EINVAL);
313 
314 	if (get_user(val, (int __user *)optval))
315 		return -EFAULT;
316 
317   	valbool = val?1:0;
318 
319 	lock_sock(sk);
320 
321   	switch(optname)
322   	{
323 		case SO_DEBUG:
324 			if(val && !capable(CAP_NET_ADMIN))
325 			{
326 				ret = -EACCES;
327 			}
328 			else if (valbool)
329 				sock_set_flag(sk, SOCK_DBG);
330 			else
331 				sock_reset_flag(sk, SOCK_DBG);
332 			break;
333 		case SO_REUSEADDR:
334 			sk->sk_reuse = valbool;
335 			break;
336 		case SO_TYPE:
337 		case SO_ERROR:
338 			ret = -ENOPROTOOPT;
339 		  	break;
340 		case SO_DONTROUTE:
341 			if (valbool)
342 				sock_set_flag(sk, SOCK_LOCALROUTE);
343 			else
344 				sock_reset_flag(sk, SOCK_LOCALROUTE);
345 			break;
346 		case SO_BROADCAST:
347 			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
348 			break;
349 		case SO_SNDBUF:
350 			/* Don't return an error on this; BSD doesn't, and if you
351 			   think about it, this is right. Otherwise apps have to
352 			   play 'guess the biggest size' games. RCVBUF/SNDBUF
353 			   are treated in BSD as hints. */
354 
355 			if (val > sysctl_wmem_max)
356 				val = sysctl_wmem_max;
357 set_sndbuf:
358 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
359 			if ((val * 2) < SOCK_MIN_SNDBUF)
360 				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
361 			else
362 				sk->sk_sndbuf = val * 2;
363 
364 			/*
365 			 *	Wake up sending tasks if we
366 			 *	upped the value.
367 			 */
368 			sk->sk_write_space(sk);
369 			break;
370 
371 		case SO_SNDBUFFORCE:
372 			if (!capable(CAP_NET_ADMIN)) {
373 				ret = -EPERM;
374 				break;
375 			}
376 			goto set_sndbuf;
377 
378 		case SO_RCVBUF:
379 			/* Don't return an error on this; BSD doesn't, and if you
380 			   think about it, this is right. Otherwise apps have to
381 			   play 'guess the biggest size' games. RCVBUF/SNDBUF
382 			   are treated in BSD as hints. */
383 
384 			if (val > sysctl_rmem_max)
385 				val = sysctl_rmem_max;
386 set_rcvbuf:
387 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
388 			/*
389 			 * We double it on the way in to account for
390 			 * "struct sk_buff" etc. overhead.   Applications
391 			 * assume that the SO_RCVBUF setting they make will
392 			 * allow that much actual data to be received on that
393 			 * socket.
394 			 *
395 			 * Applications are unaware that "struct sk_buff" and
396 			 * other overheads allocate from the receive buffer
397 			 * during socket buffer allocation.
398 			 *
399 			 * And after considering the possible alternatives,
400 			 * returning the value we actually used in getsockopt
401 			 * is the most desirable behavior.
402 			 */
403 			if ((val * 2) < SOCK_MIN_RCVBUF)
404 				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
405 			else
406 				sk->sk_rcvbuf = val * 2;
407 			break;
408 
409 		case SO_RCVBUFFORCE:
410 			if (!capable(CAP_NET_ADMIN)) {
411 				ret = -EPERM;
412 				break;
413 			}
414 			goto set_rcvbuf;
415 
416 		case SO_KEEPALIVE:
417 #ifdef CONFIG_INET
418 			if (sk->sk_protocol == IPPROTO_TCP)
419 				tcp_set_keepalive(sk, valbool);
420 #endif
421 			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
422 			break;
423 
424 	 	case SO_OOBINLINE:
425 			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
426 			break;
427 
428 	 	case SO_NO_CHECK:
429 			sk->sk_no_check = valbool;
430 			break;
431 
432 		case SO_PRIORITY:
433 			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
434 				sk->sk_priority = val;
435 			else
436 				ret = -EPERM;
437 			break;
438 
439 		case SO_LINGER:
440 			if(optlen<sizeof(ling)) {
441 				ret = -EINVAL;	/* 1003.1g */
442 				break;
443 			}
444 			if (copy_from_user(&ling,optval,sizeof(ling))) {
445 				ret = -EFAULT;
446 				break;
447 			}
448 			if (!ling.l_onoff)
449 				sock_reset_flag(sk, SOCK_LINGER);
450 			else {
451 #if (BITS_PER_LONG == 32)
452 				if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
453 					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
454 				else
455 #endif
456 					sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
457 				sock_set_flag(sk, SOCK_LINGER);
458 			}
459 			break;
460 
461 		case SO_BSDCOMPAT:
462 			sock_warn_obsolete_bsdism("setsockopt");
463 			break;
464 
465 		case SO_PASSCRED:
466 			if (valbool)
467 				set_bit(SOCK_PASSCRED, &sock->flags);
468 			else
469 				clear_bit(SOCK_PASSCRED, &sock->flags);
470 			break;
471 
472 		case SO_TIMESTAMP:
473 			if (valbool)  {
474 				sock_set_flag(sk, SOCK_RCVTSTAMP);
475 				sock_enable_timestamp(sk);
476 			} else
477 				sock_reset_flag(sk, SOCK_RCVTSTAMP);
478 			break;
479 
480 		case SO_RCVLOWAT:
481 			if (val < 0)
482 				val = INT_MAX;
483 			sk->sk_rcvlowat = val ? : 1;
484 			break;
485 
486 		case SO_RCVTIMEO:
487 			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
488 			break;
489 
490 		case SO_SNDTIMEO:
491 			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
492 			break;
493 
494 #ifdef CONFIG_NETDEVICES
495 		case SO_BINDTODEVICE:
496 		{
497 			char devname[IFNAMSIZ];
498 
499 			/* Sorry... */
500 			if (!capable(CAP_NET_RAW)) {
501 				ret = -EPERM;
502 				break;
503 			}
504 
505 			/* Bind this socket to a particular device like "eth0",
506 			 * as specified in the passed interface name. If the
507 			 * name is "" or the option length is zero the socket
508 			 * is not bound.
509 			 */
510 
511 			if (!valbool) {
512 				sk->sk_bound_dev_if = 0;
513 			} else {
514 				if (optlen > IFNAMSIZ - 1)
515 					optlen = IFNAMSIZ - 1;
516 				memset(devname, 0, sizeof(devname));
517 				if (copy_from_user(devname, optval, optlen)) {
518 					ret = -EFAULT;
519 					break;
520 				}
521 
522 				/* Remove any cached route for this socket. */
523 				sk_dst_reset(sk);
524 
525 				if (devname[0] == '\0') {
526 					sk->sk_bound_dev_if = 0;
527 				} else {
528 					struct net_device *dev = dev_get_by_name(devname);
529 					if (!dev) {
530 						ret = -ENODEV;
531 						break;
532 					}
533 					sk->sk_bound_dev_if = dev->ifindex;
534 					dev_put(dev);
535 				}
536 			}
537 			break;
538 		}
539 #endif
540 
541 
542 		case SO_ATTACH_FILTER:
543 			ret = -EINVAL;
544 			if (optlen == sizeof(struct sock_fprog)) {
545 				struct sock_fprog fprog;
546 
547 				ret = -EFAULT;
548 				if (copy_from_user(&fprog, optval, sizeof(fprog)))
549 					break;
550 
551 				ret = sk_attach_filter(&fprog, sk);
552 			}
553 			break;
554 
555 		case SO_DETACH_FILTER:
556 			spin_lock_bh(&sk->sk_lock.slock);
557 			filter = sk->sk_filter;
558 			if (filter) {
559 				sk->sk_filter = NULL;
560 				spin_unlock_bh(&sk->sk_lock.slock);
561 				sk_filter_release(sk, filter);
562 				break;
563 			}
564 			spin_unlock_bh(&sk->sk_lock.slock);
565 			ret = -ENONET;
566 			break;
567 
568 		/* We implement the SO_SNDLOWAT etc to
569 		   not be settable (1003.1g 5.3) */
570 		default:
571 		  	ret = -ENOPROTOOPT;
572 			break;
573   	}
574 	release_sock(sk);
575 	return ret;
576 }
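/*
 * Illustration of the SO_SNDBUF/SO_RCVBUF doubling above (values assumed,
 * not from this file):
 *
 *	int val = 65536;
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *
 * Assuming sysctl_rmem_max permits 65536, sk_rcvbuf becomes 131072 and a
 * later getsockopt(SO_RCVBUF) reports 131072; the doubled value is what
 * struct sk_buff and other per-packet overhead are charged against.
 */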
577 
578 
579 int sock_getsockopt(struct socket *sock, int level, int optname,
580 		    char __user *optval, int __user *optlen)
581 {
582 	struct sock *sk = sock->sk;
583 
584 	union
585 	{
586   		int val;
587   		struct linger ling;
588 		struct timeval tm;
589 	} v;
590 
591 	unsigned int lv = sizeof(int);
592 	int len;
593 
594   	if(get_user(len,optlen))
595   		return -EFAULT;
596 	if(len < 0)
597 		return -EINVAL;
598 
599   	switch(optname)
600   	{
601 		case SO_DEBUG:
602 			v.val = sock_flag(sk, SOCK_DBG);
603 			break;
604 
605 		case SO_DONTROUTE:
606 			v.val = sock_flag(sk, SOCK_LOCALROUTE);
607 			break;
608 
609 		case SO_BROADCAST:
610 			v.val = !!sock_flag(sk, SOCK_BROADCAST);
611 			break;
612 
613 		case SO_SNDBUF:
614 			v.val = sk->sk_sndbuf;
615 			break;
616 
617 		case SO_RCVBUF:
618 			v.val = sk->sk_rcvbuf;
619 			break;
620 
621 		case SO_REUSEADDR:
622 			v.val = sk->sk_reuse;
623 			break;
624 
625 		case SO_KEEPALIVE:
626 			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
627 			break;
628 
629 		case SO_TYPE:
630 			v.val = sk->sk_type;
631 			break;
632 
633 		case SO_ERROR:
634 			v.val = -sock_error(sk);
635 			if(v.val==0)
636 				v.val = xchg(&sk->sk_err_soft, 0);
637 			break;
638 
639 		case SO_OOBINLINE:
640 			v.val = !!sock_flag(sk, SOCK_URGINLINE);
641 			break;
642 
643 		case SO_NO_CHECK:
644 			v.val = sk->sk_no_check;
645 			break;
646 
647 		case SO_PRIORITY:
648 			v.val = sk->sk_priority;
649 			break;
650 
651 		case SO_LINGER:
652 			lv		= sizeof(v.ling);
653 			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
654  			v.ling.l_linger	= sk->sk_lingertime / HZ;
655 			break;
656 
657 		case SO_BSDCOMPAT:
658 			sock_warn_obsolete_bsdism("getsockopt");
659 			break;
660 
661 		case SO_TIMESTAMP:
662 			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
663 			break;
664 
665 		case SO_RCVTIMEO:
666 			lv=sizeof(struct timeval);
667 			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
668 				v.tm.tv_sec = 0;
669 				v.tm.tv_usec = 0;
670 			} else {
671 				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
672 				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
673 			}
674 			break;
675 
676 		case SO_SNDTIMEO:
677 			lv=sizeof(struct timeval);
678 			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
679 				v.tm.tv_sec = 0;
680 				v.tm.tv_usec = 0;
681 			} else {
682 				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
683 				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
684 			}
685 			break;
686 
687 		case SO_RCVLOWAT:
688 			v.val = sk->sk_rcvlowat;
689 			break;
690 
691 		case SO_SNDLOWAT:
692 			v.val=1;
693 			break;
694 
695 		case SO_PASSCRED:
696 			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
697 			break;
698 
699 		case SO_PEERCRED:
700 			if (len > sizeof(sk->sk_peercred))
701 				len = sizeof(sk->sk_peercred);
702 			if (copy_to_user(optval, &sk->sk_peercred, len))
703 				return -EFAULT;
704 			goto lenout;
705 
706 		case SO_PEERNAME:
707 		{
708 			char address[128];
709 
710 			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
711 				return -ENOTCONN;
712 			if (lv < len)
713 				return -EINVAL;
714 			if (copy_to_user(optval, address, len))
715 				return -EFAULT;
716 			goto lenout;
717 		}
718 
719 		/* Dubious BSD thing... Probably nobody even uses it, but
720 		 * the UNIX standard wants it for whatever reason... -DaveM
721 		 */
722 		case SO_ACCEPTCONN:
723 			v.val = sk->sk_state == TCP_LISTEN;
724 			break;
725 
726 		case SO_PEERSEC:
727 			return security_socket_getpeersec_stream(sock, optval, optlen, len);
728 
729 		default:
730 			return(-ENOPROTOOPT);
731 	}
732 	if (len > lv)
733 		len = lv;
734 	if (copy_to_user(optval, &v, len))
735 		return -EFAULT;
736 lenout:
737   	if (put_user(len, optlen))
738   		return -EFAULT;
739   	return 0;
740 }
741 
742 /**
743  *	sk_alloc - All socket objects are allocated here
744  *	@family: protocol family
745  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
746  *	@prot: struct proto associated with this new sock instance
747  *	@zero_it: if we should zero the newly allocated sock
748  */
749 struct sock *sk_alloc(int family, gfp_t priority,
750 		      struct proto *prot, int zero_it)
751 {
752 	struct sock *sk = NULL;
753 	kmem_cache_t *slab = prot->slab;
754 
755 	if (slab != NULL)
756 		sk = kmem_cache_alloc(slab, priority);
757 	else
758 		sk = kmalloc(prot->obj_size, priority);
759 
760 	if (sk) {
761 		if (zero_it) {
762 			memset(sk, 0, prot->obj_size);
763 			sk->sk_family = family;
764 			/*
765 			 * See comment in struct sock definition to understand
766 			 * why we need sk_prot_creator -acme
767 			 */
768 			sk->sk_prot = sk->sk_prot_creator = prot;
769 			sock_lock_init(sk);
770 		}
771 
772 		if (security_sk_alloc(sk, family, priority))
773 			goto out_free;
774 
775 		if (!try_module_get(prot->owner))
776 			goto out_free;
777 	}
778 	return sk;
779 
780 out_free:
781 	if (slab != NULL)
782 		kmem_cache_free(slab, sk);
783 	else
784 		kfree(sk);
785 	return NULL;
786 }
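/*
 * Usage sketch (hedged): a protocol family's create routine typically
 * allocates and initializes its sock along these lines:
 *
 *	sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1);
 *	if (sk == NULL)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 *
 * where answer_prot is the struct proto looked up for the requested
 * type/protocol; the exact flow lives in the per-family create code
 * (e.g. inet_create()), not here.
 */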
787 
788 void sk_free(struct sock *sk)
789 {
790 	struct sk_filter *filter;
791 	struct module *owner = sk->sk_prot_creator->owner;
792 
793 	if (sk->sk_destruct)
794 		sk->sk_destruct(sk);
795 
796 	filter = sk->sk_filter;
797 	if (filter) {
798 		sk_filter_release(sk, filter);
799 		sk->sk_filter = NULL;
800 	}
801 
802 	sock_disable_timestamp(sk);
803 
804 	if (atomic_read(&sk->sk_omem_alloc))
805 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
806 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
807 
808 	security_sk_free(sk);
809 	if (sk->sk_prot_creator->slab != NULL)
810 		kmem_cache_free(sk->sk_prot_creator->slab, sk);
811 	else
812 		kfree(sk);
813 	module_put(owner);
814 }
815 
816 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
817 {
818 	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
819 
820 	if (newsk != NULL) {
821 		struct sk_filter *filter;
822 
823 		memcpy(newsk, sk, sk->sk_prot->obj_size);
824 
825 		/* SANITY */
826 		sk_node_init(&newsk->sk_node);
827 		sock_lock_init(newsk);
828 		bh_lock_sock(newsk);
829 
830 		atomic_set(&newsk->sk_rmem_alloc, 0);
831 		atomic_set(&newsk->sk_wmem_alloc, 0);
832 		atomic_set(&newsk->sk_omem_alloc, 0);
833 		skb_queue_head_init(&newsk->sk_receive_queue);
834 		skb_queue_head_init(&newsk->sk_write_queue);
835 
836 		rwlock_init(&newsk->sk_dst_lock);
837 		rwlock_init(&newsk->sk_callback_lock);
838 
839 		newsk->sk_dst_cache	= NULL;
840 		newsk->sk_wmem_queued	= 0;
841 		newsk->sk_forward_alloc = 0;
842 		newsk->sk_send_head	= NULL;
843 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
844 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
845 
846 		sock_reset_flag(newsk, SOCK_DONE);
847 		skb_queue_head_init(&newsk->sk_error_queue);
848 
849 		filter = newsk->sk_filter;
850 		if (filter != NULL)
851 			sk_filter_charge(newsk, filter);
852 
853 		if (unlikely(xfrm_sk_clone_policy(newsk))) {
854 			/* It is still raw copy of parent, so invalidate
855 			 * destructor and make plain sk_free() */
856 			newsk->sk_destruct = NULL;
857 			sk_free(newsk);
858 			newsk = NULL;
859 			goto out;
860 		}
861 
862 		newsk->sk_err	   = 0;
863 		newsk->sk_priority = 0;
864 		atomic_set(&newsk->sk_refcnt, 2);
865 
866 		/*
867 		 * Increment the counter in the same struct proto as the master
868 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
869 		 * is the same as sk->sk_prot->socks, as this field was copied
870 		 * with memcpy).
871 		 *
872 		 * This _changes_ the previous behaviour, where
873 		 * tcp_create_openreq_child always was incrementing the
874 		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
875 		 * to be taken into account in all callers. -acme
876 		 */
877 		sk_refcnt_debug_inc(newsk);
878 		newsk->sk_socket = NULL;
879 		newsk->sk_sleep	 = NULL;
880 
881 		if (newsk->sk_prot->sockets_allocated)
882 			atomic_inc(newsk->sk_prot->sockets_allocated);
883 	}
884 out:
885 	return newsk;
886 }
887 
888 EXPORT_SYMBOL_GPL(sk_clone);
889 
890 void __init sk_init(void)
891 {
892 	if (num_physpages <= 4096) {
893 		sysctl_wmem_max = 32767;
894 		sysctl_rmem_max = 32767;
895 		sysctl_wmem_default = 32767;
896 		sysctl_rmem_default = 32767;
897 	} else if (num_physpages >= 131072) {
898 		sysctl_wmem_max = 131071;
899 		sysctl_rmem_max = 131071;
900 	}
901 }
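/*
 * With 4 KB pages (an assumption; page size is architecture dependent),
 * num_physpages <= 4096 means at most 16 MB of RAM, so the defaults shrink
 * to 32 KB, while num_physpages >= 131072 means at least 512 MB, so the
 * per-socket maxima are raised to roughly 128 KB.
 */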
902 
903 /*
904  *	Simple resource managers for sockets.
905  */
906 
907 
908 /*
909  * Write buffer destructor automatically called from kfree_skb.
910  */
911 void sock_wfree(struct sk_buff *skb)
912 {
913 	struct sock *sk = skb->sk;
914 
915 	/* In case it might be waiting for more memory. */
916 	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
917 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
918 		sk->sk_write_space(sk);
919 	sock_put(sk);
920 }
921 
922 /*
923  * Read buffer destructor automatically called from kfree_skb.
924  */
925 void sock_rfree(struct sk_buff *skb)
926 {
927 	struct sock *sk = skb->sk;
928 
929 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
930 }
931 
932 
933 int sock_i_uid(struct sock *sk)
934 {
935 	int uid;
936 
937 	read_lock(&sk->sk_callback_lock);
938 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
939 	read_unlock(&sk->sk_callback_lock);
940 	return uid;
941 }
942 
943 unsigned long sock_i_ino(struct sock *sk)
944 {
945 	unsigned long ino;
946 
947 	read_lock(&sk->sk_callback_lock);
948 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
949 	read_unlock(&sk->sk_callback_lock);
950 	return ino;
951 }
952 
953 /*
954  * Allocate a skb from the socket's send buffer.
955  */
956 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
957 			     gfp_t priority)
958 {
959 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
960 		struct sk_buff * skb = alloc_skb(size, priority);
961 		if (skb) {
962 			skb_set_owner_w(skb, sk);
963 			return skb;
964 		}
965 	}
966 	return NULL;
967 }
968 
969 /*
970  * Allocate a skb from the socket's receive buffer.
971  */
972 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
973 			     gfp_t priority)
974 {
975 	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
976 		struct sk_buff *skb = alloc_skb(size, priority);
977 		if (skb) {
978 			skb_set_owner_r(skb, sk);
979 			return skb;
980 		}
981 	}
982 	return NULL;
983 }
984 
985 /*
986  * Allocate a memory block from the socket's option memory buffer.
987  */
988 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
989 {
990 	if ((unsigned)size <= sysctl_optmem_max &&
991 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
992 		void *mem;
993 		/* First do the add, to avoid the race if kmalloc
994  		 * might sleep.
995 		 */
996 		atomic_add(size, &sk->sk_omem_alloc);
997 		mem = kmalloc(size, priority);
998 		if (mem)
999 			return mem;
1000 		atomic_sub(size, &sk->sk_omem_alloc);
1001 	}
1002 	return NULL;
1003 }
1004 
1005 /*
1006  * Free an option memory block.
1007  */
1008 void sock_kfree_s(struct sock *sk, void *mem, int size)
1009 {
1010 	kfree(mem);
1011 	atomic_sub(size, &sk->sk_omem_alloc);
1012 }
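/*
 * Pairing sketch (struct name purely illustrative): option memory must be
 * freed with the same size it was charged with, e.g.
 *
 *	struct foo *p = sock_kmalloc(sk, sizeof(*p), GFP_KERNEL);
 *	...
 *	sock_kfree_s(sk, p, sizeof(*p));
 *
 * otherwise sk_omem_alloc drifts and sk_free() later reports an
 * "optmem leakage" for the socket.
 */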
1013 
1014 /* This is almost wait_for_tcp_memory minus release_sock/lock_sock.
1015    I think these locks should be removed for datagram sockets.
1016  */
1017 static long sock_wait_for_wmem(struct sock * sk, long timeo)
1018 {
1019 	DEFINE_WAIT(wait);
1020 
1021 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1022 	for (;;) {
1023 		if (!timeo)
1024 			break;
1025 		if (signal_pending(current))
1026 			break;
1027 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1028 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1029 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1030 			break;
1031 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1032 			break;
1033 		if (sk->sk_err)
1034 			break;
1035 		timeo = schedule_timeout(timeo);
1036 	}
1037 	finish_wait(sk->sk_sleep, &wait);
1038 	return timeo;
1039 }
1040 
1041 
1042 /*
1043  *	Generic send/receive buffer handlers
1044  */
1045 
1046 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1047 					    unsigned long header_len,
1048 					    unsigned long data_len,
1049 					    int noblock, int *errcode)
1050 {
1051 	struct sk_buff *skb;
1052 	gfp_t gfp_mask;
1053 	long timeo;
1054 	int err;
1055 
1056 	gfp_mask = sk->sk_allocation;
1057 	if (gfp_mask & __GFP_WAIT)
1058 		gfp_mask |= __GFP_REPEAT;
1059 
1060 	timeo = sock_sndtimeo(sk, noblock);
1061 	while (1) {
1062 		err = sock_error(sk);
1063 		if (err != 0)
1064 			goto failure;
1065 
1066 		err = -EPIPE;
1067 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1068 			goto failure;
1069 
1070 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1071 			skb = alloc_skb(header_len, sk->sk_allocation);
1072 			if (skb) {
1073 				int npages;
1074 				int i;
1075 
1076 				/* No pages, we're done... */
1077 				if (!data_len)
1078 					break;
1079 
1080 				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1081 				skb->truesize += data_len;
1082 				skb_shinfo(skb)->nr_frags = npages;
1083 				for (i = 0; i < npages; i++) {
1084 					struct page *page;
1085 					skb_frag_t *frag;
1086 
1087 					page = alloc_pages(sk->sk_allocation, 0);
1088 					if (!page) {
1089 						err = -ENOBUFS;
1090 						skb_shinfo(skb)->nr_frags = i;
1091 						kfree_skb(skb);
1092 						goto failure;
1093 					}
1094 
1095 					frag = &skb_shinfo(skb)->frags[i];
1096 					frag->page = page;
1097 					frag->page_offset = 0;
1098 					frag->size = (data_len >= PAGE_SIZE ?
1099 						      PAGE_SIZE :
1100 						      data_len);
1101 					data_len -= PAGE_SIZE;
1102 				}
1103 
1104 				/* Full success... */
1105 				break;
1106 			}
1107 			err = -ENOBUFS;
1108 			goto failure;
1109 		}
1110 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1111 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1112 		err = -EAGAIN;
1113 		if (!timeo)
1114 			goto failure;
1115 		if (signal_pending(current))
1116 			goto interrupted;
1117 		timeo = sock_wait_for_wmem(sk, timeo);
1118 	}
1119 
1120 	skb_set_owner_w(skb, sk);
1121 	return skb;
1122 
1123 interrupted:
1124 	err = sock_intr_errno(timeo);
1125 failure:
1126 	*errcode = err;
1127 	return NULL;
1128 }
1129 
1130 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1131 				    int noblock, int *errcode)
1132 {
1133 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1134 }
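/*
 * Note on the two helpers above: header_len is allocated in the skb's
 * linear area while data_len is spread over page fragments, so
 * sock_alloc_send_skb(), which passes data_len == 0, always returns a
 * purely linear buffer.
 */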
1135 
1136 static void __lock_sock(struct sock *sk)
1137 {
1138 	DEFINE_WAIT(wait);
1139 
1140 	for(;;) {
1141 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1142 					TASK_UNINTERRUPTIBLE);
1143 		spin_unlock_bh(&sk->sk_lock.slock);
1144 		schedule();
1145 		spin_lock_bh(&sk->sk_lock.slock);
1146 		if(!sock_owned_by_user(sk))
1147 			break;
1148 	}
1149 	finish_wait(&sk->sk_lock.wq, &wait);
1150 }
1151 
1152 static void __release_sock(struct sock *sk)
1153 {
1154 	struct sk_buff *skb = sk->sk_backlog.head;
1155 
1156 	do {
1157 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1158 		bh_unlock_sock(sk);
1159 
1160 		do {
1161 			struct sk_buff *next = skb->next;
1162 
1163 			skb->next = NULL;
1164 			sk->sk_backlog_rcv(sk, skb);
1165 
1166 			/*
1167 			 * We are in process context here with softirqs
1168 			 * disabled, use cond_resched_softirq() to preempt.
1169 			 * This is safe to do because we've taken the backlog
1170 			 * queue private:
1171 			 */
1172 			cond_resched_softirq();
1173 
1174 			skb = next;
1175 		} while (skb != NULL);
1176 
1177 		bh_lock_sock(sk);
1178 	} while((skb = sk->sk_backlog.head) != NULL);
1179 }
1180 
1181 /**
1182  * sk_wait_data - wait for data to arrive at sk_receive_queue
1183  * @sk:    sock to wait on
1184  * @timeo: for how long
1185  *
1186  * Socket state, including sk->sk_err, is now changed only under the lock,
1187  * hence we may omit the checks after joining the wait queue.
1188  * We check the receive queue before schedule() only as an optimization;
1189  * it is very likely that release_sock() added new data.
1190  */
1191 int sk_wait_data(struct sock *sk, long *timeo)
1192 {
1193 	int rc;
1194 	DEFINE_WAIT(wait);
1195 
1196 	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1197 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1198 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1199 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1200 	finish_wait(sk->sk_sleep, &wait);
1201 	return rc;
1202 }
1203 
1204 EXPORT_SYMBOL(sk_wait_data);
1205 
1206 /*
1207  * Set of default routines for initialising struct proto_ops when
1208  * the protocol does not support a particular function. In certain
1209  * cases where it makes no sense for a protocol to have a "do nothing"
1210  * function, some default processing is provided.
1211  */
1212 
1213 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1214 {
1215 	return -EOPNOTSUPP;
1216 }
1217 
1218 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1219 		    int len, int flags)
1220 {
1221 	return -EOPNOTSUPP;
1222 }
1223 
1224 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1225 {
1226 	return -EOPNOTSUPP;
1227 }
1228 
1229 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1230 {
1231 	return -EOPNOTSUPP;
1232 }
1233 
1234 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1235 		    int *len, int peer)
1236 {
1237 	return -EOPNOTSUPP;
1238 }
1239 
1240 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1241 {
1242 	return 0;
1243 }
1244 
1245 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1246 {
1247 	return -EOPNOTSUPP;
1248 }
1249 
1250 int sock_no_listen(struct socket *sock, int backlog)
1251 {
1252 	return -EOPNOTSUPP;
1253 }
1254 
1255 int sock_no_shutdown(struct socket *sock, int how)
1256 {
1257 	return -EOPNOTSUPP;
1258 }
1259 
1260 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1261 		    char __user *optval, int optlen)
1262 {
1263 	return -EOPNOTSUPP;
1264 }
1265 
1266 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1267 		    char __user *optval, int __user *optlen)
1268 {
1269 	return -EOPNOTSUPP;
1270 }
1271 
1272 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1273 		    size_t len)
1274 {
1275 	return -EOPNOTSUPP;
1276 }
1277 
1278 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1279 		    size_t len, int flags)
1280 {
1281 	return -EOPNOTSUPP;
1282 }
1283 
1284 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1285 {
1286 	/* Mirror missing mmap method error code */
1287 	return -ENODEV;
1288 }
1289 
1290 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1291 {
1292 	ssize_t res;
1293 	struct msghdr msg = {.msg_flags = flags};
1294 	struct kvec iov;
1295 	char *kaddr = kmap(page);
1296 	iov.iov_base = kaddr + offset;
1297 	iov.iov_len = size;
1298 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1299 	kunmap(page);
1300 	return res;
1301 }
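/*
 * sock_no_sendpage() above is the generic fallback: it kmaps the page and
 * pushes the data through kernel_sendmsg(), so protocols without a real
 * sendpage implementation still work, at the cost of an extra copy instead
 * of a true zero-copy transmit.
 */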
1302 
1303 /*
1304  *	Default Socket Callbacks
1305  */
1306 
1307 static void sock_def_wakeup(struct sock *sk)
1308 {
1309 	read_lock(&sk->sk_callback_lock);
1310 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1311 		wake_up_interruptible_all(sk->sk_sleep);
1312 	read_unlock(&sk->sk_callback_lock);
1313 }
1314 
1315 static void sock_def_error_report(struct sock *sk)
1316 {
1317 	read_lock(&sk->sk_callback_lock);
1318 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1319 		wake_up_interruptible(sk->sk_sleep);
1320 	sk_wake_async(sk,0,POLL_ERR);
1321 	read_unlock(&sk->sk_callback_lock);
1322 }
1323 
1324 static void sock_def_readable(struct sock *sk, int len)
1325 {
1326 	read_lock(&sk->sk_callback_lock);
1327 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1328 		wake_up_interruptible(sk->sk_sleep);
1329 	sk_wake_async(sk,1,POLL_IN);
1330 	read_unlock(&sk->sk_callback_lock);
1331 }
1332 
1333 static void sock_def_write_space(struct sock *sk)
1334 {
1335 	read_lock(&sk->sk_callback_lock);
1336 
1337 	/* Do not wake up a writer until he can make "significant"
1338 	 * progress.  --DaveM
1339 	 */
1340 	if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1341 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1342 			wake_up_interruptible(sk->sk_sleep);
1343 
1344 		/* Should agree with poll, otherwise some programs break */
1345 		if (sock_writeable(sk))
1346 			sk_wake_async(sk, 2, POLL_OUT);
1347 	}
1348 
1349 	read_unlock(&sk->sk_callback_lock);
1350 }
1351 
1352 static void sock_def_destruct(struct sock *sk)
1353 {
1354 	kfree(sk->sk_protinfo);
1355 }
1356 
1357 void sk_send_sigurg(struct sock *sk)
1358 {
1359 	if (sk->sk_socket && sk->sk_socket->file)
1360 		if (send_sigurg(&sk->sk_socket->file->f_owner))
1361 			sk_wake_async(sk, 3, POLL_PRI);
1362 }
1363 
1364 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1365 		    unsigned long expires)
1366 {
1367 	if (!mod_timer(timer, expires))
1368 		sock_hold(sk);
1369 }
1370 
1371 EXPORT_SYMBOL(sk_reset_timer);
1372 
1373 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1374 {
1375 	if (timer_pending(timer) && del_timer(timer))
1376 		__sock_put(sk);
1377 }
1378 
1379 EXPORT_SYMBOL(sk_stop_timer);
1380 
1381 void sock_init_data(struct socket *sock, struct sock *sk)
1382 {
1383 	skb_queue_head_init(&sk->sk_receive_queue);
1384 	skb_queue_head_init(&sk->sk_write_queue);
1385 	skb_queue_head_init(&sk->sk_error_queue);
1386 
1387 	sk->sk_send_head	=	NULL;
1388 
1389 	init_timer(&sk->sk_timer);
1390 
1391 	sk->sk_allocation	=	GFP_KERNEL;
1392 	sk->sk_rcvbuf		=	sysctl_rmem_default;
1393 	sk->sk_sndbuf		=	sysctl_wmem_default;
1394 	sk->sk_state		=	TCP_CLOSE;
1395 	sk->sk_socket		=	sock;
1396 
1397 	sock_set_flag(sk, SOCK_ZAPPED);
1398 
1399 	if(sock)
1400 	{
1401 		sk->sk_type	=	sock->type;
1402 		sk->sk_sleep	=	&sock->wait;
1403 		sock->sk	=	sk;
1404 	} else
1405 		sk->sk_sleep	=	NULL;
1406 
1407 	rwlock_init(&sk->sk_dst_lock);
1408 	rwlock_init(&sk->sk_callback_lock);
1409 
1410 	sk->sk_state_change	=	sock_def_wakeup;
1411 	sk->sk_data_ready	=	sock_def_readable;
1412 	sk->sk_write_space	=	sock_def_write_space;
1413 	sk->sk_error_report	=	sock_def_error_report;
1414 	sk->sk_destruct		=	sock_def_destruct;
1415 
1416 	sk->sk_sndmsg_page	=	NULL;
1417 	sk->sk_sndmsg_off	=	0;
1418 
1419 	sk->sk_peercred.pid 	=	0;
1420 	sk->sk_peercred.uid	=	-1;
1421 	sk->sk_peercred.gid	=	-1;
1422 	sk->sk_write_pending	=	0;
1423 	sk->sk_rcvlowat		=	1;
1424 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
1425 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
1426 
1427 	sk->sk_stamp.tv_sec     = -1L;
1428 	sk->sk_stamp.tv_usec    = -1L;
1429 
1430 	atomic_set(&sk->sk_refcnt, 1);
1431 }
1432 
1433 void fastcall lock_sock(struct sock *sk)
1434 {
1435 	might_sleep();
1436 	spin_lock_bh(&(sk->sk_lock.slock));
1437 	if (sk->sk_lock.owner)
1438 		__lock_sock(sk);
1439 	sk->sk_lock.owner = (void *)1;
1440 	spin_unlock_bh(&(sk->sk_lock.slock));
1441 }
1442 
1443 EXPORT_SYMBOL(lock_sock);
1444 
1445 void fastcall release_sock(struct sock *sk)
1446 {
1447 	spin_lock_bh(&(sk->sk_lock.slock));
1448 	if (sk->sk_backlog.tail)
1449 		__release_sock(sk);
1450 	sk->sk_lock.owner = NULL;
1451 	if (waitqueue_active(&(sk->sk_lock.wq)))
1452 		wake_up(&(sk->sk_lock.wq));
1453 	spin_unlock_bh(&(sk->sk_lock.slock));
1454 }
1455 EXPORT_SYMBOL(release_sock);
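/*
 * Locking sketch for the pair above: process context brackets socket state
 * changes with
 *
 *	lock_sock(sk);
 *	... modify socket state ...
 *	release_sock(sk);
 *
 * while softirq receive paths take bh_lock_sock() and, if the owner flag is
 * set, queue skbs on sk->sk_backlog instead; release_sock() then replays the
 * backlog via __release_sock() before waking any __lock_sock() waiters.
 */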
1456 
1457 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1458 {
1459 	if (!sock_flag(sk, SOCK_TIMESTAMP))
1460 		sock_enable_timestamp(sk);
1461 	if (sk->sk_stamp.tv_sec == -1)
1462 		return -ENOENT;
1463 	if (sk->sk_stamp.tv_sec == 0)
1464 		do_gettimeofday(&sk->sk_stamp);
1465 	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
1466 		-EFAULT : 0;
1467 }
1468 EXPORT_SYMBOL(sock_get_timestamp);
1469 
1470 void sock_enable_timestamp(struct sock *sk)
1471 {
1472 	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1473 		sock_set_flag(sk, SOCK_TIMESTAMP);
1474 		net_enable_timestamp();
1475 	}
1476 }
1477 EXPORT_SYMBOL(sock_enable_timestamp);
1478 
1479 /*
1480  *	Get a socket option on an socket.
1481  *	Get a socket option on a socket.
1482  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
1483  *	asynchronous errors should be reported by getsockopt. We assume
1484  *	this means if you specify SO_ERROR (otherwise what's the point of it).
1485  */
1486 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1487 			   char __user *optval, int __user *optlen)
1488 {
1489 	struct sock *sk = sock->sk;
1490 
1491 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1492 }
1493 
1494 EXPORT_SYMBOL(sock_common_getsockopt);
1495 
1496 #ifdef CONFIG_COMPAT
1497 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1498 				  char __user *optval, int __user *optlen)
1499 {
1500 	struct sock *sk = sock->sk;
1501 
1502 	if (sk->sk_prot->compat_getsockopt != NULL)
1503 		return sk->sk_prot->compat_getsockopt(sk, level, optname,
1504 						      optval, optlen);
1505 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1506 }
1507 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1508 #endif
1509 
1510 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1511 			struct msghdr *msg, size_t size, int flags)
1512 {
1513 	struct sock *sk = sock->sk;
1514 	int addr_len = 0;
1515 	int err;
1516 
1517 	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1518 				   flags & ~MSG_DONTWAIT, &addr_len);
1519 	if (err >= 0)
1520 		msg->msg_namelen = addr_len;
1521 	return err;
1522 }
1523 
1524 EXPORT_SYMBOL(sock_common_recvmsg);
1525 
1526 /*
1527  *	Set socket options on an inet socket.
1528  */
1529 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1530 			   char __user *optval, int optlen)
1531 {
1532 	struct sock *sk = sock->sk;
1533 
1534 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1535 }
1536 
1537 EXPORT_SYMBOL(sock_common_setsockopt);
1538 
1539 #ifdef CONFIG_COMPAT
1540 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1541 				  char __user *optval, int optlen)
1542 {
1543 	struct sock *sk = sock->sk;
1544 
1545 	if (sk->sk_prot->compat_setsockopt != NULL)
1546 		return sk->sk_prot->compat_setsockopt(sk, level, optname,
1547 						      optval, optlen);
1548 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1549 }
1550 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1551 #endif
1552 
1553 void sk_common_release(struct sock *sk)
1554 {
1555 	if (sk->sk_prot->destroy)
1556 		sk->sk_prot->destroy(sk);
1557 
1558 	/*
1559 	 * Observation: when sk_common_release is called, processes no longer
1560 	 * have access to the socket, but the network stack still does.
1561 	 * Step one, detach it from networking:
1562 	 *
1563 	 * A. Remove from hash tables.
1564 	 */
1565 
1566 	sk->sk_prot->unhash(sk);
1567 
1568 	/*
1569 	 * At this point the socket cannot receive new packets, but some may
1570 	 * still be in flight because a CPU running the receiver did its hash
1571 	 * table lookup before we unhashed the socket. They will reach the
1572 	 * receive queue and be purged by the socket destructor.
1573 	 *
1574 	 * We may also still have packets pending on the receive queue and,
1575 	 * probably, our own packets waiting in device queues. sock_destroy
1576 	 * will drain the receive queue, but transmitted packets will delay
1577 	 * socket destruction until the last reference is released.
1578 	 */
1579 
1580 	sock_orphan(sk);
1581 
1582 	xfrm_sk_free_policy(sk);
1583 
1584 	sk_refcnt_debug_release(sk);
1585 	sock_put(sk);
1586 }
1587 
1588 EXPORT_SYMBOL(sk_common_release);
1589 
1590 static DEFINE_RWLOCK(proto_list_lock);
1591 static LIST_HEAD(proto_list);
1592 
1593 int proto_register(struct proto *prot, int alloc_slab)
1594 {
1595 	char *request_sock_slab_name = NULL;
1596 	char *timewait_sock_slab_name;
1597 	int rc = -ENOBUFS;
1598 
1599 	if (alloc_slab) {
1600 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1601 					       SLAB_HWCACHE_ALIGN, NULL, NULL);
1602 
1603 		if (prot->slab == NULL) {
1604 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1605 			       prot->name);
1606 			goto out;
1607 		}
1608 
1609 		if (prot->rsk_prot != NULL) {
1610 			static const char mask[] = "request_sock_%s";
1611 
1612 			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1613 			if (request_sock_slab_name == NULL)
1614 				goto out_free_sock_slab;
1615 
1616 			sprintf(request_sock_slab_name, mask, prot->name);
1617 			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1618 								 prot->rsk_prot->obj_size, 0,
1619 								 SLAB_HWCACHE_ALIGN, NULL, NULL);
1620 
1621 			if (prot->rsk_prot->slab == NULL) {
1622 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1623 				       prot->name);
1624 				goto out_free_request_sock_slab_name;
1625 			}
1626 		}
1627 
1628 		if (prot->twsk_prot != NULL) {
1629 			static const char mask[] = "tw_sock_%s";
1630 
1631 			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1632 
1633 			if (timewait_sock_slab_name == NULL)
1634 				goto out_free_request_sock_slab;
1635 
1636 			sprintf(timewait_sock_slab_name, mask, prot->name);
1637 			prot->twsk_prot->twsk_slab =
1638 				kmem_cache_create(timewait_sock_slab_name,
1639 						  prot->twsk_prot->twsk_obj_size,
1640 						  0, SLAB_HWCACHE_ALIGN,
1641 						  NULL, NULL);
1642 			if (prot->twsk_prot->twsk_slab == NULL)
1643 				goto out_free_timewait_sock_slab_name;
1644 		}
1645 	}
1646 
1647 	write_lock(&proto_list_lock);
1648 	list_add(&prot->node, &proto_list);
1649 	write_unlock(&proto_list_lock);
1650 	rc = 0;
1651 out:
1652 	return rc;
1653 out_free_timewait_sock_slab_name:
1654 	kfree(timewait_sock_slab_name);
1655 out_free_request_sock_slab:
1656 	if (prot->rsk_prot && prot->rsk_prot->slab) {
1657 		kmem_cache_destroy(prot->rsk_prot->slab);
1658 		prot->rsk_prot->slab = NULL;
1659 	}
1660 out_free_request_sock_slab_name:
1661 	kfree(request_sock_slab_name);
1662 out_free_sock_slab:
1663 	kmem_cache_destroy(prot->slab);
1664 	prot->slab = NULL;
1665 	goto out;
1666 }
1667 
1668 EXPORT_SYMBOL(proto_register);
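/*
 * Typical module usage (sketch; the "mumble" names are hypothetical):
 *
 *	static int __init mumble_init(void)
 *	{
 *		return proto_register(&mumble_prot, 1);	/* 1: allocate a slab */
 *	}
 *
 *	static void __exit mumble_exit(void)
 *	{
 *		proto_unregister(&mumble_prot);
 *	}
 */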
1669 
1670 void proto_unregister(struct proto *prot)
1671 {
1672 	write_lock(&proto_list_lock);
1673 	list_del(&prot->node);
1674 	write_unlock(&proto_list_lock);
1675 
1676 	if (prot->slab != NULL) {
1677 		kmem_cache_destroy(prot->slab);
1678 		prot->slab = NULL;
1679 	}
1680 
1681 	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1682 		const char *name = kmem_cache_name(prot->rsk_prot->slab);
1683 
1684 		kmem_cache_destroy(prot->rsk_prot->slab);
1685 		kfree(name);
1686 		prot->rsk_prot->slab = NULL;
1687 	}
1688 
1689 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1690 		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1691 
1692 		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1693 		kfree(name);
1694 		prot->twsk_prot->twsk_slab = NULL;
1695 	}
1696 }
1697 
1698 EXPORT_SYMBOL(proto_unregister);
1699 
1700 #ifdef CONFIG_PROC_FS
1701 static inline struct proto *__proto_head(void)
1702 {
1703 	return list_entry(proto_list.next, struct proto, node);
1704 }
1705 
1706 static inline struct proto *proto_head(void)
1707 {
1708 	return list_empty(&proto_list) ? NULL : __proto_head();
1709 }
1710 
1711 static inline struct proto *proto_next(struct proto *proto)
1712 {
1713 	return proto->node.next == &proto_list ? NULL :
1714 		list_entry(proto->node.next, struct proto, node);
1715 }
1716 
1717 static inline struct proto *proto_get_idx(loff_t pos)
1718 {
1719 	struct proto *proto;
1720 	loff_t i = 0;
1721 
1722 	list_for_each_entry(proto, &proto_list, node)
1723 		if (i++ == pos)
1724 			goto out;
1725 
1726 	proto = NULL;
1727 out:
1728 	return proto;
1729 }
1730 
1731 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1732 {
1733 	read_lock(&proto_list_lock);
1734 	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
1735 }
1736 
1737 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1738 {
1739 	++*pos;
1740 	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
1741 }
1742 
1743 static void proto_seq_stop(struct seq_file *seq, void *v)
1744 {
1745 	read_unlock(&proto_list_lock);
1746 }
1747 
1748 static char proto_method_implemented(const void *method)
1749 {
1750 	return method == NULL ? 'n' : 'y';
1751 }
1752 
1753 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1754 {
1755 	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1756 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1757 		   proto->name,
1758 		   proto->obj_size,
1759 		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1760 		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1761 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1762 		   proto->max_header,
1763 		   proto->slab == NULL ? "no" : "yes",
1764 		   module_name(proto->owner),
1765 		   proto_method_implemented(proto->close),
1766 		   proto_method_implemented(proto->connect),
1767 		   proto_method_implemented(proto->disconnect),
1768 		   proto_method_implemented(proto->accept),
1769 		   proto_method_implemented(proto->ioctl),
1770 		   proto_method_implemented(proto->init),
1771 		   proto_method_implemented(proto->destroy),
1772 		   proto_method_implemented(proto->shutdown),
1773 		   proto_method_implemented(proto->setsockopt),
1774 		   proto_method_implemented(proto->getsockopt),
1775 		   proto_method_implemented(proto->sendmsg),
1776 		   proto_method_implemented(proto->recvmsg),
1777 		   proto_method_implemented(proto->sendpage),
1778 		   proto_method_implemented(proto->bind),
1779 		   proto_method_implemented(proto->backlog_rcv),
1780 		   proto_method_implemented(proto->hash),
1781 		   proto_method_implemented(proto->unhash),
1782 		   proto_method_implemented(proto->get_port),
1783 		   proto_method_implemented(proto->enter_memory_pressure));
1784 }
1785 
1786 static int proto_seq_show(struct seq_file *seq, void *v)
1787 {
1788 	if (v == SEQ_START_TOKEN)
1789 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1790 			   "protocol",
1791 			   "size",
1792 			   "sockets",
1793 			   "memory",
1794 			   "press",
1795 			   "maxhdr",
1796 			   "slab",
1797 			   "module",
1798 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1799 	else
1800 		proto_seq_printf(seq, v);
1801 	return 0;
1802 }
1803 
1804 static struct seq_operations proto_seq_ops = {
1805 	.start  = proto_seq_start,
1806 	.next   = proto_seq_next,
1807 	.stop   = proto_seq_stop,
1808 	.show   = proto_seq_show,
1809 };
1810 
1811 static int proto_seq_open(struct inode *inode, struct file *file)
1812 {
1813 	return seq_open(file, &proto_seq_ops);
1814 }
1815 
1816 static struct file_operations proto_seq_fops = {
1817 	.owner		= THIS_MODULE,
1818 	.open		= proto_seq_open,
1819 	.read		= seq_read,
1820 	.llseek		= seq_lseek,
1821 	.release	= seq_release,
1822 };
1823 
1824 static int __init proto_init(void)
1825 {
1826 	/* register /proc/net/protocols */
1827 	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1828 }
1829 
1830 subsys_initcall(proto_init);
1831 
1832 #endif /* CONFIG_PROC_FS */
1833 
1834 EXPORT_SYMBOL(sk_alloc);
1835 EXPORT_SYMBOL(sk_free);
1836 EXPORT_SYMBOL(sk_send_sigurg);
1837 EXPORT_SYMBOL(sock_alloc_send_skb);
1838 EXPORT_SYMBOL(sock_init_data);
1839 EXPORT_SYMBOL(sock_kfree_s);
1840 EXPORT_SYMBOL(sock_kmalloc);
1841 EXPORT_SYMBOL(sock_no_accept);
1842 EXPORT_SYMBOL(sock_no_bind);
1843 EXPORT_SYMBOL(sock_no_connect);
1844 EXPORT_SYMBOL(sock_no_getname);
1845 EXPORT_SYMBOL(sock_no_getsockopt);
1846 EXPORT_SYMBOL(sock_no_ioctl);
1847 EXPORT_SYMBOL(sock_no_listen);
1848 EXPORT_SYMBOL(sock_no_mmap);
1849 EXPORT_SYMBOL(sock_no_poll);
1850 EXPORT_SYMBOL(sock_no_recvmsg);
1851 EXPORT_SYMBOL(sock_no_sendmsg);
1852 EXPORT_SYMBOL(sock_no_sendpage);
1853 EXPORT_SYMBOL(sock_no_setsockopt);
1854 EXPORT_SYMBOL(sock_no_shutdown);
1855 EXPORT_SYMBOL(sock_no_socketpair);
1856 EXPORT_SYMBOL(sock_rfree);
1857 EXPORT_SYMBOL(sock_setsockopt);
1858 EXPORT_SYMBOL(sock_wfree);
1859 EXPORT_SYMBOL(sock_wmalloc);
1860 EXPORT_SYMBOL(sock_i_uid);
1861 EXPORT_SYMBOL(sock_i_ino);
1862 EXPORT_SYMBOL(sysctl_optmem_max);
1863 #ifdef CONFIG_SYSCTL
1864 EXPORT_SYMBOL(sysctl_rmem_max);
1865 EXPORT_SYMBOL(sysctl_wmem_max);
1866 #endif
1867