xref: /linux/net/core/sock.c (revision f3d9478b2ce468c3115b02ecae7e975990697f15)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Generic socket support routines. Memory allocators, socket lock/release
7  *		handler for protocols to use and generic option handler.
8  *
9  *
10  * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11  *
12  * Authors:	Ross Biro
13  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *		Florian La Roche, <flla@stud.uni-sb.de>
15  *		Alan Cox, <A.Cox@swansea.ac.uk>
16  *
17  * Fixes:
18  *		Alan Cox	: 	Numerous verify_area() problems
19  *		Alan Cox	:	Connecting on a connecting socket
20  *					now returns an error for tcp.
21  *		Alan Cox	:	sock->protocol is set correctly.
22  *					and is not sometimes left as 0.
23  *		Alan Cox	:	connect handles icmp errors on a
24  *					connect properly. Unfortunately there
25  *					is a restart syscall nasty there. I
26  *					can't match BSD without hacking the C
27  *					library. Ideas urgently sought!
28  *		Alan Cox	:	Disallow bind() to addresses that are
29  *					not ours - especially broadcast ones!!
30  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
31  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
32  *					instead they leave that for the DESTROY timer.
33  *		Alan Cox	:	Clean up error flag in accept
34  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
35  *					was buggy. Put a remove_sock() in the handler
36  *					for memory when we hit 0. Also altered the timer
37  *					code. The ACK stuff can wait and needs major
38  *					TCP layer surgery.
39  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
40  *					and fixed timer/inet_bh race.
41  *		Alan Cox	:	Added zapped flag for TCP
42  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
43  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
45  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
46  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
48  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
49  *	Pauline Middelink	:	identd support
50  *		Alan Cox	:	Fixed connect() taking signals I think.
51  *		Alan Cox	:	SO_LINGER supported
52  *		Alan Cox	:	Error reporting fixes
53  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
54  *		Alan Cox	:	inet sockets don't set sk->type!
55  *		Alan Cox	:	Split socket option code
56  *		Alan Cox	:	Callbacks
57  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
58  *		Alex		:	Removed restriction on inet fioctl
59  *		Alan Cox	:	Splitting INET from NET core
60  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
61  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
62  *		Alan Cox	:	Split IP from generic code
63  *		Alan Cox	:	New kfree_skbmem()
64  *		Alan Cox	:	Make SO_DEBUG superuser only.
65  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
66  *					(compatibility fix)
67  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
68  *		Alan Cox	:	Allocator for a socket is settable.
69  *		Alan Cox	:	SO_ERROR includes soft errors.
70  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
71  *		Alan Cox	: 	Generic socket allocation to make hooks
72  *					easier (suggested by Craig Metz).
73  *		Michael Pall	:	SO_ERROR returns positive errno again
74  *              Steve Whitehouse:       Added default destructor to free
75  *                                      protocol private data.
76  *              Steve Whitehouse:       Added various other default routines
77  *                                      common to several socket families.
78  *              Chris Evans     :       Call suser() check last on F_SETOWN
79  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
81  *		Andi Kleen	:	Fix write_space callback
82  *		Chris Evans	:	Security fixes - signedness again
83  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
84  *
85  * To Fix:
86  *
87  *
88  *		This program is free software; you can redistribute it and/or
89  *		modify it under the terms of the GNU General Public License
90  *		as published by the Free Software Foundation; either version
91  *		2 of the License, or (at your option) any later version.
92  */
93 
94 #include <linux/capability.h>
95 #include <linux/config.h>
96 #include <linux/errno.h>
97 #include <linux/types.h>
98 #include <linux/socket.h>
99 #include <linux/in.h>
100 #include <linux/kernel.h>
101 #include <linux/module.h>
102 #include <linux/proc_fs.h>
103 #include <linux/seq_file.h>
104 #include <linux/sched.h>
105 #include <linux/timer.h>
106 #include <linux/string.h>
107 #include <linux/sockios.h>
108 #include <linux/net.h>
109 #include <linux/mm.h>
110 #include <linux/slab.h>
111 #include <linux/interrupt.h>
112 #include <linux/poll.h>
113 #include <linux/tcp.h>
114 #include <linux/init.h>
115 
116 #include <asm/uaccess.h>
117 #include <asm/system.h>
118 
119 #include <linux/netdevice.h>
120 #include <net/protocol.h>
121 #include <linux/skbuff.h>
122 #include <net/request_sock.h>
123 #include <net/sock.h>
124 #include <net/xfrm.h>
125 #include <linux/ipsec.h>
126 
127 #include <linux/filter.h>
128 
129 #ifdef CONFIG_INET
130 #include <net/tcp.h>
131 #endif
132 
133 /* Take into consideration the size of the struct sk_buff overhead in the
134  * determination of these values, since that is non-constant across
135  * platforms.  This makes socket queueing behavior and performance
136  * not depend upon such differences.
137  */
138 #define _SK_MEM_PACKETS		256
139 #define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
140 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
141 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
142 
143 /* Run time adjustable parameters. */
144 __u32 sysctl_wmem_max = SK_WMEM_MAX;
145 __u32 sysctl_rmem_max = SK_RMEM_MAX;
146 __u32 sysctl_wmem_default = SK_WMEM_MAX;
147 __u32 sysctl_rmem_default = SK_RMEM_MAX;
148 
149 /* Maximal space eaten by iovec or ancillary data plus some space */
150 int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512);
151 
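/*
 * Convert a user-supplied struct timeval into a timeout in jiffies.
 * A zero timeval means "wait forever" (MAX_SCHEDULE_TIMEOUT).
 */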
152 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
153 {
154 	struct timeval tv;
155 
156 	if (optlen < sizeof(tv))
157 		return -EINVAL;
158 	if (copy_from_user(&tv, optval, sizeof(tv)))
159 		return -EFAULT;
160 
161 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
162 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
163 		return 0;
164 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
165 		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
166 	return 0;
167 }
168 
169 static void sock_warn_obsolete_bsdism(const char *name)
170 {
171 	static int warned;
172 	static char warncomm[TASK_COMM_LEN];
173 	if (strcmp(warncomm, current->comm) && warned < 5) {
174 		strcpy(warncomm,  current->comm);
175 		printk(KERN_WARNING "process `%s' is using obsolete "
176 		       "%s SO_BSDCOMPAT\n", warncomm, name);
177 		warned++;
178 	}
179 }
180 
181 static void sock_disable_timestamp(struct sock *sk)
182 {
183 	if (sock_flag(sk, SOCK_TIMESTAMP)) {
184 		sock_reset_flag(sk, SOCK_TIMESTAMP);
185 		net_disable_timestamp();
186 	}
187 }
188 
189 
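/*
 * Queue a received skb on sk's receive queue, charging it to the
 * socket's receive buffer and running the socket filter first.
 * Returns -ENOMEM if the receive buffer limit would be exceeded.
 */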
190 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
191 {
192 	int err = 0;
193 	int skb_len;
194 
195 	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
196 	   the number of warnings when compiling with -W --ANK
197 	 */
198 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
199 	    (unsigned)sk->sk_rcvbuf) {
200 		err = -ENOMEM;
201 		goto out;
202 	}
203 
204 	/* It would deadlock if sock_queue_rcv_skb were used
205 	   with the socket lock held! We assume that users of this
206 	   function are lock free.
207 	*/
208 	err = sk_filter(sk, skb, 1);
209 	if (err)
210 		goto out;
211 
212 	skb->dev = NULL;
213 	skb_set_owner_r(skb, sk);
214 
215 	/* Cache the SKB length before we tack it onto the receive
216 	 * queue.  Once it is added it no longer belongs to us and
217 	 * may be freed by other threads of control pulling packets
218 	 * from the queue.
219 	 */
220 	skb_len = skb->len;
221 
222 	skb_queue_tail(&sk->sk_receive_queue, skb);
223 
224 	if (!sock_flag(sk, SOCK_DEAD))
225 		sk->sk_data_ready(sk, skb_len);
226 out:
227 	return err;
228 }
229 EXPORT_SYMBOL(sock_queue_rcv_skb);
230 
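/*
 * Hand an skb to the protocol's backlog receive handler, queueing it on
 * the socket backlog if the socket is currently owned by a user context.
 * Consumes a reference on the socket.
 */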
231 int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
232 {
233 	int rc = NET_RX_SUCCESS;
234 
235 	if (sk_filter(sk, skb, 0))
236 		goto discard_and_relse;
237 
238 	skb->dev = NULL;
239 
240 	bh_lock_sock(sk);
241 	if (!sock_owned_by_user(sk))
242 		rc = sk->sk_backlog_rcv(sk, skb);
243 	else
244 		sk_add_backlog(sk, skb);
245 	bh_unlock_sock(sk);
246 out:
247 	sock_put(sk);
248 	return rc;
249 discard_and_relse:
250 	kfree_skb(skb);
251 	goto out;
252 }
253 EXPORT_SYMBOL(sk_receive_skb);
254 
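/*
 * Validate the socket's cached destination entry, dropping and
 * releasing it if the route has become obsolete.
 */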
255 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
256 {
257 	struct dst_entry *dst = sk->sk_dst_cache;
258 
259 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
260 		sk->sk_dst_cache = NULL;
261 		dst_release(dst);
262 		return NULL;
263 	}
264 
265 	return dst;
266 }
267 EXPORT_SYMBOL(__sk_dst_check);
268 
269 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
270 {
271 	struct dst_entry *dst = sk_dst_get(sk);
272 
273 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
274 		sk_dst_reset(sk);
275 		dst_release(dst);
276 		return NULL;
277 	}
278 
279 	return dst;
280 }
281 EXPORT_SYMBOL(sk_dst_check);
282 
283 /*
284  *	This is meant for all protocols to use and covers goings on
285  *	at the socket level. Everything here is generic.
286  */
287 
288 int sock_setsockopt(struct socket *sock, int level, int optname,
289 		    char __user *optval, int optlen)
290 {
291 	struct sock *sk=sock->sk;
292 	struct sk_filter *filter;
293 	int val;
294 	int valbool;
295 	struct linger ling;
296 	int ret = 0;
297 
298 	/*
299 	 *	Options without arguments
300 	 */
301 
302 #ifdef SO_DONTLINGER		/* Compatibility item... */
303 	if (optname == SO_DONTLINGER) {
304 		lock_sock(sk);
305 		sock_reset_flag(sk, SOCK_LINGER);
306 		release_sock(sk);
307 		return 0;
308 	}
309 #endif
310 
311   	if(optlen<sizeof(int))
312   		return(-EINVAL);
313 
314 	if (get_user(val, (int __user *)optval))
315 		return -EFAULT;
316 
317   	valbool = val?1:0;
318 
319 	lock_sock(sk);
320 
321   	switch(optname)
322   	{
323 		case SO_DEBUG:
324 			if(val && !capable(CAP_NET_ADMIN))
325 			{
326 				ret = -EACCES;
327 			}
328 			else if (valbool)
329 				sock_set_flag(sk, SOCK_DBG);
330 			else
331 				sock_reset_flag(sk, SOCK_DBG);
332 			break;
333 		case SO_REUSEADDR:
334 			sk->sk_reuse = valbool;
335 			break;
336 		case SO_TYPE:
337 		case SO_ERROR:
338 			ret = -ENOPROTOOPT;
339 		  	break;
340 		case SO_DONTROUTE:
341 			if (valbool)
342 				sock_set_flag(sk, SOCK_LOCALROUTE);
343 			else
344 				sock_reset_flag(sk, SOCK_LOCALROUTE);
345 			break;
346 		case SO_BROADCAST:
347 			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
348 			break;
349 		case SO_SNDBUF:
350 			/* Don't error on this; BSD doesn't, and if you think
351 			   about it this is right. Otherwise apps have to
352 			   play 'guess the biggest size' games. RCVBUF/SNDBUF
353 			   are treated in BSD as hints */
354 
355 			if (val > sysctl_wmem_max)
356 				val = sysctl_wmem_max;
357 set_sndbuf:
358 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
359 			if ((val * 2) < SOCK_MIN_SNDBUF)
360 				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
361 			else
362 				sk->sk_sndbuf = val * 2;
363 
364 			/*
365 			 *	Wake up sending tasks if we
366 			 *	upped the value.
367 			 */
368 			sk->sk_write_space(sk);
369 			break;
370 
371 		case SO_SNDBUFFORCE:
372 			if (!capable(CAP_NET_ADMIN)) {
373 				ret = -EPERM;
374 				break;
375 			}
376 			goto set_sndbuf;
377 
378 		case SO_RCVBUF:
379 			/* Don't error on this; BSD doesn't, and if you think
380 			   about it this is right. Otherwise apps have to
381 			   play 'guess the biggest size' games. RCVBUF/SNDBUF
382 			   are treated in BSD as hints */
383 
384 			if (val > sysctl_rmem_max)
385 				val = sysctl_rmem_max;
386 set_rcvbuf:
387 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
388 			/*
389 			 * We double it on the way in to account for
390 			 * "struct sk_buff" etc. overhead.   Applications
391 			 * assume that the SO_RCVBUF setting they make will
392 			 * allow that much actual data to be received on that
393 			 * socket.
394 			 *
395 			 * Applications are unaware that "struct sk_buff" and
396 			 * other overheads allocate from the receive buffer
397 			 * during socket buffer allocation.
398 			 *
399 			 * And after considering the possible alternatives,
400 			 * returning the value we actually used in getsockopt
401 			 * is the most desirable behavior.
402 			 */
403 			if ((val * 2) < SOCK_MIN_RCVBUF)
404 				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
405 			else
406 				sk->sk_rcvbuf = val * 2;
407 			break;
408 
409 		case SO_RCVBUFFORCE:
410 			if (!capable(CAP_NET_ADMIN)) {
411 				ret = -EPERM;
412 				break;
413 			}
414 			goto set_rcvbuf;
415 
416 		case SO_KEEPALIVE:
417 #ifdef CONFIG_INET
418 			if (sk->sk_protocol == IPPROTO_TCP)
419 				tcp_set_keepalive(sk, valbool);
420 #endif
421 			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
422 			break;
423 
424 	 	case SO_OOBINLINE:
425 			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
426 			break;
427 
428 	 	case SO_NO_CHECK:
429 			sk->sk_no_check = valbool;
430 			break;
431 
432 		case SO_PRIORITY:
433 			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
434 				sk->sk_priority = val;
435 			else
436 				ret = -EPERM;
437 			break;
438 
439 		case SO_LINGER:
440 			if(optlen<sizeof(ling)) {
441 				ret = -EINVAL;	/* 1003.1g */
442 				break;
443 			}
444 			if (copy_from_user(&ling,optval,sizeof(ling))) {
445 				ret = -EFAULT;
446 				break;
447 			}
448 			if (!ling.l_onoff)
449 				sock_reset_flag(sk, SOCK_LINGER);
450 			else {
451 #if (BITS_PER_LONG == 32)
452 				if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
453 					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
454 				else
455 #endif
456 					sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
457 				sock_set_flag(sk, SOCK_LINGER);
458 			}
459 			break;
460 
461 		case SO_BSDCOMPAT:
462 			sock_warn_obsolete_bsdism("setsockopt");
463 			break;
464 
465 		case SO_PASSCRED:
466 			if (valbool)
467 				set_bit(SOCK_PASSCRED, &sock->flags);
468 			else
469 				clear_bit(SOCK_PASSCRED, &sock->flags);
470 			break;
471 
472 		case SO_TIMESTAMP:
473 			if (valbool)  {
474 				sock_set_flag(sk, SOCK_RCVTSTAMP);
475 				sock_enable_timestamp(sk);
476 			} else
477 				sock_reset_flag(sk, SOCK_RCVTSTAMP);
478 			break;
479 
480 		case SO_RCVLOWAT:
481 			if (val < 0)
482 				val = INT_MAX;
483 			sk->sk_rcvlowat = val ? : 1;
484 			break;
485 
486 		case SO_RCVTIMEO:
487 			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
488 			break;
489 
490 		case SO_SNDTIMEO:
491 			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
492 			break;
493 
494 #ifdef CONFIG_NETDEVICES
495 		case SO_BINDTODEVICE:
496 		{
497 			char devname[IFNAMSIZ];
498 
499 			/* Sorry... */
500 			if (!capable(CAP_NET_RAW)) {
501 				ret = -EPERM;
502 				break;
503 			}
504 
505 			/* Bind this socket to a particular device like "eth0",
506 			 * as specified in the passed interface name. If the
507 			 * name is "" or the option length is zero the socket
508 			 * is not bound.
509 			 */
510 
511 			if (!valbool) {
512 				sk->sk_bound_dev_if = 0;
513 			} else {
514 				if (optlen > IFNAMSIZ - 1)
515 					optlen = IFNAMSIZ - 1;
516 				memset(devname, 0, sizeof(devname));
517 				if (copy_from_user(devname, optval, optlen)) {
518 					ret = -EFAULT;
519 					break;
520 				}
521 
522 				/* Remove any cached route for this socket. */
523 				sk_dst_reset(sk);
524 
525 				if (devname[0] == '\0') {
526 					sk->sk_bound_dev_if = 0;
527 				} else {
528 					struct net_device *dev = dev_get_by_name(devname);
529 					if (!dev) {
530 						ret = -ENODEV;
531 						break;
532 					}
533 					sk->sk_bound_dev_if = dev->ifindex;
534 					dev_put(dev);
535 				}
536 			}
537 			break;
538 		}
539 #endif
540 
541 
542 		case SO_ATTACH_FILTER:
543 			ret = -EINVAL;
544 			if (optlen == sizeof(struct sock_fprog)) {
545 				struct sock_fprog fprog;
546 
547 				ret = -EFAULT;
548 				if (copy_from_user(&fprog, optval, sizeof(fprog)))
549 					break;
550 
551 				ret = sk_attach_filter(&fprog, sk);
552 			}
553 			break;
554 
555 		case SO_DETACH_FILTER:
556 			spin_lock_bh(&sk->sk_lock.slock);
557 			filter = sk->sk_filter;
558 			if (filter) {
559 				sk->sk_filter = NULL;
560 				spin_unlock_bh(&sk->sk_lock.slock);
561 				sk_filter_release(sk, filter);
562 				break;
563 			}
564 			spin_unlock_bh(&sk->sk_lock.slock);
565 			ret = -ENONET;
566 			break;
567 
568 		/* We implement SO_SNDLOWAT etc. so that they are
569 		   not settable (1003.1g 5.3) */
570 		default:
571 		  	ret = -ENOPROTOOPT;
572 			break;
573   	}
574 	release_sock(sk);
575 	return ret;
576 }
577 
578 
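/*
 *	Read back a socket-level option; the counterpart of
 *	sock_setsockopt() above.
 */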
579 int sock_getsockopt(struct socket *sock, int level, int optname,
580 		    char __user *optval, int __user *optlen)
581 {
582 	struct sock *sk = sock->sk;
583 
584 	union
585 	{
586   		int val;
587   		struct linger ling;
588 		struct timeval tm;
589 	} v;
590 
591 	unsigned int lv = sizeof(int);
592 	int len;
593 
594   	if(get_user(len,optlen))
595   		return -EFAULT;
596 	if(len < 0)
597 		return -EINVAL;
598 
599   	switch(optname)
600   	{
601 		case SO_DEBUG:
602 			v.val = sock_flag(sk, SOCK_DBG);
603 			break;
604 
605 		case SO_DONTROUTE:
606 			v.val = sock_flag(sk, SOCK_LOCALROUTE);
607 			break;
608 
609 		case SO_BROADCAST:
610 			v.val = !!sock_flag(sk, SOCK_BROADCAST);
611 			break;
612 
613 		case SO_SNDBUF:
614 			v.val = sk->sk_sndbuf;
615 			break;
616 
617 		case SO_RCVBUF:
618 			v.val = sk->sk_rcvbuf;
619 			break;
620 
621 		case SO_REUSEADDR:
622 			v.val = sk->sk_reuse;
623 			break;
624 
625 		case SO_KEEPALIVE:
626 			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
627 			break;
628 
629 		case SO_TYPE:
630 			v.val = sk->sk_type;
631 			break;
632 
633 		case SO_ERROR:
634 			v.val = -sock_error(sk);
635 			if(v.val==0)
636 				v.val = xchg(&sk->sk_err_soft, 0);
637 			break;
638 
639 		case SO_OOBINLINE:
640 			v.val = !!sock_flag(sk, SOCK_URGINLINE);
641 			break;
642 
643 		case SO_NO_CHECK:
644 			v.val = sk->sk_no_check;
645 			break;
646 
647 		case SO_PRIORITY:
648 			v.val = sk->sk_priority;
649 			break;
650 
651 		case SO_LINGER:
652 			lv		= sizeof(v.ling);
653 			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
654  			v.ling.l_linger	= sk->sk_lingertime / HZ;
655 			break;
656 
657 		case SO_BSDCOMPAT:
658 			sock_warn_obsolete_bsdism("getsockopt");
659 			break;
660 
661 		case SO_TIMESTAMP:
662 			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
663 			break;
664 
665 		case SO_RCVTIMEO:
666 			lv=sizeof(struct timeval);
667 			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
668 				v.tm.tv_sec = 0;
669 				v.tm.tv_usec = 0;
670 			} else {
671 				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
672 				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
673 			}
674 			break;
675 
676 		case SO_SNDTIMEO:
677 			lv=sizeof(struct timeval);
678 			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
679 				v.tm.tv_sec = 0;
680 				v.tm.tv_usec = 0;
681 			} else {
682 				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
683 				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
684 			}
685 			break;
686 
687 		case SO_RCVLOWAT:
688 			v.val = sk->sk_rcvlowat;
689 			break;
690 
691 		case SO_SNDLOWAT:
692 			v.val=1;
693 			break;
694 
695 		case SO_PASSCRED:
696 			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
697 			break;
698 
699 		case SO_PEERCRED:
700 			if (len > sizeof(sk->sk_peercred))
701 				len = sizeof(sk->sk_peercred);
702 			if (copy_to_user(optval, &sk->sk_peercred, len))
703 				return -EFAULT;
704 			goto lenout;
705 
706 		case SO_PEERNAME:
707 		{
708 			char address[128];
709 
710 			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
711 				return -ENOTCONN;
712 			if (lv < len)
713 				return -EINVAL;
714 			if (copy_to_user(optval, address, len))
715 				return -EFAULT;
716 			goto lenout;
717 		}
718 
719 		/* Dubious BSD thing... Probably nobody even uses it, but
720 		 * the UNIX standard wants it for whatever reason... -DaveM
721 		 */
722 		case SO_ACCEPTCONN:
723 			v.val = sk->sk_state == TCP_LISTEN;
724 			break;
725 
726 		case SO_PEERSEC:
727 			return security_socket_getpeersec_stream(sock, optval, optlen, len);
728 
729 		default:
730 			return(-ENOPROTOOPT);
731 	}
732 	if (len > lv)
733 		len = lv;
734 	if (copy_to_user(optval, &v, len))
735 		return -EFAULT;
736 lenout:
737   	if (put_user(len, optlen))
738   		return -EFAULT;
739   	return 0;
740 }
741 
742 /**
743  *	sk_alloc - All socket objects are allocated here
744  *	@family: protocol family
745  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
746  *	@prot: struct proto associated with this new sock instance
747  *	@zero_it: if we should zero the newly allocated sock
748  */
749 struct sock *sk_alloc(int family, gfp_t priority,
750 		      struct proto *prot, int zero_it)
751 {
752 	struct sock *sk = NULL;
753 	kmem_cache_t *slab = prot->slab;
754 
755 	if (slab != NULL)
756 		sk = kmem_cache_alloc(slab, priority);
757 	else
758 		sk = kmalloc(prot->obj_size, priority);
759 
760 	if (sk) {
761 		if (zero_it) {
762 			memset(sk, 0, prot->obj_size);
763 			sk->sk_family = family;
764 			/*
765 			 * See comment in struct sock definition to understand
766 			 * why we need sk_prot_creator -acme
767 			 */
768 			sk->sk_prot = sk->sk_prot_creator = prot;
769 			sock_lock_init(sk);
770 		}
771 
772 		if (security_sk_alloc(sk, family, priority))
773 			goto out_free;
774 
775 		if (!try_module_get(prot->owner))
776 			goto out_free;
777 	}
778 	return sk;
779 
780 out_free:
781 	if (slab != NULL)
782 		kmem_cache_free(slab, sk);
783 	else
784 		kfree(sk);
785 	return NULL;
786 }
787 
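/*
 * Final destruction of a sock: run the protocol destructor, release any
 * attached filter, then return the memory to the slab cache (or kfree it)
 * and drop the owning module reference.
 */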
788 void sk_free(struct sock *sk)
789 {
790 	struct sk_filter *filter;
791 	struct module *owner = sk->sk_prot_creator->owner;
792 
793 	if (sk->sk_destruct)
794 		sk->sk_destruct(sk);
795 
796 	filter = sk->sk_filter;
797 	if (filter) {
798 		sk_filter_release(sk, filter);
799 		sk->sk_filter = NULL;
800 	}
801 
802 	sock_disable_timestamp(sk);
803 
804 	if (atomic_read(&sk->sk_omem_alloc))
805 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
806 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
807 
808 	security_sk_free(sk);
809 	if (sk->sk_prot_creator->slab != NULL)
810 		kmem_cache_free(sk->sk_prot_creator->slab, sk);
811 	else
812 		kfree(sk);
813 	module_put(owner);
814 }
815 
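/*
 * Duplicate an existing sock, as done when a connection request creates
 * a child socket.  The new sock starts with empty queues, fresh locks
 * and a reference count of 2.
 */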
816 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
817 {
818 	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
819 
820 	if (newsk != NULL) {
821 		struct sk_filter *filter;
822 
823 		memcpy(newsk, sk, sk->sk_prot->obj_size);
824 
825 		/* SANITY */
826 		sk_node_init(&newsk->sk_node);
827 		sock_lock_init(newsk);
828 		bh_lock_sock(newsk);
829 
830 		atomic_set(&newsk->sk_rmem_alloc, 0);
831 		atomic_set(&newsk->sk_wmem_alloc, 0);
832 		atomic_set(&newsk->sk_omem_alloc, 0);
833 		skb_queue_head_init(&newsk->sk_receive_queue);
834 		skb_queue_head_init(&newsk->sk_write_queue);
835 #ifdef CONFIG_NET_DMA
836 		skb_queue_head_init(&newsk->sk_async_wait_queue);
837 #endif
838 
839 		rwlock_init(&newsk->sk_dst_lock);
840 		rwlock_init(&newsk->sk_callback_lock);
841 
842 		newsk->sk_dst_cache	= NULL;
843 		newsk->sk_wmem_queued	= 0;
844 		newsk->sk_forward_alloc = 0;
845 		newsk->sk_send_head	= NULL;
846 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
847 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
848 
849 		sock_reset_flag(newsk, SOCK_DONE);
850 		skb_queue_head_init(&newsk->sk_error_queue);
851 
852 		filter = newsk->sk_filter;
853 		if (filter != NULL)
854 			sk_filter_charge(newsk, filter);
855 
856 		if (unlikely(xfrm_sk_clone_policy(newsk))) {
857 			/* It is still a raw copy of the parent, so invalidate
858 			 * its destructor and do a plain sk_free() */
859 			newsk->sk_destruct = NULL;
860 			sk_free(newsk);
861 			newsk = NULL;
862 			goto out;
863 		}
864 
865 		newsk->sk_err	   = 0;
866 		newsk->sk_priority = 0;
867 		atomic_set(&newsk->sk_refcnt, 2);
868 
869 		/*
870 		 * Increment the counter in the same struct proto as the master
871 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
872 		 * is the same as sk->sk_prot->socks, as this field was copied
873 		 * with memcpy).
874 		 *
875 		 * This _changes_ the previous behaviour, where
876 		 * tcp_create_openreq_child always incremented the
877 		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
878 		 * to be taken into account in all callers. -acme
879 		 */
880 		sk_refcnt_debug_inc(newsk);
881 		newsk->sk_socket = NULL;
882 		newsk->sk_sleep	 = NULL;
883 
884 		if (newsk->sk_prot->sockets_allocated)
885 			atomic_inc(newsk->sk_prot->sockets_allocated);
886 	}
887 out:
888 	return newsk;
889 }
890 
891 EXPORT_SYMBOL_GPL(sk_clone);
892 
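/*
 * Adjust the default socket buffer sysctls at boot according to the
 * amount of physical memory in the machine.
 */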
893 void __init sk_init(void)
894 {
895 	if (num_physpages <= 4096) {
896 		sysctl_wmem_max = 32767;
897 		sysctl_rmem_max = 32767;
898 		sysctl_wmem_default = 32767;
899 		sysctl_rmem_default = 32767;
900 	} else if (num_physpages >= 131072) {
901 		sysctl_wmem_max = 131071;
902 		sysctl_rmem_max = 131071;
903 	}
904 }
905 
906 /*
907  *	Simple resource managers for sockets.
908  */
909 
910 
911 /*
912  * Write buffer destructor automatically called from kfree_skb.
913  */
914 void sock_wfree(struct sk_buff *skb)
915 {
916 	struct sock *sk = skb->sk;
917 
918 	/* In case it might be waiting for more memory. */
919 	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
920 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
921 		sk->sk_write_space(sk);
922 	sock_put(sk);
923 }
924 
925 /*
926  * Read buffer destructor automatically called from kfree_skb.
927  */
928 void sock_rfree(struct sk_buff *skb)
929 {
930 	struct sock *sk = skb->sk;
931 
932 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
933 }
934 
935 
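/*
 * Report the owning inode's uid (or inode number below) for a socket,
 * or 0 if the sock has already been detached from its struct socket.
 */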
936 int sock_i_uid(struct sock *sk)
937 {
938 	int uid;
939 
940 	read_lock(&sk->sk_callback_lock);
941 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
942 	read_unlock(&sk->sk_callback_lock);
943 	return uid;
944 }
945 
946 unsigned long sock_i_ino(struct sock *sk)
947 {
948 	unsigned long ino;
949 
950 	read_lock(&sk->sk_callback_lock);
951 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
952 	read_unlock(&sk->sk_callback_lock);
953 	return ino;
954 }
955 
956 /*
957  * Allocate a skb from the socket's send buffer.
958  */
959 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
960 			     gfp_t priority)
961 {
962 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
963 		struct sk_buff * skb = alloc_skb(size, priority);
964 		if (skb) {
965 			skb_set_owner_w(skb, sk);
966 			return skb;
967 		}
968 	}
969 	return NULL;
970 }
971 
972 /*
973  * Allocate a skb from the socket's receive buffer.
974  */
975 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
976 			     gfp_t priority)
977 {
978 	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
979 		struct sk_buff *skb = alloc_skb(size, priority);
980 		if (skb) {
981 			skb_set_owner_r(skb, sk);
982 			return skb;
983 		}
984 	}
985 	return NULL;
986 }
987 
988 /*
989  * Allocate a memory block from the socket's option memory buffer.
990  */
991 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
992 {
993 	if ((unsigned)size <= sysctl_optmem_max &&
994 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
995 		void *mem;
996 		/* First do the add, to avoid the race if kmalloc
997  		 * might sleep.
998 		 */
999 		atomic_add(size, &sk->sk_omem_alloc);
1000 		mem = kmalloc(size, priority);
1001 		if (mem)
1002 			return mem;
1003 		atomic_sub(size, &sk->sk_omem_alloc);
1004 	}
1005 	return NULL;
1006 }
1007 
1008 /*
1009  * Free an option memory block.
1010  */
1011 void sock_kfree_s(struct sock *sk, void *mem, int size)
1012 {
1013 	kfree(mem);
1014 	atomic_sub(size, &sk->sk_omem_alloc);
1015 }
1016 
1017 /* This is almost wait_for_tcp_memory minus release_sock/lock_sock.
1018    I think these locks should be removed for datagram sockets.
1019  */
1020 static long sock_wait_for_wmem(struct sock * sk, long timeo)
1021 {
1022 	DEFINE_WAIT(wait);
1023 
1024 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1025 	for (;;) {
1026 		if (!timeo)
1027 			break;
1028 		if (signal_pending(current))
1029 			break;
1030 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1031 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1032 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1033 			break;
1034 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1035 			break;
1036 		if (sk->sk_err)
1037 			break;
1038 		timeo = schedule_timeout(timeo);
1039 	}
1040 	finish_wait(sk->sk_sleep, &wait);
1041 	return timeo;
1042 }
1043 
1044 
1045 /*
1046  *	Generic send/receive buffer handlers
1047  */
1048 
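/*
 * Allocate a send buffer of header_len linear bytes plus data_len bytes
 * of page fragments, waiting for write memory if necessary (unless
 * noblock is set).
 */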
1049 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1050 					    unsigned long header_len,
1051 					    unsigned long data_len,
1052 					    int noblock, int *errcode)
1053 {
1054 	struct sk_buff *skb;
1055 	gfp_t gfp_mask;
1056 	long timeo;
1057 	int err;
1058 
1059 	gfp_mask = sk->sk_allocation;
1060 	if (gfp_mask & __GFP_WAIT)
1061 		gfp_mask |= __GFP_REPEAT;
1062 
1063 	timeo = sock_sndtimeo(sk, noblock);
1064 	while (1) {
1065 		err = sock_error(sk);
1066 		if (err != 0)
1067 			goto failure;
1068 
1069 		err = -EPIPE;
1070 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1071 			goto failure;
1072 
1073 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1074 			skb = alloc_skb(header_len, sk->sk_allocation);
1075 			if (skb) {
1076 				int npages;
1077 				int i;
1078 
1079 				/* No pages, we're done... */
1080 				if (!data_len)
1081 					break;
1082 
1083 				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1084 				skb->truesize += data_len;
1085 				skb_shinfo(skb)->nr_frags = npages;
1086 				for (i = 0; i < npages; i++) {
1087 					struct page *page;
1088 					skb_frag_t *frag;
1089 
1090 					page = alloc_pages(sk->sk_allocation, 0);
1091 					if (!page) {
1092 						err = -ENOBUFS;
1093 						skb_shinfo(skb)->nr_frags = i;
1094 						kfree_skb(skb);
1095 						goto failure;
1096 					}
1097 
1098 					frag = &skb_shinfo(skb)->frags[i];
1099 					frag->page = page;
1100 					frag->page_offset = 0;
1101 					frag->size = (data_len >= PAGE_SIZE ?
1102 						      PAGE_SIZE :
1103 						      data_len);
1104 					data_len -= PAGE_SIZE;
1105 				}
1106 
1107 				/* Full success... */
1108 				break;
1109 			}
1110 			err = -ENOBUFS;
1111 			goto failure;
1112 		}
1113 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1114 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1115 		err = -EAGAIN;
1116 		if (!timeo)
1117 			goto failure;
1118 		if (signal_pending(current))
1119 			goto interrupted;
1120 		timeo = sock_wait_for_wmem(sk, timeo);
1121 	}
1122 
1123 	skb_set_owner_w(skb, sk);
1124 	return skb;
1125 
1126 interrupted:
1127 	err = sock_intr_errno(timeo);
1128 failure:
1129 	*errcode = err;
1130 	return NULL;
1131 }
1132 
1133 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1134 				    int noblock, int *errcode)
1135 {
1136 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1137 }
1138 
1139 static void __lock_sock(struct sock *sk)
1140 {
1141 	DEFINE_WAIT(wait);
1142 
1143 	for(;;) {
1144 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1145 					TASK_UNINTERRUPTIBLE);
1146 		spin_unlock_bh(&sk->sk_lock.slock);
1147 		schedule();
1148 		spin_lock_bh(&sk->sk_lock.slock);
1149 		if(!sock_owned_by_user(sk))
1150 			break;
1151 	}
1152 	finish_wait(&sk->sk_lock.wq, &wait);
1153 }
1154 
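/*
 * Process every skb that accumulated on the backlog queue while the
 * socket was owned by a user context.  Called with the socket spinlock
 * held; it is dropped around each backlog_rcv call.
 */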
1155 static void __release_sock(struct sock *sk)
1156 {
1157 	struct sk_buff *skb = sk->sk_backlog.head;
1158 
1159 	do {
1160 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1161 		bh_unlock_sock(sk);
1162 
1163 		do {
1164 			struct sk_buff *next = skb->next;
1165 
1166 			skb->next = NULL;
1167 			sk->sk_backlog_rcv(sk, skb);
1168 
1169 			/*
1170 			 * We are in process context here with softirqs
1171 			 * disabled, use cond_resched_softirq() to preempt.
1172 			 * This is safe to do because we've taken the backlog
1173 			 * queue private:
1174 			 */
1175 			cond_resched_softirq();
1176 
1177 			skb = next;
1178 		} while (skb != NULL);
1179 
1180 		bh_lock_sock(sk);
1181 	} while((skb = sk->sk_backlog.head) != NULL);
1182 }
1183 
1184 /**
1185  * sk_wait_data - wait for data to arrive at sk_receive_queue
1186  * @sk:    sock to wait on
1187  * @timeo: for how long
1188  *
1189  * Now socket state including sk->sk_err is changed only under lock,
1190  * hence we may omit checks after joining wait queue.
1191  * We check receive queue before schedule() only as optimization;
1192  * it is very likely that release_sock() added new data.
1193  */
1194 int sk_wait_data(struct sock *sk, long *timeo)
1195 {
1196 	int rc;
1197 	DEFINE_WAIT(wait);
1198 
1199 	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1200 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1201 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1202 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1203 	finish_wait(sk->sk_sleep, &wait);
1204 	return rc;
1205 }
1206 
1207 EXPORT_SYMBOL(sk_wait_data);
1208 
1209 /*
1210  * Set of default routines for initialising struct proto_ops when
1211  * the protocol does not support a particular function. In certain
1212  * cases where it makes no sense for a protocol to have a "do nothing"
1213  * function, some default processing is provided.
1214  */
1215 
1216 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1217 {
1218 	return -EOPNOTSUPP;
1219 }
1220 
1221 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1222 		    int len, int flags)
1223 {
1224 	return -EOPNOTSUPP;
1225 }
1226 
1227 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1228 {
1229 	return -EOPNOTSUPP;
1230 }
1231 
1232 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1233 {
1234 	return -EOPNOTSUPP;
1235 }
1236 
1237 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1238 		    int *len, int peer)
1239 {
1240 	return -EOPNOTSUPP;
1241 }
1242 
1243 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1244 {
1245 	return 0;
1246 }
1247 
1248 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1249 {
1250 	return -EOPNOTSUPP;
1251 }
1252 
1253 int sock_no_listen(struct socket *sock, int backlog)
1254 {
1255 	return -EOPNOTSUPP;
1256 }
1257 
1258 int sock_no_shutdown(struct socket *sock, int how)
1259 {
1260 	return -EOPNOTSUPP;
1261 }
1262 
1263 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1264 		    char __user *optval, int optlen)
1265 {
1266 	return -EOPNOTSUPP;
1267 }
1268 
1269 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1270 		    char __user *optval, int __user *optlen)
1271 {
1272 	return -EOPNOTSUPP;
1273 }
1274 
1275 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1276 		    size_t len)
1277 {
1278 	return -EOPNOTSUPP;
1279 }
1280 
1281 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1282 		    size_t len, int flags)
1283 {
1284 	return -EOPNOTSUPP;
1285 }
1286 
1287 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1288 {
1289 	/* Mirror missing mmap method error code */
1290 	return -ENODEV;
1291 }
1292 
1293 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1294 {
1295 	ssize_t res;
1296 	struct msghdr msg = {.msg_flags = flags};
1297 	struct kvec iov;
1298 	char *kaddr = kmap(page);
1299 	iov.iov_base = kaddr + offset;
1300 	iov.iov_len = size;
1301 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1302 	kunmap(page);
1303 	return res;
1304 }
1305 
1306 /*
1307  *	Default Socket Callbacks
1308  */
1309 
1310 static void sock_def_wakeup(struct sock *sk)
1311 {
1312 	read_lock(&sk->sk_callback_lock);
1313 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1314 		wake_up_interruptible_all(sk->sk_sleep);
1315 	read_unlock(&sk->sk_callback_lock);
1316 }
1317 
1318 static void sock_def_error_report(struct sock *sk)
1319 {
1320 	read_lock(&sk->sk_callback_lock);
1321 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1322 		wake_up_interruptible(sk->sk_sleep);
1323 	sk_wake_async(sk,0,POLL_ERR);
1324 	read_unlock(&sk->sk_callback_lock);
1325 }
1326 
1327 static void sock_def_readable(struct sock *sk, int len)
1328 {
1329 	read_lock(&sk->sk_callback_lock);
1330 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1331 		wake_up_interruptible(sk->sk_sleep);
1332 	sk_wake_async(sk,1,POLL_IN);
1333 	read_unlock(&sk->sk_callback_lock);
1334 }
1335 
1336 static void sock_def_write_space(struct sock *sk)
1337 {
1338 	read_lock(&sk->sk_callback_lock);
1339 
1340 	/* Do not wake up a writer until he can make "significant"
1341 	 * progress.  --DaveM
1342 	 */
1343 	if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1344 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1345 			wake_up_interruptible(sk->sk_sleep);
1346 
1347 		/* Should agree with poll, otherwise some programs break */
1348 		if (sock_writeable(sk))
1349 			sk_wake_async(sk, 2, POLL_OUT);
1350 	}
1351 
1352 	read_unlock(&sk->sk_callback_lock);
1353 }
1354 
1355 static void sock_def_destruct(struct sock *sk)
1356 {
1357 	kfree(sk->sk_protinfo);
1358 }
1359 
1360 void sk_send_sigurg(struct sock *sk)
1361 {
1362 	if (sk->sk_socket && sk->sk_socket->file)
1363 		if (send_sigurg(&sk->sk_socket->file->f_owner))
1364 			sk_wake_async(sk, 3, POLL_PRI);
1365 }
1366 
1367 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1368 		    unsigned long expires)
1369 {
1370 	if (!mod_timer(timer, expires))
1371 		sock_hold(sk);
1372 }
1373 
1374 EXPORT_SYMBOL(sk_reset_timer);
1375 
1376 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1377 {
1378 	if (timer_pending(timer) && del_timer(timer))
1379 		__sock_put(sk);
1380 }
1381 
1382 EXPORT_SYMBOL(sk_stop_timer);
1383 
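/*
 * Initialise the generic fields of a newly allocated sock and attach it
 * to its owning struct socket, installing the default callbacks above.
 */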
1384 void sock_init_data(struct socket *sock, struct sock *sk)
1385 {
1386 	skb_queue_head_init(&sk->sk_receive_queue);
1387 	skb_queue_head_init(&sk->sk_write_queue);
1388 	skb_queue_head_init(&sk->sk_error_queue);
1389 #ifdef CONFIG_NET_DMA
1390 	skb_queue_head_init(&sk->sk_async_wait_queue);
1391 #endif
1392 
1393 	sk->sk_send_head	=	NULL;
1394 
1395 	init_timer(&sk->sk_timer);
1396 
1397 	sk->sk_allocation	=	GFP_KERNEL;
1398 	sk->sk_rcvbuf		=	sysctl_rmem_default;
1399 	sk->sk_sndbuf		=	sysctl_wmem_default;
1400 	sk->sk_state		=	TCP_CLOSE;
1401 	sk->sk_socket		=	sock;
1402 
1403 	sock_set_flag(sk, SOCK_ZAPPED);
1404 
1405 	if(sock)
1406 	{
1407 		sk->sk_type	=	sock->type;
1408 		sk->sk_sleep	=	&sock->wait;
1409 		sock->sk	=	sk;
1410 	} else
1411 		sk->sk_sleep	=	NULL;
1412 
1413 	rwlock_init(&sk->sk_dst_lock);
1414 	rwlock_init(&sk->sk_callback_lock);
1415 
1416 	sk->sk_state_change	=	sock_def_wakeup;
1417 	sk->sk_data_ready	=	sock_def_readable;
1418 	sk->sk_write_space	=	sock_def_write_space;
1419 	sk->sk_error_report	=	sock_def_error_report;
1420 	sk->sk_destruct		=	sock_def_destruct;
1421 
1422 	sk->sk_sndmsg_page	=	NULL;
1423 	sk->sk_sndmsg_off	=	0;
1424 
1425 	sk->sk_peercred.pid 	=	0;
1426 	sk->sk_peercred.uid	=	-1;
1427 	sk->sk_peercred.gid	=	-1;
1428 	sk->sk_write_pending	=	0;
1429 	sk->sk_rcvlowat		=	1;
1430 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
1431 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
1432 
1433 	sk->sk_stamp.tv_sec     = -1L;
1434 	sk->sk_stamp.tv_usec    = -1L;
1435 
1436 	atomic_set(&sk->sk_refcnt, 1);
1437 }
1438 
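/*
 * lock_sock/release_sock implement the socket "ownership" lock used from
 * process context: while the socket is owned, softirq input is diverted
 * to the backlog queue and replayed by __release_sock() on release.
 */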
1439 void fastcall lock_sock(struct sock *sk)
1440 {
1441 	might_sleep();
1442 	spin_lock_bh(&(sk->sk_lock.slock));
1443 	if (sk->sk_lock.owner)
1444 		__lock_sock(sk);
1445 	sk->sk_lock.owner = (void *)1;
1446 	spin_unlock_bh(&(sk->sk_lock.slock));
1447 }
1448 
1449 EXPORT_SYMBOL(lock_sock);
1450 
1451 void fastcall release_sock(struct sock *sk)
1452 {
1453 	spin_lock_bh(&(sk->sk_lock.slock));
1454 	if (sk->sk_backlog.tail)
1455 		__release_sock(sk);
1456 	sk->sk_lock.owner = NULL;
1457 	if (waitqueue_active(&(sk->sk_lock.wq)))
1458 		wake_up(&(sk->sk_lock.wq));
1459 	spin_unlock_bh(&(sk->sk_lock.slock));
1460 }
1461 EXPORT_SYMBOL(release_sock);
1462 
1463 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1464 {
1465 	if (!sock_flag(sk, SOCK_TIMESTAMP))
1466 		sock_enable_timestamp(sk);
1467 	if (sk->sk_stamp.tv_sec == -1)
1468 		return -ENOENT;
1469 	if (sk->sk_stamp.tv_sec == 0)
1470 		do_gettimeofday(&sk->sk_stamp);
1471 	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
1472 		-EFAULT : 0;
1473 }
1474 EXPORT_SYMBOL(sock_get_timestamp);
1475 
1476 void sock_enable_timestamp(struct sock *sk)
1477 {
1478 	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1479 		sock_set_flag(sk, SOCK_TIMESTAMP);
1480 		net_enable_timestamp();
1481 	}
1482 }
1483 EXPORT_SYMBOL(sock_enable_timestamp);
1484 
1485 /*
1486  *	Get a socket option on a socket.
1487  *
1488  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
1489  *	asynchronous errors should be reported by getsockopt. We assume
1490  *	this means if you specify SO_ERROR (otherwise what's the point of it).
1491  */
1492 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1493 			   char __user *optval, int __user *optlen)
1494 {
1495 	struct sock *sk = sock->sk;
1496 
1497 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1498 }
1499 
1500 EXPORT_SYMBOL(sock_common_getsockopt);
1501 
1502 #ifdef CONFIG_COMPAT
1503 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1504 				  char __user *optval, int __user *optlen)
1505 {
1506 	struct sock *sk = sock->sk;
1507 
1508 	if (sk->sk_prot->compat_getsockopt != NULL)
1509 		return sk->sk_prot->compat_getsockopt(sk, level, optname,
1510 						      optval, optlen);
1511 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1512 }
1513 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1514 #endif
1515 
1516 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1517 			struct msghdr *msg, size_t size, int flags)
1518 {
1519 	struct sock *sk = sock->sk;
1520 	int addr_len = 0;
1521 	int err;
1522 
1523 	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1524 				   flags & ~MSG_DONTWAIT, &addr_len);
1525 	if (err >= 0)
1526 		msg->msg_namelen = addr_len;
1527 	return err;
1528 }
1529 
1530 EXPORT_SYMBOL(sock_common_recvmsg);
1531 
1532 /*
1533  *	Set socket options on a socket.
1534  */
1535 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1536 			   char __user *optval, int optlen)
1537 {
1538 	struct sock *sk = sock->sk;
1539 
1540 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1541 }
1542 
1543 EXPORT_SYMBOL(sock_common_setsockopt);
1544 
1545 #ifdef CONFIG_COMPAT
1546 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1547 				  char __user *optval, int optlen)
1548 {
1549 	struct sock *sk = sock->sk;
1550 
1551 	if (sk->sk_prot->compat_setsockopt != NULL)
1552 		return sk->sk_prot->compat_setsockopt(sk, level, optname,
1553 						      optval, optlen);
1554 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1555 }
1556 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1557 #endif
1558 
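/*
 * Common tear-down path for protocols: destroy protocol-private state,
 * unhash the socket, detach it from its struct socket (sock_orphan) and
 * drop a reference.
 */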
1559 void sk_common_release(struct sock *sk)
1560 {
1561 	if (sk->sk_prot->destroy)
1562 		sk->sk_prot->destroy(sk);
1563 
1564 	/*
1565 	 * Observation: when sock_common_release is called, processes have
1566 	 * no access to the socket, but the network stack still does.
1567 	 * Step one, detach it from networking:
1568 	 *
1569 	 * A. Remove from hash tables.
1570 	 */
1571 
1572 	sk->sk_prot->unhash(sk);
1573 
1574 	/*
1575 	 * At this point the socket cannot receive new packets, but it is possible
1576 	 * that some packets are in flight, because some CPU ran the receiver and
1577 	 * did the hash table lookup before we unhashed the socket. They will reach
1578 	 * the receive queue and be purged by the socket destructor.
1579 	 *
1580 	 * Also we still have packets pending on the receive queue and probably
1581 	 * our own packets waiting in device queues. sock_destroy will drain the
1582 	 * receive queue, but transmitted packets will delay socket destruction
1583 	 * until the last reference is released.
1584 	 */
1585 
1586 	sock_orphan(sk);
1587 
1588 	xfrm_sk_free_policy(sk);
1589 
1590 	sk_refcnt_debug_release(sk);
1591 	sock_put(sk);
1592 }
1593 
1594 EXPORT_SYMBOL(sk_common_release);
1595 
1596 static DEFINE_RWLOCK(proto_list_lock);
1597 static LIST_HEAD(proto_list);
1598 
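/*
 * Register a protocol with the socket layer, optionally creating slab
 * caches for its sock, request_sock and timewait_sock objects, and add
 * it to the global proto_list shown in /proc/net/protocols.
 */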
1599 int proto_register(struct proto *prot, int alloc_slab)
1600 {
1601 	char *request_sock_slab_name = NULL;
1602 	char *timewait_sock_slab_name;
1603 	int rc = -ENOBUFS;
1604 
1605 	if (alloc_slab) {
1606 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1607 					       SLAB_HWCACHE_ALIGN, NULL, NULL);
1608 
1609 		if (prot->slab == NULL) {
1610 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1611 			       prot->name);
1612 			goto out;
1613 		}
1614 
1615 		if (prot->rsk_prot != NULL) {
1616 			static const char mask[] = "request_sock_%s";
1617 
1618 			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1619 			if (request_sock_slab_name == NULL)
1620 				goto out_free_sock_slab;
1621 
1622 			sprintf(request_sock_slab_name, mask, prot->name);
1623 			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1624 								 prot->rsk_prot->obj_size, 0,
1625 								 SLAB_HWCACHE_ALIGN, NULL, NULL);
1626 
1627 			if (prot->rsk_prot->slab == NULL) {
1628 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1629 				       prot->name);
1630 				goto out_free_request_sock_slab_name;
1631 			}
1632 		}
1633 
1634 		if (prot->twsk_prot != NULL) {
1635 			static const char mask[] = "tw_sock_%s";
1636 
1637 			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1638 
1639 			if (timewait_sock_slab_name == NULL)
1640 				goto out_free_request_sock_slab;
1641 
1642 			sprintf(timewait_sock_slab_name, mask, prot->name);
1643 			prot->twsk_prot->twsk_slab =
1644 				kmem_cache_create(timewait_sock_slab_name,
1645 						  prot->twsk_prot->twsk_obj_size,
1646 						  0, SLAB_HWCACHE_ALIGN,
1647 						  NULL, NULL);
1648 			if (prot->twsk_prot->twsk_slab == NULL)
1649 				goto out_free_timewait_sock_slab_name;
1650 		}
1651 	}
1652 
1653 	write_lock(&proto_list_lock);
1654 	list_add(&prot->node, &proto_list);
1655 	write_unlock(&proto_list_lock);
1656 	rc = 0;
1657 out:
1658 	return rc;
1659 out_free_timewait_sock_slab_name:
1660 	kfree(timewait_sock_slab_name);
1661 out_free_request_sock_slab:
1662 	if (prot->rsk_prot && prot->rsk_prot->slab) {
1663 		kmem_cache_destroy(prot->rsk_prot->slab);
1664 		prot->rsk_prot->slab = NULL;
1665 	}
1666 out_free_request_sock_slab_name:
1667 	kfree(request_sock_slab_name);
1668 out_free_sock_slab:
1669 	kmem_cache_destroy(prot->slab);
1670 	prot->slab = NULL;
1671 	goto out;
1672 }
1673 
1674 EXPORT_SYMBOL(proto_register);
1675 
1676 void proto_unregister(struct proto *prot)
1677 {
1678 	write_lock(&proto_list_lock);
1679 	list_del(&prot->node);
1680 	write_unlock(&proto_list_lock);
1681 
1682 	if (prot->slab != NULL) {
1683 		kmem_cache_destroy(prot->slab);
1684 		prot->slab = NULL;
1685 	}
1686 
1687 	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1688 		const char *name = kmem_cache_name(prot->rsk_prot->slab);
1689 
1690 		kmem_cache_destroy(prot->rsk_prot->slab);
1691 		kfree(name);
1692 		prot->rsk_prot->slab = NULL;
1693 	}
1694 
1695 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1696 		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1697 
1698 		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1699 		kfree(name);
1700 		prot->twsk_prot->twsk_slab = NULL;
1701 	}
1702 }
1703 
1704 EXPORT_SYMBOL(proto_unregister);
1705 
1706 #ifdef CONFIG_PROC_FS
1707 static inline struct proto *__proto_head(void)
1708 {
1709 	return list_entry(proto_list.next, struct proto, node);
1710 }
1711 
1712 static inline struct proto *proto_head(void)
1713 {
1714 	return list_empty(&proto_list) ? NULL : __proto_head();
1715 }
1716 
1717 static inline struct proto *proto_next(struct proto *proto)
1718 {
1719 	return proto->node.next == &proto_list ? NULL :
1720 		list_entry(proto->node.next, struct proto, node);
1721 }
1722 
1723 static inline struct proto *proto_get_idx(loff_t pos)
1724 {
1725 	struct proto *proto;
1726 	loff_t i = 0;
1727 
1728 	list_for_each_entry(proto, &proto_list, node)
1729 		if (i++ == pos)
1730 			goto out;
1731 
1732 	proto = NULL;
1733 out:
1734 	return proto;
1735 }
1736 
1737 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1738 {
1739 	read_lock(&proto_list_lock);
1740 	return *pos ? proto_get_idx(*pos - 1) : SEQ_START_TOKEN;
1741 }
1742 
1743 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1744 {
1745 	++*pos;
1746 	return v == SEQ_START_TOKEN ? proto_head() : proto_next(v);
1747 }
1748 
1749 static void proto_seq_stop(struct seq_file *seq, void *v)
1750 {
1751 	read_unlock(&proto_list_lock);
1752 }
1753 
1754 static char proto_method_implemented(const void *method)
1755 {
1756 	return method == NULL ? 'n' : 'y';
1757 }
1758 
1759 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1760 {
1761 	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1762 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1763 		   proto->name,
1764 		   proto->obj_size,
1765 		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1766 		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1767 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1768 		   proto->max_header,
1769 		   proto->slab == NULL ? "no" : "yes",
1770 		   module_name(proto->owner),
1771 		   proto_method_implemented(proto->close),
1772 		   proto_method_implemented(proto->connect),
1773 		   proto_method_implemented(proto->disconnect),
1774 		   proto_method_implemented(proto->accept),
1775 		   proto_method_implemented(proto->ioctl),
1776 		   proto_method_implemented(proto->init),
1777 		   proto_method_implemented(proto->destroy),
1778 		   proto_method_implemented(proto->shutdown),
1779 		   proto_method_implemented(proto->setsockopt),
1780 		   proto_method_implemented(proto->getsockopt),
1781 		   proto_method_implemented(proto->sendmsg),
1782 		   proto_method_implemented(proto->recvmsg),
1783 		   proto_method_implemented(proto->sendpage),
1784 		   proto_method_implemented(proto->bind),
1785 		   proto_method_implemented(proto->backlog_rcv),
1786 		   proto_method_implemented(proto->hash),
1787 		   proto_method_implemented(proto->unhash),
1788 		   proto_method_implemented(proto->get_port),
1789 		   proto_method_implemented(proto->enter_memory_pressure));
1790 }
1791 
1792 static int proto_seq_show(struct seq_file *seq, void *v)
1793 {
1794 	if (v == SEQ_START_TOKEN)
1795 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1796 			   "protocol",
1797 			   "size",
1798 			   "sockets",
1799 			   "memory",
1800 			   "press",
1801 			   "maxhdr",
1802 			   "slab",
1803 			   "module",
1804 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1805 	else
1806 		proto_seq_printf(seq, v);
1807 	return 0;
1808 }
1809 
1810 static struct seq_operations proto_seq_ops = {
1811 	.start  = proto_seq_start,
1812 	.next   = proto_seq_next,
1813 	.stop   = proto_seq_stop,
1814 	.show   = proto_seq_show,
1815 };
1816 
1817 static int proto_seq_open(struct inode *inode, struct file *file)
1818 {
1819 	return seq_open(file, &proto_seq_ops);
1820 }
1821 
1822 static struct file_operations proto_seq_fops = {
1823 	.owner		= THIS_MODULE,
1824 	.open		= proto_seq_open,
1825 	.read		= seq_read,
1826 	.llseek		= seq_lseek,
1827 	.release	= seq_release,
1828 };
1829 
1830 static int __init proto_init(void)
1831 {
1832 	/* register /proc/net/protocols */
1833 	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1834 }
1835 
1836 subsys_initcall(proto_init);
1837 
1838 #endif /* PROC_FS */
1839 
1840 EXPORT_SYMBOL(sk_alloc);
1841 EXPORT_SYMBOL(sk_free);
1842 EXPORT_SYMBOL(sk_send_sigurg);
1843 EXPORT_SYMBOL(sock_alloc_send_skb);
1844 EXPORT_SYMBOL(sock_init_data);
1845 EXPORT_SYMBOL(sock_kfree_s);
1846 EXPORT_SYMBOL(sock_kmalloc);
1847 EXPORT_SYMBOL(sock_no_accept);
1848 EXPORT_SYMBOL(sock_no_bind);
1849 EXPORT_SYMBOL(sock_no_connect);
1850 EXPORT_SYMBOL(sock_no_getname);
1851 EXPORT_SYMBOL(sock_no_getsockopt);
1852 EXPORT_SYMBOL(sock_no_ioctl);
1853 EXPORT_SYMBOL(sock_no_listen);
1854 EXPORT_SYMBOL(sock_no_mmap);
1855 EXPORT_SYMBOL(sock_no_poll);
1856 EXPORT_SYMBOL(sock_no_recvmsg);
1857 EXPORT_SYMBOL(sock_no_sendmsg);
1858 EXPORT_SYMBOL(sock_no_sendpage);
1859 EXPORT_SYMBOL(sock_no_setsockopt);
1860 EXPORT_SYMBOL(sock_no_shutdown);
1861 EXPORT_SYMBOL(sock_no_socketpair);
1862 EXPORT_SYMBOL(sock_rfree);
1863 EXPORT_SYMBOL(sock_setsockopt);
1864 EXPORT_SYMBOL(sock_wfree);
1865 EXPORT_SYMBOL(sock_wmalloc);
1866 EXPORT_SYMBOL(sock_i_uid);
1867 EXPORT_SYMBOL(sock_i_ino);
1868 EXPORT_SYMBOL(sysctl_optmem_max);
1869 #ifdef CONFIG_SYSCTL
1870 EXPORT_SYMBOL(sysctl_rmem_max);
1871 EXPORT_SYMBOL(sysctl_wmem_max);
1872 #endif
1873