xref: /linux/net/core/sock.c (revision 05dc8c02bf40090e9ed23932b1980ead48eb8870)
1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		Generic socket support routines. Memory allocators, socket lock/release
7  *		handler for protocols to use and generic option handler.
8  *
9  *
10  * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11  *
12  * Authors:	Ross Biro
13  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *		Florian La Roche, <flla@stud.uni-sb.de>
15  *		Alan Cox, <A.Cox@swansea.ac.uk>
16  *
17  * Fixes:
18  *		Alan Cox	: 	Numerous verify_area() problems
19  *		Alan Cox	:	Connecting on a connecting socket
20  *					now returns an error for tcp.
21  *		Alan Cox	:	sock->protocol is set correctly.
22  *					and is not sometimes left as 0.
23  *		Alan Cox	:	connect handles icmp errors on a
24  *					connect properly. Unfortunately there
25  *					is a restart syscall nasty there. I
26  *					can't match BSD without hacking the C
27  *					library. Ideas urgently sought!
28  *		Alan Cox	:	Disallow bind() to addresses that are
29  *					not ours - especially broadcast ones!!
30  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
31  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
32  *					instead they leave that for the DESTROY timer.
33  *		Alan Cox	:	Clean up error flag in accept
34  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
35  *					was buggy. Put a remove_sock() in the handler
36  *					for memory when we hit 0. Also altered the timer
37  *					code. The ACK stuff can wait and needs major
38  *					TCP layer surgery.
39  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
40  *					and fixed timer/inet_bh race.
41  *		Alan Cox	:	Added zapped flag for TCP
42  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
43  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
45  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
46  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
48  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
49  *	Pauline Middelink	:	identd support
50  *		Alan Cox	:	Fixed connect() taking signals I think.
51  *		Alan Cox	:	SO_LINGER supported
52  *		Alan Cox	:	Error reporting fixes
53  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
54  *		Alan Cox	:	inet sockets don't set sk->type!
55  *		Alan Cox	:	Split socket option code
56  *		Alan Cox	:	Callbacks
57  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
58  *		Alex		:	Removed restriction on inet fioctl
59  *		Alan Cox	:	Splitting INET from NET core
60  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
61  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
62  *		Alan Cox	:	Split IP from generic code
63  *		Alan Cox	:	New kfree_skbmem()
64  *		Alan Cox	:	Make SO_DEBUG superuser only.
65  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
66  *					(compatibility fix)
67  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
68  *		Alan Cox	:	Allocator for a socket is settable.
69  *		Alan Cox	:	SO_ERROR includes soft errors.
70  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
71  *		Alan Cox	: 	Generic socket allocation to make hooks
72  *					easier (suggested by Craig Metz).
73  *		Michael Pall	:	SO_ERROR returns positive errno again
74  *		Steve Whitehouse:	Added default destructor to free
75  *					protocol private data.
76  *		Steve Whitehouse:	Added various other default routines
77  *					common to several socket families.
78  *		Chris Evans	:	Call suser() check last on F_SETOWN
79  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
81  *		Andi Kleen	:	Fix write_space callback
82  *		Chris Evans	:	Security fixes - signedness again
83  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
84  *
85  * To Fix:
86  *
87  *
88  *		This program is free software; you can redistribute it and/or
89  *		modify it under the terms of the GNU General Public License
90  *		as published by the Free Software Foundation; either version
91  *		2 of the License, or (at your option) any later version.
92  */
93 
94 #include <linux/capability.h>
95 #include <linux/errno.h>
96 #include <linux/types.h>
97 #include <linux/socket.h>
98 #include <linux/in.h>
99 #include <linux/kernel.h>
100 #include <linux/module.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/sched.h>
104 #include <linux/timer.h>
105 #include <linux/string.h>
106 #include <linux/sockios.h>
107 #include <linux/net.h>
108 #include <linux/mm.h>
109 #include <linux/slab.h>
110 #include <linux/interrupt.h>
111 #include <linux/poll.h>
112 #include <linux/tcp.h>
113 #include <linux/init.h>
114 #include <linux/highmem.h>
115 
116 #include <asm/uaccess.h>
117 #include <asm/system.h>
118 
119 #include <linux/netdevice.h>
120 #include <net/protocol.h>
121 #include <linux/skbuff.h>
122 #include <net/request_sock.h>
123 #include <net/sock.h>
124 #include <net/xfrm.h>
125 #include <linux/ipsec.h>
126 
127 #include <linux/filter.h>
128 
129 #ifdef CONFIG_INET
130 #include <net/tcp.h>
131 #endif
132 
133 /*
134  * Each address family might have different locking rules, so we have
135  * one slock key per address family:
136  */
137 static struct lock_class_key af_family_keys[AF_MAX];
138 static struct lock_class_key af_family_slock_keys[AF_MAX];
139 
140 #ifdef CONFIG_DEBUG_LOCK_ALLOC
141 /*
142  * Make lock validator output more readable. (we pre-construct these
143  * strings at build time, so that runtime initialization of socket
144  * locks is fast):
145  */
146 static const char *af_family_key_strings[AF_MAX+1] = {
147   "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
148   "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
149   "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
150   "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
151   "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
152   "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
153   "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
154   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
155   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
156   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
157   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
158   "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
159 };
160 static const char *af_family_slock_key_strings[AF_MAX+1] = {
161   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
162   "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
163   "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
164   "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
165   "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
166   "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
167   "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
168   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
169   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
170   "slock-27"       , "slock-28"          , "slock-29"          ,
171   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
172   "slock-AF_RXRPC" , "slock-AF_MAX"
173 };
174 static const char *af_family_clock_key_strings[AF_MAX+1] = {
175   "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
176   "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
177   "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
178   "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
179   "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
180   "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
181   "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
182   "clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
183   "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
184   "clock-27"       , "clock-28"          , "clock-29"          ,
185   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
186   "clock-AF_RXRPC" , "clock-AF_MAX"
187 };
188 #endif
189 
190 /*
191  * sk_callback_lock locking rules are per-address-family,
192  * so split the lock classes by using a per-AF key:
193  */
194 static struct lock_class_key af_callback_keys[AF_MAX];
195 
196 /* Take into consideration the size of the struct sk_buff overhead in the
197  * determination of these values, since that is non-constant across
198  * platforms.  This makes socket queueing behavior and performance
199  * not depend upon such differences.
200  */
201 #define _SK_MEM_PACKETS		256
202 #define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
203 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
204 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
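
/*
 * Illustrative sizing (not exact, since sizeof(struct sk_buff) differs by
 * architecture and config): SK_WMEM_MAX and SK_RMEM_MAX work out to
 * (sizeof(struct sk_buff) + 256) * 256 bytes, i.e. roughly 100-130 KB of
 * default send/receive buffer space per socket.
 */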
205 
206 /* Run time adjustable parameters. */
207 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
208 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
209 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
210 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
211 
212 /* Maximal space eaten by iovec or ancillary data plus some space */
213 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
214 
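/*
 * Convert a userspace struct timeval into a timeout in jiffies, rounding
 * the microsecond part up.  An illustrative caller from userspace:
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 *
 * which with HZ=100 becomes 2 * 100 + 500000 / 10000 = 250 jiffies.
 */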
215 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
216 {
217 	struct timeval tv;
218 
219 	if (optlen < sizeof(tv))
220 		return -EINVAL;
221 	if (copy_from_user(&tv, optval, sizeof(tv)))
222 		return -EFAULT;
223 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
224 		return -EDOM;
225 
226 	if (tv.tv_sec < 0) {
227 		static int warned __read_mostly;
228 
229 		*timeo_p = 0;
230 		if (warned < 10 && net_ratelimit()) {
231 			warned++;
232 			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
233 			       "tries to set negative timeout\n", current->comm, current->pid);
234 		}
235 		return 0;
236 	}
237 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
238 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
239 		return 0;
240 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
241 		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
242 	return 0;
243 }
244 
245 static void sock_warn_obsolete_bsdism(const char *name)
246 {
247 	static int warned;
248 	static char warncomm[TASK_COMM_LEN];
249 	if (strcmp(warncomm, current->comm) && warned < 5) {
250 		strcpy(warncomm,  current->comm);
251 		printk(KERN_WARNING "process `%s' is using obsolete "
252 		       "%s SO_BSDCOMPAT\n", warncomm, name);
253 		warned++;
254 	}
255 }
256 
257 static void sock_disable_timestamp(struct sock *sk)
258 {
259 	if (sock_flag(sk, SOCK_TIMESTAMP)) {
260 		sock_reset_flag(sk, SOCK_TIMESTAMP);
261 		net_disable_timestamp();
262 	}
263 }
264 
265 
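/**
 *	sock_queue_rcv_skb - queue a buffer on a socket's receive queue
 *	@sk: destination socket
 *	@skb: buffer to queue
 *
 *	Charges @skb against @sk's receive buffer, runs the socket filter and,
 *	on success, appends it to sk_receive_queue and wakes readers through
 *	sk_data_ready().  Returns 0 on success, -ENOMEM if the receive buffer
 *	limit would be exceeded, or the filter's error code.
 */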
266 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
267 {
268 	int err = 0;
269 	int skb_len;
270 
271 	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
272 	   the number of warnings when compiling with -W --ANK
273 	 */
274 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
275 	    (unsigned)sk->sk_rcvbuf) {
276 		err = -ENOMEM;
277 		goto out;
278 	}
279 
280 	err = sk_filter(sk, skb);
281 	if (err)
282 		goto out;
283 
284 	skb->dev = NULL;
285 	skb_set_owner_r(skb, sk);
286 
287 	/* Cache the SKB length before we tack it onto the receive
288 	 * queue.  Once it is added it no longer belongs to us and
289 	 * may be freed by other threads of control pulling packets
290 	 * from the queue.
291 	 */
292 	skb_len = skb->len;
293 
294 	skb_queue_tail(&sk->sk_receive_queue, skb);
295 
296 	if (!sock_flag(sk, SOCK_DEAD))
297 		sk->sk_data_ready(sk, skb_len);
298 out:
299 	return err;
300 }
301 EXPORT_SYMBOL(sock_queue_rcv_skb);
302 
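/**
 *	sk_receive_skb - deliver a buffer to a socket from bottom-half context
 *	@sk: destination socket; the reference held by the caller is consumed
 *	@skb: buffer to deliver
 *	@nested: use the nested socket lock annotation for lockdep
 *
 *	Runs the socket filter, then either invokes sk_backlog_rcv() directly
 *	when no user context owns the socket, or queues @skb on the backlog to
 *	be replayed by release_sock().  Returns NET_RX_SUCCESS unless
 *	sk_backlog_rcv() was called directly and returned something else.
 */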
303 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
304 {
305 	int rc = NET_RX_SUCCESS;
306 
307 	if (sk_filter(sk, skb))
308 		goto discard_and_relse;
309 
310 	skb->dev = NULL;
311 
312 	if (nested)
313 		bh_lock_sock_nested(sk);
314 	else
315 		bh_lock_sock(sk);
316 	if (!sock_owned_by_user(sk)) {
317 		/*
318 		 * trylock + unlock semantics:
319 		 */
320 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
321 
322 		rc = sk->sk_backlog_rcv(sk, skb);
323 
324 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
325 	} else
326 		sk_add_backlog(sk, skb);
327 	bh_unlock_sock(sk);
328 out:
329 	sock_put(sk);
330 	return rc;
331 discard_and_relse:
332 	kfree_skb(skb);
333 	goto out;
334 }
335 EXPORT_SYMBOL(sk_receive_skb);
336 
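/*
 * Validate a socket's cached route: if the dst is marked obsolete and its
 * ->check() op can no longer revalidate it, drop it from the socket and
 * return NULL so the caller re-routes.  __sk_dst_check() relies on the
 * caller holding the socket lock; sk_dst_check() takes sk_dst_lock itself
 * via sk_dst_get()/sk_dst_reset().
 */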
337 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
338 {
339 	struct dst_entry *dst = sk->sk_dst_cache;
340 
341 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
342 		sk->sk_dst_cache = NULL;
343 		dst_release(dst);
344 		return NULL;
345 	}
346 
347 	return dst;
348 }
349 EXPORT_SYMBOL(__sk_dst_check);
350 
351 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
352 {
353 	struct dst_entry *dst = sk_dst_get(sk);
354 
355 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
356 		sk_dst_reset(sk);
357 		dst_release(dst);
358 		return NULL;
359 	}
360 
361 	return dst;
362 }
363 EXPORT_SYMBOL(sk_dst_check);
364 
365 static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
366 {
367 	int ret = -ENOPROTOOPT;
368 #ifdef CONFIG_NETDEVICES
369 	char devname[IFNAMSIZ];
370 	int index;
371 
372 	/* Sorry... */
373 	ret = -EPERM;
374 	if (!capable(CAP_NET_RAW))
375 		goto out;
376 
377 	ret = -EINVAL;
378 	if (optlen < 0)
379 		goto out;
380 
381 	/* Bind this socket to a particular device like "eth0",
382 	 * as specified in the passed interface name. If the
383 	 * name is "" or the option length is zero the socket
384 	 * is not bound.
385 	 */
386 	if (optlen > IFNAMSIZ - 1)
387 		optlen = IFNAMSIZ - 1;
388 	memset(devname, 0, sizeof(devname));
389 
390 	ret = -EFAULT;
391 	if (copy_from_user(devname, optval, optlen))
392 		goto out;
393 
394 	if (devname[0] == '\0') {
395 		index = 0;
396 	} else {
397 		struct net_device *dev = dev_get_by_name(devname);
398 
399 		ret = -ENODEV;
400 		if (!dev)
401 			goto out;
402 
403 		index = dev->ifindex;
404 		dev_put(dev);
405 	}
406 
407 	lock_sock(sk);
408 	sk->sk_bound_dev_if = index;
409 	sk_dst_reset(sk);
410 	release_sock(sk);
411 
412 	ret = 0;
413 
414 out:
415 #endif
416 
417 	return ret;
418 }
419 
420 /*
421  *	This is meant for all protocols to use and covers goings on
422  *	at the socket level. Everything here is generic.
423  */
424 
425 int sock_setsockopt(struct socket *sock, int level, int optname,
426 		    char __user *optval, int optlen)
427 {
428 	struct sock *sk=sock->sk;
429 	struct sk_filter *filter;
430 	int val;
431 	int valbool;
432 	struct linger ling;
433 	int ret = 0;
434 
435 	/*
436 	 *	Options without arguments
437 	 */
438 
439 #ifdef SO_DONTLINGER		/* Compatibility item... */
440 	if (optname == SO_DONTLINGER) {
441 		lock_sock(sk);
442 		sock_reset_flag(sk, SOCK_LINGER);
443 		release_sock(sk);
444 		return 0;
445 	}
446 #endif
447 
448 	if (optname == SO_BINDTODEVICE)
449 		return sock_bindtodevice(sk, optval, optlen);
450 
451 	if (optlen < sizeof(int))
452 		return -EINVAL;
453 
454 	if (get_user(val, (int __user *)optval))
455 		return -EFAULT;
456 
457 	valbool = val?1:0;
458 
459 	lock_sock(sk);
460 
461 	switch(optname) {
462 	case SO_DEBUG:
463 		if (val && !capable(CAP_NET_ADMIN)) {
464 			ret = -EACCES;
465 		}
466 		else if (valbool)
467 			sock_set_flag(sk, SOCK_DBG);
468 		else
469 			sock_reset_flag(sk, SOCK_DBG);
470 		break;
471 	case SO_REUSEADDR:
472 		sk->sk_reuse = valbool;
473 		break;
474 	case SO_TYPE:
475 	case SO_ERROR:
476 		ret = -ENOPROTOOPT;
477 		break;
478 	case SO_DONTROUTE:
479 		if (valbool)
480 			sock_set_flag(sk, SOCK_LOCALROUTE);
481 		else
482 			sock_reset_flag(sk, SOCK_LOCALROUTE);
483 		break;
484 	case SO_BROADCAST:
485 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
486 		break;
487 	case SO_SNDBUF:
488 		/* Don't error on this; BSD doesn't, and if you think
489 		   about it this is right. Otherwise apps have to
490 		   play 'guess the biggest size' games. RCVBUF/SNDBUF
491 		   are treated in BSD as hints */
492 
493 		if (val > sysctl_wmem_max)
494 			val = sysctl_wmem_max;
495 set_sndbuf:
496 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
497 		if ((val * 2) < SOCK_MIN_SNDBUF)
498 			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
499 		else
500 			sk->sk_sndbuf = val * 2;
501 
502 		/*
503 		 *	Wake up sending tasks if we
504 		 *	upped the value.
505 		 */
506 		sk->sk_write_space(sk);
507 		break;
508 
509 	case SO_SNDBUFFORCE:
510 		if (!capable(CAP_NET_ADMIN)) {
511 			ret = -EPERM;
512 			break;
513 		}
514 		goto set_sndbuf;
515 
516 	case SO_RCVBUF:
517 		/* Don't error on this; BSD doesn't, and if you think
518 		   about it this is right. Otherwise apps have to
519 		   play 'guess the biggest size' games. RCVBUF/SNDBUF
520 		   are treated in BSD as hints */
521 
522 		if (val > sysctl_rmem_max)
523 			val = sysctl_rmem_max;
524 set_rcvbuf:
525 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
526 		/*
527 		 * We double it on the way in to account for
528 		 * "struct sk_buff" etc. overhead.   Applications
529 		 * assume that the SO_RCVBUF setting they make will
530 		 * allow that much actual data to be received on that
531 		 * socket.
532 		 *
533 		 * Applications are unaware that "struct sk_buff" and
534 		 * other overheads allocate from the receive buffer
535 		 * during socket buffer allocation.
536 		 *
537 		 * And after considering the possible alternatives,
538 		 * returning the value we actually used in getsockopt
539 		 * is the most desirable behavior.
540 		 */
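		/*
		 * Illustrative example (assuming sysctl_rmem_max permits it):
		 * a setsockopt(SO_RCVBUF) of 64 KB stores sk_rcvbuf = 128 KB,
		 * and 128 KB is what a later getsockopt(SO_RCVBUF) reports.
		 */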
541 		if ((val * 2) < SOCK_MIN_RCVBUF)
542 			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
543 		else
544 			sk->sk_rcvbuf = val * 2;
545 		break;
546 
547 	case SO_RCVBUFFORCE:
548 		if (!capable(CAP_NET_ADMIN)) {
549 			ret = -EPERM;
550 			break;
551 		}
552 		goto set_rcvbuf;
553 
554 	case SO_KEEPALIVE:
555 #ifdef CONFIG_INET
556 		if (sk->sk_protocol == IPPROTO_TCP)
557 			tcp_set_keepalive(sk, valbool);
558 #endif
559 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
560 		break;
561 
562 	case SO_OOBINLINE:
563 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
564 		break;
565 
566 	case SO_NO_CHECK:
567 		sk->sk_no_check = valbool;
568 		break;
569 
570 	case SO_PRIORITY:
571 		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
572 			sk->sk_priority = val;
573 		else
574 			ret = -EPERM;
575 		break;
576 
577 	case SO_LINGER:
578 		if (optlen < sizeof(ling)) {
579 			ret = -EINVAL;	/* 1003.1g */
580 			break;
581 		}
582 		if (copy_from_user(&ling,optval,sizeof(ling))) {
583 			ret = -EFAULT;
584 			break;
585 		}
586 		if (!ling.l_onoff)
587 			sock_reset_flag(sk, SOCK_LINGER);
588 		else {
589 #if (BITS_PER_LONG == 32)
590 			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
591 				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
592 			else
593 #endif
594 				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
595 			sock_set_flag(sk, SOCK_LINGER);
596 		}
597 		break;
598 
599 	case SO_BSDCOMPAT:
600 		sock_warn_obsolete_bsdism("setsockopt");
601 		break;
602 
603 	case SO_PASSCRED:
604 		if (valbool)
605 			set_bit(SOCK_PASSCRED, &sock->flags);
606 		else
607 			clear_bit(SOCK_PASSCRED, &sock->flags);
608 		break;
609 
610 	case SO_TIMESTAMP:
611 	case SO_TIMESTAMPNS:
612 		if (valbool)  {
613 			if (optname == SO_TIMESTAMP)
614 				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
615 			else
616 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
617 			sock_set_flag(sk, SOCK_RCVTSTAMP);
618 			sock_enable_timestamp(sk);
619 		} else {
620 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
621 			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
622 		}
623 		break;
624 
625 	case SO_RCVLOWAT:
626 		if (val < 0)
627 			val = INT_MAX;
628 		sk->sk_rcvlowat = val ? : 1;
629 		break;
630 
631 	case SO_RCVTIMEO:
632 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
633 		break;
634 
635 	case SO_SNDTIMEO:
636 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
637 		break;
638 
639 	case SO_ATTACH_FILTER:
640 		ret = -EINVAL;
641 		if (optlen == sizeof(struct sock_fprog)) {
642 			struct sock_fprog fprog;
643 
644 			ret = -EFAULT;
645 			if (copy_from_user(&fprog, optval, sizeof(fprog)))
646 				break;
647 
648 			ret = sk_attach_filter(&fprog, sk);
649 		}
650 		break;
651 
652 	case SO_DETACH_FILTER:
653 		rcu_read_lock_bh();
654 		filter = rcu_dereference(sk->sk_filter);
655 		if (filter) {
656 			rcu_assign_pointer(sk->sk_filter, NULL);
657 			sk_filter_release(sk, filter);
658 			rcu_read_unlock_bh();
659 			break;
660 		}
661 		rcu_read_unlock_bh();
662 		ret = -ENONET;
663 		break;
664 
665 	case SO_PASSSEC:
666 		if (valbool)
667 			set_bit(SOCK_PASSSEC, &sock->flags);
668 		else
669 			clear_bit(SOCK_PASSSEC, &sock->flags);
670 		break;
671 
672 		/* We implement SO_SNDLOWAT etc. as not settable
673 		   (1003.1g 5.3) */
674 	default:
675 		ret = -ENOPROTOOPT;
676 		break;
677 	}
678 	release_sock(sk);
679 	return ret;
680 }
681 
682 
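/**
 *	sock_getsockopt - get a socket-level (SOL_SOCKET) option
 *	@sock: socket to query
 *	@level: not used here; the caller already dispatched on SOL_SOCKET
 *	@optname: option to retrieve
 *	@optval: user buffer receiving the value
 *	@optlen: user pointer holding the buffer size, updated with the
 *		 length actually written
 *
 *	Most options are returned as an int; SO_LINGER, SO_RCVTIMEO and
 *	SO_SNDTIMEO use their own structures.  The result is truncated to the
 *	user-supplied length.
 */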
683 int sock_getsockopt(struct socket *sock, int level, int optname,
684 		    char __user *optval, int __user *optlen)
685 {
686 	struct sock *sk = sock->sk;
687 
688 	union {
689 		int val;
690 		struct linger ling;
691 		struct timeval tm;
692 	} v;
693 
694 	unsigned int lv = sizeof(int);
695 	int len;
696 
697 	if (get_user(len, optlen))
698 		return -EFAULT;
699 	if (len < 0)
700 		return -EINVAL;
701 
702 	switch(optname) {
703 	case SO_DEBUG:
704 		v.val = sock_flag(sk, SOCK_DBG);
705 		break;
706 
707 	case SO_DONTROUTE:
708 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
709 		break;
710 
711 	case SO_BROADCAST:
712 		v.val = !!sock_flag(sk, SOCK_BROADCAST);
713 		break;
714 
715 	case SO_SNDBUF:
716 		v.val = sk->sk_sndbuf;
717 		break;
718 
719 	case SO_RCVBUF:
720 		v.val = sk->sk_rcvbuf;
721 		break;
722 
723 	case SO_REUSEADDR:
724 		v.val = sk->sk_reuse;
725 		break;
726 
727 	case SO_KEEPALIVE:
728 		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
729 		break;
730 
731 	case SO_TYPE:
732 		v.val = sk->sk_type;
733 		break;
734 
735 	case SO_ERROR:
736 		v.val = -sock_error(sk);
737 		if (v.val==0)
738 			v.val = xchg(&sk->sk_err_soft, 0);
739 		break;
740 
741 	case SO_OOBINLINE:
742 		v.val = !!sock_flag(sk, SOCK_URGINLINE);
743 		break;
744 
745 	case SO_NO_CHECK:
746 		v.val = sk->sk_no_check;
747 		break;
748 
749 	case SO_PRIORITY:
750 		v.val = sk->sk_priority;
751 		break;
752 
753 	case SO_LINGER:
754 		lv		= sizeof(v.ling);
755 		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
756 		v.ling.l_linger	= sk->sk_lingertime / HZ;
757 		break;
758 
759 	case SO_BSDCOMPAT:
760 		sock_warn_obsolete_bsdism("getsockopt");
761 		break;
762 
763 	case SO_TIMESTAMP:
764 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
765 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
766 		break;
767 
768 	case SO_TIMESTAMPNS:
769 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
770 		break;
771 
772 	case SO_RCVTIMEO:
773 		lv=sizeof(struct timeval);
774 		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
775 			v.tm.tv_sec = 0;
776 			v.tm.tv_usec = 0;
777 		} else {
778 			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
779 			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
780 		}
781 		break;
782 
783 	case SO_SNDTIMEO:
784 		lv=sizeof(struct timeval);
785 		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
786 			v.tm.tv_sec = 0;
787 			v.tm.tv_usec = 0;
788 		} else {
789 			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
790 			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
791 		}
792 		break;
793 
794 	case SO_RCVLOWAT:
795 		v.val = sk->sk_rcvlowat;
796 		break;
797 
798 	case SO_SNDLOWAT:
799 		v.val=1;
800 		break;
801 
802 	case SO_PASSCRED:
803 		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
804 		break;
805 
806 	case SO_PEERCRED:
807 		if (len > sizeof(sk->sk_peercred))
808 			len = sizeof(sk->sk_peercred);
809 		if (copy_to_user(optval, &sk->sk_peercred, len))
810 			return -EFAULT;
811 		goto lenout;
812 
813 	case SO_PEERNAME:
814 	{
815 		char address[128];
816 
817 		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
818 			return -ENOTCONN;
819 		if (lv < len)
820 			return -EINVAL;
821 		if (copy_to_user(optval, address, len))
822 			return -EFAULT;
823 		goto lenout;
824 	}
825 
826 	/* Dubious BSD thing... Probably nobody even uses it, but
827 	 * the UNIX standard wants it for whatever reason... -DaveM
828 	 */
829 	case SO_ACCEPTCONN:
830 		v.val = sk->sk_state == TCP_LISTEN;
831 		break;
832 
833 	case SO_PASSSEC:
834 		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
835 		break;
836 
837 	case SO_PEERSEC:
838 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
839 
840 	default:
841 		return -ENOPROTOOPT;
842 	}
843 
844 	if (len > lv)
845 		len = lv;
846 	if (copy_to_user(optval, &v, len))
847 		return -EFAULT;
848 lenout:
849 	if (put_user(len, optlen))
850 		return -EFAULT;
851 	return 0;
852 }
853 
854 /*
855  * Initialize an sk_lock.
856  *
857  * (We also register the sk_lock with the lock validator.)
858  */
859 static inline void sock_lock_init(struct sock *sk)
860 {
861 	sock_lock_init_class_and_name(sk,
862 			af_family_slock_key_strings[sk->sk_family],
863 			af_family_slock_keys + sk->sk_family,
864 			af_family_key_strings[sk->sk_family],
865 			af_family_keys + sk->sk_family);
866 }
867 
868 /**
869  *	sk_alloc - All socket objects are allocated here
870  *	@family: protocol family
871  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
872  *	@prot: struct proto associated with this new sock instance
873  *	@zero_it: if we should zero the newly allocated sock
874  */
875 struct sock *sk_alloc(int family, gfp_t priority,
876 		      struct proto *prot, int zero_it)
877 {
878 	struct sock *sk = NULL;
879 	struct kmem_cache *slab = prot->slab;
880 
881 	if (slab != NULL)
882 		sk = kmem_cache_alloc(slab, priority);
883 	else
884 		sk = kmalloc(prot->obj_size, priority);
885 
886 	if (sk) {
887 		if (zero_it) {
888 			memset(sk, 0, prot->obj_size);
889 			sk->sk_family = family;
890 			/*
891 			 * See comment in struct sock definition to understand
892 			 * why we need sk_prot_creator -acme
893 			 */
894 			sk->sk_prot = sk->sk_prot_creator = prot;
895 			sock_lock_init(sk);
896 		}
897 
898 		if (security_sk_alloc(sk, family, priority))
899 			goto out_free;
900 
901 		if (!try_module_get(prot->owner))
902 			goto out_free;
903 	}
904 	return sk;
905 
906 out_free:
907 	if (slab != NULL)
908 		kmem_cache_free(slab, sk);
909 	else
910 		kfree(sk);
911 	return NULL;
912 }
913 
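/**
 *	sk_free - release a socket allocated with sk_alloc()
 *	@sk: socket to free
 *
 *	Runs the sk_destruct callback, drops any attached filter, disables
 *	timestamping, warns about leaked option memory, returns the object to
 *	its protocol's slab cache (or kfree()s it) and drops the reference on
 *	the protocol module.
 */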
914 void sk_free(struct sock *sk)
915 {
916 	struct sk_filter *filter;
917 	struct module *owner = sk->sk_prot_creator->owner;
918 
919 	if (sk->sk_destruct)
920 		sk->sk_destruct(sk);
921 
922 	filter = rcu_dereference(sk->sk_filter);
923 	if (filter) {
924 		sk_filter_release(sk, filter);
925 		rcu_assign_pointer(sk->sk_filter, NULL);
926 	}
927 
928 	sock_disable_timestamp(sk);
929 
930 	if (atomic_read(&sk->sk_omem_alloc))
931 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
932 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
933 
934 	security_sk_free(sk);
935 	if (sk->sk_prot_creator->slab != NULL)
936 		kmem_cache_free(sk->sk_prot_creator->slab, sk);
937 	else
938 		kfree(sk);
939 	module_put(owner);
940 }
941 
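/**
 *	sk_clone - clone a socket, typically when accepting a new connection
 *	@sk: socket to clone
 *	@priority: GFP flags for the allocation
 *
 *	Allocates a new sock, copies the parent into it and then reinitialises
 *	everything that must not be shared: locks, queues, memory accounting
 *	and the cached route.  The xfrm policy is cloned, and the new socket
 *	starts with a reference count of 2.
 */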
942 struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
943 {
944 	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
945 
946 	if (newsk != NULL) {
947 		struct sk_filter *filter;
948 
949 		sock_copy(newsk, sk);
950 
951 		/* SANITY */
952 		sk_node_init(&newsk->sk_node);
953 		sock_lock_init(newsk);
954 		bh_lock_sock(newsk);
955 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
956 
957 		atomic_set(&newsk->sk_rmem_alloc, 0);
958 		atomic_set(&newsk->sk_wmem_alloc, 0);
959 		atomic_set(&newsk->sk_omem_alloc, 0);
960 		skb_queue_head_init(&newsk->sk_receive_queue);
961 		skb_queue_head_init(&newsk->sk_write_queue);
962 #ifdef CONFIG_NET_DMA
963 		skb_queue_head_init(&newsk->sk_async_wait_queue);
964 #endif
965 
966 		rwlock_init(&newsk->sk_dst_lock);
967 		rwlock_init(&newsk->sk_callback_lock);
968 		lockdep_set_class_and_name(&newsk->sk_callback_lock,
969 				af_callback_keys + newsk->sk_family,
970 				af_family_clock_key_strings[newsk->sk_family]);
971 
972 		newsk->sk_dst_cache	= NULL;
973 		newsk->sk_wmem_queued	= 0;
974 		newsk->sk_forward_alloc = 0;
975 		newsk->sk_send_head	= NULL;
976 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
977 
978 		sock_reset_flag(newsk, SOCK_DONE);
979 		skb_queue_head_init(&newsk->sk_error_queue);
980 
981 		filter = newsk->sk_filter;
982 		if (filter != NULL)
983 			sk_filter_charge(newsk, filter);
984 
985 		if (unlikely(xfrm_sk_clone_policy(newsk))) {
986 			/* It is still a raw copy of the parent, so invalidate
987 			 * the destructor and do a plain sk_free() */
988 			newsk->sk_destruct = NULL;
989 			sk_free(newsk);
990 			newsk = NULL;
991 			goto out;
992 		}
993 
994 		newsk->sk_err	   = 0;
995 		newsk->sk_priority = 0;
996 		atomic_set(&newsk->sk_refcnt, 2);
997 
998 		/*
999 		 * Increment the counter in the same struct proto as the master
1000 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1001 		 * is the same as sk->sk_prot->socks, as this field was copied
1002 		 * with memcpy).
1003 		 *
1004 		 * This _changes_ the previous behaviour, where
1005 		 * tcp_create_openreq_child was always incrementing the
1006 		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
1007 		 * to be taken into account in all callers. -acme
1008 		 */
1009 		sk_refcnt_debug_inc(newsk);
1010 		newsk->sk_socket = NULL;
1011 		newsk->sk_sleep	 = NULL;
1012 
1013 		if (newsk->sk_prot->sockets_allocated)
1014 			atomic_inc(newsk->sk_prot->sockets_allocated);
1015 	}
1016 out:
1017 	return newsk;
1018 }
1019 
1020 EXPORT_SYMBOL_GPL(sk_clone);
1021 
1022 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1023 {
1024 	__sk_dst_set(sk, dst);
1025 	sk->sk_route_caps = dst->dev->features;
1026 	if (sk->sk_route_caps & NETIF_F_GSO)
1027 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1028 	if (sk_can_gso(sk)) {
1029 		if (dst->header_len)
1030 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1031 		else
1032 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1033 	}
1034 }
1035 EXPORT_SYMBOL_GPL(sk_setup_caps);
1036 
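/*
 * Scale the default socket buffer limits to the machine's memory.  Assuming
 * 4 KiB pages, <= 4096 physical pages is <= 16 MB of RAM, where all four
 * limits are clamped to 32 KB - 1; >= 131072 pages is >= 512 MB, where
 * sysctl_wmem_max and sysctl_rmem_max are set to 128 KB - 1.
 */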
1037 void __init sk_init(void)
1038 {
1039 	if (num_physpages <= 4096) {
1040 		sysctl_wmem_max = 32767;
1041 		sysctl_rmem_max = 32767;
1042 		sysctl_wmem_default = 32767;
1043 		sysctl_rmem_default = 32767;
1044 	} else if (num_physpages >= 131072) {
1045 		sysctl_wmem_max = 131071;
1046 		sysctl_rmem_max = 131071;
1047 	}
1048 }
1049 
1050 /*
1051  *	Simple resource managers for sockets.
1052  */
1053 
1054 
1055 /*
1056  * Write buffer destructor automatically called from kfree_skb.
1057  */
1058 void sock_wfree(struct sk_buff *skb)
1059 {
1060 	struct sock *sk = skb->sk;
1061 
1062 	/* In case it might be waiting for more memory. */
1063 	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1064 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1065 		sk->sk_write_space(sk);
1066 	sock_put(sk);
1067 }
1068 
1069 /*
1070  * Read buffer destructor automatically called from kfree_skb.
1071  */
1072 void sock_rfree(struct sk_buff *skb)
1073 {
1074 	struct sock *sk = skb->sk;
1075 
1076 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1077 }
1078 
1079 
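/*
 * sock_i_uid() and sock_i_ino() report the uid and inode number of the
 * owning struct socket's inode, or 0 if the sock has been orphaned;
 * sk_callback_lock guards against the socket being detached underneath us.
 */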
1080 int sock_i_uid(struct sock *sk)
1081 {
1082 	int uid;
1083 
1084 	read_lock(&sk->sk_callback_lock);
1085 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1086 	read_unlock(&sk->sk_callback_lock);
1087 	return uid;
1088 }
1089 
1090 unsigned long sock_i_ino(struct sock *sk)
1091 {
1092 	unsigned long ino;
1093 
1094 	read_lock(&sk->sk_callback_lock);
1095 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1096 	read_unlock(&sk->sk_callback_lock);
1097 	return ino;
1098 }
1099 
1100 /*
1101  * Allocate a skb from the socket's send buffer.
1102  */
1103 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1104 			     gfp_t priority)
1105 {
1106 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1107 		struct sk_buff * skb = alloc_skb(size, priority);
1108 		if (skb) {
1109 			skb_set_owner_w(skb, sk);
1110 			return skb;
1111 		}
1112 	}
1113 	return NULL;
1114 }
1115 
1116 /*
1117  * Allocate a skb from the socket's receive buffer.
1118  */
1119 struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1120 			     gfp_t priority)
1121 {
1122 	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1123 		struct sk_buff *skb = alloc_skb(size, priority);
1124 		if (skb) {
1125 			skb_set_owner_r(skb, sk);
1126 			return skb;
1127 		}
1128 	}
1129 	return NULL;
1130 }
1131 
1132 /*
1133  * Allocate a memory block from the socket's option memory buffer.
1134  */
1135 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1136 {
1137 	if ((unsigned)size <= sysctl_optmem_max &&
1138 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1139 		void *mem;
1140 		/* First do the add, to avoid the race if kmalloc
1141 		 * might sleep.
1142 		 */
1143 		atomic_add(size, &sk->sk_omem_alloc);
1144 		mem = kmalloc(size, priority);
1145 		if (mem)
1146 			return mem;
1147 		atomic_sub(size, &sk->sk_omem_alloc);
1148 	}
1149 	return NULL;
1150 }
1151 
1152 /*
1153  * Free an option memory block.
1154  */
1155 void sock_kfree_s(struct sock *sk, void *mem, int size)
1156 {
1157 	kfree(mem);
1158 	atomic_sub(size, &sk->sk_omem_alloc);
1159 }
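
/*
 * Usage note (illustrative): allocations and frees must be paired with the
 * same size so that sk_omem_alloc balances, e.g.
 *
 *	void *mem = sock_kmalloc(sk, size, GFP_KERNEL);
 *	...
 *	sock_kfree_s(sk, mem, size);
 */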
1160 
1161 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1162    I think these locks should be removed for datagram sockets.
1163  */
1164 static long sock_wait_for_wmem(struct sock * sk, long timeo)
1165 {
1166 	DEFINE_WAIT(wait);
1167 
1168 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1169 	for (;;) {
1170 		if (!timeo)
1171 			break;
1172 		if (signal_pending(current))
1173 			break;
1174 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1175 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1176 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1177 			break;
1178 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1179 			break;
1180 		if (sk->sk_err)
1181 			break;
1182 		timeo = schedule_timeout(timeo);
1183 	}
1184 	finish_wait(sk->sk_sleep, &wait);
1185 	return timeo;
1186 }
1187 
1188 
1189 /*
1190  *	Generic send/receive buffer handlers
1191  */
1192 
1193 static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1194 					    unsigned long header_len,
1195 					    unsigned long data_len,
1196 					    int noblock, int *errcode)
1197 {
1198 	struct sk_buff *skb;
1199 	gfp_t gfp_mask;
1200 	long timeo;
1201 	int err;
1202 
1203 	gfp_mask = sk->sk_allocation;
1204 	if (gfp_mask & __GFP_WAIT)
1205 		gfp_mask |= __GFP_REPEAT;
1206 
1207 	timeo = sock_sndtimeo(sk, noblock);
1208 	while (1) {
1209 		err = sock_error(sk);
1210 		if (err != 0)
1211 			goto failure;
1212 
1213 		err = -EPIPE;
1214 		if (sk->sk_shutdown & SEND_SHUTDOWN)
1215 			goto failure;
1216 
1217 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1218 			skb = alloc_skb(header_len, gfp_mask);
1219 			if (skb) {
1220 				int npages;
1221 				int i;
1222 
1223 				/* No pages, we're done... */
1224 				if (!data_len)
1225 					break;
1226 
1227 				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1228 				skb->truesize += data_len;
1229 				skb_shinfo(skb)->nr_frags = npages;
1230 				for (i = 0; i < npages; i++) {
1231 					struct page *page;
1232 					skb_frag_t *frag;
1233 
1234 					page = alloc_pages(sk->sk_allocation, 0);
1235 					if (!page) {
1236 						err = -ENOBUFS;
1237 						skb_shinfo(skb)->nr_frags = i;
1238 						kfree_skb(skb);
1239 						goto failure;
1240 					}
1241 
1242 					frag = &skb_shinfo(skb)->frags[i];
1243 					frag->page = page;
1244 					frag->page_offset = 0;
1245 					frag->size = (data_len >= PAGE_SIZE ?
1246 						      PAGE_SIZE :
1247 						      data_len);
1248 					data_len -= PAGE_SIZE;
1249 				}
1250 
1251 				/* Full success... */
1252 				break;
1253 			}
1254 			err = -ENOBUFS;
1255 			goto failure;
1256 		}
1257 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1258 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1259 		err = -EAGAIN;
1260 		if (!timeo)
1261 			goto failure;
1262 		if (signal_pending(current))
1263 			goto interrupted;
1264 		timeo = sock_wait_for_wmem(sk, timeo);
1265 	}
1266 
1267 	skb_set_owner_w(skb, sk);
1268 	return skb;
1269 
1270 interrupted:
1271 	err = sock_intr_errno(timeo);
1272 failure:
1273 	*errcode = err;
1274 	return NULL;
1275 }
1276 
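/**
 *	sock_alloc_send_skb - allocate a buffer charged to a socket's send queue
 *	@sk: socket the buffer is for
 *	@size: size of the (linear) data area
 *	@noblock: do not sleep if the send buffer is currently full
 *	@errcode: written with the error code on failure
 *
 *	Waits, subject to the socket's send timeout, until sk_wmem_alloc drops
 *	below sk_sndbuf, then allocates the skb and charges it to @sk with
 *	skb_set_owner_w().  Returns NULL and sets *@errcode on failure.
 */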
1277 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1278 				    int noblock, int *errcode)
1279 {
1280 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1281 }
1282 
1283 static void __lock_sock(struct sock *sk)
1284 {
1285 	DEFINE_WAIT(wait);
1286 
1287 	for (;;) {
1288 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1289 					TASK_UNINTERRUPTIBLE);
1290 		spin_unlock_bh(&sk->sk_lock.slock);
1291 		schedule();
1292 		spin_lock_bh(&sk->sk_lock.slock);
1293 		if (!sock_owned_by_user(sk))
1294 			break;
1295 	}
1296 	finish_wait(&sk->sk_lock.wq, &wait);
1297 }
1298 
1299 static void __release_sock(struct sock *sk)
1300 {
1301 	struct sk_buff *skb = sk->sk_backlog.head;
1302 
1303 	do {
1304 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1305 		bh_unlock_sock(sk);
1306 
1307 		do {
1308 			struct sk_buff *next = skb->next;
1309 
1310 			skb->next = NULL;
1311 			sk->sk_backlog_rcv(sk, skb);
1312 
1313 			/*
1314 			 * We are in process context here with softirqs
1315 			 * disabled, use cond_resched_softirq() to preempt.
1316 			 * This is safe to do because we've taken the backlog
1317 			 * queue private:
1318 			 */
1319 			cond_resched_softirq();
1320 
1321 			skb = next;
1322 		} while (skb != NULL);
1323 
1324 		bh_lock_sock(sk);
1325 	} while ((skb = sk->sk_backlog.head) != NULL);
1326 }
1327 
1328 /**
1329  * sk_wait_data - wait for data to arrive at sk_receive_queue
1330  * @sk:    sock to wait on
1331  * @timeo: for how long
1332  *
1333  * Now socket state, including sk->sk_err, is changed only under the lock,
1334  * hence we may omit checks after joining the wait queue.
1335  * We check the receive queue before schedule() only as an optimization;
1336  * it is very likely that release_sock() added new data.
1337  */
1338 int sk_wait_data(struct sock *sk, long *timeo)
1339 {
1340 	int rc;
1341 	DEFINE_WAIT(wait);
1342 
1343 	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1344 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1345 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1346 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1347 	finish_wait(sk->sk_sleep, &wait);
1348 	return rc;
1349 }
1350 
1351 EXPORT_SYMBOL(sk_wait_data);
1352 
1353 /*
1354  * Set of default routines for initialising struct proto_ops when
1355  * the protocol does not support a particular function. In certain
1356  * cases where it makes no sense for a protocol to have a "do nothing"
1357  * function, some default processing is provided.
1358  */
1359 
1360 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1361 {
1362 	return -EOPNOTSUPP;
1363 }
1364 
1365 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1366 		    int len, int flags)
1367 {
1368 	return -EOPNOTSUPP;
1369 }
1370 
1371 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1372 {
1373 	return -EOPNOTSUPP;
1374 }
1375 
1376 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1377 {
1378 	return -EOPNOTSUPP;
1379 }
1380 
1381 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1382 		    int *len, int peer)
1383 {
1384 	return -EOPNOTSUPP;
1385 }
1386 
1387 unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1388 {
1389 	return 0;
1390 }
1391 
1392 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1393 {
1394 	return -EOPNOTSUPP;
1395 }
1396 
1397 int sock_no_listen(struct socket *sock, int backlog)
1398 {
1399 	return -EOPNOTSUPP;
1400 }
1401 
1402 int sock_no_shutdown(struct socket *sock, int how)
1403 {
1404 	return -EOPNOTSUPP;
1405 }
1406 
1407 int sock_no_setsockopt(struct socket *sock, int level, int optname,
1408 		    char __user *optval, int optlen)
1409 {
1410 	return -EOPNOTSUPP;
1411 }
1412 
1413 int sock_no_getsockopt(struct socket *sock, int level, int optname,
1414 		    char __user *optval, int __user *optlen)
1415 {
1416 	return -EOPNOTSUPP;
1417 }
1418 
1419 int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1420 		    size_t len)
1421 {
1422 	return -EOPNOTSUPP;
1423 }
1424 
1425 int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1426 		    size_t len, int flags)
1427 {
1428 	return -EOPNOTSUPP;
1429 }
1430 
1431 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1432 {
1433 	/* Mirror missing mmap method error code */
1434 	return -ENODEV;
1435 }
1436 
1437 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1438 {
1439 	ssize_t res;
1440 	struct msghdr msg = {.msg_flags = flags};
1441 	struct kvec iov;
1442 	char *kaddr = kmap(page);
1443 	iov.iov_base = kaddr + offset;
1444 	iov.iov_len = size;
1445 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1446 	kunmap(page);
1447 	return res;
1448 }
1449 
1450 /*
1451  *	Default Socket Callbacks
1452  */
1453 
1454 static void sock_def_wakeup(struct sock *sk)
1455 {
1456 	read_lock(&sk->sk_callback_lock);
1457 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1458 		wake_up_interruptible_all(sk->sk_sleep);
1459 	read_unlock(&sk->sk_callback_lock);
1460 }
1461 
1462 static void sock_def_error_report(struct sock *sk)
1463 {
1464 	read_lock(&sk->sk_callback_lock);
1465 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1466 		wake_up_interruptible(sk->sk_sleep);
1467 	sk_wake_async(sk,0,POLL_ERR);
1468 	read_unlock(&sk->sk_callback_lock);
1469 }
1470 
1471 static void sock_def_readable(struct sock *sk, int len)
1472 {
1473 	read_lock(&sk->sk_callback_lock);
1474 	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1475 		wake_up_interruptible(sk->sk_sleep);
1476 	sk_wake_async(sk,1,POLL_IN);
1477 	read_unlock(&sk->sk_callback_lock);
1478 }
1479 
1480 static void sock_def_write_space(struct sock *sk)
1481 {
1482 	read_lock(&sk->sk_callback_lock);
1483 
1484 	/* Do not wake up a writer until he can make "significant"
1485 	 * progress.  --DaveM
1486 	 */
1487 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1488 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1489 			wake_up_interruptible(sk->sk_sleep);
1490 
1491 		/* Should agree with poll, otherwise some programs break */
1492 		if (sock_writeable(sk))
1493 			sk_wake_async(sk, 2, POLL_OUT);
1494 	}
1495 
1496 	read_unlock(&sk->sk_callback_lock);
1497 }
1498 
1499 static void sock_def_destruct(struct sock *sk)
1500 {
1501 	kfree(sk->sk_protinfo);
1502 }
1503 
1504 void sk_send_sigurg(struct sock *sk)
1505 {
1506 	if (sk->sk_socket && sk->sk_socket->file)
1507 		if (send_sigurg(&sk->sk_socket->file->f_owner))
1508 			sk_wake_async(sk, 3, POLL_PRI);
1509 }
1510 
1511 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1512 		    unsigned long expires)
1513 {
1514 	if (!mod_timer(timer, expires))
1515 		sock_hold(sk);
1516 }
1517 
1518 EXPORT_SYMBOL(sk_reset_timer);
1519 
1520 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1521 {
1522 	if (timer_pending(timer) && del_timer(timer))
1523 		__sock_put(sk);
1524 }
1525 
1526 EXPORT_SYMBOL(sk_stop_timer);
1527 
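/**
 *	sock_init_data - initialise the generic part of a newly allocated sock
 *	@sock: owning struct socket, or NULL for kernel-internal sockets
 *	@sk: sock to initialise
 *
 *	Sets up the queues, the default buffer sizes from the rmem/wmem
 *	sysctls, effectively infinite send/receive timeouts and the default
 *	callbacks (sock_def_readable() and friends).
 */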
1528 void sock_init_data(struct socket *sock, struct sock *sk)
1529 {
1530 	skb_queue_head_init(&sk->sk_receive_queue);
1531 	skb_queue_head_init(&sk->sk_write_queue);
1532 	skb_queue_head_init(&sk->sk_error_queue);
1533 #ifdef CONFIG_NET_DMA
1534 	skb_queue_head_init(&sk->sk_async_wait_queue);
1535 #endif
1536 
1537 	sk->sk_send_head	=	NULL;
1538 
1539 	init_timer(&sk->sk_timer);
1540 
1541 	sk->sk_allocation	=	GFP_KERNEL;
1542 	sk->sk_rcvbuf		=	sysctl_rmem_default;
1543 	sk->sk_sndbuf		=	sysctl_wmem_default;
1544 	sk->sk_state		=	TCP_CLOSE;
1545 	sk->sk_socket		=	sock;
1546 
1547 	sock_set_flag(sk, SOCK_ZAPPED);
1548 
1549 	if (sock) {
1550 		sk->sk_type	=	sock->type;
1551 		sk->sk_sleep	=	&sock->wait;
1552 		sock->sk	=	sk;
1553 	} else
1554 		sk->sk_sleep	=	NULL;
1555 
1556 	rwlock_init(&sk->sk_dst_lock);
1557 	rwlock_init(&sk->sk_callback_lock);
1558 	lockdep_set_class_and_name(&sk->sk_callback_lock,
1559 			af_callback_keys + sk->sk_family,
1560 			af_family_clock_key_strings[sk->sk_family]);
1561 
1562 	sk->sk_state_change	=	sock_def_wakeup;
1563 	sk->sk_data_ready	=	sock_def_readable;
1564 	sk->sk_write_space	=	sock_def_write_space;
1565 	sk->sk_error_report	=	sock_def_error_report;
1566 	sk->sk_destruct		=	sock_def_destruct;
1567 
1568 	sk->sk_sndmsg_page	=	NULL;
1569 	sk->sk_sndmsg_off	=	0;
1570 
1571 	sk->sk_peercred.pid 	=	0;
1572 	sk->sk_peercred.uid	=	-1;
1573 	sk->sk_peercred.gid	=	-1;
1574 	sk->sk_write_pending	=	0;
1575 	sk->sk_rcvlowat		=	1;
1576 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
1577 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
1578 
1579 	sk->sk_stamp = ktime_set(-1L, -1L);
1580 
1581 	atomic_set(&sk->sk_refcnt, 1);
1582 }
1583 
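/*
 * lock_sock()/release_sock() give process context mutex-like ownership of
 * the socket while still letting bottom halves queue packets on the
 * backlog; release_sock() then replays that backlog.  The usual pattern in
 * protocol code is:
 *
 *	lock_sock(sk);
 *	... modify socket state ...
 *	release_sock(sk);
 */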
1584 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1585 {
1586 	might_sleep();
1587 	spin_lock_bh(&sk->sk_lock.slock);
1588 	if (sk->sk_lock.owner)
1589 		__lock_sock(sk);
1590 	sk->sk_lock.owner = (void *)1;
1591 	spin_unlock(&sk->sk_lock.slock);
1592 	/*
1593 	 * The sk_lock has mutex_lock() semantics here:
1594 	 */
1595 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1596 	local_bh_enable();
1597 }
1598 
1599 EXPORT_SYMBOL(lock_sock_nested);
1600 
1601 void fastcall release_sock(struct sock *sk)
1602 {
1603 	/*
1604 	 * The sk_lock has mutex_unlock() semantics:
1605 	 */
1606 	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1607 
1608 	spin_lock_bh(&sk->sk_lock.slock);
1609 	if (sk->sk_backlog.tail)
1610 		__release_sock(sk);
1611 	sk->sk_lock.owner = NULL;
1612 	if (waitqueue_active(&sk->sk_lock.wq))
1613 		wake_up(&sk->sk_lock.wq);
1614 	spin_unlock_bh(&sk->sk_lock.slock);
1615 }
1616 EXPORT_SYMBOL(release_sock);
1617 
1618 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1619 {
1620 	struct timeval tv;
1621 	if (!sock_flag(sk, SOCK_TIMESTAMP))
1622 		sock_enable_timestamp(sk);
1623 	tv = ktime_to_timeval(sk->sk_stamp);
1624 	if (tv.tv_sec == -1)
1625 		return -ENOENT;
1626 	if (tv.tv_sec == 0) {
1627 		sk->sk_stamp = ktime_get_real();
1628 		tv = ktime_to_timeval(sk->sk_stamp);
1629 	}
1630 	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1631 }
1632 EXPORT_SYMBOL(sock_get_timestamp);
1633 
1634 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1635 {
1636 	struct timespec ts;
1637 	if (!sock_flag(sk, SOCK_TIMESTAMP))
1638 		sock_enable_timestamp(sk);
1639 	ts = ktime_to_timespec(sk->sk_stamp);
1640 	if (ts.tv_sec == -1)
1641 		return -ENOENT;
1642 	if (ts.tv_sec == 0) {
1643 		sk->sk_stamp = ktime_get_real();
1644 		ts = ktime_to_timespec(sk->sk_stamp);
1645 	}
1646 	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1647 }
1648 EXPORT_SYMBOL(sock_get_timestampns);
1649 
1650 void sock_enable_timestamp(struct sock *sk)
1651 {
1652 	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1653 		sock_set_flag(sk, SOCK_TIMESTAMP);
1654 		net_enable_timestamp();
1655 	}
1656 }
1657 EXPORT_SYMBOL(sock_enable_timestamp);
1658 
1659 /*
1660  *	Get a socket option on a socket.
1661  *
1662  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
1663  *	asynchronous errors should be reported by getsockopt. We assume
1664  *	this means if you specify SO_ERROR (otherwise what's the point of it?).
1665  */
1666 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1667 			   char __user *optval, int __user *optlen)
1668 {
1669 	struct sock *sk = sock->sk;
1670 
1671 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1672 }
1673 
1674 EXPORT_SYMBOL(sock_common_getsockopt);
1675 
1676 #ifdef CONFIG_COMPAT
1677 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1678 				  char __user *optval, int __user *optlen)
1679 {
1680 	struct sock *sk = sock->sk;
1681 
1682 	if (sk->sk_prot->compat_getsockopt != NULL)
1683 		return sk->sk_prot->compat_getsockopt(sk, level, optname,
1684 						      optval, optlen);
1685 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1686 }
1687 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1688 #endif
1689 
1690 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1691 			struct msghdr *msg, size_t size, int flags)
1692 {
1693 	struct sock *sk = sock->sk;
1694 	int addr_len = 0;
1695 	int err;
1696 
1697 	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1698 				   flags & ~MSG_DONTWAIT, &addr_len);
1699 	if (err >= 0)
1700 		msg->msg_namelen = addr_len;
1701 	return err;
1702 }
1703 
1704 EXPORT_SYMBOL(sock_common_recvmsg);
1705 
1706 /*
1707  *	Set socket options on an inet socket.
1708  */
1709 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1710 			   char __user *optval, int optlen)
1711 {
1712 	struct sock *sk = sock->sk;
1713 
1714 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1715 }
1716 
1717 EXPORT_SYMBOL(sock_common_setsockopt);
1718 
1719 #ifdef CONFIG_COMPAT
1720 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1721 				  char __user *optval, int optlen)
1722 {
1723 	struct sock *sk = sock->sk;
1724 
1725 	if (sk->sk_prot->compat_setsockopt != NULL)
1726 		return sk->sk_prot->compat_setsockopt(sk, level, optname,
1727 						      optval, optlen);
1728 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1729 }
1730 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1731 #endif
1732 
1733 void sk_common_release(struct sock *sk)
1734 {
1735 	if (sk->sk_prot->destroy)
1736 		sk->sk_prot->destroy(sk);
1737 
1738 	/*
1739 	 * Observation: when sk_common_release is called, processes no longer
1740 	 * have access to the socket, but the network stack still does.
1741 	 * Step one, detach it from networking:
1742 	 *
1743 	 * A. Remove it from the hash tables.
1744 	 */
1745 
1746 	sk->sk_prot->unhash(sk);
1747 
1748 	/*
1749 	 * At this point the socket cannot receive new packets, but some may
1750 	 * still be in flight because some CPU is running the receiver and did
1751 	 * the hash table lookup before we unhashed the socket. They will reach
1752 	 * the receive queue and be purged by the socket destructor.
1753 	 *
1754 	 * We also still have packets pending on the receive queue and probably
1755 	 * our own packets waiting in device queues. sock_destroy will drain the
1756 	 * receive queue, but transmitted packets will delay socket destruction
1757 	 * until the last reference is released.
1758 	 */
1759 
1760 	sock_orphan(sk);
1761 
1762 	xfrm_sk_free_policy(sk);
1763 
1764 	sk_refcnt_debug_release(sk);
1765 	sock_put(sk);
1766 }
1767 
1768 EXPORT_SYMBOL(sk_common_release);
1769 
1770 static DEFINE_RWLOCK(proto_list_lock);
1771 static LIST_HEAD(proto_list);
1772 
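/**
 *	proto_register - register a transport protocol with the socket layer
 *	@prot: proto structure describing the protocol
 *	@alloc_slab: create slab caches for the sock, request_sock and
 *		     timewait_sock objects
 *
 *	Adds @prot to the global protocol list (visible in /proc/net/protocols)
 *	and optionally creates the backing slab caches.  Returns 0 on success
 *	or -ENOBUFS if a cache could not be created.
 */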
1773 int proto_register(struct proto *prot, int alloc_slab)
1774 {
1775 	char *request_sock_slab_name = NULL;
1776 	char *timewait_sock_slab_name;
1777 	int rc = -ENOBUFS;
1778 
1779 	if (alloc_slab) {
1780 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1781 					       SLAB_HWCACHE_ALIGN, NULL);
1782 
1783 		if (prot->slab == NULL) {
1784 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1785 			       prot->name);
1786 			goto out;
1787 		}
1788 
1789 		if (prot->rsk_prot != NULL) {
1790 			static const char mask[] = "request_sock_%s";
1791 
1792 			request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1793 			if (request_sock_slab_name == NULL)
1794 				goto out_free_sock_slab;
1795 
1796 			sprintf(request_sock_slab_name, mask, prot->name);
1797 			prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1798 								 prot->rsk_prot->obj_size, 0,
1799 								 SLAB_HWCACHE_ALIGN, NULL);
1800 
1801 			if (prot->rsk_prot->slab == NULL) {
1802 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1803 				       prot->name);
1804 				goto out_free_request_sock_slab_name;
1805 			}
1806 		}
1807 
1808 		if (prot->twsk_prot != NULL) {
1809 			static const char mask[] = "tw_sock_%s";
1810 
1811 			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1812 
1813 			if (timewait_sock_slab_name == NULL)
1814 				goto out_free_request_sock_slab;
1815 
1816 			sprintf(timewait_sock_slab_name, mask, prot->name);
1817 			prot->twsk_prot->twsk_slab =
1818 				kmem_cache_create(timewait_sock_slab_name,
1819 						  prot->twsk_prot->twsk_obj_size,
1820 						  0, SLAB_HWCACHE_ALIGN,
1821 						  NULL);
1822 			if (prot->twsk_prot->twsk_slab == NULL)
1823 				goto out_free_timewait_sock_slab_name;
1824 		}
1825 	}
1826 
1827 	write_lock(&proto_list_lock);
1828 	list_add(&prot->node, &proto_list);
1829 	write_unlock(&proto_list_lock);
1830 	rc = 0;
1831 out:
1832 	return rc;
1833 out_free_timewait_sock_slab_name:
1834 	kfree(timewait_sock_slab_name);
1835 out_free_request_sock_slab:
1836 	if (prot->rsk_prot && prot->rsk_prot->slab) {
1837 		kmem_cache_destroy(prot->rsk_prot->slab);
1838 		prot->rsk_prot->slab = NULL;
1839 	}
1840 out_free_request_sock_slab_name:
1841 	kfree(request_sock_slab_name);
1842 out_free_sock_slab:
1843 	kmem_cache_destroy(prot->slab);
1844 	prot->slab = NULL;
1845 	goto out;
1846 }
1847 
1848 EXPORT_SYMBOL(proto_register);
1849 
1850 void proto_unregister(struct proto *prot)
1851 {
1852 	write_lock(&proto_list_lock);
1853 	list_del(&prot->node);
1854 	write_unlock(&proto_list_lock);
1855 
1856 	if (prot->slab != NULL) {
1857 		kmem_cache_destroy(prot->slab);
1858 		prot->slab = NULL;
1859 	}
1860 
1861 	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1862 		const char *name = kmem_cache_name(prot->rsk_prot->slab);
1863 
1864 		kmem_cache_destroy(prot->rsk_prot->slab);
1865 		kfree(name);
1866 		prot->rsk_prot->slab = NULL;
1867 	}
1868 
1869 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1870 		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1871 
1872 		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1873 		kfree(name);
1874 		prot->twsk_prot->twsk_slab = NULL;
1875 	}
1876 }
1877 
1878 EXPORT_SYMBOL(proto_unregister);
1879 
1880 #ifdef CONFIG_PROC_FS
1881 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1882 {
1883 	read_lock(&proto_list_lock);
1884 	return seq_list_start_head(&proto_list, *pos);
1885 }
1886 
1887 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1888 {
1889 	return seq_list_next(v, &proto_list, pos);
1890 }
1891 
1892 static void proto_seq_stop(struct seq_file *seq, void *v)
1893 {
1894 	read_unlock(&proto_list_lock);
1895 }
1896 
1897 static char proto_method_implemented(const void *method)
1898 {
1899 	return method == NULL ? 'n' : 'y';
1900 }
1901 
1902 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1903 {
1904 	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1905 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1906 		   proto->name,
1907 		   proto->obj_size,
1908 		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1909 		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1910 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1911 		   proto->max_header,
1912 		   proto->slab == NULL ? "no" : "yes",
1913 		   module_name(proto->owner),
1914 		   proto_method_implemented(proto->close),
1915 		   proto_method_implemented(proto->connect),
1916 		   proto_method_implemented(proto->disconnect),
1917 		   proto_method_implemented(proto->accept),
1918 		   proto_method_implemented(proto->ioctl),
1919 		   proto_method_implemented(proto->init),
1920 		   proto_method_implemented(proto->destroy),
1921 		   proto_method_implemented(proto->shutdown),
1922 		   proto_method_implemented(proto->setsockopt),
1923 		   proto_method_implemented(proto->getsockopt),
1924 		   proto_method_implemented(proto->sendmsg),
1925 		   proto_method_implemented(proto->recvmsg),
1926 		   proto_method_implemented(proto->sendpage),
1927 		   proto_method_implemented(proto->bind),
1928 		   proto_method_implemented(proto->backlog_rcv),
1929 		   proto_method_implemented(proto->hash),
1930 		   proto_method_implemented(proto->unhash),
1931 		   proto_method_implemented(proto->get_port),
1932 		   proto_method_implemented(proto->enter_memory_pressure));
1933 }
1934 
1935 static int proto_seq_show(struct seq_file *seq, void *v)
1936 {
1937 	if (v == &proto_list)
1938 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1939 			   "protocol",
1940 			   "size",
1941 			   "sockets",
1942 			   "memory",
1943 			   "press",
1944 			   "maxhdr",
1945 			   "slab",
1946 			   "module",
1947 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1948 	else
1949 		proto_seq_printf(seq, list_entry(v, struct proto, node));
1950 	return 0;
1951 }
1952 
1953 static const struct seq_operations proto_seq_ops = {
1954 	.start  = proto_seq_start,
1955 	.next   = proto_seq_next,
1956 	.stop   = proto_seq_stop,
1957 	.show   = proto_seq_show,
1958 };
1959 
1960 static int proto_seq_open(struct inode *inode, struct file *file)
1961 {
1962 	return seq_open(file, &proto_seq_ops);
1963 }
1964 
1965 static const struct file_operations proto_seq_fops = {
1966 	.owner		= THIS_MODULE,
1967 	.open		= proto_seq_open,
1968 	.read		= seq_read,
1969 	.llseek		= seq_lseek,
1970 	.release	= seq_release,
1971 };
1972 
1973 static int __init proto_init(void)
1974 {
1975 	/* register /proc/net/protocols */
1976 	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1977 }
1978 
1979 subsys_initcall(proto_init);
1980 
1981 #endif /* PROC_FS */
1982 
1983 EXPORT_SYMBOL(sk_alloc);
1984 EXPORT_SYMBOL(sk_free);
1985 EXPORT_SYMBOL(sk_send_sigurg);
1986 EXPORT_SYMBOL(sock_alloc_send_skb);
1987 EXPORT_SYMBOL(sock_init_data);
1988 EXPORT_SYMBOL(sock_kfree_s);
1989 EXPORT_SYMBOL(sock_kmalloc);
1990 EXPORT_SYMBOL(sock_no_accept);
1991 EXPORT_SYMBOL(sock_no_bind);
1992 EXPORT_SYMBOL(sock_no_connect);
1993 EXPORT_SYMBOL(sock_no_getname);
1994 EXPORT_SYMBOL(sock_no_getsockopt);
1995 EXPORT_SYMBOL(sock_no_ioctl);
1996 EXPORT_SYMBOL(sock_no_listen);
1997 EXPORT_SYMBOL(sock_no_mmap);
1998 EXPORT_SYMBOL(sock_no_poll);
1999 EXPORT_SYMBOL(sock_no_recvmsg);
2000 EXPORT_SYMBOL(sock_no_sendmsg);
2001 EXPORT_SYMBOL(sock_no_sendpage);
2002 EXPORT_SYMBOL(sock_no_setsockopt);
2003 EXPORT_SYMBOL(sock_no_shutdown);
2004 EXPORT_SYMBOL(sock_no_socketpair);
2005 EXPORT_SYMBOL(sock_rfree);
2006 EXPORT_SYMBOL(sock_setsockopt);
2007 EXPORT_SYMBOL(sock_wfree);
2008 EXPORT_SYMBOL(sock_wmalloc);
2009 EXPORT_SYMBOL(sock_i_uid);
2010 EXPORT_SYMBOL(sock_i_ino);
2011 EXPORT_SYMBOL(sysctl_optmem_max);
2012 #ifdef CONFIG_SYSCTL
2013 EXPORT_SYMBOL(sysctl_rmem_max);
2014 EXPORT_SYMBOL(sysctl_wmem_max);
2015 #endif
2016