/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink :	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo	:	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/jump_label.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
	struct proto *proto;
	int ret = 0;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry(proto, &proto_list, node) {
		if (proto->init_cgroup) {
			ret = proto->init_cgroup(cgrp, ss);
			if (ret)
				goto out;
		}
	}

	mutex_unlock(&proto_list_mutex);
	return ret;
out:
	list_for_each_entry_continue_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(cgrp, ss);
	mutex_unlock(&proto_list_mutex);
	return ret;
}

void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
	struct proto *proto;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(cgrp, ss);
	mutex_unlock(&proto_list_mutex);
}
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

struct jump_label_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);

/*
 * Make lock validator output more readable.  (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
	"sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
	"sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
	"sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
	"sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
	"sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
	"sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
	"sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
	"sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
	"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
	"sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
	"sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
	"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
	"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
	"sk_lock-AF_NFC"   , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	"slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
	"slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
	"slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
	"slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
	"slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
	"slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
	"slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
	"slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
	"slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
	"slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
	"slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
	"slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
	"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
	"slock-AF_NFC"   , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	"clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
	"clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
	"clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
	"clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
	"clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
	"clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
	"clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
	"clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
	"clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
	"clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
	"clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
	"clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
	"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
	"clock-AF_NFC"   , "clock-AF_MAX"
};

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

#if defined(CONFIG_CGROUPS)
#if !defined(CONFIG_NET_CLS_CGROUP)
int net_cls_subsys_id = -1;
EXPORT_SYMBOL_GPL(net_cls_subsys_id);
#endif
#if !defined(CONFIG_NETPRIO_CGROUP)
int net_prio_subsys_id = -1;
EXPORT_SYMBOL_GPL(net_prio_subsys_id);
#endif
#endif

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
			       "tries to set negative timeout\n",
			       current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))

static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
	return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, skb)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

void sk_reset_txq(struct sock *sk)
{
	sk_tx_queue_clear(sk);
}
EXPORT_SYMBOL(sk_reset_txq);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!capable(CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_bindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this; BSD doesn't, and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints.
		 */

		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;

		/*
		 *	Wake up sending tasks if we
		 *	upped the value.
		 */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this; BSD doesn't, and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints.
		 */

		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool)  {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
				  val & SOF_TIMESTAMPING_TX_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
				  val & SOF_TIMESTAMPING_TX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
				  val & SOF_TIMESTAMPING_RX_HARDWARE);
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
				  val & SOF_TIMESTAMPING_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
				  val & SOF_TIMESTAMPING_SYS_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
				  val & SOF_TIMESTAMPING_RAW_HARDWARE);
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!capable(CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);


void cred_to_ucred(struct pid *pid, const struct cred *cred,
		   struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
		ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
	}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = !!sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = !!sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv = sizeof(v.ling);
		v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger = sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = 0;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
			v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = !!sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end.
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

/*
 * Caches using SLAB_DESTROY_BY_RCU should leave the .next pointer of nulls
 * nodes unmodified. Special care is taken when initializing the object to zero.
 */
static inline void sk_prot_clear_nulls(struct sock *sk, int size)
{
	if (offsetof(struct sock, sk_node.next) != 0)
		memset(sk, 0, offsetof(struct sock, sk_node.next));
	memset(&sk->sk_node.pprev, 0,
	       size - offsetof(struct sock, sk_node.pprev));
}

void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
	unsigned long nulls1, nulls2;

	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
	if (nulls1 > nulls2)
		swap(nulls1, nulls2);

	if (nulls1 != 0)
		memset((char *)sk, 0, nulls1);
	memset((char *)sk + nulls1 + sizeof(void *), 0,
	       nulls2 - nulls1 - sizeof(void *));
	memset((char *)sk + nulls2 + sizeof(void *), 0,
	       size - nulls2 - sizeof(void *));
}
EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);

static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
				  int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			if (prot->clear_sk)
				prot->clear_sk(sk, prot->obj_size);
			else
				sk_prot_clear_nulls(sk, prot->obj_size);
		}
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

#ifdef CONFIG_CGROUPS
void sock_update_classid(struct sock *sk)
{
	u32 classid;

	rcu_read_lock();  /* doing current task, which cannot vanish. */
	classid = task_cls_classid(current);
	rcu_read_unlock();
	if (classid && classid != sk->sk_classid)
		sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);

void sock_update_netprioidx(struct sock *sk)
{
	struct cgroup_netprio_state *state;
	if (in_interrupt())
		return;
	rcu_read_lock();
	state = task_netprio_state(current);
	sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif

/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sock_lock_init(sk);
		sock_net_set(sk, get_net(net));
		atomic_set(&sk->sk_wmem_alloc, 1);

		sock_update_classid(sk);
		sock_update_netprioidx(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);

static void __sk_free(struct sock *sk)
{
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference_check(sk->sk_filter,
				       atomic_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}

void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc and can know if
	 * some packets are still in some tx queue.
	 * If not null, sock_wfree() will call __sk_free(sk) later
	 */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);

/*
 * The last sock_put should drop a reference to sk->sk_net. It has already
 * been dropped in sk_change_net. Taking a reference to a stopping namespace
 * is not an option.
 * Take a reference to the socket to remove it from the hash _alive_, and after
 * that destroy it in the context of init_net.
 */
void sk_release_kernel(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_hold(sk);
	sock_release(sk->sk_socket);
	release_net(sock_net(sk));
	sock_net_set(sk, get_net(&init_net));
	sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);

/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		spin_lock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache = NULL;
		newsk->sk_wmem_queued = 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head = NULL;
		newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still a raw copy of the parent, so invalidate
			 * the destructor and make a plain sk_free() */
			newsk->sk_destruct = NULL;
			bh_unlock_sock(newsk);
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err = 0;
		newsk->sk_priority = 0;
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
		}
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
	if (totalram_pages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (totalram_pages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock_bh(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock_bh(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
EXPORT_SYMBOL(sock_kfree_s);

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
 * I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					__skb_fill_page_desc(skb, i,
							page, 0,
							(data_len >= PAGE_SIZE ?
							 PAGE_SIZE :
							 data_len));
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
EXPORT_SYMBOL(sock_alloc_send_skb);

static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					  TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);

	/*
	 * Doing the zeroing here guarantees we cannot loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);

/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: allocation type
 *
 *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 *	rmem allocation. This function assumes that protocols which have
 *	memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	struct proto *prot = sk->sk_prot;
	int amt = sk_mem_pages(size);
	long allocated;
	int parent_status = UNDER_LIMIT;

	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;

	allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	/* Under limit. */
	if (parent_status == UNDER_LIMIT &&
	    allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. (we or our parents) */
	if ((parent_status > SOFT_LIMIT) ||
	    allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit (we or our parents) */
	if ((parent_status == OVER_LIMIT) ||
	    (allocated > sk_prot_mem_limits(sk, 2)))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;

	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (atomic_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
				return 1;
	}

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	trace_sock_exceed_buf_limit(sk, prot, allocated);

	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;

	sk_memory_allocated_sub(sk, amt, parent_status);

	return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);

/**
 *	__sk_mem_reclaim - reclaim memory_allocated
 *	@sk: socket
 */
void __sk_mem_reclaim(struct sock *sk)
{
	sk_memory_allocated_sub(sk,
				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;

	if (sk_under_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reclaim);


/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
EXPORT_SYMBOL(sock_no_sendpage);

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}

static void sock_def_readable(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
	} else
		sk->sk_wq = NULL;

	spin_lock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_sndmsg_page = NULL;
	sk->sk_sndmsg_off = 0;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = ktime_set(-1L, 0);

	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);

void lock_sock_nested(struct sock *sk, int subclass)
void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owned = 0;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);

/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small sections, where the process
 * won't block.
 *
 * Returns false if the fast path was taken:
 *   sk_lock.slock locked, owned = 0, BH disabled
 * Returns true if the slow path was taken:
 *   sk_lock.slock unlocked, owned = 1, BH enabled
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Note: the fast path returns with BH still disabled
		 * and slock held.
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);

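/*
 * Usage sketch (illustrative only): lock_sock_fast() is paired with
 * unlock_sock_fast() (see include/net/sock.h), which either drops the
 * spinlock and re-enables BH (fast path) or calls release_sock() (slow path):
 *
 *	bool slow = lock_sock_fast(sk);
 *	... short, non-blocking work on the socket ...
 *	unlock_sock_fast(sk, slow);
 */
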
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * We just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one.
		 */
		if (!(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

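/*
 * Userspace view of the SO_ERROR convention mentioned above (illustrative
 * only): a pending asynchronous error is fetched, and cleared, with
 * getsockopt(SO_ERROR).
 *
 *	int err = 0;
 *	socklen_t len = sizeof(err);
 *
 *	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err != 0)
 *		errno = err;		(the socket's soft error, now cleared)
 */
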
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes no longer
	 * have access to the socket, but the network stack still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs the
	 * receiver and did the hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and will be purged by the
	 * socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. The destructor
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);

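/*
 * Usage sketch (illustrative only): simple protocols typically call
 * sk_common_release() from their ->close() handler once no further I/O can
 * be issued on the socket.  example_close() is a hypothetical name.
 *
 *	static void example_close(struct sock *sk, long timeout)
 *	{
 *		sk_common_release(sk);
 *	}
 */
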
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

static int __net_init sock_inuse_init_net(struct net *net)
{
	net->core.inuse = alloc_percpu(struct prot_inuse);
	return net->core.inuse ? 0 : -ENOMEM;
}

static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

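/*
 * Usage sketch (illustrative only): protocols account sockets against the
 * per-protocol "inuse" counter from their hash/unhash paths, and the
 * /proc/net/protocols code below reports it via sock_prot_inuse_get():
 *
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);	(on hash)
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);	(on unhash)
 */
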
int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
			       prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
				       prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	if (prot->rsk_prot)
		kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);

void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(prot->rsk_prot->slab_name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);

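/*
 * Usage sketch (illustrative only): a protocol module registers its struct
 * proto at init time and unregisters it on exit.  example_proto and
 * struct example_sock are hypothetical names.
 *
 *	static struct proto example_proto = {
 *		.name	  = "EXAMPLE",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct example_sock),
 *	};
 *
 *	err = proto_register(&example_proto, 1);	(1 => allocate a slab cache)
 *	...
 *	proto_unregister(&example_proto);
 */
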
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
		proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}

static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
	if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	proc_net_remove(net, "protocols");
}

static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* PROC_FS */