1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * INET An implementation of the TCP/IP protocol suite for the LINUX 4 * operating system. INET is implemented using the BSD Socket 5 * interface as the means of communication with the user level. 6 * 7 * Generic socket support routines. Memory allocators, socket lock/release 8 * handler for protocols to use and generic option handler. 9 * 10 * Authors: Ross Biro 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Florian La Roche, <flla@stud.uni-sb.de> 13 * Alan Cox, <A.Cox@swansea.ac.uk> 14 * 15 * Fixes: 16 * Alan Cox : Numerous verify_area() problems 17 * Alan Cox : Connecting on a connecting socket 18 * now returns an error for tcp. 19 * Alan Cox : sock->protocol is set correctly. 20 * and is not sometimes left as 0. 21 * Alan Cox : connect handles icmp errors on a 22 * connect properly. Unfortunately there 23 * is a restart syscall nasty there. I 24 * can't match BSD without hacking the C 25 * library. Ideas urgently sought! 26 * Alan Cox : Disallow bind() to addresses that are 27 * not ours - especially broadcast ones!! 28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) 29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, 30 * instead they leave that for the DESTROY timer. 31 * Alan Cox : Clean up error flag in accept 32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer 33 * was buggy. Put a remove_sock() in the handler 34 * for memory when we hit 0. Also altered the timer 35 * code. The ACK stuff can wait and needs major 36 * TCP layer surgery. 37 * Alan Cox : Fixed TCP ack bug, removed remove sock 38 * and fixed timer/inet_bh race. 39 * Alan Cox : Added zapped flag for TCP 40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code 41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb 42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources 43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. 44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... 45 * Rick Sladkey : Relaxed UDP rules for matching packets. 46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support 47 * Pauline Middelink : identd support 48 * Alan Cox : Fixed connect() taking signals I think. 49 * Alan Cox : SO_LINGER supported 50 * Alan Cox : Error reporting fixes 51 * Anonymous : inet_create tidied up (sk->reuse setting) 52 * Alan Cox : inet sockets don't set sk->type! 53 * Alan Cox : Split socket option code 54 * Alan Cox : Callbacks 55 * Alan Cox : Nagle flag for Charles & Johannes stuff 56 * Alex : Removed restriction on inet fioctl 57 * Alan Cox : Splitting INET from NET core 58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() 59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code 60 * Alan Cox : Split IP from generic code 61 * Alan Cox : New kfree_skbmem() 62 * Alan Cox : Make SO_DEBUG superuser only. 63 * Alan Cox : Allow anyone to clear SO_DEBUG 64 * (compatibility fix) 65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. 66 * Alan Cox : Allocator for a socket is settable. 67 * Alan Cox : SO_ERROR includes soft errors. 68 * Alan Cox : Allow NULL arguments on some SO_ opts 69 * Alan Cox : Generic socket allocation to make hooks 70 * easier (suggested by Craig Metz). 71 * Michael Pall : SO_ERROR returns positive errno again 72 * Steve Whitehouse: Added default destructor to free 73 * protocol private data. 
74 * Steve Whitehouse: Added various other default routines 75 * common to several socket families. 76 * Chris Evans : Call suser() check last on F_SETOWN 77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. 78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() 79 * Andi Kleen : Fix write_space callback 80 * Chris Evans : Security fixes - signedness again 81 * Arnaldo C. Melo : cleanups, use skb_queue_purge 82 * 83 * To Fix: 84 */ 85 86 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 87 88 #include <asm/unaligned.h> 89 #include <linux/capability.h> 90 #include <linux/errno.h> 91 #include <linux/errqueue.h> 92 #include <linux/types.h> 93 #include <linux/socket.h> 94 #include <linux/in.h> 95 #include <linux/kernel.h> 96 #include <linux/module.h> 97 #include <linux/proc_fs.h> 98 #include <linux/seq_file.h> 99 #include <linux/sched.h> 100 #include <linux/sched/mm.h> 101 #include <linux/timer.h> 102 #include <linux/string.h> 103 #include <linux/sockios.h> 104 #include <linux/net.h> 105 #include <linux/mm.h> 106 #include <linux/slab.h> 107 #include <linux/interrupt.h> 108 #include <linux/poll.h> 109 #include <linux/tcp.h> 110 #include <linux/init.h> 111 #include <linux/highmem.h> 112 #include <linux/user_namespace.h> 113 #include <linux/static_key.h> 114 #include <linux/memcontrol.h> 115 #include <linux/prefetch.h> 116 #include <linux/compat.h> 117 118 #include <linux/uaccess.h> 119 120 #include <linux/netdevice.h> 121 #include <net/protocol.h> 122 #include <linux/skbuff.h> 123 #include <net/net_namespace.h> 124 #include <net/request_sock.h> 125 #include <net/sock.h> 126 #include <linux/net_tstamp.h> 127 #include <net/xfrm.h> 128 #include <linux/ipsec.h> 129 #include <net/cls_cgroup.h> 130 #include <net/netprio_cgroup.h> 131 #include <linux/sock_diag.h> 132 133 #include <linux/filter.h> 134 #include <net/sock_reuseport.h> 135 #include <net/bpf_sk_storage.h> 136 137 #include <trace/events/sock.h> 138 139 #include <net/tcp.h> 140 #include <net/busy_poll.h> 141 142 #include <linux/ethtool.h> 143 144 #include "dev.h" 145 146 static DEFINE_MUTEX(proto_list_mutex); 147 static LIST_HEAD(proto_list); 148 149 /** 150 * sk_ns_capable - General socket capability test 151 * @sk: Socket to use a capability on or through 152 * @user_ns: The user namespace of the capability to use 153 * @cap: The capability to use 154 * 155 * Test to see if the opener of the socket had when the socket was 156 * created and the current process has the capability @cap in the user 157 * namespace @user_ns. 158 */ 159 bool sk_ns_capable(const struct sock *sk, 160 struct user_namespace *user_ns, int cap) 161 { 162 return file_ns_capable(sk->sk_socket->file, user_ns, cap) && 163 ns_capable(user_ns, cap); 164 } 165 EXPORT_SYMBOL(sk_ns_capable); 166 167 /** 168 * sk_capable - Socket global capability test 169 * @sk: Socket to use a capability on or through 170 * @cap: The global capability to use 171 * 172 * Test to see if the opener of the socket had when the socket was 173 * created and the current process has the capability @cap in all user 174 * namespaces. 
175 */ 176 bool sk_capable(const struct sock *sk, int cap) 177 { 178 return sk_ns_capable(sk, &init_user_ns, cap); 179 } 180 EXPORT_SYMBOL(sk_capable); 181 182 /** 183 * sk_net_capable - Network namespace socket capability test 184 * @sk: Socket to use a capability on or through 185 * @cap: The capability to use 186 * 187 * Test to see if the opener of the socket had when the socket was created 188 * and the current process has the capability @cap over the network namespace 189 * the socket is a member of. 190 */ 191 bool sk_net_capable(const struct sock *sk, int cap) 192 { 193 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); 194 } 195 EXPORT_SYMBOL(sk_net_capable); 196 197 /* 198 * Each address family might have different locking rules, so we have 199 * one slock key per address family and separate keys for internal and 200 * userspace sockets. 201 */ 202 static struct lock_class_key af_family_keys[AF_MAX]; 203 static struct lock_class_key af_family_kern_keys[AF_MAX]; 204 static struct lock_class_key af_family_slock_keys[AF_MAX]; 205 static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; 206 207 /* 208 * Make lock validator output more readable. (we pre-construct these 209 * strings build-time, so that runtime initialization of socket 210 * locks is fast): 211 */ 212 213 #define _sock_locks(x) \ 214 x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \ 215 x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \ 216 x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \ 217 x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \ 218 x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \ 219 x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \ 220 x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \ 221 x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \ 222 x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \ 223 x "27" , x "28" , x "AF_CAN" , \ 224 x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \ 225 x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ 226 x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ 227 x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ 228 x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \ 229 x "AF_MCTP" , \ 230 x "AF_MAX" 231 232 static const char *const af_family_key_strings[AF_MAX+1] = { 233 _sock_locks("sk_lock-") 234 }; 235 static const char *const af_family_slock_key_strings[AF_MAX+1] = { 236 _sock_locks("slock-") 237 }; 238 static const char *const af_family_clock_key_strings[AF_MAX+1] = { 239 _sock_locks("clock-") 240 }; 241 242 static const char *const af_family_kern_key_strings[AF_MAX+1] = { 243 _sock_locks("k-sk_lock-") 244 }; 245 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { 246 _sock_locks("k-slock-") 247 }; 248 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { 249 _sock_locks("k-clock-") 250 }; 251 static const char *const af_family_rlock_key_strings[AF_MAX+1] = { 252 _sock_locks("rlock-") 253 }; 254 static const char *const af_family_wlock_key_strings[AF_MAX+1] = { 255 _sock_locks("wlock-") 256 }; 257 static const char *const af_family_elock_key_strings[AF_MAX+1] = { 258 _sock_locks("elock-") 259 }; 260 261 /* 262 * sk_callback_lock and sk queues locking rules are per-address-family, 263 * so split the lock classes by using a per-AF key: 264 */ 265 static struct lock_class_key af_callback_keys[AF_MAX]; 266 static struct lock_class_key af_rlock_keys[AF_MAX]; 267 static struct lock_class_key af_wlock_keys[AF_MAX]; 268 static struct lock_class_key af_elock_keys[AF_MAX]; 269 static struct lock_class_key af_kern_callback_keys[AF_MAX]; 270 271 /* Run time adjustable parameters. 
*/ 272 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; 273 EXPORT_SYMBOL(sysctl_wmem_max); 274 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; 275 EXPORT_SYMBOL(sysctl_rmem_max); 276 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; 277 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; 278 279 /* Maximal space eaten by iovec or ancillary data plus some space */ 280 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 281 EXPORT_SYMBOL(sysctl_optmem_max); 282 283 int sysctl_tstamp_allow_data __read_mostly = 1; 284 285 DEFINE_STATIC_KEY_FALSE(memalloc_socks_key); 286 EXPORT_SYMBOL_GPL(memalloc_socks_key); 287 288 /** 289 * sk_set_memalloc - sets %SOCK_MEMALLOC 290 * @sk: socket to set it on 291 * 292 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. 293 * It's the responsibility of the admin to adjust min_free_kbytes 294 * to meet the requirements 295 */ 296 void sk_set_memalloc(struct sock *sk) 297 { 298 sock_set_flag(sk, SOCK_MEMALLOC); 299 sk->sk_allocation |= __GFP_MEMALLOC; 300 static_branch_inc(&memalloc_socks_key); 301 } 302 EXPORT_SYMBOL_GPL(sk_set_memalloc); 303 304 void sk_clear_memalloc(struct sock *sk) 305 { 306 sock_reset_flag(sk, SOCK_MEMALLOC); 307 sk->sk_allocation &= ~__GFP_MEMALLOC; 308 static_branch_dec(&memalloc_socks_key); 309 310 /* 311 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward 312 * progress of swapping. SOCK_MEMALLOC may be cleared while 313 * it has rmem allocations due to the last swapfile being deactivated 314 * but there is a risk that the socket is unusable due to exceeding 315 * the rmem limits. Reclaim the reserves and obey rmem limits again. 316 */ 317 sk_mem_reclaim(sk); 318 } 319 EXPORT_SYMBOL_GPL(sk_clear_memalloc); 320 321 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) 322 { 323 int ret; 324 unsigned int noreclaim_flag; 325 326 /* these should have been dropped before queueing */ 327 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); 328 329 noreclaim_flag = memalloc_noreclaim_save(); 330 ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv, 331 tcp_v6_do_rcv, 332 tcp_v4_do_rcv, 333 sk, skb); 334 memalloc_noreclaim_restore(noreclaim_flag); 335 336 return ret; 337 } 338 EXPORT_SYMBOL(__sk_backlog_rcv); 339 340 void sk_error_report(struct sock *sk) 341 { 342 sk->sk_error_report(sk); 343 344 switch (sk->sk_family) { 345 case AF_INET: 346 fallthrough; 347 case AF_INET6: 348 trace_inet_sk_error_report(sk); 349 break; 350 default: 351 break; 352 } 353 } 354 EXPORT_SYMBOL(sk_error_report); 355 356 int sock_get_timeout(long timeo, void *optval, bool old_timeval) 357 { 358 struct __kernel_sock_timeval tv; 359 360 if (timeo == MAX_SCHEDULE_TIMEOUT) { 361 tv.tv_sec = 0; 362 tv.tv_usec = 0; 363 } else { 364 tv.tv_sec = timeo / HZ; 365 tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; 366 } 367 368 if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { 369 struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; 370 *(struct old_timeval32 *)optval = tv32; 371 return sizeof(tv32); 372 } 373 374 if (old_timeval) { 375 struct __kernel_old_timeval old_tv; 376 old_tv.tv_sec = tv.tv_sec; 377 old_tv.tv_usec = tv.tv_usec; 378 *(struct __kernel_old_timeval *)optval = old_tv; 379 return sizeof(old_tv); 380 } 381 382 *(struct __kernel_sock_timeval *)optval = tv; 383 return sizeof(tv); 384 } 385 EXPORT_SYMBOL(sock_get_timeout); 386 387 int sock_copy_user_timeval(struct __kernel_sock_timeval *tv, 388 sockptr_t optval, int optlen, bool old_timeval) 389 { 390 if (old_timeval && in_compat_syscall() 
&& !COMPAT_USE_64BIT_TIME) { 391 struct old_timeval32 tv32; 392 393 if (optlen < sizeof(tv32)) 394 return -EINVAL; 395 396 if (copy_from_sockptr(&tv32, optval, sizeof(tv32))) 397 return -EFAULT; 398 tv->tv_sec = tv32.tv_sec; 399 tv->tv_usec = tv32.tv_usec; 400 } else if (old_timeval) { 401 struct __kernel_old_timeval old_tv; 402 403 if (optlen < sizeof(old_tv)) 404 return -EINVAL; 405 if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv))) 406 return -EFAULT; 407 tv->tv_sec = old_tv.tv_sec; 408 tv->tv_usec = old_tv.tv_usec; 409 } else { 410 if (optlen < sizeof(*tv)) 411 return -EINVAL; 412 if (copy_from_sockptr(tv, optval, sizeof(*tv))) 413 return -EFAULT; 414 } 415 416 return 0; 417 } 418 EXPORT_SYMBOL(sock_copy_user_timeval); 419 420 static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, 421 bool old_timeval) 422 { 423 struct __kernel_sock_timeval tv; 424 int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval); 425 426 if (err) 427 return err; 428 429 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) 430 return -EDOM; 431 432 if (tv.tv_sec < 0) { 433 static int warned __read_mostly; 434 435 *timeo_p = 0; 436 if (warned < 10 && net_ratelimit()) { 437 warned++; 438 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n", 439 __func__, current->comm, task_pid_nr(current)); 440 } 441 return 0; 442 } 443 *timeo_p = MAX_SCHEDULE_TIMEOUT; 444 if (tv.tv_sec == 0 && tv.tv_usec == 0) 445 return 0; 446 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) 447 *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ); 448 return 0; 449 } 450 451 static bool sock_needs_netstamp(const struct sock *sk) 452 { 453 switch (sk->sk_family) { 454 case AF_UNSPEC: 455 case AF_UNIX: 456 return false; 457 default: 458 return true; 459 } 460 } 461 462 static void sock_disable_timestamp(struct sock *sk, unsigned long flags) 463 { 464 if (sk->sk_flags & flags) { 465 sk->sk_flags &= ~flags; 466 if (sock_needs_netstamp(sk) && 467 !(sk->sk_flags & SK_FLAGS_TIMESTAMP)) 468 net_disable_timestamp(); 469 } 470 } 471 472 473 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 474 { 475 unsigned long flags; 476 struct sk_buff_head *list = &sk->sk_receive_queue; 477 478 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { 479 atomic_inc(&sk->sk_drops); 480 trace_sock_rcvqueue_full(sk, skb); 481 return -ENOMEM; 482 } 483 484 if (!sk_rmem_schedule(sk, skb, skb->truesize)) { 485 atomic_inc(&sk->sk_drops); 486 return -ENOBUFS; 487 } 488 489 skb->dev = NULL; 490 skb_set_owner_r(skb, sk); 491 492 /* we escape from rcu protected region, make sure we dont leak 493 * a norefcounted dst 494 */ 495 skb_dst_force(skb); 496 497 spin_lock_irqsave(&list->lock, flags); 498 sock_skb_set_dropcount(sk, skb); 499 __skb_queue_tail(list, skb); 500 spin_unlock_irqrestore(&list->lock, flags); 501 502 if (!sock_flag(sk, SOCK_DEAD)) 503 sk->sk_data_ready(sk); 504 return 0; 505 } 506 EXPORT_SYMBOL(__sock_queue_rcv_skb); 507 508 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 509 { 510 int err; 511 512 err = sk_filter(sk, skb); 513 if (err) 514 return err; 515 516 return __sock_queue_rcv_skb(sk, skb); 517 } 518 EXPORT_SYMBOL(sock_queue_rcv_skb); 519 520 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, 521 const int nested, unsigned int trim_cap, bool refcounted) 522 { 523 int rc = NET_RX_SUCCESS; 524 525 if (sk_filter_trim_cap(sk, skb, trim_cap)) 526 goto discard_and_relse; 527 528 skb->dev = NULL; 529 530 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { 531 
atomic_inc(&sk->sk_drops); 532 goto discard_and_relse; 533 } 534 if (nested) 535 bh_lock_sock_nested(sk); 536 else 537 bh_lock_sock(sk); 538 if (!sock_owned_by_user(sk)) { 539 /* 540 * trylock + unlock semantics: 541 */ 542 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); 543 544 rc = sk_backlog_rcv(sk, skb); 545 546 mutex_release(&sk->sk_lock.dep_map, _RET_IP_); 547 } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) { 548 bh_unlock_sock(sk); 549 atomic_inc(&sk->sk_drops); 550 goto discard_and_relse; 551 } 552 553 bh_unlock_sock(sk); 554 out: 555 if (refcounted) 556 sock_put(sk); 557 return rc; 558 discard_and_relse: 559 kfree_skb(skb); 560 goto out; 561 } 562 EXPORT_SYMBOL(__sk_receive_skb); 563 564 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, 565 u32)); 566 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 567 u32)); 568 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) 569 { 570 struct dst_entry *dst = __sk_dst_get(sk); 571 572 if (dst && dst->obsolete && 573 INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, 574 dst, cookie) == NULL) { 575 sk_tx_queue_clear(sk); 576 sk->sk_dst_pending_confirm = 0; 577 RCU_INIT_POINTER(sk->sk_dst_cache, NULL); 578 dst_release(dst); 579 return NULL; 580 } 581 582 return dst; 583 } 584 EXPORT_SYMBOL(__sk_dst_check); 585 586 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) 587 { 588 struct dst_entry *dst = sk_dst_get(sk); 589 590 if (dst && dst->obsolete && 591 INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, 592 dst, cookie) == NULL) { 593 sk_dst_reset(sk); 594 dst_release(dst); 595 return NULL; 596 } 597 598 return dst; 599 } 600 EXPORT_SYMBOL(sk_dst_check); 601 602 static int sock_bindtoindex_locked(struct sock *sk, int ifindex) 603 { 604 int ret = -ENOPROTOOPT; 605 #ifdef CONFIG_NETDEVICES 606 struct net *net = sock_net(sk); 607 608 /* Sorry... */ 609 ret = -EPERM; 610 if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW)) 611 goto out; 612 613 ret = -EINVAL; 614 if (ifindex < 0) 615 goto out; 616 617 sk->sk_bound_dev_if = ifindex; 618 if (sk->sk_prot->rehash) 619 sk->sk_prot->rehash(sk); 620 sk_dst_reset(sk); 621 622 ret = 0; 623 624 out: 625 #endif 626 627 return ret; 628 } 629 630 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk) 631 { 632 int ret; 633 634 if (lock_sk) 635 lock_sock(sk); 636 ret = sock_bindtoindex_locked(sk, ifindex); 637 if (lock_sk) 638 release_sock(sk); 639 640 return ret; 641 } 642 EXPORT_SYMBOL(sock_bindtoindex); 643 644 static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) 645 { 646 int ret = -ENOPROTOOPT; 647 #ifdef CONFIG_NETDEVICES 648 struct net *net = sock_net(sk); 649 char devname[IFNAMSIZ]; 650 int index; 651 652 ret = -EINVAL; 653 if (optlen < 0) 654 goto out; 655 656 /* Bind this socket to a particular device like "eth0", 657 * as specified in the passed interface name. If the 658 * name is "" or the option length is zero the socket 659 * is not bound. 
660 */ 661 if (optlen > IFNAMSIZ - 1) 662 optlen = IFNAMSIZ - 1; 663 memset(devname, 0, sizeof(devname)); 664 665 ret = -EFAULT; 666 if (copy_from_sockptr(devname, optval, optlen)) 667 goto out; 668 669 index = 0; 670 if (devname[0] != '\0') { 671 struct net_device *dev; 672 673 rcu_read_lock(); 674 dev = dev_get_by_name_rcu(net, devname); 675 if (dev) 676 index = dev->ifindex; 677 rcu_read_unlock(); 678 ret = -ENODEV; 679 if (!dev) 680 goto out; 681 } 682 683 return sock_bindtoindex(sk, index, true); 684 out: 685 #endif 686 687 return ret; 688 } 689 690 static int sock_getbindtodevice(struct sock *sk, char __user *optval, 691 int __user *optlen, int len) 692 { 693 int ret = -ENOPROTOOPT; 694 #ifdef CONFIG_NETDEVICES 695 struct net *net = sock_net(sk); 696 char devname[IFNAMSIZ]; 697 698 if (sk->sk_bound_dev_if == 0) { 699 len = 0; 700 goto zero; 701 } 702 703 ret = -EINVAL; 704 if (len < IFNAMSIZ) 705 goto out; 706 707 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if); 708 if (ret) 709 goto out; 710 711 len = strlen(devname) + 1; 712 713 ret = -EFAULT; 714 if (copy_to_user(optval, devname, len)) 715 goto out; 716 717 zero: 718 ret = -EFAULT; 719 if (put_user(len, optlen)) 720 goto out; 721 722 ret = 0; 723 724 out: 725 #endif 726 727 return ret; 728 } 729 730 bool sk_mc_loop(struct sock *sk) 731 { 732 if (dev_recursion_level()) 733 return false; 734 if (!sk) 735 return true; 736 switch (sk->sk_family) { 737 case AF_INET: 738 return inet_sk(sk)->mc_loop; 739 #if IS_ENABLED(CONFIG_IPV6) 740 case AF_INET6: 741 return inet6_sk(sk)->mc_loop; 742 #endif 743 } 744 WARN_ON_ONCE(1); 745 return true; 746 } 747 EXPORT_SYMBOL(sk_mc_loop); 748 749 void sock_set_reuseaddr(struct sock *sk) 750 { 751 lock_sock(sk); 752 sk->sk_reuse = SK_CAN_REUSE; 753 release_sock(sk); 754 } 755 EXPORT_SYMBOL(sock_set_reuseaddr); 756 757 void sock_set_reuseport(struct sock *sk) 758 { 759 lock_sock(sk); 760 sk->sk_reuseport = true; 761 release_sock(sk); 762 } 763 EXPORT_SYMBOL(sock_set_reuseport); 764 765 void sock_no_linger(struct sock *sk) 766 { 767 lock_sock(sk); 768 sk->sk_lingertime = 0; 769 sock_set_flag(sk, SOCK_LINGER); 770 release_sock(sk); 771 } 772 EXPORT_SYMBOL(sock_no_linger); 773 774 void sock_set_priority(struct sock *sk, u32 priority) 775 { 776 lock_sock(sk); 777 sk->sk_priority = priority; 778 release_sock(sk); 779 } 780 EXPORT_SYMBOL(sock_set_priority); 781 782 void sock_set_sndtimeo(struct sock *sk, s64 secs) 783 { 784 lock_sock(sk); 785 if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1) 786 sk->sk_sndtimeo = secs * HZ; 787 else 788 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; 789 release_sock(sk); 790 } 791 EXPORT_SYMBOL(sock_set_sndtimeo); 792 793 static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns) 794 { 795 if (val) { 796 sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new); 797 sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns); 798 sock_set_flag(sk, SOCK_RCVTSTAMP); 799 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 800 } else { 801 sock_reset_flag(sk, SOCK_RCVTSTAMP); 802 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 803 } 804 } 805 806 void sock_enable_timestamps(struct sock *sk) 807 { 808 lock_sock(sk); 809 __sock_set_timestamps(sk, true, false, true); 810 release_sock(sk); 811 } 812 EXPORT_SYMBOL(sock_enable_timestamps); 813 814 void sock_set_timestamp(struct sock *sk, int optname, bool valbool) 815 { 816 switch (optname) { 817 case SO_TIMESTAMP_OLD: 818 __sock_set_timestamps(sk, valbool, false, false); 819 break; 820 case SO_TIMESTAMP_NEW: 821 __sock_set_timestamps(sk, valbool, true, false); 
822 break; 823 case SO_TIMESTAMPNS_OLD: 824 __sock_set_timestamps(sk, valbool, false, true); 825 break; 826 case SO_TIMESTAMPNS_NEW: 827 __sock_set_timestamps(sk, valbool, true, true); 828 break; 829 } 830 } 831 832 static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) 833 { 834 struct net *net = sock_net(sk); 835 struct net_device *dev = NULL; 836 bool match = false; 837 int *vclock_index; 838 int i, num; 839 840 if (sk->sk_bound_dev_if) 841 dev = dev_get_by_index(net, sk->sk_bound_dev_if); 842 843 if (!dev) { 844 pr_err("%s: sock not bind to device\n", __func__); 845 return -EOPNOTSUPP; 846 } 847 848 num = ethtool_get_phc_vclocks(dev, &vclock_index); 849 dev_put(dev); 850 851 for (i = 0; i < num; i++) { 852 if (*(vclock_index + i) == phc_index) { 853 match = true; 854 break; 855 } 856 } 857 858 if (num > 0) 859 kfree(vclock_index); 860 861 if (!match) 862 return -EINVAL; 863 864 sk->sk_bind_phc = phc_index; 865 866 return 0; 867 } 868 869 int sock_set_timestamping(struct sock *sk, int optname, 870 struct so_timestamping timestamping) 871 { 872 int val = timestamping.flags; 873 int ret; 874 875 if (val & ~SOF_TIMESTAMPING_MASK) 876 return -EINVAL; 877 878 if (val & SOF_TIMESTAMPING_OPT_ID && 879 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { 880 if (sk_is_tcp(sk)) { 881 if ((1 << sk->sk_state) & 882 (TCPF_CLOSE | TCPF_LISTEN)) 883 return -EINVAL; 884 atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una); 885 } else { 886 atomic_set(&sk->sk_tskey, 0); 887 } 888 } 889 890 if (val & SOF_TIMESTAMPING_OPT_STATS && 891 !(val & SOF_TIMESTAMPING_OPT_TSONLY)) 892 return -EINVAL; 893 894 if (val & SOF_TIMESTAMPING_BIND_PHC) { 895 ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc); 896 if (ret) 897 return ret; 898 } 899 900 sk->sk_tsflags = val; 901 sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); 902 903 if (val & SOF_TIMESTAMPING_RX_SOFTWARE) 904 sock_enable_timestamp(sk, 905 SOCK_TIMESTAMPING_RX_SOFTWARE); 906 else 907 sock_disable_timestamp(sk, 908 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); 909 return 0; 910 } 911 912 void sock_set_keepalive(struct sock *sk) 913 { 914 lock_sock(sk); 915 if (sk->sk_prot->keepalive) 916 sk->sk_prot->keepalive(sk, true); 917 sock_valbool_flag(sk, SOCK_KEEPOPEN, true); 918 release_sock(sk); 919 } 920 EXPORT_SYMBOL(sock_set_keepalive); 921 922 static void __sock_set_rcvbuf(struct sock *sk, int val) 923 { 924 /* Ensure val * 2 fits into an int, to prevent max_t() from treating it 925 * as a negative value. 926 */ 927 val = min_t(int, val, INT_MAX / 2); 928 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 929 930 /* We double it on the way in to account for "struct sk_buff" etc. 931 * overhead. Applications assume that the SO_RCVBUF setting they make 932 * will allow that much actual data to be received on that socket. 933 * 934 * Applications are unaware that "struct sk_buff" and other overheads 935 * allocate from the receive buffer during socket buffer allocation. 936 * 937 * And after considering the possible alternatives, returning the value 938 * we actually used in getsockopt is the most desirable behavior. 
939 */ 940 WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); 941 } 942 943 void sock_set_rcvbuf(struct sock *sk, int val) 944 { 945 lock_sock(sk); 946 __sock_set_rcvbuf(sk, val); 947 release_sock(sk); 948 } 949 EXPORT_SYMBOL(sock_set_rcvbuf); 950 951 static void __sock_set_mark(struct sock *sk, u32 val) 952 { 953 if (val != sk->sk_mark) { 954 sk->sk_mark = val; 955 sk_dst_reset(sk); 956 } 957 } 958 959 void sock_set_mark(struct sock *sk, u32 val) 960 { 961 lock_sock(sk); 962 __sock_set_mark(sk, val); 963 release_sock(sk); 964 } 965 EXPORT_SYMBOL(sock_set_mark); 966 967 static void sock_release_reserved_memory(struct sock *sk, int bytes) 968 { 969 /* Round down bytes to multiple of pages */ 970 bytes &= ~(SK_MEM_QUANTUM - 1); 971 972 WARN_ON(bytes > sk->sk_reserved_mem); 973 sk->sk_reserved_mem -= bytes; 974 sk_mem_reclaim(sk); 975 } 976 977 static int sock_reserve_memory(struct sock *sk, int bytes) 978 { 979 long allocated; 980 bool charged; 981 int pages; 982 983 if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk)) 984 return -EOPNOTSUPP; 985 986 if (!bytes) 987 return 0; 988 989 pages = sk_mem_pages(bytes); 990 991 /* pre-charge to memcg */ 992 charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages, 993 GFP_KERNEL | __GFP_RETRY_MAYFAIL); 994 if (!charged) 995 return -ENOMEM; 996 997 /* pre-charge to forward_alloc */ 998 allocated = sk_memory_allocated_add(sk, pages); 999 /* If the system goes into memory pressure with this 1000 * precharge, give up and return error. 1001 */ 1002 if (allocated > sk_prot_mem_limits(sk, 1)) { 1003 sk_memory_allocated_sub(sk, pages); 1004 mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); 1005 return -ENOMEM; 1006 } 1007 sk->sk_forward_alloc += pages << SK_MEM_QUANTUM_SHIFT; 1008 1009 sk->sk_reserved_mem += pages << SK_MEM_QUANTUM_SHIFT; 1010 1011 return 0; 1012 } 1013 1014 /* 1015 * This is meant for all protocols to use and covers goings on 1016 * at the socket level. Everything here is generic. 1017 */ 1018 1019 int sock_setsockopt(struct socket *sock, int level, int optname, 1020 sockptr_t optval, unsigned int optlen) 1021 { 1022 struct so_timestamping timestamping; 1023 struct sock_txtime sk_txtime; 1024 struct sock *sk = sock->sk; 1025 int val; 1026 int valbool; 1027 struct linger ling; 1028 int ret = 0; 1029 1030 /* 1031 * Options without arguments 1032 */ 1033 1034 if (optname == SO_BINDTODEVICE) 1035 return sock_setbindtodevice(sk, optval, optlen); 1036 1037 if (optlen < sizeof(int)) 1038 return -EINVAL; 1039 1040 if (copy_from_sockptr(&val, optval, sizeof(val))) 1041 return -EFAULT; 1042 1043 valbool = val ? 1 : 0; 1044 1045 lock_sock(sk); 1046 1047 switch (optname) { 1048 case SO_DEBUG: 1049 if (val && !capable(CAP_NET_ADMIN)) 1050 ret = -EACCES; 1051 else 1052 sock_valbool_flag(sk, SOCK_DBG, valbool); 1053 break; 1054 case SO_REUSEADDR: 1055 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); 1056 break; 1057 case SO_REUSEPORT: 1058 sk->sk_reuseport = valbool; 1059 break; 1060 case SO_TYPE: 1061 case SO_PROTOCOL: 1062 case SO_DOMAIN: 1063 case SO_ERROR: 1064 ret = -ENOPROTOOPT; 1065 break; 1066 case SO_DONTROUTE: 1067 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); 1068 sk_dst_reset(sk); 1069 break; 1070 case SO_BROADCAST: 1071 sock_valbool_flag(sk, SOCK_BROADCAST, valbool); 1072 break; 1073 case SO_SNDBUF: 1074 /* Don't error on this BSD doesn't and if you think 1075 * about it this is right. Otherwise apps have to 1076 * play 'guess the biggest size' games. 
RCVBUF/SNDBUF 1077 * are treated in BSD as hints 1078 */ 1079 val = min_t(u32, val, sysctl_wmem_max); 1080 set_sndbuf: 1081 /* Ensure val * 2 fits into an int, to prevent max_t() 1082 * from treating it as a negative value. 1083 */ 1084 val = min_t(int, val, INT_MAX / 2); 1085 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 1086 WRITE_ONCE(sk->sk_sndbuf, 1087 max_t(int, val * 2, SOCK_MIN_SNDBUF)); 1088 /* Wake up sending tasks if we upped the value. */ 1089 sk->sk_write_space(sk); 1090 break; 1091 1092 case SO_SNDBUFFORCE: 1093 if (!capable(CAP_NET_ADMIN)) { 1094 ret = -EPERM; 1095 break; 1096 } 1097 1098 /* No negative values (to prevent underflow, as val will be 1099 * multiplied by 2). 1100 */ 1101 if (val < 0) 1102 val = 0; 1103 goto set_sndbuf; 1104 1105 case SO_RCVBUF: 1106 /* Don't error on this BSD doesn't and if you think 1107 * about it this is right. Otherwise apps have to 1108 * play 'guess the biggest size' games. RCVBUF/SNDBUF 1109 * are treated in BSD as hints 1110 */ 1111 __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max)); 1112 break; 1113 1114 case SO_RCVBUFFORCE: 1115 if (!capable(CAP_NET_ADMIN)) { 1116 ret = -EPERM; 1117 break; 1118 } 1119 1120 /* No negative values (to prevent underflow, as val will be 1121 * multiplied by 2). 1122 */ 1123 __sock_set_rcvbuf(sk, max(val, 0)); 1124 break; 1125 1126 case SO_KEEPALIVE: 1127 if (sk->sk_prot->keepalive) 1128 sk->sk_prot->keepalive(sk, valbool); 1129 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); 1130 break; 1131 1132 case SO_OOBINLINE: 1133 sock_valbool_flag(sk, SOCK_URGINLINE, valbool); 1134 break; 1135 1136 case SO_NO_CHECK: 1137 sk->sk_no_check_tx = valbool; 1138 break; 1139 1140 case SO_PRIORITY: 1141 if ((val >= 0 && val <= 6) || 1142 ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || 1143 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 1144 sk->sk_priority = val; 1145 else 1146 ret = -EPERM; 1147 break; 1148 1149 case SO_LINGER: 1150 if (optlen < sizeof(ling)) { 1151 ret = -EINVAL; /* 1003.1g */ 1152 break; 1153 } 1154 if (copy_from_sockptr(&ling, optval, sizeof(ling))) { 1155 ret = -EFAULT; 1156 break; 1157 } 1158 if (!ling.l_onoff) 1159 sock_reset_flag(sk, SOCK_LINGER); 1160 else { 1161 #if (BITS_PER_LONG == 32) 1162 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ) 1163 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT; 1164 else 1165 #endif 1166 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ; 1167 sock_set_flag(sk, SOCK_LINGER); 1168 } 1169 break; 1170 1171 case SO_BSDCOMPAT: 1172 break; 1173 1174 case SO_PASSCRED: 1175 if (valbool) 1176 set_bit(SOCK_PASSCRED, &sock->flags); 1177 else 1178 clear_bit(SOCK_PASSCRED, &sock->flags); 1179 break; 1180 1181 case SO_TIMESTAMP_OLD: 1182 case SO_TIMESTAMP_NEW: 1183 case SO_TIMESTAMPNS_OLD: 1184 case SO_TIMESTAMPNS_NEW: 1185 sock_set_timestamp(sk, optname, valbool); 1186 break; 1187 1188 case SO_TIMESTAMPING_NEW: 1189 case SO_TIMESTAMPING_OLD: 1190 if (optlen == sizeof(timestamping)) { 1191 if (copy_from_sockptr(×tamping, optval, 1192 sizeof(timestamping))) { 1193 ret = -EFAULT; 1194 break; 1195 } 1196 } else { 1197 memset(×tamping, 0, sizeof(timestamping)); 1198 timestamping.flags = val; 1199 } 1200 ret = sock_set_timestamping(sk, optname, timestamping); 1201 break; 1202 1203 case SO_RCVLOWAT: 1204 if (val < 0) 1205 val = INT_MAX; 1206 if (sock->ops->set_rcvlowat) 1207 ret = sock->ops->set_rcvlowat(sk, val); 1208 else 1209 WRITE_ONCE(sk->sk_rcvlowat, val ? 
: 1); 1210 break; 1211 1212 case SO_RCVTIMEO_OLD: 1213 case SO_RCVTIMEO_NEW: 1214 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, 1215 optlen, optname == SO_RCVTIMEO_OLD); 1216 break; 1217 1218 case SO_SNDTIMEO_OLD: 1219 case SO_SNDTIMEO_NEW: 1220 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, 1221 optlen, optname == SO_SNDTIMEO_OLD); 1222 break; 1223 1224 case SO_ATTACH_FILTER: { 1225 struct sock_fprog fprog; 1226 1227 ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); 1228 if (!ret) 1229 ret = sk_attach_filter(&fprog, sk); 1230 break; 1231 } 1232 case SO_ATTACH_BPF: 1233 ret = -EINVAL; 1234 if (optlen == sizeof(u32)) { 1235 u32 ufd; 1236 1237 ret = -EFAULT; 1238 if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) 1239 break; 1240 1241 ret = sk_attach_bpf(ufd, sk); 1242 } 1243 break; 1244 1245 case SO_ATTACH_REUSEPORT_CBPF: { 1246 struct sock_fprog fprog; 1247 1248 ret = copy_bpf_fprog_from_user(&fprog, optval, optlen); 1249 if (!ret) 1250 ret = sk_reuseport_attach_filter(&fprog, sk); 1251 break; 1252 } 1253 case SO_ATTACH_REUSEPORT_EBPF: 1254 ret = -EINVAL; 1255 if (optlen == sizeof(u32)) { 1256 u32 ufd; 1257 1258 ret = -EFAULT; 1259 if (copy_from_sockptr(&ufd, optval, sizeof(ufd))) 1260 break; 1261 1262 ret = sk_reuseport_attach_bpf(ufd, sk); 1263 } 1264 break; 1265 1266 case SO_DETACH_REUSEPORT_BPF: 1267 ret = reuseport_detach_prog(sk); 1268 break; 1269 1270 case SO_DETACH_FILTER: 1271 ret = sk_detach_filter(sk); 1272 break; 1273 1274 case SO_LOCK_FILTER: 1275 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool) 1276 ret = -EPERM; 1277 else 1278 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool); 1279 break; 1280 1281 case SO_PASSSEC: 1282 if (valbool) 1283 set_bit(SOCK_PASSSEC, &sock->flags); 1284 else 1285 clear_bit(SOCK_PASSSEC, &sock->flags); 1286 break; 1287 case SO_MARK: 1288 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && 1289 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { 1290 ret = -EPERM; 1291 break; 1292 } 1293 1294 __sock_set_mark(sk, val); 1295 break; 1296 1297 case SO_RXQ_OVFL: 1298 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); 1299 break; 1300 1301 case SO_WIFI_STATUS: 1302 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); 1303 break; 1304 1305 case SO_PEEK_OFF: 1306 if (sock->ops->set_peek_off) 1307 ret = sock->ops->set_peek_off(sk, val); 1308 else 1309 ret = -EOPNOTSUPP; 1310 break; 1311 1312 case SO_NOFCS: 1313 sock_valbool_flag(sk, SOCK_NOFCS, valbool); 1314 break; 1315 1316 case SO_SELECT_ERR_QUEUE: 1317 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); 1318 break; 1319 1320 #ifdef CONFIG_NET_RX_BUSY_POLL 1321 case SO_BUSY_POLL: 1322 /* allow unprivileged users to decrease the value */ 1323 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) 1324 ret = -EPERM; 1325 else { 1326 if (val < 0) 1327 ret = -EINVAL; 1328 else 1329 WRITE_ONCE(sk->sk_ll_usec, val); 1330 } 1331 break; 1332 case SO_PREFER_BUSY_POLL: 1333 if (valbool && !capable(CAP_NET_ADMIN)) 1334 ret = -EPERM; 1335 else 1336 WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); 1337 break; 1338 case SO_BUSY_POLL_BUDGET: 1339 if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) { 1340 ret = -EPERM; 1341 } else { 1342 if (val < 0 || val > U16_MAX) 1343 ret = -EINVAL; 1344 else 1345 WRITE_ONCE(sk->sk_busy_poll_budget, val); 1346 } 1347 break; 1348 #endif 1349 1350 case SO_MAX_PACING_RATE: 1351 { 1352 unsigned long ulval = (val == ~0U) ? 
~0UL : (unsigned int)val; 1353 1354 if (sizeof(ulval) != sizeof(val) && 1355 optlen >= sizeof(ulval) && 1356 copy_from_sockptr(&ulval, optval, sizeof(ulval))) { 1357 ret = -EFAULT; 1358 break; 1359 } 1360 if (ulval != ~0UL) 1361 cmpxchg(&sk->sk_pacing_status, 1362 SK_PACING_NONE, 1363 SK_PACING_NEEDED); 1364 sk->sk_max_pacing_rate = ulval; 1365 sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval); 1366 break; 1367 } 1368 case SO_INCOMING_CPU: 1369 WRITE_ONCE(sk->sk_incoming_cpu, val); 1370 break; 1371 1372 case SO_CNX_ADVICE: 1373 if (val == 1) 1374 dst_negative_advice(sk); 1375 break; 1376 1377 case SO_ZEROCOPY: 1378 if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) { 1379 if (!(sk_is_tcp(sk) || 1380 (sk->sk_type == SOCK_DGRAM && 1381 sk->sk_protocol == IPPROTO_UDP))) 1382 ret = -EOPNOTSUPP; 1383 } else if (sk->sk_family != PF_RDS) { 1384 ret = -EOPNOTSUPP; 1385 } 1386 if (!ret) { 1387 if (val < 0 || val > 1) 1388 ret = -EINVAL; 1389 else 1390 sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); 1391 } 1392 break; 1393 1394 case SO_TXTIME: 1395 if (optlen != sizeof(struct sock_txtime)) { 1396 ret = -EINVAL; 1397 break; 1398 } else if (copy_from_sockptr(&sk_txtime, optval, 1399 sizeof(struct sock_txtime))) { 1400 ret = -EFAULT; 1401 break; 1402 } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) { 1403 ret = -EINVAL; 1404 break; 1405 } 1406 /* CLOCK_MONOTONIC is only used by sch_fq, and this packet 1407 * scheduler has enough safe guards. 1408 */ 1409 if (sk_txtime.clockid != CLOCK_MONOTONIC && 1410 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { 1411 ret = -EPERM; 1412 break; 1413 } 1414 sock_valbool_flag(sk, SOCK_TXTIME, true); 1415 sk->sk_clockid = sk_txtime.clockid; 1416 sk->sk_txtime_deadline_mode = 1417 !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE); 1418 sk->sk_txtime_report_errors = 1419 !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS); 1420 break; 1421 1422 case SO_BINDTOIFINDEX: 1423 ret = sock_bindtoindex_locked(sk, val); 1424 break; 1425 1426 case SO_BUF_LOCK: 1427 if (val & ~SOCK_BUF_LOCK_MASK) { 1428 ret = -EINVAL; 1429 break; 1430 } 1431 sk->sk_userlocks = val | (sk->sk_userlocks & 1432 ~SOCK_BUF_LOCK_MASK); 1433 break; 1434 1435 case SO_RESERVE_MEM: 1436 { 1437 int delta; 1438 1439 if (val < 0) { 1440 ret = -EINVAL; 1441 break; 1442 } 1443 1444 delta = val - sk->sk_reserved_mem; 1445 if (delta < 0) 1446 sock_release_reserved_memory(sk, -delta); 1447 else 1448 ret = sock_reserve_memory(sk, delta); 1449 break; 1450 } 1451 1452 case SO_TXREHASH: 1453 if (val < -1 || val > 1) { 1454 ret = -EINVAL; 1455 break; 1456 } 1457 /* Paired with READ_ONCE() in tcp_rtx_synack() */ 1458 WRITE_ONCE(sk->sk_txrehash, (u8)val); 1459 break; 1460 1461 default: 1462 ret = -ENOPROTOOPT; 1463 break; 1464 } 1465 release_sock(sk); 1466 return ret; 1467 } 1468 EXPORT_SYMBOL(sock_setsockopt); 1469 1470 static const struct cred *sk_get_peer_cred(struct sock *sk) 1471 { 1472 const struct cred *cred; 1473 1474 spin_lock(&sk->sk_peer_lock); 1475 cred = get_cred(sk->sk_peer_cred); 1476 spin_unlock(&sk->sk_peer_lock); 1477 1478 return cred; 1479 } 1480 1481 static void cred_to_ucred(struct pid *pid, const struct cred *cred, 1482 struct ucred *ucred) 1483 { 1484 ucred->pid = pid_vnr(pid); 1485 ucred->uid = ucred->gid = -1; 1486 if (cred) { 1487 struct user_namespace *current_ns = current_user_ns(); 1488 1489 ucred->uid = from_kuid_munged(current_ns, cred->euid); 1490 ucred->gid = from_kgid_munged(current_ns, cred->egid); 1491 } 1492 } 1493 1494 static int groups_to_user(gid_t __user *dst, const struct 
group_info *src) 1495 { 1496 struct user_namespace *user_ns = current_user_ns(); 1497 int i; 1498 1499 for (i = 0; i < src->ngroups; i++) 1500 if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i)) 1501 return -EFAULT; 1502 1503 return 0; 1504 } 1505 1506 int sock_getsockopt(struct socket *sock, int level, int optname, 1507 char __user *optval, int __user *optlen) 1508 { 1509 struct sock *sk = sock->sk; 1510 1511 union { 1512 int val; 1513 u64 val64; 1514 unsigned long ulval; 1515 struct linger ling; 1516 struct old_timeval32 tm32; 1517 struct __kernel_old_timeval tm; 1518 struct __kernel_sock_timeval stm; 1519 struct sock_txtime txtime; 1520 struct so_timestamping timestamping; 1521 } v; 1522 1523 int lv = sizeof(int); 1524 int len; 1525 1526 if (get_user(len, optlen)) 1527 return -EFAULT; 1528 if (len < 0) 1529 return -EINVAL; 1530 1531 memset(&v, 0, sizeof(v)); 1532 1533 switch (optname) { 1534 case SO_DEBUG: 1535 v.val = sock_flag(sk, SOCK_DBG); 1536 break; 1537 1538 case SO_DONTROUTE: 1539 v.val = sock_flag(sk, SOCK_LOCALROUTE); 1540 break; 1541 1542 case SO_BROADCAST: 1543 v.val = sock_flag(sk, SOCK_BROADCAST); 1544 break; 1545 1546 case SO_SNDBUF: 1547 v.val = sk->sk_sndbuf; 1548 break; 1549 1550 case SO_RCVBUF: 1551 v.val = sk->sk_rcvbuf; 1552 break; 1553 1554 case SO_REUSEADDR: 1555 v.val = sk->sk_reuse; 1556 break; 1557 1558 case SO_REUSEPORT: 1559 v.val = sk->sk_reuseport; 1560 break; 1561 1562 case SO_KEEPALIVE: 1563 v.val = sock_flag(sk, SOCK_KEEPOPEN); 1564 break; 1565 1566 case SO_TYPE: 1567 v.val = sk->sk_type; 1568 break; 1569 1570 case SO_PROTOCOL: 1571 v.val = sk->sk_protocol; 1572 break; 1573 1574 case SO_DOMAIN: 1575 v.val = sk->sk_family; 1576 break; 1577 1578 case SO_ERROR: 1579 v.val = -sock_error(sk); 1580 if (v.val == 0) 1581 v.val = xchg(&sk->sk_err_soft, 0); 1582 break; 1583 1584 case SO_OOBINLINE: 1585 v.val = sock_flag(sk, SOCK_URGINLINE); 1586 break; 1587 1588 case SO_NO_CHECK: 1589 v.val = sk->sk_no_check_tx; 1590 break; 1591 1592 case SO_PRIORITY: 1593 v.val = sk->sk_priority; 1594 break; 1595 1596 case SO_LINGER: 1597 lv = sizeof(v.ling); 1598 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER); 1599 v.ling.l_linger = sk->sk_lingertime / HZ; 1600 break; 1601 1602 case SO_BSDCOMPAT: 1603 break; 1604 1605 case SO_TIMESTAMP_OLD: 1606 v.val = sock_flag(sk, SOCK_RCVTSTAMP) && 1607 !sock_flag(sk, SOCK_TSTAMP_NEW) && 1608 !sock_flag(sk, SOCK_RCVTSTAMPNS); 1609 break; 1610 1611 case SO_TIMESTAMPNS_OLD: 1612 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW); 1613 break; 1614 1615 case SO_TIMESTAMP_NEW: 1616 v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW); 1617 break; 1618 1619 case SO_TIMESTAMPNS_NEW: 1620 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW); 1621 break; 1622 1623 case SO_TIMESTAMPING_OLD: 1624 lv = sizeof(v.timestamping); 1625 v.timestamping.flags = sk->sk_tsflags; 1626 v.timestamping.bind_phc = sk->sk_bind_phc; 1627 break; 1628 1629 case SO_RCVTIMEO_OLD: 1630 case SO_RCVTIMEO_NEW: 1631 lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname); 1632 break; 1633 1634 case SO_SNDTIMEO_OLD: 1635 case SO_SNDTIMEO_NEW: 1636 lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname); 1637 break; 1638 1639 case SO_RCVLOWAT: 1640 v.val = sk->sk_rcvlowat; 1641 break; 1642 1643 case SO_SNDLOWAT: 1644 v.val = 1; 1645 break; 1646 1647 case SO_PASSCRED: 1648 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); 1649 break; 1650 1651 case SO_PEERCRED: 1652 { 1653 struct 
ucred peercred; 1654 if (len > sizeof(peercred)) 1655 len = sizeof(peercred); 1656 1657 spin_lock(&sk->sk_peer_lock); 1658 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); 1659 spin_unlock(&sk->sk_peer_lock); 1660 1661 if (copy_to_user(optval, &peercred, len)) 1662 return -EFAULT; 1663 goto lenout; 1664 } 1665 1666 case SO_PEERGROUPS: 1667 { 1668 const struct cred *cred; 1669 int ret, n; 1670 1671 cred = sk_get_peer_cred(sk); 1672 if (!cred) 1673 return -ENODATA; 1674 1675 n = cred->group_info->ngroups; 1676 if (len < n * sizeof(gid_t)) { 1677 len = n * sizeof(gid_t); 1678 put_cred(cred); 1679 return put_user(len, optlen) ? -EFAULT : -ERANGE; 1680 } 1681 len = n * sizeof(gid_t); 1682 1683 ret = groups_to_user((gid_t __user *)optval, cred->group_info); 1684 put_cred(cred); 1685 if (ret) 1686 return ret; 1687 goto lenout; 1688 } 1689 1690 case SO_PEERNAME: 1691 { 1692 char address[128]; 1693 1694 lv = sock->ops->getname(sock, (struct sockaddr *)address, 2); 1695 if (lv < 0) 1696 return -ENOTCONN; 1697 if (lv < len) 1698 return -EINVAL; 1699 if (copy_to_user(optval, address, len)) 1700 return -EFAULT; 1701 goto lenout; 1702 } 1703 1704 /* Dubious BSD thing... Probably nobody even uses it, but 1705 * the UNIX standard wants it for whatever reason... -DaveM 1706 */ 1707 case SO_ACCEPTCONN: 1708 v.val = sk->sk_state == TCP_LISTEN; 1709 break; 1710 1711 case SO_PASSSEC: 1712 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags); 1713 break; 1714 1715 case SO_PEERSEC: 1716 return security_socket_getpeersec_stream(sock, optval, optlen, len); 1717 1718 case SO_MARK: 1719 v.val = sk->sk_mark; 1720 break; 1721 1722 case SO_RXQ_OVFL: 1723 v.val = sock_flag(sk, SOCK_RXQ_OVFL); 1724 break; 1725 1726 case SO_WIFI_STATUS: 1727 v.val = sock_flag(sk, SOCK_WIFI_STATUS); 1728 break; 1729 1730 case SO_PEEK_OFF: 1731 if (!sock->ops->set_peek_off) 1732 return -EOPNOTSUPP; 1733 1734 v.val = sk->sk_peek_off; 1735 break; 1736 case SO_NOFCS: 1737 v.val = sock_flag(sk, SOCK_NOFCS); 1738 break; 1739 1740 case SO_BINDTODEVICE: 1741 return sock_getbindtodevice(sk, optval, optlen, len); 1742 1743 case SO_GET_FILTER: 1744 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); 1745 if (len < 0) 1746 return len; 1747 1748 goto lenout; 1749 1750 case SO_LOCK_FILTER: 1751 v.val = sock_flag(sk, SOCK_FILTER_LOCKED); 1752 break; 1753 1754 case SO_BPF_EXTENSIONS: 1755 v.val = bpf_tell_extensions(); 1756 break; 1757 1758 case SO_SELECT_ERR_QUEUE: 1759 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); 1760 break; 1761 1762 #ifdef CONFIG_NET_RX_BUSY_POLL 1763 case SO_BUSY_POLL: 1764 v.val = sk->sk_ll_usec; 1765 break; 1766 case SO_PREFER_BUSY_POLL: 1767 v.val = READ_ONCE(sk->sk_prefer_busy_poll); 1768 break; 1769 #endif 1770 1771 case SO_MAX_PACING_RATE: 1772 if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) { 1773 lv = sizeof(v.ulval); 1774 v.ulval = sk->sk_max_pacing_rate; 1775 } else { 1776 /* 32bit version */ 1777 v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U); 1778 } 1779 break; 1780 1781 case SO_INCOMING_CPU: 1782 v.val = READ_ONCE(sk->sk_incoming_cpu); 1783 break; 1784 1785 case SO_MEMINFO: 1786 { 1787 u32 meminfo[SK_MEMINFO_VARS]; 1788 1789 sk_get_meminfo(sk, meminfo); 1790 1791 len = min_t(unsigned int, len, sizeof(meminfo)); 1792 if (copy_to_user(optval, &meminfo, len)) 1793 return -EFAULT; 1794 1795 goto lenout; 1796 } 1797 1798 #ifdef CONFIG_NET_RX_BUSY_POLL 1799 case SO_INCOMING_NAPI_ID: 1800 v.val = READ_ONCE(sk->sk_napi_id); 1801 1802 /* aggregate non-NAPI IDs down to 0 */ 1803 if 
(v.val < MIN_NAPI_ID) 1804 v.val = 0; 1805 1806 break; 1807 #endif 1808 1809 case SO_COOKIE: 1810 lv = sizeof(u64); 1811 if (len < lv) 1812 return -EINVAL; 1813 v.val64 = sock_gen_cookie(sk); 1814 break; 1815 1816 case SO_ZEROCOPY: 1817 v.val = sock_flag(sk, SOCK_ZEROCOPY); 1818 break; 1819 1820 case SO_TXTIME: 1821 lv = sizeof(v.txtime); 1822 v.txtime.clockid = sk->sk_clockid; 1823 v.txtime.flags |= sk->sk_txtime_deadline_mode ? 1824 SOF_TXTIME_DEADLINE_MODE : 0; 1825 v.txtime.flags |= sk->sk_txtime_report_errors ? 1826 SOF_TXTIME_REPORT_ERRORS : 0; 1827 break; 1828 1829 case SO_BINDTOIFINDEX: 1830 v.val = sk->sk_bound_dev_if; 1831 break; 1832 1833 case SO_NETNS_COOKIE: 1834 lv = sizeof(u64); 1835 if (len != lv) 1836 return -EINVAL; 1837 v.val64 = sock_net(sk)->net_cookie; 1838 break; 1839 1840 case SO_BUF_LOCK: 1841 v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; 1842 break; 1843 1844 case SO_RESERVE_MEM: 1845 v.val = sk->sk_reserved_mem; 1846 break; 1847 1848 case SO_TXREHASH: 1849 v.val = sk->sk_txrehash; 1850 break; 1851 1852 default: 1853 /* We implement the SO_SNDLOWAT etc to not be settable 1854 * (1003.1g 7). 1855 */ 1856 return -ENOPROTOOPT; 1857 } 1858 1859 if (len > lv) 1860 len = lv; 1861 if (copy_to_user(optval, &v, len)) 1862 return -EFAULT; 1863 lenout: 1864 if (put_user(len, optlen)) 1865 return -EFAULT; 1866 return 0; 1867 } 1868 1869 /* 1870 * Initialize an sk_lock. 1871 * 1872 * (We also register the sk_lock with the lock validator.) 1873 */ 1874 static inline void sock_lock_init(struct sock *sk) 1875 { 1876 if (sk->sk_kern_sock) 1877 sock_lock_init_class_and_name( 1878 sk, 1879 af_family_kern_slock_key_strings[sk->sk_family], 1880 af_family_kern_slock_keys + sk->sk_family, 1881 af_family_kern_key_strings[sk->sk_family], 1882 af_family_kern_keys + sk->sk_family); 1883 else 1884 sock_lock_init_class_and_name( 1885 sk, 1886 af_family_slock_key_strings[sk->sk_family], 1887 af_family_slock_keys + sk->sk_family, 1888 af_family_key_strings[sk->sk_family], 1889 af_family_keys + sk->sk_family); 1890 } 1891 1892 /* 1893 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, 1894 * even temporarly, because of RCU lookups. sk_node should also be left as is. 1895 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end 1896 */ 1897 static void sock_copy(struct sock *nsk, const struct sock *osk) 1898 { 1899 const struct proto *prot = READ_ONCE(osk->sk_prot); 1900 #ifdef CONFIG_SECURITY_NETWORK 1901 void *sptr = nsk->sk_security; 1902 #endif 1903 1904 /* If we move sk_tx_queue_mapping out of the private section, 1905 * we must check if sk_tx_queue_clear() is called after 1906 * sock_copy() in sk_clone_lock(). 
1907 */ 1908 BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) < 1909 offsetof(struct sock, sk_dontcopy_begin) || 1910 offsetof(struct sock, sk_tx_queue_mapping) >= 1911 offsetof(struct sock, sk_dontcopy_end)); 1912 1913 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); 1914 1915 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, 1916 prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); 1917 1918 #ifdef CONFIG_SECURITY_NETWORK 1919 nsk->sk_security = sptr; 1920 security_sk_clone(osk, nsk); 1921 #endif 1922 } 1923 1924 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, 1925 int family) 1926 { 1927 struct sock *sk; 1928 struct kmem_cache *slab; 1929 1930 slab = prot->slab; 1931 if (slab != NULL) { 1932 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); 1933 if (!sk) 1934 return sk; 1935 if (want_init_on_alloc(priority)) 1936 sk_prot_clear_nulls(sk, prot->obj_size); 1937 } else 1938 sk = kmalloc(prot->obj_size, priority); 1939 1940 if (sk != NULL) { 1941 if (security_sk_alloc(sk, family, priority)) 1942 goto out_free; 1943 1944 if (!try_module_get(prot->owner)) 1945 goto out_free_sec; 1946 } 1947 1948 return sk; 1949 1950 out_free_sec: 1951 security_sk_free(sk); 1952 out_free: 1953 if (slab != NULL) 1954 kmem_cache_free(slab, sk); 1955 else 1956 kfree(sk); 1957 return NULL; 1958 } 1959 1960 static void sk_prot_free(struct proto *prot, struct sock *sk) 1961 { 1962 struct kmem_cache *slab; 1963 struct module *owner; 1964 1965 owner = prot->owner; 1966 slab = prot->slab; 1967 1968 cgroup_sk_free(&sk->sk_cgrp_data); 1969 mem_cgroup_sk_free(sk); 1970 security_sk_free(sk); 1971 if (slab != NULL) 1972 kmem_cache_free(slab, sk); 1973 else 1974 kfree(sk); 1975 module_put(owner); 1976 } 1977 1978 /** 1979 * sk_alloc - All socket objects are allocated here 1980 * @net: the applicable net namespace 1981 * @family: protocol family 1982 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 1983 * @prot: struct proto associated with this new sock instance 1984 * @kern: is this to be a kernel socket? 1985 */ 1986 struct sock *sk_alloc(struct net *net, int family, gfp_t priority, 1987 struct proto *prot, int kern) 1988 { 1989 struct sock *sk; 1990 1991 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family); 1992 if (sk) { 1993 sk->sk_family = family; 1994 /* 1995 * See comment in struct sock definition to understand 1996 * why we need sk_prot_creator -acme 1997 */ 1998 sk->sk_prot = sk->sk_prot_creator = prot; 1999 sk->sk_kern_sock = kern; 2000 sock_lock_init(sk); 2001 sk->sk_net_refcnt = kern ? 0 : 1; 2002 if (likely(sk->sk_net_refcnt)) { 2003 get_net_track(net, &sk->ns_tracker, priority); 2004 sock_inuse_add(net, 1); 2005 } 2006 2007 sock_net_set(sk, net); 2008 refcount_set(&sk->sk_wmem_alloc, 1); 2009 2010 mem_cgroup_sk_alloc(sk); 2011 cgroup_sk_alloc(&sk->sk_cgrp_data); 2012 sock_update_classid(&sk->sk_cgrp_data); 2013 sock_update_netprioidx(&sk->sk_cgrp_data); 2014 sk_tx_queue_clear(sk); 2015 } 2016 2017 return sk; 2018 } 2019 EXPORT_SYMBOL(sk_alloc); 2020 2021 /* Sockets having SOCK_RCU_FREE will call this function after one RCU 2022 * grace period. This is the case for UDP sockets and TCP listeners. 
2023 */ 2024 static void __sk_destruct(struct rcu_head *head) 2025 { 2026 struct sock *sk = container_of(head, struct sock, sk_rcu); 2027 struct sk_filter *filter; 2028 2029 if (sk->sk_destruct) 2030 sk->sk_destruct(sk); 2031 2032 filter = rcu_dereference_check(sk->sk_filter, 2033 refcount_read(&sk->sk_wmem_alloc) == 0); 2034 if (filter) { 2035 sk_filter_uncharge(sk, filter); 2036 RCU_INIT_POINTER(sk->sk_filter, NULL); 2037 } 2038 2039 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); 2040 2041 #ifdef CONFIG_BPF_SYSCALL 2042 bpf_sk_storage_free(sk); 2043 #endif 2044 2045 if (atomic_read(&sk->sk_omem_alloc)) 2046 pr_debug("%s: optmem leakage (%d bytes) detected\n", 2047 __func__, atomic_read(&sk->sk_omem_alloc)); 2048 2049 if (sk->sk_frag.page) { 2050 put_page(sk->sk_frag.page); 2051 sk->sk_frag.page = NULL; 2052 } 2053 2054 /* We do not need to acquire sk->sk_peer_lock, we are the last user. */ 2055 put_cred(sk->sk_peer_cred); 2056 put_pid(sk->sk_peer_pid); 2057 2058 if (likely(sk->sk_net_refcnt)) 2059 put_net_track(sock_net(sk), &sk->ns_tracker); 2060 sk_prot_free(sk->sk_prot_creator, sk); 2061 } 2062 2063 void sk_destruct(struct sock *sk) 2064 { 2065 bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); 2066 2067 WARN_ON_ONCE(!llist_empty(&sk->defer_list)); 2068 sk_defer_free_flush(sk); 2069 2070 if (rcu_access_pointer(sk->sk_reuseport_cb)) { 2071 reuseport_detach_sock(sk); 2072 use_call_rcu = true; 2073 } 2074 2075 if (use_call_rcu) 2076 call_rcu(&sk->sk_rcu, __sk_destruct); 2077 else 2078 __sk_destruct(&sk->sk_rcu); 2079 } 2080 2081 static void __sk_free(struct sock *sk) 2082 { 2083 if (likely(sk->sk_net_refcnt)) 2084 sock_inuse_add(sock_net(sk), -1); 2085 2086 if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk))) 2087 sock_diag_broadcast_destroy(sk); 2088 else 2089 sk_destruct(sk); 2090 } 2091 2092 void sk_free(struct sock *sk) 2093 { 2094 /* 2095 * We subtract one from sk_wmem_alloc and can know if 2096 * some packets are still in some tx queue. 
2097 * If not null, sock_wfree() will call __sk_free(sk) later 2098 */ 2099 if (refcount_dec_and_test(&sk->sk_wmem_alloc)) 2100 __sk_free(sk); 2101 } 2102 EXPORT_SYMBOL(sk_free); 2103 2104 static void sk_init_common(struct sock *sk) 2105 { 2106 skb_queue_head_init(&sk->sk_receive_queue); 2107 skb_queue_head_init(&sk->sk_write_queue); 2108 skb_queue_head_init(&sk->sk_error_queue); 2109 2110 rwlock_init(&sk->sk_callback_lock); 2111 lockdep_set_class_and_name(&sk->sk_receive_queue.lock, 2112 af_rlock_keys + sk->sk_family, 2113 af_family_rlock_key_strings[sk->sk_family]); 2114 lockdep_set_class_and_name(&sk->sk_write_queue.lock, 2115 af_wlock_keys + sk->sk_family, 2116 af_family_wlock_key_strings[sk->sk_family]); 2117 lockdep_set_class_and_name(&sk->sk_error_queue.lock, 2118 af_elock_keys + sk->sk_family, 2119 af_family_elock_key_strings[sk->sk_family]); 2120 lockdep_set_class_and_name(&sk->sk_callback_lock, 2121 af_callback_keys + sk->sk_family, 2122 af_family_clock_key_strings[sk->sk_family]); 2123 } 2124 2125 /** 2126 * sk_clone_lock - clone a socket, and lock its clone 2127 * @sk: the socket to clone 2128 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) 2129 * 2130 * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) 2131 */ 2132 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) 2133 { 2134 struct proto *prot = READ_ONCE(sk->sk_prot); 2135 struct sk_filter *filter; 2136 bool is_charged = true; 2137 struct sock *newsk; 2138 2139 newsk = sk_prot_alloc(prot, priority, sk->sk_family); 2140 if (!newsk) 2141 goto out; 2142 2143 sock_copy(newsk, sk); 2144 2145 newsk->sk_prot_creator = prot; 2146 2147 /* SANITY */ 2148 if (likely(newsk->sk_net_refcnt)) { 2149 get_net_track(sock_net(newsk), &newsk->ns_tracker, priority); 2150 sock_inuse_add(sock_net(newsk), 1); 2151 } 2152 sk_node_init(&newsk->sk_node); 2153 sock_lock_init(newsk); 2154 bh_lock_sock(newsk); 2155 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; 2156 newsk->sk_backlog.len = 0; 2157 2158 atomic_set(&newsk->sk_rmem_alloc, 0); 2159 2160 /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ 2161 refcount_set(&newsk->sk_wmem_alloc, 1); 2162 2163 atomic_set(&newsk->sk_omem_alloc, 0); 2164 sk_init_common(newsk); 2165 2166 newsk->sk_dst_cache = NULL; 2167 newsk->sk_dst_pending_confirm = 0; 2168 newsk->sk_wmem_queued = 0; 2169 newsk->sk_forward_alloc = 0; 2170 newsk->sk_reserved_mem = 0; 2171 atomic_set(&newsk->sk_drops, 0); 2172 newsk->sk_send_head = NULL; 2173 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; 2174 atomic_set(&newsk->sk_zckey, 0); 2175 2176 sock_reset_flag(newsk, SOCK_DONE); 2177 2178 /* sk->sk_memcg will be populated at accept() time */ 2179 newsk->sk_memcg = NULL; 2180 2181 cgroup_sk_clone(&newsk->sk_cgrp_data); 2182 2183 rcu_read_lock(); 2184 filter = rcu_dereference(sk->sk_filter); 2185 if (filter != NULL) 2186 /* though it's an empty new sock, the charging may fail 2187 * if sysctl_optmem_max was changed between creation of 2188 * original socket and cloning 2189 */ 2190 is_charged = sk_filter_charge(newsk, filter); 2191 RCU_INIT_POINTER(newsk->sk_filter, filter); 2192 rcu_read_unlock(); 2193 2194 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { 2195 /* We need to make sure that we don't uncharge the new 2196 * socket if we couldn't charge it in the first place 2197 * as otherwise we uncharge the parent's filter. 
2198 */ 2199 if (!is_charged) 2200 RCU_INIT_POINTER(newsk->sk_filter, NULL); 2201 sk_free_unlock_clone(newsk); 2202 newsk = NULL; 2203 goto out; 2204 } 2205 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); 2206 2207 if (bpf_sk_storage_clone(sk, newsk)) { 2208 sk_free_unlock_clone(newsk); 2209 newsk = NULL; 2210 goto out; 2211 } 2212 2213 /* Clear sk_user_data if parent had the pointer tagged 2214 * as not suitable for copying when cloning. 2215 */ 2216 if (sk_user_data_is_nocopy(newsk)) 2217 newsk->sk_user_data = NULL; 2218 2219 newsk->sk_err = 0; 2220 newsk->sk_err_soft = 0; 2221 newsk->sk_priority = 0; 2222 newsk->sk_incoming_cpu = raw_smp_processor_id(); 2223 2224 /* Before updating sk_refcnt, we must commit prior changes to memory 2225 * (Documentation/RCU/rculist_nulls.rst for details) 2226 */ 2227 smp_wmb(); 2228 refcount_set(&newsk->sk_refcnt, 2); 2229 2230 /* Increment the counter in the same struct proto as the master 2231 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that 2232 * is the same as sk->sk_prot->socks, as this field was copied 2233 * with memcpy). 2234 * 2235 * This _changes_ the previous behaviour, where 2236 * tcp_create_openreq_child always was incrementing the 2237 * equivalent to tcp_prot->socks (inet_sock_nr), so this have 2238 * to be taken into account in all callers. -acme 2239 */ 2240 sk_refcnt_debug_inc(newsk); 2241 sk_set_socket(newsk, NULL); 2242 sk_tx_queue_clear(newsk); 2243 RCU_INIT_POINTER(newsk->sk_wq, NULL); 2244 2245 if (newsk->sk_prot->sockets_allocated) 2246 sk_sockets_allocated_inc(newsk); 2247 2248 if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP) 2249 net_enable_timestamp(); 2250 out: 2251 return newsk; 2252 } 2253 EXPORT_SYMBOL_GPL(sk_clone_lock); 2254 2255 void sk_free_unlock_clone(struct sock *sk) 2256 { 2257 /* It is still raw copy of parent, so invalidate 2258 * destructor and make plain sk_free() */ 2259 sk->sk_destruct = NULL; 2260 bh_unlock_sock(sk); 2261 sk_free(sk); 2262 } 2263 EXPORT_SYMBOL_GPL(sk_free_unlock_clone); 2264 2265 void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 2266 { 2267 u32 max_segs = 1; 2268 2269 sk_dst_set(sk, dst); 2270 sk->sk_route_caps = dst->dev->features; 2271 if (sk_is_tcp(sk)) 2272 sk->sk_route_caps |= NETIF_F_GSO; 2273 if (sk->sk_route_caps & NETIF_F_GSO) 2274 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; 2275 if (unlikely(sk->sk_gso_disabled)) 2276 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 2277 if (sk_can_gso(sk)) { 2278 if (dst->header_len && !xfrm_dst_offload_ok(dst)) { 2279 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 2280 } else { 2281 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; 2282 /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */ 2283 sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size); 2284 sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1); 2285 /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ 2286 max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); 2287 } 2288 } 2289 sk->sk_gso_max_segs = max_segs; 2290 } 2291 EXPORT_SYMBOL_GPL(sk_setup_caps); 2292 2293 /* 2294 * Simple resource managers for sockets. 2295 */ 2296 2297 2298 /* 2299 * Write buffer destructor automatically called from kfree_skb. 
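 *
 * Illustrative pairing (a sketch, not a quote of a caller): an skb that
 * was charged to a socket with skb_set_owner_w() carries this destructor,
 * so a plain kfree_skb() both returns skb->truesize to sk_wmem_alloc and,
 * for the last in-flight packet of an orphaned socket, completes the
 * deferred __sk_free():
 *
 *	skb_set_owner_w(skb, sk);
 *	...
 *	kfree_skb(skb);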
2300 */ 2301 void sock_wfree(struct sk_buff *skb) 2302 { 2303 struct sock *sk = skb->sk; 2304 unsigned int len = skb->truesize; 2305 2306 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { 2307 /* 2308 * Keep a reference on sk_wmem_alloc, this will be released 2309 * after sk_write_space() call 2310 */ 2311 WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc)); 2312 sk->sk_write_space(sk); 2313 len = 1; 2314 } 2315 /* 2316 * if sk_wmem_alloc reaches 0, we must finish what sk_free() 2317 * could not do because of in-flight packets 2318 */ 2319 if (refcount_sub_and_test(len, &sk->sk_wmem_alloc)) 2320 __sk_free(sk); 2321 } 2322 EXPORT_SYMBOL(sock_wfree); 2323 2324 /* This variant of sock_wfree() is used by TCP, 2325 * since it sets SOCK_USE_WRITE_QUEUE. 2326 */ 2327 void __sock_wfree(struct sk_buff *skb) 2328 { 2329 struct sock *sk = skb->sk; 2330 2331 if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)) 2332 __sk_free(sk); 2333 } 2334 2335 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) 2336 { 2337 skb_orphan(skb); 2338 skb->sk = sk; 2339 #ifdef CONFIG_INET 2340 if (unlikely(!sk_fullsock(sk))) { 2341 skb->destructor = sock_edemux; 2342 sock_hold(sk); 2343 return; 2344 } 2345 #endif 2346 skb->destructor = sock_wfree; 2347 skb_set_hash_from_sk(skb, sk); 2348 /* 2349 * We used to take a refcount on sk, but following operation 2350 * is enough to guarantee sk_free() wont free this sock until 2351 * all in-flight packets are completed 2352 */ 2353 refcount_add(skb->truesize, &sk->sk_wmem_alloc); 2354 } 2355 EXPORT_SYMBOL(skb_set_owner_w); 2356 2357 static bool can_skb_orphan_partial(const struct sk_buff *skb) 2358 { 2359 #ifdef CONFIG_TLS_DEVICE 2360 /* Drivers depend on in-order delivery for crypto offload, 2361 * partial orphan breaks out-of-order-OK logic. 2362 */ 2363 if (skb->decrypted) 2364 return false; 2365 #endif 2366 return (skb->destructor == sock_wfree || 2367 (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree)); 2368 } 2369 2370 /* This helper is used by netem, as it can hold packets in its 2371 * delay queue. We want to allow the owner socket to send more 2372 * packets, as if they were already TX completed by a typical driver. 2373 * But we also want to keep skb->sk set because some packet schedulers 2374 * rely on it (sch_fq for example). 2375 */ 2376 void skb_orphan_partial(struct sk_buff *skb) 2377 { 2378 if (skb_is_tcp_pure_ack(skb)) 2379 return; 2380 2381 if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk)) 2382 return; 2383 2384 skb_orphan(skb); 2385 } 2386 EXPORT_SYMBOL(skb_orphan_partial); 2387 2388 /* 2389 * Read buffer destructor automatically called from kfree_skb. 2390 */ 2391 void sock_rfree(struct sk_buff *skb) 2392 { 2393 struct sock *sk = skb->sk; 2394 unsigned int len = skb->truesize; 2395 2396 atomic_sub(len, &sk->sk_rmem_alloc); 2397 sk_mem_uncharge(sk, len); 2398 } 2399 EXPORT_SYMBOL(sock_rfree); 2400 2401 /* 2402 * Buffer destructor for skbs that are not used directly in read or write 2403 * path, e.g. for error handler skbs. Automatically called from kfree_skb. 2404 */ 2405 void sock_efree(struct sk_buff *skb) 2406 { 2407 sock_put(skb->sk); 2408 } 2409 EXPORT_SYMBOL(sock_efree); 2410 2411 /* Buffer destructor for prefetch/receive path where reference count may 2412 * not be held, e.g. for listen sockets. 
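 *
 * Illustrative pairing (a sketch, not a quote of a specific caller):
 * code that stashes a possibly-unreferenced socket in skb->sk uses this
 * destructor so a reference is dropped only if one was really taken:
 *
 *	skb->sk = sk;
 *	skb->destructor = sock_pfree;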
2413 */ 2414 #ifdef CONFIG_INET 2415 void sock_pfree(struct sk_buff *skb) 2416 { 2417 if (sk_is_refcounted(skb->sk)) 2418 sock_gen_put(skb->sk); 2419 } 2420 EXPORT_SYMBOL(sock_pfree); 2421 #endif /* CONFIG_INET */ 2422 2423 kuid_t sock_i_uid(struct sock *sk) 2424 { 2425 kuid_t uid; 2426 2427 read_lock_bh(&sk->sk_callback_lock); 2428 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID; 2429 read_unlock_bh(&sk->sk_callback_lock); 2430 return uid; 2431 } 2432 EXPORT_SYMBOL(sock_i_uid); 2433 2434 unsigned long sock_i_ino(struct sock *sk) 2435 { 2436 unsigned long ino; 2437 2438 read_lock_bh(&sk->sk_callback_lock); 2439 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; 2440 read_unlock_bh(&sk->sk_callback_lock); 2441 return ino; 2442 } 2443 EXPORT_SYMBOL(sock_i_ino); 2444 2445 /* 2446 * Allocate a skb from the socket's send buffer. 2447 */ 2448 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, 2449 gfp_t priority) 2450 { 2451 if (force || 2452 refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) { 2453 struct sk_buff *skb = alloc_skb(size, priority); 2454 2455 if (skb) { 2456 skb_set_owner_w(skb, sk); 2457 return skb; 2458 } 2459 } 2460 return NULL; 2461 } 2462 EXPORT_SYMBOL(sock_wmalloc); 2463 2464 static void sock_ofree(struct sk_buff *skb) 2465 { 2466 struct sock *sk = skb->sk; 2467 2468 atomic_sub(skb->truesize, &sk->sk_omem_alloc); 2469 } 2470 2471 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, 2472 gfp_t priority) 2473 { 2474 struct sk_buff *skb; 2475 2476 /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ 2477 if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > 2478 sysctl_optmem_max) 2479 return NULL; 2480 2481 skb = alloc_skb(size, priority); 2482 if (!skb) 2483 return NULL; 2484 2485 atomic_add(skb->truesize, &sk->sk_omem_alloc); 2486 skb->sk = sk; 2487 skb->destructor = sock_ofree; 2488 return skb; 2489 } 2490 2491 /* 2492 * Allocate a memory block from the socket's option memory buffer. 2493 */ 2494 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) 2495 { 2496 if ((unsigned int)size <= sysctl_optmem_max && 2497 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { 2498 void *mem; 2499 /* First do the add, to avoid the race if kmalloc 2500 * might sleep. 2501 */ 2502 atomic_add(size, &sk->sk_omem_alloc); 2503 mem = kmalloc(size, priority); 2504 if (mem) 2505 return mem; 2506 atomic_sub(size, &sk->sk_omem_alloc); 2507 } 2508 return NULL; 2509 } 2510 EXPORT_SYMBOL(sock_kmalloc); 2511 2512 /* Free an option memory block. Note, we actually want the inline 2513 * here as this allows gcc to detect the nullify and fold away the 2514 * condition entirely. 2515 */ 2516 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size, 2517 const bool nullify) 2518 { 2519 if (WARN_ON_ONCE(!mem)) 2520 return; 2521 if (nullify) 2522 kfree_sensitive(mem); 2523 else 2524 kfree(mem); 2525 atomic_sub(size, &sk->sk_omem_alloc); 2526 } 2527 2528 void sock_kfree_s(struct sock *sk, void *mem, int size) 2529 { 2530 __sock_kfree_s(sk, mem, size, false); 2531 } 2532 EXPORT_SYMBOL(sock_kfree_s); 2533 2534 void sock_kzfree_s(struct sock *sk, void *mem, int size) 2535 { 2536 __sock_kfree_s(sk, mem, size, true); 2537 } 2538 EXPORT_SYMBOL(sock_kzfree_s); 2539 2540 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock. 2541 I think, these locks should be removed for datagram sockets. 
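
   Expected calling shape (an illustrative, simplified sketch of the loop
   in sock_alloc_send_pskb() below, not a literal quote):

	while (sk_wmem_alloc_get(sk) >= READ_ONCE(sk->sk_sndbuf)) {
		if (!timeo || signal_pending(current))
			break;
		timeo = sock_wait_for_wmem(sk, timeo);
	}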
2542 */ 2543 static long sock_wait_for_wmem(struct sock *sk, long timeo) 2544 { 2545 DEFINE_WAIT(wait); 2546 2547 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 2548 for (;;) { 2549 if (!timeo) 2550 break; 2551 if (signal_pending(current)) 2552 break; 2553 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 2554 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2555 if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) 2556 break; 2557 if (sk->sk_shutdown & SEND_SHUTDOWN) 2558 break; 2559 if (sk->sk_err) 2560 break; 2561 timeo = schedule_timeout(timeo); 2562 } 2563 finish_wait(sk_sleep(sk), &wait); 2564 return timeo; 2565 } 2566 2567 2568 /* 2569 * Generic send/receive buffer handlers 2570 */ 2571 2572 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, 2573 unsigned long data_len, int noblock, 2574 int *errcode, int max_page_order) 2575 { 2576 struct sk_buff *skb; 2577 long timeo; 2578 int err; 2579 2580 timeo = sock_sndtimeo(sk, noblock); 2581 for (;;) { 2582 err = sock_error(sk); 2583 if (err != 0) 2584 goto failure; 2585 2586 err = -EPIPE; 2587 if (sk->sk_shutdown & SEND_SHUTDOWN) 2588 goto failure; 2589 2590 if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf)) 2591 break; 2592 2593 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 2594 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 2595 err = -EAGAIN; 2596 if (!timeo) 2597 goto failure; 2598 if (signal_pending(current)) 2599 goto interrupted; 2600 timeo = sock_wait_for_wmem(sk, timeo); 2601 } 2602 skb = alloc_skb_with_frags(header_len, data_len, max_page_order, 2603 errcode, sk->sk_allocation); 2604 if (skb) 2605 skb_set_owner_w(skb, sk); 2606 return skb; 2607 2608 interrupted: 2609 err = sock_intr_errno(timeo); 2610 failure: 2611 *errcode = err; 2612 return NULL; 2613 } 2614 EXPORT_SYMBOL(sock_alloc_send_pskb); 2615 2616 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 2617 int noblock, int *errcode) 2618 { 2619 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0); 2620 } 2621 EXPORT_SYMBOL(sock_alloc_send_skb); 2622 2623 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, 2624 struct sockcm_cookie *sockc) 2625 { 2626 u32 tsflags; 2627 2628 switch (cmsg->cmsg_type) { 2629 case SO_MARK: 2630 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && 2631 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2632 return -EPERM; 2633 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2634 return -EINVAL; 2635 sockc->mark = *(u32 *)CMSG_DATA(cmsg); 2636 break; 2637 case SO_TIMESTAMPING_OLD: 2638 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) 2639 return -EINVAL; 2640 2641 tsflags = *(u32 *)CMSG_DATA(cmsg); 2642 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK) 2643 return -EINVAL; 2644 2645 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK; 2646 sockc->tsflags |= tsflags; 2647 break; 2648 case SCM_TXTIME: 2649 if (!sock_flag(sk, SOCK_TXTIME)) 2650 return -EINVAL; 2651 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64))) 2652 return -EINVAL; 2653 sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); 2654 break; 2655 /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. 
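 *
 * They are consumed (if at all) by the generic scm/af_unix code, so they
 * are deliberately accepted here instead of failing with -EINVAL. For
 * reference, a protocol's sendmsg() typically feeds its control messages
 * through the sock_cmsg_send() wrapper below -- an illustrative sketch,
 * not a quote of a particular protocol:
 *
 *	struct sockcm_cookie sockc = { .tsflags = sk->sk_tsflags };
 *
 *	if (msg->msg_controllen) {
 *		err = sock_cmsg_send(sk, msg, &sockc);
 *		if (unlikely(err))
 *			return err;
 *	}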
*/ 2656 case SCM_RIGHTS: 2657 case SCM_CREDENTIALS: 2658 break; 2659 default: 2660 return -EINVAL; 2661 } 2662 return 0; 2663 } 2664 EXPORT_SYMBOL(__sock_cmsg_send); 2665 2666 int sock_cmsg_send(struct sock *sk, struct msghdr *msg, 2667 struct sockcm_cookie *sockc) 2668 { 2669 struct cmsghdr *cmsg; 2670 int ret; 2671 2672 for_each_cmsghdr(cmsg, msg) { 2673 if (!CMSG_OK(msg, cmsg)) 2674 return -EINVAL; 2675 if (cmsg->cmsg_level != SOL_SOCKET) 2676 continue; 2677 ret = __sock_cmsg_send(sk, msg, cmsg, sockc); 2678 if (ret) 2679 return ret; 2680 } 2681 return 0; 2682 } 2683 EXPORT_SYMBOL(sock_cmsg_send); 2684 2685 static void sk_enter_memory_pressure(struct sock *sk) 2686 { 2687 if (!sk->sk_prot->enter_memory_pressure) 2688 return; 2689 2690 sk->sk_prot->enter_memory_pressure(sk); 2691 } 2692 2693 static void sk_leave_memory_pressure(struct sock *sk) 2694 { 2695 if (sk->sk_prot->leave_memory_pressure) { 2696 sk->sk_prot->leave_memory_pressure(sk); 2697 } else { 2698 unsigned long *memory_pressure = sk->sk_prot->memory_pressure; 2699 2700 if (memory_pressure && READ_ONCE(*memory_pressure)) 2701 WRITE_ONCE(*memory_pressure, 0); 2702 } 2703 } 2704 2705 DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); 2706 2707 /** 2708 * skb_page_frag_refill - check that a page_frag contains enough room 2709 * @sz: minimum size of the fragment we want to get 2710 * @pfrag: pointer to page_frag 2711 * @gfp: priority for memory allocation 2712 * 2713 * Note: While this allocator tries to use high order pages, there is 2714 * no guarantee that allocations succeed. Therefore, @sz MUST be 2715 * less or equal than PAGE_SIZE. 2716 */ 2717 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) 2718 { 2719 if (pfrag->page) { 2720 if (page_ref_count(pfrag->page) == 1) { 2721 pfrag->offset = 0; 2722 return true; 2723 } 2724 if (pfrag->offset + sz <= pfrag->size) 2725 return true; 2726 put_page(pfrag->page); 2727 } 2728 2729 pfrag->offset = 0; 2730 if (SKB_FRAG_PAGE_ORDER && 2731 !static_branch_unlikely(&net_high_order_alloc_disable_key)) { 2732 /* Avoid direct reclaim but allow kswapd to wake */ 2733 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | 2734 __GFP_COMP | __GFP_NOWARN | 2735 __GFP_NORETRY, 2736 SKB_FRAG_PAGE_ORDER); 2737 if (likely(pfrag->page)) { 2738 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; 2739 return true; 2740 } 2741 } 2742 pfrag->page = alloc_page(gfp); 2743 if (likely(pfrag->page)) { 2744 pfrag->size = PAGE_SIZE; 2745 return true; 2746 } 2747 return false; 2748 } 2749 EXPORT_SYMBOL(skb_page_frag_refill); 2750 2751 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) 2752 { 2753 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation))) 2754 return true; 2755 2756 sk_enter_memory_pressure(sk); 2757 sk_stream_moderate_sndbuf(sk); 2758 return false; 2759 } 2760 EXPORT_SYMBOL(sk_page_frag_refill); 2761 2762 void __lock_sock(struct sock *sk) 2763 __releases(&sk->sk_lock.slock) 2764 __acquires(&sk->sk_lock.slock) 2765 { 2766 DEFINE_WAIT(wait); 2767 2768 for (;;) { 2769 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait, 2770 TASK_UNINTERRUPTIBLE); 2771 spin_unlock_bh(&sk->sk_lock.slock); 2772 schedule(); 2773 spin_lock_bh(&sk->sk_lock.slock); 2774 if (!sock_owned_by_user(sk)) 2775 break; 2776 } 2777 finish_wait(&sk->sk_lock.wq, &wait); 2778 } 2779 2780 void __release_sock(struct sock *sk) 2781 __releases(&sk->sk_lock.slock) 2782 __acquires(&sk->sk_lock.slock) 2783 { 2784 struct sk_buff *skb, *next; 2785 2786 while ((skb = sk->sk_backlog.head) 
!= NULL) {
2787 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2788
2789 spin_unlock_bh(&sk->sk_lock.slock);
2790
2791 do {
2792 next = skb->next;
2793 prefetch(next);
2794 WARN_ON_ONCE(skb_dst_is_noref(skb));
2795 skb_mark_not_on_list(skb);
2796 sk_backlog_rcv(sk, skb);
2797
2798 cond_resched();
2799
2800 skb = next;
2801 } while (skb != NULL);
2802
2803 spin_lock_bh(&sk->sk_lock.slock);
2804 }
2805
2806 /*
2807 * Doing the zeroing here guarantees we cannot loop forever
2808 * while a wild producer attempts to flood us.
2809 */
2810 sk->sk_backlog.len = 0;
2811 }
2812
2813 void __sk_flush_backlog(struct sock *sk)
2814 {
2815 spin_lock_bh(&sk->sk_lock.slock);
2816 __release_sock(sk);
2817 spin_unlock_bh(&sk->sk_lock.slock);
2818 }
2819
2820 /**
2821 * sk_wait_data - wait for data to arrive at sk_receive_queue
2822 * @sk: sock to wait on
2823 * @timeo: for how long
2824 * @skb: last skb seen on sk_receive_queue
2825 *
2826 * Now socket state including sk->sk_err is changed only under lock,
2827 * hence we may omit checks after joining wait queue.
2828 * We check receive queue before schedule() only as an optimization;
2829 * it is very likely that release_sock() added new data.
2830 */
2831 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2832 {
2833 DEFINE_WAIT_FUNC(wait, woken_wake_function);
2834 int rc;
2835
2836 add_wait_queue(sk_sleep(sk), &wait);
2837 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2838 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2839 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2840 remove_wait_queue(sk_sleep(sk), &wait);
2841 return rc;
2842 }
2843 EXPORT_SYMBOL(sk_wait_data);
2844
2845 /**
2846 * __sk_mem_raise_allocated - increase memory_allocated
2847 * @sk: socket
2848 * @size: memory size to allocate
2849 * @amt: pages to allocate
2850 * @kind: allocation type
2851 *
2852 * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2853 */
2854 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2855 {
2856 struct proto *prot = sk->sk_prot;
2857 long allocated = sk_memory_allocated_add(sk, amt);
2858 bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
2859 bool charged = true;
2860
2861 if (memcg_charge &&
2862 !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
2863 gfp_memcg_charge())))
2864 goto suppress_allocation;
2865
2866 /* Under limit. */
2867 if (allocated <= sk_prot_mem_limits(sk, 0)) {
2868 sk_leave_memory_pressure(sk);
2869 return 1;
2870 }
2871
2872 /* Under pressure. */
2873 if (allocated > sk_prot_mem_limits(sk, 1))
2874 sk_enter_memory_pressure(sk);
2875
2876 /* Over hard limit.
*/ 2877 if (allocated > sk_prot_mem_limits(sk, 2)) 2878 goto suppress_allocation; 2879 2880 /* guarantee minimum buffer size under pressure */ 2881 if (kind == SK_MEM_RECV) { 2882 if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) 2883 return 1; 2884 2885 } else { /* SK_MEM_SEND */ 2886 int wmem0 = sk_get_wmem0(sk, prot); 2887 2888 if (sk->sk_type == SOCK_STREAM) { 2889 if (sk->sk_wmem_queued < wmem0) 2890 return 1; 2891 } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) { 2892 return 1; 2893 } 2894 } 2895 2896 if (sk_has_memory_pressure(sk)) { 2897 u64 alloc; 2898 2899 if (!sk_under_memory_pressure(sk)) 2900 return 1; 2901 alloc = sk_sockets_allocated_read_positive(sk); 2902 if (sk_prot_mem_limits(sk, 2) > alloc * 2903 sk_mem_pages(sk->sk_wmem_queued + 2904 atomic_read(&sk->sk_rmem_alloc) + 2905 sk->sk_forward_alloc)) 2906 return 1; 2907 } 2908 2909 suppress_allocation: 2910 2911 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { 2912 sk_stream_moderate_sndbuf(sk); 2913 2914 /* Fail only if socket is _under_ its sndbuf. 2915 * In this case we cannot block, so that we have to fail. 2916 */ 2917 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { 2918 /* Force charge with __GFP_NOFAIL */ 2919 if (memcg_charge && !charged) { 2920 mem_cgroup_charge_skmem(sk->sk_memcg, amt, 2921 gfp_memcg_charge() | __GFP_NOFAIL); 2922 } 2923 return 1; 2924 } 2925 } 2926 2927 if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) 2928 trace_sock_exceed_buf_limit(sk, prot, allocated, kind); 2929 2930 sk_memory_allocated_sub(sk, amt); 2931 2932 if (memcg_charge && charged) 2933 mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); 2934 2935 return 0; 2936 } 2937 EXPORT_SYMBOL(__sk_mem_raise_allocated); 2938 2939 /** 2940 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated 2941 * @sk: socket 2942 * @size: memory size to allocate 2943 * @kind: allocation type 2944 * 2945 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means 2946 * rmem allocation. This function assumes that protocols which have 2947 * memory_pressure use sk_wmem_queued as write buffer accounting. 
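 *
 * Worked example (illustrative; assumes 4 KiB pages, so SK_MEM_QUANTUM is
 * 4096): scheduling 3000 bytes gives sk_mem_pages(3000) == 1, so
 * sk_forward_alloc grows by one quantum (4096 bytes) and memory_allocated
 * by one unit. A caller might therefore write:
 *
 *	if (!sk_wmem_schedule(sk, skb->truesize))
 *		return -ENOBUFS;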
2948 */ 2949 int __sk_mem_schedule(struct sock *sk, int size, int kind) 2950 { 2951 int ret, amt = sk_mem_pages(size); 2952 2953 sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT; 2954 ret = __sk_mem_raise_allocated(sk, size, amt, kind); 2955 if (!ret) 2956 sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT; 2957 return ret; 2958 } 2959 EXPORT_SYMBOL(__sk_mem_schedule); 2960 2961 /** 2962 * __sk_mem_reduce_allocated - reclaim memory_allocated 2963 * @sk: socket 2964 * @amount: number of quanta 2965 * 2966 * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc 2967 */ 2968 void __sk_mem_reduce_allocated(struct sock *sk, int amount) 2969 { 2970 sk_memory_allocated_sub(sk, amount); 2971 2972 if (mem_cgroup_sockets_enabled && sk->sk_memcg) 2973 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); 2974 2975 if (sk_under_memory_pressure(sk) && 2976 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) 2977 sk_leave_memory_pressure(sk); 2978 } 2979 EXPORT_SYMBOL(__sk_mem_reduce_allocated); 2980 2981 /** 2982 * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated 2983 * @sk: socket 2984 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple) 2985 */ 2986 void __sk_mem_reclaim(struct sock *sk, int amount) 2987 { 2988 amount >>= SK_MEM_QUANTUM_SHIFT; 2989 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT; 2990 __sk_mem_reduce_allocated(sk, amount); 2991 } 2992 EXPORT_SYMBOL(__sk_mem_reclaim); 2993 2994 int sk_set_peek_off(struct sock *sk, int val) 2995 { 2996 sk->sk_peek_off = val; 2997 return 0; 2998 } 2999 EXPORT_SYMBOL_GPL(sk_set_peek_off); 3000 3001 /* 3002 * Set of default routines for initialising struct proto_ops when 3003 * the protocol does not support a particular function. In certain 3004 * cases where it makes no sense for a protocol to have a "do nothing" 3005 * function, some default processing is provided. 
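 *
 * Illustrative use (a sketch; the "foo"/PF_FOO names are placeholders,
 * not real symbols): a datagram style proto_ops simply points the
 * operations it does not support at these stubs:
 *
 *	static const struct proto_ops foo_dgram_ops = {
 *		.family		= PF_FOO,
 *		.owner		= THIS_MODULE,
 *		.listen		= sock_no_listen,
 *		.accept		= sock_no_accept,
 *		.mmap		= sock_no_mmap,
 *		.sendpage	= sock_no_sendpage,
 *		...
 *	};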
3006 */ 3007 3008 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) 3009 { 3010 return -EOPNOTSUPP; 3011 } 3012 EXPORT_SYMBOL(sock_no_bind); 3013 3014 int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 3015 int len, int flags) 3016 { 3017 return -EOPNOTSUPP; 3018 } 3019 EXPORT_SYMBOL(sock_no_connect); 3020 3021 int sock_no_socketpair(struct socket *sock1, struct socket *sock2) 3022 { 3023 return -EOPNOTSUPP; 3024 } 3025 EXPORT_SYMBOL(sock_no_socketpair); 3026 3027 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, 3028 bool kern) 3029 { 3030 return -EOPNOTSUPP; 3031 } 3032 EXPORT_SYMBOL(sock_no_accept); 3033 3034 int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 3035 int peer) 3036 { 3037 return -EOPNOTSUPP; 3038 } 3039 EXPORT_SYMBOL(sock_no_getname); 3040 3041 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3042 { 3043 return -EOPNOTSUPP; 3044 } 3045 EXPORT_SYMBOL(sock_no_ioctl); 3046 3047 int sock_no_listen(struct socket *sock, int backlog) 3048 { 3049 return -EOPNOTSUPP; 3050 } 3051 EXPORT_SYMBOL(sock_no_listen); 3052 3053 int sock_no_shutdown(struct socket *sock, int how) 3054 { 3055 return -EOPNOTSUPP; 3056 } 3057 EXPORT_SYMBOL(sock_no_shutdown); 3058 3059 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) 3060 { 3061 return -EOPNOTSUPP; 3062 } 3063 EXPORT_SYMBOL(sock_no_sendmsg); 3064 3065 int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len) 3066 { 3067 return -EOPNOTSUPP; 3068 } 3069 EXPORT_SYMBOL(sock_no_sendmsg_locked); 3070 3071 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, 3072 int flags) 3073 { 3074 return -EOPNOTSUPP; 3075 } 3076 EXPORT_SYMBOL(sock_no_recvmsg); 3077 3078 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) 3079 { 3080 /* Mirror missing mmap method error code */ 3081 return -ENODEV; 3082 } 3083 EXPORT_SYMBOL(sock_no_mmap); 3084 3085 /* 3086 * When a file is received (via SCM_RIGHTS, etc), we must bump the 3087 * various sock-based usage counts. 
3088 */ 3089 void __receive_sock(struct file *file) 3090 { 3091 struct socket *sock; 3092 3093 sock = sock_from_file(file); 3094 if (sock) { 3095 sock_update_netprioidx(&sock->sk->sk_cgrp_data); 3096 sock_update_classid(&sock->sk->sk_cgrp_data); 3097 } 3098 } 3099 3100 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) 3101 { 3102 ssize_t res; 3103 struct msghdr msg = {.msg_flags = flags}; 3104 struct kvec iov; 3105 char *kaddr = kmap(page); 3106 iov.iov_base = kaddr + offset; 3107 iov.iov_len = size; 3108 res = kernel_sendmsg(sock, &msg, &iov, 1, size); 3109 kunmap(page); 3110 return res; 3111 } 3112 EXPORT_SYMBOL(sock_no_sendpage); 3113 3114 ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, 3115 int offset, size_t size, int flags) 3116 { 3117 ssize_t res; 3118 struct msghdr msg = {.msg_flags = flags}; 3119 struct kvec iov; 3120 char *kaddr = kmap(page); 3121 3122 iov.iov_base = kaddr + offset; 3123 iov.iov_len = size; 3124 res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size); 3125 kunmap(page); 3126 return res; 3127 } 3128 EXPORT_SYMBOL(sock_no_sendpage_locked); 3129 3130 /* 3131 * Default Socket Callbacks 3132 */ 3133 3134 static void sock_def_wakeup(struct sock *sk) 3135 { 3136 struct socket_wq *wq; 3137 3138 rcu_read_lock(); 3139 wq = rcu_dereference(sk->sk_wq); 3140 if (skwq_has_sleeper(wq)) 3141 wake_up_interruptible_all(&wq->wait); 3142 rcu_read_unlock(); 3143 } 3144 3145 static void sock_def_error_report(struct sock *sk) 3146 { 3147 struct socket_wq *wq; 3148 3149 rcu_read_lock(); 3150 wq = rcu_dereference(sk->sk_wq); 3151 if (skwq_has_sleeper(wq)) 3152 wake_up_interruptible_poll(&wq->wait, EPOLLERR); 3153 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 3154 rcu_read_unlock(); 3155 } 3156 3157 void sock_def_readable(struct sock *sk) 3158 { 3159 struct socket_wq *wq; 3160 3161 rcu_read_lock(); 3162 wq = rcu_dereference(sk->sk_wq); 3163 if (skwq_has_sleeper(wq)) 3164 wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | 3165 EPOLLRDNORM | EPOLLRDBAND); 3166 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 3167 rcu_read_unlock(); 3168 } 3169 3170 static void sock_def_write_space(struct sock *sk) 3171 { 3172 struct socket_wq *wq; 3173 3174 rcu_read_lock(); 3175 3176 /* Do not wake up a writer until he can make "significant" 3177 * progress. 
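 *
 * ("Significant" translates into the check right below: the writer is
 * only woken once 2 * sk_wmem_alloc <= sk_sndbuf, i.e. at least half of
 * the send buffer has drained. For example, with a 64 KiB sk_sndbuf the
 * wakeup fires only once no more than 32 KiB is still queued.)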
--DaveM 3178 */ 3179 if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) { 3180 wq = rcu_dereference(sk->sk_wq); 3181 if (skwq_has_sleeper(wq)) 3182 wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | 3183 EPOLLWRNORM | EPOLLWRBAND); 3184 3185 /* Should agree with poll, otherwise some programs break */ 3186 if (sock_writeable(sk)) 3187 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 3188 } 3189 3190 rcu_read_unlock(); 3191 } 3192 3193 static void sock_def_destruct(struct sock *sk) 3194 { 3195 } 3196 3197 void sk_send_sigurg(struct sock *sk) 3198 { 3199 if (sk->sk_socket && sk->sk_socket->file) 3200 if (send_sigurg(&sk->sk_socket->file->f_owner)) 3201 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); 3202 } 3203 EXPORT_SYMBOL(sk_send_sigurg); 3204 3205 void sk_reset_timer(struct sock *sk, struct timer_list* timer, 3206 unsigned long expires) 3207 { 3208 if (!mod_timer(timer, expires)) 3209 sock_hold(sk); 3210 } 3211 EXPORT_SYMBOL(sk_reset_timer); 3212 3213 void sk_stop_timer(struct sock *sk, struct timer_list* timer) 3214 { 3215 if (del_timer(timer)) 3216 __sock_put(sk); 3217 } 3218 EXPORT_SYMBOL(sk_stop_timer); 3219 3220 void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) 3221 { 3222 if (del_timer_sync(timer)) 3223 __sock_put(sk); 3224 } 3225 EXPORT_SYMBOL(sk_stop_timer_sync); 3226 3227 void sock_init_data(struct socket *sock, struct sock *sk) 3228 { 3229 sk_init_common(sk); 3230 sk->sk_send_head = NULL; 3231 3232 timer_setup(&sk->sk_timer, NULL, 0); 3233 3234 sk->sk_allocation = GFP_KERNEL; 3235 sk->sk_rcvbuf = sysctl_rmem_default; 3236 sk->sk_sndbuf = sysctl_wmem_default; 3237 sk->sk_state = TCP_CLOSE; 3238 sk_set_socket(sk, sock); 3239 3240 sock_set_flag(sk, SOCK_ZAPPED); 3241 3242 if (sock) { 3243 sk->sk_type = sock->type; 3244 RCU_INIT_POINTER(sk->sk_wq, &sock->wq); 3245 sock->sk = sk; 3246 sk->sk_uid = SOCK_INODE(sock)->i_uid; 3247 } else { 3248 RCU_INIT_POINTER(sk->sk_wq, NULL); 3249 sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); 3250 } 3251 3252 rwlock_init(&sk->sk_callback_lock); 3253 if (sk->sk_kern_sock) 3254 lockdep_set_class_and_name( 3255 &sk->sk_callback_lock, 3256 af_kern_callback_keys + sk->sk_family, 3257 af_family_kern_clock_key_strings[sk->sk_family]); 3258 else 3259 lockdep_set_class_and_name( 3260 &sk->sk_callback_lock, 3261 af_callback_keys + sk->sk_family, 3262 af_family_clock_key_strings[sk->sk_family]); 3263 3264 sk->sk_state_change = sock_def_wakeup; 3265 sk->sk_data_ready = sock_def_readable; 3266 sk->sk_write_space = sock_def_write_space; 3267 sk->sk_error_report = sock_def_error_report; 3268 sk->sk_destruct = sock_def_destruct; 3269 3270 sk->sk_frag.page = NULL; 3271 sk->sk_frag.offset = 0; 3272 sk->sk_peek_off = -1; 3273 3274 sk->sk_peer_pid = NULL; 3275 sk->sk_peer_cred = NULL; 3276 spin_lock_init(&sk->sk_peer_lock); 3277 3278 sk->sk_write_pending = 0; 3279 sk->sk_rcvlowat = 1; 3280 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 3281 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; 3282 3283 sk->sk_stamp = SK_DEFAULT_STAMP; 3284 #if BITS_PER_LONG==32 3285 seqlock_init(&sk->sk_stamp_seq); 3286 #endif 3287 atomic_set(&sk->sk_zckey, 0); 3288 3289 #ifdef CONFIG_NET_RX_BUSY_POLL 3290 sk->sk_napi_id = 0; 3291 sk->sk_ll_usec = sysctl_net_busy_read; 3292 #endif 3293 3294 sk->sk_max_pacing_rate = ~0UL; 3295 sk->sk_pacing_rate = ~0UL; 3296 WRITE_ONCE(sk->sk_pacing_shift, 10); 3297 sk->sk_incoming_cpu = -1; 3298 sk->sk_txrehash = SOCK_TXREHASH_DEFAULT; 3299 3300 sk_rx_queue_clear(sk); 3301 /* 3302 * Before updating sk_refcnt, we must commit prior 
changes to memory 3303 * (Documentation/RCU/rculist_nulls.rst for details) 3304 */ 3305 smp_wmb(); 3306 refcount_set(&sk->sk_refcnt, 1); 3307 atomic_set(&sk->sk_drops, 0); 3308 } 3309 EXPORT_SYMBOL(sock_init_data); 3310 3311 void lock_sock_nested(struct sock *sk, int subclass) 3312 { 3313 /* The sk_lock has mutex_lock() semantics here. */ 3314 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); 3315 3316 might_sleep(); 3317 spin_lock_bh(&sk->sk_lock.slock); 3318 if (sock_owned_by_user_nocheck(sk)) 3319 __lock_sock(sk); 3320 sk->sk_lock.owned = 1; 3321 spin_unlock_bh(&sk->sk_lock.slock); 3322 } 3323 EXPORT_SYMBOL(lock_sock_nested); 3324 3325 void release_sock(struct sock *sk) 3326 { 3327 spin_lock_bh(&sk->sk_lock.slock); 3328 if (sk->sk_backlog.tail) 3329 __release_sock(sk); 3330 3331 /* Warning : release_cb() might need to release sk ownership, 3332 * ie call sock_release_ownership(sk) before us. 3333 */ 3334 if (sk->sk_prot->release_cb) 3335 sk->sk_prot->release_cb(sk); 3336 3337 sock_release_ownership(sk); 3338 if (waitqueue_active(&sk->sk_lock.wq)) 3339 wake_up(&sk->sk_lock.wq); 3340 spin_unlock_bh(&sk->sk_lock.slock); 3341 } 3342 EXPORT_SYMBOL(release_sock); 3343 3344 bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) 3345 { 3346 might_sleep(); 3347 spin_lock_bh(&sk->sk_lock.slock); 3348 3349 if (!sock_owned_by_user_nocheck(sk)) { 3350 /* 3351 * Fast path return with bottom halves disabled and 3352 * sock::sk_lock.slock held. 3353 * 3354 * The 'mutex' is not contended and holding 3355 * sock::sk_lock.slock prevents all other lockers to 3356 * proceed so the corresponding unlock_sock_fast() can 3357 * avoid the slow path of release_sock() completely and 3358 * just release slock. 3359 * 3360 * From a semantical POV this is equivalent to 'acquiring' 3361 * the 'mutex', hence the corresponding lockdep 3362 * mutex_release() has to happen in the fast path of 3363 * unlock_sock_fast(). 
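 *
 * Typical caller pattern of the lock_sock_fast()/unlock_sock_fast()
 * wrappers (illustrative sketch):
 *
 *	bool slow = lock_sock_fast(sk);
 *	...
 *	unlock_sock_fast(sk, slow);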
3364 */ 3365 return false; 3366 } 3367 3368 __lock_sock(sk); 3369 sk->sk_lock.owned = 1; 3370 __acquire(&sk->sk_lock.slock); 3371 spin_unlock_bh(&sk->sk_lock.slock); 3372 return true; 3373 } 3374 EXPORT_SYMBOL(__lock_sock_fast); 3375 3376 int sock_gettstamp(struct socket *sock, void __user *userstamp, 3377 bool timeval, bool time32) 3378 { 3379 struct sock *sk = sock->sk; 3380 struct timespec64 ts; 3381 3382 sock_enable_timestamp(sk, SOCK_TIMESTAMP); 3383 ts = ktime_to_timespec64(sock_read_timestamp(sk)); 3384 if (ts.tv_sec == -1) 3385 return -ENOENT; 3386 if (ts.tv_sec == 0) { 3387 ktime_t kt = ktime_get_real(); 3388 sock_write_timestamp(sk, kt); 3389 ts = ktime_to_timespec64(kt); 3390 } 3391 3392 if (timeval) 3393 ts.tv_nsec /= 1000; 3394 3395 #ifdef CONFIG_COMPAT_32BIT_TIME 3396 if (time32) 3397 return put_old_timespec32(&ts, userstamp); 3398 #endif 3399 #ifdef CONFIG_SPARC64 3400 /* beware of padding in sparc64 timeval */ 3401 if (timeval && !in_compat_syscall()) { 3402 struct __kernel_old_timeval __user tv = { 3403 .tv_sec = ts.tv_sec, 3404 .tv_usec = ts.tv_nsec, 3405 }; 3406 if (copy_to_user(userstamp, &tv, sizeof(tv))) 3407 return -EFAULT; 3408 return 0; 3409 } 3410 #endif 3411 return put_timespec64(&ts, userstamp); 3412 } 3413 EXPORT_SYMBOL(sock_gettstamp); 3414 3415 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) 3416 { 3417 if (!sock_flag(sk, flag)) { 3418 unsigned long previous_flags = sk->sk_flags; 3419 3420 sock_set_flag(sk, flag); 3421 /* 3422 * we just set one of the two flags which require net 3423 * time stamping, but time stamping might have been on 3424 * already because of the other one 3425 */ 3426 if (sock_needs_netstamp(sk) && 3427 !(previous_flags & SK_FLAGS_TIMESTAMP)) 3428 net_enable_timestamp(); 3429 } 3430 } 3431 3432 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, 3433 int level, int type) 3434 { 3435 struct sock_exterr_skb *serr; 3436 struct sk_buff *skb; 3437 int copied, err; 3438 3439 err = -EAGAIN; 3440 skb = sock_dequeue_err_skb(sk); 3441 if (skb == NULL) 3442 goto out; 3443 3444 copied = skb->len; 3445 if (copied > len) { 3446 msg->msg_flags |= MSG_TRUNC; 3447 copied = len; 3448 } 3449 err = skb_copy_datagram_msg(skb, 0, msg, copied); 3450 if (err) 3451 goto out_free_skb; 3452 3453 sock_recv_timestamp(msg, sk, skb); 3454 3455 serr = SKB_EXT_ERR(skb); 3456 put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee); 3457 3458 msg->msg_flags |= MSG_ERRQUEUE; 3459 err = copied; 3460 3461 out_free_skb: 3462 kfree_skb(skb); 3463 out: 3464 return err; 3465 } 3466 EXPORT_SYMBOL(sock_recv_errqueue); 3467 3468 /* 3469 * Get a socket option on an socket. 3470 * 3471 * FIX: POSIX 1003.1g is very ambiguous here. It states that 3472 * asynchronous errors should be reported by getsockopt. We assume 3473 * this means if you specify SO_ERROR (otherwise whats the point of it). 
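 *
 * (Illustrative user space view of that convention, not kernel code:
 *
 *	int err = 0;
 *	socklen_t len = sizeof(err);
 *
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *
 *  fetches and clears the pending asynchronous error on fd.)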
3474 */ 3475 int sock_common_getsockopt(struct socket *sock, int level, int optname, 3476 char __user *optval, int __user *optlen) 3477 { 3478 struct sock *sk = sock->sk; 3479 3480 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); 3481 } 3482 EXPORT_SYMBOL(sock_common_getsockopt); 3483 3484 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 3485 int flags) 3486 { 3487 struct sock *sk = sock->sk; 3488 int addr_len = 0; 3489 int err; 3490 3491 err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, 3492 flags & ~MSG_DONTWAIT, &addr_len); 3493 if (err >= 0) 3494 msg->msg_namelen = addr_len; 3495 return err; 3496 } 3497 EXPORT_SYMBOL(sock_common_recvmsg); 3498 3499 /* 3500 * Set socket options on an inet socket. 3501 */ 3502 int sock_common_setsockopt(struct socket *sock, int level, int optname, 3503 sockptr_t optval, unsigned int optlen) 3504 { 3505 struct sock *sk = sock->sk; 3506 3507 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); 3508 } 3509 EXPORT_SYMBOL(sock_common_setsockopt); 3510 3511 void sk_common_release(struct sock *sk) 3512 { 3513 if (sk->sk_prot->destroy) 3514 sk->sk_prot->destroy(sk); 3515 3516 /* 3517 * Observation: when sk_common_release is called, processes have 3518 * no access to socket. But net still has. 3519 * Step one, detach it from networking: 3520 * 3521 * A. Remove from hash tables. 3522 */ 3523 3524 sk->sk_prot->unhash(sk); 3525 3526 /* 3527 * In this point socket cannot receive new packets, but it is possible 3528 * that some packets are in flight because some CPU runs receiver and 3529 * did hash table lookup before we unhashed socket. They will achieve 3530 * receive queue and will be purged by socket destructor. 3531 * 3532 * Also we still have packets pending on receive queue and probably, 3533 * our own packets waiting in device queues. sock_destroy will drain 3534 * receive queue, but transmitted packets will delay socket destruction 3535 * until the last reference will be released. 3536 */ 3537 3538 sock_orphan(sk); 3539 3540 xfrm_sk_free_policy(sk); 3541 3542 sk_refcnt_debug_release(sk); 3543 3544 sock_put(sk); 3545 } 3546 EXPORT_SYMBOL(sk_common_release); 3547 3548 void sk_get_meminfo(const struct sock *sk, u32 *mem) 3549 { 3550 memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); 3551 3552 mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); 3553 mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); 3554 mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); 3555 mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); 3556 mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; 3557 mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); 3558 mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); 3559 mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); 3560 mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); 3561 } 3562 3563 #ifdef CONFIG_PROC_FS 3564 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); 3565 3566 int sock_prot_inuse_get(struct net *net, struct proto *prot) 3567 { 3568 int cpu, idx = prot->inuse_idx; 3569 int res = 0; 3570 3571 for_each_possible_cpu(cpu) 3572 res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx]; 3573 3574 return res >= 0 ? 
res : 0; 3575 } 3576 EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 3577 3578 int sock_inuse_get(struct net *net) 3579 { 3580 int cpu, res = 0; 3581 3582 for_each_possible_cpu(cpu) 3583 res += per_cpu_ptr(net->core.prot_inuse, cpu)->all; 3584 3585 return res; 3586 } 3587 3588 EXPORT_SYMBOL_GPL(sock_inuse_get); 3589 3590 static int __net_init sock_inuse_init_net(struct net *net) 3591 { 3592 net->core.prot_inuse = alloc_percpu(struct prot_inuse); 3593 if (net->core.prot_inuse == NULL) 3594 return -ENOMEM; 3595 return 0; 3596 } 3597 3598 static void __net_exit sock_inuse_exit_net(struct net *net) 3599 { 3600 free_percpu(net->core.prot_inuse); 3601 } 3602 3603 static struct pernet_operations net_inuse_ops = { 3604 .init = sock_inuse_init_net, 3605 .exit = sock_inuse_exit_net, 3606 }; 3607 3608 static __init int net_inuse_init(void) 3609 { 3610 if (register_pernet_subsys(&net_inuse_ops)) 3611 panic("Cannot initialize net inuse counters"); 3612 3613 return 0; 3614 } 3615 3616 core_initcall(net_inuse_init); 3617 3618 static int assign_proto_idx(struct proto *prot) 3619 { 3620 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); 3621 3622 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { 3623 pr_err("PROTO_INUSE_NR exhausted\n"); 3624 return -ENOSPC; 3625 } 3626 3627 set_bit(prot->inuse_idx, proto_inuse_idx); 3628 return 0; 3629 } 3630 3631 static void release_proto_idx(struct proto *prot) 3632 { 3633 if (prot->inuse_idx != PROTO_INUSE_NR - 1) 3634 clear_bit(prot->inuse_idx, proto_inuse_idx); 3635 } 3636 #else 3637 static inline int assign_proto_idx(struct proto *prot) 3638 { 3639 return 0; 3640 } 3641 3642 static inline void release_proto_idx(struct proto *prot) 3643 { 3644 } 3645 3646 #endif 3647 3648 static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) 3649 { 3650 if (!twsk_prot) 3651 return; 3652 kfree(twsk_prot->twsk_slab_name); 3653 twsk_prot->twsk_slab_name = NULL; 3654 kmem_cache_destroy(twsk_prot->twsk_slab); 3655 twsk_prot->twsk_slab = NULL; 3656 } 3657 3658 static int tw_prot_init(const struct proto *prot) 3659 { 3660 struct timewait_sock_ops *twsk_prot = prot->twsk_prot; 3661 3662 if (!twsk_prot) 3663 return 0; 3664 3665 twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", 3666 prot->name); 3667 if (!twsk_prot->twsk_slab_name) 3668 return -ENOMEM; 3669 3670 twsk_prot->twsk_slab = 3671 kmem_cache_create(twsk_prot->twsk_slab_name, 3672 twsk_prot->twsk_obj_size, 0, 3673 SLAB_ACCOUNT | prot->slab_flags, 3674 NULL); 3675 if (!twsk_prot->twsk_slab) { 3676 pr_crit("%s: Can't create timewait sock SLAB cache!\n", 3677 prot->name); 3678 return -ENOMEM; 3679 } 3680 3681 return 0; 3682 } 3683 3684 static void req_prot_cleanup(struct request_sock_ops *rsk_prot) 3685 { 3686 if (!rsk_prot) 3687 return; 3688 kfree(rsk_prot->slab_name); 3689 rsk_prot->slab_name = NULL; 3690 kmem_cache_destroy(rsk_prot->slab); 3691 rsk_prot->slab = NULL; 3692 } 3693 3694 static int req_prot_init(const struct proto *prot) 3695 { 3696 struct request_sock_ops *rsk_prot = prot->rsk_prot; 3697 3698 if (!rsk_prot) 3699 return 0; 3700 3701 rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", 3702 prot->name); 3703 if (!rsk_prot->slab_name) 3704 return -ENOMEM; 3705 3706 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, 3707 rsk_prot->obj_size, 0, 3708 SLAB_ACCOUNT | prot->slab_flags, 3709 NULL); 3710 3711 if (!rsk_prot->slab) { 3712 pr_crit("%s: Can't create request sock SLAB cache!\n", 3713 prot->name); 3714 return -ENOMEM; 3715 } 3716 return 0; 3717 } 3718 3719 int 
proto_register(struct proto *prot, int alloc_slab) 3720 { 3721 int ret = -ENOBUFS; 3722 3723 if (prot->memory_allocated && !prot->sysctl_mem) { 3724 pr_err("%s: missing sysctl_mem\n", prot->name); 3725 return -EINVAL; 3726 } 3727 if (alloc_slab) { 3728 prot->slab = kmem_cache_create_usercopy(prot->name, 3729 prot->obj_size, 0, 3730 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | 3731 prot->slab_flags, 3732 prot->useroffset, prot->usersize, 3733 NULL); 3734 3735 if (prot->slab == NULL) { 3736 pr_crit("%s: Can't create sock SLAB cache!\n", 3737 prot->name); 3738 goto out; 3739 } 3740 3741 if (req_prot_init(prot)) 3742 goto out_free_request_sock_slab; 3743 3744 if (tw_prot_init(prot)) 3745 goto out_free_timewait_sock_slab; 3746 } 3747 3748 mutex_lock(&proto_list_mutex); 3749 ret = assign_proto_idx(prot); 3750 if (ret) { 3751 mutex_unlock(&proto_list_mutex); 3752 goto out_free_timewait_sock_slab; 3753 } 3754 list_add(&prot->node, &proto_list); 3755 mutex_unlock(&proto_list_mutex); 3756 return ret; 3757 3758 out_free_timewait_sock_slab: 3759 if (alloc_slab) 3760 tw_prot_cleanup(prot->twsk_prot); 3761 out_free_request_sock_slab: 3762 if (alloc_slab) { 3763 req_prot_cleanup(prot->rsk_prot); 3764 3765 kmem_cache_destroy(prot->slab); 3766 prot->slab = NULL; 3767 } 3768 out: 3769 return ret; 3770 } 3771 EXPORT_SYMBOL(proto_register); 3772 3773 void proto_unregister(struct proto *prot) 3774 { 3775 mutex_lock(&proto_list_mutex); 3776 release_proto_idx(prot); 3777 list_del(&prot->node); 3778 mutex_unlock(&proto_list_mutex); 3779 3780 kmem_cache_destroy(prot->slab); 3781 prot->slab = NULL; 3782 3783 req_prot_cleanup(prot->rsk_prot); 3784 tw_prot_cleanup(prot->twsk_prot); 3785 } 3786 EXPORT_SYMBOL(proto_unregister); 3787 3788 int sock_load_diag_module(int family, int protocol) 3789 { 3790 if (!protocol) { 3791 if (!sock_is_registered(family)) 3792 return -ENOENT; 3793 3794 return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, 3795 NETLINK_SOCK_DIAG, family); 3796 } 3797 3798 #ifdef CONFIG_INET 3799 if (family == AF_INET && 3800 protocol != IPPROTO_RAW && 3801 protocol < MAX_INET_PROTOS && 3802 !rcu_access_pointer(inet_protos[protocol])) 3803 return -ENOENT; 3804 #endif 3805 3806 return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK, 3807 NETLINK_SOCK_DIAG, family, protocol); 3808 } 3809 EXPORT_SYMBOL(sock_load_diag_module); 3810 3811 #ifdef CONFIG_PROC_FS 3812 static void *proto_seq_start(struct seq_file *seq, loff_t *pos) 3813 __acquires(proto_list_mutex) 3814 { 3815 mutex_lock(&proto_list_mutex); 3816 return seq_list_start_head(&proto_list, *pos); 3817 } 3818 3819 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3820 { 3821 return seq_list_next(v, &proto_list, pos); 3822 } 3823 3824 static void proto_seq_stop(struct seq_file *seq, void *v) 3825 __releases(proto_list_mutex) 3826 { 3827 mutex_unlock(&proto_list_mutex); 3828 } 3829 3830 static char proto_method_implemented(const void *method) 3831 { 3832 return method == NULL ? 'n' : 'y'; 3833 } 3834 static long sock_prot_memory_allocated(struct proto *proto) 3835 { 3836 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; 3837 } 3838 3839 static const char *sock_prot_memory_pressure(struct proto *proto) 3840 { 3841 return proto->memory_pressure != NULL ? 3842 proto_memory_pressure(proto) ? 
"yes" : "no" : "NI"; 3843 } 3844 3845 static void proto_seq_printf(struct seq_file *seq, struct proto *proto) 3846 { 3847 3848 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " 3849 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", 3850 proto->name, 3851 proto->obj_size, 3852 sock_prot_inuse_get(seq_file_net(seq), proto), 3853 sock_prot_memory_allocated(proto), 3854 sock_prot_memory_pressure(proto), 3855 proto->max_header, 3856 proto->slab == NULL ? "no" : "yes", 3857 module_name(proto->owner), 3858 proto_method_implemented(proto->close), 3859 proto_method_implemented(proto->connect), 3860 proto_method_implemented(proto->disconnect), 3861 proto_method_implemented(proto->accept), 3862 proto_method_implemented(proto->ioctl), 3863 proto_method_implemented(proto->init), 3864 proto_method_implemented(proto->destroy), 3865 proto_method_implemented(proto->shutdown), 3866 proto_method_implemented(proto->setsockopt), 3867 proto_method_implemented(proto->getsockopt), 3868 proto_method_implemented(proto->sendmsg), 3869 proto_method_implemented(proto->recvmsg), 3870 proto_method_implemented(proto->sendpage), 3871 proto_method_implemented(proto->bind), 3872 proto_method_implemented(proto->backlog_rcv), 3873 proto_method_implemented(proto->hash), 3874 proto_method_implemented(proto->unhash), 3875 proto_method_implemented(proto->get_port), 3876 proto_method_implemented(proto->enter_memory_pressure)); 3877 } 3878 3879 static int proto_seq_show(struct seq_file *seq, void *v) 3880 { 3881 if (v == &proto_list) 3882 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", 3883 "protocol", 3884 "size", 3885 "sockets", 3886 "memory", 3887 "press", 3888 "maxhdr", 3889 "slab", 3890 "module", 3891 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); 3892 else 3893 proto_seq_printf(seq, list_entry(v, struct proto, node)); 3894 return 0; 3895 } 3896 3897 static const struct seq_operations proto_seq_ops = { 3898 .start = proto_seq_start, 3899 .next = proto_seq_next, 3900 .stop = proto_seq_stop, 3901 .show = proto_seq_show, 3902 }; 3903 3904 static __net_init int proto_init_net(struct net *net) 3905 { 3906 if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops, 3907 sizeof(struct seq_net_private))) 3908 return -ENOMEM; 3909 3910 return 0; 3911 } 3912 3913 static __net_exit void proto_exit_net(struct net *net) 3914 { 3915 remove_proc_entry("protocols", net->proc_net); 3916 } 3917 3918 3919 static __net_initdata struct pernet_operations proto_net_ops = { 3920 .init = proto_init_net, 3921 .exit = proto_exit_net, 3922 }; 3923 3924 static int __init proto_init(void) 3925 { 3926 return register_pernet_subsys(&proto_net_ops); 3927 } 3928 3929 subsys_initcall(proto_init); 3930 3931 #endif /* PROC_FS */ 3932 3933 #ifdef CONFIG_NET_RX_BUSY_POLL 3934 bool sk_busy_loop_end(void *p, unsigned long start_time) 3935 { 3936 struct sock *sk = p; 3937 3938 return !skb_queue_empty_lockless(&sk->sk_receive_queue) || 3939 sk_busy_loop_timeout(sk, start_time); 3940 } 3941 EXPORT_SYMBOL(sk_busy_loop_end); 3942 #endif /* CONFIG_NET_RX_BUSY_POLL */ 3943 3944 int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len) 3945 { 3946 if (!sk->sk_prot->bind_add) 3947 return -EOPNOTSUPP; 3948 return sk->sk_prot->bind_add(sk, addr, addr_len); 3949 } 3950 EXPORT_SYMBOL(sock_bind_add); 3951