// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET		An implementation of the SOCKET network access protocol.
 *
 * Version:	@(#)socket.c	1.1.93	18/02/95
 *
 * Authors:	Orest Zborowski, <obz@Kodak.COM>
 *		Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 * Fixes:
 *		Anonymous	:	NOTSOCK/BADF cleanup. Error fix in
 *					shutdown()
 *		Alan Cox	:	verify_area() fixes
 *		Alan Cox	:	Removed DDI
 *		Jonathan Kamens	:	SOCK_DGRAM reconnect bug
 *		Alan Cox	:	Moved a load of checks to the very
 *					top level.
 *		Alan Cox	:	Move address structures to/from user
 *					mode above the protocol layers.
 *		Rob Janssen	:	Allow 0 length sends.
 *		Alan Cox	:	Asynchronous I/O support (cribbed from the
 *					tty drivers).
 *		Niibe Yutaka	:	Asynchronous I/O for writes (4.4BSD style)
 *		Jeff Uphoff	:	Made max number of sockets command-line
 *					configurable.
 *		Matti Aarnio	:	Made the number of sockets dynamic,
 *					to be allocated when needed, and mr.
 *					Uphoff's max is used as max to be
 *					allowed to allocate.
 *		Linus		:	Argh. removed all the socket allocation
 *					altogether: it's in the inode now.
 *		Alan Cox	:	Made sock_alloc()/sock_release() public
 *					for NetROM and future kernel nfsd type
 *					stuff.
 *		Alan Cox	:	sendmsg/recvmsg basics.
 *		Tom Dyas	:	Export net symbols.
 *		Marcin Dalecki	:	Fixed problems with CONFIG_NET="n".
 *		Alan Cox	:	Added thread locking to sys_* calls
 *					for sockets. May have errors at the
 *					moment.
 *		Kevin Buhr	:	Fixed the dumb errors in the above.
 *		Andi Kleen	:	Some small cleanups, optimizations,
 *					and fixed a copy_from_user() bug.
 *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
 *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
 *					protocol-independent
 *
 * This module is effectively the top level interface to the BSD socket
 * paradigm.
 *
 * Based upon Swansea University Computer Society NET3.039
 */

#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/thread_info.h>
#include <linux/rcupdate.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/ptp_classify.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/kmod.h>
#include <linux/audit.h>
#include <linux/wireless.h>
#include <linux/nsproxy.h>
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/nospec.h>
#include <linux/indirect_call_wrapper.h>

#include <linux/uaccess.h>
#include <asm/unistd.h>

#include <net/compat.h>
#include <net/wext.h>
#include <net/cls_cgroup.h>

#include <net/sock.h>
#include <linux/netfilter.h>

#include <linux/if_tun.h>
#include <linux/ipv6_route.h>
#include <linux/route.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>

#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif

static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);

static int sock_close(struct inode *inode, struct file *file);
static __poll_t sock_poll(struct file *file,
			  struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
			      unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_sendpage(struct file *file, struct page *page,
			     int offset, size_t size, loff_t *ppos, int more);
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags);

#ifdef CONFIG_PROC_FS
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (sock->ops->show_fdinfo)
		sock->ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif

/*
 *	Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 *	in the operation structures but are done directly via the socketcall() multiplexor.
 */

static const struct file_operations socket_file_ops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.read_iter =	sock_read_iter,
	.write_iter =	sock_write_iter,
	.poll =		sock_poll,
	.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = compat_sock_ioctl,
#endif
	.mmap =		sock_mmap,
	.release =	sock_close,
	.fasync =	sock_fasync,
	.sendpage =	sock_sendpage,
	.splice_write = generic_splice_sendpage,
	.splice_read =	sock_splice_read,
	.show_fdinfo =	sock_show_fdinfo,
};

/*
 *	The protocol list. Each protocol is registered in here.
 */

static DEFINE_SPINLOCK(net_family_lock);
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;

/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */

/**
 *	move_addr_to_kernel	-	copy a socket address into kernel space
 *	@uaddr: Address in user space
 *	@kaddr: Address in kernel space
 *	@ulen: Length in user space
 *
 *	The address is copied into kernel space. If the provided address is
 *	too long an error code of -EINVAL is returned. If the copy gives
 *	invalid addresses -EFAULT is returned. On a success 0 is returned.
 */

int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
{
	if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
		return -EINVAL;
	if (ulen == 0)
		return 0;
	if (copy_from_user(kaddr, uaddr, ulen))
		return -EFAULT;
	return audit_sockaddr(ulen, kaddr);
}

/**
 *	move_addr_to_user	-	copy an address to user space
 *	@kaddr: kernel space address
 *	@klen: length of address in kernel
 *	@uaddr: user space address
 *	@ulen: pointer to user length field
 *
 *	The value pointed to by ulen on entry is the buffer length available.
 *	This is overwritten with the buffer space used. -EINVAL is returned
 *	if an overlong buffer is specified or a negative buffer size. -EFAULT
 *	is returned if either the buffer or the length field are not
 *	accessible.
 *	After copying the data up to the limit the user specifies, the true
 *	length of the data is written over the length limit the user
 *	specified. Zero is returned for a success.
 */

static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
			     void __user *uaddr, int __user *ulen)
{
	int err;
	int len;

	BUG_ON(klen > sizeof(struct sockaddr_storage));
	err = get_user(len, ulen);
	if (err)
		return err;
	if (len > klen)
		len = klen;
	if (len < 0)
		return -EINVAL;
	if (len) {
		if (audit_sockaddr(klen, kaddr))
			return -ENOMEM;
		if (copy_to_user(uaddr, kaddr, len))
			return -EFAULT;
	}
	/*
	 *	"fromlen shall refer to the value before truncation.."
	 *			1003.1g
	 */
	return __put_user(klen, ulen);
}

static struct kmem_cache *sock_inode_cachep __ro_after_init;

static struct inode *sock_alloc_inode(struct super_block *sb)
{
	struct socket_alloc *ei;

	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	init_waitqueue_head(&ei->socket.wq.wait);
	ei->socket.wq.fasync_list = NULL;
	ei->socket.wq.flags = 0;

	ei->socket.state = SS_UNCONNECTED;
	ei->socket.flags = 0;
	ei->socket.ops = NULL;
	ei->socket.sk = NULL;
	ei->socket.file = NULL;

	return &ei->vfs_inode;
}

static void sock_free_inode(struct inode *inode)
{
	struct socket_alloc *ei;

	ei = container_of(inode, struct socket_alloc, vfs_inode);
	kmem_cache_free(sock_inode_cachep, ei);
}

static void init_once(void *foo)
{
	struct socket_alloc *ei = (struct socket_alloc *)foo;

	inode_init_once(&ei->vfs_inode);
}

static void init_inodecache(void)
{
	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
					      sizeof(struct socket_alloc),
					      0,
					      (SLAB_HWCACHE_ALIGN |
					       SLAB_RECLAIM_ACCOUNT |
					       SLAB_MEM_SPREAD | SLAB_ACCOUNT),
					      init_once);
	BUG_ON(sock_inode_cachep == NULL);
}

static const struct super_operations sockfs_ops = {
	.alloc_inode	= sock_alloc_inode,
	.free_inode	= sock_free_inode,
	.statfs		= simple_statfs,
};

/*
 * sockfs_dname() is called from d_path().
 */
static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
{
	return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
				d_inode(dentry)->i_ino);
}

static const struct dentry_operations sockfs_dentry_operations = {
	.d_dname = sockfs_dname,
};

static int sockfs_xattr_get(const struct xattr_handler *handler,
			    struct dentry *dentry, struct inode *inode,
			    const char *suffix, void *value, size_t size)
{
	if (value) {
		if (dentry->d_name.len + 1 > size)
			return -ERANGE;
		memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
	}
	return dentry->d_name.len + 1;
}

#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)

static const struct xattr_handler sockfs_xattr_handler = {
	.name = XATTR_NAME_SOCKPROTONAME,
	.get = sockfs_xattr_get,
};

static int sockfs_security_xattr_set(const struct xattr_handler *handler,
				     struct dentry *dentry, struct inode *inode,
				     const char *suffix, const void *value,
				     size_t size, int flags)
{
	/* Handled by LSM.
	 */
	return -EAGAIN;
}

static const struct xattr_handler sockfs_security_xattr_handler = {
	.prefix = XATTR_SECURITY_PREFIX,
	.set = sockfs_security_xattr_set,
};

static const struct xattr_handler *sockfs_xattr_handlers[] = {
	&sockfs_xattr_handler,
	&sockfs_security_xattr_handler,
	NULL
};

static int sockfs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
	if (!ctx)
		return -ENOMEM;
	ctx->ops = &sockfs_ops;
	ctx->dops = &sockfs_dentry_operations;
	ctx->xattr = sockfs_xattr_handlers;
	return 0;
}

static struct vfsmount *sock_mnt __read_mostly;

static struct file_system_type sock_fs_type = {
	.name =		"sockfs",
	.init_fs_context = sockfs_init_fs_context,
	.kill_sb =	kill_anon_super,
};

/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	with the file struct implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we return
 *	from this function. We use the fact that now we do not refer
 *	to the socket after mapping. If one day we will need it, this
 *	function will increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable
 *	with shared fd spaces, we cannot solve it inside the kernel,
 *	but we take care of internal coherence yet.
 */

/**
 *	sock_alloc_file - Bind a &socket to a &file
 *	@sock: socket
 *	@flags: file status flags
 *	@dname: protocol name
 *
 *	Returns the &file bound with @sock, implicitly storing it
 *	in sock->file. If dname is %NULL, sets to "".
 *	On failure the return is an ERR pointer (see linux/err.h).
 *	This function uses GFP_KERNEL internally.
 */

struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
	struct file *file;

	if (!dname)
		dname = sock->sk ? sock->sk->sk_prot_creator->name : "";

	file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
				O_RDWR | (flags & O_NONBLOCK),
				&socket_file_ops);
	if (IS_ERR(file)) {
		sock_release(sock);
		return file;
	}

	sock->file = file;
	file->private_data = sock;
	stream_open(SOCK_INODE(sock), file);
	return file;
}
EXPORT_SYMBOL(sock_alloc_file);

static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0)) {
		sock_release(sock);
		return fd;
	}

	newfile = sock_alloc_file(sock, flags, NULL);
	if (!IS_ERR(newfile)) {
		fd_install(fd, newfile);
		return fd;
	}

	put_unused_fd(fd);
	return PTR_ERR(newfile);
}

/**
 *	sock_from_file - Return the &socket bound to @file.
 *	@file: file
 *	@err: pointer to an error code return
 *
 *	On failure returns %NULL and assigns -ENOTSOCK to @err.
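 *
 *	A minimal illustrative sketch of the usual calling pattern (the
 *	surrounding caller is hypothetical, not part of this file):
 *
 *		struct socket *sock = sock_from_file(file, &err);
 *
 *		if (!sock)
 *			return err;
 *
 *	so that on failure the caller simply propagates the -ENOTSOCK
 *	written into @err.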
 */

struct socket *sock_from_file(struct file *file, int *err)
{
	if (file->f_op == &socket_file_ops)
		return file->private_data;	/* set in sock_map_fd */

	*err = -ENOTSOCK;
	return NULL;
}
EXPORT_SYMBOL(sock_from_file);

/**
 *	sockfd_lookup - Go from a file number to its socket slot
 *	@fd: file handle
 *	@err: pointer to an error code return
 *
 *	The file handle passed in is locked and the socket it is bound
 *	to is returned. If an error occurs the err pointer is overwritten
 *	with a negative errno code and NULL is returned. The function checks
 *	for both invalid handles and passing a handle which is not a socket.
 *
 *	On a success the socket object pointer is returned.
 */

struct socket *sockfd_lookup(int fd, int *err)
{
	struct file *file;
	struct socket *sock;

	file = fget(fd);
	if (!file) {
		*err = -EBADF;
		return NULL;
	}

	sock = sock_from_file(file, err);
	if (!sock)
		fput(file);
	return sock;
}
EXPORT_SYMBOL(sockfd_lookup);

static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
	struct fd f = fdget(fd);
	struct socket *sock;

	*err = -EBADF;
	if (f.file) {
		sock = sock_from_file(f.file, err);
		if (likely(sock)) {
			*fput_needed = f.flags & FDPUT_FPUT;
			return sock;
		}
		fdput(f);
	}
	return NULL;
}

static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
				size_t size)
{
	ssize_t len;
	ssize_t used = 0;

	len = security_inode_listsecurity(d_inode(dentry), buffer, size);
	if (len < 0)
		return len;
	used += len;
	if (buffer) {
		if (size < used)
			return -ERANGE;
		buffer += len;
	}

	len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
	used += len;
	if (buffer) {
		if (size < used)
			return -ERANGE;
		memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
		buffer += len;
	}

	return used;
}

static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
{
	int err = simple_setattr(dentry, iattr);

	if (!err && (iattr->ia_valid & ATTR_UID)) {
		struct socket *sock = SOCKET_I(d_inode(dentry));

		if (sock->sk)
			sock->sk->sk_uid = iattr->ia_uid;
		else
			err = -ENOENT;
	}

	return err;
}

static const struct inode_operations sockfs_inode_ops = {
	.listxattr = sockfs_listxattr,
	.setattr = sockfs_setattr,
};

/**
 *	sock_alloc - allocate a socket
 *
 *	Allocate a new inode and socket object. The two are bound together
 *	and initialised. The socket is then returned. If we are out of inodes
 *	NULL is returned. This function uses GFP_KERNEL internally.
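 *
 *	Illustrative sketch of a typical use, modelled on the accept path
 *	later in this file (error handling elided, names are the caller's):
 *
 *		newsock = sock_alloc();
 *		if (!newsock)
 *			return -ENFILE;
 *		newsock->type = sock->type;
 *		newsock->ops = sock->ops;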
 */

struct socket *sock_alloc(void)
{
	struct inode *inode;
	struct socket *sock;

	inode = new_inode_pseudo(sock_mnt->mnt_sb);
	if (!inode)
		return NULL;

	sock = SOCKET_I(inode);

	inode->i_ino = get_next_ino();
	inode->i_mode = S_IFSOCK | S_IRWXUGO;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_op = &sockfs_inode_ops;

	return sock;
}
EXPORT_SYMBOL(sock_alloc);

static void __sock_release(struct socket *sock, struct inode *inode)
{
	if (sock->ops) {
		struct module *owner = sock->ops->owner;

		if (inode)
			inode_lock(inode);
		sock->ops->release(sock);
		sock->sk = NULL;
		if (inode)
			inode_unlock(inode);
		sock->ops = NULL;
		module_put(owner);
	}

	if (sock->wq.fasync_list)
		pr_err("%s: fasync list not empty!\n", __func__);

	if (!sock->file) {
		iput(SOCK_INODE(sock));
		return;
	}
	sock->file = NULL;
}

/**
 *	sock_release - close a socket
 *	@sock: socket to close
 *
 *	The socket is released from the protocol stack if it has a release
 *	callback, and the inode is then released if the socket is bound to
 *	an inode not a file.
 */
void sock_release(struct socket *sock)
{
	__sock_release(sock, NULL);
}
EXPORT_SYMBOL(sock_release);

void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
{
	u8 flags = *tx_flags;

	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
		flags |= SKBTX_HW_TSTAMP;

	if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
		flags |= SKBTX_SW_TSTAMP;

	if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
		flags |= SKBTX_SCHED_TSTAMP;

	*tx_flags = flags;
}
EXPORT_SYMBOL(__sock_tx_timestamp);

INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
					   size_t));
INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
					    size_t));
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
	int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
				     inet_sendmsg, sock, msg,
				     msg_data_left(msg));
	BUG_ON(ret == -EIOCBQUEUED);
	return ret;
}

/**
 * sock_sendmsg - send a message through @sock
 * @sock: socket
 * @msg: message to send
 *
 * Sends @msg through @sock, passing through LSM.
 * Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg,
					  msg_data_left(msg));

	return err ?: sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);

/**
 * kernel_sendmsg - send a message through @sock (kernel-space)
 * @sock: socket
 * @msg: message header
 * @vec: kernel vec
 * @num: vec array length
 * @size: total message data size
 *
 * Builds the message data with @vec and sends it through @sock.
 * Returns the number of bytes sent, or an error code.
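 *
 * Illustrative sketch of a kernel-space caller (the buffer, its length and
 * the socket are assumed to exist in the caller; not part of this file):
 *
 *	struct msghdr msg = { };
 *	struct kvec vec = { .iov_base = buf, .iov_len = buflen };
 *
 *	sent = kernel_sendmsg(sock, &msg, &vec, 1, buflen);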
 */

int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
		   struct kvec *vec, size_t num, size_t size)
{
	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
	return sock_sendmsg(sock, msg);
}
EXPORT_SYMBOL(kernel_sendmsg);

/**
 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
 * @sk: sock
 * @msg: message header
 * @vec: output s/g array
 * @num: output s/g array length
 * @size: total message data size
 *
 * Builds the message data with @vec and sends it through @sock.
 * Returns the number of bytes sent, or an error code.
 * Caller must hold @sk.
 */

int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
			  struct kvec *vec, size_t num, size_t size)
{
	struct socket *sock = sk->sk_socket;

	if (!sock->ops->sendmsg_locked)
		return sock_no_sendmsg_locked(sk, msg, size);

	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);

	return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
}
EXPORT_SYMBOL(kernel_sendmsg_locked);

static bool skb_is_err_queue(const struct sk_buff *skb)
{
	/* pkt_type of skbs enqueued on the error queue are set to
	 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
	 * in recvmsg, since skbs received on a local socket will never
	 * have a pkt_type of PACKET_OUTGOING.
	 */
	return skb->pkt_type == PACKET_OUTGOING;
}

/* On transmit, software and hardware timestamps are returned independently.
 * As the two skb clones share the hardware timestamp, which may be updated
 * before the software timestamp is received, a hardware TX timestamp may be
 * returned only if there is no software TX timestamp. Ignore false software
 * timestamps, which may be made in the __sock_recv_timestamp() call when the
 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
 * hardware timestamp.
 */
static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
{
	return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
}

static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
	struct scm_ts_pktinfo ts_pktinfo;
	struct net_device *orig_dev;

	if (!skb_mac_header_was_set(skb))
		return;

	memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));

	rcu_read_lock();
	orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
	if (orig_dev)
		ts_pktinfo.if_index = orig_dev->ifindex;
	rcu_read_unlock();

	ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
	put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
		 sizeof(ts_pktinfo), &ts_pktinfo);
}

/*
 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
 */
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
			   struct sk_buff *skb)
{
	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
	struct scm_timestamping_internal tss;

	int empty = 1, false_tstamp = 0;
	struct skb_shared_hwtstamps *shhwtstamps =
		skb_hwtstamps(skb);

	/* Race occurred between timestamp enabling and packet
	   receiving. Fill in the current time for now.
	 */
	if (need_software_tstamp && skb->tstamp == 0) {
		__net_timestamp(skb);
		false_tstamp = 1;
	}

	if (need_software_tstamp) {
		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
			if (new_tstamp) {
				struct __kernel_sock_timeval tv;

				skb_get_new_timestamp(skb, &tv);
				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
					 sizeof(tv), &tv);
			} else {
				struct __kernel_old_timeval tv;

				skb_get_timestamp(skb, &tv);
				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
					 sizeof(tv), &tv);
			}
		} else {
			if (new_tstamp) {
				struct __kernel_timespec ts;

				skb_get_new_timestampns(skb, &ts);
				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
					 sizeof(ts), &ts);
			} else {
				struct __kernel_old_timespec ts;

				skb_get_timestampns(skb, &ts);
				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
					 sizeof(ts), &ts);
			}
		}
	}

	memset(&tss, 0, sizeof(tss));
	if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
	    ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
		empty = 0;
	if (shhwtstamps &&
	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
	    !skb_is_swtx_tstamp(skb, false_tstamp) &&
	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
		empty = 0;
		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
		    !skb_is_err_queue(skb))
			put_ts_pktinfo(msg, skb);
	}
	if (!empty) {
		if (sock_flag(sk, SOCK_TSTAMP_NEW))
			put_cmsg_scm_timestamping64(msg, &tss);
		else
			put_cmsg_scm_timestamping(msg, &tss);

		if (skb_is_err_queue(skb) && skb->len &&
		    SKB_EXT_ERR(skb)->opt_stats)
			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
				 skb->len, skb->data);
	}
}
EXPORT_SYMBOL_GPL(__sock_recv_timestamp);

void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
			     struct sk_buff *skb)
{
	int ack;

	if (!sock_flag(sk, SOCK_WIFI_STATUS))
		return;
	if (!skb->wifi_acked_valid)
		return;

	ack = skb->wifi_acked;

	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
}
EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);

static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
				   struct sk_buff *skb)
{
	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
			sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
}

void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);

INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
					   size_t, int));
INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
					    size_t, int));
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
				     int flags)
{
	return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
				  inet_recvmsg, sock, msg, msg_data_left(msg),
				  flags);
}

/**
 * sock_recvmsg - receive a message from @sock
 * @sock: socket
 * @msg: message to receive
 * @flags: message flags
 *
 * Receives @msg from @sock, passing through LSM. Returns the total number
 * of bytes received, or an error.
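 *
 * Illustrative sketch of an in-kernel caller (the kvec setup mirrors
 * kernel_recvmsg() below; buffer and socket names are the caller's own):
 *
 *	struct msghdr msg = { };
 *	struct kvec vec = { .iov_base = buf, .iov_len = buflen };
 *
 *	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, buflen);
 *	ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT);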
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);

	return err ?: sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);

/**
 * kernel_recvmsg - Receive a message from a socket (kernel space)
 * @sock: The socket to receive the message from
 * @msg: Received message
 * @vec: Input s/g array for message data
 * @num: Size of input s/g array
 * @size: Number of bytes to read
 * @flags: Message flags (MSG_DONTWAIT, etc...)
 *
 * On return the msg structure contains the scatter/gather array passed in the
 * vec argument. The array is modified so that it consists of the unfilled
 * portion of the original array.
 *
 * The returned value is the total number of bytes received, or an error.
 */

int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
		   struct kvec *vec, size_t num, size_t size, int flags)
{
	msg->msg_control_is_user = false;
	iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
	return sock_recvmsg(sock, msg, flags);
}
EXPORT_SYMBOL(kernel_recvmsg);

static ssize_t sock_sendpage(struct file *file, struct page *page,
			     int offset, size_t size, loff_t *ppos, int more)
{
	struct socket *sock;
	int flags;

	sock = file->private_data;

	flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
	/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
	flags |= more;

	return kernel_sendpage(sock, page, offset, size, flags);
}

static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags)
{
	struct socket *sock = file->private_data;

	if (unlikely(!sock->ops->splice_read))
		return generic_file_splice_read(file, ppos, pipe, len, flags);

	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}

static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct socket *sock = file->private_data;
	struct msghdr msg = {.msg_iter = *to,
			     .msg_iocb = iocb};
	ssize_t res;

	if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
		msg.msg_flags = MSG_DONTWAIT;

	if (iocb->ki_pos != 0)
		return -ESPIPE;

	if (!iov_iter_count(to))	/* Match SYS5 behaviour */
		return 0;

	res = sock_recvmsg(sock, &msg, msg.msg_flags);
	*to = msg.msg_iter;
	return res;
}

static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct socket *sock = file->private_data;
	struct msghdr msg = {.msg_iter = *from,
			     .msg_iocb = iocb};
	ssize_t res;

	if (iocb->ki_pos != 0)
		return -ESPIPE;

	if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
		msg.msg_flags = MSG_DONTWAIT;

	if (sock->type == SOCK_SEQPACKET)
		msg.msg_flags |= MSG_EOR;

	res = sock_sendmsg(sock, &msg);
	*from = msg.msg_iter;
	return res;
}

/*
 * Atomic setting of ioctl hooks to avoid race
 * with module unload.
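 *
 * A provider module registers its handler on load and clears it on unload.
 * Sketch only; the handler name below is hypothetical, not a real symbol:
 *
 *	static int my_br_ioctl(struct net *net, unsigned int cmd,
 *			       void __user *uarg);
 *
 *	brioctl_set(my_br_ioctl);	- at module init
 *	brioctl_set(NULL);		- at module exit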
 */

static DEFINE_MUTEX(br_ioctl_mutex);
static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);

void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
{
	mutex_lock(&br_ioctl_mutex);
	br_ioctl_hook = hook;
	mutex_unlock(&br_ioctl_mutex);
}
EXPORT_SYMBOL(brioctl_set);

static DEFINE_MUTEX(vlan_ioctl_mutex);
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);

void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
{
	mutex_lock(&vlan_ioctl_mutex);
	vlan_ioctl_hook = hook;
	mutex_unlock(&vlan_ioctl_mutex);
}
EXPORT_SYMBOL(vlan_ioctl_set);

static long sock_do_ioctl(struct net *net, struct socket *sock,
			  unsigned int cmd, unsigned long arg)
{
	int err;
	void __user *argp = (void __user *)arg;

	err = sock->ops->ioctl(sock, cmd, arg);

	/*
	 * If this ioctl is unknown try to hand it down
	 * to the NIC driver.
	 */
	if (err != -ENOIOCTLCMD)
		return err;

	if (cmd == SIOCGIFCONF) {
		struct ifconf ifc;
		if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
			return -EFAULT;
		rtnl_lock();
		err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
		rtnl_unlock();
		if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
			err = -EFAULT;
	} else {
		struct ifreq ifr;
		bool need_copyout;
		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
			return -EFAULT;
		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
		if (!err && need_copyout)
			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
				return -EFAULT;
	}
	return err;
}

/*
 *	With an ioctl, arg may well be a user mode pointer, but we don't know
 *	what to do with it - that's up to the protocol still.
 */

/**
 *	get_net_ns - increment the refcount of the network namespace
 *	@ns: common namespace (net)
 *
 *	Returns the net's common namespace.
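 *
 *	It is used as the setup callback of open_related_ns(); see the
 *	SIOCGSKNS case in sock_ioctl() below:
 *
 *		err = open_related_ns(&net->ns, get_net_ns);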
 */

struct ns_common *get_net_ns(struct ns_common *ns)
{
	return &get_net(container_of(ns, struct net, ns))->ns;
}
EXPORT_SYMBOL_GPL(get_net_ns);

static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct socket *sock;
	struct sock *sk;
	void __user *argp = (void __user *)arg;
	int pid, err;
	struct net *net;

	sock = file->private_data;
	sk = sock->sk;
	net = sock_net(sk);
	if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
		struct ifreq ifr;
		bool need_copyout;
		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
			return -EFAULT;
		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
		if (!err && need_copyout)
			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
				return -EFAULT;
	} else
#ifdef CONFIG_WEXT_CORE
	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
		err = wext_handle_ioctl(net, cmd, argp);
	} else
#endif
		switch (cmd) {
		case FIOSETOWN:
		case SIOCSPGRP:
			err = -EFAULT;
			if (get_user(pid, (int __user *)argp))
				break;
			err = f_setown(sock->file, pid, 1);
			break;
		case FIOGETOWN:
		case SIOCGPGRP:
			err = put_user(f_getown(sock->file),
				       (int __user *)argp);
			break;
		case SIOCGIFBR:
		case SIOCSIFBR:
		case SIOCBRADDBR:
		case SIOCBRDELBR:
			err = -ENOPKG;
			if (!br_ioctl_hook)
				request_module("bridge");

			mutex_lock(&br_ioctl_mutex);
			if (br_ioctl_hook)
				err = br_ioctl_hook(net, cmd, argp);
			mutex_unlock(&br_ioctl_mutex);
			break;
		case SIOCGIFVLAN:
		case SIOCSIFVLAN:
			err = -ENOPKG;
			if (!vlan_ioctl_hook)
				request_module("8021q");

			mutex_lock(&vlan_ioctl_mutex);
			if (vlan_ioctl_hook)
				err = vlan_ioctl_hook(net, argp);
			mutex_unlock(&vlan_ioctl_mutex);
			break;
		case SIOCGSKNS:
			err = -EPERM;
			if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
				break;

			err = open_related_ns(&net->ns, get_net_ns);
			break;
		case SIOCGSTAMP_OLD:
		case SIOCGSTAMPNS_OLD:
			if (!sock->ops->gettstamp) {
				err = -ENOIOCTLCMD;
				break;
			}
			err = sock->ops->gettstamp(sock, argp,
						   cmd == SIOCGSTAMP_OLD,
						   !IS_ENABLED(CONFIG_64BIT));
			break;
		case SIOCGSTAMP_NEW:
		case SIOCGSTAMPNS_NEW:
			if (!sock->ops->gettstamp) {
				err = -ENOIOCTLCMD;
				break;
			}
			err = sock->ops->gettstamp(sock, argp,
						   cmd == SIOCGSTAMP_NEW,
						   false);
			break;
		default:
			err = sock_do_ioctl(net, sock, cmd, arg);
			break;
		}
	return err;
}

/**
 *	sock_create_lite - creates a socket
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	Creates a new socket and assigns it to @res, passing through LSM.
 *	The new socket initialization is not complete, see kernel_accept().
 *	Returns 0 or an error. On failure @res is set to %NULL.
 *	This function internally uses GFP_KERNEL.
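 *
 *	Illustrative sketch of the usual pattern, creating the target socket
 *	for an in-kernel accept (along the lines of kernel_accept(); names
 *	are the caller's own):
 *
 *		err = sock_create_lite(sk->sk_family, sk->sk_type,
 *				       sk->sk_protocol, &newsock);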
 */

int sock_create_lite(int family, int type, int protocol, struct socket **res)
{
	int err;
	struct socket *sock = NULL;

	err = security_socket_create(family, type, protocol, 1);
	if (err)
		goto out;

	sock = sock_alloc();
	if (!sock) {
		err = -ENOMEM;
		goto out;
	}

	sock->type = type;
	err = security_socket_post_create(sock, family, type, protocol, 1);
	if (err)
		goto out_release;

out:
	*res = sock;
	return err;
out_release:
	sock_release(sock);
	sock = NULL;
	goto out;
}
EXPORT_SYMBOL(sock_create_lite);

/* No kernel lock held - perfect */
static __poll_t sock_poll(struct file *file, poll_table *wait)
{
	struct socket *sock = file->private_data;
	__poll_t events = poll_requested_events(wait), flag = 0;

	if (!sock->ops->poll)
		return 0;

	if (sk_can_busy_loop(sock->sk)) {
		/* poll once if requested by the syscall */
		if (events & POLL_BUSY_LOOP)
			sk_busy_loop(sock->sk, 1);

		/* if this socket can poll_ll, tell the system call */
		flag = POLL_BUSY_LOOP;
	}

	return sock->ops->poll(file, sock, wait) | flag;
}

static int sock_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct socket *sock = file->private_data;

	return sock->ops->mmap(file, sock, vma);
}

static int sock_close(struct inode *inode, struct file *filp)
{
	__sock_release(SOCKET_I(inode), inode);
	return 0;
}

/*
 *	Update the socket async list
 *
 *	Fasync_list locking strategy.
 *
 *	1. fasync_list is modified only under process context socket lock
 *	   i.e. under semaphore.
 *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
 *	   or under socket lock
 */

static int sock_fasync(int fd, struct file *filp, int on)
{
	struct socket *sock = filp->private_data;
	struct sock *sk = sock->sk;
	struct socket_wq *wq = &sock->wq;

	if (sk == NULL)
		return -EINVAL;

	lock_sock(sk);
	fasync_helper(fd, filp, on, &wq->fasync_list);

	if (!wq->fasync_list)
		sock_reset_flag(sk, SOCK_FASYNC);
	else
		sock_set_flag(sk, SOCK_FASYNC);

	release_sock(sk);
	return 0;
}

/* This function may be called only under rcu_lock */

int sock_wake_async(struct socket_wq *wq, int how, int band)
{
	if (!wq || !wq->fasync_list)
		return -1;

	switch (how) {
	case SOCK_WAKE_WAITD:
		if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
			break;
		goto call_kill;
	case SOCK_WAKE_SPACE:
		if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
			break;
		fallthrough;
	case SOCK_WAKE_IO:
call_kill:
		kill_fasync(&wq->fasync_list, SIGIO, band);
		break;
	case SOCK_WAKE_URG:
		kill_fasync(&wq->fasync_list, SIGURG, band);
	}

	return 0;
}
EXPORT_SYMBOL(sock_wake_async);

/**
 *	__sock_create - creates a socket
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *	@kern: boolean for kernel space sockets
 *
 *	Creates a new socket and assigns it to @res, passing through LSM.
 *	Returns 0 or an error. On failure @res is set to %NULL.
 *	@kern must be set to true if the socket resides in kernel space.
 *	This function internally uses GFP_KERNEL.
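 *
 *	sock_create() creates the socket in the calling process's network
 *	namespace; __sys_socket() and __sys_socketpair() below use it as:
 *
 *		retval = sock_create(family, type, protocol, &sock);
 *		if (retval < 0)
 *			return retval;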
 */

int __sock_create(struct net *net, int family, int type, int protocol,
			 struct socket **res, int kern)
{
	int err;
	struct socket *sock;
	const struct net_proto_family *pf;

	/*
	 *	Check protocol is in range
	 */
	if (family < 0 || family >= NPROTO)
		return -EAFNOSUPPORT;
	if (type < 0 || type >= SOCK_MAX)
		return -EINVAL;

	/* Compatibility.

	   This uglymoron is moved from INET layer to here to avoid
	   deadlock in module load.
	 */
	if (family == PF_INET && type == SOCK_PACKET) {
		pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
			     current->comm);
		family = PF_PACKET;
	}

	err = security_socket_create(family, type, protocol, kern);
	if (err)
		return err;

	/*
	 *	Allocate the socket and allow the family to set things up. if
	 *	the protocol is 0, the family is instructed to select an appropriate
	 *	default.
	 */
	sock = sock_alloc();
	if (!sock) {
		net_warn_ratelimited("socket: no more sockets\n");
		return -ENFILE;	/* Not exactly a match, but it's the
				   closest POSIX thing */
	}

	sock->type = type;

#ifdef CONFIG_MODULES
	/* Attempt to load a protocol module if the find failed.
	 *
	 * 12/09/1996 Marcin: But! this REALLY only makes sense if the user
	 * requested real, full-featured networking support upon configuration.
	 * Otherwise module support will break!
	 */
	if (rcu_access_pointer(net_families[family]) == NULL)
		request_module("net-pf-%d", family);
#endif

	rcu_read_lock();
	pf = rcu_dereference(net_families[family]);
	err = -EAFNOSUPPORT;
	if (!pf)
		goto out_release;

	/*
	 * We will call the ->create function, that possibly is in a loadable
	 * module, so we have to bump that loadable module refcnt first.
	 */
	if (!try_module_get(pf->owner))
		goto out_release;

	/* Now protected by module ref count */
	rcu_read_unlock();

	err = pf->create(net, sock, protocol, kern);
	if (err < 0)
		goto out_module_put;

	/*
	 * Now to bump the refcnt of the [loadable] module that owns this
	 * socket at sock_release time we decrement its refcnt.
	 */
	if (!try_module_get(sock->ops->owner))
		goto out_module_busy;

	/*
	 * Now that we're done with the ->create function, the [loadable]
	 * module can have its refcnt decremented
	 */
	module_put(pf->owner);
	err = security_socket_post_create(sock, family, type, protocol, kern);
	if (err)
		goto out_sock_release;
	*res = sock;

	return 0;

out_module_busy:
	err = -EAFNOSUPPORT;
out_module_put:
	sock->ops = NULL;
	module_put(pf->owner);
out_sock_release:
	sock_release(sock);
	return err;

out_release:
	rcu_read_unlock();
	goto out_sock_release;
}
EXPORT_SYMBOL(__sock_create);

/**
 *	sock_create - creates a socket
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create().
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create(int family, int type, int protocol, struct socket **res)
{
	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
EXPORT_SYMBOL(sock_create);

/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create().
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);

int __sys_socket(int family, int type, int protocol)
{
	int retval;
	struct socket *sock;
	int flags;

	/* Check the SOCK_* constants for consistency.  */
	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

	flags = type & ~SOCK_TYPE_MASK;
	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	type &= SOCK_TYPE_MASK;

	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	retval = sock_create(family, type, protocol, &sock);
	if (retval < 0)
		return retval;

	return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}

SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	return __sys_socket(family, type, protocol);
}

/*
 *	Create a pair of connected sockets.
 */

int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
{
	struct socket *sock1, *sock2;
	int fd1, fd2, err;
	struct file *newfile1, *newfile2;
	int flags;

	flags = type & ~SOCK_TYPE_MASK;
	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	type &= SOCK_TYPE_MASK;

	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	/*
	 * reserve descriptors and make sure we won't fail
	 * to return them to userland.
	 */
	fd1 = get_unused_fd_flags(flags);
	if (unlikely(fd1 < 0))
		return fd1;

	fd2 = get_unused_fd_flags(flags);
	if (unlikely(fd2 < 0)) {
		put_unused_fd(fd1);
		return fd2;
	}

	err = put_user(fd1, &usockvec[0]);
	if (err)
		goto out;

	err = put_user(fd2, &usockvec[1]);
	if (err)
		goto out;

	/*
	 * Obtain the first socket and check if the underlying protocol
	 * supports the socketpair call.
	 */

	err = sock_create(family, type, protocol, &sock1);
	if (unlikely(err < 0))
		goto out;

	err = sock_create(family, type, protocol, &sock2);
	if (unlikely(err < 0)) {
		sock_release(sock1);
		goto out;
	}

	err = security_socket_socketpair(sock1, sock2);
	if (unlikely(err)) {
		sock_release(sock2);
		sock_release(sock1);
		goto out;
	}

	err = sock1->ops->socketpair(sock1, sock2);
	if (unlikely(err < 0)) {
		sock_release(sock2);
		sock_release(sock1);
		goto out;
	}

	newfile1 = sock_alloc_file(sock1, flags, NULL);
	if (IS_ERR(newfile1)) {
		err = PTR_ERR(newfile1);
		sock_release(sock2);
		goto out;
	}

	newfile2 = sock_alloc_file(sock2, flags, NULL);
	if (IS_ERR(newfile2)) {
		err = PTR_ERR(newfile2);
		fput(newfile1);
		goto out;
	}

	audit_fd_pair(fd1, fd2);

	fd_install(fd1, newfile1);
	fd_install(fd2, newfile2);
	return 0;

out:
	put_unused_fd(fd2);
	put_unused_fd(fd1);
	return err;
}

SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
		int __user *, usockvec)
{
	return __sys_socketpair(family, type, protocol, usockvec);
}

/*
 *	Bind a name to a socket. Nothing much to do here since it's
 *	the protocol's responsibility to handle the local address.
 *
 *	We move the socket address to kernel space before we call
 *	the protocol layer (having also checked the address is ok).
 */

int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock) {
		err = move_addr_to_kernel(umyaddr, addrlen, &address);
		if (!err) {
			err = security_socket_bind(sock,
						   (struct sockaddr *)&address,
						   addrlen);
			if (!err)
				err = sock->ops->bind(sock,
						      (struct sockaddr *)
						      &address, addrlen);
		}
		fput_light(sock->file, fput_needed);
	}
	return err;
}

SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	return __sys_bind(fd, umyaddr, addrlen);
}

/*
 *	Perform a listen. Basically, we allow the protocol to do anything
 *	necessary for a listen, and if that works, we mark the socket as
 *	ready for listening.
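 *
 *	The backlog supplied by userspace is clamped to the per-namespace
 *	net.core.somaxconn sysctl, so for example (with an assumed somaxconn
 *	of 4096) listen(fd, 65535) results in the protocol seeing a backlog
 *	of 4096.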
 */

int __sys_listen(int fd, int backlog)
{
	struct socket *sock;
	int err, fput_needed;
	int somaxconn;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock) {
		somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
		if ((unsigned int)backlog > somaxconn)
			backlog = somaxconn;

		err = security_socket_listen(sock, backlog);
		if (!err)
			err = sock->ops->listen(sock, backlog);

		fput_light(sock->file, fput_needed);
	}
	return err;
}

SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
	return __sys_listen(fd, backlog);
}

int __sys_accept4_file(struct file *file, unsigned file_flags,
		       struct sockaddr __user *upeer_sockaddr,
		       int __user *upeer_addrlen, int flags,
		       unsigned long nofile)
{
	struct socket *sock, *newsock;
	struct file *newfile;
	int err, len, newfd;
	struct sockaddr_storage address;

	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;

	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	sock = sock_from_file(file, &err);
	if (!sock)
		goto out;

	err = -ENFILE;
	newsock = sock_alloc();
	if (!newsock)
		goto out;

	newsock->type = sock->type;
	newsock->ops = sock->ops;

	/*
	 * We don't need try_module_get here, as the listening socket (sock)
	 * has the protocol module (sock->ops->owner) held.
	 */
	__module_get(newsock->ops->owner);

	newfd = __get_unused_fd_flags(flags, nofile);
	if (unlikely(newfd < 0)) {
		err = newfd;
		sock_release(newsock);
		goto out;
	}
	newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
	if (IS_ERR(newfile)) {
		err = PTR_ERR(newfile);
		put_unused_fd(newfd);
		goto out;
	}

	err = security_socket_accept(sock, newsock);
	if (err)
		goto out_fd;

	err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
					false);
	if (err < 0)
		goto out_fd;

	if (upeer_sockaddr) {
		len = newsock->ops->getname(newsock,
					(struct sockaddr *)&address, 2);
		if (len < 0) {
			err = -ECONNABORTED;
			goto out_fd;
		}
		err = move_addr_to_user(&address,
					len, upeer_sockaddr, upeer_addrlen);
		if (err < 0)
			goto out_fd;
	}

	/* File flags are not inherited via accept() unlike other OSes. */

	fd_install(newfd, newfile);
	err = newfd;
out:
	return err;
out_fd:
	fput(newfile);
	put_unused_fd(newfd);
	goto out;

}

/*
 * For accept, we attempt to create a new socket, set up the link
 * with the client, wake up the client, then return the new
 * connected fd. We collect the address of the connector in kernel
 * space and move it to user at the very end. This is unclean because
 * we open the socket then return an error.
 *
 * 1003.1g adds the ability to recvmsg() to query connection pending
 * status to recvmsg. We need to add that support in a way that's
 * clean when we restructure accept also.
 */

int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
		  int __user *upeer_addrlen, int flags)
{
	int ret = -EBADF;
	struct fd f;

	f = fdget(fd);
	if (f.file) {
		ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
					 upeer_addrlen, flags,
					 rlimit(RLIMIT_NOFILE));
		fdput(f);
	}

	return ret;
}

SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen, int, flags)
{
	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
}

SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen)
{
	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}

/*
 *	Attempt to connect to a socket with the server address. The address
 *	is in user space so we verify it is OK and move it to kernel space.
 *
 *	For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
 *	break bindings
 *
 *	NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
 *	other SEQPACKET protocols that take time to connect() as it doesn't
 *	include the -EINPROGRESS status for such sockets.
 */

int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
		       int addrlen, int file_flags)
{
	struct socket *sock;
	int err;

	sock = sock_from_file(file, &err);
	if (!sock)
		goto out;

	err =
	    security_socket_connect(sock, (struct sockaddr *)address, addrlen);
	if (err)
		goto out;

	err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
				 sock->file->f_flags | file_flags);
out:
	return err;
}

int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
{
	int ret = -EBADF;
	struct fd f;

	f = fdget(fd);
	if (f.file) {
		struct sockaddr_storage address;

		ret = move_addr_to_kernel(uservaddr, addrlen, &address);
		if (!ret)
			ret = __sys_connect_file(f.file, &address, addrlen, 0);
		fdput(f);
	}

	return ret;
}

SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
		int, addrlen)
{
	return __sys_connect(fd, uservaddr, addrlen);
}

/*
 *	Get the local address ('name') of a socket object. Move the obtained
 *	name to user space.
 */

int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
		      int __user *usockaddr_len)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	err = security_socket_getsockname(sock);
	if (err)
		goto out_put;

	err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
	if (err < 0)
		goto out_put;
	/* "err" is actually length in this case */
	err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);

out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
}

SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
		int __user *, usockaddr_len)
{
	return __sys_getsockname(fd, usockaddr, usockaddr_len);
}

/*
 *	Get the remote address ('name') of a socket object. Move the obtained
 *	name to user space.
 */

int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
		      int __user *usockaddr_len)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err, fput_needed;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (sock != NULL) {
		err = security_socket_getpeername(sock);
		if (err) {
			fput_light(sock->file, fput_needed);
			return err;
		}

		err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
		if (err >= 0)
			/* "err" is actually length in this case */
			err = move_addr_to_user(&address, err, usockaddr,
						usockaddr_len);
		fput_light(sock->file, fput_needed);
	}
	return err;
}

SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
		int __user *, usockaddr_len)
{
	return __sys_getpeername(fd, usockaddr, usockaddr_len);
}

/*
 *	Send a datagram to a given address. We move the address into kernel
 *	space and check the user space data area is readable before invoking
 *	the protocol.
 */
int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
		 struct sockaddr __user *addr, int addr_len)
{
	struct socket *sock;
	struct sockaddr_storage address;
	int err;
	struct msghdr msg;
	struct iovec iov;
	int fput_needed;

	err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
	if (unlikely(err))
		return err;
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	if (addr) {
		err = move_addr_to_kernel(addr, addr_len, &address);
		if (err < 0)
			goto out_put;
		msg.msg_name = (struct sockaddr *)&address;
		msg.msg_namelen = addr_len;
	}
	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;
	msg.msg_flags = flags;
	err = sock_sendmsg(sock, &msg);

out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
}

SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
		unsigned int, flags, struct sockaddr __user *, addr,
		int, addr_len)
{
	return __sys_sendto(fd, buff, len, flags, addr, addr_len);
}

/*
 *	Send a datagram down a socket.
 */

SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
		unsigned int, flags)
{
	return __sys_sendto(fd, buff, len, flags, NULL, 0);
}

/*
 *	Receive a frame from the socket and optionally record the address of the
 *	sender. We verify the buffers are writable and if needed move the
 *	sender address from kernel to user space.
 */
int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
		   struct sockaddr __user *addr, int __user *addr_len)
{
	struct socket *sock;
	struct iovec iov;
	struct msghdr msg;
	struct sockaddr_storage address;
	int err, err2;
	int fput_needed;

	err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
	if (unlikely(err))
		return err;
	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	/* Save some cycles and don't copy the address if not needed */
	msg.msg_name = addr ?
(struct sockaddr *)&address : NULL; 2020 /* We assume all kernel code knows the size of sockaddr_storage */ 2021 msg.msg_namelen = 0; 2022 msg.msg_iocb = NULL; 2023 msg.msg_flags = 0; 2024 if (sock->file->f_flags & O_NONBLOCK) 2025 flags |= MSG_DONTWAIT; 2026 err = sock_recvmsg(sock, &msg, flags); 2027 2028 if (err >= 0 && addr != NULL) { 2029 err2 = move_addr_to_user(&address, 2030 msg.msg_namelen, addr, addr_len); 2031 if (err2 < 0) 2032 err = err2; 2033 } 2034 2035 fput_light(sock->file, fput_needed); 2036 out: 2037 return err; 2038 } 2039 2040 SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, 2041 unsigned int, flags, struct sockaddr __user *, addr, 2042 int __user *, addr_len) 2043 { 2044 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len); 2045 } 2046 2047 /* 2048 * Receive a datagram from a socket. 2049 */ 2050 2051 SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, 2052 unsigned int, flags) 2053 { 2054 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 2055 } 2056 2057 static bool sock_use_custom_sol_socket(const struct socket *sock) 2058 { 2059 const struct sock *sk = sock->sk; 2060 2061 /* Use sock->ops->setsockopt() for MPTCP */ 2062 return IS_ENABLED(CONFIG_MPTCP) && 2063 sk->sk_protocol == IPPROTO_MPTCP && 2064 sk->sk_type == SOCK_STREAM && 2065 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6); 2066 } 2067 2068 /* 2069 * Set a socket option. Because we don't know the option lengths we have 2070 * to pass the user mode parameter for the protocols to sort out. 2071 */ 2072 int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, 2073 int optlen) 2074 { 2075 sockptr_t optval = USER_SOCKPTR(user_optval); 2076 char *kernel_optval = NULL; 2077 int err, fput_needed; 2078 struct socket *sock; 2079 2080 if (optlen < 0) 2081 return -EINVAL; 2082 2083 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2084 if (!sock) 2085 return err; 2086 2087 err = security_socket_setsockopt(sock, level, optname); 2088 if (err) 2089 goto out_put; 2090 2091 if (!in_compat_syscall()) 2092 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, 2093 user_optval, &optlen, 2094 &kernel_optval); 2095 if (err < 0) 2096 goto out_put; 2097 if (err > 0) { 2098 err = 0; 2099 goto out_put; 2100 } 2101 2102 if (kernel_optval) 2103 optval = KERNEL_SOCKPTR(kernel_optval); 2104 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock)) 2105 err = sock_setsockopt(sock, level, optname, optval, optlen); 2106 else if (unlikely(!sock->ops->setsockopt)) 2107 err = -EOPNOTSUPP; 2108 else 2109 err = sock->ops->setsockopt(sock, level, optname, optval, 2110 optlen); 2111 kfree(kernel_optval); 2112 out_put: 2113 fput_light(sock->file, fput_needed); 2114 return err; 2115 } 2116 2117 SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, 2118 char __user *, optval, int, optlen) 2119 { 2120 return __sys_setsockopt(fd, level, optname, optval, optlen); 2121 } 2122 2123 /* 2124 * Get a socket option. Because we don't know the option lengths we have 2125 * to pass a user mode parameter for the protocols to sort out. 
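 * For example, a SOL_SOCKET option such as SO_RCVBUF is answered generically
 * by sock_getsockopt() below, while a protocol-level request such as
 * (IPPROTO_TCP, TCP_INFO) is forwarded to the owning protocol's
 * ->getsockopt() handler.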
2126 */ 2127 int __sys_getsockopt(int fd, int level, int optname, char __user *optval, 2128 int __user *optlen) 2129 { 2130 int err, fput_needed; 2131 struct socket *sock; 2132 int max_optlen; 2133 2134 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2135 if (!sock) 2136 return err; 2137 2138 err = security_socket_getsockopt(sock, level, optname); 2139 if (err) 2140 goto out_put; 2141 2142 if (!in_compat_syscall()) 2143 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); 2144 2145 if (level == SOL_SOCKET) 2146 err = sock_getsockopt(sock, level, optname, optval, optlen); 2147 else if (unlikely(!sock->ops->getsockopt)) 2148 err = -EOPNOTSUPP; 2149 else 2150 err = sock->ops->getsockopt(sock, level, optname, optval, 2151 optlen); 2152 2153 if (!in_compat_syscall()) 2154 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, 2155 optval, optlen, max_optlen, 2156 err); 2157 out_put: 2158 fput_light(sock->file, fput_needed); 2159 return err; 2160 } 2161 2162 SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, 2163 char __user *, optval, int __user *, optlen) 2164 { 2165 return __sys_getsockopt(fd, level, optname, optval, optlen); 2166 } 2167 2168 /* 2169 * Shutdown a socket. 2170 */ 2171 2172 int __sys_shutdown(int fd, int how) 2173 { 2174 int err, fput_needed; 2175 struct socket *sock; 2176 2177 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2178 if (sock != NULL) { 2179 err = security_socket_shutdown(sock, how); 2180 if (!err) 2181 err = sock->ops->shutdown(sock, how); 2182 fput_light(sock->file, fput_needed); 2183 } 2184 return err; 2185 } 2186 2187 SYSCALL_DEFINE2(shutdown, int, fd, int, how) 2188 { 2189 return __sys_shutdown(fd, how); 2190 } 2191 2192 /* A couple of helpful macros for getting the address of the 32/64 bit 2193 * fields which are the same type (int / unsigned) on our platforms. 2194 */ 2195 #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? 
&msg##_compat->member : &msg->member) 2196 #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 2197 #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 2198 2199 struct used_address { 2200 struct sockaddr_storage name; 2201 unsigned int name_len; 2202 }; 2203 2204 int __copy_msghdr_from_user(struct msghdr *kmsg, 2205 struct user_msghdr __user *umsg, 2206 struct sockaddr __user **save_addr, 2207 struct iovec __user **uiov, size_t *nsegs) 2208 { 2209 struct user_msghdr msg; 2210 ssize_t err; 2211 2212 if (copy_from_user(&msg, umsg, sizeof(*umsg))) 2213 return -EFAULT; 2214 2215 kmsg->msg_control_is_user = true; 2216 kmsg->msg_control_user = msg.msg_control; 2217 kmsg->msg_controllen = msg.msg_controllen; 2218 kmsg->msg_flags = msg.msg_flags; 2219 2220 kmsg->msg_namelen = msg.msg_namelen; 2221 if (!msg.msg_name) 2222 kmsg->msg_namelen = 0; 2223 2224 if (kmsg->msg_namelen < 0) 2225 return -EINVAL; 2226 2227 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) 2228 kmsg->msg_namelen = sizeof(struct sockaddr_storage); 2229 2230 if (save_addr) 2231 *save_addr = msg.msg_name; 2232 2233 if (msg.msg_name && kmsg->msg_namelen) { 2234 if (!save_addr) { 2235 err = move_addr_to_kernel(msg.msg_name, 2236 kmsg->msg_namelen, 2237 kmsg->msg_name); 2238 if (err < 0) 2239 return err; 2240 } 2241 } else { 2242 kmsg->msg_name = NULL; 2243 kmsg->msg_namelen = 0; 2244 } 2245 2246 if (msg.msg_iovlen > UIO_MAXIOV) 2247 return -EMSGSIZE; 2248 2249 kmsg->msg_iocb = NULL; 2250 *uiov = msg.msg_iov; 2251 *nsegs = msg.msg_iovlen; 2252 return 0; 2253 } 2254 2255 static int copy_msghdr_from_user(struct msghdr *kmsg, 2256 struct user_msghdr __user *umsg, 2257 struct sockaddr __user **save_addr, 2258 struct iovec **iov) 2259 { 2260 struct user_msghdr msg; 2261 ssize_t err; 2262 2263 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov, 2264 &msg.msg_iovlen); 2265 if (err) 2266 return err; 2267 2268 err = import_iovec(save_addr ? READ : WRITE, 2269 msg.msg_iov, msg.msg_iovlen, 2270 UIO_FASTIOV, iov, &kmsg->msg_iter); 2271 return err < 0 ? 
err : 0; 2272 } 2273 2274 static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, 2275 unsigned int flags, struct used_address *used_address, 2276 unsigned int allowed_msghdr_flags) 2277 { 2278 unsigned char ctl[sizeof(struct cmsghdr) + 20] 2279 __aligned(sizeof(__kernel_size_t)); 2280 /* 20 is size of ipv6_pktinfo */ 2281 unsigned char *ctl_buf = ctl; 2282 int ctl_len; 2283 ssize_t err; 2284 2285 err = -ENOBUFS; 2286 2287 if (msg_sys->msg_controllen > INT_MAX) 2288 goto out; 2289 flags |= (msg_sys->msg_flags & allowed_msghdr_flags); 2290 ctl_len = msg_sys->msg_controllen; 2291 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 2292 err = 2293 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, 2294 sizeof(ctl)); 2295 if (err) 2296 goto out; 2297 ctl_buf = msg_sys->msg_control; 2298 ctl_len = msg_sys->msg_controllen; 2299 } else if (ctl_len) { 2300 BUILD_BUG_ON(sizeof(struct cmsghdr) != 2301 CMSG_ALIGN(sizeof(struct cmsghdr))); 2302 if (ctl_len > sizeof(ctl)) { 2303 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 2304 if (ctl_buf == NULL) 2305 goto out; 2306 } 2307 err = -EFAULT; 2308 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len)) 2309 goto out_freectl; 2310 msg_sys->msg_control = ctl_buf; 2311 msg_sys->msg_control_is_user = false; 2312 } 2313 msg_sys->msg_flags = flags; 2314 2315 if (sock->file->f_flags & O_NONBLOCK) 2316 msg_sys->msg_flags |= MSG_DONTWAIT; 2317 /* 2318 * If this is sendmmsg() and current destination address is same as 2319 * previously succeeded address, omit asking LSM's decision. 2320 * used_address->name_len is initialized to UINT_MAX so that the first 2321 * destination address never matches. 2322 */ 2323 if (used_address && msg_sys->msg_name && 2324 used_address->name_len == msg_sys->msg_namelen && 2325 !memcmp(&used_address->name, msg_sys->msg_name, 2326 used_address->name_len)) { 2327 err = sock_sendmsg_nosec(sock, msg_sys); 2328 goto out_freectl; 2329 } 2330 err = sock_sendmsg(sock, msg_sys); 2331 /* 2332 * If this is sendmmsg() and sending to current destination address was 2333 * successful, remember it. 
2334 */ 2335 if (used_address && err >= 0) { 2336 used_address->name_len = msg_sys->msg_namelen; 2337 if (msg_sys->msg_name) 2338 memcpy(&used_address->name, msg_sys->msg_name, 2339 used_address->name_len); 2340 } 2341 2342 out_freectl: 2343 if (ctl_buf != ctl) 2344 sock_kfree_s(sock->sk, ctl_buf, ctl_len); 2345 out: 2346 return err; 2347 } 2348 2349 int sendmsg_copy_msghdr(struct msghdr *msg, 2350 struct user_msghdr __user *umsg, unsigned flags, 2351 struct iovec **iov) 2352 { 2353 int err; 2354 2355 if (flags & MSG_CMSG_COMPAT) { 2356 struct compat_msghdr __user *msg_compat; 2357 2358 msg_compat = (struct compat_msghdr __user *) umsg; 2359 err = get_compat_msghdr(msg, msg_compat, NULL, iov); 2360 } else { 2361 err = copy_msghdr_from_user(msg, umsg, NULL, iov); 2362 } 2363 if (err < 0) 2364 return err; 2365 2366 return 0; 2367 } 2368 2369 static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, 2370 struct msghdr *msg_sys, unsigned int flags, 2371 struct used_address *used_address, 2372 unsigned int allowed_msghdr_flags) 2373 { 2374 struct sockaddr_storage address; 2375 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 2376 ssize_t err; 2377 2378 msg_sys->msg_name = &address; 2379 2380 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov); 2381 if (err < 0) 2382 return err; 2383 2384 err = ____sys_sendmsg(sock, msg_sys, flags, used_address, 2385 allowed_msghdr_flags); 2386 kfree(iov); 2387 return err; 2388 } 2389 2390 /* 2391 * BSD sendmsg interface 2392 */ 2393 long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, 2394 unsigned int flags) 2395 { 2396 /* disallow ancillary data requests from this path */ 2397 if (msg->msg_control || msg->msg_controllen) 2398 return -EINVAL; 2399 2400 return ____sys_sendmsg(sock, msg, flags, NULL, 0); 2401 } 2402 2403 long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, 2404 bool forbid_cmsg_compat) 2405 { 2406 int fput_needed, err; 2407 struct msghdr msg_sys; 2408 struct socket *sock; 2409 2410 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) 2411 return -EINVAL; 2412 2413 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2414 if (!sock) 2415 goto out; 2416 2417 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0); 2418 2419 fput_light(sock->file, fput_needed); 2420 out: 2421 return err; 2422 } 2423 2424 SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags) 2425 { 2426 return __sys_sendmsg(fd, msg, flags, true); 2427 } 2428 2429 /* 2430 * Linux sendmmsg interface 2431 */ 2432 2433 int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, 2434 unsigned int flags, bool forbid_cmsg_compat) 2435 { 2436 int fput_needed, err, datagrams; 2437 struct socket *sock; 2438 struct mmsghdr __user *entry; 2439 struct compat_mmsghdr __user *compat_entry; 2440 struct msghdr msg_sys; 2441 struct used_address used_address; 2442 unsigned int oflags = flags; 2443 2444 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) 2445 return -EINVAL; 2446 2447 if (vlen > UIO_MAXIOV) 2448 vlen = UIO_MAXIOV; 2449 2450 datagrams = 0; 2451 2452 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2453 if (!sock) 2454 return err; 2455 2456 used_address.name_len = UINT_MAX; 2457 entry = mmsg; 2458 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2459 err = 0; 2460 flags |= MSG_BATCH; 2461 2462 while (datagrams < vlen) { 2463 if (datagrams == vlen - 1) 2464 flags = oflags; 2465 2466 if (MSG_CMSG_COMPAT & flags) { 2467 err = ___sys_sendmsg(sock, (struct user_msghdr 
__user *)compat_entry, 2468 &msg_sys, flags, &used_address, MSG_EOR); 2469 if (err < 0) 2470 break; 2471 err = __put_user(err, &compat_entry->msg_len); 2472 ++compat_entry; 2473 } else { 2474 err = ___sys_sendmsg(sock, 2475 (struct user_msghdr __user *)entry, 2476 &msg_sys, flags, &used_address, MSG_EOR); 2477 if (err < 0) 2478 break; 2479 err = put_user(err, &entry->msg_len); 2480 ++entry; 2481 } 2482 2483 if (err) 2484 break; 2485 ++datagrams; 2486 if (msg_data_left(&msg_sys)) 2487 break; 2488 cond_resched(); 2489 } 2490 2491 fput_light(sock->file, fput_needed); 2492 2493 /* We only return an error if no datagrams were able to be sent */ 2494 if (datagrams != 0) 2495 return datagrams; 2496 2497 return err; 2498 } 2499 2500 SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, 2501 unsigned int, vlen, unsigned int, flags) 2502 { 2503 return __sys_sendmmsg(fd, mmsg, vlen, flags, true); 2504 } 2505 2506 int recvmsg_copy_msghdr(struct msghdr *msg, 2507 struct user_msghdr __user *umsg, unsigned flags, 2508 struct sockaddr __user **uaddr, 2509 struct iovec **iov) 2510 { 2511 ssize_t err; 2512 2513 if (MSG_CMSG_COMPAT & flags) { 2514 struct compat_msghdr __user *msg_compat; 2515 2516 msg_compat = (struct compat_msghdr __user *) umsg; 2517 err = get_compat_msghdr(msg, msg_compat, uaddr, iov); 2518 } else { 2519 err = copy_msghdr_from_user(msg, umsg, uaddr, iov); 2520 } 2521 if (err < 0) 2522 return err; 2523 2524 return 0; 2525 } 2526 2527 static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys, 2528 struct user_msghdr __user *msg, 2529 struct sockaddr __user *uaddr, 2530 unsigned int flags, int nosec) 2531 { 2532 struct compat_msghdr __user *msg_compat = 2533 (struct compat_msghdr __user *) msg; 2534 int __user *uaddr_len = COMPAT_NAMELEN(msg); 2535 struct sockaddr_storage addr; 2536 unsigned long cmsg_ptr; 2537 int len; 2538 ssize_t err; 2539 2540 msg_sys->msg_name = &addr; 2541 cmsg_ptr = (unsigned long)msg_sys->msg_control; 2542 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 2543 2544 /* We assume all kernel code knows the size of sockaddr_storage */ 2545 msg_sys->msg_namelen = 0; 2546 2547 if (sock->file->f_flags & O_NONBLOCK) 2548 flags |= MSG_DONTWAIT; 2549 2550 if (unlikely(nosec)) 2551 err = sock_recvmsg_nosec(sock, msg_sys, flags); 2552 else 2553 err = sock_recvmsg(sock, msg_sys, flags); 2554 2555 if (err < 0) 2556 goto out; 2557 len = err; 2558 2559 if (uaddr != NULL) { 2560 err = move_addr_to_user(&addr, 2561 msg_sys->msg_namelen, uaddr, 2562 uaddr_len); 2563 if (err < 0) 2564 goto out; 2565 } 2566 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), 2567 COMPAT_FLAGS(msg)); 2568 if (err) 2569 goto out; 2570 if (MSG_CMSG_COMPAT & flags) 2571 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, 2572 &msg_compat->msg_controllen); 2573 else 2574 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, 2575 &msg->msg_controllen); 2576 if (err) 2577 goto out; 2578 err = len; 2579 out: 2580 return err; 2581 } 2582 2583 static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, 2584 struct msghdr *msg_sys, unsigned int flags, int nosec) 2585 { 2586 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 2587 /* user mode address pointers */ 2588 struct sockaddr __user *uaddr; 2589 ssize_t err; 2590 2591 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov); 2592 if (err < 0) 2593 return err; 2594 2595 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec); 2596 kfree(iov); 2597 return err; 
2598 } 2599 2600 /* 2601 * BSD recvmsg interface 2602 */ 2603 2604 long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, 2605 struct user_msghdr __user *umsg, 2606 struct sockaddr __user *uaddr, unsigned int flags) 2607 { 2608 if (msg->msg_control || msg->msg_controllen) { 2609 /* disallow ancillary data reqs unless cmsg is plain data */ 2610 if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY)) 2611 return -EINVAL; 2612 } 2613 2614 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0); 2615 } 2616 2617 long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, 2618 bool forbid_cmsg_compat) 2619 { 2620 int fput_needed, err; 2621 struct msghdr msg_sys; 2622 struct socket *sock; 2623 2624 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) 2625 return -EINVAL; 2626 2627 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2628 if (!sock) 2629 goto out; 2630 2631 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); 2632 2633 fput_light(sock->file, fput_needed); 2634 out: 2635 return err; 2636 } 2637 2638 SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg, 2639 unsigned int, flags) 2640 { 2641 return __sys_recvmsg(fd, msg, flags, true); 2642 } 2643 2644 /* 2645 * Linux recvmmsg interface 2646 */ 2647 2648 static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg, 2649 unsigned int vlen, unsigned int flags, 2650 struct timespec64 *timeout) 2651 { 2652 int fput_needed, err, datagrams; 2653 struct socket *sock; 2654 struct mmsghdr __user *entry; 2655 struct compat_mmsghdr __user *compat_entry; 2656 struct msghdr msg_sys; 2657 struct timespec64 end_time; 2658 struct timespec64 timeout64; 2659 2660 if (timeout && 2661 poll_select_set_timeout(&end_time, timeout->tv_sec, 2662 timeout->tv_nsec)) 2663 return -EINVAL; 2664 2665 datagrams = 0; 2666 2667 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2668 if (!sock) 2669 return err; 2670 2671 if (likely(!(flags & MSG_ERRQUEUE))) { 2672 err = sock_error(sock->sk); 2673 if (err) { 2674 datagrams = err; 2675 goto out_put; 2676 } 2677 } 2678 2679 entry = mmsg; 2680 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2681 2682 while (datagrams < vlen) { 2683 /* 2684 * No need to ask LSM for more than the first datagram. 
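 * (The running datagram count doubles as the "nosec" argument below, so the
 * LSM recvmsg hook is consulted only while it is still zero, i.e. for the
 * first message of the batch.)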
2685 */ 2686 if (MSG_CMSG_COMPAT & flags) { 2687 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry, 2688 &msg_sys, flags & ~MSG_WAITFORONE, 2689 datagrams); 2690 if (err < 0) 2691 break; 2692 err = __put_user(err, &compat_entry->msg_len); 2693 ++compat_entry; 2694 } else { 2695 err = ___sys_recvmsg(sock, 2696 (struct user_msghdr __user *)entry, 2697 &msg_sys, flags & ~MSG_WAITFORONE, 2698 datagrams); 2699 if (err < 0) 2700 break; 2701 err = put_user(err, &entry->msg_len); 2702 ++entry; 2703 } 2704 2705 if (err) 2706 break; 2707 ++datagrams; 2708 2709 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ 2710 if (flags & MSG_WAITFORONE) 2711 flags |= MSG_DONTWAIT; 2712 2713 if (timeout) { 2714 ktime_get_ts64(&timeout64); 2715 *timeout = timespec64_sub(end_time, timeout64); 2716 if (timeout->tv_sec < 0) { 2717 timeout->tv_sec = timeout->tv_nsec = 0; 2718 break; 2719 } 2720 2721 /* Timeout, return less than vlen datagrams */ 2722 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) 2723 break; 2724 } 2725 2726 /* Out of band data, return right away */ 2727 if (msg_sys.msg_flags & MSG_OOB) 2728 break; 2729 cond_resched(); 2730 } 2731 2732 if (err == 0) 2733 goto out_put; 2734 2735 if (datagrams == 0) { 2736 datagrams = err; 2737 goto out_put; 2738 } 2739 2740 /* 2741 * We may return less entries than requested (vlen) if the 2742 * sock is non block and there aren't enough datagrams... 2743 */ 2744 if (err != -EAGAIN) { 2745 /* 2746 * ... or if recvmsg returns an error after we 2747 * received some datagrams, where we record the 2748 * error to return on the next call or if the 2749 * app asks about it using getsockopt(SO_ERROR). 2750 */ 2751 sock->sk->sk_err = -err; 2752 } 2753 out_put: 2754 fput_light(sock->file, fput_needed); 2755 2756 return datagrams; 2757 } 2758 2759 int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, 2760 unsigned int vlen, unsigned int flags, 2761 struct __kernel_timespec __user *timeout, 2762 struct old_timespec32 __user *timeout32) 2763 { 2764 int datagrams; 2765 struct timespec64 timeout_sys; 2766 2767 if (timeout && get_timespec64(&timeout_sys, timeout)) 2768 return -EFAULT; 2769 2770 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32)) 2771 return -EFAULT; 2772 2773 if (!timeout && !timeout32) 2774 return do_recvmmsg(fd, mmsg, vlen, flags, NULL); 2775 2776 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); 2777 2778 if (datagrams <= 0) 2779 return datagrams; 2780 2781 if (timeout && put_timespec64(&timeout_sys, timeout)) 2782 datagrams = -EFAULT; 2783 2784 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32)) 2785 datagrams = -EFAULT; 2786 2787 return datagrams; 2788 } 2789 2790 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, 2791 unsigned int, vlen, unsigned int, flags, 2792 struct __kernel_timespec __user *, timeout) 2793 { 2794 if (flags & MSG_CMSG_COMPAT) 2795 return -EINVAL; 2796 2797 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL); 2798 } 2799 2800 #ifdef CONFIG_COMPAT_32BIT_TIME 2801 SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg, 2802 unsigned int, vlen, unsigned int, flags, 2803 struct old_timespec32 __user *, timeout) 2804 { 2805 if (flags & MSG_CMSG_COMPAT) 2806 return -EINVAL; 2807 2808 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout); 2809 } 2810 #endif 2811 2812 #ifdef __ARCH_WANT_SYS_SOCKETCALL 2813 /* Argument list sizes for sys_socketcall */ 2814 #define AL(x) ((x) * sizeof(unsigned long)) 2815 static const unsigned char 
nargs[21] = { 2816 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 2817 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 2818 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 2819 AL(4), AL(5), AL(4) 2820 }; 2821 2822 #undef AL 2823 2824 /* 2825 * System call vectors. 2826 * 2827 * Argument checking cleaned up. Saved 20% in size. 2828 * This function doesn't need to set the kernel lock because 2829 * it is set by the callees. 2830 */ 2831 2832 SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) 2833 { 2834 unsigned long a[AUDITSC_ARGS]; 2835 unsigned long a0, a1; 2836 int err; 2837 unsigned int len; 2838 2839 if (call < 1 || call > SYS_SENDMMSG) 2840 return -EINVAL; 2841 call = array_index_nospec(call, SYS_SENDMMSG + 1); 2842 2843 len = nargs[call]; 2844 if (len > sizeof(a)) 2845 return -EINVAL; 2846 2847 /* copy_from_user should be SMP safe. */ 2848 if (copy_from_user(a, args, len)) 2849 return -EFAULT; 2850 2851 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); 2852 if (err) 2853 return err; 2854 2855 a0 = a[0]; 2856 a1 = a[1]; 2857 2858 switch (call) { 2859 case SYS_SOCKET: 2860 err = __sys_socket(a0, a1, a[2]); 2861 break; 2862 case SYS_BIND: 2863 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]); 2864 break; 2865 case SYS_CONNECT: 2866 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]); 2867 break; 2868 case SYS_LISTEN: 2869 err = __sys_listen(a0, a1); 2870 break; 2871 case SYS_ACCEPT: 2872 err = __sys_accept4(a0, (struct sockaddr __user *)a1, 2873 (int __user *)a[2], 0); 2874 break; 2875 case SYS_GETSOCKNAME: 2876 err = 2877 __sys_getsockname(a0, (struct sockaddr __user *)a1, 2878 (int __user *)a[2]); 2879 break; 2880 case SYS_GETPEERNAME: 2881 err = 2882 __sys_getpeername(a0, (struct sockaddr __user *)a1, 2883 (int __user *)a[2]); 2884 break; 2885 case SYS_SOCKETPAIR: 2886 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]); 2887 break; 2888 case SYS_SEND: 2889 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3], 2890 NULL, 0); 2891 break; 2892 case SYS_SENDTO: 2893 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3], 2894 (struct sockaddr __user *)a[4], a[5]); 2895 break; 2896 case SYS_RECV: 2897 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 2898 NULL, NULL); 2899 break; 2900 case SYS_RECVFROM: 2901 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 2902 (struct sockaddr __user *)a[4], 2903 (int __user *)a[5]); 2904 break; 2905 case SYS_SHUTDOWN: 2906 err = __sys_shutdown(a0, a1); 2907 break; 2908 case SYS_SETSOCKOPT: 2909 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3], 2910 a[4]); 2911 break; 2912 case SYS_GETSOCKOPT: 2913 err = 2914 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3], 2915 (int __user *)a[4]); 2916 break; 2917 case SYS_SENDMSG: 2918 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1, 2919 a[2], true); 2920 break; 2921 case SYS_SENDMMSG: 2922 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], 2923 a[3], true); 2924 break; 2925 case SYS_RECVMSG: 2926 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1, 2927 a[2], true); 2928 break; 2929 case SYS_RECVMMSG: 2930 if (IS_ENABLED(CONFIG_64BIT)) 2931 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1, 2932 a[2], a[3], 2933 (struct __kernel_timespec __user *)a[4], 2934 NULL); 2935 else 2936 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1, 2937 a[2], a[3], NULL, 2938 (struct old_timespec32 __user *)a[4]); 2939 break; 2940 case SYS_ACCEPT4: 2941 err = __sys_accept4(a0, (struct sockaddr __user *)a1, 2942 (int __user *)a[2], a[3]); 2943 break; 
2944 default: 2945 err = -EINVAL; 2946 break; 2947 } 2948 return err; 2949 } 2950 2951 #endif /* __ARCH_WANT_SYS_SOCKETCALL */ 2952 2953 /** 2954 * sock_register - add a socket protocol handler 2955 * @ops: description of protocol 2956 * 2957 * This function is called by a protocol handler that wants to 2958 * advertise its address family, and have it linked into the 2959 * socket interface. The value ops->family corresponds to the 2960 * socket system call protocol family. 2961 */ 2962 int sock_register(const struct net_proto_family *ops) 2963 { 2964 int err; 2965 2966 if (ops->family >= NPROTO) { 2967 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); 2968 return -ENOBUFS; 2969 } 2970 2971 spin_lock(&net_family_lock); 2972 if (rcu_dereference_protected(net_families[ops->family], 2973 lockdep_is_held(&net_family_lock))) 2974 err = -EEXIST; 2975 else { 2976 rcu_assign_pointer(net_families[ops->family], ops); 2977 err = 0; 2978 } 2979 spin_unlock(&net_family_lock); 2980 2981 pr_info("NET: Registered protocol family %d\n", ops->family); 2982 return err; 2983 } 2984 EXPORT_SYMBOL(sock_register); 2985 2986 /** 2987 * sock_unregister - remove a protocol handler 2988 * @family: protocol family to remove 2989 * 2990 * This function is called by a protocol handler that wants to 2991 * remove its address family, and have it unlinked from the 2992 * new socket creation. 2993 * 2994 * If protocol handler is a module, then it can use module reference 2995 * counts to protect against new references. If protocol handler is not 2996 * a module then it needs to provide its own protection in 2997 * the ops->create routine. 2998 */ 2999 void sock_unregister(int family) 3000 { 3001 BUG_ON(family < 0 || family >= NPROTO); 3002 3003 spin_lock(&net_family_lock); 3004 RCU_INIT_POINTER(net_families[family], NULL); 3005 spin_unlock(&net_family_lock); 3006 3007 synchronize_rcu(); 3008 3009 pr_info("NET: Unregistered protocol family %d\n", family); 3010 } 3011 EXPORT_SYMBOL(sock_unregister); 3012 3013 bool sock_is_registered(int family) 3014 { 3015 return family < NPROTO && rcu_access_pointer(net_families[family]); 3016 } 3017 3018 static int __init sock_init(void) 3019 { 3020 int err; 3021 /* 3022 * Initialize the network sysctl infrastructure. 3023 */ 3024 err = net_sysctl_init(); 3025 if (err) 3026 goto out; 3027 3028 /* 3029 * Initialize skbuff SLAB cache 3030 */ 3031 skb_init(); 3032 3033 /* 3034 * Initialize the protocols module. 3035 */ 3036 3037 init_inodecache(); 3038 3039 err = register_filesystem(&sock_fs_type); 3040 if (err) 3041 goto out; 3042 sock_mnt = kern_mount(&sock_fs_type); 3043 if (IS_ERR(sock_mnt)) { 3044 err = PTR_ERR(sock_mnt); 3045 goto out_mount; 3046 } 3047 3048 /* The real protocol initialization is performed in later initcalls. 
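 * For example, AF_INET is set up by inet_init(), an fs_initcall() that runs
 * after this core_initcall().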
3049 */ 3050 3051 #ifdef CONFIG_NETFILTER 3052 err = netfilter_init(); 3053 if (err) 3054 goto out; 3055 #endif 3056 3057 ptp_classifier_init(); 3058 3059 out: 3060 return err; 3061 3062 out_mount: 3063 unregister_filesystem(&sock_fs_type); 3064 goto out; 3065 } 3066 3067 core_initcall(sock_init); /* early initcall */ 3068 3069 #ifdef CONFIG_PROC_FS 3070 void socket_seq_show(struct seq_file *seq) 3071 { 3072 seq_printf(seq, "sockets: used %d\n", 3073 sock_inuse_get(seq->private)); 3074 } 3075 #endif /* CONFIG_PROC_FS */ 3076 3077 #ifdef CONFIG_COMPAT 3078 static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) 3079 { 3080 struct compat_ifconf ifc32; 3081 struct ifconf ifc; 3082 int err; 3083 3084 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) 3085 return -EFAULT; 3086 3087 ifc.ifc_len = ifc32.ifc_len; 3088 ifc.ifc_req = compat_ptr(ifc32.ifcbuf); 3089 3090 rtnl_lock(); 3091 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); 3092 rtnl_unlock(); 3093 if (err) 3094 return err; 3095 3096 ifc32.ifc_len = ifc.ifc_len; 3097 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) 3098 return -EFAULT; 3099 3100 return 0; 3101 } 3102 3103 static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) 3104 { 3105 struct compat_ethtool_rxnfc __user *compat_rxnfc; 3106 bool convert_in = false, convert_out = false; 3107 size_t buf_size = 0; 3108 struct ethtool_rxnfc __user *rxnfc = NULL; 3109 struct ifreq ifr; 3110 u32 rule_cnt = 0, actual_rule_cnt; 3111 u32 ethcmd; 3112 u32 data; 3113 int ret; 3114 3115 if (get_user(data, &ifr32->ifr_ifru.ifru_data)) 3116 return -EFAULT; 3117 3118 compat_rxnfc = compat_ptr(data); 3119 3120 if (get_user(ethcmd, &compat_rxnfc->cmd)) 3121 return -EFAULT; 3122 3123 /* Most ethtool structures are defined without padding. 3124 * Unfortunately struct ethtool_rxnfc is an exception. 3125 */ 3126 switch (ethcmd) { 3127 default: 3128 break; 3129 case ETHTOOL_GRXCLSRLALL: 3130 /* Buffer size is variable */ 3131 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) 3132 return -EFAULT; 3133 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) 3134 return -ENOMEM; 3135 buf_size += rule_cnt * sizeof(u32); 3136 fallthrough; 3137 case ETHTOOL_GRXRINGS: 3138 case ETHTOOL_GRXCLSRLCNT: 3139 case ETHTOOL_GRXCLSRULE: 3140 case ETHTOOL_SRXCLSRLINS: 3141 convert_out = true; 3142 fallthrough; 3143 case ETHTOOL_SRXCLSRLDEL: 3144 buf_size += sizeof(struct ethtool_rxnfc); 3145 convert_in = true; 3146 rxnfc = compat_alloc_user_space(buf_size); 3147 break; 3148 } 3149 3150 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) 3151 return -EFAULT; 3152 3153 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc; 3154 3155 if (convert_in) { 3156 /* We expect there to be holes between fs.m_ext and 3157 * fs.ring_cookie and at the end of fs, but nowhere else. 
3158 */ 3159 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + 3160 sizeof(compat_rxnfc->fs.m_ext) != 3161 offsetof(struct ethtool_rxnfc, fs.m_ext) + 3162 sizeof(rxnfc->fs.m_ext)); 3163 BUILD_BUG_ON( 3164 offsetof(struct compat_ethtool_rxnfc, fs.location) - 3165 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != 3166 offsetof(struct ethtool_rxnfc, fs.location) - 3167 offsetof(struct ethtool_rxnfc, fs.ring_cookie)); 3168 3169 if (copy_in_user(rxnfc, compat_rxnfc, 3170 (void __user *)(&rxnfc->fs.m_ext + 1) - 3171 (void __user *)rxnfc) || 3172 copy_in_user(&rxnfc->fs.ring_cookie, 3173 &compat_rxnfc->fs.ring_cookie, 3174 (void __user *)(&rxnfc->fs.location + 1) - 3175 (void __user *)&rxnfc->fs.ring_cookie)) 3176 return -EFAULT; 3177 if (ethcmd == ETHTOOL_GRXCLSRLALL) { 3178 if (put_user(rule_cnt, &rxnfc->rule_cnt)) 3179 return -EFAULT; 3180 } else if (copy_in_user(&rxnfc->rule_cnt, 3181 &compat_rxnfc->rule_cnt, 3182 sizeof(rxnfc->rule_cnt))) 3183 return -EFAULT; 3184 } 3185 3186 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); 3187 if (ret) 3188 return ret; 3189 3190 if (convert_out) { 3191 if (copy_in_user(compat_rxnfc, rxnfc, 3192 (const void __user *)(&rxnfc->fs.m_ext + 1) - 3193 (const void __user *)rxnfc) || 3194 copy_in_user(&compat_rxnfc->fs.ring_cookie, 3195 &rxnfc->fs.ring_cookie, 3196 (const void __user *)(&rxnfc->fs.location + 1) - 3197 (const void __user *)&rxnfc->fs.ring_cookie) || 3198 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, 3199 sizeof(rxnfc->rule_cnt))) 3200 return -EFAULT; 3201 3202 if (ethcmd == ETHTOOL_GRXCLSRLALL) { 3203 /* As an optimisation, we only copy the actual 3204 * number of rules that the underlying 3205 * function returned. Since Mallory might 3206 * change the rule count in user memory, we 3207 * check that it is less than the rule count 3208 * originally given (as the user buffer size), 3209 * which has been range-checked. 
3210 */ 3211 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) 3212 return -EFAULT; 3213 if (actual_rule_cnt < rule_cnt) 3214 rule_cnt = actual_rule_cnt; 3215 if (copy_in_user(&compat_rxnfc->rule_locs[0], 3216 &rxnfc->rule_locs[0], 3217 rule_cnt * sizeof(u32))) 3218 return -EFAULT; 3219 } 3220 } 3221 3222 return 0; 3223 } 3224 3225 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) 3226 { 3227 compat_uptr_t uptr32; 3228 struct ifreq ifr; 3229 void __user *saved; 3230 int err; 3231 3232 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) 3233 return -EFAULT; 3234 3235 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) 3236 return -EFAULT; 3237 3238 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; 3239 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32); 3240 3241 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); 3242 if (!err) { 3243 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; 3244 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) 3245 err = -EFAULT; 3246 } 3247 return err; 3248 } 3249 3250 /* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ 3251 static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, 3252 struct compat_ifreq __user *u_ifreq32) 3253 { 3254 struct ifreq ifreq; 3255 u32 data32; 3256 3257 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) 3258 return -EFAULT; 3259 if (get_user(data32, &u_ifreq32->ifr_data)) 3260 return -EFAULT; 3261 ifreq.ifr_data = compat_ptr(data32); 3262 3263 return dev_ioctl(net, cmd, &ifreq, NULL); 3264 } 3265 3266 static int compat_ifreq_ioctl(struct net *net, struct socket *sock, 3267 unsigned int cmd, 3268 struct compat_ifreq __user *uifr32) 3269 { 3270 struct ifreq __user *uifr; 3271 int err; 3272 3273 /* Handle the fact that while struct ifreq has the same *layout* on 3274 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, 3275 * which are handled elsewhere, it still has different *size* due to 3276 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, 3277 * resulting in struct ifreq being 32 and 40 bytes respectively). 3278 * As a result, if the struct happens to be at the end of a page and 3279 * the next page isn't readable/writable, we get a fault. To prevent 3280 * that, copy back and forth to the full size. 
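 * Concretely: struct ifmap is two unsigned longs, one unsigned short and
 * three unsigned chars, i.e. 4+4+2+1+1+1 = 13 bytes padded to 16 on 32-bit
 * but 8+8+2+1+1+1 = 21 bytes padded to 24 on 64-bit; with the 16-byte
 * ifr_name in front, struct ifreq works out to the 32 and 40 bytes
 * mentioned above.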
3281 */ 3282 3283 uifr = compat_alloc_user_space(sizeof(*uifr)); 3284 if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) 3285 return -EFAULT; 3286 3287 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); 3288 3289 if (!err) { 3290 switch (cmd) { 3291 case SIOCGIFFLAGS: 3292 case SIOCGIFMETRIC: 3293 case SIOCGIFMTU: 3294 case SIOCGIFMEM: 3295 case SIOCGIFHWADDR: 3296 case SIOCGIFINDEX: 3297 case SIOCGIFADDR: 3298 case SIOCGIFBRDADDR: 3299 case SIOCGIFDSTADDR: 3300 case SIOCGIFNETMASK: 3301 case SIOCGIFPFLAGS: 3302 case SIOCGIFTXQLEN: 3303 case SIOCGMIIPHY: 3304 case SIOCGMIIREG: 3305 case SIOCGIFNAME: 3306 if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) 3307 err = -EFAULT; 3308 break; 3309 } 3310 } 3311 return err; 3312 } 3313 3314 static int compat_sioc_ifmap(struct net *net, unsigned int cmd, 3315 struct compat_ifreq __user *uifr32) 3316 { 3317 struct ifreq ifr; 3318 struct compat_ifmap __user *uifmap32; 3319 int err; 3320 3321 uifmap32 = &uifr32->ifr_ifru.ifru_map; 3322 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); 3323 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3324 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3325 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3326 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); 3327 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); 3328 err |= get_user(ifr.ifr_map.port, &uifmap32->port); 3329 if (err) 3330 return -EFAULT; 3331 3332 err = dev_ioctl(net, cmd, &ifr, NULL); 3333 3334 if (cmd == SIOCGIFMAP && !err) { 3335 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 3336 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3337 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3338 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3339 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); 3340 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); 3341 err |= put_user(ifr.ifr_map.port, &uifmap32->port); 3342 if (err) 3343 err = -EFAULT; 3344 } 3345 return err; 3346 } 3347 3348 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE 3349 * for some operations; this forces use of the newer bridge-utils that 3350 * use compatible ioctls 3351 */ 3352 static int old_bridge_ioctl(compat_ulong_t __user *argp) 3353 { 3354 compat_ulong_t tmp; 3355 3356 if (get_user(tmp, argp)) 3357 return -EFAULT; 3358 if (tmp == BRCTL_GET_VERSION) 3359 return BRCTL_VERSION + 1; 3360 return -EINVAL; 3361 } 3362 3363 static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, 3364 unsigned int cmd, unsigned long arg) 3365 { 3366 void __user *argp = compat_ptr(arg); 3367 struct sock *sk = sock->sk; 3368 struct net *net = sock_net(sk); 3369 3370 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) 3371 return compat_ifr_data_ioctl(net, cmd, argp); 3372 3373 switch (cmd) { 3374 case SIOCSIFBR: 3375 case SIOCGIFBR: 3376 return old_bridge_ioctl(argp); 3377 case SIOCGIFCONF: 3378 return compat_dev_ifconf(net, argp); 3379 case SIOCETHTOOL: 3380 return ethtool_ioctl(net, argp); 3381 case SIOCWANDEV: 3382 return compat_siocwandev(net, argp); 3383 case SIOCGIFMAP: 3384 case SIOCSIFMAP: 3385 return compat_sioc_ifmap(net, cmd, argp); 3386 case SIOCGSTAMP_OLD: 3387 case SIOCGSTAMPNS_OLD: 3388 if (!sock->ops->gettstamp) 3389 return -ENOIOCTLCMD; 3390 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD, 3391 !COMPAT_USE_64BIT_TIME); 3392 3393 case SIOCBONDSLAVEINFOQUERY: 3394 case SIOCBONDINFOQUERY: 3395 case SIOCSHWTSTAMP: 3396 case SIOCGHWTSTAMP: 3397 return 
compat_ifr_data_ioctl(net, cmd, argp); 3398 3399 case FIOSETOWN: 3400 case SIOCSPGRP: 3401 case FIOGETOWN: 3402 case SIOCGPGRP: 3403 case SIOCBRADDBR: 3404 case SIOCBRDELBR: 3405 case SIOCGIFVLAN: 3406 case SIOCSIFVLAN: 3407 case SIOCGSKNS: 3408 case SIOCGSTAMP_NEW: 3409 case SIOCGSTAMPNS_NEW: 3410 return sock_ioctl(file, cmd, arg); 3411 3412 case SIOCGIFFLAGS: 3413 case SIOCSIFFLAGS: 3414 case SIOCGIFMETRIC: 3415 case SIOCSIFMETRIC: 3416 case SIOCGIFMTU: 3417 case SIOCSIFMTU: 3418 case SIOCGIFMEM: 3419 case SIOCSIFMEM: 3420 case SIOCGIFHWADDR: 3421 case SIOCSIFHWADDR: 3422 case SIOCADDMULTI: 3423 case SIOCDELMULTI: 3424 case SIOCGIFINDEX: 3425 case SIOCGIFADDR: 3426 case SIOCSIFADDR: 3427 case SIOCSIFHWBROADCAST: 3428 case SIOCDIFADDR: 3429 case SIOCGIFBRDADDR: 3430 case SIOCSIFBRDADDR: 3431 case SIOCGIFDSTADDR: 3432 case SIOCSIFDSTADDR: 3433 case SIOCGIFNETMASK: 3434 case SIOCSIFNETMASK: 3435 case SIOCSIFPFLAGS: 3436 case SIOCGIFPFLAGS: 3437 case SIOCGIFTXQLEN: 3438 case SIOCSIFTXQLEN: 3439 case SIOCBRADDIF: 3440 case SIOCBRDELIF: 3441 case SIOCGIFNAME: 3442 case SIOCSIFNAME: 3443 case SIOCGMIIPHY: 3444 case SIOCGMIIREG: 3445 case SIOCSMIIREG: 3446 case SIOCBONDENSLAVE: 3447 case SIOCBONDRELEASE: 3448 case SIOCBONDSETHWADDR: 3449 case SIOCBONDCHANGEACTIVE: 3450 return compat_ifreq_ioctl(net, sock, cmd, argp); 3451 3452 case SIOCSARP: 3453 case SIOCGARP: 3454 case SIOCDARP: 3455 case SIOCOUTQ: 3456 case SIOCOUTQNSD: 3457 case SIOCATMARK: 3458 return sock_do_ioctl(net, sock, cmd, arg); 3459 } 3460 3461 return -ENOIOCTLCMD; 3462 } 3463 3464 static long compat_sock_ioctl(struct file *file, unsigned int cmd, 3465 unsigned long arg) 3466 { 3467 struct socket *sock = file->private_data; 3468 int ret = -ENOIOCTLCMD; 3469 struct sock *sk; 3470 struct net *net; 3471 3472 sk = sock->sk; 3473 net = sock_net(sk); 3474 3475 if (sock->ops->compat_ioctl) 3476 ret = sock->ops->compat_ioctl(sock, cmd, arg); 3477 3478 if (ret == -ENOIOCTLCMD && 3479 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) 3480 ret = compat_wext_handle_ioctl(net, cmd, arg); 3481 3482 if (ret == -ENOIOCTLCMD) 3483 ret = compat_sock_ioctl_trans(file, sock, cmd, arg); 3484 3485 return ret; 3486 } 3487 #endif 3488 3489 /** 3490 * kernel_bind - bind an address to a socket (kernel space) 3491 * @sock: socket 3492 * @addr: address 3493 * @addrlen: length of address 3494 * 3495 * Returns 0 or an error. 3496 */ 3497 3498 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) 3499 { 3500 return sock->ops->bind(sock, addr, addrlen); 3501 } 3502 EXPORT_SYMBOL(kernel_bind); 3503 3504 /** 3505 * kernel_listen - move socket to listening state (kernel space) 3506 * @sock: socket 3507 * @backlog: pending connections queue size 3508 * 3509 * Returns 0 or an error. 3510 */ 3511 3512 int kernel_listen(struct socket *sock, int backlog) 3513 { 3514 return sock->ops->listen(sock, backlog); 3515 } 3516 EXPORT_SYMBOL(kernel_listen); 3517 3518 /** 3519 * kernel_accept - accept a connection (kernel space) 3520 * @sock: listening socket 3521 * @newsock: new connected socket 3522 * @flags: flags 3523 * 3524 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0. 3525 * If it fails, @newsock is guaranteed to be %NULL. 3526 * Returns 0 or an error. 
3527 */
3528
3529 int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3530 {
3531 struct sock *sk = sock->sk;
3532 int err;
3533
3534 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3535 newsock);
3536 if (err < 0)
3537 goto done;
3538
3539 err = sock->ops->accept(sock, *newsock, flags, true);
3540 if (err < 0) {
3541 sock_release(*newsock);
3542 *newsock = NULL;
3543 goto done;
3544 }
3545
3546 (*newsock)->ops = sock->ops;
3547 __module_get((*newsock)->ops->owner);
3548
3549 done:
3550 return err;
3551 }
3552 EXPORT_SYMBOL(kernel_accept);
3553
3554 /**
3555 * kernel_connect - connect a socket (kernel space)
3556 * @sock: socket
3557 * @addr: address
3558 * @addrlen: address length
3559 * @flags: flags (O_NONBLOCK, ...)
3560 *
3561 * For datagram sockets, @addr is the address to which datagrams are sent
3562 * by default, and the only address from which datagrams are received.
3563 * For stream sockets, attempts to connect to @addr.
3564 * Returns 0 or an error code.
3565 */
3566
3567 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3568 int flags)
3569 {
3570 return sock->ops->connect(sock, addr, addrlen, flags);
3571 }
3572 EXPORT_SYMBOL(kernel_connect);
3573
3574 /**
3575 * kernel_getsockname - get the address to which the socket is bound (kernel space)
3576 * @sock: socket
3577 * @addr: address holder
3578 *
3579 * Fills the @addr pointer with the address to which the socket is bound.
3580 * Returns 0 or an error code.
3581 */
3582
3583 int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
3584 {
3585 return sock->ops->getname(sock, addr, 0);
3586 }
3587 EXPORT_SYMBOL(kernel_getsockname);
3588
3589 /**
3590 * kernel_getpeername - get the address to which the socket is connected (kernel space)
3591 * @sock: socket
3592 * @addr: address holder
3593 *
3594 * Fills the @addr pointer with the address to which the socket is connected.
3595 * Returns 0 or an error code.
3596 */
3597
3598 int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
3599 {
3600 return sock->ops->getname(sock, addr, 1);
3601 }
3602 EXPORT_SYMBOL(kernel_getpeername);
3603
3604 /**
3605 * kernel_sendpage - send a &page through a socket (kernel space)
3606 * @sock: socket
3607 * @page: page
3608 * @offset: page offset
3609 * @size: total size in bytes
3610 * @flags: flags (MSG_DONTWAIT, ...)
3611 *
3612 * Returns the total amount sent in bytes or an error.
3613 */
3614
3615 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3616 size_t size, int flags)
3617 {
3618 if (sock->ops->sendpage) {
3619 /* Warn if an unsuitable page is used for a zero-copy send */
3620 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
3621 return sock->ops->sendpage(sock, page, offset, size, flags);
3622 }
3623 return sock_no_sendpage(sock, page, offset, size, flags);
3624 }
3625 EXPORT_SYMBOL(kernel_sendpage);
3626
3627 /**
3628 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3629 * @sk: sock
3630 * @page: page
3631 * @offset: page offset
3632 * @size: total size in bytes
3633 * @flags: flags (MSG_DONTWAIT, ...)
3634 *
3635 * Returns the total amount sent in bytes or an error.
3636 * Caller must hold @sk.
3637 */ 3638 3639 int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, 3640 size_t size, int flags) 3641 { 3642 struct socket *sock = sk->sk_socket; 3643 3644 if (sock->ops->sendpage_locked) 3645 return sock->ops->sendpage_locked(sk, page, offset, size, 3646 flags); 3647 3648 return sock_no_sendpage_locked(sk, page, offset, size, flags); 3649 } 3650 EXPORT_SYMBOL(kernel_sendpage_locked); 3651 3652 /** 3653 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space) 3654 * @sock: socket 3655 * @how: connection part 3656 * 3657 * Returns 0 or an error. 3658 */ 3659 3660 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 3661 { 3662 return sock->ops->shutdown(sock, how); 3663 } 3664 EXPORT_SYMBOL(kernel_sock_shutdown); 3665 3666 /** 3667 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket 3668 * @sk: socket 3669 * 3670 * This routine returns the IP overhead imposed by a socket i.e. 3671 * the length of the underlying IP header, depending on whether 3672 * this is an IPv4 or IPv6 socket and the length from IP options turned 3673 * on at the socket. Assumes that the caller has a lock on the socket. 3674 */ 3675 3676 u32 kernel_sock_ip_overhead(struct sock *sk) 3677 { 3678 struct inet_sock *inet; 3679 struct ip_options_rcu *opt; 3680 u32 overhead = 0; 3681 #if IS_ENABLED(CONFIG_IPV6) 3682 struct ipv6_pinfo *np; 3683 struct ipv6_txoptions *optv6 = NULL; 3684 #endif /* IS_ENABLED(CONFIG_IPV6) */ 3685 3686 if (!sk) 3687 return overhead; 3688 3689 switch (sk->sk_family) { 3690 case AF_INET: 3691 inet = inet_sk(sk); 3692 overhead += sizeof(struct iphdr); 3693 opt = rcu_dereference_protected(inet->inet_opt, 3694 sock_owned_by_user(sk)); 3695 if (opt) 3696 overhead += opt->opt.optlen; 3697 return overhead; 3698 #if IS_ENABLED(CONFIG_IPV6) 3699 case AF_INET6: 3700 np = inet6_sk(sk); 3701 overhead += sizeof(struct ipv6hdr); 3702 if (np) 3703 optv6 = rcu_dereference_protected(np->opt, 3704 sock_owned_by_user(sk)); 3705 if (optv6) 3706 overhead += (optv6->opt_flen + optv6->opt_nflen); 3707 return overhead; 3708 #endif /* IS_ENABLED(CONFIG_IPV6) */ 3709 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */ 3710 return overhead; 3711 } 3712 } 3713 EXPORT_SYMBOL(kernel_sock_ip_overhead); 3714
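/*
 * Illustrative usage sketch (not compiled): a minimal example of how an
 * in-kernel consumer might combine the kernel_* helpers above to accept a
 * single TCP connection.  The function name and the port number (12345) are
 * hypothetical, error handling is abbreviated, and releasing the listening
 * socket at the end does not affect the accepted socket.
 *
 *	static int example_accept_one(struct socket **newsock)
 *	{
 *		struct sockaddr_in sin = {
 *			.sin_family      = AF_INET,
 *			.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
 *			.sin_port        = htons(12345),
 *		};
 *		struct socket *lsock;
 *		int err;
 *
 *		err = sock_create_kern(&init_net, AF_INET, SOCK_STREAM,
 *				       IPPROTO_TCP, &lsock);
 *		if (err)
 *			return err;
 *
 *		err = kernel_bind(lsock, (struct sockaddr *)&sin, sizeof(sin));
 *		if (!err)
 *			err = kernel_listen(lsock, 1);
 *		if (!err)
 *			err = kernel_accept(lsock, newsock, 0);
 *
 *		sock_release(lsock);
 *		return err;
 *	}
 */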