1 /* 2 * NET An implementation of the SOCKET network access protocol. 3 * 4 * Version: @(#)socket.c 1.1.93 18/02/95 5 * 6 * Authors: Orest Zborowski, <obz@Kodak.COM> 7 * Ross Biro 8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 9 * 10 * Fixes: 11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in 12 * shutdown() 13 * Alan Cox : verify_area() fixes 14 * Alan Cox : Removed DDI 15 * Jonathan Kamens : SOCK_DGRAM reconnect bug 16 * Alan Cox : Moved a load of checks to the very 17 * top level. 18 * Alan Cox : Move address structures to/from user 19 * mode above the protocol layers. 20 * Rob Janssen : Allow 0 length sends. 21 * Alan Cox : Asynchronous I/O support (cribbed from the 22 * tty drivers). 23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) 24 * Jeff Uphoff : Made max number of sockets command-line 25 * configurable. 26 * Matti Aarnio : Made the number of sockets dynamic, 27 * to be allocated when needed, and mr. 28 * Uphoff's max is used as max to be 29 * allowed to allocate. 30 * Linus : Argh. removed all the socket allocation 31 * altogether: it's in the inode now. 32 * Alan Cox : Made sock_alloc()/sock_release() public 33 * for NetROM and future kernel nfsd type 34 * stuff. 35 * Alan Cox : sendmsg/recvmsg basics. 36 * Tom Dyas : Export net symbols. 37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n". 38 * Alan Cox : Added thread locking to sys_* calls 39 * for sockets. May have errors at the 40 * moment. 41 * Kevin Buhr : Fixed the dumb errors in the above. 42 * Andi Kleen : Some small cleanups, optimizations, 43 * and fixed a copy_from_user() bug. 44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) 45 * Tigran Aivazian : Made listen(2) backlog sanity checks 46 * protocol-independent 47 * 48 * 49 * This program is free software; you can redistribute it and/or 50 * modify it under the terms of the GNU General Public License 51 * as published by the Free Software Foundation; either version 52 * 2 of the License, or (at your option) any later version. 53 * 54 * 55 * This module is effectively the top level interface to the BSD socket 56 * paradigm. 57 * 58 * Based upon Swansea University Computer Society NET3.039 59 */ 60 61 #include <linux/mm.h> 62 #include <linux/socket.h> 63 #include <linux/file.h> 64 #include <linux/net.h> 65 #include <linux/interrupt.h> 66 #include <linux/thread_info.h> 67 #include <linux/rcupdate.h> 68 #include <linux/netdevice.h> 69 #include <linux/proc_fs.h> 70 #include <linux/seq_file.h> 71 #include <linux/mutex.h> 72 #include <linux/wanrouter.h> 73 #include <linux/if_bridge.h> 74 #include <linux/if_frad.h> 75 #include <linux/if_vlan.h> 76 #include <linux/init.h> 77 #include <linux/poll.h> 78 #include <linux/cache.h> 79 #include <linux/module.h> 80 #include <linux/highmem.h> 81 #include <linux/mount.h> 82 #include <linux/security.h> 83 #include <linux/syscalls.h> 84 #include <linux/compat.h> 85 #include <linux/kmod.h> 86 #include <linux/audit.h> 87 #include <linux/wireless.h> 88 #include <linux/nsproxy.h> 89 90 #include <asm/uaccess.h> 91 #include <asm/unistd.h> 92 93 #include <net/compat.h> 94 #include <net/wext.h> 95 96 #include <net/sock.h> 97 #include <linux/netfilter.h> 98 99 static int sock_no_open(struct inode *irrelevant, struct file *dontcare); 100 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 101 unsigned long nr_segs, loff_t pos); 102 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 103 unsigned long nr_segs, loff_t pos); 104 static int sock_mmap(struct file *file, struct vm_area_struct *vma); 105 106 static int sock_close(struct inode *inode, struct file *file); 107 static unsigned int sock_poll(struct file *file, 108 struct poll_table_struct *wait); 109 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 110 #ifdef CONFIG_COMPAT 111 static long compat_sock_ioctl(struct file *file, 112 unsigned int cmd, unsigned long arg); 113 #endif 114 static int sock_fasync(int fd, struct file *filp, int on); 115 static ssize_t sock_sendpage(struct file *file, struct page *page, 116 int offset, size_t size, loff_t *ppos, int more); 117 static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 118 struct pipe_inode_info *pipe, size_t len, 119 unsigned int flags); 120 121 /* 122 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 123 * in the operation structures but are done directly via the socketcall() multiplexor. 124 */ 125 126 static const struct file_operations socket_file_ops = { 127 .owner = THIS_MODULE, 128 .llseek = no_llseek, 129 .aio_read = sock_aio_read, 130 .aio_write = sock_aio_write, 131 .poll = sock_poll, 132 .unlocked_ioctl = sock_ioctl, 133 #ifdef CONFIG_COMPAT 134 .compat_ioctl = compat_sock_ioctl, 135 #endif 136 .mmap = sock_mmap, 137 .open = sock_no_open, /* special open code to disallow open via /proc */ 138 .release = sock_close, 139 .fasync = sock_fasync, 140 .sendpage = sock_sendpage, 141 .splice_write = generic_splice_sendpage, 142 .splice_read = sock_splice_read, 143 }; 144 145 /* 146 * The protocol list. Each protocol is registered in here. 147 */ 148 149 static DEFINE_SPINLOCK(net_family_lock); 150 static const struct net_proto_family *net_families[NPROTO] __read_mostly; 151 152 /* 153 * Statistics counters of the socket lists 154 */ 155 156 static DEFINE_PER_CPU(int, sockets_in_use) = 0; 157 158 /* 159 * Support routines. 160 * Move socket addresses back and forth across the kernel/user 161 * divide and look after the messy bits. 162 */ 163 164 #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 165 16 for IP, 16 for IPX, 166 24 for IPv6, 167 about 80 for AX.25 168 must be at least one bigger than 169 the AF_UNIX size (see net/unix/af_unix.c 170 :unix_mkname()). 171 */ 172 173 /** 174 * move_addr_to_kernel - copy a socket address into kernel space 175 * @uaddr: Address in user space 176 * @kaddr: Address in kernel space 177 * @ulen: Length in user space 178 * 179 * The address is copied into kernel space. If the provided address is 180 * too long an error code of -EINVAL is returned. If the copy gives 181 * invalid addresses -EFAULT is returned. On a success 0 is returned. 182 */ 183 184 int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) 185 { 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) 187 return -EINVAL; 188 if (ulen == 0) 189 return 0; 190 if (copy_from_user(kaddr, uaddr, ulen)) 191 return -EFAULT; 192 return audit_sockaddr(ulen, kaddr); 193 } 194 195 /** 196 * move_addr_to_user - copy an address to user space 197 * @kaddr: kernel space address 198 * @klen: length of address in kernel 199 * @uaddr: user space address 200 * @ulen: pointer to user length field 201 * 202 * The value pointed to by ulen on entry is the buffer length available. 203 * This is overwritten with the buffer space used. -EINVAL is returned 204 * if an overlong buffer is specified or a negative buffer size. -EFAULT 205 * is returned if either the buffer or the length field are not 206 * accessible. 207 * After copying the data up to the limit the user specifies, the true 208 * length of the data is written over the length limit the user 209 * specified. Zero is returned for a success. 210 */ 211 212 int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, 213 int __user *ulen) 214 { 215 int err; 216 int len; 217 218 err = get_user(len, ulen); 219 if (err) 220 return err; 221 if (len > klen) 222 len = klen; 223 if (len < 0 || len > sizeof(struct sockaddr_storage)) 224 return -EINVAL; 225 if (len) { 226 if (audit_sockaddr(klen, kaddr)) 227 return -ENOMEM; 228 if (copy_to_user(uaddr, kaddr, len)) 229 return -EFAULT; 230 } 231 /* 232 * "fromlen shall refer to the value before truncation.." 233 * 1003.1g 234 */ 235 return __put_user(klen, ulen); 236 } 237 238 #define SOCKFS_MAGIC 0x534F434B 239 240 static struct kmem_cache *sock_inode_cachep __read_mostly; 241 242 static struct inode *sock_alloc_inode(struct super_block *sb) 243 { 244 struct socket_alloc *ei; 245 246 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 247 if (!ei) 248 return NULL; 249 init_waitqueue_head(&ei->socket.wait); 250 251 ei->socket.fasync_list = NULL; 252 ei->socket.state = SS_UNCONNECTED; 253 ei->socket.flags = 0; 254 ei->socket.ops = NULL; 255 ei->socket.sk = NULL; 256 ei->socket.file = NULL; 257 258 return &ei->vfs_inode; 259 } 260 261 static void sock_destroy_inode(struct inode *inode) 262 { 263 kmem_cache_free(sock_inode_cachep, 264 container_of(inode, struct socket_alloc, vfs_inode)); 265 } 266 267 static void init_once(void *foo) 268 { 269 struct socket_alloc *ei = (struct socket_alloc *)foo; 270 271 inode_init_once(&ei->vfs_inode); 272 } 273 274 static int init_inodecache(void) 275 { 276 sock_inode_cachep = kmem_cache_create("sock_inode_cache", 277 sizeof(struct socket_alloc), 278 0, 279 (SLAB_HWCACHE_ALIGN | 280 SLAB_RECLAIM_ACCOUNT | 281 SLAB_MEM_SPREAD), 282 init_once); 283 if (sock_inode_cachep == NULL) 284 return -ENOMEM; 285 return 0; 286 } 287 288 static const struct super_operations sockfs_ops = { 289 .alloc_inode = sock_alloc_inode, 290 .destroy_inode =sock_destroy_inode, 291 .statfs = simple_statfs, 292 }; 293 294 static int sockfs_get_sb(struct file_system_type *fs_type, 295 int flags, const char *dev_name, void *data, 296 struct vfsmount *mnt) 297 { 298 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 299 mnt); 300 } 301 302 static struct vfsmount *sock_mnt __read_mostly; 303 304 static struct file_system_type sock_fs_type = { 305 .name = "sockfs", 306 .get_sb = sockfs_get_sb, 307 .kill_sb = kill_anon_super, 308 }; 309 310 static int sockfs_delete_dentry(struct dentry *dentry) 311 { 312 /* 313 * At creation time, we pretended this dentry was hashed 314 * (by clearing DCACHE_UNHASHED bit in d_flags) 315 * At delete time, we restore the truth : not hashed. 316 * (so that dput() can proceed correctly) 317 */ 318 dentry->d_flags |= DCACHE_UNHASHED; 319 return 0; 320 } 321 322 /* 323 * sockfs_dname() is called from d_path(). 324 */ 325 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) 326 { 327 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", 328 dentry->d_inode->i_ino); 329 } 330 331 static const struct dentry_operations sockfs_dentry_operations = { 332 .d_delete = sockfs_delete_dentry, 333 .d_dname = sockfs_dname, 334 }; 335 336 /* 337 * Obtains the first available file descriptor and sets it up for use. 338 * 339 * These functions create file structures and maps them to fd space 340 * of the current process. On success it returns file descriptor 341 * and file struct implicitly stored in sock->file. 342 * Note that another thread may close file descriptor before we return 343 * from this function. We use the fact that now we do not refer 344 * to socket after mapping. If one day we will need it, this 345 * function will increment ref. count on file by 1. 346 * 347 * In any case returned fd MAY BE not valid! 348 * This race condition is unavoidable 349 * with shared fd spaces, we cannot solve it inside kernel, 350 * but we take care of internal coherence yet. 351 */ 352 353 static int sock_alloc_fd(struct file **filep, int flags) 354 { 355 int fd; 356 357 fd = get_unused_fd_flags(flags); 358 if (likely(fd >= 0)) { 359 struct file *file = get_empty_filp(); 360 361 *filep = file; 362 if (unlikely(!file)) { 363 put_unused_fd(fd); 364 return -ENFILE; 365 } 366 } else 367 *filep = NULL; 368 return fd; 369 } 370 371 static int sock_attach_fd(struct socket *sock, struct file *file, int flags) 372 { 373 struct dentry *dentry; 374 struct qstr name = { .name = "" }; 375 376 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); 377 if (unlikely(!dentry)) 378 return -ENOMEM; 379 380 dentry->d_op = &sockfs_dentry_operations; 381 /* 382 * We dont want to push this dentry into global dentry hash table. 383 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED 384 * This permits a working /proc/$pid/fd/XXX on sockets 385 */ 386 dentry->d_flags &= ~DCACHE_UNHASHED; 387 d_instantiate(dentry, SOCK_INODE(sock)); 388 389 sock->file = file; 390 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE, 391 &socket_file_ops); 392 SOCK_INODE(sock)->i_fop = &socket_file_ops; 393 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 394 file->f_pos = 0; 395 file->private_data = sock; 396 397 return 0; 398 } 399 400 int sock_map_fd(struct socket *sock, int flags) 401 { 402 struct file *newfile; 403 int fd = sock_alloc_fd(&newfile, flags); 404 405 if (likely(fd >= 0)) { 406 int err = sock_attach_fd(sock, newfile, flags); 407 408 if (unlikely(err < 0)) { 409 put_filp(newfile); 410 put_unused_fd(fd); 411 return err; 412 } 413 fd_install(fd, newfile); 414 } 415 return fd; 416 } 417 418 static struct socket *sock_from_file(struct file *file, int *err) 419 { 420 if (file->f_op == &socket_file_ops) 421 return file->private_data; /* set in sock_map_fd */ 422 423 *err = -ENOTSOCK; 424 return NULL; 425 } 426 427 /** 428 * sockfd_lookup - Go from a file number to its socket slot 429 * @fd: file handle 430 * @err: pointer to an error code return 431 * 432 * The file handle passed in is locked and the socket it is bound 433 * too is returned. If an error occurs the err pointer is overwritten 434 * with a negative errno code and NULL is returned. The function checks 435 * for both invalid handles and passing a handle which is not a socket. 436 * 437 * On a success the socket object pointer is returned. 438 */ 439 440 struct socket *sockfd_lookup(int fd, int *err) 441 { 442 struct file *file; 443 struct socket *sock; 444 445 file = fget(fd); 446 if (!file) { 447 *err = -EBADF; 448 return NULL; 449 } 450 451 sock = sock_from_file(file, err); 452 if (!sock) 453 fput(file); 454 return sock; 455 } 456 457 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 458 { 459 struct file *file; 460 struct socket *sock; 461 462 *err = -EBADF; 463 file = fget_light(fd, fput_needed); 464 if (file) { 465 sock = sock_from_file(file, err); 466 if (sock) 467 return sock; 468 fput_light(file, *fput_needed); 469 } 470 return NULL; 471 } 472 473 /** 474 * sock_alloc - allocate a socket 475 * 476 * Allocate a new inode and socket object. The two are bound together 477 * and initialised. The socket is then returned. If we are out of inodes 478 * NULL is returned. 479 */ 480 481 static struct socket *sock_alloc(void) 482 { 483 struct inode *inode; 484 struct socket *sock; 485 486 inode = new_inode(sock_mnt->mnt_sb); 487 if (!inode) 488 return NULL; 489 490 sock = SOCKET_I(inode); 491 492 kmemcheck_annotate_bitfield(sock, type); 493 inode->i_mode = S_IFSOCK | S_IRWXUGO; 494 inode->i_uid = current_fsuid(); 495 inode->i_gid = current_fsgid(); 496 497 percpu_add(sockets_in_use, 1); 498 return sock; 499 } 500 501 /* 502 * In theory you can't get an open on this inode, but /proc provides 503 * a back door. Remember to keep it shut otherwise you'll let the 504 * creepy crawlies in. 505 */ 506 507 static int sock_no_open(struct inode *irrelevant, struct file *dontcare) 508 { 509 return -ENXIO; 510 } 511 512 const struct file_operations bad_sock_fops = { 513 .owner = THIS_MODULE, 514 .open = sock_no_open, 515 }; 516 517 /** 518 * sock_release - close a socket 519 * @sock: socket to close 520 * 521 * The socket is released from the protocol stack if it has a release 522 * callback, and the inode is then released if the socket is bound to 523 * an inode not a file. 524 */ 525 526 void sock_release(struct socket *sock) 527 { 528 if (sock->ops) { 529 struct module *owner = sock->ops->owner; 530 531 sock->ops->release(sock); 532 sock->ops = NULL; 533 module_put(owner); 534 } 535 536 if (sock->fasync_list) 537 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 538 539 percpu_sub(sockets_in_use, 1); 540 if (!sock->file) { 541 iput(SOCK_INODE(sock)); 542 return; 543 } 544 sock->file = NULL; 545 } 546 547 int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 548 union skb_shared_tx *shtx) 549 { 550 shtx->flags = 0; 551 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 552 shtx->hardware = 1; 553 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 554 shtx->software = 1; 555 return 0; 556 } 557 EXPORT_SYMBOL(sock_tx_timestamp); 558 559 static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 560 struct msghdr *msg, size_t size) 561 { 562 struct sock_iocb *si = kiocb_to_siocb(iocb); 563 int err; 564 565 si->sock = sock; 566 si->scm = NULL; 567 si->msg = msg; 568 si->size = size; 569 570 err = security_socket_sendmsg(sock, msg, size); 571 if (err) 572 return err; 573 574 return sock->ops->sendmsg(iocb, sock, msg, size); 575 } 576 577 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 578 { 579 struct kiocb iocb; 580 struct sock_iocb siocb; 581 int ret; 582 583 init_sync_kiocb(&iocb, NULL); 584 iocb.private = &siocb; 585 ret = __sock_sendmsg(&iocb, sock, msg, size); 586 if (-EIOCBQUEUED == ret) 587 ret = wait_on_sync_kiocb(&iocb); 588 return ret; 589 } 590 591 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 592 struct kvec *vec, size_t num, size_t size) 593 { 594 mm_segment_t oldfs = get_fs(); 595 int result; 596 597 set_fs(KERNEL_DS); 598 /* 599 * the following is safe, since for compiler definitions of kvec and 600 * iovec are identical, yielding the same in-core layout and alignment 601 */ 602 msg->msg_iov = (struct iovec *)vec; 603 msg->msg_iovlen = num; 604 result = sock_sendmsg(sock, msg, size); 605 set_fs(oldfs); 606 return result; 607 } 608 609 static int ktime2ts(ktime_t kt, struct timespec *ts) 610 { 611 if (kt.tv64) { 612 *ts = ktime_to_timespec(kt); 613 return 1; 614 } else { 615 return 0; 616 } 617 } 618 619 /* 620 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 621 */ 622 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 623 struct sk_buff *skb) 624 { 625 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); 626 struct timespec ts[3]; 627 int empty = 1; 628 struct skb_shared_hwtstamps *shhwtstamps = 629 skb_hwtstamps(skb); 630 631 /* Race occurred between timestamp enabling and packet 632 receiving. Fill in the current time for now. */ 633 if (need_software_tstamp && skb->tstamp.tv64 == 0) 634 __net_timestamp(skb); 635 636 if (need_software_tstamp) { 637 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 638 struct timeval tv; 639 skb_get_timestamp(skb, &tv); 640 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, 641 sizeof(tv), &tv); 642 } else { 643 struct timespec ts; 644 skb_get_timestampns(skb, &ts); 645 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, 646 sizeof(ts), &ts); 647 } 648 } 649 650 651 memset(ts, 0, sizeof(ts)); 652 if (skb->tstamp.tv64 && 653 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { 654 skb_get_timestampns(skb, ts + 0); 655 empty = 0; 656 } 657 if (shhwtstamps) { 658 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && 659 ktime2ts(shhwtstamps->syststamp, ts + 1)) 660 empty = 0; 661 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && 662 ktime2ts(shhwtstamps->hwtstamp, ts + 2)) 663 empty = 0; 664 } 665 if (!empty) 666 put_cmsg(msg, SOL_SOCKET, 667 SCM_TIMESTAMPING, sizeof(ts), &ts); 668 } 669 670 EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 671 672 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 673 struct msghdr *msg, size_t size, int flags) 674 { 675 int err; 676 struct sock_iocb *si = kiocb_to_siocb(iocb); 677 678 si->sock = sock; 679 si->scm = NULL; 680 si->msg = msg; 681 si->size = size; 682 si->flags = flags; 683 684 err = security_socket_recvmsg(sock, msg, size, flags); 685 if (err) 686 return err; 687 688 return sock->ops->recvmsg(iocb, sock, msg, size, flags); 689 } 690 691 int sock_recvmsg(struct socket *sock, struct msghdr *msg, 692 size_t size, int flags) 693 { 694 struct kiocb iocb; 695 struct sock_iocb siocb; 696 int ret; 697 698 init_sync_kiocb(&iocb, NULL); 699 iocb.private = &siocb; 700 ret = __sock_recvmsg(&iocb, sock, msg, size, flags); 701 if (-EIOCBQUEUED == ret) 702 ret = wait_on_sync_kiocb(&iocb); 703 return ret; 704 } 705 706 int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 707 struct kvec *vec, size_t num, size_t size, int flags) 708 { 709 mm_segment_t oldfs = get_fs(); 710 int result; 711 712 set_fs(KERNEL_DS); 713 /* 714 * the following is safe, since for compiler definitions of kvec and 715 * iovec are identical, yielding the same in-core layout and alignment 716 */ 717 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; 718 result = sock_recvmsg(sock, msg, size, flags); 719 set_fs(oldfs); 720 return result; 721 } 722 723 static void sock_aio_dtor(struct kiocb *iocb) 724 { 725 kfree(iocb->private); 726 } 727 728 static ssize_t sock_sendpage(struct file *file, struct page *page, 729 int offset, size_t size, loff_t *ppos, int more) 730 { 731 struct socket *sock; 732 int flags; 733 734 sock = file->private_data; 735 736 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; 737 if (more) 738 flags |= MSG_MORE; 739 740 return kernel_sendpage(sock, page, offset, size, flags); 741 } 742 743 static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 744 struct pipe_inode_info *pipe, size_t len, 745 unsigned int flags) 746 { 747 struct socket *sock = file->private_data; 748 749 if (unlikely(!sock->ops->splice_read)) 750 return -EINVAL; 751 752 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 753 } 754 755 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 756 struct sock_iocb *siocb) 757 { 758 if (!is_sync_kiocb(iocb)) { 759 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 760 if (!siocb) 761 return NULL; 762 iocb->ki_dtor = sock_aio_dtor; 763 } 764 765 siocb->kiocb = iocb; 766 iocb->private = siocb; 767 return siocb; 768 } 769 770 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 771 struct file *file, const struct iovec *iov, 772 unsigned long nr_segs) 773 { 774 struct socket *sock = file->private_data; 775 size_t size = 0; 776 int i; 777 778 for (i = 0; i < nr_segs; i++) 779 size += iov[i].iov_len; 780 781 msg->msg_name = NULL; 782 msg->msg_namelen = 0; 783 msg->msg_control = NULL; 784 msg->msg_controllen = 0; 785 msg->msg_iov = (struct iovec *)iov; 786 msg->msg_iovlen = nr_segs; 787 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 788 789 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); 790 } 791 792 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 793 unsigned long nr_segs, loff_t pos) 794 { 795 struct sock_iocb siocb, *x; 796 797 if (pos != 0) 798 return -ESPIPE; 799 800 if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 801 return 0; 802 803 804 x = alloc_sock_iocb(iocb, &siocb); 805 if (!x) 806 return -ENOMEM; 807 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 808 } 809 810 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 811 struct file *file, const struct iovec *iov, 812 unsigned long nr_segs) 813 { 814 struct socket *sock = file->private_data; 815 size_t size = 0; 816 int i; 817 818 for (i = 0; i < nr_segs; i++) 819 size += iov[i].iov_len; 820 821 msg->msg_name = NULL; 822 msg->msg_namelen = 0; 823 msg->msg_control = NULL; 824 msg->msg_controllen = 0; 825 msg->msg_iov = (struct iovec *)iov; 826 msg->msg_iovlen = nr_segs; 827 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 828 if (sock->type == SOCK_SEQPACKET) 829 msg->msg_flags |= MSG_EOR; 830 831 return __sock_sendmsg(iocb, sock, msg, size); 832 } 833 834 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 835 unsigned long nr_segs, loff_t pos) 836 { 837 struct sock_iocb siocb, *x; 838 839 if (pos != 0) 840 return -ESPIPE; 841 842 x = alloc_sock_iocb(iocb, &siocb); 843 if (!x) 844 return -ENOMEM; 845 846 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 847 } 848 849 /* 850 * Atomic setting of ioctl hooks to avoid race 851 * with module unload. 852 */ 853 854 static DEFINE_MUTEX(br_ioctl_mutex); 855 static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; 856 857 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 858 { 859 mutex_lock(&br_ioctl_mutex); 860 br_ioctl_hook = hook; 861 mutex_unlock(&br_ioctl_mutex); 862 } 863 864 EXPORT_SYMBOL(brioctl_set); 865 866 static DEFINE_MUTEX(vlan_ioctl_mutex); 867 static int (*vlan_ioctl_hook) (struct net *, void __user *arg); 868 869 void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) 870 { 871 mutex_lock(&vlan_ioctl_mutex); 872 vlan_ioctl_hook = hook; 873 mutex_unlock(&vlan_ioctl_mutex); 874 } 875 876 EXPORT_SYMBOL(vlan_ioctl_set); 877 878 static DEFINE_MUTEX(dlci_ioctl_mutex); 879 static int (*dlci_ioctl_hook) (unsigned int, void __user *); 880 881 void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) 882 { 883 mutex_lock(&dlci_ioctl_mutex); 884 dlci_ioctl_hook = hook; 885 mutex_unlock(&dlci_ioctl_mutex); 886 } 887 888 EXPORT_SYMBOL(dlci_ioctl_set); 889 890 /* 891 * With an ioctl, arg may well be a user mode pointer, but we don't know 892 * what to do with it - that's up to the protocol still. 893 */ 894 895 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 896 { 897 struct socket *sock; 898 struct sock *sk; 899 void __user *argp = (void __user *)arg; 900 int pid, err; 901 struct net *net; 902 903 sock = file->private_data; 904 sk = sock->sk; 905 net = sock_net(sk); 906 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { 907 err = dev_ioctl(net, cmd, argp); 908 } else 909 #ifdef CONFIG_WIRELESS_EXT 910 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 911 err = dev_ioctl(net, cmd, argp); 912 } else 913 #endif /* CONFIG_WIRELESS_EXT */ 914 switch (cmd) { 915 case FIOSETOWN: 916 case SIOCSPGRP: 917 err = -EFAULT; 918 if (get_user(pid, (int __user *)argp)) 919 break; 920 err = f_setown(sock->file, pid, 1); 921 break; 922 case FIOGETOWN: 923 case SIOCGPGRP: 924 err = put_user(f_getown(sock->file), 925 (int __user *)argp); 926 break; 927 case SIOCGIFBR: 928 case SIOCSIFBR: 929 case SIOCBRADDBR: 930 case SIOCBRDELBR: 931 err = -ENOPKG; 932 if (!br_ioctl_hook) 933 request_module("bridge"); 934 935 mutex_lock(&br_ioctl_mutex); 936 if (br_ioctl_hook) 937 err = br_ioctl_hook(net, cmd, argp); 938 mutex_unlock(&br_ioctl_mutex); 939 break; 940 case SIOCGIFVLAN: 941 case SIOCSIFVLAN: 942 err = -ENOPKG; 943 if (!vlan_ioctl_hook) 944 request_module("8021q"); 945 946 mutex_lock(&vlan_ioctl_mutex); 947 if (vlan_ioctl_hook) 948 err = vlan_ioctl_hook(net, argp); 949 mutex_unlock(&vlan_ioctl_mutex); 950 break; 951 case SIOCADDDLCI: 952 case SIOCDELDLCI: 953 err = -ENOPKG; 954 if (!dlci_ioctl_hook) 955 request_module("dlci"); 956 957 mutex_lock(&dlci_ioctl_mutex); 958 if (dlci_ioctl_hook) 959 err = dlci_ioctl_hook(cmd, argp); 960 mutex_unlock(&dlci_ioctl_mutex); 961 break; 962 default: 963 err = sock->ops->ioctl(sock, cmd, arg); 964 965 /* 966 * If this ioctl is unknown try to hand it down 967 * to the NIC driver. 968 */ 969 if (err == -ENOIOCTLCMD) 970 err = dev_ioctl(net, cmd, argp); 971 break; 972 } 973 return err; 974 } 975 976 int sock_create_lite(int family, int type, int protocol, struct socket **res) 977 { 978 int err; 979 struct socket *sock = NULL; 980 981 err = security_socket_create(family, type, protocol, 1); 982 if (err) 983 goto out; 984 985 sock = sock_alloc(); 986 if (!sock) { 987 err = -ENOMEM; 988 goto out; 989 } 990 991 sock->type = type; 992 err = security_socket_post_create(sock, family, type, protocol, 1); 993 if (err) 994 goto out_release; 995 996 out: 997 *res = sock; 998 return err; 999 out_release: 1000 sock_release(sock); 1001 sock = NULL; 1002 goto out; 1003 } 1004 1005 /* No kernel lock held - perfect */ 1006 static unsigned int sock_poll(struct file *file, poll_table *wait) 1007 { 1008 struct socket *sock; 1009 1010 /* 1011 * We can't return errors to poll, so it's either yes or no. 1012 */ 1013 sock = file->private_data; 1014 return sock->ops->poll(file, sock, wait); 1015 } 1016 1017 static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1018 { 1019 struct socket *sock = file->private_data; 1020 1021 return sock->ops->mmap(file, sock, vma); 1022 } 1023 1024 static int sock_close(struct inode *inode, struct file *filp) 1025 { 1026 /* 1027 * It was possible the inode is NULL we were 1028 * closing an unfinished socket. 1029 */ 1030 1031 if (!inode) { 1032 printk(KERN_DEBUG "sock_close: NULL inode\n"); 1033 return 0; 1034 } 1035 sock_release(SOCKET_I(inode)); 1036 return 0; 1037 } 1038 1039 /* 1040 * Update the socket async list 1041 * 1042 * Fasync_list locking strategy. 1043 * 1044 * 1. fasync_list is modified only under process context socket lock 1045 * i.e. under semaphore. 1046 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 1047 * or under socket lock. 1048 * 3. fasync_list can be used from softirq context, so that 1049 * modification under socket lock have to be enhanced with 1050 * write_lock_bh(&sk->sk_callback_lock). 1051 * --ANK (990710) 1052 */ 1053 1054 static int sock_fasync(int fd, struct file *filp, int on) 1055 { 1056 struct fasync_struct *fa, *fna = NULL, **prev; 1057 struct socket *sock; 1058 struct sock *sk; 1059 1060 if (on) { 1061 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); 1062 if (fna == NULL) 1063 return -ENOMEM; 1064 } 1065 1066 sock = filp->private_data; 1067 1068 sk = sock->sk; 1069 if (sk == NULL) { 1070 kfree(fna); 1071 return -EINVAL; 1072 } 1073 1074 lock_sock(sk); 1075 1076 spin_lock(&filp->f_lock); 1077 if (on) 1078 filp->f_flags |= FASYNC; 1079 else 1080 filp->f_flags &= ~FASYNC; 1081 spin_unlock(&filp->f_lock); 1082 1083 prev = &(sock->fasync_list); 1084 1085 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) 1086 if (fa->fa_file == filp) 1087 break; 1088 1089 if (on) { 1090 if (fa != NULL) { 1091 write_lock_bh(&sk->sk_callback_lock); 1092 fa->fa_fd = fd; 1093 write_unlock_bh(&sk->sk_callback_lock); 1094 1095 kfree(fna); 1096 goto out; 1097 } 1098 fna->fa_file = filp; 1099 fna->fa_fd = fd; 1100 fna->magic = FASYNC_MAGIC; 1101 fna->fa_next = sock->fasync_list; 1102 write_lock_bh(&sk->sk_callback_lock); 1103 sock->fasync_list = fna; 1104 write_unlock_bh(&sk->sk_callback_lock); 1105 } else { 1106 if (fa != NULL) { 1107 write_lock_bh(&sk->sk_callback_lock); 1108 *prev = fa->fa_next; 1109 write_unlock_bh(&sk->sk_callback_lock); 1110 kfree(fa); 1111 } 1112 } 1113 1114 out: 1115 release_sock(sock->sk); 1116 return 0; 1117 } 1118 1119 /* This function may be called only under socket lock or callback_lock */ 1120 1121 int sock_wake_async(struct socket *sock, int how, int band) 1122 { 1123 if (!sock || !sock->fasync_list) 1124 return -1; 1125 switch (how) { 1126 case SOCK_WAKE_WAITD: 1127 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1128 break; 1129 goto call_kill; 1130 case SOCK_WAKE_SPACE: 1131 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 1132 break; 1133 /* fall through */ 1134 case SOCK_WAKE_IO: 1135 call_kill: 1136 __kill_fasync(sock->fasync_list, SIGIO, band); 1137 break; 1138 case SOCK_WAKE_URG: 1139 __kill_fasync(sock->fasync_list, SIGURG, band); 1140 } 1141 return 0; 1142 } 1143 1144 static int __sock_create(struct net *net, int family, int type, int protocol, 1145 struct socket **res, int kern) 1146 { 1147 int err; 1148 struct socket *sock; 1149 const struct net_proto_family *pf; 1150 1151 /* 1152 * Check protocol is in range 1153 */ 1154 if (family < 0 || family >= NPROTO) 1155 return -EAFNOSUPPORT; 1156 if (type < 0 || type >= SOCK_MAX) 1157 return -EINVAL; 1158 1159 /* Compatibility. 1160 1161 This uglymoron is moved from INET layer to here to avoid 1162 deadlock in module load. 1163 */ 1164 if (family == PF_INET && type == SOCK_PACKET) { 1165 static int warned; 1166 if (!warned) { 1167 warned = 1; 1168 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", 1169 current->comm); 1170 } 1171 family = PF_PACKET; 1172 } 1173 1174 err = security_socket_create(family, type, protocol, kern); 1175 if (err) 1176 return err; 1177 1178 /* 1179 * Allocate the socket and allow the family to set things up. if 1180 * the protocol is 0, the family is instructed to select an appropriate 1181 * default. 1182 */ 1183 sock = sock_alloc(); 1184 if (!sock) { 1185 if (net_ratelimit()) 1186 printk(KERN_WARNING "socket: no more sockets\n"); 1187 return -ENFILE; /* Not exactly a match, but its the 1188 closest posix thing */ 1189 } 1190 1191 sock->type = type; 1192 1193 #ifdef CONFIG_MODULES 1194 /* Attempt to load a protocol module if the find failed. 1195 * 1196 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 1197 * requested real, full-featured networking support upon configuration. 1198 * Otherwise module support will break! 1199 */ 1200 if (net_families[family] == NULL) 1201 request_module("net-pf-%d", family); 1202 #endif 1203 1204 rcu_read_lock(); 1205 pf = rcu_dereference(net_families[family]); 1206 err = -EAFNOSUPPORT; 1207 if (!pf) 1208 goto out_release; 1209 1210 /* 1211 * We will call the ->create function, that possibly is in a loadable 1212 * module, so we have to bump that loadable module refcnt first. 1213 */ 1214 if (!try_module_get(pf->owner)) 1215 goto out_release; 1216 1217 /* Now protected by module ref count */ 1218 rcu_read_unlock(); 1219 1220 err = pf->create(net, sock, protocol); 1221 if (err < 0) 1222 goto out_module_put; 1223 1224 /* 1225 * Now to bump the refcnt of the [loadable] module that owns this 1226 * socket at sock_release time we decrement its refcnt. 1227 */ 1228 if (!try_module_get(sock->ops->owner)) 1229 goto out_module_busy; 1230 1231 /* 1232 * Now that we're done with the ->create function, the [loadable] 1233 * module can have its refcnt decremented 1234 */ 1235 module_put(pf->owner); 1236 err = security_socket_post_create(sock, family, type, protocol, kern); 1237 if (err) 1238 goto out_sock_release; 1239 *res = sock; 1240 1241 return 0; 1242 1243 out_module_busy: 1244 err = -EAFNOSUPPORT; 1245 out_module_put: 1246 sock->ops = NULL; 1247 module_put(pf->owner); 1248 out_sock_release: 1249 sock_release(sock); 1250 return err; 1251 1252 out_release: 1253 rcu_read_unlock(); 1254 goto out_sock_release; 1255 } 1256 1257 int sock_create(int family, int type, int protocol, struct socket **res) 1258 { 1259 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1260 } 1261 1262 int sock_create_kern(int family, int type, int protocol, struct socket **res) 1263 { 1264 return __sock_create(&init_net, family, type, protocol, res, 1); 1265 } 1266 1267 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 1268 { 1269 int retval; 1270 struct socket *sock; 1271 int flags; 1272 1273 /* Check the SOCK_* constants for consistency. */ 1274 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); 1275 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); 1276 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 1277 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 1278 1279 flags = type & ~SOCK_TYPE_MASK; 1280 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1281 return -EINVAL; 1282 type &= SOCK_TYPE_MASK; 1283 1284 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1285 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1286 1287 retval = sock_create(family, type, protocol, &sock); 1288 if (retval < 0) 1289 goto out; 1290 1291 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 1292 if (retval < 0) 1293 goto out_release; 1294 1295 out: 1296 /* It may be already another descriptor 8) Not kernel problem. */ 1297 return retval; 1298 1299 out_release: 1300 sock_release(sock); 1301 return retval; 1302 } 1303 1304 /* 1305 * Create a pair of connected sockets. 1306 */ 1307 1308 SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, 1309 int __user *, usockvec) 1310 { 1311 struct socket *sock1, *sock2; 1312 int fd1, fd2, err; 1313 struct file *newfile1, *newfile2; 1314 int flags; 1315 1316 flags = type & ~SOCK_TYPE_MASK; 1317 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1318 return -EINVAL; 1319 type &= SOCK_TYPE_MASK; 1320 1321 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1322 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1323 1324 /* 1325 * Obtain the first socket and check if the underlying protocol 1326 * supports the socketpair call. 1327 */ 1328 1329 err = sock_create(family, type, protocol, &sock1); 1330 if (err < 0) 1331 goto out; 1332 1333 err = sock_create(family, type, protocol, &sock2); 1334 if (err < 0) 1335 goto out_release_1; 1336 1337 err = sock1->ops->socketpair(sock1, sock2); 1338 if (err < 0) 1339 goto out_release_both; 1340 1341 fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC); 1342 if (unlikely(fd1 < 0)) { 1343 err = fd1; 1344 goto out_release_both; 1345 } 1346 1347 fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC); 1348 if (unlikely(fd2 < 0)) { 1349 err = fd2; 1350 put_filp(newfile1); 1351 put_unused_fd(fd1); 1352 goto out_release_both; 1353 } 1354 1355 err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK); 1356 if (unlikely(err < 0)) { 1357 goto out_fd2; 1358 } 1359 1360 err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK); 1361 if (unlikely(err < 0)) { 1362 fput(newfile1); 1363 goto out_fd1; 1364 } 1365 1366 audit_fd_pair(fd1, fd2); 1367 fd_install(fd1, newfile1); 1368 fd_install(fd2, newfile2); 1369 /* fd1 and fd2 may be already another descriptors. 1370 * Not kernel problem. 1371 */ 1372 1373 err = put_user(fd1, &usockvec[0]); 1374 if (!err) 1375 err = put_user(fd2, &usockvec[1]); 1376 if (!err) 1377 return 0; 1378 1379 sys_close(fd2); 1380 sys_close(fd1); 1381 return err; 1382 1383 out_release_both: 1384 sock_release(sock2); 1385 out_release_1: 1386 sock_release(sock1); 1387 out: 1388 return err; 1389 1390 out_fd2: 1391 put_filp(newfile1); 1392 sock_release(sock1); 1393 out_fd1: 1394 put_filp(newfile2); 1395 sock_release(sock2); 1396 put_unused_fd(fd1); 1397 put_unused_fd(fd2); 1398 goto out; 1399 } 1400 1401 /* 1402 * Bind a name to a socket. Nothing much to do here since it's 1403 * the protocol's responsibility to handle the local address. 1404 * 1405 * We move the socket address to kernel space before we call 1406 * the protocol layer (having also checked the address is ok). 1407 */ 1408 1409 SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) 1410 { 1411 struct socket *sock; 1412 struct sockaddr_storage address; 1413 int err, fput_needed; 1414 1415 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1416 if (sock) { 1417 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); 1418 if (err >= 0) { 1419 err = security_socket_bind(sock, 1420 (struct sockaddr *)&address, 1421 addrlen); 1422 if (!err) 1423 err = sock->ops->bind(sock, 1424 (struct sockaddr *) 1425 &address, addrlen); 1426 } 1427 fput_light(sock->file, fput_needed); 1428 } 1429 return err; 1430 } 1431 1432 /* 1433 * Perform a listen. Basically, we allow the protocol to do anything 1434 * necessary for a listen, and if that works, we mark the socket as 1435 * ready for listening. 1436 */ 1437 1438 SYSCALL_DEFINE2(listen, int, fd, int, backlog) 1439 { 1440 struct socket *sock; 1441 int err, fput_needed; 1442 int somaxconn; 1443 1444 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1445 if (sock) { 1446 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; 1447 if ((unsigned)backlog > somaxconn) 1448 backlog = somaxconn; 1449 1450 err = security_socket_listen(sock, backlog); 1451 if (!err) 1452 err = sock->ops->listen(sock, backlog); 1453 1454 fput_light(sock->file, fput_needed); 1455 } 1456 return err; 1457 } 1458 1459 /* 1460 * For accept, we attempt to create a new socket, set up the link 1461 * with the client, wake up the client, then return the new 1462 * connected fd. We collect the address of the connector in kernel 1463 * space and move it to user at the very end. This is unclean because 1464 * we open the socket then return an error. 1465 * 1466 * 1003.1g adds the ability to recvmsg() to query connection pending 1467 * status to recvmsg. We need to add that support in a way thats 1468 * clean when we restucture accept also. 1469 */ 1470 1471 SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, 1472 int __user *, upeer_addrlen, int, flags) 1473 { 1474 struct socket *sock, *newsock; 1475 struct file *newfile; 1476 int err, len, newfd, fput_needed; 1477 struct sockaddr_storage address; 1478 1479 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1480 return -EINVAL; 1481 1482 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1483 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1484 1485 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1486 if (!sock) 1487 goto out; 1488 1489 err = -ENFILE; 1490 if (!(newsock = sock_alloc())) 1491 goto out_put; 1492 1493 newsock->type = sock->type; 1494 newsock->ops = sock->ops; 1495 1496 /* 1497 * We don't need try_module_get here, as the listening socket (sock) 1498 * has the protocol module (sock->ops->owner) held. 1499 */ 1500 __module_get(newsock->ops->owner); 1501 1502 newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC); 1503 if (unlikely(newfd < 0)) { 1504 err = newfd; 1505 sock_release(newsock); 1506 goto out_put; 1507 } 1508 1509 err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK); 1510 if (err < 0) 1511 goto out_fd_simple; 1512 1513 err = security_socket_accept(sock, newsock); 1514 if (err) 1515 goto out_fd; 1516 1517 err = sock->ops->accept(sock, newsock, sock->file->f_flags); 1518 if (err < 0) 1519 goto out_fd; 1520 1521 if (upeer_sockaddr) { 1522 if (newsock->ops->getname(newsock, (struct sockaddr *)&address, 1523 &len, 2) < 0) { 1524 err = -ECONNABORTED; 1525 goto out_fd; 1526 } 1527 err = move_addr_to_user((struct sockaddr *)&address, 1528 len, upeer_sockaddr, upeer_addrlen); 1529 if (err < 0) 1530 goto out_fd; 1531 } 1532 1533 /* File flags are not inherited via accept() unlike another OSes. */ 1534 1535 fd_install(newfd, newfile); 1536 err = newfd; 1537 1538 out_put: 1539 fput_light(sock->file, fput_needed); 1540 out: 1541 return err; 1542 out_fd_simple: 1543 sock_release(newsock); 1544 put_filp(newfile); 1545 put_unused_fd(newfd); 1546 goto out_put; 1547 out_fd: 1548 fput(newfile); 1549 put_unused_fd(newfd); 1550 goto out_put; 1551 } 1552 1553 SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, 1554 int __user *, upeer_addrlen) 1555 { 1556 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); 1557 } 1558 1559 /* 1560 * Attempt to connect to a socket with the server address. The address 1561 * is in user space so we verify it is OK and move it to kernel space. 1562 * 1563 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to 1564 * break bindings 1565 * 1566 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and 1567 * other SEQPACKET protocols that take time to connect() as it doesn't 1568 * include the -EINPROGRESS status for such sockets. 1569 */ 1570 1571 SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, 1572 int, addrlen) 1573 { 1574 struct socket *sock; 1575 struct sockaddr_storage address; 1576 int err, fput_needed; 1577 1578 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1579 if (!sock) 1580 goto out; 1581 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); 1582 if (err < 0) 1583 goto out_put; 1584 1585 err = 1586 security_socket_connect(sock, (struct sockaddr *)&address, addrlen); 1587 if (err) 1588 goto out_put; 1589 1590 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, 1591 sock->file->f_flags); 1592 out_put: 1593 fput_light(sock->file, fput_needed); 1594 out: 1595 return err; 1596 } 1597 1598 /* 1599 * Get the local address ('name') of a socket object. Move the obtained 1600 * name to user space. 1601 */ 1602 1603 SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, 1604 int __user *, usockaddr_len) 1605 { 1606 struct socket *sock; 1607 struct sockaddr_storage address; 1608 int len, err, fput_needed; 1609 1610 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1611 if (!sock) 1612 goto out; 1613 1614 err = security_socket_getsockname(sock); 1615 if (err) 1616 goto out_put; 1617 1618 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); 1619 if (err) 1620 goto out_put; 1621 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); 1622 1623 out_put: 1624 fput_light(sock->file, fput_needed); 1625 out: 1626 return err; 1627 } 1628 1629 /* 1630 * Get the remote address ('name') of a socket object. Move the obtained 1631 * name to user space. 1632 */ 1633 1634 SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, 1635 int __user *, usockaddr_len) 1636 { 1637 struct socket *sock; 1638 struct sockaddr_storage address; 1639 int len, err, fput_needed; 1640 1641 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1642 if (sock != NULL) { 1643 err = security_socket_getpeername(sock); 1644 if (err) { 1645 fput_light(sock->file, fput_needed); 1646 return err; 1647 } 1648 1649 err = 1650 sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1651 1); 1652 if (!err) 1653 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, 1654 usockaddr_len); 1655 fput_light(sock->file, fput_needed); 1656 } 1657 return err; 1658 } 1659 1660 /* 1661 * Send a datagram to a given address. We move the address into kernel 1662 * space and check the user space data area is readable before invoking 1663 * the protocol. 1664 */ 1665 1666 SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, 1667 unsigned, flags, struct sockaddr __user *, addr, 1668 int, addr_len) 1669 { 1670 struct socket *sock; 1671 struct sockaddr_storage address; 1672 int err; 1673 struct msghdr msg; 1674 struct iovec iov; 1675 int fput_needed; 1676 1677 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1678 if (!sock) 1679 goto out; 1680 1681 iov.iov_base = buff; 1682 iov.iov_len = len; 1683 msg.msg_name = NULL; 1684 msg.msg_iov = &iov; 1685 msg.msg_iovlen = 1; 1686 msg.msg_control = NULL; 1687 msg.msg_controllen = 0; 1688 msg.msg_namelen = 0; 1689 if (addr) { 1690 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); 1691 if (err < 0) 1692 goto out_put; 1693 msg.msg_name = (struct sockaddr *)&address; 1694 msg.msg_namelen = addr_len; 1695 } 1696 if (sock->file->f_flags & O_NONBLOCK) 1697 flags |= MSG_DONTWAIT; 1698 msg.msg_flags = flags; 1699 err = sock_sendmsg(sock, &msg, len); 1700 1701 out_put: 1702 fput_light(sock->file, fput_needed); 1703 out: 1704 return err; 1705 } 1706 1707 /* 1708 * Send a datagram down a socket. 1709 */ 1710 1711 SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, 1712 unsigned, flags) 1713 { 1714 return sys_sendto(fd, buff, len, flags, NULL, 0); 1715 } 1716 1717 /* 1718 * Receive a frame from the socket and optionally record the address of the 1719 * sender. We verify the buffers are writable and if needed move the 1720 * sender address from kernel to user space. 1721 */ 1722 1723 SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, 1724 unsigned, flags, struct sockaddr __user *, addr, 1725 int __user *, addr_len) 1726 { 1727 struct socket *sock; 1728 struct iovec iov; 1729 struct msghdr msg; 1730 struct sockaddr_storage address; 1731 int err, err2; 1732 int fput_needed; 1733 1734 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1735 if (!sock) 1736 goto out; 1737 1738 msg.msg_control = NULL; 1739 msg.msg_controllen = 0; 1740 msg.msg_iovlen = 1; 1741 msg.msg_iov = &iov; 1742 iov.iov_len = size; 1743 iov.iov_base = ubuf; 1744 msg.msg_name = (struct sockaddr *)&address; 1745 msg.msg_namelen = sizeof(address); 1746 if (sock->file->f_flags & O_NONBLOCK) 1747 flags |= MSG_DONTWAIT; 1748 err = sock_recvmsg(sock, &msg, size, flags); 1749 1750 if (err >= 0 && addr != NULL) { 1751 err2 = move_addr_to_user((struct sockaddr *)&address, 1752 msg.msg_namelen, addr, addr_len); 1753 if (err2 < 0) 1754 err = err2; 1755 } 1756 1757 fput_light(sock->file, fput_needed); 1758 out: 1759 return err; 1760 } 1761 1762 /* 1763 * Receive a datagram from a socket. 1764 */ 1765 1766 asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, 1767 unsigned flags) 1768 { 1769 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 1770 } 1771 1772 /* 1773 * Set a socket option. Because we don't know the option lengths we have 1774 * to pass the user mode parameter for the protocols to sort out. 1775 */ 1776 1777 SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, 1778 char __user *, optval, int, optlen) 1779 { 1780 int err, fput_needed; 1781 struct socket *sock; 1782 1783 if (optlen < 0) 1784 return -EINVAL; 1785 1786 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1787 if (sock != NULL) { 1788 err = security_socket_setsockopt(sock, level, optname); 1789 if (err) 1790 goto out_put; 1791 1792 if (level == SOL_SOCKET) 1793 err = 1794 sock_setsockopt(sock, level, optname, optval, 1795 optlen); 1796 else 1797 err = 1798 sock->ops->setsockopt(sock, level, optname, optval, 1799 optlen); 1800 out_put: 1801 fput_light(sock->file, fput_needed); 1802 } 1803 return err; 1804 } 1805 1806 /* 1807 * Get a socket option. Because we don't know the option lengths we have 1808 * to pass a user mode parameter for the protocols to sort out. 1809 */ 1810 1811 SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, 1812 char __user *, optval, int __user *, optlen) 1813 { 1814 int err, fput_needed; 1815 struct socket *sock; 1816 1817 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1818 if (sock != NULL) { 1819 err = security_socket_getsockopt(sock, level, optname); 1820 if (err) 1821 goto out_put; 1822 1823 if (level == SOL_SOCKET) 1824 err = 1825 sock_getsockopt(sock, level, optname, optval, 1826 optlen); 1827 else 1828 err = 1829 sock->ops->getsockopt(sock, level, optname, optval, 1830 optlen); 1831 out_put: 1832 fput_light(sock->file, fput_needed); 1833 } 1834 return err; 1835 } 1836 1837 /* 1838 * Shutdown a socket. 1839 */ 1840 1841 SYSCALL_DEFINE2(shutdown, int, fd, int, how) 1842 { 1843 int err, fput_needed; 1844 struct socket *sock; 1845 1846 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1847 if (sock != NULL) { 1848 err = security_socket_shutdown(sock, how); 1849 if (!err) 1850 err = sock->ops->shutdown(sock, how); 1851 fput_light(sock->file, fput_needed); 1852 } 1853 return err; 1854 } 1855 1856 /* A couple of helpful macros for getting the address of the 32/64 bit 1857 * fields which are the same type (int / unsigned) on our platforms. 1858 */ 1859 #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) 1860 #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1861 #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1862 1863 /* 1864 * BSD sendmsg interface 1865 */ 1866 1867 SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) 1868 { 1869 struct compat_msghdr __user *msg_compat = 1870 (struct compat_msghdr __user *)msg; 1871 struct socket *sock; 1872 struct sockaddr_storage address; 1873 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1874 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1875 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1876 /* 20 is size of ipv6_pktinfo */ 1877 unsigned char *ctl_buf = ctl; 1878 struct msghdr msg_sys; 1879 int err, ctl_len, iov_size, total_len; 1880 int fput_needed; 1881 1882 err = -EFAULT; 1883 if (MSG_CMSG_COMPAT & flags) { 1884 if (get_compat_msghdr(&msg_sys, msg_compat)) 1885 return -EFAULT; 1886 } 1887 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1888 return -EFAULT; 1889 1890 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1891 if (!sock) 1892 goto out; 1893 1894 /* do not move before msg_sys is valid */ 1895 err = -EMSGSIZE; 1896 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1897 goto out_put; 1898 1899 /* Check whether to allocate the iovec area */ 1900 err = -ENOMEM; 1901 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1902 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 1903 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1904 if (!iov) 1905 goto out_put; 1906 } 1907 1908 /* This will also move the address data into kernel space */ 1909 if (MSG_CMSG_COMPAT & flags) { 1910 err = verify_compat_iovec(&msg_sys, iov, 1911 (struct sockaddr *)&address, 1912 VERIFY_READ); 1913 } else 1914 err = verify_iovec(&msg_sys, iov, 1915 (struct sockaddr *)&address, 1916 VERIFY_READ); 1917 if (err < 0) 1918 goto out_freeiov; 1919 total_len = err; 1920 1921 err = -ENOBUFS; 1922 1923 if (msg_sys.msg_controllen > INT_MAX) 1924 goto out_freeiov; 1925 ctl_len = msg_sys.msg_controllen; 1926 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1927 err = 1928 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 1929 sizeof(ctl)); 1930 if (err) 1931 goto out_freeiov; 1932 ctl_buf = msg_sys.msg_control; 1933 ctl_len = msg_sys.msg_controllen; 1934 } else if (ctl_len) { 1935 if (ctl_len > sizeof(ctl)) { 1936 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1937 if (ctl_buf == NULL) 1938 goto out_freeiov; 1939 } 1940 err = -EFAULT; 1941 /* 1942 * Careful! Before this, msg_sys.msg_control contains a user pointer. 1943 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1944 * checking falls down on this. 1945 */ 1946 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1947 ctl_len)) 1948 goto out_freectl; 1949 msg_sys.msg_control = ctl_buf; 1950 } 1951 msg_sys.msg_flags = flags; 1952 1953 if (sock->file->f_flags & O_NONBLOCK) 1954 msg_sys.msg_flags |= MSG_DONTWAIT; 1955 err = sock_sendmsg(sock, &msg_sys, total_len); 1956 1957 out_freectl: 1958 if (ctl_buf != ctl) 1959 sock_kfree_s(sock->sk, ctl_buf, ctl_len); 1960 out_freeiov: 1961 if (iov != iovstack) 1962 sock_kfree_s(sock->sk, iov, iov_size); 1963 out_put: 1964 fput_light(sock->file, fput_needed); 1965 out: 1966 return err; 1967 } 1968 1969 /* 1970 * BSD recvmsg interface 1971 */ 1972 1973 SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, 1974 unsigned int, flags) 1975 { 1976 struct compat_msghdr __user *msg_compat = 1977 (struct compat_msghdr __user *)msg; 1978 struct socket *sock; 1979 struct iovec iovstack[UIO_FASTIOV]; 1980 struct iovec *iov = iovstack; 1981 struct msghdr msg_sys; 1982 unsigned long cmsg_ptr; 1983 int err, iov_size, total_len, len; 1984 int fput_needed; 1985 1986 /* kernel mode address */ 1987 struct sockaddr_storage addr; 1988 1989 /* user mode address pointers */ 1990 struct sockaddr __user *uaddr; 1991 int __user *uaddr_len; 1992 1993 if (MSG_CMSG_COMPAT & flags) { 1994 if (get_compat_msghdr(&msg_sys, msg_compat)) 1995 return -EFAULT; 1996 } 1997 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1998 return -EFAULT; 1999 2000 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2001 if (!sock) 2002 goto out; 2003 2004 err = -EMSGSIZE; 2005 if (msg_sys.msg_iovlen > UIO_MAXIOV) 2006 goto out_put; 2007 2008 /* Check whether to allocate the iovec area */ 2009 err = -ENOMEM; 2010 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 2011 if (msg_sys.msg_iovlen > UIO_FASTIOV) { 2012 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 2013 if (!iov) 2014 goto out_put; 2015 } 2016 2017 /* 2018 * Save the user-mode address (verify_iovec will change the 2019 * kernel msghdr to use the kernel address space) 2020 */ 2021 2022 uaddr = (__force void __user *)msg_sys.msg_name; 2023 uaddr_len = COMPAT_NAMELEN(msg); 2024 if (MSG_CMSG_COMPAT & flags) { 2025 err = verify_compat_iovec(&msg_sys, iov, 2026 (struct sockaddr *)&addr, 2027 VERIFY_WRITE); 2028 } else 2029 err = verify_iovec(&msg_sys, iov, 2030 (struct sockaddr *)&addr, 2031 VERIFY_WRITE); 2032 if (err < 0) 2033 goto out_freeiov; 2034 total_len = err; 2035 2036 cmsg_ptr = (unsigned long)msg_sys.msg_control; 2037 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 2038 2039 if (sock->file->f_flags & O_NONBLOCK) 2040 flags |= MSG_DONTWAIT; 2041 err = sock_recvmsg(sock, &msg_sys, total_len, flags); 2042 if (err < 0) 2043 goto out_freeiov; 2044 len = err; 2045 2046 if (uaddr != NULL) { 2047 err = move_addr_to_user((struct sockaddr *)&addr, 2048 msg_sys.msg_namelen, uaddr, 2049 uaddr_len); 2050 if (err < 0) 2051 goto out_freeiov; 2052 } 2053 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), 2054 COMPAT_FLAGS(msg)); 2055 if (err) 2056 goto out_freeiov; 2057 if (MSG_CMSG_COMPAT & flags) 2058 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, 2059 &msg_compat->msg_controllen); 2060 else 2061 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, 2062 &msg->msg_controllen); 2063 if (err) 2064 goto out_freeiov; 2065 err = len; 2066 2067 out_freeiov: 2068 if (iov != iovstack) 2069 sock_kfree_s(sock->sk, iov, iov_size); 2070 out_put: 2071 fput_light(sock->file, fput_needed); 2072 out: 2073 return err; 2074 } 2075 2076 #ifdef __ARCH_WANT_SYS_SOCKETCALL 2077 2078 /* Argument list sizes for sys_socketcall */ 2079 #define AL(x) ((x) * sizeof(unsigned long)) 2080 static const unsigned char nargs[19]={ 2081 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 2082 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 2083 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 2084 AL(4) 2085 }; 2086 2087 #undef AL 2088 2089 /* 2090 * System call vectors. 2091 * 2092 * Argument checking cleaned up. Saved 20% in size. 2093 * This function doesn't need to set the kernel lock because 2094 * it is set by the callees. 2095 */ 2096 2097 SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) 2098 { 2099 unsigned long a[6]; 2100 unsigned long a0, a1; 2101 int err; 2102 2103 if (call < 1 || call > SYS_ACCEPT4) 2104 return -EINVAL; 2105 2106 /* copy_from_user should be SMP safe. */ 2107 if (copy_from_user(a, args, nargs[call])) 2108 return -EFAULT; 2109 2110 audit_socketcall(nargs[call] / sizeof(unsigned long), a); 2111 2112 a0 = a[0]; 2113 a1 = a[1]; 2114 2115 switch (call) { 2116 case SYS_SOCKET: 2117 err = sys_socket(a0, a1, a[2]); 2118 break; 2119 case SYS_BIND: 2120 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); 2121 break; 2122 case SYS_CONNECT: 2123 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); 2124 break; 2125 case SYS_LISTEN: 2126 err = sys_listen(a0, a1); 2127 break; 2128 case SYS_ACCEPT: 2129 err = sys_accept4(a0, (struct sockaddr __user *)a1, 2130 (int __user *)a[2], 0); 2131 break; 2132 case SYS_GETSOCKNAME: 2133 err = 2134 sys_getsockname(a0, (struct sockaddr __user *)a1, 2135 (int __user *)a[2]); 2136 break; 2137 case SYS_GETPEERNAME: 2138 err = 2139 sys_getpeername(a0, (struct sockaddr __user *)a1, 2140 (int __user *)a[2]); 2141 break; 2142 case SYS_SOCKETPAIR: 2143 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); 2144 break; 2145 case SYS_SEND: 2146 err = sys_send(a0, (void __user *)a1, a[2], a[3]); 2147 break; 2148 case SYS_SENDTO: 2149 err = sys_sendto(a0, (void __user *)a1, a[2], a[3], 2150 (struct sockaddr __user *)a[4], a[5]); 2151 break; 2152 case SYS_RECV: 2153 err = sys_recv(a0, (void __user *)a1, a[2], a[3]); 2154 break; 2155 case SYS_RECVFROM: 2156 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 2157 (struct sockaddr __user *)a[4], 2158 (int __user *)a[5]); 2159 break; 2160 case SYS_SHUTDOWN: 2161 err = sys_shutdown(a0, a1); 2162 break; 2163 case SYS_SETSOCKOPT: 2164 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); 2165 break; 2166 case SYS_GETSOCKOPT: 2167 err = 2168 sys_getsockopt(a0, a1, a[2], (char __user *)a[3], 2169 (int __user *)a[4]); 2170 break; 2171 case SYS_SENDMSG: 2172 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); 2173 break; 2174 case SYS_RECVMSG: 2175 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 2176 break; 2177 case SYS_ACCEPT4: 2178 err = sys_accept4(a0, (struct sockaddr __user *)a1, 2179 (int __user *)a[2], a[3]); 2180 break; 2181 default: 2182 err = -EINVAL; 2183 break; 2184 } 2185 return err; 2186 } 2187 2188 #endif /* __ARCH_WANT_SYS_SOCKETCALL */ 2189 2190 /** 2191 * sock_register - add a socket protocol handler 2192 * @ops: description of protocol 2193 * 2194 * This function is called by a protocol handler that wants to 2195 * advertise its address family, and have it linked into the 2196 * socket interface. The value ops->family coresponds to the 2197 * socket system call protocol family. 2198 */ 2199 int sock_register(const struct net_proto_family *ops) 2200 { 2201 int err; 2202 2203 if (ops->family >= NPROTO) { 2204 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, 2205 NPROTO); 2206 return -ENOBUFS; 2207 } 2208 2209 spin_lock(&net_family_lock); 2210 if (net_families[ops->family]) 2211 err = -EEXIST; 2212 else { 2213 net_families[ops->family] = ops; 2214 err = 0; 2215 } 2216 spin_unlock(&net_family_lock); 2217 2218 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); 2219 return err; 2220 } 2221 2222 /** 2223 * sock_unregister - remove a protocol handler 2224 * @family: protocol family to remove 2225 * 2226 * This function is called by a protocol handler that wants to 2227 * remove its address family, and have it unlinked from the 2228 * new socket creation. 2229 * 2230 * If protocol handler is a module, then it can use module reference 2231 * counts to protect against new references. If protocol handler is not 2232 * a module then it needs to provide its own protection in 2233 * the ops->create routine. 2234 */ 2235 void sock_unregister(int family) 2236 { 2237 BUG_ON(family < 0 || family >= NPROTO); 2238 2239 spin_lock(&net_family_lock); 2240 net_families[family] = NULL; 2241 spin_unlock(&net_family_lock); 2242 2243 synchronize_rcu(); 2244 2245 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); 2246 } 2247 2248 static int __init sock_init(void) 2249 { 2250 /* 2251 * Initialize sock SLAB cache. 2252 */ 2253 2254 sk_init(); 2255 2256 /* 2257 * Initialize skbuff SLAB cache 2258 */ 2259 skb_init(); 2260 2261 /* 2262 * Initialize the protocols module. 2263 */ 2264 2265 init_inodecache(); 2266 register_filesystem(&sock_fs_type); 2267 sock_mnt = kern_mount(&sock_fs_type); 2268 2269 /* The real protocol initialization is performed in later initcalls. 2270 */ 2271 2272 #ifdef CONFIG_NETFILTER 2273 netfilter_init(); 2274 #endif 2275 2276 return 0; 2277 } 2278 2279 core_initcall(sock_init); /* early initcall */ 2280 2281 #ifdef CONFIG_PROC_FS 2282 void socket_seq_show(struct seq_file *seq) 2283 { 2284 int cpu; 2285 int counter = 0; 2286 2287 for_each_possible_cpu(cpu) 2288 counter += per_cpu(sockets_in_use, cpu); 2289 2290 /* It can be negative, by the way. 8) */ 2291 if (counter < 0) 2292 counter = 0; 2293 2294 seq_printf(seq, "sockets: used %d\n", counter); 2295 } 2296 #endif /* CONFIG_PROC_FS */ 2297 2298 #ifdef CONFIG_COMPAT 2299 static long compat_sock_ioctl(struct file *file, unsigned cmd, 2300 unsigned long arg) 2301 { 2302 struct socket *sock = file->private_data; 2303 int ret = -ENOIOCTLCMD; 2304 struct sock *sk; 2305 struct net *net; 2306 2307 sk = sock->sk; 2308 net = sock_net(sk); 2309 2310 if (sock->ops->compat_ioctl) 2311 ret = sock->ops->compat_ioctl(sock, cmd, arg); 2312 2313 if (ret == -ENOIOCTLCMD && 2314 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) 2315 ret = compat_wext_handle_ioctl(net, cmd, arg); 2316 2317 return ret; 2318 } 2319 #endif 2320 2321 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) 2322 { 2323 return sock->ops->bind(sock, addr, addrlen); 2324 } 2325 2326 int kernel_listen(struct socket *sock, int backlog) 2327 { 2328 return sock->ops->listen(sock, backlog); 2329 } 2330 2331 int kernel_accept(struct socket *sock, struct socket **newsock, int flags) 2332 { 2333 struct sock *sk = sock->sk; 2334 int err; 2335 2336 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, 2337 newsock); 2338 if (err < 0) 2339 goto done; 2340 2341 err = sock->ops->accept(sock, *newsock, flags); 2342 if (err < 0) { 2343 sock_release(*newsock); 2344 *newsock = NULL; 2345 goto done; 2346 } 2347 2348 (*newsock)->ops = sock->ops; 2349 __module_get((*newsock)->ops->owner); 2350 2351 done: 2352 return err; 2353 } 2354 2355 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 2356 int flags) 2357 { 2358 return sock->ops->connect(sock, addr, addrlen, flags); 2359 } 2360 2361 int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 2362 int *addrlen) 2363 { 2364 return sock->ops->getname(sock, addr, addrlen, 0); 2365 } 2366 2367 int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 2368 int *addrlen) 2369 { 2370 return sock->ops->getname(sock, addr, addrlen, 1); 2371 } 2372 2373 int kernel_getsockopt(struct socket *sock, int level, int optname, 2374 char *optval, int *optlen) 2375 { 2376 mm_segment_t oldfs = get_fs(); 2377 int err; 2378 2379 set_fs(KERNEL_DS); 2380 if (level == SOL_SOCKET) 2381 err = sock_getsockopt(sock, level, optname, optval, optlen); 2382 else 2383 err = sock->ops->getsockopt(sock, level, optname, optval, 2384 optlen); 2385 set_fs(oldfs); 2386 return err; 2387 } 2388 2389 int kernel_setsockopt(struct socket *sock, int level, int optname, 2390 char *optval, int optlen) 2391 { 2392 mm_segment_t oldfs = get_fs(); 2393 int err; 2394 2395 set_fs(KERNEL_DS); 2396 if (level == SOL_SOCKET) 2397 err = sock_setsockopt(sock, level, optname, optval, optlen); 2398 else 2399 err = sock->ops->setsockopt(sock, level, optname, optval, 2400 optlen); 2401 set_fs(oldfs); 2402 return err; 2403 } 2404 2405 int kernel_sendpage(struct socket *sock, struct page *page, int offset, 2406 size_t size, int flags) 2407 { 2408 if (sock->ops->sendpage) 2409 return sock->ops->sendpage(sock, page, offset, size, flags); 2410 2411 return sock_no_sendpage(sock, page, offset, size, flags); 2412 } 2413 2414 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) 2415 { 2416 mm_segment_t oldfs = get_fs(); 2417 int err; 2418 2419 set_fs(KERNEL_DS); 2420 err = sock->ops->ioctl(sock, cmd, arg); 2421 set_fs(oldfs); 2422 2423 return err; 2424 } 2425 2426 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 2427 { 2428 return sock->ops->shutdown(sock, how); 2429 } 2430 2431 EXPORT_SYMBOL(sock_create); 2432 EXPORT_SYMBOL(sock_create_kern); 2433 EXPORT_SYMBOL(sock_create_lite); 2434 EXPORT_SYMBOL(sock_map_fd); 2435 EXPORT_SYMBOL(sock_recvmsg); 2436 EXPORT_SYMBOL(sock_register); 2437 EXPORT_SYMBOL(sock_release); 2438 EXPORT_SYMBOL(sock_sendmsg); 2439 EXPORT_SYMBOL(sock_unregister); 2440 EXPORT_SYMBOL(sock_wake_async); 2441 EXPORT_SYMBOL(sockfd_lookup); 2442 EXPORT_SYMBOL(kernel_sendmsg); 2443 EXPORT_SYMBOL(kernel_recvmsg); 2444 EXPORT_SYMBOL(kernel_bind); 2445 EXPORT_SYMBOL(kernel_listen); 2446 EXPORT_SYMBOL(kernel_accept); 2447 EXPORT_SYMBOL(kernel_connect); 2448 EXPORT_SYMBOL(kernel_getsockname); 2449 EXPORT_SYMBOL(kernel_getpeername); 2450 EXPORT_SYMBOL(kernel_getsockopt); 2451 EXPORT_SYMBOL(kernel_setsockopt); 2452 EXPORT_SYMBOL(kernel_sendpage); 2453 EXPORT_SYMBOL(kernel_sock_ioctl); 2454 EXPORT_SYMBOL(kernel_sock_shutdown); 2455