/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko Eißfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it avoids a huge number
 *					of hashed socks (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are
 *					introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
 *		mark and a fake inode identifier (nor does it have the BSD bug
 *		of fstat'ing the first socket twice).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns a 0 length path for an unbound connector. BSD returns
 *		16 and a null first byte in the path (but not for
 *		gethost/peername - BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
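/*
 * Illustration of the abstract namespace described above: a hypothetical
 * userspace sketch (not built as part of this file). The leading NUL byte
 * selects the abstract namespace, and the address length covers exactly
 * sun_family plus the name bytes, since abstract names are not zero
 * terminated. The name "example" is a placeholder.
 *
 *	#include <string.h>
 *	#include <stddef.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	static int bind_abstract(int fd)
 *	{
 *		struct sockaddr_un sun;
 *		socklen_t len;
 *
 *		memset(&sun, 0, sizeof(sun));
 *		sun.sun_family = AF_UNIX;
 *		sun.sun_path[0] = '\0';		// abstract namespace marker
 *		memcpy(sun.sun_path + 1, "example", 7);
 *		len = offsetof(struct sockaddr_un, sun_path) + 1 + 7;
 *		return bind(fd, (struct sockaddr *)&sun, len);
 *	}
 */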
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}
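/*
 * A hypothetical userspace sketch (not built here) of how the two address
 * forms checked by unix_mkname() below are sized. For a filesystem name the
 * path is NUL terminated and the kernel recomputes the length from strlen();
 * for an abstract name every byte after sun_family counts, NULs included.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	static socklen_t fs_len(struct sockaddr_un *sun, const char *path)
 *	{
 *		strncpy(sun->sun_path, path, sizeof(sun->sun_path) - 1);
 *		// offsetof + strlen + 1 keeps the trailing NUL in the address
 *		return offsetof(struct sockaddr_un, sun_path) +
 *		       strlen(path) + 1;
 *	}
 *
 *	static socklen_t abstract_len(struct sockaddr_un *sun,
 *				      const char *name, size_t n)
 *	{
 *		sun->sun_path[0] = '\0';	// abstract namespace marker
 *		memcpy(sun->sun_path + 1, name, n);
 *		return offsetof(struct sockaddr_un, sun_path) + 1 + n;
 *	}
 */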
/*
 * Check unix socket name:
 *	- should not be zero length.
 *	- if it does not start with a zero byte, it should be
 *	  NUL terminated (an FS object)
 *	- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

static inline int unix_writable(struct sock *sk)
{
	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based solely on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal the error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
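/*
 * The peer credentials installed by init_peercred()/copy_peercred() below
 * are what userspace reads back through SO_PEERCRED. A minimal,
 * hypothetical sketch (not built here) for a connected stream socket:
 *
 *	#define _GNU_SOURCE
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	static int print_peer(int fd)
 *	{
 *		struct ucred uc;
 *		socklen_t len = sizeof(uc);
 *
 *		if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &uc, &len) < 0)
 *			return -1;
 *		printf("pid=%d uid=%u gid=%u\n", uc.pid, uc.uid, uc.gid);
 *		return 0;
 *	}
 */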
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog = backlog;
	sk->sk_state = TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->readlock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->readlock);

	return 0;
}
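/*
 * unix_set_peek_off() above backs the generic SO_PEEK_OFF socket option:
 * once a peek offset is set, MSG_PEEK reads advance through the queued data
 * instead of rereading it from the head. A hypothetical userspace sketch:
 *
 *	#include <sys/socket.h>
 *
 *	static void enable_peek_cursor(int fd)
 *	{
 *		int off = 0;	// start peeking at the head of the queue
 *
 *		// Subsequent recv(fd, buf, n, MSG_PEEK) calls move the peek
 *		// offset forward rather than consuming the data itself.
 *		setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	}
 */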
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
Split off 625 * this special lock-class by reinitializing the spinlock key: 626 */ 627 static struct lock_class_key af_unix_sk_receive_queue_lock_key; 628 629 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) 630 { 631 struct sock *sk = NULL; 632 struct unix_sock *u; 633 634 atomic_long_inc(&unix_nr_socks); 635 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) 636 goto out; 637 638 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); 639 if (!sk) 640 goto out; 641 642 sock_init_data(sock, sk); 643 lockdep_set_class(&sk->sk_receive_queue.lock, 644 &af_unix_sk_receive_queue_lock_key); 645 646 sk->sk_write_space = unix_write_space; 647 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 648 sk->sk_destruct = unix_sock_destructor; 649 u = unix_sk(sk); 650 u->path.dentry = NULL; 651 u->path.mnt = NULL; 652 spin_lock_init(&u->lock); 653 atomic_long_set(&u->inflight, 0); 654 INIT_LIST_HEAD(&u->link); 655 mutex_init(&u->readlock); /* single task reading lock */ 656 init_waitqueue_head(&u->peer_wait); 657 unix_insert_socket(unix_sockets_unbound(sk), sk); 658 out: 659 if (sk == NULL) 660 atomic_long_dec(&unix_nr_socks); 661 else { 662 local_bh_disable(); 663 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 664 local_bh_enable(); 665 } 666 return sk; 667 } 668 669 static int unix_create(struct net *net, struct socket *sock, int protocol, 670 int kern) 671 { 672 if (protocol && protocol != PF_UNIX) 673 return -EPROTONOSUPPORT; 674 675 sock->state = SS_UNCONNECTED; 676 677 switch (sock->type) { 678 case SOCK_STREAM: 679 sock->ops = &unix_stream_ops; 680 break; 681 /* 682 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 683 * nothing uses it. 684 */ 685 case SOCK_RAW: 686 sock->type = SOCK_DGRAM; 687 case SOCK_DGRAM: 688 sock->ops = &unix_dgram_ops; 689 break; 690 case SOCK_SEQPACKET: 691 sock->ops = &unix_seqpacket_ops; 692 break; 693 default: 694 return -ESOCKTNOSUPPORT; 695 } 696 697 return unix_create1(net, sock, kern) ? 0 : -ENOMEM; 698 } 699 700 static int unix_release(struct socket *sock) 701 { 702 struct sock *sk = sock->sk; 703 704 if (!sk) 705 return 0; 706 707 unix_release_sock(sk, 0); 708 sock->sk = NULL; 709 710 return 0; 711 } 712 713 static int unix_autobind(struct socket *sock) 714 { 715 struct sock *sk = sock->sk; 716 struct net *net = sock_net(sk); 717 struct unix_sock *u = unix_sk(sk); 718 static u32 ordernum = 1; 719 struct unix_address *addr; 720 int err; 721 unsigned int retries = 0; 722 723 err = mutex_lock_interruptible(&u->readlock); 724 if (err) 725 return err; 726 727 err = 0; 728 if (u->addr) 729 goto out; 730 731 err = -ENOMEM; 732 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); 733 if (!addr) 734 goto out; 735 736 addr->name->sun_family = AF_UNIX; 737 atomic_set(&addr->refcnt, 1); 738 739 retry: 740 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); 741 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); 742 743 spin_lock(&unix_table_lock); 744 ordernum = (ordernum+1)&0xFFFFF; 745 746 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, 747 addr->hash)) { 748 spin_unlock(&unix_table_lock); 749 /* 750 * __unix_find_socket_byname() may take long time if many names 751 * are already in use. 752 */ 753 cond_resched(); 754 /* Give up if all names seems to be in use. 
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) +
		1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum + 1) & 0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
845 */ 846 err = security_path_mknod(&path, dentry, mode, 0); 847 if (!err) { 848 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); 849 if (!err) { 850 res->mnt = mntget(path.mnt); 851 res->dentry = dget(dentry); 852 } 853 } 854 done_path_create(&path, dentry); 855 return err; 856 } 857 858 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 859 { 860 struct sock *sk = sock->sk; 861 struct net *net = sock_net(sk); 862 struct unix_sock *u = unix_sk(sk); 863 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 864 char *sun_path = sunaddr->sun_path; 865 int err; 866 unsigned int hash; 867 struct unix_address *addr; 868 struct hlist_head *list; 869 870 err = -EINVAL; 871 if (sunaddr->sun_family != AF_UNIX) 872 goto out; 873 874 if (addr_len == sizeof(short)) { 875 err = unix_autobind(sock); 876 goto out; 877 } 878 879 err = unix_mkname(sunaddr, addr_len, &hash); 880 if (err < 0) 881 goto out; 882 addr_len = err; 883 884 err = mutex_lock_interruptible(&u->readlock); 885 if (err) 886 goto out; 887 888 err = -EINVAL; 889 if (u->addr) 890 goto out_up; 891 892 err = -ENOMEM; 893 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); 894 if (!addr) 895 goto out_up; 896 897 memcpy(addr->name, sunaddr, addr_len); 898 addr->len = addr_len; 899 addr->hash = hash ^ sk->sk_type; 900 atomic_set(&addr->refcnt, 1); 901 902 if (sun_path[0]) { 903 struct path path; 904 umode_t mode = S_IFSOCK | 905 (SOCK_INODE(sock)->i_mode & ~current_umask()); 906 err = unix_mknod(sun_path, mode, &path); 907 if (err) { 908 if (err == -EEXIST) 909 err = -EADDRINUSE; 910 unix_release_addr(addr); 911 goto out_up; 912 } 913 addr->hash = UNIX_HASH_SIZE; 914 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1); 915 spin_lock(&unix_table_lock); 916 u->path = path; 917 list = &unix_socket_table[hash]; 918 } else { 919 spin_lock(&unix_table_lock); 920 err = -EADDRINUSE; 921 if (__unix_find_socket_byname(net, sunaddr, addr_len, 922 sk->sk_type, hash)) { 923 unix_release_addr(addr); 924 goto out_unlock; 925 } 926 927 list = &unix_socket_table[addr->hash]; 928 } 929 930 err = 0; 931 __unix_remove_socket(sk); 932 u->addr = addr; 933 __unix_insert_socket(list, sk); 934 935 out_unlock: 936 spin_unlock(&unix_table_lock); 937 out_up: 938 mutex_unlock(&u->readlock); 939 out: 940 return err; 941 } 942 943 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 944 { 945 if (unlikely(sk1 == sk2) || !sk2) { 946 unix_state_lock(sk1); 947 return; 948 } 949 if (sk1 < sk2) { 950 unix_state_lock(sk1); 951 unix_state_lock_nested(sk2); 952 } else { 953 unix_state_lock(sk2); 954 unix_state_lock_nested(sk1); 955 } 956 } 957 958 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 959 { 960 if (unlikely(sk1 == sk2) || !sk2) { 961 unix_state_unlock(sk1); 962 return; 963 } 964 unix_state_unlock(sk1); 965 unix_state_unlock(sk2); 966 } 967 968 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 969 int alen, int flags) 970 { 971 struct sock *sk = sock->sk; 972 struct net *net = sock_net(sk); 973 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 974 struct sock *other; 975 unsigned int hash; 976 int err; 977 978 if (addr->sa_family != AF_UNSPEC) { 979 err = unix_mkname(sunaddr, alen, &hash); 980 if (err < 0) 981 goto out; 982 alen = err; 983 984 if (test_bit(SOCK_PASSCRED, &sock->flags) && 985 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) 986 goto out; 987 988 restart: 989 other = unix_find_other(net, sunaddr, alen, sock->type, hash, 
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type,
					hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
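/*
 * unix_dgram_connect() above also implements the 1003.1g rule that
 * connecting with AF_UNSPEC dissolves the association. A hypothetical
 * userspace sketch:
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	static int disconnect_dgram(int fd)
 *	{
 *		struct sockaddr sa;
 *
 *		memset(&sa, 0, sizeof(sa));
 *		sa.sa_family = AF_UNSPEC;
 *		// Afterwards, send() without an address fails with ENOTCONN
 *		// and the old peer no longer receives from us.
 *		return connect(fd, &sa, sizeof(sa));
 *	}
 */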
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	 * If we make it after the state is locked,
	 * we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.
	 *
	 * This is a tricky place. We need to grab our state lock and cannot
	 * drop the lock on the peer. That is dangerous because a deadlock is
	 * possible. The connect-to-self case and simultaneous connect
	 * attempts are eliminated by checking the socket state: other is
	 * TCP_LISTEN, and if sk were TCP_LISTEN we would have checked that
	 * before attempting to grab the lock.
	 *
	 * And we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take the skb and send the info to the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
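/*
 * As unix_stream_connect() above shows, connect() on a non-blocking AF_UNIX
 * socket fails with EAGAIN (not EINPROGRESS as for TCP) while the listener's
 * backlog is full; blocking connectors wait in unix_wait_for_peer(). A
 * hypothetical userspace sketch that retries:
 *
 *	#include <errno.h>
 *	#include <poll.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	static int connect_retry(int fd, const struct sockaddr_un *sun,
 *				 socklen_t len)
 *	{
 *		while (connect(fd, (const struct sockaddr *)sun, len) < 0) {
 *			if (errno != EAGAIN)
 *				return -1;
 *			poll(NULL, 0, 10);	// back off, then retry
 *		}
 *		return 0;
 *	}
 */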
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}
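/*
 * unix_attach_fds() below pins the files referenced by an SCM_RIGHTS control
 * message while the skb sits in a receive queue. A hypothetical userspace
 * sketch (not built here) of sending one fd this way:
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	static ssize_t send_fd(int sock, int fd)
 *	{
 *		char data = 'x';		// must carry at least one byte
 *		struct iovec iov = { &data, 1 };
 *		char buf[CMSG_SPACE(sizeof(int))];
 *		struct msghdr msg;
 *		struct cmsghdr *cm;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		msg.msg_iov = &iov;
 *		msg.msg_iovlen = 1;
 *		msg.msg_control = buf;
 *		msg.msg_controllen = sizeof(buf);
 *		cm = CMSG_FIRSTHDR(&msg);
 *		cm->cmsg_level = SOL_SOCKET;
 *		cm->cmsg_type = SCM_RIGHTS;
 *		cm->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cm), &fd, sizeof(int));
 *		return sendmsg(sock, &msg, 0);
 *	}
 */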
Otherwise a socket in the fps might become a 1396 * candidate for GC while the skb is not yet queued. 1397 */ 1398 UNIXCB(skb).fp = scm_fp_dup(scm->fp); 1399 if (!UNIXCB(skb).fp) 1400 return -ENOMEM; 1401 1402 if (unix_sock_count) { 1403 for (i = scm->fp->count - 1; i >= 0; i--) 1404 unix_inflight(scm->fp->fp[i]); 1405 } 1406 return max_level; 1407 } 1408 1409 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1410 { 1411 int err = 0; 1412 1413 UNIXCB(skb).pid = get_pid(scm->pid); 1414 UNIXCB(skb).uid = scm->creds.uid; 1415 UNIXCB(skb).gid = scm->creds.gid; 1416 UNIXCB(skb).fp = NULL; 1417 if (scm->fp && send_fds) 1418 err = unix_attach_fds(scm, skb); 1419 1420 skb->destructor = unix_destruct_scm; 1421 return err; 1422 } 1423 1424 /* 1425 * Some apps rely on write() giving SCM_CREDENTIALS 1426 * We include credentials if source or destination socket 1427 * asserted SOCK_PASSCRED. 1428 */ 1429 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1430 const struct sock *other) 1431 { 1432 if (UNIXCB(skb).pid) 1433 return; 1434 if (test_bit(SOCK_PASSCRED, &sock->flags) || 1435 !other->sk_socket || 1436 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { 1437 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1438 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1439 } 1440 } 1441 1442 /* 1443 * Send AF_UNIX data. 1444 */ 1445 1446 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1447 size_t len) 1448 { 1449 struct sock *sk = sock->sk; 1450 struct net *net = sock_net(sk); 1451 struct unix_sock *u = unix_sk(sk); 1452 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1453 struct sock *other = NULL; 1454 int namelen = 0; /* fake GCC */ 1455 int err; 1456 unsigned int hash; 1457 struct sk_buff *skb; 1458 long timeo; 1459 struct scm_cookie scm; 1460 int max_level; 1461 int data_len = 0; 1462 1463 wait_for_unix_gc(); 1464 err = scm_send(sock, msg, &scm, false); 1465 if (err < 0) 1466 return err; 1467 1468 err = -EOPNOTSUPP; 1469 if (msg->msg_flags&MSG_OOB) 1470 goto out; 1471 1472 if (msg->msg_namelen) { 1473 err = unix_mkname(sunaddr, msg->msg_namelen, &hash); 1474 if (err < 0) 1475 goto out; 1476 namelen = err; 1477 } else { 1478 sunaddr = NULL; 1479 err = -ENOTCONN; 1480 other = unix_peer_get(sk); 1481 if (!other) 1482 goto out; 1483 } 1484 1485 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr 1486 && (err = unix_autobind(sock)) != 0) 1487 goto out; 1488 1489 err = -EMSGSIZE; 1490 if (len > sk->sk_sndbuf - 32) 1491 goto out; 1492 1493 if (len > SKB_MAX_ALLOC) { 1494 data_len = min_t(size_t, 1495 len - SKB_MAX_ALLOC, 1496 MAX_SKB_FRAGS * PAGE_SIZE); 1497 data_len = PAGE_ALIGN(data_len); 1498 1499 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1500 } 1501 1502 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1503 msg->msg_flags & MSG_DONTWAIT, &err, 1504 PAGE_ALLOC_COSTLY_ORDER); 1505 if (skb == NULL) 1506 goto out; 1507 1508 err = unix_scm_to_skb(&scm, skb, true); 1509 if (err < 0) 1510 goto out_free; 1511 max_level = err + 1; 1512 unix_get_secdata(&scm, skb); 1513 1514 skb_put(skb, len - data_len); 1515 skb->data_len = data_len; 1516 skb->len = len; 1517 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1518 if (err) 1519 goto out_free; 1520 1521 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1522 1523 restart: 1524 if (!other) { 1525 err = -ECONNRESET; 1526 if (sunaddr == NULL) 1527 goto out_free; 1528 1529 other = unix_find_other(net, sunaddr, namelen, sk->sk_type, 
/*
 * Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int max_level;
	int data_len = 0;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;
	unix_get_secdata(&scm, skb);

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 * Check with 1003.1g - what should a
		 * datagram error return here?
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}

/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int max_level;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err = 0;
	bool send_sigpipe = true;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->readlock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			return err;
	}

	/* we must acquire readlock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		send_sigpipe = false;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		goto err_state_unlock;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb) {
		if (newskb)
			skb = newskb;
		else
			goto alloc_skb;
	} else if (newskb) {
		/* this is the fast path: we can append to the queue tail,
		 * so a previously allocated newskb is unneeded; consuming
		 * it here does no harm
		 */
		consume_skb(newskb);
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	atomic_add(size, &sk->sk_wmem_alloc);

	if (newskb)
		__skb_queue_tail(&other->sk_receive_queue, newskb);

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->readlock);

	other->sk_data_ready(other);

	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->readlock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	return err;
}

static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}
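/*
 * unix_dgram_recvmsg() below reports truncation via MSG_TRUNC and, with
 * MSG_PEEK, leaves the datagram queued (duplicating any attached fds). A
 * hypothetical userspace sketch that learns a datagram's size by peeking:
 *
 *	#include <sys/socket.h>
 *
 *	static ssize_t peek_len(int fd)
 *	{
 *		char c;
 *
 *		// With MSG_TRUNC the return value is the full datagram
 *		// length, even though only one byte was copied out.
 *		return recv(fd, &c, 1, MSG_PEEK | MSG_TRUNC);
 *	}
 */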
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	err = mutex_lock_interruptible(&u->readlock);
	if (unlikely(err)) {
		/* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
		 */
		err = noblock ? -EAGAIN : -ERESTARTSYS;
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		 *  - not return fds - good, but too simple 8)
		 *  - return fds, and not return them on read (old strategy,
		 *    apparently wrong)
		 *  - clone fds (I chose this for now, it is the most
		 *    universal solution)
		 *
		 * POSIX 1003.1g does not actually define this clearly
		 * at all. POSIX 1003.1g doesn't define a lot of things
		 * clearly, however!
		 */

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}
1962 */ 1963 static long unix_stream_data_wait(struct sock *sk, long timeo, 1964 struct sk_buff *last, unsigned int last_len) 1965 { 1966 struct sk_buff *tail; 1967 DEFINE_WAIT(wait); 1968 1969 unix_state_lock(sk); 1970 1971 for (;;) { 1972 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1973 1974 tail = skb_peek_tail(&sk->sk_receive_queue); 1975 if (tail != last || 1976 (tail && tail->len != last_len) || 1977 sk->sk_err || 1978 (sk->sk_shutdown & RCV_SHUTDOWN) || 1979 signal_pending(current) || 1980 !timeo) 1981 break; 1982 1983 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1984 unix_state_unlock(sk); 1985 timeo = freezable_schedule_timeout(timeo); 1986 unix_state_lock(sk); 1987 1988 if (sock_flag(sk, SOCK_DEAD)) 1989 break; 1990 1991 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1992 } 1993 1994 finish_wait(sk_sleep(sk), &wait); 1995 unix_state_unlock(sk); 1996 return timeo; 1997 } 1998 1999 static unsigned int unix_skb_len(const struct sk_buff *skb) 2000 { 2001 return skb->len - UNIXCB(skb).consumed; 2002 } 2003 2004 struct unix_stream_read_state { 2005 int (*recv_actor)(struct sk_buff *, int, int, 2006 struct unix_stream_read_state *); 2007 struct socket *socket; 2008 struct msghdr *msg; 2009 struct pipe_inode_info *pipe; 2010 size_t size; 2011 int flags; 2012 unsigned int splice_flags; 2013 }; 2014 2015 static int unix_stream_read_generic(struct unix_stream_read_state *state) 2016 { 2017 struct scm_cookie scm; 2018 struct socket *sock = state->socket; 2019 struct sock *sk = sock->sk; 2020 struct unix_sock *u = unix_sk(sk); 2021 int copied = 0; 2022 int flags = state->flags; 2023 int noblock = flags & MSG_DONTWAIT; 2024 bool check_creds = false; 2025 int target; 2026 int err = 0; 2027 long timeo; 2028 int skip; 2029 size_t size = state->size; 2030 unsigned int last_len; 2031 2032 err = -EINVAL; 2033 if (sk->sk_state != TCP_ESTABLISHED) 2034 goto out; 2035 2036 err = -EOPNOTSUPP; 2037 if (flags & MSG_OOB) 2038 goto out; 2039 2040 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2041 timeo = sock_rcvtimeo(sk, noblock); 2042 2043 memset(&scm, 0, sizeof(scm)); 2044 2045 /* Lock the socket to prevent queue disordering 2046 * while sleeps in memcpy_tomsg 2047 */ 2048 err = mutex_lock_interruptible(&u->readlock); 2049 if (unlikely(err)) { 2050 /* recvmsg() in non blocking mode is supposed to return -EAGAIN 2051 * sk_rcvtimeo is not honored by mutex_lock_interruptible() 2052 */ 2053 err = noblock ? -EAGAIN : -ERESTARTSYS; 2054 goto out; 2055 } 2056 2057 do { 2058 int chunk; 2059 struct sk_buff *skb, *last; 2060 2061 unix_state_lock(sk); 2062 if (sock_flag(sk, SOCK_DEAD)) { 2063 err = -ECONNRESET; 2064 goto unlock; 2065 } 2066 last = skb = skb_peek(&sk->sk_receive_queue); 2067 last_len = last ? last->len : 0; 2068 again: 2069 if (skb == NULL) { 2070 unix_sk(sk)->recursion_level = 0; 2071 if (copied >= target) 2072 goto unlock; 2073 2074 /* 2075 * POSIX 1003.1g mandates this order. 
2076 */ 2077 2078 err = sock_error(sk); 2079 if (err) 2080 goto unlock; 2081 if (sk->sk_shutdown & RCV_SHUTDOWN) 2082 goto unlock; 2083 2084 unix_state_unlock(sk); 2085 err = -EAGAIN; 2086 if (!timeo) 2087 break; 2088 mutex_unlock(&u->readlock); 2089 2090 timeo = unix_stream_data_wait(sk, timeo, last, 2091 last_len); 2092 2093 if (signal_pending(current) || 2094 mutex_lock_interruptible(&u->readlock)) { 2095 err = sock_intr_errno(timeo); 2096 goto out; 2097 } 2098 2099 continue; 2100 unlock: 2101 unix_state_unlock(sk); 2102 break; 2103 } 2104 2105 skip = sk_peek_offset(sk, flags); 2106 while (skip >= unix_skb_len(skb)) { 2107 skip -= unix_skb_len(skb); 2108 last = skb; 2109 last_len = skb->len; 2110 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2111 if (!skb) 2112 goto again; 2113 } 2114 2115 unix_state_unlock(sk); 2116 2117 if (check_creds) { 2118 /* Never glue messages from different writers */ 2119 if ((UNIXCB(skb).pid != scm.pid) || 2120 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || 2121 !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) 2122 break; 2123 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2124 /* Copy credentials */ 2125 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2126 check_creds = true; 2127 } 2128 2129 /* Copy address just once */ 2130 if (state->msg && state->msg->msg_name) { 2131 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2132 state->msg->msg_name); 2133 unix_copy_addr(state->msg, skb->sk); 2134 sunaddr = NULL; 2135 } 2136 2137 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2138 chunk = state->recv_actor(skb, skip, chunk, state); 2139 if (chunk < 0) { 2140 if (copied == 0) 2141 copied = -EFAULT; 2142 break; 2143 } 2144 copied += chunk; 2145 size -= chunk; 2146 2147 /* Mark read part of skb as used */ 2148 if (!(flags & MSG_PEEK)) { 2149 UNIXCB(skb).consumed += chunk; 2150 2151 sk_peek_offset_bwd(sk, chunk); 2152 2153 if (UNIXCB(skb).fp) 2154 unix_detach_fds(&scm, skb); 2155 2156 if (unix_skb_len(skb)) 2157 break; 2158 2159 skb_unlink(skb, &sk->sk_receive_queue); 2160 consume_skb(skb); 2161 2162 if (scm.fp) 2163 break; 2164 } else { 2165 /* It is questionable, see note in unix_dgram_recvmsg. 2166 */ 2167 if (UNIXCB(skb).fp) 2168 scm.fp = scm_fp_dup(UNIXCB(skb).fp); 2169 2170 sk_peek_offset_fwd(sk, chunk); 2171 2172 break; 2173 } 2174 } while (size); 2175 2176 mutex_unlock(&u->readlock); 2177 if (state->msg) 2178 scm_recv(sock, state->msg, &scm, flags); 2179 else 2180 scm_destroy(&scm); 2181 out: 2182 return copied ? 
static int unix_shutdown(struct socket *sock, int mode)
{
        struct sock *sk = sock->sk;
        struct sock *other;

        if (mode < SHUT_RD || mode > SHUT_RDWR)
                return -EINVAL;
        /* This maps:
         * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
         * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
         * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
         */
        ++mode;

        unix_state_lock(sk);
        sk->sk_shutdown |= mode;
        other = unix_peer(sk);
        if (other)
                sock_hold(other);
        unix_state_unlock(sk);
        sk->sk_state_change(sk);

        if (other &&
            (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

                int peer_mode = 0;

                if (mode & RCV_SHUTDOWN)
                        peer_mode |= SEND_SHUTDOWN;
                if (mode & SEND_SHUTDOWN)
                        peer_mode |= RCV_SHUTDOWN;
                unix_state_lock(other);
                other->sk_shutdown |= peer_mode;
                unix_state_unlock(other);
                other->sk_state_change(other);
                if (peer_mode == SHUTDOWN_MASK)
                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
                else if (peer_mode & RCV_SHUTDOWN)
                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
        }
        if (other)
                sock_put(other);

        return 0;
}

long unix_inq_len(struct sock *sk)
{
        struct sk_buff *skb;
        long amount = 0;

        if (sk->sk_state == TCP_LISTEN)
                return -EINVAL;

        spin_lock(&sk->sk_receive_queue.lock);
        if (sk->sk_type == SOCK_STREAM ||
            sk->sk_type == SOCK_SEQPACKET) {
                skb_queue_walk(&sk->sk_receive_queue, skb)
                        amount += unix_skb_len(skb);
        } else {
                skb = skb_peek(&sk->sk_receive_queue);
                if (skb)
                        amount = skb->len;
        }
        spin_unlock(&sk->sk_receive_queue.lock);

        return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
        return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        struct sock *sk = sock->sk;
        long amount = 0;
        int err;

        switch (cmd) {
        case SIOCOUTQ:
                amount = unix_outq_len(sk);
                err = put_user(amount, (int __user *)arg);
                break;
        case SIOCINQ:
                amount = unix_inq_len(sk);
                if (amount < 0)
                        err = amount;
                else
                        err = put_user(amount, (int __user *)arg);
                break;
        default:
                err = -ENOIOCTLCMD;
                break;
        }
        return err;
}
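/* Illustrative userspace sketch, not part of the original file: the two
 * ioctls handled above.  SIOCINQ reports the bytes available for
 * reading (for SOCK_STREAM, the unconsumed remainder of every queued
 * skb, per unix_inq_len()); SIOCOUTQ reports the send-side allocation
 * still outstanding (unix_outq_len()).  Compiled out, for orientation
 * only; 'sockfd' is a hypothetical connected descriptor.
 */
#if 0
        int avail, pending;

        ioctl(sockfd, SIOCINQ, &avail);    /* -> unix_inq_len(sk)  */
        ioctl(sockfd, SIOCOUTQ, &pending); /* -> unix_outq_len(sk) */
#endif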
static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
        struct sock *sk = sock->sk;
        unsigned int mask;

        sock_poll_wait(file, sk_sleep(sk), wait);
        mask = 0;

        /* exceptional events? */
        if (sk->sk_err)
                mask |= POLLERR;
        if (sk->sk_shutdown == SHUTDOWN_MASK)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLRDHUP | POLLIN | POLLRDNORM;

        /* readable? */
        if (!skb_queue_empty(&sk->sk_receive_queue))
                mask |= POLLIN | POLLRDNORM;

        /* Connection-based sockets need to check for termination and startup */
        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
            sk->sk_state == TCP_CLOSE)
                mask |= POLLHUP;

        /*
         * We set writable also when the other side has shut down the
         * connection.  This prevents stuck sockets.
         */
        if (unix_writable(sk))
                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

        return mask;
}

static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
                                    poll_table *wait)
{
        struct sock *sk = sock->sk, *other;
        unsigned int mask, writable;

        sock_poll_wait(file, sk_sleep(sk), wait);
        mask = 0;

        /* exceptional events? */
        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
                mask |= POLLERR |
                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
        if (sk->sk_shutdown == SHUTDOWN_MASK)
                mask |= POLLHUP;

        /* readable? */
        if (!skb_queue_empty(&sk->sk_receive_queue))
                mask |= POLLIN | POLLRDNORM;

        /* Connection-based sockets need to check for termination and startup */
        if (sk->sk_type == SOCK_SEQPACKET) {
                if (sk->sk_state == TCP_CLOSE)
                        mask |= POLLHUP;
                /* connection hasn't started yet? */
                if (sk->sk_state == TCP_SYN_SENT)
                        return mask;
        }

        /* No write status requested, avoid expensive OUT tests. */
        if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
                return mask;

        writable = unix_writable(sk);
        other = unix_peer_get(sk);
        if (other) {
                if (unix_peer(other) != sk) {
                        sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
                        if (unix_recvq_full(other))
                                writable = 0;
                }
                sock_put(other);
        }

        if (writable)
                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
        else
                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

        return mask;
}

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
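/* Illustrative note, not part of the original file: the macros above
 * pack a hash-bucket index into the top (UNIX_HASH_BITS + 1) bits of
 * the seq_file position and a 1-based offset within that bucket into
 * the low BUCKET_SPACE bits.  E.g. on a 64-bit kernel with
 * UNIX_HASH_BITS == 8, BUCKET_SPACE is 64 - 9 - 1 = 54, so:
 *
 *      pos = set_bucket_offset(3, 7);  // (3UL << 54) | 7
 *      get_bucket(pos);                // 3: which hash chain
 *      get_offset(pos);                // 7: which socket in that chain
 *
 * which is why unix_next_socket() below restarts each new bucket at
 * offset 1 rather than 0.
 */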
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
        unsigned long offset = get_offset(*pos);
        unsigned long bucket = get_bucket(*pos);
        struct sock *sk;
        unsigned long count = 0;

        for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
                if (sock_net(sk) != seq_file_net(seq))
                        continue;
                if (++count == offset)
                        break;
        }

        return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
                                     struct sock *sk,
                                     loff_t *pos)
{
        unsigned long bucket;

        while (sk > (struct sock *)SEQ_START_TOKEN) {
                sk = sk_next(sk);
                if (!sk)
                        goto next_bucket;
                if (sock_net(sk) == seq_file_net(seq))
                        return sk;
        }

        do {
                sk = unix_from_bucket(seq, pos);
                if (sk)
                        return sk;

next_bucket:
                bucket = get_bucket(*pos) + 1;
                *pos = set_bucket_offset(bucket, 1);
        } while (bucket < ARRAY_SIZE(unix_socket_table));

        return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(unix_table_lock)
{
        spin_lock(&unix_table_lock);

        if (!*pos)
                return SEQ_START_TOKEN;

        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
                return NULL;

        return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
        __releases(unix_table_lock)
{
        spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{

        if (v == SEQ_START_TOKEN)
                seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
                         "Inode Path\n");
        else {
                struct sock *s = v;
                struct unix_sock *u = unix_sk(s);
                unix_state_lock(s);

                seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
                        s,
                        atomic_read(&s->sk_refcnt),
                        0,
                        s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
                        s->sk_type,
                        s->sk_socket ?
                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
                        sock_i_ino(s));

                if (u->addr) {
                        int i, len;
                        seq_putc(seq, ' ');

                        i = 0;
                        len = u->addr->len - sizeof(short);
                        if (!UNIX_ABSTRACT(s))
                                len--;
                        else {
                                seq_putc(seq, '@');
                                i++;
                        }
                        for ( ; i < len; i++)
                                seq_putc(seq, u->addr->name->sun_path[i]);
                }
                unix_state_unlock(s);
                seq_putc(seq, '\n');
        }

        return 0;
}
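/* Illustrative note, not part of the original file: a typical line as
 * emitted by unix_seq_show() above for a listening socket bound to a
 * filesystem path (abstract names would be printed with a leading '@'):
 *
 *      ffff8800b8f10000: 00000002 00000000 00010000 0001 01 17352 /run/foo.sock
 *
 * The second column is sk_refcnt, the Protocol column is always 0 for
 * AF_UNIX, 00010000 is __SO_ACCEPTCON (the socket is listening), 0001
 * is SOCK_STREAM, and 01 is SS_UNCONNECTED.
 */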
static const struct seq_operations unix_seq_ops = {
        .start  = unix_seq_start,
        .next   = unix_seq_next,
        .stop   = unix_seq_stop,
        .show   = unix_seq_show,
};

static int unix_seq_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &unix_seq_ops,
                            sizeof(struct seq_net_private));
}

static const struct file_operations unix_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = unix_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};

#endif

static const struct net_proto_family unix_family_ops = {
        .family = PF_UNIX,
        .create = unix_create,
        .owner  = THIS_MODULE,
};


static int __net_init unix_net_init(struct net *net)
{
        int error = -ENOMEM;

        net->unx.sysctl_max_dgram_qlen = 10;
        if (unix_sysctl_register(net))
                goto out;

#ifdef CONFIG_PROC_FS
        if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
                unix_sysctl_unregister(net);
                goto out;
        }
#endif
        error = 0;
out:
        return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
        unix_sysctl_unregister(net);
        remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
        .init = unix_net_init,
        .exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
        int rc = -1;

        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

        rc = proto_register(&unix_proto, 1);
        if (rc != 0) {
                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
                goto out;
        }

        sock_register(&unix_family_ops);
        register_pernet_subsys(&unix_net_ops);
out:
        return rc;
}

static void __exit af_unix_exit(void)
{
        sock_unregister(PF_UNIX);
        proto_unregister(&unix_proto);
        unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket.  But later than subsys_initcall() because
   we depend on stuff initialised there. */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
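/* Illustrative userspace sketch, not part of the original file: a
 * minimal walk through the paths implemented above on a socketpair --
 * shutdown(2) propagating to the peer (unix_shutdown), then poll(2)
 * observing the result on the other end (unix_poll).  Compiled out,
 * for orientation only; error handling omitted.
 */
#if 0
#include <sys/socket.h>
#include <poll.h>

int main(void)
{
        int fds[2];
        struct pollfd pfd;

        socketpair(AF_UNIX, SOCK_STREAM, 0, fds);

        /* SHUT_WR sets SEND_SHUTDOWN locally and RCV_SHUTDOWN on the
         * peer, so ... */
        shutdown(fds[0], SHUT_WR);

        pfd.fd = fds[1];
        pfd.events = POLLIN | POLLRDHUP;
        poll(&pfd, 1, 0);       /* ... POLLRDHUP|POLLIN is reported here */

        return 0;
}
#endif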