// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko Eißfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS-based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
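
/*
 * Editor's illustration (not part of the original file): how the two
 * name spaces above look from userspace. A hedged sketch of the
 * documented semantics; the "demo" name and path are made up.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	// Filesystem name: NUL-terminated path, visible through the VFS.
 *	strcpy(a.sun_path, "/tmp/demo.sock");
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	// Abstract name: sun_path[0] == '\0'; the following bytes (not
 *	// NUL-terminated) are the name, and the address length passed to
 *	// bind() decides how many of them count.
 *	a.sun_path[0] = '\0';
 *	memcpy(a.sun_path + 1, "demo", 4);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */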

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */
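
/*
 * Editor's note (illustrative, not kernel API): when two socket state
 * locks must be held at once, this file takes them in ascending pointer
 * order so that concurrent lockers cannot deadlock; see
 * unix_state_double_lock() below. The shape of the pattern is roughly:
 *
 *	if (sk1 < sk2) {
 *		unix_state_lock(sk1);
 *		unix_state_lock_nested(sk2);
 *	} else {
 *		unix_state_lock(sk2);
 *		unix_state_lock_nested(sk1);
 *	}
 */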

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it starts with a non-zero byte, it should be NUL terminated (FS object)
 *		- if it starts with zero, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (e.g., /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
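
/*
 * Editor's illustration (added, not part of the original file): the
 * situation the comment above describes, seen from userspace. The
 * server socket stands in for any listener with many unconnected
 * writers, e.g. a /dev/log-style logger; names are made up.
 *
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *	connect(fd, (struct sockaddr *)&srv, srv_len); // srv never connects back
 *
 *	struct pollfd p = { .fd = fd, .events = POLLOUT };
 *	poll(&p, 1, -1);	// may block on the *server's* queue length,
 *				// not just on our own send buffer; the
 *				// peer_wait relay below wakes us up when
 *				// the server reads a datagram.
 */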

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
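
/*
 * Editor's worked example (illustrative): unix_writable() reports the
 * socket writable while wmem_alloc * 4 <= sk_sndbuf, i.e. while no more
 * than a quarter of the send buffer is charged to in-flight skbs. With
 * a hypothetical sk_sndbuf of 212992 bytes, POLLOUT would be reported
 * while at most 53248 bytes remain queued against the sender.
 */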

/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, it allows us to
 * do flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What does the above comment talk about?
	 *						--ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
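
/*
 * Editor's illustration (added): why unix_gc() is needed on release.
 * SCM_RIGHTS lets a process park a unix socket's fd inside a message
 * queued on another unix socket, which can form reference cycles that
 * plain refcounting never frees. A minimal userspace way to create an
 * in-flight fd (sketch; names made up):
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_DGRAM, 0, sv);
 *	// Send sv[0] *over itself* via SCM_RIGHTS, then close both ends:
 *	// the queued skb still holds a reference to the socket, and only
 *	// the garbage collector can reclaim it.
 */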

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}
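
/*
 * Editor's usage sketch (userspace, illustrative): unix_set_peek_off()
 * backs the generic SO_PEEK_OFF socket option, which makes successive
 * MSG_PEEK reads advance through queued data instead of rereading from
 * the start:
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 0..15
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 16..31, queue intact
 */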


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
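
/*
 * Editor's illustration (added): which ops table a userspace socket()
 * call lands on (see unix_create() below; SOCK_RAW is quietly treated
 * as SOCK_DGRAM):
 *
 *	socket(AF_UNIX, SOCK_STREAM, 0);	// unix_stream_ops
 *	socket(AF_UNIX, SOCK_DGRAM, 0);		// unix_dgram_ops
 *	socket(AF_UNIX, SOCK_SEQPACKET, 0);	// unix_seqpacket_ops
 */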

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW
		 *	though nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum + 1) & 0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for the last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
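
/*
 * Editor's usage sketch (userspace, illustrative): the two bind flavours
 * handled by unix_bind()/unix_autobind() above. Names are made up.
 *
 *	// Explicit bind: a pathname or abstract sockaddr_un as usual.
 *	bind(fd, (struct sockaddr *)&a, len);
 *
 *	// Autobind: passing only sun_family (len == sizeof(short))
 *	// assigns a unique 5-hex-digit abstract name, visible afterwards
 *	// through getsockname().
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	bind(fd, (struct sockaddr *)&a, sizeof(sa_family_t));
 */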

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
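
/*
 * Editor's illustration (userspace): per 1003.1g, connecting a datagram
 * socket to an AF_UNSPEC address dissolves the association, which
 * unix_dgram_connect() above handles with other == NULL:
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *	connect(fd, &sa, sizeof(sa));	// disconnect the dgram socket
 */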

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	 * If we do it after the state is locked,
	 * we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.
	 *
	 * This is a tricky place. We need to grab our state lock and cannot
	 * drop the lock on the peer. It is dangerous because deadlock is
	 * possible. Connect-to-self and simultaneous
	 * attempts to connect are eliminated by checking socket
	 * state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	 * check this before attempting to grab the lock.
	 *
	 * Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock. Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path, visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}
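
/*
 * Editor's usage sketch (userspace, illustrative): unix_socketpair()
 * above is what backs socketpair(2); both ends come up connected, with
 * peer credentials already set:
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	write(sv[0], "hi", 2);		// readable on sv[1]
 */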

static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}
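
/*
 * Editor's usage sketch (userspace, illustrative): the SOCK_PASSCRED
 * checks above implement SO_PASSCRED; once either end sets it, every
 * received message carries an SCM_CREDENTIALS control block:
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	// recvmsg() now yields a cmsg with cmsg_level == SOL_SOCKET,
 *	// cmsg_type == SCM_CREDENTIALS and a struct ucred {pid,uid,gid}.
 */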

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	a datagram error do here?
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}

/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
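
/*
 * Editor's worked example (illustrative): with UNIX_SKB_FRAGS_SZ capped
 * at 32768 bytes, unix_stream_sendmsg() below limits each skb to
 * min((sk_sndbuf / 2) - 64, SKB_MAX_HEAD(0) + 32768) bytes, so a large
 * write is split into several queued skbs rather than one huge
 * allocation; only the part above SKB_MAX_HEAD(0) lands in page
 * fragments (data_len).
 */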

static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}

static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is the fast path; we don't necessarily need to
		 * call kfree_skb, even though with newskb == NULL
		 * this does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}

static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}

static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
					      &last);
		if (skb)
			break;

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		 *   - not return fds - good, but too simple 8)
		 *   - return fds, and not return them on read (old strategy,
		 *     apparently wrong)
		 *   - clone fds (I chose it for now, it is the most universal
		 *     solution)
		 *
		 * POSIX 1003.1g does not actually define this clearly
		 * at all. POSIX 1003.1g doesn't define a lot of things
		 * clearly however!
		 */

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}

/*
 * Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};
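
/*
 * Example (hypothetical, for illustration only): a consumer of the
 * generic stream reader below supplies a recv_actor that takes up to
 * "chunk" bytes starting "skip" bytes past UNIXCB(skb).consumed and
 * returns how many it actually took (or a negative error). A minimal
 * sketch in the style of the real actors further down, which would
 * simply discard the data:
 *
 *	static int unix_stream_drop_actor(struct sk_buff *skb,
 *					  int skip, int chunk,
 *					  struct unix_stream_read_state *state)
 *	{
 *		return chunk;	// claim the bytes without copying them
 *	}
 */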

static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg().
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 * POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* The skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and must assume it invalid - we can
			 * be sure it was dropped from the socket queue.
			 *
			 * Let's report a short read.
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
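
/*
 * Example (illustrative sketch, not part of the kernel source): the
 * sk_peek_offset() bookkeeping above is what backs SO_PEEK_OFF on
 * stream sockets, letting userspace walk queued data with MSG_PEEK
 * without rereading from the start:
 *
 *	char buf[4];
 *	int off = 0;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 4, MSG_PEEK); // bytes 0..3, peek offset becomes 4
 *	recv(fd, buf, 4, MSG_PEEK); // bytes 4..7, peek offset becomes 8
 *	recv(fd, buf, 4, 0);        // real read; offset is walked back
 */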

static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}

static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}

static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
		int peer_mode = 0;

		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
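
/*
 * Example (illustrative sketch, not part of the kernel source): the
 * peer_mode mirroring in unix_shutdown() above is observable from
 * userspace - SHUT_WR on one end reads as EOF on the other, while the
 * opposite direction keeps working:
 *
 *	int sv[2];
 *	char c = 'x';
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);  // peer gets RCV_SHUTDOWN
 *	read(sv[1], &c, 1);        // returns 0 (EOF), does not block
 *	write(sv[1], &c, 1);       // still allowed in this direction
 */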

static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
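
/*
 * Example (illustrative sketch, not part of the kernel source): the
 * userspace view of the commands handled above. SIOCINQ/SIOCOUTQ
 * report queue sizes through the int argument; SIOCUNIXFILE hands back
 * a new O_PATH descriptor for the bound filesystem object via the
 * ioctl() return value:
 *
 *	int pending, path_fd;
 *
 *	ioctl(fd, SIOCINQ, &pending);       // bytes ready to read
 *	ioctl(fd, SIOCOUTQ, &pending);      // sent but not yet consumed
 *	path_fd = ioctl(fd, SIOCUNIXFILE);  // needs CAP_NET_ADMIN
 */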

static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * We set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}

static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
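
/*
 * Example (illustrative sketch, not part of the kernel source): the
 * unix_dgram_peer_wake_me() dance above means poll() on a connected
 * datagram sender stops reporting POLLOUT while the receiver's queue
 * is full, instead of letting sendmsg() spin on failure:
 *
 *	struct pollfd pfd = { .fd = sender, .events = POLLOUT };
 *
 *	poll(&pfd, 1, -1);	// sleeps until the peer drains its queue
 *	if (pfd.revents & POLLOUT)
 *		send(sender, buf, len, MSG_DONTWAIT);
 */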

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))

static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start = unix_seq_start,
	.next  = unix_seq_next,
	.stop  = unix_seq_stop,
	.show  = unix_seq_show,
};
#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};

static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
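
/*
 * Example (illustrative): one line of /proc/net/unix as produced by
 * unix_seq_show() above. Columns are Num (kernel socket address,
 * hashed under %pK), RefCount, Protocol (always 0), Flags, Type, St
 * and Inode, followed by the bound path, with a '@' prefix standing
 * in for abstract names. The values and path below are hypothetical:
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 */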