1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NET4: Implementation of BSD Unix domain sockets. 4 * 5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> 6 * 7 * Fixes: 8 * Linus Torvalds : Assorted bug cures. 9 * Niibe Yutaka : async I/O support. 10 * Carsten Paeth : PF_UNIX check, address fixes. 11 * Alan Cox : Limit size of allocated blocks. 12 * Alan Cox : Fixed the stupid socketpair bug. 13 * Alan Cox : BSD compatibility fine tuning. 14 * Alan Cox : Fixed a bug in connect when interrupted. 15 * Alan Cox : Sorted out a proper draft version of 16 * file descriptor passing hacked up from 17 * Mike Shaver's work. 18 * Marty Leisner : Fixes to fd passing 19 * Nick Nevin : recvmsg bugfix. 20 * Alan Cox : Started proper garbage collector 21 * Heiko EiBfeldt : Missing verify_area check 22 * Alan Cox : Started POSIXisms 23 * Andreas Schwab : Replace inode by dentry for proper 24 * reference counting 25 * Kirk Petersen : Made this a module 26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm. 27 * Lots of bug fixes. 28 * Alexey Kuznetosv : Repaired (I hope) bugs introduces 29 * by above two patches. 30 * Andrea Arcangeli : If possible we block in connect(2) 31 * if the max backlog of the listen socket 32 * is been reached. This won't break 33 * old apps and it will avoid huge amount 34 * of socks hashed (this for unix_gc() 35 * performances reasons). 36 * Security fix that limits the max 37 * number of socks to 2*max_files and 38 * the number of skb queueable in the 39 * dgram receiver. 40 * Artur Skawina : Hash function optimizations 41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) 42 * Malcolm Beattie : Set peercred for socketpair 43 * Michal Ostrowski : Module initialization cleanup. 44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, 45 * the core infrastructure is doing that 46 * for all net proto families now (2.5.69+) 47 * 48 * Known differences from reference BSD that was tested: 49 * 50 * [TO FIX] 51 * ECONNREFUSED is not returned from one end of a connected() socket to the 52 * other the moment one end closes. 53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark 54 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 55 * [NOT TO FIX] 56 * accept() returns a path name even if the connecting socket has closed 57 * in the meantime (BSD loses the path and gives up). 58 * accept() returns 0 length path for an unbound connector. BSD returns 16 59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 60 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 61 * BSD af_unix apparently has connect forgetting to block properly. 62 * (need to check this with the POSIX spec in detail) 63 * 64 * Differences from 2.0.0-11-... (ANK) 65 * Bug fixes and improvements. 66 * - client shutdown killed server socket. 67 * - removed all useless cli/sti pairs. 68 * 69 * Semantic changes/extensions. 70 * - generic control message passing. 71 * - SCM_CREDENTIALS control message. 72 * - "Abstract" (not FS based) socket bindings. 73 * Abstract names are sequences of bytes (not zero terminated) 74 * started by 0, so that this name space does not intersect 75 * with BSD names. 76 */ 77 78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 79 80 #include <linux/module.h> 81 #include <linux/kernel.h> 82 #include <linux/signal.h> 83 #include <linux/sched/signal.h> 84 #include <linux/errno.h> 85 #include <linux/string.h> 86 #include <linux/stat.h> 87 #include <linux/dcache.h> 88 #include <linux/namei.h> 89 #include <linux/socket.h> 90 #include <linux/un.h> 91 #include <linux/fcntl.h> 92 #include <linux/termios.h> 93 #include <linux/sockios.h> 94 #include <linux/net.h> 95 #include <linux/in.h> 96 #include <linux/fs.h> 97 #include <linux/slab.h> 98 #include <linux/uaccess.h> 99 #include <linux/skbuff.h> 100 #include <linux/netdevice.h> 101 #include <net/net_namespace.h> 102 #include <net/sock.h> 103 #include <net/tcp_states.h> 104 #include <net/af_unix.h> 105 #include <linux/proc_fs.h> 106 #include <linux/seq_file.h> 107 #include <net/scm.h> 108 #include <linux/init.h> 109 #include <linux/poll.h> 110 #include <linux/rtnetlink.h> 111 #include <linux/mount.h> 112 #include <net/checksum.h> 113 #include <linux/security.h> 114 #include <linux/freezer.h> 115 #include <linux/file.h> 116 #include <linux/btf_ids.h> 117 118 #include "scm.h" 119 120 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; 121 EXPORT_SYMBOL_GPL(unix_socket_table); 122 DEFINE_SPINLOCK(unix_table_lock); 123 EXPORT_SYMBOL_GPL(unix_table_lock); 124 static atomic_long_t unix_nr_socks; 125 126 127 static struct hlist_head *unix_sockets_unbound(void *addr) 128 { 129 unsigned long hash = (unsigned long)addr; 130 131 hash ^= hash >> 16; 132 hash ^= hash >> 8; 133 hash %= UNIX_HASH_SIZE; 134 return &unix_socket_table[UNIX_HASH_SIZE + hash]; 135 } 136 137 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) 138 139 #ifdef CONFIG_SECURITY_NETWORK 140 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 141 { 142 UNIXCB(skb).secid = scm->secid; 143 } 144 145 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 146 { 147 scm->secid = UNIXCB(skb).secid; 148 } 149 150 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 151 { 152 return (scm->secid == UNIXCB(skb).secid); 153 } 154 #else 155 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 156 { } 157 158 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 159 { } 160 161 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 162 { 163 return true; 164 } 165 #endif /* CONFIG_SECURITY_NETWORK */ 166 167 /* 168 * SMP locking strategy: 169 * hash table is protected with spinlock unix_table_lock 170 * each socket state is protected by separate spin lock. 171 */ 172 173 static inline unsigned int unix_hash_fold(__wsum n) 174 { 175 unsigned int hash = (__force unsigned int)csum_fold(n); 176 177 hash ^= hash>>8; 178 return hash&(UNIX_HASH_SIZE-1); 179 } 180 181 #define unix_peer(sk) (unix_sk(sk)->peer) 182 183 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 184 { 185 return unix_peer(osk) == sk; 186 } 187 188 static inline int unix_may_send(struct sock *sk, struct sock *osk) 189 { 190 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 191 } 192 193 static inline int unix_recvq_full(const struct sock *sk) 194 { 195 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 196 } 197 198 static inline int unix_recvq_full_lockless(const struct sock *sk) 199 { 200 return skb_queue_len_lockless(&sk->sk_receive_queue) > 201 READ_ONCE(sk->sk_max_ack_backlog); 202 } 203 204 struct sock *unix_peer_get(struct sock *s) 205 { 206 struct sock *peer; 207 208 unix_state_lock(s); 209 peer = unix_peer(s); 210 if (peer) 211 sock_hold(peer); 212 unix_state_unlock(s); 213 return peer; 214 } 215 EXPORT_SYMBOL_GPL(unix_peer_get); 216 217 static inline void unix_release_addr(struct unix_address *addr) 218 { 219 if (refcount_dec_and_test(&addr->refcnt)) 220 kfree(addr); 221 } 222 223 /* 224 * Check unix socket name: 225 * - should be not zero length. 226 * - if started by not zero, should be NULL terminated (FS object) 227 * - if started by zero, it is abstract name. 228 */ 229 230 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) 231 { 232 *hashp = 0; 233 234 if (len <= sizeof(short) || len > sizeof(*sunaddr)) 235 return -EINVAL; 236 if (!sunaddr || sunaddr->sun_family != AF_UNIX) 237 return -EINVAL; 238 if (sunaddr->sun_path[0]) { 239 /* 240 * This may look like an off by one error but it is a bit more 241 * subtle. 108 is the longest valid AF_UNIX path for a binding. 242 * sun_path[108] doesn't as such exist. However in kernel space 243 * we are guaranteed that it is a valid memory location in our 244 * kernel address buffer. 245 */ 246 ((char *)sunaddr)[len] = 0; 247 len = strlen(sunaddr->sun_path)+1+sizeof(short); 248 return len; 249 } 250 251 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); 252 return len; 253 } 254 255 static void __unix_remove_socket(struct sock *sk) 256 { 257 sk_del_node_init(sk); 258 } 259 260 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) 261 { 262 WARN_ON(!sk_unhashed(sk)); 263 sk_add_node(sk, list); 264 } 265 266 static void __unix_set_addr(struct sock *sk, struct unix_address *addr, 267 unsigned hash) 268 { 269 __unix_remove_socket(sk); 270 smp_store_release(&unix_sk(sk)->addr, addr); 271 __unix_insert_socket(&unix_socket_table[hash], sk); 272 } 273 274 static inline void unix_remove_socket(struct sock *sk) 275 { 276 spin_lock(&unix_table_lock); 277 __unix_remove_socket(sk); 278 spin_unlock(&unix_table_lock); 279 } 280 281 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) 282 { 283 spin_lock(&unix_table_lock); 284 __unix_insert_socket(list, sk); 285 spin_unlock(&unix_table_lock); 286 } 287 288 static struct sock *__unix_find_socket_byname(struct net *net, 289 struct sockaddr_un *sunname, 290 int len, unsigned int hash) 291 { 292 struct sock *s; 293 294 sk_for_each(s, &unix_socket_table[hash]) { 295 struct unix_sock *u = unix_sk(s); 296 297 if (!net_eq(sock_net(s), net)) 298 continue; 299 300 if (u->addr->len == len && 301 !memcmp(u->addr->name, sunname, len)) 302 return s; 303 } 304 return NULL; 305 } 306 307 static inline struct sock *unix_find_socket_byname(struct net *net, 308 struct sockaddr_un *sunname, 309 int len, unsigned int hash) 310 { 311 struct sock *s; 312 313 spin_lock(&unix_table_lock); 314 s = __unix_find_socket_byname(net, sunname, len, hash); 315 if (s) 316 sock_hold(s); 317 spin_unlock(&unix_table_lock); 318 return s; 319 } 320 321 static struct sock *unix_find_socket_byinode(struct inode *i) 322 { 323 struct sock *s; 324 325 spin_lock(&unix_table_lock); 326 sk_for_each(s, 327 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { 328 struct dentry *dentry = unix_sk(s)->path.dentry; 329 330 if (dentry && d_backing_inode(dentry) == i) { 331 sock_hold(s); 332 goto found; 333 } 334 } 335 s = NULL; 336 found: 337 spin_unlock(&unix_table_lock); 338 return s; 339 } 340 341 /* Support code for asymmetrically connected dgram sockets 342 * 343 * If a datagram socket is connected to a socket not itself connected 344 * to the first socket (eg, /dev/log), clients may only enqueue more 345 * messages if the present receive queue of the server socket is not 346 * "too large". This means there's a second writeability condition 347 * poll and sendmsg need to test. The dgram recv code will do a wake 348 * up on the peer_wait wait queue of a socket upon reception of a 349 * datagram which needs to be propagated to sleeping would-be writers 350 * since these might not have sent anything so far. This can't be 351 * accomplished via poll_wait because the lifetime of the server 352 * socket might be less than that of its clients if these break their 353 * association with it or if the server socket is closed while clients 354 * are still connected to it and there's no way to inform "a polling 355 * implementation" that it should let go of a certain wait queue 356 * 357 * In order to propagate a wake up, a wait_queue_entry_t of the client 358 * socket is enqueued on the peer_wait queue of the server socket 359 * whose wake function does a wake_up on the ordinary client socket 360 * wait queue. This connection is established whenever a write (or 361 * poll for write) hit the flow control condition and broken when the 362 * association to the server socket is dissolved or after a wake up 363 * was relayed. 364 */ 365 366 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, 367 void *key) 368 { 369 struct unix_sock *u; 370 wait_queue_head_t *u_sleep; 371 372 u = container_of(q, struct unix_sock, peer_wake); 373 374 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, 375 q); 376 u->peer_wake.private = NULL; 377 378 /* relaying can only happen while the wq still exists */ 379 u_sleep = sk_sleep(&u->sk); 380 if (u_sleep) 381 wake_up_interruptible_poll(u_sleep, key_to_poll(key)); 382 383 return 0; 384 } 385 386 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) 387 { 388 struct unix_sock *u, *u_other; 389 int rc; 390 391 u = unix_sk(sk); 392 u_other = unix_sk(other); 393 rc = 0; 394 spin_lock(&u_other->peer_wait.lock); 395 396 if (!u->peer_wake.private) { 397 u->peer_wake.private = other; 398 __add_wait_queue(&u_other->peer_wait, &u->peer_wake); 399 400 rc = 1; 401 } 402 403 spin_unlock(&u_other->peer_wait.lock); 404 return rc; 405 } 406 407 static void unix_dgram_peer_wake_disconnect(struct sock *sk, 408 struct sock *other) 409 { 410 struct unix_sock *u, *u_other; 411 412 u = unix_sk(sk); 413 u_other = unix_sk(other); 414 spin_lock(&u_other->peer_wait.lock); 415 416 if (u->peer_wake.private == other) { 417 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); 418 u->peer_wake.private = NULL; 419 } 420 421 spin_unlock(&u_other->peer_wait.lock); 422 } 423 424 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, 425 struct sock *other) 426 { 427 unix_dgram_peer_wake_disconnect(sk, other); 428 wake_up_interruptible_poll(sk_sleep(sk), 429 EPOLLOUT | 430 EPOLLWRNORM | 431 EPOLLWRBAND); 432 } 433 434 /* preconditions: 435 * - unix_peer(sk) == other 436 * - association is stable 437 */ 438 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) 439 { 440 int connected; 441 442 connected = unix_dgram_peer_wake_connect(sk, other); 443 444 /* If other is SOCK_DEAD, we want to make sure we signal 445 * POLLOUT, such that a subsequent write() can get a 446 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs 447 * to other and its full, we will hang waiting for POLLOUT. 448 */ 449 if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD)) 450 return 1; 451 452 if (connected) 453 unix_dgram_peer_wake_disconnect(sk, other); 454 455 return 0; 456 } 457 458 static int unix_writable(const struct sock *sk) 459 { 460 return sk->sk_state != TCP_LISTEN && 461 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 462 } 463 464 static void unix_write_space(struct sock *sk) 465 { 466 struct socket_wq *wq; 467 468 rcu_read_lock(); 469 if (unix_writable(sk)) { 470 wq = rcu_dereference(sk->sk_wq); 471 if (skwq_has_sleeper(wq)) 472 wake_up_interruptible_sync_poll(&wq->wait, 473 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); 474 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 475 } 476 rcu_read_unlock(); 477 } 478 479 /* When dgram socket disconnects (or changes its peer), we clear its receive 480 * queue of packets arrived from previous peer. First, it allows to do 481 * flow control based only on wmem_alloc; second, sk connected to peer 482 * may receive messages only from that peer. */ 483 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 484 { 485 if (!skb_queue_empty(&sk->sk_receive_queue)) { 486 skb_queue_purge(&sk->sk_receive_queue); 487 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 488 489 /* If one link of bidirectional dgram pipe is disconnected, 490 * we signal error. Messages are lost. Do not make this, 491 * when peer was not connected to us. 492 */ 493 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 494 other->sk_err = ECONNRESET; 495 sk_error_report(other); 496 } 497 } 498 other->sk_state = TCP_CLOSE; 499 } 500 501 static void unix_sock_destructor(struct sock *sk) 502 { 503 struct unix_sock *u = unix_sk(sk); 504 505 skb_queue_purge(&sk->sk_receive_queue); 506 507 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 508 if (u->oob_skb) { 509 kfree_skb(u->oob_skb); 510 u->oob_skb = NULL; 511 } 512 #endif 513 WARN_ON(refcount_read(&sk->sk_wmem_alloc)); 514 WARN_ON(!sk_unhashed(sk)); 515 WARN_ON(sk->sk_socket); 516 if (!sock_flag(sk, SOCK_DEAD)) { 517 pr_info("Attempt to release alive unix socket: %p\n", sk); 518 return; 519 } 520 521 if (u->addr) 522 unix_release_addr(u->addr); 523 524 atomic_long_dec(&unix_nr_socks); 525 local_bh_disable(); 526 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 527 local_bh_enable(); 528 #ifdef UNIX_REFCNT_DEBUG 529 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, 530 atomic_long_read(&unix_nr_socks)); 531 #endif 532 } 533 534 static void unix_release_sock(struct sock *sk, int embrion) 535 { 536 struct unix_sock *u = unix_sk(sk); 537 struct path path; 538 struct sock *skpair; 539 struct sk_buff *skb; 540 int state; 541 542 unix_remove_socket(sk); 543 544 /* Clear state */ 545 unix_state_lock(sk); 546 sock_orphan(sk); 547 sk->sk_shutdown = SHUTDOWN_MASK; 548 path = u->path; 549 u->path.dentry = NULL; 550 u->path.mnt = NULL; 551 state = sk->sk_state; 552 sk->sk_state = TCP_CLOSE; 553 554 skpair = unix_peer(sk); 555 unix_peer(sk) = NULL; 556 557 unix_state_unlock(sk); 558 559 wake_up_interruptible_all(&u->peer_wait); 560 561 if (skpair != NULL) { 562 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 563 unix_state_lock(skpair); 564 /* No more writes */ 565 skpair->sk_shutdown = SHUTDOWN_MASK; 566 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 567 skpair->sk_err = ECONNRESET; 568 unix_state_unlock(skpair); 569 skpair->sk_state_change(skpair); 570 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 571 } 572 573 unix_dgram_peer_wake_disconnect(sk, skpair); 574 sock_put(skpair); /* It may now die */ 575 } 576 577 /* Try to flush out this socket. Throw out buffers at least */ 578 579 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 580 if (state == TCP_LISTEN) 581 unix_release_sock(skb->sk, 1); 582 /* passed fds are erased in the kfree_skb hook */ 583 UNIXCB(skb).consumed = skb->len; 584 kfree_skb(skb); 585 } 586 587 if (path.dentry) 588 path_put(&path); 589 590 sock_put(sk); 591 592 /* ---- Socket is dead now and most probably destroyed ---- */ 593 594 /* 595 * Fixme: BSD difference: In BSD all sockets connected to us get 596 * ECONNRESET and we die on the spot. In Linux we behave 597 * like files and pipes do and wait for the last 598 * dereference. 599 * 600 * Can't we simply set sock->err? 601 * 602 * What the above comment does talk about? --ANK(980817) 603 */ 604 605 if (unix_tot_inflight) 606 unix_gc(); /* Garbage collect fds */ 607 } 608 609 static void init_peercred(struct sock *sk) 610 { 611 put_pid(sk->sk_peer_pid); 612 if (sk->sk_peer_cred) 613 put_cred(sk->sk_peer_cred); 614 sk->sk_peer_pid = get_pid(task_tgid(current)); 615 sk->sk_peer_cred = get_current_cred(); 616 } 617 618 static void copy_peercred(struct sock *sk, struct sock *peersk) 619 { 620 put_pid(sk->sk_peer_pid); 621 if (sk->sk_peer_cred) 622 put_cred(sk->sk_peer_cred); 623 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 624 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 625 } 626 627 static int unix_listen(struct socket *sock, int backlog) 628 { 629 int err; 630 struct sock *sk = sock->sk; 631 struct unix_sock *u = unix_sk(sk); 632 633 err = -EOPNOTSUPP; 634 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 635 goto out; /* Only stream/seqpacket sockets accept */ 636 err = -EINVAL; 637 if (!u->addr) 638 goto out; /* No listens on an unbound socket */ 639 unix_state_lock(sk); 640 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 641 goto out_unlock; 642 if (backlog > sk->sk_max_ack_backlog) 643 wake_up_interruptible_all(&u->peer_wait); 644 sk->sk_max_ack_backlog = backlog; 645 sk->sk_state = TCP_LISTEN; 646 /* set credentials so connect can copy them */ 647 init_peercred(sk); 648 err = 0; 649 650 out_unlock: 651 unix_state_unlock(sk); 652 out: 653 return err; 654 } 655 656 static int unix_release(struct socket *); 657 static int unix_bind(struct socket *, struct sockaddr *, int); 658 static int unix_stream_connect(struct socket *, struct sockaddr *, 659 int addr_len, int flags); 660 static int unix_socketpair(struct socket *, struct socket *); 661 static int unix_accept(struct socket *, struct socket *, int, bool); 662 static int unix_getname(struct socket *, struct sockaddr *, int); 663 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 664 static __poll_t unix_dgram_poll(struct file *, struct socket *, 665 poll_table *); 666 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 667 #ifdef CONFIG_COMPAT 668 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 669 #endif 670 static int unix_shutdown(struct socket *, int); 671 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 672 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 673 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 674 size_t size, int flags); 675 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 676 struct pipe_inode_info *, size_t size, 677 unsigned int flags); 678 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 679 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 680 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, 681 sk_read_actor_t recv_actor); 682 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, 683 sk_read_actor_t recv_actor); 684 static int unix_dgram_connect(struct socket *, struct sockaddr *, 685 int, int); 686 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 687 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 688 int); 689 690 static int unix_set_peek_off(struct sock *sk, int val) 691 { 692 struct unix_sock *u = unix_sk(sk); 693 694 if (mutex_lock_interruptible(&u->iolock)) 695 return -EINTR; 696 697 sk->sk_peek_off = val; 698 mutex_unlock(&u->iolock); 699 700 return 0; 701 } 702 703 #ifdef CONFIG_PROC_FS 704 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 705 { 706 struct sock *sk = sock->sk; 707 struct unix_sock *u; 708 709 if (sk) { 710 u = unix_sk(sock->sk); 711 seq_printf(m, "scm_fds: %u\n", 712 atomic_read(&u->scm_stat.nr_fds)); 713 } 714 } 715 #else 716 #define unix_show_fdinfo NULL 717 #endif 718 719 static const struct proto_ops unix_stream_ops = { 720 .family = PF_UNIX, 721 .owner = THIS_MODULE, 722 .release = unix_release, 723 .bind = unix_bind, 724 .connect = unix_stream_connect, 725 .socketpair = unix_socketpair, 726 .accept = unix_accept, 727 .getname = unix_getname, 728 .poll = unix_poll, 729 .ioctl = unix_ioctl, 730 #ifdef CONFIG_COMPAT 731 .compat_ioctl = unix_compat_ioctl, 732 #endif 733 .listen = unix_listen, 734 .shutdown = unix_shutdown, 735 .sendmsg = unix_stream_sendmsg, 736 .recvmsg = unix_stream_recvmsg, 737 .read_sock = unix_stream_read_sock, 738 .mmap = sock_no_mmap, 739 .sendpage = unix_stream_sendpage, 740 .splice_read = unix_stream_splice_read, 741 .set_peek_off = unix_set_peek_off, 742 .show_fdinfo = unix_show_fdinfo, 743 }; 744 745 static const struct proto_ops unix_dgram_ops = { 746 .family = PF_UNIX, 747 .owner = THIS_MODULE, 748 .release = unix_release, 749 .bind = unix_bind, 750 .connect = unix_dgram_connect, 751 .socketpair = unix_socketpair, 752 .accept = sock_no_accept, 753 .getname = unix_getname, 754 .poll = unix_dgram_poll, 755 .ioctl = unix_ioctl, 756 #ifdef CONFIG_COMPAT 757 .compat_ioctl = unix_compat_ioctl, 758 #endif 759 .listen = sock_no_listen, 760 .shutdown = unix_shutdown, 761 .sendmsg = unix_dgram_sendmsg, 762 .read_sock = unix_read_sock, 763 .recvmsg = unix_dgram_recvmsg, 764 .mmap = sock_no_mmap, 765 .sendpage = sock_no_sendpage, 766 .set_peek_off = unix_set_peek_off, 767 .show_fdinfo = unix_show_fdinfo, 768 }; 769 770 static const struct proto_ops unix_seqpacket_ops = { 771 .family = PF_UNIX, 772 .owner = THIS_MODULE, 773 .release = unix_release, 774 .bind = unix_bind, 775 .connect = unix_stream_connect, 776 .socketpair = unix_socketpair, 777 .accept = unix_accept, 778 .getname = unix_getname, 779 .poll = unix_dgram_poll, 780 .ioctl = unix_ioctl, 781 #ifdef CONFIG_COMPAT 782 .compat_ioctl = unix_compat_ioctl, 783 #endif 784 .listen = unix_listen, 785 .shutdown = unix_shutdown, 786 .sendmsg = unix_seqpacket_sendmsg, 787 .recvmsg = unix_seqpacket_recvmsg, 788 .mmap = sock_no_mmap, 789 .sendpage = sock_no_sendpage, 790 .set_peek_off = unix_set_peek_off, 791 .show_fdinfo = unix_show_fdinfo, 792 }; 793 794 static void unix_close(struct sock *sk, long timeout) 795 { 796 /* Nothing to do here, unix socket does not need a ->close(). 797 * This is merely for sockmap. 798 */ 799 } 800 801 static void unix_unhash(struct sock *sk) 802 { 803 /* Nothing to do here, unix socket does not need a ->unhash(). 804 * This is merely for sockmap. 805 */ 806 } 807 808 struct proto unix_dgram_proto = { 809 .name = "UNIX-DGRAM", 810 .owner = THIS_MODULE, 811 .obj_size = sizeof(struct unix_sock), 812 .close = unix_close, 813 #ifdef CONFIG_BPF_SYSCALL 814 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 815 #endif 816 }; 817 818 struct proto unix_stream_proto = { 819 .name = "UNIX-STREAM", 820 .owner = THIS_MODULE, 821 .obj_size = sizeof(struct unix_sock), 822 .close = unix_close, 823 .unhash = unix_unhash, 824 #ifdef CONFIG_BPF_SYSCALL 825 .psock_update_sk_prot = unix_stream_bpf_update_proto, 826 #endif 827 }; 828 829 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 830 { 831 struct sock *sk = NULL; 832 struct unix_sock *u; 833 834 atomic_long_inc(&unix_nr_socks); 835 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) 836 goto out; 837 838 if (type == SOCK_STREAM) 839 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 840 else /*dgram and seqpacket */ 841 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 842 843 if (!sk) 844 goto out; 845 846 sock_init_data(sock, sk); 847 848 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 849 sk->sk_write_space = unix_write_space; 850 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 851 sk->sk_destruct = unix_sock_destructor; 852 u = unix_sk(sk); 853 u->path.dentry = NULL; 854 u->path.mnt = NULL; 855 spin_lock_init(&u->lock); 856 atomic_long_set(&u->inflight, 0); 857 INIT_LIST_HEAD(&u->link); 858 mutex_init(&u->iolock); /* single task reading lock */ 859 mutex_init(&u->bindlock); /* single task binding lock */ 860 init_waitqueue_head(&u->peer_wait); 861 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 862 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 863 unix_insert_socket(unix_sockets_unbound(sk), sk); 864 out: 865 if (sk == NULL) 866 atomic_long_dec(&unix_nr_socks); 867 else { 868 local_bh_disable(); 869 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 870 local_bh_enable(); 871 } 872 return sk; 873 } 874 875 static int unix_create(struct net *net, struct socket *sock, int protocol, 876 int kern) 877 { 878 if (protocol && protocol != PF_UNIX) 879 return -EPROTONOSUPPORT; 880 881 sock->state = SS_UNCONNECTED; 882 883 switch (sock->type) { 884 case SOCK_STREAM: 885 sock->ops = &unix_stream_ops; 886 break; 887 /* 888 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 889 * nothing uses it. 890 */ 891 case SOCK_RAW: 892 sock->type = SOCK_DGRAM; 893 fallthrough; 894 case SOCK_DGRAM: 895 sock->ops = &unix_dgram_ops; 896 break; 897 case SOCK_SEQPACKET: 898 sock->ops = &unix_seqpacket_ops; 899 break; 900 default: 901 return -ESOCKTNOSUPPORT; 902 } 903 904 return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM; 905 } 906 907 static int unix_release(struct socket *sock) 908 { 909 struct sock *sk = sock->sk; 910 911 if (!sk) 912 return 0; 913 914 sk->sk_prot->close(sk, 0); 915 unix_release_sock(sk, 0); 916 sock->sk = NULL; 917 918 return 0; 919 } 920 921 static int unix_autobind(struct socket *sock) 922 { 923 struct sock *sk = sock->sk; 924 struct net *net = sock_net(sk); 925 struct unix_sock *u = unix_sk(sk); 926 static u32 ordernum = 1; 927 struct unix_address *addr; 928 int err; 929 unsigned int retries = 0; 930 931 err = mutex_lock_interruptible(&u->bindlock); 932 if (err) 933 return err; 934 935 if (u->addr) 936 goto out; 937 938 err = -ENOMEM; 939 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL); 940 if (!addr) 941 goto out; 942 943 addr->name->sun_family = AF_UNIX; 944 refcount_set(&addr->refcnt, 1); 945 946 retry: 947 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); 948 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); 949 addr->hash ^= sk->sk_type; 950 951 spin_lock(&unix_table_lock); 952 ordernum = (ordernum+1)&0xFFFFF; 953 954 if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) { 955 spin_unlock(&unix_table_lock); 956 /* 957 * __unix_find_socket_byname() may take long time if many names 958 * are already in use. 959 */ 960 cond_resched(); 961 /* Give up if all names seems to be in use. */ 962 if (retries++ == 0xFFFFF) { 963 err = -ENOSPC; 964 kfree(addr); 965 goto out; 966 } 967 goto retry; 968 } 969 970 __unix_set_addr(sk, addr, addr->hash); 971 spin_unlock(&unix_table_lock); 972 err = 0; 973 974 out: mutex_unlock(&u->bindlock); 975 return err; 976 } 977 978 static struct sock *unix_find_other(struct net *net, 979 struct sockaddr_un *sunname, int len, 980 int type, unsigned int hash, int *error) 981 { 982 struct sock *u; 983 struct path path; 984 int err = 0; 985 986 if (sunname->sun_path[0]) { 987 struct inode *inode; 988 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); 989 if (err) 990 goto fail; 991 inode = d_backing_inode(path.dentry); 992 err = path_permission(&path, MAY_WRITE); 993 if (err) 994 goto put_fail; 995 996 err = -ECONNREFUSED; 997 if (!S_ISSOCK(inode->i_mode)) 998 goto put_fail; 999 u = unix_find_socket_byinode(inode); 1000 if (!u) 1001 goto put_fail; 1002 1003 if (u->sk_type == type) 1004 touch_atime(&path); 1005 1006 path_put(&path); 1007 1008 err = -EPROTOTYPE; 1009 if (u->sk_type != type) { 1010 sock_put(u); 1011 goto fail; 1012 } 1013 } else { 1014 err = -ECONNREFUSED; 1015 u = unix_find_socket_byname(net, sunname, len, type ^ hash); 1016 if (u) { 1017 struct dentry *dentry; 1018 dentry = unix_sk(u)->path.dentry; 1019 if (dentry) 1020 touch_atime(&unix_sk(u)->path); 1021 } else 1022 goto fail; 1023 } 1024 return u; 1025 1026 put_fail: 1027 path_put(&path); 1028 fail: 1029 *error = err; 1030 return NULL; 1031 } 1032 1033 static int unix_bind_bsd(struct sock *sk, struct unix_address *addr) 1034 { 1035 struct unix_sock *u = unix_sk(sk); 1036 umode_t mode = S_IFSOCK | 1037 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1038 struct user_namespace *ns; // barf... 1039 struct path parent; 1040 struct dentry *dentry; 1041 unsigned int hash; 1042 int err; 1043 1044 /* 1045 * Get the parent directory, calculate the hash for last 1046 * component. 1047 */ 1048 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1049 if (IS_ERR(dentry)) 1050 return PTR_ERR(dentry); 1051 ns = mnt_user_ns(parent.mnt); 1052 1053 /* 1054 * All right, let's create it. 1055 */ 1056 err = security_path_mknod(&parent, dentry, mode, 0); 1057 if (!err) 1058 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); 1059 if (err) 1060 goto out; 1061 err = mutex_lock_interruptible(&u->bindlock); 1062 if (err) 1063 goto out_unlink; 1064 if (u->addr) 1065 goto out_unlock; 1066 1067 addr->hash = UNIX_HASH_SIZE; 1068 hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); 1069 spin_lock(&unix_table_lock); 1070 u->path.mnt = mntget(parent.mnt); 1071 u->path.dentry = dget(dentry); 1072 __unix_set_addr(sk, addr, hash); 1073 spin_unlock(&unix_table_lock); 1074 mutex_unlock(&u->bindlock); 1075 done_path_create(&parent, dentry); 1076 return 0; 1077 1078 out_unlock: 1079 mutex_unlock(&u->bindlock); 1080 err = -EINVAL; 1081 out_unlink: 1082 /* failed after successful mknod? unlink what we'd created... */ 1083 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); 1084 out: 1085 done_path_create(&parent, dentry); 1086 return err; 1087 } 1088 1089 static int unix_bind_abstract(struct sock *sk, struct unix_address *addr) 1090 { 1091 struct unix_sock *u = unix_sk(sk); 1092 int err; 1093 1094 err = mutex_lock_interruptible(&u->bindlock); 1095 if (err) 1096 return err; 1097 1098 if (u->addr) { 1099 mutex_unlock(&u->bindlock); 1100 return -EINVAL; 1101 } 1102 1103 spin_lock(&unix_table_lock); 1104 if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, 1105 addr->hash)) { 1106 spin_unlock(&unix_table_lock); 1107 mutex_unlock(&u->bindlock); 1108 return -EADDRINUSE; 1109 } 1110 __unix_set_addr(sk, addr, addr->hash); 1111 spin_unlock(&unix_table_lock); 1112 mutex_unlock(&u->bindlock); 1113 return 0; 1114 } 1115 1116 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1117 { 1118 struct sock *sk = sock->sk; 1119 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1120 char *sun_path = sunaddr->sun_path; 1121 int err; 1122 unsigned int hash; 1123 struct unix_address *addr; 1124 1125 if (addr_len < offsetofend(struct sockaddr_un, sun_family) || 1126 sunaddr->sun_family != AF_UNIX) 1127 return -EINVAL; 1128 1129 if (addr_len == sizeof(short)) 1130 return unix_autobind(sock); 1131 1132 err = unix_mkname(sunaddr, addr_len, &hash); 1133 if (err < 0) 1134 return err; 1135 addr_len = err; 1136 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); 1137 if (!addr) 1138 return -ENOMEM; 1139 1140 memcpy(addr->name, sunaddr, addr_len); 1141 addr->len = addr_len; 1142 addr->hash = hash ^ sk->sk_type; 1143 refcount_set(&addr->refcnt, 1); 1144 1145 if (sun_path[0]) 1146 err = unix_bind_bsd(sk, addr); 1147 else 1148 err = unix_bind_abstract(sk, addr); 1149 if (err) 1150 unix_release_addr(addr); 1151 return err == -EEXIST ? -EADDRINUSE : err; 1152 } 1153 1154 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1155 { 1156 if (unlikely(sk1 == sk2) || !sk2) { 1157 unix_state_lock(sk1); 1158 return; 1159 } 1160 if (sk1 < sk2) { 1161 unix_state_lock(sk1); 1162 unix_state_lock_nested(sk2); 1163 } else { 1164 unix_state_lock(sk2); 1165 unix_state_lock_nested(sk1); 1166 } 1167 } 1168 1169 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1170 { 1171 if (unlikely(sk1 == sk2) || !sk2) { 1172 unix_state_unlock(sk1); 1173 return; 1174 } 1175 unix_state_unlock(sk1); 1176 unix_state_unlock(sk2); 1177 } 1178 1179 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1180 int alen, int flags) 1181 { 1182 struct sock *sk = sock->sk; 1183 struct net *net = sock_net(sk); 1184 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1185 struct sock *other; 1186 unsigned int hash; 1187 int err; 1188 1189 err = -EINVAL; 1190 if (alen < offsetofend(struct sockaddr, sa_family)) 1191 goto out; 1192 1193 if (addr->sa_family != AF_UNSPEC) { 1194 err = unix_mkname(sunaddr, alen, &hash); 1195 if (err < 0) 1196 goto out; 1197 alen = err; 1198 1199 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1200 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) 1201 goto out; 1202 1203 restart: 1204 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err); 1205 if (!other) 1206 goto out; 1207 1208 unix_state_double_lock(sk, other); 1209 1210 /* Apparently VFS overslept socket death. Retry. */ 1211 if (sock_flag(other, SOCK_DEAD)) { 1212 unix_state_double_unlock(sk, other); 1213 sock_put(other); 1214 goto restart; 1215 } 1216 1217 err = -EPERM; 1218 if (!unix_may_send(sk, other)) 1219 goto out_unlock; 1220 1221 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1222 if (err) 1223 goto out_unlock; 1224 1225 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1226 } else { 1227 /* 1228 * 1003.1g breaking connected state with AF_UNSPEC 1229 */ 1230 other = NULL; 1231 unix_state_double_lock(sk, other); 1232 } 1233 1234 /* 1235 * If it was connected, reconnect. 1236 */ 1237 if (unix_peer(sk)) { 1238 struct sock *old_peer = unix_peer(sk); 1239 1240 unix_peer(sk) = other; 1241 if (!other) 1242 sk->sk_state = TCP_CLOSE; 1243 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1244 1245 unix_state_double_unlock(sk, other); 1246 1247 if (other != old_peer) 1248 unix_dgram_disconnected(sk, old_peer); 1249 sock_put(old_peer); 1250 } else { 1251 unix_peer(sk) = other; 1252 unix_state_double_unlock(sk, other); 1253 } 1254 1255 return 0; 1256 1257 out_unlock: 1258 unix_state_double_unlock(sk, other); 1259 sock_put(other); 1260 out: 1261 return err; 1262 } 1263 1264 static long unix_wait_for_peer(struct sock *other, long timeo) 1265 __releases(&unix_sk(other)->lock) 1266 { 1267 struct unix_sock *u = unix_sk(other); 1268 int sched; 1269 DEFINE_WAIT(wait); 1270 1271 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1272 1273 sched = !sock_flag(other, SOCK_DEAD) && 1274 !(other->sk_shutdown & RCV_SHUTDOWN) && 1275 unix_recvq_full(other); 1276 1277 unix_state_unlock(other); 1278 1279 if (sched) 1280 timeo = schedule_timeout(timeo); 1281 1282 finish_wait(&u->peer_wait, &wait); 1283 return timeo; 1284 } 1285 1286 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1287 int addr_len, int flags) 1288 { 1289 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1290 struct sock *sk = sock->sk; 1291 struct net *net = sock_net(sk); 1292 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1293 struct sock *newsk = NULL; 1294 struct sock *other = NULL; 1295 struct sk_buff *skb = NULL; 1296 unsigned int hash; 1297 int st; 1298 int err; 1299 long timeo; 1300 1301 err = unix_mkname(sunaddr, addr_len, &hash); 1302 if (err < 0) 1303 goto out; 1304 addr_len = err; 1305 1306 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && 1307 (err = unix_autobind(sock)) != 0) 1308 goto out; 1309 1310 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1311 1312 /* First of all allocate resources. 1313 If we will make it after state is locked, 1314 we will have to recheck all again in any case. 1315 */ 1316 1317 err = -ENOMEM; 1318 1319 /* create new sock for complete connection */ 1320 newsk = unix_create1(sock_net(sk), NULL, 0, sock->type); 1321 if (newsk == NULL) 1322 goto out; 1323 1324 /* Allocate skb for sending to listening sock */ 1325 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1326 if (skb == NULL) 1327 goto out; 1328 1329 restart: 1330 /* Find listening sock. */ 1331 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); 1332 if (!other) 1333 goto out; 1334 1335 /* Latch state of peer */ 1336 unix_state_lock(other); 1337 1338 /* Apparently VFS overslept socket death. Retry. */ 1339 if (sock_flag(other, SOCK_DEAD)) { 1340 unix_state_unlock(other); 1341 sock_put(other); 1342 goto restart; 1343 } 1344 1345 err = -ECONNREFUSED; 1346 if (other->sk_state != TCP_LISTEN) 1347 goto out_unlock; 1348 if (other->sk_shutdown & RCV_SHUTDOWN) 1349 goto out_unlock; 1350 1351 if (unix_recvq_full(other)) { 1352 err = -EAGAIN; 1353 if (!timeo) 1354 goto out_unlock; 1355 1356 timeo = unix_wait_for_peer(other, timeo); 1357 1358 err = sock_intr_errno(timeo); 1359 if (signal_pending(current)) 1360 goto out; 1361 sock_put(other); 1362 goto restart; 1363 } 1364 1365 /* Latch our state. 1366 1367 It is tricky place. We need to grab our state lock and cannot 1368 drop lock on peer. It is dangerous because deadlock is 1369 possible. Connect to self case and simultaneous 1370 attempt to connect are eliminated by checking socket 1371 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1372 check this before attempt to grab lock. 1373 1374 Well, and we have to recheck the state after socket locked. 1375 */ 1376 st = sk->sk_state; 1377 1378 switch (st) { 1379 case TCP_CLOSE: 1380 /* This is ok... continue with connect */ 1381 break; 1382 case TCP_ESTABLISHED: 1383 /* Socket is already connected */ 1384 err = -EISCONN; 1385 goto out_unlock; 1386 default: 1387 err = -EINVAL; 1388 goto out_unlock; 1389 } 1390 1391 unix_state_lock_nested(sk); 1392 1393 if (sk->sk_state != st) { 1394 unix_state_unlock(sk); 1395 unix_state_unlock(other); 1396 sock_put(other); 1397 goto restart; 1398 } 1399 1400 err = security_unix_stream_connect(sk, other, newsk); 1401 if (err) { 1402 unix_state_unlock(sk); 1403 goto out_unlock; 1404 } 1405 1406 /* The way is open! Fastly set all the necessary fields... */ 1407 1408 sock_hold(sk); 1409 unix_peer(newsk) = sk; 1410 newsk->sk_state = TCP_ESTABLISHED; 1411 newsk->sk_type = sk->sk_type; 1412 init_peercred(newsk); 1413 newu = unix_sk(newsk); 1414 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1415 otheru = unix_sk(other); 1416 1417 /* copy address information from listening to new sock 1418 * 1419 * The contents of *(otheru->addr) and otheru->path 1420 * are seen fully set up here, since we have found 1421 * otheru in hash under unix_table_lock. Insertion 1422 * into the hash chain we'd found it in had been done 1423 * in an earlier critical area protected by unix_table_lock, 1424 * the same one where we'd set *(otheru->addr) contents, 1425 * as well as otheru->path and otheru->addr itself. 1426 * 1427 * Using smp_store_release() here to set newu->addr 1428 * is enough to make those stores, as well as stores 1429 * to newu->path visible to anyone who gets newu->addr 1430 * by smp_load_acquire(). IOW, the same warranties 1431 * as for unix_sock instances bound in unix_bind() or 1432 * in unix_autobind(). 1433 */ 1434 if (otheru->path.dentry) { 1435 path_get(&otheru->path); 1436 newu->path = otheru->path; 1437 } 1438 refcount_inc(&otheru->addr->refcnt); 1439 smp_store_release(&newu->addr, otheru->addr); 1440 1441 /* Set credentials */ 1442 copy_peercred(sk, other); 1443 1444 sock->state = SS_CONNECTED; 1445 sk->sk_state = TCP_ESTABLISHED; 1446 sock_hold(newsk); 1447 1448 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1449 unix_peer(sk) = newsk; 1450 1451 unix_state_unlock(sk); 1452 1453 /* take ten and send info to listening sock */ 1454 spin_lock(&other->sk_receive_queue.lock); 1455 __skb_queue_tail(&other->sk_receive_queue, skb); 1456 spin_unlock(&other->sk_receive_queue.lock); 1457 unix_state_unlock(other); 1458 other->sk_data_ready(other); 1459 sock_put(other); 1460 return 0; 1461 1462 out_unlock: 1463 if (other) 1464 unix_state_unlock(other); 1465 1466 out: 1467 kfree_skb(skb); 1468 if (newsk) 1469 unix_release_sock(newsk, 0); 1470 if (other) 1471 sock_put(other); 1472 return err; 1473 } 1474 1475 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1476 { 1477 struct sock *ska = socka->sk, *skb = sockb->sk; 1478 1479 /* Join our sockets back to back */ 1480 sock_hold(ska); 1481 sock_hold(skb); 1482 unix_peer(ska) = skb; 1483 unix_peer(skb) = ska; 1484 init_peercred(ska); 1485 init_peercred(skb); 1486 1487 ska->sk_state = TCP_ESTABLISHED; 1488 skb->sk_state = TCP_ESTABLISHED; 1489 socka->state = SS_CONNECTED; 1490 sockb->state = SS_CONNECTED; 1491 return 0; 1492 } 1493 1494 static void unix_sock_inherit_flags(const struct socket *old, 1495 struct socket *new) 1496 { 1497 if (test_bit(SOCK_PASSCRED, &old->flags)) 1498 set_bit(SOCK_PASSCRED, &new->flags); 1499 if (test_bit(SOCK_PASSSEC, &old->flags)) 1500 set_bit(SOCK_PASSSEC, &new->flags); 1501 } 1502 1503 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1504 bool kern) 1505 { 1506 struct sock *sk = sock->sk; 1507 struct sock *tsk; 1508 struct sk_buff *skb; 1509 int err; 1510 1511 err = -EOPNOTSUPP; 1512 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1513 goto out; 1514 1515 err = -EINVAL; 1516 if (sk->sk_state != TCP_LISTEN) 1517 goto out; 1518 1519 /* If socket state is TCP_LISTEN it cannot change (for now...), 1520 * so that no locks are necessary. 1521 */ 1522 1523 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err); 1524 if (!skb) { 1525 /* This means receive shutdown. */ 1526 if (err == 0) 1527 err = -EINVAL; 1528 goto out; 1529 } 1530 1531 tsk = skb->sk; 1532 skb_free_datagram(sk, skb); 1533 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1534 1535 /* attach accepted sock to socket */ 1536 unix_state_lock(tsk); 1537 newsock->state = SS_CONNECTED; 1538 unix_sock_inherit_flags(sock, newsock); 1539 sock_graft(tsk, newsock); 1540 unix_state_unlock(tsk); 1541 return 0; 1542 1543 out: 1544 return err; 1545 } 1546 1547 1548 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1549 { 1550 struct sock *sk = sock->sk; 1551 struct unix_address *addr; 1552 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1553 int err = 0; 1554 1555 if (peer) { 1556 sk = unix_peer_get(sk); 1557 1558 err = -ENOTCONN; 1559 if (!sk) 1560 goto out; 1561 err = 0; 1562 } else { 1563 sock_hold(sk); 1564 } 1565 1566 addr = smp_load_acquire(&unix_sk(sk)->addr); 1567 if (!addr) { 1568 sunaddr->sun_family = AF_UNIX; 1569 sunaddr->sun_path[0] = 0; 1570 err = sizeof(short); 1571 } else { 1572 err = addr->len; 1573 memcpy(sunaddr, addr->name, addr->len); 1574 } 1575 sock_put(sk); 1576 out: 1577 return err; 1578 } 1579 1580 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1581 { 1582 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1583 1584 /* 1585 * Garbage collection of unix sockets starts by selecting a set of 1586 * candidate sockets which have reference only from being in flight 1587 * (total_refs == inflight_refs). This condition is checked once during 1588 * the candidate collection phase, and candidates are marked as such, so 1589 * that non-candidates can later be ignored. While inflight_refs is 1590 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1591 * is an instantaneous decision. 1592 * 1593 * Once a candidate, however, the socket must not be reinstalled into a 1594 * file descriptor while the garbage collection is in progress. 1595 * 1596 * If the above conditions are met, then the directed graph of 1597 * candidates (*) does not change while unix_gc_lock is held. 1598 * 1599 * Any operations that changes the file count through file descriptors 1600 * (dup, close, sendmsg) does not change the graph since candidates are 1601 * not installed in fds. 1602 * 1603 * Dequeing a candidate via recvmsg would install it into an fd, but 1604 * that takes unix_gc_lock to decrement the inflight count, so it's 1605 * serialized with garbage collection. 1606 * 1607 * MSG_PEEK is special in that it does not change the inflight count, 1608 * yet does install the socket into an fd. The following lock/unlock 1609 * pair is to ensure serialization with garbage collection. It must be 1610 * done between incrementing the file count and installing the file into 1611 * an fd. 1612 * 1613 * If garbage collection starts after the barrier provided by the 1614 * lock/unlock, then it will see the elevated refcount and not mark this 1615 * as a candidate. If a garbage collection is already in progress 1616 * before the file count was incremented, then the lock/unlock pair will 1617 * ensure that garbage collection is finished before progressing to 1618 * installing the fd. 1619 * 1620 * (*) A -> B where B is on the queue of A or B is on the queue of C 1621 * which is on the queue of listening socket A. 1622 */ 1623 spin_lock(&unix_gc_lock); 1624 spin_unlock(&unix_gc_lock); 1625 } 1626 1627 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1628 { 1629 int err = 0; 1630 1631 UNIXCB(skb).pid = get_pid(scm->pid); 1632 UNIXCB(skb).uid = scm->creds.uid; 1633 UNIXCB(skb).gid = scm->creds.gid; 1634 UNIXCB(skb).fp = NULL; 1635 unix_get_secdata(scm, skb); 1636 if (scm->fp && send_fds) 1637 err = unix_attach_fds(scm, skb); 1638 1639 skb->destructor = unix_destruct_scm; 1640 return err; 1641 } 1642 1643 static bool unix_passcred_enabled(const struct socket *sock, 1644 const struct sock *other) 1645 { 1646 return test_bit(SOCK_PASSCRED, &sock->flags) || 1647 !other->sk_socket || 1648 test_bit(SOCK_PASSCRED, &other->sk_socket->flags); 1649 } 1650 1651 /* 1652 * Some apps rely on write() giving SCM_CREDENTIALS 1653 * We include credentials if source or destination socket 1654 * asserted SOCK_PASSCRED. 1655 */ 1656 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1657 const struct sock *other) 1658 { 1659 if (UNIXCB(skb).pid) 1660 return; 1661 if (unix_passcred_enabled(sock, other)) { 1662 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1663 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1664 } 1665 } 1666 1667 static int maybe_init_creds(struct scm_cookie *scm, 1668 struct socket *socket, 1669 const struct sock *other) 1670 { 1671 int err; 1672 struct msghdr msg = { .msg_controllen = 0 }; 1673 1674 err = scm_send(socket, &msg, scm, false); 1675 if (err) 1676 return err; 1677 1678 if (unix_passcred_enabled(socket, other)) { 1679 scm->pid = get_pid(task_tgid(current)); 1680 current_uid_gid(&scm->creds.uid, &scm->creds.gid); 1681 } 1682 return err; 1683 } 1684 1685 static bool unix_skb_scm_eq(struct sk_buff *skb, 1686 struct scm_cookie *scm) 1687 { 1688 const struct unix_skb_parms *u = &UNIXCB(skb); 1689 1690 return u->pid == scm->pid && 1691 uid_eq(u->uid, scm->creds.uid) && 1692 gid_eq(u->gid, scm->creds.gid) && 1693 unix_secdata_eq(scm, skb); 1694 } 1695 1696 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1697 { 1698 struct scm_fp_list *fp = UNIXCB(skb).fp; 1699 struct unix_sock *u = unix_sk(sk); 1700 1701 if (unlikely(fp && fp->count)) 1702 atomic_add(fp->count, &u->scm_stat.nr_fds); 1703 } 1704 1705 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1706 { 1707 struct scm_fp_list *fp = UNIXCB(skb).fp; 1708 struct unix_sock *u = unix_sk(sk); 1709 1710 if (unlikely(fp && fp->count)) 1711 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1712 } 1713 1714 /* 1715 * Send AF_UNIX data. 1716 */ 1717 1718 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1719 size_t len) 1720 { 1721 struct sock *sk = sock->sk; 1722 struct net *net = sock_net(sk); 1723 struct unix_sock *u = unix_sk(sk); 1724 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1725 struct sock *other = NULL; 1726 int namelen = 0; /* fake GCC */ 1727 int err; 1728 unsigned int hash; 1729 struct sk_buff *skb; 1730 long timeo; 1731 struct scm_cookie scm; 1732 int data_len = 0; 1733 int sk_locked; 1734 1735 wait_for_unix_gc(); 1736 err = scm_send(sock, msg, &scm, false); 1737 if (err < 0) 1738 return err; 1739 1740 err = -EOPNOTSUPP; 1741 if (msg->msg_flags&MSG_OOB) 1742 goto out; 1743 1744 if (msg->msg_namelen) { 1745 err = unix_mkname(sunaddr, msg->msg_namelen, &hash); 1746 if (err < 0) 1747 goto out; 1748 namelen = err; 1749 } else { 1750 sunaddr = NULL; 1751 err = -ENOTCONN; 1752 other = unix_peer_get(sk); 1753 if (!other) 1754 goto out; 1755 } 1756 1757 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr 1758 && (err = unix_autobind(sock)) != 0) 1759 goto out; 1760 1761 err = -EMSGSIZE; 1762 if (len > sk->sk_sndbuf - 32) 1763 goto out; 1764 1765 if (len > SKB_MAX_ALLOC) { 1766 data_len = min_t(size_t, 1767 len - SKB_MAX_ALLOC, 1768 MAX_SKB_FRAGS * PAGE_SIZE); 1769 data_len = PAGE_ALIGN(data_len); 1770 1771 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1772 } 1773 1774 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1775 msg->msg_flags & MSG_DONTWAIT, &err, 1776 PAGE_ALLOC_COSTLY_ORDER); 1777 if (skb == NULL) 1778 goto out; 1779 1780 err = unix_scm_to_skb(&scm, skb, true); 1781 if (err < 0) 1782 goto out_free; 1783 1784 skb_put(skb, len - data_len); 1785 skb->data_len = data_len; 1786 skb->len = len; 1787 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1788 if (err) 1789 goto out_free; 1790 1791 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1792 1793 restart: 1794 if (!other) { 1795 err = -ECONNRESET; 1796 if (sunaddr == NULL) 1797 goto out_free; 1798 1799 other = unix_find_other(net, sunaddr, namelen, sk->sk_type, 1800 hash, &err); 1801 if (other == NULL) 1802 goto out_free; 1803 } 1804 1805 if (sk_filter(other, skb) < 0) { 1806 /* Toss the packet but do not return any error to the sender */ 1807 err = len; 1808 goto out_free; 1809 } 1810 1811 sk_locked = 0; 1812 unix_state_lock(other); 1813 restart_locked: 1814 err = -EPERM; 1815 if (!unix_may_send(sk, other)) 1816 goto out_unlock; 1817 1818 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1819 /* 1820 * Check with 1003.1g - what should 1821 * datagram error 1822 */ 1823 unix_state_unlock(other); 1824 sock_put(other); 1825 1826 if (!sk_locked) 1827 unix_state_lock(sk); 1828 1829 err = 0; 1830 if (unix_peer(sk) == other) { 1831 unix_peer(sk) = NULL; 1832 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 1833 1834 unix_state_unlock(sk); 1835 1836 sk->sk_state = TCP_CLOSE; 1837 unix_dgram_disconnected(sk, other); 1838 sock_put(other); 1839 err = -ECONNREFUSED; 1840 } else { 1841 unix_state_unlock(sk); 1842 } 1843 1844 other = NULL; 1845 if (err) 1846 goto out_free; 1847 goto restart; 1848 } 1849 1850 err = -EPIPE; 1851 if (other->sk_shutdown & RCV_SHUTDOWN) 1852 goto out_unlock; 1853 1854 if (sk->sk_type != SOCK_SEQPACKET) { 1855 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1856 if (err) 1857 goto out_unlock; 1858 } 1859 1860 /* other == sk && unix_peer(other) != sk if 1861 * - unix_peer(sk) == NULL, destination address bound to sk 1862 * - unix_peer(sk) == sk by time of get but disconnected before lock 1863 */ 1864 if (other != sk && 1865 unlikely(unix_peer(other) != sk && 1866 unix_recvq_full_lockless(other))) { 1867 if (timeo) { 1868 timeo = unix_wait_for_peer(other, timeo); 1869 1870 err = sock_intr_errno(timeo); 1871 if (signal_pending(current)) 1872 goto out_free; 1873 1874 goto restart; 1875 } 1876 1877 if (!sk_locked) { 1878 unix_state_unlock(other); 1879 unix_state_double_lock(sk, other); 1880 } 1881 1882 if (unix_peer(sk) != other || 1883 unix_dgram_peer_wake_me(sk, other)) { 1884 err = -EAGAIN; 1885 sk_locked = 1; 1886 goto out_unlock; 1887 } 1888 1889 if (!sk_locked) { 1890 sk_locked = 1; 1891 goto restart_locked; 1892 } 1893 } 1894 1895 if (unlikely(sk_locked)) 1896 unix_state_unlock(sk); 1897 1898 if (sock_flag(other, SOCK_RCVTSTAMP)) 1899 __net_timestamp(skb); 1900 maybe_add_creds(skb, sock, other); 1901 scm_stat_add(other, skb); 1902 skb_queue_tail(&other->sk_receive_queue, skb); 1903 unix_state_unlock(other); 1904 other->sk_data_ready(other); 1905 sock_put(other); 1906 scm_destroy(&scm); 1907 return len; 1908 1909 out_unlock: 1910 if (sk_locked) 1911 unix_state_unlock(sk); 1912 unix_state_unlock(other); 1913 out_free: 1914 kfree_skb(skb); 1915 out: 1916 if (other) 1917 sock_put(other); 1918 scm_destroy(&scm); 1919 return err; 1920 } 1921 1922 /* We use paged skbs for stream sockets, and limit occupancy to 32768 1923 * bytes, and a minimum of a full page. 1924 */ 1925 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 1926 1927 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) 1928 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) 1929 { 1930 struct unix_sock *ousk = unix_sk(other); 1931 struct sk_buff *skb; 1932 int err = 0; 1933 1934 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 1935 1936 if (!skb) 1937 return err; 1938 1939 skb_put(skb, 1); 1940 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 1941 1942 if (err) { 1943 kfree_skb(skb); 1944 return err; 1945 } 1946 1947 unix_state_lock(other); 1948 1949 if (sock_flag(other, SOCK_DEAD) || 1950 (other->sk_shutdown & RCV_SHUTDOWN)) { 1951 unix_state_unlock(other); 1952 kfree_skb(skb); 1953 return -EPIPE; 1954 } 1955 1956 maybe_add_creds(skb, sock, other); 1957 skb_get(skb); 1958 1959 if (ousk->oob_skb) 1960 consume_skb(ousk->oob_skb); 1961 1962 ousk->oob_skb = skb; 1963 1964 scm_stat_add(other, skb); 1965 skb_queue_tail(&other->sk_receive_queue, skb); 1966 sk_send_sigurg(other); 1967 unix_state_unlock(other); 1968 other->sk_data_ready(other); 1969 1970 return err; 1971 } 1972 #endif 1973 1974 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 1975 size_t len) 1976 { 1977 struct sock *sk = sock->sk; 1978 struct sock *other = NULL; 1979 int err, size; 1980 struct sk_buff *skb; 1981 int sent = 0; 1982 struct scm_cookie scm; 1983 bool fds_sent = false; 1984 int data_len; 1985 1986 wait_for_unix_gc(); 1987 err = scm_send(sock, msg, &scm, false); 1988 if (err < 0) 1989 return err; 1990 1991 err = -EOPNOTSUPP; 1992 if (msg->msg_flags & MSG_OOB) { 1993 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) 1994 if (len) 1995 len--; 1996 else 1997 #endif 1998 goto out_err; 1999 } 2000 2001 if (msg->msg_namelen) { 2002 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; 2003 goto out_err; 2004 } else { 2005 err = -ENOTCONN; 2006 other = unix_peer(sk); 2007 if (!other) 2008 goto out_err; 2009 } 2010 2011 if (sk->sk_shutdown & SEND_SHUTDOWN) 2012 goto pipe_err; 2013 2014 while (sent < len) { 2015 size = len - sent; 2016 2017 /* Keep two messages in the pipe so it schedules better */ 2018 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2019 2020 /* allow fallback to order-0 allocations */ 2021 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2022 2023 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2024 2025 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2026 2027 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2028 msg->msg_flags & MSG_DONTWAIT, &err, 2029 get_order(UNIX_SKB_FRAGS_SZ)); 2030 if (!skb) 2031 goto out_err; 2032 2033 /* Only send the fds in the first buffer */ 2034 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2035 if (err < 0) { 2036 kfree_skb(skb); 2037 goto out_err; 2038 } 2039 fds_sent = true; 2040 2041 skb_put(skb, size - data_len); 2042 skb->data_len = data_len; 2043 skb->len = size; 2044 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2045 if (err) { 2046 kfree_skb(skb); 2047 goto out_err; 2048 } 2049 2050 unix_state_lock(other); 2051 2052 if (sock_flag(other, SOCK_DEAD) || 2053 (other->sk_shutdown & RCV_SHUTDOWN)) 2054 goto pipe_err_free; 2055 2056 maybe_add_creds(skb, sock, other); 2057 scm_stat_add(other, skb); 2058 skb_queue_tail(&other->sk_receive_queue, skb); 2059 unix_state_unlock(other); 2060 other->sk_data_ready(other); 2061 sent += size; 2062 } 2063 2064 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) 2065 if (msg->msg_flags & MSG_OOB) { 2066 err = queue_oob(sock, msg, other); 2067 if (err) 2068 goto out_err; 2069 sent++; 2070 } 2071 #endif 2072 2073 scm_destroy(&scm); 2074 2075 return sent; 2076 2077 pipe_err_free: 2078 unix_state_unlock(other); 2079 kfree_skb(skb); 2080 pipe_err: 2081 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2082 send_sig(SIGPIPE, current, 0); 2083 err = -EPIPE; 2084 out_err: 2085 scm_destroy(&scm); 2086 return sent ? : err; 2087 } 2088 2089 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2090 int offset, size_t size, int flags) 2091 { 2092 int err; 2093 bool send_sigpipe = false; 2094 bool init_scm = true; 2095 struct scm_cookie scm; 2096 struct sock *other, *sk = socket->sk; 2097 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2098 2099 if (flags & MSG_OOB) 2100 return -EOPNOTSUPP; 2101 2102 other = unix_peer(sk); 2103 if (!other || sk->sk_state != TCP_ESTABLISHED) 2104 return -ENOTCONN; 2105 2106 if (false) { 2107 alloc_skb: 2108 unix_state_unlock(other); 2109 mutex_unlock(&unix_sk(other)->iolock); 2110 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2111 &err, 0); 2112 if (!newskb) 2113 goto err; 2114 } 2115 2116 /* we must acquire iolock as we modify already present 2117 * skbs in the sk_receive_queue and mess with skb->len 2118 */ 2119 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2120 if (err) { 2121 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; 2122 goto err; 2123 } 2124 2125 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2126 err = -EPIPE; 2127 send_sigpipe = true; 2128 goto err_unlock; 2129 } 2130 2131 unix_state_lock(other); 2132 2133 if (sock_flag(other, SOCK_DEAD) || 2134 other->sk_shutdown & RCV_SHUTDOWN) { 2135 err = -EPIPE; 2136 send_sigpipe = true; 2137 goto err_state_unlock; 2138 } 2139 2140 if (init_scm) { 2141 err = maybe_init_creds(&scm, socket, other); 2142 if (err) 2143 goto err_state_unlock; 2144 init_scm = false; 2145 } 2146 2147 skb = skb_peek_tail(&other->sk_receive_queue); 2148 if (tail && tail == skb) { 2149 skb = newskb; 2150 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2151 if (newskb) { 2152 skb = newskb; 2153 } else { 2154 tail = skb; 2155 goto alloc_skb; 2156 } 2157 } else if (newskb) { 2158 /* this is fast path, we don't necessarily need to 2159 * call to kfree_skb even though with newskb == NULL 2160 * this - does no harm 2161 */ 2162 consume_skb(newskb); 2163 newskb = NULL; 2164 } 2165 2166 if (skb_append_pagefrags(skb, page, offset, size)) { 2167 tail = skb; 2168 goto alloc_skb; 2169 } 2170 2171 skb->len += size; 2172 skb->data_len += size; 2173 skb->truesize += size; 2174 refcount_add(size, &sk->sk_wmem_alloc); 2175 2176 if (newskb) { 2177 err = unix_scm_to_skb(&scm, skb, false); 2178 if (err) 2179 goto err_state_unlock; 2180 spin_lock(&other->sk_receive_queue.lock); 2181 __skb_queue_tail(&other->sk_receive_queue, newskb); 2182 spin_unlock(&other->sk_receive_queue.lock); 2183 } 2184 2185 unix_state_unlock(other); 2186 mutex_unlock(&unix_sk(other)->iolock); 2187 2188 other->sk_data_ready(other); 2189 scm_destroy(&scm); 2190 return size; 2191 2192 err_state_unlock: 2193 unix_state_unlock(other); 2194 err_unlock: 2195 mutex_unlock(&unix_sk(other)->iolock); 2196 err: 2197 kfree_skb(newskb); 2198 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2199 send_sig(SIGPIPE, current, 0); 2200 if (!init_scm) 2201 scm_destroy(&scm); 2202 return err; 2203 } 2204 2205 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2206 size_t len) 2207 { 2208 int err; 2209 struct sock *sk = sock->sk; 2210 2211 err = sock_error(sk); 2212 if (err) 2213 return err; 2214 2215 if (sk->sk_state != TCP_ESTABLISHED) 2216 return -ENOTCONN; 2217 2218 if (msg->msg_namelen) 2219 msg->msg_namelen = 0; 2220 2221 return unix_dgram_sendmsg(sock, msg, len); 2222 } 2223 2224 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2225 size_t size, int flags) 2226 { 2227 struct sock *sk = sock->sk; 2228 2229 if (sk->sk_state != TCP_ESTABLISHED) 2230 return -ENOTCONN; 2231 2232 return unix_dgram_recvmsg(sock, msg, size, flags); 2233 } 2234 2235 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2236 { 2237 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2238 2239 if (addr) { 2240 msg->msg_namelen = addr->len; 2241 memcpy(msg->msg_name, addr->name, addr->len); 2242 } 2243 } 2244 2245 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2246 int flags) 2247 { 2248 struct scm_cookie scm; 2249 struct socket *sock = sk->sk_socket; 2250 struct unix_sock *u = unix_sk(sk); 2251 struct sk_buff *skb, *last; 2252 long timeo; 2253 int skip; 2254 int err; 2255 2256 err = -EOPNOTSUPP; 2257 if (flags&MSG_OOB) 2258 goto out; 2259 2260 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2261 2262 do { 2263 mutex_lock(&u->iolock); 2264 2265 skip = sk_peek_offset(sk, flags); 2266 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2267 &skip, &err, &last); 2268 if (skb) { 2269 if (!(flags & MSG_PEEK)) 2270 scm_stat_del(sk, skb); 2271 break; 2272 } 2273 2274 mutex_unlock(&u->iolock); 2275 2276 if (err != -EAGAIN) 2277 break; 2278 } while (timeo && 2279 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2280 &err, &timeo, last)); 2281 2282 if (!skb) { /* implies iolock unlocked */ 2283 unix_state_lock(sk); 2284 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2285 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2286 (sk->sk_shutdown & RCV_SHUTDOWN)) 2287 err = 0; 2288 unix_state_unlock(sk); 2289 goto out; 2290 } 2291 2292 if (wq_has_sleeper(&u->peer_wait)) 2293 wake_up_interruptible_sync_poll(&u->peer_wait, 2294 EPOLLOUT | EPOLLWRNORM | 2295 EPOLLWRBAND); 2296 2297 if (msg->msg_name) 2298 unix_copy_addr(msg, skb->sk); 2299 2300 if (size > skb->len - skip) 2301 size = skb->len - skip; 2302 else if (size < skb->len - skip) 2303 msg->msg_flags |= MSG_TRUNC; 2304 2305 err = skb_copy_datagram_msg(skb, skip, msg, size); 2306 if (err) 2307 goto out_free; 2308 2309 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2310 __sock_recv_timestamp(msg, sk, skb); 2311 2312 memset(&scm, 0, sizeof(scm)); 2313 2314 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2315 unix_set_secdata(&scm, skb); 2316 2317 if (!(flags & MSG_PEEK)) { 2318 if (UNIXCB(skb).fp) 2319 unix_detach_fds(&scm, skb); 2320 2321 sk_peek_offset_bwd(sk, skb->len); 2322 } else { 2323 /* It is questionable: on PEEK we could: 2324 - do not return fds - good, but too simple 8) 2325 - return fds, and do not return them on read (old strategy, 2326 apparently wrong) 2327 - clone fds (I chose it for now, it is the most universal 2328 solution) 2329 2330 POSIX 1003.1g does not actually define this clearly 2331 at all. POSIX 1003.1g doesn't define a lot of things 2332 clearly however! 2333 2334 */ 2335 2336 sk_peek_offset_fwd(sk, size); 2337 2338 if (UNIXCB(skb).fp) 2339 unix_peek_fds(&scm, skb); 2340 } 2341 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2342 2343 scm_recv(sock, msg, &scm, flags); 2344 2345 out_free: 2346 skb_free_datagram(sk, skb); 2347 mutex_unlock(&u->iolock); 2348 out: 2349 return err; 2350 } 2351 2352 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2353 int flags) 2354 { 2355 struct sock *sk = sock->sk; 2356 2357 #ifdef CONFIG_BPF_SYSCALL 2358 const struct proto *prot = READ_ONCE(sk->sk_prot); 2359 2360 if (prot != &unix_dgram_proto) 2361 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, 2362 flags & ~MSG_DONTWAIT, NULL); 2363 #endif 2364 return __unix_dgram_recvmsg(sk, msg, size, flags); 2365 } 2366 2367 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, 2368 sk_read_actor_t recv_actor) 2369 { 2370 int copied = 0; 2371 2372 while (1) { 2373 struct unix_sock *u = unix_sk(sk); 2374 struct sk_buff *skb; 2375 int used, err; 2376 2377 mutex_lock(&u->iolock); 2378 skb = skb_recv_datagram(sk, 0, 1, &err); 2379 mutex_unlock(&u->iolock); 2380 if (!skb) 2381 return err; 2382 2383 used = recv_actor(desc, skb, 0, skb->len); 2384 if (used <= 0) { 2385 if (!copied) 2386 copied = used; 2387 kfree_skb(skb); 2388 break; 2389 } else if (used <= skb->len) { 2390 copied += used; 2391 } 2392 2393 kfree_skb(skb); 2394 if (!desc->count) 2395 break; 2396 } 2397 2398 return copied; 2399 } 2400 2401 /* 2402 * Sleep until more data has arrived. But check for races.. 2403 */ 2404 static long unix_stream_data_wait(struct sock *sk, long timeo, 2405 struct sk_buff *last, unsigned int last_len, 2406 bool freezable) 2407 { 2408 struct sk_buff *tail; 2409 DEFINE_WAIT(wait); 2410 2411 unix_state_lock(sk); 2412 2413 for (;;) { 2414 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2415 2416 tail = skb_peek_tail(&sk->sk_receive_queue); 2417 if (tail != last || 2418 (tail && tail->len != last_len) || 2419 sk->sk_err || 2420 (sk->sk_shutdown & RCV_SHUTDOWN) || 2421 signal_pending(current) || 2422 !timeo) 2423 break; 2424 2425 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2426 unix_state_unlock(sk); 2427 if (freezable) 2428 timeo = freezable_schedule_timeout(timeo); 2429 else 2430 timeo = schedule_timeout(timeo); 2431 unix_state_lock(sk); 2432 2433 if (sock_flag(sk, SOCK_DEAD)) 2434 break; 2435 2436 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2437 } 2438 2439 finish_wait(sk_sleep(sk), &wait); 2440 unix_state_unlock(sk); 2441 return timeo; 2442 } 2443 2444 static unsigned int unix_skb_len(const struct sk_buff *skb) 2445 { 2446 return skb->len - UNIXCB(skb).consumed; 2447 } 2448 2449 struct unix_stream_read_state { 2450 int (*recv_actor)(struct sk_buff *, int, int, 2451 struct unix_stream_read_state *); 2452 struct socket *socket; 2453 struct msghdr *msg; 2454 struct pipe_inode_info *pipe; 2455 size_t size; 2456 int flags; 2457 unsigned int splice_flags; 2458 }; 2459 2460 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2461 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2462 { 2463 struct socket *sock = state->socket; 2464 struct sock *sk = sock->sk; 2465 struct unix_sock *u = unix_sk(sk); 2466 int chunk = 1; 2467 struct sk_buff *oob_skb; 2468 2469 mutex_lock(&u->iolock); 2470 unix_state_lock(sk); 2471 2472 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2473 unix_state_unlock(sk); 2474 mutex_unlock(&u->iolock); 2475 return -EINVAL; 2476 } 2477 2478 oob_skb = u->oob_skb; 2479 2480 if (!(state->flags & MSG_PEEK)) { 2481 u->oob_skb = NULL; 2482 } 2483 2484 unix_state_unlock(sk); 2485 2486 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2487 2488 if (!(state->flags & MSG_PEEK)) { 2489 UNIXCB(oob_skb).consumed += 1; 2490 kfree_skb(oob_skb); 2491 } 2492 2493 mutex_unlock(&u->iolock); 2494 2495 if (chunk < 0) 2496 return -EFAULT; 2497 2498 state->msg->msg_flags |= MSG_OOB; 2499 return 1; 2500 } 2501 2502 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2503 int flags, int copied) 2504 { 2505 struct unix_sock *u = unix_sk(sk); 2506 2507 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2508 skb_unlink(skb, &sk->sk_receive_queue); 2509 consume_skb(skb); 2510 skb = NULL; 2511 } else { 2512 if (skb == u->oob_skb) { 2513 if (copied) { 2514 skb = NULL; 2515 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2516 if (!(flags & MSG_PEEK)) { 2517 u->oob_skb = NULL; 2518 consume_skb(skb); 2519 } 2520 } else if (!(flags & MSG_PEEK)) { 2521 skb_unlink(skb, &sk->sk_receive_queue); 2522 consume_skb(skb); 2523 skb = skb_peek(&sk->sk_receive_queue); 2524 } 2525 } 2526 } 2527 return skb; 2528 } 2529 #endif 2530 2531 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, 2532 sk_read_actor_t recv_actor) 2533 { 2534 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2535 return -ENOTCONN; 2536 2537 return unix_read_sock(sk, desc, recv_actor); 2538 } 2539 2540 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2541 bool freezable) 2542 { 2543 struct scm_cookie scm; 2544 struct socket *sock = state->socket; 2545 struct sock *sk = sock->sk; 2546 struct unix_sock *u = unix_sk(sk); 2547 int copied = 0; 2548 int flags = state->flags; 2549 int noblock = flags & MSG_DONTWAIT; 2550 bool check_creds = false; 2551 int target; 2552 int err = 0; 2553 long timeo; 2554 int skip; 2555 size_t size = state->size; 2556 unsigned int last_len; 2557 2558 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2559 err = -EINVAL; 2560 goto out; 2561 } 2562 2563 if (unlikely(flags & MSG_OOB)) { 2564 err = -EOPNOTSUPP; 2565 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2566 err = unix_stream_recv_urg(state); 2567 #endif 2568 goto out; 2569 } 2570 2571 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2572 timeo = sock_rcvtimeo(sk, noblock); 2573 2574 memset(&scm, 0, sizeof(scm)); 2575 2576 /* Lock the socket to prevent queue disordering 2577 * while sleeps in memcpy_tomsg 2578 */ 2579 mutex_lock(&u->iolock); 2580 2581 skip = max(sk_peek_offset(sk, flags), 0); 2582 2583 do { 2584 int chunk; 2585 bool drop_skb; 2586 struct sk_buff *skb, *last; 2587 2588 redo: 2589 unix_state_lock(sk); 2590 if (sock_flag(sk, SOCK_DEAD)) { 2591 err = -ECONNRESET; 2592 goto unlock; 2593 } 2594 last = skb = skb_peek(&sk->sk_receive_queue); 2595 last_len = last ? last->len : 0; 2596 2597 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2598 if (skb) { 2599 skb = manage_oob(skb, sk, flags, copied); 2600 if (!skb) { 2601 unix_state_unlock(sk); 2602 if (copied) 2603 break; 2604 goto redo; 2605 } 2606 } 2607 #endif 2608 again: 2609 if (skb == NULL) { 2610 if (copied >= target) 2611 goto unlock; 2612 2613 /* 2614 * POSIX 1003.1g mandates this order. 2615 */ 2616 2617 err = sock_error(sk); 2618 if (err) 2619 goto unlock; 2620 if (sk->sk_shutdown & RCV_SHUTDOWN) 2621 goto unlock; 2622 2623 unix_state_unlock(sk); 2624 if (!timeo) { 2625 err = -EAGAIN; 2626 break; 2627 } 2628 2629 mutex_unlock(&u->iolock); 2630 2631 timeo = unix_stream_data_wait(sk, timeo, last, 2632 last_len, freezable); 2633 2634 if (signal_pending(current)) { 2635 err = sock_intr_errno(timeo); 2636 scm_destroy(&scm); 2637 goto out; 2638 } 2639 2640 mutex_lock(&u->iolock); 2641 goto redo; 2642 unlock: 2643 unix_state_unlock(sk); 2644 break; 2645 } 2646 2647 while (skip >= unix_skb_len(skb)) { 2648 skip -= unix_skb_len(skb); 2649 last = skb; 2650 last_len = skb->len; 2651 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2652 if (!skb) 2653 goto again; 2654 } 2655 2656 unix_state_unlock(sk); 2657 2658 if (check_creds) { 2659 /* Never glue messages from different writers */ 2660 if (!unix_skb_scm_eq(skb, &scm)) 2661 break; 2662 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2663 /* Copy credentials */ 2664 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2665 unix_set_secdata(&scm, skb); 2666 check_creds = true; 2667 } 2668 2669 /* Copy address just once */ 2670 if (state->msg && state->msg->msg_name) { 2671 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2672 state->msg->msg_name); 2673 unix_copy_addr(state->msg, skb->sk); 2674 sunaddr = NULL; 2675 } 2676 2677 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2678 skb_get(skb); 2679 chunk = state->recv_actor(skb, skip, chunk, state); 2680 drop_skb = !unix_skb_len(skb); 2681 /* skb is only safe to use if !drop_skb */ 2682 consume_skb(skb); 2683 if (chunk < 0) { 2684 if (copied == 0) 2685 copied = -EFAULT; 2686 break; 2687 } 2688 copied += chunk; 2689 size -= chunk; 2690 2691 if (drop_skb) { 2692 /* the skb was touched by a concurrent reader; 2693 * we should not expect anything from this skb 2694 * anymore and assume it invalid - we can be 2695 * sure it was dropped from the socket queue 2696 * 2697 * let's report a short read 2698 */ 2699 err = 0; 2700 break; 2701 } 2702 2703 /* Mark read part of skb as used */ 2704 if (!(flags & MSG_PEEK)) { 2705 UNIXCB(skb).consumed += chunk; 2706 2707 sk_peek_offset_bwd(sk, chunk); 2708 2709 if (UNIXCB(skb).fp) { 2710 scm_stat_del(sk, skb); 2711 unix_detach_fds(&scm, skb); 2712 } 2713 2714 if (unix_skb_len(skb)) 2715 break; 2716 2717 skb_unlink(skb, &sk->sk_receive_queue); 2718 consume_skb(skb); 2719 2720 if (scm.fp) 2721 break; 2722 } else { 2723 /* It is questionable, see note in unix_dgram_recvmsg. 2724 */ 2725 if (UNIXCB(skb).fp) 2726 unix_peek_fds(&scm, skb); 2727 2728 sk_peek_offset_fwd(sk, chunk); 2729 2730 if (UNIXCB(skb).fp) 2731 break; 2732 2733 skip = 0; 2734 last = skb; 2735 last_len = skb->len; 2736 unix_state_lock(sk); 2737 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2738 if (skb) 2739 goto again; 2740 unix_state_unlock(sk); 2741 break; 2742 } 2743 } while (size); 2744 2745 mutex_unlock(&u->iolock); 2746 if (state->msg) 2747 scm_recv(sock, state->msg, &scm, flags); 2748 else 2749 scm_destroy(&scm); 2750 out: 2751 return copied ? : err; 2752 } 2753 2754 static int unix_stream_read_actor(struct sk_buff *skb, 2755 int skip, int chunk, 2756 struct unix_stream_read_state *state) 2757 { 2758 int ret; 2759 2760 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2761 state->msg, chunk); 2762 return ret ?: chunk; 2763 } 2764 2765 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2766 size_t size, int flags) 2767 { 2768 struct unix_stream_read_state state = { 2769 .recv_actor = unix_stream_read_actor, 2770 .socket = sk->sk_socket, 2771 .msg = msg, 2772 .size = size, 2773 .flags = flags 2774 }; 2775 2776 return unix_stream_read_generic(&state, true); 2777 } 2778 2779 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2780 size_t size, int flags) 2781 { 2782 struct unix_stream_read_state state = { 2783 .recv_actor = unix_stream_read_actor, 2784 .socket = sock, 2785 .msg = msg, 2786 .size = size, 2787 .flags = flags 2788 }; 2789 2790 #ifdef CONFIG_BPF_SYSCALL 2791 struct sock *sk = sock->sk; 2792 const struct proto *prot = READ_ONCE(sk->sk_prot); 2793 2794 if (prot != &unix_stream_proto) 2795 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT, 2796 flags & ~MSG_DONTWAIT, NULL); 2797 #endif 2798 return unix_stream_read_generic(&state, true); 2799 } 2800 2801 static int unix_stream_splice_actor(struct sk_buff *skb, 2802 int skip, int chunk, 2803 struct unix_stream_read_state *state) 2804 { 2805 return skb_splice_bits(skb, state->socket->sk, 2806 UNIXCB(skb).consumed + skip, 2807 state->pipe, chunk, state->splice_flags); 2808 } 2809 2810 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2811 struct pipe_inode_info *pipe, 2812 size_t size, unsigned int flags) 2813 { 2814 struct unix_stream_read_state state = { 2815 .recv_actor = unix_stream_splice_actor, 2816 .socket = sock, 2817 .pipe = pipe, 2818 .size = size, 2819 .splice_flags = flags, 2820 }; 2821 2822 if (unlikely(*ppos)) 2823 return -ESPIPE; 2824 2825 if (sock->file->f_flags & O_NONBLOCK || 2826 flags & SPLICE_F_NONBLOCK) 2827 state.flags = MSG_DONTWAIT; 2828 2829 return unix_stream_read_generic(&state, false); 2830 } 2831 2832 static int unix_shutdown(struct socket *sock, int mode) 2833 { 2834 struct sock *sk = sock->sk; 2835 struct sock *other; 2836 2837 if (mode < SHUT_RD || mode > SHUT_RDWR) 2838 return -EINVAL; 2839 /* This maps: 2840 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2841 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 2842 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2843 */ 2844 ++mode; 2845 2846 unix_state_lock(sk); 2847 sk->sk_shutdown |= mode; 2848 other = unix_peer(sk); 2849 if (other) 2850 sock_hold(other); 2851 unix_state_unlock(sk); 2852 sk->sk_state_change(sk); 2853 2854 if (other && 2855 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2856 2857 int peer_mode = 0; 2858 const struct proto *prot = READ_ONCE(other->sk_prot); 2859 2860 if (prot->unhash) 2861 prot->unhash(other); 2862 if (mode&RCV_SHUTDOWN) 2863 peer_mode |= SEND_SHUTDOWN; 2864 if (mode&SEND_SHUTDOWN) 2865 peer_mode |= RCV_SHUTDOWN; 2866 unix_state_lock(other); 2867 other->sk_shutdown |= peer_mode; 2868 unix_state_unlock(other); 2869 other->sk_state_change(other); 2870 if (peer_mode == SHUTDOWN_MASK) { 2871 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2872 other->sk_state = TCP_CLOSE; 2873 } else if (peer_mode & RCV_SHUTDOWN) { 2874 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2875 } 2876 } 2877 if (other) 2878 sock_put(other); 2879 2880 return 0; 2881 } 2882 2883 long unix_inq_len(struct sock *sk) 2884 { 2885 struct sk_buff *skb; 2886 long amount = 0; 2887 2888 if (sk->sk_state == TCP_LISTEN) 2889 return -EINVAL; 2890 2891 spin_lock(&sk->sk_receive_queue.lock); 2892 if (sk->sk_type == SOCK_STREAM || 2893 sk->sk_type == SOCK_SEQPACKET) { 2894 skb_queue_walk(&sk->sk_receive_queue, skb) 2895 amount += unix_skb_len(skb); 2896 } else { 2897 skb = skb_peek(&sk->sk_receive_queue); 2898 if (skb) 2899 amount = skb->len; 2900 } 2901 spin_unlock(&sk->sk_receive_queue.lock); 2902 2903 return amount; 2904 } 2905 EXPORT_SYMBOL_GPL(unix_inq_len); 2906 2907 long unix_outq_len(struct sock *sk) 2908 { 2909 return sk_wmem_alloc_get(sk); 2910 } 2911 EXPORT_SYMBOL_GPL(unix_outq_len); 2912 2913 static int unix_open_file(struct sock *sk) 2914 { 2915 struct path path; 2916 struct file *f; 2917 int fd; 2918 2919 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2920 return -EPERM; 2921 2922 if (!smp_load_acquire(&unix_sk(sk)->addr)) 2923 return -ENOENT; 2924 2925 path = unix_sk(sk)->path; 2926 if (!path.dentry) 2927 return -ENOENT; 2928 2929 path_get(&path); 2930 2931 fd = get_unused_fd_flags(O_CLOEXEC); 2932 if (fd < 0) 2933 goto out; 2934 2935 f = dentry_open(&path, O_PATH, current_cred()); 2936 if (IS_ERR(f)) { 2937 put_unused_fd(fd); 2938 fd = PTR_ERR(f); 2939 goto out; 2940 } 2941 2942 fd_install(fd, f); 2943 out: 2944 path_put(&path); 2945 2946 return fd; 2947 } 2948 2949 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 2950 { 2951 struct sock *sk = sock->sk; 2952 long amount = 0; 2953 int err; 2954 2955 switch (cmd) { 2956 case SIOCOUTQ: 2957 amount = unix_outq_len(sk); 2958 err = put_user(amount, (int __user *)arg); 2959 break; 2960 case SIOCINQ: 2961 amount = unix_inq_len(sk); 2962 if (amount < 0) 2963 err = amount; 2964 else 2965 err = put_user(amount, (int __user *)arg); 2966 break; 2967 case SIOCUNIXFILE: 2968 err = unix_open_file(sk); 2969 break; 2970 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2971 case SIOCATMARK: 2972 { 2973 struct sk_buff *skb; 2974 struct unix_sock *u = unix_sk(sk); 2975 int answ = 0; 2976 2977 skb = skb_peek(&sk->sk_receive_queue); 2978 if (skb && skb == u->oob_skb) 2979 answ = 1; 2980 err = put_user(answ, (int __user *)arg); 2981 } 2982 break; 2983 #endif 2984 default: 2985 err = -ENOIOCTLCMD; 2986 break; 2987 } 2988 return err; 2989 } 2990 2991 #ifdef CONFIG_COMPAT 2992 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 2993 { 2994 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 2995 } 2996 #endif 2997 2998 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 2999 { 3000 struct sock *sk = sock->sk; 3001 __poll_t mask; 3002 3003 sock_poll_wait(file, sock, wait); 3004 mask = 0; 3005 3006 /* exceptional events? */ 3007 if (sk->sk_err) 3008 mask |= EPOLLERR; 3009 if (sk->sk_shutdown == SHUTDOWN_MASK) 3010 mask |= EPOLLHUP; 3011 if (sk->sk_shutdown & RCV_SHUTDOWN) 3012 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3013 3014 /* readable? */ 3015 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3016 mask |= EPOLLIN | EPOLLRDNORM; 3017 3018 /* Connection-based need to check for termination and startup */ 3019 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3020 sk->sk_state == TCP_CLOSE) 3021 mask |= EPOLLHUP; 3022 3023 /* 3024 * we set writable also when the other side has shut down the 3025 * connection. This prevents stuck sockets. 3026 */ 3027 if (unix_writable(sk)) 3028 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3029 3030 return mask; 3031 } 3032 3033 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3034 poll_table *wait) 3035 { 3036 struct sock *sk = sock->sk, *other; 3037 unsigned int writable; 3038 __poll_t mask; 3039 3040 sock_poll_wait(file, sock, wait); 3041 mask = 0; 3042 3043 /* exceptional events? */ 3044 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 3045 mask |= EPOLLERR | 3046 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3047 3048 if (sk->sk_shutdown & RCV_SHUTDOWN) 3049 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3050 if (sk->sk_shutdown == SHUTDOWN_MASK) 3051 mask |= EPOLLHUP; 3052 3053 /* readable? */ 3054 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3055 mask |= EPOLLIN | EPOLLRDNORM; 3056 3057 /* Connection-based need to check for termination and startup */ 3058 if (sk->sk_type == SOCK_SEQPACKET) { 3059 if (sk->sk_state == TCP_CLOSE) 3060 mask |= EPOLLHUP; 3061 /* connection hasn't started yet? */ 3062 if (sk->sk_state == TCP_SYN_SENT) 3063 return mask; 3064 } 3065 3066 /* No write status requested, avoid expensive OUT tests. */ 3067 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3068 return mask; 3069 3070 writable = unix_writable(sk); 3071 if (writable) { 3072 unix_state_lock(sk); 3073 3074 other = unix_peer(sk); 3075 if (other && unix_peer(other) != sk && 3076 unix_recvq_full(other) && 3077 unix_dgram_peer_wake_me(sk, other)) 3078 writable = 0; 3079 3080 unix_state_unlock(sk); 3081 } 3082 3083 if (writable) 3084 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3085 else 3086 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3087 3088 return mask; 3089 } 3090 3091 #ifdef CONFIG_PROC_FS 3092 3093 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3094 3095 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3096 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) 3097 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3098 3099 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3100 { 3101 unsigned long offset = get_offset(*pos); 3102 unsigned long bucket = get_bucket(*pos); 3103 struct sock *sk; 3104 unsigned long count = 0; 3105 3106 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { 3107 if (sock_net(sk) != seq_file_net(seq)) 3108 continue; 3109 if (++count == offset) 3110 break; 3111 } 3112 3113 return sk; 3114 } 3115 3116 static struct sock *unix_next_socket(struct seq_file *seq, 3117 struct sock *sk, 3118 loff_t *pos) 3119 { 3120 unsigned long bucket; 3121 3122 while (sk > (struct sock *)SEQ_START_TOKEN) { 3123 sk = sk_next(sk); 3124 if (!sk) 3125 goto next_bucket; 3126 if (sock_net(sk) == seq_file_net(seq)) 3127 return sk; 3128 } 3129 3130 do { 3131 sk = unix_from_bucket(seq, pos); 3132 if (sk) 3133 return sk; 3134 3135 next_bucket: 3136 bucket = get_bucket(*pos) + 1; 3137 *pos = set_bucket_offset(bucket, 1); 3138 } while (bucket < ARRAY_SIZE(unix_socket_table)); 3139 3140 return NULL; 3141 } 3142 3143 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3144 __acquires(unix_table_lock) 3145 { 3146 spin_lock(&unix_table_lock); 3147 3148 if (!*pos) 3149 return SEQ_START_TOKEN; 3150 3151 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) 3152 return NULL; 3153 3154 return unix_next_socket(seq, NULL, pos); 3155 } 3156 3157 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3158 { 3159 ++*pos; 3160 return unix_next_socket(seq, v, pos); 3161 } 3162 3163 static void unix_seq_stop(struct seq_file *seq, void *v) 3164 __releases(unix_table_lock) 3165 { 3166 spin_unlock(&unix_table_lock); 3167 } 3168 3169 static int unix_seq_show(struct seq_file *seq, void *v) 3170 { 3171 3172 if (v == SEQ_START_TOKEN) 3173 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3174 "Inode Path\n"); 3175 else { 3176 struct sock *s = v; 3177 struct unix_sock *u = unix_sk(s); 3178 unix_state_lock(s); 3179 3180 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3181 s, 3182 refcount_read(&s->sk_refcnt), 3183 0, 3184 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3185 s->sk_type, 3186 s->sk_socket ? 3187 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3188 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), 3189 sock_i_ino(s)); 3190 3191 if (u->addr) { // under unix_table_lock here 3192 int i, len; 3193 seq_putc(seq, ' '); 3194 3195 i = 0; 3196 len = u->addr->len - sizeof(short); 3197 if (!UNIX_ABSTRACT(s)) 3198 len--; 3199 else { 3200 seq_putc(seq, '@'); 3201 i++; 3202 } 3203 for ( ; i < len; i++) 3204 seq_putc(seq, u->addr->name->sun_path[i] ?: 3205 '@'); 3206 } 3207 unix_state_unlock(s); 3208 seq_putc(seq, '\n'); 3209 } 3210 3211 return 0; 3212 } 3213 3214 static const struct seq_operations unix_seq_ops = { 3215 .start = unix_seq_start, 3216 .next = unix_seq_next, 3217 .stop = unix_seq_stop, 3218 .show = unix_seq_show, 3219 }; 3220 3221 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3222 struct bpf_iter__unix { 3223 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3224 __bpf_md_ptr(struct unix_sock *, unix_sk); 3225 uid_t uid __aligned(8); 3226 }; 3227 3228 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3229 struct unix_sock *unix_sk, uid_t uid) 3230 { 3231 struct bpf_iter__unix ctx; 3232 3233 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3234 ctx.meta = meta; 3235 ctx.unix_sk = unix_sk; 3236 ctx.uid = uid; 3237 return bpf_iter_run_prog(prog, &ctx); 3238 } 3239 3240 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3241 { 3242 struct bpf_iter_meta meta; 3243 struct bpf_prog *prog; 3244 struct sock *sk = v; 3245 uid_t uid; 3246 3247 if (v == SEQ_START_TOKEN) 3248 return 0; 3249 3250 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3251 meta.seq = seq; 3252 prog = bpf_iter_get_info(&meta, false); 3253 return unix_prog_seq_show(prog, &meta, v, uid); 3254 } 3255 3256 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3257 { 3258 struct bpf_iter_meta meta; 3259 struct bpf_prog *prog; 3260 3261 if (!v) { 3262 meta.seq = seq; 3263 prog = bpf_iter_get_info(&meta, true); 3264 if (prog) 3265 (void)unix_prog_seq_show(prog, &meta, v, 0); 3266 } 3267 3268 unix_seq_stop(seq, v); 3269 } 3270 3271 static const struct seq_operations bpf_iter_unix_seq_ops = { 3272 .start = unix_seq_start, 3273 .next = unix_seq_next, 3274 .stop = bpf_iter_unix_seq_stop, 3275 .show = bpf_iter_unix_seq_show, 3276 }; 3277 #endif 3278 #endif 3279 3280 static const struct net_proto_family unix_family_ops = { 3281 .family = PF_UNIX, 3282 .create = unix_create, 3283 .owner = THIS_MODULE, 3284 }; 3285 3286 3287 static int __net_init unix_net_init(struct net *net) 3288 { 3289 int error = -ENOMEM; 3290 3291 net->unx.sysctl_max_dgram_qlen = 10; 3292 if (unix_sysctl_register(net)) 3293 goto out; 3294 3295 #ifdef CONFIG_PROC_FS 3296 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3297 sizeof(struct seq_net_private))) { 3298 unix_sysctl_unregister(net); 3299 goto out; 3300 } 3301 #endif 3302 error = 0; 3303 out: 3304 return error; 3305 } 3306 3307 static void __net_exit unix_net_exit(struct net *net) 3308 { 3309 unix_sysctl_unregister(net); 3310 remove_proc_entry("unix", net->proc_net); 3311 } 3312 3313 static struct pernet_operations unix_net_ops = { 3314 .init = unix_net_init, 3315 .exit = unix_net_exit, 3316 }; 3317 3318 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3319 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3320 struct unix_sock *unix_sk, uid_t uid) 3321 3322 static const struct bpf_iter_seq_info unix_seq_info = { 3323 .seq_ops = &bpf_iter_unix_seq_ops, 3324 .init_seq_private = bpf_iter_init_seq_net, 3325 .fini_seq_private = bpf_iter_fini_seq_net, 3326 .seq_priv_size = sizeof(struct seq_net_private), 3327 }; 3328 3329 static struct bpf_iter_reg unix_reg_info = { 3330 .target = "unix", 3331 .ctx_arg_info_size = 1, 3332 .ctx_arg_info = { 3333 { offsetof(struct bpf_iter__unix, unix_sk), 3334 PTR_TO_BTF_ID_OR_NULL }, 3335 }, 3336 .seq_info = &unix_seq_info, 3337 }; 3338 3339 static void __init bpf_iter_register(void) 3340 { 3341 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3342 if (bpf_iter_reg_target(&unix_reg_info)) 3343 pr_warn("Warning: could not register bpf iterator unix\n"); 3344 } 3345 #endif 3346 3347 static int __init af_unix_init(void) 3348 { 3349 int rc = -1; 3350 3351 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3352 3353 rc = proto_register(&unix_dgram_proto, 1); 3354 if (rc != 0) { 3355 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3356 goto out; 3357 } 3358 3359 rc = proto_register(&unix_stream_proto, 1); 3360 if (rc != 0) { 3361 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3362 goto out; 3363 } 3364 3365 sock_register(&unix_family_ops); 3366 register_pernet_subsys(&unix_net_ops); 3367 unix_bpf_build_proto(); 3368 3369 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3370 bpf_iter_register(); 3371 #endif 3372 3373 out: 3374 return rc; 3375 } 3376 3377 static void __exit af_unix_exit(void) 3378 { 3379 sock_unregister(PF_UNIX); 3380 proto_unregister(&unix_dgram_proto); 3381 proto_unregister(&unix_stream_proto); 3382 unregister_pernet_subsys(&unix_net_ops); 3383 } 3384 3385 /* Earlier than device_initcall() so that other drivers invoking 3386 request_module() don't end up in a loop when modprobe tries 3387 to use a UNIX socket. But later than subsys_initcall() because 3388 we depend on stuff initialised there */ 3389 fs_initcall(af_unix_init); 3390 module_exit(af_unix_exit); 3391 3392 MODULE_LICENSE("GPL"); 3393 MODULE_ALIAS_NETPROTO(PF_UNIX); 3394