// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lot of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
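 *
 *		  Illustrative userspace sketch (not part of this file): an
 *		  abstract name is bound with sun_path[0] == '\0', the name
 *		  bytes following it, and an address length of
 *		  offsetof(struct sockaddr_un, sun_path) + 1 + name length;
 *		  no trailing NUL is required or counted.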
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

static atomic_long_t unix_nr_socks;

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return hash & UNIX_HASH_MOD;
}

static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	/* hash1 and hash2 are never the same because
	 * one is between 0 and UNIX_HASH_MOD, and
	 * another is between UNIX_HASH_MOD + 1 and UNIX_HASH_SIZE - 1.
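	 * (All callers - unix_autobind(), unix_bind_bsd() and
	 * unix_bind_abstract() - pass the socket's current unbound hash
	 * together with the new bound one, which is what keeps the two
	 * ranges disjoint.)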
163 */ 164 if (hash1 > hash2) 165 swap(hash1, hash2); 166 167 spin_lock(&net->unx.table.locks[hash1]); 168 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); 169 } 170 171 static void unix_table_double_unlock(struct net *net, 172 unsigned int hash1, unsigned int hash2) 173 { 174 spin_unlock(&net->unx.table.locks[hash1]); 175 spin_unlock(&net->unx.table.locks[hash2]); 176 } 177 178 #ifdef CONFIG_SECURITY_NETWORK 179 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 180 { 181 UNIXCB(skb).secid = scm->secid; 182 } 183 184 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 185 { 186 scm->secid = UNIXCB(skb).secid; 187 } 188 189 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 190 { 191 return (scm->secid == UNIXCB(skb).secid); 192 } 193 #else 194 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 195 { } 196 197 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 198 { } 199 200 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 201 { 202 return true; 203 } 204 #endif /* CONFIG_SECURITY_NETWORK */ 205 206 #define unix_peer(sk) (unix_sk(sk)->peer) 207 208 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 209 { 210 return unix_peer(osk) == sk; 211 } 212 213 static inline int unix_may_send(struct sock *sk, struct sock *osk) 214 { 215 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 216 } 217 218 static inline int unix_recvq_full(const struct sock *sk) 219 { 220 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 221 } 222 223 static inline int unix_recvq_full_lockless(const struct sock *sk) 224 { 225 return skb_queue_len_lockless(&sk->sk_receive_queue) > 226 READ_ONCE(sk->sk_max_ack_backlog); 227 } 228 229 struct sock *unix_peer_get(struct sock *s) 230 { 231 struct sock *peer; 232 233 unix_state_lock(s); 234 peer = unix_peer(s); 235 if (peer) 236 sock_hold(peer); 237 unix_state_unlock(s); 238 return peer; 239 } 240 EXPORT_SYMBOL_GPL(unix_peer_get); 241 242 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, 243 int addr_len) 244 { 245 struct unix_address *addr; 246 247 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); 248 if (!addr) 249 return NULL; 250 251 refcount_set(&addr->refcnt, 1); 252 addr->len = addr_len; 253 memcpy(addr->name, sunaddr, addr_len); 254 255 return addr; 256 } 257 258 static inline void unix_release_addr(struct unix_address *addr) 259 { 260 if (refcount_dec_and_test(&addr->refcnt)) 261 kfree(addr); 262 } 263 264 /* 265 * Check unix socket name: 266 * - should be not zero length. 267 * - if started by not zero, should be NULL terminated (FS object) 268 * - if started by zero, it is abstract name. 269 */ 270 271 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) 272 { 273 if (addr_len <= offsetof(struct sockaddr_un, sun_path) || 274 addr_len > sizeof(*sunaddr)) 275 return -EINVAL; 276 277 if (sunaddr->sun_family != AF_UNIX) 278 return -EINVAL; 279 280 return 0; 281 } 282 283 static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) 284 { 285 /* This may look like an off by one error but it is a bit more 286 * subtle. 108 is the longest valid AF_UNIX path for a binding. 287 * sun_path[108] doesn't as such exist. 
However in kernel space 288 * we are guaranteed that it is a valid memory location in our 289 * kernel address buffer because syscall functions always pass 290 * a pointer of struct sockaddr_storage which has a bigger buffer 291 * than 108. 292 */ 293 ((char *)sunaddr)[addr_len] = 0; 294 } 295 296 static void __unix_remove_socket(struct sock *sk) 297 { 298 sk_del_node_init(sk); 299 } 300 301 static void __unix_insert_socket(struct net *net, struct sock *sk) 302 { 303 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 304 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); 305 } 306 307 static void __unix_set_addr_hash(struct net *net, struct sock *sk, 308 struct unix_address *addr, unsigned int hash) 309 { 310 __unix_remove_socket(sk); 311 smp_store_release(&unix_sk(sk)->addr, addr); 312 313 sk->sk_hash = hash; 314 __unix_insert_socket(net, sk); 315 } 316 317 static void unix_remove_socket(struct net *net, struct sock *sk) 318 { 319 spin_lock(&net->unx.table.locks[sk->sk_hash]); 320 __unix_remove_socket(sk); 321 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 322 } 323 324 static void unix_insert_unbound_socket(struct net *net, struct sock *sk) 325 { 326 spin_lock(&net->unx.table.locks[sk->sk_hash]); 327 __unix_insert_socket(net, sk); 328 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 329 } 330 331 static struct sock *__unix_find_socket_byname(struct net *net, 332 struct sockaddr_un *sunname, 333 int len, unsigned int hash) 334 { 335 struct sock *s; 336 337 sk_for_each(s, &net->unx.table.buckets[hash]) { 338 struct unix_sock *u = unix_sk(s); 339 340 if (u->addr->len == len && 341 !memcmp(u->addr->name, sunname, len)) 342 return s; 343 } 344 return NULL; 345 } 346 347 static inline struct sock *unix_find_socket_byname(struct net *net, 348 struct sockaddr_un *sunname, 349 int len, unsigned int hash) 350 { 351 struct sock *s; 352 353 spin_lock(&net->unx.table.locks[hash]); 354 s = __unix_find_socket_byname(net, sunname, len, hash); 355 if (s) 356 sock_hold(s); 357 spin_unlock(&net->unx.table.locks[hash]); 358 return s; 359 } 360 361 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) 362 { 363 unsigned int hash = unix_bsd_hash(i); 364 struct sock *s; 365 366 spin_lock(&net->unx.table.locks[hash]); 367 sk_for_each(s, &net->unx.table.buckets[hash]) { 368 struct dentry *dentry = unix_sk(s)->path.dentry; 369 370 if (dentry && d_backing_inode(dentry) == i) { 371 sock_hold(s); 372 spin_unlock(&net->unx.table.locks[hash]); 373 return s; 374 } 375 } 376 spin_unlock(&net->unx.table.locks[hash]); 377 return NULL; 378 } 379 380 /* Support code for asymmetrically connected dgram sockets 381 * 382 * If a datagram socket is connected to a socket not itself connected 383 * to the first socket (eg, /dev/log), clients may only enqueue more 384 * messages if the present receive queue of the server socket is not 385 * "too large". This means there's a second writeability condition 386 * poll and sendmsg need to test. The dgram recv code will do a wake 387 * up on the peer_wait wait queue of a socket upon reception of a 388 * datagram which needs to be propagated to sleeping would-be writers 389 * since these might not have sent anything so far. 
This can't be 390 * accomplished via poll_wait because the lifetime of the server 391 * socket might be less than that of its clients if these break their 392 * association with it or if the server socket is closed while clients 393 * are still connected to it and there's no way to inform "a polling 394 * implementation" that it should let go of a certain wait queue 395 * 396 * In order to propagate a wake up, a wait_queue_entry_t of the client 397 * socket is enqueued on the peer_wait queue of the server socket 398 * whose wake function does a wake_up on the ordinary client socket 399 * wait queue. This connection is established whenever a write (or 400 * poll for write) hit the flow control condition and broken when the 401 * association to the server socket is dissolved or after a wake up 402 * was relayed. 403 */ 404 405 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, 406 void *key) 407 { 408 struct unix_sock *u; 409 wait_queue_head_t *u_sleep; 410 411 u = container_of(q, struct unix_sock, peer_wake); 412 413 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, 414 q); 415 u->peer_wake.private = NULL; 416 417 /* relaying can only happen while the wq still exists */ 418 u_sleep = sk_sleep(&u->sk); 419 if (u_sleep) 420 wake_up_interruptible_poll(u_sleep, key_to_poll(key)); 421 422 return 0; 423 } 424 425 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) 426 { 427 struct unix_sock *u, *u_other; 428 int rc; 429 430 u = unix_sk(sk); 431 u_other = unix_sk(other); 432 rc = 0; 433 spin_lock(&u_other->peer_wait.lock); 434 435 if (!u->peer_wake.private) { 436 u->peer_wake.private = other; 437 __add_wait_queue(&u_other->peer_wait, &u->peer_wake); 438 439 rc = 1; 440 } 441 442 spin_unlock(&u_other->peer_wait.lock); 443 return rc; 444 } 445 446 static void unix_dgram_peer_wake_disconnect(struct sock *sk, 447 struct sock *other) 448 { 449 struct unix_sock *u, *u_other; 450 451 u = unix_sk(sk); 452 u_other = unix_sk(other); 453 spin_lock(&u_other->peer_wait.lock); 454 455 if (u->peer_wake.private == other) { 456 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); 457 u->peer_wake.private = NULL; 458 } 459 460 spin_unlock(&u_other->peer_wait.lock); 461 } 462 463 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, 464 struct sock *other) 465 { 466 unix_dgram_peer_wake_disconnect(sk, other); 467 wake_up_interruptible_poll(sk_sleep(sk), 468 EPOLLOUT | 469 EPOLLWRNORM | 470 EPOLLWRBAND); 471 } 472 473 /* preconditions: 474 * - unix_peer(sk) == other 475 * - association is stable 476 */ 477 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) 478 { 479 int connected; 480 481 connected = unix_dgram_peer_wake_connect(sk, other); 482 483 /* If other is SOCK_DEAD, we want to make sure we signal 484 * POLLOUT, such that a subsequent write() can get a 485 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs 486 * to other and its full, we will hang waiting for POLLOUT. 
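	 * In short: the peer_wait entry connected just above is kept (and 1
	 * returned, meaning the caller must sleep and will be woken via the
	 * relay) only while the peer is alive and its queue really is full;
	 * otherwise an entry added by this call is removed again and 0 is
	 * returned.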
487 */ 488 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) 489 return 1; 490 491 if (connected) 492 unix_dgram_peer_wake_disconnect(sk, other); 493 494 return 0; 495 } 496 497 static int unix_writable(const struct sock *sk) 498 { 499 return sk->sk_state != TCP_LISTEN && 500 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 501 } 502 503 static void unix_write_space(struct sock *sk) 504 { 505 struct socket_wq *wq; 506 507 rcu_read_lock(); 508 if (unix_writable(sk)) { 509 wq = rcu_dereference(sk->sk_wq); 510 if (skwq_has_sleeper(wq)) 511 wake_up_interruptible_sync_poll(&wq->wait, 512 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); 513 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 514 } 515 rcu_read_unlock(); 516 } 517 518 /* When dgram socket disconnects (or changes its peer), we clear its receive 519 * queue of packets arrived from previous peer. First, it allows to do 520 * flow control based only on wmem_alloc; second, sk connected to peer 521 * may receive messages only from that peer. */ 522 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 523 { 524 if (!skb_queue_empty(&sk->sk_receive_queue)) { 525 skb_queue_purge(&sk->sk_receive_queue); 526 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 527 528 /* If one link of bidirectional dgram pipe is disconnected, 529 * we signal error. Messages are lost. Do not make this, 530 * when peer was not connected to us. 531 */ 532 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 533 other->sk_err = ECONNRESET; 534 sk_error_report(other); 535 } 536 } 537 other->sk_state = TCP_CLOSE; 538 } 539 540 static void unix_sock_destructor(struct sock *sk) 541 { 542 struct unix_sock *u = unix_sk(sk); 543 544 skb_queue_purge(&sk->sk_receive_queue); 545 546 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 547 if (u->oob_skb) { 548 kfree_skb(u->oob_skb); 549 u->oob_skb = NULL; 550 } 551 #endif 552 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); 553 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 554 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket); 555 if (!sock_flag(sk, SOCK_DEAD)) { 556 pr_info("Attempt to release alive unix socket: %p\n", sk); 557 return; 558 } 559 560 if (u->addr) 561 unix_release_addr(u->addr); 562 563 atomic_long_dec(&unix_nr_socks); 564 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 565 #ifdef UNIX_REFCNT_DEBUG 566 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, 567 atomic_long_read(&unix_nr_socks)); 568 #endif 569 } 570 571 static void unix_release_sock(struct sock *sk, int embrion) 572 { 573 struct unix_sock *u = unix_sk(sk); 574 struct sock *skpair; 575 struct sk_buff *skb; 576 struct path path; 577 int state; 578 579 unix_remove_socket(sock_net(sk), sk); 580 581 /* Clear state */ 582 unix_state_lock(sk); 583 sock_orphan(sk); 584 sk->sk_shutdown = SHUTDOWN_MASK; 585 path = u->path; 586 u->path.dentry = NULL; 587 u->path.mnt = NULL; 588 state = sk->sk_state; 589 sk->sk_state = TCP_CLOSE; 590 591 skpair = unix_peer(sk); 592 unix_peer(sk) = NULL; 593 594 unix_state_unlock(sk); 595 596 wake_up_interruptible_all(&u->peer_wait); 597 598 if (skpair != NULL) { 599 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 600 unix_state_lock(skpair); 601 /* No more writes */ 602 skpair->sk_shutdown = SHUTDOWN_MASK; 603 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 604 skpair->sk_err = ECONNRESET; 605 unix_state_unlock(skpair); 606 skpair->sk_state_change(skpair); 607 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 608 } 609 610 unix_dgram_peer_wake_disconnect(sk, 
skpair); 611 sock_put(skpair); /* It may now die */ 612 } 613 614 /* Try to flush out this socket. Throw out buffers at least */ 615 616 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 617 if (state == TCP_LISTEN) 618 unix_release_sock(skb->sk, 1); 619 /* passed fds are erased in the kfree_skb hook */ 620 UNIXCB(skb).consumed = skb->len; 621 kfree_skb(skb); 622 } 623 624 if (path.dentry) 625 path_put(&path); 626 627 sock_put(sk); 628 629 /* ---- Socket is dead now and most probably destroyed ---- */ 630 631 /* 632 * Fixme: BSD difference: In BSD all sockets connected to us get 633 * ECONNRESET and we die on the spot. In Linux we behave 634 * like files and pipes do and wait for the last 635 * dereference. 636 * 637 * Can't we simply set sock->err? 638 * 639 * What the above comment does talk about? --ANK(980817) 640 */ 641 642 if (unix_tot_inflight) 643 unix_gc(); /* Garbage collect fds */ 644 } 645 646 static void init_peercred(struct sock *sk) 647 { 648 const struct cred *old_cred; 649 struct pid *old_pid; 650 651 spin_lock(&sk->sk_peer_lock); 652 old_pid = sk->sk_peer_pid; 653 old_cred = sk->sk_peer_cred; 654 sk->sk_peer_pid = get_pid(task_tgid(current)); 655 sk->sk_peer_cred = get_current_cred(); 656 spin_unlock(&sk->sk_peer_lock); 657 658 put_pid(old_pid); 659 put_cred(old_cred); 660 } 661 662 static void copy_peercred(struct sock *sk, struct sock *peersk) 663 { 664 const struct cred *old_cred; 665 struct pid *old_pid; 666 667 if (sk < peersk) { 668 spin_lock(&sk->sk_peer_lock); 669 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); 670 } else { 671 spin_lock(&peersk->sk_peer_lock); 672 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); 673 } 674 old_pid = sk->sk_peer_pid; 675 old_cred = sk->sk_peer_cred; 676 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 677 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 678 679 spin_unlock(&sk->sk_peer_lock); 680 spin_unlock(&peersk->sk_peer_lock); 681 682 put_pid(old_pid); 683 put_cred(old_cred); 684 } 685 686 static int unix_listen(struct socket *sock, int backlog) 687 { 688 int err; 689 struct sock *sk = sock->sk; 690 struct unix_sock *u = unix_sk(sk); 691 692 err = -EOPNOTSUPP; 693 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 694 goto out; /* Only stream/seqpacket sockets accept */ 695 err = -EINVAL; 696 if (!u->addr) 697 goto out; /* No listens on an unbound socket */ 698 unix_state_lock(sk); 699 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 700 goto out_unlock; 701 if (backlog > sk->sk_max_ack_backlog) 702 wake_up_interruptible_all(&u->peer_wait); 703 sk->sk_max_ack_backlog = backlog; 704 sk->sk_state = TCP_LISTEN; 705 /* set credentials so connect can copy them */ 706 init_peercred(sk); 707 err = 0; 708 709 out_unlock: 710 unix_state_unlock(sk); 711 out: 712 return err; 713 } 714 715 static int unix_release(struct socket *); 716 static int unix_bind(struct socket *, struct sockaddr *, int); 717 static int unix_stream_connect(struct socket *, struct sockaddr *, 718 int addr_len, int flags); 719 static int unix_socketpair(struct socket *, struct socket *); 720 static int unix_accept(struct socket *, struct socket *, int, bool); 721 static int unix_getname(struct socket *, struct sockaddr *, int); 722 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 723 static __poll_t unix_dgram_poll(struct file *, struct socket *, 724 poll_table *); 725 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 726 #ifdef CONFIG_COMPAT 727 static int 
unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 728 #endif 729 static int unix_shutdown(struct socket *, int); 730 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 731 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 732 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 733 size_t size, int flags); 734 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 735 struct pipe_inode_info *, size_t size, 736 unsigned int flags); 737 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 738 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 739 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, 740 sk_read_actor_t recv_actor); 741 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, 742 sk_read_actor_t recv_actor); 743 static int unix_dgram_connect(struct socket *, struct sockaddr *, 744 int, int); 745 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 746 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 747 int); 748 749 static int unix_set_peek_off(struct sock *sk, int val) 750 { 751 struct unix_sock *u = unix_sk(sk); 752 753 if (mutex_lock_interruptible(&u->iolock)) 754 return -EINTR; 755 756 sk->sk_peek_off = val; 757 mutex_unlock(&u->iolock); 758 759 return 0; 760 } 761 762 #ifdef CONFIG_PROC_FS 763 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 764 { 765 struct sock *sk = sock->sk; 766 struct unix_sock *u; 767 768 if (sk) { 769 u = unix_sk(sock->sk); 770 seq_printf(m, "scm_fds: %u\n", 771 atomic_read(&u->scm_stat.nr_fds)); 772 } 773 } 774 #else 775 #define unix_show_fdinfo NULL 776 #endif 777 778 static const struct proto_ops unix_stream_ops = { 779 .family = PF_UNIX, 780 .owner = THIS_MODULE, 781 .release = unix_release, 782 .bind = unix_bind, 783 .connect = unix_stream_connect, 784 .socketpair = unix_socketpair, 785 .accept = unix_accept, 786 .getname = unix_getname, 787 .poll = unix_poll, 788 .ioctl = unix_ioctl, 789 #ifdef CONFIG_COMPAT 790 .compat_ioctl = unix_compat_ioctl, 791 #endif 792 .listen = unix_listen, 793 .shutdown = unix_shutdown, 794 .sendmsg = unix_stream_sendmsg, 795 .recvmsg = unix_stream_recvmsg, 796 .read_sock = unix_stream_read_sock, 797 .mmap = sock_no_mmap, 798 .sendpage = unix_stream_sendpage, 799 .splice_read = unix_stream_splice_read, 800 .set_peek_off = unix_set_peek_off, 801 .show_fdinfo = unix_show_fdinfo, 802 }; 803 804 static const struct proto_ops unix_dgram_ops = { 805 .family = PF_UNIX, 806 .owner = THIS_MODULE, 807 .release = unix_release, 808 .bind = unix_bind, 809 .connect = unix_dgram_connect, 810 .socketpair = unix_socketpair, 811 .accept = sock_no_accept, 812 .getname = unix_getname, 813 .poll = unix_dgram_poll, 814 .ioctl = unix_ioctl, 815 #ifdef CONFIG_COMPAT 816 .compat_ioctl = unix_compat_ioctl, 817 #endif 818 .listen = sock_no_listen, 819 .shutdown = unix_shutdown, 820 .sendmsg = unix_dgram_sendmsg, 821 .read_sock = unix_read_sock, 822 .recvmsg = unix_dgram_recvmsg, 823 .mmap = sock_no_mmap, 824 .sendpage = sock_no_sendpage, 825 .set_peek_off = unix_set_peek_off, 826 .show_fdinfo = unix_show_fdinfo, 827 }; 828 829 static const struct proto_ops unix_seqpacket_ops = { 830 .family = PF_UNIX, 831 .owner = THIS_MODULE, 832 .release = unix_release, 833 .bind = unix_bind, 834 .connect = unix_stream_connect, 835 .socketpair = unix_socketpair, 836 .accept = 
unix_accept, 837 .getname = unix_getname, 838 .poll = unix_dgram_poll, 839 .ioctl = unix_ioctl, 840 #ifdef CONFIG_COMPAT 841 .compat_ioctl = unix_compat_ioctl, 842 #endif 843 .listen = unix_listen, 844 .shutdown = unix_shutdown, 845 .sendmsg = unix_seqpacket_sendmsg, 846 .recvmsg = unix_seqpacket_recvmsg, 847 .mmap = sock_no_mmap, 848 .sendpage = sock_no_sendpage, 849 .set_peek_off = unix_set_peek_off, 850 .show_fdinfo = unix_show_fdinfo, 851 }; 852 853 static void unix_close(struct sock *sk, long timeout) 854 { 855 /* Nothing to do here, unix socket does not need a ->close(). 856 * This is merely for sockmap. 857 */ 858 } 859 860 static void unix_unhash(struct sock *sk) 861 { 862 /* Nothing to do here, unix socket does not need a ->unhash(). 863 * This is merely for sockmap. 864 */ 865 } 866 867 struct proto unix_dgram_proto = { 868 .name = "UNIX", 869 .owner = THIS_MODULE, 870 .obj_size = sizeof(struct unix_sock), 871 .close = unix_close, 872 #ifdef CONFIG_BPF_SYSCALL 873 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 874 #endif 875 }; 876 877 struct proto unix_stream_proto = { 878 .name = "UNIX-STREAM", 879 .owner = THIS_MODULE, 880 .obj_size = sizeof(struct unix_sock), 881 .close = unix_close, 882 .unhash = unix_unhash, 883 #ifdef CONFIG_BPF_SYSCALL 884 .psock_update_sk_prot = unix_stream_bpf_update_proto, 885 #endif 886 }; 887 888 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 889 { 890 struct unix_sock *u; 891 struct sock *sk; 892 int err; 893 894 atomic_long_inc(&unix_nr_socks); 895 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 896 err = -ENFILE; 897 goto err; 898 } 899 900 if (type == SOCK_STREAM) 901 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 902 else /*dgram and seqpacket */ 903 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 904 905 if (!sk) { 906 err = -ENOMEM; 907 goto err; 908 } 909 910 sock_init_data(sock, sk); 911 912 sk->sk_hash = unix_unbound_hash(sk); 913 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 914 sk->sk_write_space = unix_write_space; 915 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 916 sk->sk_destruct = unix_sock_destructor; 917 u = unix_sk(sk); 918 u->path.dentry = NULL; 919 u->path.mnt = NULL; 920 spin_lock_init(&u->lock); 921 atomic_long_set(&u->inflight, 0); 922 INIT_LIST_HEAD(&u->link); 923 mutex_init(&u->iolock); /* single task reading lock */ 924 mutex_init(&u->bindlock); /* single task binding lock */ 925 init_waitqueue_head(&u->peer_wait); 926 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 927 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 928 unix_insert_unbound_socket(net, sk); 929 930 sock_prot_inuse_add(net, sk->sk_prot, 1); 931 932 return sk; 933 934 err: 935 atomic_long_dec(&unix_nr_socks); 936 return ERR_PTR(err); 937 } 938 939 static int unix_create(struct net *net, struct socket *sock, int protocol, 940 int kern) 941 { 942 struct sock *sk; 943 944 if (protocol && protocol != PF_UNIX) 945 return -EPROTONOSUPPORT; 946 947 sock->state = SS_UNCONNECTED; 948 949 switch (sock->type) { 950 case SOCK_STREAM: 951 sock->ops = &unix_stream_ops; 952 break; 953 /* 954 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 955 * nothing uses it. 
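	 *	(Here we quietly accept SOCK_RAW and treat it as SOCK_DGRAM
	 *	rather than failing, as the fallthrough below shows.)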
956 */ 957 case SOCK_RAW: 958 sock->type = SOCK_DGRAM; 959 fallthrough; 960 case SOCK_DGRAM: 961 sock->ops = &unix_dgram_ops; 962 break; 963 case SOCK_SEQPACKET: 964 sock->ops = &unix_seqpacket_ops; 965 break; 966 default: 967 return -ESOCKTNOSUPPORT; 968 } 969 970 sk = unix_create1(net, sock, kern, sock->type); 971 if (IS_ERR(sk)) 972 return PTR_ERR(sk); 973 974 return 0; 975 } 976 977 static int unix_release(struct socket *sock) 978 { 979 struct sock *sk = sock->sk; 980 981 if (!sk) 982 return 0; 983 984 sk->sk_prot->close(sk, 0); 985 unix_release_sock(sk, 0); 986 sock->sk = NULL; 987 988 return 0; 989 } 990 991 static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, 992 int addr_len, int type) 993 { 994 struct inode *inode; 995 struct path path; 996 struct sock *sk; 997 int err; 998 999 unix_mkname_bsd(sunaddr, addr_len); 1000 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1001 if (err) 1002 goto fail; 1003 1004 err = path_permission(&path, MAY_WRITE); 1005 if (err) 1006 goto path_put; 1007 1008 err = -ECONNREFUSED; 1009 inode = d_backing_inode(path.dentry); 1010 if (!S_ISSOCK(inode->i_mode)) 1011 goto path_put; 1012 1013 sk = unix_find_socket_byinode(net, inode); 1014 if (!sk) 1015 goto path_put; 1016 1017 err = -EPROTOTYPE; 1018 if (sk->sk_type == type) 1019 touch_atime(&path); 1020 else 1021 goto sock_put; 1022 1023 path_put(&path); 1024 1025 return sk; 1026 1027 sock_put: 1028 sock_put(sk); 1029 path_put: 1030 path_put(&path); 1031 fail: 1032 return ERR_PTR(err); 1033 } 1034 1035 static struct sock *unix_find_abstract(struct net *net, 1036 struct sockaddr_un *sunaddr, 1037 int addr_len, int type) 1038 { 1039 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1040 struct dentry *dentry; 1041 struct sock *sk; 1042 1043 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1044 if (!sk) 1045 return ERR_PTR(-ECONNREFUSED); 1046 1047 dentry = unix_sk(sk)->path.dentry; 1048 if (dentry) 1049 touch_atime(&unix_sk(sk)->path); 1050 1051 return sk; 1052 } 1053 1054 static struct sock *unix_find_other(struct net *net, 1055 struct sockaddr_un *sunaddr, 1056 int addr_len, int type) 1057 { 1058 struct sock *sk; 1059 1060 if (sunaddr->sun_path[0]) 1061 sk = unix_find_bsd(net, sunaddr, addr_len, type); 1062 else 1063 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1064 1065 return sk; 1066 } 1067 1068 static int unix_autobind(struct sock *sk) 1069 { 1070 unsigned int new_hash, old_hash = sk->sk_hash; 1071 struct unix_sock *u = unix_sk(sk); 1072 struct net *net = sock_net(sk); 1073 struct unix_address *addr; 1074 u32 lastnum, ordernum; 1075 int err; 1076 1077 err = mutex_lock_interruptible(&u->bindlock); 1078 if (err) 1079 return err; 1080 1081 if (u->addr) 1082 goto out; 1083 1084 err = -ENOMEM; 1085 addr = kzalloc(sizeof(*addr) + 1086 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1087 if (!addr) 1088 goto out; 1089 1090 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1091 addr->name->sun_family = AF_UNIX; 1092 refcount_set(&addr->refcnt, 1); 1093 1094 ordernum = prandom_u32(); 1095 lastnum = ordernum & 0xFFFFF; 1096 retry: 1097 ordernum = (ordernum + 1) & 0xFFFFF; 1098 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1099 1100 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1101 unix_table_double_lock(net, old_hash, new_hash); 1102 1103 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { 1104 unix_table_double_unlock(net, old_hash, new_hash); 1105 1106 /* 
__unix_find_socket_byname() may take long time if many names 1107 * are already in use. 1108 */ 1109 cond_resched(); 1110 1111 if (ordernum == lastnum) { 1112 /* Give up if all names seems to be in use. */ 1113 err = -ENOSPC; 1114 unix_release_addr(addr); 1115 goto out; 1116 } 1117 1118 goto retry; 1119 } 1120 1121 __unix_set_addr_hash(net, sk, addr, new_hash); 1122 unix_table_double_unlock(net, old_hash, new_hash); 1123 err = 0; 1124 1125 out: mutex_unlock(&u->bindlock); 1126 return err; 1127 } 1128 1129 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1130 int addr_len) 1131 { 1132 umode_t mode = S_IFSOCK | 1133 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1134 unsigned int new_hash, old_hash = sk->sk_hash; 1135 struct unix_sock *u = unix_sk(sk); 1136 struct net *net = sock_net(sk); 1137 struct user_namespace *ns; // barf... 1138 struct unix_address *addr; 1139 struct dentry *dentry; 1140 struct path parent; 1141 int err; 1142 1143 unix_mkname_bsd(sunaddr, addr_len); 1144 addr_len = strlen(sunaddr->sun_path) + 1145 offsetof(struct sockaddr_un, sun_path) + 1; 1146 1147 addr = unix_create_addr(sunaddr, addr_len); 1148 if (!addr) 1149 return -ENOMEM; 1150 1151 /* 1152 * Get the parent directory, calculate the hash for last 1153 * component. 1154 */ 1155 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1156 if (IS_ERR(dentry)) { 1157 err = PTR_ERR(dentry); 1158 goto out; 1159 } 1160 1161 /* 1162 * All right, let's create it. 1163 */ 1164 ns = mnt_user_ns(parent.mnt); 1165 err = security_path_mknod(&parent, dentry, mode, 0); 1166 if (!err) 1167 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); 1168 if (err) 1169 goto out_path; 1170 err = mutex_lock_interruptible(&u->bindlock); 1171 if (err) 1172 goto out_unlink; 1173 if (u->addr) 1174 goto out_unlock; 1175 1176 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1177 unix_table_double_lock(net, old_hash, new_hash); 1178 u->path.mnt = mntget(parent.mnt); 1179 u->path.dentry = dget(dentry); 1180 __unix_set_addr_hash(net, sk, addr, new_hash); 1181 unix_table_double_unlock(net, old_hash, new_hash); 1182 mutex_unlock(&u->bindlock); 1183 done_path_create(&parent, dentry); 1184 return 0; 1185 1186 out_unlock: 1187 mutex_unlock(&u->bindlock); 1188 err = -EINVAL; 1189 out_unlink: 1190 /* failed after successful mknod? unlink what we'd created... */ 1191 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); 1192 out_path: 1193 done_path_create(&parent, dentry); 1194 out: 1195 unix_release_addr(addr); 1196 return err == -EEXIST ? 
-EADDRINUSE : err; 1197 } 1198 1199 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1200 int addr_len) 1201 { 1202 unsigned int new_hash, old_hash = sk->sk_hash; 1203 struct unix_sock *u = unix_sk(sk); 1204 struct net *net = sock_net(sk); 1205 struct unix_address *addr; 1206 int err; 1207 1208 addr = unix_create_addr(sunaddr, addr_len); 1209 if (!addr) 1210 return -ENOMEM; 1211 1212 err = mutex_lock_interruptible(&u->bindlock); 1213 if (err) 1214 goto out; 1215 1216 if (u->addr) { 1217 err = -EINVAL; 1218 goto out_mutex; 1219 } 1220 1221 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1222 unix_table_double_lock(net, old_hash, new_hash); 1223 1224 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) 1225 goto out_spin; 1226 1227 __unix_set_addr_hash(net, sk, addr, new_hash); 1228 unix_table_double_unlock(net, old_hash, new_hash); 1229 mutex_unlock(&u->bindlock); 1230 return 0; 1231 1232 out_spin: 1233 unix_table_double_unlock(net, old_hash, new_hash); 1234 err = -EADDRINUSE; 1235 out_mutex: 1236 mutex_unlock(&u->bindlock); 1237 out: 1238 unix_release_addr(addr); 1239 return err; 1240 } 1241 1242 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1243 { 1244 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1245 struct sock *sk = sock->sk; 1246 int err; 1247 1248 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1249 sunaddr->sun_family == AF_UNIX) 1250 return unix_autobind(sk); 1251 1252 err = unix_validate_addr(sunaddr, addr_len); 1253 if (err) 1254 return err; 1255 1256 if (sunaddr->sun_path[0]) 1257 err = unix_bind_bsd(sk, sunaddr, addr_len); 1258 else 1259 err = unix_bind_abstract(sk, sunaddr, addr_len); 1260 1261 return err; 1262 } 1263 1264 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1265 { 1266 if (unlikely(sk1 == sk2) || !sk2) { 1267 unix_state_lock(sk1); 1268 return; 1269 } 1270 if (sk1 < sk2) { 1271 unix_state_lock(sk1); 1272 unix_state_lock_nested(sk2); 1273 } else { 1274 unix_state_lock(sk2); 1275 unix_state_lock_nested(sk1); 1276 } 1277 } 1278 1279 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1280 { 1281 if (unlikely(sk1 == sk2) || !sk2) { 1282 unix_state_unlock(sk1); 1283 return; 1284 } 1285 unix_state_unlock(sk1); 1286 unix_state_unlock(sk2); 1287 } 1288 1289 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1290 int alen, int flags) 1291 { 1292 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1293 struct sock *sk = sock->sk; 1294 struct sock *other; 1295 int err; 1296 1297 err = -EINVAL; 1298 if (alen < offsetofend(struct sockaddr, sa_family)) 1299 goto out; 1300 1301 if (addr->sa_family != AF_UNSPEC) { 1302 err = unix_validate_addr(sunaddr, alen); 1303 if (err) 1304 goto out; 1305 1306 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1307 !unix_sk(sk)->addr) { 1308 err = unix_autobind(sk); 1309 if (err) 1310 goto out; 1311 } 1312 1313 restart: 1314 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); 1315 if (IS_ERR(other)) { 1316 err = PTR_ERR(other); 1317 goto out; 1318 } 1319 1320 unix_state_double_lock(sk, other); 1321 1322 /* Apparently VFS overslept socket death. Retry. 
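		 * ("Overslept" means the address lookup handed us a peer whose
		 * close() has already run and marked it SOCK_DEAD; drop it and
		 * redo the lookup, which will either find a freshly bound
		 * socket or fail.)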
*/ 1323 if (sock_flag(other, SOCK_DEAD)) { 1324 unix_state_double_unlock(sk, other); 1325 sock_put(other); 1326 goto restart; 1327 } 1328 1329 err = -EPERM; 1330 if (!unix_may_send(sk, other)) 1331 goto out_unlock; 1332 1333 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1334 if (err) 1335 goto out_unlock; 1336 1337 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1338 } else { 1339 /* 1340 * 1003.1g breaking connected state with AF_UNSPEC 1341 */ 1342 other = NULL; 1343 unix_state_double_lock(sk, other); 1344 } 1345 1346 /* 1347 * If it was connected, reconnect. 1348 */ 1349 if (unix_peer(sk)) { 1350 struct sock *old_peer = unix_peer(sk); 1351 1352 unix_peer(sk) = other; 1353 if (!other) 1354 sk->sk_state = TCP_CLOSE; 1355 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1356 1357 unix_state_double_unlock(sk, other); 1358 1359 if (other != old_peer) 1360 unix_dgram_disconnected(sk, old_peer); 1361 sock_put(old_peer); 1362 } else { 1363 unix_peer(sk) = other; 1364 unix_state_double_unlock(sk, other); 1365 } 1366 1367 return 0; 1368 1369 out_unlock: 1370 unix_state_double_unlock(sk, other); 1371 sock_put(other); 1372 out: 1373 return err; 1374 } 1375 1376 static long unix_wait_for_peer(struct sock *other, long timeo) 1377 __releases(&unix_sk(other)->lock) 1378 { 1379 struct unix_sock *u = unix_sk(other); 1380 int sched; 1381 DEFINE_WAIT(wait); 1382 1383 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1384 1385 sched = !sock_flag(other, SOCK_DEAD) && 1386 !(other->sk_shutdown & RCV_SHUTDOWN) && 1387 unix_recvq_full(other); 1388 1389 unix_state_unlock(other); 1390 1391 if (sched) 1392 timeo = schedule_timeout(timeo); 1393 1394 finish_wait(&u->peer_wait, &wait); 1395 return timeo; 1396 } 1397 1398 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1399 int addr_len, int flags) 1400 { 1401 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1402 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; 1403 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1404 struct net *net = sock_net(sk); 1405 struct sk_buff *skb = NULL; 1406 long timeo; 1407 int err; 1408 int st; 1409 1410 err = unix_validate_addr(sunaddr, addr_len); 1411 if (err) 1412 goto out; 1413 1414 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1415 err = unix_autobind(sk); 1416 if (err) 1417 goto out; 1418 } 1419 1420 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1421 1422 /* First of all allocate resources. 1423 If we will make it after state is locked, 1424 we will have to recheck all again in any case. 1425 */ 1426 1427 /* create new sock for complete connection */ 1428 newsk = unix_create1(net, NULL, 0, sock->type); 1429 if (IS_ERR(newsk)) { 1430 err = PTR_ERR(newsk); 1431 newsk = NULL; 1432 goto out; 1433 } 1434 1435 err = -ENOMEM; 1436 1437 /* Allocate skb for sending to listening sock */ 1438 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1439 if (skb == NULL) 1440 goto out; 1441 1442 restart: 1443 /* Find listening sock. */ 1444 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1445 if (IS_ERR(other)) { 1446 err = PTR_ERR(other); 1447 other = NULL; 1448 goto out; 1449 } 1450 1451 /* Latch state of peer */ 1452 unix_state_lock(other); 1453 1454 /* Apparently VFS overslept socket death. Retry. 
*/ 1455 if (sock_flag(other, SOCK_DEAD)) { 1456 unix_state_unlock(other); 1457 sock_put(other); 1458 goto restart; 1459 } 1460 1461 err = -ECONNREFUSED; 1462 if (other->sk_state != TCP_LISTEN) 1463 goto out_unlock; 1464 if (other->sk_shutdown & RCV_SHUTDOWN) 1465 goto out_unlock; 1466 1467 if (unix_recvq_full(other)) { 1468 err = -EAGAIN; 1469 if (!timeo) 1470 goto out_unlock; 1471 1472 timeo = unix_wait_for_peer(other, timeo); 1473 1474 err = sock_intr_errno(timeo); 1475 if (signal_pending(current)) 1476 goto out; 1477 sock_put(other); 1478 goto restart; 1479 } 1480 1481 /* Latch our state. 1482 1483 It is tricky place. We need to grab our state lock and cannot 1484 drop lock on peer. It is dangerous because deadlock is 1485 possible. Connect to self case and simultaneous 1486 attempt to connect are eliminated by checking socket 1487 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1488 check this before attempt to grab lock. 1489 1490 Well, and we have to recheck the state after socket locked. 1491 */ 1492 st = sk->sk_state; 1493 1494 switch (st) { 1495 case TCP_CLOSE: 1496 /* This is ok... continue with connect */ 1497 break; 1498 case TCP_ESTABLISHED: 1499 /* Socket is already connected */ 1500 err = -EISCONN; 1501 goto out_unlock; 1502 default: 1503 err = -EINVAL; 1504 goto out_unlock; 1505 } 1506 1507 unix_state_lock_nested(sk); 1508 1509 if (sk->sk_state != st) { 1510 unix_state_unlock(sk); 1511 unix_state_unlock(other); 1512 sock_put(other); 1513 goto restart; 1514 } 1515 1516 err = security_unix_stream_connect(sk, other, newsk); 1517 if (err) { 1518 unix_state_unlock(sk); 1519 goto out_unlock; 1520 } 1521 1522 /* The way is open! Fastly set all the necessary fields... */ 1523 1524 sock_hold(sk); 1525 unix_peer(newsk) = sk; 1526 newsk->sk_state = TCP_ESTABLISHED; 1527 newsk->sk_type = sk->sk_type; 1528 init_peercred(newsk); 1529 newu = unix_sk(newsk); 1530 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1531 otheru = unix_sk(other); 1532 1533 /* copy address information from listening to new sock 1534 * 1535 * The contents of *(otheru->addr) and otheru->path 1536 * are seen fully set up here, since we have found 1537 * otheru in hash under its lock. Insertion into the 1538 * hash chain we'd found it in had been done in an 1539 * earlier critical area protected by the chain's lock, 1540 * the same one where we'd set *(otheru->addr) contents, 1541 * as well as otheru->path and otheru->addr itself. 1542 * 1543 * Using smp_store_release() here to set newu->addr 1544 * is enough to make those stores, as well as stores 1545 * to newu->path visible to anyone who gets newu->addr 1546 * by smp_load_acquire(). IOW, the same warranties 1547 * as for unix_sock instances bound in unix_bind() or 1548 * in unix_autobind(). 
1549 */ 1550 if (otheru->path.dentry) { 1551 path_get(&otheru->path); 1552 newu->path = otheru->path; 1553 } 1554 refcount_inc(&otheru->addr->refcnt); 1555 smp_store_release(&newu->addr, otheru->addr); 1556 1557 /* Set credentials */ 1558 copy_peercred(sk, other); 1559 1560 sock->state = SS_CONNECTED; 1561 sk->sk_state = TCP_ESTABLISHED; 1562 sock_hold(newsk); 1563 1564 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1565 unix_peer(sk) = newsk; 1566 1567 unix_state_unlock(sk); 1568 1569 /* take ten and send info to listening sock */ 1570 spin_lock(&other->sk_receive_queue.lock); 1571 __skb_queue_tail(&other->sk_receive_queue, skb); 1572 spin_unlock(&other->sk_receive_queue.lock); 1573 unix_state_unlock(other); 1574 other->sk_data_ready(other); 1575 sock_put(other); 1576 return 0; 1577 1578 out_unlock: 1579 if (other) 1580 unix_state_unlock(other); 1581 1582 out: 1583 kfree_skb(skb); 1584 if (newsk) 1585 unix_release_sock(newsk, 0); 1586 if (other) 1587 sock_put(other); 1588 return err; 1589 } 1590 1591 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1592 { 1593 struct sock *ska = socka->sk, *skb = sockb->sk; 1594 1595 /* Join our sockets back to back */ 1596 sock_hold(ska); 1597 sock_hold(skb); 1598 unix_peer(ska) = skb; 1599 unix_peer(skb) = ska; 1600 init_peercred(ska); 1601 init_peercred(skb); 1602 1603 ska->sk_state = TCP_ESTABLISHED; 1604 skb->sk_state = TCP_ESTABLISHED; 1605 socka->state = SS_CONNECTED; 1606 sockb->state = SS_CONNECTED; 1607 return 0; 1608 } 1609 1610 static void unix_sock_inherit_flags(const struct socket *old, 1611 struct socket *new) 1612 { 1613 if (test_bit(SOCK_PASSCRED, &old->flags)) 1614 set_bit(SOCK_PASSCRED, &new->flags); 1615 if (test_bit(SOCK_PASSSEC, &old->flags)) 1616 set_bit(SOCK_PASSSEC, &new->flags); 1617 } 1618 1619 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1620 bool kern) 1621 { 1622 struct sock *sk = sock->sk; 1623 struct sock *tsk; 1624 struct sk_buff *skb; 1625 int err; 1626 1627 err = -EOPNOTSUPP; 1628 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1629 goto out; 1630 1631 err = -EINVAL; 1632 if (sk->sk_state != TCP_LISTEN) 1633 goto out; 1634 1635 /* If socket state is TCP_LISTEN it cannot change (for now...), 1636 * so that no locks are necessary. 1637 */ 1638 1639 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1640 &err); 1641 if (!skb) { 1642 /* This means receive shutdown. 
*/ 1643 if (err == 0) 1644 err = -EINVAL; 1645 goto out; 1646 } 1647 1648 tsk = skb->sk; 1649 skb_free_datagram(sk, skb); 1650 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1651 1652 /* attach accepted sock to socket */ 1653 unix_state_lock(tsk); 1654 newsock->state = SS_CONNECTED; 1655 unix_sock_inherit_flags(sock, newsock); 1656 sock_graft(tsk, newsock); 1657 unix_state_unlock(tsk); 1658 return 0; 1659 1660 out: 1661 return err; 1662 } 1663 1664 1665 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1666 { 1667 struct sock *sk = sock->sk; 1668 struct unix_address *addr; 1669 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1670 int err = 0; 1671 1672 if (peer) { 1673 sk = unix_peer_get(sk); 1674 1675 err = -ENOTCONN; 1676 if (!sk) 1677 goto out; 1678 err = 0; 1679 } else { 1680 sock_hold(sk); 1681 } 1682 1683 addr = smp_load_acquire(&unix_sk(sk)->addr); 1684 if (!addr) { 1685 sunaddr->sun_family = AF_UNIX; 1686 sunaddr->sun_path[0] = 0; 1687 err = offsetof(struct sockaddr_un, sun_path); 1688 } else { 1689 err = addr->len; 1690 memcpy(sunaddr, addr->name, addr->len); 1691 } 1692 sock_put(sk); 1693 out: 1694 return err; 1695 } 1696 1697 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1698 { 1699 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1700 1701 /* 1702 * Garbage collection of unix sockets starts by selecting a set of 1703 * candidate sockets which have reference only from being in flight 1704 * (total_refs == inflight_refs). This condition is checked once during 1705 * the candidate collection phase, and candidates are marked as such, so 1706 * that non-candidates can later be ignored. While inflight_refs is 1707 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1708 * is an instantaneous decision. 1709 * 1710 * Once a candidate, however, the socket must not be reinstalled into a 1711 * file descriptor while the garbage collection is in progress. 1712 * 1713 * If the above conditions are met, then the directed graph of 1714 * candidates (*) does not change while unix_gc_lock is held. 1715 * 1716 * Any operations that changes the file count through file descriptors 1717 * (dup, close, sendmsg) does not change the graph since candidates are 1718 * not installed in fds. 1719 * 1720 * Dequeing a candidate via recvmsg would install it into an fd, but 1721 * that takes unix_gc_lock to decrement the inflight count, so it's 1722 * serialized with garbage collection. 1723 * 1724 * MSG_PEEK is special in that it does not change the inflight count, 1725 * yet does install the socket into an fd. The following lock/unlock 1726 * pair is to ensure serialization with garbage collection. It must be 1727 * done between incrementing the file count and installing the file into 1728 * an fd. 1729 * 1730 * If garbage collection starts after the barrier provided by the 1731 * lock/unlock, then it will see the elevated refcount and not mark this 1732 * as a candidate. If a garbage collection is already in progress 1733 * before the file count was incremented, then the lock/unlock pair will 1734 * ensure that garbage collection is finished before progressing to 1735 * installing the fd. 1736 * 1737 * (*) A -> B where B is on the queue of A or B is on the queue of C 1738 * which is on the queue of listening socket A. 
1739 */ 1740 spin_lock(&unix_gc_lock); 1741 spin_unlock(&unix_gc_lock); 1742 } 1743 1744 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1745 { 1746 int err = 0; 1747 1748 UNIXCB(skb).pid = get_pid(scm->pid); 1749 UNIXCB(skb).uid = scm->creds.uid; 1750 UNIXCB(skb).gid = scm->creds.gid; 1751 UNIXCB(skb).fp = NULL; 1752 unix_get_secdata(scm, skb); 1753 if (scm->fp && send_fds) 1754 err = unix_attach_fds(scm, skb); 1755 1756 skb->destructor = unix_destruct_scm; 1757 return err; 1758 } 1759 1760 static bool unix_passcred_enabled(const struct socket *sock, 1761 const struct sock *other) 1762 { 1763 return test_bit(SOCK_PASSCRED, &sock->flags) || 1764 !other->sk_socket || 1765 test_bit(SOCK_PASSCRED, &other->sk_socket->flags); 1766 } 1767 1768 /* 1769 * Some apps rely on write() giving SCM_CREDENTIALS 1770 * We include credentials if source or destination socket 1771 * asserted SOCK_PASSCRED. 1772 */ 1773 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1774 const struct sock *other) 1775 { 1776 if (UNIXCB(skb).pid) 1777 return; 1778 if (unix_passcred_enabled(sock, other)) { 1779 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1780 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1781 } 1782 } 1783 1784 static int maybe_init_creds(struct scm_cookie *scm, 1785 struct socket *socket, 1786 const struct sock *other) 1787 { 1788 int err; 1789 struct msghdr msg = { .msg_controllen = 0 }; 1790 1791 err = scm_send(socket, &msg, scm, false); 1792 if (err) 1793 return err; 1794 1795 if (unix_passcred_enabled(socket, other)) { 1796 scm->pid = get_pid(task_tgid(current)); 1797 current_uid_gid(&scm->creds.uid, &scm->creds.gid); 1798 } 1799 return err; 1800 } 1801 1802 static bool unix_skb_scm_eq(struct sk_buff *skb, 1803 struct scm_cookie *scm) 1804 { 1805 return UNIXCB(skb).pid == scm->pid && 1806 uid_eq(UNIXCB(skb).uid, scm->creds.uid) && 1807 gid_eq(UNIXCB(skb).gid, scm->creds.gid) && 1808 unix_secdata_eq(scm, skb); 1809 } 1810 1811 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1812 { 1813 struct scm_fp_list *fp = UNIXCB(skb).fp; 1814 struct unix_sock *u = unix_sk(sk); 1815 1816 if (unlikely(fp && fp->count)) 1817 atomic_add(fp->count, &u->scm_stat.nr_fds); 1818 } 1819 1820 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1821 { 1822 struct scm_fp_list *fp = UNIXCB(skb).fp; 1823 struct unix_sock *u = unix_sk(sk); 1824 1825 if (unlikely(fp && fp->count)) 1826 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1827 } 1828 1829 /* 1830 * Send AF_UNIX data. 
1831 */ 1832 1833 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1834 size_t len) 1835 { 1836 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1837 struct sock *sk = sock->sk, *other = NULL; 1838 struct unix_sock *u = unix_sk(sk); 1839 struct scm_cookie scm; 1840 struct sk_buff *skb; 1841 int data_len = 0; 1842 int sk_locked; 1843 long timeo; 1844 int err; 1845 1846 wait_for_unix_gc(); 1847 err = scm_send(sock, msg, &scm, false); 1848 if (err < 0) 1849 return err; 1850 1851 err = -EOPNOTSUPP; 1852 if (msg->msg_flags&MSG_OOB) 1853 goto out; 1854 1855 if (msg->msg_namelen) { 1856 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1857 if (err) 1858 goto out; 1859 } else { 1860 sunaddr = NULL; 1861 err = -ENOTCONN; 1862 other = unix_peer_get(sk); 1863 if (!other) 1864 goto out; 1865 } 1866 1867 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1868 err = unix_autobind(sk); 1869 if (err) 1870 goto out; 1871 } 1872 1873 err = -EMSGSIZE; 1874 if (len > sk->sk_sndbuf - 32) 1875 goto out; 1876 1877 if (len > SKB_MAX_ALLOC) { 1878 data_len = min_t(size_t, 1879 len - SKB_MAX_ALLOC, 1880 MAX_SKB_FRAGS * PAGE_SIZE); 1881 data_len = PAGE_ALIGN(data_len); 1882 1883 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1884 } 1885 1886 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1887 msg->msg_flags & MSG_DONTWAIT, &err, 1888 PAGE_ALLOC_COSTLY_ORDER); 1889 if (skb == NULL) 1890 goto out; 1891 1892 err = unix_scm_to_skb(&scm, skb, true); 1893 if (err < 0) 1894 goto out_free; 1895 1896 skb_put(skb, len - data_len); 1897 skb->data_len = data_len; 1898 skb->len = len; 1899 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1900 if (err) 1901 goto out_free; 1902 1903 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1904 1905 restart: 1906 if (!other) { 1907 err = -ECONNRESET; 1908 if (sunaddr == NULL) 1909 goto out_free; 1910 1911 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, 1912 sk->sk_type); 1913 if (IS_ERR(other)) { 1914 err = PTR_ERR(other); 1915 other = NULL; 1916 goto out_free; 1917 } 1918 } 1919 1920 if (sk_filter(other, skb) < 0) { 1921 /* Toss the packet but do not return any error to the sender */ 1922 err = len; 1923 goto out_free; 1924 } 1925 1926 sk_locked = 0; 1927 unix_state_lock(other); 1928 restart_locked: 1929 err = -EPERM; 1930 if (!unix_may_send(sk, other)) 1931 goto out_unlock; 1932 1933 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1934 /* 1935 * Check with 1003.1g - what should 1936 * datagram error 1937 */ 1938 unix_state_unlock(other); 1939 sock_put(other); 1940 1941 if (!sk_locked) 1942 unix_state_lock(sk); 1943 1944 err = 0; 1945 if (unix_peer(sk) == other) { 1946 unix_peer(sk) = NULL; 1947 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 1948 1949 unix_state_unlock(sk); 1950 1951 sk->sk_state = TCP_CLOSE; 1952 unix_dgram_disconnected(sk, other); 1953 sock_put(other); 1954 err = -ECONNREFUSED; 1955 } else { 1956 unix_state_unlock(sk); 1957 } 1958 1959 other = NULL; 1960 if (err) 1961 goto out_free; 1962 goto restart; 1963 } 1964 1965 err = -EPIPE; 1966 if (other->sk_shutdown & RCV_SHUTDOWN) 1967 goto out_unlock; 1968 1969 if (sk->sk_type != SOCK_SEQPACKET) { 1970 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1971 if (err) 1972 goto out_unlock; 1973 } 1974 1975 /* other == sk && unix_peer(other) != sk if 1976 * - unix_peer(sk) == NULL, destination address bound to sk 1977 * - unix_peer(sk) == sk by time of get but disconnected before lock 1978 */ 1979 if (other != sk && 1980 
unlikely(unix_peer(other) != sk && 1981 unix_recvq_full_lockless(other))) { 1982 if (timeo) { 1983 timeo = unix_wait_for_peer(other, timeo); 1984 1985 err = sock_intr_errno(timeo); 1986 if (signal_pending(current)) 1987 goto out_free; 1988 1989 goto restart; 1990 } 1991 1992 if (!sk_locked) { 1993 unix_state_unlock(other); 1994 unix_state_double_lock(sk, other); 1995 } 1996 1997 if (unix_peer(sk) != other || 1998 unix_dgram_peer_wake_me(sk, other)) { 1999 err = -EAGAIN; 2000 sk_locked = 1; 2001 goto out_unlock; 2002 } 2003 2004 if (!sk_locked) { 2005 sk_locked = 1; 2006 goto restart_locked; 2007 } 2008 } 2009 2010 if (unlikely(sk_locked)) 2011 unix_state_unlock(sk); 2012 2013 if (sock_flag(other, SOCK_RCVTSTAMP)) 2014 __net_timestamp(skb); 2015 maybe_add_creds(skb, sock, other); 2016 scm_stat_add(other, skb); 2017 skb_queue_tail(&other->sk_receive_queue, skb); 2018 unix_state_unlock(other); 2019 other->sk_data_ready(other); 2020 sock_put(other); 2021 scm_destroy(&scm); 2022 return len; 2023 2024 out_unlock: 2025 if (sk_locked) 2026 unix_state_unlock(sk); 2027 unix_state_unlock(other); 2028 out_free: 2029 kfree_skb(skb); 2030 out: 2031 if (other) 2032 sock_put(other); 2033 scm_destroy(&scm); 2034 return err; 2035 } 2036 2037 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2038 * bytes, and a minimum of a full page. 2039 */ 2040 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2041 2042 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2043 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) 2044 { 2045 struct unix_sock *ousk = unix_sk(other); 2046 struct sk_buff *skb; 2047 int err = 0; 2048 2049 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2050 2051 if (!skb) 2052 return err; 2053 2054 skb_put(skb, 1); 2055 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2056 2057 if (err) { 2058 kfree_skb(skb); 2059 return err; 2060 } 2061 2062 unix_state_lock(other); 2063 2064 if (sock_flag(other, SOCK_DEAD) || 2065 (other->sk_shutdown & RCV_SHUTDOWN)) { 2066 unix_state_unlock(other); 2067 kfree_skb(skb); 2068 return -EPIPE; 2069 } 2070 2071 maybe_add_creds(skb, sock, other); 2072 skb_get(skb); 2073 2074 if (ousk->oob_skb) 2075 consume_skb(ousk->oob_skb); 2076 2077 WRITE_ONCE(ousk->oob_skb, skb); 2078 2079 scm_stat_add(other, skb); 2080 skb_queue_tail(&other->sk_receive_queue, skb); 2081 sk_send_sigurg(other); 2082 unix_state_unlock(other); 2083 other->sk_data_ready(other); 2084 2085 return err; 2086 } 2087 #endif 2088 2089 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2090 size_t len) 2091 { 2092 struct sock *sk = sock->sk; 2093 struct sock *other = NULL; 2094 int err, size; 2095 struct sk_buff *skb; 2096 int sent = 0; 2097 struct scm_cookie scm; 2098 bool fds_sent = false; 2099 int data_len; 2100 2101 wait_for_unix_gc(); 2102 err = scm_send(sock, msg, &scm, false); 2103 if (err < 0) 2104 return err; 2105 2106 err = -EOPNOTSUPP; 2107 if (msg->msg_flags & MSG_OOB) { 2108 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2109 if (len) 2110 len--; 2111 else 2112 #endif 2113 goto out_err; 2114 } 2115 2116 if (msg->msg_namelen) { 2117 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2118 goto out_err; 2119 } else { 2120 err = -ENOTCONN; 2121 other = unix_peer(sk); 2122 if (!other) 2123 goto out_err; 2124 } 2125 2126 if (sk->sk_shutdown & SEND_SHUTDOWN) 2127 goto pipe_err; 2128 2129 while (sent < len) { 2130 size = len - sent; 2131 2132 /* Keep two messages in the pipe so it schedules better */ 2133 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2134 2135 /* allow fallback to order-0 allocations */ 2136 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2137 2138 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2139 2140 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2141 2142 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2143 msg->msg_flags & MSG_DONTWAIT, &err, 2144 get_order(UNIX_SKB_FRAGS_SZ)); 2145 if (!skb) 2146 goto out_err; 2147 2148 /* Only send the fds in the first buffer */ 2149 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2150 if (err < 0) { 2151 kfree_skb(skb); 2152 goto out_err; 2153 } 2154 fds_sent = true; 2155 2156 skb_put(skb, size - data_len); 2157 skb->data_len = data_len; 2158 skb->len = size; 2159 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2160 if (err) { 2161 kfree_skb(skb); 2162 goto out_err; 2163 } 2164 2165 unix_state_lock(other); 2166 2167 if (sock_flag(other, SOCK_DEAD) || 2168 (other->sk_shutdown & RCV_SHUTDOWN)) 2169 goto pipe_err_free; 2170 2171 maybe_add_creds(skb, sock, other); 2172 scm_stat_add(other, skb); 2173 skb_queue_tail(&other->sk_receive_queue, skb); 2174 unix_state_unlock(other); 2175 other->sk_data_ready(other); 2176 sent += size; 2177 } 2178 2179 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2180 if (msg->msg_flags & MSG_OOB) { 2181 err = queue_oob(sock, msg, other); 2182 if (err) 2183 goto out_err; 2184 sent++; 2185 } 2186 #endif 2187 2188 scm_destroy(&scm); 2189 2190 return sent; 2191 2192 pipe_err_free: 2193 unix_state_unlock(other); 2194 kfree_skb(skb); 2195 pipe_err: 2196 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2197 send_sig(SIGPIPE, current, 0); 2198 err = -EPIPE; 2199 out_err: 2200 scm_destroy(&scm); 2201 return sent ? : err; 2202 } 2203 2204 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2205 int offset, size_t size, int flags) 2206 { 2207 int err; 2208 bool send_sigpipe = false; 2209 bool init_scm = true; 2210 struct scm_cookie scm; 2211 struct sock *other, *sk = socket->sk; 2212 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2213 2214 if (flags & MSG_OOB) 2215 return -EOPNOTSUPP; 2216 2217 other = unix_peer(sk); 2218 if (!other || sk->sk_state != TCP_ESTABLISHED) 2219 return -ENOTCONN; 2220 2221 if (false) { 2222 alloc_skb: 2223 unix_state_unlock(other); 2224 mutex_unlock(&unix_sk(other)->iolock); 2225 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2226 &err, 0); 2227 if (!newskb) 2228 goto err; 2229 } 2230 2231 /* we must acquire iolock as we modify already present 2232 * skbs in the sk_receive_queue and mess with skb->len 2233 */ 2234 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2235 if (err) { 2236 err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; 2237 goto err; 2238 } 2239 2240 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2241 err = -EPIPE; 2242 send_sigpipe = true; 2243 goto err_unlock; 2244 } 2245 2246 unix_state_lock(other); 2247 2248 if (sock_flag(other, SOCK_DEAD) || 2249 other->sk_shutdown & RCV_SHUTDOWN) { 2250 err = -EPIPE; 2251 send_sigpipe = true; 2252 goto err_state_unlock; 2253 } 2254 2255 if (init_scm) { 2256 err = maybe_init_creds(&scm, socket, other); 2257 if (err) 2258 goto err_state_unlock; 2259 init_scm = false; 2260 } 2261 2262 skb = skb_peek_tail(&other->sk_receive_queue); 2263 if (tail && tail == skb) { 2264 skb = newskb; 2265 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2266 if (newskb) { 2267 skb = newskb; 2268 } else { 2269 tail = skb; 2270 goto alloc_skb; 2271 } 2272 } else if (newskb) { 2273 /* this is fast path, we don't necessarily need to 2274 * call to kfree_skb even though with newskb == NULL 2275 * this - does no harm 2276 */ 2277 consume_skb(newskb); 2278 newskb = NULL; 2279 } 2280 2281 if (skb_append_pagefrags(skb, page, offset, size)) { 2282 tail = skb; 2283 goto alloc_skb; 2284 } 2285 2286 skb->len += size; 2287 skb->data_len += size; 2288 skb->truesize += size; 2289 refcount_add(size, &sk->sk_wmem_alloc); 2290 2291 if (newskb) { 2292 err = unix_scm_to_skb(&scm, skb, false); 2293 if (err) 2294 goto err_state_unlock; 2295 spin_lock(&other->sk_receive_queue.lock); 2296 __skb_queue_tail(&other->sk_receive_queue, newskb); 2297 spin_unlock(&other->sk_receive_queue.lock); 2298 } 2299 2300 unix_state_unlock(other); 2301 mutex_unlock(&unix_sk(other)->iolock); 2302 2303 other->sk_data_ready(other); 2304 scm_destroy(&scm); 2305 return size; 2306 2307 err_state_unlock: 2308 unix_state_unlock(other); 2309 err_unlock: 2310 mutex_unlock(&unix_sk(other)->iolock); 2311 err: 2312 kfree_skb(newskb); 2313 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2314 send_sig(SIGPIPE, current, 0); 2315 if (!init_scm) 2316 scm_destroy(&scm); 2317 return err; 2318 } 2319 2320 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2321 size_t len) 2322 { 2323 int err; 2324 struct sock *sk = sock->sk; 2325 2326 err = sock_error(sk); 2327 if (err) 2328 return err; 2329 2330 if (sk->sk_state != TCP_ESTABLISHED) 2331 return -ENOTCONN; 2332 2333 if (msg->msg_namelen) 2334 msg->msg_namelen = 0; 2335 2336 return unix_dgram_sendmsg(sock, msg, len); 2337 } 2338 2339 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2340 size_t size, int flags) 2341 { 2342 struct sock *sk = sock->sk; 2343 2344 if (sk->sk_state != TCP_ESTABLISHED) 2345 return -ENOTCONN; 2346 2347 return unix_dgram_recvmsg(sock, msg, size, flags); 2348 } 2349 2350 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2351 { 2352 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2353 2354 if (addr) { 2355 msg->msg_namelen = addr->len; 2356 memcpy(msg->msg_name, addr->name, addr->len); 2357 } 2358 } 2359 2360 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2361 int flags) 2362 { 2363 struct scm_cookie scm; 2364 struct socket *sock = sk->sk_socket; 2365 struct unix_sock *u = unix_sk(sk); 2366 struct sk_buff *skb, *last; 2367 long timeo; 2368 int skip; 2369 int err; 2370 2371 err = -EOPNOTSUPP; 2372 if (flags&MSG_OOB) 2373 goto out; 2374 2375 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2376 2377 do { 2378 mutex_lock(&u->iolock); 2379 2380 skip = sk_peek_offset(sk, flags); 2381 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2382 &skip, 
&err, &last); 2383 if (skb) { 2384 if (!(flags & MSG_PEEK)) 2385 scm_stat_del(sk, skb); 2386 break; 2387 } 2388 2389 mutex_unlock(&u->iolock); 2390 2391 if (err != -EAGAIN) 2392 break; 2393 } while (timeo && 2394 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2395 &err, &timeo, last)); 2396 2397 if (!skb) { /* implies iolock unlocked */ 2398 unix_state_lock(sk); 2399 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2400 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2401 (sk->sk_shutdown & RCV_SHUTDOWN)) 2402 err = 0; 2403 unix_state_unlock(sk); 2404 goto out; 2405 } 2406 2407 if (wq_has_sleeper(&u->peer_wait)) 2408 wake_up_interruptible_sync_poll(&u->peer_wait, 2409 EPOLLOUT | EPOLLWRNORM | 2410 EPOLLWRBAND); 2411 2412 if (msg->msg_name) 2413 unix_copy_addr(msg, skb->sk); 2414 2415 if (size > skb->len - skip) 2416 size = skb->len - skip; 2417 else if (size < skb->len - skip) 2418 msg->msg_flags |= MSG_TRUNC; 2419 2420 err = skb_copy_datagram_msg(skb, skip, msg, size); 2421 if (err) 2422 goto out_free; 2423 2424 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2425 __sock_recv_timestamp(msg, sk, skb); 2426 2427 memset(&scm, 0, sizeof(scm)); 2428 2429 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2430 unix_set_secdata(&scm, skb); 2431 2432 if (!(flags & MSG_PEEK)) { 2433 if (UNIXCB(skb).fp) 2434 unix_detach_fds(&scm, skb); 2435 2436 sk_peek_offset_bwd(sk, skb->len); 2437 } else { 2438 /* It is questionable: on PEEK we could: 2439 - do not return fds - good, but too simple 8) 2440 - return fds, and do not return them on read (old strategy, 2441 apparently wrong) 2442 - clone fds (I chose it for now, it is the most universal 2443 solution) 2444 2445 POSIX 1003.1g does not actually define this clearly 2446 at all. POSIX 1003.1g doesn't define a lot of things 2447 clearly however! 2448 2449 */ 2450 2451 sk_peek_offset_fwd(sk, size); 2452 2453 if (UNIXCB(skb).fp) 2454 unix_peek_fds(&scm, skb); 2455 } 2456 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2457 2458 scm_recv(sock, msg, &scm, flags); 2459 2460 out_free: 2461 skb_free_datagram(sk, skb); 2462 mutex_unlock(&u->iolock); 2463 out: 2464 return err; 2465 } 2466 2467 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2468 int flags) 2469 { 2470 struct sock *sk = sock->sk; 2471 2472 #ifdef CONFIG_BPF_SYSCALL 2473 const struct proto *prot = READ_ONCE(sk->sk_prot); 2474 2475 if (prot != &unix_dgram_proto) 2476 return prot->recvmsg(sk, msg, size, flags, NULL); 2477 #endif 2478 return __unix_dgram_recvmsg(sk, msg, size, flags); 2479 } 2480 2481 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, 2482 sk_read_actor_t recv_actor) 2483 { 2484 int copied = 0; 2485 2486 while (1) { 2487 struct unix_sock *u = unix_sk(sk); 2488 struct sk_buff *skb; 2489 int used, err; 2490 2491 mutex_lock(&u->iolock); 2492 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2493 mutex_unlock(&u->iolock); 2494 if (!skb) 2495 return err; 2496 2497 used = recv_actor(desc, skb, 0, skb->len); 2498 if (used <= 0) { 2499 if (!copied) 2500 copied = used; 2501 kfree_skb(skb); 2502 break; 2503 } else if (used <= skb->len) { 2504 copied += used; 2505 } 2506 2507 kfree_skb(skb); 2508 if (!desc->count) 2509 break; 2510 } 2511 2512 return copied; 2513 } 2514 2515 /* 2516 * Sleep until more data has arrived. But check for races.. 
2517 */ 2518 static long unix_stream_data_wait(struct sock *sk, long timeo, 2519 struct sk_buff *last, unsigned int last_len, 2520 bool freezable) 2521 { 2522 struct sk_buff *tail; 2523 DEFINE_WAIT(wait); 2524 2525 unix_state_lock(sk); 2526 2527 for (;;) { 2528 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2529 2530 tail = skb_peek_tail(&sk->sk_receive_queue); 2531 if (tail != last || 2532 (tail && tail->len != last_len) || 2533 sk->sk_err || 2534 (sk->sk_shutdown & RCV_SHUTDOWN) || 2535 signal_pending(current) || 2536 !timeo) 2537 break; 2538 2539 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2540 unix_state_unlock(sk); 2541 if (freezable) 2542 timeo = freezable_schedule_timeout(timeo); 2543 else 2544 timeo = schedule_timeout(timeo); 2545 unix_state_lock(sk); 2546 2547 if (sock_flag(sk, SOCK_DEAD)) 2548 break; 2549 2550 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2551 } 2552 2553 finish_wait(sk_sleep(sk), &wait); 2554 unix_state_unlock(sk); 2555 return timeo; 2556 } 2557 2558 static unsigned int unix_skb_len(const struct sk_buff *skb) 2559 { 2560 return skb->len - UNIXCB(skb).consumed; 2561 } 2562 2563 struct unix_stream_read_state { 2564 int (*recv_actor)(struct sk_buff *, int, int, 2565 struct unix_stream_read_state *); 2566 struct socket *socket; 2567 struct msghdr *msg; 2568 struct pipe_inode_info *pipe; 2569 size_t size; 2570 int flags; 2571 unsigned int splice_flags; 2572 }; 2573 2574 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2575 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2576 { 2577 struct socket *sock = state->socket; 2578 struct sock *sk = sock->sk; 2579 struct unix_sock *u = unix_sk(sk); 2580 int chunk = 1; 2581 struct sk_buff *oob_skb; 2582 2583 mutex_lock(&u->iolock); 2584 unix_state_lock(sk); 2585 2586 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2587 unix_state_unlock(sk); 2588 mutex_unlock(&u->iolock); 2589 return -EINVAL; 2590 } 2591 2592 oob_skb = u->oob_skb; 2593 2594 if (!(state->flags & MSG_PEEK)) 2595 WRITE_ONCE(u->oob_skb, NULL); 2596 2597 unix_state_unlock(sk); 2598 2599 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2600 2601 if (!(state->flags & MSG_PEEK)) { 2602 UNIXCB(oob_skb).consumed += 1; 2603 kfree_skb(oob_skb); 2604 } 2605 2606 mutex_unlock(&u->iolock); 2607 2608 if (chunk < 0) 2609 return -EFAULT; 2610 2611 state->msg->msg_flags |= MSG_OOB; 2612 return 1; 2613 } 2614 2615 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2616 int flags, int copied) 2617 { 2618 struct unix_sock *u = unix_sk(sk); 2619 2620 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2621 skb_unlink(skb, &sk->sk_receive_queue); 2622 consume_skb(skb); 2623 skb = NULL; 2624 } else { 2625 if (skb == u->oob_skb) { 2626 if (copied) { 2627 skb = NULL; 2628 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2629 if (!(flags & MSG_PEEK)) { 2630 WRITE_ONCE(u->oob_skb, NULL); 2631 consume_skb(skb); 2632 } 2633 } else if (!(flags & MSG_PEEK)) { 2634 skb_unlink(skb, &sk->sk_receive_queue); 2635 consume_skb(skb); 2636 skb = skb_peek(&sk->sk_receive_queue); 2637 } 2638 } 2639 } 2640 return skb; 2641 } 2642 #endif 2643 2644 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, 2645 sk_read_actor_t recv_actor) 2646 { 2647 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2648 return -ENOTCONN; 2649 2650 return unix_read_sock(sk, desc, recv_actor); 2651 } 2652 2653 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2654 bool freezable) 2655 { 2656 struct scm_cookie scm; 2657 struct socket *sock = state->socket; 
2658 struct sock *sk = sock->sk; 2659 struct unix_sock *u = unix_sk(sk); 2660 int copied = 0; 2661 int flags = state->flags; 2662 int noblock = flags & MSG_DONTWAIT; 2663 bool check_creds = false; 2664 int target; 2665 int err = 0; 2666 long timeo; 2667 int skip; 2668 size_t size = state->size; 2669 unsigned int last_len; 2670 2671 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2672 err = -EINVAL; 2673 goto out; 2674 } 2675 2676 if (unlikely(flags & MSG_OOB)) { 2677 err = -EOPNOTSUPP; 2678 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2679 err = unix_stream_recv_urg(state); 2680 #endif 2681 goto out; 2682 } 2683 2684 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2685 timeo = sock_rcvtimeo(sk, noblock); 2686 2687 memset(&scm, 0, sizeof(scm)); 2688 2689 /* Lock the socket to prevent queue disordering 2690 * while sleeps in memcpy_tomsg 2691 */ 2692 mutex_lock(&u->iolock); 2693 2694 skip = max(sk_peek_offset(sk, flags), 0); 2695 2696 do { 2697 int chunk; 2698 bool drop_skb; 2699 struct sk_buff *skb, *last; 2700 2701 redo: 2702 unix_state_lock(sk); 2703 if (sock_flag(sk, SOCK_DEAD)) { 2704 err = -ECONNRESET; 2705 goto unlock; 2706 } 2707 last = skb = skb_peek(&sk->sk_receive_queue); 2708 last_len = last ? last->len : 0; 2709 2710 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2711 if (skb) { 2712 skb = manage_oob(skb, sk, flags, copied); 2713 if (!skb) { 2714 unix_state_unlock(sk); 2715 if (copied) 2716 break; 2717 goto redo; 2718 } 2719 } 2720 #endif 2721 again: 2722 if (skb == NULL) { 2723 if (copied >= target) 2724 goto unlock; 2725 2726 /* 2727 * POSIX 1003.1g mandates this order. 2728 */ 2729 2730 err = sock_error(sk); 2731 if (err) 2732 goto unlock; 2733 if (sk->sk_shutdown & RCV_SHUTDOWN) 2734 goto unlock; 2735 2736 unix_state_unlock(sk); 2737 if (!timeo) { 2738 err = -EAGAIN; 2739 break; 2740 } 2741 2742 mutex_unlock(&u->iolock); 2743 2744 timeo = unix_stream_data_wait(sk, timeo, last, 2745 last_len, freezable); 2746 2747 if (signal_pending(current)) { 2748 err = sock_intr_errno(timeo); 2749 scm_destroy(&scm); 2750 goto out; 2751 } 2752 2753 mutex_lock(&u->iolock); 2754 goto redo; 2755 unlock: 2756 unix_state_unlock(sk); 2757 break; 2758 } 2759 2760 while (skip >= unix_skb_len(skb)) { 2761 skip -= unix_skb_len(skb); 2762 last = skb; 2763 last_len = skb->len; 2764 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2765 if (!skb) 2766 goto again; 2767 } 2768 2769 unix_state_unlock(sk); 2770 2771 if (check_creds) { 2772 /* Never glue messages from different writers */ 2773 if (!unix_skb_scm_eq(skb, &scm)) 2774 break; 2775 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2776 /* Copy credentials */ 2777 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2778 unix_set_secdata(&scm, skb); 2779 check_creds = true; 2780 } 2781 2782 /* Copy address just once */ 2783 if (state->msg && state->msg->msg_name) { 2784 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2785 state->msg->msg_name); 2786 unix_copy_addr(state->msg, skb->sk); 2787 sunaddr = NULL; 2788 } 2789 2790 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2791 skb_get(skb); 2792 chunk = state->recv_actor(skb, skip, chunk, state); 2793 drop_skb = !unix_skb_len(skb); 2794 /* skb is only safe to use if !drop_skb */ 2795 consume_skb(skb); 2796 if (chunk < 0) { 2797 if (copied == 0) 2798 copied = -EFAULT; 2799 break; 2800 } 2801 copied += chunk; 2802 size -= chunk; 2803 2804 if (drop_skb) { 2805 /* the skb was touched by a concurrent reader; 2806 * we should not expect anything from this skb 2807 * anymore and assume it 
invalid - we can be 2808 * sure it was dropped from the socket queue 2809 * 2810 * let's report a short read 2811 */ 2812 err = 0; 2813 break; 2814 } 2815 2816 /* Mark read part of skb as used */ 2817 if (!(flags & MSG_PEEK)) { 2818 UNIXCB(skb).consumed += chunk; 2819 2820 sk_peek_offset_bwd(sk, chunk); 2821 2822 if (UNIXCB(skb).fp) { 2823 scm_stat_del(sk, skb); 2824 unix_detach_fds(&scm, skb); 2825 } 2826 2827 if (unix_skb_len(skb)) 2828 break; 2829 2830 skb_unlink(skb, &sk->sk_receive_queue); 2831 consume_skb(skb); 2832 2833 if (scm.fp) 2834 break; 2835 } else { 2836 /* It is questionable, see note in unix_dgram_recvmsg. 2837 */ 2838 if (UNIXCB(skb).fp) 2839 unix_peek_fds(&scm, skb); 2840 2841 sk_peek_offset_fwd(sk, chunk); 2842 2843 if (UNIXCB(skb).fp) 2844 break; 2845 2846 skip = 0; 2847 last = skb; 2848 last_len = skb->len; 2849 unix_state_lock(sk); 2850 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2851 if (skb) 2852 goto again; 2853 unix_state_unlock(sk); 2854 break; 2855 } 2856 } while (size); 2857 2858 mutex_unlock(&u->iolock); 2859 if (state->msg) 2860 scm_recv(sock, state->msg, &scm, flags); 2861 else 2862 scm_destroy(&scm); 2863 out: 2864 return copied ? : err; 2865 } 2866 2867 static int unix_stream_read_actor(struct sk_buff *skb, 2868 int skip, int chunk, 2869 struct unix_stream_read_state *state) 2870 { 2871 int ret; 2872 2873 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2874 state->msg, chunk); 2875 return ret ?: chunk; 2876 } 2877 2878 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2879 size_t size, int flags) 2880 { 2881 struct unix_stream_read_state state = { 2882 .recv_actor = unix_stream_read_actor, 2883 .socket = sk->sk_socket, 2884 .msg = msg, 2885 .size = size, 2886 .flags = flags 2887 }; 2888 2889 return unix_stream_read_generic(&state, true); 2890 } 2891 2892 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2893 size_t size, int flags) 2894 { 2895 struct unix_stream_read_state state = { 2896 .recv_actor = unix_stream_read_actor, 2897 .socket = sock, 2898 .msg = msg, 2899 .size = size, 2900 .flags = flags 2901 }; 2902 2903 #ifdef CONFIG_BPF_SYSCALL 2904 struct sock *sk = sock->sk; 2905 const struct proto *prot = READ_ONCE(sk->sk_prot); 2906 2907 if (prot != &unix_stream_proto) 2908 return prot->recvmsg(sk, msg, size, flags, NULL); 2909 #endif 2910 return unix_stream_read_generic(&state, true); 2911 } 2912 2913 static int unix_stream_splice_actor(struct sk_buff *skb, 2914 int skip, int chunk, 2915 struct unix_stream_read_state *state) 2916 { 2917 return skb_splice_bits(skb, state->socket->sk, 2918 UNIXCB(skb).consumed + skip, 2919 state->pipe, chunk, state->splice_flags); 2920 } 2921 2922 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2923 struct pipe_inode_info *pipe, 2924 size_t size, unsigned int flags) 2925 { 2926 struct unix_stream_read_state state = { 2927 .recv_actor = unix_stream_splice_actor, 2928 .socket = sock, 2929 .pipe = pipe, 2930 .size = size, 2931 .splice_flags = flags, 2932 }; 2933 2934 if (unlikely(*ppos)) 2935 return -ESPIPE; 2936 2937 if (sock->file->f_flags & O_NONBLOCK || 2938 flags & SPLICE_F_NONBLOCK) 2939 state.flags = MSG_DONTWAIT; 2940 2941 return unix_stream_read_generic(&state, false); 2942 } 2943 2944 static int unix_shutdown(struct socket *sock, int mode) 2945 { 2946 struct sock *sk = sock->sk; 2947 struct sock *other; 2948 2949 if (mode < SHUT_RD || mode > SHUT_RDWR) 2950 return -EINVAL; 2951 /* This maps: 2952 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2953 
* SHUT_WR (1) -> SEND_SHUTDOWN (2) 2954 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2955 */ 2956 ++mode; 2957 2958 unix_state_lock(sk); 2959 sk->sk_shutdown |= mode; 2960 other = unix_peer(sk); 2961 if (other) 2962 sock_hold(other); 2963 unix_state_unlock(sk); 2964 sk->sk_state_change(sk); 2965 2966 if (other && 2967 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2968 2969 int peer_mode = 0; 2970 const struct proto *prot = READ_ONCE(other->sk_prot); 2971 2972 if (prot->unhash) 2973 prot->unhash(other); 2974 if (mode&RCV_SHUTDOWN) 2975 peer_mode |= SEND_SHUTDOWN; 2976 if (mode&SEND_SHUTDOWN) 2977 peer_mode |= RCV_SHUTDOWN; 2978 unix_state_lock(other); 2979 other->sk_shutdown |= peer_mode; 2980 unix_state_unlock(other); 2981 other->sk_state_change(other); 2982 if (peer_mode == SHUTDOWN_MASK) 2983 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2984 else if (peer_mode & RCV_SHUTDOWN) 2985 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2986 } 2987 if (other) 2988 sock_put(other); 2989 2990 return 0; 2991 } 2992 2993 long unix_inq_len(struct sock *sk) 2994 { 2995 struct sk_buff *skb; 2996 long amount = 0; 2997 2998 if (sk->sk_state == TCP_LISTEN) 2999 return -EINVAL; 3000 3001 spin_lock(&sk->sk_receive_queue.lock); 3002 if (sk->sk_type == SOCK_STREAM || 3003 sk->sk_type == SOCK_SEQPACKET) { 3004 skb_queue_walk(&sk->sk_receive_queue, skb) 3005 amount += unix_skb_len(skb); 3006 } else { 3007 skb = skb_peek(&sk->sk_receive_queue); 3008 if (skb) 3009 amount = skb->len; 3010 } 3011 spin_unlock(&sk->sk_receive_queue.lock); 3012 3013 return amount; 3014 } 3015 EXPORT_SYMBOL_GPL(unix_inq_len); 3016 3017 long unix_outq_len(struct sock *sk) 3018 { 3019 return sk_wmem_alloc_get(sk); 3020 } 3021 EXPORT_SYMBOL_GPL(unix_outq_len); 3022 3023 static int unix_open_file(struct sock *sk) 3024 { 3025 struct path path; 3026 struct file *f; 3027 int fd; 3028 3029 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3030 return -EPERM; 3031 3032 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3033 return -ENOENT; 3034 3035 path = unix_sk(sk)->path; 3036 if (!path.dentry) 3037 return -ENOENT; 3038 3039 path_get(&path); 3040 3041 fd = get_unused_fd_flags(O_CLOEXEC); 3042 if (fd < 0) 3043 goto out; 3044 3045 f = dentry_open(&path, O_PATH, current_cred()); 3046 if (IS_ERR(f)) { 3047 put_unused_fd(fd); 3048 fd = PTR_ERR(f); 3049 goto out; 3050 } 3051 3052 fd_install(fd, f); 3053 out: 3054 path_put(&path); 3055 3056 return fd; 3057 } 3058 3059 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3060 { 3061 struct sock *sk = sock->sk; 3062 long amount = 0; 3063 int err; 3064 3065 switch (cmd) { 3066 case SIOCOUTQ: 3067 amount = unix_outq_len(sk); 3068 err = put_user(amount, (int __user *)arg); 3069 break; 3070 case SIOCINQ: 3071 amount = unix_inq_len(sk); 3072 if (amount < 0) 3073 err = amount; 3074 else 3075 err = put_user(amount, (int __user *)arg); 3076 break; 3077 case SIOCUNIXFILE: 3078 err = unix_open_file(sk); 3079 break; 3080 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3081 case SIOCATMARK: 3082 { 3083 struct sk_buff *skb; 3084 int answ = 0; 3085 3086 skb = skb_peek(&sk->sk_receive_queue); 3087 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3088 answ = 1; 3089 err = put_user(answ, (int __user *)arg); 3090 } 3091 break; 3092 #endif 3093 default: 3094 err = -ENOIOCTLCMD; 3095 break; 3096 } 3097 return err; 3098 } 3099 3100 #ifdef CONFIG_COMPAT 3101 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3102 { 3103 return unix_ioctl(sock, cmd, (unsigned 
long)compat_ptr(arg)); 3104 } 3105 #endif 3106 3107 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3108 { 3109 struct sock *sk = sock->sk; 3110 __poll_t mask; 3111 3112 sock_poll_wait(file, sock, wait); 3113 mask = 0; 3114 3115 /* exceptional events? */ 3116 if (sk->sk_err) 3117 mask |= EPOLLERR; 3118 if (sk->sk_shutdown == SHUTDOWN_MASK) 3119 mask |= EPOLLHUP; 3120 if (sk->sk_shutdown & RCV_SHUTDOWN) 3121 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3122 3123 /* readable? */ 3124 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3125 mask |= EPOLLIN | EPOLLRDNORM; 3126 if (sk_is_readable(sk)) 3127 mask |= EPOLLIN | EPOLLRDNORM; 3128 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3129 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3130 mask |= EPOLLPRI; 3131 #endif 3132 3133 /* Connection-based need to check for termination and startup */ 3134 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3135 sk->sk_state == TCP_CLOSE) 3136 mask |= EPOLLHUP; 3137 3138 /* 3139 * we set writable also when the other side has shut down the 3140 * connection. This prevents stuck sockets. 3141 */ 3142 if (unix_writable(sk)) 3143 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3144 3145 return mask; 3146 } 3147 3148 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3149 poll_table *wait) 3150 { 3151 struct sock *sk = sock->sk, *other; 3152 unsigned int writable; 3153 __poll_t mask; 3154 3155 sock_poll_wait(file, sock, wait); 3156 mask = 0; 3157 3158 /* exceptional events? */ 3159 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 3160 mask |= EPOLLERR | 3161 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3162 3163 if (sk->sk_shutdown & RCV_SHUTDOWN) 3164 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3165 if (sk->sk_shutdown == SHUTDOWN_MASK) 3166 mask |= EPOLLHUP; 3167 3168 /* readable? */ 3169 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3170 mask |= EPOLLIN | EPOLLRDNORM; 3171 if (sk_is_readable(sk)) 3172 mask |= EPOLLIN | EPOLLRDNORM; 3173 3174 /* Connection-based need to check for termination and startup */ 3175 if (sk->sk_type == SOCK_SEQPACKET) { 3176 if (sk->sk_state == TCP_CLOSE) 3177 mask |= EPOLLHUP; 3178 /* connection hasn't started yet? */ 3179 if (sk->sk_state == TCP_SYN_SENT) 3180 return mask; 3181 } 3182 3183 /* No write status requested, avoid expensive OUT tests. 
*/ 3184 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3185 return mask; 3186 3187 writable = unix_writable(sk); 3188 if (writable) { 3189 unix_state_lock(sk); 3190 3191 other = unix_peer(sk); 3192 if (other && unix_peer(other) != sk && 3193 unix_recvq_full_lockless(other) && 3194 unix_dgram_peer_wake_me(sk, other)) 3195 writable = 0; 3196 3197 unix_state_unlock(sk); 3198 } 3199 3200 if (writable) 3201 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3202 else 3203 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3204 3205 return mask; 3206 } 3207 3208 #ifdef CONFIG_PROC_FS 3209 3210 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3211 3212 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3213 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3214 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3215 3216 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3217 { 3218 unsigned long offset = get_offset(*pos); 3219 unsigned long bucket = get_bucket(*pos); 3220 unsigned long count = 0; 3221 struct sock *sk; 3222 3223 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3224 sk; sk = sk_next(sk)) { 3225 if (++count == offset) 3226 break; 3227 } 3228 3229 return sk; 3230 } 3231 3232 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3233 { 3234 unsigned long bucket = get_bucket(*pos); 3235 struct net *net = seq_file_net(seq); 3236 struct sock *sk; 3237 3238 while (bucket < UNIX_HASH_SIZE) { 3239 spin_lock(&net->unx.table.locks[bucket]); 3240 3241 sk = unix_from_bucket(seq, pos); 3242 if (sk) 3243 return sk; 3244 3245 spin_unlock(&net->unx.table.locks[bucket]); 3246 3247 *pos = set_bucket_offset(++bucket, 1); 3248 } 3249 3250 return NULL; 3251 } 3252 3253 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3254 loff_t *pos) 3255 { 3256 unsigned long bucket = get_bucket(*pos); 3257 3258 sk = sk_next(sk); 3259 if (sk) 3260 return sk; 3261 3262 3263 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3264 3265 *pos = set_bucket_offset(++bucket, 1); 3266 3267 return unix_get_first(seq, pos); 3268 } 3269 3270 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3271 { 3272 if (!*pos) 3273 return SEQ_START_TOKEN; 3274 3275 return unix_get_first(seq, pos); 3276 } 3277 3278 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3279 { 3280 ++*pos; 3281 3282 if (v == SEQ_START_TOKEN) 3283 return unix_get_first(seq, pos); 3284 3285 return unix_get_next(seq, v, pos); 3286 } 3287 3288 static void unix_seq_stop(struct seq_file *seq, void *v) 3289 { 3290 struct sock *sk = v; 3291 3292 if (sk) 3293 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3294 } 3295 3296 static int unix_seq_show(struct seq_file *seq, void *v) 3297 { 3298 3299 if (v == SEQ_START_TOKEN) 3300 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3301 "Inode Path\n"); 3302 else { 3303 struct sock *s = v; 3304 struct unix_sock *u = unix_sk(s); 3305 unix_state_lock(s); 3306 3307 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3308 s, 3309 refcount_read(&s->sk_refcnt), 3310 0, 3311 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3312 s->sk_type, 3313 s->sk_socket ? 3314 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3315 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3316 sock_i_ino(s)); 3317 3318 if (u->addr) { // under a hash table lock here 3319 int i, len; 3320 seq_putc(seq, ' '); 3321 3322 i = 0; 3323 len = u->addr->len - 3324 offsetof(struct sockaddr_un, sun_path); 3325 if (u->addr->name->sun_path[0]) { 3326 len--; 3327 } else { 3328 seq_putc(seq, '@'); 3329 i++; 3330 } 3331 for ( ; i < len; i++) 3332 seq_putc(seq, u->addr->name->sun_path[i] ?: 3333 '@'); 3334 } 3335 unix_state_unlock(s); 3336 seq_putc(seq, '\n'); 3337 } 3338 3339 return 0; 3340 } 3341 3342 static const struct seq_operations unix_seq_ops = { 3343 .start = unix_seq_start, 3344 .next = unix_seq_next, 3345 .stop = unix_seq_stop, 3346 .show = unix_seq_show, 3347 }; 3348 3349 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3350 struct bpf_unix_iter_state { 3351 struct seq_net_private p; 3352 unsigned int cur_sk; 3353 unsigned int end_sk; 3354 unsigned int max_sk; 3355 struct sock **batch; 3356 bool st_bucket_done; 3357 }; 3358 3359 struct bpf_iter__unix { 3360 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3361 __bpf_md_ptr(struct unix_sock *, unix_sk); 3362 uid_t uid __aligned(8); 3363 }; 3364 3365 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3366 struct unix_sock *unix_sk, uid_t uid) 3367 { 3368 struct bpf_iter__unix ctx; 3369 3370 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3371 ctx.meta = meta; 3372 ctx.unix_sk = unix_sk; 3373 ctx.uid = uid; 3374 return bpf_iter_run_prog(prog, &ctx); 3375 } 3376 3377 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3378 3379 { 3380 struct bpf_unix_iter_state *iter = seq->private; 3381 unsigned int expected = 1; 3382 struct sock *sk; 3383 3384 sock_hold(start_sk); 3385 iter->batch[iter->end_sk++] = start_sk; 3386 3387 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3388 if (iter->end_sk < iter->max_sk) { 3389 sock_hold(sk); 3390 iter->batch[iter->end_sk++] = sk; 3391 } 3392 3393 expected++; 3394 } 3395 3396 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3397 3398 return expected; 3399 } 3400 3401 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3402 { 3403 while (iter->cur_sk < iter->end_sk) 3404 sock_put(iter->batch[iter->cur_sk++]); 3405 } 3406 3407 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3408 unsigned int new_batch_sz) 3409 { 3410 struct sock **new_batch; 3411 3412 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3413 GFP_USER | __GFP_NOWARN); 3414 if (!new_batch) 3415 return -ENOMEM; 3416 3417 bpf_iter_unix_put_batch(iter); 3418 kvfree(iter->batch); 3419 iter->batch = new_batch; 3420 iter->max_sk = new_batch_sz; 3421 3422 return 0; 3423 } 3424 3425 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3426 loff_t *pos) 3427 { 3428 struct bpf_unix_iter_state *iter = seq->private; 3429 unsigned int expected; 3430 bool resized = false; 3431 struct sock *sk; 3432 3433 if (iter->st_bucket_done) 3434 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3435 3436 again: 3437 /* Get a new batch */ 3438 iter->cur_sk = 0; 3439 iter->end_sk = 0; 3440 3441 sk = unix_get_first(seq, pos); 3442 if (!sk) 3443 return NULL; /* Done */ 3444 3445 expected = bpf_iter_unix_hold_batch(seq, sk); 3446 3447 if (iter->end_sk == expected) { 3448 iter->st_bucket_done = true; 3449 return sk; 3450 } 3451 3452 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3453 resized = true; 3454 goto again; 3455 } 3456 3457 return sk; 3458 } 3459 3460 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3461 { 3462 if (!*pos) 3463 return SEQ_START_TOKEN; 3464 3465 /* bpf iter does not support lseek, so it always 3466 * continue from where it was stop()-ped. 3467 */ 3468 return bpf_iter_unix_batch(seq, pos); 3469 } 3470 3471 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3472 { 3473 struct bpf_unix_iter_state *iter = seq->private; 3474 struct sock *sk; 3475 3476 /* Whenever seq_next() is called, the iter->cur_sk is 3477 * done with seq_show(), so advance to the next sk in 3478 * the batch. 3479 */ 3480 if (iter->cur_sk < iter->end_sk) 3481 sock_put(iter->batch[iter->cur_sk++]); 3482 3483 ++*pos; 3484 3485 if (iter->cur_sk < iter->end_sk) 3486 sk = iter->batch[iter->cur_sk]; 3487 else 3488 sk = bpf_iter_unix_batch(seq, pos); 3489 3490 return sk; 3491 } 3492 3493 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3494 { 3495 struct bpf_iter_meta meta; 3496 struct bpf_prog *prog; 3497 struct sock *sk = v; 3498 uid_t uid; 3499 bool slow; 3500 int ret; 3501 3502 if (v == SEQ_START_TOKEN) 3503 return 0; 3504 3505 slow = lock_sock_fast(sk); 3506 3507 if (unlikely(sk_unhashed(sk))) { 3508 ret = SEQ_SKIP; 3509 goto unlock; 3510 } 3511 3512 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3513 meta.seq = seq; 3514 prog = bpf_iter_get_info(&meta, false); 3515 ret = unix_prog_seq_show(prog, &meta, v, uid); 3516 unlock: 3517 unlock_sock_fast(sk, slow); 3518 return ret; 3519 } 3520 3521 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3522 { 3523 struct bpf_unix_iter_state *iter = seq->private; 3524 struct bpf_iter_meta meta; 3525 struct bpf_prog *prog; 3526 3527 if (!v) { 3528 meta.seq = seq; 3529 prog = bpf_iter_get_info(&meta, true); 3530 if (prog) 3531 (void)unix_prog_seq_show(prog, &meta, v, 0); 3532 } 3533 3534 if (iter->cur_sk < iter->end_sk) 3535 bpf_iter_unix_put_batch(iter); 3536 } 3537 3538 static const struct seq_operations bpf_iter_unix_seq_ops = { 3539 .start = bpf_iter_unix_seq_start, 3540 .next = bpf_iter_unix_seq_next, 3541 .stop = bpf_iter_unix_seq_stop, 3542 .show = bpf_iter_unix_seq_show, 3543 }; 3544 #endif 3545 #endif 3546 3547 static const struct net_proto_family unix_family_ops = { 3548 .family = PF_UNIX, 3549 .create = unix_create, 3550 .owner = THIS_MODULE, 3551 }; 3552 3553 3554 static int __net_init unix_net_init(struct net *net) 3555 { 3556 int i; 3557 3558 net->unx.sysctl_max_dgram_qlen = 10; 3559 if (unix_sysctl_register(net)) 3560 goto out; 3561 3562 #ifdef CONFIG_PROC_FS 3563 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3564 sizeof(struct seq_net_private))) 3565 goto err_sysctl; 3566 #endif 3567 3568 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3569 sizeof(spinlock_t), GFP_KERNEL); 3570 if (!net->unx.table.locks) 3571 goto err_proc; 3572 3573 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3574 sizeof(struct hlist_head), 3575 GFP_KERNEL); 3576 if (!net->unx.table.buckets) 3577 goto free_locks; 3578 3579 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3580 spin_lock_init(&net->unx.table.locks[i]); 3581 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3582 } 3583 3584 return 0; 3585 3586 free_locks: 3587 kvfree(net->unx.table.locks); 3588 err_proc: 3589 #ifdef CONFIG_PROC_FS 3590 remove_proc_entry("unix", net->proc_net); 3591 err_sysctl: 3592 #endif 3593 unix_sysctl_unregister(net); 3594 out: 3595 return -ENOMEM; 3596 } 3597 3598 static void __net_exit unix_net_exit(struct net *net) 3599 { 3600 
kvfree(net->unx.table.buckets); 3601 kvfree(net->unx.table.locks); 3602 unix_sysctl_unregister(net); 3603 remove_proc_entry("unix", net->proc_net); 3604 } 3605 3606 static struct pernet_operations unix_net_ops = { 3607 .init = unix_net_init, 3608 .exit = unix_net_exit, 3609 }; 3610 3611 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3612 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3613 struct unix_sock *unix_sk, uid_t uid) 3614 3615 #define INIT_BATCH_SZ 16 3616 3617 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3618 { 3619 struct bpf_unix_iter_state *iter = priv_data; 3620 int err; 3621 3622 err = bpf_iter_init_seq_net(priv_data, aux); 3623 if (err) 3624 return err; 3625 3626 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3627 if (err) { 3628 bpf_iter_fini_seq_net(priv_data); 3629 return err; 3630 } 3631 3632 return 0; 3633 } 3634 3635 static void bpf_iter_fini_unix(void *priv_data) 3636 { 3637 struct bpf_unix_iter_state *iter = priv_data; 3638 3639 bpf_iter_fini_seq_net(priv_data); 3640 kvfree(iter->batch); 3641 } 3642 3643 static const struct bpf_iter_seq_info unix_seq_info = { 3644 .seq_ops = &bpf_iter_unix_seq_ops, 3645 .init_seq_private = bpf_iter_init_unix, 3646 .fini_seq_private = bpf_iter_fini_unix, 3647 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3648 }; 3649 3650 static const struct bpf_func_proto * 3651 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3652 const struct bpf_prog *prog) 3653 { 3654 switch (func_id) { 3655 case BPF_FUNC_setsockopt: 3656 return &bpf_sk_setsockopt_proto; 3657 case BPF_FUNC_getsockopt: 3658 return &bpf_sk_getsockopt_proto; 3659 default: 3660 return NULL; 3661 } 3662 } 3663 3664 static struct bpf_iter_reg unix_reg_info = { 3665 .target = "unix", 3666 .ctx_arg_info_size = 1, 3667 .ctx_arg_info = { 3668 { offsetof(struct bpf_iter__unix, unix_sk), 3669 PTR_TO_BTF_ID_OR_NULL }, 3670 }, 3671 .get_func_proto = bpf_iter_unix_get_func_proto, 3672 .seq_info = &unix_seq_info, 3673 }; 3674 3675 static void __init bpf_iter_register(void) 3676 { 3677 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3678 if (bpf_iter_reg_target(&unix_reg_info)) 3679 pr_warn("Warning: could not register bpf iterator unix\n"); 3680 } 3681 #endif 3682 3683 static int __init af_unix_init(void) 3684 { 3685 int rc = -1; 3686 3687 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3688 3689 rc = proto_register(&unix_dgram_proto, 1); 3690 if (rc != 0) { 3691 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3692 goto out; 3693 } 3694 3695 rc = proto_register(&unix_stream_proto, 1); 3696 if (rc != 0) { 3697 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3698 goto out; 3699 } 3700 3701 sock_register(&unix_family_ops); 3702 register_pernet_subsys(&unix_net_ops); 3703 unix_bpf_build_proto(); 3704 3705 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3706 bpf_iter_register(); 3707 #endif 3708 3709 out: 3710 return rc; 3711 } 3712 3713 static void __exit af_unix_exit(void) 3714 { 3715 sock_unregister(PF_UNIX); 3716 proto_unregister(&unix_dgram_proto); 3717 proto_unregister(&unix_stream_proto); 3718 unregister_pernet_subsys(&unix_net_ops); 3719 } 3720 3721 /* Earlier than device_initcall() so that other drivers invoking 3722 request_module() don't end up in a loop when modprobe tries 3723 to use a UNIX socket. 
But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);
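/*
 * Usage sketch (illustrative only, not part of the original file): how user
 * space drives the SCM_RIGHTS handling above (unix_scm_to_skb(),
 * unix_detach_fds(), unix_peek_fds()).  A descriptor carried in the control
 * message is installed as a new fd in the receiving process; with MSG_PEEK
 * the fds are cloned, as the comment in __unix_dgram_recvmsg() notes.  The
 * helper names are hypothetical, error handling is trimmed, and the block is
 * wrapped in #if 0 so it is never compiled here.
 */
#if 0 /* userspace example */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int send_fd(int sock, int fd_to_pass)
{
	char data = 'x';			/* at least one byte of payload */
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

	return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}

static int recv_fd(int sock)
{
	char data;
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;
	int fd = -1;

	if (recvmsg(sock, &msg, 0) < 0)
		return -1;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_RIGHTS)
		memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));

	return fd;				/* new descriptor in this process */
}
#endif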
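/*
 * Usage sketch (illustrative only, not part of the original file): the
 * CONFIG_AF_UNIX_OOB paths above (queue_oob(), unix_stream_recv_urg(),
 * manage_oob(), SIOCATMARK) as seen from user space.  Each MSG_OOB send
 * queues one "urgent" byte; a normal read stops at the mark, SIOCATMARK
 * then reports 1, and the byte is fetched with MSG_OOB unless SO_OOBINLINE
 * is set.  The helper name is hypothetical and error handling is trimmed.
 */
#if 0 /* userspace example */
#include <sys/ioctl.h>
#include <sys/socket.h>

static void oob_roundtrip(int sender, int receiver)
{
	char c;
	int at_mark = 0;

	send(sender, "a", 1, 0);		/* ordinary stream data */
	send(sender, "b", 1, MSG_OOB);		/* queued via queue_oob() */

	recv(receiver, &c, 1, 0);		/* reads 'a', stops at the mark */

	ioctl(receiver, SIOCATMARK, &at_mark);	/* 1: OOB byte is next */
	if (at_mark)
		recv(receiver, &c, 1, MSG_OOB);	/* reads 'b' out of band */
}
#endif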
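/*
 * Usage sketch (illustrative only, not part of the original file): the
 * ioctls dispatched by unix_ioctl() above.  SIOCINQ reports unread bytes
 * (summed skb remainders for stream/seqpacket, first-datagram length
 * otherwise, see unix_inq_len()), SIOCOUTQ the send allocation still in
 * flight (unix_outq_len()), and SIOCUNIXFILE returns an O_PATH descriptor
 * for the bound socket inode (unix_open_file(), needs CAP_NET_ADMIN).
 * SIOCUNIXFILE is assumed to come from the uapi <linux/un.h>; the helper
 * name is hypothetical and error handling is trimmed.
 */
#if 0 /* userspace example */
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>
#include <linux/un.h>

static void query_unix_socket(int fd)
{
	int unread = 0, unsent = 0, path_fd;

	ioctl(fd, SIOCINQ, &unread);		/* unix_inq_len() */
	ioctl(fd, SIOCOUTQ, &unsent);		/* unix_outq_len() */

	path_fd = ioctl(fd, SIOCUNIXFILE);	/* new O_PATH fd, or -1 */
	if (path_fd >= 0)
		close(path_fd);
}
#endif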
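/*
 * For reference, unix_seq_show() above emits /proc/net/unix lines shaped
 * like the sample below (all field values illustrative): the 00010000 flag
 * is __SO_ACCEPTCON on listening sockets, Type/St follow the socket type
 * and SS_* state, and abstract addresses are shown with a leading '@'.
 *
 *   Num       RefCount Protocol Flags    Type St Inode Path
 *   0000000000000000: 00000002 00000000 00010000 0001 01 23456 /run/example.sock
 */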