1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NET4: Implementation of BSD Unix domain sockets. 4 * 5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> 6 * 7 * Fixes: 8 * Linus Torvalds : Assorted bug cures. 9 * Niibe Yutaka : async I/O support. 10 * Carsten Paeth : PF_UNIX check, address fixes. 11 * Alan Cox : Limit size of allocated blocks. 12 * Alan Cox : Fixed the stupid socketpair bug. 13 * Alan Cox : BSD compatibility fine tuning. 14 * Alan Cox : Fixed a bug in connect when interrupted. 15 * Alan Cox : Sorted out a proper draft version of 16 * file descriptor passing hacked up from 17 * Mike Shaver's work. 18 * Marty Leisner : Fixes to fd passing 19 * Nick Nevin : recvmsg bugfix. 20 * Alan Cox : Started proper garbage collector 21 * Heiko EiBfeldt : Missing verify_area check 22 * Alan Cox : Started POSIXisms 23 * Andreas Schwab : Replace inode by dentry for proper 24 * reference counting 25 * Kirk Petersen : Made this a module 26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm. 27 * Lots of bug fixes. 28 * Alexey Kuznetosv : Repaired (I hope) bugs introduces 29 * by above two patches. 30 * Andrea Arcangeli : If possible we block in connect(2) 31 * if the max backlog of the listen socket 32 * is been reached. This won't break 33 * old apps and it will avoid huge amount 34 * of socks hashed (this for unix_gc() 35 * performances reasons). 36 * Security fix that limits the max 37 * number of socks to 2*max_files and 38 * the number of skb queueable in the 39 * dgram receiver. 40 * Artur Skawina : Hash function optimizations 41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8) 42 * Malcolm Beattie : Set peercred for socketpair 43 * Michal Ostrowski : Module initialization cleanup. 44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT, 45 * the core infrastructure is doing that 46 * for all net proto families now (2.5.69+) 47 * 48 * Known differences from reference BSD that was tested: 49 * 50 * [TO FIX] 51 * ECONNREFUSED is not returned from one end of a connected() socket to the 52 * other the moment one end closes. 53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark 54 * and a fake inode identifier (nor the BSD first socket fstat twice bug). 55 * [NOT TO FIX] 56 * accept() returns a path name even if the connecting socket has closed 57 * in the meantime (BSD loses the path and gives up). 58 * accept() returns 0 length path for an unbound connector. BSD returns 16 59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) 60 * socketpair(...SOCK_RAW..) doesn't panic the kernel. 61 * BSD af_unix apparently has connect forgetting to block properly. 62 * (need to check this with the POSIX spec in detail) 63 * 64 * Differences from 2.0.0-11-... (ANK) 65 * Bug fixes and improvements. 66 * - client shutdown killed server socket. 67 * - removed all useless cli/sti pairs. 68 * 69 * Semantic changes/extensions. 70 * - generic control message passing. 71 * - SCM_CREDENTIALS control message. 72 * - "Abstract" (not FS based) socket bindings. 73 * Abstract names are sequences of bytes (not zero terminated) 74 * started by 0, so that this name space does not intersect 75 * with BSD names. 
76 */ 77 78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 79 80 #include <linux/module.h> 81 #include <linux/kernel.h> 82 #include <linux/signal.h> 83 #include <linux/sched/signal.h> 84 #include <linux/errno.h> 85 #include <linux/string.h> 86 #include <linux/stat.h> 87 #include <linux/dcache.h> 88 #include <linux/namei.h> 89 #include <linux/socket.h> 90 #include <linux/un.h> 91 #include <linux/fcntl.h> 92 #include <linux/filter.h> 93 #include <linux/termios.h> 94 #include <linux/sockios.h> 95 #include <linux/net.h> 96 #include <linux/in.h> 97 #include <linux/fs.h> 98 #include <linux/slab.h> 99 #include <linux/uaccess.h> 100 #include <linux/skbuff.h> 101 #include <linux/netdevice.h> 102 #include <net/net_namespace.h> 103 #include <net/sock.h> 104 #include <net/tcp_states.h> 105 #include <net/af_unix.h> 106 #include <linux/proc_fs.h> 107 #include <linux/seq_file.h> 108 #include <net/scm.h> 109 #include <linux/init.h> 110 #include <linux/poll.h> 111 #include <linux/rtnetlink.h> 112 #include <linux/mount.h> 113 #include <net/checksum.h> 114 #include <linux/security.h> 115 #include <linux/freezer.h> 116 #include <linux/file.h> 117 #include <linux/btf_ids.h> 118 119 #include "scm.h" 120 121 static atomic_long_t unix_nr_socks; 122 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; 123 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; 124 125 /* SMP locking strategy: 126 * hash table is protected with spinlock. 127 * each socket state is protected by separate spinlock. 128 */ 129 130 static unsigned int unix_unbound_hash(struct sock *sk) 131 { 132 unsigned long hash = (unsigned long)sk; 133 134 hash ^= hash >> 16; 135 hash ^= hash >> 8; 136 hash ^= sk->sk_type; 137 138 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD); 139 } 140 141 static unsigned int unix_bsd_hash(struct inode *i) 142 { 143 return i->i_ino & UNIX_HASH_MOD; 144 } 145 146 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr, 147 int addr_len, int type) 148 { 149 __wsum csum = csum_partial(sunaddr, addr_len, 0); 150 unsigned int hash; 151 152 hash = (__force unsigned int)csum_fold(csum); 153 hash ^= hash >> 8; 154 hash ^= type; 155 156 return hash & UNIX_HASH_MOD; 157 } 158 159 static void unix_table_double_lock(struct net *net, 160 unsigned int hash1, unsigned int hash2) 161 { 162 /* hash1 and hash2 is never the same because 163 * one is between 0 and UNIX_HASH_MOD, and 164 * another is between UNIX_HASH_MOD + 1 and UNIX_HASH_SIZE - 1. 
165 */ 166 if (hash1 > hash2) 167 swap(hash1, hash2); 168 169 spin_lock(&net->unx.table.locks[hash1]); 170 spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING); 171 } 172 173 static void unix_table_double_unlock(struct net *net, 174 unsigned int hash1, unsigned int hash2) 175 { 176 spin_unlock(&net->unx.table.locks[hash1]); 177 spin_unlock(&net->unx.table.locks[hash2]); 178 } 179 180 #ifdef CONFIG_SECURITY_NETWORK 181 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 182 { 183 UNIXCB(skb).secid = scm->secid; 184 } 185 186 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 187 { 188 scm->secid = UNIXCB(skb).secid; 189 } 190 191 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 192 { 193 return (scm->secid == UNIXCB(skb).secid); 194 } 195 #else 196 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 197 { } 198 199 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 200 { } 201 202 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 203 { 204 return true; 205 } 206 #endif /* CONFIG_SECURITY_NETWORK */ 207 208 #define unix_peer(sk) (unix_sk(sk)->peer) 209 210 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 211 { 212 return unix_peer(osk) == sk; 213 } 214 215 static inline int unix_may_send(struct sock *sk, struct sock *osk) 216 { 217 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 218 } 219 220 static inline int unix_recvq_full(const struct sock *sk) 221 { 222 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 223 } 224 225 static inline int unix_recvq_full_lockless(const struct sock *sk) 226 { 227 return skb_queue_len_lockless(&sk->sk_receive_queue) > 228 READ_ONCE(sk->sk_max_ack_backlog); 229 } 230 231 struct sock *unix_peer_get(struct sock *s) 232 { 233 struct sock *peer; 234 235 unix_state_lock(s); 236 peer = unix_peer(s); 237 if (peer) 238 sock_hold(peer); 239 unix_state_unlock(s); 240 return peer; 241 } 242 EXPORT_SYMBOL_GPL(unix_peer_get); 243 244 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, 245 int addr_len) 246 { 247 struct unix_address *addr; 248 249 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); 250 if (!addr) 251 return NULL; 252 253 refcount_set(&addr->refcnt, 1); 254 addr->len = addr_len; 255 memcpy(addr->name, sunaddr, addr_len); 256 257 return addr; 258 } 259 260 static inline void unix_release_addr(struct unix_address *addr) 261 { 262 if (refcount_dec_and_test(&addr->refcnt)) 263 kfree(addr); 264 } 265 266 /* 267 * Check unix socket name: 268 * - should be not zero length. 269 * - if started by not zero, should be NULL terminated (FS object) 270 * - if started by zero, it is abstract name. 271 */ 272 273 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) 274 { 275 if (addr_len <= offsetof(struct sockaddr_un, sun_path) || 276 addr_len > sizeof(*sunaddr)) 277 return -EINVAL; 278 279 if (sunaddr->sun_family != AF_UNIX) 280 return -EINVAL; 281 282 return 0; 283 } 284 285 static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) 286 { 287 /* This may look like an off by one error but it is a bit more 288 * subtle. 108 is the longest valid AF_UNIX path for a binding. 289 * sun_path[108] doesn't as such exist. 
However in kernel space 290 * we are guaranteed that it is a valid memory location in our 291 * kernel address buffer because syscall functions always pass 292 * a pointer of struct sockaddr_storage which has a bigger buffer 293 * than 108. 294 */ 295 ((char *)sunaddr)[addr_len] = 0; 296 } 297 298 static void __unix_remove_socket(struct sock *sk) 299 { 300 sk_del_node_init(sk); 301 } 302 303 static void __unix_insert_socket(struct net *net, struct sock *sk) 304 { 305 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 306 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); 307 } 308 309 static void __unix_set_addr_hash(struct net *net, struct sock *sk, 310 struct unix_address *addr, unsigned int hash) 311 { 312 __unix_remove_socket(sk); 313 smp_store_release(&unix_sk(sk)->addr, addr); 314 315 sk->sk_hash = hash; 316 __unix_insert_socket(net, sk); 317 } 318 319 static void unix_remove_socket(struct net *net, struct sock *sk) 320 { 321 spin_lock(&net->unx.table.locks[sk->sk_hash]); 322 __unix_remove_socket(sk); 323 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 324 } 325 326 static void unix_insert_unbound_socket(struct net *net, struct sock *sk) 327 { 328 spin_lock(&net->unx.table.locks[sk->sk_hash]); 329 __unix_insert_socket(net, sk); 330 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 331 } 332 333 static void unix_insert_bsd_socket(struct sock *sk) 334 { 335 spin_lock(&bsd_socket_locks[sk->sk_hash]); 336 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); 337 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 338 } 339 340 static void unix_remove_bsd_socket(struct sock *sk) 341 { 342 if (!hlist_unhashed(&sk->sk_bind_node)) { 343 spin_lock(&bsd_socket_locks[sk->sk_hash]); 344 __sk_del_bind_node(sk); 345 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 346 347 sk_node_init(&sk->sk_bind_node); 348 } 349 } 350 351 static struct sock *__unix_find_socket_byname(struct net *net, 352 struct sockaddr_un *sunname, 353 int len, unsigned int hash) 354 { 355 struct sock *s; 356 357 sk_for_each(s, &net->unx.table.buckets[hash]) { 358 struct unix_sock *u = unix_sk(s); 359 360 if (u->addr->len == len && 361 !memcmp(u->addr->name, sunname, len)) 362 return s; 363 } 364 return NULL; 365 } 366 367 static inline struct sock *unix_find_socket_byname(struct net *net, 368 struct sockaddr_un *sunname, 369 int len, unsigned int hash) 370 { 371 struct sock *s; 372 373 spin_lock(&net->unx.table.locks[hash]); 374 s = __unix_find_socket_byname(net, sunname, len, hash); 375 if (s) 376 sock_hold(s); 377 spin_unlock(&net->unx.table.locks[hash]); 378 return s; 379 } 380 381 static struct sock *unix_find_socket_byinode(struct inode *i) 382 { 383 unsigned int hash = unix_bsd_hash(i); 384 struct sock *s; 385 386 spin_lock(&bsd_socket_locks[hash]); 387 sk_for_each_bound(s, &bsd_socket_buckets[hash]) { 388 struct dentry *dentry = unix_sk(s)->path.dentry; 389 390 if (dentry && d_backing_inode(dentry) == i) { 391 sock_hold(s); 392 spin_unlock(&bsd_socket_locks[hash]); 393 return s; 394 } 395 } 396 spin_unlock(&bsd_socket_locks[hash]); 397 return NULL; 398 } 399 400 /* Support code for asymmetrically connected dgram sockets 401 * 402 * If a datagram socket is connected to a socket not itself connected 403 * to the first socket (eg, /dev/log), clients may only enqueue more 404 * messages if the present receive queue of the server socket is not 405 * "too large". This means there's a second writeability condition 406 * poll and sendmsg need to test. 
The dgram recv code will do a wake 407 * up on the peer_wait wait queue of a socket upon reception of a 408 * datagram which needs to be propagated to sleeping would-be writers 409 * since these might not have sent anything so far. This can't be 410 * accomplished via poll_wait because the lifetime of the server 411 * socket might be less than that of its clients if these break their 412 * association with it or if the server socket is closed while clients 413 * are still connected to it and there's no way to inform "a polling 414 * implementation" that it should let go of a certain wait queue 415 * 416 * In order to propagate a wake up, a wait_queue_entry_t of the client 417 * socket is enqueued on the peer_wait queue of the server socket 418 * whose wake function does a wake_up on the ordinary client socket 419 * wait queue. This connection is established whenever a write (or 420 * poll for write) hit the flow control condition and broken when the 421 * association to the server socket is dissolved or after a wake up 422 * was relayed. 423 */ 424 425 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, 426 void *key) 427 { 428 struct unix_sock *u; 429 wait_queue_head_t *u_sleep; 430 431 u = container_of(q, struct unix_sock, peer_wake); 432 433 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, 434 q); 435 u->peer_wake.private = NULL; 436 437 /* relaying can only happen while the wq still exists */ 438 u_sleep = sk_sleep(&u->sk); 439 if (u_sleep) 440 wake_up_interruptible_poll(u_sleep, key_to_poll(key)); 441 442 return 0; 443 } 444 445 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) 446 { 447 struct unix_sock *u, *u_other; 448 int rc; 449 450 u = unix_sk(sk); 451 u_other = unix_sk(other); 452 rc = 0; 453 spin_lock(&u_other->peer_wait.lock); 454 455 if (!u->peer_wake.private) { 456 u->peer_wake.private = other; 457 __add_wait_queue(&u_other->peer_wait, &u->peer_wake); 458 459 rc = 1; 460 } 461 462 spin_unlock(&u_other->peer_wait.lock); 463 return rc; 464 } 465 466 static void unix_dgram_peer_wake_disconnect(struct sock *sk, 467 struct sock *other) 468 { 469 struct unix_sock *u, *u_other; 470 471 u = unix_sk(sk); 472 u_other = unix_sk(other); 473 spin_lock(&u_other->peer_wait.lock); 474 475 if (u->peer_wake.private == other) { 476 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); 477 u->peer_wake.private = NULL; 478 } 479 480 spin_unlock(&u_other->peer_wait.lock); 481 } 482 483 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, 484 struct sock *other) 485 { 486 unix_dgram_peer_wake_disconnect(sk, other); 487 wake_up_interruptible_poll(sk_sleep(sk), 488 EPOLLOUT | 489 EPOLLWRNORM | 490 EPOLLWRBAND); 491 } 492 493 /* preconditions: 494 * - unix_peer(sk) == other 495 * - association is stable 496 */ 497 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) 498 { 499 int connected; 500 501 connected = unix_dgram_peer_wake_connect(sk, other); 502 503 /* If other is SOCK_DEAD, we want to make sure we signal 504 * POLLOUT, such that a subsequent write() can get a 505 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs 506 * to other and its full, we will hang waiting for POLLOUT. 
507 */ 508 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) 509 return 1; 510 511 if (connected) 512 unix_dgram_peer_wake_disconnect(sk, other); 513 514 return 0; 515 } 516 517 static int unix_writable(const struct sock *sk) 518 { 519 return sk->sk_state != TCP_LISTEN && 520 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 521 } 522 523 static void unix_write_space(struct sock *sk) 524 { 525 struct socket_wq *wq; 526 527 rcu_read_lock(); 528 if (unix_writable(sk)) { 529 wq = rcu_dereference(sk->sk_wq); 530 if (skwq_has_sleeper(wq)) 531 wake_up_interruptible_sync_poll(&wq->wait, 532 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); 533 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 534 } 535 rcu_read_unlock(); 536 } 537 538 /* When dgram socket disconnects (or changes its peer), we clear its receive 539 * queue of packets arrived from previous peer. First, it allows to do 540 * flow control based only on wmem_alloc; second, sk connected to peer 541 * may receive messages only from that peer. */ 542 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 543 { 544 if (!skb_queue_empty(&sk->sk_receive_queue)) { 545 skb_queue_purge(&sk->sk_receive_queue); 546 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 547 548 /* If one link of bidirectional dgram pipe is disconnected, 549 * we signal error. Messages are lost. Do not make this, 550 * when peer was not connected to us. 551 */ 552 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 553 other->sk_err = ECONNRESET; 554 sk_error_report(other); 555 } 556 } 557 other->sk_state = TCP_CLOSE; 558 } 559 560 static void unix_sock_destructor(struct sock *sk) 561 { 562 struct unix_sock *u = unix_sk(sk); 563 564 skb_queue_purge(&sk->sk_receive_queue); 565 566 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 567 if (u->oob_skb) { 568 kfree_skb(u->oob_skb); 569 u->oob_skb = NULL; 570 } 571 #endif 572 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); 573 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 574 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket); 575 if (!sock_flag(sk, SOCK_DEAD)) { 576 pr_info("Attempt to release alive unix socket: %p\n", sk); 577 return; 578 } 579 580 if (u->addr) 581 unix_release_addr(u->addr); 582 583 atomic_long_dec(&unix_nr_socks); 584 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 585 #ifdef UNIX_REFCNT_DEBUG 586 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, 587 atomic_long_read(&unix_nr_socks)); 588 #endif 589 } 590 591 static void unix_release_sock(struct sock *sk, int embrion) 592 { 593 struct unix_sock *u = unix_sk(sk); 594 struct sock *skpair; 595 struct sk_buff *skb; 596 struct path path; 597 int state; 598 599 unix_remove_socket(sock_net(sk), sk); 600 unix_remove_bsd_socket(sk); 601 602 /* Clear state */ 603 unix_state_lock(sk); 604 sock_orphan(sk); 605 sk->sk_shutdown = SHUTDOWN_MASK; 606 path = u->path; 607 u->path.dentry = NULL; 608 u->path.mnt = NULL; 609 state = sk->sk_state; 610 sk->sk_state = TCP_CLOSE; 611 612 skpair = unix_peer(sk); 613 unix_peer(sk) = NULL; 614 615 unix_state_unlock(sk); 616 617 wake_up_interruptible_all(&u->peer_wait); 618 619 if (skpair != NULL) { 620 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 621 unix_state_lock(skpair); 622 /* No more writes */ 623 skpair->sk_shutdown = SHUTDOWN_MASK; 624 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 625 skpair->sk_err = ECONNRESET; 626 unix_state_unlock(skpair); 627 skpair->sk_state_change(skpair); 628 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 629 } 630 631 
unix_dgram_peer_wake_disconnect(sk, skpair); 632 sock_put(skpair); /* It may now die */ 633 } 634 635 /* Try to flush out this socket. Throw out buffers at least */ 636 637 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 638 if (state == TCP_LISTEN) 639 unix_release_sock(skb->sk, 1); 640 /* passed fds are erased in the kfree_skb hook */ 641 UNIXCB(skb).consumed = skb->len; 642 kfree_skb(skb); 643 } 644 645 if (path.dentry) 646 path_put(&path); 647 648 sock_put(sk); 649 650 /* ---- Socket is dead now and most probably destroyed ---- */ 651 652 /* 653 * Fixme: BSD difference: In BSD all sockets connected to us get 654 * ECONNRESET and we die on the spot. In Linux we behave 655 * like files and pipes do and wait for the last 656 * dereference. 657 * 658 * Can't we simply set sock->err? 659 * 660 * What the above comment does talk about? --ANK(980817) 661 */ 662 663 if (unix_tot_inflight) 664 unix_gc(); /* Garbage collect fds */ 665 } 666 667 static void init_peercred(struct sock *sk) 668 { 669 const struct cred *old_cred; 670 struct pid *old_pid; 671 672 spin_lock(&sk->sk_peer_lock); 673 old_pid = sk->sk_peer_pid; 674 old_cred = sk->sk_peer_cred; 675 sk->sk_peer_pid = get_pid(task_tgid(current)); 676 sk->sk_peer_cred = get_current_cred(); 677 spin_unlock(&sk->sk_peer_lock); 678 679 put_pid(old_pid); 680 put_cred(old_cred); 681 } 682 683 static void copy_peercred(struct sock *sk, struct sock *peersk) 684 { 685 const struct cred *old_cred; 686 struct pid *old_pid; 687 688 if (sk < peersk) { 689 spin_lock(&sk->sk_peer_lock); 690 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); 691 } else { 692 spin_lock(&peersk->sk_peer_lock); 693 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); 694 } 695 old_pid = sk->sk_peer_pid; 696 old_cred = sk->sk_peer_cred; 697 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 698 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 699 700 spin_unlock(&sk->sk_peer_lock); 701 spin_unlock(&peersk->sk_peer_lock); 702 703 put_pid(old_pid); 704 put_cred(old_cred); 705 } 706 707 static int unix_listen(struct socket *sock, int backlog) 708 { 709 int err; 710 struct sock *sk = sock->sk; 711 struct unix_sock *u = unix_sk(sk); 712 713 err = -EOPNOTSUPP; 714 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 715 goto out; /* Only stream/seqpacket sockets accept */ 716 err = -EINVAL; 717 if (!u->addr) 718 goto out; /* No listens on an unbound socket */ 719 unix_state_lock(sk); 720 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 721 goto out_unlock; 722 if (backlog > sk->sk_max_ack_backlog) 723 wake_up_interruptible_all(&u->peer_wait); 724 sk->sk_max_ack_backlog = backlog; 725 sk->sk_state = TCP_LISTEN; 726 /* set credentials so connect can copy them */ 727 init_peercred(sk); 728 err = 0; 729 730 out_unlock: 731 unix_state_unlock(sk); 732 out: 733 return err; 734 } 735 736 static int unix_release(struct socket *); 737 static int unix_bind(struct socket *, struct sockaddr *, int); 738 static int unix_stream_connect(struct socket *, struct sockaddr *, 739 int addr_len, int flags); 740 static int unix_socketpair(struct socket *, struct socket *); 741 static int unix_accept(struct socket *, struct socket *, int, bool); 742 static int unix_getname(struct socket *, struct sockaddr *, int); 743 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 744 static __poll_t unix_dgram_poll(struct file *, struct socket *, 745 poll_table *); 746 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 747 #ifdef 
CONFIG_COMPAT 748 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 749 #endif 750 static int unix_shutdown(struct socket *, int); 751 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 752 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 753 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 754 size_t size, int flags); 755 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 756 struct pipe_inode_info *, size_t size, 757 unsigned int flags); 758 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 759 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 760 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, 761 sk_read_actor_t recv_actor); 762 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, 763 sk_read_actor_t recv_actor); 764 static int unix_dgram_connect(struct socket *, struct sockaddr *, 765 int, int); 766 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 767 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 768 int); 769 770 static int unix_set_peek_off(struct sock *sk, int val) 771 { 772 struct unix_sock *u = unix_sk(sk); 773 774 if (mutex_lock_interruptible(&u->iolock)) 775 return -EINTR; 776 777 sk->sk_peek_off = val; 778 mutex_unlock(&u->iolock); 779 780 return 0; 781 } 782 783 #ifdef CONFIG_PROC_FS 784 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 785 { 786 struct sock *sk = sock->sk; 787 struct unix_sock *u; 788 789 if (sk) { 790 u = unix_sk(sock->sk); 791 seq_printf(m, "scm_fds: %u\n", 792 atomic_read(&u->scm_stat.nr_fds)); 793 } 794 } 795 #else 796 #define unix_show_fdinfo NULL 797 #endif 798 799 static const struct proto_ops unix_stream_ops = { 800 .family = PF_UNIX, 801 .owner = THIS_MODULE, 802 .release = unix_release, 803 .bind = unix_bind, 804 .connect = unix_stream_connect, 805 .socketpair = unix_socketpair, 806 .accept = unix_accept, 807 .getname = unix_getname, 808 .poll = unix_poll, 809 .ioctl = unix_ioctl, 810 #ifdef CONFIG_COMPAT 811 .compat_ioctl = unix_compat_ioctl, 812 #endif 813 .listen = unix_listen, 814 .shutdown = unix_shutdown, 815 .sendmsg = unix_stream_sendmsg, 816 .recvmsg = unix_stream_recvmsg, 817 .read_sock = unix_stream_read_sock, 818 .mmap = sock_no_mmap, 819 .sendpage = unix_stream_sendpage, 820 .splice_read = unix_stream_splice_read, 821 .set_peek_off = unix_set_peek_off, 822 .show_fdinfo = unix_show_fdinfo, 823 }; 824 825 static const struct proto_ops unix_dgram_ops = { 826 .family = PF_UNIX, 827 .owner = THIS_MODULE, 828 .release = unix_release, 829 .bind = unix_bind, 830 .connect = unix_dgram_connect, 831 .socketpair = unix_socketpair, 832 .accept = sock_no_accept, 833 .getname = unix_getname, 834 .poll = unix_dgram_poll, 835 .ioctl = unix_ioctl, 836 #ifdef CONFIG_COMPAT 837 .compat_ioctl = unix_compat_ioctl, 838 #endif 839 .listen = sock_no_listen, 840 .shutdown = unix_shutdown, 841 .sendmsg = unix_dgram_sendmsg, 842 .read_sock = unix_read_sock, 843 .recvmsg = unix_dgram_recvmsg, 844 .mmap = sock_no_mmap, 845 .sendpage = sock_no_sendpage, 846 .set_peek_off = unix_set_peek_off, 847 .show_fdinfo = unix_show_fdinfo, 848 }; 849 850 static const struct proto_ops unix_seqpacket_ops = { 851 .family = PF_UNIX, 852 .owner = THIS_MODULE, 853 .release = unix_release, 854 .bind = unix_bind, 855 .connect = unix_stream_connect, 856 .socketpair = 
unix_socketpair, 857 .accept = unix_accept, 858 .getname = unix_getname, 859 .poll = unix_dgram_poll, 860 .ioctl = unix_ioctl, 861 #ifdef CONFIG_COMPAT 862 .compat_ioctl = unix_compat_ioctl, 863 #endif 864 .listen = unix_listen, 865 .shutdown = unix_shutdown, 866 .sendmsg = unix_seqpacket_sendmsg, 867 .recvmsg = unix_seqpacket_recvmsg, 868 .mmap = sock_no_mmap, 869 .sendpage = sock_no_sendpage, 870 .set_peek_off = unix_set_peek_off, 871 .show_fdinfo = unix_show_fdinfo, 872 }; 873 874 static void unix_close(struct sock *sk, long timeout) 875 { 876 /* Nothing to do here, unix socket does not need a ->close(). 877 * This is merely for sockmap. 878 */ 879 } 880 881 static void unix_unhash(struct sock *sk) 882 { 883 /* Nothing to do here, unix socket does not need a ->unhash(). 884 * This is merely for sockmap. 885 */ 886 } 887 888 struct proto unix_dgram_proto = { 889 .name = "UNIX", 890 .owner = THIS_MODULE, 891 .obj_size = sizeof(struct unix_sock), 892 .close = unix_close, 893 #ifdef CONFIG_BPF_SYSCALL 894 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 895 #endif 896 }; 897 898 struct proto unix_stream_proto = { 899 .name = "UNIX-STREAM", 900 .owner = THIS_MODULE, 901 .obj_size = sizeof(struct unix_sock), 902 .close = unix_close, 903 .unhash = unix_unhash, 904 #ifdef CONFIG_BPF_SYSCALL 905 .psock_update_sk_prot = unix_stream_bpf_update_proto, 906 #endif 907 }; 908 909 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 910 { 911 struct unix_sock *u; 912 struct sock *sk; 913 int err; 914 915 atomic_long_inc(&unix_nr_socks); 916 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 917 err = -ENFILE; 918 goto err; 919 } 920 921 if (type == SOCK_STREAM) 922 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 923 else /*dgram and seqpacket */ 924 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 925 926 if (!sk) { 927 err = -ENOMEM; 928 goto err; 929 } 930 931 sock_init_data(sock, sk); 932 933 sk->sk_hash = unix_unbound_hash(sk); 934 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 935 sk->sk_write_space = unix_write_space; 936 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 937 sk->sk_destruct = unix_sock_destructor; 938 u = unix_sk(sk); 939 u->path.dentry = NULL; 940 u->path.mnt = NULL; 941 spin_lock_init(&u->lock); 942 atomic_long_set(&u->inflight, 0); 943 INIT_LIST_HEAD(&u->link); 944 mutex_init(&u->iolock); /* single task reading lock */ 945 mutex_init(&u->bindlock); /* single task binding lock */ 946 init_waitqueue_head(&u->peer_wait); 947 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 948 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 949 unix_insert_unbound_socket(net, sk); 950 951 sock_prot_inuse_add(net, sk->sk_prot, 1); 952 953 return sk; 954 955 err: 956 atomic_long_dec(&unix_nr_socks); 957 return ERR_PTR(err); 958 } 959 960 static int unix_create(struct net *net, struct socket *sock, int protocol, 961 int kern) 962 { 963 struct sock *sk; 964 965 if (protocol && protocol != PF_UNIX) 966 return -EPROTONOSUPPORT; 967 968 sock->state = SS_UNCONNECTED; 969 970 switch (sock->type) { 971 case SOCK_STREAM: 972 sock->ops = &unix_stream_ops; 973 break; 974 /* 975 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 976 * nothing uses it. 
977 */ 978 case SOCK_RAW: 979 sock->type = SOCK_DGRAM; 980 fallthrough; 981 case SOCK_DGRAM: 982 sock->ops = &unix_dgram_ops; 983 break; 984 case SOCK_SEQPACKET: 985 sock->ops = &unix_seqpacket_ops; 986 break; 987 default: 988 return -ESOCKTNOSUPPORT; 989 } 990 991 sk = unix_create1(net, sock, kern, sock->type); 992 if (IS_ERR(sk)) 993 return PTR_ERR(sk); 994 995 return 0; 996 } 997 998 static int unix_release(struct socket *sock) 999 { 1000 struct sock *sk = sock->sk; 1001 1002 if (!sk) 1003 return 0; 1004 1005 sk->sk_prot->close(sk, 0); 1006 unix_release_sock(sk, 0); 1007 sock->sk = NULL; 1008 1009 return 0; 1010 } 1011 1012 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, 1013 int type) 1014 { 1015 struct inode *inode; 1016 struct path path; 1017 struct sock *sk; 1018 int err; 1019 1020 unix_mkname_bsd(sunaddr, addr_len); 1021 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1022 if (err) 1023 goto fail; 1024 1025 err = path_permission(&path, MAY_WRITE); 1026 if (err) 1027 goto path_put; 1028 1029 err = -ECONNREFUSED; 1030 inode = d_backing_inode(path.dentry); 1031 if (!S_ISSOCK(inode->i_mode)) 1032 goto path_put; 1033 1034 sk = unix_find_socket_byinode(inode); 1035 if (!sk) 1036 goto path_put; 1037 1038 err = -EPROTOTYPE; 1039 if (sk->sk_type == type) 1040 touch_atime(&path); 1041 else 1042 goto sock_put; 1043 1044 path_put(&path); 1045 1046 return sk; 1047 1048 sock_put: 1049 sock_put(sk); 1050 path_put: 1051 path_put(&path); 1052 fail: 1053 return ERR_PTR(err); 1054 } 1055 1056 static struct sock *unix_find_abstract(struct net *net, 1057 struct sockaddr_un *sunaddr, 1058 int addr_len, int type) 1059 { 1060 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1061 struct dentry *dentry; 1062 struct sock *sk; 1063 1064 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1065 if (!sk) 1066 return ERR_PTR(-ECONNREFUSED); 1067 1068 dentry = unix_sk(sk)->path.dentry; 1069 if (dentry) 1070 touch_atime(&unix_sk(sk)->path); 1071 1072 return sk; 1073 } 1074 1075 static struct sock *unix_find_other(struct net *net, 1076 struct sockaddr_un *sunaddr, 1077 int addr_len, int type) 1078 { 1079 struct sock *sk; 1080 1081 if (sunaddr->sun_path[0]) 1082 sk = unix_find_bsd(sunaddr, addr_len, type); 1083 else 1084 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1085 1086 return sk; 1087 } 1088 1089 static int unix_autobind(struct sock *sk) 1090 { 1091 unsigned int new_hash, old_hash = sk->sk_hash; 1092 struct unix_sock *u = unix_sk(sk); 1093 struct net *net = sock_net(sk); 1094 struct unix_address *addr; 1095 u32 lastnum, ordernum; 1096 int err; 1097 1098 err = mutex_lock_interruptible(&u->bindlock); 1099 if (err) 1100 return err; 1101 1102 if (u->addr) 1103 goto out; 1104 1105 err = -ENOMEM; 1106 addr = kzalloc(sizeof(*addr) + 1107 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1108 if (!addr) 1109 goto out; 1110 1111 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1112 addr->name->sun_family = AF_UNIX; 1113 refcount_set(&addr->refcnt, 1); 1114 1115 ordernum = prandom_u32(); 1116 lastnum = ordernum & 0xFFFFF; 1117 retry: 1118 ordernum = (ordernum + 1) & 0xFFFFF; 1119 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1120 1121 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1122 unix_table_double_lock(net, old_hash, new_hash); 1123 1124 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { 1125 unix_table_double_unlock(net, old_hash, new_hash); 1126 1127 /* __unix_find_socket_byname() 
may take long time if many names 1128 * are already in use. 1129 */ 1130 cond_resched(); 1131 1132 if (ordernum == lastnum) { 1133 /* Give up if all names seems to be in use. */ 1134 err = -ENOSPC; 1135 unix_release_addr(addr); 1136 goto out; 1137 } 1138 1139 goto retry; 1140 } 1141 1142 __unix_set_addr_hash(net, sk, addr, new_hash); 1143 unix_table_double_unlock(net, old_hash, new_hash); 1144 err = 0; 1145 1146 out: mutex_unlock(&u->bindlock); 1147 return err; 1148 } 1149 1150 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1151 int addr_len) 1152 { 1153 umode_t mode = S_IFSOCK | 1154 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1155 unsigned int new_hash, old_hash = sk->sk_hash; 1156 struct unix_sock *u = unix_sk(sk); 1157 struct net *net = sock_net(sk); 1158 struct user_namespace *ns; // barf... 1159 struct unix_address *addr; 1160 struct dentry *dentry; 1161 struct path parent; 1162 int err; 1163 1164 unix_mkname_bsd(sunaddr, addr_len); 1165 addr_len = strlen(sunaddr->sun_path) + 1166 offsetof(struct sockaddr_un, sun_path) + 1; 1167 1168 addr = unix_create_addr(sunaddr, addr_len); 1169 if (!addr) 1170 return -ENOMEM; 1171 1172 /* 1173 * Get the parent directory, calculate the hash for last 1174 * component. 1175 */ 1176 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1177 if (IS_ERR(dentry)) { 1178 err = PTR_ERR(dentry); 1179 goto out; 1180 } 1181 1182 /* 1183 * All right, let's create it. 1184 */ 1185 ns = mnt_user_ns(parent.mnt); 1186 err = security_path_mknod(&parent, dentry, mode, 0); 1187 if (!err) 1188 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); 1189 if (err) 1190 goto out_path; 1191 err = mutex_lock_interruptible(&u->bindlock); 1192 if (err) 1193 goto out_unlink; 1194 if (u->addr) 1195 goto out_unlock; 1196 1197 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1198 unix_table_double_lock(net, old_hash, new_hash); 1199 u->path.mnt = mntget(parent.mnt); 1200 u->path.dentry = dget(dentry); 1201 __unix_set_addr_hash(net, sk, addr, new_hash); 1202 unix_table_double_unlock(net, old_hash, new_hash); 1203 unix_insert_bsd_socket(sk); 1204 mutex_unlock(&u->bindlock); 1205 done_path_create(&parent, dentry); 1206 return 0; 1207 1208 out_unlock: 1209 mutex_unlock(&u->bindlock); 1210 err = -EINVAL; 1211 out_unlink: 1212 /* failed after successful mknod? unlink what we'd created... */ 1213 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); 1214 out_path: 1215 done_path_create(&parent, dentry); 1216 out: 1217 unix_release_addr(addr); 1218 return err == -EEXIST ? 
-EADDRINUSE : err; 1219 } 1220 1221 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1222 int addr_len) 1223 { 1224 unsigned int new_hash, old_hash = sk->sk_hash; 1225 struct unix_sock *u = unix_sk(sk); 1226 struct net *net = sock_net(sk); 1227 struct unix_address *addr; 1228 int err; 1229 1230 addr = unix_create_addr(sunaddr, addr_len); 1231 if (!addr) 1232 return -ENOMEM; 1233 1234 err = mutex_lock_interruptible(&u->bindlock); 1235 if (err) 1236 goto out; 1237 1238 if (u->addr) { 1239 err = -EINVAL; 1240 goto out_mutex; 1241 } 1242 1243 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1244 unix_table_double_lock(net, old_hash, new_hash); 1245 1246 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) 1247 goto out_spin; 1248 1249 __unix_set_addr_hash(net, sk, addr, new_hash); 1250 unix_table_double_unlock(net, old_hash, new_hash); 1251 mutex_unlock(&u->bindlock); 1252 return 0; 1253 1254 out_spin: 1255 unix_table_double_unlock(net, old_hash, new_hash); 1256 err = -EADDRINUSE; 1257 out_mutex: 1258 mutex_unlock(&u->bindlock); 1259 out: 1260 unix_release_addr(addr); 1261 return err; 1262 } 1263 1264 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1265 { 1266 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1267 struct sock *sk = sock->sk; 1268 int err; 1269 1270 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1271 sunaddr->sun_family == AF_UNIX) 1272 return unix_autobind(sk); 1273 1274 err = unix_validate_addr(sunaddr, addr_len); 1275 if (err) 1276 return err; 1277 1278 if (sunaddr->sun_path[0]) 1279 err = unix_bind_bsd(sk, sunaddr, addr_len); 1280 else 1281 err = unix_bind_abstract(sk, sunaddr, addr_len); 1282 1283 return err; 1284 } 1285 1286 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1287 { 1288 if (unlikely(sk1 == sk2) || !sk2) { 1289 unix_state_lock(sk1); 1290 return; 1291 } 1292 if (sk1 < sk2) { 1293 unix_state_lock(sk1); 1294 unix_state_lock_nested(sk2); 1295 } else { 1296 unix_state_lock(sk2); 1297 unix_state_lock_nested(sk1); 1298 } 1299 } 1300 1301 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1302 { 1303 if (unlikely(sk1 == sk2) || !sk2) { 1304 unix_state_unlock(sk1); 1305 return; 1306 } 1307 unix_state_unlock(sk1); 1308 unix_state_unlock(sk2); 1309 } 1310 1311 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1312 int alen, int flags) 1313 { 1314 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1315 struct sock *sk = sock->sk; 1316 struct sock *other; 1317 int err; 1318 1319 err = -EINVAL; 1320 if (alen < offsetofend(struct sockaddr, sa_family)) 1321 goto out; 1322 1323 if (addr->sa_family != AF_UNSPEC) { 1324 err = unix_validate_addr(sunaddr, alen); 1325 if (err) 1326 goto out; 1327 1328 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1329 !unix_sk(sk)->addr) { 1330 err = unix_autobind(sk); 1331 if (err) 1332 goto out; 1333 } 1334 1335 restart: 1336 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); 1337 if (IS_ERR(other)) { 1338 err = PTR_ERR(other); 1339 goto out; 1340 } 1341 1342 unix_state_double_lock(sk, other); 1343 1344 /* Apparently VFS overslept socket death. Retry. 
*/ 1345 if (sock_flag(other, SOCK_DEAD)) { 1346 unix_state_double_unlock(sk, other); 1347 sock_put(other); 1348 goto restart; 1349 } 1350 1351 err = -EPERM; 1352 if (!unix_may_send(sk, other)) 1353 goto out_unlock; 1354 1355 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1356 if (err) 1357 goto out_unlock; 1358 1359 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1360 } else { 1361 /* 1362 * 1003.1g breaking connected state with AF_UNSPEC 1363 */ 1364 other = NULL; 1365 unix_state_double_lock(sk, other); 1366 } 1367 1368 /* 1369 * If it was connected, reconnect. 1370 */ 1371 if (unix_peer(sk)) { 1372 struct sock *old_peer = unix_peer(sk); 1373 1374 unix_peer(sk) = other; 1375 if (!other) 1376 sk->sk_state = TCP_CLOSE; 1377 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1378 1379 unix_state_double_unlock(sk, other); 1380 1381 if (other != old_peer) 1382 unix_dgram_disconnected(sk, old_peer); 1383 sock_put(old_peer); 1384 } else { 1385 unix_peer(sk) = other; 1386 unix_state_double_unlock(sk, other); 1387 } 1388 1389 return 0; 1390 1391 out_unlock: 1392 unix_state_double_unlock(sk, other); 1393 sock_put(other); 1394 out: 1395 return err; 1396 } 1397 1398 static long unix_wait_for_peer(struct sock *other, long timeo) 1399 __releases(&unix_sk(other)->lock) 1400 { 1401 struct unix_sock *u = unix_sk(other); 1402 int sched; 1403 DEFINE_WAIT(wait); 1404 1405 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1406 1407 sched = !sock_flag(other, SOCK_DEAD) && 1408 !(other->sk_shutdown & RCV_SHUTDOWN) && 1409 unix_recvq_full(other); 1410 1411 unix_state_unlock(other); 1412 1413 if (sched) 1414 timeo = schedule_timeout(timeo); 1415 1416 finish_wait(&u->peer_wait, &wait); 1417 return timeo; 1418 } 1419 1420 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1421 int addr_len, int flags) 1422 { 1423 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1424 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; 1425 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1426 struct net *net = sock_net(sk); 1427 struct sk_buff *skb = NULL; 1428 long timeo; 1429 int err; 1430 int st; 1431 1432 err = unix_validate_addr(sunaddr, addr_len); 1433 if (err) 1434 goto out; 1435 1436 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1437 err = unix_autobind(sk); 1438 if (err) 1439 goto out; 1440 } 1441 1442 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1443 1444 /* First of all allocate resources. 1445 If we will make it after state is locked, 1446 we will have to recheck all again in any case. 1447 */ 1448 1449 /* create new sock for complete connection */ 1450 newsk = unix_create1(net, NULL, 0, sock->type); 1451 if (IS_ERR(newsk)) { 1452 err = PTR_ERR(newsk); 1453 newsk = NULL; 1454 goto out; 1455 } 1456 1457 err = -ENOMEM; 1458 1459 /* Allocate skb for sending to listening sock */ 1460 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1461 if (skb == NULL) 1462 goto out; 1463 1464 restart: 1465 /* Find listening sock. */ 1466 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1467 if (IS_ERR(other)) { 1468 err = PTR_ERR(other); 1469 other = NULL; 1470 goto out; 1471 } 1472 1473 /* Latch state of peer */ 1474 unix_state_lock(other); 1475 1476 /* Apparently VFS overslept socket death. Retry. 
*/ 1477 if (sock_flag(other, SOCK_DEAD)) { 1478 unix_state_unlock(other); 1479 sock_put(other); 1480 goto restart; 1481 } 1482 1483 err = -ECONNREFUSED; 1484 if (other->sk_state != TCP_LISTEN) 1485 goto out_unlock; 1486 if (other->sk_shutdown & RCV_SHUTDOWN) 1487 goto out_unlock; 1488 1489 if (unix_recvq_full(other)) { 1490 err = -EAGAIN; 1491 if (!timeo) 1492 goto out_unlock; 1493 1494 timeo = unix_wait_for_peer(other, timeo); 1495 1496 err = sock_intr_errno(timeo); 1497 if (signal_pending(current)) 1498 goto out; 1499 sock_put(other); 1500 goto restart; 1501 } 1502 1503 /* Latch our state. 1504 1505 It is tricky place. We need to grab our state lock and cannot 1506 drop lock on peer. It is dangerous because deadlock is 1507 possible. Connect to self case and simultaneous 1508 attempt to connect are eliminated by checking socket 1509 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1510 check this before attempt to grab lock. 1511 1512 Well, and we have to recheck the state after socket locked. 1513 */ 1514 st = sk->sk_state; 1515 1516 switch (st) { 1517 case TCP_CLOSE: 1518 /* This is ok... continue with connect */ 1519 break; 1520 case TCP_ESTABLISHED: 1521 /* Socket is already connected */ 1522 err = -EISCONN; 1523 goto out_unlock; 1524 default: 1525 err = -EINVAL; 1526 goto out_unlock; 1527 } 1528 1529 unix_state_lock_nested(sk); 1530 1531 if (sk->sk_state != st) { 1532 unix_state_unlock(sk); 1533 unix_state_unlock(other); 1534 sock_put(other); 1535 goto restart; 1536 } 1537 1538 err = security_unix_stream_connect(sk, other, newsk); 1539 if (err) { 1540 unix_state_unlock(sk); 1541 goto out_unlock; 1542 } 1543 1544 /* The way is open! Fastly set all the necessary fields... */ 1545 1546 sock_hold(sk); 1547 unix_peer(newsk) = sk; 1548 newsk->sk_state = TCP_ESTABLISHED; 1549 newsk->sk_type = sk->sk_type; 1550 init_peercred(newsk); 1551 newu = unix_sk(newsk); 1552 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1553 otheru = unix_sk(other); 1554 1555 /* copy address information from listening to new sock 1556 * 1557 * The contents of *(otheru->addr) and otheru->path 1558 * are seen fully set up here, since we have found 1559 * otheru in hash under its lock. Insertion into the 1560 * hash chain we'd found it in had been done in an 1561 * earlier critical area protected by the chain's lock, 1562 * the same one where we'd set *(otheru->addr) contents, 1563 * as well as otheru->path and otheru->addr itself. 1564 * 1565 * Using smp_store_release() here to set newu->addr 1566 * is enough to make those stores, as well as stores 1567 * to newu->path visible to anyone who gets newu->addr 1568 * by smp_load_acquire(). IOW, the same warranties 1569 * as for unix_sock instances bound in unix_bind() or 1570 * in unix_autobind(). 
1571 */ 1572 if (otheru->path.dentry) { 1573 path_get(&otheru->path); 1574 newu->path = otheru->path; 1575 } 1576 refcount_inc(&otheru->addr->refcnt); 1577 smp_store_release(&newu->addr, otheru->addr); 1578 1579 /* Set credentials */ 1580 copy_peercred(sk, other); 1581 1582 sock->state = SS_CONNECTED; 1583 sk->sk_state = TCP_ESTABLISHED; 1584 sock_hold(newsk); 1585 1586 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1587 unix_peer(sk) = newsk; 1588 1589 unix_state_unlock(sk); 1590 1591 /* take ten and send info to listening sock */ 1592 spin_lock(&other->sk_receive_queue.lock); 1593 __skb_queue_tail(&other->sk_receive_queue, skb); 1594 spin_unlock(&other->sk_receive_queue.lock); 1595 unix_state_unlock(other); 1596 other->sk_data_ready(other); 1597 sock_put(other); 1598 return 0; 1599 1600 out_unlock: 1601 if (other) 1602 unix_state_unlock(other); 1603 1604 out: 1605 kfree_skb(skb); 1606 if (newsk) 1607 unix_release_sock(newsk, 0); 1608 if (other) 1609 sock_put(other); 1610 return err; 1611 } 1612 1613 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1614 { 1615 struct sock *ska = socka->sk, *skb = sockb->sk; 1616 1617 /* Join our sockets back to back */ 1618 sock_hold(ska); 1619 sock_hold(skb); 1620 unix_peer(ska) = skb; 1621 unix_peer(skb) = ska; 1622 init_peercred(ska); 1623 init_peercred(skb); 1624 1625 ska->sk_state = TCP_ESTABLISHED; 1626 skb->sk_state = TCP_ESTABLISHED; 1627 socka->state = SS_CONNECTED; 1628 sockb->state = SS_CONNECTED; 1629 return 0; 1630 } 1631 1632 static void unix_sock_inherit_flags(const struct socket *old, 1633 struct socket *new) 1634 { 1635 if (test_bit(SOCK_PASSCRED, &old->flags)) 1636 set_bit(SOCK_PASSCRED, &new->flags); 1637 if (test_bit(SOCK_PASSSEC, &old->flags)) 1638 set_bit(SOCK_PASSSEC, &new->flags); 1639 } 1640 1641 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1642 bool kern) 1643 { 1644 struct sock *sk = sock->sk; 1645 struct sock *tsk; 1646 struct sk_buff *skb; 1647 int err; 1648 1649 err = -EOPNOTSUPP; 1650 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1651 goto out; 1652 1653 err = -EINVAL; 1654 if (sk->sk_state != TCP_LISTEN) 1655 goto out; 1656 1657 /* If socket state is TCP_LISTEN it cannot change (for now...), 1658 * so that no locks are necessary. 1659 */ 1660 1661 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1662 &err); 1663 if (!skb) { 1664 /* This means receive shutdown. 
*/ 1665 if (err == 0) 1666 err = -EINVAL; 1667 goto out; 1668 } 1669 1670 tsk = skb->sk; 1671 skb_free_datagram(sk, skb); 1672 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1673 1674 /* attach accepted sock to socket */ 1675 unix_state_lock(tsk); 1676 newsock->state = SS_CONNECTED; 1677 unix_sock_inherit_flags(sock, newsock); 1678 sock_graft(tsk, newsock); 1679 unix_state_unlock(tsk); 1680 return 0; 1681 1682 out: 1683 return err; 1684 } 1685 1686 1687 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1688 { 1689 struct sock *sk = sock->sk; 1690 struct unix_address *addr; 1691 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1692 int err = 0; 1693 1694 if (peer) { 1695 sk = unix_peer_get(sk); 1696 1697 err = -ENOTCONN; 1698 if (!sk) 1699 goto out; 1700 err = 0; 1701 } else { 1702 sock_hold(sk); 1703 } 1704 1705 addr = smp_load_acquire(&unix_sk(sk)->addr); 1706 if (!addr) { 1707 sunaddr->sun_family = AF_UNIX; 1708 sunaddr->sun_path[0] = 0; 1709 err = offsetof(struct sockaddr_un, sun_path); 1710 } else { 1711 err = addr->len; 1712 memcpy(sunaddr, addr->name, addr->len); 1713 } 1714 sock_put(sk); 1715 out: 1716 return err; 1717 } 1718 1719 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1720 { 1721 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1722 1723 /* 1724 * Garbage collection of unix sockets starts by selecting a set of 1725 * candidate sockets which have reference only from being in flight 1726 * (total_refs == inflight_refs). This condition is checked once during 1727 * the candidate collection phase, and candidates are marked as such, so 1728 * that non-candidates can later be ignored. While inflight_refs is 1729 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1730 * is an instantaneous decision. 1731 * 1732 * Once a candidate, however, the socket must not be reinstalled into a 1733 * file descriptor while the garbage collection is in progress. 1734 * 1735 * If the above conditions are met, then the directed graph of 1736 * candidates (*) does not change while unix_gc_lock is held. 1737 * 1738 * Any operations that changes the file count through file descriptors 1739 * (dup, close, sendmsg) does not change the graph since candidates are 1740 * not installed in fds. 1741 * 1742 * Dequeing a candidate via recvmsg would install it into an fd, but 1743 * that takes unix_gc_lock to decrement the inflight count, so it's 1744 * serialized with garbage collection. 1745 * 1746 * MSG_PEEK is special in that it does not change the inflight count, 1747 * yet does install the socket into an fd. The following lock/unlock 1748 * pair is to ensure serialization with garbage collection. It must be 1749 * done between incrementing the file count and installing the file into 1750 * an fd. 1751 * 1752 * If garbage collection starts after the barrier provided by the 1753 * lock/unlock, then it will see the elevated refcount and not mark this 1754 * as a candidate. If a garbage collection is already in progress 1755 * before the file count was incremented, then the lock/unlock pair will 1756 * ensure that garbage collection is finished before progressing to 1757 * installing the fd. 1758 * 1759 * (*) A -> B where B is on the queue of A or B is on the queue of C 1760 * which is on the queue of listening socket A. 
1761 */ 1762 spin_lock(&unix_gc_lock); 1763 spin_unlock(&unix_gc_lock); 1764 } 1765 1766 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1767 { 1768 int err = 0; 1769 1770 UNIXCB(skb).pid = get_pid(scm->pid); 1771 UNIXCB(skb).uid = scm->creds.uid; 1772 UNIXCB(skb).gid = scm->creds.gid; 1773 UNIXCB(skb).fp = NULL; 1774 unix_get_secdata(scm, skb); 1775 if (scm->fp && send_fds) 1776 err = unix_attach_fds(scm, skb); 1777 1778 skb->destructor = unix_destruct_scm; 1779 return err; 1780 } 1781 1782 static bool unix_passcred_enabled(const struct socket *sock, 1783 const struct sock *other) 1784 { 1785 return test_bit(SOCK_PASSCRED, &sock->flags) || 1786 !other->sk_socket || 1787 test_bit(SOCK_PASSCRED, &other->sk_socket->flags); 1788 } 1789 1790 /* 1791 * Some apps rely on write() giving SCM_CREDENTIALS 1792 * We include credentials if source or destination socket 1793 * asserted SOCK_PASSCRED. 1794 */ 1795 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1796 const struct sock *other) 1797 { 1798 if (UNIXCB(skb).pid) 1799 return; 1800 if (unix_passcred_enabled(sock, other)) { 1801 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1802 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1803 } 1804 } 1805 1806 static int maybe_init_creds(struct scm_cookie *scm, 1807 struct socket *socket, 1808 const struct sock *other) 1809 { 1810 int err; 1811 struct msghdr msg = { .msg_controllen = 0 }; 1812 1813 err = scm_send(socket, &msg, scm, false); 1814 if (err) 1815 return err; 1816 1817 if (unix_passcred_enabled(socket, other)) { 1818 scm->pid = get_pid(task_tgid(current)); 1819 current_uid_gid(&scm->creds.uid, &scm->creds.gid); 1820 } 1821 return err; 1822 } 1823 1824 static bool unix_skb_scm_eq(struct sk_buff *skb, 1825 struct scm_cookie *scm) 1826 { 1827 return UNIXCB(skb).pid == scm->pid && 1828 uid_eq(UNIXCB(skb).uid, scm->creds.uid) && 1829 gid_eq(UNIXCB(skb).gid, scm->creds.gid) && 1830 unix_secdata_eq(scm, skb); 1831 } 1832 1833 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1834 { 1835 struct scm_fp_list *fp = UNIXCB(skb).fp; 1836 struct unix_sock *u = unix_sk(sk); 1837 1838 if (unlikely(fp && fp->count)) 1839 atomic_add(fp->count, &u->scm_stat.nr_fds); 1840 } 1841 1842 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1843 { 1844 struct scm_fp_list *fp = UNIXCB(skb).fp; 1845 struct unix_sock *u = unix_sk(sk); 1846 1847 if (unlikely(fp && fp->count)) 1848 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1849 } 1850 1851 /* 1852 * Send AF_UNIX data. 
1853 */ 1854 1855 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1856 size_t len) 1857 { 1858 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1859 struct sock *sk = sock->sk, *other = NULL; 1860 struct unix_sock *u = unix_sk(sk); 1861 struct scm_cookie scm; 1862 struct sk_buff *skb; 1863 int data_len = 0; 1864 int sk_locked; 1865 long timeo; 1866 int err; 1867 1868 wait_for_unix_gc(); 1869 err = scm_send(sock, msg, &scm, false); 1870 if (err < 0) 1871 return err; 1872 1873 err = -EOPNOTSUPP; 1874 if (msg->msg_flags&MSG_OOB) 1875 goto out; 1876 1877 if (msg->msg_namelen) { 1878 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1879 if (err) 1880 goto out; 1881 } else { 1882 sunaddr = NULL; 1883 err = -ENOTCONN; 1884 other = unix_peer_get(sk); 1885 if (!other) 1886 goto out; 1887 } 1888 1889 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1890 err = unix_autobind(sk); 1891 if (err) 1892 goto out; 1893 } 1894 1895 err = -EMSGSIZE; 1896 if (len > sk->sk_sndbuf - 32) 1897 goto out; 1898 1899 if (len > SKB_MAX_ALLOC) { 1900 data_len = min_t(size_t, 1901 len - SKB_MAX_ALLOC, 1902 MAX_SKB_FRAGS * PAGE_SIZE); 1903 data_len = PAGE_ALIGN(data_len); 1904 1905 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1906 } 1907 1908 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1909 msg->msg_flags & MSG_DONTWAIT, &err, 1910 PAGE_ALLOC_COSTLY_ORDER); 1911 if (skb == NULL) 1912 goto out; 1913 1914 err = unix_scm_to_skb(&scm, skb, true); 1915 if (err < 0) 1916 goto out_free; 1917 1918 skb_put(skb, len - data_len); 1919 skb->data_len = data_len; 1920 skb->len = len; 1921 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1922 if (err) 1923 goto out_free; 1924 1925 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1926 1927 restart: 1928 if (!other) { 1929 err = -ECONNRESET; 1930 if (sunaddr == NULL) 1931 goto out_free; 1932 1933 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, 1934 sk->sk_type); 1935 if (IS_ERR(other)) { 1936 err = PTR_ERR(other); 1937 other = NULL; 1938 goto out_free; 1939 } 1940 } 1941 1942 if (sk_filter(other, skb) < 0) { 1943 /* Toss the packet but do not return any error to the sender */ 1944 err = len; 1945 goto out_free; 1946 } 1947 1948 sk_locked = 0; 1949 unix_state_lock(other); 1950 restart_locked: 1951 err = -EPERM; 1952 if (!unix_may_send(sk, other)) 1953 goto out_unlock; 1954 1955 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1956 /* 1957 * Check with 1003.1g - what should 1958 * datagram error 1959 */ 1960 unix_state_unlock(other); 1961 sock_put(other); 1962 1963 if (!sk_locked) 1964 unix_state_lock(sk); 1965 1966 err = 0; 1967 if (unix_peer(sk) == other) { 1968 unix_peer(sk) = NULL; 1969 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 1970 1971 unix_state_unlock(sk); 1972 1973 sk->sk_state = TCP_CLOSE; 1974 unix_dgram_disconnected(sk, other); 1975 sock_put(other); 1976 err = -ECONNREFUSED; 1977 } else { 1978 unix_state_unlock(sk); 1979 } 1980 1981 other = NULL; 1982 if (err) 1983 goto out_free; 1984 goto restart; 1985 } 1986 1987 err = -EPIPE; 1988 if (other->sk_shutdown & RCV_SHUTDOWN) 1989 goto out_unlock; 1990 1991 if (sk->sk_type != SOCK_SEQPACKET) { 1992 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1993 if (err) 1994 goto out_unlock; 1995 } 1996 1997 /* other == sk && unix_peer(other) != sk if 1998 * - unix_peer(sk) == NULL, destination address bound to sk 1999 * - unix_peer(sk) == sk by time of get but disconnected before lock 2000 */ 2001 if (other != sk && 2002 
unlikely(unix_peer(other) != sk && 2003 unix_recvq_full_lockless(other))) { 2004 if (timeo) { 2005 timeo = unix_wait_for_peer(other, timeo); 2006 2007 err = sock_intr_errno(timeo); 2008 if (signal_pending(current)) 2009 goto out_free; 2010 2011 goto restart; 2012 } 2013 2014 if (!sk_locked) { 2015 unix_state_unlock(other); 2016 unix_state_double_lock(sk, other); 2017 } 2018 2019 if (unix_peer(sk) != other || 2020 unix_dgram_peer_wake_me(sk, other)) { 2021 err = -EAGAIN; 2022 sk_locked = 1; 2023 goto out_unlock; 2024 } 2025 2026 if (!sk_locked) { 2027 sk_locked = 1; 2028 goto restart_locked; 2029 } 2030 } 2031 2032 if (unlikely(sk_locked)) 2033 unix_state_unlock(sk); 2034 2035 if (sock_flag(other, SOCK_RCVTSTAMP)) 2036 __net_timestamp(skb); 2037 maybe_add_creds(skb, sock, other); 2038 scm_stat_add(other, skb); 2039 skb_queue_tail(&other->sk_receive_queue, skb); 2040 unix_state_unlock(other); 2041 other->sk_data_ready(other); 2042 sock_put(other); 2043 scm_destroy(&scm); 2044 return len; 2045 2046 out_unlock: 2047 if (sk_locked) 2048 unix_state_unlock(sk); 2049 unix_state_unlock(other); 2050 out_free: 2051 kfree_skb(skb); 2052 out: 2053 if (other) 2054 sock_put(other); 2055 scm_destroy(&scm); 2056 return err; 2057 } 2058 2059 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2060 * bytes, and a minimum of a full page. 2061 */ 2062 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2063 2064 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2065 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) 2066 { 2067 struct unix_sock *ousk = unix_sk(other); 2068 struct sk_buff *skb; 2069 int err = 0; 2070 2071 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2072 2073 if (!skb) 2074 return err; 2075 2076 skb_put(skb, 1); 2077 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2078 2079 if (err) { 2080 kfree_skb(skb); 2081 return err; 2082 } 2083 2084 unix_state_lock(other); 2085 2086 if (sock_flag(other, SOCK_DEAD) || 2087 (other->sk_shutdown & RCV_SHUTDOWN)) { 2088 unix_state_unlock(other); 2089 kfree_skb(skb); 2090 return -EPIPE; 2091 } 2092 2093 maybe_add_creds(skb, sock, other); 2094 skb_get(skb); 2095 2096 if (ousk->oob_skb) 2097 consume_skb(ousk->oob_skb); 2098 2099 WRITE_ONCE(ousk->oob_skb, skb); 2100 2101 scm_stat_add(other, skb); 2102 skb_queue_tail(&other->sk_receive_queue, skb); 2103 sk_send_sigurg(other); 2104 unix_state_unlock(other); 2105 other->sk_data_ready(other); 2106 2107 return err; 2108 } 2109 #endif 2110 2111 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2112 size_t len) 2113 { 2114 struct sock *sk = sock->sk; 2115 struct sock *other = NULL; 2116 int err, size; 2117 struct sk_buff *skb; 2118 int sent = 0; 2119 struct scm_cookie scm; 2120 bool fds_sent = false; 2121 int data_len; 2122 2123 wait_for_unix_gc(); 2124 err = scm_send(sock, msg, &scm, false); 2125 if (err < 0) 2126 return err; 2127 2128 err = -EOPNOTSUPP; 2129 if (msg->msg_flags & MSG_OOB) { 2130 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2131 if (len) 2132 len--; 2133 else 2134 #endif 2135 goto out_err; 2136 } 2137 2138 if (msg->msg_namelen) { 2139 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2140 goto out_err; 2141 } else { 2142 err = -ENOTCONN; 2143 other = unix_peer(sk); 2144 if (!other) 2145 goto out_err; 2146 } 2147 2148 if (sk->sk_shutdown & SEND_SHUTDOWN) 2149 goto pipe_err; 2150 2151 while (sent < len) { 2152 size = len - sent; 2153 2154 /* Keep two messages in the pipe so it schedules better */ 2155 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2156 2157 /* allow fallback to order-0 allocations */ 2158 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2159 2160 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2161 2162 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2163 2164 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2165 msg->msg_flags & MSG_DONTWAIT, &err, 2166 get_order(UNIX_SKB_FRAGS_SZ)); 2167 if (!skb) 2168 goto out_err; 2169 2170 /* Only send the fds in the first buffer */ 2171 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2172 if (err < 0) { 2173 kfree_skb(skb); 2174 goto out_err; 2175 } 2176 fds_sent = true; 2177 2178 skb_put(skb, size - data_len); 2179 skb->data_len = data_len; 2180 skb->len = size; 2181 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2182 if (err) { 2183 kfree_skb(skb); 2184 goto out_err; 2185 } 2186 2187 unix_state_lock(other); 2188 2189 if (sock_flag(other, SOCK_DEAD) || 2190 (other->sk_shutdown & RCV_SHUTDOWN)) 2191 goto pipe_err_free; 2192 2193 maybe_add_creds(skb, sock, other); 2194 scm_stat_add(other, skb); 2195 skb_queue_tail(&other->sk_receive_queue, skb); 2196 unix_state_unlock(other); 2197 other->sk_data_ready(other); 2198 sent += size; 2199 } 2200 2201 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2202 if (msg->msg_flags & MSG_OOB) { 2203 err = queue_oob(sock, msg, other); 2204 if (err) 2205 goto out_err; 2206 sent++; 2207 } 2208 #endif 2209 2210 scm_destroy(&scm); 2211 2212 return sent; 2213 2214 pipe_err_free: 2215 unix_state_unlock(other); 2216 kfree_skb(skb); 2217 pipe_err: 2218 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2219 send_sig(SIGPIPE, current, 0); 2220 err = -EPIPE; 2221 out_err: 2222 scm_destroy(&scm); 2223 return sent ? : err; 2224 } 2225 2226 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2227 int offset, size_t size, int flags) 2228 { 2229 int err; 2230 bool send_sigpipe = false; 2231 bool init_scm = true; 2232 struct scm_cookie scm; 2233 struct sock *other, *sk = socket->sk; 2234 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2235 2236 if (flags & MSG_OOB) 2237 return -EOPNOTSUPP; 2238 2239 other = unix_peer(sk); 2240 if (!other || sk->sk_state != TCP_ESTABLISHED) 2241 return -ENOTCONN; 2242 2243 if (false) { 2244 alloc_skb: 2245 unix_state_unlock(other); 2246 mutex_unlock(&unix_sk(other)->iolock); 2247 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2248 &err, 0); 2249 if (!newskb) 2250 goto err; 2251 } 2252 2253 /* we must acquire iolock as we modify already present 2254 * skbs in the sk_receive_queue and mess with skb->len 2255 */ 2256 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2257 if (err) { 2258 err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; 2259 goto err; 2260 } 2261 2262 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2263 err = -EPIPE; 2264 send_sigpipe = true; 2265 goto err_unlock; 2266 } 2267 2268 unix_state_lock(other); 2269 2270 if (sock_flag(other, SOCK_DEAD) || 2271 other->sk_shutdown & RCV_SHUTDOWN) { 2272 err = -EPIPE; 2273 send_sigpipe = true; 2274 goto err_state_unlock; 2275 } 2276 2277 if (init_scm) { 2278 err = maybe_init_creds(&scm, socket, other); 2279 if (err) 2280 goto err_state_unlock; 2281 init_scm = false; 2282 } 2283 2284 skb = skb_peek_tail(&other->sk_receive_queue); 2285 if (tail && tail == skb) { 2286 skb = newskb; 2287 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2288 if (newskb) { 2289 skb = newskb; 2290 } else { 2291 tail = skb; 2292 goto alloc_skb; 2293 } 2294 } else if (newskb) { 2295 /* this is fast path, we don't necessarily need to 2296 * call to kfree_skb even though with newskb == NULL 2297 * this - does no harm 2298 */ 2299 consume_skb(newskb); 2300 newskb = NULL; 2301 } 2302 2303 if (skb_append_pagefrags(skb, page, offset, size)) { 2304 tail = skb; 2305 goto alloc_skb; 2306 } 2307 2308 skb->len += size; 2309 skb->data_len += size; 2310 skb->truesize += size; 2311 refcount_add(size, &sk->sk_wmem_alloc); 2312 2313 if (newskb) { 2314 err = unix_scm_to_skb(&scm, skb, false); 2315 if (err) 2316 goto err_state_unlock; 2317 spin_lock(&other->sk_receive_queue.lock); 2318 __skb_queue_tail(&other->sk_receive_queue, newskb); 2319 spin_unlock(&other->sk_receive_queue.lock); 2320 } 2321 2322 unix_state_unlock(other); 2323 mutex_unlock(&unix_sk(other)->iolock); 2324 2325 other->sk_data_ready(other); 2326 scm_destroy(&scm); 2327 return size; 2328 2329 err_state_unlock: 2330 unix_state_unlock(other); 2331 err_unlock: 2332 mutex_unlock(&unix_sk(other)->iolock); 2333 err: 2334 kfree_skb(newskb); 2335 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2336 send_sig(SIGPIPE, current, 0); 2337 if (!init_scm) 2338 scm_destroy(&scm); 2339 return err; 2340 } 2341 2342 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2343 size_t len) 2344 { 2345 int err; 2346 struct sock *sk = sock->sk; 2347 2348 err = sock_error(sk); 2349 if (err) 2350 return err; 2351 2352 if (sk->sk_state != TCP_ESTABLISHED) 2353 return -ENOTCONN; 2354 2355 if (msg->msg_namelen) 2356 msg->msg_namelen = 0; 2357 2358 return unix_dgram_sendmsg(sock, msg, len); 2359 } 2360 2361 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2362 size_t size, int flags) 2363 { 2364 struct sock *sk = sock->sk; 2365 2366 if (sk->sk_state != TCP_ESTABLISHED) 2367 return -ENOTCONN; 2368 2369 return unix_dgram_recvmsg(sock, msg, size, flags); 2370 } 2371 2372 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2373 { 2374 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2375 2376 if (addr) { 2377 msg->msg_namelen = addr->len; 2378 memcpy(msg->msg_name, addr->name, addr->len); 2379 } 2380 } 2381 2382 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2383 int flags) 2384 { 2385 struct scm_cookie scm; 2386 struct socket *sock = sk->sk_socket; 2387 struct unix_sock *u = unix_sk(sk); 2388 struct sk_buff *skb, *last; 2389 long timeo; 2390 int skip; 2391 int err; 2392 2393 err = -EOPNOTSUPP; 2394 if (flags&MSG_OOB) 2395 goto out; 2396 2397 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2398 2399 do { 2400 mutex_lock(&u->iolock); 2401 2402 skip = sk_peek_offset(sk, flags); 2403 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2404 &skip, 
&err, &last); 2405 if (skb) { 2406 if (!(flags & MSG_PEEK)) 2407 scm_stat_del(sk, skb); 2408 break; 2409 } 2410 2411 mutex_unlock(&u->iolock); 2412 2413 if (err != -EAGAIN) 2414 break; 2415 } while (timeo && 2416 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2417 &err, &timeo, last)); 2418 2419 if (!skb) { /* implies iolock unlocked */ 2420 unix_state_lock(sk); 2421 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2422 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2423 (sk->sk_shutdown & RCV_SHUTDOWN)) 2424 err = 0; 2425 unix_state_unlock(sk); 2426 goto out; 2427 } 2428 2429 if (wq_has_sleeper(&u->peer_wait)) 2430 wake_up_interruptible_sync_poll(&u->peer_wait, 2431 EPOLLOUT | EPOLLWRNORM | 2432 EPOLLWRBAND); 2433 2434 if (msg->msg_name) 2435 unix_copy_addr(msg, skb->sk); 2436 2437 if (size > skb->len - skip) 2438 size = skb->len - skip; 2439 else if (size < skb->len - skip) 2440 msg->msg_flags |= MSG_TRUNC; 2441 2442 err = skb_copy_datagram_msg(skb, skip, msg, size); 2443 if (err) 2444 goto out_free; 2445 2446 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2447 __sock_recv_timestamp(msg, sk, skb); 2448 2449 memset(&scm, 0, sizeof(scm)); 2450 2451 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2452 unix_set_secdata(&scm, skb); 2453 2454 if (!(flags & MSG_PEEK)) { 2455 if (UNIXCB(skb).fp) 2456 unix_detach_fds(&scm, skb); 2457 2458 sk_peek_offset_bwd(sk, skb->len); 2459 } else { 2460 /* It is questionable: on PEEK we could: 2461 - do not return fds - good, but too simple 8) 2462 - return fds, and do not return them on read (old strategy, 2463 apparently wrong) 2464 - clone fds (I chose it for now, it is the most universal 2465 solution) 2466 2467 POSIX 1003.1g does not actually define this clearly 2468 at all. POSIX 1003.1g doesn't define a lot of things 2469 clearly however! 2470 2471 */ 2472 2473 sk_peek_offset_fwd(sk, size); 2474 2475 if (UNIXCB(skb).fp) 2476 unix_peek_fds(&scm, skb); 2477 } 2478 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2479 2480 scm_recv(sock, msg, &scm, flags); 2481 2482 out_free: 2483 skb_free_datagram(sk, skb); 2484 mutex_unlock(&u->iolock); 2485 out: 2486 return err; 2487 } 2488 2489 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2490 int flags) 2491 { 2492 struct sock *sk = sock->sk; 2493 2494 #ifdef CONFIG_BPF_SYSCALL 2495 const struct proto *prot = READ_ONCE(sk->sk_prot); 2496 2497 if (prot != &unix_dgram_proto) 2498 return prot->recvmsg(sk, msg, size, flags, NULL); 2499 #endif 2500 return __unix_dgram_recvmsg(sk, msg, size, flags); 2501 } 2502 2503 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc, 2504 sk_read_actor_t recv_actor) 2505 { 2506 int copied = 0; 2507 2508 while (1) { 2509 struct unix_sock *u = unix_sk(sk); 2510 struct sk_buff *skb; 2511 int used, err; 2512 2513 mutex_lock(&u->iolock); 2514 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2515 mutex_unlock(&u->iolock); 2516 if (!skb) 2517 return err; 2518 2519 used = recv_actor(desc, skb, 0, skb->len); 2520 if (used <= 0) { 2521 if (!copied) 2522 copied = used; 2523 kfree_skb(skb); 2524 break; 2525 } else if (used <= skb->len) { 2526 copied += used; 2527 } 2528 2529 kfree_skb(skb); 2530 if (!desc->count) 2531 break; 2532 } 2533 2534 return copied; 2535 } 2536 2537 /* 2538 * Sleep until more data has arrived. But check for races.. 
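 *
 * More precisely: under unix_state_lock() we keep sleeping only while the
 * receive queue tail is still the skb the caller last saw (and its length
 * is unchanged), there is no pending error or RCV_SHUTDOWN, no signal is
 * pending and the timeout has not expired; after waking we also stop if
 * the socket was marked SOCK_DEAD while we slept without the lock.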
2539 */ 2540 static long unix_stream_data_wait(struct sock *sk, long timeo, 2541 struct sk_buff *last, unsigned int last_len, 2542 bool freezable) 2543 { 2544 struct sk_buff *tail; 2545 DEFINE_WAIT(wait); 2546 2547 unix_state_lock(sk); 2548 2549 for (;;) { 2550 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2551 2552 tail = skb_peek_tail(&sk->sk_receive_queue); 2553 if (tail != last || 2554 (tail && tail->len != last_len) || 2555 sk->sk_err || 2556 (sk->sk_shutdown & RCV_SHUTDOWN) || 2557 signal_pending(current) || 2558 !timeo) 2559 break; 2560 2561 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2562 unix_state_unlock(sk); 2563 if (freezable) 2564 timeo = freezable_schedule_timeout(timeo); 2565 else 2566 timeo = schedule_timeout(timeo); 2567 unix_state_lock(sk); 2568 2569 if (sock_flag(sk, SOCK_DEAD)) 2570 break; 2571 2572 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2573 } 2574 2575 finish_wait(sk_sleep(sk), &wait); 2576 unix_state_unlock(sk); 2577 return timeo; 2578 } 2579 2580 static unsigned int unix_skb_len(const struct sk_buff *skb) 2581 { 2582 return skb->len - UNIXCB(skb).consumed; 2583 } 2584 2585 struct unix_stream_read_state { 2586 int (*recv_actor)(struct sk_buff *, int, int, 2587 struct unix_stream_read_state *); 2588 struct socket *socket; 2589 struct msghdr *msg; 2590 struct pipe_inode_info *pipe; 2591 size_t size; 2592 int flags; 2593 unsigned int splice_flags; 2594 }; 2595 2596 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2597 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2598 { 2599 struct socket *sock = state->socket; 2600 struct sock *sk = sock->sk; 2601 struct unix_sock *u = unix_sk(sk); 2602 int chunk = 1; 2603 struct sk_buff *oob_skb; 2604 2605 mutex_lock(&u->iolock); 2606 unix_state_lock(sk); 2607 2608 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2609 unix_state_unlock(sk); 2610 mutex_unlock(&u->iolock); 2611 return -EINVAL; 2612 } 2613 2614 oob_skb = u->oob_skb; 2615 2616 if (!(state->flags & MSG_PEEK)) 2617 WRITE_ONCE(u->oob_skb, NULL); 2618 2619 unix_state_unlock(sk); 2620 2621 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2622 2623 if (!(state->flags & MSG_PEEK)) { 2624 UNIXCB(oob_skb).consumed += 1; 2625 kfree_skb(oob_skb); 2626 } 2627 2628 mutex_unlock(&u->iolock); 2629 2630 if (chunk < 0) 2631 return -EFAULT; 2632 2633 state->msg->msg_flags |= MSG_OOB; 2634 return 1; 2635 } 2636 2637 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2638 int flags, int copied) 2639 { 2640 struct unix_sock *u = unix_sk(sk); 2641 2642 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2643 skb_unlink(skb, &sk->sk_receive_queue); 2644 consume_skb(skb); 2645 skb = NULL; 2646 } else { 2647 if (skb == u->oob_skb) { 2648 if (copied) { 2649 skb = NULL; 2650 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2651 if (!(flags & MSG_PEEK)) { 2652 WRITE_ONCE(u->oob_skb, NULL); 2653 consume_skb(skb); 2654 } 2655 } else if (!(flags & MSG_PEEK)) { 2656 skb_unlink(skb, &sk->sk_receive_queue); 2657 consume_skb(skb); 2658 skb = skb_peek(&sk->sk_receive_queue); 2659 } 2660 } 2661 } 2662 return skb; 2663 } 2664 #endif 2665 2666 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc, 2667 sk_read_actor_t recv_actor) 2668 { 2669 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2670 return -ENOTCONN; 2671 2672 return unix_read_sock(sk, desc, recv_actor); 2673 } 2674 2675 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2676 bool freezable) 2677 { 2678 struct scm_cookie scm; 2679 struct socket *sock = state->socket; 
2680 struct sock *sk = sock->sk; 2681 struct unix_sock *u = unix_sk(sk); 2682 int copied = 0; 2683 int flags = state->flags; 2684 int noblock = flags & MSG_DONTWAIT; 2685 bool check_creds = false; 2686 int target; 2687 int err = 0; 2688 long timeo; 2689 int skip; 2690 size_t size = state->size; 2691 unsigned int last_len; 2692 2693 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2694 err = -EINVAL; 2695 goto out; 2696 } 2697 2698 if (unlikely(flags & MSG_OOB)) { 2699 err = -EOPNOTSUPP; 2700 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2701 err = unix_stream_recv_urg(state); 2702 #endif 2703 goto out; 2704 } 2705 2706 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2707 timeo = sock_rcvtimeo(sk, noblock); 2708 2709 memset(&scm, 0, sizeof(scm)); 2710 2711 /* Lock the socket to prevent queue disordering 2712 * while sleeps in memcpy_tomsg 2713 */ 2714 mutex_lock(&u->iolock); 2715 2716 skip = max(sk_peek_offset(sk, flags), 0); 2717 2718 do { 2719 int chunk; 2720 bool drop_skb; 2721 struct sk_buff *skb, *last; 2722 2723 redo: 2724 unix_state_lock(sk); 2725 if (sock_flag(sk, SOCK_DEAD)) { 2726 err = -ECONNRESET; 2727 goto unlock; 2728 } 2729 last = skb = skb_peek(&sk->sk_receive_queue); 2730 last_len = last ? last->len : 0; 2731 2732 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2733 if (skb) { 2734 skb = manage_oob(skb, sk, flags, copied); 2735 if (!skb) { 2736 unix_state_unlock(sk); 2737 if (copied) 2738 break; 2739 goto redo; 2740 } 2741 } 2742 #endif 2743 again: 2744 if (skb == NULL) { 2745 if (copied >= target) 2746 goto unlock; 2747 2748 /* 2749 * POSIX 1003.1g mandates this order. 2750 */ 2751 2752 err = sock_error(sk); 2753 if (err) 2754 goto unlock; 2755 if (sk->sk_shutdown & RCV_SHUTDOWN) 2756 goto unlock; 2757 2758 unix_state_unlock(sk); 2759 if (!timeo) { 2760 err = -EAGAIN; 2761 break; 2762 } 2763 2764 mutex_unlock(&u->iolock); 2765 2766 timeo = unix_stream_data_wait(sk, timeo, last, 2767 last_len, freezable); 2768 2769 if (signal_pending(current)) { 2770 err = sock_intr_errno(timeo); 2771 scm_destroy(&scm); 2772 goto out; 2773 } 2774 2775 mutex_lock(&u->iolock); 2776 goto redo; 2777 unlock: 2778 unix_state_unlock(sk); 2779 break; 2780 } 2781 2782 while (skip >= unix_skb_len(skb)) { 2783 skip -= unix_skb_len(skb); 2784 last = skb; 2785 last_len = skb->len; 2786 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2787 if (!skb) 2788 goto again; 2789 } 2790 2791 unix_state_unlock(sk); 2792 2793 if (check_creds) { 2794 /* Never glue messages from different writers */ 2795 if (!unix_skb_scm_eq(skb, &scm)) 2796 break; 2797 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2798 /* Copy credentials */ 2799 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2800 unix_set_secdata(&scm, skb); 2801 check_creds = true; 2802 } 2803 2804 /* Copy address just once */ 2805 if (state->msg && state->msg->msg_name) { 2806 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2807 state->msg->msg_name); 2808 unix_copy_addr(state->msg, skb->sk); 2809 sunaddr = NULL; 2810 } 2811 2812 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2813 skb_get(skb); 2814 chunk = state->recv_actor(skb, skip, chunk, state); 2815 drop_skb = !unix_skb_len(skb); 2816 /* skb is only safe to use if !drop_skb */ 2817 consume_skb(skb); 2818 if (chunk < 0) { 2819 if (copied == 0) 2820 copied = -EFAULT; 2821 break; 2822 } 2823 copied += chunk; 2824 size -= chunk; 2825 2826 if (drop_skb) { 2827 /* the skb was touched by a concurrent reader; 2828 * we should not expect anything from this skb 2829 * anymore and assume it 
invalid - we can be 2830 * sure it was dropped from the socket queue 2831 * 2832 * let's report a short read 2833 */ 2834 err = 0; 2835 break; 2836 } 2837 2838 /* Mark read part of skb as used */ 2839 if (!(flags & MSG_PEEK)) { 2840 UNIXCB(skb).consumed += chunk; 2841 2842 sk_peek_offset_bwd(sk, chunk); 2843 2844 if (UNIXCB(skb).fp) { 2845 scm_stat_del(sk, skb); 2846 unix_detach_fds(&scm, skb); 2847 } 2848 2849 if (unix_skb_len(skb)) 2850 break; 2851 2852 skb_unlink(skb, &sk->sk_receive_queue); 2853 consume_skb(skb); 2854 2855 if (scm.fp) 2856 break; 2857 } else { 2858 /* It is questionable, see note in unix_dgram_recvmsg. 2859 */ 2860 if (UNIXCB(skb).fp) 2861 unix_peek_fds(&scm, skb); 2862 2863 sk_peek_offset_fwd(sk, chunk); 2864 2865 if (UNIXCB(skb).fp) 2866 break; 2867 2868 skip = 0; 2869 last = skb; 2870 last_len = skb->len; 2871 unix_state_lock(sk); 2872 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2873 if (skb) 2874 goto again; 2875 unix_state_unlock(sk); 2876 break; 2877 } 2878 } while (size); 2879 2880 mutex_unlock(&u->iolock); 2881 if (state->msg) 2882 scm_recv(sock, state->msg, &scm, flags); 2883 else 2884 scm_destroy(&scm); 2885 out: 2886 return copied ? : err; 2887 } 2888 2889 static int unix_stream_read_actor(struct sk_buff *skb, 2890 int skip, int chunk, 2891 struct unix_stream_read_state *state) 2892 { 2893 int ret; 2894 2895 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2896 state->msg, chunk); 2897 return ret ?: chunk; 2898 } 2899 2900 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2901 size_t size, int flags) 2902 { 2903 struct unix_stream_read_state state = { 2904 .recv_actor = unix_stream_read_actor, 2905 .socket = sk->sk_socket, 2906 .msg = msg, 2907 .size = size, 2908 .flags = flags 2909 }; 2910 2911 return unix_stream_read_generic(&state, true); 2912 } 2913 2914 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2915 size_t size, int flags) 2916 { 2917 struct unix_stream_read_state state = { 2918 .recv_actor = unix_stream_read_actor, 2919 .socket = sock, 2920 .msg = msg, 2921 .size = size, 2922 .flags = flags 2923 }; 2924 2925 #ifdef CONFIG_BPF_SYSCALL 2926 struct sock *sk = sock->sk; 2927 const struct proto *prot = READ_ONCE(sk->sk_prot); 2928 2929 if (prot != &unix_stream_proto) 2930 return prot->recvmsg(sk, msg, size, flags, NULL); 2931 #endif 2932 return unix_stream_read_generic(&state, true); 2933 } 2934 2935 static int unix_stream_splice_actor(struct sk_buff *skb, 2936 int skip, int chunk, 2937 struct unix_stream_read_state *state) 2938 { 2939 return skb_splice_bits(skb, state->socket->sk, 2940 UNIXCB(skb).consumed + skip, 2941 state->pipe, chunk, state->splice_flags); 2942 } 2943 2944 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2945 struct pipe_inode_info *pipe, 2946 size_t size, unsigned int flags) 2947 { 2948 struct unix_stream_read_state state = { 2949 .recv_actor = unix_stream_splice_actor, 2950 .socket = sock, 2951 .pipe = pipe, 2952 .size = size, 2953 .splice_flags = flags, 2954 }; 2955 2956 if (unlikely(*ppos)) 2957 return -ESPIPE; 2958 2959 if (sock->file->f_flags & O_NONBLOCK || 2960 flags & SPLICE_F_NONBLOCK) 2961 state.flags = MSG_DONTWAIT; 2962 2963 return unix_stream_read_generic(&state, false); 2964 } 2965 2966 static int unix_shutdown(struct socket *sock, int mode) 2967 { 2968 struct sock *sk = sock->sk; 2969 struct sock *other; 2970 2971 if (mode < SHUT_RD || mode > SHUT_RDWR) 2972 return -EINVAL; 2973 /* This maps: 2974 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2975 
* SHUT_WR (1) -> SEND_SHUTDOWN (2) 2976 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2977 */ 2978 ++mode; 2979 2980 unix_state_lock(sk); 2981 sk->sk_shutdown |= mode; 2982 other = unix_peer(sk); 2983 if (other) 2984 sock_hold(other); 2985 unix_state_unlock(sk); 2986 sk->sk_state_change(sk); 2987 2988 if (other && 2989 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2990 2991 int peer_mode = 0; 2992 const struct proto *prot = READ_ONCE(other->sk_prot); 2993 2994 if (prot->unhash) 2995 prot->unhash(other); 2996 if (mode&RCV_SHUTDOWN) 2997 peer_mode |= SEND_SHUTDOWN; 2998 if (mode&SEND_SHUTDOWN) 2999 peer_mode |= RCV_SHUTDOWN; 3000 unix_state_lock(other); 3001 other->sk_shutdown |= peer_mode; 3002 unix_state_unlock(other); 3003 other->sk_state_change(other); 3004 if (peer_mode == SHUTDOWN_MASK) 3005 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 3006 else if (peer_mode & RCV_SHUTDOWN) 3007 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 3008 } 3009 if (other) 3010 sock_put(other); 3011 3012 return 0; 3013 } 3014 3015 long unix_inq_len(struct sock *sk) 3016 { 3017 struct sk_buff *skb; 3018 long amount = 0; 3019 3020 if (sk->sk_state == TCP_LISTEN) 3021 return -EINVAL; 3022 3023 spin_lock(&sk->sk_receive_queue.lock); 3024 if (sk->sk_type == SOCK_STREAM || 3025 sk->sk_type == SOCK_SEQPACKET) { 3026 skb_queue_walk(&sk->sk_receive_queue, skb) 3027 amount += unix_skb_len(skb); 3028 } else { 3029 skb = skb_peek(&sk->sk_receive_queue); 3030 if (skb) 3031 amount = skb->len; 3032 } 3033 spin_unlock(&sk->sk_receive_queue.lock); 3034 3035 return amount; 3036 } 3037 EXPORT_SYMBOL_GPL(unix_inq_len); 3038 3039 long unix_outq_len(struct sock *sk) 3040 { 3041 return sk_wmem_alloc_get(sk); 3042 } 3043 EXPORT_SYMBOL_GPL(unix_outq_len); 3044 3045 static int unix_open_file(struct sock *sk) 3046 { 3047 struct path path; 3048 struct file *f; 3049 int fd; 3050 3051 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3052 return -EPERM; 3053 3054 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3055 return -ENOENT; 3056 3057 path = unix_sk(sk)->path; 3058 if (!path.dentry) 3059 return -ENOENT; 3060 3061 path_get(&path); 3062 3063 fd = get_unused_fd_flags(O_CLOEXEC); 3064 if (fd < 0) 3065 goto out; 3066 3067 f = dentry_open(&path, O_PATH, current_cred()); 3068 if (IS_ERR(f)) { 3069 put_unused_fd(fd); 3070 fd = PTR_ERR(f); 3071 goto out; 3072 } 3073 3074 fd_install(fd, f); 3075 out: 3076 path_put(&path); 3077 3078 return fd; 3079 } 3080 3081 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3082 { 3083 struct sock *sk = sock->sk; 3084 long amount = 0; 3085 int err; 3086 3087 switch (cmd) { 3088 case SIOCOUTQ: 3089 amount = unix_outq_len(sk); 3090 err = put_user(amount, (int __user *)arg); 3091 break; 3092 case SIOCINQ: 3093 amount = unix_inq_len(sk); 3094 if (amount < 0) 3095 err = amount; 3096 else 3097 err = put_user(amount, (int __user *)arg); 3098 break; 3099 case SIOCUNIXFILE: 3100 err = unix_open_file(sk); 3101 break; 3102 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3103 case SIOCATMARK: 3104 { 3105 struct sk_buff *skb; 3106 int answ = 0; 3107 3108 skb = skb_peek(&sk->sk_receive_queue); 3109 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3110 answ = 1; 3111 err = put_user(answ, (int __user *)arg); 3112 } 3113 break; 3114 #endif 3115 default: 3116 err = -ENOIOCTLCMD; 3117 break; 3118 } 3119 return err; 3120 } 3121 3122 #ifdef CONFIG_COMPAT 3123 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3124 { 3125 return unix_ioctl(sock, cmd, (unsigned 
long)compat_ptr(arg)); 3126 } 3127 #endif 3128 3129 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3130 { 3131 struct sock *sk = sock->sk; 3132 __poll_t mask; 3133 3134 sock_poll_wait(file, sock, wait); 3135 mask = 0; 3136 3137 /* exceptional events? */ 3138 if (sk->sk_err) 3139 mask |= EPOLLERR; 3140 if (sk->sk_shutdown == SHUTDOWN_MASK) 3141 mask |= EPOLLHUP; 3142 if (sk->sk_shutdown & RCV_SHUTDOWN) 3143 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3144 3145 /* readable? */ 3146 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3147 mask |= EPOLLIN | EPOLLRDNORM; 3148 if (sk_is_readable(sk)) 3149 mask |= EPOLLIN | EPOLLRDNORM; 3150 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3151 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3152 mask |= EPOLLPRI; 3153 #endif 3154 3155 /* Connection-based need to check for termination and startup */ 3156 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3157 sk->sk_state == TCP_CLOSE) 3158 mask |= EPOLLHUP; 3159 3160 /* 3161 * we set writable also when the other side has shut down the 3162 * connection. This prevents stuck sockets. 3163 */ 3164 if (unix_writable(sk)) 3165 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3166 3167 return mask; 3168 } 3169 3170 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3171 poll_table *wait) 3172 { 3173 struct sock *sk = sock->sk, *other; 3174 unsigned int writable; 3175 __poll_t mask; 3176 3177 sock_poll_wait(file, sock, wait); 3178 mask = 0; 3179 3180 /* exceptional events? */ 3181 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 3182 mask |= EPOLLERR | 3183 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3184 3185 if (sk->sk_shutdown & RCV_SHUTDOWN) 3186 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3187 if (sk->sk_shutdown == SHUTDOWN_MASK) 3188 mask |= EPOLLHUP; 3189 3190 /* readable? */ 3191 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3192 mask |= EPOLLIN | EPOLLRDNORM; 3193 if (sk_is_readable(sk)) 3194 mask |= EPOLLIN | EPOLLRDNORM; 3195 3196 /* Connection-based need to check for termination and startup */ 3197 if (sk->sk_type == SOCK_SEQPACKET) { 3198 if (sk->sk_state == TCP_CLOSE) 3199 mask |= EPOLLHUP; 3200 /* connection hasn't started yet? */ 3201 if (sk->sk_state == TCP_SYN_SENT) 3202 return mask; 3203 } 3204 3205 /* No write status requested, avoid expensive OUT tests. 
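 *
 * The writability check below may take unix_state_lock(), look at the
 * peer's receive queue and register this socket on the peer's wait queue
 * via unix_dgram_peer_wake_me(), so skip it unless the caller asked for
 * EPOLLOUT/EPOLLWRNORM/EPOLLWRBAND.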
*/ 3206 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3207 return mask; 3208 3209 writable = unix_writable(sk); 3210 if (writable) { 3211 unix_state_lock(sk); 3212 3213 other = unix_peer(sk); 3214 if (other && unix_peer(other) != sk && 3215 unix_recvq_full_lockless(other) && 3216 unix_dgram_peer_wake_me(sk, other)) 3217 writable = 0; 3218 3219 unix_state_unlock(sk); 3220 } 3221 3222 if (writable) 3223 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3224 else 3225 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3226 3227 return mask; 3228 } 3229 3230 #ifdef CONFIG_PROC_FS 3231 3232 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3233 3234 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3235 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3236 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3237 3238 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3239 { 3240 unsigned long offset = get_offset(*pos); 3241 unsigned long bucket = get_bucket(*pos); 3242 unsigned long count = 0; 3243 struct sock *sk; 3244 3245 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3246 sk; sk = sk_next(sk)) { 3247 if (++count == offset) 3248 break; 3249 } 3250 3251 return sk; 3252 } 3253 3254 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3255 { 3256 unsigned long bucket = get_bucket(*pos); 3257 struct net *net = seq_file_net(seq); 3258 struct sock *sk; 3259 3260 while (bucket < UNIX_HASH_SIZE) { 3261 spin_lock(&net->unx.table.locks[bucket]); 3262 3263 sk = unix_from_bucket(seq, pos); 3264 if (sk) 3265 return sk; 3266 3267 spin_unlock(&net->unx.table.locks[bucket]); 3268 3269 *pos = set_bucket_offset(++bucket, 1); 3270 } 3271 3272 return NULL; 3273 } 3274 3275 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3276 loff_t *pos) 3277 { 3278 unsigned long bucket = get_bucket(*pos); 3279 3280 sk = sk_next(sk); 3281 if (sk) 3282 return sk; 3283 3284 3285 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3286 3287 *pos = set_bucket_offset(++bucket, 1); 3288 3289 return unix_get_first(seq, pos); 3290 } 3291 3292 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3293 { 3294 if (!*pos) 3295 return SEQ_START_TOKEN; 3296 3297 return unix_get_first(seq, pos); 3298 } 3299 3300 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3301 { 3302 ++*pos; 3303 3304 if (v == SEQ_START_TOKEN) 3305 return unix_get_first(seq, pos); 3306 3307 return unix_get_next(seq, v, pos); 3308 } 3309 3310 static void unix_seq_stop(struct seq_file *seq, void *v) 3311 { 3312 struct sock *sk = v; 3313 3314 if (sk) 3315 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3316 } 3317 3318 static int unix_seq_show(struct seq_file *seq, void *v) 3319 { 3320 3321 if (v == SEQ_START_TOKEN) 3322 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3323 "Inode Path\n"); 3324 else { 3325 struct sock *s = v; 3326 struct unix_sock *u = unix_sk(s); 3327 unix_state_lock(s); 3328 3329 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3330 s, 3331 refcount_read(&s->sk_refcnt), 3332 0, 3333 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3334 s->sk_type, 3335 s->sk_socket ? 3336 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3337 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3338 sock_i_ino(s)); 3339 3340 if (u->addr) { // under a hash table lock here 3341 int i, len; 3342 seq_putc(seq, ' '); 3343 3344 i = 0; 3345 len = u->addr->len - 3346 offsetof(struct sockaddr_un, sun_path); 3347 if (u->addr->name->sun_path[0]) { 3348 len--; 3349 } else { 3350 seq_putc(seq, '@'); 3351 i++; 3352 } 3353 for ( ; i < len; i++) 3354 seq_putc(seq, u->addr->name->sun_path[i] ?: 3355 '@'); 3356 } 3357 unix_state_unlock(s); 3358 seq_putc(seq, '\n'); 3359 } 3360 3361 return 0; 3362 } 3363 3364 static const struct seq_operations unix_seq_ops = { 3365 .start = unix_seq_start, 3366 .next = unix_seq_next, 3367 .stop = unix_seq_stop, 3368 .show = unix_seq_show, 3369 }; 3370 3371 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3372 struct bpf_unix_iter_state { 3373 struct seq_net_private p; 3374 unsigned int cur_sk; 3375 unsigned int end_sk; 3376 unsigned int max_sk; 3377 struct sock **batch; 3378 bool st_bucket_done; 3379 }; 3380 3381 struct bpf_iter__unix { 3382 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3383 __bpf_md_ptr(struct unix_sock *, unix_sk); 3384 uid_t uid __aligned(8); 3385 }; 3386 3387 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3388 struct unix_sock *unix_sk, uid_t uid) 3389 { 3390 struct bpf_iter__unix ctx; 3391 3392 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3393 ctx.meta = meta; 3394 ctx.unix_sk = unix_sk; 3395 ctx.uid = uid; 3396 return bpf_iter_run_prog(prog, &ctx); 3397 } 3398 3399 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3400 3401 { 3402 struct bpf_unix_iter_state *iter = seq->private; 3403 unsigned int expected = 1; 3404 struct sock *sk; 3405 3406 sock_hold(start_sk); 3407 iter->batch[iter->end_sk++] = start_sk; 3408 3409 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3410 if (iter->end_sk < iter->max_sk) { 3411 sock_hold(sk); 3412 iter->batch[iter->end_sk++] = sk; 3413 } 3414 3415 expected++; 3416 } 3417 3418 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3419 3420 return expected; 3421 } 3422 3423 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3424 { 3425 while (iter->cur_sk < iter->end_sk) 3426 sock_put(iter->batch[iter->cur_sk++]); 3427 } 3428 3429 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3430 unsigned int new_batch_sz) 3431 { 3432 struct sock **new_batch; 3433 3434 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3435 GFP_USER | __GFP_NOWARN); 3436 if (!new_batch) 3437 return -ENOMEM; 3438 3439 bpf_iter_unix_put_batch(iter); 3440 kvfree(iter->batch); 3441 iter->batch = new_batch; 3442 iter->max_sk = new_batch_sz; 3443 3444 return 0; 3445 } 3446 3447 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3448 loff_t *pos) 3449 { 3450 struct bpf_unix_iter_state *iter = seq->private; 3451 unsigned int expected; 3452 bool resized = false; 3453 struct sock *sk; 3454 3455 if (iter->st_bucket_done) 3456 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3457 3458 again: 3459 /* Get a new batch */ 3460 iter->cur_sk = 0; 3461 iter->end_sk = 0; 3462 3463 sk = unix_get_first(seq, pos); 3464 if (!sk) 3465 return NULL; /* Done */ 3466 3467 expected = bpf_iter_unix_hold_batch(seq, sk); 3468 3469 if (iter->end_sk == expected) { 3470 iter->st_bucket_done = true; 3471 return sk; 3472 } 3473 3474 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3475 resized = true; 3476 goto again; 3477 } 3478 3479 return sk; 3480 } 3481 3482 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3483 { 3484 if (!*pos) 3485 return SEQ_START_TOKEN; 3486 3487 /* bpf iter does not support lseek, so it always 3488 * continue from where it was stop()-ped. 3489 */ 3490 return bpf_iter_unix_batch(seq, pos); 3491 } 3492 3493 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3494 { 3495 struct bpf_unix_iter_state *iter = seq->private; 3496 struct sock *sk; 3497 3498 /* Whenever seq_next() is called, the iter->cur_sk is 3499 * done with seq_show(), so advance to the next sk in 3500 * the batch. 3501 */ 3502 if (iter->cur_sk < iter->end_sk) 3503 sock_put(iter->batch[iter->cur_sk++]); 3504 3505 ++*pos; 3506 3507 if (iter->cur_sk < iter->end_sk) 3508 sk = iter->batch[iter->cur_sk]; 3509 else 3510 sk = bpf_iter_unix_batch(seq, pos); 3511 3512 return sk; 3513 } 3514 3515 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3516 { 3517 struct bpf_iter_meta meta; 3518 struct bpf_prog *prog; 3519 struct sock *sk = v; 3520 uid_t uid; 3521 bool slow; 3522 int ret; 3523 3524 if (v == SEQ_START_TOKEN) 3525 return 0; 3526 3527 slow = lock_sock_fast(sk); 3528 3529 if (unlikely(sk_unhashed(sk))) { 3530 ret = SEQ_SKIP; 3531 goto unlock; 3532 } 3533 3534 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3535 meta.seq = seq; 3536 prog = bpf_iter_get_info(&meta, false); 3537 ret = unix_prog_seq_show(prog, &meta, v, uid); 3538 unlock: 3539 unlock_sock_fast(sk, slow); 3540 return ret; 3541 } 3542 3543 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3544 { 3545 struct bpf_unix_iter_state *iter = seq->private; 3546 struct bpf_iter_meta meta; 3547 struct bpf_prog *prog; 3548 3549 if (!v) { 3550 meta.seq = seq; 3551 prog = bpf_iter_get_info(&meta, true); 3552 if (prog) 3553 (void)unix_prog_seq_show(prog, &meta, v, 0); 3554 } 3555 3556 if (iter->cur_sk < iter->end_sk) 3557 bpf_iter_unix_put_batch(iter); 3558 } 3559 3560 static const struct seq_operations bpf_iter_unix_seq_ops = { 3561 .start = bpf_iter_unix_seq_start, 3562 .next = bpf_iter_unix_seq_next, 3563 .stop = bpf_iter_unix_seq_stop, 3564 .show = bpf_iter_unix_seq_show, 3565 }; 3566 #endif 3567 #endif 3568 3569 static const struct net_proto_family unix_family_ops = { 3570 .family = PF_UNIX, 3571 .create = unix_create, 3572 .owner = THIS_MODULE, 3573 }; 3574 3575 3576 static int __net_init unix_net_init(struct net *net) 3577 { 3578 int i; 3579 3580 net->unx.sysctl_max_dgram_qlen = 10; 3581 if (unix_sysctl_register(net)) 3582 goto out; 3583 3584 #ifdef CONFIG_PROC_FS 3585 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3586 sizeof(struct seq_net_private))) 3587 goto err_sysctl; 3588 #endif 3589 3590 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3591 sizeof(spinlock_t), GFP_KERNEL); 3592 if (!net->unx.table.locks) 3593 goto err_proc; 3594 3595 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3596 sizeof(struct hlist_head), 3597 GFP_KERNEL); 3598 if (!net->unx.table.buckets) 3599 goto free_locks; 3600 3601 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3602 spin_lock_init(&net->unx.table.locks[i]); 3603 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3604 } 3605 3606 return 0; 3607 3608 free_locks: 3609 kvfree(net->unx.table.locks); 3610 err_proc: 3611 #ifdef CONFIG_PROC_FS 3612 remove_proc_entry("unix", net->proc_net); 3613 err_sysctl: 3614 #endif 3615 unix_sysctl_unregister(net); 3616 out: 3617 return -ENOMEM; 3618 } 3619 3620 static void __net_exit unix_net_exit(struct net *net) 3621 { 3622 
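	/* Undo unix_net_init(): free the per-net hash buckets and locks,
	 * drop the sysctl table and remove the /proc/net/unix entry.
	 */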
kvfree(net->unx.table.buckets); 3623 kvfree(net->unx.table.locks); 3624 unix_sysctl_unregister(net); 3625 remove_proc_entry("unix", net->proc_net); 3626 } 3627 3628 static struct pernet_operations unix_net_ops = { 3629 .init = unix_net_init, 3630 .exit = unix_net_exit, 3631 }; 3632 3633 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3634 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3635 struct unix_sock *unix_sk, uid_t uid) 3636 3637 #define INIT_BATCH_SZ 16 3638 3639 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3640 { 3641 struct bpf_unix_iter_state *iter = priv_data; 3642 int err; 3643 3644 err = bpf_iter_init_seq_net(priv_data, aux); 3645 if (err) 3646 return err; 3647 3648 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3649 if (err) { 3650 bpf_iter_fini_seq_net(priv_data); 3651 return err; 3652 } 3653 3654 return 0; 3655 } 3656 3657 static void bpf_iter_fini_unix(void *priv_data) 3658 { 3659 struct bpf_unix_iter_state *iter = priv_data; 3660 3661 bpf_iter_fini_seq_net(priv_data); 3662 kvfree(iter->batch); 3663 } 3664 3665 static const struct bpf_iter_seq_info unix_seq_info = { 3666 .seq_ops = &bpf_iter_unix_seq_ops, 3667 .init_seq_private = bpf_iter_init_unix, 3668 .fini_seq_private = bpf_iter_fini_unix, 3669 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3670 }; 3671 3672 static const struct bpf_func_proto * 3673 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3674 const struct bpf_prog *prog) 3675 { 3676 switch (func_id) { 3677 case BPF_FUNC_setsockopt: 3678 return &bpf_sk_setsockopt_proto; 3679 case BPF_FUNC_getsockopt: 3680 return &bpf_sk_getsockopt_proto; 3681 default: 3682 return NULL; 3683 } 3684 } 3685 3686 static struct bpf_iter_reg unix_reg_info = { 3687 .target = "unix", 3688 .ctx_arg_info_size = 1, 3689 .ctx_arg_info = { 3690 { offsetof(struct bpf_iter__unix, unix_sk), 3691 PTR_TO_BTF_ID_OR_NULL }, 3692 }, 3693 .get_func_proto = bpf_iter_unix_get_func_proto, 3694 .seq_info = &unix_seq_info, 3695 }; 3696 3697 static void __init bpf_iter_register(void) 3698 { 3699 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3700 if (bpf_iter_reg_target(&unix_reg_info)) 3701 pr_warn("Warning: could not register bpf iterator unix\n"); 3702 } 3703 #endif 3704 3705 static int __init af_unix_init(void) 3706 { 3707 int i, rc = -1; 3708 3709 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3710 3711 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3712 spin_lock_init(&bsd_socket_locks[i]); 3713 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3714 } 3715 3716 rc = proto_register(&unix_dgram_proto, 1); 3717 if (rc != 0) { 3718 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3719 goto out; 3720 } 3721 3722 rc = proto_register(&unix_stream_proto, 1); 3723 if (rc != 0) { 3724 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3725 goto out; 3726 } 3727 3728 sock_register(&unix_family_ops); 3729 register_pernet_subsys(&unix_net_ops); 3730 unix_bpf_build_proto(); 3731 3732 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3733 bpf_iter_register(); 3734 #endif 3735 3736 out: 3737 return rc; 3738 } 3739 3740 static void __exit af_unix_exit(void) 3741 { 3742 sock_unregister(PF_UNIX); 3743 proto_unregister(&unix_dgram_proto); 3744 proto_unregister(&unix_stream_proto); 3745 unregister_pernet_subsys(&unix_net_ops); 3746 } 3747 3748 /* Earlier than device_initcall() so that other drivers invoking 3749 
request_module() don't end up in a loop when modprobe tries 3750 to use a UNIX socket. But later than subsys_initcall() because 3751 we depend on stuff initialised there */ 3752 fs_initcall(af_unix_init); 3753 module_exit(af_unix_exit); 3754 3755 MODULE_LICENSE("GPL"); 3756 MODULE_ALIAS_NETPROTO(PF_UNIX); 3757
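/*
 * Illustration only (not part of the build): with CONFIG_AF_UNIX_OOB, a
 * stream peer can send a single "urgent" byte and the receiver can locate
 * it with SIOCATMARK and fetch it with MSG_OOB, handled above by
 * queue_oob(), manage_oob() and unix_stream_recv_urg().  A hypothetical
 * userspace sketch (error handling omitted):
 *
 *	char c;
 *	int atmark;
 *
 *	send(fd, "x", 1, MSG_OOB);          // sender marks one OOB byte
 *	...
 *	ioctl(fd, SIOCATMARK, &atmark);     // receiver: is the mark next?
 *	if (atmark)
 *		recv(fd, &c, 1, MSG_OOB);   // read the OOB byte itself
 */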