// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *	Linus Torvalds		:	Assorted bug cures.
 *	Niibe Yutaka		:	async I/O support.
 *	Carsten Paeth		:	PF_UNIX check, address fixes.
 *	Alan Cox		:	Limit size of allocated blocks.
 *	Alan Cox		:	Fixed the stupid socketpair bug.
 *	Alan Cox		:	BSD compatibility fine tuning.
 *	Alan Cox		:	Fixed a bug in connect when interrupted.
 *	Alan Cox		:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *	Marty Leisner		:	Fixes to fd passing.
 *	Nick Nevin		:	recvmsg bugfix.
 *	Alan Cox		:	Started proper garbage collector.
 *	Heiko EiBfeldt		:	Missing verify_area check.
 *	Alan Cox		:	Started POSIXisms.
 *	Andreas Schwab		:	Replace inode by dentry for proper
 *					reference counting.
 *	Kirk Petersen		:	Made this a module.
 *	Christoph Rohland	:	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *	Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *	Artur Skawina		:	Hash function optimizations.
 *	Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	Malcolm Beattie		:	Set peercred for socketpair.
 *	Michal Ostrowski	:	Module initialization cleanup.
 *	Arnaldo C. Melo		:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+).
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns a 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??).
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  that start with 0, so that this name space does not intersect
 *		  with BSD names.
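 *
 * As a rough userspace illustration of the abstract name space described
 * above (a sketch only; error handling is omitted and "myname" is purely
 * illustrative):
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	memcpy(sun.sun_path, "\0myname", 7);
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 7);
 *
 * The leading 0 byte keeps "myname" out of the filesystem name space, and
 * the passed address length, not a terminating NUL, delimits the name.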
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}

static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline
bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 209 { 210 return true; 211 } 212 #endif /* CONFIG_SECURITY_NETWORK */ 213 214 #define unix_peer(sk) (unix_sk(sk)->peer) 215 216 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 217 { 218 return unix_peer(osk) == sk; 219 } 220 221 static inline int unix_may_send(struct sock *sk, struct sock *osk) 222 { 223 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 224 } 225 226 static inline int unix_recvq_full(const struct sock *sk) 227 { 228 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 229 } 230 231 static inline int unix_recvq_full_lockless(const struct sock *sk) 232 { 233 return skb_queue_len_lockless(&sk->sk_receive_queue) > 234 READ_ONCE(sk->sk_max_ack_backlog); 235 } 236 237 struct sock *unix_peer_get(struct sock *s) 238 { 239 struct sock *peer; 240 241 unix_state_lock(s); 242 peer = unix_peer(s); 243 if (peer) 244 sock_hold(peer); 245 unix_state_unlock(s); 246 return peer; 247 } 248 EXPORT_SYMBOL_GPL(unix_peer_get); 249 250 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, 251 int addr_len) 252 { 253 struct unix_address *addr; 254 255 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); 256 if (!addr) 257 return NULL; 258 259 refcount_set(&addr->refcnt, 1); 260 addr->len = addr_len; 261 memcpy(addr->name, sunaddr, addr_len); 262 263 return addr; 264 } 265 266 static inline void unix_release_addr(struct unix_address *addr) 267 { 268 if (refcount_dec_and_test(&addr->refcnt)) 269 kfree(addr); 270 } 271 272 /* 273 * Check unix socket name: 274 * - should be not zero length. 275 * - if started by not zero, should be NULL terminated (FS object) 276 * - if started by zero, it is abstract name. 277 */ 278 279 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) 280 { 281 if (addr_len <= offsetof(struct sockaddr_un, sun_path) || 282 addr_len > sizeof(*sunaddr)) 283 return -EINVAL; 284 285 if (sunaddr->sun_family != AF_UNIX) 286 return -EINVAL; 287 288 return 0; 289 } 290 291 static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) 292 { 293 /* This may look like an off by one error but it is a bit more 294 * subtle. 108 is the longest valid AF_UNIX path for a binding. 295 * sun_path[108] doesn't as such exist. However in kernel space 296 * we are guaranteed that it is a valid memory location in our 297 * kernel address buffer because syscall functions always pass 298 * a pointer of struct sockaddr_storage which has a bigger buffer 299 * than 108. 
300 */ 301 ((char *)sunaddr)[addr_len] = 0; 302 } 303 304 static void __unix_remove_socket(struct sock *sk) 305 { 306 sk_del_node_init(sk); 307 } 308 309 static void __unix_insert_socket(struct net *net, struct sock *sk) 310 { 311 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 312 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]); 313 } 314 315 static void __unix_set_addr_hash(struct net *net, struct sock *sk, 316 struct unix_address *addr, unsigned int hash) 317 { 318 __unix_remove_socket(sk); 319 smp_store_release(&unix_sk(sk)->addr, addr); 320 321 sk->sk_hash = hash; 322 __unix_insert_socket(net, sk); 323 } 324 325 static void unix_remove_socket(struct net *net, struct sock *sk) 326 { 327 spin_lock(&net->unx.table.locks[sk->sk_hash]); 328 __unix_remove_socket(sk); 329 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 330 } 331 332 static void unix_insert_unbound_socket(struct net *net, struct sock *sk) 333 { 334 spin_lock(&net->unx.table.locks[sk->sk_hash]); 335 __unix_insert_socket(net, sk); 336 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 337 } 338 339 static void unix_insert_bsd_socket(struct sock *sk) 340 { 341 spin_lock(&bsd_socket_locks[sk->sk_hash]); 342 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); 343 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 344 } 345 346 static void unix_remove_bsd_socket(struct sock *sk) 347 { 348 if (!hlist_unhashed(&sk->sk_bind_node)) { 349 spin_lock(&bsd_socket_locks[sk->sk_hash]); 350 __sk_del_bind_node(sk); 351 spin_unlock(&bsd_socket_locks[sk->sk_hash]); 352 353 sk_node_init(&sk->sk_bind_node); 354 } 355 } 356 357 static struct sock *__unix_find_socket_byname(struct net *net, 358 struct sockaddr_un *sunname, 359 int len, unsigned int hash) 360 { 361 struct sock *s; 362 363 sk_for_each(s, &net->unx.table.buckets[hash]) { 364 struct unix_sock *u = unix_sk(s); 365 366 if (u->addr->len == len && 367 !memcmp(u->addr->name, sunname, len)) 368 return s; 369 } 370 return NULL; 371 } 372 373 static inline struct sock *unix_find_socket_byname(struct net *net, 374 struct sockaddr_un *sunname, 375 int len, unsigned int hash) 376 { 377 struct sock *s; 378 379 spin_lock(&net->unx.table.locks[hash]); 380 s = __unix_find_socket_byname(net, sunname, len, hash); 381 if (s) 382 sock_hold(s); 383 spin_unlock(&net->unx.table.locks[hash]); 384 return s; 385 } 386 387 static struct sock *unix_find_socket_byinode(struct inode *i) 388 { 389 unsigned int hash = unix_bsd_hash(i); 390 struct sock *s; 391 392 spin_lock(&bsd_socket_locks[hash]); 393 sk_for_each_bound(s, &bsd_socket_buckets[hash]) { 394 struct dentry *dentry = unix_sk(s)->path.dentry; 395 396 if (dentry && d_backing_inode(dentry) == i) { 397 sock_hold(s); 398 spin_unlock(&bsd_socket_locks[hash]); 399 return s; 400 } 401 } 402 spin_unlock(&bsd_socket_locks[hash]); 403 return NULL; 404 } 405 406 /* Support code for asymmetrically connected dgram sockets 407 * 408 * If a datagram socket is connected to a socket not itself connected 409 * to the first socket (eg, /dev/log), clients may only enqueue more 410 * messages if the present receive queue of the server socket is not 411 * "too large". This means there's a second writeability condition 412 * poll and sendmsg need to test. The dgram recv code will do a wake 413 * up on the peer_wait wait queue of a socket upon reception of a 414 * datagram which needs to be propagated to sleeping would-be writers 415 * since these might not have sent anything so far. 
This can't be 416 * accomplished via poll_wait because the lifetime of the server 417 * socket might be less than that of its clients if these break their 418 * association with it or if the server socket is closed while clients 419 * are still connected to it and there's no way to inform "a polling 420 * implementation" that it should let go of a certain wait queue 421 * 422 * In order to propagate a wake up, a wait_queue_entry_t of the client 423 * socket is enqueued on the peer_wait queue of the server socket 424 * whose wake function does a wake_up on the ordinary client socket 425 * wait queue. This connection is established whenever a write (or 426 * poll for write) hit the flow control condition and broken when the 427 * association to the server socket is dissolved or after a wake up 428 * was relayed. 429 */ 430 431 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, 432 void *key) 433 { 434 struct unix_sock *u; 435 wait_queue_head_t *u_sleep; 436 437 u = container_of(q, struct unix_sock, peer_wake); 438 439 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, 440 q); 441 u->peer_wake.private = NULL; 442 443 /* relaying can only happen while the wq still exists */ 444 u_sleep = sk_sleep(&u->sk); 445 if (u_sleep) 446 wake_up_interruptible_poll(u_sleep, key_to_poll(key)); 447 448 return 0; 449 } 450 451 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) 452 { 453 struct unix_sock *u, *u_other; 454 int rc; 455 456 u = unix_sk(sk); 457 u_other = unix_sk(other); 458 rc = 0; 459 spin_lock(&u_other->peer_wait.lock); 460 461 if (!u->peer_wake.private) { 462 u->peer_wake.private = other; 463 __add_wait_queue(&u_other->peer_wait, &u->peer_wake); 464 465 rc = 1; 466 } 467 468 spin_unlock(&u_other->peer_wait.lock); 469 return rc; 470 } 471 472 static void unix_dgram_peer_wake_disconnect(struct sock *sk, 473 struct sock *other) 474 { 475 struct unix_sock *u, *u_other; 476 477 u = unix_sk(sk); 478 u_other = unix_sk(other); 479 spin_lock(&u_other->peer_wait.lock); 480 481 if (u->peer_wake.private == other) { 482 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); 483 u->peer_wake.private = NULL; 484 } 485 486 spin_unlock(&u_other->peer_wait.lock); 487 } 488 489 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, 490 struct sock *other) 491 { 492 unix_dgram_peer_wake_disconnect(sk, other); 493 wake_up_interruptible_poll(sk_sleep(sk), 494 EPOLLOUT | 495 EPOLLWRNORM | 496 EPOLLWRBAND); 497 } 498 499 /* preconditions: 500 * - unix_peer(sk) == other 501 * - association is stable 502 */ 503 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) 504 { 505 int connected; 506 507 connected = unix_dgram_peer_wake_connect(sk, other); 508 509 /* If other is SOCK_DEAD, we want to make sure we signal 510 * POLLOUT, such that a subsequent write() can get a 511 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs 512 * to other and its full, we will hang waiting for POLLOUT. 
513 */ 514 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) 515 return 1; 516 517 if (connected) 518 unix_dgram_peer_wake_disconnect(sk, other); 519 520 return 0; 521 } 522 523 static int unix_writable(const struct sock *sk) 524 { 525 return sk->sk_state != TCP_LISTEN && 526 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 527 } 528 529 static void unix_write_space(struct sock *sk) 530 { 531 struct socket_wq *wq; 532 533 rcu_read_lock(); 534 if (unix_writable(sk)) { 535 wq = rcu_dereference(sk->sk_wq); 536 if (skwq_has_sleeper(wq)) 537 wake_up_interruptible_sync_poll(&wq->wait, 538 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); 539 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 540 } 541 rcu_read_unlock(); 542 } 543 544 /* When dgram socket disconnects (or changes its peer), we clear its receive 545 * queue of packets arrived from previous peer. First, it allows to do 546 * flow control based only on wmem_alloc; second, sk connected to peer 547 * may receive messages only from that peer. */ 548 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 549 { 550 if (!skb_queue_empty(&sk->sk_receive_queue)) { 551 skb_queue_purge(&sk->sk_receive_queue); 552 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 553 554 /* If one link of bidirectional dgram pipe is disconnected, 555 * we signal error. Messages are lost. Do not make this, 556 * when peer was not connected to us. 557 */ 558 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 559 other->sk_err = ECONNRESET; 560 sk_error_report(other); 561 } 562 } 563 other->sk_state = TCP_CLOSE; 564 } 565 566 static void unix_sock_destructor(struct sock *sk) 567 { 568 struct unix_sock *u = unix_sk(sk); 569 570 skb_queue_purge(&sk->sk_receive_queue); 571 572 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 573 if (u->oob_skb) { 574 kfree_skb(u->oob_skb); 575 u->oob_skb = NULL; 576 } 577 #endif 578 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); 579 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 580 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket); 581 if (!sock_flag(sk, SOCK_DEAD)) { 582 pr_info("Attempt to release alive unix socket: %p\n", sk); 583 return; 584 } 585 586 if (u->addr) 587 unix_release_addr(u->addr); 588 589 atomic_long_dec(&unix_nr_socks); 590 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 591 #ifdef UNIX_REFCNT_DEBUG 592 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, 593 atomic_long_read(&unix_nr_socks)); 594 #endif 595 } 596 597 static void unix_release_sock(struct sock *sk, int embrion) 598 { 599 struct unix_sock *u = unix_sk(sk); 600 struct sock *skpair; 601 struct sk_buff *skb; 602 struct path path; 603 int state; 604 605 unix_remove_socket(sock_net(sk), sk); 606 unix_remove_bsd_socket(sk); 607 608 /* Clear state */ 609 unix_state_lock(sk); 610 sock_orphan(sk); 611 sk->sk_shutdown = SHUTDOWN_MASK; 612 path = u->path; 613 u->path.dentry = NULL; 614 u->path.mnt = NULL; 615 state = sk->sk_state; 616 sk->sk_state = TCP_CLOSE; 617 618 skpair = unix_peer(sk); 619 unix_peer(sk) = NULL; 620 621 unix_state_unlock(sk); 622 623 wake_up_interruptible_all(&u->peer_wait); 624 625 if (skpair != NULL) { 626 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 627 unix_state_lock(skpair); 628 /* No more writes */ 629 skpair->sk_shutdown = SHUTDOWN_MASK; 630 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 631 skpair->sk_err = ECONNRESET; 632 unix_state_unlock(skpair); 633 skpair->sk_state_change(skpair); 634 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 635 } 636 637 
unix_dgram_peer_wake_disconnect(sk, skpair); 638 sock_put(skpair); /* It may now die */ 639 } 640 641 /* Try to flush out this socket. Throw out buffers at least */ 642 643 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 644 if (state == TCP_LISTEN) 645 unix_release_sock(skb->sk, 1); 646 /* passed fds are erased in the kfree_skb hook */ 647 UNIXCB(skb).consumed = skb->len; 648 kfree_skb(skb); 649 } 650 651 if (path.dentry) 652 path_put(&path); 653 654 sock_put(sk); 655 656 /* ---- Socket is dead now and most probably destroyed ---- */ 657 658 /* 659 * Fixme: BSD difference: In BSD all sockets connected to us get 660 * ECONNRESET and we die on the spot. In Linux we behave 661 * like files and pipes do and wait for the last 662 * dereference. 663 * 664 * Can't we simply set sock->err? 665 * 666 * What the above comment does talk about? --ANK(980817) 667 */ 668 669 if (unix_tot_inflight) 670 unix_gc(); /* Garbage collect fds */ 671 } 672 673 static void init_peercred(struct sock *sk) 674 { 675 const struct cred *old_cred; 676 struct pid *old_pid; 677 678 spin_lock(&sk->sk_peer_lock); 679 old_pid = sk->sk_peer_pid; 680 old_cred = sk->sk_peer_cred; 681 sk->sk_peer_pid = get_pid(task_tgid(current)); 682 sk->sk_peer_cred = get_current_cred(); 683 spin_unlock(&sk->sk_peer_lock); 684 685 put_pid(old_pid); 686 put_cred(old_cred); 687 } 688 689 static void copy_peercred(struct sock *sk, struct sock *peersk) 690 { 691 const struct cred *old_cred; 692 struct pid *old_pid; 693 694 if (sk < peersk) { 695 spin_lock(&sk->sk_peer_lock); 696 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); 697 } else { 698 spin_lock(&peersk->sk_peer_lock); 699 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); 700 } 701 old_pid = sk->sk_peer_pid; 702 old_cred = sk->sk_peer_cred; 703 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 704 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 705 706 spin_unlock(&sk->sk_peer_lock); 707 spin_unlock(&peersk->sk_peer_lock); 708 709 put_pid(old_pid); 710 put_cred(old_cred); 711 } 712 713 static int unix_listen(struct socket *sock, int backlog) 714 { 715 int err; 716 struct sock *sk = sock->sk; 717 struct unix_sock *u = unix_sk(sk); 718 719 err = -EOPNOTSUPP; 720 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 721 goto out; /* Only stream/seqpacket sockets accept */ 722 err = -EINVAL; 723 if (!u->addr) 724 goto out; /* No listens on an unbound socket */ 725 unix_state_lock(sk); 726 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 727 goto out_unlock; 728 if (backlog > sk->sk_max_ack_backlog) 729 wake_up_interruptible_all(&u->peer_wait); 730 sk->sk_max_ack_backlog = backlog; 731 sk->sk_state = TCP_LISTEN; 732 /* set credentials so connect can copy them */ 733 init_peercred(sk); 734 err = 0; 735 736 out_unlock: 737 unix_state_unlock(sk); 738 out: 739 return err; 740 } 741 742 static int unix_release(struct socket *); 743 static int unix_bind(struct socket *, struct sockaddr *, int); 744 static int unix_stream_connect(struct socket *, struct sockaddr *, 745 int addr_len, int flags); 746 static int unix_socketpair(struct socket *, struct socket *); 747 static int unix_accept(struct socket *, struct socket *, int, bool); 748 static int unix_getname(struct socket *, struct sockaddr *, int); 749 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 750 static __poll_t unix_dgram_poll(struct file *, struct socket *, 751 poll_table *); 752 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 753 #ifdef 
CONFIG_COMPAT 754 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 755 #endif 756 static int unix_shutdown(struct socket *, int); 757 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 758 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 759 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 760 size_t size, int flags); 761 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 762 struct pipe_inode_info *, size_t size, 763 unsigned int flags); 764 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 765 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 766 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 767 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 768 static int unix_dgram_connect(struct socket *, struct sockaddr *, 769 int, int); 770 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 771 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 772 int); 773 774 static int unix_set_peek_off(struct sock *sk, int val) 775 { 776 struct unix_sock *u = unix_sk(sk); 777 778 if (mutex_lock_interruptible(&u->iolock)) 779 return -EINTR; 780 781 sk->sk_peek_off = val; 782 mutex_unlock(&u->iolock); 783 784 return 0; 785 } 786 787 #ifdef CONFIG_PROC_FS 788 static int unix_count_nr_fds(struct sock *sk) 789 { 790 struct sk_buff *skb; 791 struct unix_sock *u; 792 int nr_fds = 0; 793 794 spin_lock(&sk->sk_receive_queue.lock); 795 skb = skb_peek(&sk->sk_receive_queue); 796 while (skb) { 797 u = unix_sk(skb->sk); 798 nr_fds += atomic_read(&u->scm_stat.nr_fds); 799 skb = skb_peek_next(skb, &sk->sk_receive_queue); 800 } 801 spin_unlock(&sk->sk_receive_queue.lock); 802 803 return nr_fds; 804 } 805 806 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 807 { 808 struct sock *sk = sock->sk; 809 struct unix_sock *u; 810 int nr_fds; 811 812 if (sk) { 813 u = unix_sk(sk); 814 if (sock->type == SOCK_DGRAM) { 815 nr_fds = atomic_read(&u->scm_stat.nr_fds); 816 goto out_print; 817 } 818 819 unix_state_lock(sk); 820 if (sk->sk_state != TCP_LISTEN) 821 nr_fds = atomic_read(&u->scm_stat.nr_fds); 822 else 823 nr_fds = unix_count_nr_fds(sk); 824 unix_state_unlock(sk); 825 out_print: 826 seq_printf(m, "scm_fds: %u\n", nr_fds); 827 } 828 } 829 #else 830 #define unix_show_fdinfo NULL 831 #endif 832 833 static const struct proto_ops unix_stream_ops = { 834 .family = PF_UNIX, 835 .owner = THIS_MODULE, 836 .release = unix_release, 837 .bind = unix_bind, 838 .connect = unix_stream_connect, 839 .socketpair = unix_socketpair, 840 .accept = unix_accept, 841 .getname = unix_getname, 842 .poll = unix_poll, 843 .ioctl = unix_ioctl, 844 #ifdef CONFIG_COMPAT 845 .compat_ioctl = unix_compat_ioctl, 846 #endif 847 .listen = unix_listen, 848 .shutdown = unix_shutdown, 849 .sendmsg = unix_stream_sendmsg, 850 .recvmsg = unix_stream_recvmsg, 851 .read_skb = unix_stream_read_skb, 852 .mmap = sock_no_mmap, 853 .sendpage = unix_stream_sendpage, 854 .splice_read = unix_stream_splice_read, 855 .set_peek_off = unix_set_peek_off, 856 .show_fdinfo = unix_show_fdinfo, 857 }; 858 859 static const struct proto_ops unix_dgram_ops = { 860 .family = PF_UNIX, 861 .owner = THIS_MODULE, 862 .release = unix_release, 863 .bind = unix_bind, 864 .connect = unix_dgram_connect, 865 .socketpair = unix_socketpair, 866 .accept = sock_no_accept, 867 .getname = 
unix_getname, 868 .poll = unix_dgram_poll, 869 .ioctl = unix_ioctl, 870 #ifdef CONFIG_COMPAT 871 .compat_ioctl = unix_compat_ioctl, 872 #endif 873 .listen = sock_no_listen, 874 .shutdown = unix_shutdown, 875 .sendmsg = unix_dgram_sendmsg, 876 .read_skb = unix_read_skb, 877 .recvmsg = unix_dgram_recvmsg, 878 .mmap = sock_no_mmap, 879 .sendpage = sock_no_sendpage, 880 .set_peek_off = unix_set_peek_off, 881 .show_fdinfo = unix_show_fdinfo, 882 }; 883 884 static const struct proto_ops unix_seqpacket_ops = { 885 .family = PF_UNIX, 886 .owner = THIS_MODULE, 887 .release = unix_release, 888 .bind = unix_bind, 889 .connect = unix_stream_connect, 890 .socketpair = unix_socketpair, 891 .accept = unix_accept, 892 .getname = unix_getname, 893 .poll = unix_dgram_poll, 894 .ioctl = unix_ioctl, 895 #ifdef CONFIG_COMPAT 896 .compat_ioctl = unix_compat_ioctl, 897 #endif 898 .listen = unix_listen, 899 .shutdown = unix_shutdown, 900 .sendmsg = unix_seqpacket_sendmsg, 901 .recvmsg = unix_seqpacket_recvmsg, 902 .mmap = sock_no_mmap, 903 .sendpage = sock_no_sendpage, 904 .set_peek_off = unix_set_peek_off, 905 .show_fdinfo = unix_show_fdinfo, 906 }; 907 908 static void unix_close(struct sock *sk, long timeout) 909 { 910 /* Nothing to do here, unix socket does not need a ->close(). 911 * This is merely for sockmap. 912 */ 913 } 914 915 static void unix_unhash(struct sock *sk) 916 { 917 /* Nothing to do here, unix socket does not need a ->unhash(). 918 * This is merely for sockmap. 919 */ 920 } 921 922 struct proto unix_dgram_proto = { 923 .name = "UNIX", 924 .owner = THIS_MODULE, 925 .obj_size = sizeof(struct unix_sock), 926 .close = unix_close, 927 #ifdef CONFIG_BPF_SYSCALL 928 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 929 #endif 930 }; 931 932 struct proto unix_stream_proto = { 933 .name = "UNIX-STREAM", 934 .owner = THIS_MODULE, 935 .obj_size = sizeof(struct unix_sock), 936 .close = unix_close, 937 .unhash = unix_unhash, 938 #ifdef CONFIG_BPF_SYSCALL 939 .psock_update_sk_prot = unix_stream_bpf_update_proto, 940 #endif 941 }; 942 943 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 944 { 945 struct unix_sock *u; 946 struct sock *sk; 947 int err; 948 949 atomic_long_inc(&unix_nr_socks); 950 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 951 err = -ENFILE; 952 goto err; 953 } 954 955 if (type == SOCK_STREAM) 956 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 957 else /*dgram and seqpacket */ 958 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 959 960 if (!sk) { 961 err = -ENOMEM; 962 goto err; 963 } 964 965 sock_init_data(sock, sk); 966 967 sk->sk_hash = unix_unbound_hash(sk); 968 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 969 sk->sk_write_space = unix_write_space; 970 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 971 sk->sk_destruct = unix_sock_destructor; 972 u = unix_sk(sk); 973 u->path.dentry = NULL; 974 u->path.mnt = NULL; 975 spin_lock_init(&u->lock); 976 atomic_long_set(&u->inflight, 0); 977 INIT_LIST_HEAD(&u->link); 978 mutex_init(&u->iolock); /* single task reading lock */ 979 mutex_init(&u->bindlock); /* single task binding lock */ 980 init_waitqueue_head(&u->peer_wait); 981 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 982 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 983 unix_insert_unbound_socket(net, sk); 984 985 sock_prot_inuse_add(net, sk->sk_prot, 1); 986 987 return sk; 988 989 err: 990 atomic_long_dec(&unix_nr_socks); 991 return ERR_PTR(err); 992 } 993 994 
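
/* A minimal userspace sketch (names illustrative, error handling omitted) of
 * how the table-driven setup above is reached through unix_create() below:
 *
 *	int s = socket(AF_UNIX, SOCK_STREAM, 0);
 *		-> unix_create() picks unix_stream_ops, and unix_create1()
 *		   allocates the sock with unix_stream_proto
 *
 *	int d = socket(AF_UNIX, SOCK_DGRAM, 0);
 *		-> unix_dgram_ops, backed by unix_dgram_proto
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
 *		-> unix_seqpacket_ops for both ends; the socks themselves are
 *		   still allocated with unix_dgram_proto in unix_create1()
 */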
static int unix_create(struct net *net, struct socket *sock, int protocol, 995 int kern) 996 { 997 struct sock *sk; 998 999 if (protocol && protocol != PF_UNIX) 1000 return -EPROTONOSUPPORT; 1001 1002 sock->state = SS_UNCONNECTED; 1003 1004 switch (sock->type) { 1005 case SOCK_STREAM: 1006 sock->ops = &unix_stream_ops; 1007 break; 1008 /* 1009 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 1010 * nothing uses it. 1011 */ 1012 case SOCK_RAW: 1013 sock->type = SOCK_DGRAM; 1014 fallthrough; 1015 case SOCK_DGRAM: 1016 sock->ops = &unix_dgram_ops; 1017 break; 1018 case SOCK_SEQPACKET: 1019 sock->ops = &unix_seqpacket_ops; 1020 break; 1021 default: 1022 return -ESOCKTNOSUPPORT; 1023 } 1024 1025 sk = unix_create1(net, sock, kern, sock->type); 1026 if (IS_ERR(sk)) 1027 return PTR_ERR(sk); 1028 1029 return 0; 1030 } 1031 1032 static int unix_release(struct socket *sock) 1033 { 1034 struct sock *sk = sock->sk; 1035 1036 if (!sk) 1037 return 0; 1038 1039 sk->sk_prot->close(sk, 0); 1040 unix_release_sock(sk, 0); 1041 sock->sk = NULL; 1042 1043 return 0; 1044 } 1045 1046 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, 1047 int type) 1048 { 1049 struct inode *inode; 1050 struct path path; 1051 struct sock *sk; 1052 int err; 1053 1054 unix_mkname_bsd(sunaddr, addr_len); 1055 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1056 if (err) 1057 goto fail; 1058 1059 err = path_permission(&path, MAY_WRITE); 1060 if (err) 1061 goto path_put; 1062 1063 err = -ECONNREFUSED; 1064 inode = d_backing_inode(path.dentry); 1065 if (!S_ISSOCK(inode->i_mode)) 1066 goto path_put; 1067 1068 sk = unix_find_socket_byinode(inode); 1069 if (!sk) 1070 goto path_put; 1071 1072 err = -EPROTOTYPE; 1073 if (sk->sk_type == type) 1074 touch_atime(&path); 1075 else 1076 goto sock_put; 1077 1078 path_put(&path); 1079 1080 return sk; 1081 1082 sock_put: 1083 sock_put(sk); 1084 path_put: 1085 path_put(&path); 1086 fail: 1087 return ERR_PTR(err); 1088 } 1089 1090 static struct sock *unix_find_abstract(struct net *net, 1091 struct sockaddr_un *sunaddr, 1092 int addr_len, int type) 1093 { 1094 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1095 struct dentry *dentry; 1096 struct sock *sk; 1097 1098 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1099 if (!sk) 1100 return ERR_PTR(-ECONNREFUSED); 1101 1102 dentry = unix_sk(sk)->path.dentry; 1103 if (dentry) 1104 touch_atime(&unix_sk(sk)->path); 1105 1106 return sk; 1107 } 1108 1109 static struct sock *unix_find_other(struct net *net, 1110 struct sockaddr_un *sunaddr, 1111 int addr_len, int type) 1112 { 1113 struct sock *sk; 1114 1115 if (sunaddr->sun_path[0]) 1116 sk = unix_find_bsd(sunaddr, addr_len, type); 1117 else 1118 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1119 1120 return sk; 1121 } 1122 1123 static int unix_autobind(struct sock *sk) 1124 { 1125 unsigned int new_hash, old_hash = sk->sk_hash; 1126 struct unix_sock *u = unix_sk(sk); 1127 struct net *net = sock_net(sk); 1128 struct unix_address *addr; 1129 u32 lastnum, ordernum; 1130 int err; 1131 1132 err = mutex_lock_interruptible(&u->bindlock); 1133 if (err) 1134 return err; 1135 1136 if (u->addr) 1137 goto out; 1138 1139 err = -ENOMEM; 1140 addr = kzalloc(sizeof(*addr) + 1141 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1142 if (!addr) 1143 goto out; 1144 1145 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1146 addr->name->sun_family = AF_UNIX; 1147 refcount_set(&addr->refcnt, 1); 1148 1149 ordernum = prandom_u32(); 1150 
lastnum = ordernum & 0xFFFFF; 1151 retry: 1152 ordernum = (ordernum + 1) & 0xFFFFF; 1153 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1154 1155 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1156 unix_table_double_lock(net, old_hash, new_hash); 1157 1158 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { 1159 unix_table_double_unlock(net, old_hash, new_hash); 1160 1161 /* __unix_find_socket_byname() may take long time if many names 1162 * are already in use. 1163 */ 1164 cond_resched(); 1165 1166 if (ordernum == lastnum) { 1167 /* Give up if all names seems to be in use. */ 1168 err = -ENOSPC; 1169 unix_release_addr(addr); 1170 goto out; 1171 } 1172 1173 goto retry; 1174 } 1175 1176 __unix_set_addr_hash(net, sk, addr, new_hash); 1177 unix_table_double_unlock(net, old_hash, new_hash); 1178 err = 0; 1179 1180 out: mutex_unlock(&u->bindlock); 1181 return err; 1182 } 1183 1184 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1185 int addr_len) 1186 { 1187 umode_t mode = S_IFSOCK | 1188 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1189 unsigned int new_hash, old_hash = sk->sk_hash; 1190 struct unix_sock *u = unix_sk(sk); 1191 struct net *net = sock_net(sk); 1192 struct user_namespace *ns; // barf... 1193 struct unix_address *addr; 1194 struct dentry *dentry; 1195 struct path parent; 1196 int err; 1197 1198 unix_mkname_bsd(sunaddr, addr_len); 1199 addr_len = strlen(sunaddr->sun_path) + 1200 offsetof(struct sockaddr_un, sun_path) + 1; 1201 1202 addr = unix_create_addr(sunaddr, addr_len); 1203 if (!addr) 1204 return -ENOMEM; 1205 1206 /* 1207 * Get the parent directory, calculate the hash for last 1208 * component. 1209 */ 1210 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1211 if (IS_ERR(dentry)) { 1212 err = PTR_ERR(dentry); 1213 goto out; 1214 } 1215 1216 /* 1217 * All right, let's create it. 1218 */ 1219 ns = mnt_user_ns(parent.mnt); 1220 err = security_path_mknod(&parent, dentry, mode, 0); 1221 if (!err) 1222 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); 1223 if (err) 1224 goto out_path; 1225 err = mutex_lock_interruptible(&u->bindlock); 1226 if (err) 1227 goto out_unlink; 1228 if (u->addr) 1229 goto out_unlock; 1230 1231 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1232 unix_table_double_lock(net, old_hash, new_hash); 1233 u->path.mnt = mntget(parent.mnt); 1234 u->path.dentry = dget(dentry); 1235 __unix_set_addr_hash(net, sk, addr, new_hash); 1236 unix_table_double_unlock(net, old_hash, new_hash); 1237 unix_insert_bsd_socket(sk); 1238 mutex_unlock(&u->bindlock); 1239 done_path_create(&parent, dentry); 1240 return 0; 1241 1242 out_unlock: 1243 mutex_unlock(&u->bindlock); 1244 err = -EINVAL; 1245 out_unlink: 1246 /* failed after successful mknod? unlink what we'd created... */ 1247 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); 1248 out_path: 1249 done_path_create(&parent, dentry); 1250 out: 1251 unix_release_addr(addr); 1252 return err == -EEXIST ? 
-EADDRINUSE : err; 1253 } 1254 1255 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1256 int addr_len) 1257 { 1258 unsigned int new_hash, old_hash = sk->sk_hash; 1259 struct unix_sock *u = unix_sk(sk); 1260 struct net *net = sock_net(sk); 1261 struct unix_address *addr; 1262 int err; 1263 1264 addr = unix_create_addr(sunaddr, addr_len); 1265 if (!addr) 1266 return -ENOMEM; 1267 1268 err = mutex_lock_interruptible(&u->bindlock); 1269 if (err) 1270 goto out; 1271 1272 if (u->addr) { 1273 err = -EINVAL; 1274 goto out_mutex; 1275 } 1276 1277 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1278 unix_table_double_lock(net, old_hash, new_hash); 1279 1280 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) 1281 goto out_spin; 1282 1283 __unix_set_addr_hash(net, sk, addr, new_hash); 1284 unix_table_double_unlock(net, old_hash, new_hash); 1285 mutex_unlock(&u->bindlock); 1286 return 0; 1287 1288 out_spin: 1289 unix_table_double_unlock(net, old_hash, new_hash); 1290 err = -EADDRINUSE; 1291 out_mutex: 1292 mutex_unlock(&u->bindlock); 1293 out: 1294 unix_release_addr(addr); 1295 return err; 1296 } 1297 1298 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1299 { 1300 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1301 struct sock *sk = sock->sk; 1302 int err; 1303 1304 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1305 sunaddr->sun_family == AF_UNIX) 1306 return unix_autobind(sk); 1307 1308 err = unix_validate_addr(sunaddr, addr_len); 1309 if (err) 1310 return err; 1311 1312 if (sunaddr->sun_path[0]) 1313 err = unix_bind_bsd(sk, sunaddr, addr_len); 1314 else 1315 err = unix_bind_abstract(sk, sunaddr, addr_len); 1316 1317 return err; 1318 } 1319 1320 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1321 { 1322 if (unlikely(sk1 == sk2) || !sk2) { 1323 unix_state_lock(sk1); 1324 return; 1325 } 1326 if (sk1 < sk2) { 1327 unix_state_lock(sk1); 1328 unix_state_lock_nested(sk2); 1329 } else { 1330 unix_state_lock(sk2); 1331 unix_state_lock_nested(sk1); 1332 } 1333 } 1334 1335 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1336 { 1337 if (unlikely(sk1 == sk2) || !sk2) { 1338 unix_state_unlock(sk1); 1339 return; 1340 } 1341 unix_state_unlock(sk1); 1342 unix_state_unlock(sk2); 1343 } 1344 1345 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1346 int alen, int flags) 1347 { 1348 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1349 struct sock *sk = sock->sk; 1350 struct sock *other; 1351 int err; 1352 1353 err = -EINVAL; 1354 if (alen < offsetofend(struct sockaddr, sa_family)) 1355 goto out; 1356 1357 if (addr->sa_family != AF_UNSPEC) { 1358 err = unix_validate_addr(sunaddr, alen); 1359 if (err) 1360 goto out; 1361 1362 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1363 !unix_sk(sk)->addr) { 1364 err = unix_autobind(sk); 1365 if (err) 1366 goto out; 1367 } 1368 1369 restart: 1370 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); 1371 if (IS_ERR(other)) { 1372 err = PTR_ERR(other); 1373 goto out; 1374 } 1375 1376 unix_state_double_lock(sk, other); 1377 1378 /* Apparently VFS overslept socket death. Retry. 
*/ 1379 if (sock_flag(other, SOCK_DEAD)) { 1380 unix_state_double_unlock(sk, other); 1381 sock_put(other); 1382 goto restart; 1383 } 1384 1385 err = -EPERM; 1386 if (!unix_may_send(sk, other)) 1387 goto out_unlock; 1388 1389 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1390 if (err) 1391 goto out_unlock; 1392 1393 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1394 } else { 1395 /* 1396 * 1003.1g breaking connected state with AF_UNSPEC 1397 */ 1398 other = NULL; 1399 unix_state_double_lock(sk, other); 1400 } 1401 1402 /* 1403 * If it was connected, reconnect. 1404 */ 1405 if (unix_peer(sk)) { 1406 struct sock *old_peer = unix_peer(sk); 1407 1408 unix_peer(sk) = other; 1409 if (!other) 1410 sk->sk_state = TCP_CLOSE; 1411 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1412 1413 unix_state_double_unlock(sk, other); 1414 1415 if (other != old_peer) 1416 unix_dgram_disconnected(sk, old_peer); 1417 sock_put(old_peer); 1418 } else { 1419 unix_peer(sk) = other; 1420 unix_state_double_unlock(sk, other); 1421 } 1422 1423 return 0; 1424 1425 out_unlock: 1426 unix_state_double_unlock(sk, other); 1427 sock_put(other); 1428 out: 1429 return err; 1430 } 1431 1432 static long unix_wait_for_peer(struct sock *other, long timeo) 1433 __releases(&unix_sk(other)->lock) 1434 { 1435 struct unix_sock *u = unix_sk(other); 1436 int sched; 1437 DEFINE_WAIT(wait); 1438 1439 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1440 1441 sched = !sock_flag(other, SOCK_DEAD) && 1442 !(other->sk_shutdown & RCV_SHUTDOWN) && 1443 unix_recvq_full(other); 1444 1445 unix_state_unlock(other); 1446 1447 if (sched) 1448 timeo = schedule_timeout(timeo); 1449 1450 finish_wait(&u->peer_wait, &wait); 1451 return timeo; 1452 } 1453 1454 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1455 int addr_len, int flags) 1456 { 1457 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1458 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; 1459 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1460 struct net *net = sock_net(sk); 1461 struct sk_buff *skb = NULL; 1462 long timeo; 1463 int err; 1464 int st; 1465 1466 err = unix_validate_addr(sunaddr, addr_len); 1467 if (err) 1468 goto out; 1469 1470 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1471 err = unix_autobind(sk); 1472 if (err) 1473 goto out; 1474 } 1475 1476 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1477 1478 /* First of all allocate resources. 1479 If we will make it after state is locked, 1480 we will have to recheck all again in any case. 1481 */ 1482 1483 /* create new sock for complete connection */ 1484 newsk = unix_create1(net, NULL, 0, sock->type); 1485 if (IS_ERR(newsk)) { 1486 err = PTR_ERR(newsk); 1487 newsk = NULL; 1488 goto out; 1489 } 1490 1491 err = -ENOMEM; 1492 1493 /* Allocate skb for sending to listening sock */ 1494 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1495 if (skb == NULL) 1496 goto out; 1497 1498 restart: 1499 /* Find listening sock. */ 1500 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1501 if (IS_ERR(other)) { 1502 err = PTR_ERR(other); 1503 other = NULL; 1504 goto out; 1505 } 1506 1507 /* Latch state of peer */ 1508 unix_state_lock(other); 1509 1510 /* Apparently VFS overslept socket death. Retry. 
*/ 1511 if (sock_flag(other, SOCK_DEAD)) { 1512 unix_state_unlock(other); 1513 sock_put(other); 1514 goto restart; 1515 } 1516 1517 err = -ECONNREFUSED; 1518 if (other->sk_state != TCP_LISTEN) 1519 goto out_unlock; 1520 if (other->sk_shutdown & RCV_SHUTDOWN) 1521 goto out_unlock; 1522 1523 if (unix_recvq_full(other)) { 1524 err = -EAGAIN; 1525 if (!timeo) 1526 goto out_unlock; 1527 1528 timeo = unix_wait_for_peer(other, timeo); 1529 1530 err = sock_intr_errno(timeo); 1531 if (signal_pending(current)) 1532 goto out; 1533 sock_put(other); 1534 goto restart; 1535 } 1536 1537 /* Latch our state. 1538 1539 It is tricky place. We need to grab our state lock and cannot 1540 drop lock on peer. It is dangerous because deadlock is 1541 possible. Connect to self case and simultaneous 1542 attempt to connect are eliminated by checking socket 1543 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1544 check this before attempt to grab lock. 1545 1546 Well, and we have to recheck the state after socket locked. 1547 */ 1548 st = sk->sk_state; 1549 1550 switch (st) { 1551 case TCP_CLOSE: 1552 /* This is ok... continue with connect */ 1553 break; 1554 case TCP_ESTABLISHED: 1555 /* Socket is already connected */ 1556 err = -EISCONN; 1557 goto out_unlock; 1558 default: 1559 err = -EINVAL; 1560 goto out_unlock; 1561 } 1562 1563 unix_state_lock_nested(sk); 1564 1565 if (sk->sk_state != st) { 1566 unix_state_unlock(sk); 1567 unix_state_unlock(other); 1568 sock_put(other); 1569 goto restart; 1570 } 1571 1572 err = security_unix_stream_connect(sk, other, newsk); 1573 if (err) { 1574 unix_state_unlock(sk); 1575 goto out_unlock; 1576 } 1577 1578 /* The way is open! Fastly set all the necessary fields... */ 1579 1580 sock_hold(sk); 1581 unix_peer(newsk) = sk; 1582 newsk->sk_state = TCP_ESTABLISHED; 1583 newsk->sk_type = sk->sk_type; 1584 init_peercred(newsk); 1585 newu = unix_sk(newsk); 1586 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1587 otheru = unix_sk(other); 1588 1589 /* copy address information from listening to new sock 1590 * 1591 * The contents of *(otheru->addr) and otheru->path 1592 * are seen fully set up here, since we have found 1593 * otheru in hash under its lock. Insertion into the 1594 * hash chain we'd found it in had been done in an 1595 * earlier critical area protected by the chain's lock, 1596 * the same one where we'd set *(otheru->addr) contents, 1597 * as well as otheru->path and otheru->addr itself. 1598 * 1599 * Using smp_store_release() here to set newu->addr 1600 * is enough to make those stores, as well as stores 1601 * to newu->path visible to anyone who gets newu->addr 1602 * by smp_load_acquire(). IOW, the same warranties 1603 * as for unix_sock instances bound in unix_bind() or 1604 * in unix_autobind(). 
1605 */ 1606 if (otheru->path.dentry) { 1607 path_get(&otheru->path); 1608 newu->path = otheru->path; 1609 } 1610 refcount_inc(&otheru->addr->refcnt); 1611 smp_store_release(&newu->addr, otheru->addr); 1612 1613 /* Set credentials */ 1614 copy_peercred(sk, other); 1615 1616 sock->state = SS_CONNECTED; 1617 sk->sk_state = TCP_ESTABLISHED; 1618 sock_hold(newsk); 1619 1620 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1621 unix_peer(sk) = newsk; 1622 1623 unix_state_unlock(sk); 1624 1625 /* take ten and send info to listening sock */ 1626 spin_lock(&other->sk_receive_queue.lock); 1627 __skb_queue_tail(&other->sk_receive_queue, skb); 1628 spin_unlock(&other->sk_receive_queue.lock); 1629 unix_state_unlock(other); 1630 other->sk_data_ready(other); 1631 sock_put(other); 1632 return 0; 1633 1634 out_unlock: 1635 if (other) 1636 unix_state_unlock(other); 1637 1638 out: 1639 kfree_skb(skb); 1640 if (newsk) 1641 unix_release_sock(newsk, 0); 1642 if (other) 1643 sock_put(other); 1644 return err; 1645 } 1646 1647 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1648 { 1649 struct sock *ska = socka->sk, *skb = sockb->sk; 1650 1651 /* Join our sockets back to back */ 1652 sock_hold(ska); 1653 sock_hold(skb); 1654 unix_peer(ska) = skb; 1655 unix_peer(skb) = ska; 1656 init_peercred(ska); 1657 init_peercred(skb); 1658 1659 ska->sk_state = TCP_ESTABLISHED; 1660 skb->sk_state = TCP_ESTABLISHED; 1661 socka->state = SS_CONNECTED; 1662 sockb->state = SS_CONNECTED; 1663 return 0; 1664 } 1665 1666 static void unix_sock_inherit_flags(const struct socket *old, 1667 struct socket *new) 1668 { 1669 if (test_bit(SOCK_PASSCRED, &old->flags)) 1670 set_bit(SOCK_PASSCRED, &new->flags); 1671 if (test_bit(SOCK_PASSSEC, &old->flags)) 1672 set_bit(SOCK_PASSSEC, &new->flags); 1673 } 1674 1675 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1676 bool kern) 1677 { 1678 struct sock *sk = sock->sk; 1679 struct sock *tsk; 1680 struct sk_buff *skb; 1681 int err; 1682 1683 err = -EOPNOTSUPP; 1684 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1685 goto out; 1686 1687 err = -EINVAL; 1688 if (sk->sk_state != TCP_LISTEN) 1689 goto out; 1690 1691 /* If socket state is TCP_LISTEN it cannot change (for now...), 1692 * so that no locks are necessary. 1693 */ 1694 1695 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1696 &err); 1697 if (!skb) { 1698 /* This means receive shutdown. 
*/ 1699 if (err == 0) 1700 err = -EINVAL; 1701 goto out; 1702 } 1703 1704 tsk = skb->sk; 1705 skb_free_datagram(sk, skb); 1706 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1707 1708 /* attach accepted sock to socket */ 1709 unix_state_lock(tsk); 1710 newsock->state = SS_CONNECTED; 1711 unix_sock_inherit_flags(sock, newsock); 1712 sock_graft(tsk, newsock); 1713 unix_state_unlock(tsk); 1714 return 0; 1715 1716 out: 1717 return err; 1718 } 1719 1720 1721 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1722 { 1723 struct sock *sk = sock->sk; 1724 struct unix_address *addr; 1725 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1726 int err = 0; 1727 1728 if (peer) { 1729 sk = unix_peer_get(sk); 1730 1731 err = -ENOTCONN; 1732 if (!sk) 1733 goto out; 1734 err = 0; 1735 } else { 1736 sock_hold(sk); 1737 } 1738 1739 addr = smp_load_acquire(&unix_sk(sk)->addr); 1740 if (!addr) { 1741 sunaddr->sun_family = AF_UNIX; 1742 sunaddr->sun_path[0] = 0; 1743 err = offsetof(struct sockaddr_un, sun_path); 1744 } else { 1745 err = addr->len; 1746 memcpy(sunaddr, addr->name, addr->len); 1747 } 1748 sock_put(sk); 1749 out: 1750 return err; 1751 } 1752 1753 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1754 { 1755 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1756 1757 /* 1758 * Garbage collection of unix sockets starts by selecting a set of 1759 * candidate sockets which have reference only from being in flight 1760 * (total_refs == inflight_refs). This condition is checked once during 1761 * the candidate collection phase, and candidates are marked as such, so 1762 * that non-candidates can later be ignored. While inflight_refs is 1763 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1764 * is an instantaneous decision. 1765 * 1766 * Once a candidate, however, the socket must not be reinstalled into a 1767 * file descriptor while the garbage collection is in progress. 1768 * 1769 * If the above conditions are met, then the directed graph of 1770 * candidates (*) does not change while unix_gc_lock is held. 1771 * 1772 * Any operations that changes the file count through file descriptors 1773 * (dup, close, sendmsg) does not change the graph since candidates are 1774 * not installed in fds. 1775 * 1776 * Dequeing a candidate via recvmsg would install it into an fd, but 1777 * that takes unix_gc_lock to decrement the inflight count, so it's 1778 * serialized with garbage collection. 1779 * 1780 * MSG_PEEK is special in that it does not change the inflight count, 1781 * yet does install the socket into an fd. The following lock/unlock 1782 * pair is to ensure serialization with garbage collection. It must be 1783 * done between incrementing the file count and installing the file into 1784 * an fd. 1785 * 1786 * If garbage collection starts after the barrier provided by the 1787 * lock/unlock, then it will see the elevated refcount and not mark this 1788 * as a candidate. If a garbage collection is already in progress 1789 * before the file count was incremented, then the lock/unlock pair will 1790 * ensure that garbage collection is finished before progressing to 1791 * installing the fd. 1792 * 1793 * (*) A -> B where B is on the queue of A or B is on the queue of C 1794 * which is on the queue of listening socket A. 
1795 */ 1796 spin_lock(&unix_gc_lock); 1797 spin_unlock(&unix_gc_lock); 1798 } 1799 1800 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1801 { 1802 int err = 0; 1803 1804 UNIXCB(skb).pid = get_pid(scm->pid); 1805 UNIXCB(skb).uid = scm->creds.uid; 1806 UNIXCB(skb).gid = scm->creds.gid; 1807 UNIXCB(skb).fp = NULL; 1808 unix_get_secdata(scm, skb); 1809 if (scm->fp && send_fds) 1810 err = unix_attach_fds(scm, skb); 1811 1812 skb->destructor = unix_destruct_scm; 1813 return err; 1814 } 1815 1816 static bool unix_passcred_enabled(const struct socket *sock, 1817 const struct sock *other) 1818 { 1819 return test_bit(SOCK_PASSCRED, &sock->flags) || 1820 !other->sk_socket || 1821 test_bit(SOCK_PASSCRED, &other->sk_socket->flags); 1822 } 1823 1824 /* 1825 * Some apps rely on write() giving SCM_CREDENTIALS 1826 * We include credentials if source or destination socket 1827 * asserted SOCK_PASSCRED. 1828 */ 1829 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1830 const struct sock *other) 1831 { 1832 if (UNIXCB(skb).pid) 1833 return; 1834 if (unix_passcred_enabled(sock, other)) { 1835 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1836 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1837 } 1838 } 1839 1840 static int maybe_init_creds(struct scm_cookie *scm, 1841 struct socket *socket, 1842 const struct sock *other) 1843 { 1844 int err; 1845 struct msghdr msg = { .msg_controllen = 0 }; 1846 1847 err = scm_send(socket, &msg, scm, false); 1848 if (err) 1849 return err; 1850 1851 if (unix_passcred_enabled(socket, other)) { 1852 scm->pid = get_pid(task_tgid(current)); 1853 current_uid_gid(&scm->creds.uid, &scm->creds.gid); 1854 } 1855 return err; 1856 } 1857 1858 static bool unix_skb_scm_eq(struct sk_buff *skb, 1859 struct scm_cookie *scm) 1860 { 1861 return UNIXCB(skb).pid == scm->pid && 1862 uid_eq(UNIXCB(skb).uid, scm->creds.uid) && 1863 gid_eq(UNIXCB(skb).gid, scm->creds.gid) && 1864 unix_secdata_eq(scm, skb); 1865 } 1866 1867 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1868 { 1869 struct scm_fp_list *fp = UNIXCB(skb).fp; 1870 struct unix_sock *u = unix_sk(sk); 1871 1872 if (unlikely(fp && fp->count)) 1873 atomic_add(fp->count, &u->scm_stat.nr_fds); 1874 } 1875 1876 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1877 { 1878 struct scm_fp_list *fp = UNIXCB(skb).fp; 1879 struct unix_sock *u = unix_sk(sk); 1880 1881 if (unlikely(fp && fp->count)) 1882 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1883 } 1884 1885 /* 1886 * Send AF_UNIX data. 
1887 */ 1888 1889 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1890 size_t len) 1891 { 1892 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1893 struct sock *sk = sock->sk, *other = NULL; 1894 struct unix_sock *u = unix_sk(sk); 1895 struct scm_cookie scm; 1896 struct sk_buff *skb; 1897 int data_len = 0; 1898 int sk_locked; 1899 long timeo; 1900 int err; 1901 1902 wait_for_unix_gc(); 1903 err = scm_send(sock, msg, &scm, false); 1904 if (err < 0) 1905 return err; 1906 1907 err = -EOPNOTSUPP; 1908 if (msg->msg_flags&MSG_OOB) 1909 goto out; 1910 1911 if (msg->msg_namelen) { 1912 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1913 if (err) 1914 goto out; 1915 } else { 1916 sunaddr = NULL; 1917 err = -ENOTCONN; 1918 other = unix_peer_get(sk); 1919 if (!other) 1920 goto out; 1921 } 1922 1923 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1924 err = unix_autobind(sk); 1925 if (err) 1926 goto out; 1927 } 1928 1929 err = -EMSGSIZE; 1930 if (len > sk->sk_sndbuf - 32) 1931 goto out; 1932 1933 if (len > SKB_MAX_ALLOC) { 1934 data_len = min_t(size_t, 1935 len - SKB_MAX_ALLOC, 1936 MAX_SKB_FRAGS * PAGE_SIZE); 1937 data_len = PAGE_ALIGN(data_len); 1938 1939 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1940 } 1941 1942 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1943 msg->msg_flags & MSG_DONTWAIT, &err, 1944 PAGE_ALLOC_COSTLY_ORDER); 1945 if (skb == NULL) 1946 goto out; 1947 1948 err = unix_scm_to_skb(&scm, skb, true); 1949 if (err < 0) 1950 goto out_free; 1951 1952 skb_put(skb, len - data_len); 1953 skb->data_len = data_len; 1954 skb->len = len; 1955 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1956 if (err) 1957 goto out_free; 1958 1959 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1960 1961 restart: 1962 if (!other) { 1963 err = -ECONNRESET; 1964 if (sunaddr == NULL) 1965 goto out_free; 1966 1967 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, 1968 sk->sk_type); 1969 if (IS_ERR(other)) { 1970 err = PTR_ERR(other); 1971 other = NULL; 1972 goto out_free; 1973 } 1974 } 1975 1976 if (sk_filter(other, skb) < 0) { 1977 /* Toss the packet but do not return any error to the sender */ 1978 err = len; 1979 goto out_free; 1980 } 1981 1982 sk_locked = 0; 1983 unix_state_lock(other); 1984 restart_locked: 1985 err = -EPERM; 1986 if (!unix_may_send(sk, other)) 1987 goto out_unlock; 1988 1989 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1990 /* 1991 * Check with 1003.1g - what should 1992 * datagram error 1993 */ 1994 unix_state_unlock(other); 1995 sock_put(other); 1996 1997 if (!sk_locked) 1998 unix_state_lock(sk); 1999 2000 err = 0; 2001 if (unix_peer(sk) == other) { 2002 unix_peer(sk) = NULL; 2003 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 2004 2005 unix_state_unlock(sk); 2006 2007 sk->sk_state = TCP_CLOSE; 2008 unix_dgram_disconnected(sk, other); 2009 sock_put(other); 2010 err = -ECONNREFUSED; 2011 } else { 2012 unix_state_unlock(sk); 2013 } 2014 2015 other = NULL; 2016 if (err) 2017 goto out_free; 2018 goto restart; 2019 } 2020 2021 err = -EPIPE; 2022 if (other->sk_shutdown & RCV_SHUTDOWN) 2023 goto out_unlock; 2024 2025 if (sk->sk_type != SOCK_SEQPACKET) { 2026 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 2027 if (err) 2028 goto out_unlock; 2029 } 2030 2031 /* other == sk && unix_peer(other) != sk if 2032 * - unix_peer(sk) == NULL, destination address bound to sk 2033 * - unix_peer(sk) == sk by time of get but disconnected before lock 2034 */ 2035 if (other != sk && 2036 
unlikely(unix_peer(other) != sk && 2037 unix_recvq_full_lockless(other))) { 2038 if (timeo) { 2039 timeo = unix_wait_for_peer(other, timeo); 2040 2041 err = sock_intr_errno(timeo); 2042 if (signal_pending(current)) 2043 goto out_free; 2044 2045 goto restart; 2046 } 2047 2048 if (!sk_locked) { 2049 unix_state_unlock(other); 2050 unix_state_double_lock(sk, other); 2051 } 2052 2053 if (unix_peer(sk) != other || 2054 unix_dgram_peer_wake_me(sk, other)) { 2055 err = -EAGAIN; 2056 sk_locked = 1; 2057 goto out_unlock; 2058 } 2059 2060 if (!sk_locked) { 2061 sk_locked = 1; 2062 goto restart_locked; 2063 } 2064 } 2065 2066 if (unlikely(sk_locked)) 2067 unix_state_unlock(sk); 2068 2069 if (sock_flag(other, SOCK_RCVTSTAMP)) 2070 __net_timestamp(skb); 2071 maybe_add_creds(skb, sock, other); 2072 scm_stat_add(other, skb); 2073 skb_queue_tail(&other->sk_receive_queue, skb); 2074 unix_state_unlock(other); 2075 other->sk_data_ready(other); 2076 sock_put(other); 2077 scm_destroy(&scm); 2078 return len; 2079 2080 out_unlock: 2081 if (sk_locked) 2082 unix_state_unlock(sk); 2083 unix_state_unlock(other); 2084 out_free: 2085 kfree_skb(skb); 2086 out: 2087 if (other) 2088 sock_put(other); 2089 scm_destroy(&scm); 2090 return err; 2091 } 2092 2093 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2094 * bytes, and a minimum of a full page. 2095 */ 2096 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2097 2098 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2099 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) 2100 { 2101 struct unix_sock *ousk = unix_sk(other); 2102 struct sk_buff *skb; 2103 int err = 0; 2104 2105 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2106 2107 if (!skb) 2108 return err; 2109 2110 skb_put(skb, 1); 2111 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2112 2113 if (err) { 2114 kfree_skb(skb); 2115 return err; 2116 } 2117 2118 unix_state_lock(other); 2119 2120 if (sock_flag(other, SOCK_DEAD) || 2121 (other->sk_shutdown & RCV_SHUTDOWN)) { 2122 unix_state_unlock(other); 2123 kfree_skb(skb); 2124 return -EPIPE; 2125 } 2126 2127 maybe_add_creds(skb, sock, other); 2128 skb_get(skb); 2129 2130 if (ousk->oob_skb) 2131 consume_skb(ousk->oob_skb); 2132 2133 WRITE_ONCE(ousk->oob_skb, skb); 2134 2135 scm_stat_add(other, skb); 2136 skb_queue_tail(&other->sk_receive_queue, skb); 2137 sk_send_sigurg(other); 2138 unix_state_unlock(other); 2139 other->sk_data_ready(other); 2140 2141 return err; 2142 } 2143 #endif 2144 2145 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2146 size_t len) 2147 { 2148 struct sock *sk = sock->sk; 2149 struct sock *other = NULL; 2150 int err, size; 2151 struct sk_buff *skb; 2152 int sent = 0; 2153 struct scm_cookie scm; 2154 bool fds_sent = false; 2155 int data_len; 2156 2157 wait_for_unix_gc(); 2158 err = scm_send(sock, msg, &scm, false); 2159 if (err < 0) 2160 return err; 2161 2162 err = -EOPNOTSUPP; 2163 if (msg->msg_flags & MSG_OOB) { 2164 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2165 if (len) 2166 len--; 2167 else 2168 #endif 2169 goto out_err; 2170 } 2171 2172 if (msg->msg_namelen) { 2173 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2174 goto out_err; 2175 } else { 2176 err = -ENOTCONN; 2177 other = unix_peer(sk); 2178 if (!other) 2179 goto out_err; 2180 } 2181 2182 if (sk->sk_shutdown & SEND_SHUTDOWN) 2183 goto pipe_err; 2184 2185 while (sent < len) { 2186 size = len - sent; 2187 2188 /* Keep two messages in the pipe so it schedules better */ 2189 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2190 2191 /* allow fallback to order-0 allocations */ 2192 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2193 2194 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2195 2196 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2197 2198 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2199 msg->msg_flags & MSG_DONTWAIT, &err, 2200 get_order(UNIX_SKB_FRAGS_SZ)); 2201 if (!skb) 2202 goto out_err; 2203 2204 /* Only send the fds in the first buffer */ 2205 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2206 if (err < 0) { 2207 kfree_skb(skb); 2208 goto out_err; 2209 } 2210 fds_sent = true; 2211 2212 skb_put(skb, size - data_len); 2213 skb->data_len = data_len; 2214 skb->len = size; 2215 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2216 if (err) { 2217 kfree_skb(skb); 2218 goto out_err; 2219 } 2220 2221 unix_state_lock(other); 2222 2223 if (sock_flag(other, SOCK_DEAD) || 2224 (other->sk_shutdown & RCV_SHUTDOWN)) 2225 goto pipe_err_free; 2226 2227 maybe_add_creds(skb, sock, other); 2228 scm_stat_add(other, skb); 2229 skb_queue_tail(&other->sk_receive_queue, skb); 2230 unix_state_unlock(other); 2231 other->sk_data_ready(other); 2232 sent += size; 2233 } 2234 2235 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2236 if (msg->msg_flags & MSG_OOB) { 2237 err = queue_oob(sock, msg, other); 2238 if (err) 2239 goto out_err; 2240 sent++; 2241 } 2242 #endif 2243 2244 scm_destroy(&scm); 2245 2246 return sent; 2247 2248 pipe_err_free: 2249 unix_state_unlock(other); 2250 kfree_skb(skb); 2251 pipe_err: 2252 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2253 send_sig(SIGPIPE, current, 0); 2254 err = -EPIPE; 2255 out_err: 2256 scm_destroy(&scm); 2257 return sent ? : err; 2258 } 2259 2260 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2261 int offset, size_t size, int flags) 2262 { 2263 int err; 2264 bool send_sigpipe = false; 2265 bool init_scm = true; 2266 struct scm_cookie scm; 2267 struct sock *other, *sk = socket->sk; 2268 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2269 2270 if (flags & MSG_OOB) 2271 return -EOPNOTSUPP; 2272 2273 other = unix_peer(sk); 2274 if (!other || sk->sk_state != TCP_ESTABLISHED) 2275 return -ENOTCONN; 2276 2277 if (false) { 2278 alloc_skb: 2279 unix_state_unlock(other); 2280 mutex_unlock(&unix_sk(other)->iolock); 2281 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2282 &err, 0); 2283 if (!newskb) 2284 goto err; 2285 } 2286 2287 /* we must acquire iolock as we modify already present 2288 * skbs in the sk_receive_queue and mess with skb->len 2289 */ 2290 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2291 if (err) { 2292 err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; 2293 goto err; 2294 } 2295 2296 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2297 err = -EPIPE; 2298 send_sigpipe = true; 2299 goto err_unlock; 2300 } 2301 2302 unix_state_lock(other); 2303 2304 if (sock_flag(other, SOCK_DEAD) || 2305 other->sk_shutdown & RCV_SHUTDOWN) { 2306 err = -EPIPE; 2307 send_sigpipe = true; 2308 goto err_state_unlock; 2309 } 2310 2311 if (init_scm) { 2312 err = maybe_init_creds(&scm, socket, other); 2313 if (err) 2314 goto err_state_unlock; 2315 init_scm = false; 2316 } 2317 2318 skb = skb_peek_tail(&other->sk_receive_queue); 2319 if (tail && tail == skb) { 2320 skb = newskb; 2321 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2322 if (newskb) { 2323 skb = newskb; 2324 } else { 2325 tail = skb; 2326 goto alloc_skb; 2327 } 2328 } else if (newskb) { 2329 /* this is fast path, we don't necessarily need to 2330 * call to kfree_skb even though with newskb == NULL 2331 * this - does no harm 2332 */ 2333 consume_skb(newskb); 2334 newskb = NULL; 2335 } 2336 2337 if (skb_append_pagefrags(skb, page, offset, size)) { 2338 tail = skb; 2339 goto alloc_skb; 2340 } 2341 2342 skb->len += size; 2343 skb->data_len += size; 2344 skb->truesize += size; 2345 refcount_add(size, &sk->sk_wmem_alloc); 2346 2347 if (newskb) { 2348 err = unix_scm_to_skb(&scm, skb, false); 2349 if (err) 2350 goto err_state_unlock; 2351 spin_lock(&other->sk_receive_queue.lock); 2352 __skb_queue_tail(&other->sk_receive_queue, newskb); 2353 spin_unlock(&other->sk_receive_queue.lock); 2354 } 2355 2356 unix_state_unlock(other); 2357 mutex_unlock(&unix_sk(other)->iolock); 2358 2359 other->sk_data_ready(other); 2360 scm_destroy(&scm); 2361 return size; 2362 2363 err_state_unlock: 2364 unix_state_unlock(other); 2365 err_unlock: 2366 mutex_unlock(&unix_sk(other)->iolock); 2367 err: 2368 kfree_skb(newskb); 2369 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2370 send_sig(SIGPIPE, current, 0); 2371 if (!init_scm) 2372 scm_destroy(&scm); 2373 return err; 2374 } 2375 2376 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2377 size_t len) 2378 { 2379 int err; 2380 struct sock *sk = sock->sk; 2381 2382 err = sock_error(sk); 2383 if (err) 2384 return err; 2385 2386 if (sk->sk_state != TCP_ESTABLISHED) 2387 return -ENOTCONN; 2388 2389 if (msg->msg_namelen) 2390 msg->msg_namelen = 0; 2391 2392 return unix_dgram_sendmsg(sock, msg, len); 2393 } 2394 2395 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2396 size_t size, int flags) 2397 { 2398 struct sock *sk = sock->sk; 2399 2400 if (sk->sk_state != TCP_ESTABLISHED) 2401 return -ENOTCONN; 2402 2403 return unix_dgram_recvmsg(sock, msg, size, flags); 2404 } 2405 2406 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2407 { 2408 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2409 2410 if (addr) { 2411 msg->msg_namelen = addr->len; 2412 memcpy(msg->msg_name, addr->name, addr->len); 2413 } 2414 } 2415 2416 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2417 int flags) 2418 { 2419 struct scm_cookie scm; 2420 struct socket *sock = sk->sk_socket; 2421 struct unix_sock *u = unix_sk(sk); 2422 struct sk_buff *skb, *last; 2423 long timeo; 2424 int skip; 2425 int err; 2426 2427 err = -EOPNOTSUPP; 2428 if (flags&MSG_OOB) 2429 goto out; 2430 2431 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2432 2433 do { 2434 mutex_lock(&u->iolock); 2435 2436 skip = sk_peek_offset(sk, flags); 2437 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2438 &skip, 
&err, &last); 2439 if (skb) { 2440 if (!(flags & MSG_PEEK)) 2441 scm_stat_del(sk, skb); 2442 break; 2443 } 2444 2445 mutex_unlock(&u->iolock); 2446 2447 if (err != -EAGAIN) 2448 break; 2449 } while (timeo && 2450 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2451 &err, &timeo, last)); 2452 2453 if (!skb) { /* implies iolock unlocked */ 2454 unix_state_lock(sk); 2455 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2456 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2457 (sk->sk_shutdown & RCV_SHUTDOWN)) 2458 err = 0; 2459 unix_state_unlock(sk); 2460 goto out; 2461 } 2462 2463 if (wq_has_sleeper(&u->peer_wait)) 2464 wake_up_interruptible_sync_poll(&u->peer_wait, 2465 EPOLLOUT | EPOLLWRNORM | 2466 EPOLLWRBAND); 2467 2468 if (msg->msg_name) 2469 unix_copy_addr(msg, skb->sk); 2470 2471 if (size > skb->len - skip) 2472 size = skb->len - skip; 2473 else if (size < skb->len - skip) 2474 msg->msg_flags |= MSG_TRUNC; 2475 2476 err = skb_copy_datagram_msg(skb, skip, msg, size); 2477 if (err) 2478 goto out_free; 2479 2480 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2481 __sock_recv_timestamp(msg, sk, skb); 2482 2483 memset(&scm, 0, sizeof(scm)); 2484 2485 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2486 unix_set_secdata(&scm, skb); 2487 2488 if (!(flags & MSG_PEEK)) { 2489 if (UNIXCB(skb).fp) 2490 unix_detach_fds(&scm, skb); 2491 2492 sk_peek_offset_bwd(sk, skb->len); 2493 } else { 2494 /* It is questionable: on PEEK we could: 2495 - do not return fds - good, but too simple 8) 2496 - return fds, and do not return them on read (old strategy, 2497 apparently wrong) 2498 - clone fds (I chose it for now, it is the most universal 2499 solution) 2500 2501 POSIX 1003.1g does not actually define this clearly 2502 at all. POSIX 1003.1g doesn't define a lot of things 2503 clearly however! 2504 2505 */ 2506 2507 sk_peek_offset_fwd(sk, size); 2508 2509 if (UNIXCB(skb).fp) 2510 unix_peek_fds(&scm, skb); 2511 } 2512 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2513 2514 scm_recv(sock, msg, &scm, flags); 2515 2516 out_free: 2517 skb_free_datagram(sk, skb); 2518 mutex_unlock(&u->iolock); 2519 out: 2520 return err; 2521 } 2522 2523 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2524 int flags) 2525 { 2526 struct sock *sk = sock->sk; 2527 2528 #ifdef CONFIG_BPF_SYSCALL 2529 const struct proto *prot = READ_ONCE(sk->sk_prot); 2530 2531 if (prot != &unix_dgram_proto) 2532 return prot->recvmsg(sk, msg, size, flags, NULL); 2533 #endif 2534 return __unix_dgram_recvmsg(sk, msg, size, flags); 2535 } 2536 2537 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2538 { 2539 struct unix_sock *u = unix_sk(sk); 2540 struct sk_buff *skb; 2541 int err, copied; 2542 2543 mutex_lock(&u->iolock); 2544 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2545 mutex_unlock(&u->iolock); 2546 if (!skb) 2547 return err; 2548 2549 copied = recv_actor(sk, skb); 2550 kfree_skb(skb); 2551 2552 return copied; 2553 } 2554 2555 /* 2556 * Sleep until more data has arrived. But check for races.. 
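 * (The receive-queue tail, its length, sk_err and the shutdown bits are
 *  re-checked under unix_state_lock() after prepare_to_wait(), so a
 *  concurrent wakeup is not lost before schedule_timeout().)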
2557 */ 2558 static long unix_stream_data_wait(struct sock *sk, long timeo, 2559 struct sk_buff *last, unsigned int last_len, 2560 bool freezable) 2561 { 2562 struct sk_buff *tail; 2563 DEFINE_WAIT(wait); 2564 2565 unix_state_lock(sk); 2566 2567 for (;;) { 2568 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2569 2570 tail = skb_peek_tail(&sk->sk_receive_queue); 2571 if (tail != last || 2572 (tail && tail->len != last_len) || 2573 sk->sk_err || 2574 (sk->sk_shutdown & RCV_SHUTDOWN) || 2575 signal_pending(current) || 2576 !timeo) 2577 break; 2578 2579 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2580 unix_state_unlock(sk); 2581 if (freezable) 2582 timeo = freezable_schedule_timeout(timeo); 2583 else 2584 timeo = schedule_timeout(timeo); 2585 unix_state_lock(sk); 2586 2587 if (sock_flag(sk, SOCK_DEAD)) 2588 break; 2589 2590 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2591 } 2592 2593 finish_wait(sk_sleep(sk), &wait); 2594 unix_state_unlock(sk); 2595 return timeo; 2596 } 2597 2598 static unsigned int unix_skb_len(const struct sk_buff *skb) 2599 { 2600 return skb->len - UNIXCB(skb).consumed; 2601 } 2602 2603 struct unix_stream_read_state { 2604 int (*recv_actor)(struct sk_buff *, int, int, 2605 struct unix_stream_read_state *); 2606 struct socket *socket; 2607 struct msghdr *msg; 2608 struct pipe_inode_info *pipe; 2609 size_t size; 2610 int flags; 2611 unsigned int splice_flags; 2612 }; 2613 2614 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2615 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2616 { 2617 struct socket *sock = state->socket; 2618 struct sock *sk = sock->sk; 2619 struct unix_sock *u = unix_sk(sk); 2620 int chunk = 1; 2621 struct sk_buff *oob_skb; 2622 2623 mutex_lock(&u->iolock); 2624 unix_state_lock(sk); 2625 2626 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2627 unix_state_unlock(sk); 2628 mutex_unlock(&u->iolock); 2629 return -EINVAL; 2630 } 2631 2632 oob_skb = u->oob_skb; 2633 2634 if (!(state->flags & MSG_PEEK)) 2635 WRITE_ONCE(u->oob_skb, NULL); 2636 2637 unix_state_unlock(sk); 2638 2639 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2640 2641 if (!(state->flags & MSG_PEEK)) { 2642 UNIXCB(oob_skb).consumed += 1; 2643 kfree_skb(oob_skb); 2644 } 2645 2646 mutex_unlock(&u->iolock); 2647 2648 if (chunk < 0) 2649 return -EFAULT; 2650 2651 state->msg->msg_flags |= MSG_OOB; 2652 return 1; 2653 } 2654 2655 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2656 int flags, int copied) 2657 { 2658 struct unix_sock *u = unix_sk(sk); 2659 2660 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2661 skb_unlink(skb, &sk->sk_receive_queue); 2662 consume_skb(skb); 2663 skb = NULL; 2664 } else { 2665 if (skb == u->oob_skb) { 2666 if (copied) { 2667 skb = NULL; 2668 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2669 if (!(flags & MSG_PEEK)) { 2670 WRITE_ONCE(u->oob_skb, NULL); 2671 consume_skb(skb); 2672 } 2673 } else if (!(flags & MSG_PEEK)) { 2674 skb_unlink(skb, &sk->sk_receive_queue); 2675 consume_skb(skb); 2676 skb = skb_peek(&sk->sk_receive_queue); 2677 } 2678 } 2679 } 2680 return skb; 2681 } 2682 #endif 2683 2684 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2685 { 2686 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2687 return -ENOTCONN; 2688 2689 return unix_read_skb(sk, recv_actor); 2690 } 2691 2692 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2693 bool freezable) 2694 { 2695 struct scm_cookie scm; 2696 struct socket *sock = state->socket; 2697 struct sock *sk = sock->sk; 2698 
struct unix_sock *u = unix_sk(sk); 2699 int copied = 0; 2700 int flags = state->flags; 2701 int noblock = flags & MSG_DONTWAIT; 2702 bool check_creds = false; 2703 int target; 2704 int err = 0; 2705 long timeo; 2706 int skip; 2707 size_t size = state->size; 2708 unsigned int last_len; 2709 2710 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2711 err = -EINVAL; 2712 goto out; 2713 } 2714 2715 if (unlikely(flags & MSG_OOB)) { 2716 err = -EOPNOTSUPP; 2717 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2718 err = unix_stream_recv_urg(state); 2719 #endif 2720 goto out; 2721 } 2722 2723 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2724 timeo = sock_rcvtimeo(sk, noblock); 2725 2726 memset(&scm, 0, sizeof(scm)); 2727 2728 /* Lock the socket to prevent queue disordering 2729 * while sleeps in memcpy_tomsg 2730 */ 2731 mutex_lock(&u->iolock); 2732 2733 skip = max(sk_peek_offset(sk, flags), 0); 2734 2735 do { 2736 int chunk; 2737 bool drop_skb; 2738 struct sk_buff *skb, *last; 2739 2740 redo: 2741 unix_state_lock(sk); 2742 if (sock_flag(sk, SOCK_DEAD)) { 2743 err = -ECONNRESET; 2744 goto unlock; 2745 } 2746 last = skb = skb_peek(&sk->sk_receive_queue); 2747 last_len = last ? last->len : 0; 2748 2749 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2750 if (skb) { 2751 skb = manage_oob(skb, sk, flags, copied); 2752 if (!skb) { 2753 unix_state_unlock(sk); 2754 if (copied) 2755 break; 2756 goto redo; 2757 } 2758 } 2759 #endif 2760 again: 2761 if (skb == NULL) { 2762 if (copied >= target) 2763 goto unlock; 2764 2765 /* 2766 * POSIX 1003.1g mandates this order. 2767 */ 2768 2769 err = sock_error(sk); 2770 if (err) 2771 goto unlock; 2772 if (sk->sk_shutdown & RCV_SHUTDOWN) 2773 goto unlock; 2774 2775 unix_state_unlock(sk); 2776 if (!timeo) { 2777 err = -EAGAIN; 2778 break; 2779 } 2780 2781 mutex_unlock(&u->iolock); 2782 2783 timeo = unix_stream_data_wait(sk, timeo, last, 2784 last_len, freezable); 2785 2786 if (signal_pending(current)) { 2787 err = sock_intr_errno(timeo); 2788 scm_destroy(&scm); 2789 goto out; 2790 } 2791 2792 mutex_lock(&u->iolock); 2793 goto redo; 2794 unlock: 2795 unix_state_unlock(sk); 2796 break; 2797 } 2798 2799 while (skip >= unix_skb_len(skb)) { 2800 skip -= unix_skb_len(skb); 2801 last = skb; 2802 last_len = skb->len; 2803 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2804 if (!skb) 2805 goto again; 2806 } 2807 2808 unix_state_unlock(sk); 2809 2810 if (check_creds) { 2811 /* Never glue messages from different writers */ 2812 if (!unix_skb_scm_eq(skb, &scm)) 2813 break; 2814 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2815 /* Copy credentials */ 2816 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2817 unix_set_secdata(&scm, skb); 2818 check_creds = true; 2819 } 2820 2821 /* Copy address just once */ 2822 if (state->msg && state->msg->msg_name) { 2823 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2824 state->msg->msg_name); 2825 unix_copy_addr(state->msg, skb->sk); 2826 sunaddr = NULL; 2827 } 2828 2829 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2830 skb_get(skb); 2831 chunk = state->recv_actor(skb, skip, chunk, state); 2832 drop_skb = !unix_skb_len(skb); 2833 /* skb is only safe to use if !drop_skb */ 2834 consume_skb(skb); 2835 if (chunk < 0) { 2836 if (copied == 0) 2837 copied = -EFAULT; 2838 break; 2839 } 2840 copied += chunk; 2841 size -= chunk; 2842 2843 if (drop_skb) { 2844 /* the skb was touched by a concurrent reader; 2845 * we should not expect anything from this skb 2846 * anymore and assume it invalid - we can be 2847 * sure it was 
dropped from the socket queue 2848 * 2849 * let's report a short read 2850 */ 2851 err = 0; 2852 break; 2853 } 2854 2855 /* Mark read part of skb as used */ 2856 if (!(flags & MSG_PEEK)) { 2857 UNIXCB(skb).consumed += chunk; 2858 2859 sk_peek_offset_bwd(sk, chunk); 2860 2861 if (UNIXCB(skb).fp) { 2862 scm_stat_del(sk, skb); 2863 unix_detach_fds(&scm, skb); 2864 } 2865 2866 if (unix_skb_len(skb)) 2867 break; 2868 2869 skb_unlink(skb, &sk->sk_receive_queue); 2870 consume_skb(skb); 2871 2872 if (scm.fp) 2873 break; 2874 } else { 2875 /* It is questionable, see note in unix_dgram_recvmsg. 2876 */ 2877 if (UNIXCB(skb).fp) 2878 unix_peek_fds(&scm, skb); 2879 2880 sk_peek_offset_fwd(sk, chunk); 2881 2882 if (UNIXCB(skb).fp) 2883 break; 2884 2885 skip = 0; 2886 last = skb; 2887 last_len = skb->len; 2888 unix_state_lock(sk); 2889 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2890 if (skb) 2891 goto again; 2892 unix_state_unlock(sk); 2893 break; 2894 } 2895 } while (size); 2896 2897 mutex_unlock(&u->iolock); 2898 if (state->msg) 2899 scm_recv(sock, state->msg, &scm, flags); 2900 else 2901 scm_destroy(&scm); 2902 out: 2903 return copied ? : err; 2904 } 2905 2906 static int unix_stream_read_actor(struct sk_buff *skb, 2907 int skip, int chunk, 2908 struct unix_stream_read_state *state) 2909 { 2910 int ret; 2911 2912 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2913 state->msg, chunk); 2914 return ret ?: chunk; 2915 } 2916 2917 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2918 size_t size, int flags) 2919 { 2920 struct unix_stream_read_state state = { 2921 .recv_actor = unix_stream_read_actor, 2922 .socket = sk->sk_socket, 2923 .msg = msg, 2924 .size = size, 2925 .flags = flags 2926 }; 2927 2928 return unix_stream_read_generic(&state, true); 2929 } 2930 2931 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2932 size_t size, int flags) 2933 { 2934 struct unix_stream_read_state state = { 2935 .recv_actor = unix_stream_read_actor, 2936 .socket = sock, 2937 .msg = msg, 2938 .size = size, 2939 .flags = flags 2940 }; 2941 2942 #ifdef CONFIG_BPF_SYSCALL 2943 struct sock *sk = sock->sk; 2944 const struct proto *prot = READ_ONCE(sk->sk_prot); 2945 2946 if (prot != &unix_stream_proto) 2947 return prot->recvmsg(sk, msg, size, flags, NULL); 2948 #endif 2949 return unix_stream_read_generic(&state, true); 2950 } 2951 2952 static int unix_stream_splice_actor(struct sk_buff *skb, 2953 int skip, int chunk, 2954 struct unix_stream_read_state *state) 2955 { 2956 return skb_splice_bits(skb, state->socket->sk, 2957 UNIXCB(skb).consumed + skip, 2958 state->pipe, chunk, state->splice_flags); 2959 } 2960 2961 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2962 struct pipe_inode_info *pipe, 2963 size_t size, unsigned int flags) 2964 { 2965 struct unix_stream_read_state state = { 2966 .recv_actor = unix_stream_splice_actor, 2967 .socket = sock, 2968 .pipe = pipe, 2969 .size = size, 2970 .splice_flags = flags, 2971 }; 2972 2973 if (unlikely(*ppos)) 2974 return -ESPIPE; 2975 2976 if (sock->file->f_flags & O_NONBLOCK || 2977 flags & SPLICE_F_NONBLOCK) 2978 state.flags = MSG_DONTWAIT; 2979 2980 return unix_stream_read_generic(&state, false); 2981 } 2982 2983 static int unix_shutdown(struct socket *sock, int mode) 2984 { 2985 struct sock *sk = sock->sk; 2986 struct sock *other; 2987 2988 if (mode < SHUT_RD || mode > SHUT_RDWR) 2989 return -EINVAL; 2990 /* This maps: 2991 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2992 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 
2993 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2994 */ 2995 ++mode; 2996 2997 unix_state_lock(sk); 2998 sk->sk_shutdown |= mode; 2999 other = unix_peer(sk); 3000 if (other) 3001 sock_hold(other); 3002 unix_state_unlock(sk); 3003 sk->sk_state_change(sk); 3004 3005 if (other && 3006 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 3007 3008 int peer_mode = 0; 3009 const struct proto *prot = READ_ONCE(other->sk_prot); 3010 3011 if (prot->unhash) 3012 prot->unhash(other); 3013 if (mode&RCV_SHUTDOWN) 3014 peer_mode |= SEND_SHUTDOWN; 3015 if (mode&SEND_SHUTDOWN) 3016 peer_mode |= RCV_SHUTDOWN; 3017 unix_state_lock(other); 3018 other->sk_shutdown |= peer_mode; 3019 unix_state_unlock(other); 3020 other->sk_state_change(other); 3021 if (peer_mode == SHUTDOWN_MASK) 3022 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 3023 else if (peer_mode & RCV_SHUTDOWN) 3024 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 3025 } 3026 if (other) 3027 sock_put(other); 3028 3029 return 0; 3030 } 3031 3032 long unix_inq_len(struct sock *sk) 3033 { 3034 struct sk_buff *skb; 3035 long amount = 0; 3036 3037 if (sk->sk_state == TCP_LISTEN) 3038 return -EINVAL; 3039 3040 spin_lock(&sk->sk_receive_queue.lock); 3041 if (sk->sk_type == SOCK_STREAM || 3042 sk->sk_type == SOCK_SEQPACKET) { 3043 skb_queue_walk(&sk->sk_receive_queue, skb) 3044 amount += unix_skb_len(skb); 3045 } else { 3046 skb = skb_peek(&sk->sk_receive_queue); 3047 if (skb) 3048 amount = skb->len; 3049 } 3050 spin_unlock(&sk->sk_receive_queue.lock); 3051 3052 return amount; 3053 } 3054 EXPORT_SYMBOL_GPL(unix_inq_len); 3055 3056 long unix_outq_len(struct sock *sk) 3057 { 3058 return sk_wmem_alloc_get(sk); 3059 } 3060 EXPORT_SYMBOL_GPL(unix_outq_len); 3061 3062 static int unix_open_file(struct sock *sk) 3063 { 3064 struct path path; 3065 struct file *f; 3066 int fd; 3067 3068 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3069 return -EPERM; 3070 3071 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3072 return -ENOENT; 3073 3074 path = unix_sk(sk)->path; 3075 if (!path.dentry) 3076 return -ENOENT; 3077 3078 path_get(&path); 3079 3080 fd = get_unused_fd_flags(O_CLOEXEC); 3081 if (fd < 0) 3082 goto out; 3083 3084 f = dentry_open(&path, O_PATH, current_cred()); 3085 if (IS_ERR(f)) { 3086 put_unused_fd(fd); 3087 fd = PTR_ERR(f); 3088 goto out; 3089 } 3090 3091 fd_install(fd, f); 3092 out: 3093 path_put(&path); 3094 3095 return fd; 3096 } 3097 3098 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3099 { 3100 struct sock *sk = sock->sk; 3101 long amount = 0; 3102 int err; 3103 3104 switch (cmd) { 3105 case SIOCOUTQ: 3106 amount = unix_outq_len(sk); 3107 err = put_user(amount, (int __user *)arg); 3108 break; 3109 case SIOCINQ: 3110 amount = unix_inq_len(sk); 3111 if (amount < 0) 3112 err = amount; 3113 else 3114 err = put_user(amount, (int __user *)arg); 3115 break; 3116 case SIOCUNIXFILE: 3117 err = unix_open_file(sk); 3118 break; 3119 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3120 case SIOCATMARK: 3121 { 3122 struct sk_buff *skb; 3123 int answ = 0; 3124 3125 skb = skb_peek(&sk->sk_receive_queue); 3126 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3127 answ = 1; 3128 err = put_user(answ, (int __user *)arg); 3129 } 3130 break; 3131 #endif 3132 default: 3133 err = -ENOIOCTLCMD; 3134 break; 3135 } 3136 return err; 3137 } 3138 3139 #ifdef CONFIG_COMPAT 3140 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3141 { 3142 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3143 } 3144 
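/*
 * Illustrative userspace sketch (hypothetical, not part of this file) of the
 * ioctls handled by unix_ioctl() above. Assumes "fd" is a connected
 * SOCK_STREAM AF_UNIX socket; headers and error handling omitted; SIOCATMARK
 * and MSG_OOB depend on CONFIG_AF_UNIX_OOB.
 *
 *	int queued, at_mark;
 *	char c;
 *
 *	ioctl(fd, SIOCINQ, &queued);	// bytes queued for reading, see unix_inq_len()
 *	ioctl(fd, SIOCATMARK, &at_mark);	// 1 if the next byte is the MSG_OOB mark
 *	if (at_mark)
 *		recv(fd, &c, 1, MSG_OOB);	// fetch the out-of-band byte
 */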
#endif 3145 3146 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3147 { 3148 struct sock *sk = sock->sk; 3149 __poll_t mask; 3150 3151 sock_poll_wait(file, sock, wait); 3152 mask = 0; 3153 3154 /* exceptional events? */ 3155 if (sk->sk_err) 3156 mask |= EPOLLERR; 3157 if (sk->sk_shutdown == SHUTDOWN_MASK) 3158 mask |= EPOLLHUP; 3159 if (sk->sk_shutdown & RCV_SHUTDOWN) 3160 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3161 3162 /* readable? */ 3163 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3164 mask |= EPOLLIN | EPOLLRDNORM; 3165 if (sk_is_readable(sk)) 3166 mask |= EPOLLIN | EPOLLRDNORM; 3167 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3168 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3169 mask |= EPOLLPRI; 3170 #endif 3171 3172 /* Connection-based sockets need to check for termination and startup */ 3173 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3174 sk->sk_state == TCP_CLOSE) 3175 mask |= EPOLLHUP; 3176 3177 /* 3178 * we set writable also when the other side has shut down the 3179 * connection. This prevents stuck sockets. 3180 */ 3181 if (unix_writable(sk)) 3182 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3183 3184 return mask; 3185 } 3186 3187 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3188 poll_table *wait) 3189 { 3190 struct sock *sk = sock->sk, *other; 3191 unsigned int writable; 3192 __poll_t mask; 3193 3194 sock_poll_wait(file, sock, wait); 3195 mask = 0; 3196 3197 /* exceptional events? */ 3198 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 3199 mask |= EPOLLERR | 3200 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3201 3202 if (sk->sk_shutdown & RCV_SHUTDOWN) 3203 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3204 if (sk->sk_shutdown == SHUTDOWN_MASK) 3205 mask |= EPOLLHUP; 3206 3207 /* readable? */ 3208 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3209 mask |= EPOLLIN | EPOLLRDNORM; 3210 if (sk_is_readable(sk)) 3211 mask |= EPOLLIN | EPOLLRDNORM; 3212 3213 /* Connection-based sockets need to check for termination and startup */ 3214 if (sk->sk_type == SOCK_SEQPACKET) { 3215 if (sk->sk_state == TCP_CLOSE) 3216 mask |= EPOLLHUP; 3217 /* connection hasn't started yet? */ 3218 if (sk->sk_state == TCP_SYN_SENT) 3219 return mask; 3220 } 3221 3222 /* No write status requested, avoid expensive OUT tests.
*/ 3223 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3224 return mask; 3225 3226 writable = unix_writable(sk); 3227 if (writable) { 3228 unix_state_lock(sk); 3229 3230 other = unix_peer(sk); 3231 if (other && unix_peer(other) != sk && 3232 unix_recvq_full_lockless(other) && 3233 unix_dgram_peer_wake_me(sk, other)) 3234 writable = 0; 3235 3236 unix_state_unlock(sk); 3237 } 3238 3239 if (writable) 3240 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3241 else 3242 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3243 3244 return mask; 3245 } 3246 3247 #ifdef CONFIG_PROC_FS 3248 3249 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3250 3251 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3252 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3253 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3254 3255 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3256 { 3257 unsigned long offset = get_offset(*pos); 3258 unsigned long bucket = get_bucket(*pos); 3259 unsigned long count = 0; 3260 struct sock *sk; 3261 3262 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3263 sk; sk = sk_next(sk)) { 3264 if (++count == offset) 3265 break; 3266 } 3267 3268 return sk; 3269 } 3270 3271 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3272 { 3273 unsigned long bucket = get_bucket(*pos); 3274 struct net *net = seq_file_net(seq); 3275 struct sock *sk; 3276 3277 while (bucket < UNIX_HASH_SIZE) { 3278 spin_lock(&net->unx.table.locks[bucket]); 3279 3280 sk = unix_from_bucket(seq, pos); 3281 if (sk) 3282 return sk; 3283 3284 spin_unlock(&net->unx.table.locks[bucket]); 3285 3286 *pos = set_bucket_offset(++bucket, 1); 3287 } 3288 3289 return NULL; 3290 } 3291 3292 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3293 loff_t *pos) 3294 { 3295 unsigned long bucket = get_bucket(*pos); 3296 3297 sk = sk_next(sk); 3298 if (sk) 3299 return sk; 3300 3301 3302 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3303 3304 *pos = set_bucket_offset(++bucket, 1); 3305 3306 return unix_get_first(seq, pos); 3307 } 3308 3309 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3310 { 3311 if (!*pos) 3312 return SEQ_START_TOKEN; 3313 3314 return unix_get_first(seq, pos); 3315 } 3316 3317 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3318 { 3319 ++*pos; 3320 3321 if (v == SEQ_START_TOKEN) 3322 return unix_get_first(seq, pos); 3323 3324 return unix_get_next(seq, v, pos); 3325 } 3326 3327 static void unix_seq_stop(struct seq_file *seq, void *v) 3328 { 3329 struct sock *sk = v; 3330 3331 if (sk) 3332 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3333 } 3334 3335 static int unix_seq_show(struct seq_file *seq, void *v) 3336 { 3337 3338 if (v == SEQ_START_TOKEN) 3339 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3340 "Inode Path\n"); 3341 else { 3342 struct sock *s = v; 3343 struct unix_sock *u = unix_sk(s); 3344 unix_state_lock(s); 3345 3346 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3347 s, 3348 refcount_read(&s->sk_refcnt), 3349 0, 3350 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3351 s->sk_type, 3352 s->sk_socket ? 3353 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3354 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3355 sock_i_ino(s)); 3356 3357 if (u->addr) { // under a hash table lock here 3358 int i, len; 3359 seq_putc(seq, ' '); 3360 3361 i = 0; 3362 len = u->addr->len - 3363 offsetof(struct sockaddr_un, sun_path); 3364 if (u->addr->name->sun_path[0]) { 3365 len--; 3366 } else { 3367 seq_putc(seq, '@'); 3368 i++; 3369 } 3370 for ( ; i < len; i++) 3371 seq_putc(seq, u->addr->name->sun_path[i] ?: 3372 '@'); 3373 } 3374 unix_state_unlock(s); 3375 seq_putc(seq, '\n'); 3376 } 3377 3378 return 0; 3379 } 3380 3381 static const struct seq_operations unix_seq_ops = { 3382 .start = unix_seq_start, 3383 .next = unix_seq_next, 3384 .stop = unix_seq_stop, 3385 .show = unix_seq_show, 3386 }; 3387 3388 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3389 struct bpf_unix_iter_state { 3390 struct seq_net_private p; 3391 unsigned int cur_sk; 3392 unsigned int end_sk; 3393 unsigned int max_sk; 3394 struct sock **batch; 3395 bool st_bucket_done; 3396 }; 3397 3398 struct bpf_iter__unix { 3399 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3400 __bpf_md_ptr(struct unix_sock *, unix_sk); 3401 uid_t uid __aligned(8); 3402 }; 3403 3404 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3405 struct unix_sock *unix_sk, uid_t uid) 3406 { 3407 struct bpf_iter__unix ctx; 3408 3409 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3410 ctx.meta = meta; 3411 ctx.unix_sk = unix_sk; 3412 ctx.uid = uid; 3413 return bpf_iter_run_prog(prog, &ctx); 3414 } 3415 3416 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3417 3418 { 3419 struct bpf_unix_iter_state *iter = seq->private; 3420 unsigned int expected = 1; 3421 struct sock *sk; 3422 3423 sock_hold(start_sk); 3424 iter->batch[iter->end_sk++] = start_sk; 3425 3426 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3427 if (iter->end_sk < iter->max_sk) { 3428 sock_hold(sk); 3429 iter->batch[iter->end_sk++] = sk; 3430 } 3431 3432 expected++; 3433 } 3434 3435 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3436 3437 return expected; 3438 } 3439 3440 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3441 { 3442 while (iter->cur_sk < iter->end_sk) 3443 sock_put(iter->batch[iter->cur_sk++]); 3444 } 3445 3446 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3447 unsigned int new_batch_sz) 3448 { 3449 struct sock **new_batch; 3450 3451 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3452 GFP_USER | __GFP_NOWARN); 3453 if (!new_batch) 3454 return -ENOMEM; 3455 3456 bpf_iter_unix_put_batch(iter); 3457 kvfree(iter->batch); 3458 iter->batch = new_batch; 3459 iter->max_sk = new_batch_sz; 3460 3461 return 0; 3462 } 3463 3464 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3465 loff_t *pos) 3466 { 3467 struct bpf_unix_iter_state *iter = seq->private; 3468 unsigned int expected; 3469 bool resized = false; 3470 struct sock *sk; 3471 3472 if (iter->st_bucket_done) 3473 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3474 3475 again: 3476 /* Get a new batch */ 3477 iter->cur_sk = 0; 3478 iter->end_sk = 0; 3479 3480 sk = unix_get_first(seq, pos); 3481 if (!sk) 3482 return NULL; /* Done */ 3483 3484 expected = bpf_iter_unix_hold_batch(seq, sk); 3485 3486 if (iter->end_sk == expected) { 3487 iter->st_bucket_done = true; 3488 return sk; 3489 } 3490 3491 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3492 resized = true; 3493 goto again; 3494 } 3495 3496 return sk; 3497 } 3498 3499 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3500 { 3501 if (!*pos) 3502 return SEQ_START_TOKEN; 3503 3504 /* bpf iter does not support lseek, so it always 3505 * continue from where it was stop()-ped. 3506 */ 3507 return bpf_iter_unix_batch(seq, pos); 3508 } 3509 3510 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3511 { 3512 struct bpf_unix_iter_state *iter = seq->private; 3513 struct sock *sk; 3514 3515 /* Whenever seq_next() is called, the iter->cur_sk is 3516 * done with seq_show(), so advance to the next sk in 3517 * the batch. 3518 */ 3519 if (iter->cur_sk < iter->end_sk) 3520 sock_put(iter->batch[iter->cur_sk++]); 3521 3522 ++*pos; 3523 3524 if (iter->cur_sk < iter->end_sk) 3525 sk = iter->batch[iter->cur_sk]; 3526 else 3527 sk = bpf_iter_unix_batch(seq, pos); 3528 3529 return sk; 3530 } 3531 3532 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3533 { 3534 struct bpf_iter_meta meta; 3535 struct bpf_prog *prog; 3536 struct sock *sk = v; 3537 uid_t uid; 3538 bool slow; 3539 int ret; 3540 3541 if (v == SEQ_START_TOKEN) 3542 return 0; 3543 3544 slow = lock_sock_fast(sk); 3545 3546 if (unlikely(sk_unhashed(sk))) { 3547 ret = SEQ_SKIP; 3548 goto unlock; 3549 } 3550 3551 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3552 meta.seq = seq; 3553 prog = bpf_iter_get_info(&meta, false); 3554 ret = unix_prog_seq_show(prog, &meta, v, uid); 3555 unlock: 3556 unlock_sock_fast(sk, slow); 3557 return ret; 3558 } 3559 3560 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3561 { 3562 struct bpf_unix_iter_state *iter = seq->private; 3563 struct bpf_iter_meta meta; 3564 struct bpf_prog *prog; 3565 3566 if (!v) { 3567 meta.seq = seq; 3568 prog = bpf_iter_get_info(&meta, true); 3569 if (prog) 3570 (void)unix_prog_seq_show(prog, &meta, v, 0); 3571 } 3572 3573 if (iter->cur_sk < iter->end_sk) 3574 bpf_iter_unix_put_batch(iter); 3575 } 3576 3577 static const struct seq_operations bpf_iter_unix_seq_ops = { 3578 .start = bpf_iter_unix_seq_start, 3579 .next = bpf_iter_unix_seq_next, 3580 .stop = bpf_iter_unix_seq_stop, 3581 .show = bpf_iter_unix_seq_show, 3582 }; 3583 #endif 3584 #endif 3585 3586 static const struct net_proto_family unix_family_ops = { 3587 .family = PF_UNIX, 3588 .create = unix_create, 3589 .owner = THIS_MODULE, 3590 }; 3591 3592 3593 static int __net_init unix_net_init(struct net *net) 3594 { 3595 int i; 3596 3597 net->unx.sysctl_max_dgram_qlen = 10; 3598 if (unix_sysctl_register(net)) 3599 goto out; 3600 3601 #ifdef CONFIG_PROC_FS 3602 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3603 sizeof(struct seq_net_private))) 3604 goto err_sysctl; 3605 #endif 3606 3607 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3608 sizeof(spinlock_t), GFP_KERNEL); 3609 if (!net->unx.table.locks) 3610 goto err_proc; 3611 3612 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3613 sizeof(struct hlist_head), 3614 GFP_KERNEL); 3615 if (!net->unx.table.buckets) 3616 goto free_locks; 3617 3618 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3619 spin_lock_init(&net->unx.table.locks[i]); 3620 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3621 } 3622 3623 return 0; 3624 3625 free_locks: 3626 kvfree(net->unx.table.locks); 3627 err_proc: 3628 #ifdef CONFIG_PROC_FS 3629 remove_proc_entry("unix", net->proc_net); 3630 err_sysctl: 3631 #endif 3632 unix_sysctl_unregister(net); 3633 out: 3634 return -ENOMEM; 3635 } 3636 3637 static void __net_exit unix_net_exit(struct net *net) 3638 { 3639 
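	/* Undo unix_net_init(): free the per-netns hash table arrays and
	 * drop the sysctl and /proc/net/unix registrations.
	 */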
kvfree(net->unx.table.buckets); 3640 kvfree(net->unx.table.locks); 3641 unix_sysctl_unregister(net); 3642 remove_proc_entry("unix", net->proc_net); 3643 } 3644 3645 static struct pernet_operations unix_net_ops = { 3646 .init = unix_net_init, 3647 .exit = unix_net_exit, 3648 }; 3649 3650 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3651 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3652 struct unix_sock *unix_sk, uid_t uid) 3653 3654 #define INIT_BATCH_SZ 16 3655 3656 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3657 { 3658 struct bpf_unix_iter_state *iter = priv_data; 3659 int err; 3660 3661 err = bpf_iter_init_seq_net(priv_data, aux); 3662 if (err) 3663 return err; 3664 3665 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3666 if (err) { 3667 bpf_iter_fini_seq_net(priv_data); 3668 return err; 3669 } 3670 3671 return 0; 3672 } 3673 3674 static void bpf_iter_fini_unix(void *priv_data) 3675 { 3676 struct bpf_unix_iter_state *iter = priv_data; 3677 3678 bpf_iter_fini_seq_net(priv_data); 3679 kvfree(iter->batch); 3680 } 3681 3682 static const struct bpf_iter_seq_info unix_seq_info = { 3683 .seq_ops = &bpf_iter_unix_seq_ops, 3684 .init_seq_private = bpf_iter_init_unix, 3685 .fini_seq_private = bpf_iter_fini_unix, 3686 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3687 }; 3688 3689 static const struct bpf_func_proto * 3690 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3691 const struct bpf_prog *prog) 3692 { 3693 switch (func_id) { 3694 case BPF_FUNC_setsockopt: 3695 return &bpf_sk_setsockopt_proto; 3696 case BPF_FUNC_getsockopt: 3697 return &bpf_sk_getsockopt_proto; 3698 default: 3699 return NULL; 3700 } 3701 } 3702 3703 static struct bpf_iter_reg unix_reg_info = { 3704 .target = "unix", 3705 .ctx_arg_info_size = 1, 3706 .ctx_arg_info = { 3707 { offsetof(struct bpf_iter__unix, unix_sk), 3708 PTR_TO_BTF_ID_OR_NULL }, 3709 }, 3710 .get_func_proto = bpf_iter_unix_get_func_proto, 3711 .seq_info = &unix_seq_info, 3712 }; 3713 3714 static void __init bpf_iter_register(void) 3715 { 3716 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3717 if (bpf_iter_reg_target(&unix_reg_info)) 3718 pr_warn("Warning: could not register bpf iterator unix\n"); 3719 } 3720 #endif 3721 3722 static int __init af_unix_init(void) 3723 { 3724 int i, rc = -1; 3725 3726 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3727 3728 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3729 spin_lock_init(&bsd_socket_locks[i]); 3730 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3731 } 3732 3733 rc = proto_register(&unix_dgram_proto, 1); 3734 if (rc != 0) { 3735 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3736 goto out; 3737 } 3738 3739 rc = proto_register(&unix_stream_proto, 1); 3740 if (rc != 0) { 3741 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3742 goto out; 3743 } 3744 3745 sock_register(&unix_family_ops); 3746 register_pernet_subsys(&unix_net_ops); 3747 unix_bpf_build_proto(); 3748 3749 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3750 bpf_iter_register(); 3751 #endif 3752 3753 out: 3754 return rc; 3755 } 3756 3757 static void __exit af_unix_exit(void) 3758 { 3759 sock_unregister(PF_UNIX); 3760 proto_unregister(&unix_dgram_proto); 3761 proto_unregister(&unix_stream_proto); 3762 unregister_pernet_subsys(&unix_net_ops); 3763 } 3764 3765 /* Earlier than device_initcall() so that other drivers invoking 3766 
request_module() don't end up in a loop when modprobe tries 3767 to use a UNIX socket. But later than subsys_initcall() because 3768 we depend on stuff initialised there */ 3769 fs_initcall(af_unix_init); 3770 module_exit(af_unix_exit); 3771 3772 MODULE_LICENSE("GPL"); 3773 MODULE_ALIAS_NETPROTO(PF_UNIX); 3774
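/*
 * Illustrative userspace sketch (hypothetical, not part of the kernel build)
 * of the credential passing implemented by maybe_add_creds() and scm_recv()
 * above. Assumes "fd" is a connected AF_UNIX socket; headers and error
 * handling omitted.
 *
 *	int on = 1;
 *	struct ucred peer;
 *	char buf[64], cbuf[CMSG_SPACE(sizeof(struct ucred))];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cm;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	recvmsg(fd, &msg, 0);
 *	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
 *		if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_CREDENTIALS)
 *			memcpy(&peer, CMSG_DATA(cm), sizeof(peer));	// sender pid/uid/gid
 */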