// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of hashed socks (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
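
/*
 * Illustrative sketch (not part of this file; assumed user-space usage):
 * how the two name spaces described above are addressed.  A filesystem
 * name is a NUL-terminated path in sun_path; an abstract name starts
 * with a zero byte and is bounded by the address length, not by a
 * terminator.
 *
 *	struct sockaddr_un un = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	// Filesystem binding: creates a socket inode at the given path.
 *	strcpy(un.sun_path, "/tmp/demo.sock");
 *	bind(fd, (struct sockaddr *)&un, sizeof(un));
 *
 *	// Abstract binding: sun_path[0] == 0, the name is the following
 *	// four bytes; the passed length (not a NUL) delimits the name.
 *	memcpy(un.sun_path, "\0demo", 5);
 *	bind(fd, (struct sockaddr *)&un,
 *	     offsetof(struct sockaddr_un, sun_path) + 5);
 */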

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];

/* SMP locking strategy:
 *    hash table is protected with spinlock.
 *    each socket state is protected by separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return hash & UNIX_HASH_MOD;
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & UNIX_HASH_MOD;
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
}

static void unix_table_double_lock(struct net *net,
				   unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_lock(&net->unx.table.locks[hash1]);
		return;
	}

	if (hash1 > hash2)
		swap(hash1, hash2);

	spin_lock(&net->unx.table.locks[hash1]);
	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
}

static void unix_table_double_unlock(struct net *net,
				     unsigned int hash1, unsigned int hash2)
{
	if (hash1 == hash2) {
		spin_unlock(&net->unx.table.locks[hash1]);
		return;
	}

	spin_unlock(&net->unx.table.locks[hash1]);
	spin_unlock(&net->unx.table.locks[hash2]);
}

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
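
/*
 * Illustrative sketch (mirrors how unix_autobind() and unix_bind_abstract()
 * below use the helpers above): re-hashing a socket from its unbound bucket
 * into a named bucket touches two hash chains, so both bucket locks are
 * taken in a fixed, hash-ordered sequence to keep lock ordering consistent:
 *
 *	unix_table_double_lock(net, old_hash, new_hash);
 *	// ... check the name is free, then unlink + relink under both locks:
 *	__unix_set_addr_hash(net, sk, addr, new_hash);
 *	unix_table_double_unlock(net, old_hash, new_hash);
 */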

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
					     int addr_len)
{
	struct unix_address *addr;

	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return NULL;

	refcount_set(&addr->refcnt, 1);
	addr->len = addr_len;
	memcpy(addr->name, sunaddr, addr_len);

	return addr;
}

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 * Check unix socket name:
 *	- should not be zero length.
 *	- if it does not start with a zero byte, it must be NUL terminated
 *	  (FS object)
 *	- if it starts with a zero byte, it is an abstract name.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}
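
/*
 * Illustrative examples (assumed values) of how the rules above apply to an
 * (sunaddr, addr_len) pair passed in by bind()/connect():
 *
 *	addr_len == offsetof(struct sockaddr_un, sun_path)
 *		-> rejected here with -EINVAL (zero-length name; the autobind
 *		   case is handled in unix_bind() before validation)
 *	sun_path = "\0demo", addr_len == offsetof(...) + 5
 *		-> accepted: abstract name, no NUL termination required
 *	sun_path = "/tmp/demo.sock", addr_len == offsetof(...) + 15
 *		-> accepted: filesystem name; unix_mkname_bsd() below forces
 *		   NUL termination before the path is used
 *	addr_len > sizeof(struct sockaddr_un)
 *		-> rejected with -EINVAL
 */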

static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
	/* This may look like an off by one error but it is a bit more
	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
	 * sun_path[108] doesn't as such exist.  However in kernel space
	 * we are guaranteed that it is a valid memory location in our
	 * kernel address buffer because syscall functions always pass
	 * a pointer to a struct sockaddr_storage, which has a bigger
	 * buffer than 108 bytes.
	 */
	((char *)sunaddr)[addr_len] = 0;
}

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct net *net, struct sock *sk)
{
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
}

static void __unix_set_addr_hash(struct net *net, struct sock *sk,
				 struct unix_address *addr, unsigned int hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);

	sk->sk_hash = hash;
	__unix_insert_socket(net, sk);
}

static void unix_remove_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_remove_socket(sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
{
	spin_lock(&net->unx.table.locks[sk->sk_hash]);
	__unix_insert_socket(net, sk);
	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}

static void unix_insert_bsd_socket(struct sock *sk)
{
	spin_lock(&bsd_socket_locks[sk->sk_hash]);
	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
}

static void unix_remove_bsd_socket(struct sock *sk)
{
	if (!hlist_unhashed(&sk->sk_bind_node)) {
		spin_lock(&bsd_socket_locks[sk->sk_hash]);
		__sk_del_bind_node(sk);
		spin_unlock(&bsd_socket_locks[sk->sk_hash]);

		sk_node_init(&sk->sk_bind_node);
	}
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &net->unx.table.buckets[hash]) {
		struct unix_sock *u = unix_sk(s);

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, unsigned int hash)
{
	struct sock *s;

	spin_lock(&net->unx.table.locks[hash]);
	s = __unix_find_socket_byname(net, sunname, len, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&net->unx.table.locks[hash]);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	unsigned int hash = unix_bsd_hash(i);
	struct sock *s;

	spin_lock(&bsd_socket_locks[hash]);
	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			spin_unlock(&bsd_socket_locks[hash]);
			return s;
		}
	}
	spin_unlock(&bsd_socket_locks[hash]);
	return NULL;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}
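
/*
 * Illustrative sketch (simplified from the use in unix_dgram_sendmsg()
 * later in this file): a would-be writer that finds the peer's receive
 * queue over the limit first registers itself on the peer's peer_wait
 * queue via unix_dgram_peer_wake_me() below, and only then decides to
 * fail or sleep, so a wake-up from the receiver cannot be missed in
 * between:
 *
 *	if (unix_peer(sk) != other || unix_dgram_peer_wake_me(sk, other)) {
 *		err = -EAGAIN;		// still full after registering
 *		goto out_unlock;	// or sleep on sk_sleep(sk) if blocking
 *	}
 */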

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer.
 */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			sk_error_report(other);
		}
	}
	other->sk_state = TCP_CLOSE;
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif
	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct sock *skpair;
	struct sk_buff *skb;
	struct path path;
	int state;

	unix_remove_socket(sock_net(sk), sk);
	unix_remove_bsd_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

unix_dgram_peer_wake_disconnect(sk, skpair); 638 sock_put(skpair); /* It may now die */ 639 } 640 641 /* Try to flush out this socket. Throw out buffers at least */ 642 643 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 644 if (state == TCP_LISTEN) 645 unix_release_sock(skb->sk, 1); 646 /* passed fds are erased in the kfree_skb hook */ 647 UNIXCB(skb).consumed = skb->len; 648 kfree_skb(skb); 649 } 650 651 if (path.dentry) 652 path_put(&path); 653 654 sock_put(sk); 655 656 /* ---- Socket is dead now and most probably destroyed ---- */ 657 658 /* 659 * Fixme: BSD difference: In BSD all sockets connected to us get 660 * ECONNRESET and we die on the spot. In Linux we behave 661 * like files and pipes do and wait for the last 662 * dereference. 663 * 664 * Can't we simply set sock->err? 665 * 666 * What the above comment does talk about? --ANK(980817) 667 */ 668 669 if (unix_tot_inflight) 670 unix_gc(); /* Garbage collect fds */ 671 } 672 673 static void init_peercred(struct sock *sk) 674 { 675 const struct cred *old_cred; 676 struct pid *old_pid; 677 678 spin_lock(&sk->sk_peer_lock); 679 old_pid = sk->sk_peer_pid; 680 old_cred = sk->sk_peer_cred; 681 sk->sk_peer_pid = get_pid(task_tgid(current)); 682 sk->sk_peer_cred = get_current_cred(); 683 spin_unlock(&sk->sk_peer_lock); 684 685 put_pid(old_pid); 686 put_cred(old_cred); 687 } 688 689 static void copy_peercred(struct sock *sk, struct sock *peersk) 690 { 691 const struct cred *old_cred; 692 struct pid *old_pid; 693 694 if (sk < peersk) { 695 spin_lock(&sk->sk_peer_lock); 696 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); 697 } else { 698 spin_lock(&peersk->sk_peer_lock); 699 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); 700 } 701 old_pid = sk->sk_peer_pid; 702 old_cred = sk->sk_peer_cred; 703 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 704 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 705 706 spin_unlock(&sk->sk_peer_lock); 707 spin_unlock(&peersk->sk_peer_lock); 708 709 put_pid(old_pid); 710 put_cred(old_cred); 711 } 712 713 static int unix_listen(struct socket *sock, int backlog) 714 { 715 int err; 716 struct sock *sk = sock->sk; 717 struct unix_sock *u = unix_sk(sk); 718 719 err = -EOPNOTSUPP; 720 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 721 goto out; /* Only stream/seqpacket sockets accept */ 722 err = -EINVAL; 723 if (!u->addr) 724 goto out; /* No listens on an unbound socket */ 725 unix_state_lock(sk); 726 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 727 goto out_unlock; 728 if (backlog > sk->sk_max_ack_backlog) 729 wake_up_interruptible_all(&u->peer_wait); 730 sk->sk_max_ack_backlog = backlog; 731 sk->sk_state = TCP_LISTEN; 732 /* set credentials so connect can copy them */ 733 init_peercred(sk); 734 err = 0; 735 736 out_unlock: 737 unix_state_unlock(sk); 738 out: 739 return err; 740 } 741 742 static int unix_release(struct socket *); 743 static int unix_bind(struct socket *, struct sockaddr *, int); 744 static int unix_stream_connect(struct socket *, struct sockaddr *, 745 int addr_len, int flags); 746 static int unix_socketpair(struct socket *, struct socket *); 747 static int unix_accept(struct socket *, struct socket *, int, bool); 748 static int unix_getname(struct socket *, struct sockaddr *, int); 749 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 750 static __poll_t unix_dgram_poll(struct file *, struct socket *, 751 poll_table *); 752 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 753 #ifdef 
CONFIG_COMPAT 754 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 755 #endif 756 static int unix_shutdown(struct socket *, int); 757 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 758 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 759 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 760 size_t size, int flags); 761 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 762 struct pipe_inode_info *, size_t size, 763 unsigned int flags); 764 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 765 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 766 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 767 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 768 static int unix_dgram_connect(struct socket *, struct sockaddr *, 769 int, int); 770 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 771 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 772 int); 773 774 static int unix_set_peek_off(struct sock *sk, int val) 775 { 776 struct unix_sock *u = unix_sk(sk); 777 778 if (mutex_lock_interruptible(&u->iolock)) 779 return -EINTR; 780 781 sk->sk_peek_off = val; 782 mutex_unlock(&u->iolock); 783 784 return 0; 785 } 786 787 #ifdef CONFIG_PROC_FS 788 static int unix_count_nr_fds(struct sock *sk) 789 { 790 struct sk_buff *skb; 791 struct unix_sock *u; 792 int nr_fds = 0; 793 794 spin_lock(&sk->sk_receive_queue.lock); 795 skb = skb_peek(&sk->sk_receive_queue); 796 while (skb) { 797 u = unix_sk(skb->sk); 798 nr_fds += atomic_read(&u->scm_stat.nr_fds); 799 skb = skb_peek_next(skb, &sk->sk_receive_queue); 800 } 801 spin_unlock(&sk->sk_receive_queue.lock); 802 803 return nr_fds; 804 } 805 806 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 807 { 808 struct sock *sk = sock->sk; 809 struct unix_sock *u; 810 int nr_fds; 811 812 if (sk) { 813 u = unix_sk(sk); 814 if (sock->type == SOCK_DGRAM) { 815 nr_fds = atomic_read(&u->scm_stat.nr_fds); 816 goto out_print; 817 } 818 819 unix_state_lock(sk); 820 if (sk->sk_state != TCP_LISTEN) 821 nr_fds = atomic_read(&u->scm_stat.nr_fds); 822 else 823 nr_fds = unix_count_nr_fds(sk); 824 unix_state_unlock(sk); 825 out_print: 826 seq_printf(m, "scm_fds: %u\n", nr_fds); 827 } 828 } 829 #else 830 #define unix_show_fdinfo NULL 831 #endif 832 833 static const struct proto_ops unix_stream_ops = { 834 .family = PF_UNIX, 835 .owner = THIS_MODULE, 836 .release = unix_release, 837 .bind = unix_bind, 838 .connect = unix_stream_connect, 839 .socketpair = unix_socketpair, 840 .accept = unix_accept, 841 .getname = unix_getname, 842 .poll = unix_poll, 843 .ioctl = unix_ioctl, 844 #ifdef CONFIG_COMPAT 845 .compat_ioctl = unix_compat_ioctl, 846 #endif 847 .listen = unix_listen, 848 .shutdown = unix_shutdown, 849 .sendmsg = unix_stream_sendmsg, 850 .recvmsg = unix_stream_recvmsg, 851 .read_skb = unix_stream_read_skb, 852 .mmap = sock_no_mmap, 853 .sendpage = unix_stream_sendpage, 854 .splice_read = unix_stream_splice_read, 855 .set_peek_off = unix_set_peek_off, 856 .show_fdinfo = unix_show_fdinfo, 857 }; 858 859 static const struct proto_ops unix_dgram_ops = { 860 .family = PF_UNIX, 861 .owner = THIS_MODULE, 862 .release = unix_release, 863 .bind = unix_bind, 864 .connect = unix_dgram_connect, 865 .socketpair = unix_socketpair, 866 .accept = sock_no_accept, 867 .getname = 
unix_getname, 868 .poll = unix_dgram_poll, 869 .ioctl = unix_ioctl, 870 #ifdef CONFIG_COMPAT 871 .compat_ioctl = unix_compat_ioctl, 872 #endif 873 .listen = sock_no_listen, 874 .shutdown = unix_shutdown, 875 .sendmsg = unix_dgram_sendmsg, 876 .read_skb = unix_read_skb, 877 .recvmsg = unix_dgram_recvmsg, 878 .mmap = sock_no_mmap, 879 .sendpage = sock_no_sendpage, 880 .set_peek_off = unix_set_peek_off, 881 .show_fdinfo = unix_show_fdinfo, 882 }; 883 884 static const struct proto_ops unix_seqpacket_ops = { 885 .family = PF_UNIX, 886 .owner = THIS_MODULE, 887 .release = unix_release, 888 .bind = unix_bind, 889 .connect = unix_stream_connect, 890 .socketpair = unix_socketpair, 891 .accept = unix_accept, 892 .getname = unix_getname, 893 .poll = unix_dgram_poll, 894 .ioctl = unix_ioctl, 895 #ifdef CONFIG_COMPAT 896 .compat_ioctl = unix_compat_ioctl, 897 #endif 898 .listen = unix_listen, 899 .shutdown = unix_shutdown, 900 .sendmsg = unix_seqpacket_sendmsg, 901 .recvmsg = unix_seqpacket_recvmsg, 902 .mmap = sock_no_mmap, 903 .sendpage = sock_no_sendpage, 904 .set_peek_off = unix_set_peek_off, 905 .show_fdinfo = unix_show_fdinfo, 906 }; 907 908 static void unix_close(struct sock *sk, long timeout) 909 { 910 /* Nothing to do here, unix socket does not need a ->close(). 911 * This is merely for sockmap. 912 */ 913 } 914 915 static void unix_unhash(struct sock *sk) 916 { 917 /* Nothing to do here, unix socket does not need a ->unhash(). 918 * This is merely for sockmap. 919 */ 920 } 921 922 struct proto unix_dgram_proto = { 923 .name = "UNIX", 924 .owner = THIS_MODULE, 925 .obj_size = sizeof(struct unix_sock), 926 .close = unix_close, 927 #ifdef CONFIG_BPF_SYSCALL 928 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 929 #endif 930 }; 931 932 struct proto unix_stream_proto = { 933 .name = "UNIX-STREAM", 934 .owner = THIS_MODULE, 935 .obj_size = sizeof(struct unix_sock), 936 .close = unix_close, 937 .unhash = unix_unhash, 938 #ifdef CONFIG_BPF_SYSCALL 939 .psock_update_sk_prot = unix_stream_bpf_update_proto, 940 #endif 941 }; 942 943 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 944 { 945 struct unix_sock *u; 946 struct sock *sk; 947 int err; 948 949 atomic_long_inc(&unix_nr_socks); 950 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 951 err = -ENFILE; 952 goto err; 953 } 954 955 if (type == SOCK_STREAM) 956 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 957 else /*dgram and seqpacket */ 958 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 959 960 if (!sk) { 961 err = -ENOMEM; 962 goto err; 963 } 964 965 sock_init_data(sock, sk); 966 967 sk->sk_hash = unix_unbound_hash(sk); 968 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 969 sk->sk_write_space = unix_write_space; 970 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 971 sk->sk_destruct = unix_sock_destructor; 972 u = unix_sk(sk); 973 u->path.dentry = NULL; 974 u->path.mnt = NULL; 975 spin_lock_init(&u->lock); 976 atomic_long_set(&u->inflight, 0); 977 INIT_LIST_HEAD(&u->link); 978 mutex_init(&u->iolock); /* single task reading lock */ 979 mutex_init(&u->bindlock); /* single task binding lock */ 980 init_waitqueue_head(&u->peer_wait); 981 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 982 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 983 unix_insert_unbound_socket(net, sk); 984 985 sock_prot_inuse_add(net, sk->sk_prot, 1); 986 987 return sk; 988 989 err: 990 atomic_long_dec(&unix_nr_socks); 991 return ERR_PTR(err); 992 } 993 994 
static int unix_create(struct net *net, struct socket *sock, int protocol, 995 int kern) 996 { 997 struct sock *sk; 998 999 if (protocol && protocol != PF_UNIX) 1000 return -EPROTONOSUPPORT; 1001 1002 sock->state = SS_UNCONNECTED; 1003 1004 switch (sock->type) { 1005 case SOCK_STREAM: 1006 sock->ops = &unix_stream_ops; 1007 break; 1008 /* 1009 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 1010 * nothing uses it. 1011 */ 1012 case SOCK_RAW: 1013 sock->type = SOCK_DGRAM; 1014 fallthrough; 1015 case SOCK_DGRAM: 1016 sock->ops = &unix_dgram_ops; 1017 break; 1018 case SOCK_SEQPACKET: 1019 sock->ops = &unix_seqpacket_ops; 1020 break; 1021 default: 1022 return -ESOCKTNOSUPPORT; 1023 } 1024 1025 sk = unix_create1(net, sock, kern, sock->type); 1026 if (IS_ERR(sk)) 1027 return PTR_ERR(sk); 1028 1029 return 0; 1030 } 1031 1032 static int unix_release(struct socket *sock) 1033 { 1034 struct sock *sk = sock->sk; 1035 1036 if (!sk) 1037 return 0; 1038 1039 sk->sk_prot->close(sk, 0); 1040 unix_release_sock(sk, 0); 1041 sock->sk = NULL; 1042 1043 return 0; 1044 } 1045 1046 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, 1047 int type) 1048 { 1049 struct inode *inode; 1050 struct path path; 1051 struct sock *sk; 1052 int err; 1053 1054 unix_mkname_bsd(sunaddr, addr_len); 1055 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1056 if (err) 1057 goto fail; 1058 1059 err = path_permission(&path, MAY_WRITE); 1060 if (err) 1061 goto path_put; 1062 1063 err = -ECONNREFUSED; 1064 inode = d_backing_inode(path.dentry); 1065 if (!S_ISSOCK(inode->i_mode)) 1066 goto path_put; 1067 1068 sk = unix_find_socket_byinode(inode); 1069 if (!sk) 1070 goto path_put; 1071 1072 err = -EPROTOTYPE; 1073 if (sk->sk_type == type) 1074 touch_atime(&path); 1075 else 1076 goto sock_put; 1077 1078 path_put(&path); 1079 1080 return sk; 1081 1082 sock_put: 1083 sock_put(sk); 1084 path_put: 1085 path_put(&path); 1086 fail: 1087 return ERR_PTR(err); 1088 } 1089 1090 static struct sock *unix_find_abstract(struct net *net, 1091 struct sockaddr_un *sunaddr, 1092 int addr_len, int type) 1093 { 1094 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1095 struct dentry *dentry; 1096 struct sock *sk; 1097 1098 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1099 if (!sk) 1100 return ERR_PTR(-ECONNREFUSED); 1101 1102 dentry = unix_sk(sk)->path.dentry; 1103 if (dentry) 1104 touch_atime(&unix_sk(sk)->path); 1105 1106 return sk; 1107 } 1108 1109 static struct sock *unix_find_other(struct net *net, 1110 struct sockaddr_un *sunaddr, 1111 int addr_len, int type) 1112 { 1113 struct sock *sk; 1114 1115 if (sunaddr->sun_path[0]) 1116 sk = unix_find_bsd(sunaddr, addr_len, type); 1117 else 1118 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1119 1120 return sk; 1121 } 1122 1123 static int unix_autobind(struct sock *sk) 1124 { 1125 unsigned int new_hash, old_hash = sk->sk_hash; 1126 struct unix_sock *u = unix_sk(sk); 1127 struct net *net = sock_net(sk); 1128 struct unix_address *addr; 1129 u32 lastnum, ordernum; 1130 int err; 1131 1132 err = mutex_lock_interruptible(&u->bindlock); 1133 if (err) 1134 return err; 1135 1136 if (u->addr) 1137 goto out; 1138 1139 err = -ENOMEM; 1140 addr = kzalloc(sizeof(*addr) + 1141 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1142 if (!addr) 1143 goto out; 1144 1145 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1146 addr->name->sun_family = AF_UNIX; 1147 refcount_set(&addr->refcnt, 1); 1148 1149 ordernum = prandom_u32(); 1150 
lastnum = ordernum & 0xFFFFF; 1151 retry: 1152 ordernum = (ordernum + 1) & 0xFFFFF; 1153 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1154 1155 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1156 unix_table_double_lock(net, old_hash, new_hash); 1157 1158 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) { 1159 unix_table_double_unlock(net, old_hash, new_hash); 1160 1161 /* __unix_find_socket_byname() may take long time if many names 1162 * are already in use. 1163 */ 1164 cond_resched(); 1165 1166 if (ordernum == lastnum) { 1167 /* Give up if all names seems to be in use. */ 1168 err = -ENOSPC; 1169 unix_release_addr(addr); 1170 goto out; 1171 } 1172 1173 goto retry; 1174 } 1175 1176 __unix_set_addr_hash(net, sk, addr, new_hash); 1177 unix_table_double_unlock(net, old_hash, new_hash); 1178 err = 0; 1179 1180 out: mutex_unlock(&u->bindlock); 1181 return err; 1182 } 1183 1184 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1185 int addr_len) 1186 { 1187 umode_t mode = S_IFSOCK | 1188 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1189 unsigned int new_hash, old_hash = sk->sk_hash; 1190 struct unix_sock *u = unix_sk(sk); 1191 struct net *net = sock_net(sk); 1192 struct user_namespace *ns; // barf... 1193 struct unix_address *addr; 1194 struct dentry *dentry; 1195 struct path parent; 1196 int err; 1197 1198 unix_mkname_bsd(sunaddr, addr_len); 1199 addr_len = strlen(sunaddr->sun_path) + 1200 offsetof(struct sockaddr_un, sun_path) + 1; 1201 1202 addr = unix_create_addr(sunaddr, addr_len); 1203 if (!addr) 1204 return -ENOMEM; 1205 1206 /* 1207 * Get the parent directory, calculate the hash for last 1208 * component. 1209 */ 1210 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1211 if (IS_ERR(dentry)) { 1212 err = PTR_ERR(dentry); 1213 goto out; 1214 } 1215 1216 /* 1217 * All right, let's create it. 1218 */ 1219 ns = mnt_user_ns(parent.mnt); 1220 err = security_path_mknod(&parent, dentry, mode, 0); 1221 if (!err) 1222 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); 1223 if (err) 1224 goto out_path; 1225 err = mutex_lock_interruptible(&u->bindlock); 1226 if (err) 1227 goto out_unlink; 1228 if (u->addr) 1229 goto out_unlock; 1230 1231 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1232 unix_table_double_lock(net, old_hash, new_hash); 1233 u->path.mnt = mntget(parent.mnt); 1234 u->path.dentry = dget(dentry); 1235 __unix_set_addr_hash(net, sk, addr, new_hash); 1236 unix_table_double_unlock(net, old_hash, new_hash); 1237 unix_insert_bsd_socket(sk); 1238 mutex_unlock(&u->bindlock); 1239 done_path_create(&parent, dentry); 1240 return 0; 1241 1242 out_unlock: 1243 mutex_unlock(&u->bindlock); 1244 err = -EINVAL; 1245 out_unlink: 1246 /* failed after successful mknod? unlink what we'd created... */ 1247 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); 1248 out_path: 1249 done_path_create(&parent, dentry); 1250 out: 1251 unix_release_addr(addr); 1252 return err == -EEXIST ? 
-EADDRINUSE : err; 1253 } 1254 1255 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1256 int addr_len) 1257 { 1258 unsigned int new_hash, old_hash = sk->sk_hash; 1259 struct unix_sock *u = unix_sk(sk); 1260 struct net *net = sock_net(sk); 1261 struct unix_address *addr; 1262 int err; 1263 1264 addr = unix_create_addr(sunaddr, addr_len); 1265 if (!addr) 1266 return -ENOMEM; 1267 1268 err = mutex_lock_interruptible(&u->bindlock); 1269 if (err) 1270 goto out; 1271 1272 if (u->addr) { 1273 err = -EINVAL; 1274 goto out_mutex; 1275 } 1276 1277 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1278 unix_table_double_lock(net, old_hash, new_hash); 1279 1280 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) 1281 goto out_spin; 1282 1283 __unix_set_addr_hash(net, sk, addr, new_hash); 1284 unix_table_double_unlock(net, old_hash, new_hash); 1285 mutex_unlock(&u->bindlock); 1286 return 0; 1287 1288 out_spin: 1289 unix_table_double_unlock(net, old_hash, new_hash); 1290 err = -EADDRINUSE; 1291 out_mutex: 1292 mutex_unlock(&u->bindlock); 1293 out: 1294 unix_release_addr(addr); 1295 return err; 1296 } 1297 1298 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1299 { 1300 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1301 struct sock *sk = sock->sk; 1302 int err; 1303 1304 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1305 sunaddr->sun_family == AF_UNIX) 1306 return unix_autobind(sk); 1307 1308 err = unix_validate_addr(sunaddr, addr_len); 1309 if (err) 1310 return err; 1311 1312 if (sunaddr->sun_path[0]) 1313 err = unix_bind_bsd(sk, sunaddr, addr_len); 1314 else 1315 err = unix_bind_abstract(sk, sunaddr, addr_len); 1316 1317 return err; 1318 } 1319 1320 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1321 { 1322 if (unlikely(sk1 == sk2) || !sk2) { 1323 unix_state_lock(sk1); 1324 return; 1325 } 1326 if (sk1 < sk2) { 1327 unix_state_lock(sk1); 1328 unix_state_lock_nested(sk2); 1329 } else { 1330 unix_state_lock(sk2); 1331 unix_state_lock_nested(sk1); 1332 } 1333 } 1334 1335 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1336 { 1337 if (unlikely(sk1 == sk2) || !sk2) { 1338 unix_state_unlock(sk1); 1339 return; 1340 } 1341 unix_state_unlock(sk1); 1342 unix_state_unlock(sk2); 1343 } 1344 1345 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1346 int alen, int flags) 1347 { 1348 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1349 struct sock *sk = sock->sk; 1350 struct sock *other; 1351 int err; 1352 1353 err = -EINVAL; 1354 if (alen < offsetofend(struct sockaddr, sa_family)) 1355 goto out; 1356 1357 if (addr->sa_family != AF_UNSPEC) { 1358 err = unix_validate_addr(sunaddr, alen); 1359 if (err) 1360 goto out; 1361 1362 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1363 !unix_sk(sk)->addr) { 1364 err = unix_autobind(sk); 1365 if (err) 1366 goto out; 1367 } 1368 1369 restart: 1370 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type); 1371 if (IS_ERR(other)) { 1372 err = PTR_ERR(other); 1373 goto out; 1374 } 1375 1376 unix_state_double_lock(sk, other); 1377 1378 /* Apparently VFS overslept socket death. Retry. 
*/ 1379 if (sock_flag(other, SOCK_DEAD)) { 1380 unix_state_double_unlock(sk, other); 1381 sock_put(other); 1382 goto restart; 1383 } 1384 1385 err = -EPERM; 1386 if (!unix_may_send(sk, other)) 1387 goto out_unlock; 1388 1389 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1390 if (err) 1391 goto out_unlock; 1392 1393 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1394 } else { 1395 /* 1396 * 1003.1g breaking connected state with AF_UNSPEC 1397 */ 1398 other = NULL; 1399 unix_state_double_lock(sk, other); 1400 } 1401 1402 /* 1403 * If it was connected, reconnect. 1404 */ 1405 if (unix_peer(sk)) { 1406 struct sock *old_peer = unix_peer(sk); 1407 1408 unix_peer(sk) = other; 1409 if (!other) 1410 sk->sk_state = TCP_CLOSE; 1411 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1412 1413 unix_state_double_unlock(sk, other); 1414 1415 if (other != old_peer) 1416 unix_dgram_disconnected(sk, old_peer); 1417 sock_put(old_peer); 1418 } else { 1419 unix_peer(sk) = other; 1420 unix_state_double_unlock(sk, other); 1421 } 1422 1423 return 0; 1424 1425 out_unlock: 1426 unix_state_double_unlock(sk, other); 1427 sock_put(other); 1428 out: 1429 return err; 1430 } 1431 1432 static long unix_wait_for_peer(struct sock *other, long timeo) 1433 __releases(&unix_sk(other)->lock) 1434 { 1435 struct unix_sock *u = unix_sk(other); 1436 int sched; 1437 DEFINE_WAIT(wait); 1438 1439 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1440 1441 sched = !sock_flag(other, SOCK_DEAD) && 1442 !(other->sk_shutdown & RCV_SHUTDOWN) && 1443 unix_recvq_full(other); 1444 1445 unix_state_unlock(other); 1446 1447 if (sched) 1448 timeo = schedule_timeout(timeo); 1449 1450 finish_wait(&u->peer_wait, &wait); 1451 return timeo; 1452 } 1453 1454 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1455 int addr_len, int flags) 1456 { 1457 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1458 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL; 1459 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1460 struct net *net = sock_net(sk); 1461 struct sk_buff *skb = NULL; 1462 long timeo; 1463 int err; 1464 int st; 1465 1466 err = unix_validate_addr(sunaddr, addr_len); 1467 if (err) 1468 goto out; 1469 1470 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1471 err = unix_autobind(sk); 1472 if (err) 1473 goto out; 1474 } 1475 1476 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1477 1478 /* First of all allocate resources. 1479 If we will make it after state is locked, 1480 we will have to recheck all again in any case. 1481 */ 1482 1483 /* create new sock for complete connection */ 1484 newsk = unix_create1(net, NULL, 0, sock->type); 1485 if (IS_ERR(newsk)) { 1486 err = PTR_ERR(newsk); 1487 newsk = NULL; 1488 goto out; 1489 } 1490 1491 err = -ENOMEM; 1492 1493 /* Allocate skb for sending to listening sock */ 1494 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1495 if (skb == NULL) 1496 goto out; 1497 1498 restart: 1499 /* Find listening sock. */ 1500 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1501 if (IS_ERR(other)) { 1502 err = PTR_ERR(other); 1503 other = NULL; 1504 goto out; 1505 } 1506 1507 /* Latch state of peer */ 1508 unix_state_lock(other); 1509 1510 /* Apparently VFS overslept socket death. Retry. 
*/ 1511 if (sock_flag(other, SOCK_DEAD)) { 1512 unix_state_unlock(other); 1513 sock_put(other); 1514 goto restart; 1515 } 1516 1517 err = -ECONNREFUSED; 1518 if (other->sk_state != TCP_LISTEN) 1519 goto out_unlock; 1520 if (other->sk_shutdown & RCV_SHUTDOWN) 1521 goto out_unlock; 1522 1523 if (unix_recvq_full(other)) { 1524 err = -EAGAIN; 1525 if (!timeo) 1526 goto out_unlock; 1527 1528 timeo = unix_wait_for_peer(other, timeo); 1529 1530 err = sock_intr_errno(timeo); 1531 if (signal_pending(current)) 1532 goto out; 1533 sock_put(other); 1534 goto restart; 1535 } 1536 1537 /* Latch our state. 1538 1539 It is tricky place. We need to grab our state lock and cannot 1540 drop lock on peer. It is dangerous because deadlock is 1541 possible. Connect to self case and simultaneous 1542 attempt to connect are eliminated by checking socket 1543 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1544 check this before attempt to grab lock. 1545 1546 Well, and we have to recheck the state after socket locked. 1547 */ 1548 st = sk->sk_state; 1549 1550 switch (st) { 1551 case TCP_CLOSE: 1552 /* This is ok... continue with connect */ 1553 break; 1554 case TCP_ESTABLISHED: 1555 /* Socket is already connected */ 1556 err = -EISCONN; 1557 goto out_unlock; 1558 default: 1559 err = -EINVAL; 1560 goto out_unlock; 1561 } 1562 1563 unix_state_lock_nested(sk); 1564 1565 if (sk->sk_state != st) { 1566 unix_state_unlock(sk); 1567 unix_state_unlock(other); 1568 sock_put(other); 1569 goto restart; 1570 } 1571 1572 err = security_unix_stream_connect(sk, other, newsk); 1573 if (err) { 1574 unix_state_unlock(sk); 1575 goto out_unlock; 1576 } 1577 1578 /* The way is open! Fastly set all the necessary fields... */ 1579 1580 sock_hold(sk); 1581 unix_peer(newsk) = sk; 1582 newsk->sk_state = TCP_ESTABLISHED; 1583 newsk->sk_type = sk->sk_type; 1584 init_peercred(newsk); 1585 newu = unix_sk(newsk); 1586 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1587 otheru = unix_sk(other); 1588 1589 /* copy address information from listening to new sock 1590 * 1591 * The contents of *(otheru->addr) and otheru->path 1592 * are seen fully set up here, since we have found 1593 * otheru in hash under its lock. Insertion into the 1594 * hash chain we'd found it in had been done in an 1595 * earlier critical area protected by the chain's lock, 1596 * the same one where we'd set *(otheru->addr) contents, 1597 * as well as otheru->path and otheru->addr itself. 1598 * 1599 * Using smp_store_release() here to set newu->addr 1600 * is enough to make those stores, as well as stores 1601 * to newu->path visible to anyone who gets newu->addr 1602 * by smp_load_acquire(). IOW, the same warranties 1603 * as for unix_sock instances bound in unix_bind() or 1604 * in unix_autobind(). 
1605 */ 1606 if (otheru->path.dentry) { 1607 path_get(&otheru->path); 1608 newu->path = otheru->path; 1609 } 1610 refcount_inc(&otheru->addr->refcnt); 1611 smp_store_release(&newu->addr, otheru->addr); 1612 1613 /* Set credentials */ 1614 copy_peercred(sk, other); 1615 1616 sock->state = SS_CONNECTED; 1617 sk->sk_state = TCP_ESTABLISHED; 1618 sock_hold(newsk); 1619 1620 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1621 unix_peer(sk) = newsk; 1622 1623 unix_state_unlock(sk); 1624 1625 /* take ten and send info to listening sock */ 1626 spin_lock(&other->sk_receive_queue.lock); 1627 __skb_queue_tail(&other->sk_receive_queue, skb); 1628 spin_unlock(&other->sk_receive_queue.lock); 1629 unix_state_unlock(other); 1630 other->sk_data_ready(other); 1631 sock_put(other); 1632 return 0; 1633 1634 out_unlock: 1635 if (other) 1636 unix_state_unlock(other); 1637 1638 out: 1639 kfree_skb(skb); 1640 if (newsk) 1641 unix_release_sock(newsk, 0); 1642 if (other) 1643 sock_put(other); 1644 return err; 1645 } 1646 1647 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1648 { 1649 struct sock *ska = socka->sk, *skb = sockb->sk; 1650 1651 /* Join our sockets back to back */ 1652 sock_hold(ska); 1653 sock_hold(skb); 1654 unix_peer(ska) = skb; 1655 unix_peer(skb) = ska; 1656 init_peercred(ska); 1657 init_peercred(skb); 1658 1659 ska->sk_state = TCP_ESTABLISHED; 1660 skb->sk_state = TCP_ESTABLISHED; 1661 socka->state = SS_CONNECTED; 1662 sockb->state = SS_CONNECTED; 1663 return 0; 1664 } 1665 1666 static void unix_sock_inherit_flags(const struct socket *old, 1667 struct socket *new) 1668 { 1669 if (test_bit(SOCK_PASSCRED, &old->flags)) 1670 set_bit(SOCK_PASSCRED, &new->flags); 1671 if (test_bit(SOCK_PASSSEC, &old->flags)) 1672 set_bit(SOCK_PASSSEC, &new->flags); 1673 } 1674 1675 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1676 bool kern) 1677 { 1678 struct sock *sk = sock->sk; 1679 struct sock *tsk; 1680 struct sk_buff *skb; 1681 int err; 1682 1683 err = -EOPNOTSUPP; 1684 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1685 goto out; 1686 1687 err = -EINVAL; 1688 if (sk->sk_state != TCP_LISTEN) 1689 goto out; 1690 1691 /* If socket state is TCP_LISTEN it cannot change (for now...), 1692 * so that no locks are necessary. 1693 */ 1694 1695 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1696 &err); 1697 if (!skb) { 1698 /* This means receive shutdown. 
 */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = offsetof(struct sockaddr_un, sun_path);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs). This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored. While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operation that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeuing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd. The following lock/unlock
	 * pair is to ensure serialization with garbage collection. It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate. If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	return UNIXCB(skb).pid == scm->pid &&
	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}

/*
 *	Send AF_UNIX data.
1887 */ 1888 1889 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1890 size_t len) 1891 { 1892 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1893 struct sock *sk = sock->sk, *other = NULL; 1894 struct unix_sock *u = unix_sk(sk); 1895 struct scm_cookie scm; 1896 struct sk_buff *skb; 1897 int data_len = 0; 1898 int sk_locked; 1899 long timeo; 1900 int err; 1901 1902 wait_for_unix_gc(); 1903 err = scm_send(sock, msg, &scm, false); 1904 if (err < 0) 1905 return err; 1906 1907 err = -EOPNOTSUPP; 1908 if (msg->msg_flags&MSG_OOB) 1909 goto out; 1910 1911 if (msg->msg_namelen) { 1912 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1913 if (err) 1914 goto out; 1915 } else { 1916 sunaddr = NULL; 1917 err = -ENOTCONN; 1918 other = unix_peer_get(sk); 1919 if (!other) 1920 goto out; 1921 } 1922 1923 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1924 err = unix_autobind(sk); 1925 if (err) 1926 goto out; 1927 } 1928 1929 err = -EMSGSIZE; 1930 if (len > sk->sk_sndbuf - 32) 1931 goto out; 1932 1933 if (len > SKB_MAX_ALLOC) { 1934 data_len = min_t(size_t, 1935 len - SKB_MAX_ALLOC, 1936 MAX_SKB_FRAGS * PAGE_SIZE); 1937 data_len = PAGE_ALIGN(data_len); 1938 1939 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1940 } 1941 1942 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1943 msg->msg_flags & MSG_DONTWAIT, &err, 1944 PAGE_ALLOC_COSTLY_ORDER); 1945 if (skb == NULL) 1946 goto out; 1947 1948 err = unix_scm_to_skb(&scm, skb, true); 1949 if (err < 0) 1950 goto out_free; 1951 1952 skb_put(skb, len - data_len); 1953 skb->data_len = data_len; 1954 skb->len = len; 1955 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1956 if (err) 1957 goto out_free; 1958 1959 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1960 1961 restart: 1962 if (!other) { 1963 err = -ECONNRESET; 1964 if (sunaddr == NULL) 1965 goto out_free; 1966 1967 other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, 1968 sk->sk_type); 1969 if (IS_ERR(other)) { 1970 err = PTR_ERR(other); 1971 other = NULL; 1972 goto out_free; 1973 } 1974 } 1975 1976 if (sk_filter(other, skb) < 0) { 1977 /* Toss the packet but do not return any error to the sender */ 1978 err = len; 1979 goto out_free; 1980 } 1981 1982 sk_locked = 0; 1983 unix_state_lock(other); 1984 restart_locked: 1985 err = -EPERM; 1986 if (!unix_may_send(sk, other)) 1987 goto out_unlock; 1988 1989 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1990 /* 1991 * Check with 1003.1g - what should 1992 * datagram error 1993 */ 1994 unix_state_unlock(other); 1995 sock_put(other); 1996 1997 if (!sk_locked) 1998 unix_state_lock(sk); 1999 2000 err = 0; 2001 if (unix_peer(sk) == other) { 2002 unix_peer(sk) = NULL; 2003 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 2004 2005 unix_state_unlock(sk); 2006 2007 sk->sk_state = TCP_CLOSE; 2008 unix_dgram_disconnected(sk, other); 2009 sock_put(other); 2010 err = -ECONNREFUSED; 2011 } else { 2012 unix_state_unlock(sk); 2013 } 2014 2015 other = NULL; 2016 if (err) 2017 goto out_free; 2018 goto restart; 2019 } 2020 2021 err = -EPIPE; 2022 if (other->sk_shutdown & RCV_SHUTDOWN) 2023 goto out_unlock; 2024 2025 if (sk->sk_type != SOCK_SEQPACKET) { 2026 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 2027 if (err) 2028 goto out_unlock; 2029 } 2030 2031 /* other == sk && unix_peer(other) != sk if 2032 * - unix_peer(sk) == NULL, destination address bound to sk 2033 * - unix_peer(sk) == sk by time of get but disconnected before lock 2034 */ 2035 if (other != sk && 2036 
unlikely(unix_peer(other) != sk && 2037 unix_recvq_full_lockless(other))) { 2038 if (timeo) { 2039 timeo = unix_wait_for_peer(other, timeo); 2040 2041 err = sock_intr_errno(timeo); 2042 if (signal_pending(current)) 2043 goto out_free; 2044 2045 goto restart; 2046 } 2047 2048 if (!sk_locked) { 2049 unix_state_unlock(other); 2050 unix_state_double_lock(sk, other); 2051 } 2052 2053 if (unix_peer(sk) != other || 2054 unix_dgram_peer_wake_me(sk, other)) { 2055 err = -EAGAIN; 2056 sk_locked = 1; 2057 goto out_unlock; 2058 } 2059 2060 if (!sk_locked) { 2061 sk_locked = 1; 2062 goto restart_locked; 2063 } 2064 } 2065 2066 if (unlikely(sk_locked)) 2067 unix_state_unlock(sk); 2068 2069 if (sock_flag(other, SOCK_RCVTSTAMP)) 2070 __net_timestamp(skb); 2071 maybe_add_creds(skb, sock, other); 2072 scm_stat_add(other, skb); 2073 skb_queue_tail(&other->sk_receive_queue, skb); 2074 unix_state_unlock(other); 2075 other->sk_data_ready(other); 2076 sock_put(other); 2077 scm_destroy(&scm); 2078 return len; 2079 2080 out_unlock: 2081 if (sk_locked) 2082 unix_state_unlock(sk); 2083 unix_state_unlock(other); 2084 out_free: 2085 kfree_skb(skb); 2086 out: 2087 if (other) 2088 sock_put(other); 2089 scm_destroy(&scm); 2090 return err; 2091 } 2092 2093 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2094 * bytes, and a minimum of a full page. 2095 */ 2096 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2097 2098 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2099 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) 2100 { 2101 struct unix_sock *ousk = unix_sk(other); 2102 struct sk_buff *skb; 2103 int err = 0; 2104 2105 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2106 2107 if (!skb) 2108 return err; 2109 2110 skb_put(skb, 1); 2111 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2112 2113 if (err) { 2114 kfree_skb(skb); 2115 return err; 2116 } 2117 2118 unix_state_lock(other); 2119 2120 if (sock_flag(other, SOCK_DEAD) || 2121 (other->sk_shutdown & RCV_SHUTDOWN)) { 2122 unix_state_unlock(other); 2123 kfree_skb(skb); 2124 return -EPIPE; 2125 } 2126 2127 maybe_add_creds(skb, sock, other); 2128 skb_get(skb); 2129 2130 if (ousk->oob_skb) 2131 consume_skb(ousk->oob_skb); 2132 2133 WRITE_ONCE(ousk->oob_skb, skb); 2134 2135 scm_stat_add(other, skb); 2136 skb_queue_tail(&other->sk_receive_queue, skb); 2137 sk_send_sigurg(other); 2138 unix_state_unlock(other); 2139 other->sk_data_ready(other); 2140 2141 return err; 2142 } 2143 #endif 2144 2145 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2146 size_t len) 2147 { 2148 struct sock *sk = sock->sk; 2149 struct sock *other = NULL; 2150 int err, size; 2151 struct sk_buff *skb; 2152 int sent = 0; 2153 struct scm_cookie scm; 2154 bool fds_sent = false; 2155 int data_len; 2156 2157 wait_for_unix_gc(); 2158 err = scm_send(sock, msg, &scm, false); 2159 if (err < 0) 2160 return err; 2161 2162 err = -EOPNOTSUPP; 2163 if (msg->msg_flags & MSG_OOB) { 2164 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2165 if (len) 2166 len--; 2167 else 2168 #endif 2169 goto out_err; 2170 } 2171 2172 if (msg->msg_namelen) { 2173 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2174 goto out_err; 2175 } else { 2176 err = -ENOTCONN; 2177 other = unix_peer(sk); 2178 if (!other) 2179 goto out_err; 2180 } 2181 2182 if (sk->sk_shutdown & SEND_SHUTDOWN) 2183 goto pipe_err; 2184 2185 while (sent < len) { 2186 size = len - sent; 2187 2188 /* Keep two messages in the pipe so it schedules better */ 2189 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2190 2191 /* allow fallback to order-0 allocations */ 2192 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2193 2194 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2195 2196 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2197 2198 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2199 msg->msg_flags & MSG_DONTWAIT, &err, 2200 get_order(UNIX_SKB_FRAGS_SZ)); 2201 if (!skb) 2202 goto out_err; 2203 2204 /* Only send the fds in the first buffer */ 2205 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2206 if (err < 0) { 2207 kfree_skb(skb); 2208 goto out_err; 2209 } 2210 fds_sent = true; 2211 2212 skb_put(skb, size - data_len); 2213 skb->data_len = data_len; 2214 skb->len = size; 2215 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2216 if (err) { 2217 kfree_skb(skb); 2218 goto out_err; 2219 } 2220 2221 unix_state_lock(other); 2222 2223 if (sock_flag(other, SOCK_DEAD) || 2224 (other->sk_shutdown & RCV_SHUTDOWN)) 2225 goto pipe_err_free; 2226 2227 maybe_add_creds(skb, sock, other); 2228 scm_stat_add(other, skb); 2229 skb_queue_tail(&other->sk_receive_queue, skb); 2230 unix_state_unlock(other); 2231 other->sk_data_ready(other); 2232 sent += size; 2233 } 2234 2235 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2236 if (msg->msg_flags & MSG_OOB) { 2237 err = queue_oob(sock, msg, other); 2238 if (err) 2239 goto out_err; 2240 sent++; 2241 } 2242 #endif 2243 2244 scm_destroy(&scm); 2245 2246 return sent; 2247 2248 pipe_err_free: 2249 unix_state_unlock(other); 2250 kfree_skb(skb); 2251 pipe_err: 2252 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2253 send_sig(SIGPIPE, current, 0); 2254 err = -EPIPE; 2255 out_err: 2256 scm_destroy(&scm); 2257 return sent ? : err; 2258 } 2259 2260 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2261 int offset, size_t size, int flags) 2262 { 2263 int err; 2264 bool send_sigpipe = false; 2265 bool init_scm = true; 2266 struct scm_cookie scm; 2267 struct sock *other, *sk = socket->sk; 2268 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2269 2270 if (flags & MSG_OOB) 2271 return -EOPNOTSUPP; 2272 2273 other = unix_peer(sk); 2274 if (!other || sk->sk_state != TCP_ESTABLISHED) 2275 return -ENOTCONN; 2276 2277 if (false) { 2278 alloc_skb: 2279 unix_state_unlock(other); 2280 mutex_unlock(&unix_sk(other)->iolock); 2281 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2282 &err, 0); 2283 if (!newskb) 2284 goto err; 2285 } 2286 2287 /* we must acquire iolock as we modify already present 2288 * skbs in the sk_receive_queue and mess with skb->len 2289 */ 2290 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2291 if (err) { 2292 err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; 2293 goto err; 2294 } 2295 2296 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2297 err = -EPIPE; 2298 send_sigpipe = true; 2299 goto err_unlock; 2300 } 2301 2302 unix_state_lock(other); 2303 2304 if (sock_flag(other, SOCK_DEAD) || 2305 other->sk_shutdown & RCV_SHUTDOWN) { 2306 err = -EPIPE; 2307 send_sigpipe = true; 2308 goto err_state_unlock; 2309 } 2310 2311 if (init_scm) { 2312 err = maybe_init_creds(&scm, socket, other); 2313 if (err) 2314 goto err_state_unlock; 2315 init_scm = false; 2316 } 2317 2318 skb = skb_peek_tail(&other->sk_receive_queue); 2319 if (tail && tail == skb) { 2320 skb = newskb; 2321 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2322 if (newskb) { 2323 skb = newskb; 2324 } else { 2325 tail = skb; 2326 goto alloc_skb; 2327 } 2328 } else if (newskb) { 2329 /* this is fast path, we don't necessarily need to 2330 * call to kfree_skb even though with newskb == NULL 2331 * this - does no harm 2332 */ 2333 consume_skb(newskb); 2334 newskb = NULL; 2335 } 2336 2337 if (skb_append_pagefrags(skb, page, offset, size)) { 2338 tail = skb; 2339 goto alloc_skb; 2340 } 2341 2342 skb->len += size; 2343 skb->data_len += size; 2344 skb->truesize += size; 2345 refcount_add(size, &sk->sk_wmem_alloc); 2346 2347 if (newskb) { 2348 err = unix_scm_to_skb(&scm, skb, false); 2349 if (err) 2350 goto err_state_unlock; 2351 spin_lock(&other->sk_receive_queue.lock); 2352 __skb_queue_tail(&other->sk_receive_queue, newskb); 2353 spin_unlock(&other->sk_receive_queue.lock); 2354 } 2355 2356 unix_state_unlock(other); 2357 mutex_unlock(&unix_sk(other)->iolock); 2358 2359 other->sk_data_ready(other); 2360 scm_destroy(&scm); 2361 return size; 2362 2363 err_state_unlock: 2364 unix_state_unlock(other); 2365 err_unlock: 2366 mutex_unlock(&unix_sk(other)->iolock); 2367 err: 2368 kfree_skb(newskb); 2369 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2370 send_sig(SIGPIPE, current, 0); 2371 if (!init_scm) 2372 scm_destroy(&scm); 2373 return err; 2374 } 2375 2376 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2377 size_t len) 2378 { 2379 int err; 2380 struct sock *sk = sock->sk; 2381 2382 err = sock_error(sk); 2383 if (err) 2384 return err; 2385 2386 if (sk->sk_state != TCP_ESTABLISHED) 2387 return -ENOTCONN; 2388 2389 if (msg->msg_namelen) 2390 msg->msg_namelen = 0; 2391 2392 return unix_dgram_sendmsg(sock, msg, len); 2393 } 2394 2395 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2396 size_t size, int flags) 2397 { 2398 struct sock *sk = sock->sk; 2399 2400 if (sk->sk_state != TCP_ESTABLISHED) 2401 return -ENOTCONN; 2402 2403 return unix_dgram_recvmsg(sock, msg, size, flags); 2404 } 2405 2406 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2407 { 2408 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2409 2410 if (addr) { 2411 msg->msg_namelen = addr->len; 2412 memcpy(msg->msg_name, addr->name, addr->len); 2413 } 2414 } 2415 2416 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2417 int flags) 2418 { 2419 struct scm_cookie scm; 2420 struct socket *sock = sk->sk_socket; 2421 struct unix_sock *u = unix_sk(sk); 2422 struct sk_buff *skb, *last; 2423 long timeo; 2424 int skip; 2425 int err; 2426 2427 err = -EOPNOTSUPP; 2428 if (flags&MSG_OOB) 2429 goto out; 2430 2431 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2432 2433 do { 2434 mutex_lock(&u->iolock); 2435 2436 skip = sk_peek_offset(sk, flags); 2437 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2438 &skip, 
&err, &last); 2439 if (skb) { 2440 if (!(flags & MSG_PEEK)) 2441 scm_stat_del(sk, skb); 2442 break; 2443 } 2444 2445 mutex_unlock(&u->iolock); 2446 2447 if (err != -EAGAIN) 2448 break; 2449 } while (timeo && 2450 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2451 &err, &timeo, last)); 2452 2453 if (!skb) { /* implies iolock unlocked */ 2454 unix_state_lock(sk); 2455 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2456 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2457 (sk->sk_shutdown & RCV_SHUTDOWN)) 2458 err = 0; 2459 unix_state_unlock(sk); 2460 goto out; 2461 } 2462 2463 if (wq_has_sleeper(&u->peer_wait)) 2464 wake_up_interruptible_sync_poll(&u->peer_wait, 2465 EPOLLOUT | EPOLLWRNORM | 2466 EPOLLWRBAND); 2467 2468 if (msg->msg_name) 2469 unix_copy_addr(msg, skb->sk); 2470 2471 if (size > skb->len - skip) 2472 size = skb->len - skip; 2473 else if (size < skb->len - skip) 2474 msg->msg_flags |= MSG_TRUNC; 2475 2476 err = skb_copy_datagram_msg(skb, skip, msg, size); 2477 if (err) 2478 goto out_free; 2479 2480 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2481 __sock_recv_timestamp(msg, sk, skb); 2482 2483 memset(&scm, 0, sizeof(scm)); 2484 2485 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2486 unix_set_secdata(&scm, skb); 2487 2488 if (!(flags & MSG_PEEK)) { 2489 if (UNIXCB(skb).fp) 2490 unix_detach_fds(&scm, skb); 2491 2492 sk_peek_offset_bwd(sk, skb->len); 2493 } else { 2494 /* It is questionable: on PEEK we could: 2495 - do not return fds - good, but too simple 8) 2496 - return fds, and do not return them on read (old strategy, 2497 apparently wrong) 2498 - clone fds (I chose it for now, it is the most universal 2499 solution) 2500 2501 POSIX 1003.1g does not actually define this clearly 2502 at all. POSIX 1003.1g doesn't define a lot of things 2503 clearly however! 2504 2505 */ 2506 2507 sk_peek_offset_fwd(sk, size); 2508 2509 if (UNIXCB(skb).fp) 2510 unix_peek_fds(&scm, skb); 2511 } 2512 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2513 2514 scm_recv(sock, msg, &scm, flags); 2515 2516 out_free: 2517 skb_free_datagram(sk, skb); 2518 mutex_unlock(&u->iolock); 2519 out: 2520 return err; 2521 } 2522 2523 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2524 int flags) 2525 { 2526 struct sock *sk = sock->sk; 2527 2528 #ifdef CONFIG_BPF_SYSCALL 2529 const struct proto *prot = READ_ONCE(sk->sk_prot); 2530 2531 if (prot != &unix_dgram_proto) 2532 return prot->recvmsg(sk, msg, size, flags, NULL); 2533 #endif 2534 return __unix_dgram_recvmsg(sk, msg, size, flags); 2535 } 2536 2537 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2538 { 2539 int copied = 0; 2540 2541 while (1) { 2542 struct unix_sock *u = unix_sk(sk); 2543 struct sk_buff *skb; 2544 int used, err; 2545 2546 mutex_lock(&u->iolock); 2547 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2548 mutex_unlock(&u->iolock); 2549 if (!skb) 2550 return err; 2551 2552 used = recv_actor(sk, skb); 2553 if (used <= 0) { 2554 if (!copied) 2555 copied = used; 2556 kfree_skb(skb); 2557 break; 2558 } else if (used <= skb->len) { 2559 copied += used; 2560 } 2561 2562 kfree_skb(skb); 2563 break; 2564 } 2565 2566 return copied; 2567 } 2568 2569 /* 2570 * Sleep until more data has arrived. But check for races.. 
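 *
 * "Races" here covers everything that should end the wait even though the
 * head of the queue did not change: more bytes may have been appended to
 * the tail skb we already inspected (hence the last/last_len comparison
 * below), the socket may have an error pending or be shut down for
 * reading, a signal may have arrived, or the timeout may have expired.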
2571 */ 2572 static long unix_stream_data_wait(struct sock *sk, long timeo, 2573 struct sk_buff *last, unsigned int last_len, 2574 bool freezable) 2575 { 2576 struct sk_buff *tail; 2577 DEFINE_WAIT(wait); 2578 2579 unix_state_lock(sk); 2580 2581 for (;;) { 2582 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2583 2584 tail = skb_peek_tail(&sk->sk_receive_queue); 2585 if (tail != last || 2586 (tail && tail->len != last_len) || 2587 sk->sk_err || 2588 (sk->sk_shutdown & RCV_SHUTDOWN) || 2589 signal_pending(current) || 2590 !timeo) 2591 break; 2592 2593 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2594 unix_state_unlock(sk); 2595 if (freezable) 2596 timeo = freezable_schedule_timeout(timeo); 2597 else 2598 timeo = schedule_timeout(timeo); 2599 unix_state_lock(sk); 2600 2601 if (sock_flag(sk, SOCK_DEAD)) 2602 break; 2603 2604 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2605 } 2606 2607 finish_wait(sk_sleep(sk), &wait); 2608 unix_state_unlock(sk); 2609 return timeo; 2610 } 2611 2612 static unsigned int unix_skb_len(const struct sk_buff *skb) 2613 { 2614 return skb->len - UNIXCB(skb).consumed; 2615 } 2616 2617 struct unix_stream_read_state { 2618 int (*recv_actor)(struct sk_buff *, int, int, 2619 struct unix_stream_read_state *); 2620 struct socket *socket; 2621 struct msghdr *msg; 2622 struct pipe_inode_info *pipe; 2623 size_t size; 2624 int flags; 2625 unsigned int splice_flags; 2626 }; 2627 2628 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2629 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2630 { 2631 struct socket *sock = state->socket; 2632 struct sock *sk = sock->sk; 2633 struct unix_sock *u = unix_sk(sk); 2634 int chunk = 1; 2635 struct sk_buff *oob_skb; 2636 2637 mutex_lock(&u->iolock); 2638 unix_state_lock(sk); 2639 2640 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2641 unix_state_unlock(sk); 2642 mutex_unlock(&u->iolock); 2643 return -EINVAL; 2644 } 2645 2646 oob_skb = u->oob_skb; 2647 2648 if (!(state->flags & MSG_PEEK)) 2649 WRITE_ONCE(u->oob_skb, NULL); 2650 2651 unix_state_unlock(sk); 2652 2653 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2654 2655 if (!(state->flags & MSG_PEEK)) { 2656 UNIXCB(oob_skb).consumed += 1; 2657 kfree_skb(oob_skb); 2658 } 2659 2660 mutex_unlock(&u->iolock); 2661 2662 if (chunk < 0) 2663 return -EFAULT; 2664 2665 state->msg->msg_flags |= MSG_OOB; 2666 return 1; 2667 } 2668 2669 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2670 int flags, int copied) 2671 { 2672 struct unix_sock *u = unix_sk(sk); 2673 2674 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2675 skb_unlink(skb, &sk->sk_receive_queue); 2676 consume_skb(skb); 2677 skb = NULL; 2678 } else { 2679 if (skb == u->oob_skb) { 2680 if (copied) { 2681 skb = NULL; 2682 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2683 if (!(flags & MSG_PEEK)) { 2684 WRITE_ONCE(u->oob_skb, NULL); 2685 consume_skb(skb); 2686 } 2687 } else if (!(flags & MSG_PEEK)) { 2688 skb_unlink(skb, &sk->sk_receive_queue); 2689 consume_skb(skb); 2690 skb = skb_peek(&sk->sk_receive_queue); 2691 } 2692 } 2693 } 2694 return skb; 2695 } 2696 #endif 2697 2698 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2699 { 2700 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2701 return -ENOTCONN; 2702 2703 return unix_read_skb(sk, recv_actor); 2704 } 2705 2706 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2707 bool freezable) 2708 { 2709 struct scm_cookie scm; 2710 struct socket *sock = state->socket; 2711 struct sock *sk = sock->sk; 2712 
struct unix_sock *u = unix_sk(sk); 2713 int copied = 0; 2714 int flags = state->flags; 2715 int noblock = flags & MSG_DONTWAIT; 2716 bool check_creds = false; 2717 int target; 2718 int err = 0; 2719 long timeo; 2720 int skip; 2721 size_t size = state->size; 2722 unsigned int last_len; 2723 2724 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2725 err = -EINVAL; 2726 goto out; 2727 } 2728 2729 if (unlikely(flags & MSG_OOB)) { 2730 err = -EOPNOTSUPP; 2731 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2732 err = unix_stream_recv_urg(state); 2733 #endif 2734 goto out; 2735 } 2736 2737 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2738 timeo = sock_rcvtimeo(sk, noblock); 2739 2740 memset(&scm, 0, sizeof(scm)); 2741 2742 /* Lock the socket to prevent queue disordering 2743 * while sleeps in memcpy_tomsg 2744 */ 2745 mutex_lock(&u->iolock); 2746 2747 skip = max(sk_peek_offset(sk, flags), 0); 2748 2749 do { 2750 int chunk; 2751 bool drop_skb; 2752 struct sk_buff *skb, *last; 2753 2754 redo: 2755 unix_state_lock(sk); 2756 if (sock_flag(sk, SOCK_DEAD)) { 2757 err = -ECONNRESET; 2758 goto unlock; 2759 } 2760 last = skb = skb_peek(&sk->sk_receive_queue); 2761 last_len = last ? last->len : 0; 2762 2763 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2764 if (skb) { 2765 skb = manage_oob(skb, sk, flags, copied); 2766 if (!skb) { 2767 unix_state_unlock(sk); 2768 if (copied) 2769 break; 2770 goto redo; 2771 } 2772 } 2773 #endif 2774 again: 2775 if (skb == NULL) { 2776 if (copied >= target) 2777 goto unlock; 2778 2779 /* 2780 * POSIX 1003.1g mandates this order. 2781 */ 2782 2783 err = sock_error(sk); 2784 if (err) 2785 goto unlock; 2786 if (sk->sk_shutdown & RCV_SHUTDOWN) 2787 goto unlock; 2788 2789 unix_state_unlock(sk); 2790 if (!timeo) { 2791 err = -EAGAIN; 2792 break; 2793 } 2794 2795 mutex_unlock(&u->iolock); 2796 2797 timeo = unix_stream_data_wait(sk, timeo, last, 2798 last_len, freezable); 2799 2800 if (signal_pending(current)) { 2801 err = sock_intr_errno(timeo); 2802 scm_destroy(&scm); 2803 goto out; 2804 } 2805 2806 mutex_lock(&u->iolock); 2807 goto redo; 2808 unlock: 2809 unix_state_unlock(sk); 2810 break; 2811 } 2812 2813 while (skip >= unix_skb_len(skb)) { 2814 skip -= unix_skb_len(skb); 2815 last = skb; 2816 last_len = skb->len; 2817 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2818 if (!skb) 2819 goto again; 2820 } 2821 2822 unix_state_unlock(sk); 2823 2824 if (check_creds) { 2825 /* Never glue messages from different writers */ 2826 if (!unix_skb_scm_eq(skb, &scm)) 2827 break; 2828 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2829 /* Copy credentials */ 2830 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2831 unix_set_secdata(&scm, skb); 2832 check_creds = true; 2833 } 2834 2835 /* Copy address just once */ 2836 if (state->msg && state->msg->msg_name) { 2837 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2838 state->msg->msg_name); 2839 unix_copy_addr(state->msg, skb->sk); 2840 sunaddr = NULL; 2841 } 2842 2843 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2844 skb_get(skb); 2845 chunk = state->recv_actor(skb, skip, chunk, state); 2846 drop_skb = !unix_skb_len(skb); 2847 /* skb is only safe to use if !drop_skb */ 2848 consume_skb(skb); 2849 if (chunk < 0) { 2850 if (copied == 0) 2851 copied = -EFAULT; 2852 break; 2853 } 2854 copied += chunk; 2855 size -= chunk; 2856 2857 if (drop_skb) { 2858 /* the skb was touched by a concurrent reader; 2859 * we should not expect anything from this skb 2860 * anymore and assume it invalid - we can be 2861 * sure it was 
dropped from the socket queue 2862 * 2863 * let's report a short read 2864 */ 2865 err = 0; 2866 break; 2867 } 2868 2869 /* Mark read part of skb as used */ 2870 if (!(flags & MSG_PEEK)) { 2871 UNIXCB(skb).consumed += chunk; 2872 2873 sk_peek_offset_bwd(sk, chunk); 2874 2875 if (UNIXCB(skb).fp) { 2876 scm_stat_del(sk, skb); 2877 unix_detach_fds(&scm, skb); 2878 } 2879 2880 if (unix_skb_len(skb)) 2881 break; 2882 2883 skb_unlink(skb, &sk->sk_receive_queue); 2884 consume_skb(skb); 2885 2886 if (scm.fp) 2887 break; 2888 } else { 2889 /* It is questionable, see note in unix_dgram_recvmsg. 2890 */ 2891 if (UNIXCB(skb).fp) 2892 unix_peek_fds(&scm, skb); 2893 2894 sk_peek_offset_fwd(sk, chunk); 2895 2896 if (UNIXCB(skb).fp) 2897 break; 2898 2899 skip = 0; 2900 last = skb; 2901 last_len = skb->len; 2902 unix_state_lock(sk); 2903 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2904 if (skb) 2905 goto again; 2906 unix_state_unlock(sk); 2907 break; 2908 } 2909 } while (size); 2910 2911 mutex_unlock(&u->iolock); 2912 if (state->msg) 2913 scm_recv(sock, state->msg, &scm, flags); 2914 else 2915 scm_destroy(&scm); 2916 out: 2917 return copied ? : err; 2918 } 2919 2920 static int unix_stream_read_actor(struct sk_buff *skb, 2921 int skip, int chunk, 2922 struct unix_stream_read_state *state) 2923 { 2924 int ret; 2925 2926 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2927 state->msg, chunk); 2928 return ret ?: chunk; 2929 } 2930 2931 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2932 size_t size, int flags) 2933 { 2934 struct unix_stream_read_state state = { 2935 .recv_actor = unix_stream_read_actor, 2936 .socket = sk->sk_socket, 2937 .msg = msg, 2938 .size = size, 2939 .flags = flags 2940 }; 2941 2942 return unix_stream_read_generic(&state, true); 2943 } 2944 2945 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2946 size_t size, int flags) 2947 { 2948 struct unix_stream_read_state state = { 2949 .recv_actor = unix_stream_read_actor, 2950 .socket = sock, 2951 .msg = msg, 2952 .size = size, 2953 .flags = flags 2954 }; 2955 2956 #ifdef CONFIG_BPF_SYSCALL 2957 struct sock *sk = sock->sk; 2958 const struct proto *prot = READ_ONCE(sk->sk_prot); 2959 2960 if (prot != &unix_stream_proto) 2961 return prot->recvmsg(sk, msg, size, flags, NULL); 2962 #endif 2963 return unix_stream_read_generic(&state, true); 2964 } 2965 2966 static int unix_stream_splice_actor(struct sk_buff *skb, 2967 int skip, int chunk, 2968 struct unix_stream_read_state *state) 2969 { 2970 return skb_splice_bits(skb, state->socket->sk, 2971 UNIXCB(skb).consumed + skip, 2972 state->pipe, chunk, state->splice_flags); 2973 } 2974 2975 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2976 struct pipe_inode_info *pipe, 2977 size_t size, unsigned int flags) 2978 { 2979 struct unix_stream_read_state state = { 2980 .recv_actor = unix_stream_splice_actor, 2981 .socket = sock, 2982 .pipe = pipe, 2983 .size = size, 2984 .splice_flags = flags, 2985 }; 2986 2987 if (unlikely(*ppos)) 2988 return -ESPIPE; 2989 2990 if (sock->file->f_flags & O_NONBLOCK || 2991 flags & SPLICE_F_NONBLOCK) 2992 state.flags = MSG_DONTWAIT; 2993 2994 return unix_stream_read_generic(&state, false); 2995 } 2996 2997 static int unix_shutdown(struct socket *sock, int mode) 2998 { 2999 struct sock *sk = sock->sk; 3000 struct sock *other; 3001 3002 if (mode < SHUT_RD || mode > SHUT_RDWR) 3003 return -EINVAL; 3004 /* This maps: 3005 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 3006 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 
3007 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 3008 */ 3009 ++mode; 3010 3011 unix_state_lock(sk); 3012 sk->sk_shutdown |= mode; 3013 other = unix_peer(sk); 3014 if (other) 3015 sock_hold(other); 3016 unix_state_unlock(sk); 3017 sk->sk_state_change(sk); 3018 3019 if (other && 3020 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 3021 3022 int peer_mode = 0; 3023 const struct proto *prot = READ_ONCE(other->sk_prot); 3024 3025 if (prot->unhash) 3026 prot->unhash(other); 3027 if (mode&RCV_SHUTDOWN) 3028 peer_mode |= SEND_SHUTDOWN; 3029 if (mode&SEND_SHUTDOWN) 3030 peer_mode |= RCV_SHUTDOWN; 3031 unix_state_lock(other); 3032 other->sk_shutdown |= peer_mode; 3033 unix_state_unlock(other); 3034 other->sk_state_change(other); 3035 if (peer_mode == SHUTDOWN_MASK) 3036 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 3037 else if (peer_mode & RCV_SHUTDOWN) 3038 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 3039 } 3040 if (other) 3041 sock_put(other); 3042 3043 return 0; 3044 } 3045 3046 long unix_inq_len(struct sock *sk) 3047 { 3048 struct sk_buff *skb; 3049 long amount = 0; 3050 3051 if (sk->sk_state == TCP_LISTEN) 3052 return -EINVAL; 3053 3054 spin_lock(&sk->sk_receive_queue.lock); 3055 if (sk->sk_type == SOCK_STREAM || 3056 sk->sk_type == SOCK_SEQPACKET) { 3057 skb_queue_walk(&sk->sk_receive_queue, skb) 3058 amount += unix_skb_len(skb); 3059 } else { 3060 skb = skb_peek(&sk->sk_receive_queue); 3061 if (skb) 3062 amount = skb->len; 3063 } 3064 spin_unlock(&sk->sk_receive_queue.lock); 3065 3066 return amount; 3067 } 3068 EXPORT_SYMBOL_GPL(unix_inq_len); 3069 3070 long unix_outq_len(struct sock *sk) 3071 { 3072 return sk_wmem_alloc_get(sk); 3073 } 3074 EXPORT_SYMBOL_GPL(unix_outq_len); 3075 3076 static int unix_open_file(struct sock *sk) 3077 { 3078 struct path path; 3079 struct file *f; 3080 int fd; 3081 3082 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3083 return -EPERM; 3084 3085 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3086 return -ENOENT; 3087 3088 path = unix_sk(sk)->path; 3089 if (!path.dentry) 3090 return -ENOENT; 3091 3092 path_get(&path); 3093 3094 fd = get_unused_fd_flags(O_CLOEXEC); 3095 if (fd < 0) 3096 goto out; 3097 3098 f = dentry_open(&path, O_PATH, current_cred()); 3099 if (IS_ERR(f)) { 3100 put_unused_fd(fd); 3101 fd = PTR_ERR(f); 3102 goto out; 3103 } 3104 3105 fd_install(fd, f); 3106 out: 3107 path_put(&path); 3108 3109 return fd; 3110 } 3111 3112 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3113 { 3114 struct sock *sk = sock->sk; 3115 long amount = 0; 3116 int err; 3117 3118 switch (cmd) { 3119 case SIOCOUTQ: 3120 amount = unix_outq_len(sk); 3121 err = put_user(amount, (int __user *)arg); 3122 break; 3123 case SIOCINQ: 3124 amount = unix_inq_len(sk); 3125 if (amount < 0) 3126 err = amount; 3127 else 3128 err = put_user(amount, (int __user *)arg); 3129 break; 3130 case SIOCUNIXFILE: 3131 err = unix_open_file(sk); 3132 break; 3133 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3134 case SIOCATMARK: 3135 { 3136 struct sk_buff *skb; 3137 int answ = 0; 3138 3139 skb = skb_peek(&sk->sk_receive_queue); 3140 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3141 answ = 1; 3142 err = put_user(answ, (int __user *)arg); 3143 } 3144 break; 3145 #endif 3146 default: 3147 err = -ENOIOCTLCMD; 3148 break; 3149 } 3150 return err; 3151 } 3152 3153 #ifdef CONFIG_COMPAT 3154 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3155 { 3156 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3157 } 3158 
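/*
 * Illustrative, untested userspace sketch (not part of this file) of the
 * ioctls handled by unix_ioctl() above, assuming fd is a connected
 * AF_UNIX socket:
 *
 *	int queued, atmark;
 *
 *	ioctl(fd, SIOCINQ, &queued);	// bytes waiting in the receive queue
 *	ioctl(fd, SIOCOUTQ, &queued);	// bytes we queued that the receiver
 *					// has not yet consumed
 *	ioctl(fd, SIOCATMARK, &atmark);	// does the next read start at the OOB
 *					// mark? (CONFIG_AF_UNIX_OOB only)
 *
 * SIOCUNIXFILE additionally returns an O_PATH file descriptor for the bound
 * socket file and requires CAP_NET_ADMIN, as implemented in unix_open_file()
 * above.
 */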
#endif 3159 3160 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3161 { 3162 struct sock *sk = sock->sk; 3163 __poll_t mask; 3164 3165 sock_poll_wait(file, sock, wait); 3166 mask = 0; 3167 3168 /* exceptional events? */ 3169 if (sk->sk_err) 3170 mask |= EPOLLERR; 3171 if (sk->sk_shutdown == SHUTDOWN_MASK) 3172 mask |= EPOLLHUP; 3173 if (sk->sk_shutdown & RCV_SHUTDOWN) 3174 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3175 3176 /* readable? */ 3177 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3178 mask |= EPOLLIN | EPOLLRDNORM; 3179 if (sk_is_readable(sk)) 3180 mask |= EPOLLIN | EPOLLRDNORM; 3181 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3182 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3183 mask |= EPOLLPRI; 3184 #endif 3185 3186 /* Connection-based need to check for termination and startup */ 3187 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3188 sk->sk_state == TCP_CLOSE) 3189 mask |= EPOLLHUP; 3190 3191 /* 3192 * we set writable also when the other side has shut down the 3193 * connection. This prevents stuck sockets. 3194 */ 3195 if (unix_writable(sk)) 3196 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3197 3198 return mask; 3199 } 3200 3201 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3202 poll_table *wait) 3203 { 3204 struct sock *sk = sock->sk, *other; 3205 unsigned int writable; 3206 __poll_t mask; 3207 3208 sock_poll_wait(file, sock, wait); 3209 mask = 0; 3210 3211 /* exceptional events? */ 3212 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 3213 mask |= EPOLLERR | 3214 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3215 3216 if (sk->sk_shutdown & RCV_SHUTDOWN) 3217 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3218 if (sk->sk_shutdown == SHUTDOWN_MASK) 3219 mask |= EPOLLHUP; 3220 3221 /* readable? */ 3222 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3223 mask |= EPOLLIN | EPOLLRDNORM; 3224 if (sk_is_readable(sk)) 3225 mask |= EPOLLIN | EPOLLRDNORM; 3226 3227 /* Connection-based need to check for termination and startup */ 3228 if (sk->sk_type == SOCK_SEQPACKET) { 3229 if (sk->sk_state == TCP_CLOSE) 3230 mask |= EPOLLHUP; 3231 /* connection hasn't started yet? */ 3232 if (sk->sk_state == TCP_SYN_SENT) 3233 return mask; 3234 } 3235 3236 /* No write status requested, avoid expensive OUT tests. 
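 *
 * poll_requested_events() lets us skip the expensive part entirely: when
 * the caller did not ask for EPOLLOUT/EPOLLWRNORM/EPOLLWRBAND we return
 * before taking the state lock and doing the peer receive-queue and
 * peer-wake checks below.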
*/ 3237 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3238 return mask; 3239 3240 writable = unix_writable(sk); 3241 if (writable) { 3242 unix_state_lock(sk); 3243 3244 other = unix_peer(sk); 3245 if (other && unix_peer(other) != sk && 3246 unix_recvq_full_lockless(other) && 3247 unix_dgram_peer_wake_me(sk, other)) 3248 writable = 0; 3249 3250 unix_state_unlock(sk); 3251 } 3252 3253 if (writable) 3254 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3255 else 3256 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3257 3258 return mask; 3259 } 3260 3261 #ifdef CONFIG_PROC_FS 3262 3263 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3264 3265 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3266 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3267 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3268 3269 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3270 { 3271 unsigned long offset = get_offset(*pos); 3272 unsigned long bucket = get_bucket(*pos); 3273 unsigned long count = 0; 3274 struct sock *sk; 3275 3276 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]); 3277 sk; sk = sk_next(sk)) { 3278 if (++count == offset) 3279 break; 3280 } 3281 3282 return sk; 3283 } 3284 3285 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3286 { 3287 unsigned long bucket = get_bucket(*pos); 3288 struct net *net = seq_file_net(seq); 3289 struct sock *sk; 3290 3291 while (bucket < UNIX_HASH_SIZE) { 3292 spin_lock(&net->unx.table.locks[bucket]); 3293 3294 sk = unix_from_bucket(seq, pos); 3295 if (sk) 3296 return sk; 3297 3298 spin_unlock(&net->unx.table.locks[bucket]); 3299 3300 *pos = set_bucket_offset(++bucket, 1); 3301 } 3302 3303 return NULL; 3304 } 3305 3306 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3307 loff_t *pos) 3308 { 3309 unsigned long bucket = get_bucket(*pos); 3310 3311 sk = sk_next(sk); 3312 if (sk) 3313 return sk; 3314 3315 3316 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]); 3317 3318 *pos = set_bucket_offset(++bucket, 1); 3319 3320 return unix_get_first(seq, pos); 3321 } 3322 3323 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3324 { 3325 if (!*pos) 3326 return SEQ_START_TOKEN; 3327 3328 return unix_get_first(seq, pos); 3329 } 3330 3331 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3332 { 3333 ++*pos; 3334 3335 if (v == SEQ_START_TOKEN) 3336 return unix_get_first(seq, pos); 3337 3338 return unix_get_next(seq, v, pos); 3339 } 3340 3341 static void unix_seq_stop(struct seq_file *seq, void *v) 3342 { 3343 struct sock *sk = v; 3344 3345 if (sk) 3346 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]); 3347 } 3348 3349 static int unix_seq_show(struct seq_file *seq, void *v) 3350 { 3351 3352 if (v == SEQ_START_TOKEN) 3353 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3354 "Inode Path\n"); 3355 else { 3356 struct sock *s = v; 3357 struct unix_sock *u = unix_sk(s); 3358 unix_state_lock(s); 3359 3360 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3361 s, 3362 refcount_read(&s->sk_refcnt), 3363 0, 3364 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3365 s->sk_type, 3366 s->sk_socket ? 3367 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3368 (s->sk_state == TCP_ESTABLISHED ? 
SS_CONNECTING : SS_DISCONNECTING), 3369 sock_i_ino(s)); 3370 3371 if (u->addr) { // under a hash table lock here 3372 int i, len; 3373 seq_putc(seq, ' '); 3374 3375 i = 0; 3376 len = u->addr->len - 3377 offsetof(struct sockaddr_un, sun_path); 3378 if (u->addr->name->sun_path[0]) { 3379 len--; 3380 } else { 3381 seq_putc(seq, '@'); 3382 i++; 3383 } 3384 for ( ; i < len; i++) 3385 seq_putc(seq, u->addr->name->sun_path[i] ?: 3386 '@'); 3387 } 3388 unix_state_unlock(s); 3389 seq_putc(seq, '\n'); 3390 } 3391 3392 return 0; 3393 } 3394 3395 static const struct seq_operations unix_seq_ops = { 3396 .start = unix_seq_start, 3397 .next = unix_seq_next, 3398 .stop = unix_seq_stop, 3399 .show = unix_seq_show, 3400 }; 3401 3402 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3403 struct bpf_unix_iter_state { 3404 struct seq_net_private p; 3405 unsigned int cur_sk; 3406 unsigned int end_sk; 3407 unsigned int max_sk; 3408 struct sock **batch; 3409 bool st_bucket_done; 3410 }; 3411 3412 struct bpf_iter__unix { 3413 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3414 __bpf_md_ptr(struct unix_sock *, unix_sk); 3415 uid_t uid __aligned(8); 3416 }; 3417 3418 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3419 struct unix_sock *unix_sk, uid_t uid) 3420 { 3421 struct bpf_iter__unix ctx; 3422 3423 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3424 ctx.meta = meta; 3425 ctx.unix_sk = unix_sk; 3426 ctx.uid = uid; 3427 return bpf_iter_run_prog(prog, &ctx); 3428 } 3429 3430 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3431 3432 { 3433 struct bpf_unix_iter_state *iter = seq->private; 3434 unsigned int expected = 1; 3435 struct sock *sk; 3436 3437 sock_hold(start_sk); 3438 iter->batch[iter->end_sk++] = start_sk; 3439 3440 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3441 if (iter->end_sk < iter->max_sk) { 3442 sock_hold(sk); 3443 iter->batch[iter->end_sk++] = sk; 3444 } 3445 3446 expected++; 3447 } 3448 3449 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]); 3450 3451 return expected; 3452 } 3453 3454 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3455 { 3456 while (iter->cur_sk < iter->end_sk) 3457 sock_put(iter->batch[iter->cur_sk++]); 3458 } 3459 3460 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3461 unsigned int new_batch_sz) 3462 { 3463 struct sock **new_batch; 3464 3465 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3466 GFP_USER | __GFP_NOWARN); 3467 if (!new_batch) 3468 return -ENOMEM; 3469 3470 bpf_iter_unix_put_batch(iter); 3471 kvfree(iter->batch); 3472 iter->batch = new_batch; 3473 iter->max_sk = new_batch_sz; 3474 3475 return 0; 3476 } 3477 3478 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3479 loff_t *pos) 3480 { 3481 struct bpf_unix_iter_state *iter = seq->private; 3482 unsigned int expected; 3483 bool resized = false; 3484 struct sock *sk; 3485 3486 if (iter->st_bucket_done) 3487 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3488 3489 again: 3490 /* Get a new batch */ 3491 iter->cur_sk = 0; 3492 iter->end_sk = 0; 3493 3494 sk = unix_get_first(seq, pos); 3495 if (!sk) 3496 return NULL; /* Done */ 3497 3498 expected = bpf_iter_unix_hold_batch(seq, sk); 3499 3500 if (iter->end_sk == expected) { 3501 iter->st_bucket_done = true; 3502 return sk; 3503 } 3504 3505 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3506 resized = true; 3507 goto again; 3508 } 3509 3510 return sk; 3511 } 3512 3513 static void 
*bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3514 { 3515 if (!*pos) 3516 return SEQ_START_TOKEN; 3517 3518 /* bpf iter does not support lseek, so it always 3519 * continue from where it was stop()-ped. 3520 */ 3521 return bpf_iter_unix_batch(seq, pos); 3522 } 3523 3524 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3525 { 3526 struct bpf_unix_iter_state *iter = seq->private; 3527 struct sock *sk; 3528 3529 /* Whenever seq_next() is called, the iter->cur_sk is 3530 * done with seq_show(), so advance to the next sk in 3531 * the batch. 3532 */ 3533 if (iter->cur_sk < iter->end_sk) 3534 sock_put(iter->batch[iter->cur_sk++]); 3535 3536 ++*pos; 3537 3538 if (iter->cur_sk < iter->end_sk) 3539 sk = iter->batch[iter->cur_sk]; 3540 else 3541 sk = bpf_iter_unix_batch(seq, pos); 3542 3543 return sk; 3544 } 3545 3546 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3547 { 3548 struct bpf_iter_meta meta; 3549 struct bpf_prog *prog; 3550 struct sock *sk = v; 3551 uid_t uid; 3552 bool slow; 3553 int ret; 3554 3555 if (v == SEQ_START_TOKEN) 3556 return 0; 3557 3558 slow = lock_sock_fast(sk); 3559 3560 if (unlikely(sk_unhashed(sk))) { 3561 ret = SEQ_SKIP; 3562 goto unlock; 3563 } 3564 3565 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3566 meta.seq = seq; 3567 prog = bpf_iter_get_info(&meta, false); 3568 ret = unix_prog_seq_show(prog, &meta, v, uid); 3569 unlock: 3570 unlock_sock_fast(sk, slow); 3571 return ret; 3572 } 3573 3574 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3575 { 3576 struct bpf_unix_iter_state *iter = seq->private; 3577 struct bpf_iter_meta meta; 3578 struct bpf_prog *prog; 3579 3580 if (!v) { 3581 meta.seq = seq; 3582 prog = bpf_iter_get_info(&meta, true); 3583 if (prog) 3584 (void)unix_prog_seq_show(prog, &meta, v, 0); 3585 } 3586 3587 if (iter->cur_sk < iter->end_sk) 3588 bpf_iter_unix_put_batch(iter); 3589 } 3590 3591 static const struct seq_operations bpf_iter_unix_seq_ops = { 3592 .start = bpf_iter_unix_seq_start, 3593 .next = bpf_iter_unix_seq_next, 3594 .stop = bpf_iter_unix_seq_stop, 3595 .show = bpf_iter_unix_seq_show, 3596 }; 3597 #endif 3598 #endif 3599 3600 static const struct net_proto_family unix_family_ops = { 3601 .family = PF_UNIX, 3602 .create = unix_create, 3603 .owner = THIS_MODULE, 3604 }; 3605 3606 3607 static int __net_init unix_net_init(struct net *net) 3608 { 3609 int i; 3610 3611 net->unx.sysctl_max_dgram_qlen = 10; 3612 if (unix_sysctl_register(net)) 3613 goto out; 3614 3615 #ifdef CONFIG_PROC_FS 3616 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3617 sizeof(struct seq_net_private))) 3618 goto err_sysctl; 3619 #endif 3620 3621 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, 3622 sizeof(spinlock_t), GFP_KERNEL); 3623 if (!net->unx.table.locks) 3624 goto err_proc; 3625 3626 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, 3627 sizeof(struct hlist_head), 3628 GFP_KERNEL); 3629 if (!net->unx.table.buckets) 3630 goto free_locks; 3631 3632 for (i = 0; i < UNIX_HASH_SIZE; i++) { 3633 spin_lock_init(&net->unx.table.locks[i]); 3634 INIT_HLIST_HEAD(&net->unx.table.buckets[i]); 3635 } 3636 3637 return 0; 3638 3639 free_locks: 3640 kvfree(net->unx.table.locks); 3641 err_proc: 3642 #ifdef CONFIG_PROC_FS 3643 remove_proc_entry("unix", net->proc_net); 3644 err_sysctl: 3645 #endif 3646 unix_sysctl_unregister(net); 3647 out: 3648 return -ENOMEM; 3649 } 3650 3651 static void __net_exit unix_net_exit(struct net *net) 3652 { 3653 
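	/*
	 * Undo unix_net_init() in roughly the reverse order: free the
	 * per-netns hash buckets and locks, unregister the sysctl table
	 * and remove the /proc/net/unix entry.
	 */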
kvfree(net->unx.table.buckets); 3654 kvfree(net->unx.table.locks); 3655 unix_sysctl_unregister(net); 3656 remove_proc_entry("unix", net->proc_net); 3657 } 3658 3659 static struct pernet_operations unix_net_ops = { 3660 .init = unix_net_init, 3661 .exit = unix_net_exit, 3662 }; 3663 3664 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3665 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3666 struct unix_sock *unix_sk, uid_t uid) 3667 3668 #define INIT_BATCH_SZ 16 3669 3670 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3671 { 3672 struct bpf_unix_iter_state *iter = priv_data; 3673 int err; 3674 3675 err = bpf_iter_init_seq_net(priv_data, aux); 3676 if (err) 3677 return err; 3678 3679 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3680 if (err) { 3681 bpf_iter_fini_seq_net(priv_data); 3682 return err; 3683 } 3684 3685 return 0; 3686 } 3687 3688 static void bpf_iter_fini_unix(void *priv_data) 3689 { 3690 struct bpf_unix_iter_state *iter = priv_data; 3691 3692 bpf_iter_fini_seq_net(priv_data); 3693 kvfree(iter->batch); 3694 } 3695 3696 static const struct bpf_iter_seq_info unix_seq_info = { 3697 .seq_ops = &bpf_iter_unix_seq_ops, 3698 .init_seq_private = bpf_iter_init_unix, 3699 .fini_seq_private = bpf_iter_fini_unix, 3700 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3701 }; 3702 3703 static const struct bpf_func_proto * 3704 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3705 const struct bpf_prog *prog) 3706 { 3707 switch (func_id) { 3708 case BPF_FUNC_setsockopt: 3709 return &bpf_sk_setsockopt_proto; 3710 case BPF_FUNC_getsockopt: 3711 return &bpf_sk_getsockopt_proto; 3712 default: 3713 return NULL; 3714 } 3715 } 3716 3717 static struct bpf_iter_reg unix_reg_info = { 3718 .target = "unix", 3719 .ctx_arg_info_size = 1, 3720 .ctx_arg_info = { 3721 { offsetof(struct bpf_iter__unix, unix_sk), 3722 PTR_TO_BTF_ID_OR_NULL }, 3723 }, 3724 .get_func_proto = bpf_iter_unix_get_func_proto, 3725 .seq_info = &unix_seq_info, 3726 }; 3727 3728 static void __init bpf_iter_register(void) 3729 { 3730 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3731 if (bpf_iter_reg_target(&unix_reg_info)) 3732 pr_warn("Warning: could not register bpf iterator unix\n"); 3733 } 3734 #endif 3735 3736 static int __init af_unix_init(void) 3737 { 3738 int i, rc = -1; 3739 3740 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3741 3742 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3743 spin_lock_init(&bsd_socket_locks[i]); 3744 INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3745 } 3746 3747 rc = proto_register(&unix_dgram_proto, 1); 3748 if (rc != 0) { 3749 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3750 goto out; 3751 } 3752 3753 rc = proto_register(&unix_stream_proto, 1); 3754 if (rc != 0) { 3755 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3756 goto out; 3757 } 3758 3759 sock_register(&unix_family_ops); 3760 register_pernet_subsys(&unix_net_ops); 3761 unix_bpf_build_proto(); 3762 3763 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3764 bpf_iter_register(); 3765 #endif 3766 3767 out: 3768 return rc; 3769 } 3770 3771 static void __exit af_unix_exit(void) 3772 { 3773 sock_unregister(PF_UNIX); 3774 proto_unregister(&unix_dgram_proto); 3775 proto_unregister(&unix_stream_proto); 3776 unregister_pernet_subsys(&unix_net_ops); 3777 } 3778 3779 /* Earlier than device_initcall() so that other drivers invoking 3780 
request_module() don't end up in a loop when modprobe tries 3781 to use a UNIX socket. But later than subsys_initcall() because 3782 we depend on stuff initialised there */ 3783 fs_initcall(af_unix_init); 3784 module_exit(af_unix_exit); 3785 3786 MODULE_LICENSE("GPL"); 3787 MODULE_ALIAS_NETPROTO(PF_UNIX); 3788
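
/*
 * Illustrative, untested userspace sketch (not part of this file) of the
 * CONFIG_AF_UNIX_OOB out-of-band path implemented by queue_oob() and
 * unix_stream_recv_urg() above, assuming fd is a connected SOCK_STREAM
 * AF_UNIX socket:
 *
 *	char c;
 *
 *	send(fd, "x", 1, MSG_OOB);	// sender: queue one byte as the OOB mark
 *	recv(fd, &c, 1, MSG_OOB);	// receiver: fetch that byte out of band;
 *					// fails with EINVAL if SO_OOBINLINE is
 *					// set, in which case the byte is read
 *					// inline with the normal data instead.
 */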