// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko Eißfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *		Christoph Rohland :	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *		Alexey Kuznetsov :	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *		Andrea Arcangeli :	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it avoids a huge number
 *					of hashed socks (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations.
 *		Alexey Kuznetsov :	Full scale SMP. Lots of bugs were introduced 8)
 *		Malcolm Beattie	:	Set peercred for socketpair.
 *		Michal Ostrowski :	Module initialization cleanup.
 *		Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+).
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns a 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug??).
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this against the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
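 *
 *		  A minimal userspace sketch of binding to an abstract name
 *		  (hedged illustration only; the name "example" and the lack
 *		  of error handling are assumptions, not part of this file):
 *
 *			struct sockaddr_un a;
 *			int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *			memset(&a, 0, sizeof(a));
 *			a.sun_family = AF_UNIX;
 *			a.sun_path[0] = 0;		(abstract namespace marker)
 *			memcpy(a.sun_path + 1, "example", 7);
 *			bind(fd, (struct sockaddr *)&a,
 *			     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 *
 *		  The address length counts the leading 0 byte plus the seven
 *		  name bytes; no trailing NUL is added or implied, which is
 *		  what keeps this name space disjoint from filesystem paths.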
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/filter.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_table_locks);
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
static atomic_long_t unix_nr_socks;

/* SMP locking strategy:
 *    each hash table bucket is protected by its spinlock in unix_table_locks.
 *    each socket state is protected by a separate spinlock.
 */

static unsigned int unix_unbound_hash(struct sock *sk)
{
	unsigned long hash = (unsigned long)sk;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash ^= sk->sk_type;

	return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1));
}

static unsigned int unix_bsd_hash(struct inode *i)
{
	return i->i_ino & (UNIX_HASH_SIZE - 1);
}

static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	__wsum csum = csum_partial(sunaddr, addr_len, 0);
	unsigned int hash;

	hash = (__force unsigned int)csum_fold(csum);
	hash ^= hash >> 8;
	hash ^= type;

	return hash & (UNIX_HASH_SIZE - 1);
}

static void unix_table_double_lock(unsigned int hash1, unsigned int hash2)
{
	/* hash1 and hash2 are never the same because
	 * one is between 0 and UNIX_HASH_SIZE - 1, and
	 * another is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2.
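	 *
	 * As a concrete illustration (assuming UNIX_HASH_SIZE is 256, the
	 * value defined in af_unix.h rather than in this file):
	 *
	 *   unix_bsd_hash()      : i_ino & 255          -> buckets   0..255
	 *   unix_abstract_hash() : folded csum & 255    -> buckets   0..255
	 *   unix_unbound_hash()  : 256 + (hash & 255)   -> buckets 256..511
	 *
	 * Re-hashing a socket at bind time therefore always moves it from
	 * the upper (unbound) half of the table to the lower (bound) half,
	 * so the two bucket locks taken here can never alias.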
166 */ 167 if (hash1 > hash2) 168 swap(hash1, hash2); 169 170 spin_lock(&unix_table_locks[hash1]); 171 spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING); 172 } 173 174 static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2) 175 { 176 spin_unlock(&unix_table_locks[hash1]); 177 spin_unlock(&unix_table_locks[hash2]); 178 } 179 180 #ifdef CONFIG_SECURITY_NETWORK 181 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 182 { 183 UNIXCB(skb).secid = scm->secid; 184 } 185 186 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 187 { 188 scm->secid = UNIXCB(skb).secid; 189 } 190 191 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 192 { 193 return (scm->secid == UNIXCB(skb).secid); 194 } 195 #else 196 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) 197 { } 198 199 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) 200 { } 201 202 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) 203 { 204 return true; 205 } 206 #endif /* CONFIG_SECURITY_NETWORK */ 207 208 #define unix_peer(sk) (unix_sk(sk)->peer) 209 210 static inline int unix_our_peer(struct sock *sk, struct sock *osk) 211 { 212 return unix_peer(osk) == sk; 213 } 214 215 static inline int unix_may_send(struct sock *sk, struct sock *osk) 216 { 217 return unix_peer(osk) == NULL || unix_our_peer(sk, osk); 218 } 219 220 static inline int unix_recvq_full(const struct sock *sk) 221 { 222 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; 223 } 224 225 static inline int unix_recvq_full_lockless(const struct sock *sk) 226 { 227 return skb_queue_len_lockless(&sk->sk_receive_queue) > 228 READ_ONCE(sk->sk_max_ack_backlog); 229 } 230 231 struct sock *unix_peer_get(struct sock *s) 232 { 233 struct sock *peer; 234 235 unix_state_lock(s); 236 peer = unix_peer(s); 237 if (peer) 238 sock_hold(peer); 239 unix_state_unlock(s); 240 return peer; 241 } 242 EXPORT_SYMBOL_GPL(unix_peer_get); 243 244 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr, 245 int addr_len) 246 { 247 struct unix_address *addr; 248 249 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL); 250 if (!addr) 251 return NULL; 252 253 refcount_set(&addr->refcnt, 1); 254 addr->len = addr_len; 255 memcpy(addr->name, sunaddr, addr_len); 256 257 return addr; 258 } 259 260 static inline void unix_release_addr(struct unix_address *addr) 261 { 262 if (refcount_dec_and_test(&addr->refcnt)) 263 kfree(addr); 264 } 265 266 /* 267 * Check unix socket name: 268 * - should be not zero length. 269 * - if started by not zero, should be NULL terminated (FS object) 270 * - if started by zero, it is abstract name. 271 */ 272 273 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len) 274 { 275 if (addr_len <= offsetof(struct sockaddr_un, sun_path) || 276 addr_len > sizeof(*sunaddr)) 277 return -EINVAL; 278 279 if (sunaddr->sun_family != AF_UNIX) 280 return -EINVAL; 281 282 return 0; 283 } 284 285 static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len) 286 { 287 /* This may look like an off by one error but it is a bit more 288 * subtle. 108 is the longest valid AF_UNIX path for a binding. 289 * sun_path[108] doesn't as such exist. 
However in kernel space 290 * we are guaranteed that it is a valid memory location in our 291 * kernel address buffer because syscall functions always pass 292 * a pointer of struct sockaddr_storage which has a bigger buffer 293 * than 108. 294 */ 295 ((char *)sunaddr)[addr_len] = 0; 296 } 297 298 static void __unix_remove_socket(struct sock *sk) 299 { 300 sk_del_node_init(sk); 301 } 302 303 static void __unix_insert_socket(struct sock *sk) 304 { 305 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 306 sk_add_node(sk, &unix_socket_table[sk->sk_hash]); 307 } 308 309 static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr, 310 unsigned int hash) 311 { 312 __unix_remove_socket(sk); 313 smp_store_release(&unix_sk(sk)->addr, addr); 314 315 sk->sk_hash = hash; 316 __unix_insert_socket(sk); 317 } 318 319 static void unix_remove_socket(struct sock *sk) 320 { 321 spin_lock(&unix_table_locks[sk->sk_hash]); 322 __unix_remove_socket(sk); 323 spin_unlock(&unix_table_locks[sk->sk_hash]); 324 } 325 326 static void unix_insert_unbound_socket(struct sock *sk) 327 { 328 spin_lock(&unix_table_locks[sk->sk_hash]); 329 __unix_insert_socket(sk); 330 spin_unlock(&unix_table_locks[sk->sk_hash]); 331 } 332 333 static struct sock *__unix_find_socket_byname(struct net *net, 334 struct sockaddr_un *sunname, 335 int len, unsigned int hash) 336 { 337 struct sock *s; 338 339 sk_for_each(s, &unix_socket_table[hash]) { 340 struct unix_sock *u = unix_sk(s); 341 342 if (!net_eq(sock_net(s), net)) 343 continue; 344 345 if (u->addr->len == len && 346 !memcmp(u->addr->name, sunname, len)) 347 return s; 348 } 349 return NULL; 350 } 351 352 static inline struct sock *unix_find_socket_byname(struct net *net, 353 struct sockaddr_un *sunname, 354 int len, unsigned int hash) 355 { 356 struct sock *s; 357 358 spin_lock(&unix_table_locks[hash]); 359 s = __unix_find_socket_byname(net, sunname, len, hash); 360 if (s) 361 sock_hold(s); 362 spin_unlock(&unix_table_locks[hash]); 363 return s; 364 } 365 366 static struct sock *unix_find_socket_byinode(struct inode *i) 367 { 368 unsigned int hash = unix_bsd_hash(i); 369 struct sock *s; 370 371 spin_lock(&unix_table_locks[hash]); 372 sk_for_each(s, &unix_socket_table[hash]) { 373 struct dentry *dentry = unix_sk(s)->path.dentry; 374 375 if (dentry && d_backing_inode(dentry) == i) { 376 sock_hold(s); 377 spin_unlock(&unix_table_locks[hash]); 378 return s; 379 } 380 } 381 spin_unlock(&unix_table_locks[hash]); 382 return NULL; 383 } 384 385 /* Support code for asymmetrically connected dgram sockets 386 * 387 * If a datagram socket is connected to a socket not itself connected 388 * to the first socket (eg, /dev/log), clients may only enqueue more 389 * messages if the present receive queue of the server socket is not 390 * "too large". This means there's a second writeability condition 391 * poll and sendmsg need to test. The dgram recv code will do a wake 392 * up on the peer_wait wait queue of a socket upon reception of a 393 * datagram which needs to be propagated to sleeping would-be writers 394 * since these might not have sent anything so far. 
This can't be 395 * accomplished via poll_wait because the lifetime of the server 396 * socket might be less than that of its clients if these break their 397 * association with it or if the server socket is closed while clients 398 * are still connected to it and there's no way to inform "a polling 399 * implementation" that it should let go of a certain wait queue 400 * 401 * In order to propagate a wake up, a wait_queue_entry_t of the client 402 * socket is enqueued on the peer_wait queue of the server socket 403 * whose wake function does a wake_up on the ordinary client socket 404 * wait queue. This connection is established whenever a write (or 405 * poll for write) hit the flow control condition and broken when the 406 * association to the server socket is dissolved or after a wake up 407 * was relayed. 408 */ 409 410 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags, 411 void *key) 412 { 413 struct unix_sock *u; 414 wait_queue_head_t *u_sleep; 415 416 u = container_of(q, struct unix_sock, peer_wake); 417 418 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, 419 q); 420 u->peer_wake.private = NULL; 421 422 /* relaying can only happen while the wq still exists */ 423 u_sleep = sk_sleep(&u->sk); 424 if (u_sleep) 425 wake_up_interruptible_poll(u_sleep, key_to_poll(key)); 426 427 return 0; 428 } 429 430 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) 431 { 432 struct unix_sock *u, *u_other; 433 int rc; 434 435 u = unix_sk(sk); 436 u_other = unix_sk(other); 437 rc = 0; 438 spin_lock(&u_other->peer_wait.lock); 439 440 if (!u->peer_wake.private) { 441 u->peer_wake.private = other; 442 __add_wait_queue(&u_other->peer_wait, &u->peer_wake); 443 444 rc = 1; 445 } 446 447 spin_unlock(&u_other->peer_wait.lock); 448 return rc; 449 } 450 451 static void unix_dgram_peer_wake_disconnect(struct sock *sk, 452 struct sock *other) 453 { 454 struct unix_sock *u, *u_other; 455 456 u = unix_sk(sk); 457 u_other = unix_sk(other); 458 spin_lock(&u_other->peer_wait.lock); 459 460 if (u->peer_wake.private == other) { 461 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); 462 u->peer_wake.private = NULL; 463 } 464 465 spin_unlock(&u_other->peer_wait.lock); 466 } 467 468 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, 469 struct sock *other) 470 { 471 unix_dgram_peer_wake_disconnect(sk, other); 472 wake_up_interruptible_poll(sk_sleep(sk), 473 EPOLLOUT | 474 EPOLLWRNORM | 475 EPOLLWRBAND); 476 } 477 478 /* preconditions: 479 * - unix_peer(sk) == other 480 * - association is stable 481 */ 482 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) 483 { 484 int connected; 485 486 connected = unix_dgram_peer_wake_connect(sk, other); 487 488 /* If other is SOCK_DEAD, we want to make sure we signal 489 * POLLOUT, such that a subsequent write() can get a 490 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs 491 * to other and its full, we will hang waiting for POLLOUT. 
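	 *
	 * Userspace view of the relay set up above (a hedged sketch; the
	 * /dev/log-style server is only an illustration): a client
	 * connect()ed to a datagram server whose receive queue is over
	 * sk_max_ack_backlog gets -EAGAIN from a non-blocking sendto().
	 * A subsequent poll(POLLOUT) sleeps on the client's own wait
	 * queue, and the entry registered on the server's peer_wait queue
	 * is what converts the server's next recvmsg() into a wake-up for
	 * the client, so the sendto() can be retried.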
492 */ 493 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD)) 494 return 1; 495 496 if (connected) 497 unix_dgram_peer_wake_disconnect(sk, other); 498 499 return 0; 500 } 501 502 static int unix_writable(const struct sock *sk) 503 { 504 return sk->sk_state != TCP_LISTEN && 505 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; 506 } 507 508 static void unix_write_space(struct sock *sk) 509 { 510 struct socket_wq *wq; 511 512 rcu_read_lock(); 513 if (unix_writable(sk)) { 514 wq = rcu_dereference(sk->sk_wq); 515 if (skwq_has_sleeper(wq)) 516 wake_up_interruptible_sync_poll(&wq->wait, 517 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); 518 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 519 } 520 rcu_read_unlock(); 521 } 522 523 /* When dgram socket disconnects (or changes its peer), we clear its receive 524 * queue of packets arrived from previous peer. First, it allows to do 525 * flow control based only on wmem_alloc; second, sk connected to peer 526 * may receive messages only from that peer. */ 527 static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 528 { 529 if (!skb_queue_empty(&sk->sk_receive_queue)) { 530 skb_queue_purge(&sk->sk_receive_queue); 531 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 532 533 /* If one link of bidirectional dgram pipe is disconnected, 534 * we signal error. Messages are lost. Do not make this, 535 * when peer was not connected to us. 536 */ 537 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) { 538 other->sk_err = ECONNRESET; 539 sk_error_report(other); 540 } 541 } 542 other->sk_state = TCP_CLOSE; 543 } 544 545 static void unix_sock_destructor(struct sock *sk) 546 { 547 struct unix_sock *u = unix_sk(sk); 548 549 skb_queue_purge(&sk->sk_receive_queue); 550 551 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 552 if (u->oob_skb) { 553 kfree_skb(u->oob_skb); 554 u->oob_skb = NULL; 555 } 556 #endif 557 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); 558 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk)); 559 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket); 560 if (!sock_flag(sk, SOCK_DEAD)) { 561 pr_info("Attempt to release alive unix socket: %p\n", sk); 562 return; 563 } 564 565 if (u->addr) 566 unix_release_addr(u->addr); 567 568 atomic_long_dec(&unix_nr_socks); 569 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 570 #ifdef UNIX_REFCNT_DEBUG 571 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk, 572 atomic_long_read(&unix_nr_socks)); 573 #endif 574 } 575 576 static void unix_release_sock(struct sock *sk, int embrion) 577 { 578 struct unix_sock *u = unix_sk(sk); 579 struct path path; 580 struct sock *skpair; 581 struct sk_buff *skb; 582 int state; 583 584 unix_remove_socket(sk); 585 586 /* Clear state */ 587 unix_state_lock(sk); 588 sock_orphan(sk); 589 sk->sk_shutdown = SHUTDOWN_MASK; 590 path = u->path; 591 u->path.dentry = NULL; 592 u->path.mnt = NULL; 593 state = sk->sk_state; 594 sk->sk_state = TCP_CLOSE; 595 596 skpair = unix_peer(sk); 597 unix_peer(sk) = NULL; 598 599 unix_state_unlock(sk); 600 601 wake_up_interruptible_all(&u->peer_wait); 602 603 if (skpair != NULL) { 604 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { 605 unix_state_lock(skpair); 606 /* No more writes */ 607 skpair->sk_shutdown = SHUTDOWN_MASK; 608 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) 609 skpair->sk_err = ECONNRESET; 610 unix_state_unlock(skpair); 611 skpair->sk_state_change(skpair); 612 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 613 } 614 615 unix_dgram_peer_wake_disconnect(sk, skpair); 616 
sock_put(skpair); /* It may now die */ 617 } 618 619 /* Try to flush out this socket. Throw out buffers at least */ 620 621 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { 622 if (state == TCP_LISTEN) 623 unix_release_sock(skb->sk, 1); 624 /* passed fds are erased in the kfree_skb hook */ 625 UNIXCB(skb).consumed = skb->len; 626 kfree_skb(skb); 627 } 628 629 if (path.dentry) 630 path_put(&path); 631 632 sock_put(sk); 633 634 /* ---- Socket is dead now and most probably destroyed ---- */ 635 636 /* 637 * Fixme: BSD difference: In BSD all sockets connected to us get 638 * ECONNRESET and we die on the spot. In Linux we behave 639 * like files and pipes do and wait for the last 640 * dereference. 641 * 642 * Can't we simply set sock->err? 643 * 644 * What the above comment does talk about? --ANK(980817) 645 */ 646 647 if (unix_tot_inflight) 648 unix_gc(); /* Garbage collect fds */ 649 } 650 651 static void init_peercred(struct sock *sk) 652 { 653 const struct cred *old_cred; 654 struct pid *old_pid; 655 656 spin_lock(&sk->sk_peer_lock); 657 old_pid = sk->sk_peer_pid; 658 old_cred = sk->sk_peer_cred; 659 sk->sk_peer_pid = get_pid(task_tgid(current)); 660 sk->sk_peer_cred = get_current_cred(); 661 spin_unlock(&sk->sk_peer_lock); 662 663 put_pid(old_pid); 664 put_cred(old_cred); 665 } 666 667 static void copy_peercred(struct sock *sk, struct sock *peersk) 668 { 669 const struct cred *old_cred; 670 struct pid *old_pid; 671 672 if (sk < peersk) { 673 spin_lock(&sk->sk_peer_lock); 674 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); 675 } else { 676 spin_lock(&peersk->sk_peer_lock); 677 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); 678 } 679 old_pid = sk->sk_peer_pid; 680 old_cred = sk->sk_peer_cred; 681 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); 682 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); 683 684 spin_unlock(&sk->sk_peer_lock); 685 spin_unlock(&peersk->sk_peer_lock); 686 687 put_pid(old_pid); 688 put_cred(old_cred); 689 } 690 691 static int unix_listen(struct socket *sock, int backlog) 692 { 693 int err; 694 struct sock *sk = sock->sk; 695 struct unix_sock *u = unix_sk(sk); 696 697 err = -EOPNOTSUPP; 698 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 699 goto out; /* Only stream/seqpacket sockets accept */ 700 err = -EINVAL; 701 if (!u->addr) 702 goto out; /* No listens on an unbound socket */ 703 unix_state_lock(sk); 704 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) 705 goto out_unlock; 706 if (backlog > sk->sk_max_ack_backlog) 707 wake_up_interruptible_all(&u->peer_wait); 708 sk->sk_max_ack_backlog = backlog; 709 sk->sk_state = TCP_LISTEN; 710 /* set credentials so connect can copy them */ 711 init_peercred(sk); 712 err = 0; 713 714 out_unlock: 715 unix_state_unlock(sk); 716 out: 717 return err; 718 } 719 720 static int unix_release(struct socket *); 721 static int unix_bind(struct socket *, struct sockaddr *, int); 722 static int unix_stream_connect(struct socket *, struct sockaddr *, 723 int addr_len, int flags); 724 static int unix_socketpair(struct socket *, struct socket *); 725 static int unix_accept(struct socket *, struct socket *, int, bool); 726 static int unix_getname(struct socket *, struct sockaddr *, int); 727 static __poll_t unix_poll(struct file *, struct socket *, poll_table *); 728 static __poll_t unix_dgram_poll(struct file *, struct socket *, 729 poll_table *); 730 static int unix_ioctl(struct socket *, unsigned int, unsigned long); 731 #ifdef CONFIG_COMPAT 732 static int 
unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); 733 #endif 734 static int unix_shutdown(struct socket *, int); 735 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); 736 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); 737 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, 738 size_t size, int flags); 739 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, 740 struct pipe_inode_info *, size_t size, 741 unsigned int flags); 742 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); 743 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); 744 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 745 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor); 746 static int unix_dgram_connect(struct socket *, struct sockaddr *, 747 int, int); 748 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); 749 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, 750 int); 751 752 static int unix_set_peek_off(struct sock *sk, int val) 753 { 754 struct unix_sock *u = unix_sk(sk); 755 756 if (mutex_lock_interruptible(&u->iolock)) 757 return -EINTR; 758 759 sk->sk_peek_off = val; 760 mutex_unlock(&u->iolock); 761 762 return 0; 763 } 764 765 #ifdef CONFIG_PROC_FS 766 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) 767 { 768 struct sock *sk = sock->sk; 769 struct unix_sock *u; 770 771 if (sk) { 772 u = unix_sk(sock->sk); 773 seq_printf(m, "scm_fds: %u\n", 774 atomic_read(&u->scm_stat.nr_fds)); 775 } 776 } 777 #else 778 #define unix_show_fdinfo NULL 779 #endif 780 781 static const struct proto_ops unix_stream_ops = { 782 .family = PF_UNIX, 783 .owner = THIS_MODULE, 784 .release = unix_release, 785 .bind = unix_bind, 786 .connect = unix_stream_connect, 787 .socketpair = unix_socketpair, 788 .accept = unix_accept, 789 .getname = unix_getname, 790 .poll = unix_poll, 791 .ioctl = unix_ioctl, 792 #ifdef CONFIG_COMPAT 793 .compat_ioctl = unix_compat_ioctl, 794 #endif 795 .listen = unix_listen, 796 .shutdown = unix_shutdown, 797 .sendmsg = unix_stream_sendmsg, 798 .recvmsg = unix_stream_recvmsg, 799 .read_skb = unix_stream_read_skb, 800 .mmap = sock_no_mmap, 801 .sendpage = unix_stream_sendpage, 802 .splice_read = unix_stream_splice_read, 803 .set_peek_off = unix_set_peek_off, 804 .show_fdinfo = unix_show_fdinfo, 805 }; 806 807 static const struct proto_ops unix_dgram_ops = { 808 .family = PF_UNIX, 809 .owner = THIS_MODULE, 810 .release = unix_release, 811 .bind = unix_bind, 812 .connect = unix_dgram_connect, 813 .socketpair = unix_socketpair, 814 .accept = sock_no_accept, 815 .getname = unix_getname, 816 .poll = unix_dgram_poll, 817 .ioctl = unix_ioctl, 818 #ifdef CONFIG_COMPAT 819 .compat_ioctl = unix_compat_ioctl, 820 #endif 821 .listen = sock_no_listen, 822 .shutdown = unix_shutdown, 823 .sendmsg = unix_dgram_sendmsg, 824 .read_skb = unix_read_skb, 825 .recvmsg = unix_dgram_recvmsg, 826 .mmap = sock_no_mmap, 827 .sendpage = sock_no_sendpage, 828 .set_peek_off = unix_set_peek_off, 829 .show_fdinfo = unix_show_fdinfo, 830 }; 831 832 static const struct proto_ops unix_seqpacket_ops = { 833 .family = PF_UNIX, 834 .owner = THIS_MODULE, 835 .release = unix_release, 836 .bind = unix_bind, 837 .connect = unix_stream_connect, 838 .socketpair = unix_socketpair, 839 .accept = unix_accept, 840 .getname = unix_getname, 841 .poll = unix_dgram_poll, 
842 .ioctl = unix_ioctl, 843 #ifdef CONFIG_COMPAT 844 .compat_ioctl = unix_compat_ioctl, 845 #endif 846 .listen = unix_listen, 847 .shutdown = unix_shutdown, 848 .sendmsg = unix_seqpacket_sendmsg, 849 .recvmsg = unix_seqpacket_recvmsg, 850 .mmap = sock_no_mmap, 851 .sendpage = sock_no_sendpage, 852 .set_peek_off = unix_set_peek_off, 853 .show_fdinfo = unix_show_fdinfo, 854 }; 855 856 static void unix_close(struct sock *sk, long timeout) 857 { 858 /* Nothing to do here, unix socket does not need a ->close(). 859 * This is merely for sockmap. 860 */ 861 } 862 863 static void unix_unhash(struct sock *sk) 864 { 865 /* Nothing to do here, unix socket does not need a ->unhash(). 866 * This is merely for sockmap. 867 */ 868 } 869 870 struct proto unix_dgram_proto = { 871 .name = "UNIX", 872 .owner = THIS_MODULE, 873 .obj_size = sizeof(struct unix_sock), 874 .close = unix_close, 875 #ifdef CONFIG_BPF_SYSCALL 876 .psock_update_sk_prot = unix_dgram_bpf_update_proto, 877 #endif 878 }; 879 880 struct proto unix_stream_proto = { 881 .name = "UNIX-STREAM", 882 .owner = THIS_MODULE, 883 .obj_size = sizeof(struct unix_sock), 884 .close = unix_close, 885 .unhash = unix_unhash, 886 #ifdef CONFIG_BPF_SYSCALL 887 .psock_update_sk_prot = unix_stream_bpf_update_proto, 888 #endif 889 }; 890 891 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type) 892 { 893 struct unix_sock *u; 894 struct sock *sk; 895 int err; 896 897 atomic_long_inc(&unix_nr_socks); 898 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) { 899 err = -ENFILE; 900 goto err; 901 } 902 903 if (type == SOCK_STREAM) 904 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern); 905 else /*dgram and seqpacket */ 906 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern); 907 908 if (!sk) { 909 err = -ENOMEM; 910 goto err; 911 } 912 913 sock_init_data(sock, sk); 914 915 sk->sk_hash = unix_unbound_hash(sk); 916 sk->sk_allocation = GFP_KERNEL_ACCOUNT; 917 sk->sk_write_space = unix_write_space; 918 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; 919 sk->sk_destruct = unix_sock_destructor; 920 u = unix_sk(sk); 921 u->path.dentry = NULL; 922 u->path.mnt = NULL; 923 spin_lock_init(&u->lock); 924 atomic_long_set(&u->inflight, 0); 925 INIT_LIST_HEAD(&u->link); 926 mutex_init(&u->iolock); /* single task reading lock */ 927 mutex_init(&u->bindlock); /* single task binding lock */ 928 init_waitqueue_head(&u->peer_wait); 929 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); 930 memset(&u->scm_stat, 0, sizeof(struct scm_stat)); 931 unix_insert_unbound_socket(sk); 932 933 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 934 935 return sk; 936 937 err: 938 atomic_long_dec(&unix_nr_socks); 939 return ERR_PTR(err); 940 } 941 942 static int unix_create(struct net *net, struct socket *sock, int protocol, 943 int kern) 944 { 945 struct sock *sk; 946 947 if (protocol && protocol != PF_UNIX) 948 return -EPROTONOSUPPORT; 949 950 sock->state = SS_UNCONNECTED; 951 952 switch (sock->type) { 953 case SOCK_STREAM: 954 sock->ops = &unix_stream_ops; 955 break; 956 /* 957 * Believe it or not BSD has AF_UNIX, SOCK_RAW though 958 * nothing uses it. 
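	 *
	 * A hedged illustration of the silent downgrade done just below
	 * (the getsockopt() probe is an assumption for the example, not
	 * something this file performs):
	 *
	 *	int fd = socket(AF_UNIX, SOCK_RAW, 0);
	 *	int type;
	 *	socklen_t len = sizeof(type);
	 *
	 *	getsockopt(fd, SOL_SOCKET, SO_TYPE, &type, &len);
	 *	-> type is SOCK_DGRAM, not SOCK_RAW, because the request
	 *	   falls through to the SOCK_DGRAM case.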
959 */ 960 case SOCK_RAW: 961 sock->type = SOCK_DGRAM; 962 fallthrough; 963 case SOCK_DGRAM: 964 sock->ops = &unix_dgram_ops; 965 break; 966 case SOCK_SEQPACKET: 967 sock->ops = &unix_seqpacket_ops; 968 break; 969 default: 970 return -ESOCKTNOSUPPORT; 971 } 972 973 sk = unix_create1(net, sock, kern, sock->type); 974 if (IS_ERR(sk)) 975 return PTR_ERR(sk); 976 977 return 0; 978 } 979 980 static int unix_release(struct socket *sock) 981 { 982 struct sock *sk = sock->sk; 983 984 if (!sk) 985 return 0; 986 987 sk->sk_prot->close(sk, 0); 988 unix_release_sock(sk, 0); 989 sock->sk = NULL; 990 991 return 0; 992 } 993 994 static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, 995 int addr_len, int type) 996 { 997 struct inode *inode; 998 struct path path; 999 struct sock *sk; 1000 int err; 1001 1002 unix_mkname_bsd(sunaddr, addr_len); 1003 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path); 1004 if (err) 1005 goto fail; 1006 1007 err = path_permission(&path, MAY_WRITE); 1008 if (err) 1009 goto path_put; 1010 1011 err = -ECONNREFUSED; 1012 inode = d_backing_inode(path.dentry); 1013 if (!S_ISSOCK(inode->i_mode)) 1014 goto path_put; 1015 1016 sk = unix_find_socket_byinode(inode); 1017 if (!sk) 1018 goto path_put; 1019 1020 err = -EPROTOTYPE; 1021 if (sk->sk_type == type) 1022 touch_atime(&path); 1023 else 1024 goto sock_put; 1025 1026 path_put(&path); 1027 1028 return sk; 1029 1030 sock_put: 1031 sock_put(sk); 1032 path_put: 1033 path_put(&path); 1034 fail: 1035 return ERR_PTR(err); 1036 } 1037 1038 static struct sock *unix_find_abstract(struct net *net, 1039 struct sockaddr_un *sunaddr, 1040 int addr_len, int type) 1041 { 1042 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type); 1043 struct dentry *dentry; 1044 struct sock *sk; 1045 1046 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash); 1047 if (!sk) 1048 return ERR_PTR(-ECONNREFUSED); 1049 1050 dentry = unix_sk(sk)->path.dentry; 1051 if (dentry) 1052 touch_atime(&unix_sk(sk)->path); 1053 1054 return sk; 1055 } 1056 1057 static struct sock *unix_find_other(struct net *net, 1058 struct sockaddr_un *sunaddr, 1059 int addr_len, int type) 1060 { 1061 struct sock *sk; 1062 1063 if (sunaddr->sun_path[0]) 1064 sk = unix_find_bsd(net, sunaddr, addr_len, type); 1065 else 1066 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1067 1068 return sk; 1069 } 1070 1071 static int unix_autobind(struct sock *sk) 1072 { 1073 unsigned int new_hash, old_hash = sk->sk_hash; 1074 struct unix_sock *u = unix_sk(sk); 1075 struct unix_address *addr; 1076 u32 lastnum, ordernum; 1077 int err; 1078 1079 err = mutex_lock_interruptible(&u->bindlock); 1080 if (err) 1081 return err; 1082 1083 if (u->addr) 1084 goto out; 1085 1086 err = -ENOMEM; 1087 addr = kzalloc(sizeof(*addr) + 1088 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL); 1089 if (!addr) 1090 goto out; 1091 1092 addr->len = offsetof(struct sockaddr_un, sun_path) + 6; 1093 addr->name->sun_family = AF_UNIX; 1094 refcount_set(&addr->refcnt, 1); 1095 1096 ordernum = prandom_u32(); 1097 lastnum = ordernum & 0xFFFFF; 1098 retry: 1099 ordernum = (ordernum + 1) & 0xFFFFF; 1100 sprintf(addr->name->sun_path + 1, "%05x", ordernum); 1101 1102 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1103 unix_table_double_lock(old_hash, new_hash); 1104 1105 if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, 1106 new_hash)) { 1107 unix_table_double_unlock(old_hash, new_hash); 1108 1109 /* __unix_find_socket_byname() may take long time if many 
names 1110 * are already in use. 1111 */ 1112 cond_resched(); 1113 1114 if (ordernum == lastnum) { 1115 /* Give up if all names seems to be in use. */ 1116 err = -ENOSPC; 1117 unix_release_addr(addr); 1118 goto out; 1119 } 1120 1121 goto retry; 1122 } 1123 1124 __unix_set_addr_hash(sk, addr, new_hash); 1125 unix_table_double_unlock(old_hash, new_hash); 1126 err = 0; 1127 1128 out: mutex_unlock(&u->bindlock); 1129 return err; 1130 } 1131 1132 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, 1133 int addr_len) 1134 { 1135 umode_t mode = S_IFSOCK | 1136 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask()); 1137 unsigned int new_hash, old_hash = sk->sk_hash; 1138 struct unix_sock *u = unix_sk(sk); 1139 struct user_namespace *ns; // barf... 1140 struct unix_address *addr; 1141 struct dentry *dentry; 1142 struct path parent; 1143 int err; 1144 1145 unix_mkname_bsd(sunaddr, addr_len); 1146 addr_len = strlen(sunaddr->sun_path) + 1147 offsetof(struct sockaddr_un, sun_path) + 1; 1148 1149 addr = unix_create_addr(sunaddr, addr_len); 1150 if (!addr) 1151 return -ENOMEM; 1152 1153 /* 1154 * Get the parent directory, calculate the hash for last 1155 * component. 1156 */ 1157 dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0); 1158 if (IS_ERR(dentry)) { 1159 err = PTR_ERR(dentry); 1160 goto out; 1161 } 1162 1163 /* 1164 * All right, let's create it. 1165 */ 1166 ns = mnt_user_ns(parent.mnt); 1167 err = security_path_mknod(&parent, dentry, mode, 0); 1168 if (!err) 1169 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0); 1170 if (err) 1171 goto out_path; 1172 err = mutex_lock_interruptible(&u->bindlock); 1173 if (err) 1174 goto out_unlink; 1175 if (u->addr) 1176 goto out_unlock; 1177 1178 new_hash = unix_bsd_hash(d_backing_inode(dentry)); 1179 unix_table_double_lock(old_hash, new_hash); 1180 u->path.mnt = mntget(parent.mnt); 1181 u->path.dentry = dget(dentry); 1182 __unix_set_addr_hash(sk, addr, new_hash); 1183 unix_table_double_unlock(old_hash, new_hash); 1184 mutex_unlock(&u->bindlock); 1185 done_path_create(&parent, dentry); 1186 return 0; 1187 1188 out_unlock: 1189 mutex_unlock(&u->bindlock); 1190 err = -EINVAL; 1191 out_unlink: 1192 /* failed after successful mknod? unlink what we'd created... */ 1193 vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL); 1194 out_path: 1195 done_path_create(&parent, dentry); 1196 out: 1197 unix_release_addr(addr); 1198 return err == -EEXIST ? 
-EADDRINUSE : err; 1199 } 1200 1201 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr, 1202 int addr_len) 1203 { 1204 unsigned int new_hash, old_hash = sk->sk_hash; 1205 struct unix_sock *u = unix_sk(sk); 1206 struct unix_address *addr; 1207 int err; 1208 1209 addr = unix_create_addr(sunaddr, addr_len); 1210 if (!addr) 1211 return -ENOMEM; 1212 1213 err = mutex_lock_interruptible(&u->bindlock); 1214 if (err) 1215 goto out; 1216 1217 if (u->addr) { 1218 err = -EINVAL; 1219 goto out_mutex; 1220 } 1221 1222 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type); 1223 unix_table_double_lock(old_hash, new_hash); 1224 1225 if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len, 1226 new_hash)) 1227 goto out_spin; 1228 1229 __unix_set_addr_hash(sk, addr, new_hash); 1230 unix_table_double_unlock(old_hash, new_hash); 1231 mutex_unlock(&u->bindlock); 1232 return 0; 1233 1234 out_spin: 1235 unix_table_double_unlock(old_hash, new_hash); 1236 err = -EADDRINUSE; 1237 out_mutex: 1238 mutex_unlock(&u->bindlock); 1239 out: 1240 unix_release_addr(addr); 1241 return err; 1242 } 1243 1244 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 1245 { 1246 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1247 struct sock *sk = sock->sk; 1248 int err; 1249 1250 if (addr_len == offsetof(struct sockaddr_un, sun_path) && 1251 sunaddr->sun_family == AF_UNIX) 1252 return unix_autobind(sk); 1253 1254 err = unix_validate_addr(sunaddr, addr_len); 1255 if (err) 1256 return err; 1257 1258 if (sunaddr->sun_path[0]) 1259 err = unix_bind_bsd(sk, sunaddr, addr_len); 1260 else 1261 err = unix_bind_abstract(sk, sunaddr, addr_len); 1262 1263 return err; 1264 } 1265 1266 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) 1267 { 1268 if (unlikely(sk1 == sk2) || !sk2) { 1269 unix_state_lock(sk1); 1270 return; 1271 } 1272 if (sk1 < sk2) { 1273 unix_state_lock(sk1); 1274 unix_state_lock_nested(sk2); 1275 } else { 1276 unix_state_lock(sk2); 1277 unix_state_lock_nested(sk1); 1278 } 1279 } 1280 1281 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) 1282 { 1283 if (unlikely(sk1 == sk2) || !sk2) { 1284 unix_state_unlock(sk1); 1285 return; 1286 } 1287 unix_state_unlock(sk1); 1288 unix_state_unlock(sk2); 1289 } 1290 1291 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, 1292 int alen, int flags) 1293 { 1294 struct sock *sk = sock->sk; 1295 struct net *net = sock_net(sk); 1296 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; 1297 struct sock *other; 1298 int err; 1299 1300 err = -EINVAL; 1301 if (alen < offsetofend(struct sockaddr, sa_family)) 1302 goto out; 1303 1304 if (addr->sa_family != AF_UNSPEC) { 1305 err = unix_validate_addr(sunaddr, alen); 1306 if (err) 1307 goto out; 1308 1309 if (test_bit(SOCK_PASSCRED, &sock->flags) && 1310 !unix_sk(sk)->addr) { 1311 err = unix_autobind(sk); 1312 if (err) 1313 goto out; 1314 } 1315 1316 restart: 1317 other = unix_find_other(net, sunaddr, alen, sock->type); 1318 if (IS_ERR(other)) { 1319 err = PTR_ERR(other); 1320 goto out; 1321 } 1322 1323 unix_state_double_lock(sk, other); 1324 1325 /* Apparently VFS overslept socket death. Retry. 
*/ 1326 if (sock_flag(other, SOCK_DEAD)) { 1327 unix_state_double_unlock(sk, other); 1328 sock_put(other); 1329 goto restart; 1330 } 1331 1332 err = -EPERM; 1333 if (!unix_may_send(sk, other)) 1334 goto out_unlock; 1335 1336 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1337 if (err) 1338 goto out_unlock; 1339 1340 sk->sk_state = other->sk_state = TCP_ESTABLISHED; 1341 } else { 1342 /* 1343 * 1003.1g breaking connected state with AF_UNSPEC 1344 */ 1345 other = NULL; 1346 unix_state_double_lock(sk, other); 1347 } 1348 1349 /* 1350 * If it was connected, reconnect. 1351 */ 1352 if (unix_peer(sk)) { 1353 struct sock *old_peer = unix_peer(sk); 1354 1355 unix_peer(sk) = other; 1356 if (!other) 1357 sk->sk_state = TCP_CLOSE; 1358 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); 1359 1360 unix_state_double_unlock(sk, other); 1361 1362 if (other != old_peer) 1363 unix_dgram_disconnected(sk, old_peer); 1364 sock_put(old_peer); 1365 } else { 1366 unix_peer(sk) = other; 1367 unix_state_double_unlock(sk, other); 1368 } 1369 1370 return 0; 1371 1372 out_unlock: 1373 unix_state_double_unlock(sk, other); 1374 sock_put(other); 1375 out: 1376 return err; 1377 } 1378 1379 static long unix_wait_for_peer(struct sock *other, long timeo) 1380 __releases(&unix_sk(other)->lock) 1381 { 1382 struct unix_sock *u = unix_sk(other); 1383 int sched; 1384 DEFINE_WAIT(wait); 1385 1386 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE); 1387 1388 sched = !sock_flag(other, SOCK_DEAD) && 1389 !(other->sk_shutdown & RCV_SHUTDOWN) && 1390 unix_recvq_full(other); 1391 1392 unix_state_unlock(other); 1393 1394 if (sched) 1395 timeo = schedule_timeout(timeo); 1396 1397 finish_wait(&u->peer_wait, &wait); 1398 return timeo; 1399 } 1400 1401 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, 1402 int addr_len, int flags) 1403 { 1404 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 1405 struct sock *sk = sock->sk; 1406 struct net *net = sock_net(sk); 1407 struct unix_sock *u = unix_sk(sk), *newu, *otheru; 1408 struct sock *newsk = NULL; 1409 struct sock *other = NULL; 1410 struct sk_buff *skb = NULL; 1411 int st; 1412 int err; 1413 long timeo; 1414 1415 err = unix_validate_addr(sunaddr, addr_len); 1416 if (err) 1417 goto out; 1418 1419 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1420 err = unix_autobind(sk); 1421 if (err) 1422 goto out; 1423 } 1424 1425 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 1426 1427 /* First of all allocate resources. 1428 If we will make it after state is locked, 1429 we will have to recheck all again in any case. 1430 */ 1431 1432 /* create new sock for complete connection */ 1433 newsk = unix_create1(sock_net(sk), NULL, 0, sock->type); 1434 if (IS_ERR(newsk)) { 1435 err = PTR_ERR(newsk); 1436 newsk = NULL; 1437 goto out; 1438 } 1439 1440 err = -ENOMEM; 1441 1442 /* Allocate skb for sending to listening sock */ 1443 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); 1444 if (skb == NULL) 1445 goto out; 1446 1447 restart: 1448 /* Find listening sock. */ 1449 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); 1450 if (IS_ERR(other)) { 1451 err = PTR_ERR(other); 1452 other = NULL; 1453 goto out; 1454 } 1455 1456 /* Latch state of peer */ 1457 unix_state_lock(other); 1458 1459 /* Apparently VFS overslept socket death. Retry. 
*/ 1460 if (sock_flag(other, SOCK_DEAD)) { 1461 unix_state_unlock(other); 1462 sock_put(other); 1463 goto restart; 1464 } 1465 1466 err = -ECONNREFUSED; 1467 if (other->sk_state != TCP_LISTEN) 1468 goto out_unlock; 1469 if (other->sk_shutdown & RCV_SHUTDOWN) 1470 goto out_unlock; 1471 1472 if (unix_recvq_full(other)) { 1473 err = -EAGAIN; 1474 if (!timeo) 1475 goto out_unlock; 1476 1477 timeo = unix_wait_for_peer(other, timeo); 1478 1479 err = sock_intr_errno(timeo); 1480 if (signal_pending(current)) 1481 goto out; 1482 sock_put(other); 1483 goto restart; 1484 } 1485 1486 /* Latch our state. 1487 1488 It is tricky place. We need to grab our state lock and cannot 1489 drop lock on peer. It is dangerous because deadlock is 1490 possible. Connect to self case and simultaneous 1491 attempt to connect are eliminated by checking socket 1492 state. other is TCP_LISTEN, if sk is TCP_LISTEN we 1493 check this before attempt to grab lock. 1494 1495 Well, and we have to recheck the state after socket locked. 1496 */ 1497 st = sk->sk_state; 1498 1499 switch (st) { 1500 case TCP_CLOSE: 1501 /* This is ok... continue with connect */ 1502 break; 1503 case TCP_ESTABLISHED: 1504 /* Socket is already connected */ 1505 err = -EISCONN; 1506 goto out_unlock; 1507 default: 1508 err = -EINVAL; 1509 goto out_unlock; 1510 } 1511 1512 unix_state_lock_nested(sk); 1513 1514 if (sk->sk_state != st) { 1515 unix_state_unlock(sk); 1516 unix_state_unlock(other); 1517 sock_put(other); 1518 goto restart; 1519 } 1520 1521 err = security_unix_stream_connect(sk, other, newsk); 1522 if (err) { 1523 unix_state_unlock(sk); 1524 goto out_unlock; 1525 } 1526 1527 /* The way is open! Fastly set all the necessary fields... */ 1528 1529 sock_hold(sk); 1530 unix_peer(newsk) = sk; 1531 newsk->sk_state = TCP_ESTABLISHED; 1532 newsk->sk_type = sk->sk_type; 1533 init_peercred(newsk); 1534 newu = unix_sk(newsk); 1535 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1536 otheru = unix_sk(other); 1537 1538 /* copy address information from listening to new sock 1539 * 1540 * The contents of *(otheru->addr) and otheru->path 1541 * are seen fully set up here, since we have found 1542 * otheru in hash under unix_table_locks. Insertion 1543 * into the hash chain we'd found it in had been done 1544 * in an earlier critical area protected by unix_table_locks, 1545 * the same one where we'd set *(otheru->addr) contents, 1546 * as well as otheru->path and otheru->addr itself. 1547 * 1548 * Using smp_store_release() here to set newu->addr 1549 * is enough to make those stores, as well as stores 1550 * to newu->path visible to anyone who gets newu->addr 1551 * by smp_load_acquire(). IOW, the same warranties 1552 * as for unix_sock instances bound in unix_bind() or 1553 * in unix_autobind(). 
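	 *
	 * The matching reader side appears further down in
	 * unix_getname() and unix_copy_addr(); in outline (a restatement
	 * of existing code in this file, not an addition):
	 *
	 *	addr = smp_load_acquire(&unix_sk(sk)->addr);
	 *	if (addr)
	 *		... addr->len, addr->name and u->path are
	 *		    fully initialised at this point ...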
1554 */ 1555 if (otheru->path.dentry) { 1556 path_get(&otheru->path); 1557 newu->path = otheru->path; 1558 } 1559 refcount_inc(&otheru->addr->refcnt); 1560 smp_store_release(&newu->addr, otheru->addr); 1561 1562 /* Set credentials */ 1563 copy_peercred(sk, other); 1564 1565 sock->state = SS_CONNECTED; 1566 sk->sk_state = TCP_ESTABLISHED; 1567 sock_hold(newsk); 1568 1569 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ 1570 unix_peer(sk) = newsk; 1571 1572 unix_state_unlock(sk); 1573 1574 /* take ten and send info to listening sock */ 1575 spin_lock(&other->sk_receive_queue.lock); 1576 __skb_queue_tail(&other->sk_receive_queue, skb); 1577 spin_unlock(&other->sk_receive_queue.lock); 1578 unix_state_unlock(other); 1579 other->sk_data_ready(other); 1580 sock_put(other); 1581 return 0; 1582 1583 out_unlock: 1584 if (other) 1585 unix_state_unlock(other); 1586 1587 out: 1588 kfree_skb(skb); 1589 if (newsk) 1590 unix_release_sock(newsk, 0); 1591 if (other) 1592 sock_put(other); 1593 return err; 1594 } 1595 1596 static int unix_socketpair(struct socket *socka, struct socket *sockb) 1597 { 1598 struct sock *ska = socka->sk, *skb = sockb->sk; 1599 1600 /* Join our sockets back to back */ 1601 sock_hold(ska); 1602 sock_hold(skb); 1603 unix_peer(ska) = skb; 1604 unix_peer(skb) = ska; 1605 init_peercred(ska); 1606 init_peercred(skb); 1607 1608 ska->sk_state = TCP_ESTABLISHED; 1609 skb->sk_state = TCP_ESTABLISHED; 1610 socka->state = SS_CONNECTED; 1611 sockb->state = SS_CONNECTED; 1612 return 0; 1613 } 1614 1615 static void unix_sock_inherit_flags(const struct socket *old, 1616 struct socket *new) 1617 { 1618 if (test_bit(SOCK_PASSCRED, &old->flags)) 1619 set_bit(SOCK_PASSCRED, &new->flags); 1620 if (test_bit(SOCK_PASSSEC, &old->flags)) 1621 set_bit(SOCK_PASSSEC, &new->flags); 1622 } 1623 1624 static int unix_accept(struct socket *sock, struct socket *newsock, int flags, 1625 bool kern) 1626 { 1627 struct sock *sk = sock->sk; 1628 struct sock *tsk; 1629 struct sk_buff *skb; 1630 int err; 1631 1632 err = -EOPNOTSUPP; 1633 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) 1634 goto out; 1635 1636 err = -EINVAL; 1637 if (sk->sk_state != TCP_LISTEN) 1638 goto out; 1639 1640 /* If socket state is TCP_LISTEN it cannot change (for now...), 1641 * so that no locks are necessary. 1642 */ 1643 1644 skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, 1645 &err); 1646 if (!skb) { 1647 /* This means receive shutdown. 
*/ 1648 if (err == 0) 1649 err = -EINVAL; 1650 goto out; 1651 } 1652 1653 tsk = skb->sk; 1654 skb_free_datagram(sk, skb); 1655 wake_up_interruptible(&unix_sk(sk)->peer_wait); 1656 1657 /* attach accepted sock to socket */ 1658 unix_state_lock(tsk); 1659 newsock->state = SS_CONNECTED; 1660 unix_sock_inherit_flags(sock, newsock); 1661 sock_graft(tsk, newsock); 1662 unix_state_unlock(tsk); 1663 return 0; 1664 1665 out: 1666 return err; 1667 } 1668 1669 1670 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) 1671 { 1672 struct sock *sk = sock->sk; 1673 struct unix_address *addr; 1674 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); 1675 int err = 0; 1676 1677 if (peer) { 1678 sk = unix_peer_get(sk); 1679 1680 err = -ENOTCONN; 1681 if (!sk) 1682 goto out; 1683 err = 0; 1684 } else { 1685 sock_hold(sk); 1686 } 1687 1688 addr = smp_load_acquire(&unix_sk(sk)->addr); 1689 if (!addr) { 1690 sunaddr->sun_family = AF_UNIX; 1691 sunaddr->sun_path[0] = 0; 1692 err = offsetof(struct sockaddr_un, sun_path); 1693 } else { 1694 err = addr->len; 1695 memcpy(sunaddr, addr->name, addr->len); 1696 } 1697 sock_put(sk); 1698 out: 1699 return err; 1700 } 1701 1702 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) 1703 { 1704 scm->fp = scm_fp_dup(UNIXCB(skb).fp); 1705 1706 /* 1707 * Garbage collection of unix sockets starts by selecting a set of 1708 * candidate sockets which have reference only from being in flight 1709 * (total_refs == inflight_refs). This condition is checked once during 1710 * the candidate collection phase, and candidates are marked as such, so 1711 * that non-candidates can later be ignored. While inflight_refs is 1712 * protected by unix_gc_lock, total_refs (file count) is not, hence this 1713 * is an instantaneous decision. 1714 * 1715 * Once a candidate, however, the socket must not be reinstalled into a 1716 * file descriptor while the garbage collection is in progress. 1717 * 1718 * If the above conditions are met, then the directed graph of 1719 * candidates (*) does not change while unix_gc_lock is held. 1720 * 1721 * Any operations that changes the file count through file descriptors 1722 * (dup, close, sendmsg) does not change the graph since candidates are 1723 * not installed in fds. 1724 * 1725 * Dequeing a candidate via recvmsg would install it into an fd, but 1726 * that takes unix_gc_lock to decrement the inflight count, so it's 1727 * serialized with garbage collection. 1728 * 1729 * MSG_PEEK is special in that it does not change the inflight count, 1730 * yet does install the socket into an fd. The following lock/unlock 1731 * pair is to ensure serialization with garbage collection. It must be 1732 * done between incrementing the file count and installing the file into 1733 * an fd. 1734 * 1735 * If garbage collection starts after the barrier provided by the 1736 * lock/unlock, then it will see the elevated refcount and not mark this 1737 * as a candidate. If a garbage collection is already in progress 1738 * before the file count was incremented, then the lock/unlock pair will 1739 * ensure that garbage collection is finished before progressing to 1740 * installing the fd. 1741 * 1742 * (*) A -> B where B is on the queue of A or B is on the queue of C 1743 * which is on the queue of listening socket A. 
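	 *
	 * A sketch of the two interleavings this empty critical section
	 * sorts out (the timeline is illustrative only):
	 *
	 *   MSG_PEEK path                    garbage collector
	 *   -------------                    -----------------
	 *   scm_fp_dup() raises file count
	 *   spin_lock(&unix_gc_lock)
	 *   spin_unlock(&unix_gc_lock)       spin_lock(&unix_gc_lock)
	 *   install fds into the table       sees the raised count, so the
	 *                                    socket is not a GC candidate
	 *
	 * or, if the collector already holds unix_gc_lock, the peek path
	 * blocks here until the collection cycle finishes, and only then
	 * installs the fds.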
1744 */ 1745 spin_lock(&unix_gc_lock); 1746 spin_unlock(&unix_gc_lock); 1747 } 1748 1749 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1750 { 1751 int err = 0; 1752 1753 UNIXCB(skb).pid = get_pid(scm->pid); 1754 UNIXCB(skb).uid = scm->creds.uid; 1755 UNIXCB(skb).gid = scm->creds.gid; 1756 UNIXCB(skb).fp = NULL; 1757 unix_get_secdata(scm, skb); 1758 if (scm->fp && send_fds) 1759 err = unix_attach_fds(scm, skb); 1760 1761 skb->destructor = unix_destruct_scm; 1762 return err; 1763 } 1764 1765 static bool unix_passcred_enabled(const struct socket *sock, 1766 const struct sock *other) 1767 { 1768 return test_bit(SOCK_PASSCRED, &sock->flags) || 1769 !other->sk_socket || 1770 test_bit(SOCK_PASSCRED, &other->sk_socket->flags); 1771 } 1772 1773 /* 1774 * Some apps rely on write() giving SCM_CREDENTIALS 1775 * We include credentials if source or destination socket 1776 * asserted SOCK_PASSCRED. 1777 */ 1778 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, 1779 const struct sock *other) 1780 { 1781 if (UNIXCB(skb).pid) 1782 return; 1783 if (unix_passcred_enabled(sock, other)) { 1784 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1785 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1786 } 1787 } 1788 1789 static int maybe_init_creds(struct scm_cookie *scm, 1790 struct socket *socket, 1791 const struct sock *other) 1792 { 1793 int err; 1794 struct msghdr msg = { .msg_controllen = 0 }; 1795 1796 err = scm_send(socket, &msg, scm, false); 1797 if (err) 1798 return err; 1799 1800 if (unix_passcred_enabled(socket, other)) { 1801 scm->pid = get_pid(task_tgid(current)); 1802 current_uid_gid(&scm->creds.uid, &scm->creds.gid); 1803 } 1804 return err; 1805 } 1806 1807 static bool unix_skb_scm_eq(struct sk_buff *skb, 1808 struct scm_cookie *scm) 1809 { 1810 return UNIXCB(skb).pid == scm->pid && 1811 uid_eq(UNIXCB(skb).uid, scm->creds.uid) && 1812 gid_eq(UNIXCB(skb).gid, scm->creds.gid) && 1813 unix_secdata_eq(scm, skb); 1814 } 1815 1816 static void scm_stat_add(struct sock *sk, struct sk_buff *skb) 1817 { 1818 struct scm_fp_list *fp = UNIXCB(skb).fp; 1819 struct unix_sock *u = unix_sk(sk); 1820 1821 if (unlikely(fp && fp->count)) 1822 atomic_add(fp->count, &u->scm_stat.nr_fds); 1823 } 1824 1825 static void scm_stat_del(struct sock *sk, struct sk_buff *skb) 1826 { 1827 struct scm_fp_list *fp = UNIXCB(skb).fp; 1828 struct unix_sock *u = unix_sk(sk); 1829 1830 if (unlikely(fp && fp->count)) 1831 atomic_sub(fp->count, &u->scm_stat.nr_fds); 1832 } 1833 1834 /* 1835 * Send AF_UNIX data. 
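 *
 * A hedged sketch of the credentials contract served by maybe_add_creds()
 * above (the receiving-side calls are assumptions for illustration, not
 * code from this file):
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	-> every recvmsg() on fd now carries an SCM_CREDENTIALS control
 *	   message with the sender's pid/uid/gid, filled in from
 *	   UNIXCB(skb) even when the sender attached no ancillary data.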
1836 */ 1837 1838 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, 1839 size_t len) 1840 { 1841 struct sock *sk = sock->sk; 1842 struct net *net = sock_net(sk); 1843 struct unix_sock *u = unix_sk(sk); 1844 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); 1845 struct sock *other = NULL; 1846 int err; 1847 struct sk_buff *skb; 1848 long timeo; 1849 struct scm_cookie scm; 1850 int data_len = 0; 1851 int sk_locked; 1852 1853 wait_for_unix_gc(); 1854 err = scm_send(sock, msg, &scm, false); 1855 if (err < 0) 1856 return err; 1857 1858 err = -EOPNOTSUPP; 1859 if (msg->msg_flags&MSG_OOB) 1860 goto out; 1861 1862 if (msg->msg_namelen) { 1863 err = unix_validate_addr(sunaddr, msg->msg_namelen); 1864 if (err) 1865 goto out; 1866 } else { 1867 sunaddr = NULL; 1868 err = -ENOTCONN; 1869 other = unix_peer_get(sk); 1870 if (!other) 1871 goto out; 1872 } 1873 1874 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) { 1875 err = unix_autobind(sk); 1876 if (err) 1877 goto out; 1878 } 1879 1880 err = -EMSGSIZE; 1881 if (len > sk->sk_sndbuf - 32) 1882 goto out; 1883 1884 if (len > SKB_MAX_ALLOC) { 1885 data_len = min_t(size_t, 1886 len - SKB_MAX_ALLOC, 1887 MAX_SKB_FRAGS * PAGE_SIZE); 1888 data_len = PAGE_ALIGN(data_len); 1889 1890 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE); 1891 } 1892 1893 skb = sock_alloc_send_pskb(sk, len - data_len, data_len, 1894 msg->msg_flags & MSG_DONTWAIT, &err, 1895 PAGE_ALLOC_COSTLY_ORDER); 1896 if (skb == NULL) 1897 goto out; 1898 1899 err = unix_scm_to_skb(&scm, skb, true); 1900 if (err < 0) 1901 goto out_free; 1902 1903 skb_put(skb, len - data_len); 1904 skb->data_len = data_len; 1905 skb->len = len; 1906 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len); 1907 if (err) 1908 goto out_free; 1909 1910 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 1911 1912 restart: 1913 if (!other) { 1914 err = -ECONNRESET; 1915 if (sunaddr == NULL) 1916 goto out_free; 1917 1918 other = unix_find_other(net, sunaddr, msg->msg_namelen, 1919 sk->sk_type); 1920 if (IS_ERR(other)) { 1921 err = PTR_ERR(other); 1922 other = NULL; 1923 goto out_free; 1924 } 1925 } 1926 1927 if (sk_filter(other, skb) < 0) { 1928 /* Toss the packet but do not return any error to the sender */ 1929 err = len; 1930 goto out_free; 1931 } 1932 1933 sk_locked = 0; 1934 unix_state_lock(other); 1935 restart_locked: 1936 err = -EPERM; 1937 if (!unix_may_send(sk, other)) 1938 goto out_unlock; 1939 1940 if (unlikely(sock_flag(other, SOCK_DEAD))) { 1941 /* 1942 * Check with 1003.1g - what should 1943 * datagram error 1944 */ 1945 unix_state_unlock(other); 1946 sock_put(other); 1947 1948 if (!sk_locked) 1949 unix_state_lock(sk); 1950 1951 err = 0; 1952 if (unix_peer(sk) == other) { 1953 unix_peer(sk) = NULL; 1954 unix_dgram_peer_wake_disconnect_wakeup(sk, other); 1955 1956 unix_state_unlock(sk); 1957 1958 sk->sk_state = TCP_CLOSE; 1959 unix_dgram_disconnected(sk, other); 1960 sock_put(other); 1961 err = -ECONNREFUSED; 1962 } else { 1963 unix_state_unlock(sk); 1964 } 1965 1966 other = NULL; 1967 if (err) 1968 goto out_free; 1969 goto restart; 1970 } 1971 1972 err = -EPIPE; 1973 if (other->sk_shutdown & RCV_SHUTDOWN) 1974 goto out_unlock; 1975 1976 if (sk->sk_type != SOCK_SEQPACKET) { 1977 err = security_unix_may_send(sk->sk_socket, other->sk_socket); 1978 if (err) 1979 goto out_unlock; 1980 } 1981 1982 /* other == sk && unix_peer(other) != sk if 1983 * - unix_peer(sk) == NULL, destination address bound to sk 1984 * - unix_peer(sk) == sk by time of get but disconnected before 
lock 1985 */ 1986 if (other != sk && 1987 unlikely(unix_peer(other) != sk && 1988 unix_recvq_full_lockless(other))) { 1989 if (timeo) { 1990 timeo = unix_wait_for_peer(other, timeo); 1991 1992 err = sock_intr_errno(timeo); 1993 if (signal_pending(current)) 1994 goto out_free; 1995 1996 goto restart; 1997 } 1998 1999 if (!sk_locked) { 2000 unix_state_unlock(other); 2001 unix_state_double_lock(sk, other); 2002 } 2003 2004 if (unix_peer(sk) != other || 2005 unix_dgram_peer_wake_me(sk, other)) { 2006 err = -EAGAIN; 2007 sk_locked = 1; 2008 goto out_unlock; 2009 } 2010 2011 if (!sk_locked) { 2012 sk_locked = 1; 2013 goto restart_locked; 2014 } 2015 } 2016 2017 if (unlikely(sk_locked)) 2018 unix_state_unlock(sk); 2019 2020 if (sock_flag(other, SOCK_RCVTSTAMP)) 2021 __net_timestamp(skb); 2022 maybe_add_creds(skb, sock, other); 2023 scm_stat_add(other, skb); 2024 skb_queue_tail(&other->sk_receive_queue, skb); 2025 unix_state_unlock(other); 2026 other->sk_data_ready(other); 2027 sock_put(other); 2028 scm_destroy(&scm); 2029 return len; 2030 2031 out_unlock: 2032 if (sk_locked) 2033 unix_state_unlock(sk); 2034 unix_state_unlock(other); 2035 out_free: 2036 kfree_skb(skb); 2037 out: 2038 if (other) 2039 sock_put(other); 2040 scm_destroy(&scm); 2041 return err; 2042 } 2043 2044 /* We use paged skbs for stream sockets, and limit occupancy to 32768 2045 * bytes, and a minimum of a full page. 2046 */ 2047 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) 2048 2049 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2050 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) 2051 { 2052 struct unix_sock *ousk = unix_sk(other); 2053 struct sk_buff *skb; 2054 int err = 0; 2055 2056 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); 2057 2058 if (!skb) 2059 return err; 2060 2061 skb_put(skb, 1); 2062 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); 2063 2064 if (err) { 2065 kfree_skb(skb); 2066 return err; 2067 } 2068 2069 unix_state_lock(other); 2070 2071 if (sock_flag(other, SOCK_DEAD) || 2072 (other->sk_shutdown & RCV_SHUTDOWN)) { 2073 unix_state_unlock(other); 2074 kfree_skb(skb); 2075 return -EPIPE; 2076 } 2077 2078 maybe_add_creds(skb, sock, other); 2079 skb_get(skb); 2080 2081 if (ousk->oob_skb) 2082 consume_skb(ousk->oob_skb); 2083 2084 WRITE_ONCE(ousk->oob_skb, skb); 2085 2086 scm_stat_add(other, skb); 2087 skb_queue_tail(&other->sk_receive_queue, skb); 2088 sk_send_sigurg(other); 2089 unix_state_unlock(other); 2090 other->sk_data_ready(other); 2091 2092 return err; 2093 } 2094 #endif 2095 2096 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, 2097 size_t len) 2098 { 2099 struct sock *sk = sock->sk; 2100 struct sock *other = NULL; 2101 int err, size; 2102 struct sk_buff *skb; 2103 int sent = 0; 2104 struct scm_cookie scm; 2105 bool fds_sent = false; 2106 int data_len; 2107 2108 wait_for_unix_gc(); 2109 err = scm_send(sock, msg, &scm, false); 2110 if (err < 0) 2111 return err; 2112 2113 err = -EOPNOTSUPP; 2114 if (msg->msg_flags & MSG_OOB) { 2115 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2116 if (len) 2117 len--; 2118 else 2119 #endif 2120 goto out_err; 2121 } 2122 2123 if (msg->msg_namelen) { 2124 err = sk->sk_state == TCP_ESTABLISHED ? 
-EISCONN : -EOPNOTSUPP; 2125 goto out_err; 2126 } else { 2127 err = -ENOTCONN; 2128 other = unix_peer(sk); 2129 if (!other) 2130 goto out_err; 2131 } 2132 2133 if (sk->sk_shutdown & SEND_SHUTDOWN) 2134 goto pipe_err; 2135 2136 while (sent < len) { 2137 size = len - sent; 2138 2139 /* Keep two messages in the pipe so it schedules better */ 2140 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); 2141 2142 /* allow fallback to order-0 allocations */ 2143 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); 2144 2145 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0)); 2146 2147 data_len = min_t(size_t, size, PAGE_ALIGN(data_len)); 2148 2149 skb = sock_alloc_send_pskb(sk, size - data_len, data_len, 2150 msg->msg_flags & MSG_DONTWAIT, &err, 2151 get_order(UNIX_SKB_FRAGS_SZ)); 2152 if (!skb) 2153 goto out_err; 2154 2155 /* Only send the fds in the first buffer */ 2156 err = unix_scm_to_skb(&scm, skb, !fds_sent); 2157 if (err < 0) { 2158 kfree_skb(skb); 2159 goto out_err; 2160 } 2161 fds_sent = true; 2162 2163 skb_put(skb, size - data_len); 2164 skb->data_len = data_len; 2165 skb->len = size; 2166 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); 2167 if (err) { 2168 kfree_skb(skb); 2169 goto out_err; 2170 } 2171 2172 unix_state_lock(other); 2173 2174 if (sock_flag(other, SOCK_DEAD) || 2175 (other->sk_shutdown & RCV_SHUTDOWN)) 2176 goto pipe_err_free; 2177 2178 maybe_add_creds(skb, sock, other); 2179 scm_stat_add(other, skb); 2180 skb_queue_tail(&other->sk_receive_queue, skb); 2181 unix_state_unlock(other); 2182 other->sk_data_ready(other); 2183 sent += size; 2184 } 2185 2186 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2187 if (msg->msg_flags & MSG_OOB) { 2188 err = queue_oob(sock, msg, other); 2189 if (err) 2190 goto out_err; 2191 sent++; 2192 } 2193 #endif 2194 2195 scm_destroy(&scm); 2196 2197 return sent; 2198 2199 pipe_err_free: 2200 unix_state_unlock(other); 2201 kfree_skb(skb); 2202 pipe_err: 2203 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) 2204 send_sig(SIGPIPE, current, 0); 2205 err = -EPIPE; 2206 out_err: 2207 scm_destroy(&scm); 2208 return sent ? : err; 2209 } 2210 2211 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 2212 int offset, size_t size, int flags) 2213 { 2214 int err; 2215 bool send_sigpipe = false; 2216 bool init_scm = true; 2217 struct scm_cookie scm; 2218 struct sock *other, *sk = socket->sk; 2219 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 2220 2221 if (flags & MSG_OOB) 2222 return -EOPNOTSUPP; 2223 2224 other = unix_peer(sk); 2225 if (!other || sk->sk_state != TCP_ESTABLISHED) 2226 return -ENOTCONN; 2227 2228 if (false) { 2229 alloc_skb: 2230 unix_state_unlock(other); 2231 mutex_unlock(&unix_sk(other)->iolock); 2232 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 2233 &err, 0); 2234 if (!newskb) 2235 goto err; 2236 } 2237 2238 /* we must acquire iolock as we modify already present 2239 * skbs in the sk_receive_queue and mess with skb->len 2240 */ 2241 err = mutex_lock_interruptible(&unix_sk(other)->iolock); 2242 if (err) { 2243 err = flags & MSG_DONTWAIT ? 
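		/* mutex_lock_interruptible() was interrupted by a signal:
		 * non-blocking callers see -EAGAIN, blocking callers get
		 * -ERESTARTSYS so the call can be restarted once the signal
		 * has been handled.
		 */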
-EAGAIN : -ERESTARTSYS; 2244 goto err; 2245 } 2246 2247 if (sk->sk_shutdown & SEND_SHUTDOWN) { 2248 err = -EPIPE; 2249 send_sigpipe = true; 2250 goto err_unlock; 2251 } 2252 2253 unix_state_lock(other); 2254 2255 if (sock_flag(other, SOCK_DEAD) || 2256 other->sk_shutdown & RCV_SHUTDOWN) { 2257 err = -EPIPE; 2258 send_sigpipe = true; 2259 goto err_state_unlock; 2260 } 2261 2262 if (init_scm) { 2263 err = maybe_init_creds(&scm, socket, other); 2264 if (err) 2265 goto err_state_unlock; 2266 init_scm = false; 2267 } 2268 2269 skb = skb_peek_tail(&other->sk_receive_queue); 2270 if (tail && tail == skb) { 2271 skb = newskb; 2272 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { 2273 if (newskb) { 2274 skb = newskb; 2275 } else { 2276 tail = skb; 2277 goto alloc_skb; 2278 } 2279 } else if (newskb) { 2280 /* this is fast path, we don't necessarily need to 2281 * call to kfree_skb even though with newskb == NULL 2282 * this - does no harm 2283 */ 2284 consume_skb(newskb); 2285 newskb = NULL; 2286 } 2287 2288 if (skb_append_pagefrags(skb, page, offset, size)) { 2289 tail = skb; 2290 goto alloc_skb; 2291 } 2292 2293 skb->len += size; 2294 skb->data_len += size; 2295 skb->truesize += size; 2296 refcount_add(size, &sk->sk_wmem_alloc); 2297 2298 if (newskb) { 2299 err = unix_scm_to_skb(&scm, skb, false); 2300 if (err) 2301 goto err_state_unlock; 2302 spin_lock(&other->sk_receive_queue.lock); 2303 __skb_queue_tail(&other->sk_receive_queue, newskb); 2304 spin_unlock(&other->sk_receive_queue.lock); 2305 } 2306 2307 unix_state_unlock(other); 2308 mutex_unlock(&unix_sk(other)->iolock); 2309 2310 other->sk_data_ready(other); 2311 scm_destroy(&scm); 2312 return size; 2313 2314 err_state_unlock: 2315 unix_state_unlock(other); 2316 err_unlock: 2317 mutex_unlock(&unix_sk(other)->iolock); 2318 err: 2319 kfree_skb(newskb); 2320 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2321 send_sig(SIGPIPE, current, 0); 2322 if (!init_scm) 2323 scm_destroy(&scm); 2324 return err; 2325 } 2326 2327 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, 2328 size_t len) 2329 { 2330 int err; 2331 struct sock *sk = sock->sk; 2332 2333 err = sock_error(sk); 2334 if (err) 2335 return err; 2336 2337 if (sk->sk_state != TCP_ESTABLISHED) 2338 return -ENOTCONN; 2339 2340 if (msg->msg_namelen) 2341 msg->msg_namelen = 0; 2342 2343 return unix_dgram_sendmsg(sock, msg, len); 2344 } 2345 2346 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, 2347 size_t size, int flags) 2348 { 2349 struct sock *sk = sock->sk; 2350 2351 if (sk->sk_state != TCP_ESTABLISHED) 2352 return -ENOTCONN; 2353 2354 return unix_dgram_recvmsg(sock, msg, size, flags); 2355 } 2356 2357 static void unix_copy_addr(struct msghdr *msg, struct sock *sk) 2358 { 2359 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); 2360 2361 if (addr) { 2362 msg->msg_namelen = addr->len; 2363 memcpy(msg->msg_name, addr->name, addr->len); 2364 } 2365 } 2366 2367 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, 2368 int flags) 2369 { 2370 struct scm_cookie scm; 2371 struct socket *sock = sk->sk_socket; 2372 struct unix_sock *u = unix_sk(sk); 2373 struct sk_buff *skb, *last; 2374 long timeo; 2375 int skip; 2376 int err; 2377 2378 err = -EOPNOTSUPP; 2379 if (flags&MSG_OOB) 2380 goto out; 2381 2382 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 2383 2384 do { 2385 mutex_lock(&u->iolock); 2386 2387 skip = sk_peek_offset(sk, flags); 2388 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, 2389 &skip, 
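					       /* &last remembers the queue
						* tail so the wait below can
						* tell whether new data has
						* arrived since this attempt
						*/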
&err, &last); 2390 if (skb) { 2391 if (!(flags & MSG_PEEK)) 2392 scm_stat_del(sk, skb); 2393 break; 2394 } 2395 2396 mutex_unlock(&u->iolock); 2397 2398 if (err != -EAGAIN) 2399 break; 2400 } while (timeo && 2401 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue, 2402 &err, &timeo, last)); 2403 2404 if (!skb) { /* implies iolock unlocked */ 2405 unix_state_lock(sk); 2406 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2407 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2408 (sk->sk_shutdown & RCV_SHUTDOWN)) 2409 err = 0; 2410 unix_state_unlock(sk); 2411 goto out; 2412 } 2413 2414 if (wq_has_sleeper(&u->peer_wait)) 2415 wake_up_interruptible_sync_poll(&u->peer_wait, 2416 EPOLLOUT | EPOLLWRNORM | 2417 EPOLLWRBAND); 2418 2419 if (msg->msg_name) 2420 unix_copy_addr(msg, skb->sk); 2421 2422 if (size > skb->len - skip) 2423 size = skb->len - skip; 2424 else if (size < skb->len - skip) 2425 msg->msg_flags |= MSG_TRUNC; 2426 2427 err = skb_copy_datagram_msg(skb, skip, msg, size); 2428 if (err) 2429 goto out_free; 2430 2431 if (sock_flag(sk, SOCK_RCVTSTAMP)) 2432 __sock_recv_timestamp(msg, sk, skb); 2433 2434 memset(&scm, 0, sizeof(scm)); 2435 2436 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2437 unix_set_secdata(&scm, skb); 2438 2439 if (!(flags & MSG_PEEK)) { 2440 if (UNIXCB(skb).fp) 2441 unix_detach_fds(&scm, skb); 2442 2443 sk_peek_offset_bwd(sk, skb->len); 2444 } else { 2445 /* It is questionable: on PEEK we could: 2446 - do not return fds - good, but too simple 8) 2447 - return fds, and do not return them on read (old strategy, 2448 apparently wrong) 2449 - clone fds (I chose it for now, it is the most universal 2450 solution) 2451 2452 POSIX 1003.1g does not actually define this clearly 2453 at all. POSIX 1003.1g doesn't define a lot of things 2454 clearly however! 2455 2456 */ 2457 2458 sk_peek_offset_fwd(sk, size); 2459 2460 if (UNIXCB(skb).fp) 2461 unix_peek_fds(&scm, skb); 2462 } 2463 err = (flags & MSG_TRUNC) ? skb->len - skip : size; 2464 2465 scm_recv(sock, msg, &scm, flags); 2466 2467 out_free: 2468 skb_free_datagram(sk, skb); 2469 mutex_unlock(&u->iolock); 2470 out: 2471 return err; 2472 } 2473 2474 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, 2475 int flags) 2476 { 2477 struct sock *sk = sock->sk; 2478 2479 #ifdef CONFIG_BPF_SYSCALL 2480 const struct proto *prot = READ_ONCE(sk->sk_prot); 2481 2482 if (prot != &unix_dgram_proto) 2483 return prot->recvmsg(sk, msg, size, flags, NULL); 2484 #endif 2485 return __unix_dgram_recvmsg(sk, msg, size, flags); 2486 } 2487 2488 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2489 { 2490 int copied = 0; 2491 2492 while (1) { 2493 struct unix_sock *u = unix_sk(sk); 2494 struct sk_buff *skb; 2495 int used, err; 2496 2497 mutex_lock(&u->iolock); 2498 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); 2499 mutex_unlock(&u->iolock); 2500 if (!skb) 2501 return err; 2502 2503 used = recv_actor(sk, skb); 2504 if (used <= 0) { 2505 if (!copied) 2506 copied = used; 2507 kfree_skb(skb); 2508 break; 2509 } else if (used <= skb->len) { 2510 copied += used; 2511 } 2512 2513 kfree_skb(skb); 2514 break; 2515 } 2516 2517 return copied; 2518 } 2519 2520 /* 2521 * Sleep until more data has arrived. But check for races.. 
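 *
 *	The caller has already dropped u->iolock, so rather than trusting an
 *	earlier emptiness test we re-check the receive queue tail, sk_err,
 *	RCV_SHUTDOWN, pending signals and the remaining timeout under
 *	unix_state_lock() on every pass before actually sleeping.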
2522 */ 2523 static long unix_stream_data_wait(struct sock *sk, long timeo, 2524 struct sk_buff *last, unsigned int last_len, 2525 bool freezable) 2526 { 2527 struct sk_buff *tail; 2528 DEFINE_WAIT(wait); 2529 2530 unix_state_lock(sk); 2531 2532 for (;;) { 2533 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 2534 2535 tail = skb_peek_tail(&sk->sk_receive_queue); 2536 if (tail != last || 2537 (tail && tail->len != last_len) || 2538 sk->sk_err || 2539 (sk->sk_shutdown & RCV_SHUTDOWN) || 2540 signal_pending(current) || 2541 !timeo) 2542 break; 2543 2544 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2545 unix_state_unlock(sk); 2546 if (freezable) 2547 timeo = freezable_schedule_timeout(timeo); 2548 else 2549 timeo = schedule_timeout(timeo); 2550 unix_state_lock(sk); 2551 2552 if (sock_flag(sk, SOCK_DEAD)) 2553 break; 2554 2555 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 2556 } 2557 2558 finish_wait(sk_sleep(sk), &wait); 2559 unix_state_unlock(sk); 2560 return timeo; 2561 } 2562 2563 static unsigned int unix_skb_len(const struct sk_buff *skb) 2564 { 2565 return skb->len - UNIXCB(skb).consumed; 2566 } 2567 2568 struct unix_stream_read_state { 2569 int (*recv_actor)(struct sk_buff *, int, int, 2570 struct unix_stream_read_state *); 2571 struct socket *socket; 2572 struct msghdr *msg; 2573 struct pipe_inode_info *pipe; 2574 size_t size; 2575 int flags; 2576 unsigned int splice_flags; 2577 }; 2578 2579 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2580 static int unix_stream_recv_urg(struct unix_stream_read_state *state) 2581 { 2582 struct socket *sock = state->socket; 2583 struct sock *sk = sock->sk; 2584 struct unix_sock *u = unix_sk(sk); 2585 int chunk = 1; 2586 struct sk_buff *oob_skb; 2587 2588 mutex_lock(&u->iolock); 2589 unix_state_lock(sk); 2590 2591 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { 2592 unix_state_unlock(sk); 2593 mutex_unlock(&u->iolock); 2594 return -EINVAL; 2595 } 2596 2597 oob_skb = u->oob_skb; 2598 2599 if (!(state->flags & MSG_PEEK)) 2600 WRITE_ONCE(u->oob_skb, NULL); 2601 2602 unix_state_unlock(sk); 2603 2604 chunk = state->recv_actor(oob_skb, 0, chunk, state); 2605 2606 if (!(state->flags & MSG_PEEK)) { 2607 UNIXCB(oob_skb).consumed += 1; 2608 kfree_skb(oob_skb); 2609 } 2610 2611 mutex_unlock(&u->iolock); 2612 2613 if (chunk < 0) 2614 return -EFAULT; 2615 2616 state->msg->msg_flags |= MSG_OOB; 2617 return 1; 2618 } 2619 2620 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, 2621 int flags, int copied) 2622 { 2623 struct unix_sock *u = unix_sk(sk); 2624 2625 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { 2626 skb_unlink(skb, &sk->sk_receive_queue); 2627 consume_skb(skb); 2628 skb = NULL; 2629 } else { 2630 if (skb == u->oob_skb) { 2631 if (copied) { 2632 skb = NULL; 2633 } else if (sock_flag(sk, SOCK_URGINLINE)) { 2634 if (!(flags & MSG_PEEK)) { 2635 WRITE_ONCE(u->oob_skb, NULL); 2636 consume_skb(skb); 2637 } 2638 } else if (!(flags & MSG_PEEK)) { 2639 skb_unlink(skb, &sk->sk_receive_queue); 2640 consume_skb(skb); 2641 skb = skb_peek(&sk->sk_receive_queue); 2642 } 2643 } 2644 } 2645 return skb; 2646 } 2647 #endif 2648 2649 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) 2650 { 2651 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) 2652 return -ENOTCONN; 2653 2654 return unix_read_skb(sk, recv_actor); 2655 } 2656 2657 static int unix_stream_read_generic(struct unix_stream_read_state *state, 2658 bool freezable) 2659 { 2660 struct scm_cookie scm; 2661 struct socket *sock = state->socket; 2662 struct sock *sk = sock->sk; 2663 
struct unix_sock *u = unix_sk(sk); 2664 int copied = 0; 2665 int flags = state->flags; 2666 int noblock = flags & MSG_DONTWAIT; 2667 bool check_creds = false; 2668 int target; 2669 int err = 0; 2670 long timeo; 2671 int skip; 2672 size_t size = state->size; 2673 unsigned int last_len; 2674 2675 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { 2676 err = -EINVAL; 2677 goto out; 2678 } 2679 2680 if (unlikely(flags & MSG_OOB)) { 2681 err = -EOPNOTSUPP; 2682 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2683 err = unix_stream_recv_urg(state); 2684 #endif 2685 goto out; 2686 } 2687 2688 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); 2689 timeo = sock_rcvtimeo(sk, noblock); 2690 2691 memset(&scm, 0, sizeof(scm)); 2692 2693 /* Lock the socket to prevent queue disordering 2694 * while sleeps in memcpy_tomsg 2695 */ 2696 mutex_lock(&u->iolock); 2697 2698 skip = max(sk_peek_offset(sk, flags), 0); 2699 2700 do { 2701 int chunk; 2702 bool drop_skb; 2703 struct sk_buff *skb, *last; 2704 2705 redo: 2706 unix_state_lock(sk); 2707 if (sock_flag(sk, SOCK_DEAD)) { 2708 err = -ECONNRESET; 2709 goto unlock; 2710 } 2711 last = skb = skb_peek(&sk->sk_receive_queue); 2712 last_len = last ? last->len : 0; 2713 2714 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 2715 if (skb) { 2716 skb = manage_oob(skb, sk, flags, copied); 2717 if (!skb) { 2718 unix_state_unlock(sk); 2719 if (copied) 2720 break; 2721 goto redo; 2722 } 2723 } 2724 #endif 2725 again: 2726 if (skb == NULL) { 2727 if (copied >= target) 2728 goto unlock; 2729 2730 /* 2731 * POSIX 1003.1g mandates this order. 2732 */ 2733 2734 err = sock_error(sk); 2735 if (err) 2736 goto unlock; 2737 if (sk->sk_shutdown & RCV_SHUTDOWN) 2738 goto unlock; 2739 2740 unix_state_unlock(sk); 2741 if (!timeo) { 2742 err = -EAGAIN; 2743 break; 2744 } 2745 2746 mutex_unlock(&u->iolock); 2747 2748 timeo = unix_stream_data_wait(sk, timeo, last, 2749 last_len, freezable); 2750 2751 if (signal_pending(current)) { 2752 err = sock_intr_errno(timeo); 2753 scm_destroy(&scm); 2754 goto out; 2755 } 2756 2757 mutex_lock(&u->iolock); 2758 goto redo; 2759 unlock: 2760 unix_state_unlock(sk); 2761 break; 2762 } 2763 2764 while (skip >= unix_skb_len(skb)) { 2765 skip -= unix_skb_len(skb); 2766 last = skb; 2767 last_len = skb->len; 2768 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2769 if (!skb) 2770 goto again; 2771 } 2772 2773 unix_state_unlock(sk); 2774 2775 if (check_creds) { 2776 /* Never glue messages from different writers */ 2777 if (!unix_skb_scm_eq(skb, &scm)) 2778 break; 2779 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2780 /* Copy credentials */ 2781 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); 2782 unix_set_secdata(&scm, skb); 2783 check_creds = true; 2784 } 2785 2786 /* Copy address just once */ 2787 if (state->msg && state->msg->msg_name) { 2788 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, 2789 state->msg->msg_name); 2790 unix_copy_addr(state->msg, skb->sk); 2791 sunaddr = NULL; 2792 } 2793 2794 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); 2795 skb_get(skb); 2796 chunk = state->recv_actor(skb, skip, chunk, state); 2797 drop_skb = !unix_skb_len(skb); 2798 /* skb is only safe to use if !drop_skb */ 2799 consume_skb(skb); 2800 if (chunk < 0) { 2801 if (copied == 0) 2802 copied = -EFAULT; 2803 break; 2804 } 2805 copied += chunk; 2806 size -= chunk; 2807 2808 if (drop_skb) { 2809 /* the skb was touched by a concurrent reader; 2810 * we should not expect anything from this skb 2811 * anymore and assume it invalid - we can be 2812 * sure it was 
dropped from the socket queue 2813 * 2814 * let's report a short read 2815 */ 2816 err = 0; 2817 break; 2818 } 2819 2820 /* Mark read part of skb as used */ 2821 if (!(flags & MSG_PEEK)) { 2822 UNIXCB(skb).consumed += chunk; 2823 2824 sk_peek_offset_bwd(sk, chunk); 2825 2826 if (UNIXCB(skb).fp) { 2827 scm_stat_del(sk, skb); 2828 unix_detach_fds(&scm, skb); 2829 } 2830 2831 if (unix_skb_len(skb)) 2832 break; 2833 2834 skb_unlink(skb, &sk->sk_receive_queue); 2835 consume_skb(skb); 2836 2837 if (scm.fp) 2838 break; 2839 } else { 2840 /* It is questionable, see note in unix_dgram_recvmsg. 2841 */ 2842 if (UNIXCB(skb).fp) 2843 unix_peek_fds(&scm, skb); 2844 2845 sk_peek_offset_fwd(sk, chunk); 2846 2847 if (UNIXCB(skb).fp) 2848 break; 2849 2850 skip = 0; 2851 last = skb; 2852 last_len = skb->len; 2853 unix_state_lock(sk); 2854 skb = skb_peek_next(skb, &sk->sk_receive_queue); 2855 if (skb) 2856 goto again; 2857 unix_state_unlock(sk); 2858 break; 2859 } 2860 } while (size); 2861 2862 mutex_unlock(&u->iolock); 2863 if (state->msg) 2864 scm_recv(sock, state->msg, &scm, flags); 2865 else 2866 scm_destroy(&scm); 2867 out: 2868 return copied ? : err; 2869 } 2870 2871 static int unix_stream_read_actor(struct sk_buff *skb, 2872 int skip, int chunk, 2873 struct unix_stream_read_state *state) 2874 { 2875 int ret; 2876 2877 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, 2878 state->msg, chunk); 2879 return ret ?: chunk; 2880 } 2881 2882 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, 2883 size_t size, int flags) 2884 { 2885 struct unix_stream_read_state state = { 2886 .recv_actor = unix_stream_read_actor, 2887 .socket = sk->sk_socket, 2888 .msg = msg, 2889 .size = size, 2890 .flags = flags 2891 }; 2892 2893 return unix_stream_read_generic(&state, true); 2894 } 2895 2896 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, 2897 size_t size, int flags) 2898 { 2899 struct unix_stream_read_state state = { 2900 .recv_actor = unix_stream_read_actor, 2901 .socket = sock, 2902 .msg = msg, 2903 .size = size, 2904 .flags = flags 2905 }; 2906 2907 #ifdef CONFIG_BPF_SYSCALL 2908 struct sock *sk = sock->sk; 2909 const struct proto *prot = READ_ONCE(sk->sk_prot); 2910 2911 if (prot != &unix_stream_proto) 2912 return prot->recvmsg(sk, msg, size, flags, NULL); 2913 #endif 2914 return unix_stream_read_generic(&state, true); 2915 } 2916 2917 static int unix_stream_splice_actor(struct sk_buff *skb, 2918 int skip, int chunk, 2919 struct unix_stream_read_state *state) 2920 { 2921 return skb_splice_bits(skb, state->socket->sk, 2922 UNIXCB(skb).consumed + skip, 2923 state->pipe, chunk, state->splice_flags); 2924 } 2925 2926 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, 2927 struct pipe_inode_info *pipe, 2928 size_t size, unsigned int flags) 2929 { 2930 struct unix_stream_read_state state = { 2931 .recv_actor = unix_stream_splice_actor, 2932 .socket = sock, 2933 .pipe = pipe, 2934 .size = size, 2935 .splice_flags = flags, 2936 }; 2937 2938 if (unlikely(*ppos)) 2939 return -ESPIPE; 2940 2941 if (sock->file->f_flags & O_NONBLOCK || 2942 flags & SPLICE_F_NONBLOCK) 2943 state.flags = MSG_DONTWAIT; 2944 2945 return unix_stream_read_generic(&state, false); 2946 } 2947 2948 static int unix_shutdown(struct socket *sock, int mode) 2949 { 2950 struct sock *sk = sock->sk; 2951 struct sock *other; 2952 2953 if (mode < SHUT_RD || mode > SHUT_RDWR) 2954 return -EINVAL; 2955 /* This maps: 2956 * SHUT_RD (0) -> RCV_SHUTDOWN (1) 2957 * SHUT_WR (1) -> SEND_SHUTDOWN (2) 
2958 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3) 2959 */ 2960 ++mode; 2961 2962 unix_state_lock(sk); 2963 sk->sk_shutdown |= mode; 2964 other = unix_peer(sk); 2965 if (other) 2966 sock_hold(other); 2967 unix_state_unlock(sk); 2968 sk->sk_state_change(sk); 2969 2970 if (other && 2971 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { 2972 2973 int peer_mode = 0; 2974 const struct proto *prot = READ_ONCE(other->sk_prot); 2975 2976 if (prot->unhash) 2977 prot->unhash(other); 2978 if (mode&RCV_SHUTDOWN) 2979 peer_mode |= SEND_SHUTDOWN; 2980 if (mode&SEND_SHUTDOWN) 2981 peer_mode |= RCV_SHUTDOWN; 2982 unix_state_lock(other); 2983 other->sk_shutdown |= peer_mode; 2984 unix_state_unlock(other); 2985 other->sk_state_change(other); 2986 if (peer_mode == SHUTDOWN_MASK) 2987 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); 2988 else if (peer_mode & RCV_SHUTDOWN) 2989 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); 2990 } 2991 if (other) 2992 sock_put(other); 2993 2994 return 0; 2995 } 2996 2997 long unix_inq_len(struct sock *sk) 2998 { 2999 struct sk_buff *skb; 3000 long amount = 0; 3001 3002 if (sk->sk_state == TCP_LISTEN) 3003 return -EINVAL; 3004 3005 spin_lock(&sk->sk_receive_queue.lock); 3006 if (sk->sk_type == SOCK_STREAM || 3007 sk->sk_type == SOCK_SEQPACKET) { 3008 skb_queue_walk(&sk->sk_receive_queue, skb) 3009 amount += unix_skb_len(skb); 3010 } else { 3011 skb = skb_peek(&sk->sk_receive_queue); 3012 if (skb) 3013 amount = skb->len; 3014 } 3015 spin_unlock(&sk->sk_receive_queue.lock); 3016 3017 return amount; 3018 } 3019 EXPORT_SYMBOL_GPL(unix_inq_len); 3020 3021 long unix_outq_len(struct sock *sk) 3022 { 3023 return sk_wmem_alloc_get(sk); 3024 } 3025 EXPORT_SYMBOL_GPL(unix_outq_len); 3026 3027 static int unix_open_file(struct sock *sk) 3028 { 3029 struct path path; 3030 struct file *f; 3031 int fd; 3032 3033 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 3034 return -EPERM; 3035 3036 if (!smp_load_acquire(&unix_sk(sk)->addr)) 3037 return -ENOENT; 3038 3039 path = unix_sk(sk)->path; 3040 if (!path.dentry) 3041 return -ENOENT; 3042 3043 path_get(&path); 3044 3045 fd = get_unused_fd_flags(O_CLOEXEC); 3046 if (fd < 0) 3047 goto out; 3048 3049 f = dentry_open(&path, O_PATH, current_cred()); 3050 if (IS_ERR(f)) { 3051 put_unused_fd(fd); 3052 fd = PTR_ERR(f); 3053 goto out; 3054 } 3055 3056 fd_install(fd, f); 3057 out: 3058 path_put(&path); 3059 3060 return fd; 3061 } 3062 3063 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3064 { 3065 struct sock *sk = sock->sk; 3066 long amount = 0; 3067 int err; 3068 3069 switch (cmd) { 3070 case SIOCOUTQ: 3071 amount = unix_outq_len(sk); 3072 err = put_user(amount, (int __user *)arg); 3073 break; 3074 case SIOCINQ: 3075 amount = unix_inq_len(sk); 3076 if (amount < 0) 3077 err = amount; 3078 else 3079 err = put_user(amount, (int __user *)arg); 3080 break; 3081 case SIOCUNIXFILE: 3082 err = unix_open_file(sk); 3083 break; 3084 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3085 case SIOCATMARK: 3086 { 3087 struct sk_buff *skb; 3088 int answ = 0; 3089 3090 skb = skb_peek(&sk->sk_receive_queue); 3091 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb)) 3092 answ = 1; 3093 err = put_user(answ, (int __user *)arg); 3094 } 3095 break; 3096 #endif 3097 default: 3098 err = -ENOIOCTLCMD; 3099 break; 3100 } 3101 return err; 3102 } 3103 3104 #ifdef CONFIG_COMPAT 3105 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 3106 { 3107 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); 3108 } 3109 
#endif 3110 3111 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) 3112 { 3113 struct sock *sk = sock->sk; 3114 __poll_t mask; 3115 3116 sock_poll_wait(file, sock, wait); 3117 mask = 0; 3118 3119 /* exceptional events? */ 3120 if (sk->sk_err) 3121 mask |= EPOLLERR; 3122 if (sk->sk_shutdown == SHUTDOWN_MASK) 3123 mask |= EPOLLHUP; 3124 if (sk->sk_shutdown & RCV_SHUTDOWN) 3125 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3126 3127 /* readable? */ 3128 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3129 mask |= EPOLLIN | EPOLLRDNORM; 3130 if (sk_is_readable(sk)) 3131 mask |= EPOLLIN | EPOLLRDNORM; 3132 #if IS_ENABLED(CONFIG_AF_UNIX_OOB) 3133 if (READ_ONCE(unix_sk(sk)->oob_skb)) 3134 mask |= EPOLLPRI; 3135 #endif 3136 3137 /* Connection-based need to check for termination and startup */ 3138 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && 3139 sk->sk_state == TCP_CLOSE) 3140 mask |= EPOLLHUP; 3141 3142 /* 3143 * we set writable also when the other side has shut down the 3144 * connection. This prevents stuck sockets. 3145 */ 3146 if (unix_writable(sk)) 3147 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3148 3149 return mask; 3150 } 3151 3152 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, 3153 poll_table *wait) 3154 { 3155 struct sock *sk = sock->sk, *other; 3156 unsigned int writable; 3157 __poll_t mask; 3158 3159 sock_poll_wait(file, sock, wait); 3160 mask = 0; 3161 3162 /* exceptional events? */ 3163 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) 3164 mask |= EPOLLERR | 3165 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); 3166 3167 if (sk->sk_shutdown & RCV_SHUTDOWN) 3168 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; 3169 if (sk->sk_shutdown == SHUTDOWN_MASK) 3170 mask |= EPOLLHUP; 3171 3172 /* readable? */ 3173 if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) 3174 mask |= EPOLLIN | EPOLLRDNORM; 3175 if (sk_is_readable(sk)) 3176 mask |= EPOLLIN | EPOLLRDNORM; 3177 3178 /* Connection-based need to check for termination and startup */ 3179 if (sk->sk_type == SOCK_SEQPACKET) { 3180 if (sk->sk_state == TCP_CLOSE) 3181 mask |= EPOLLHUP; 3182 /* connection hasn't started yet? */ 3183 if (sk->sk_state == TCP_SYN_SENT) 3184 return mask; 3185 } 3186 3187 /* No write status requested, avoid expensive OUT tests. 
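	 *
	 * For a connected datagram socket the OUT test can mean taking
	 * unix_state_lock() and inspecting the peer's receive queue, so
	 * it is skipped unless the caller actually asked for writability.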
*/ 3188 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) 3189 return mask; 3190 3191 writable = unix_writable(sk); 3192 if (writable) { 3193 unix_state_lock(sk); 3194 3195 other = unix_peer(sk); 3196 if (other && unix_peer(other) != sk && 3197 unix_recvq_full_lockless(other) && 3198 unix_dgram_peer_wake_me(sk, other)) 3199 writable = 0; 3200 3201 unix_state_unlock(sk); 3202 } 3203 3204 if (writable) 3205 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; 3206 else 3207 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 3208 3209 return mask; 3210 } 3211 3212 #ifdef CONFIG_PROC_FS 3213 3214 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) 3215 3216 #define get_bucket(x) ((x) >> BUCKET_SPACE) 3217 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1)) 3218 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) 3219 3220 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) 3221 { 3222 unsigned long offset = get_offset(*pos); 3223 unsigned long bucket = get_bucket(*pos); 3224 struct sock *sk; 3225 unsigned long count = 0; 3226 3227 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { 3228 if (sock_net(sk) != seq_file_net(seq)) 3229 continue; 3230 if (++count == offset) 3231 break; 3232 } 3233 3234 return sk; 3235 } 3236 3237 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3238 { 3239 unsigned long bucket = get_bucket(*pos); 3240 struct sock *sk; 3241 3242 while (bucket < ARRAY_SIZE(unix_socket_table)) { 3243 spin_lock(&unix_table_locks[bucket]); 3244 3245 sk = unix_from_bucket(seq, pos); 3246 if (sk) 3247 return sk; 3248 3249 spin_unlock(&unix_table_locks[bucket]); 3250 3251 *pos = set_bucket_offset(++bucket, 1); 3252 } 3253 3254 return NULL; 3255 } 3256 3257 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3258 loff_t *pos) 3259 { 3260 unsigned long bucket = get_bucket(*pos); 3261 3262 for (sk = sk_next(sk); sk; sk = sk_next(sk)) 3263 if (sock_net(sk) == seq_file_net(seq)) 3264 return sk; 3265 3266 spin_unlock(&unix_table_locks[bucket]); 3267 3268 *pos = set_bucket_offset(++bucket, 1); 3269 3270 return unix_get_first(seq, pos); 3271 } 3272 3273 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) 3274 { 3275 if (!*pos) 3276 return SEQ_START_TOKEN; 3277 3278 return unix_get_first(seq, pos); 3279 } 3280 3281 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3282 { 3283 ++*pos; 3284 3285 if (v == SEQ_START_TOKEN) 3286 return unix_get_first(seq, pos); 3287 3288 return unix_get_next(seq, v, pos); 3289 } 3290 3291 static void unix_seq_stop(struct seq_file *seq, void *v) 3292 { 3293 struct sock *sk = v; 3294 3295 if (sk) 3296 spin_unlock(&unix_table_locks[sk->sk_hash]); 3297 } 3298 3299 static int unix_seq_show(struct seq_file *seq, void *v) 3300 { 3301 3302 if (v == SEQ_START_TOKEN) 3303 seq_puts(seq, "Num RefCount Protocol Flags Type St " 3304 "Inode Path\n"); 3305 else { 3306 struct sock *s = v; 3307 struct unix_sock *u = unix_sk(s); 3308 unix_state_lock(s); 3309 3310 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu", 3311 s, 3312 refcount_read(&s->sk_refcnt), 3313 0, 3314 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0, 3315 s->sk_type, 3316 s->sk_socket ? 3317 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) : 3318 (s->sk_state == TCP_ESTABLISHED ? 
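				   /* no struct socket attached yet, e.g. a
				    * connection still sitting on a listener's
				    * accept queue
				    */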
SS_CONNECTING : SS_DISCONNECTING), 3319 sock_i_ino(s)); 3320 3321 if (u->addr) { // under unix_table_locks here 3322 int i, len; 3323 seq_putc(seq, ' '); 3324 3325 i = 0; 3326 len = u->addr->len - 3327 offsetof(struct sockaddr_un, sun_path); 3328 if (u->addr->name->sun_path[0]) { 3329 len--; 3330 } else { 3331 seq_putc(seq, '@'); 3332 i++; 3333 } 3334 for ( ; i < len; i++) 3335 seq_putc(seq, u->addr->name->sun_path[i] ?: 3336 '@'); 3337 } 3338 unix_state_unlock(s); 3339 seq_putc(seq, '\n'); 3340 } 3341 3342 return 0; 3343 } 3344 3345 static const struct seq_operations unix_seq_ops = { 3346 .start = unix_seq_start, 3347 .next = unix_seq_next, 3348 .stop = unix_seq_stop, 3349 .show = unix_seq_show, 3350 }; 3351 3352 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3353 struct bpf_unix_iter_state { 3354 struct seq_net_private p; 3355 unsigned int cur_sk; 3356 unsigned int end_sk; 3357 unsigned int max_sk; 3358 struct sock **batch; 3359 bool st_bucket_done; 3360 }; 3361 3362 struct bpf_iter__unix { 3363 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3364 __bpf_md_ptr(struct unix_sock *, unix_sk); 3365 uid_t uid __aligned(8); 3366 }; 3367 3368 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3369 struct unix_sock *unix_sk, uid_t uid) 3370 { 3371 struct bpf_iter__unix ctx; 3372 3373 meta->seq_num--; /* skip SEQ_START_TOKEN */ 3374 ctx.meta = meta; 3375 ctx.unix_sk = unix_sk; 3376 ctx.uid = uid; 3377 return bpf_iter_run_prog(prog, &ctx); 3378 } 3379 3380 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3381 3382 { 3383 struct bpf_unix_iter_state *iter = seq->private; 3384 unsigned int expected = 1; 3385 struct sock *sk; 3386 3387 sock_hold(start_sk); 3388 iter->batch[iter->end_sk++] = start_sk; 3389 3390 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3391 if (sock_net(sk) != seq_file_net(seq)) 3392 continue; 3393 3394 if (iter->end_sk < iter->max_sk) { 3395 sock_hold(sk); 3396 iter->batch[iter->end_sk++] = sk; 3397 } 3398 3399 expected++; 3400 } 3401 3402 spin_unlock(&unix_table_locks[start_sk->sk_hash]); 3403 3404 return expected; 3405 } 3406 3407 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3408 { 3409 while (iter->cur_sk < iter->end_sk) 3410 sock_put(iter->batch[iter->cur_sk++]); 3411 } 3412 3413 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3414 unsigned int new_batch_sz) 3415 { 3416 struct sock **new_batch; 3417 3418 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3419 GFP_USER | __GFP_NOWARN); 3420 if (!new_batch) 3421 return -ENOMEM; 3422 3423 bpf_iter_unix_put_batch(iter); 3424 kvfree(iter->batch); 3425 iter->batch = new_batch; 3426 iter->max_sk = new_batch_sz; 3427 3428 return 0; 3429 } 3430 3431 static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3432 loff_t *pos) 3433 { 3434 struct bpf_unix_iter_state *iter = seq->private; 3435 unsigned int expected; 3436 bool resized = false; 3437 struct sock *sk; 3438 3439 if (iter->st_bucket_done) 3440 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3441 3442 again: 3443 /* Get a new batch */ 3444 iter->cur_sk = 0; 3445 iter->end_sk = 0; 3446 3447 sk = unix_get_first(seq, pos); 3448 if (!sk) 3449 return NULL; /* Done */ 3450 3451 expected = bpf_iter_unix_hold_batch(seq, sk); 3452 3453 if (iter->end_sk == expected) { 3454 iter->st_bucket_done = true; 3455 return sk; 3456 } 3457 3458 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3459 resized = true; 3460 goto again; 3461 } 3462 
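	/* We still could not hold every socket in the bucket (the resize
	 * attempt failed or was already made once); return the partial
	 * batch so the iterator keeps making progress.
	 */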
3463 return sk; 3464 } 3465 3466 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3467 { 3468 if (!*pos) 3469 return SEQ_START_TOKEN; 3470 3471 /* bpf iter does not support lseek, so it always 3472 * continue from where it was stop()-ped. 3473 */ 3474 return bpf_iter_unix_batch(seq, pos); 3475 } 3476 3477 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3478 { 3479 struct bpf_unix_iter_state *iter = seq->private; 3480 struct sock *sk; 3481 3482 /* Whenever seq_next() is called, the iter->cur_sk is 3483 * done with seq_show(), so advance to the next sk in 3484 * the batch. 3485 */ 3486 if (iter->cur_sk < iter->end_sk) 3487 sock_put(iter->batch[iter->cur_sk++]); 3488 3489 ++*pos; 3490 3491 if (iter->cur_sk < iter->end_sk) 3492 sk = iter->batch[iter->cur_sk]; 3493 else 3494 sk = bpf_iter_unix_batch(seq, pos); 3495 3496 return sk; 3497 } 3498 3499 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3500 { 3501 struct bpf_iter_meta meta; 3502 struct bpf_prog *prog; 3503 struct sock *sk = v; 3504 uid_t uid; 3505 bool slow; 3506 int ret; 3507 3508 if (v == SEQ_START_TOKEN) 3509 return 0; 3510 3511 slow = lock_sock_fast(sk); 3512 3513 if (unlikely(sk_unhashed(sk))) { 3514 ret = SEQ_SKIP; 3515 goto unlock; 3516 } 3517 3518 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3519 meta.seq = seq; 3520 prog = bpf_iter_get_info(&meta, false); 3521 ret = unix_prog_seq_show(prog, &meta, v, uid); 3522 unlock: 3523 unlock_sock_fast(sk, slow); 3524 return ret; 3525 } 3526 3527 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3528 { 3529 struct bpf_unix_iter_state *iter = seq->private; 3530 struct bpf_iter_meta meta; 3531 struct bpf_prog *prog; 3532 3533 if (!v) { 3534 meta.seq = seq; 3535 prog = bpf_iter_get_info(&meta, true); 3536 if (prog) 3537 (void)unix_prog_seq_show(prog, &meta, v, 0); 3538 } 3539 3540 if (iter->cur_sk < iter->end_sk) 3541 bpf_iter_unix_put_batch(iter); 3542 } 3543 3544 static const struct seq_operations bpf_iter_unix_seq_ops = { 3545 .start = bpf_iter_unix_seq_start, 3546 .next = bpf_iter_unix_seq_next, 3547 .stop = bpf_iter_unix_seq_stop, 3548 .show = bpf_iter_unix_seq_show, 3549 }; 3550 #endif 3551 #endif 3552 3553 static const struct net_proto_family unix_family_ops = { 3554 .family = PF_UNIX, 3555 .create = unix_create, 3556 .owner = THIS_MODULE, 3557 }; 3558 3559 3560 static int __net_init unix_net_init(struct net *net) 3561 { 3562 int error = -ENOMEM; 3563 3564 net->unx.sysctl_max_dgram_qlen = 10; 3565 if (unix_sysctl_register(net)) 3566 goto out; 3567 3568 #ifdef CONFIG_PROC_FS 3569 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, 3570 sizeof(struct seq_net_private))) { 3571 unix_sysctl_unregister(net); 3572 goto out; 3573 } 3574 #endif 3575 error = 0; 3576 out: 3577 return error; 3578 } 3579 3580 static void __net_exit unix_net_exit(struct net *net) 3581 { 3582 unix_sysctl_unregister(net); 3583 remove_proc_entry("unix", net->proc_net); 3584 } 3585 3586 static struct pernet_operations unix_net_ops = { 3587 .init = unix_net_init, 3588 .exit = unix_net_exit, 3589 }; 3590 3591 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3592 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3593 struct unix_sock *unix_sk, uid_t uid) 3594 3595 #define INIT_BATCH_SZ 16 3596 3597 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3598 { 3599 struct bpf_unix_iter_state *iter = priv_data; 3600 int err; 3601 3602 err = 
bpf_iter_init_seq_net(priv_data, aux); 3603 if (err) 3604 return err; 3605 3606 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3607 if (err) { 3608 bpf_iter_fini_seq_net(priv_data); 3609 return err; 3610 } 3611 3612 return 0; 3613 } 3614 3615 static void bpf_iter_fini_unix(void *priv_data) 3616 { 3617 struct bpf_unix_iter_state *iter = priv_data; 3618 3619 bpf_iter_fini_seq_net(priv_data); 3620 kvfree(iter->batch); 3621 } 3622 3623 static const struct bpf_iter_seq_info unix_seq_info = { 3624 .seq_ops = &bpf_iter_unix_seq_ops, 3625 .init_seq_private = bpf_iter_init_unix, 3626 .fini_seq_private = bpf_iter_fini_unix, 3627 .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3628 }; 3629 3630 static const struct bpf_func_proto * 3631 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3632 const struct bpf_prog *prog) 3633 { 3634 switch (func_id) { 3635 case BPF_FUNC_setsockopt: 3636 return &bpf_sk_setsockopt_proto; 3637 case BPF_FUNC_getsockopt: 3638 return &bpf_sk_getsockopt_proto; 3639 default: 3640 return NULL; 3641 } 3642 } 3643 3644 static struct bpf_iter_reg unix_reg_info = { 3645 .target = "unix", 3646 .ctx_arg_info_size = 1, 3647 .ctx_arg_info = { 3648 { offsetof(struct bpf_iter__unix, unix_sk), 3649 PTR_TO_BTF_ID_OR_NULL }, 3650 }, 3651 .get_func_proto = bpf_iter_unix_get_func_proto, 3652 .seq_info = &unix_seq_info, 3653 }; 3654 3655 static void __init bpf_iter_register(void) 3656 { 3657 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX]; 3658 if (bpf_iter_reg_target(&unix_reg_info)) 3659 pr_warn("Warning: could not register bpf iterator unix\n"); 3660 } 3661 #endif 3662 3663 static int __init af_unix_init(void) 3664 { 3665 int i, rc = -1; 3666 3667 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3668 3669 for (i = 0; i < 2 * UNIX_HASH_SIZE; i++) 3670 spin_lock_init(&unix_table_locks[i]); 3671 3672 rc = proto_register(&unix_dgram_proto, 1); 3673 if (rc != 0) { 3674 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3675 goto out; 3676 } 3677 3678 rc = proto_register(&unix_stream_proto, 1); 3679 if (rc != 0) { 3680 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__); 3681 goto out; 3682 } 3683 3684 sock_register(&unix_family_ops); 3685 register_pernet_subsys(&unix_net_ops); 3686 unix_bpf_build_proto(); 3687 3688 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 3689 bpf_iter_register(); 3690 #endif 3691 3692 out: 3693 return rc; 3694 } 3695 3696 static void __exit af_unix_exit(void) 3697 { 3698 sock_unregister(PF_UNIX); 3699 proto_unregister(&unix_dgram_proto); 3700 proto_unregister(&unix_stream_proto); 3701 unregister_pernet_subsys(&unix_net_ops); 3702 } 3703 3704 /* Earlier than device_initcall() so that other drivers invoking 3705 request_module() don't end up in a loop when modprobe tries 3706 to use a UNIX socket. But later than subsys_initcall() because 3707 we depend on stuff initialised there */ 3708 fs_initcall(af_unix_init); 3709 module_exit(af_unix_exit); 3710 3711 MODULE_LICENSE("GPL"); 3712 MODULE_ALIAS_NETPROTO(PF_UNIX); 3713