// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
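
/*
 * Usage sketch (userspace, illustrative only; not part of this file):
 * binding to an abstract address as described above.  The name is the
 * bytes of sun_path with a leading NUL; addrlen counts only the bytes
 * actually used, so the name is not NUL terminated and may itself
 * contain zero bytes.
 */
#if 0	/* not compiled; userspace example */
	struct sockaddr_un sun = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);

	memcpy(sun.sun_path, "\0example", 8);	/* leading 0 => abstract */
	if (bind(fd, (struct sockaddr *)&sun,
		 offsetof(struct sockaddr_un, sun_path) + 8) < 0)
		perror("bind");
#endif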

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL terminated (an FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle.  108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
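
/*
 * Worked example (illustrative, not in the original file): for a
 * filesystem address {AF_UNIX, "/tmp/x"} unix_mkname() returns
 * strlen("/tmp/x") + 1 + sizeof(short) = 6 + 1 + 2 = 9 and leaves
 * *hashp == 0; for an abstract address the caller-supplied len is kept
 * and *hashp is the folded checksum over the name bytes.
 */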

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
			    unsigned hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);
	__unix_insert_socket(&unix_socket_table[hash], sk);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large".  This means there's a second writeability condition
 * poll and sendmsg need to test.  The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far.  This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue.  This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
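
/*
 * Minimal sketch of the relay pattern just described (illustrative only;
 * relay_entry, my_wake, src_waitqueue and dst_waitqueue are hypothetical
 * names): a custom wake function lets one wait queue forward wakeups to
 * another.
 */
#if 0	/* not compiled; pattern sketch */
static int my_wake(wait_queue_entry_t *q, unsigned mode, int flags, void *key)
{
	wait_queue_head_t *dst = q->private;	/* stashed by the enqueuer */

	wake_up_interruptible_poll(dst, key_to_poll(key));
	return 0;
}

	/* enqueue side, run under src_waitqueue's lock: */
	init_waitqueue_func_entry(&relay_entry, my_wake);
	relay_entry.private = &dst_waitqueue;
	__add_wait_queue(&src_waitqueue, &relay_entry);
#endif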

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED.  Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
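
/*
 * Worked example (illustrative): unix_writable() treats a socket as
 * writable while outstanding write memory is at most a quarter of the
 * send buffer, i.e. wmem_alloc << 2 <= sk_sndbuf.  With sk_sndbuf of
 * 16384 bytes the socket stays writable while wmem_alloc <= 4096.
 */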

/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer.  First, it allows us to
 * do flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error.  Messages are lost.  Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			sk_error_report(other);
		}
	}
	sk->sk_state = other->sk_state = TCP_CLOSE;
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		 atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket.  Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot.  In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
			  sk_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}
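
/*
 * Usage sketch (userspace, illustrative only): unix_set_peek_off() backs
 * SO_PEEK_OFF.  After setting an offset, MSG_PEEK reads start that many
 * bytes into the queued data, and the offset advances with each peek.
 */
#if 0	/* not compiled; userspace example */
	int off = 0;

	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
	recv(fd, buf, sizeof(buf), MSG_PEEK);	/* peeks, advances offset */
#endif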

#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;

	if (sk) {
		u = unix_sk(sock->sk);
		seq_printf(m, "scm_fds: %u\n",
			   atomic_read(&u->scm_stat.nr_fds));
	}
}
#else
#define unix_show_fdinfo NULL
#endif

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_sock =	unix_read_sock,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}

struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 * nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
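
/*
 * Usage sketch (userspace, illustrative only): unix_create() quietly maps
 * SOCK_RAW onto SOCK_DGRAM, so both calls below yield a datagram socket.
 */
#if 0	/* not compiled; userspace example */
	int a = socket(AF_UNIX, SOCK_DGRAM, 0);
	int b = socket(AF_UNIX, SOCK_RAW, 0);	/* behaves as SOCK_DGRAM */
#endif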

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
	addr->hash ^= sk->sk_type;

	spin_lock(&unix_table_lock);
	ordernum = (ordernum + 1) & 0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}

	__unix_set_addr(sk, addr, addr->hash);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
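
/*
 * Usage sketch (userspace, illustrative only): autobind is triggered by a
 * bind() whose address length covers only sun_family; the kernel then
 * assigns a 5-hex-digit abstract name such as "\0""a1b2c".
 */
#if 0	/* not compiled; userspace example */
	struct sockaddr_un sun = { .sun_family = AF_UNIX };

	bind(fd, (struct sockaddr *)&sun, sizeof(sa_family_t));
#endif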

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = path_permission(&path, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type ^ hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
{
	struct unix_sock *u = unix_sk(sk);
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	struct user_namespace *ns; // barf...
	struct path parent;
	struct dentry *dentry;
	unsigned int hash;
	int err;

	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	ns = mnt_user_ns(parent.mnt);

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
	if (err)
		goto out;
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	addr->hash = UNIX_HASH_SIZE;
	hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
	spin_lock(&unix_table_lock);
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr(sk, addr, hash);
	spin_unlock(&unix_table_lock);
	mutex_unlock(&u->bindlock);
	done_path_create(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod?  unlink what we'd created... */
	vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
out:
	done_path_create(&parent, dentry);
	return err;
}

static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
{
	struct unix_sock *u = unix_sk(sk);
	int err;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr) {
		mutex_unlock(&u->bindlock);
		return -EINVAL;
	}

	spin_lock(&unix_table_lock);
	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		mutex_unlock(&u->bindlock);
		return -EADDRINUSE;
	}
	__unix_set_addr(sk, addr, addr->hash);
	spin_unlock(&unix_table_lock);
	mutex_unlock(&u->bindlock);
	return 0;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;

	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	if (addr_len == sizeof(short))
		return unix_autobind(sock);

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		return err;
	addr_len = err;
	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0])
		err = unix_bind_bsd(sk, addr);
	else
		err = unix_bind_abstract(sk, addr);
	if (err)
		unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
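
/*
 * Note (illustrative): taking the two state locks in a global order --
 * here, ascending pointer value -- is the classic deadlock-avoidance
 * discipline.  Any two callers racing on the same pair (sk1, sk2)
 * acquire the locks in the same order, so neither can hold one lock
 * while waiting forever on the other.
 */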

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	if (unix_peer(sk))
		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
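
/*
 * Behavioural note (illustrative): when the stream connect() below finds
 * the listener's backlog full, a blocking socket waits in
 * unix_wait_for_peer() while a nonblocking one fails with -EAGAIN (not
 * -EINPROGRESS as TCP would), so callers typically just retry:
 */
#if 0	/* not compiled; userspace example */
	while (connect(fd, (struct sockaddr *)&sun, len) < 0 &&
	       errno == EAGAIN)
		poll(NULL, 0, 10);	/* back off and retry */
#endif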

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is a tricky place.  We need to grab our state lock and cannot
	   drop the lock on peer.  It is dangerous because deadlock is
	   possible.  Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state.  other is TCP_LISTEN; if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open!  Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path, visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	ska->sk_state = TCP_ESTABLISHED;
	skb->sk_state = TCP_ESTABLISHED;
	socka->state  = SS_CONNECTED;
	sockb->state  = SS_CONNECTED;
	return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
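
/*
 * Usage sketch (userspace, illustrative only; fd and cmsg are
 * hypothetical): a receiver that sets SO_PASSCRED gets the sender's
 * pid/uid/gid as an SCM_CREDENTIALS control message, even when the
 * sender used plain write().
 */
#if 0	/* not compiled; userspace example */
	int one = 1;

	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
	/* then parse SCM_CREDENTIALS from recvmsg() cmsgs:
	 * struct ucred *uc = (struct ucred *)CMSG_DATA(cmsg);
	 */
#endif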

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}

/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
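
/*
 * Worked example (illustrative): with 4 KiB pages, get_order(32768) == 3,
 * so UNIX_SKB_FRAGS_SZ == 4096 << 3 == 32768, i.e. up to eight pages of
 * fragments per stream skb; with 64 KiB pages this is a single page.
 */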

static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		scm_stat_add(other, skb);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}

static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is the fast path; consume_skb() would also be
		 * safe with newskb == NULL, so the check does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}

static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}
static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}

int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
			 int flags)
{
	struct scm_cookie scm;
	struct socket *sock = sk->sk_socket;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
					      &skip, &err, &last);
		if (skb) {
			if (!(flags & MSG_PEEK))
				scm_stat_del(sk, skb);
			break;
		}

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
					      &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable what to do on PEEK. We could:
		 *  - not return fds: good, but too simple 8)
		 *  - return fds, and not return them again on a later
		 *    read (the old strategy, apparently wrong)
		 *  - clone the fds (chosen here; it is the most
		 *    universal solution)
		 *
		 * POSIX 1003.1g does not actually define this clearly
		 * at all. POSIX 1003.1g doesn't define a lot of things
		 * clearly however!
		 */

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
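
/*
 * Illustrative userspace sketch of the MSG_TRUNC behaviour implemented
 * above (an assumption for documentation only, not part of this file):
 * with MSG_TRUNC the return value is the real datagram length even when
 * the buffer was smaller, so the caller can detect truncation.
 *
 *	char small[4];
 *	ssize_t n = recv(fd, small, sizeof(small), MSG_TRUNC);
 *
 *	if (n > (ssize_t)sizeof(small))
 *		;	// datagram was n bytes; only 4 were copied
 */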
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock *sk = sock->sk;

#ifdef CONFIG_BPF_SYSCALL
	if (sk->sk_prot != &unix_proto)
		return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
					    flags & ~MSG_DONTWAIT, NULL);
#endif
	return __unix_dgram_recvmsg(sk, msg, size, flags);
}

static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
			  sk_read_actor_t recv_actor)
{
	int copied = 0;

	while (1) {
		struct unix_sock *u = unix_sk(sk);
		struct sk_buff *skb;
		int used, err;

		mutex_lock(&u->iolock);
		skb = skb_recv_datagram(sk, 0, 1, &err);
		mutex_unlock(&u->iolock);
		if (!skb)
			return err;

		used = recv_actor(desc, skb, 0, skb->len);
		if (used <= 0) {
			if (!copied)
				copied = used;
			kfree_skb(skb);
			break;
		} else if (used <= skb->len) {
			copied += used;
		}

		kfree_skb(skb);
		if (!desc->count)
			break;
	}

	return copied;
}

/*
 * Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};
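
/*
 * unix_stream_read_state decouples the receive loop from its consumer:
 * unix_stream_read_generic() below walks the queue and hands each chunk
 * to state->recv_actor, which either copies into a msghdr (recvmsg) or
 * feeds a pipe (splice). A minimal sketch of a hypothetical extra
 * consumer, using only the fields declared above (illustrative, not
 * part of this file):
 *
 *	static int count_actor(struct sk_buff *skb, int skip, int chunk,
 *			       struct unix_stream_read_state *state)
 *	{
 *		return chunk;	// claim the chunk without copying it
 *	}
 */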
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg()
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 * POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
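
/*
 * The "target" computed above means a blocking stream read returns once
 * at least sock_rcvlowat() bytes (or, with MSG_WAITALL, the full
 * request) have been gathered. Illustrative userspace sketch (an
 * assumption for documentation only, not part of this file):
 *
 *	int lowat = 128;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat));
 *	// A blocking recv() now waits for ~128 bytes instead of
 *	// returning as soon as the first byte arrives.
 *	recv(fd, buf, sizeof(buf), 0);
 */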
static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}

static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}

static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}

static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
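
/*
 * Because the shutdown is mirrored onto the peer above, a half-close is
 * immediately visible on the other end of a connected stream or
 * seqpacket socket. Illustrative userspace sketch for a connected pair
 * sv[0]/sv[1] (an assumption for documentation only, not part of this
 * file):
 *
 *	shutdown(sv[0], SHUT_WR);	// our SEND_SHUTDOWN ...
 *	// ... becomes the peer's RCV_SHUTDOWN: its next read sees EOF
 *	assert(read(sv[1], buf, sizeof(buf)) == 0);
 */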
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}

static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
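
/*
 * Illustrative userspace sketch of the ioctls handled above (an
 * assumption for documentation only, not part of this file): SIOCINQ
 * reports the unread byte count (the next datagram's size on
 * SOCK_DGRAM), SIOCOUTQ the bytes still sitting in the send queue.
 *
 *	int pending;
 *
 *	if (ioctl(fd, SIOCINQ, &pending) == 0)
 *		;	// "pending" bytes can be read without blocking
 */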
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}

static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))

static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
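
/*
 * The seq_file handlers above emit one /proc/net/unix line per socket.
 * Sketch of the format produced by the seq_printf() below (the values
 * here are made up for illustration):
 *
 *	Num               RefCount Protocol Flags    Type St Inode Path
 *	0000000000000000: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 *
 * An '@' prefix in Path marks an abstract-namespace name.
 */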
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations unix_seq_ops = {
	.start = unix_seq_start,
	.next  = unix_seq_next,
	.stop  = unix_seq_stop,
	.show  = unix_seq_show,
};
#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};


static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
	unix_bpf_build_proto();
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);