1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 *
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko Eißfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * has been reached. This won't break
33 * old apps and it will avoid a huge amount
34 * of socks hashed (this is for unix_gc()
35 * performance reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lots of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, nor give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * starting with a 0 byte, so that this name space does not intersect
75 * with BSD names.
76 */
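/* Illustrative sketch only (not kernel code): a userspace bind to an
 * abstract name could look roughly like the following, where "\0example"
 * is a made-up name and the length counts the leading 0 byte but no
 * trailing NUL:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	memcpy(a.sun_path, "\0example", 8);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 */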
77
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80 #include <linux/bpf-cgroup.h>
81 #include <linux/btf_ids.h>
82 #include <linux/dcache.h>
83 #include <linux/errno.h>
84 #include <linux/fcntl.h>
85 #include <linux/file.h>
86 #include <linux/filter.h>
87 #include <linux/fs.h>
88 #include <linux/fs_struct.h>
89 #include <linux/init.h>
90 #include <linux/kernel.h>
91 #include <linux/mount.h>
92 #include <linux/namei.h>
93 #include <linux/net.h>
94 #include <linux/pidfs.h>
95 #include <linux/poll.h>
96 #include <linux/proc_fs.h>
97 #include <linux/sched/signal.h>
98 #include <linux/security.h>
99 #include <linux/seq_file.h>
100 #include <linux/skbuff.h>
101 #include <linux/slab.h>
102 #include <linux/socket.h>
103 #include <linux/splice.h>
104 #include <linux/string.h>
105 #include <linux/uaccess.h>
106 #include <net/af_unix.h>
107 #include <net/net_namespace.h>
108 #include <net/scm.h>
109 #include <net/tcp_states.h>
110 #include <uapi/linux/sockios.h>
111 #include <uapi/linux/termios.h>
112
113 #include "af_unix.h"
114
115 static atomic_long_t unix_nr_socks;
116 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
117 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
118
119 /* SMP locking strategy:
120 * hash table is protected with spinlock.
121 * each socket state is protected by separate spinlock.
122 */
123 #ifdef CONFIG_PROVE_LOCKING
124 #define cmp_ptr(l, r) (((l) > (r)) - ((l) < (r)))
125
126 static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
127 const struct lockdep_map *b)
128 {
129 return cmp_ptr(a, b);
130 }
131
132 static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
133 const struct lockdep_map *_b)
134 {
135 const struct unix_sock *a, *b;
136
137 a = container_of(_a, struct unix_sock, lock.dep_map);
138 b = container_of(_b, struct unix_sock, lock.dep_map);
139
140 if (a->sk.sk_state == TCP_LISTEN) {
141 /* unix_stream_connect(): Before the 2nd unix_state_lock(),
142 *
143 * 1. a is TCP_LISTEN.
144 * 2. b is not a.
145 * 3. concurrent connect(b -> a) must fail.
146 *
147 * Except for 2. & 3., the b's state can be any possible
148 * value due to concurrent connect() or listen().
149 *
150 * 2. is detected in debug_spin_lock_before(), and 3. cannot
151 * be expressed as lock_cmp_fn.
152 */
153 switch (b->sk.sk_state) {
154 case TCP_CLOSE:
155 case TCP_ESTABLISHED:
156 case TCP_LISTEN:
157 return -1;
158 default:
159 /* Invalid case. */
160 return 0;
161 }
162 }
163
164 /* Should never happen. Just to be symmetric. */
165 if (b->sk.sk_state == TCP_LISTEN) {
166 switch (b->sk.sk_state) {
167 case TCP_CLOSE:
168 case TCP_ESTABLISHED:
169 return 1;
170 default:
171 return 0;
172 }
173 }
174
175 /* unix_state_double_lock(): ascending address order. */
176 return cmp_ptr(a, b);
177 }
178
179 static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
180 const struct lockdep_map *_b)
181 {
182 const struct sock *a, *b;
183
184 a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
185 b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
186
187 /* unix_collect_skb(): listener -> embryo order. */
188 if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
189 return -1;
190
191 /* Should never happen. Just to be symmetric. */
192 if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
193 return 1;
194
195 return 0;
196 }
197 #endif
198
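/* Hash layout, as implied by the helpers below: unbound and pathname (BSD)
 * sockets hash into the lower half of the per-netns table, abstract sockets
 * into the upper half. Pathname sockets are additionally chained on the
 * global bsd_socket_buckets so unix_find_socket_byinode() can look them up
 * by inode.
 */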
199 static unsigned int unix_unbound_hash(struct sock *sk)
200 {
201 unsigned long hash = (unsigned long)sk;
202
203 hash ^= hash >> 16;
204 hash ^= hash >> 8;
205 hash ^= sk->sk_type;
206
207 return hash & UNIX_HASH_MOD;
208 }
209
210 static unsigned int unix_bsd_hash(struct inode *i)
211 {
212 return i->i_ino & UNIX_HASH_MOD;
213 }
214
215 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
216 int addr_len, int type)
217 {
218 __wsum csum = csum_partial(sunaddr, addr_len, 0);
219 unsigned int hash;
220
221 hash = (__force unsigned int)csum_fold(csum);
222 hash ^= hash >> 8;
223 hash ^= type;
224
225 return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
226 }
227
228 static void unix_table_double_lock(struct net *net,
229 unsigned int hash1, unsigned int hash2)
230 {
231 if (hash1 == hash2) {
232 spin_lock(&net->unx.table.locks[hash1]);
233 return;
234 }
235
236 if (hash1 > hash2)
237 swap(hash1, hash2);
238
239 spin_lock(&net->unx.table.locks[hash1]);
240 spin_lock(&net->unx.table.locks[hash2]);
241 }
242
243 static void unix_table_double_unlock(struct net *net,
244 unsigned int hash1, unsigned int hash2)
245 {
246 if (hash1 == hash2) {
247 spin_unlock(&net->unx.table.locks[hash1]);
248 return;
249 }
250
251 spin_unlock(&net->unx.table.locks[hash1]);
252 spin_unlock(&net->unx.table.locks[hash2]);
253 }
254
255 #ifdef CONFIG_SECURITY_NETWORK
256 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
257 {
258 UNIXCB(skb).secid = scm->secid;
259 }
260
261 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
262 {
263 scm->secid = UNIXCB(skb).secid;
264 }
265
266 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
267 {
268 return (scm->secid == UNIXCB(skb).secid);
269 }
270 #else
271 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
272 { }
273
274 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
275 { }
276
277 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
278 {
279 return true;
280 }
281 #endif /* CONFIG_SECURITY_NETWORK */
282
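/* A datagram may be sent to "osk" only if it is unconnected or connected
 * back to "sk".
 */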
283 static inline int unix_may_send(struct sock *sk, struct sock *osk)
284 {
285 return !unix_peer(osk) || unix_peer(osk) == sk;
286 }
287
288 static inline int unix_recvq_full_lockless(const struct sock *sk)
289 {
290 return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
291 }
292
293 struct sock *unix_peer_get(struct sock *s)
294 {
295 struct sock *peer;
296
297 unix_state_lock(s);
298 peer = unix_peer(s);
299 if (peer)
300 sock_hold(peer);
301 unix_state_unlock(s);
302 return peer;
303 }
304 EXPORT_SYMBOL_GPL(unix_peer_get);
305
306 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
307 int addr_len)
308 {
309 struct unix_address *addr;
310
311 addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
312 if (!addr)
313 return NULL;
314
315 refcount_set(&addr->refcnt, 1);
316 addr->len = addr_len;
317 memcpy(addr->name, sunaddr, addr_len);
318
319 return addr;
320 }
321
322 static inline void unix_release_addr(struct unix_address *addr)
323 {
324 if (refcount_dec_and_test(&addr->refcnt))
325 kfree(addr);
326 }
327
328 /*
329 * Check unix socket name:
330 * - should not be zero length.
331 * - if it does not start with a zero byte, it should be NUL terminated (FS object)
332 * - if it starts with a zero byte, it is an abstract name.
333 */
334
335 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
336 {
337 if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
338 addr_len > sizeof(*sunaddr))
339 return -EINVAL;
340
341 if (sunaddr->sun_family != AF_UNIX)
342 return -EINVAL;
343
344 return 0;
345 }
346
347 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
348 {
349 struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
350 short offset = offsetof(struct sockaddr_storage, __data);
351
352 BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
353
354 /* This may look like an off by one error but it is a bit more
355 * subtle. 108 is the longest valid AF_UNIX path for a binding.
356 * sun_path[108] doesn't as such exist. However in kernel space
357 * we are guaranteed that it is a valid memory location in our
358 * kernel address buffer because syscall functions always pass
359 * a pointer of struct sockaddr_storage which has a bigger buffer
360 * than 108. Also, we must terminate sun_path for strlen() in
361 * getname_kernel().
362 */
363 addr->__data[addr_len - offset] = 0;
364
365 /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
366 * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
367 * know the actual buffer.
368 */
369 return strlen(addr->__data) + offset + 1;
370 }
371
372 static void __unix_remove_socket(struct sock *sk)
373 {
374 sk_del_node_init(sk);
375 }
376
377 static void __unix_insert_socket(struct net *net, struct sock *sk)
378 {
379 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
380 sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
381 }
382
383 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
384 struct unix_address *addr, unsigned int hash)
385 {
386 __unix_remove_socket(sk);
387 smp_store_release(&unix_sk(sk)->addr, addr);
388
389 sk->sk_hash = hash;
390 __unix_insert_socket(net, sk);
391 }
392
393 static void unix_remove_socket(struct net *net, struct sock *sk)
394 {
395 spin_lock(&net->unx.table.locks[sk->sk_hash]);
396 __unix_remove_socket(sk);
397 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
398 }
399
400 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
401 {
402 spin_lock(&net->unx.table.locks[sk->sk_hash]);
403 __unix_insert_socket(net, sk);
404 spin_unlock(&net->unx.table.locks[sk->sk_hash]);
405 }
406
407 static void unix_insert_bsd_socket(struct sock *sk)
408 {
409 spin_lock(&bsd_socket_locks[sk->sk_hash]);
410 sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
411 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
412 }
413
414 static void unix_remove_bsd_socket(struct sock *sk)
415 {
416 if (!hlist_unhashed(&sk->sk_bind_node)) {
417 spin_lock(&bsd_socket_locks[sk->sk_hash]);
418 __sk_del_bind_node(sk);
419 spin_unlock(&bsd_socket_locks[sk->sk_hash]);
420
421 sk_node_init(&sk->sk_bind_node);
422 }
423 }
424
425 static struct sock *__unix_find_socket_byname(struct net *net,
426 struct sockaddr_un *sunname,
427 int len, unsigned int hash)
428 {
429 struct sock *s;
430
431 sk_for_each(s, &net->unx.table.buckets[hash]) {
432 struct unix_sock *u = unix_sk(s);
433
434 if (u->addr->len == len &&
435 !memcmp(u->addr->name, sunname, len))
436 return s;
437 }
438 return NULL;
439 }
440
441 static inline struct sock *unix_find_socket_byname(struct net *net,
442 struct sockaddr_un *sunname,
443 int len, unsigned int hash)
444 {
445 struct sock *s;
446
447 spin_lock(&net->unx.table.locks[hash]);
448 s = __unix_find_socket_byname(net, sunname, len, hash);
449 if (s)
450 sock_hold(s);
451 spin_unlock(&net->unx.table.locks[hash]);
452 return s;
453 }
454
455 static struct sock *unix_find_socket_byinode(struct inode *i)
456 {
457 unsigned int hash = unix_bsd_hash(i);
458 struct sock *s;
459
460 spin_lock(&bsd_socket_locks[hash]);
461 sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
462 struct dentry *dentry = unix_sk(s)->path.dentry;
463
464 if (dentry && d_backing_inode(dentry) == i) {
465 sock_hold(s);
466 spin_unlock(&bsd_socket_locks[hash]);
467 return s;
468 }
469 }
470 spin_unlock(&bsd_socket_locks[hash]);
471 return NULL;
472 }
473
474 /* Support code for asymmetrically connected dgram sockets
475 *
476 * If a datagram socket is connected to a socket not itself connected
477 * to the first socket (eg, /dev/log), clients may only enqueue more
478 * messages if the present receive queue of the server socket is not
479 * "too large". This means there's a second writeability condition
480 * poll and sendmsg need to test. The dgram recv code will do a wake
481 * up on the peer_wait wait queue of a socket upon reception of a
482 * datagram which needs to be propagated to sleeping would-be writers
483 * since these might not have sent anything so far. This can't be
484 * accomplished via poll_wait because the lifetime of the server
485 * socket might be less than that of its clients if these break their
486 * association with it or if the server socket is closed while clients
487 * are still connected to it and there's no way to inform "a polling
488 * implementation" that it should let go of a certain wait queue
489 *
490 * In order to propagate a wake up, a wait_queue_entry_t of the client
491 * socket is enqueued on the peer_wait queue of the server socket
492 * whose wake function does a wake_up on the ordinary client socket
493 * wait queue. This connection is established whenever a write (or
494 * poll for write) hits the flow control condition and is broken when the
495 * association to the server socket is dissolved or after a wake up
496 * was relayed.
497 */
498
499 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
500 void *key)
501 {
502 struct unix_sock *u;
503 wait_queue_head_t *u_sleep;
504
505 u = container_of(q, struct unix_sock, peer_wake);
506
507 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
508 q);
509 u->peer_wake.private = NULL;
510
511 /* relaying can only happen while the wq still exists */
512 u_sleep = sk_sleep(&u->sk);
513 if (u_sleep)
514 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
515
516 return 0;
517 }
518
519 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
520 {
521 struct unix_sock *u, *u_other;
522 int rc;
523
524 u = unix_sk(sk);
525 u_other = unix_sk(other);
526 rc = 0;
527 spin_lock(&u_other->peer_wait.lock);
528
529 if (!u->peer_wake.private) {
530 u->peer_wake.private = other;
531 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
532
533 rc = 1;
534 }
535
536 spin_unlock(&u_other->peer_wait.lock);
537 return rc;
538 }
539
540 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
541 struct sock *other)
542 {
543 struct unix_sock *u, *u_other;
544
545 u = unix_sk(sk);
546 u_other = unix_sk(other);
547 spin_lock(&u_other->peer_wait.lock);
548
549 if (u->peer_wake.private == other) {
550 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
551 u->peer_wake.private = NULL;
552 }
553
554 spin_unlock(&u_other->peer_wait.lock);
555 }
556
557 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
558 struct sock *other)
559 {
560 unix_dgram_peer_wake_disconnect(sk, other);
561 wake_up_interruptible_poll(sk_sleep(sk),
562 EPOLLOUT |
563 EPOLLWRNORM |
564 EPOLLWRBAND);
565 }
566
567 /* preconditions:
568 * - unix_peer(sk) == other
569 * - association is stable
570 */
571 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
572 {
573 int connected;
574
575 connected = unix_dgram_peer_wake_connect(sk, other);
576
577 /* If other is SOCK_DEAD, we want to make sure we signal
578 * POLLOUT, such that a subsequent write() can get a
579 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
580 * to other and it's full, we will hang waiting for POLLOUT.
581 */
582 if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
583 return 1;
584
585 if (connected)
586 unix_dgram_peer_wake_disconnect(sk, other);
587
588 return 0;
589 }
590
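/* A non-listening socket counts as writable while the queued write memory
 * stays at or below a quarter of sk_sndbuf (hence the "<< 2" below).
 */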
591 static int unix_writable(const struct sock *sk, unsigned char state)
592 {
593 return state != TCP_LISTEN &&
594 (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
595 }
596
597 static void unix_write_space(struct sock *sk)
598 {
599 struct socket_wq *wq;
600
601 rcu_read_lock();
602 if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
603 wq = rcu_dereference(sk->sk_wq);
604 if (skwq_has_sleeper(wq))
605 wake_up_interruptible_sync_poll(&wq->wait,
606 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
607 sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
608 }
609 rcu_read_unlock();
610 }
611
612 /* When a dgram socket disconnects (or changes its peer), we clear its receive
613 * queue of packets that arrived from the previous peer. First, it allows
614 * flow control based only on wmem_alloc; second, a sk connected to a peer
615 * may receive messages only from that peer. */
616 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
617 {
618 if (!skb_queue_empty(&sk->sk_receive_queue)) {
619 skb_queue_purge_reason(&sk->sk_receive_queue,
620 SKB_DROP_REASON_UNIX_DISCONNECT);
621
622 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
623
624 /* If one link of a bidirectional dgram pipe is disconnected,
625 * we signal an error. Messages are lost. Do not do this
626 * when the peer was not connected to us.
627 */
628 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
629 WRITE_ONCE(other->sk_err, ECONNRESET);
630 sk_error_report(other);
631 }
632 }
633 }
634
635 static void unix_sock_destructor(struct sock *sk)
636 {
637 struct unix_sock *u = unix_sk(sk);
638
639 skb_queue_purge_reason(&sk->sk_receive_queue, SKB_DROP_REASON_SOCKET_CLOSE);
640
641 DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
642 DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
643 DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
644 if (!sock_flag(sk, SOCK_DEAD)) {
645 pr_info("Attempt to release alive unix socket: %p\n", sk);
646 return;
647 }
648
649 if (u->addr)
650 unix_release_addr(u->addr);
651
652 atomic_long_dec(&unix_nr_socks);
653 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
654 #ifdef UNIX_REFCNT_DEBUG
655 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
656 atomic_long_read(&unix_nr_socks));
657 #endif
658 }
659
660 static unsigned int unix_skb_len(const struct sk_buff *skb)
661 {
662 return skb->len - UNIXCB(skb).consumed;
663 }
664
665 static void unix_release_sock(struct sock *sk, int embrion)
666 {
667 struct unix_sock *u = unix_sk(sk);
668 struct sock *skpair;
669 struct sk_buff *skb;
670 struct path path;
671 int state;
672
673 unix_remove_socket(sock_net(sk), sk);
674 unix_remove_bsd_socket(sk);
675
676 /* Clear state */
677 unix_state_lock(sk);
678 sock_orphan(sk);
679 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
680 path = u->path;
681 u->path.dentry = NULL;
682 u->path.mnt = NULL;
683 state = sk->sk_state;
684 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
685
686 skpair = unix_peer(sk);
687 unix_peer(sk) = NULL;
688
689 unix_state_unlock(sk);
690
691 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
692 u->oob_skb = NULL;
693 #endif
694
695 wake_up_interruptible_all(&u->peer_wait);
696
697 if (skpair != NULL) {
698 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
699 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
700
701 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
702 if (skb && !unix_skb_len(skb))
703 skb = skb_peek_next(skb, &sk->sk_receive_queue);
704 #endif
705 unix_state_lock(skpair);
706 /* No more writes */
707 WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
708 if (skb || embrion)
709 WRITE_ONCE(skpair->sk_err, ECONNRESET);
710 unix_state_unlock(skpair);
711 skpair->sk_state_change(skpair);
712 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
713 }
714
715 unix_dgram_peer_wake_disconnect(sk, skpair);
716 sock_put(skpair); /* It may now die */
717 }
718
719 /* Try to flush out this socket. Throw out buffers at least */
720
721 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
722 if (state == TCP_LISTEN)
723 unix_release_sock(skb->sk, 1);
724
725 /* passed fds are erased in the kfree_skb hook */
726 kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
727 }
728
729 if (path.dentry)
730 path_put(&path);
731
732 sock_put(sk);
733
734 /* ---- Socket is dead now and most probably destroyed ---- */
735
736 /*
737 * Fixme: BSD difference: In BSD all sockets connected to us get
738 * ECONNRESET and we die on the spot. In Linux we behave
739 * like files and pipes do and wait for the last
740 * dereference.
741 *
742 * Can't we simply set sock->err?
743 *
744 * What does the above comment talk about? --ANK(980817)
745 */
746
747 if (READ_ONCE(unix_tot_inflight))
748 unix_gc(); /* Garbage collect fds */
749 }
750
751 struct unix_peercred {
752 struct pid *peer_pid;
753 const struct cred *peer_cred;
754 };
755
756 static inline int prepare_peercred(struct unix_peercred *peercred)
757 {
758 struct pid *pid;
759 int err;
760
761 pid = task_tgid(current);
762 err = pidfs_register_pid(pid);
763 if (likely(!err)) {
764 peercred->peer_pid = get_pid(pid);
765 peercred->peer_cred = get_current_cred();
766 }
767 return err;
768 }
769
770 static void drop_peercred(struct unix_peercred *peercred)
771 {
772 const struct cred *cred = NULL;
773 struct pid *pid = NULL;
774
775 might_sleep();
776
777 swap(peercred->peer_pid, pid);
778 swap(peercred->peer_cred, cred);
779
780 put_pid(pid);
781 put_cred(cred);
782 }
783
784 static inline void init_peercred(struct sock *sk,
785 const struct unix_peercred *peercred)
786 {
787 sk->sk_peer_pid = peercred->peer_pid;
788 sk->sk_peer_cred = peercred->peer_cred;
789 }
790
791 static void update_peercred(struct sock *sk, struct unix_peercred *peercred)
792 {
793 const struct cred *old_cred;
794 struct pid *old_pid;
795
796 spin_lock(&sk->sk_peer_lock);
797 old_pid = sk->sk_peer_pid;
798 old_cred = sk->sk_peer_cred;
799 init_peercred(sk, peercred);
800 spin_unlock(&sk->sk_peer_lock);
801
802 peercred->peer_pid = old_pid;
803 peercred->peer_cred = old_cred;
804 }
805
806 static void copy_peercred(struct sock *sk, struct sock *peersk)
807 {
808 lockdep_assert_held(&unix_sk(peersk)->lock);
809
810 spin_lock(&sk->sk_peer_lock);
811 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
812 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
813 spin_unlock(&sk->sk_peer_lock);
814 }
815
816 static bool unix_may_passcred(const struct sock *sk)
817 {
818 return sk->sk_scm_credentials || sk->sk_scm_pidfd;
819 }
820
821 static int unix_listen(struct socket *sock, int backlog)
822 {
823 int err;
824 struct sock *sk = sock->sk;
825 struct unix_sock *u = unix_sk(sk);
826 struct unix_peercred peercred = {};
827
828 err = -EOPNOTSUPP;
829 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
830 goto out; /* Only stream/seqpacket sockets accept */
831 err = -EINVAL;
832 if (!READ_ONCE(u->addr))
833 goto out; /* No listens on an unbound socket */
834 err = prepare_peercred(&peercred);
835 if (err)
836 goto out;
837 unix_state_lock(sk);
838 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
839 goto out_unlock;
840 if (backlog > sk->sk_max_ack_backlog)
841 wake_up_interruptible_all(&u->peer_wait);
842 sk->sk_max_ack_backlog = backlog;
843 WRITE_ONCE(sk->sk_state, TCP_LISTEN);
844
845 /* set credentials so connect can copy them */
846 update_peercred(sk, &peercred);
847 err = 0;
848
849 out_unlock:
850 unix_state_unlock(sk);
851 drop_peercred(&peercred);
852 out:
853 return err;
854 }
855
856 static int unix_release(struct socket *);
857 static int unix_bind(struct socket *, struct sockaddr *, int);
858 static int unix_stream_connect(struct socket *, struct sockaddr *,
859 int addr_len, int flags);
860 static int unix_socketpair(struct socket *, struct socket *);
861 static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg);
862 static int unix_getname(struct socket *, struct sockaddr *, int);
863 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
864 static __poll_t unix_dgram_poll(struct file *, struct socket *,
865 poll_table *);
866 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
867 #ifdef CONFIG_COMPAT
868 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
869 #endif
870 static int unix_shutdown(struct socket *, int);
871 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
872 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
873 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
874 struct pipe_inode_info *, size_t size,
875 unsigned int flags);
876 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
877 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
878 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
879 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
880 static int unix_dgram_connect(struct socket *, struct sockaddr *,
881 int, int);
882 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
883 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
884 int);
885
886 #ifdef CONFIG_PROC_FS
887 static int unix_count_nr_fds(struct sock *sk)
888 {
889 struct sk_buff *skb;
890 struct unix_sock *u;
891 int nr_fds = 0;
892
893 spin_lock(&sk->sk_receive_queue.lock);
894 skb = skb_peek(&sk->sk_receive_queue);
895 while (skb) {
896 u = unix_sk(skb->sk);
897 nr_fds += atomic_read(&u->scm_stat.nr_fds);
898 skb = skb_peek_next(skb, &sk->sk_receive_queue);
899 }
900 spin_unlock(&sk->sk_receive_queue.lock);
901
902 return nr_fds;
903 }
904
905 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
906 {
907 struct sock *sk = sock->sk;
908 unsigned char s_state;
909 struct unix_sock *u;
910 int nr_fds = 0;
911
912 if (sk) {
913 s_state = READ_ONCE(sk->sk_state);
914 u = unix_sk(sk);
915
916 /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
917 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
918 * SOCK_DGRAM is ordinary. So, no lock is needed.
919 */
920 if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
921 nr_fds = atomic_read(&u->scm_stat.nr_fds);
922 else if (s_state == TCP_LISTEN)
923 nr_fds = unix_count_nr_fds(sk);
924
925 seq_printf(m, "scm_fds: %u\n", nr_fds);
926 }
927 }
928 #else
929 #define unix_show_fdinfo NULL
930 #endif
931
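/* Options listed here are handled by unix_setsockopt() itself; all other
 * SOL_SOCKET options are forwarded to the generic sock_setsockopt().
 * SOCK_CUSTOM_SOCKOPT (set for SOCK_STREAM in unix_create() and
 * unix_accept()) is what routes SOL_SOCKET calls through this handler.
 */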
932 static bool unix_custom_sockopt(int optname)
933 {
934 switch (optname) {
935 case SO_INQ:
936 return true;
937 default:
938 return false;
939 }
940 }
941
942 static int unix_setsockopt(struct socket *sock, int level, int optname,
943 sockptr_t optval, unsigned int optlen)
944 {
945 struct unix_sock *u = unix_sk(sock->sk);
946 struct sock *sk = sock->sk;
947 int val;
948
949 if (level != SOL_SOCKET)
950 return -EOPNOTSUPP;
951
952 if (!unix_custom_sockopt(optname))
953 return sock_setsockopt(sock, level, optname, optval, optlen);
954
955 if (optlen != sizeof(int))
956 return -EINVAL;
957
958 if (copy_from_sockptr(&val, optval, sizeof(val)))
959 return -EFAULT;
960
961 switch (optname) {
962 case SO_INQ:
963 if (sk->sk_type != SOCK_STREAM)
964 return -EINVAL;
965
966 if (val > 1 || val < 0)
967 return -EINVAL;
968
969 WRITE_ONCE(u->recvmsg_inq, val);
970 break;
971 default:
972 return -ENOPROTOOPT;
973 }
974
975 return 0;
976 }
977
978 static const struct proto_ops unix_stream_ops = {
979 .family = PF_UNIX,
980 .owner = THIS_MODULE,
981 .release = unix_release,
982 .bind = unix_bind,
983 .connect = unix_stream_connect,
984 .socketpair = unix_socketpair,
985 .accept = unix_accept,
986 .getname = unix_getname,
987 .poll = unix_poll,
988 .ioctl = unix_ioctl,
989 #ifdef CONFIG_COMPAT
990 .compat_ioctl = unix_compat_ioctl,
991 #endif
992 .listen = unix_listen,
993 .shutdown = unix_shutdown,
994 .setsockopt = unix_setsockopt,
995 .sendmsg = unix_stream_sendmsg,
996 .recvmsg = unix_stream_recvmsg,
997 .read_skb = unix_stream_read_skb,
998 .mmap = sock_no_mmap,
999 .splice_read = unix_stream_splice_read,
1000 .set_peek_off = sk_set_peek_off,
1001 .show_fdinfo = unix_show_fdinfo,
1002 };
1003
1004 static const struct proto_ops unix_dgram_ops = {
1005 .family = PF_UNIX,
1006 .owner = THIS_MODULE,
1007 .release = unix_release,
1008 .bind = unix_bind,
1009 .connect = unix_dgram_connect,
1010 .socketpair = unix_socketpair,
1011 .accept = sock_no_accept,
1012 .getname = unix_getname,
1013 .poll = unix_dgram_poll,
1014 .ioctl = unix_ioctl,
1015 #ifdef CONFIG_COMPAT
1016 .compat_ioctl = unix_compat_ioctl,
1017 #endif
1018 .listen = sock_no_listen,
1019 .shutdown = unix_shutdown,
1020 .sendmsg = unix_dgram_sendmsg,
1021 .read_skb = unix_read_skb,
1022 .recvmsg = unix_dgram_recvmsg,
1023 .mmap = sock_no_mmap,
1024 .set_peek_off = sk_set_peek_off,
1025 .show_fdinfo = unix_show_fdinfo,
1026 };
1027
1028 static const struct proto_ops unix_seqpacket_ops = {
1029 .family = PF_UNIX,
1030 .owner = THIS_MODULE,
1031 .release = unix_release,
1032 .bind = unix_bind,
1033 .connect = unix_stream_connect,
1034 .socketpair = unix_socketpair,
1035 .accept = unix_accept,
1036 .getname = unix_getname,
1037 .poll = unix_dgram_poll,
1038 .ioctl = unix_ioctl,
1039 #ifdef CONFIG_COMPAT
1040 .compat_ioctl = unix_compat_ioctl,
1041 #endif
1042 .listen = unix_listen,
1043 .shutdown = unix_shutdown,
1044 .sendmsg = unix_seqpacket_sendmsg,
1045 .recvmsg = unix_seqpacket_recvmsg,
1046 .mmap = sock_no_mmap,
1047 .set_peek_off = sk_set_peek_off,
1048 .show_fdinfo = unix_show_fdinfo,
1049 };
1050
1051 static void unix_close(struct sock *sk, long timeout)
1052 {
1053 /* Nothing to do here, unix socket does not need a ->close().
1054 * This is merely for sockmap.
1055 */
1056 }
1057
1058 static bool unix_bpf_bypass_getsockopt(int level, int optname)
1059 {
1060 if (level == SOL_SOCKET) {
1061 switch (optname) {
1062 case SO_PEERPIDFD:
1063 return true;
1064 default:
1065 return false;
1066 }
1067 }
1068
1069 return false;
1070 }
1071
1072 struct proto unix_dgram_proto = {
1073 .name = "UNIX",
1074 .owner = THIS_MODULE,
1075 .obj_size = sizeof(struct unix_sock),
1076 .close = unix_close,
1077 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
1078 #ifdef CONFIG_BPF_SYSCALL
1079 .psock_update_sk_prot = unix_dgram_bpf_update_proto,
1080 #endif
1081 };
1082
1083 struct proto unix_stream_proto = {
1084 .name = "UNIX-STREAM",
1085 .owner = THIS_MODULE,
1086 .obj_size = sizeof(struct unix_sock),
1087 .close = unix_close,
1088 .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
1089 #ifdef CONFIG_BPF_SYSCALL
1090 .psock_update_sk_prot = unix_stream_bpf_update_proto,
1091 #endif
1092 };
1093
1094 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
1095 {
1096 struct unix_sock *u;
1097 struct sock *sk;
1098 int err;
1099
1100 atomic_long_inc(&unix_nr_socks);
1101 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
1102 err = -ENFILE;
1103 goto err;
1104 }
1105
1106 if (type == SOCK_STREAM)
1107 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
1108 else /*dgram and seqpacket */
1109 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
1110
1111 if (!sk) {
1112 err = -ENOMEM;
1113 goto err;
1114 }
1115
1116 sock_init_data(sock, sk);
1117
1118 sk->sk_scm_rights = 1;
1119 sk->sk_hash = unix_unbound_hash(sk);
1120 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
1121 sk->sk_write_space = unix_write_space;
1122 sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
1123 sk->sk_destruct = unix_sock_destructor;
1124 lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
1125
1126 u = unix_sk(sk);
1127 u->listener = NULL;
1128 u->vertex = NULL;
1129 u->path.dentry = NULL;
1130 u->path.mnt = NULL;
1131 spin_lock_init(&u->lock);
1132 lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
1133 mutex_init(&u->iolock); /* single task reading lock */
1134 mutex_init(&u->bindlock); /* single task binding lock */
1135 init_waitqueue_head(&u->peer_wait);
1136 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
1137 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
1138 unix_insert_unbound_socket(net, sk);
1139
1140 sock_prot_inuse_add(net, sk->sk_prot, 1);
1141
1142 return sk;
1143
1144 err:
1145 atomic_long_dec(&unix_nr_socks);
1146 return ERR_PTR(err);
1147 }
1148
1149 static int unix_create(struct net *net, struct socket *sock, int protocol,
1150 int kern)
1151 {
1152 struct sock *sk;
1153
1154 if (protocol && protocol != PF_UNIX)
1155 return -EPROTONOSUPPORT;
1156
1157 sock->state = SS_UNCONNECTED;
1158
1159 switch (sock->type) {
1160 case SOCK_STREAM:
1161 set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
1162 sock->ops = &unix_stream_ops;
1163 break;
1164 /*
1165 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
1166 * nothing uses it.
1167 */
1168 case SOCK_RAW:
1169 sock->type = SOCK_DGRAM;
1170 fallthrough;
1171 case SOCK_DGRAM:
1172 sock->ops = &unix_dgram_ops;
1173 break;
1174 case SOCK_SEQPACKET:
1175 sock->ops = &unix_seqpacket_ops;
1176 break;
1177 default:
1178 return -ESOCKTNOSUPPORT;
1179 }
1180
1181 sk = unix_create1(net, sock, kern, sock->type);
1182 if (IS_ERR(sk))
1183 return PTR_ERR(sk);
1184
1185 return 0;
1186 }
1187
1188 static int unix_release(struct socket *sock)
1189 {
1190 struct sock *sk = sock->sk;
1191
1192 if (!sk)
1193 return 0;
1194
1195 sk->sk_prot->close(sk, 0);
1196 unix_release_sock(sk, 0);
1197 sock->sk = NULL;
1198
1199 return 0;
1200 }
1201
1202 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1203 int type, int flags)
1204 {
1205 struct inode *inode;
1206 struct path path;
1207 struct sock *sk;
1208 int err;
1209
1210 unix_mkname_bsd(sunaddr, addr_len);
1211
1212 if (flags & SOCK_COREDUMP) {
1213 struct path root;
1214
1215 task_lock(&init_task);
1216 get_fs_root(init_task.fs, &root);
1217 task_unlock(&init_task);
1218
1219 scoped_with_kernel_creds()
1220 err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path,
1221 LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS |
1222 LOOKUP_NO_MAGICLINKS, &path);
1223 path_put(&root);
1224 if (err)
1225 goto fail;
1226 } else {
1227 err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1228 if (err)
1229 goto fail;
1230
1231 err = path_permission(&path, MAY_WRITE);
1232 if (err)
1233 goto path_put;
1234 }
1235
1236 err = -ECONNREFUSED;
1237 inode = d_backing_inode(path.dentry);
1238 if (!S_ISSOCK(inode->i_mode))
1239 goto path_put;
1240
1241 sk = unix_find_socket_byinode(inode);
1242 if (!sk)
1243 goto path_put;
1244
1245 err = -EPROTOTYPE;
1246 if (sk->sk_type == type)
1247 touch_atime(&path);
1248 else
1249 goto sock_put;
1250
1251 path_put(&path);
1252
1253 return sk;
1254
1255 sock_put:
1256 sock_put(sk);
1257 path_put:
1258 path_put(&path);
1259 fail:
1260 return ERR_PTR(err);
1261 }
1262
1263 static struct sock *unix_find_abstract(struct net *net,
1264 struct sockaddr_un *sunaddr,
1265 int addr_len, int type)
1266 {
1267 unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1268 struct dentry *dentry;
1269 struct sock *sk;
1270
1271 sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1272 if (!sk)
1273 return ERR_PTR(-ECONNREFUSED);
1274
1275 dentry = unix_sk(sk)->path.dentry;
1276 if (dentry)
1277 touch_atime(&unix_sk(sk)->path);
1278
1279 return sk;
1280 }
1281
1282 static struct sock *unix_find_other(struct net *net,
1283 struct sockaddr_un *sunaddr,
1284 int addr_len, int type, int flags)
1285 {
1286 struct sock *sk;
1287
1288 if (sunaddr->sun_path[0])
1289 sk = unix_find_bsd(sunaddr, addr_len, type, flags);
1290 else
1291 sk = unix_find_abstract(net, sunaddr, addr_len, type);
1292
1293 return sk;
1294 }
1295
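/* Autobind assigns an abstract address of the form "\0XXXXX" (five hex
 * digits), retrying until a free name is found or all 0x100000 candidates
 * have been tried, in which case -ENOSPC is returned.
 */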
1296 static int unix_autobind(struct sock *sk)
1297 {
1298 struct unix_sock *u = unix_sk(sk);
1299 unsigned int new_hash, old_hash;
1300 struct net *net = sock_net(sk);
1301 struct unix_address *addr;
1302 u32 lastnum, ordernum;
1303 int err;
1304
1305 err = mutex_lock_interruptible(&u->bindlock);
1306 if (err)
1307 return err;
1308
1309 if (u->addr)
1310 goto out;
1311
1312 err = -ENOMEM;
1313 addr = kzalloc(sizeof(*addr) +
1314 offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1315 if (!addr)
1316 goto out;
1317
1318 addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1319 addr->name->sun_family = AF_UNIX;
1320 refcount_set(&addr->refcnt, 1);
1321
1322 old_hash = sk->sk_hash;
1323 ordernum = get_random_u32();
1324 lastnum = ordernum & 0xFFFFF;
1325 retry:
1326 ordernum = (ordernum + 1) & 0xFFFFF;
1327 sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1328
1329 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1330 unix_table_double_lock(net, old_hash, new_hash);
1331
1332 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1333 unix_table_double_unlock(net, old_hash, new_hash);
1334
1335 /* __unix_find_socket_byname() may take a long time if many names
1336 * are already in use.
1337 */
1338 cond_resched();
1339
1340 if (ordernum == lastnum) {
1341 /* Give up if all names seem to be in use. */
1342 err = -ENOSPC;
1343 unix_release_addr(addr);
1344 goto out;
1345 }
1346
1347 goto retry;
1348 }
1349
1350 __unix_set_addr_hash(net, sk, addr, new_hash);
1351 unix_table_double_unlock(net, old_hash, new_hash);
1352 err = 0;
1353
1354 out: mutex_unlock(&u->bindlock);
1355 return err;
1356 }
1357
1358 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1359 int addr_len)
1360 {
1361 umode_t mode = S_IFSOCK |
1362 (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1363 struct unix_sock *u = unix_sk(sk);
1364 unsigned int new_hash, old_hash;
1365 struct net *net = sock_net(sk);
1366 struct mnt_idmap *idmap;
1367 struct unix_address *addr;
1368 struct dentry *dentry;
1369 struct path parent;
1370 int err;
1371
1372 addr_len = unix_mkname_bsd(sunaddr, addr_len);
1373 addr = unix_create_addr(sunaddr, addr_len);
1374 if (!addr)
1375 return -ENOMEM;
1376
1377 /*
1378 * Get the parent directory, calculate the hash for last
1379 * component.
1380 */
1381 dentry = start_creating_path(AT_FDCWD, addr->name->sun_path, &parent, 0);
1382 if (IS_ERR(dentry)) {
1383 err = PTR_ERR(dentry);
1384 goto out;
1385 }
1386
1387 /*
1388 * All right, let's create it.
1389 */
1390 idmap = mnt_idmap(parent.mnt);
1391 err = security_path_mknod(&parent, dentry, mode, 0);
1392 if (!err)
1393 err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1394 if (err)
1395 goto out_path;
1396 err = mutex_lock_interruptible(&u->bindlock);
1397 if (err)
1398 goto out_unlink;
1399 if (u->addr)
1400 goto out_unlock;
1401
1402 old_hash = sk->sk_hash;
1403 new_hash = unix_bsd_hash(d_backing_inode(dentry));
1404 unix_table_double_lock(net, old_hash, new_hash);
1405 u->path.mnt = mntget(parent.mnt);
1406 u->path.dentry = dget(dentry);
1407 __unix_set_addr_hash(net, sk, addr, new_hash);
1408 unix_table_double_unlock(net, old_hash, new_hash);
1409 unix_insert_bsd_socket(sk);
1410 mutex_unlock(&u->bindlock);
1411 end_creating_path(&parent, dentry);
1412 return 0;
1413
1414 out_unlock:
1415 mutex_unlock(&u->bindlock);
1416 err = -EINVAL;
1417 out_unlink:
1418 /* failed after successful mknod? unlink what we'd created... */
1419 vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1420 out_path:
1421 end_creating_path(&parent, dentry);
1422 out:
1423 unix_release_addr(addr);
1424 return err == -EEXIST ? -EADDRINUSE : err;
1425 }
1426
1427 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1428 int addr_len)
1429 {
1430 struct unix_sock *u = unix_sk(sk);
1431 unsigned int new_hash, old_hash;
1432 struct net *net = sock_net(sk);
1433 struct unix_address *addr;
1434 int err;
1435
1436 addr = unix_create_addr(sunaddr, addr_len);
1437 if (!addr)
1438 return -ENOMEM;
1439
1440 err = mutex_lock_interruptible(&u->bindlock);
1441 if (err)
1442 goto out;
1443
1444 if (u->addr) {
1445 err = -EINVAL;
1446 goto out_mutex;
1447 }
1448
1449 old_hash = sk->sk_hash;
1450 new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1451 unix_table_double_lock(net, old_hash, new_hash);
1452
1453 if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1454 goto out_spin;
1455
1456 __unix_set_addr_hash(net, sk, addr, new_hash);
1457 unix_table_double_unlock(net, old_hash, new_hash);
1458 mutex_unlock(&u->bindlock);
1459 return 0;
1460
1461 out_spin:
1462 unix_table_double_unlock(net, old_hash, new_hash);
1463 err = -EADDRINUSE;
1464 out_mutex:
1465 mutex_unlock(&u->bindlock);
1466 out:
1467 unix_release_addr(addr);
1468 return err;
1469 }
1470
1471 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1472 {
1473 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1474 struct sock *sk = sock->sk;
1475 int err;
1476
1477 if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1478 sunaddr->sun_family == AF_UNIX)
1479 return unix_autobind(sk);
1480
1481 err = unix_validate_addr(sunaddr, addr_len);
1482 if (err)
1483 return err;
1484
1485 if (sunaddr->sun_path[0])
1486 err = unix_bind_bsd(sk, sunaddr, addr_len);
1487 else
1488 err = unix_bind_abstract(sk, sunaddr, addr_len);
1489
1490 return err;
1491 }
1492
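/* Lock two sockets' state locks in ascending address order so concurrent
 * double-lockers cannot deadlock (cf. unix_state_lock_cmp_fn()).
 */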
1493 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1494 {
1495 if (unlikely(sk1 == sk2) || !sk2) {
1496 unix_state_lock(sk1);
1497 return;
1498 }
1499
1500 if (sk1 > sk2)
1501 swap(sk1, sk2);
1502
1503 unix_state_lock(sk1);
1504 unix_state_lock(sk2);
1505 }
1506
1507 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1508 {
1509 if (unlikely(sk1 == sk2) || !sk2) {
1510 unix_state_unlock(sk1);
1511 return;
1512 }
1513 unix_state_unlock(sk1);
1514 unix_state_unlock(sk2);
1515 }
1516
1517 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1518 int alen, int flags)
1519 {
1520 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1521 struct sock *sk = sock->sk;
1522 struct sock *other;
1523 int err;
1524
1525 err = -EINVAL;
1526 if (alen < offsetofend(struct sockaddr, sa_family))
1527 goto out;
1528
1529 if (addr->sa_family != AF_UNSPEC) {
1530 err = unix_validate_addr(sunaddr, alen);
1531 if (err)
1532 goto out;
1533
1534 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen);
1535 if (err)
1536 goto out;
1537
1538 if (unix_may_passcred(sk) && !READ_ONCE(unix_sk(sk)->addr)) {
1539 err = unix_autobind(sk);
1540 if (err)
1541 goto out;
1542 }
1543
1544 restart:
1545 other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type, 0);
1546 if (IS_ERR(other)) {
1547 err = PTR_ERR(other);
1548 goto out;
1549 }
1550
1551 unix_state_double_lock(sk, other);
1552
1553 /* Apparently VFS overslept socket death. Retry. */
1554 if (sock_flag(other, SOCK_DEAD)) {
1555 unix_state_double_unlock(sk, other);
1556 sock_put(other);
1557 goto restart;
1558 }
1559
1560 err = -EPERM;
1561 if (!unix_may_send(sk, other))
1562 goto out_unlock;
1563
1564 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1565 if (err)
1566 goto out_unlock;
1567
1568 WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1569 WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1570 } else {
1571 /*
1572 * 1003.1g breaking connected state with AF_UNSPEC
1573 */
1574 other = NULL;
1575 unix_state_double_lock(sk, other);
1576 }
1577
1578 /*
1579 * If it was connected, reconnect.
1580 */
1581 if (unix_peer(sk)) {
1582 struct sock *old_peer = unix_peer(sk);
1583
1584 unix_peer(sk) = other;
1585 if (!other)
1586 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1587 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1588
1589 unix_state_double_unlock(sk, other);
1590
1591 if (other != old_peer) {
1592 unix_dgram_disconnected(sk, old_peer);
1593
1594 unix_state_lock(old_peer);
1595 if (!unix_peer(old_peer))
1596 WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1597 unix_state_unlock(old_peer);
1598 }
1599
1600 sock_put(old_peer);
1601 } else {
1602 unix_peer(sk) = other;
1603 unix_state_double_unlock(sk, other);
1604 }
1605
1606 return 0;
1607
1608 out_unlock:
1609 unix_state_double_unlock(sk, other);
1610 sock_put(other);
1611 out:
1612 return err;
1613 }
1614
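/* Called with "other"'s state lock held; the lock is dropped here before
 * sleeping on other's peer_wait queue. Returns the remaining timeout.
 */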
1615 static long unix_wait_for_peer(struct sock *other, long timeo)
1616 {
1617 struct unix_sock *u = unix_sk(other);
1618 int sched;
1619 DEFINE_WAIT(wait);
1620
1621 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1622
1623 sched = !sock_flag(other, SOCK_DEAD) &&
1624 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1625 unix_recvq_full_lockless(other);
1626
1627 unix_state_unlock(other);
1628
1629 if (sched)
1630 timeo = schedule_timeout(timeo);
1631
1632 finish_wait(&u->peer_wait, &wait);
1633 return timeo;
1634 }
1635
1636 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1637 int addr_len, int flags)
1638 {
1639 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1640 struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1641 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1642 struct unix_peercred peercred = {};
1643 struct net *net = sock_net(sk);
1644 struct sk_buff *skb = NULL;
1645 unsigned char state;
1646 long timeo;
1647 int err;
1648
1649 err = unix_validate_addr(sunaddr, addr_len);
1650 if (err)
1651 goto out;
1652
1653 err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len);
1654 if (err)
1655 goto out;
1656
1657 if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
1658 err = unix_autobind(sk);
1659 if (err)
1660 goto out;
1661 }
1662
1663 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1664
1665 /* First of all allocate resources.
1666 * If we will make it after state is locked,
1667 * we will have to recheck all again in any case.
1668 */
1669
1670 /* create new sock for complete connection */
1671 newsk = unix_create1(net, NULL, 0, sock->type);
1672 if (IS_ERR(newsk)) {
1673 err = PTR_ERR(newsk);
1674 goto out;
1675 }
1676
1677 err = prepare_peercred(&peercred);
1678 if (err)
1679 goto out;
1680
1681 /* Allocate skb for sending to listening sock */
1682 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1683 if (!skb) {
1684 err = -ENOMEM;
1685 goto out_free_sk;
1686 }
1687
1688 restart:
1689 /* Find listening sock. */
1690 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, flags);
1691 if (IS_ERR(other)) {
1692 err = PTR_ERR(other);
1693 goto out_free_skb;
1694 }
1695
1696 unix_state_lock(other);
1697
1698 /* Apparently VFS overslept socket death. Retry. */
1699 if (sock_flag(other, SOCK_DEAD)) {
1700 unix_state_unlock(other);
1701 sock_put(other);
1702 goto restart;
1703 }
1704
1705 if (other->sk_state != TCP_LISTEN ||
1706 other->sk_shutdown & RCV_SHUTDOWN) {
1707 err = -ECONNREFUSED;
1708 goto out_unlock;
1709 }
1710
1711 if (unix_recvq_full_lockless(other)) {
1712 if (!timeo) {
1713 err = -EAGAIN;
1714 goto out_unlock;
1715 }
1716
1717 timeo = unix_wait_for_peer(other, timeo);
1718 sock_put(other);
1719
1720 err = sock_intr_errno(timeo);
1721 if (signal_pending(current))
1722 goto out_free_skb;
1723
1724 goto restart;
1725 }
1726
1727 /* self connect and simultaneous connect are eliminated
1728 * by rejecting TCP_LISTEN socket to avoid deadlock.
1729 */
1730 state = READ_ONCE(sk->sk_state);
1731 if (unlikely(state != TCP_CLOSE)) {
1732 err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1733 goto out_unlock;
1734 }
1735
1736 unix_state_lock(sk);
1737
1738 if (unlikely(sk->sk_state != TCP_CLOSE)) {
1739 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1740 unix_state_unlock(sk);
1741 goto out_unlock;
1742 }
1743
1744 err = security_unix_stream_connect(sk, other, newsk);
1745 if (err) {
1746 unix_state_unlock(sk);
1747 goto out_unlock;
1748 }
1749
1750 /* The way is open! Quickly set all the necessary fields... */
1751
1752 sock_hold(sk);
1753 unix_peer(newsk) = sk;
1754 newsk->sk_state = TCP_ESTABLISHED;
1755 newsk->sk_type = sk->sk_type;
1756 newsk->sk_scm_recv_flags = other->sk_scm_recv_flags;
1757 init_peercred(newsk, &peercred);
1758
1759 newu = unix_sk(newsk);
1760 newu->listener = other;
1761 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1762 otheru = unix_sk(other);
1763
1764 /* copy address information from listening to new sock
1765 *
1766 * The contents of *(otheru->addr) and otheru->path
1767 * are seen fully set up here, since we have found
1768 * otheru in hash under its lock. Insertion into the
1769 * hash chain we'd found it in had been done in an
1770 * earlier critical area protected by the chain's lock,
1771 * the same one where we'd set *(otheru->addr) contents,
1772 * as well as otheru->path and otheru->addr itself.
1773 *
1774 * Using smp_store_release() here to set newu->addr
1775 * is enough to make those stores, as well as stores
1776 * to newu->path visible to anyone who gets newu->addr
1777 * by smp_load_acquire(). IOW, the same warranties
1778 * as for unix_sock instances bound in unix_bind() or
1779 * in unix_autobind().
1780 */
1781 if (otheru->path.dentry) {
1782 path_get(&otheru->path);
1783 newu->path = otheru->path;
1784 }
1785 refcount_inc(&otheru->addr->refcnt);
1786 smp_store_release(&newu->addr, otheru->addr);
1787
1788 /* Set credentials */
1789 copy_peercred(sk, other);
1790
1791 sock->state = SS_CONNECTED;
1792 WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1793 sock_hold(newsk);
1794
1795 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1796 unix_peer(sk) = newsk;
1797
1798 unix_state_unlock(sk);
1799
1800 /* take ten and send info to listening sock */
1801 spin_lock(&other->sk_receive_queue.lock);
1802 __skb_queue_tail(&other->sk_receive_queue, skb);
1803 spin_unlock(&other->sk_receive_queue.lock);
1804 unix_state_unlock(other);
1805 other->sk_data_ready(other);
1806 sock_put(other);
1807 return 0;
1808
1809 out_unlock:
1810 unix_state_unlock(other);
1811 sock_put(other);
1812 out_free_skb:
1813 consume_skb(skb);
1814 out_free_sk:
1815 unix_release_sock(newsk, 0);
1816 out:
1817 drop_peercred(&peercred);
1818 return err;
1819 }
1820
1821 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1822 {
1823 struct unix_peercred ska_peercred = {}, skb_peercred = {};
1824 struct sock *ska = socka->sk, *skb = sockb->sk;
1825 int err;
1826
1827 err = prepare_peercred(&ska_peercred);
1828 if (err)
1829 return err;
1830
1831 err = prepare_peercred(&skb_peercred);
1832 if (err) {
1833 drop_peercred(&ska_peercred);
1834 return err;
1835 }
1836
1837 /* Join our sockets back to back */
1838 sock_hold(ska);
1839 sock_hold(skb);
1840 unix_peer(ska) = skb;
1841 unix_peer(skb) = ska;
1842 init_peercred(ska, &ska_peercred);
1843 init_peercred(skb, &skb_peercred);
1844
1845 ska->sk_state = TCP_ESTABLISHED;
1846 skb->sk_state = TCP_ESTABLISHED;
1847 socka->state = SS_CONNECTED;
1848 sockb->state = SS_CONNECTED;
1849 return 0;
1850 }
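/*
 * Illustrative userspace sketch (not part of this file): the path above is
 * what backs socketpair(2) for AF_UNIX.  Both ends come back already
 * connected, with peer credentials set, so no bind()/connect() is needed.
 *
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int sv[2];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
 *		return -1;
 *	write(sv[0], "ping", 4);	// becomes readable on sv[1]
 *	close(sv[0]);
 *	close(sv[1]);
 */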
1851
1852 static int unix_accept(struct socket *sock, struct socket *newsock,
1853 struct proto_accept_arg *arg)
1854 {
1855 struct sock *sk = sock->sk;
1856 struct sk_buff *skb;
1857 struct sock *tsk;
1858
1859 arg->err = -EOPNOTSUPP;
1860 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1861 goto out;
1862
1863 arg->err = -EINVAL;
1864 if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1865 goto out;
1866
1867 	/* If the socket state is TCP_LISTEN it cannot change (for now...),
1868 	 * so no locks are necessary.
1869 	 */
1870
1871 skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1872 &arg->err);
1873 if (!skb) {
1874 /* This means receive shutdown. */
1875 if (arg->err == 0)
1876 arg->err = -EINVAL;
1877 goto out;
1878 }
1879
1880 tsk = skb->sk;
1881 skb_free_datagram(sk, skb);
1882 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1883
1884 if (tsk->sk_type == SOCK_STREAM)
1885 set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
1886
1887 /* attach accepted sock to socket */
1888 unix_state_lock(tsk);
1889 unix_update_edges(unix_sk(tsk));
1890 newsock->state = SS_CONNECTED;
1891 sock_graft(tsk, newsock);
1892 unix_state_unlock(tsk);
1893 return 0;
1894
1895 out:
1896 return arg->err;
1897 }
1898
1899
1900 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1901 {
1902 struct sock *sk = sock->sk;
1903 struct unix_address *addr;
1904 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1905 int err = 0;
1906
1907 if (peer) {
1908 sk = unix_peer_get(sk);
1909
1910 err = -ENOTCONN;
1911 if (!sk)
1912 goto out;
1913 err = 0;
1914 } else {
1915 sock_hold(sk);
1916 }
1917
1918 addr = smp_load_acquire(&unix_sk(sk)->addr);
1919 if (!addr) {
1920 sunaddr->sun_family = AF_UNIX;
1921 sunaddr->sun_path[0] = 0;
1922 err = offsetof(struct sockaddr_un, sun_path);
1923 } else {
1924 err = addr->len;
1925 memcpy(sunaddr, addr->name, addr->len);
1926
1927 if (peer)
1928 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1929 CGROUP_UNIX_GETPEERNAME);
1930 else
1931 BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err,
1932 CGROUP_UNIX_GETSOCKNAME);
1933 }
1934 sock_put(sk);
1935 out:
1936 return err;
1937 }
1938
1939 /* The "user->unix_inflight" variable is protected by the garbage
1940 * collection lock, and we just read it locklessly here. If you go
1941 * over the limit, there might be a tiny race in actually noticing
1942 * it across threads. Tough.
1943 */
1944 static inline bool too_many_unix_fds(struct task_struct *p)
1945 {
1946 struct user_struct *user = current_user();
1947
1948 if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1949 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1950 return false;
1951 }
1952
1953 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1954 {
1955 if (too_many_unix_fds(current))
1956 return -ETOOMANYREFS;
1957
1958 UNIXCB(skb).fp = scm->fp;
1959 scm->fp = NULL;
1960
1961 if (unix_prepare_fpl(UNIXCB(skb).fp))
1962 return -ENOMEM;
1963
1964 return 0;
1965 }
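/*
 * Illustrative userspace sketch (not part of this file): the SCM_RIGHTS
 * path handled by unix_attach_fds()/unix_detach_fds() is driven by a
 * sendmsg(2) control message like the one below; "sock" is assumed to be
 * a connected AF_UNIX socket and "fd_to_send" any open descriptor.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	static int send_fd(int sock, int fd_to_send)
 *	{
 *		char data = 'F';
 *		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *		union {
 *			char buf[CMSG_SPACE(sizeof(int))];
 *			struct cmsghdr align;
 *		} u;
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
 *		};
 *		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *		cmsg->cmsg_level = SOL_SOCKET;
 *		cmsg->cmsg_type = SCM_RIGHTS;
 *		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cmsg), &fd_to_send, sizeof(int));
 *
 *		return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
 *	}
 */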
1966
1967 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1968 {
1969 scm->fp = UNIXCB(skb).fp;
1970 UNIXCB(skb).fp = NULL;
1971
1972 unix_destroy_fpl(scm->fp);
1973 }
1974
1975 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1976 {
1977 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1978 }
1979
1980 static void unix_destruct_scm(struct sk_buff *skb)
1981 {
1982 struct scm_cookie scm;
1983
1984 memset(&scm, 0, sizeof(scm));
1985 scm.pid = UNIXCB(skb).pid;
1986 if (UNIXCB(skb).fp)
1987 unix_detach_fds(&scm, skb);
1988
1989 /* Alas, it calls VFS */
1990 /* So fscking what? fput() had been SMP-safe since the last Summer */
1991 scm_destroy(&scm);
1992 sock_wfree(skb);
1993 }
1994
1995 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1996 {
1997 int err = 0;
1998
1999 UNIXCB(skb).pid = get_pid(scm->pid);
2000 UNIXCB(skb).uid = scm->creds.uid;
2001 UNIXCB(skb).gid = scm->creds.gid;
2002 UNIXCB(skb).fp = NULL;
2003 unix_get_secdata(scm, skb);
2004 if (scm->fp && send_fds)
2005 err = unix_attach_fds(scm, skb);
2006
2007 skb->destructor = unix_destruct_scm;
2008 return err;
2009 }
2010
2011 static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
2012 {
2013 scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2014 unix_set_secdata(scm, skb);
2015 }
2016
2017 /**
2018 * unix_maybe_add_creds() - Adds current task uid/gid and struct pid to skb if needed.
2019 * @skb: skb to attach creds to.
2020 * @sk: Sender sock.
2021 * @other: Receiver sock.
2022 *
2023 * Some apps rely on write() giving SCM_CREDENTIALS.
2024 * We include credentials if the source or destination socket
2025 * asserted SOCK_PASSCRED.
2026 *
2027 * Context: May sleep.
2028 * Return: On success zero, on error a negative error code is returned.
2029 */
2030 static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
2031 const struct sock *other)
2032 {
2033 if (UNIXCB(skb).pid)
2034 return 0;
2035
2036 if (unix_may_passcred(sk) || unix_may_passcred(other) ||
2037 !other->sk_socket) {
2038 struct pid *pid;
2039 int err;
2040
2041 pid = task_tgid(current);
2042 err = pidfs_register_pid(pid);
2043 if (unlikely(err))
2044 return err;
2045
2046 UNIXCB(skb).pid = get_pid(pid);
2047 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
2048 }
2049
2050 return 0;
2051 }
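/*
 * Illustrative userspace sketch (not part of this file): the credentials
 * attached above become visible to a receiver that enables SO_PASSCRED and
 * then looks for an SCM_CREDENTIALS control message on recvmsg(2).  On
 * glibc, struct ucred may require defining _GNU_SOURCE before the includes.
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	static void recv_creds(int sock)
 *	{
 *		char data;
 *		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *		union {
 *			char buf[CMSG_SPACE(sizeof(struct ucred))];
 *			struct cmsghdr align;
 *		} u;
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
 *		};
 *		int one = 1;
 *		struct cmsghdr *cmsg;
 *
 *		setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *		if (recvmsg(sock, &msg, 0) < 0)
 *			return;
 *		for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *			if (cmsg->cmsg_level == SOL_SOCKET &&
 *			    cmsg->cmsg_type == SCM_CREDENTIALS) {
 *				struct ucred *uc = (struct ucred *)CMSG_DATA(cmsg);
 *
 *				printf("pid=%d uid=%u gid=%u\n",
 *				       uc->pid, uc->uid, uc->gid);
 *			}
 *	}
 */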
2052
2053 static bool unix_skb_scm_eq(struct sk_buff *skb,
2054 struct scm_cookie *scm)
2055 {
2056 return UNIXCB(skb).pid == scm->pid &&
2057 uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
2058 gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
2059 unix_secdata_eq(scm, skb);
2060 }
2061
2062 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
2063 {
2064 struct scm_fp_list *fp = UNIXCB(skb).fp;
2065 struct unix_sock *u = unix_sk(sk);
2066
2067 if (unlikely(fp && fp->count)) {
2068 atomic_add(fp->count, &u->scm_stat.nr_fds);
2069 unix_add_edges(fp, u);
2070 }
2071 }
2072
2073 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
2074 {
2075 struct scm_fp_list *fp = UNIXCB(skb).fp;
2076 struct unix_sock *u = unix_sk(sk);
2077
2078 if (unlikely(fp && fp->count)) {
2079 atomic_sub(fp->count, &u->scm_stat.nr_fds);
2080 unix_del_edges(fp);
2081 }
2082 }
2083
2084 /*
2085 * Send AF_UNIX data.
2086 */
2087
2088 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
2089 size_t len)
2090 {
2091 struct sock *sk = sock->sk, *other = NULL;
2092 struct unix_sock *u = unix_sk(sk);
2093 struct scm_cookie scm;
2094 struct sk_buff *skb;
2095 int data_len = 0;
2096 int sk_locked;
2097 long timeo;
2098 int err;
2099
2100 err = scm_send(sock, msg, &scm, false);
2101 if (err < 0)
2102 return err;
2103
2104 wait_for_unix_gc(scm.fp);
2105
2106 if (msg->msg_flags & MSG_OOB) {
2107 err = -EOPNOTSUPP;
2108 goto out;
2109 }
2110
2111 if (msg->msg_namelen) {
2112 err = unix_validate_addr(msg->msg_name, msg->msg_namelen);
2113 if (err)
2114 goto out;
2115
2116 err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk,
2117 msg->msg_name,
2118 &msg->msg_namelen,
2119 NULL);
2120 if (err)
2121 goto out;
2122 }
2123
2124 if (unix_may_passcred(sk) && !READ_ONCE(u->addr)) {
2125 err = unix_autobind(sk);
2126 if (err)
2127 goto out;
2128 }
2129
2130 if (len > READ_ONCE(sk->sk_sndbuf) - 32) {
2131 err = -EMSGSIZE;
2132 goto out;
2133 }
2134
2135 if (len > SKB_MAX_ALLOC) {
2136 data_len = min_t(size_t,
2137 len - SKB_MAX_ALLOC,
2138 MAX_SKB_FRAGS * PAGE_SIZE);
2139 data_len = PAGE_ALIGN(data_len);
2140
2141 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
2142 }
2143
2144 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
2145 msg->msg_flags & MSG_DONTWAIT, &err,
2146 PAGE_ALLOC_COSTLY_ORDER);
2147 if (!skb)
2148 goto out;
2149
2150 err = unix_scm_to_skb(&scm, skb, true);
2151 if (err < 0)
2152 goto out_free;
2153
2154 skb_put(skb, len - data_len);
2155 skb->data_len = data_len;
2156 skb->len = len;
2157 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
2158 if (err)
2159 goto out_free;
2160
2161 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2162
2163 if (msg->msg_namelen) {
2164 lookup:
2165 other = unix_find_other(sock_net(sk), msg->msg_name,
2166 msg->msg_namelen, sk->sk_type, 0);
2167 if (IS_ERR(other)) {
2168 err = PTR_ERR(other);
2169 goto out_free;
2170 }
2171 } else {
2172 other = unix_peer_get(sk);
2173 if (!other) {
2174 err = -ENOTCONN;
2175 goto out_free;
2176 }
2177 }
2178
2179 if (sk_filter(other, skb) < 0) {
2180 /* Toss the packet but do not return any error to the sender */
2181 err = len;
2182 goto out_sock_put;
2183 }
2184
2185 err = unix_maybe_add_creds(skb, sk, other);
2186 if (err)
2187 goto out_sock_put;
2188
2189 restart:
2190 sk_locked = 0;
2191 unix_state_lock(other);
2192 restart_locked:
2193
2194 if (!unix_may_send(sk, other)) {
2195 err = -EPERM;
2196 goto out_unlock;
2197 }
2198
2199 if (unlikely(sock_flag(other, SOCK_DEAD))) {
2200 		/* Check with 1003.1g - what should a datagram error return? */
2201
2202 unix_state_unlock(other);
2203
2204 if (sk->sk_type == SOCK_SEQPACKET) {
2205 			/* We get here only when racing with unix_release_sock()
2206 			 * as it clears @other.  Unlike SOCK_DGRAM, never change
2207 			 * the state to TCP_CLOSE.
2208 			 */
2209 err = -EPIPE;
2210 goto out_sock_put;
2211 }
2212
2213 if (!sk_locked)
2214 unix_state_lock(sk);
2215
2216 if (unix_peer(sk) == other) {
2217 unix_peer(sk) = NULL;
2218 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2219
2220 WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2221 unix_state_unlock(sk);
2222
2223 unix_dgram_disconnected(sk, other);
2224 sock_put(other);
2225 err = -ECONNREFUSED;
2226 goto out_sock_put;
2227 }
2228
2229 unix_state_unlock(sk);
2230
2231 if (!msg->msg_namelen) {
2232 err = -ECONNRESET;
2233 goto out_sock_put;
2234 }
2235
2236 sock_put(other);
2237 goto lookup;
2238 }
2239
2240 if (other->sk_shutdown & RCV_SHUTDOWN) {
2241 err = -EPIPE;
2242 goto out_unlock;
2243 }
2244
2245 if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2246 err = -EPERM;
2247 goto out_unlock;
2248 }
2249
2250 if (sk->sk_type != SOCK_SEQPACKET) {
2251 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2252 if (err)
2253 goto out_unlock;
2254 }
2255
2256 /* other == sk && unix_peer(other) != sk if
2257 * - unix_peer(sk) == NULL, destination address bound to sk
2258 * - unix_peer(sk) == sk by time of get but disconnected before lock
2259 */
2260 if (other != sk &&
2261 unlikely(unix_peer(other) != sk &&
2262 unix_recvq_full_lockless(other))) {
2263 if (timeo) {
2264 timeo = unix_wait_for_peer(other, timeo);
2265
2266 err = sock_intr_errno(timeo);
2267 if (signal_pending(current))
2268 goto out_sock_put;
2269
2270 goto restart;
2271 }
2272
2273 if (!sk_locked) {
2274 unix_state_unlock(other);
2275 unix_state_double_lock(sk, other);
2276 }
2277
2278 if (unix_peer(sk) != other ||
2279 unix_dgram_peer_wake_me(sk, other)) {
2280 err = -EAGAIN;
2281 sk_locked = 1;
2282 goto out_unlock;
2283 }
2284
2285 if (!sk_locked) {
2286 sk_locked = 1;
2287 goto restart_locked;
2288 }
2289 }
2290
2291 if (unlikely(sk_locked))
2292 unix_state_unlock(sk);
2293
2294 if (sock_flag(other, SOCK_RCVTSTAMP))
2295 __net_timestamp(skb);
2296
2297 scm_stat_add(other, skb);
2298 skb_queue_tail(&other->sk_receive_queue, skb);
2299 unix_state_unlock(other);
2300 other->sk_data_ready(other);
2301 sock_put(other);
2302 scm_destroy(&scm);
2303 return len;
2304
2305 out_unlock:
2306 if (sk_locked)
2307 unix_state_unlock(sk);
2308 unix_state_unlock(other);
2309 out_sock_put:
2310 sock_put(other);
2311 out_free:
2312 consume_skb(skb);
2313 out:
2314 scm_destroy(&scm);
2315 return err;
2316 }
2317
2318 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2319 * bytes, with a minimum of a full page.
2320 */
2321 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2322
2323 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2324 static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
2325 struct scm_cookie *scm, bool fds_sent)
2326 {
2327 struct unix_sock *ousk = unix_sk(other);
2328 struct sk_buff *skb;
2329 int err;
2330
2331 skb = sock_alloc_send_skb(sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2332
2333 if (!skb)
2334 return err;
2335
2336 err = unix_scm_to_skb(scm, skb, !fds_sent);
2337 if (err < 0)
2338 goto out;
2339
2340 err = unix_maybe_add_creds(skb, sk, other);
2341 if (err)
2342 goto out;
2343
2344 skb_put(skb, 1);
2345 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2346
2347 if (err)
2348 goto out;
2349
2350 unix_state_lock(other);
2351
2352 if (sock_flag(other, SOCK_DEAD) ||
2353 (other->sk_shutdown & RCV_SHUTDOWN)) {
2354 err = -EPIPE;
2355 goto out_unlock;
2356 }
2357
2358 if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2359 err = -EPERM;
2360 goto out_unlock;
2361 }
2362
2363 scm_stat_add(other, skb);
2364
2365 spin_lock(&other->sk_receive_queue.lock);
2366 WRITE_ONCE(ousk->oob_skb, skb);
2367 WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
2368 __skb_queue_tail(&other->sk_receive_queue, skb);
2369 spin_unlock(&other->sk_receive_queue.lock);
2370
2371 sk_send_sigurg(other);
2372 unix_state_unlock(other);
2373 other->sk_data_ready(other);
2374
2375 return 0;
2376 out_unlock:
2377 unix_state_unlock(other);
2378 out:
2379 consume_skb(skb);
2380 return err;
2381 }
2382 #endif
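/*
 * Illustrative userspace sketch (not part of this file): with
 * CONFIG_AF_UNIX_OOB, a connected SOCK_STREAM pair carries one out-of-band
 * byte per MSG_OOB send, mirroring TCP urgent data.  sv[] is assumed to
 * come from socketpair(AF_UNIX, SOCK_STREAM, 0, sv).
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *
 *	// sender: the last byte of an MSG_OOB send becomes the OOB byte
 *	send(sv[0], "ab", 2, MSG_OOB);
 *
 *	// receiver: 'a' arrives in band, 'b' is fetched with MSG_OOB
 *	char c;
 *	int at_mark;
 *
 *	recv(sv[1], &c, 1, 0);			// c == 'a'
 *	ioctl(sv[1], SIOCATMARK, &at_mark);	// at_mark == 1 at the OOB mark
 *	recv(sv[1], &c, 1, MSG_OOB);		// c == 'b'
 */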
2383
2384 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2385 size_t len)
2386 {
2387 struct sock *sk = sock->sk;
2388 struct sk_buff *skb = NULL;
2389 struct sock *other = NULL;
2390 struct unix_sock *otheru;
2391 struct scm_cookie scm;
2392 bool fds_sent = false;
2393 int err, sent = 0;
2394
2395 err = scm_send(sock, msg, &scm, false);
2396 if (err < 0)
2397 return err;
2398
2399 wait_for_unix_gc(scm.fp);
2400
2401 if (msg->msg_flags & MSG_OOB) {
2402 err = -EOPNOTSUPP;
2403 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2404 if (len)
2405 len--;
2406 else
2407 #endif
2408 goto out_err;
2409 }
2410
2411 if (msg->msg_namelen) {
2412 err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2413 goto out_err;
2414 }
2415
2416 other = unix_peer(sk);
2417 if (!other) {
2418 err = -ENOTCONN;
2419 goto out_err;
2420 }
2421
2422 otheru = unix_sk(other);
2423
2424 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2425 goto out_pipe;
2426
2427 while (sent < len) {
2428 int size = len - sent;
2429 int data_len;
2430
2431 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2432 skb = sock_alloc_send_pskb(sk, 0, 0,
2433 msg->msg_flags & MSG_DONTWAIT,
2434 &err, 0);
2435 } else {
2436 /* Keep two messages in the pipe so it schedules better */
2437 size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
2438
2439 /* allow fallback to order-0 allocations */
2440 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2441
2442 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2443
2444 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2445
2446 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2447 msg->msg_flags & MSG_DONTWAIT, &err,
2448 get_order(UNIX_SKB_FRAGS_SZ));
2449 }
2450 if (!skb)
2451 goto out_err;
2452
2453 /* Only send the fds in the first buffer */
2454 err = unix_scm_to_skb(&scm, skb, !fds_sent);
2455 if (err < 0)
2456 goto out_free;
2457
2458 fds_sent = true;
2459
2460 err = unix_maybe_add_creds(skb, sk, other);
2461 if (err)
2462 goto out_free;
2463
2464 if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2465 skb->ip_summed = CHECKSUM_UNNECESSARY;
2466 err = skb_splice_from_iter(skb, &msg->msg_iter, size);
2467 if (err < 0)
2468 goto out_free;
2469
2470 size = err;
2471 refcount_add(size, &sk->sk_wmem_alloc);
2472 } else {
2473 skb_put(skb, size - data_len);
2474 skb->data_len = data_len;
2475 skb->len = size;
2476 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2477 if (err)
2478 goto out_free;
2479 }
2480
2481 unix_state_lock(other);
2482
2483 if (sock_flag(other, SOCK_DEAD) ||
2484 (other->sk_shutdown & RCV_SHUTDOWN))
2485 goto out_pipe_unlock;
2486
2487 if (UNIXCB(skb).fp && !other->sk_scm_rights) {
2488 unix_state_unlock(other);
2489 err = -EPERM;
2490 goto out_free;
2491 }
2492
2493 scm_stat_add(other, skb);
2494
2495 spin_lock(&other->sk_receive_queue.lock);
2496 WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
2497 __skb_queue_tail(&other->sk_receive_queue, skb);
2498 spin_unlock(&other->sk_receive_queue.lock);
2499
2500 unix_state_unlock(other);
2501 other->sk_data_ready(other);
2502 sent += size;
2503 }
2504
2505 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2506 if (msg->msg_flags & MSG_OOB) {
2507 err = queue_oob(sk, msg, other, &scm, fds_sent);
2508 if (err)
2509 goto out_err;
2510 sent++;
2511 }
2512 #endif
2513
2514 scm_destroy(&scm);
2515
2516 return sent;
2517
2518 out_pipe_unlock:
2519 unix_state_unlock(other);
2520 out_pipe:
2521 if (!sent && !(msg->msg_flags & MSG_NOSIGNAL))
2522 send_sig(SIGPIPE, current, 0);
2523 err = -EPIPE;
2524 out_free:
2525 consume_skb(skb);
2526 out_err:
2527 scm_destroy(&scm);
2528 return sent ? : err;
2529 }
2530
2531 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2532 size_t len)
2533 {
2534 int err;
2535 struct sock *sk = sock->sk;
2536
2537 err = sock_error(sk);
2538 if (err)
2539 return err;
2540
2541 if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2542 return -ENOTCONN;
2543
2544 if (msg->msg_namelen)
2545 msg->msg_namelen = 0;
2546
2547 return unix_dgram_sendmsg(sock, msg, len);
2548 }
2549
2550 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2551 size_t size, int flags)
2552 {
2553 struct sock *sk = sock->sk;
2554
2555 if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2556 return -ENOTCONN;
2557
2558 return unix_dgram_recvmsg(sock, msg, size, flags);
2559 }
2560
2561 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2562 {
2563 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2564
2565 if (addr) {
2566 msg->msg_namelen = addr->len;
2567 memcpy(msg->msg_name, addr->name, addr->len);
2568 }
2569 }
2570
2571 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2572 int flags)
2573 {
2574 struct scm_cookie scm;
2575 struct socket *sock = sk->sk_socket;
2576 struct unix_sock *u = unix_sk(sk);
2577 struct sk_buff *skb, *last;
2578 long timeo;
2579 int skip;
2580 int err;
2581
2582 err = -EOPNOTSUPP;
2583 if (flags&MSG_OOB)
2584 goto out;
2585
2586 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2587
2588 do {
2589 mutex_lock(&u->iolock);
2590
2591 skip = sk_peek_offset(sk, flags);
2592 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2593 &skip, &err, &last);
2594 if (skb) {
2595 if (!(flags & MSG_PEEK))
2596 scm_stat_del(sk, skb);
2597 break;
2598 }
2599
2600 mutex_unlock(&u->iolock);
2601
2602 if (err != -EAGAIN)
2603 break;
2604 } while (timeo &&
2605 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2606 &err, &timeo, last));
2607
2608 if (!skb) { /* implies iolock unlocked */
2609 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2610 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2611 (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN))
2612 err = 0;
2613 goto out;
2614 }
2615
2616 if (wq_has_sleeper(&u->peer_wait))
2617 wake_up_interruptible_sync_poll(&u->peer_wait,
2618 EPOLLOUT | EPOLLWRNORM |
2619 EPOLLWRBAND);
2620
2621 if (msg->msg_name) {
2622 unix_copy_addr(msg, skb->sk);
2623
2624 BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
2625 msg->msg_name,
2626 &msg->msg_namelen);
2627 }
2628
2629 if (size > skb->len - skip)
2630 size = skb->len - skip;
2631 else if (size < skb->len - skip)
2632 msg->msg_flags |= MSG_TRUNC;
2633
2634 err = skb_copy_datagram_msg(skb, skip, msg, size);
2635 if (err)
2636 goto out_free;
2637
2638 if (sock_flag(sk, SOCK_RCVTSTAMP))
2639 __sock_recv_timestamp(msg, sk, skb);
2640
2641 memset(&scm, 0, sizeof(scm));
2642
2643 unix_skb_to_scm(skb, &scm);
2644
2645 if (!(flags & MSG_PEEK)) {
2646 if (UNIXCB(skb).fp)
2647 unix_detach_fds(&scm, skb);
2648
2649 sk_peek_offset_bwd(sk, skb->len);
2650 } else {
2651 		/* It is questionable: on PEEK we could:
2652 		   - not return fds - good, but too simple 8)
2653 		   - return fds, and not return them again on read (old strategy,
2654 		     apparently wrong)
2655 		   - clone fds (I chose it for now, it is the most universal
2656 		     solution)
2657
2658 		   POSIX 1003.1g does not actually define this clearly
2659 		   at all.  POSIX 1003.1g doesn't define a lot of things
2660 		   clearly, however!
2661
2662 		*/
2663
2664 sk_peek_offset_fwd(sk, size);
2665
2666 if (UNIXCB(skb).fp)
2667 unix_peek_fds(&scm, skb);
2668 }
2669 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2670
2671 scm_recv_unix(sock, msg, &scm, flags);
2672
2673 out_free:
2674 skb_free_datagram(sk, skb);
2675 mutex_unlock(&u->iolock);
2676 out:
2677 return err;
2678 }
2679
2680 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2681 int flags)
2682 {
2683 struct sock *sk = sock->sk;
2684
2685 #ifdef CONFIG_BPF_SYSCALL
2686 const struct proto *prot = READ_ONCE(sk->sk_prot);
2687
2688 if (prot != &unix_dgram_proto)
2689 return prot->recvmsg(sk, msg, size, flags, NULL);
2690 #endif
2691 return __unix_dgram_recvmsg(sk, msg, size, flags);
2692 }
2693
2694 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2695 {
2696 struct unix_sock *u = unix_sk(sk);
2697 struct sk_buff *skb;
2698 int err;
2699
2700 mutex_lock(&u->iolock);
2701 skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2702 mutex_unlock(&u->iolock);
2703 if (!skb)
2704 return err;
2705
2706 return recv_actor(sk, skb);
2707 }
2708
2709 /*
2710 * Sleep until more data has arrived.  But check for races.
2711 */
2712 static long unix_stream_data_wait(struct sock *sk, long timeo,
2713 struct sk_buff *last, unsigned int last_len,
2714 bool freezable)
2715 {
2716 unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2717 struct sk_buff *tail;
2718 DEFINE_WAIT(wait);
2719
2720 unix_state_lock(sk);
2721
2722 for (;;) {
2723 prepare_to_wait(sk_sleep(sk), &wait, state);
2724
2725 tail = skb_peek_tail(&sk->sk_receive_queue);
2726 if (tail != last ||
2727 (tail && tail->len != last_len) ||
2728 sk->sk_err ||
2729 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2730 signal_pending(current) ||
2731 !timeo)
2732 break;
2733
2734 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2735 unix_state_unlock(sk);
2736 timeo = schedule_timeout(timeo);
2737 unix_state_lock(sk);
2738
2739 if (sock_flag(sk, SOCK_DEAD))
2740 break;
2741
2742 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2743 }
2744
2745 finish_wait(sk_sleep(sk), &wait);
2746 unix_state_unlock(sk);
2747 return timeo;
2748 }
2749
2750 struct unix_stream_read_state {
2751 int (*recv_actor)(struct sk_buff *, int, int,
2752 struct unix_stream_read_state *);
2753 struct socket *socket;
2754 struct msghdr *msg;
2755 struct pipe_inode_info *pipe;
2756 size_t size;
2757 int flags;
2758 unsigned int splice_flags;
2759 };
2760
2761 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2762 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2763 {
2764 struct sk_buff *oob_skb, *read_skb = NULL;
2765 struct socket *sock = state->socket;
2766 struct sock *sk = sock->sk;
2767 struct unix_sock *u = unix_sk(sk);
2768 int chunk = 1;
2769
2770 mutex_lock(&u->iolock);
2771 unix_state_lock(sk);
2772 spin_lock(&sk->sk_receive_queue.lock);
2773
2774 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2775 spin_unlock(&sk->sk_receive_queue.lock);
2776 unix_state_unlock(sk);
2777 mutex_unlock(&u->iolock);
2778 return -EINVAL;
2779 }
2780
2781 oob_skb = u->oob_skb;
2782
2783 if (!(state->flags & MSG_PEEK)) {
2784 WRITE_ONCE(u->oob_skb, NULL);
2785 WRITE_ONCE(u->inq_len, u->inq_len - 1);
2786
2787 if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
2788 !unix_skb_len(oob_skb->prev)) {
2789 read_skb = oob_skb->prev;
2790 __skb_unlink(read_skb, &sk->sk_receive_queue);
2791 }
2792 }
2793
2794 spin_unlock(&sk->sk_receive_queue.lock);
2795 unix_state_unlock(sk);
2796
2797 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2798
2799 if (!(state->flags & MSG_PEEK))
2800 UNIXCB(oob_skb).consumed += 1;
2801
2802 mutex_unlock(&u->iolock);
2803
2804 consume_skb(read_skb);
2805
2806 if (chunk < 0)
2807 return -EFAULT;
2808
2809 state->msg->msg_flags |= MSG_OOB;
2810 return 1;
2811 }
2812
2813 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2814 int flags, int copied)
2815 {
2816 struct sk_buff *read_skb = NULL, *unread_skb = NULL;
2817 struct unix_sock *u = unix_sk(sk);
2818
2819 if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
2820 return skb;
2821
2822 spin_lock(&sk->sk_receive_queue.lock);
2823
2824 if (!unix_skb_len(skb)) {
2825 if (copied && (!u->oob_skb || skb == u->oob_skb)) {
2826 skb = NULL;
2827 } else if (flags & MSG_PEEK) {
2828 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2829 } else {
2830 read_skb = skb;
2831 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2832 __skb_unlink(read_skb, &sk->sk_receive_queue);
2833 }
2834
2835 if (!skb)
2836 goto unlock;
2837 }
2838
2839 if (skb != u->oob_skb)
2840 goto unlock;
2841
2842 if (copied) {
2843 skb = NULL;
2844 } else if (!(flags & MSG_PEEK)) {
2845 WRITE_ONCE(u->oob_skb, NULL);
2846
2847 if (!sock_flag(sk, SOCK_URGINLINE)) {
2848 __skb_unlink(skb, &sk->sk_receive_queue);
2849 unread_skb = skb;
2850 skb = skb_peek(&sk->sk_receive_queue);
2851 }
2852 } else if (!sock_flag(sk, SOCK_URGINLINE)) {
2853 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2854 }
2855
2856 unlock:
2857 spin_unlock(&sk->sk_receive_queue.lock);
2858
2859 consume_skb(read_skb);
2860 kfree_skb_reason(unread_skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2861
2862 return skb;
2863 }
2864 #endif
2865
2866 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2867 {
2868 struct sk_buff_head *queue = &sk->sk_receive_queue;
2869 struct unix_sock *u = unix_sk(sk);
2870 struct sk_buff *skb;
2871 int err;
2872
2873 if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
2874 return -ENOTCONN;
2875
2876 err = sock_error(sk);
2877 if (err)
2878 return err;
2879
2880 mutex_lock(&u->iolock);
2881 spin_lock(&queue->lock);
2882
2883 skb = __skb_dequeue(queue);
2884 if (!skb) {
2885 spin_unlock(&queue->lock);
2886 mutex_unlock(&u->iolock);
2887 return -EAGAIN;
2888 }
2889
2890 WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
2891
2892 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2893 if (skb == u->oob_skb) {
2894 WRITE_ONCE(u->oob_skb, NULL);
2895 spin_unlock(&queue->lock);
2896 mutex_unlock(&u->iolock);
2897
2898 kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
2899 return -EAGAIN;
2900 }
2901 #endif
2902
2903 spin_unlock(&queue->lock);
2904 mutex_unlock(&u->iolock);
2905
2906 return recv_actor(sk, skb);
2907 }
2908
2909 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2910 bool freezable)
2911 {
2912 int noblock = state->flags & MSG_DONTWAIT;
2913 struct socket *sock = state->socket;
2914 struct msghdr *msg = state->msg;
2915 struct sock *sk = sock->sk;
2916 size_t size = state->size;
2917 int flags = state->flags;
2918 bool check_creds = false;
2919 struct scm_cookie scm;
2920 unsigned int last_len;
2921 struct unix_sock *u;
2922 int copied = 0;
2923 int err = 0;
2924 long timeo;
2925 int target;
2926 int skip;
2927
2928 if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
2929 err = -EINVAL;
2930 goto out;
2931 }
2932
2933 if (unlikely(flags & MSG_OOB)) {
2934 err = -EOPNOTSUPP;
2935 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2936 err = unix_stream_recv_urg(state);
2937 #endif
2938 goto out;
2939 }
2940
2941 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2942 timeo = sock_rcvtimeo(sk, noblock);
2943
2944 memset(&scm, 0, sizeof(scm));
2945
2946 u = unix_sk(sk);
2947
2948 redo:
2949 	/* Lock the socket to prevent the queue from being reordered
2950 	 * while we sleep copying data out to the message
2951 */
2952 mutex_lock(&u->iolock);
2953
2954 skip = max(sk_peek_offset(sk, flags), 0);
2955
2956 do {
2957 struct sk_buff *skb, *last;
2958 int chunk;
2959
2960 unix_state_lock(sk);
2961 if (sock_flag(sk, SOCK_DEAD)) {
2962 err = -ECONNRESET;
2963 goto unlock;
2964 }
2965 last = skb = skb_peek(&sk->sk_receive_queue);
2966 last_len = last ? last->len : 0;
2967
2968 again:
2969 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2970 if (skb) {
2971 skb = manage_oob(skb, sk, flags, copied);
2972 if (!skb && copied) {
2973 unix_state_unlock(sk);
2974 break;
2975 }
2976 }
2977 #endif
2978 if (skb == NULL) {
2979 if (copied >= target)
2980 goto unlock;
2981
2982 /*
2983 * POSIX 1003.1g mandates this order.
2984 */
2985
2986 err = sock_error(sk);
2987 if (err)
2988 goto unlock;
2989 if (sk->sk_shutdown & RCV_SHUTDOWN)
2990 goto unlock;
2991
2992 unix_state_unlock(sk);
2993 if (!timeo) {
2994 err = -EAGAIN;
2995 break;
2996 }
2997
2998 mutex_unlock(&u->iolock);
2999
3000 timeo = unix_stream_data_wait(sk, timeo, last,
3001 last_len, freezable);
3002
3003 if (signal_pending(current)) {
3004 err = sock_intr_errno(timeo);
3005 scm_destroy(&scm);
3006 goto out;
3007 }
3008
3009 goto redo;
3010 unlock:
3011 unix_state_unlock(sk);
3012 break;
3013 }
3014
3015 while (skip >= unix_skb_len(skb)) {
3016 skip -= unix_skb_len(skb);
3017 last = skb;
3018 last_len = skb->len;
3019 skb = skb_peek_next(skb, &sk->sk_receive_queue);
3020 if (!skb)
3021 goto again;
3022 }
3023
3024 unix_state_unlock(sk);
3025
3026 if (check_creds) {
3027 /* Never glue messages from different writers */
3028 if (!unix_skb_scm_eq(skb, &scm))
3029 break;
3030 } else if (unix_may_passcred(sk)) {
3031 /* Copy credentials */
3032 unix_skb_to_scm(skb, &scm);
3033 check_creds = true;
3034 }
3035
3036 /* Copy address just once */
3037 if (msg && msg->msg_name) {
3038 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
3039
3040 unix_copy_addr(msg, skb->sk);
3041 BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name,
3042 &msg->msg_namelen);
3043
3044 sunaddr = NULL;
3045 }
3046
3047 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
3048 chunk = state->recv_actor(skb, skip, chunk, state);
3049 if (chunk < 0) {
3050 if (copied == 0)
3051 copied = -EFAULT;
3052 break;
3053 }
3054 copied += chunk;
3055 size -= chunk;
3056
3057 /* Mark read part of skb as used */
3058 if (!(flags & MSG_PEEK)) {
3059 UNIXCB(skb).consumed += chunk;
3060
3061 sk_peek_offset_bwd(sk, chunk);
3062
3063 if (UNIXCB(skb).fp) {
3064 scm_stat_del(sk, skb);
3065 unix_detach_fds(&scm, skb);
3066 }
3067
3068 if (unix_skb_len(skb))
3069 break;
3070
3071 spin_lock(&sk->sk_receive_queue.lock);
3072 WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
3073 __skb_unlink(skb, &sk->sk_receive_queue);
3074 spin_unlock(&sk->sk_receive_queue.lock);
3075
3076 consume_skb(skb);
3077
3078 if (scm.fp)
3079 break;
3080 } else {
3081 /* It is questionable, see note in unix_dgram_recvmsg.
3082 */
3083 if (UNIXCB(skb).fp)
3084 unix_peek_fds(&scm, skb);
3085
3086 sk_peek_offset_fwd(sk, chunk);
3087
3088 if (UNIXCB(skb).fp)
3089 break;
3090
3091 skip = 0;
3092 last = skb;
3093 last_len = skb->len;
3094 unix_state_lock(sk);
3095 skb = skb_peek_next(skb, &sk->sk_receive_queue);
3096 if (skb)
3097 goto again;
3098 unix_state_unlock(sk);
3099 break;
3100 }
3101 } while (size);
3102
3103 mutex_unlock(&u->iolock);
3104 if (msg) {
3105 scm_recv_unix(sock, msg, &scm, flags);
3106
3107 if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) {
3108 msg->msg_inq = READ_ONCE(u->inq_len);
3109 put_cmsg(msg, SOL_SOCKET, SCM_INQ,
3110 sizeof(msg->msg_inq), &msg->msg_inq);
3111 }
3112 } else {
3113 scm_destroy(&scm);
3114 }
3115 out:
3116 return copied ? : err;
3117 }
3118
3119 static int unix_stream_read_actor(struct sk_buff *skb,
3120 int skip, int chunk,
3121 struct unix_stream_read_state *state)
3122 {
3123 int ret;
3124
3125 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
3126 state->msg, chunk);
3127 return ret ?: chunk;
3128 }
3129
3130 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
3131 size_t size, int flags)
3132 {
3133 struct unix_stream_read_state state = {
3134 .recv_actor = unix_stream_read_actor,
3135 .socket = sk->sk_socket,
3136 .msg = msg,
3137 .size = size,
3138 .flags = flags
3139 };
3140
3141 return unix_stream_read_generic(&state, true);
3142 }
3143
3144 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
3145 size_t size, int flags)
3146 {
3147 struct unix_stream_read_state state = {
3148 .recv_actor = unix_stream_read_actor,
3149 .socket = sock,
3150 .msg = msg,
3151 .size = size,
3152 .flags = flags
3153 };
3154
3155 #ifdef CONFIG_BPF_SYSCALL
3156 struct sock *sk = sock->sk;
3157 const struct proto *prot = READ_ONCE(sk->sk_prot);
3158
3159 if (prot != &unix_stream_proto)
3160 return prot->recvmsg(sk, msg, size, flags, NULL);
3161 #endif
3162 return unix_stream_read_generic(&state, true);
3163 }
3164
3165 static int unix_stream_splice_actor(struct sk_buff *skb,
3166 int skip, int chunk,
3167 struct unix_stream_read_state *state)
3168 {
3169 return skb_splice_bits(skb, state->socket->sk,
3170 UNIXCB(skb).consumed + skip,
3171 state->pipe, chunk, state->splice_flags);
3172 }
3173
3174 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
3175 struct pipe_inode_info *pipe,
3176 size_t size, unsigned int flags)
3177 {
3178 struct unix_stream_read_state state = {
3179 .recv_actor = unix_stream_splice_actor,
3180 .socket = sock,
3181 .pipe = pipe,
3182 .size = size,
3183 .splice_flags = flags,
3184 };
3185
3186 if (unlikely(*ppos))
3187 return -ESPIPE;
3188
3189 if (sock->file->f_flags & O_NONBLOCK ||
3190 flags & SPLICE_F_NONBLOCK)
3191 state.flags = MSG_DONTWAIT;
3192
3193 return unix_stream_read_generic(&state, false);
3194 }
3195
3196 static int unix_shutdown(struct socket *sock, int mode)
3197 {
3198 struct sock *sk = sock->sk;
3199 struct sock *other;
3200
3201 if (mode < SHUT_RD || mode > SHUT_RDWR)
3202 return -EINVAL;
3203 /* This maps:
3204 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
3205 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
3206 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
3207 */
3208 ++mode;
3209
3210 unix_state_lock(sk);
3211 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
3212 other = unix_peer(sk);
3213 if (other)
3214 sock_hold(other);
3215 unix_state_unlock(sk);
3216 sk->sk_state_change(sk);
3217
3218 if (other &&
3219 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
3220
3221 int peer_mode = 0;
3222 const struct proto *prot = READ_ONCE(other->sk_prot);
3223
3224 if (prot->unhash)
3225 prot->unhash(other);
3226 if (mode&RCV_SHUTDOWN)
3227 peer_mode |= SEND_SHUTDOWN;
3228 if (mode&SEND_SHUTDOWN)
3229 peer_mode |= RCV_SHUTDOWN;
3230 unix_state_lock(other);
3231 WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
3232 unix_state_unlock(other);
3233 other->sk_state_change(other);
3234 if (peer_mode == SHUTDOWN_MASK)
3235 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
3236 else if (peer_mode & RCV_SHUTDOWN)
3237 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
3238 }
3239 if (other)
3240 sock_put(other);
3241
3242 return 0;
3243 }
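/*
 * Illustrative userspace sketch (not part of this file): shutting down the
 * write side of one end is propagated to the peer as a receive shutdown, so
 * the peer's read() hits EOF once the queue drains.  sv[] is assumed to
 * come from socketpair(AF_UNIX, SOCK_STREAM, 0, sv).
 *
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	char buf[16];
 *
 *	write(sv[0], "bye", 3);
 *	shutdown(sv[0], SHUT_WR);	// peer gets RCV_SHUTDOWN
 *
 *	read(sv[1], buf, sizeof(buf));	// returns 3 ("bye")
 *	read(sv[1], buf, sizeof(buf));	// returns 0: EOF
 */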
3244
3245 long unix_inq_len(struct sock *sk)
3246 {
3247 struct sk_buff *skb;
3248 long amount = 0;
3249
3250 if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
3251 return -EINVAL;
3252
3253 if (sk->sk_type == SOCK_STREAM)
3254 return READ_ONCE(unix_sk(sk)->inq_len);
3255
3256 spin_lock(&sk->sk_receive_queue.lock);
3257 if (sk->sk_type == SOCK_SEQPACKET) {
3258 skb_queue_walk(&sk->sk_receive_queue, skb)
3259 amount += unix_skb_len(skb);
3260 } else {
3261 skb = skb_peek(&sk->sk_receive_queue);
3262 if (skb)
3263 amount = skb->len;
3264 }
3265 spin_unlock(&sk->sk_receive_queue.lock);
3266
3267 return amount;
3268 }
3269 EXPORT_SYMBOL_GPL(unix_inq_len);
3270
3271 long unix_outq_len(struct sock *sk)
3272 {
3273 return sk_wmem_alloc_get(sk);
3274 }
3275 EXPORT_SYMBOL_GPL(unix_outq_len);
3276
3277 static int unix_open_file(struct sock *sk)
3278 {
3279 struct file *f;
3280 int fd;
3281
3282 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3283 return -EPERM;
3284
3285 if (!smp_load_acquire(&unix_sk(sk)->addr))
3286 return -ENOENT;
3287
3288 if (!unix_sk(sk)->path.dentry)
3289 return -ENOENT;
3290
3291 fd = get_unused_fd_flags(O_CLOEXEC);
3292 if (fd < 0)
3293 return fd;
3294
3295 f = dentry_open(&unix_sk(sk)->path, O_PATH, current_cred());
3296 if (IS_ERR(f)) {
3297 put_unused_fd(fd);
3298 return PTR_ERR(f);
3299 }
3300
3301 fd_install(fd, f);
3302 return fd;
3303 }
3304
3305 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3306 {
3307 struct sock *sk = sock->sk;
3308 long amount = 0;
3309 int err;
3310
3311 switch (cmd) {
3312 case SIOCOUTQ:
3313 amount = unix_outq_len(sk);
3314 err = put_user(amount, (int __user *)arg);
3315 break;
3316 case SIOCINQ:
3317 amount = unix_inq_len(sk);
3318 if (amount < 0)
3319 err = amount;
3320 else
3321 err = put_user(amount, (int __user *)arg);
3322 break;
3323 case SIOCUNIXFILE:
3324 err = unix_open_file(sk);
3325 break;
3326 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3327 case SIOCATMARK:
3328 {
3329 struct unix_sock *u = unix_sk(sk);
3330 struct sk_buff *skb;
3331 int answ = 0;
3332
3333 mutex_lock(&u->iolock);
3334
3335 skb = skb_peek(&sk->sk_receive_queue);
3336 if (skb) {
3337 struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
3338 struct sk_buff *next_skb;
3339
3340 next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
3341
3342 if (skb == oob_skb ||
3343 (!unix_skb_len(skb) &&
3344 (!oob_skb || next_skb == oob_skb)))
3345 answ = 1;
3346 }
3347
3348 mutex_unlock(&u->iolock);
3349
3350 err = put_user(answ, (int __user *)arg);
3351 }
3352 break;
3353 #endif
3354 default:
3355 err = -ENOIOCTLCMD;
3356 break;
3357 }
3358 return err;
3359 }
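/*
 * Illustrative userspace sketch (not part of this file): SIOCINQ and
 * SIOCOUTQ work on AF_UNIX sockets as handled above.  SIOCINQ reports the
 * unread payload bytes in the receive queue; SIOCOUTQ reports the write
 * memory still charged to the sender (released as the peer consumes the
 * data).  sv[] is assumed to come from socketpair().
 *
 *	#include <linux/sockios.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *
 *	int inq = 0, outq = 0;
 *
 *	write(sv[0], "hello", 5);
 *	ioctl(sv[1], SIOCINQ, &inq);	// inq == 5
 *	ioctl(sv[0], SIOCOUTQ, &outq);	// nonzero until the peer reads
 */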
3360
3361 #ifdef CONFIG_COMPAT
3362 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3363 {
3364 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3365 }
3366 #endif
3367
3368 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3369 {
3370 struct sock *sk = sock->sk;
3371 unsigned char state;
3372 __poll_t mask;
3373 u8 shutdown;
3374
3375 sock_poll_wait(file, sock, wait);
3376 mask = 0;
3377 shutdown = READ_ONCE(sk->sk_shutdown);
3378 state = READ_ONCE(sk->sk_state);
3379
3380 /* exceptional events? */
3381 if (READ_ONCE(sk->sk_err))
3382 mask |= EPOLLERR;
3383 if (shutdown == SHUTDOWN_MASK)
3384 mask |= EPOLLHUP;
3385 if (shutdown & RCV_SHUTDOWN)
3386 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3387
3388 /* readable? */
3389 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3390 mask |= EPOLLIN | EPOLLRDNORM;
3391 if (sk_is_readable(sk))
3392 mask |= EPOLLIN | EPOLLRDNORM;
3393 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3394 if (READ_ONCE(unix_sk(sk)->oob_skb))
3395 mask |= EPOLLPRI;
3396 #endif
3397
3398 	/* Connection-based sockets need to check for termination and startup */
3399 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3400 state == TCP_CLOSE)
3401 mask |= EPOLLHUP;
3402
3403 /*
3404 	 * We also report writable when the other side has shut down the
3405 * connection. This prevents stuck sockets.
3406 */
3407 if (unix_writable(sk, state))
3408 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3409
3410 return mask;
3411 }
3412
3413 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3414 poll_table *wait)
3415 {
3416 struct sock *sk = sock->sk, *other;
3417 unsigned int writable;
3418 unsigned char state;
3419 __poll_t mask;
3420 u8 shutdown;
3421
3422 sock_poll_wait(file, sock, wait);
3423 mask = 0;
3424 shutdown = READ_ONCE(sk->sk_shutdown);
3425 state = READ_ONCE(sk->sk_state);
3426
3427 /* exceptional events? */
3428 if (READ_ONCE(sk->sk_err) ||
3429 !skb_queue_empty_lockless(&sk->sk_error_queue))
3430 mask |= EPOLLERR |
3431 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3432
3433 if (shutdown & RCV_SHUTDOWN)
3434 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3435 if (shutdown == SHUTDOWN_MASK)
3436 mask |= EPOLLHUP;
3437
3438 /* readable? */
3439 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3440 mask |= EPOLLIN | EPOLLRDNORM;
3441 if (sk_is_readable(sk))
3442 mask |= EPOLLIN | EPOLLRDNORM;
3443
3444 	/* Connection-based sockets need to check for termination and startup */
3445 if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3446 mask |= EPOLLHUP;
3447
3448 /* No write status requested, avoid expensive OUT tests. */
3449 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3450 return mask;
3451
3452 writable = unix_writable(sk, state);
3453 if (writable) {
3454 unix_state_lock(sk);
3455
3456 other = unix_peer(sk);
3457 if (other && unix_peer(other) != sk &&
3458 unix_recvq_full_lockless(other) &&
3459 unix_dgram_peer_wake_me(sk, other))
3460 writable = 0;
3461
3462 unix_state_unlock(sk);
3463 }
3464
3465 if (writable)
3466 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3467 else
3468 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3469
3470 return mask;
3471 }
3472
3473 #ifdef CONFIG_PROC_FS
3474
3475 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3476
3477 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3478 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3479 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3480
3481 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3482 {
3483 unsigned long offset = get_offset(*pos);
3484 unsigned long bucket = get_bucket(*pos);
3485 unsigned long count = 0;
3486 struct sock *sk;
3487
3488 for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3489 sk; sk = sk_next(sk)) {
3490 if (++count == offset)
3491 break;
3492 }
3493
3494 return sk;
3495 }
3496
3497 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3498 {
3499 unsigned long bucket = get_bucket(*pos);
3500 struct net *net = seq_file_net(seq);
3501 struct sock *sk;
3502
3503 while (bucket < UNIX_HASH_SIZE) {
3504 spin_lock(&net->unx.table.locks[bucket]);
3505
3506 sk = unix_from_bucket(seq, pos);
3507 if (sk)
3508 return sk;
3509
3510 spin_unlock(&net->unx.table.locks[bucket]);
3511
3512 *pos = set_bucket_offset(++bucket, 1);
3513 }
3514
3515 return NULL;
3516 }
3517
3518 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3519 loff_t *pos)
3520 {
3521 unsigned long bucket = get_bucket(*pos);
3522
3523 sk = sk_next(sk);
3524 if (sk)
3525 return sk;
3526
3527
3528 spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3529
3530 *pos = set_bucket_offset(++bucket, 1);
3531
3532 return unix_get_first(seq, pos);
3533 }
3534
3535 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3536 {
3537 if (!*pos)
3538 return SEQ_START_TOKEN;
3539
3540 return unix_get_first(seq, pos);
3541 }
3542
3543 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3544 {
3545 ++*pos;
3546
3547 if (v == SEQ_START_TOKEN)
3548 return unix_get_first(seq, pos);
3549
3550 return unix_get_next(seq, v, pos);
3551 }
3552
3553 static void unix_seq_stop(struct seq_file *seq, void *v)
3554 {
3555 struct sock *sk = v;
3556
3557 if (sk)
3558 spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3559 }
3560
3561 static int unix_seq_show(struct seq_file *seq, void *v)
3562 {
3563
3564 if (v == SEQ_START_TOKEN)
3565 seq_puts(seq, "Num RefCount Protocol Flags Type St "
3566 "Inode Path\n");
3567 else {
3568 struct sock *s = v;
3569 struct unix_sock *u = unix_sk(s);
3570 unix_state_lock(s);
3571
3572 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3573 s,
3574 refcount_read(&s->sk_refcnt),
3575 0,
3576 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3577 s->sk_type,
3578 s->sk_socket ?
3579 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3580 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3581 sock_i_ino(s));
3582
3583 if (u->addr) { // under a hash table lock here
3584 int i, len;
3585 seq_putc(seq, ' ');
3586
3587 i = 0;
3588 len = u->addr->len -
3589 offsetof(struct sockaddr_un, sun_path);
3590 if (u->addr->name->sun_path[0]) {
3591 len--;
3592 } else {
3593 seq_putc(seq, '@');
3594 i++;
3595 }
3596 for ( ; i < len; i++)
3597 seq_putc(seq, u->addr->name->sun_path[i] ?:
3598 '@');
3599 }
3600 unix_state_unlock(s);
3601 seq_putc(seq, '\n');
3602 }
3603
3604 return 0;
3605 }
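/*
 * For reference, a /proc/net/unix line produced by the show function above
 * looks roughly like the following (values are made up; the path column is
 * absent for unbound sockets and starts with '@' for abstract names):
 *
 *	0000000012345678: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 */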
3606
3607 static const struct seq_operations unix_seq_ops = {
3608 .start = unix_seq_start,
3609 .next = unix_seq_next,
3610 .stop = unix_seq_stop,
3611 .show = unix_seq_show,
3612 };
3613
3614 #ifdef CONFIG_BPF_SYSCALL
3615 struct bpf_unix_iter_state {
3616 struct seq_net_private p;
3617 unsigned int cur_sk;
3618 unsigned int end_sk;
3619 unsigned int max_sk;
3620 struct sock **batch;
3621 bool st_bucket_done;
3622 };
3623
3624 struct bpf_iter__unix {
3625 __bpf_md_ptr(struct bpf_iter_meta *, meta);
3626 __bpf_md_ptr(struct unix_sock *, unix_sk);
3627 uid_t uid __aligned(8);
3628 };
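/*
 * Illustrative sketch of a BPF iterator program attaching to this target
 * (not part of this file; assumes a libbpf-style build with vmlinux.h, in
 * the manner of the bpf selftests):
 *
 *	#include "vmlinux.h"
 *	#include <bpf/bpf_helpers.h>
 *	#include <bpf/bpf_tracing.h>
 *
 *	char _license[] SEC("license") = "GPL";
 *
 *	SEC("iter/unix")
 *	int dump_unix(struct bpf_iter__unix *ctx)
 *	{
 *		struct unix_sock *unix_sk = ctx->unix_sk;
 *
 *		if (!unix_sk)
 *			return 0;
 *
 *		BPF_SEQ_PRINTF(ctx->meta->seq, "type=%u uid=%u\n",
 *			       unix_sk->sk.sk_type, ctx->uid);
 *		return 0;
 *	}
 */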
3629
3630 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3631 struct unix_sock *unix_sk, uid_t uid)
3632 {
3633 struct bpf_iter__unix ctx;
3634
3635 meta->seq_num--; /* skip SEQ_START_TOKEN */
3636 ctx.meta = meta;
3637 ctx.unix_sk = unix_sk;
3638 ctx.uid = uid;
3639 return bpf_iter_run_prog(prog, &ctx);
3640 }
3641
3642 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3643
3644 {
3645 struct bpf_unix_iter_state *iter = seq->private;
3646 unsigned int expected = 1;
3647 struct sock *sk;
3648
3649 sock_hold(start_sk);
3650 iter->batch[iter->end_sk++] = start_sk;
3651
3652 for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3653 if (iter->end_sk < iter->max_sk) {
3654 sock_hold(sk);
3655 iter->batch[iter->end_sk++] = sk;
3656 }
3657
3658 expected++;
3659 }
3660
3661 spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3662
3663 return expected;
3664 }
3665
3666 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3667 {
3668 while (iter->cur_sk < iter->end_sk)
3669 sock_put(iter->batch[iter->cur_sk++]);
3670 }
3671
3672 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3673 unsigned int new_batch_sz)
3674 {
3675 struct sock **new_batch;
3676
3677 new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3678 GFP_USER | __GFP_NOWARN);
3679 if (!new_batch)
3680 return -ENOMEM;
3681
3682 bpf_iter_unix_put_batch(iter);
3683 kvfree(iter->batch);
3684 iter->batch = new_batch;
3685 iter->max_sk = new_batch_sz;
3686
3687 return 0;
3688 }
3689
3690 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3691 loff_t *pos)
3692 {
3693 struct bpf_unix_iter_state *iter = seq->private;
3694 unsigned int expected;
3695 bool resized = false;
3696 struct sock *sk;
3697
3698 if (iter->st_bucket_done)
3699 *pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3700
3701 again:
3702 /* Get a new batch */
3703 iter->cur_sk = 0;
3704 iter->end_sk = 0;
3705
3706 sk = unix_get_first(seq, pos);
3707 if (!sk)
3708 return NULL; /* Done */
3709
3710 expected = bpf_iter_unix_hold_batch(seq, sk);
3711
3712 if (iter->end_sk == expected) {
3713 iter->st_bucket_done = true;
3714 return sk;
3715 }
3716
3717 if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3718 resized = true;
3719 goto again;
3720 }
3721
3722 return sk;
3723 }
3724
3725 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3726 {
3727 if (!*pos)
3728 return SEQ_START_TOKEN;
3729
3730 /* bpf iter does not support lseek, so it always
3731 	 * continues from where it was stop()-ped.
3732 */
3733 return bpf_iter_unix_batch(seq, pos);
3734 }
3735
3736 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3737 {
3738 struct bpf_unix_iter_state *iter = seq->private;
3739 struct sock *sk;
3740
3741 /* Whenever seq_next() is called, the iter->cur_sk is
3742 * done with seq_show(), so advance to the next sk in
3743 * the batch.
3744 */
3745 if (iter->cur_sk < iter->end_sk)
3746 sock_put(iter->batch[iter->cur_sk++]);
3747
3748 ++*pos;
3749
3750 if (iter->cur_sk < iter->end_sk)
3751 sk = iter->batch[iter->cur_sk];
3752 else
3753 sk = bpf_iter_unix_batch(seq, pos);
3754
3755 return sk;
3756 }
3757
3758 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3759 {
3760 struct bpf_iter_meta meta;
3761 struct bpf_prog *prog;
3762 struct sock *sk = v;
3763 uid_t uid;
3764 bool slow;
3765 int ret;
3766
3767 if (v == SEQ_START_TOKEN)
3768 return 0;
3769
3770 slow = lock_sock_fast(sk);
3771
3772 if (unlikely(sk_unhashed(sk))) {
3773 ret = SEQ_SKIP;
3774 goto unlock;
3775 }
3776
3777 uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
3778 meta.seq = seq;
3779 prog = bpf_iter_get_info(&meta, false);
3780 ret = unix_prog_seq_show(prog, &meta, v, uid);
3781 unlock:
3782 unlock_sock_fast(sk, slow);
3783 return ret;
3784 }
3785
3786 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3787 {
3788 struct bpf_unix_iter_state *iter = seq->private;
3789 struct bpf_iter_meta meta;
3790 struct bpf_prog *prog;
3791
3792 if (!v) {
3793 meta.seq = seq;
3794 prog = bpf_iter_get_info(&meta, true);
3795 if (prog)
3796 (void)unix_prog_seq_show(prog, &meta, v, 0);
3797 }
3798
3799 if (iter->cur_sk < iter->end_sk)
3800 bpf_iter_unix_put_batch(iter);
3801 }
3802
3803 static const struct seq_operations bpf_iter_unix_seq_ops = {
3804 .start = bpf_iter_unix_seq_start,
3805 .next = bpf_iter_unix_seq_next,
3806 .stop = bpf_iter_unix_seq_stop,
3807 .show = bpf_iter_unix_seq_show,
3808 };
3809 #endif
3810 #endif
3811
3812 static const struct net_proto_family unix_family_ops = {
3813 .family = PF_UNIX,
3814 .create = unix_create,
3815 .owner = THIS_MODULE,
3816 };
3817
3818
3819 static int __net_init unix_net_init(struct net *net)
3820 {
3821 int i;
3822
3823 net->unx.sysctl_max_dgram_qlen = 10;
3824 if (unix_sysctl_register(net))
3825 goto out;
3826
3827 #ifdef CONFIG_PROC_FS
3828 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3829 sizeof(struct seq_net_private)))
3830 goto err_sysctl;
3831 #endif
3832
3833 net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3834 sizeof(spinlock_t), GFP_KERNEL);
3835 if (!net->unx.table.locks)
3836 goto err_proc;
3837
3838 net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3839 sizeof(struct hlist_head),
3840 GFP_KERNEL);
3841 if (!net->unx.table.buckets)
3842 goto free_locks;
3843
3844 for (i = 0; i < UNIX_HASH_SIZE; i++) {
3845 spin_lock_init(&net->unx.table.locks[i]);
3846 lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
3847 INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3848 }
3849
3850 return 0;
3851
3852 free_locks:
3853 kvfree(net->unx.table.locks);
3854 err_proc:
3855 #ifdef CONFIG_PROC_FS
3856 remove_proc_entry("unix", net->proc_net);
3857 err_sysctl:
3858 #endif
3859 unix_sysctl_unregister(net);
3860 out:
3861 return -ENOMEM;
3862 }
3863
3864 static void __net_exit unix_net_exit(struct net *net)
3865 {
3866 kvfree(net->unx.table.buckets);
3867 kvfree(net->unx.table.locks);
3868 unix_sysctl_unregister(net);
3869 remove_proc_entry("unix", net->proc_net);
3870 }
3871
3872 static struct pernet_operations unix_net_ops = {
3873 .init = unix_net_init,
3874 .exit = unix_net_exit,
3875 };
3876
3877 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3878 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3879 struct unix_sock *unix_sk, uid_t uid)
3880
3881 #define INIT_BATCH_SZ 16
3882
3883 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3884 {
3885 struct bpf_unix_iter_state *iter = priv_data;
3886 int err;
3887
3888 err = bpf_iter_init_seq_net(priv_data, aux);
3889 if (err)
3890 return err;
3891
3892 err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3893 if (err) {
3894 bpf_iter_fini_seq_net(priv_data);
3895 return err;
3896 }
3897
3898 return 0;
3899 }
3900
3901 static void bpf_iter_fini_unix(void *priv_data)
3902 {
3903 struct bpf_unix_iter_state *iter = priv_data;
3904
3905 bpf_iter_fini_seq_net(priv_data);
3906 kvfree(iter->batch);
3907 }
3908
3909 static const struct bpf_iter_seq_info unix_seq_info = {
3910 .seq_ops = &bpf_iter_unix_seq_ops,
3911 .init_seq_private = bpf_iter_init_unix,
3912 .fini_seq_private = bpf_iter_fini_unix,
3913 .seq_priv_size = sizeof(struct bpf_unix_iter_state),
3914 };
3915
3916 static const struct bpf_func_proto *
3917 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3918 const struct bpf_prog *prog)
3919 {
3920 switch (func_id) {
3921 case BPF_FUNC_setsockopt:
3922 return &bpf_sk_setsockopt_proto;
3923 case BPF_FUNC_getsockopt:
3924 return &bpf_sk_getsockopt_proto;
3925 default:
3926 return NULL;
3927 }
3928 }
3929
3930 static struct bpf_iter_reg unix_reg_info = {
3931 .target = "unix",
3932 .ctx_arg_info_size = 1,
3933 .ctx_arg_info = {
3934 { offsetof(struct bpf_iter__unix, unix_sk),
3935 PTR_TO_BTF_ID_OR_NULL },
3936 },
3937 .get_func_proto = bpf_iter_unix_get_func_proto,
3938 .seq_info = &unix_seq_info,
3939 };
3940
3941 static void __init bpf_iter_register(void)
3942 {
3943 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3944 if (bpf_iter_reg_target(&unix_reg_info))
3945 pr_warn("Warning: could not register bpf iterator unix\n");
3946 }
3947 #endif
3948
3949 static int __init af_unix_init(void)
3950 {
3951 int i, rc = -1;
3952
3953 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3954
3955 for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3956 spin_lock_init(&bsd_socket_locks[i]);
3957 INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3958 }
3959
3960 rc = proto_register(&unix_dgram_proto, 1);
3961 if (rc != 0) {
3962 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3963 goto out;
3964 }
3965
3966 rc = proto_register(&unix_stream_proto, 1);
3967 if (rc != 0) {
3968 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3969 proto_unregister(&unix_dgram_proto);
3970 goto out;
3971 }
3972
3973 sock_register(&unix_family_ops);
3974 register_pernet_subsys(&unix_net_ops);
3975 unix_bpf_build_proto();
3976
3977 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3978 bpf_iter_register();
3979 #endif
3980
3981 out:
3982 return rc;
3983 }
3984
3985 /* Later than subsys_initcall() because we depend on stuff initialised there */
3986 fs_initcall(af_unix_init);
3987