xref: /linux/net/core/sock.c (revision 173b0b5b0e865348684c02bd9cb1d22b5d46e458)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * INET		An implementation of the TCP/IP protocol suite for the LINUX
4  *		operating system.  INET is implemented using the  BSD Socket
5  *		interface as the means of communication with the user level.
6  *
7  *		Generic socket support routines. Memory allocators, socket lock/release
8  *		handler for protocols to use and generic option handler.
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Florian La Roche, <flla@stud.uni-sb.de>
13  *		Alan Cox, <A.Cox@swansea.ac.uk>
14  *
15  * Fixes:
16  *		Alan Cox	: 	Numerous verify_area() problems
17  *		Alan Cox	:	Connecting on a connecting socket
18  *					now returns an error for tcp.
19  *		Alan Cox	:	sock->protocol is set correctly.
20  *					and is not sometimes left as 0.
21  *		Alan Cox	:	connect handles icmp errors on a
22  *					connect properly. Unfortunately there
23  *					is a restart syscall nasty there. I
24  *					can't match BSD without hacking the C
25  *					library. Ideas urgently sought!
26  *		Alan Cox	:	Disallow bind() to addresses that are
27  *					not ours - especially broadcast ones!!
28  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
29  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
30  *					instead they leave that for the DESTROY timer.
31  *		Alan Cox	:	Clean up error flag in accept
32  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
33  *					was buggy. Put a remove_sock() in the handler
34  *					for memory when we hit 0. Also altered the timer
35  *					code. The ACK stuff can wait and needs major
36  *					TCP layer surgery.
37  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
38  *					and fixed timer/inet_bh race.
39  *		Alan Cox	:	Added zapped flag for TCP
40  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
41  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
43  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
44  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
46  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
47  *	Pauline Middelink	:	identd support
48  *		Alan Cox	:	Fixed connect() taking signals I think.
49  *		Alan Cox	:	SO_LINGER supported
50  *		Alan Cox	:	Error reporting fixes
51  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
52  *		Alan Cox	:	inet sockets don't set sk->type!
53  *		Alan Cox	:	Split socket option code
54  *		Alan Cox	:	Callbacks
55  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
56  *		Alex		:	Removed restriction on inet fioctl
57  *		Alan Cox	:	Splitting INET from NET core
58  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
59  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
60  *		Alan Cox	:	Split IP from generic code
61  *		Alan Cox	:	New kfree_skbmem()
62  *		Alan Cox	:	Make SO_DEBUG superuser only.
63  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
64  *					(compatibility fix)
65  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
66  *		Alan Cox	:	Allocator for a socket is settable.
67  *		Alan Cox	:	SO_ERROR includes soft errors.
68  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
69  *		Alan Cox	: 	Generic socket allocation to make hooks
70  *					easier (suggested by Craig Metz).
71  *		Michael Pall	:	SO_ERROR returns positive errno again
72  *              Steve Whitehouse:       Added default destructor to free
73  *                                      protocol private data.
74  *              Steve Whitehouse:       Added various other default routines
75  *                                      common to several socket families.
76  *              Chris Evans     :       Call suser() check last on F_SETOWN
77  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
79  *		Andi Kleen	:	Fix write_space callback
80  *		Chris Evans	:	Security fixes - signedness again
81  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
82  *
83  * To Fix:
84  */
85 
86 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87 
88 #include <asm/unaligned.h>
89 #include <linux/capability.h>
90 #include <linux/errno.h>
91 #include <linux/errqueue.h>
92 #include <linux/types.h>
93 #include <linux/socket.h>
94 #include <linux/in.h>
95 #include <linux/kernel.h>
96 #include <linux/module.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/sched.h>
100 #include <linux/sched/mm.h>
101 #include <linux/timer.h>
102 #include <linux/string.h>
103 #include <linux/sockios.h>
104 #include <linux/net.h>
105 #include <linux/mm.h>
106 #include <linux/slab.h>
107 #include <linux/interrupt.h>
108 #include <linux/poll.h>
109 #include <linux/tcp.h>
110 #include <linux/udp.h>
111 #include <linux/init.h>
112 #include <linux/highmem.h>
113 #include <linux/user_namespace.h>
114 #include <linux/static_key.h>
115 #include <linux/memcontrol.h>
116 #include <linux/prefetch.h>
117 #include <linux/compat.h>
118 #include <linux/mroute.h>
119 #include <linux/mroute6.h>
120 #include <linux/icmpv6.h>
121 
122 #include <linux/uaccess.h>
123 
124 #include <linux/netdevice.h>
125 #include <net/protocol.h>
126 #include <linux/skbuff.h>
127 #include <net/net_namespace.h>
128 #include <net/request_sock.h>
129 #include <net/sock.h>
130 #include <linux/net_tstamp.h>
131 #include <net/xfrm.h>
132 #include <linux/ipsec.h>
133 #include <net/cls_cgroup.h>
134 #include <net/netprio_cgroup.h>
135 #include <linux/sock_diag.h>
136 
137 #include <linux/filter.h>
138 #include <net/sock_reuseport.h>
139 #include <net/bpf_sk_storage.h>
140 
141 #include <trace/events/sock.h>
142 
143 #include <net/tcp.h>
144 #include <net/busy_poll.h>
145 #include <net/phonet/phonet.h>
146 
147 #include <linux/ethtool.h>
148 
149 #include "dev.h"
150 
/* File-local mutex and list head.
 * NOTE(review): by name these look like the protocol registry and its
 * guard; their users are outside the visible part of the file - confirm.
 */
151 static DEFINE_MUTEX(proto_list_mutex);
152 static LIST_HEAD(proto_list);
153 
/* Forward declarations of two file-local write-space handlers. */
154 static void sock_def_write_space_wfree(struct sock *sk);
155 static void sock_def_write_space(struct sock *sk);
156 
157 /**
158  * sk_ns_capable - General socket capability test
159  * @sk: Socket to use a capability on or through
160  * @user_ns: The user namespace of the capability to use
161  * @cap: The capability to use
162  *
163  * Test to see if the opener of the socket had when the socket was
164  * created and the current process has the capability @cap in the user
165  * namespace @user_ns.
166  */
167 bool sk_ns_capable(const struct sock *sk,
168 		   struct user_namespace *user_ns, int cap)
169 {
170 	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
171 		ns_capable(user_ns, cap);
172 }
173 EXPORT_SYMBOL(sk_ns_capable);
174 
175 /**
176  * sk_capable - Socket global capability test
177  * @sk: Socket to use a capability on or through
178  * @cap: The global capability to use
179  *
180  * Test to see if the opener of the socket had when the socket was
181  * created and the current process has the capability @cap in all user
182  * namespaces.
183  */
184 bool sk_capable(const struct sock *sk, int cap)
185 {
186 	return sk_ns_capable(sk, &init_user_ns, cap);
187 }
188 EXPORT_SYMBOL(sk_capable);
189 
190 /**
191  * sk_net_capable - Network namespace socket capability test
192  * @sk: Socket to use a capability on or through
193  * @cap: The capability to use
194  *
195  * Test to see if the opener of the socket had when the socket was created
196  * and the current process has the capability @cap over the network namespace
197  * the socket is a member of.
198  */
199 bool sk_net_capable(const struct sock *sk, int cap)
200 {
201 	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
202 }
203 EXPORT_SYMBOL(sk_net_capable);
204 
205 /*
206  * Each address family might have different locking rules, so we have
207  * one slock key per address family and separate keys for internal and
208  * userspace sockets.
209  */
/* Four lock-class key tables, each with AF_MAX slots (one per address family).
 * NOTE(review): the "kern" tables are presumably for kernel-internal sockets
 * and the "slock" tables for the socket spinlock; the code that consumes
 * them is not in the visible part of the file - confirm there.
 */
210 static struct lock_class_key af_family_keys[AF_MAX];
211 static struct lock_class_key af_family_kern_keys[AF_MAX];
212 static struct lock_class_key af_family_slock_keys[AF_MAX];
213 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
214 
215 /*
216  * Make lock validator output more readable. (we pre-construct these
217  * strings build-time, so that runtime initialization of socket
218  * locks is fast):
219  */
220 
/* _sock_locks(x) expands to a comma-separated list of string literals, one
 * per address-family name. @x must itself be a string literal: it is placed
 * directly in front of each name so the compiler concatenates the two
 * (e.g. x "AF_INET"). The last entry is x "AF_MAX".
 */
221 #define _sock_locks(x)						  \
222   x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
223   x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
224   x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
225   x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
226   x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
227   x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
228   x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
229   x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
230   x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
231   x "27"       ,	x "28"          ,	x "AF_CAN"      , \
232   x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
233   x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
234   x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
235   x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
236   x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_XDP"	, \
237   x "AF_MCTP"  , \
238   x "AF_MAX"
239 
/* Name tables generated from _sock_locks(): each holds AF_MAX+1 strings and
 * they differ only in the prefix handed to the macro.
 * NOTE(review): presumably used as lock class names for the corresponding
 * key tables; the users are outside the visible part of the file - confirm.
 */
240 static const char *const af_family_key_strings[AF_MAX+1] = {
241 	_sock_locks("sk_lock-")
242 };
243 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
244 	_sock_locks("slock-")
245 };
246 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
247 	_sock_locks("clock-")
248 };
249 
/* Same tables with a "k-" prefix. */
250 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
251 	_sock_locks("k-sk_lock-")
252 };
253 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
254 	_sock_locks("k-slock-")
255 };
256 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
257 	_sock_locks("k-clock-")
258 };
/* "rlock-", "wlock-" and "elock-" name tables. */
259 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
260 	_sock_locks("rlock-")
261 };
262 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
263 	_sock_locks("wlock-")
264 };
265 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
266 	_sock_locks("elock-")
267 };
268 
269 /*
270  * sk_callback_lock and sk queues locking rules are per-address-family,
271  * so split the lock classes by using a per-AF key:
272  */
/* Per-address-family lock-class key tables, AF_MAX slots each.
 * NOTE(review): by name, "callback" pairs with sk_callback_lock and
 * rlock/wlock/elock with the socket queues' locks; the assignments are
 * outside the visible part of the file - confirm.
 */
273 static struct lock_class_key af_callback_keys[AF_MAX];
274 static struct lock_class_key af_rlock_keys[AF_MAX];
275 static struct lock_class_key af_wlock_keys[AF_MAX];
276 static struct lock_class_key af_elock_keys[AF_MAX];
277 static struct lock_class_key af_kern_callback_keys[AF_MAX];
278 
279 /* Run time adjustable parameters. */
/* Write/read buffer limits; the *_default values start out equal to the
 * corresponding SK_*MEM_MAX constant, just like the *_max values.
 */
280 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
281 EXPORT_SYMBOL(sysctl_wmem_max);
282 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
283 EXPORT_SYMBOL(sysctl_rmem_max);
284 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
285 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
286 int sysctl_mem_pcpu_rsv __read_mostly = SK_MEMORY_PCPU_RESERVE;
287 
/* Enabled (1) by default. */
288 int sysctl_tstamp_allow_data __read_mostly = 1;
289 
/* Static key, initially false; incremented by sk_set_memalloc() and
 * decremented by sk_clear_memalloc().
 */
290 DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
291 EXPORT_SYMBOL_GPL(memalloc_socks_key);
292 
293 /**
294  * sk_set_memalloc - sets %SOCK_MEMALLOC
295  * @sk: socket to set it on
296  *
297  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
298  * It's the responsibility of the admin to adjust min_free_kbytes
299  * to meet the requirements
300  */
301 void sk_set_memalloc(struct sock *sk)
302 {
303 	sock_set_flag(sk, SOCK_MEMALLOC);
304 	sk->sk_allocation |= __GFP_MEMALLOC;
305 	static_branch_inc(&memalloc_socks_key);
306 }
307 EXPORT_SYMBOL_GPL(sk_set_memalloc);
308 
309 void sk_clear_memalloc(struct sock *sk)
310 {
311 	sock_reset_flag(sk, SOCK_MEMALLOC);
312 	sk->sk_allocation &= ~__GFP_MEMALLOC;
313 	static_branch_dec(&memalloc_socks_key);
314 
315 	/*
316 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
317 	 * progress of swapping. SOCK_MEMALLOC may be cleared while
318 	 * it has rmem allocations due to the last swapfile being deactivated
319 	 * but there is a risk that the socket is unusable due to exceeding
320 	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
321 	 */
322 	sk_mem_reclaim(sk);
323 }
324 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
325 
326 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
327 {
328 	int ret;
329 	unsigned int noreclaim_flag;
330 
331 	/* these should have been dropped before queueing */
332 	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
333 
334 	noreclaim_flag = memalloc_noreclaim_save();
335 	ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv,
336 				 tcp_v6_do_rcv,
337 				 tcp_v4_do_rcv,
338 				 sk, skb);
339 	memalloc_noreclaim_restore(noreclaim_flag);
340 
341 	return ret;
342 }
343 EXPORT_SYMBOL(__sk_backlog_rcv);
344 
345 void sk_error_report(struct sock *sk)
346 {
347 	sk->sk_error_report(sk);
348 
349 	switch (sk->sk_family) {
350 	case AF_INET:
351 		fallthrough;
352 	case AF_INET6:
353 		trace_inet_sk_error_report(sk);
354 		break;
355 	default:
356 		break;
357 	}
358 }
359 EXPORT_SYMBOL(sk_error_report);
360 
361 int sock_get_timeout(long timeo, void *optval, bool old_timeval)
362 {
363 	struct __kernel_sock_timeval tv;
364 
365 	if (timeo == MAX_SCHEDULE_TIMEOUT) {
366 		tv.tv_sec = 0;
367 		tv.tv_usec = 0;
368 	} else {
369 		tv.tv_sec = timeo / HZ;
370 		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
371 	}
372 
373 	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
374 		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
375 		*(struct old_timeval32 *)optval = tv32;
376 		return sizeof(tv32);
377 	}
378 
379 	if (old_timeval) {
380 		struct __kernel_old_timeval old_tv;
381 		old_tv.tv_sec = tv.tv_sec;
382 		old_tv.tv_usec = tv.tv_usec;
383 		*(struct __kernel_old_timeval *)optval = old_tv;
384 		return sizeof(old_tv);
385 	}
386 
387 	*(struct __kernel_sock_timeval *)optval = tv;
388 	return sizeof(tv);
389 }
390 EXPORT_SYMBOL(sock_get_timeout);
391 
392 int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
393 			   sockptr_t optval, int optlen, bool old_timeval)
394 {
395 	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
396 		struct old_timeval32 tv32;
397 
398 		if (optlen < sizeof(tv32))
399 			return -EINVAL;
400 
401 		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
402 			return -EFAULT;
403 		tv->tv_sec = tv32.tv_sec;
404 		tv->tv_usec = tv32.tv_usec;
405 	} else if (old_timeval) {
406 		struct __kernel_old_timeval old_tv;
407 
408 		if (optlen < sizeof(old_tv))
409 			return -EINVAL;
410 		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
411 			return -EFAULT;
412 		tv->tv_sec = old_tv.tv_sec;
413 		tv->tv_usec = old_tv.tv_usec;
414 	} else {
415 		if (optlen < sizeof(*tv))
416 			return -EINVAL;
417 		if (copy_from_sockptr(tv, optval, sizeof(*tv)))
418 			return -EFAULT;
419 	}
420 
421 	return 0;
422 }
423 EXPORT_SYMBOL(sock_copy_user_timeval);
424 
425 static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
426 			    bool old_timeval)
427 {
428 	struct __kernel_sock_timeval tv;
429 	int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
430 	long val;
431 
432 	if (err)
433 		return err;
434 
435 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
436 		return -EDOM;
437 
438 	if (tv.tv_sec < 0) {
439 		static int warned __read_mostly;
440 
441 		WRITE_ONCE(*timeo_p, 0);
442 		if (warned < 10 && net_ratelimit()) {
443 			warned++;
444 			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
445 				__func__, current->comm, task_pid_nr(current));
446 		}
447 		return 0;
448 	}
449 	val = MAX_SCHEDULE_TIMEOUT;
450 	if ((tv.tv_sec || tv.tv_usec) &&
451 	    (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
452 		val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
453 						    USEC_PER_SEC / HZ);
454 	WRITE_ONCE(*timeo_p, val);
455 	return 0;
456 }
457 
458 static bool sock_needs_netstamp(const struct sock *sk)
459 {
460 	switch (sk->sk_family) {
461 	case AF_UNSPEC:
462 	case AF_UNIX:
463 		return false;
464 	default:
465 		return true;
466 	}
467 }
468 
469 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
470 {
471 	if (sk->sk_flags & flags) {
472 		sk->sk_flags &= ~flags;
473 		if (sock_needs_netstamp(sk) &&
474 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
475 			net_disable_timestamp();
476 	}
477 }
478 
479 
480 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
481 {
482 	unsigned long flags;
483 	struct sk_buff_head *list = &sk->sk_receive_queue;
484 
485 	if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
486 		atomic_inc(&sk->sk_drops);
487 		trace_sock_rcvqueue_full(sk, skb);
488 		return -ENOMEM;
489 	}
490 
491 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
492 		atomic_inc(&sk->sk_drops);
493 		return -ENOBUFS;
494 	}
495 
496 	skb->dev = NULL;
497 	skb_set_owner_r(skb, sk);
498 
499 	/* we escape from rcu protected region, make sure we dont leak
500 	 * a norefcounted dst
501 	 */
502 	skb_dst_force(skb);
503 
504 	spin_lock_irqsave(&list->lock, flags);
505 	sock_skb_set_dropcount(sk, skb);
506 	__skb_queue_tail(list, skb);
507 	spin_unlock_irqrestore(&list->lock, flags);
508 
509 	if (!sock_flag(sk, SOCK_DEAD))
510 		sk->sk_data_ready(sk);
511 	return 0;
512 }
513 EXPORT_SYMBOL(__sock_queue_rcv_skb);
514 
515 int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
516 			      enum skb_drop_reason *reason)
517 {
518 	enum skb_drop_reason drop_reason;
519 	int err;
520 
521 	err = sk_filter(sk, skb);
522 	if (err) {
523 		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
524 		goto out;
525 	}
526 	err = __sock_queue_rcv_skb(sk, skb);
527 	switch (err) {
528 	case -ENOMEM:
529 		drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
530 		break;
531 	case -ENOBUFS:
532 		drop_reason = SKB_DROP_REASON_PROTO_MEM;
533 		break;
534 	default:
535 		drop_reason = SKB_NOT_DROPPED_YET;
536 		break;
537 	}
538 out:
539 	if (reason)
540 		*reason = drop_reason;
541 	return err;
542 }
543 EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
544 
545 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
546 		     const int nested, unsigned int trim_cap, bool refcounted)
547 {
548 	int rc = NET_RX_SUCCESS;
549 
550 	if (sk_filter_trim_cap(sk, skb, trim_cap))
551 		goto discard_and_relse;
552 
553 	skb->dev = NULL;
554 
555 	if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
556 		atomic_inc(&sk->sk_drops);
557 		goto discard_and_relse;
558 	}
559 	if (nested)
560 		bh_lock_sock_nested(sk);
561 	else
562 		bh_lock_sock(sk);
563 	if (!sock_owned_by_user(sk)) {
564 		/*
565 		 * trylock + unlock semantics:
566 		 */
567 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
568 
569 		rc = sk_backlog_rcv(sk, skb);
570 
571 		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
572 	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
573 		bh_unlock_sock(sk);
574 		atomic_inc(&sk->sk_drops);
575 		goto discard_and_relse;
576 	}
577 
578 	bh_unlock_sock(sk);
579 out:
580 	if (refcounted)
581 		sock_put(sk);
582 	return rc;
583 discard_and_relse:
584 	kfree_skb(skb);
585 	goto out;
586 }
587 EXPORT_SYMBOL(__sk_receive_skb);
588 
589 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
590 							  u32));
591 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
592 							   u32));
593 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
594 {
595 	struct dst_entry *dst = __sk_dst_get(sk);
596 
597 	if (dst && dst->obsolete &&
598 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
599 			       dst, cookie) == NULL) {
600 		sk_tx_queue_clear(sk);
601 		WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
602 		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
603 		dst_release(dst);
604 		return NULL;
605 	}
606 
607 	return dst;
608 }
609 EXPORT_SYMBOL(__sk_dst_check);
610 
611 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
612 {
613 	struct dst_entry *dst = sk_dst_get(sk);
614 
615 	if (dst && dst->obsolete &&
616 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
617 			       dst, cookie) == NULL) {
618 		sk_dst_reset(sk);
619 		dst_release(dst);
620 		return NULL;
621 	}
622 
623 	return dst;
624 }
625 EXPORT_SYMBOL(sk_dst_check);
626 
627 static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
628 {
629 	int ret = -ENOPROTOOPT;
630 #ifdef CONFIG_NETDEVICES
631 	struct net *net = sock_net(sk);
632 
633 	/* Sorry... */
634 	ret = -EPERM;
635 	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
636 		goto out;
637 
638 	ret = -EINVAL;
639 	if (ifindex < 0)
640 		goto out;
641 
642 	/* Paired with all READ_ONCE() done locklessly. */
643 	WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
644 
645 	if (sk->sk_prot->rehash)
646 		sk->sk_prot->rehash(sk);
647 	sk_dst_reset(sk);
648 
649 	ret = 0;
650 
651 out:
652 #endif
653 
654 	return ret;
655 }
656 
657 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
658 {
659 	int ret;
660 
661 	if (lock_sk)
662 		lock_sock(sk);
663 	ret = sock_bindtoindex_locked(sk, ifindex);
664 	if (lock_sk)
665 		release_sock(sk);
666 
667 	return ret;
668 }
669 EXPORT_SYMBOL(sock_bindtoindex);
670 
671 static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
672 {
673 	int ret = -ENOPROTOOPT;
674 #ifdef CONFIG_NETDEVICES
675 	struct net *net = sock_net(sk);
676 	char devname[IFNAMSIZ];
677 	int index;
678 
679 	ret = -EINVAL;
680 	if (optlen < 0)
681 		goto out;
682 
683 	/* Bind this socket to a particular device like "eth0",
684 	 * as specified in the passed interface name. If the
685 	 * name is "" or the option length is zero the socket
686 	 * is not bound.
687 	 */
688 	if (optlen > IFNAMSIZ - 1)
689 		optlen = IFNAMSIZ - 1;
690 	memset(devname, 0, sizeof(devname));
691 
692 	ret = -EFAULT;
693 	if (copy_from_sockptr(devname, optval, optlen))
694 		goto out;
695 
696 	index = 0;
697 	if (devname[0] != '\0') {
698 		struct net_device *dev;
699 
700 		rcu_read_lock();
701 		dev = dev_get_by_name_rcu(net, devname);
702 		if (dev)
703 			index = dev->ifindex;
704 		rcu_read_unlock();
705 		ret = -ENODEV;
706 		if (!dev)
707 			goto out;
708 	}
709 
710 	sockopt_lock_sock(sk);
711 	ret = sock_bindtoindex_locked(sk, index);
712 	sockopt_release_sock(sk);
713 out:
714 #endif
715 
716 	return ret;
717 }
718 
719 static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
720 				sockptr_t optlen, int len)
721 {
722 	int ret = -ENOPROTOOPT;
723 #ifdef CONFIG_NETDEVICES
724 	int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
725 	struct net *net = sock_net(sk);
726 	char devname[IFNAMSIZ];
727 
728 	if (bound_dev_if == 0) {
729 		len = 0;
730 		goto zero;
731 	}
732 
733 	ret = -EINVAL;
734 	if (len < IFNAMSIZ)
735 		goto out;
736 
737 	ret = netdev_get_name(net, devname, bound_dev_if);
738 	if (ret)
739 		goto out;
740 
741 	len = strlen(devname) + 1;
742 
743 	ret = -EFAULT;
744 	if (copy_to_sockptr(optval, devname, len))
745 		goto out;
746 
747 zero:
748 	ret = -EFAULT;
749 	if (copy_to_sockptr(optlen, &len, sizeof(int)))
750 		goto out;
751 
752 	ret = 0;
753 
754 out:
755 #endif
756 
757 	return ret;
758 }
759 
760 bool sk_mc_loop(const struct sock *sk)
761 {
762 	if (dev_recursion_level())
763 		return false;
764 	if (!sk)
765 		return true;
766 	/* IPV6_ADDRFORM can change sk->sk_family under us. */
767 	switch (READ_ONCE(sk->sk_family)) {
768 	case AF_INET:
769 		return inet_test_bit(MC_LOOP, sk);
770 #if IS_ENABLED(CONFIG_IPV6)
771 	case AF_INET6:
772 		return inet6_test_bit(MC6_LOOP, sk);
773 #endif
774 	}
775 	WARN_ON_ONCE(1);
776 	return true;
777 }
778 EXPORT_SYMBOL(sk_mc_loop);
779 
780 void sock_set_reuseaddr(struct sock *sk)
781 {
782 	lock_sock(sk);
783 	sk->sk_reuse = SK_CAN_REUSE;
784 	release_sock(sk);
785 }
786 EXPORT_SYMBOL(sock_set_reuseaddr);
787 
788 void sock_set_reuseport(struct sock *sk)
789 {
790 	lock_sock(sk);
791 	sk->sk_reuseport = true;
792 	release_sock(sk);
793 }
794 EXPORT_SYMBOL(sock_set_reuseport);
795 
796 void sock_no_linger(struct sock *sk)
797 {
798 	lock_sock(sk);
799 	WRITE_ONCE(sk->sk_lingertime, 0);
800 	sock_set_flag(sk, SOCK_LINGER);
801 	release_sock(sk);
802 }
803 EXPORT_SYMBOL(sock_no_linger);
804 
805 void sock_set_priority(struct sock *sk, u32 priority)
806 {
807 	WRITE_ONCE(sk->sk_priority, priority);
808 }
809 EXPORT_SYMBOL(sock_set_priority);
810 
811 void sock_set_sndtimeo(struct sock *sk, s64 secs)
812 {
813 	lock_sock(sk);
814 	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
815 		WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
816 	else
817 		WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
818 	release_sock(sk);
819 }
820 EXPORT_SYMBOL(sock_set_sndtimeo);
821 
822 static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
823 {
824 	if (val)  {
825 		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
826 		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
827 		sock_set_flag(sk, SOCK_RCVTSTAMP);
828 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
829 	} else {
830 		sock_reset_flag(sk, SOCK_RCVTSTAMP);
831 		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
832 	}
833 }
834 
835 void sock_enable_timestamps(struct sock *sk)
836 {
837 	lock_sock(sk);
838 	__sock_set_timestamps(sk, true, false, true);
839 	release_sock(sk);
840 }
841 EXPORT_SYMBOL(sock_enable_timestamps);
842 
843 void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
844 {
845 	switch (optname) {
846 	case SO_TIMESTAMP_OLD:
847 		__sock_set_timestamps(sk, valbool, false, false);
848 		break;
849 	case SO_TIMESTAMP_NEW:
850 		__sock_set_timestamps(sk, valbool, true, false);
851 		break;
852 	case SO_TIMESTAMPNS_OLD:
853 		__sock_set_timestamps(sk, valbool, false, true);
854 		break;
855 	case SO_TIMESTAMPNS_NEW:
856 		__sock_set_timestamps(sk, valbool, true, true);
857 		break;
858 	}
859 }
860 
861 static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
862 {
863 	struct net *net = sock_net(sk);
864 	struct net_device *dev = NULL;
865 	bool match = false;
866 	int *vclock_index;
867 	int i, num;
868 
869 	if (sk->sk_bound_dev_if)
870 		dev = dev_get_by_index(net, sk->sk_bound_dev_if);
871 
872 	if (!dev) {
873 		pr_err("%s: sock not bind to device\n", __func__);
874 		return -EOPNOTSUPP;
875 	}
876 
877 	num = ethtool_get_phc_vclocks(dev, &vclock_index);
878 	dev_put(dev);
879 
880 	for (i = 0; i < num; i++) {
881 		if (*(vclock_index + i) == phc_index) {
882 			match = true;
883 			break;
884 		}
885 	}
886 
887 	if (num > 0)
888 		kfree(vclock_index);
889 
890 	if (!match)
891 		return -EINVAL;
892 
893 	WRITE_ONCE(sk->sk_bind_phc, phc_index);
894 
895 	return 0;
896 }
897 
898 int sock_set_timestamping(struct sock *sk, int optname,
899 			  struct so_timestamping timestamping)
900 {
901 	int val = timestamping.flags;
902 	int ret;
903 
904 	if (val & ~SOF_TIMESTAMPING_MASK)
905 		return -EINVAL;
906 
907 	if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
908 	    !(val & SOF_TIMESTAMPING_OPT_ID))
909 		return -EINVAL;
910 
911 	if (val & SOF_TIMESTAMPING_OPT_ID &&
912 	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
913 		if (sk_is_tcp(sk)) {
914 			if ((1 << sk->sk_state) &
915 			    (TCPF_CLOSE | TCPF_LISTEN))
916 				return -EINVAL;
917 			if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
918 				atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
919 			else
920 				atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
921 		} else {
922 			atomic_set(&sk->sk_tskey, 0);
923 		}
924 	}
925 
926 	if (val & SOF_TIMESTAMPING_OPT_STATS &&
927 	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
928 		return -EINVAL;
929 
930 	if (val & SOF_TIMESTAMPING_BIND_PHC) {
931 		ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
932 		if (ret)
933 			return ret;
934 	}
935 
936 	WRITE_ONCE(sk->sk_tsflags, val);
937 	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
938 
939 	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
940 		sock_enable_timestamp(sk,
941 				      SOCK_TIMESTAMPING_RX_SOFTWARE);
942 	else
943 		sock_disable_timestamp(sk,
944 				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
945 	return 0;
946 }
947 
948 void sock_set_keepalive(struct sock *sk)
949 {
950 	lock_sock(sk);
951 	if (sk->sk_prot->keepalive)
952 		sk->sk_prot->keepalive(sk, true);
953 	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
954 	release_sock(sk);
955 }
956 EXPORT_SYMBOL(sock_set_keepalive);
957 
958 static void __sock_set_rcvbuf(struct sock *sk, int val)
959 {
960 	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
961 	 * as a negative value.
962 	 */
963 	val = min_t(int, val, INT_MAX / 2);
964 	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
965 
966 	/* We double it on the way in to account for "struct sk_buff" etc.
967 	 * overhead.   Applications assume that the SO_RCVBUF setting they make
968 	 * will allow that much actual data to be received on that socket.
969 	 *
970 	 * Applications are unaware that "struct sk_buff" and other overheads
971 	 * allocate from the receive buffer during socket buffer allocation.
972 	 *
973 	 * And after considering the possible alternatives, returning the value
974 	 * we actually used in getsockopt is the most desirable behavior.
975 	 */
976 	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
977 }
978 
/* Locked wrapper around __sock_set_rcvbuf(). */
void sock_set_rcvbuf(struct sock *sk, int val)
{
	lock_sock(sk);

	__sock_set_rcvbuf(sk, val);

	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_rcvbuf);
986 
987 static void __sock_set_mark(struct sock *sk, u32 val)
988 {
989 	if (val != sk->sk_mark) {
990 		WRITE_ONCE(sk->sk_mark, val);
991 		sk_dst_reset(sk);
992 	}
993 }
994 
995 void sock_set_mark(struct sock *sk, u32 val)
996 {
997 	lock_sock(sk);
998 	__sock_set_mark(sk, val);
999 	release_sock(sk);
1000 }
1001 EXPORT_SYMBOL(sock_set_mark);
1002 
1003 static void sock_release_reserved_memory(struct sock *sk, int bytes)
1004 {
1005 	/* Round down bytes to multiple of pages */
1006 	bytes = round_down(bytes, PAGE_SIZE);
1007 
1008 	WARN_ON(bytes > sk->sk_reserved_mem);
1009 	WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
1010 	sk_mem_reclaim(sk);
1011 }
1012 
1013 static int sock_reserve_memory(struct sock *sk, int bytes)
1014 {
1015 	long allocated;
1016 	bool charged;
1017 	int pages;
1018 
1019 	if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
1020 		return -EOPNOTSUPP;
1021 
1022 	if (!bytes)
1023 		return 0;
1024 
1025 	pages = sk_mem_pages(bytes);
1026 
1027 	/* pre-charge to memcg */
1028 	charged = mem_cgroup_charge_skmem(sk->sk_memcg, pages,
1029 					  GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1030 	if (!charged)
1031 		return -ENOMEM;
1032 
1033 	/* pre-charge to forward_alloc */
1034 	sk_memory_allocated_add(sk, pages);
1035 	allocated = sk_memory_allocated(sk);
1036 	/* If the system goes into memory pressure with this
1037 	 * precharge, give up and return error.
1038 	 */
1039 	if (allocated > sk_prot_mem_limits(sk, 1)) {
1040 		sk_memory_allocated_sub(sk, pages);
1041 		mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
1042 		return -ENOMEM;
1043 	}
1044 	sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
1045 
1046 	WRITE_ONCE(sk->sk_reserved_mem,
1047 		   sk->sk_reserved_mem + (pages << PAGE_SHIFT));
1048 
1049 	return 0;
1050 }
1051 
/* Take the socket lock for a sockopt operation, unless running in a bpf
 * context.
 */
void sockopt_lock_sock(struct sock *sk)
{
	/* When current->bpf_ctx is set, the setsockopt is called from a
	 * bpf prog, and bpf has already made sure the sk lock was acquired
	 * before calling setsockopt().
	 */
	if (!has_current_bpf_ctx())
		lock_sock(sk);
}
EXPORT_SYMBOL(sockopt_lock_sock);
1064 
/*
 * Counterpart of sockopt_lock_sock(): release the socket lock, except
 * when has_current_bpf_ctx() is true, in which case sockopt_lock_sock()
 * did not take it either.
 */
void sockopt_release_sock(struct sock *sk)
{
	if (!has_current_bpf_ctx())
		release_sock(sk);
}
1072 EXPORT_SYMBOL(sockopt_release_sock);
1073 
1074 bool sockopt_ns_capable(struct user_namespace *ns, int cap)
1075 {
1076 	return has_current_bpf_ctx() || ns_capable(ns, cap);
1077 }
1078 EXPORT_SYMBOL(sockopt_ns_capable);
1079 
1080 bool sockopt_capable(int cap)
1081 {
1082 	return has_current_bpf_ctx() || capable(cap);
1083 }
1084 EXPORT_SYMBOL(sockopt_capable);
1085 
/*
 *	sk_setsockopt - set a generic, socket-level option on @sk
 *	@sk: socket to modify
 *	@level: option level; not examined by this function
 *	@optname: SO_* option to set
 *	@optval: pointer to the option value
 *	@optlen: size in bytes of the value at @optval
 *
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 *
 *	Apart from SO_BINDTODEVICE, every option needs at least sizeof(int)
 *	bytes of argument; that int is copied into @val up front and
 *	@valbool is its truth value. Options that take a larger structure
 *	re-read it from @optval themselves.
 *
 *	The first switch handles options that return directly without the
 *	socket lock; everything else runs between sockopt_lock_sock() and
 *	sockopt_release_sock().
 *
 *	Returns 0 on success or a negative errno; -ENOPROTOOPT for an
 *	option that is not handled here.
 */

int sk_setsockopt(struct sock *sk, int level, int optname,
		  sockptr_t optval, unsigned int optlen)
{
	struct so_timestamping timestamping;
	struct socket *sock = sk->sk_socket;
	struct sock_txtime sk_txtime;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	SO_BINDTODEVICE is handed optval/optlen as they are,
	 *	before the generic int argument is validated and copied in.
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_setbindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (copy_from_sockptr(&val, optval, sizeof(val)))
		return -EFAULT;

	valbool = val ? 1 : 0;

	/* handle options which do not require locking the socket. */
	switch (optname) {
	case SO_PRIORITY:
		/* Values 0..6 are open to everyone, anything else needs
		 * CAP_NET_RAW or CAP_NET_ADMIN in the socket's netns.
		 */
		if ((val >= 0 && val <= 6) ||
		    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
		    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			sock_set_priority(sk, val);
			return 0;
		}
		return -EPERM;
	case SO_PASSSEC:
		assign_bit(SOCK_PASSSEC, &sock->flags, valbool);
		return 0;
	case SO_PASSCRED:
		assign_bit(SOCK_PASSCRED, &sock->flags, valbool);
		return 0;
	case SO_PASSPIDFD:
		assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool);
		return 0;
	/* These are only readable, through sk_getsockopt(). */
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		return -ENOPROTOOPT;
#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		if (val < 0)
			return -EINVAL;
		WRITE_ONCE(sk->sk_ll_usec, val);
		return 0;
	case SO_PREFER_BUSY_POLL:
		/* Enabling needs CAP_NET_ADMIN; disabling does not. */
		if (valbool && !sockopt_capable(CAP_NET_ADMIN))
			return -EPERM;
		WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
		return 0;
	case SO_BUSY_POLL_BUDGET:
		/* Raising the budget above its current value needs
		 * CAP_NET_ADMIN; the range check comes after that.
		 */
		if (val > READ_ONCE(sk->sk_busy_poll_budget) &&
		    !sockopt_capable(CAP_NET_ADMIN))
			return -EPERM;
		if (val < 0 || val > U16_MAX)
			return -EINVAL;
		WRITE_ONCE(sk->sk_busy_poll_budget, val);
		return 0;
#endif
	case SO_MAX_PACING_RATE:
		{
		/* An int argument of all ones maps to ~0UL. */
		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
		unsigned long pacing_rate;

		/* When unsigned long is wider than int and the caller
		 * supplied enough bytes, re-read the full-width value.
		 */
		if (sizeof(ulval) != sizeof(val) &&
		    optlen >= sizeof(ulval) &&
		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
			return -EFAULT;
		}
		if (ulval != ~0UL)
			cmpxchg(&sk->sk_pacing_status,
				SK_PACING_NONE,
				SK_PACING_NEEDED);
		/* Pairs with READ_ONCE() from sk_getsockopt() */
		WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
		/* Lower the current pacing rate if it exceeds the new cap. */
		pacing_rate = READ_ONCE(sk->sk_pacing_rate);
		if (ulval < pacing_rate)
			WRITE_ONCE(sk->sk_pacing_rate, ulval);
		return 0;
		}
	case SO_TXREHASH:
		if (val < -1 || val > 1)
			return -EINVAL;
		/* "Default" is resolved to the per-netns sysctl value. */
		if ((u8)val == SOCK_TXREHASH_DEFAULT)
			val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
		/* Paired with READ_ONCE() in tcp_rtx_synack()
		 * and sk_getsockopt().
		 */
		WRITE_ONCE(sk->sk_txrehash, (u8)val);
		return 0;
	case SO_PEEK_OFF:
		{
		int (*set_peek_off)(struct sock *sk, int val);

		/* Supported only if the socket's ops provide a handler. */
		set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
		if (set_peek_off)
			ret = set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		return ret;
		}
	}

	sockopt_lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !sockopt_capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_REUSEPORT:
		sk->sk_reuseport = valbool;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		sk_dst_reset(sk);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
		/* Ensure val * 2 fits into an int, to prevent max_t()
		 * from treating it as a negative value.
		 */
		val = min_t(int, val, INT_MAX / 2);
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		WRITE_ONCE(sk->sk_sndbuf,
			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		/* Same as SO_SNDBUF but skips the sysctl_wmem_max clamp. */
		if (!sockopt_capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		/* No negative values (to prevent underflow, as val will be
		 * multiplied by 2).
		 */
		if (val < 0)
			val = 0;
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
		break;

	case SO_RCVBUFFORCE:
		/* Same as SO_RCVBUF but skips the sysctl_rmem_max clamp. */
		if (!sockopt_capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		/* No negative values (to prevent underflow, as val will be
		 * multiplied by 2).
		 */
		__sock_set_rcvbuf(sk, max(val, 0));
		break;

	case SO_KEEPALIVE:
		/* Let the protocol react first, if it has a hook. */
		if (sk->sk_prot->keepalive)
			sk->sk_prot->keepalive(sk, valbool);
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check_tx = valbool;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff) {
			sock_reset_flag(sk, SOCK_LINGER);
		} else {
			unsigned long t_sec = ling.l_linger;

			/* Clamp so that t_sec * HZ cannot exceed
			 * MAX_SCHEDULE_TIMEOUT.
			 */
			if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
				WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
			else
				WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		/* Accepted, but has no effect. */
		break;

	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		sock_set_timestamp(sk, optname, valbool);
		break;

	case SO_TIMESTAMPING_NEW:
	case SO_TIMESTAMPING_OLD:
		/* A full struct so_timestamping is used as is; any other
		 * length is treated as a plain int of flags with the
		 * remaining fields zeroed.
		 */
		if (optlen == sizeof(timestamping)) {
			if (copy_from_sockptr(&timestamping, optval,
					      sizeof(timestamping))) {
				ret = -EFAULT;
				break;
			}
		} else {
			memset(&timestamping, 0, sizeof(timestamping));
			timestamping.flags = val;
		}
		ret = sock_set_timestamping(sk, optname, timestamping);
		break;

	case SO_RCVLOWAT:
		{
		int (*set_rcvlowat)(struct sock *sk, int val) = NULL;

		if (val < 0)
			val = INT_MAX;
		if (sock)
			set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
		/* Without a protocol handler store the value directly;
		 * 0 is stored as 1.
		 */
		if (set_rcvlowat)
			ret = set_rcvlowat(sk, val);
		else
			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
		break;
		}
	case SO_RCVTIMEO_OLD:
	case SO_RCVTIMEO_NEW:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
				       optlen, optname == SO_RCVTIMEO_OLD);
		break;

	case SO_SNDTIMEO_OLD:
	case SO_SNDTIMEO_NEW:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
				       optlen, optname == SO_SNDTIMEO_OLD);
		break;

	case SO_ATTACH_FILTER: {
		struct sock_fprog fprog;

		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
		if (!ret)
			ret = sk_attach_filter(&fprog, sk);
		break;
	}
	case SO_ATTACH_BPF:
		/* The argument must be exactly a u32 (passed on as ufd). */
		ret = -EINVAL;
		if (optlen == sizeof(u32)) {
			u32 ufd;

			ret = -EFAULT;
			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
				break;

			ret = sk_attach_bpf(ufd, sk);
		}
		break;

	case SO_ATTACH_REUSEPORT_CBPF: {
		struct sock_fprog fprog;

		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
		if (!ret)
			ret = sk_reuseport_attach_filter(&fprog, sk);
		break;
	}
	case SO_ATTACH_REUSEPORT_EBPF:
		/* The argument must be exactly a u32 (passed on as ufd). */
		ret = -EINVAL;
		if (optlen == sizeof(u32)) {
			u32 ufd;

			ret = -EFAULT;
			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
				break;

			ret = sk_reuseport_attach_bpf(ufd, sk);
		}
		break;

	case SO_DETACH_REUSEPORT_BPF:
		ret = reuseport_detach_prog(sk);
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_LOCK_FILTER:
		/* Once set, the lock cannot be cleared again. */
		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
			ret = -EPERM;
		else
			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
		break;

	case SO_MARK:
		if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		__sock_set_mark(sk, val);
		break;
	case SO_RCVMARK:
		sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
		break;

	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	case SO_SELECT_ERR_QUEUE:
		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
		break;


	case SO_INCOMING_CPU:
		reuseport_update_incoming_cpu(sk, val);
		break;

	case SO_CNX_ADVICE:
		/* Only the value 1 has an effect. */
		if (val == 1)
			dst_negative_advice(sk);
		break;

	case SO_ZEROCOPY:
		/* Accepted for PF_INET/PF_INET6 sockets that are TCP or
		 * UDP datagram sockets, and for PF_RDS; everything else
		 * gets -EOPNOTSUPP.
		 */
		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
			if (!(sk_is_tcp(sk) ||
			      (sk->sk_type == SOCK_DGRAM &&
			       sk->sk_protocol == IPPROTO_UDP)))
				ret = -EOPNOTSUPP;
		} else if (sk->sk_family != PF_RDS) {
			ret = -EOPNOTSUPP;
		}
		if (!ret) {
			if (val < 0 || val > 1)
				ret = -EINVAL;
			else
				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
		}
		break;

	case SO_TXTIME:
		/* Requires exactly a struct sock_txtime with known flags. */
		if (optlen != sizeof(struct sock_txtime)) {
			ret = -EINVAL;
			break;
		} else if (copy_from_sockptr(&sk_txtime, optval,
			   sizeof(struct sock_txtime))) {
			ret = -EFAULT;
			break;
		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
			ret = -EINVAL;
			break;
		}
		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
		 * scheduler has enough safe guards.
		 */
		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		sock_valbool_flag(sk, SOCK_TXTIME, true);
		sk->sk_clockid = sk_txtime.clockid;
		sk->sk_txtime_deadline_mode =
			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
		sk->sk_txtime_report_errors =
			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
		break;

	case SO_BINDTOIFINDEX:
		ret = sock_bindtoindex_locked(sk, val);
		break;

	case SO_BUF_LOCK:
		if (val & ~SOCK_BUF_LOCK_MASK) {
			ret = -EINVAL;
			break;
		}
		/* Replace only the bits covered by SOCK_BUF_LOCK_MASK. */
		sk->sk_userlocks = val | (sk->sk_userlocks &
					  ~SOCK_BUF_LOCK_MASK);
		break;

	case SO_RESERVE_MEM:
	{
		int delta;

		if (val < 0) {
			ret = -EINVAL;
			break;
		}

		/* val is the new target; grow or shrink the current
		 * reservation by the difference.
		 */
		delta = val - sk->sk_reserved_mem;
		if (delta < 0)
			sock_release_reserved_memory(sk, -delta);
		else
			ret = sock_reserve_memory(sk, delta);
		break;
	}

	default:
		ret = -ENOPROTOOPT;
		break;
	}
	sockopt_release_sock(sk);
	return ret;
}
1545 
/*
 * sock_setsockopt - struct socket front end for sk_setsockopt()
 *
 * Forwards all arguments unchanged, substituting sock->sk for @sock, and
 * returns whatever sk_setsockopt() returns.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    sockptr_t optval, unsigned int optlen)
{
	return sk_setsockopt(sock->sk, level, optname,
			     optval, optlen);
}
1552 EXPORT_SYMBOL(sock_setsockopt);
1553 
1554 static const struct cred *sk_get_peer_cred(struct sock *sk)
1555 {
1556 	const struct cred *cred;
1557 
1558 	spin_lock(&sk->sk_peer_lock);
1559 	cred = get_cred(sk->sk_peer_cred);
1560 	spin_unlock(&sk->sk_peer_lock);
1561 
1562 	return cred;
1563 }
1564 
1565 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1566 			  struct ucred *ucred)
1567 {
1568 	ucred->pid = pid_vnr(pid);
1569 	ucred->uid = ucred->gid = -1;
1570 	if (cred) {
1571 		struct user_namespace *current_ns = current_user_ns();
1572 
1573 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1574 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
1575 	}
1576 }
1577 
1578 static int groups_to_user(sockptr_t dst, const struct group_info *src)
1579 {
1580 	struct user_namespace *user_ns = current_user_ns();
1581 	int i;
1582 
1583 	for (i = 0; i < src->ngroups; i++) {
1584 		gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
1585 
1586 		if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
1587 			return -EFAULT;
1588 	}
1589 
1590 	return 0;
1591 }
1592 
/*
 * sk_getsockopt - read a generic, socket-level option from @sk
 * @sk: socket to query
 * @level: option level; not examined by this function
 * @optname: SO_* option to read
 * @optval: destination for the option value
 * @optlen: in: size of the buffer at @optval; out: number of bytes reported
 *
 * Most options fill in the zeroed scratch union @v and, when their result
 * is not an int, set @lv to its size. The common tail then copies
 * min(len, lv) bytes of @v to @optval and writes that length back through
 * @optlen. Options that build their result elsewhere copy it out
 * themselves and jump to lenout, or return directly.
 *
 * Returns 0 on success or a negative errno; -ENOPROTOOPT for an option
 * that is not handled here.
 */
int sk_getsockopt(struct sock *sk, int level, int optname,
		  sockptr_t optval, sockptr_t optlen)
{
	struct socket *sock = sk->sk_socket;

	/* Scratch space for every result that goes through the common tail. */
	union {
		int val;
		u64 val64;
		unsigned long ulval;
		struct linger ling;
		struct old_timeval32 tm32;
		struct __kernel_old_timeval tm;
		struct  __kernel_sock_timeval stm;
		struct sock_txtime txtime;
		struct so_timestamping timestamping;
	} v;

	/* lv: size of the result held in v; len: caller's buffer size. */
	int lv = sizeof(int);
	int len;

	if (copy_from_sockptr(&len, optlen, sizeof(int)))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = READ_ONCE(sk->sk_sndbuf);
		break;

	case SO_RCVBUF:
		v.val = READ_ONCE(sk->sk_rcvbuf);
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_REUSEPORT:
		v.val = sk->sk_reuseport;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		/* If sock_error() yields 0, report sk_err_soft instead
		 * and reset it to 0.
		 */
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check_tx;
		break;

	case SO_PRIORITY:
		v.val = READ_ONCE(sk->sk_priority);
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= READ_ONCE(sk->sk_lingertime) / HZ;
		break;

	case SO_BSDCOMPAT:
		/* Reports the zeroed int from v. */
		break;

	case SO_TIMESTAMP_OLD:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS_OLD:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
		break;

	case SO_TIMESTAMP_NEW:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
		break;

	case SO_TIMESTAMPNS_NEW:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
		break;

	case SO_TIMESTAMPING_OLD:
	case SO_TIMESTAMPING_NEW:
		lv = sizeof(v.timestamping);
		/* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only
		 * returning the flags when they were set through the same option.
		 * Don't change the behaviour for the old case SO_TIMESTAMPING_OLD.
		 */
		if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
			v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
			v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
		}
		break;

	case SO_RCVTIMEO_OLD:
	case SO_RCVTIMEO_NEW:
		/* sock_get_timeout() fills v and returns the result size. */
		lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
				      SO_RCVTIMEO_OLD == optname);
		break;

	case SO_SNDTIMEO_OLD:
	case SO_SNDTIMEO_NEW:
		lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
				      SO_SNDTIMEO_OLD == optname);
		break;

	case SO_RCVLOWAT:
		v.val = READ_ONCE(sk->sk_rcvlowat);
		break;

	case SO_SNDLOWAT:
		/* Always reported as 1. */
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_PASSPIDFD:
		v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);

		/* Sample peer pid and cred under sk_peer_lock. */
		spin_lock(&sk->sk_peer_lock);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		spin_unlock(&sk->sk_peer_lock);

		if (copy_to_sockptr(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERPIDFD:
	{
		struct pid *peer_pid;
		struct file *pidfd_file = NULL;
		int pidfd;

		if (len > sizeof(pidfd))
			len = sizeof(pidfd);

		spin_lock(&sk->sk_peer_lock);
		peer_pid = get_pid(sk->sk_peer_pid);
		spin_unlock(&sk->sk_peer_lock);

		if (!peer_pid)
			return -ENODATA;

		pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
		put_pid(peer_pid);
		if (pidfd < 0)
			return pidfd;

		/* The fd is installed only after both copies succeeded;
		 * on failure the reserved fd number and the file are
		 * released again.
		 */
		if (copy_to_sockptr(optval, &pidfd, len) ||
		    copy_to_sockptr(optlen, &len, sizeof(int))) {
			put_unused_fd(pidfd);
			fput(pidfd_file);

			return -EFAULT;
		}

		fd_install(pidfd, pidfd_file);
		return 0;
	}

	case SO_PEERGROUPS:
	{
		const struct cred *cred;
		int ret, n;

		cred = sk_get_peer_cred(sk);
		if (!cred)
			return -ENODATA;

		n = cred->group_info->ngroups;
		/* Buffer too small: report the required size through
		 * optlen and fail with -ERANGE.
		 */
		if (len < n * sizeof(gid_t)) {
			len = n * sizeof(gid_t);
			put_cred(cred);
			return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
		}
		len = n * sizeof(gid_t);

		ret = groups_to_user(optval, cred->group_info);
		put_cred(cred);
		if (ret)
			return ret;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		struct sockaddr_storage address;

		lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2);
		if (lv < 0)
			return -ENOTCONN;
		/* A buffer larger than the returned length is rejected. */
		if (lv < len)
			return -EINVAL;
		if (copy_to_sockptr(optval, &address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock,
							 optval, optlen, len);

	case SO_MARK:
		v.val = READ_ONCE(sk->sk_mark);
		break;

	case SO_RCVMARK:
		v.val = sock_flag(sk, SOCK_RCVMARK);
		break;

	case SO_RXQ_OVFL:
		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	case SO_PEEK_OFF:
		/* Readable only when the socket's ops provide
		 * set_peek_off.
		 */
		if (!READ_ONCE(sock->ops)->set_peek_off)
			return -EOPNOTSUPP;

		v.val = READ_ONCE(sk->sk_peek_off);
		break;
	case SO_NOFCS:
		v.val = sock_flag(sk, SOCK_NOFCS);
		break;

	case SO_BINDTODEVICE:
		return sock_getbindtodevice(sk, optval, optlen, len);

	case SO_GET_FILTER:
		/* sk_get_filter() writes to optval itself; a non-negative
		 * return value becomes the reported length.
		 */
		len = sk_get_filter(sk, optval, len);
		if (len < 0)
			return len;

		goto lenout;

	case SO_LOCK_FILTER:
		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
		break;

	case SO_BPF_EXTENSIONS:
		v.val = bpf_tell_extensions();
		break;

	case SO_SELECT_ERR_QUEUE:
		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
		break;

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		v.val = READ_ONCE(sk->sk_ll_usec);
		break;
	case SO_PREFER_BUSY_POLL:
		v.val = READ_ONCE(sk->sk_prefer_busy_poll);
		break;
#endif

	case SO_MAX_PACING_RATE:
		/* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
			lv = sizeof(v.ulval);
			v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
		} else {
			/* 32bit version */
			v.val = min_t(unsigned long, ~0U,
				      READ_ONCE(sk->sk_max_pacing_rate));
		}
		break;

	case SO_INCOMING_CPU:
		v.val = READ_ONCE(sk->sk_incoming_cpu);
		break;

	case SO_MEMINFO:
	{
		u32 meminfo[SK_MEMINFO_VARS];

		sk_get_meminfo(sk, meminfo);

		/* Copy out as much of the array as the caller has room for. */
		len = min_t(unsigned int, len, sizeof(meminfo));
		if (copy_to_sockptr(optval, &meminfo, len))
			return -EFAULT;

		goto lenout;
	}

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_INCOMING_NAPI_ID:
		v.val = READ_ONCE(sk->sk_napi_id);

		/* aggregate non-NAPI IDs down to 0 */
		if (v.val < MIN_NAPI_ID)
			v.val = 0;

		break;
#endif

	case SO_COOKIE:
		/* Needs room for at least a u64. */
		lv = sizeof(u64);
		if (len < lv)
			return -EINVAL;
		v.val64 = sock_gen_cookie(sk);
		break;

	case SO_ZEROCOPY:
		v.val = sock_flag(sk, SOCK_ZEROCOPY);
		break;

	case SO_TXTIME:
		lv = sizeof(v.txtime);
		v.txtime.clockid = sk->sk_clockid;
		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
				  SOF_TXTIME_DEADLINE_MODE : 0;
		v.txtime.flags |= sk->sk_txtime_report_errors ?
				  SOF_TXTIME_REPORT_ERRORS : 0;
		break;

	case SO_BINDTOIFINDEX:
		v.val = READ_ONCE(sk->sk_bound_dev_if);
		break;

	case SO_NETNS_COOKIE:
		/* Unlike SO_COOKIE, the buffer must be exactly a u64. */
		lv = sizeof(u64);
		if (len != lv)
			return -EINVAL;
		v.val64 = sock_net(sk)->net_cookie;
		break;

	case SO_BUF_LOCK:
		v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
		break;

	case SO_RESERVE_MEM:
		v.val = READ_ONCE(sk->sk_reserved_mem);
		break;

	case SO_TXREHASH:
		/* Paired with WRITE_ONCE() in sk_setsockopt() */
		v.val = READ_ONCE(sk->sk_txrehash);
		break;

	default:
		/* We implement the SO_SNDLOWAT etc to not be settable
		 * (1003.1g 7).
		 */
		return -ENOPROTOOPT;
	}

	/* Common tail: never copy more than the result holds. */
	if (len > lv)
		len = lv;
	if (copy_to_sockptr(optval, &v, len))
		return -EFAULT;
lenout:
	/* Tell the caller how many bytes were reported. */
	if (copy_to_sockptr(optlen, &len, sizeof(int)))
		return -EFAULT;
	return 0;
}
2009 
2010 /*
2011  * Initialize an sk_lock.
2012  *
2013  * (We also register the sk_lock with the lock validator.)
2014  */
2015 static inline void sock_lock_init(struct sock *sk)
2016 {
2017 	if (sk->sk_kern_sock)
2018 		sock_lock_init_class_and_name(
2019 			sk,
2020 			af_family_kern_slock_key_strings[sk->sk_family],
2021 			af_family_kern_slock_keys + sk->sk_family,
2022 			af_family_kern_key_strings[sk->sk_family],
2023 			af_family_kern_keys + sk->sk_family);
2024 	else
2025 		sock_lock_init_class_and_name(
2026 			sk,
2027 			af_family_slock_key_strings[sk->sk_family],
2028 			af_family_slock_keys + sk->sk_family,
2029 			af_family_key_strings[sk->sk_family],
2030 			af_family_keys + sk->sk_family);
2031 }
2032 
2033 /*
2034  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
2035  * even temporarly, because of RCU lookups. sk_node should also be left as is.
2036  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
2037  */
2038 static void sock_copy(struct sock *nsk, const struct sock *osk)
2039 {
2040 	const struct proto *prot = READ_ONCE(osk->sk_prot);
2041 #ifdef CONFIG_SECURITY_NETWORK
2042 	void *sptr = nsk->sk_security;
2043 #endif
2044 
2045 	/* If we move sk_tx_queue_mapping out of the private section,
2046 	 * we must check if sk_tx_queue_clear() is called after
2047 	 * sock_copy() in sk_clone_lock().
2048 	 */
2049 	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
2050 		     offsetof(struct sock, sk_dontcopy_begin) ||
2051 		     offsetof(struct sock, sk_tx_queue_mapping) >=
2052 		     offsetof(struct sock, sk_dontcopy_end));
2053 
2054 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
2055 
2056 	unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
2057 		      prot->obj_size - offsetof(struct sock, sk_dontcopy_end),
2058 		      /* alloc is larger than struct, see sk_prot_alloc() */);
2059 
2060 #ifdef CONFIG_SECURITY_NETWORK
2061 	nsk->sk_security = sptr;
2062 	security_sk_clone(osk, nsk);
2063 #endif
2064 }
2065 
2066 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
2067 		int family)
2068 {
2069 	struct sock *sk;
2070 	struct kmem_cache *slab;
2071 
2072 	slab = prot->slab;
2073 	if (slab != NULL) {
2074 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
2075 		if (!sk)
2076 			return sk;
2077 		if (want_init_on_alloc(priority))
2078 			sk_prot_clear_nulls(sk, prot->obj_size);
2079 	} else
2080 		sk = kmalloc(prot->obj_size, priority);
2081 
2082 	if (sk != NULL) {
2083 		if (security_sk_alloc(sk, family, priority))
2084 			goto out_free;
2085 
2086 		if (!try_module_get(prot->owner))
2087 			goto out_free_sec;
2088 	}
2089 
2090 	return sk;
2091 
2092 out_free_sec:
2093 	security_sk_free(sk);
2094 out_free:
2095 	if (slab != NULL)
2096 		kmem_cache_free(slab, sk);
2097 	else
2098 		kfree(sk);
2099 	return NULL;
2100 }
2101 
2102 static void sk_prot_free(struct proto *prot, struct sock *sk)
2103 {
2104 	struct kmem_cache *slab;
2105 	struct module *owner;
2106 
2107 	owner = prot->owner;
2108 	slab = prot->slab;
2109 
2110 	cgroup_sk_free(&sk->sk_cgrp_data);
2111 	mem_cgroup_sk_free(sk);
2112 	security_sk_free(sk);
2113 	if (slab != NULL)
2114 		kmem_cache_free(slab, sk);
2115 	else
2116 		kfree(sk);
2117 	module_put(owner);
2118 }
2119 
2120 /**
2121  *	sk_alloc - All socket objects are allocated here
2122  *	@net: the applicable net namespace
2123  *	@family: protocol family
2124  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2125  *	@prot: struct proto associated with this new sock instance
2126  *	@kern: is this to be a kernel socket?
2127  */
2128 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
2129 		      struct proto *prot, int kern)
2130 {
2131 	struct sock *sk;
2132 
2133 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
2134 	if (sk) {
2135 		sk->sk_family = family;
2136 		/*
2137 		 * See comment in struct sock definition to understand
2138 		 * why we need sk_prot_creator -acme
2139 		 */
2140 		sk->sk_prot = sk->sk_prot_creator = prot;
2141 		sk->sk_kern_sock = kern;
2142 		sock_lock_init(sk);
2143 		sk->sk_net_refcnt = kern ? 0 : 1;
2144 		if (likely(sk->sk_net_refcnt)) {
2145 			get_net_track(net, &sk->ns_tracker, priority);
2146 			sock_inuse_add(net, 1);
2147 		} else {
2148 			__netns_tracker_alloc(net, &sk->ns_tracker,
2149 					      false, priority);
2150 		}
2151 
2152 		sock_net_set(sk, net);
2153 		refcount_set(&sk->sk_wmem_alloc, 1);
2154 
2155 		mem_cgroup_sk_alloc(sk);
2156 		cgroup_sk_alloc(&sk->sk_cgrp_data);
2157 		sock_update_classid(&sk->sk_cgrp_data);
2158 		sock_update_netprioidx(&sk->sk_cgrp_data);
2159 		sk_tx_queue_clear(sk);
2160 	}
2161 
2162 	return sk;
2163 }
2164 EXPORT_SYMBOL(sk_alloc);
2165 
2166 /* Sockets having SOCK_RCU_FREE will call this function after one RCU
2167  * grace period. This is the case for UDP sockets and TCP listeners.
2168  */
2169 static void __sk_destruct(struct rcu_head *head)
2170 {
2171 	struct sock *sk = container_of(head, struct sock, sk_rcu);
2172 	struct sk_filter *filter;
2173 
2174 	if (sk->sk_destruct)
2175 		sk->sk_destruct(sk);
2176 
2177 	filter = rcu_dereference_check(sk->sk_filter,
2178 				       refcount_read(&sk->sk_wmem_alloc) == 0);
2179 	if (filter) {
2180 		sk_filter_uncharge(sk, filter);
2181 		RCU_INIT_POINTER(sk->sk_filter, NULL);
2182 	}
2183 
2184 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
2185 
2186 #ifdef CONFIG_BPF_SYSCALL
2187 	bpf_sk_storage_free(sk);
2188 #endif
2189 
2190 	if (atomic_read(&sk->sk_omem_alloc))
2191 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
2192 			 __func__, atomic_read(&sk->sk_omem_alloc));
2193 
2194 	if (sk->sk_frag.page) {
2195 		put_page(sk->sk_frag.page);
2196 		sk->sk_frag.page = NULL;
2197 	}
2198 
2199 	/* We do not need to acquire sk->sk_peer_lock, we are the last user. */
2200 	put_cred(sk->sk_peer_cred);
2201 	put_pid(sk->sk_peer_pid);
2202 
2203 	if (likely(sk->sk_net_refcnt))
2204 		put_net_track(sock_net(sk), &sk->ns_tracker);
2205 	else
2206 		__netns_tracker_free(sock_net(sk), &sk->ns_tracker, false);
2207 
2208 	sk_prot_free(sk->sk_prot_creator, sk);
2209 }
2210 
2211 void sk_destruct(struct sock *sk)
2212 {
2213 	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
2214 
2215 	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
2216 		reuseport_detach_sock(sk);
2217 		use_call_rcu = true;
2218 	}
2219 
2220 	if (use_call_rcu)
2221 		call_rcu(&sk->sk_rcu, __sk_destruct);
2222 	else
2223 		__sk_destruct(&sk->sk_rcu);
2224 }
2225 
2226 static void __sk_free(struct sock *sk)
2227 {
2228 	if (likely(sk->sk_net_refcnt))
2229 		sock_inuse_add(sock_net(sk), -1);
2230 
2231 	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
2232 		sock_diag_broadcast_destroy(sk);
2233 	else
2234 		sk_destruct(sk);
2235 }
2236 
2237 void sk_free(struct sock *sk)
2238 {
2239 	/*
2240 	 * We subtract one from sk_wmem_alloc and can know if
2241 	 * some packets are still in some tx queue.
2242 	 * If not null, sock_wfree() will call __sk_free(sk) later
2243 	 */
2244 	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
2245 		__sk_free(sk);
2246 }
2247 EXPORT_SYMBOL(sk_free);
2248 
2249 static void sk_init_common(struct sock *sk)
2250 {
2251 	skb_queue_head_init(&sk->sk_receive_queue);
2252 	skb_queue_head_init(&sk->sk_write_queue);
2253 	skb_queue_head_init(&sk->sk_error_queue);
2254 
2255 	rwlock_init(&sk->sk_callback_lock);
2256 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
2257 			af_rlock_keys + sk->sk_family,
2258 			af_family_rlock_key_strings[sk->sk_family]);
2259 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
2260 			af_wlock_keys + sk->sk_family,
2261 			af_family_wlock_key_strings[sk->sk_family]);
2262 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
2263 			af_elock_keys + sk->sk_family,
2264 			af_family_elock_key_strings[sk->sk_family]);
2265 	lockdep_set_class_and_name(&sk->sk_callback_lock,
2266 			af_callback_keys + sk->sk_family,
2267 			af_family_clock_key_strings[sk->sk_family]);
2268 }
2269 
/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	The clone starts as a raw copy of @sk (sock_copy()); every piece of
 *	per-socket state that must not be shared with the parent (locks,
 *	queues, memory accounting, filter charge, refcounts, ...) is then
 *	re-initialised.
 *
 *	Return: the new socket, with bh_lock_sock() held and sk_refcnt set
 *	to 2, or %NULL if the allocation or one of the clone steps failed.
 *	On those internal failures the clone has already been unlocked and
 *	freed via sk_free_unlock_clone().
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct proto *prot = READ_ONCE(sk->sk_prot);
	struct sk_filter *filter;
	bool is_charged = true;
	struct sock *newsk;

	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
	if (!newsk)
		goto out;

	sock_copy(newsk, sk);

	/* Remember which proto allocated the clone; __sk_destruct() frees
	 * it through sk_prot_creator.
	 */
	newsk->sk_prot_creator = prot;

	/* SANITY */
	if (likely(newsk->sk_net_refcnt)) {
		get_net_track(sock_net(newsk), &newsk->ns_tracker, priority);
		sock_inuse_add(sock_net(newsk), 1);
	} else {
		/* Kernel sockets are not elevating the struct net refcount.
		 * Instead, use a tracker to more easily detect if a layer
		 * is not properly dismantling its kernel sockets at netns
		 * destroy time.
		 */
		__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
				      false, priority);
	}
	sk_node_init(&newsk->sk_node);
	sock_lock_init(newsk);
	/* From here on the clone is locked; it is returned that way. */
	bh_lock_sock(newsk);
	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
	newsk->sk_backlog.len = 0;

	atomic_set(&newsk->sk_rmem_alloc, 0);

	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
	refcount_set(&newsk->sk_wmem_alloc, 1);

	atomic_set(&newsk->sk_omem_alloc, 0);
	sk_init_common(newsk);

	/* Reset the per-socket state that was copied from the parent. */
	newsk->sk_dst_cache	= NULL;
	newsk->sk_dst_pending_confirm = 0;
	newsk->sk_wmem_queued	= 0;
	newsk->sk_forward_alloc = 0;
	newsk->sk_reserved_mem  = 0;
	atomic_set(&newsk->sk_drops, 0);
	newsk->sk_send_head	= NULL;
	/* Inherit the parent's userlocks, except SOCK_BINDPORT_LOCK. */
	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
	atomic_set(&newsk->sk_zckey, 0);

	sock_reset_flag(newsk, SOCK_DONE);

	/* sk->sk_memcg will be populated at accept() time */
	newsk->sk_memcg = NULL;

	cgroup_sk_clone(&newsk->sk_cgrp_data);

	/* Share the parent's filter with the clone, charging it to the
	 * clone.
	 */
	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		/* though it's an empty new sock, the charging may fail
		 * if sysctl_optmem_max was changed between creation of
		 * original socket and cloning
		 */
		is_charged = sk_filter_charge(newsk, filter);
	RCU_INIT_POINTER(newsk->sk_filter, filter);
	rcu_read_unlock();

	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
		/* We need to make sure that we don't uncharge the new
		 * socket if we couldn't charge it in the first place
		 * as otherwise we uncharge the parent's filter.
		 */
		if (!is_charged)
			RCU_INIT_POINTER(newsk->sk_filter, NULL);
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}
	/* The clone does not inherit the parent's reuseport group. */
	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);

	if (bpf_sk_storage_clone(sk, newsk)) {
		sk_free_unlock_clone(newsk);
		newsk = NULL;
		goto out;
	}

	/* Clear sk_user_data if parent had the pointer tagged
	 * as not suitable for copying when cloning.
	 */
	if (sk_user_data_is_nocopy(newsk))
		newsk->sk_user_data = NULL;

	newsk->sk_err	   = 0;
	newsk->sk_err_soft = 0;
	newsk->sk_priority = 0;
	newsk->sk_incoming_cpu = raw_smp_processor_id();

	/* Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	refcount_set(&newsk->sk_refcnt, 2);

	/* The clone is not attached to any struct socket or wait queue yet. */
	sk_set_socket(newsk, NULL);
	sk_tx_queue_clear(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, NULL);

	if (newsk->sk_prot->sockets_allocated)
		sk_sockets_allocated_inc(newsk);

	/* The timestamp flags were copied from the parent, so account for
	 * the clone as an additional timestamp user.
	 */
	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
		net_enable_timestamp();
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
2396 
/* Dispose of a clone produced by sk_clone_lock() that could not be
 * fully set up: clear the inherited destructor, release the bh lock
 * taken on the clone and drop it through sk_free().
 */
void sk_free_unlock_clone(struct sock *sk)
{
	/* It is still raw copy of parent, so invalidate
	 * destructor and make plain sk_free() */
	sk->sk_destruct = NULL;
	bh_unlock_sock(sk);
	sk_free(sk);
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
2406 
2407 static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
2408 {
2409 	bool is_ipv6 = false;
2410 	u32 max_size;
2411 
2412 #if IS_ENABLED(CONFIG_IPV6)
2413 	is_ipv6 = (sk->sk_family == AF_INET6 &&
2414 		   !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
2415 #endif
2416 	/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
2417 	max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
2418 			READ_ONCE(dst->dev->gso_ipv4_max_size);
2419 	if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
2420 		max_size = GSO_LEGACY_MAX_SIZE;
2421 
2422 	return max_size - (MAX_TCP_HEADER + 1);
2423 }
2424 
2425 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
2426 {
2427 	u32 max_segs = 1;
2428 
2429 	sk->sk_route_caps = dst->dev->features;
2430 	if (sk_is_tcp(sk))
2431 		sk->sk_route_caps |= NETIF_F_GSO;
2432 	if (sk->sk_route_caps & NETIF_F_GSO)
2433 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
2434 	if (unlikely(sk->sk_gso_disabled))
2435 		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2436 	if (sk_can_gso(sk)) {
2437 		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
2438 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2439 		} else {
2440 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
2441 			sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
2442 			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
2443 			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
2444 		}
2445 	}
2446 	sk->sk_gso_max_segs = max_segs;
2447 	sk_dst_set(sk, dst);
2448 }
2449 EXPORT_SYMBOL_GPL(sk_setup_caps);
2450 
2451 /*
2452  *	Simple resource managers for sockets.
2453  */
2454 
2455 
/*
 * Write buffer destructor automatically called from kfree_skb.
 *
 * Gives skb->truesize back to the owning socket's sk_wmem_alloc, calls
 * the socket's write-space callback unless SOCK_USE_WRITE_QUEUE is set,
 * and completes the freeing of the socket through __sk_free() when the
 * counter reaches zero.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;
	bool free;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/* SOCK_RCU_FREE sockets still using the default write-space
		 * callback: release the whole charge in one step and run
		 * the wfree variant of the callback, all inside an RCU
		 * read-side section.
		 */
		if (sock_flag(sk, SOCK_RCU_FREE) &&
		    sk->sk_write_space == sock_def_write_space) {
			rcu_read_lock();
			free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
			sock_def_write_space_wfree(sk);
			rcu_read_unlock();
			if (unlikely(free))
				__sk_free(sk);
			return;
		}

		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
		sk->sk_write_space(sk);
		/* Only the reference kept above is left to drop. */
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
2493 
2494 /* This variant of sock_wfree() is used by TCP,
2495  * since it sets SOCK_USE_WRITE_QUEUE.
2496  */
2497 void __sock_wfree(struct sk_buff *skb)
2498 {
2499 	struct sock *sk = skb->sk;
2500 
2501 	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
2502 		__sk_free(sk);
2503 }
2504 
/* Make @sk the owner of @skb on the transmit side.
 *
 * Any previous owner is dropped first (skb_orphan()). For a full socket
 * the skb's truesize is charged to sk_wmem_alloc and sock_wfree() is
 * installed as destructor. With CONFIG_INET, a non-full socket instead
 * gets a plain reference (sock_hold()) with sock_edemux() as destructor.
 */
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->sk = sk;
#ifdef CONFIG_INET
	if (unlikely(!sk_fullsock(sk))) {
		skb->destructor = sock_edemux;
		sock_hold(sk);
		return;
	}
#endif
	skb->destructor = sock_wfree;
	skb_set_hash_from_sk(skb, sk);
	/*
	 * We used to take a refcount on sk, but following operation
	 * is enough to guarantee sk_free() wont free this sock until
	 * all in-flight packets are completed
	 */
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(skb_set_owner_w);
2526 
2527 static bool can_skb_orphan_partial(const struct sk_buff *skb)
2528 {
2529 #ifdef CONFIG_TLS_DEVICE
2530 	/* Drivers depend on in-order delivery for crypto offload,
2531 	 * partial orphan breaks out-of-order-OK logic.
2532 	 */
2533 	if (skb->decrypted)
2534 		return false;
2535 #endif
2536 	return (skb->destructor == sock_wfree ||
2537 		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
2538 }
2539 
2540 /* This helper is used by netem, as it can hold packets in its
2541  * delay queue. We want to allow the owner socket to send more
2542  * packets, as if they were already TX completed by a typical driver.
2543  * But we also want to keep skb->sk set because some packet schedulers
2544  * rely on it (sch_fq for example).
2545  */
2546 void skb_orphan_partial(struct sk_buff *skb)
2547 {
2548 	if (skb_is_tcp_pure_ack(skb))
2549 		return;
2550 
2551 	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
2552 		return;
2553 
2554 	skb_orphan(skb);
2555 }
2556 EXPORT_SYMBOL(skb_orphan_partial);
2557 
2558 /*
2559  * Read buffer destructor automatically called from kfree_skb.
2560  */
2561 void sock_rfree(struct sk_buff *skb)
2562 {
2563 	struct sock *sk = skb->sk;
2564 	unsigned int len = skb->truesize;
2565 
2566 	atomic_sub(len, &sk->sk_rmem_alloc);
2567 	sk_mem_uncharge(sk, len);
2568 }
2569 EXPORT_SYMBOL(sock_rfree);
2570 
/*
 * Buffer destructor for skbs that are not used directly in read or write
 * path, e.g. for error handler skbs. Automatically called from kfree_skb.
 *
 * Only drops the socket reference associated with the skb (sock_put());
 * no memory accounting is touched.
 */
void sock_efree(struct sk_buff *skb)
{
	sock_put(skb->sk);
}
EXPORT_SYMBOL(sock_efree);
2580 
/* Buffer destructor for prefetch/receive path where reference count may
 * not be held, e.g. for listen sockets.
 *
 * Nothing is done for sockets that are not refcounted. A syncookie
 * request socket (TCP_NEW_SYN_RECV) has its listener pointer cleared
 * and is freed directly; any other socket is released with
 * sock_gen_put().
 */
#ifdef CONFIG_INET
void sock_pfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (!sk_is_refcounted(sk))
		return;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);

		if (req->syncookie) {
			req->rsk_listener = NULL;
			reqsk_free(req);
			return;
		}
	}

	sock_gen_put(sk);
}
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
2602 
2603 kuid_t sock_i_uid(struct sock *sk)
2604 {
2605 	kuid_t uid;
2606 
2607 	read_lock_bh(&sk->sk_callback_lock);
2608 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2609 	read_unlock_bh(&sk->sk_callback_lock);
2610 	return uid;
2611 }
2612 EXPORT_SYMBOL(sock_i_uid);
2613 
2614 unsigned long __sock_i_ino(struct sock *sk)
2615 {
2616 	unsigned long ino;
2617 
2618 	read_lock(&sk->sk_callback_lock);
2619 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
2620 	read_unlock(&sk->sk_callback_lock);
2621 	return ino;
2622 }
2623 EXPORT_SYMBOL(__sock_i_ino);
2624 
/* Same as __sock_i_ino(), with bottom halves disabled around the
 * lookup. Returns the inode number of @sk's struct socket, or 0 if
 * there is none.
 */
unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	local_bh_disable();
	ino = __sock_i_ino(sk);
	local_bh_enable();
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);
2635 
2636 /*
2637  * Allocate a skb from the socket's send buffer.
2638  */
2639 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
2640 			     gfp_t priority)
2641 {
2642 	if (force ||
2643 	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
2644 		struct sk_buff *skb = alloc_skb(size, priority);
2645 
2646 		if (skb) {
2647 			skb_set_owner_w(skb, sk);
2648 			return skb;
2649 		}
2650 	}
2651 	return NULL;
2652 }
2653 EXPORT_SYMBOL(sock_wmalloc);
2654 
2655 static void sock_ofree(struct sk_buff *skb)
2656 {
2657 	struct sock *sk = skb->sk;
2658 
2659 	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
2660 }
2661 
2662 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
2663 			     gfp_t priority)
2664 {
2665 	struct sk_buff *skb;
2666 
2667 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
2668 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2669 	    READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
2670 		return NULL;
2671 
2672 	skb = alloc_skb(size, priority);
2673 	if (!skb)
2674 		return NULL;
2675 
2676 	atomic_add(skb->truesize, &sk->sk_omem_alloc);
2677 	skb->sk = sk;
2678 	skb->destructor = sock_ofree;
2679 	return skb;
2680 }
2681 
2682 /*
2683  * Allocate a memory block from the socket's option memory buffer.
2684  */
2685 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
2686 {
2687 	int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
2688 
2689 	if ((unsigned int)size <= optmem_max &&
2690 	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
2691 		void *mem;
2692 		/* First do the add, to avoid the race if kmalloc
2693 		 * might sleep.
2694 		 */
2695 		atomic_add(size, &sk->sk_omem_alloc);
2696 		mem = kmalloc(size, priority);
2697 		if (mem)
2698 			return mem;
2699 		atomic_sub(size, &sk->sk_omem_alloc);
2700 	}
2701 	return NULL;
2702 }
2703 EXPORT_SYMBOL(sock_kmalloc);
2704 
2705 /* Free an option memory block. Note, we actually want the inline
2706  * here as this allows gcc to detect the nullify and fold away the
2707  * condition entirely.
2708  */
2709 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
2710 				  const bool nullify)
2711 {
2712 	if (WARN_ON_ONCE(!mem))
2713 		return;
2714 	if (nullify)
2715 		kfree_sensitive(mem);
2716 	else
2717 		kfree(mem);
2718 	atomic_sub(size, &sk->sk_omem_alloc);
2719 }
2720 
/* Free an option memory block with plain kfree() and return @size to
 * the socket's sk_omem_alloc counter.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
EXPORT_SYMBOL(sock_kfree_s);
2726 
/* Like sock_kfree_s(), but the block is released with
 * kfree_sensitive().
 */
void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
EXPORT_SYMBOL(sock_kzfree_s);
2732 
/* Sleep until send-buffer space may be available on @sk.
 *
 * This is almost wait_for_tcp_memory minus release_sock/lock_sock.
 * I think these locks should be removed for datagram sockets.
 *
 * The wait ends when @timeo runs out, a signal is pending,
 * sk_wmem_alloc drops below sk_sndbuf, the send side is shut down or
 * sk_err is set.
 *
 * Returns the remaining timeout.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		/* Queue ourselves before re-testing the conditions below. */
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
			break;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			break;
		if (READ_ONCE(sk->sk_err))
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}
2759 
2760 
2761 /*
2762  *	Generic send/receive buffer handlers
2763  */
2764 
/**
 *	sock_alloc_send_pskb - allocate a skb charged to a socket's send buffer
 *	@sk: socket the skb is allocated for
 *	@header_len: forwarded to alloc_skb_with_frags()
 *	@data_len: forwarded to alloc_skb_with_frags()
 *	@noblock: passed to sock_sndtimeo() to pick the wait timeout
 *	@errcode: where the error code is stored on failure
 *	@max_page_order: forwarded to alloc_skb_with_frags()
 *
 *	Waits, within the send timeout, until sk_wmem_alloc is below
 *	sk_sndbuf, then allocates the skb and charges it to @sk with
 *	skb_set_owner_w().
 *
 *	Return: the skb, or %NULL with *@errcode set: a pending socket
 *	error, -EPIPE after a send shutdown, -EAGAIN when the timeout is
 *	exhausted, the value of sock_intr_errno() when interrupted by a
 *	signal, or whatever alloc_skb_with_frags() reported.
 */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode, int max_page_order)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	for (;;) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			goto failure;

		/* Room in the send buffer: go allocate. */
		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
			break;

		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
				   errcode, sk->sk_allocation);
	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);
2808 
/**
 *	__sock_cmsg_send - apply one control message to a send cookie
 *	@sk: sending socket
 *	@cmsg: control message to interpret (dispatched on cmsg_type)
 *	@sockc: cookie updated with the values carried by @cmsg
 *
 *	Handles SO_MARK, SO_TIMESTAMPING_OLD/NEW and SCM_TXTIME; SCM_RIGHTS
 *	and SCM_CREDENTIALS are accepted and ignored.
 *
 *	Return: 0 on success, -EPERM when SO_MARK is used without
 *	CAP_NET_RAW or CAP_NET_ADMIN in the socket's netns user namespace,
 *	-EINVAL for a wrong payload length, disallowed timestamping flags,
 *	SCM_TXTIME on a socket without SOCK_TXTIME, or an unknown type.
 */
int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
		     struct sockcm_cookie *sockc)
{
	u32 tsflags;

	switch (cmsg->cmsg_type) {
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
		break;
	case SO_TIMESTAMPING_OLD:
	case SO_TIMESTAMPING_NEW:
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;

		tsflags = *(u32 *)CMSG_DATA(cmsg);
		/* Only the TX record flags may be set per message. */
		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
			return -EINVAL;

		/* Replace the TX record bits, keep every other flag. */
		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
		sockc->tsflags |= tsflags;
		break;
	case SCM_TXTIME:
		if (!sock_flag(sk, SOCK_TXTIME))
			return -EINVAL;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
			return -EINVAL;
		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
		break;
	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
	case SCM_RIGHTS:
	case SCM_CREDENTIALS:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(__sock_cmsg_send);
2852 
2853 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2854 		   struct sockcm_cookie *sockc)
2855 {
2856 	struct cmsghdr *cmsg;
2857 	int ret;
2858 
2859 	for_each_cmsghdr(cmsg, msg) {
2860 		if (!CMSG_OK(msg, cmsg))
2861 			return -EINVAL;
2862 		if (cmsg->cmsg_level != SOL_SOCKET)
2863 			continue;
2864 		ret = __sock_cmsg_send(sk, cmsg, sockc);
2865 		if (ret)
2866 			return ret;
2867 	}
2868 	return 0;
2869 }
2870 EXPORT_SYMBOL(sock_cmsg_send);
2871 
2872 static void sk_enter_memory_pressure(struct sock *sk)
2873 {
2874 	if (!sk->sk_prot->enter_memory_pressure)
2875 		return;
2876 
2877 	sk->sk_prot->enter_memory_pressure(sk);
2878 }
2879 
2880 static void sk_leave_memory_pressure(struct sock *sk)
2881 {
2882 	if (sk->sk_prot->leave_memory_pressure) {
2883 		INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
2884 				     tcp_leave_memory_pressure, sk);
2885 	} else {
2886 		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
2887 
2888 		if (memory_pressure && READ_ONCE(*memory_pressure))
2889 			WRITE_ONCE(*memory_pressure, 0);
2890 	}
2891 }
2892 
/* Static key, off by default. When enabled, skb_page_frag_refill()
 * skips its high-order page allocation attempt.
 */
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
2894 
/**
 * skb_page_frag_refill - check that a page_frag contains enough room
 * @sz: minimum size of the fragment we want to get
 * @pfrag: pointer to page_frag
 * @gfp: priority for memory allocation
 *
 * Note: While this allocator tries to use high order pages, there is
 * no guarantee that allocations succeed. Therefore, @sz MUST be
 * less or equal than PAGE_SIZE.
 *
 * Return: true if @pfrag has at least @sz bytes available at
 * @pfrag->offset, false if no page could be allocated.
 */
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		/* We hold the only reference: reuse the page from the start. */
		if (page_ref_count(pfrag->page) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		/* Not enough room left: drop our reference, get a new page. */
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	if (SKB_FRAG_PAGE_ORDER &&
	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
		/* Avoid direct reclaim but allow kswapd to wake */
		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
					  __GFP_COMP | __GFP_NOWARN |
					  __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	/* Fall back to a single page using the caller's gfp flags. */
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);
2938 
2939 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2940 {
2941 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2942 		return true;
2943 
2944 	sk_enter_memory_pressure(sk);
2945 	sk_stream_moderate_sndbuf(sk);
2946 	return false;
2947 }
2948 EXPORT_SYMBOL(sk_page_frag_refill);
2949 
2950 void __lock_sock(struct sock *sk)
2951 	__releases(&sk->sk_lock.slock)
2952 	__acquires(&sk->sk_lock.slock)
2953 {
2954 	DEFINE_WAIT(wait);
2955 
2956 	for (;;) {
2957 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2958 					TASK_UNINTERRUPTIBLE);
2959 		spin_unlock_bh(&sk->sk_lock.slock);
2960 		schedule();
2961 		spin_lock_bh(&sk->sk_lock.slock);
2962 		if (!sock_owned_by_user(sk))
2963 			break;
2964 	}
2965 	finish_wait(&sk->sk_lock.wq, &wait);
2966 }
2967 
/* Process every skb queued on the socket backlog.
 *
 * Entered and left with sk->sk_lock.slock held. The backlog list is
 * detached under the lock, then each skb is passed to sk_backlog_rcv()
 * with the lock dropped; the outer loop repeats as long as new skbs
 * were queued in the meantime.
 */
void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb, *next;

	while ((skb = sk->sk_backlog.head) != NULL) {
		/* Take the whole list private before dropping the lock. */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;

		spin_unlock_bh(&sk->sk_lock.slock);

		do {
			next = skb->next;
			prefetch(next);
			DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb_mark_not_on_list(skb);
			sk_backlog_rcv(sk, skb);

			cond_resched();

			skb = next;
		} while (skb != NULL);

		spin_lock_bh(&sk->sk_lock.slock);
	}

	/*
	 * Doing the zeroing here guarantee we can not loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}
3000 
/* Process the socket backlog now: takes sk->sk_lock.slock, runs
 * __release_sock(), then calls the protocol's release_cb hook, if any,
 * before dropping the spinlock again.
 */
void __sk_flush_backlog(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	__release_sock(sk);

	if (sk->sk_prot->release_cb)
		INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
				     tcp_release_cb, sk);

	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL_GPL(__sk_flush_backlog);
3013 
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 * @skb:   last skb seen on sk_receive_queue
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 *
 * The wait condition is that the tail of sk_receive_queue differs from
 * @skb. SOCKWQ_ASYNC_WAITDATA is set on the socket for the duration of
 * the wait.
 *
 * Return: the value produced by sk_wait_event().
 */
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int rc;

	add_wait_queue(sk_sleep(sk), &wait);
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
3038 
/**
 *	__sk_mem_raise_allocated - increase memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@amt: pages to allocate
 *	@kind: allocation type
 *
 *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
 *
 *	Unlike the globally shared limits among the sockets under same protocol,
 *	consuming the budget of a memcg won't have direct effect on other ones.
 *	So be optimistic about memcg's tolerance, and leave the callers to decide
 *	whether or not to raise allocated through sk_under_memory_pressure() or
 *	its variants.
 *
 *	Return: 1 if the allocation is accepted, 0 if it is suppressed. In
 *	the latter case the protocol and memcg accounting done here has been
 *	undone before returning.
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL;
	struct proto *prot = sk->sk_prot;
	bool charged = false;
	long allocated;

	/* Account first, then decide whether the new total is acceptable. */
	sk_memory_allocated_add(sk, amt);
	allocated = sk_memory_allocated(sk);

	if (memcg) {
		if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()))
			goto suppress_allocation;
		charged = true;
	}

	/* Under limit. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. */
	if (allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* Guarantee minimum buffer size under pressure (either global
	 * or memcg) to make sure features described in RFC 7323 (TCP
	 * Extensions for High Performance) work properly.
	 *
	 * This rule does NOT stand when exceeds global or memcg's hard
	 * limit, or else a DoS attack can be taken place by spawning
	 * lots of sockets whose usage are under minimum buffer size.
	 */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
			return 1;

	} else { /* SK_MEM_SEND */
		int wmem0 = sk_get_wmem0(sk, prot);

		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < wmem0)
				return 1;
		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
				return 1;
		}
	}

	if (sk_has_memory_pressure(sk)) {
		u64 alloc;

		/* The following 'average' heuristic is within the
		 * scope of global accounting, so it only makes
		 * sense for global memory pressure.
		 */
		if (!sk_under_global_memory_pressure(sk))
			return 1;

		/* Try to be fair among all the sockets under global
		 * pressure by allowing the ones that below average
		 * usage to raise.
		 */
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
			/* Force charge with __GFP_NOFAIL */
			if (memcg && !charged) {
				mem_cgroup_charge_skmem(memcg, amt,
					gfp_memcg_charge() | __GFP_NOFAIL);
			}
			return 1;
		}
	}

	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);

	/* Roll back the accounting performed at the top of the function. */
	sk_memory_allocated_sub(sk, amt);

	if (charged)
		mem_cgroup_uncharge_skmem(memcg, amt);

	return 0;
}
3157 
3158 /**
3159  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
3160  *	@sk: socket
3161  *	@size: memory size to allocate
3162  *	@kind: allocation type
3163  *
3164  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
3165  *	rmem allocation. This function assumes that protocols which have
3166  *	memory_pressure use sk_wmem_queued as write buffer accounting.
3167  */
3168 int __sk_mem_schedule(struct sock *sk, int size, int kind)
3169 {
3170 	int ret, amt = sk_mem_pages(size);
3171 
3172 	sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
3173 	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
3174 	if (!ret)
3175 		sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
3176 	return ret;
3177 }
3178 EXPORT_SYMBOL(__sk_mem_schedule);
3179 
/**
 *	__sk_mem_reduce_allocated - reclaim memory_allocated
 *	@sk: socket
 *	@amount: number of quanta (__sk_mem_reclaim() passes a page count)
 *
 *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
 *
 *	The same amount is also uncharged from the socket's memcg when
 *	socket memcg accounting is enabled. If the protocol is under global
 *	memory pressure and its allocation is now below the lowest limit,
 *	memory pressure is left.
 */
void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
	sk_memory_allocated_sub(sk, amount);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);

	if (sk_under_global_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
3198 
3199 /**
3200  *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
3201  *	@sk: socket
3202  *	@amount: number of bytes (rounded down to a PAGE_SIZE multiple)
3203  */
3204 void __sk_mem_reclaim(struct sock *sk, int amount)
3205 {
3206 	amount >>= PAGE_SHIFT;
3207 	sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
3208 	__sk_mem_reduce_allocated(sk, amount);
3209 }
3210 EXPORT_SYMBOL(__sk_mem_reclaim);
3211 
/* Store @val in sk->sk_peek_off (using WRITE_ONCE()). Always returns 0. */
int sk_set_peek_off(struct sock *sk, int val)
{
	WRITE_ONCE(sk->sk_peek_off, val);
	return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
3218 
3219 /*
3220  * Set of default routines for initialising struct proto_ops when
3221  * the protocol does not support a particular function. In certain
3222  * cases where it makes no sense for a protocol to have a "do nothing"
3223  * function, some default processing is provided.
3224  */
3225 
/* bind() stub for protocols that do not support it: always fails with
 * -EOPNOTSUPP.
 */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);
3231 
/* connect() stub for protocols that do not support it: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);
3238 
/* socketpair() stub for protocols that do not support it: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);
3244 
/* accept() stub for protocols that do not support it: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
		   bool kern)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);
3251 
/* getname() stub for protocols that do not support it: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);
3258 
/* ioctl() stub for protocols that do not support it: always fails with
 * -EOPNOTSUPP.
 */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);
3264 
/* listen() stub for protocols that do not support it: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);
3270 
/* shutdown() stub for protocols that do not support it: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);
3276 
/* sendmsg() placeholder: nothing is sent, result is -EOPNOTSUPP. */
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
3281 EXPORT_SYMBOL(sock_no_sendmsg);
3282 
/*
 * Variant of the sendmsg() placeholder that takes a struct sock instead of
 * a struct socket: always fails with -EOPNOTSUPP.
 */
int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
3287 EXPORT_SYMBOL(sock_no_sendmsg_locked);
3288 
/* recvmsg() placeholder: nothing is received, result is -EOPNOTSUPP. */
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
3294 EXPORT_SYMBOL(sock_no_recvmsg);
3295 
/*
 * mmap() placeholder. Unlike the other sock_no_*() helpers this one returns
 * -ENODEV, so that it matches the error seen when no mmap method exists.
 */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
3301 EXPORT_SYMBOL(sock_no_mmap);
3302 
3303 /*
3304  * When a file is received (via SCM_RIGHTS, etc), we must bump the
3305  * various sock-based usage counts.
3306  */
3307 void __receive_sock(struct file *file)
3308 {
3309 	struct socket *sock;
3310 
3311 	sock = sock_from_file(file);
3312 	if (sock) {
3313 		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
3314 		sock_update_classid(&sock->sk->sk_cgrp_data);
3315 	}
3316 }
3317 
3318 /*
3319  *	Default Socket Callbacks
3320  */
3321 
3322 static void sock_def_wakeup(struct sock *sk)
3323 {
3324 	struct socket_wq *wq;
3325 
3326 	rcu_read_lock();
3327 	wq = rcu_dereference(sk->sk_wq);
3328 	if (skwq_has_sleeper(wq))
3329 		wake_up_interruptible_all(&wq->wait);
3330 	rcu_read_unlock();
3331 }
3332 
3333 static void sock_def_error_report(struct sock *sk)
3334 {
3335 	struct socket_wq *wq;
3336 
3337 	rcu_read_lock();
3338 	wq = rcu_dereference(sk->sk_wq);
3339 	if (skwq_has_sleeper(wq))
3340 		wake_up_interruptible_poll(&wq->wait, EPOLLERR);
3341 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
3342 	rcu_read_unlock();
3343 }
3344 
3345 void sock_def_readable(struct sock *sk)
3346 {
3347 	struct socket_wq *wq;
3348 
3349 	trace_sk_data_ready(sk);
3350 
3351 	rcu_read_lock();
3352 	wq = rcu_dereference(sk->sk_wq);
3353 	if (skwq_has_sleeper(wq))
3354 		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
3355 						EPOLLRDNORM | EPOLLRDBAND);
3356 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
3357 	rcu_read_unlock();
3358 }
3359 
3360 static void sock_def_write_space(struct sock *sk)
3361 {
3362 	struct socket_wq *wq;
3363 
3364 	rcu_read_lock();
3365 
3366 	/* Do not wake up a writer until he can make "significant"
3367 	 * progress.  --DaveM
3368 	 */
3369 	if (sock_writeable(sk)) {
3370 		wq = rcu_dereference(sk->sk_wq);
3371 		if (skwq_has_sleeper(wq))
3372 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
3373 						EPOLLWRNORM | EPOLLWRBAND);
3374 
3375 		/* Should agree with poll, otherwise some programs break */
3376 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
3377 	}
3378 
3379 	rcu_read_unlock();
3380 }
3381 
3382 /* An optimised version of sock_def_write_space(), should only be called
3383  * for SOCK_RCU_FREE sockets under RCU read section and after putting
3384  * ->sk_wmem_alloc.
3385  */
3386 static void sock_def_write_space_wfree(struct sock *sk)
3387 {
3388 	/* Do not wake up a writer until he can make "significant"
3389 	 * progress.  --DaveM
3390 	 */
3391 	if (sock_writeable(sk)) {
3392 		struct socket_wq *wq = rcu_dereference(sk->sk_wq);
3393 
3394 		/* rely on refcount_sub from sock_wfree() */
3395 		smp_mb__after_atomic();
3396 		if (wq && waitqueue_active(&wq->wait))
3397 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
3398 						EPOLLWRNORM | EPOLLWRBAND);
3399 
3400 		/* Should agree with poll, otherwise some programs break */
3401 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
3402 	}
3403 }
3404 
/* Default ->sk_destruct callback: deliberately empty. */
static void sock_def_destruct(struct sock *sk)
{
}
3408 
3409 void sk_send_sigurg(struct sock *sk)
3410 {
3411 	if (sk->sk_socket && sk->sk_socket->file)
3412 		if (send_sigurg(&sk->sk_socket->file->f_owner))
3413 			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
3414 }
3415 EXPORT_SYMBOL(sk_send_sigurg);
3416 
/*
 * (Re)arm @timer to fire at @expires. A socket reference is taken only when
 * mod_timer() returns 0 (presumably: the timer was not already pending,
 * see the mod_timer() documentation).
 */
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (mod_timer(timer, expires) == 0)
		sock_hold(sk);
}
3423 EXPORT_SYMBOL(sk_reset_timer);
3424 
/*
 * Cancel @timer with del_timer(). When del_timer() returns non-zero, one
 * socket reference is dropped through __sock_put().
 */
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (del_timer(timer) != 0)
		__sock_put(sk);
}
3430 EXPORT_SYMBOL(sk_stop_timer);
3431 
/*
 * Same as sk_stop_timer() but cancels through del_timer_sync(). When that
 * returns non-zero, one socket reference is dropped through __sock_put().
 */
void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
{
	if (del_timer_sync(timer) != 0)
		__sock_put(sk);
}
3437 EXPORT_SYMBOL(sk_stop_timer_sync);
3438 
3439 void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
3440 {
3441 	sk_init_common(sk);
3442 	sk->sk_send_head	=	NULL;
3443 
3444 	timer_setup(&sk->sk_timer, NULL, 0);
3445 
3446 	sk->sk_allocation	=	GFP_KERNEL;
3447 	sk->sk_rcvbuf		=	READ_ONCE(sysctl_rmem_default);
3448 	sk->sk_sndbuf		=	READ_ONCE(sysctl_wmem_default);
3449 	sk->sk_state		=	TCP_CLOSE;
3450 	sk->sk_use_task_frag	=	true;
3451 	sk_set_socket(sk, sock);
3452 
3453 	sock_set_flag(sk, SOCK_ZAPPED);
3454 
3455 	if (sock) {
3456 		sk->sk_type	=	sock->type;
3457 		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
3458 		sock->sk	=	sk;
3459 	} else {
3460 		RCU_INIT_POINTER(sk->sk_wq, NULL);
3461 	}
3462 	sk->sk_uid	=	uid;
3463 
3464 	rwlock_init(&sk->sk_callback_lock);
3465 	if (sk->sk_kern_sock)
3466 		lockdep_set_class_and_name(
3467 			&sk->sk_callback_lock,
3468 			af_kern_callback_keys + sk->sk_family,
3469 			af_family_kern_clock_key_strings[sk->sk_family]);
3470 	else
3471 		lockdep_set_class_and_name(
3472 			&sk->sk_callback_lock,
3473 			af_callback_keys + sk->sk_family,
3474 			af_family_clock_key_strings[sk->sk_family]);
3475 
3476 	sk->sk_state_change	=	sock_def_wakeup;
3477 	sk->sk_data_ready	=	sock_def_readable;
3478 	sk->sk_write_space	=	sock_def_write_space;
3479 	sk->sk_error_report	=	sock_def_error_report;
3480 	sk->sk_destruct		=	sock_def_destruct;
3481 
3482 	sk->sk_frag.page	=	NULL;
3483 	sk->sk_frag.offset	=	0;
3484 	sk->sk_peek_off		=	-1;
3485 
3486 	sk->sk_peer_pid 	=	NULL;
3487 	sk->sk_peer_cred	=	NULL;
3488 	spin_lock_init(&sk->sk_peer_lock);
3489 
3490 	sk->sk_write_pending	=	0;
3491 	sk->sk_rcvlowat		=	1;
3492 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
3493 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
3494 
3495 	sk->sk_stamp = SK_DEFAULT_STAMP;
3496 #if BITS_PER_LONG==32
3497 	seqlock_init(&sk->sk_stamp_seq);
3498 #endif
3499 	atomic_set(&sk->sk_zckey, 0);
3500 
3501 #ifdef CONFIG_NET_RX_BUSY_POLL
3502 	sk->sk_napi_id		=	0;
3503 	sk->sk_ll_usec		=	READ_ONCE(sysctl_net_busy_read);
3504 #endif
3505 
3506 	sk->sk_max_pacing_rate = ~0UL;
3507 	sk->sk_pacing_rate = ~0UL;
3508 	WRITE_ONCE(sk->sk_pacing_shift, 10);
3509 	sk->sk_incoming_cpu = -1;
3510 
3511 	sk_rx_queue_clear(sk);
3512 	/*
3513 	 * Before updating sk_refcnt, we must commit prior changes to memory
3514 	 * (Documentation/RCU/rculist_nulls.rst for details)
3515 	 */
3516 	smp_wmb();
3517 	refcount_set(&sk->sk_refcnt, 1);
3518 	atomic_set(&sk->sk_drops, 0);
3519 }
3520 EXPORT_SYMBOL(sock_init_data_uid);
3521 
3522 void sock_init_data(struct socket *sock, struct sock *sk)
3523 {
3524 	kuid_t uid = sock ?
3525 		SOCK_INODE(sock)->i_uid :
3526 		make_kuid(sock_net(sk)->user_ns, 0);
3527 
3528 	sock_init_data_uid(sock, sk, uid);
3529 }
3530 EXPORT_SYMBOL(sock_init_data);
3531 
3532 void lock_sock_nested(struct sock *sk, int subclass)
3533 {
3534 	/* The sk_lock has mutex_lock() semantics here. */
3535 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
3536 
3537 	might_sleep();
3538 	spin_lock_bh(&sk->sk_lock.slock);
3539 	if (sock_owned_by_user_nocheck(sk))
3540 		__lock_sock(sk);
3541 	sk->sk_lock.owned = 1;
3542 	spin_unlock_bh(&sk->sk_lock.slock);
3543 }
3544 EXPORT_SYMBOL(lock_sock_nested);
3545 
3546 void release_sock(struct sock *sk)
3547 {
3548 	spin_lock_bh(&sk->sk_lock.slock);
3549 	if (sk->sk_backlog.tail)
3550 		__release_sock(sk);
3551 
3552 	if (sk->sk_prot->release_cb)
3553 		INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
3554 				     tcp_release_cb, sk);
3555 
3556 	sock_release_ownership(sk);
3557 	if (waitqueue_active(&sk->sk_lock.wq))
3558 		wake_up(&sk->sk_lock.wq);
3559 	spin_unlock_bh(&sk->sk_lock.slock);
3560 }
3561 EXPORT_SYMBOL(release_sock);
3562 
3563 bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
3564 {
3565 	might_sleep();
3566 	spin_lock_bh(&sk->sk_lock.slock);
3567 
3568 	if (!sock_owned_by_user_nocheck(sk)) {
3569 		/*
3570 		 * Fast path return with bottom halves disabled and
3571 		 * sock::sk_lock.slock held.
3572 		 *
3573 		 * The 'mutex' is not contended and holding
3574 		 * sock::sk_lock.slock prevents all other lockers to
3575 		 * proceed so the corresponding unlock_sock_fast() can
3576 		 * avoid the slow path of release_sock() completely and
3577 		 * just release slock.
3578 		 *
3579 		 * From a semantical POV this is equivalent to 'acquiring'
3580 		 * the 'mutex', hence the corresponding lockdep
3581 		 * mutex_release() has to happen in the fast path of
3582 		 * unlock_sock_fast().
3583 		 */
3584 		return false;
3585 	}
3586 
3587 	__lock_sock(sk);
3588 	sk->sk_lock.owned = 1;
3589 	__acquire(&sk->sk_lock.slock);
3590 	spin_unlock_bh(&sk->sk_lock.slock);
3591 	return true;
3592 }
3593 EXPORT_SYMBOL(__lock_sock_fast);
3594 
3595 int sock_gettstamp(struct socket *sock, void __user *userstamp,
3596 		   bool timeval, bool time32)
3597 {
3598 	struct sock *sk = sock->sk;
3599 	struct timespec64 ts;
3600 
3601 	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
3602 	ts = ktime_to_timespec64(sock_read_timestamp(sk));
3603 	if (ts.tv_sec == -1)
3604 		return -ENOENT;
3605 	if (ts.tv_sec == 0) {
3606 		ktime_t kt = ktime_get_real();
3607 		sock_write_timestamp(sk, kt);
3608 		ts = ktime_to_timespec64(kt);
3609 	}
3610 
3611 	if (timeval)
3612 		ts.tv_nsec /= 1000;
3613 
3614 #ifdef CONFIG_COMPAT_32BIT_TIME
3615 	if (time32)
3616 		return put_old_timespec32(&ts, userstamp);
3617 #endif
3618 #ifdef CONFIG_SPARC64
3619 	/* beware of padding in sparc64 timeval */
3620 	if (timeval && !in_compat_syscall()) {
3621 		struct __kernel_old_timeval __user tv = {
3622 			.tv_sec = ts.tv_sec,
3623 			.tv_usec = ts.tv_nsec,
3624 		};
3625 		if (copy_to_user(userstamp, &tv, sizeof(tv)))
3626 			return -EFAULT;
3627 		return 0;
3628 	}
3629 #endif
3630 	return put_timespec64(&ts, userstamp);
3631 }
3632 EXPORT_SYMBOL(sock_gettstamp);
3633 
3634 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
3635 {
3636 	if (!sock_flag(sk, flag)) {
3637 		unsigned long previous_flags = sk->sk_flags;
3638 
3639 		sock_set_flag(sk, flag);
3640 		/*
3641 		 * we just set one of the two flags which require net
3642 		 * time stamping, but time stamping might have been on
3643 		 * already because of the other one
3644 		 */
3645 		if (sock_needs_netstamp(sk) &&
3646 		    !(previous_flags & SK_FLAGS_TIMESTAMP))
3647 			net_enable_timestamp();
3648 	}
3649 }
3650 
3651 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
3652 		       int level, int type)
3653 {
3654 	struct sock_exterr_skb *serr;
3655 	struct sk_buff *skb;
3656 	int copied, err;
3657 
3658 	err = -EAGAIN;
3659 	skb = sock_dequeue_err_skb(sk);
3660 	if (skb == NULL)
3661 		goto out;
3662 
3663 	copied = skb->len;
3664 	if (copied > len) {
3665 		msg->msg_flags |= MSG_TRUNC;
3666 		copied = len;
3667 	}
3668 	err = skb_copy_datagram_msg(skb, 0, msg, copied);
3669 	if (err)
3670 		goto out_free_skb;
3671 
3672 	sock_recv_timestamp(msg, sk, skb);
3673 
3674 	serr = SKB_EXT_ERR(skb);
3675 	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
3676 
3677 	msg->msg_flags |= MSG_ERRQUEUE;
3678 	err = copied;
3679 
3680 out_free_skb:
3681 	kfree_skb(skb);
3682 out:
3683 	return err;
3684 }
3685 EXPORT_SYMBOL(sock_recv_errqueue);
3686 
3687 /*
3688  *	Get a socket option on an socket.
3689  *
3690  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
3691  *	asynchronous errors should be reported by getsockopt. We assume
3692  *	this means if you specify SO_ERROR (otherwise whats the point of it).
3693  */
3694 int sock_common_getsockopt(struct socket *sock, int level, int optname,
3695 			   char __user *optval, int __user *optlen)
3696 {
3697 	struct sock *sk = sock->sk;
3698 
3699 	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
3700 	return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
3701 }
3702 EXPORT_SYMBOL(sock_common_getsockopt);
3703 
3704 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3705 			int flags)
3706 {
3707 	struct sock *sk = sock->sk;
3708 	int addr_len = 0;
3709 	int err;
3710 
3711 	err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
3712 	if (err >= 0)
3713 		msg->msg_namelen = addr_len;
3714 	return err;
3715 }
3716 EXPORT_SYMBOL(sock_common_recvmsg);
3717 
3718 /*
3719  *	Set socket options on an inet socket.
3720  */
3721 int sock_common_setsockopt(struct socket *sock, int level, int optname,
3722 			   sockptr_t optval, unsigned int optlen)
3723 {
3724 	struct sock *sk = sock->sk;
3725 
3726 	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
3727 	return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen);
3728 }
3729 EXPORT_SYMBOL(sock_common_setsockopt);
3730 
3731 void sk_common_release(struct sock *sk)
3732 {
3733 	if (sk->sk_prot->destroy)
3734 		sk->sk_prot->destroy(sk);
3735 
3736 	/*
3737 	 * Observation: when sk_common_release is called, processes have
3738 	 * no access to socket. But net still has.
3739 	 * Step one, detach it from networking:
3740 	 *
3741 	 * A. Remove from hash tables.
3742 	 */
3743 
3744 	sk->sk_prot->unhash(sk);
3745 
3746 	/*
3747 	 * In this point socket cannot receive new packets, but it is possible
3748 	 * that some packets are in flight because some CPU runs receiver and
3749 	 * did hash table lookup before we unhashed socket. They will achieve
3750 	 * receive queue and will be purged by socket destructor.
3751 	 *
3752 	 * Also we still have packets pending on receive queue and probably,
3753 	 * our own packets waiting in device queues. sock_destroy will drain
3754 	 * receive queue, but transmitted packets will delay socket destruction
3755 	 * until the last reference will be released.
3756 	 */
3757 
3758 	sock_orphan(sk);
3759 
3760 	xfrm_sk_free_policy(sk);
3761 
3762 	sock_put(sk);
3763 }
3764 EXPORT_SYMBOL(sk_common_release);
3765 
3766 void sk_get_meminfo(const struct sock *sk, u32 *mem)
3767 {
3768 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
3769 
3770 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3771 	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
3772 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3773 	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
3774 	mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
3775 	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
3776 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
3777 	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
3778 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
3779 }
3780 
3781 #ifdef CONFIG_PROC_FS
/* Bitmap of protocol inuse_idx slots currently handed out (PROTO_INUSE_NR bits). */
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3783 
3784 int sock_prot_inuse_get(struct net *net, struct proto *prot)
3785 {
3786 	int cpu, idx = prot->inuse_idx;
3787 	int res = 0;
3788 
3789 	for_each_possible_cpu(cpu)
3790 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3791 
3792 	return res >= 0 ? res : 0;
3793 }
3794 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3795 
3796 int sock_inuse_get(struct net *net)
3797 {
3798 	int cpu, res = 0;
3799 
3800 	for_each_possible_cpu(cpu)
3801 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->all;
3802 
3803 	return res;
3804 }
3805 
3806 EXPORT_SYMBOL_GPL(sock_inuse_get);
3807 
3808 static int __net_init sock_inuse_init_net(struct net *net)
3809 {
3810 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3811 	if (net->core.prot_inuse == NULL)
3812 		return -ENOMEM;
3813 	return 0;
3814 }
3815 
3816 static void __net_exit sock_inuse_exit_net(struct net *net)
3817 {
3818 	free_percpu(net->core.prot_inuse);
3819 }
3820 
3821 static struct pernet_operations net_inuse_ops = {
3822 	.init = sock_inuse_init_net,
3823 	.exit = sock_inuse_exit_net,
3824 };
3825 
3826 static __init int net_inuse_init(void)
3827 {
3828 	if (register_pernet_subsys(&net_inuse_ops))
3829 		panic("Cannot initialize net inuse counters");
3830 
3831 	return 0;
3832 }
3833 
3834 core_initcall(net_inuse_init);
3835 
3836 static int assign_proto_idx(struct proto *prot)
3837 {
3838 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
3839 
3840 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3841 		pr_err("PROTO_INUSE_NR exhausted\n");
3842 		return -ENOSPC;
3843 	}
3844 
3845 	set_bit(prot->inuse_idx, proto_inuse_idx);
3846 	return 0;
3847 }
3848 
3849 static void release_proto_idx(struct proto *prot)
3850 {
3851 	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3852 		clear_bit(prot->inuse_idx, proto_inuse_idx);
3853 }
3854 #else
/* !CONFIG_PROC_FS: there is no index bookkeeping, so this always succeeds. */
static inline int assign_proto_idx(struct proto *prot)
{
	return 0;
}
3859 
/* !CONFIG_PROC_FS: nothing was assigned, so there is nothing to release. */
static inline void release_proto_idx(struct proto *prot)
{
}
3863 
3864 #endif
3865 
3866 static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
3867 {
3868 	if (!twsk_prot)
3869 		return;
3870 	kfree(twsk_prot->twsk_slab_name);
3871 	twsk_prot->twsk_slab_name = NULL;
3872 	kmem_cache_destroy(twsk_prot->twsk_slab);
3873 	twsk_prot->twsk_slab = NULL;
3874 }
3875 
3876 static int tw_prot_init(const struct proto *prot)
3877 {
3878 	struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
3879 
3880 	if (!twsk_prot)
3881 		return 0;
3882 
3883 	twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
3884 					      prot->name);
3885 	if (!twsk_prot->twsk_slab_name)
3886 		return -ENOMEM;
3887 
3888 	twsk_prot->twsk_slab =
3889 		kmem_cache_create(twsk_prot->twsk_slab_name,
3890 				  twsk_prot->twsk_obj_size, 0,
3891 				  SLAB_ACCOUNT | prot->slab_flags,
3892 				  NULL);
3893 	if (!twsk_prot->twsk_slab) {
3894 		pr_crit("%s: Can't create timewait sock SLAB cache!\n",
3895 			prot->name);
3896 		return -ENOMEM;
3897 	}
3898 
3899 	return 0;
3900 }
3901 
3902 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3903 {
3904 	if (!rsk_prot)
3905 		return;
3906 	kfree(rsk_prot->slab_name);
3907 	rsk_prot->slab_name = NULL;
3908 	kmem_cache_destroy(rsk_prot->slab);
3909 	rsk_prot->slab = NULL;
3910 }
3911 
3912 static int req_prot_init(const struct proto *prot)
3913 {
3914 	struct request_sock_ops *rsk_prot = prot->rsk_prot;
3915 
3916 	if (!rsk_prot)
3917 		return 0;
3918 
3919 	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3920 					prot->name);
3921 	if (!rsk_prot->slab_name)
3922 		return -ENOMEM;
3923 
3924 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3925 					   rsk_prot->obj_size, 0,
3926 					   SLAB_ACCOUNT | prot->slab_flags,
3927 					   NULL);
3928 
3929 	if (!rsk_prot->slab) {
3930 		pr_crit("%s: Can't create request sock SLAB cache!\n",
3931 			prot->name);
3932 		return -ENOMEM;
3933 	}
3934 	return 0;
3935 }
3936 
3937 int proto_register(struct proto *prot, int alloc_slab)
3938 {
3939 	int ret = -ENOBUFS;
3940 
3941 	if (prot->memory_allocated && !prot->sysctl_mem) {
3942 		pr_err("%s: missing sysctl_mem\n", prot->name);
3943 		return -EINVAL;
3944 	}
3945 	if (prot->memory_allocated && !prot->per_cpu_fw_alloc) {
3946 		pr_err("%s: missing per_cpu_fw_alloc\n", prot->name);
3947 		return -EINVAL;
3948 	}
3949 	if (alloc_slab) {
3950 		prot->slab = kmem_cache_create_usercopy(prot->name,
3951 					prot->obj_size, 0,
3952 					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3953 					prot->slab_flags,
3954 					prot->useroffset, prot->usersize,
3955 					NULL);
3956 
3957 		if (prot->slab == NULL) {
3958 			pr_crit("%s: Can't create sock SLAB cache!\n",
3959 				prot->name);
3960 			goto out;
3961 		}
3962 
3963 		if (req_prot_init(prot))
3964 			goto out_free_request_sock_slab;
3965 
3966 		if (tw_prot_init(prot))
3967 			goto out_free_timewait_sock_slab;
3968 	}
3969 
3970 	mutex_lock(&proto_list_mutex);
3971 	ret = assign_proto_idx(prot);
3972 	if (ret) {
3973 		mutex_unlock(&proto_list_mutex);
3974 		goto out_free_timewait_sock_slab;
3975 	}
3976 	list_add(&prot->node, &proto_list);
3977 	mutex_unlock(&proto_list_mutex);
3978 	return ret;
3979 
3980 out_free_timewait_sock_slab:
3981 	if (alloc_slab)
3982 		tw_prot_cleanup(prot->twsk_prot);
3983 out_free_request_sock_slab:
3984 	if (alloc_slab) {
3985 		req_prot_cleanup(prot->rsk_prot);
3986 
3987 		kmem_cache_destroy(prot->slab);
3988 		prot->slab = NULL;
3989 	}
3990 out:
3991 	return ret;
3992 }
3993 EXPORT_SYMBOL(proto_register);
3994 
3995 void proto_unregister(struct proto *prot)
3996 {
3997 	mutex_lock(&proto_list_mutex);
3998 	release_proto_idx(prot);
3999 	list_del(&prot->node);
4000 	mutex_unlock(&proto_list_mutex);
4001 
4002 	kmem_cache_destroy(prot->slab);
4003 	prot->slab = NULL;
4004 
4005 	req_prot_cleanup(prot->rsk_prot);
4006 	tw_prot_cleanup(prot->twsk_prot);
4007 }
4008 EXPORT_SYMBOL(proto_unregister);
4009 
4010 int sock_load_diag_module(int family, int protocol)
4011 {
4012 	if (!protocol) {
4013 		if (!sock_is_registered(family))
4014 			return -ENOENT;
4015 
4016 		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
4017 				      NETLINK_SOCK_DIAG, family);
4018 	}
4019 
4020 #ifdef CONFIG_INET
4021 	if (family == AF_INET &&
4022 	    protocol != IPPROTO_RAW &&
4023 	    protocol < MAX_INET_PROTOS &&
4024 	    !rcu_access_pointer(inet_protos[protocol]))
4025 		return -ENOENT;
4026 #endif
4027 
4028 	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
4029 			      NETLINK_SOCK_DIAG, family, protocol);
4030 }
4031 EXPORT_SYMBOL(sock_load_diag_module);
4032 
4033 #ifdef CONFIG_PROC_FS
4034 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
4035 	__acquires(proto_list_mutex)
4036 {
4037 	mutex_lock(&proto_list_mutex);
4038 	return seq_list_start_head(&proto_list, *pos);
4039 }
4040 
4041 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4042 {
4043 	return seq_list_next(v, &proto_list, pos);
4044 }
4045 
4046 static void proto_seq_stop(struct seq_file *seq, void *v)
4047 	__releases(proto_list_mutex)
4048 {
4049 	mutex_unlock(&proto_list_mutex);
4050 }
4051 
/* Map a method pointer to 'y' (non-NULL) or 'n' (NULL). */
static char proto_method_implemented(const void *method)
{
	if (method)
		return 'y';

	return 'n';
}
4056 static long sock_prot_memory_allocated(struct proto *proto)
4057 {
4058 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
4059 }
4060 
4061 static const char *sock_prot_memory_pressure(struct proto *proto)
4062 {
4063 	return proto->memory_pressure != NULL ?
4064 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
4065 }
4066 
4067 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
4068 {
4069 
4070 	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
4071 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
4072 		   proto->name,
4073 		   proto->obj_size,
4074 		   sock_prot_inuse_get(seq_file_net(seq), proto),
4075 		   sock_prot_memory_allocated(proto),
4076 		   sock_prot_memory_pressure(proto),
4077 		   proto->max_header,
4078 		   proto->slab == NULL ? "no" : "yes",
4079 		   module_name(proto->owner),
4080 		   proto_method_implemented(proto->close),
4081 		   proto_method_implemented(proto->connect),
4082 		   proto_method_implemented(proto->disconnect),
4083 		   proto_method_implemented(proto->accept),
4084 		   proto_method_implemented(proto->ioctl),
4085 		   proto_method_implemented(proto->init),
4086 		   proto_method_implemented(proto->destroy),
4087 		   proto_method_implemented(proto->shutdown),
4088 		   proto_method_implemented(proto->setsockopt),
4089 		   proto_method_implemented(proto->getsockopt),
4090 		   proto_method_implemented(proto->sendmsg),
4091 		   proto_method_implemented(proto->recvmsg),
4092 		   proto_method_implemented(proto->bind),
4093 		   proto_method_implemented(proto->backlog_rcv),
4094 		   proto_method_implemented(proto->hash),
4095 		   proto_method_implemented(proto->unhash),
4096 		   proto_method_implemented(proto->get_port),
4097 		   proto_method_implemented(proto->enter_memory_pressure));
4098 }
4099 
4100 static int proto_seq_show(struct seq_file *seq, void *v)
4101 {
4102 	if (v == &proto_list)
4103 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
4104 			   "protocol",
4105 			   "size",
4106 			   "sockets",
4107 			   "memory",
4108 			   "press",
4109 			   "maxhdr",
4110 			   "slab",
4111 			   "module",
4112 			   "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n");
4113 	else
4114 		proto_seq_printf(seq, list_entry(v, struct proto, node));
4115 	return 0;
4116 }
4117 
4118 static const struct seq_operations proto_seq_ops = {
4119 	.start  = proto_seq_start,
4120 	.next   = proto_seq_next,
4121 	.stop   = proto_seq_stop,
4122 	.show   = proto_seq_show,
4123 };
4124 
4125 static __net_init int proto_init_net(struct net *net)
4126 {
4127 	if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
4128 			sizeof(struct seq_net_private)))
4129 		return -ENOMEM;
4130 
4131 	return 0;
4132 }
4133 
4134 static __net_exit void proto_exit_net(struct net *net)
4135 {
4136 	remove_proc_entry("protocols", net->proc_net);
4137 }
4138 
4139 
4140 static __net_initdata struct pernet_operations proto_net_ops = {
4141 	.init = proto_init_net,
4142 	.exit = proto_exit_net,
4143 };
4144 
4145 static int __init proto_init(void)
4146 {
4147 	return register_pernet_subsys(&proto_net_ops);
4148 }
4149 
4150 subsys_initcall(proto_init);
4151 
4152 #endif /* PROC_FS */
4153 
4154 #ifdef CONFIG_NET_RX_BUSY_POLL
4155 bool sk_busy_loop_end(void *p, unsigned long start_time)
4156 {
4157 	struct sock *sk = p;
4158 
4159 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
4160 		return true;
4161 
4162 	if (sk_is_udp(sk) &&
4163 	    !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
4164 		return true;
4165 
4166 	return sk_busy_loop_timeout(sk, start_time);
4167 }
4168 EXPORT_SYMBOL(sk_busy_loop_end);
4169 #endif /* CONFIG_NET_RX_BUSY_POLL */
4170 
4171 int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
4172 {
4173 	if (!sk->sk_prot->bind_add)
4174 		return -EOPNOTSUPP;
4175 	return sk->sk_prot->bind_add(sk, addr, addr_len);
4176 }
4177 EXPORT_SYMBOL(sock_bind_add);
4178 
4179 /* Copy 'size' bytes from userspace and return `size` back to userspace */
4180 int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
4181 		     void __user *arg, void *karg, size_t size)
4182 {
4183 	int ret;
4184 
4185 	if (copy_from_user(karg, arg, size))
4186 		return -EFAULT;
4187 
4188 	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
4189 	if (ret)
4190 		return ret;
4191 
4192 	if (copy_to_user(arg, karg, size))
4193 		return -EFAULT;
4194 
4195 	return 0;
4196 }
4197 EXPORT_SYMBOL(sock_ioctl_inout);
4198 
4199 /* This is the most common ioctl prep function, where the result (4 bytes) is
4200  * copied back to userspace if the ioctl() returns successfully. No input is
4201  * copied from userspace as input argument.
4202  */
4203 static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
4204 {
4205 	int ret, karg = 0;
4206 
4207 	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
4208 	if (ret)
4209 		return ret;
4210 
4211 	return put_user(karg, (int __user *)arg);
4212 }
4213 
4214 /* A wrapper around sock ioctls, which copies the data from userspace
4215  * (depending on the protocol/ioctl), and copies back the result to userspace.
4216  * The main motivation for this function is to pass kernel memory to the
4217  * protocol ioctl callbacks, instead of userspace memory.
4218  */
4219 int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
4220 {
4221 	int rc = 1;
4222 
4223 	if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
4224 		rc = ipmr_sk_ioctl(sk, cmd, arg);
4225 	else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
4226 		rc = ip6mr_sk_ioctl(sk, cmd, arg);
4227 	else if (sk_is_phonet(sk))
4228 		rc = phonet_sk_ioctl(sk, cmd, arg);
4229 
4230 	/* If ioctl was processed, returns its value */
4231 	if (rc <= 0)
4232 		return rc;
4233 
4234 	/* Otherwise call the default handler */
4235 	return sock_ioctl_out(sk, cmd, arg);
4236 }
4237 EXPORT_SYMBOL(sk_ioctl);
4238 
4239 static int __init sock_struct_check(void)
4240 {
4241 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops);
4242 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off);
4243 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_error_queue);
4244 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_receive_queue);
4245 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_backlog);
4246 
4247 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst);
4248 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_ifindex);
4249 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_cookie);
4250 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvbuf);
4251 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_filter);
4252 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_wq);
4253 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_data_ready);
4254 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvtimeo);
4255 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvlowat);
4256 
4257 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
4258 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
4259 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);
4260 
4261 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
4262 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
4263 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_forward_alloc);
4264 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_tsflags);
4265 
4266 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
4267 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
4268 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
4269 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
4270 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
4271 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
4272 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head);
4273 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue);
4274 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending);
4275 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm);
4276 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status);
4277 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag);
4278 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer);
4279 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate);
4280 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey);
4281 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey);
4282 
4283 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate);
4284 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
4285 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
4286 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
4287 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
4288 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
4289 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
4290 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
4291 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
4292 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
4293 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
4294 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
4295 	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);
4296 	return 0;
4297 }
4298 
4299 core_initcall(sock_struct_check);
4300