xref: /linux/net/core/sock.c (revision d8d9ba8dc9c77358cd7ea73e4e44e8952c9baf35)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * INET		An implementation of the TCP/IP protocol suite for the LINUX
4  *		operating system.  INET is implemented using the  BSD Socket
5  *		interface as the means of communication with the user level.
6  *
7  *		Generic socket support routines. Memory allocators, socket lock/release
8  *		handler for protocols to use and generic option handler.
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Florian La Roche, <flla@stud.uni-sb.de>
13  *		Alan Cox, <A.Cox@swansea.ac.uk>
14  *
15  * Fixes:
16  *		Alan Cox	: 	Numerous verify_area() problems
17  *		Alan Cox	:	Connecting on a connecting socket
18  *					now returns an error for tcp.
19  *		Alan Cox	:	sock->protocol is set correctly.
20  *					and is not sometimes left as 0.
21  *		Alan Cox	:	connect handles icmp errors on a
22  *					connect properly. Unfortunately there
23  *					is a restart syscall nasty there. I
24  *					can't match BSD without hacking the C
25  *					library. Ideas urgently sought!
26  *		Alan Cox	:	Disallow bind() to addresses that are
27  *					not ours - especially broadcast ones!!
28  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
29  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
30  *					instead they leave that for the DESTROY timer.
31  *		Alan Cox	:	Clean up error flag in accept
32  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
33  *					was buggy. Put a remove_sock() in the handler
34  *					for memory when we hit 0. Also altered the timer
35  *					code. The ACK stuff can wait and needs major
36  *					TCP layer surgery.
37  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
38  *					and fixed timer/inet_bh race.
39  *		Alan Cox	:	Added zapped flag for TCP
40  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
41  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
43  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
44  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
46  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
47  *	Pauline Middelink	:	identd support
48  *		Alan Cox	:	Fixed connect() taking signals I think.
49  *		Alan Cox	:	SO_LINGER supported
50  *		Alan Cox	:	Error reporting fixes
51  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
52  *		Alan Cox	:	inet sockets don't set sk->type!
53  *		Alan Cox	:	Split socket option code
54  *		Alan Cox	:	Callbacks
55  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
56  *		Alex		:	Removed restriction on inet fioctl
57  *		Alan Cox	:	Splitting INET from NET core
58  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
59  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
60  *		Alan Cox	:	Split IP from generic code
61  *		Alan Cox	:	New kfree_skbmem()
62  *		Alan Cox	:	Make SO_DEBUG superuser only.
63  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
64  *					(compatibility fix)
65  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
66  *		Alan Cox	:	Allocator for a socket is settable.
67  *		Alan Cox	:	SO_ERROR includes soft errors.
68  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
69  *		Alan Cox	: 	Generic socket allocation to make hooks
70  *					easier (suggested by Craig Metz).
71  *		Michael Pall	:	SO_ERROR returns positive errno again
72  *              Steve Whitehouse:       Added default destructor to free
73  *                                      protocol private data.
74  *              Steve Whitehouse:       Added various other default routines
75  *                                      common to several socket families.
76  *              Chris Evans     :       Call suser() check last on F_SETOWN
77  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
79  *		Andi Kleen	:	Fix write_space callback
80  *		Chris Evans	:	Security fixes - signedness again
81  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
82  *
83  * To Fix:
84  */
85 
86 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87 
88 #include <asm/unaligned.h>
89 #include <linux/capability.h>
90 #include <linux/errno.h>
91 #include <linux/errqueue.h>
92 #include <linux/types.h>
93 #include <linux/socket.h>
94 #include <linux/in.h>
95 #include <linux/kernel.h>
96 #include <linux/module.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/sched.h>
100 #include <linux/sched/mm.h>
101 #include <linux/timer.h>
102 #include <linux/string.h>
103 #include <linux/sockios.h>
104 #include <linux/net.h>
105 #include <linux/mm.h>
106 #include <linux/slab.h>
107 #include <linux/interrupt.h>
108 #include <linux/poll.h>
109 #include <linux/tcp.h>
110 #include <linux/init.h>
111 #include <linux/highmem.h>
112 #include <linux/user_namespace.h>
113 #include <linux/static_key.h>
114 #include <linux/memcontrol.h>
115 #include <linux/prefetch.h>
116 #include <linux/compat.h>
117 
118 #include <linux/uaccess.h>
119 
120 #include <linux/netdevice.h>
121 #include <net/protocol.h>
122 #include <linux/skbuff.h>
123 #include <net/net_namespace.h>
124 #include <net/request_sock.h>
125 #include <net/sock.h>
126 #include <linux/net_tstamp.h>
127 #include <net/xfrm.h>
128 #include <linux/ipsec.h>
129 #include <net/cls_cgroup.h>
130 #include <net/netprio_cgroup.h>
131 #include <linux/sock_diag.h>
132 
133 #include <linux/filter.h>
134 #include <net/sock_reuseport.h>
135 #include <net/bpf_sk_storage.h>
136 
137 #include <trace/events/sock.h>
138 
139 #include <net/tcp.h>
140 #include <net/busy_poll.h>
141 
142 #include <linux/ethtool.h>
143 
/* Forward declaration; the definition is not in this part of the file. */
static void sock_inuse_add(struct net *net, int val);

/*
 * List head "proto_list" and the mutex "proto_list_mutex".
 * NOTE(review): presumably the mutex serialises access to the list;
 * confirm against the users further down in this file.
 */
static LIST_HEAD(proto_list);
static DEFINE_MUTEX(proto_list_mutex);
148 
149 /**
150  * sk_ns_capable - General socket capability test
151  * @sk: Socket to use a capability on or through
152  * @user_ns: The user namespace of the capability to use
153  * @cap: The capability to use
154  *
155  * Test to see if the opener of the socket had when the socket was
156  * created and the current process has the capability @cap in the user
157  * namespace @user_ns.
158  */
159 bool sk_ns_capable(const struct sock *sk,
160 		   struct user_namespace *user_ns, int cap)
161 {
162 	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
163 		ns_capable(user_ns, cap);
164 }
165 EXPORT_SYMBOL(sk_ns_capable);
166 
167 /**
168  * sk_capable - Socket global capability test
169  * @sk: Socket to use a capability on or through
170  * @cap: The global capability to use
171  *
172  * Test to see if the opener of the socket had when the socket was
173  * created and the current process has the capability @cap in all user
174  * namespaces.
175  */
176 bool sk_capable(const struct sock *sk, int cap)
177 {
178 	return sk_ns_capable(sk, &init_user_ns, cap);
179 }
180 EXPORT_SYMBOL(sk_capable);
181 
182 /**
183  * sk_net_capable - Network namespace socket capability test
184  * @sk: Socket to use a capability on or through
185  * @cap: The capability to use
186  *
187  * Test to see if the opener of the socket had when the socket was created
188  * and the current process has the capability @cap over the network namespace
189  * the socket is a member of.
190  */
191 bool sk_net_capable(const struct sock *sk, int cap)
192 {
193 	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
194 }
195 EXPORT_SYMBOL(sk_net_capable);
196 
197 /*
198  * Each address family might have different locking rules, so we have
199  * one slock key per address family and separate keys for internal and
200  * userspace sockets.
201  */
202 static struct lock_class_key af_family_keys[AF_MAX];
203 static struct lock_class_key af_family_kern_keys[AF_MAX];
204 static struct lock_class_key af_family_slock_keys[AF_MAX];
205 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
206 
/*
 * Human-readable lock names for the lock validator.  Each entry is the
 * prefix @x glued to an address family name by string literal
 * concatenation, so the names are fully built at compile time and socket
 * lock initialisation does no string formatting at run time.
 *
 * The macro expands to a comma separated initializer list of 47 strings;
 * the last one is the "AF_MAX" slot.
 * NOTE(review): the list is presumably indexed by address family number,
 * so the order below must follow the AF_* numbering - confirm against the
 * AF_* definitions before touching it.
 */
#define _sock_locks(x)			\
	x "AF_UNSPEC",			\
	x "AF_UNIX",			\
	x "AF_INET",			\
	x "AF_AX25",			\
	x "AF_IPX",			\
	x "AF_APPLETALK",		\
	x "AF_NETROM",			\
	x "AF_BRIDGE",			\
	x "AF_ATMPVC",			\
	x "AF_X25",			\
	x "AF_INET6",			\
	x "AF_ROSE",			\
	x "AF_DECnet",			\
	x "AF_NETBEUI",			\
	x "AF_SECURITY",		\
	x "AF_KEY",			\
	x "AF_NETLINK",			\
	x "AF_PACKET",			\
	x "AF_ASH",			\
	x "AF_ECONET",			\
	x "AF_ATMSVC",			\
	x "AF_RDS",			\
	x "AF_SNA",			\
	x "AF_IRDA",			\
	x "AF_PPPOX",			\
	x "AF_WANPIPE",			\
	x "AF_LLC",			\
	x "27",				\
	x "28",				\
	x "AF_CAN",			\
	x "AF_TIPC",			\
	x "AF_BLUETOOTH",		\
	x "IUCV",			\
	x "AF_RXRPC",			\
	x "AF_ISDN",			\
	x "AF_PHONET",			\
	x "AF_IEEE802154",		\
	x "AF_CAIF",			\
	x "AF_ALG",			\
	x "AF_NFC",			\
	x "AF_VSOCK",			\
	x "AF_KCM",			\
	x "AF_QIPCRTR",			\
	x "AF_SMC",			\
	x "AF_XDP",			\
	x "AF_MCTP",			\
	x "AF_MAX"
231 
232 static const char *const af_family_key_strings[AF_MAX+1] = {
233 	_sock_locks("sk_lock-")
234 };
235 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
236 	_sock_locks("slock-")
237 };
238 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
239 	_sock_locks("clock-")
240 };
241 
242 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
243 	_sock_locks("k-sk_lock-")
244 };
245 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
246 	_sock_locks("k-slock-")
247 };
248 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
249 	_sock_locks("k-clock-")
250 };
251 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
252 	_sock_locks("rlock-")
253 };
254 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
255 	_sock_locks("wlock-")
256 };
257 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
258 	_sock_locks("elock-")
259 };
260 
261 /*
262  * sk_callback_lock and sk queues locking rules are per-address-family,
263  * so split the lock classes by using a per-AF key:
264  */
265 static struct lock_class_key af_callback_keys[AF_MAX];
266 static struct lock_class_key af_rlock_keys[AF_MAX];
267 static struct lock_class_key af_wlock_keys[AF_MAX];
268 static struct lock_class_key af_elock_keys[AF_MAX];
269 static struct lock_class_key af_kern_callback_keys[AF_MAX];
270 
271 /* Run time adjustable parameters. */
272 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
273 EXPORT_SYMBOL(sysctl_wmem_max);
274 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
275 EXPORT_SYMBOL(sysctl_rmem_max);
276 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
277 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
278 
279 /* Maximal space eaten by iovec or ancillary data plus some space */
280 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
281 EXPORT_SYMBOL(sysctl_optmem_max);
282 
283 int sysctl_tstamp_allow_data __read_mostly = 1;
284 
285 DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
286 EXPORT_SYMBOL_GPL(memalloc_socks_key);
287 
288 /**
289  * sk_set_memalloc - sets %SOCK_MEMALLOC
290  * @sk: socket to set it on
291  *
292  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
293  * It's the responsibility of the admin to adjust min_free_kbytes
294  * to meet the requirements
295  */
296 void sk_set_memalloc(struct sock *sk)
297 {
298 	sock_set_flag(sk, SOCK_MEMALLOC);
299 	sk->sk_allocation |= __GFP_MEMALLOC;
300 	static_branch_inc(&memalloc_socks_key);
301 }
302 EXPORT_SYMBOL_GPL(sk_set_memalloc);
303 
304 void sk_clear_memalloc(struct sock *sk)
305 {
306 	sock_reset_flag(sk, SOCK_MEMALLOC);
307 	sk->sk_allocation &= ~__GFP_MEMALLOC;
308 	static_branch_dec(&memalloc_socks_key);
309 
310 	/*
311 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
312 	 * progress of swapping. SOCK_MEMALLOC may be cleared while
313 	 * it has rmem allocations due to the last swapfile being deactivated
314 	 * but there is a risk that the socket is unusable due to exceeding
315 	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
316 	 */
317 	sk_mem_reclaim(sk);
318 }
319 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
320 
321 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
322 {
323 	int ret;
324 	unsigned int noreclaim_flag;
325 
326 	/* these should have been dropped before queueing */
327 	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
328 
329 	noreclaim_flag = memalloc_noreclaim_save();
330 	ret = sk->sk_backlog_rcv(sk, skb);
331 	memalloc_noreclaim_restore(noreclaim_flag);
332 
333 	return ret;
334 }
335 EXPORT_SYMBOL(__sk_backlog_rcv);
336 
337 void sk_error_report(struct sock *sk)
338 {
339 	sk->sk_error_report(sk);
340 
341 	switch (sk->sk_family) {
342 	case AF_INET:
343 		fallthrough;
344 	case AF_INET6:
345 		trace_inet_sk_error_report(sk);
346 		break;
347 	default:
348 		break;
349 	}
350 }
351 EXPORT_SYMBOL(sk_error_report);
352 
353 static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
354 {
355 	struct __kernel_sock_timeval tv;
356 
357 	if (timeo == MAX_SCHEDULE_TIMEOUT) {
358 		tv.tv_sec = 0;
359 		tv.tv_usec = 0;
360 	} else {
361 		tv.tv_sec = timeo / HZ;
362 		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
363 	}
364 
365 	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
366 		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
367 		*(struct old_timeval32 *)optval = tv32;
368 		return sizeof(tv32);
369 	}
370 
371 	if (old_timeval) {
372 		struct __kernel_old_timeval old_tv;
373 		old_tv.tv_sec = tv.tv_sec;
374 		old_tv.tv_usec = tv.tv_usec;
375 		*(struct __kernel_old_timeval *)optval = old_tv;
376 		return sizeof(old_tv);
377 	}
378 
379 	*(struct __kernel_sock_timeval *)optval = tv;
380 	return sizeof(tv);
381 }
382 
383 static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
384 			    bool old_timeval)
385 {
386 	struct __kernel_sock_timeval tv;
387 
388 	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
389 		struct old_timeval32 tv32;
390 
391 		if (optlen < sizeof(tv32))
392 			return -EINVAL;
393 
394 		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
395 			return -EFAULT;
396 		tv.tv_sec = tv32.tv_sec;
397 		tv.tv_usec = tv32.tv_usec;
398 	} else if (old_timeval) {
399 		struct __kernel_old_timeval old_tv;
400 
401 		if (optlen < sizeof(old_tv))
402 			return -EINVAL;
403 		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
404 			return -EFAULT;
405 		tv.tv_sec = old_tv.tv_sec;
406 		tv.tv_usec = old_tv.tv_usec;
407 	} else {
408 		if (optlen < sizeof(tv))
409 			return -EINVAL;
410 		if (copy_from_sockptr(&tv, optval, sizeof(tv)))
411 			return -EFAULT;
412 	}
413 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
414 		return -EDOM;
415 
416 	if (tv.tv_sec < 0) {
417 		static int warned __read_mostly;
418 
419 		*timeo_p = 0;
420 		if (warned < 10 && net_ratelimit()) {
421 			warned++;
422 			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
423 				__func__, current->comm, task_pid_nr(current));
424 		}
425 		return 0;
426 	}
427 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
428 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
429 		return 0;
430 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
431 		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
432 	return 0;
433 }
434 
435 static bool sock_needs_netstamp(const struct sock *sk)
436 {
437 	switch (sk->sk_family) {
438 	case AF_UNSPEC:
439 	case AF_UNIX:
440 		return false;
441 	default:
442 		return true;
443 	}
444 }
445 
446 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
447 {
448 	if (sk->sk_flags & flags) {
449 		sk->sk_flags &= ~flags;
450 		if (sock_needs_netstamp(sk) &&
451 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
452 			net_disable_timestamp();
453 	}
454 }
455 
456 
457 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
458 {
459 	unsigned long flags;
460 	struct sk_buff_head *list = &sk->sk_receive_queue;
461 
462 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
463 		atomic_inc(&sk->sk_drops);
464 		trace_sock_rcvqueue_full(sk, skb);
465 		return -ENOMEM;
466 	}
467 
468 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
469 		atomic_inc(&sk->sk_drops);
470 		return -ENOBUFS;
471 	}
472 
473 	skb->dev = NULL;
474 	skb_set_owner_r(skb, sk);
475 
476 	/* we escape from rcu protected region, make sure we dont leak
477 	 * a norefcounted dst
478 	 */
479 	skb_dst_force(skb);
480 
481 	spin_lock_irqsave(&list->lock, flags);
482 	sock_skb_set_dropcount(sk, skb);
483 	__skb_queue_tail(list, skb);
484 	spin_unlock_irqrestore(&list->lock, flags);
485 
486 	if (!sock_flag(sk, SOCK_DEAD))
487 		sk->sk_data_ready(sk);
488 	return 0;
489 }
490 EXPORT_SYMBOL(__sock_queue_rcv_skb);
491 
/*
 * Run @skb through sk_filter() and, if the filter returns 0, queue it
 * with __sock_queue_rcv_skb().
 *
 * Returns the non-zero sk_filter() result if there is one, otherwise the
 * result of __sock_queue_rcv_skb().
 */
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = sk_filter(sk, skb);

	return err ? err : __sock_queue_rcv_skb(sk, skb);
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
503 
504 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
505 		     const int nested, unsigned int trim_cap, bool refcounted)
506 {
507 	int rc = NET_RX_SUCCESS;
508 
509 	if (sk_filter_trim_cap(sk, skb, trim_cap))
510 		goto discard_and_relse;
511 
512 	skb->dev = NULL;
513 
514 	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
515 		atomic_inc(&sk->sk_drops);
516 		goto discard_and_relse;
517 	}
518 	if (nested)
519 		bh_lock_sock_nested(sk);
520 	else
521 		bh_lock_sock(sk);
522 	if (!sock_owned_by_user(sk)) {
523 		/*
524 		 * trylock + unlock semantics:
525 		 */
526 		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
527 
528 		rc = sk_backlog_rcv(sk, skb);
529 
530 		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
531 	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
532 		bh_unlock_sock(sk);
533 		atomic_inc(&sk->sk_drops);
534 		goto discard_and_relse;
535 	}
536 
537 	bh_unlock_sock(sk);
538 out:
539 	if (refcounted)
540 		sock_put(sk);
541 	return rc;
542 discard_and_relse:
543 	kfree_skb(skb);
544 	goto out;
545 }
546 EXPORT_SYMBOL(__sk_receive_skb);
547 
548 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
549 							  u32));
550 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
551 							   u32));
552 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
553 {
554 	struct dst_entry *dst = __sk_dst_get(sk);
555 
556 	if (dst && dst->obsolete &&
557 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
558 			       dst, cookie) == NULL) {
559 		sk_tx_queue_clear(sk);
560 		sk->sk_dst_pending_confirm = 0;
561 		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
562 		dst_release(dst);
563 		return NULL;
564 	}
565 
566 	return dst;
567 }
568 EXPORT_SYMBOL(__sk_dst_check);
569 
570 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
571 {
572 	struct dst_entry *dst = sk_dst_get(sk);
573 
574 	if (dst && dst->obsolete &&
575 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
576 			       dst, cookie) == NULL) {
577 		sk_dst_reset(sk);
578 		dst_release(dst);
579 		return NULL;
580 	}
581 
582 	return dst;
583 }
584 EXPORT_SYMBOL(sk_dst_check);
585 
586 static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
587 {
588 	int ret = -ENOPROTOOPT;
589 #ifdef CONFIG_NETDEVICES
590 	struct net *net = sock_net(sk);
591 
592 	/* Sorry... */
593 	ret = -EPERM;
594 	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
595 		goto out;
596 
597 	ret = -EINVAL;
598 	if (ifindex < 0)
599 		goto out;
600 
601 	sk->sk_bound_dev_if = ifindex;
602 	if (sk->sk_prot->rehash)
603 		sk->sk_prot->rehash(sk);
604 	sk_dst_reset(sk);
605 
606 	ret = 0;
607 
608 out:
609 #endif
610 
611 	return ret;
612 }
613 
614 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
615 {
616 	int ret;
617 
618 	if (lock_sk)
619 		lock_sock(sk);
620 	ret = sock_bindtoindex_locked(sk, ifindex);
621 	if (lock_sk)
622 		release_sock(sk);
623 
624 	return ret;
625 }
626 EXPORT_SYMBOL(sock_bindtoindex);
627 
628 static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
629 {
630 	int ret = -ENOPROTOOPT;
631 #ifdef CONFIG_NETDEVICES
632 	struct net *net = sock_net(sk);
633 	char devname[IFNAMSIZ];
634 	int index;
635 
636 	ret = -EINVAL;
637 	if (optlen < 0)
638 		goto out;
639 
640 	/* Bind this socket to a particular device like "eth0",
641 	 * as specified in the passed interface name. If the
642 	 * name is "" or the option length is zero the socket
643 	 * is not bound.
644 	 */
645 	if (optlen > IFNAMSIZ - 1)
646 		optlen = IFNAMSIZ - 1;
647 	memset(devname, 0, sizeof(devname));
648 
649 	ret = -EFAULT;
650 	if (copy_from_sockptr(devname, optval, optlen))
651 		goto out;
652 
653 	index = 0;
654 	if (devname[0] != '\0') {
655 		struct net_device *dev;
656 
657 		rcu_read_lock();
658 		dev = dev_get_by_name_rcu(net, devname);
659 		if (dev)
660 			index = dev->ifindex;
661 		rcu_read_unlock();
662 		ret = -ENODEV;
663 		if (!dev)
664 			goto out;
665 	}
666 
667 	return sock_bindtoindex(sk, index, true);
668 out:
669 #endif
670 
671 	return ret;
672 }
673 
674 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
675 				int __user *optlen, int len)
676 {
677 	int ret = -ENOPROTOOPT;
678 #ifdef CONFIG_NETDEVICES
679 	struct net *net = sock_net(sk);
680 	char devname[IFNAMSIZ];
681 
682 	if (sk->sk_bound_dev_if == 0) {
683 		len = 0;
684 		goto zero;
685 	}
686 
687 	ret = -EINVAL;
688 	if (len < IFNAMSIZ)
689 		goto out;
690 
691 	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
692 	if (ret)
693 		goto out;
694 
695 	len = strlen(devname) + 1;
696 
697 	ret = -EFAULT;
698 	if (copy_to_user(optval, devname, len))
699 		goto out;
700 
701 zero:
702 	ret = -EFAULT;
703 	if (put_user(len, optlen))
704 		goto out;
705 
706 	ret = 0;
707 
708 out:
709 #endif
710 
711 	return ret;
712 }
713 
714 bool sk_mc_loop(struct sock *sk)
715 {
716 	if (dev_recursion_level())
717 		return false;
718 	if (!sk)
719 		return true;
720 	switch (sk->sk_family) {
721 	case AF_INET:
722 		return inet_sk(sk)->mc_loop;
723 #if IS_ENABLED(CONFIG_IPV6)
724 	case AF_INET6:
725 		return inet6_sk(sk)->mc_loop;
726 #endif
727 	}
728 	WARN_ON_ONCE(1);
729 	return true;
730 }
731 EXPORT_SYMBOL(sk_mc_loop);
732 
733 void sock_set_reuseaddr(struct sock *sk)
734 {
735 	lock_sock(sk);
736 	sk->sk_reuse = SK_CAN_REUSE;
737 	release_sock(sk);
738 }
739 EXPORT_SYMBOL(sock_set_reuseaddr);
740 
741 void sock_set_reuseport(struct sock *sk)
742 {
743 	lock_sock(sk);
744 	sk->sk_reuseport = true;
745 	release_sock(sk);
746 }
747 EXPORT_SYMBOL(sock_set_reuseport);
748 
749 void sock_no_linger(struct sock *sk)
750 {
751 	lock_sock(sk);
752 	sk->sk_lingertime = 0;
753 	sock_set_flag(sk, SOCK_LINGER);
754 	release_sock(sk);
755 }
756 EXPORT_SYMBOL(sock_no_linger);
757 
758 void sock_set_priority(struct sock *sk, u32 priority)
759 {
760 	lock_sock(sk);
761 	sk->sk_priority = priority;
762 	release_sock(sk);
763 }
764 EXPORT_SYMBOL(sock_set_priority);
765 
766 void sock_set_sndtimeo(struct sock *sk, s64 secs)
767 {
768 	lock_sock(sk);
769 	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
770 		sk->sk_sndtimeo = secs * HZ;
771 	else
772 		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
773 	release_sock(sk);
774 }
775 EXPORT_SYMBOL(sock_set_sndtimeo);
776 
777 static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
778 {
779 	if (val)  {
780 		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
781 		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
782 		sock_set_flag(sk, SOCK_RCVTSTAMP);
783 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
784 	} else {
785 		sock_reset_flag(sk, SOCK_RCVTSTAMP);
786 		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
787 	}
788 }
789 
790 void sock_enable_timestamps(struct sock *sk)
791 {
792 	lock_sock(sk);
793 	__sock_set_timestamps(sk, true, false, true);
794 	release_sock(sk);
795 }
796 EXPORT_SYMBOL(sock_enable_timestamps);
797 
798 void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
799 {
800 	switch (optname) {
801 	case SO_TIMESTAMP_OLD:
802 		__sock_set_timestamps(sk, valbool, false, false);
803 		break;
804 	case SO_TIMESTAMP_NEW:
805 		__sock_set_timestamps(sk, valbool, true, false);
806 		break;
807 	case SO_TIMESTAMPNS_OLD:
808 		__sock_set_timestamps(sk, valbool, false, true);
809 		break;
810 	case SO_TIMESTAMPNS_NEW:
811 		__sock_set_timestamps(sk, valbool, true, true);
812 		break;
813 	}
814 }
815 
816 static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
817 {
818 	struct net *net = sock_net(sk);
819 	struct net_device *dev = NULL;
820 	bool match = false;
821 	int *vclock_index;
822 	int i, num;
823 
824 	if (sk->sk_bound_dev_if)
825 		dev = dev_get_by_index(net, sk->sk_bound_dev_if);
826 
827 	if (!dev) {
828 		pr_err("%s: sock not bind to device\n", __func__);
829 		return -EOPNOTSUPP;
830 	}
831 
832 	num = ethtool_get_phc_vclocks(dev, &vclock_index);
833 	for (i = 0; i < num; i++) {
834 		if (*(vclock_index + i) == phc_index) {
835 			match = true;
836 			break;
837 		}
838 	}
839 
840 	if (num > 0)
841 		kfree(vclock_index);
842 
843 	if (!match)
844 		return -EINVAL;
845 
846 	sk->sk_bind_phc = phc_index;
847 
848 	return 0;
849 }
850 
851 int sock_set_timestamping(struct sock *sk, int optname,
852 			  struct so_timestamping timestamping)
853 {
854 	int val = timestamping.flags;
855 	int ret;
856 
857 	if (val & ~SOF_TIMESTAMPING_MASK)
858 		return -EINVAL;
859 
860 	if (val & SOF_TIMESTAMPING_OPT_ID &&
861 	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
862 		if (sk->sk_protocol == IPPROTO_TCP &&
863 		    sk->sk_type == SOCK_STREAM) {
864 			if ((1 << sk->sk_state) &
865 			    (TCPF_CLOSE | TCPF_LISTEN))
866 				return -EINVAL;
867 			sk->sk_tskey = tcp_sk(sk)->snd_una;
868 		} else {
869 			sk->sk_tskey = 0;
870 		}
871 	}
872 
873 	if (val & SOF_TIMESTAMPING_OPT_STATS &&
874 	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
875 		return -EINVAL;
876 
877 	if (val & SOF_TIMESTAMPING_BIND_PHC) {
878 		ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
879 		if (ret)
880 			return ret;
881 	}
882 
883 	sk->sk_tsflags = val;
884 	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
885 
886 	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
887 		sock_enable_timestamp(sk,
888 				      SOCK_TIMESTAMPING_RX_SOFTWARE);
889 	else
890 		sock_disable_timestamp(sk,
891 				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
892 	return 0;
893 }
894 
895 void sock_set_keepalive(struct sock *sk)
896 {
897 	lock_sock(sk);
898 	if (sk->sk_prot->keepalive)
899 		sk->sk_prot->keepalive(sk, true);
900 	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
901 	release_sock(sk);
902 }
903 EXPORT_SYMBOL(sock_set_keepalive);
904 
905 static void __sock_set_rcvbuf(struct sock *sk, int val)
906 {
907 	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
908 	 * as a negative value.
909 	 */
910 	val = min_t(int, val, INT_MAX / 2);
911 	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
912 
913 	/* We double it on the way in to account for "struct sk_buff" etc.
914 	 * overhead.   Applications assume that the SO_RCVBUF setting they make
915 	 * will allow that much actual data to be received on that socket.
916 	 *
917 	 * Applications are unaware that "struct sk_buff" and other overheads
918 	 * allocate from the receive buffer during socket buffer allocation.
919 	 *
920 	 * And after considering the possible alternatives, returning the value
921 	 * we actually used in getsockopt is the most desirable behavior.
922 	 */
923 	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
924 }
925 
/*
 * Set the receive buffer size of @sk through __sock_set_rcvbuf(), with
 * the socket lock held.  @val is passed on as is.
 */
void sock_set_rcvbuf(struct sock *sk, int val)
{
	lock_sock(sk);

	__sock_set_rcvbuf(sk, val);

	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_rcvbuf);
933 
934 static void __sock_set_mark(struct sock *sk, u32 val)
935 {
936 	if (val != sk->sk_mark) {
937 		sk->sk_mark = val;
938 		sk_dst_reset(sk);
939 	}
940 }
941 
942 void sock_set_mark(struct sock *sk, u32 val)
943 {
944 	lock_sock(sk);
945 	__sock_set_mark(sk, val);
946 	release_sock(sk);
947 }
948 EXPORT_SYMBOL(sock_set_mark);
949 
950 /*
951  *	This is meant for all protocols to use and covers goings on
952  *	at the socket level. Everything here is generic.
953  */
954 
955 int sock_setsockopt(struct socket *sock, int level, int optname,
956 		    sockptr_t optval, unsigned int optlen)
957 {
958 	struct so_timestamping timestamping;
959 	struct sock_txtime sk_txtime;
960 	struct sock *sk = sock->sk;
961 	int val;
962 	int valbool;
963 	struct linger ling;
964 	int ret = 0;
965 
966 	/*
967 	 *	Options without arguments
968 	 */
969 
970 	if (optname == SO_BINDTODEVICE)
971 		return sock_setbindtodevice(sk, optval, optlen);
972 
973 	if (optlen < sizeof(int))
974 		return -EINVAL;
975 
976 	if (copy_from_sockptr(&val, optval, sizeof(val)))
977 		return -EFAULT;
978 
979 	valbool = val ? 1 : 0;
980 
981 	lock_sock(sk);
982 
983 	switch (optname) {
984 	case SO_DEBUG:
985 		if (val && !capable(CAP_NET_ADMIN))
986 			ret = -EACCES;
987 		else
988 			sock_valbool_flag(sk, SOCK_DBG, valbool);
989 		break;
990 	case SO_REUSEADDR:
991 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
992 		break;
993 	case SO_REUSEPORT:
994 		sk->sk_reuseport = valbool;
995 		break;
996 	case SO_TYPE:
997 	case SO_PROTOCOL:
998 	case SO_DOMAIN:
999 	case SO_ERROR:
1000 		ret = -ENOPROTOOPT;
1001 		break;
1002 	case SO_DONTROUTE:
1003 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
1004 		sk_dst_reset(sk);
1005 		break;
1006 	case SO_BROADCAST:
1007 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
1008 		break;
1009 	case SO_SNDBUF:
1010 		/* Don't error on this BSD doesn't and if you think
1011 		 * about it this is right. Otherwise apps have to
1012 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
1013 		 * are treated in BSD as hints
1014 		 */
1015 		val = min_t(u32, val, sysctl_wmem_max);
1016 set_sndbuf:
1017 		/* Ensure val * 2 fits into an int, to prevent max_t()
1018 		 * from treating it as a negative value.
1019 		 */
1020 		val = min_t(int, val, INT_MAX / 2);
1021 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1022 		WRITE_ONCE(sk->sk_sndbuf,
1023 			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
1024 		/* Wake up sending tasks if we upped the value. */
1025 		sk->sk_write_space(sk);
1026 		break;
1027 
1028 	case SO_SNDBUFFORCE:
1029 		if (!capable(CAP_NET_ADMIN)) {
1030 			ret = -EPERM;
1031 			break;
1032 		}
1033 
1034 		/* No negative values (to prevent underflow, as val will be
1035 		 * multiplied by 2).
1036 		 */
1037 		if (val < 0)
1038 			val = 0;
1039 		goto set_sndbuf;
1040 
1041 	case SO_RCVBUF:
1042 		/* Don't error on this BSD doesn't and if you think
1043 		 * about it this is right. Otherwise apps have to
1044 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
1045 		 * are treated in BSD as hints
1046 		 */
1047 		__sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
1048 		break;
1049 
1050 	case SO_RCVBUFFORCE:
1051 		if (!capable(CAP_NET_ADMIN)) {
1052 			ret = -EPERM;
1053 			break;
1054 		}
1055 
1056 		/* No negative values (to prevent underflow, as val will be
1057 		 * multiplied by 2).
1058 		 */
1059 		__sock_set_rcvbuf(sk, max(val, 0));
1060 		break;
1061 
1062 	case SO_KEEPALIVE:
1063 		if (sk->sk_prot->keepalive)
1064 			sk->sk_prot->keepalive(sk, valbool);
1065 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
1066 		break;
1067 
1068 	case SO_OOBINLINE:
1069 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
1070 		break;
1071 
1072 	case SO_NO_CHECK:
1073 		sk->sk_no_check_tx = valbool;
1074 		break;
1075 
1076 	case SO_PRIORITY:
1077 		if ((val >= 0 && val <= 6) ||
1078 		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1079 			sk->sk_priority = val;
1080 		else
1081 			ret = -EPERM;
1082 		break;
1083 
1084 	case SO_LINGER:
1085 		if (optlen < sizeof(ling)) {
1086 			ret = -EINVAL;	/* 1003.1g */
1087 			break;
1088 		}
1089 		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
1090 			ret = -EFAULT;
1091 			break;
1092 		}
1093 		if (!ling.l_onoff)
1094 			sock_reset_flag(sk, SOCK_LINGER);
1095 		else {
1096 #if (BITS_PER_LONG == 32)
1097 			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
1098 				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
1099 			else
1100 #endif
1101 				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
1102 			sock_set_flag(sk, SOCK_LINGER);
1103 		}
1104 		break;
1105 
1106 	case SO_BSDCOMPAT:
1107 		break;
1108 
1109 	case SO_PASSCRED:
1110 		if (valbool)
1111 			set_bit(SOCK_PASSCRED, &sock->flags);
1112 		else
1113 			clear_bit(SOCK_PASSCRED, &sock->flags);
1114 		break;
1115 
1116 	case SO_TIMESTAMP_OLD:
1117 	case SO_TIMESTAMP_NEW:
1118 	case SO_TIMESTAMPNS_OLD:
1119 	case SO_TIMESTAMPNS_NEW:
1120 		sock_set_timestamp(sk, optname, valbool);
1121 		break;
1122 
1123 	case SO_TIMESTAMPING_NEW:
1124 	case SO_TIMESTAMPING_OLD:
1125 		if (optlen == sizeof(timestamping)) {
1126 			if (copy_from_sockptr(&timestamping, optval,
1127 					      sizeof(timestamping))) {
1128 				ret = -EFAULT;
1129 				break;
1130 			}
1131 		} else {
1132 			memset(&timestamping, 0, sizeof(timestamping));
1133 			timestamping.flags = val;
1134 		}
1135 		ret = sock_set_timestamping(sk, optname, timestamping);
1136 		break;
1137 
1138 	case SO_RCVLOWAT:
1139 		if (val < 0)
1140 			val = INT_MAX;
1141 		if (sock->ops->set_rcvlowat)
1142 			ret = sock->ops->set_rcvlowat(sk, val);
1143 		else
1144 			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1145 		break;
1146 
1147 	case SO_RCVTIMEO_OLD:
1148 	case SO_RCVTIMEO_NEW:
1149 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1150 				       optlen, optname == SO_RCVTIMEO_OLD);
1151 		break;
1152 
1153 	case SO_SNDTIMEO_OLD:
1154 	case SO_SNDTIMEO_NEW:
1155 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1156 				       optlen, optname == SO_SNDTIMEO_OLD);
1157 		break;
1158 
1159 	case SO_ATTACH_FILTER: {
1160 		struct sock_fprog fprog;
1161 
1162 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1163 		if (!ret)
1164 			ret = sk_attach_filter(&fprog, sk);
1165 		break;
1166 	}
1167 	case SO_ATTACH_BPF:
1168 		ret = -EINVAL;
1169 		if (optlen == sizeof(u32)) {
1170 			u32 ufd;
1171 
1172 			ret = -EFAULT;
1173 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1174 				break;
1175 
1176 			ret = sk_attach_bpf(ufd, sk);
1177 		}
1178 		break;
1179 
1180 	case SO_ATTACH_REUSEPORT_CBPF: {
1181 		struct sock_fprog fprog;
1182 
1183 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1184 		if (!ret)
1185 			ret = sk_reuseport_attach_filter(&fprog, sk);
1186 		break;
1187 	}
1188 	case SO_ATTACH_REUSEPORT_EBPF:
1189 		ret = -EINVAL;
1190 		if (optlen == sizeof(u32)) {
1191 			u32 ufd;
1192 
1193 			ret = -EFAULT;
1194 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1195 				break;
1196 
1197 			ret = sk_reuseport_attach_bpf(ufd, sk);
1198 		}
1199 		break;
1200 
1201 	case SO_DETACH_REUSEPORT_BPF:
1202 		ret = reuseport_detach_prog(sk);
1203 		break;
1204 
1205 	case SO_DETACH_FILTER:
1206 		ret = sk_detach_filter(sk);
1207 		break;
1208 
1209 	case SO_LOCK_FILTER:
1210 		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1211 			ret = -EPERM;
1212 		else
1213 			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1214 		break;
1215 
1216 	case SO_PASSSEC:
1217 		if (valbool)
1218 			set_bit(SOCK_PASSSEC, &sock->flags);
1219 		else
1220 			clear_bit(SOCK_PASSSEC, &sock->flags);
1221 		break;
1222 	case SO_MARK:
1223 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1224 			ret = -EPERM;
1225 			break;
1226 		}
1227 
1228 		__sock_set_mark(sk, val);
1229 		break;
1230 
1231 	case SO_RXQ_OVFL:
1232 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1233 		break;
1234 
1235 	case SO_WIFI_STATUS:
1236 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1237 		break;
1238 
1239 	case SO_PEEK_OFF:
1240 		if (sock->ops->set_peek_off)
1241 			ret = sock->ops->set_peek_off(sk, val);
1242 		else
1243 			ret = -EOPNOTSUPP;
1244 		break;
1245 
1246 	case SO_NOFCS:
1247 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1248 		break;
1249 
1250 	case SO_SELECT_ERR_QUEUE:
1251 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1252 		break;
1253 
1254 #ifdef CONFIG_NET_RX_BUSY_POLL
1255 	case SO_BUSY_POLL:
1256 		/* allow unprivileged users to decrease the value */
1257 		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1258 			ret = -EPERM;
1259 		else {
1260 			if (val < 0)
1261 				ret = -EINVAL;
1262 			else
1263 				WRITE_ONCE(sk->sk_ll_usec, val);
1264 		}
1265 		break;
1266 	case SO_PREFER_BUSY_POLL:
1267 		if (valbool && !capable(CAP_NET_ADMIN))
1268 			ret = -EPERM;
1269 		else
1270 			WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
1271 		break;
1272 	case SO_BUSY_POLL_BUDGET:
1273 		if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
1274 			ret = -EPERM;
1275 		} else {
1276 			if (val < 0 || val > U16_MAX)
1277 				ret = -EINVAL;
1278 			else
1279 				WRITE_ONCE(sk->sk_busy_poll_budget, val);
1280 		}
1281 		break;
1282 #endif
1283 
1284 	case SO_MAX_PACING_RATE:
1285 		{
1286 		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
1287 
1288 		if (sizeof(ulval) != sizeof(val) &&
1289 		    optlen >= sizeof(ulval) &&
1290 		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
1291 			ret = -EFAULT;
1292 			break;
1293 		}
1294 		if (ulval != ~0UL)
1295 			cmpxchg(&sk->sk_pacing_status,
1296 				SK_PACING_NONE,
1297 				SK_PACING_NEEDED);
1298 		sk->sk_max_pacing_rate = ulval;
1299 		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
1300 		break;
1301 		}
1302 	case SO_INCOMING_CPU:
1303 		WRITE_ONCE(sk->sk_incoming_cpu, val);
1304 		break;
1305 
1306 	case SO_CNX_ADVICE:
1307 		if (val == 1)
1308 			dst_negative_advice(sk);
1309 		break;
1310 
1311 	case SO_ZEROCOPY:
1312 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
1313 			if (!((sk->sk_type == SOCK_STREAM &&
1314 			       sk->sk_protocol == IPPROTO_TCP) ||
1315 			      (sk->sk_type == SOCK_DGRAM &&
1316 			       sk->sk_protocol == IPPROTO_UDP)))
1317 				ret = -ENOTSUPP;
1318 		} else if (sk->sk_family != PF_RDS) {
1319 			ret = -ENOTSUPP;
1320 		}
1321 		if (!ret) {
1322 			if (val < 0 || val > 1)
1323 				ret = -EINVAL;
1324 			else
1325 				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
1326 		}
1327 		break;
1328 
1329 	case SO_TXTIME:
1330 		if (optlen != sizeof(struct sock_txtime)) {
1331 			ret = -EINVAL;
1332 			break;
1333 		} else if (copy_from_sockptr(&sk_txtime, optval,
1334 			   sizeof(struct sock_txtime))) {
1335 			ret = -EFAULT;
1336 			break;
1337 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
1338 			ret = -EINVAL;
1339 			break;
1340 		}
1341 		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1342 		 * scheduler has enough safe guards.
1343 		 */
1344 		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1345 		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1346 			ret = -EPERM;
1347 			break;
1348 		}
1349 		sock_valbool_flag(sk, SOCK_TXTIME, true);
1350 		sk->sk_clockid = sk_txtime.clockid;
1351 		sk->sk_txtime_deadline_mode =
1352 			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
1353 		sk->sk_txtime_report_errors =
1354 			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
1355 		break;
1356 
1357 	case SO_BINDTOIFINDEX:
1358 		ret = sock_bindtoindex_locked(sk, val);
1359 		break;
1360 
1361 	case SO_BUF_LOCK:
1362 		if (val & ~SOCK_BUF_LOCK_MASK) {
1363 			ret = -EINVAL;
1364 			break;
1365 		}
1366 		sk->sk_userlocks = val | (sk->sk_userlocks &
1367 					  ~SOCK_BUF_LOCK_MASK);
1368 		break;
1369 
1370 	default:
1371 		ret = -ENOPROTOOPT;
1372 		break;
1373 	}
1374 	release_sock(sk);
1375 	return ret;
1376 }
1377 EXPORT_SYMBOL(sock_setsockopt);
1378 
1379 
1380 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1381 			  struct ucred *ucred)
1382 {
1383 	ucred->pid = pid_vnr(pid);
1384 	ucred->uid = ucred->gid = -1;
1385 	if (cred) {
1386 		struct user_namespace *current_ns = current_user_ns();
1387 
1388 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1389 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
1390 	}
1391 }
1392 
1393 static int groups_to_user(gid_t __user *dst, const struct group_info *src)
1394 {
1395 	struct user_namespace *user_ns = current_user_ns();
1396 	int i;
1397 
1398 	for (i = 0; i < src->ngroups; i++)
1399 		if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
1400 			return -EFAULT;
1401 
1402 	return 0;
1403 }
1404 
1405 int sock_getsockopt(struct socket *sock, int level, int optname,
1406 		    char __user *optval, int __user *optlen)
1407 {
1408 	struct sock *sk = sock->sk;
1409 
1410 	union {
1411 		int val;
1412 		u64 val64;
1413 		unsigned long ulval;
1414 		struct linger ling;
1415 		struct old_timeval32 tm32;
1416 		struct __kernel_old_timeval tm;
1417 		struct  __kernel_sock_timeval stm;
1418 		struct sock_txtime txtime;
1419 		struct so_timestamping timestamping;
1420 	} v;
1421 
1422 	int lv = sizeof(int);
1423 	int len;
1424 
1425 	if (get_user(len, optlen))
1426 		return -EFAULT;
1427 	if (len < 0)
1428 		return -EINVAL;
1429 
1430 	memset(&v, 0, sizeof(v));
1431 
1432 	switch (optname) {
1433 	case SO_DEBUG:
1434 		v.val = sock_flag(sk, SOCK_DBG);
1435 		break;
1436 
1437 	case SO_DONTROUTE:
1438 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
1439 		break;
1440 
1441 	case SO_BROADCAST:
1442 		v.val = sock_flag(sk, SOCK_BROADCAST);
1443 		break;
1444 
1445 	case SO_SNDBUF:
1446 		v.val = sk->sk_sndbuf;
1447 		break;
1448 
1449 	case SO_RCVBUF:
1450 		v.val = sk->sk_rcvbuf;
1451 		break;
1452 
1453 	case SO_REUSEADDR:
1454 		v.val = sk->sk_reuse;
1455 		break;
1456 
1457 	case SO_REUSEPORT:
1458 		v.val = sk->sk_reuseport;
1459 		break;
1460 
1461 	case SO_KEEPALIVE:
1462 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
1463 		break;
1464 
1465 	case SO_TYPE:
1466 		v.val = sk->sk_type;
1467 		break;
1468 
1469 	case SO_PROTOCOL:
1470 		v.val = sk->sk_protocol;
1471 		break;
1472 
1473 	case SO_DOMAIN:
1474 		v.val = sk->sk_family;
1475 		break;
1476 
1477 	case SO_ERROR:
1478 		v.val = -sock_error(sk);
1479 		if (v.val == 0)
1480 			v.val = xchg(&sk->sk_err_soft, 0);
1481 		break;
1482 
1483 	case SO_OOBINLINE:
1484 		v.val = sock_flag(sk, SOCK_URGINLINE);
1485 		break;
1486 
1487 	case SO_NO_CHECK:
1488 		v.val = sk->sk_no_check_tx;
1489 		break;
1490 
1491 	case SO_PRIORITY:
1492 		v.val = sk->sk_priority;
1493 		break;
1494 
1495 	case SO_LINGER:
1496 		lv		= sizeof(v.ling);
1497 		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
1498 		v.ling.l_linger	= sk->sk_lingertime / HZ;
1499 		break;
1500 
1501 	case SO_BSDCOMPAT:
1502 		break;
1503 
1504 	case SO_TIMESTAMP_OLD:
1505 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1506 				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
1507 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
1508 		break;
1509 
1510 	case SO_TIMESTAMPNS_OLD:
1511 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1512 		break;
1513 
1514 	case SO_TIMESTAMP_NEW:
1515 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1516 		break;
1517 
1518 	case SO_TIMESTAMPNS_NEW:
1519 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
1520 		break;
1521 
1522 	case SO_TIMESTAMPING_OLD:
1523 		lv = sizeof(v.timestamping);
1524 		v.timestamping.flags = sk->sk_tsflags;
1525 		v.timestamping.bind_phc = sk->sk_bind_phc;
1526 		break;
1527 
1528 	case SO_RCVTIMEO_OLD:
1529 	case SO_RCVTIMEO_NEW:
1530 		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
1531 		break;
1532 
1533 	case SO_SNDTIMEO_OLD:
1534 	case SO_SNDTIMEO_NEW:
1535 		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
1536 		break;
1537 
1538 	case SO_RCVLOWAT:
1539 		v.val = sk->sk_rcvlowat;
1540 		break;
1541 
1542 	case SO_SNDLOWAT:
1543 		v.val = 1;
1544 		break;
1545 
1546 	case SO_PASSCRED:
1547 		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1548 		break;
1549 
1550 	case SO_PEERCRED:
1551 	{
1552 		struct ucred peercred;
1553 		if (len > sizeof(peercred))
1554 			len = sizeof(peercred);
1555 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1556 		if (copy_to_user(optval, &peercred, len))
1557 			return -EFAULT;
1558 		goto lenout;
1559 	}
1560 
1561 	case SO_PEERGROUPS:
1562 	{
1563 		int ret, n;
1564 
1565 		if (!sk->sk_peer_cred)
1566 			return -ENODATA;
1567 
1568 		n = sk->sk_peer_cred->group_info->ngroups;
1569 		if (len < n * sizeof(gid_t)) {
1570 			len = n * sizeof(gid_t);
1571 			return put_user(len, optlen) ? -EFAULT : -ERANGE;
1572 		}
1573 		len = n * sizeof(gid_t);
1574 
1575 		ret = groups_to_user((gid_t __user *)optval,
1576 				     sk->sk_peer_cred->group_info);
1577 		if (ret)
1578 			return ret;
1579 		goto lenout;
1580 	}
1581 
1582 	case SO_PEERNAME:
1583 	{
1584 		char address[128];
1585 
1586 		lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
1587 		if (lv < 0)
1588 			return -ENOTCONN;
1589 		if (lv < len)
1590 			return -EINVAL;
1591 		if (copy_to_user(optval, address, len))
1592 			return -EFAULT;
1593 		goto lenout;
1594 	}
1595 
1596 	/* Dubious BSD thing... Probably nobody even uses it, but
1597 	 * the UNIX standard wants it for whatever reason... -DaveM
1598 	 */
1599 	case SO_ACCEPTCONN:
1600 		v.val = sk->sk_state == TCP_LISTEN;
1601 		break;
1602 
1603 	case SO_PASSSEC:
1604 		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1605 		break;
1606 
1607 	case SO_PEERSEC:
1608 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
1609 
1610 	case SO_MARK:
1611 		v.val = sk->sk_mark;
1612 		break;
1613 
1614 	case SO_RXQ_OVFL:
1615 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1616 		break;
1617 
1618 	case SO_WIFI_STATUS:
1619 		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1620 		break;
1621 
1622 	case SO_PEEK_OFF:
1623 		if (!sock->ops->set_peek_off)
1624 			return -EOPNOTSUPP;
1625 
1626 		v.val = sk->sk_peek_off;
1627 		break;
1628 	case SO_NOFCS:
1629 		v.val = sock_flag(sk, SOCK_NOFCS);
1630 		break;
1631 
1632 	case SO_BINDTODEVICE:
1633 		return sock_getbindtodevice(sk, optval, optlen, len);
1634 
1635 	case SO_GET_FILTER:
1636 		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1637 		if (len < 0)
1638 			return len;
1639 
1640 		goto lenout;
1641 
1642 	case SO_LOCK_FILTER:
1643 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1644 		break;
1645 
1646 	case SO_BPF_EXTENSIONS:
1647 		v.val = bpf_tell_extensions();
1648 		break;
1649 
1650 	case SO_SELECT_ERR_QUEUE:
1651 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1652 		break;
1653 
1654 #ifdef CONFIG_NET_RX_BUSY_POLL
1655 	case SO_BUSY_POLL:
1656 		v.val = sk->sk_ll_usec;
1657 		break;
1658 	case SO_PREFER_BUSY_POLL:
1659 		v.val = READ_ONCE(sk->sk_prefer_busy_poll);
1660 		break;
1661 #endif
1662 
1663 	case SO_MAX_PACING_RATE:
1664 		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1665 			lv = sizeof(v.ulval);
1666 			v.ulval = sk->sk_max_pacing_rate;
1667 		} else {
1668 			/* 32bit version */
1669 			v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
1670 		}
1671 		break;
1672 
1673 	case SO_INCOMING_CPU:
1674 		v.val = READ_ONCE(sk->sk_incoming_cpu);
1675 		break;
1676 
1677 	case SO_MEMINFO:
1678 	{
1679 		u32 meminfo[SK_MEMINFO_VARS];
1680 
1681 		sk_get_meminfo(sk, meminfo);
1682 
1683 		len = min_t(unsigned int, len, sizeof(meminfo));
1684 		if (copy_to_user(optval, &meminfo, len))
1685 			return -EFAULT;
1686 
1687 		goto lenout;
1688 	}
1689 
1690 #ifdef CONFIG_NET_RX_BUSY_POLL
1691 	case SO_INCOMING_NAPI_ID:
1692 		v.val = READ_ONCE(sk->sk_napi_id);
1693 
1694 		/* aggregate non-NAPI IDs down to 0 */
1695 		if (v.val < MIN_NAPI_ID)
1696 			v.val = 0;
1697 
1698 		break;
1699 #endif
1700 
1701 	case SO_COOKIE:
1702 		lv = sizeof(u64);
1703 		if (len < lv)
1704 			return -EINVAL;
1705 		v.val64 = sock_gen_cookie(sk);
1706 		break;
1707 
1708 	case SO_ZEROCOPY:
1709 		v.val = sock_flag(sk, SOCK_ZEROCOPY);
1710 		break;
1711 
1712 	case SO_TXTIME:
1713 		lv = sizeof(v.txtime);
1714 		v.txtime.clockid = sk->sk_clockid;
1715 		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
1716 				  SOF_TXTIME_DEADLINE_MODE : 0;
1717 		v.txtime.flags |= sk->sk_txtime_report_errors ?
1718 				  SOF_TXTIME_REPORT_ERRORS : 0;
1719 		break;
1720 
1721 	case SO_BINDTOIFINDEX:
1722 		v.val = sk->sk_bound_dev_if;
1723 		break;
1724 
1725 	case SO_NETNS_COOKIE:
1726 		lv = sizeof(u64);
1727 		if (len != lv)
1728 			return -EINVAL;
1729 		v.val64 = sock_net(sk)->net_cookie;
1730 		break;
1731 
1732 	case SO_BUF_LOCK:
1733 		v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
1734 		break;
1735 
1736 	default:
1737 		/* We implement the SO_SNDLOWAT etc to not be settable
1738 		 * (1003.1g 7).
1739 		 */
1740 		return -ENOPROTOOPT;
1741 	}
1742 
1743 	if (len > lv)
1744 		len = lv;
1745 	if (copy_to_user(optval, &v, len))
1746 		return -EFAULT;
1747 lenout:
1748 	if (put_user(len, optlen))
1749 		return -EFAULT;
1750 	return 0;
1751 }
1752 
1753 /*
1754  * Initialize an sk_lock.
1755  *
1756  * (We also register the sk_lock with the lock validator.)
1757  */
1758 static inline void sock_lock_init(struct sock *sk)
1759 {
1760 	if (sk->sk_kern_sock)
1761 		sock_lock_init_class_and_name(
1762 			sk,
1763 			af_family_kern_slock_key_strings[sk->sk_family],
1764 			af_family_kern_slock_keys + sk->sk_family,
1765 			af_family_kern_key_strings[sk->sk_family],
1766 			af_family_kern_keys + sk->sk_family);
1767 	else
1768 		sock_lock_init_class_and_name(
1769 			sk,
1770 			af_family_slock_key_strings[sk->sk_family],
1771 			af_family_slock_keys + sk->sk_family,
1772 			af_family_key_strings[sk->sk_family],
1773 			af_family_keys + sk->sk_family);
1774 }
1775 
1776 /*
1777  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1778  * even temporarly, because of RCU lookups. sk_node should also be left as is.
1779  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
1780  */
1781 static void sock_copy(struct sock *nsk, const struct sock *osk)
1782 {
1783 	const struct proto *prot = READ_ONCE(osk->sk_prot);
1784 #ifdef CONFIG_SECURITY_NETWORK
1785 	void *sptr = nsk->sk_security;
1786 #endif
1787 
1788 	/* If we move sk_tx_queue_mapping out of the private section,
1789 	 * we must check if sk_tx_queue_clear() is called after
1790 	 * sock_copy() in sk_clone_lock().
1791 	 */
1792 	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
1793 		     offsetof(struct sock, sk_dontcopy_begin) ||
1794 		     offsetof(struct sock, sk_tx_queue_mapping) >=
1795 		     offsetof(struct sock, sk_dontcopy_end));
1796 
1797 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1798 
1799 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1800 	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1801 
1802 #ifdef CONFIG_SECURITY_NETWORK
1803 	nsk->sk_security = sptr;
1804 	security_sk_clone(osk, nsk);
1805 #endif
1806 }
1807 
1808 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1809 		int family)
1810 {
1811 	struct sock *sk;
1812 	struct kmem_cache *slab;
1813 
1814 	slab = prot->slab;
1815 	if (slab != NULL) {
1816 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1817 		if (!sk)
1818 			return sk;
1819 		if (want_init_on_alloc(priority))
1820 			sk_prot_clear_nulls(sk, prot->obj_size);
1821 	} else
1822 		sk = kmalloc(prot->obj_size, priority);
1823 
1824 	if (sk != NULL) {
1825 		if (security_sk_alloc(sk, family, priority))
1826 			goto out_free;
1827 
1828 		if (!try_module_get(prot->owner))
1829 			goto out_free_sec;
1830 	}
1831 
1832 	return sk;
1833 
1834 out_free_sec:
1835 	security_sk_free(sk);
1836 out_free:
1837 	if (slab != NULL)
1838 		kmem_cache_free(slab, sk);
1839 	else
1840 		kfree(sk);
1841 	return NULL;
1842 }
1843 
1844 static void sk_prot_free(struct proto *prot, struct sock *sk)
1845 {
1846 	struct kmem_cache *slab;
1847 	struct module *owner;
1848 
1849 	owner = prot->owner;
1850 	slab = prot->slab;
1851 
1852 	cgroup_sk_free(&sk->sk_cgrp_data);
1853 	mem_cgroup_sk_free(sk);
1854 	security_sk_free(sk);
1855 	if (slab != NULL)
1856 		kmem_cache_free(slab, sk);
1857 	else
1858 		kfree(sk);
1859 	module_put(owner);
1860 }
1861 
1862 /**
1863  *	sk_alloc - All socket objects are allocated here
1864  *	@net: the applicable net namespace
1865  *	@family: protocol family
1866  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1867  *	@prot: struct proto associated with this new sock instance
1868  *	@kern: is this to be a kernel socket?
1869  */
1870 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1871 		      struct proto *prot, int kern)
1872 {
1873 	struct sock *sk;
1874 
1875 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1876 	if (sk) {
1877 		sk->sk_family = family;
1878 		/*
1879 		 * See comment in struct sock definition to understand
1880 		 * why we need sk_prot_creator -acme
1881 		 */
1882 		sk->sk_prot = sk->sk_prot_creator = prot;
1883 		sk->sk_kern_sock = kern;
1884 		sock_lock_init(sk);
1885 		sk->sk_net_refcnt = kern ? 0 : 1;
1886 		if (likely(sk->sk_net_refcnt)) {
1887 			get_net(net);
1888 			sock_inuse_add(net, 1);
1889 		}
1890 
1891 		sock_net_set(sk, net);
1892 		refcount_set(&sk->sk_wmem_alloc, 1);
1893 
1894 		mem_cgroup_sk_alloc(sk);
1895 		cgroup_sk_alloc(&sk->sk_cgrp_data);
1896 		sock_update_classid(&sk->sk_cgrp_data);
1897 		sock_update_netprioidx(&sk->sk_cgrp_data);
1898 		sk_tx_queue_clear(sk);
1899 	}
1900 
1901 	return sk;
1902 }
1903 EXPORT_SYMBOL(sk_alloc);
1904 
1905 /* Sockets having SOCK_RCU_FREE will call this function after one RCU
1906  * grace period. This is the case for UDP sockets and TCP listeners.
1907  */
1908 static void __sk_destruct(struct rcu_head *head)
1909 {
1910 	struct sock *sk = container_of(head, struct sock, sk_rcu);
1911 	struct sk_filter *filter;
1912 
1913 	if (sk->sk_destruct)
1914 		sk->sk_destruct(sk);
1915 
1916 	filter = rcu_dereference_check(sk->sk_filter,
1917 				       refcount_read(&sk->sk_wmem_alloc) == 0);
1918 	if (filter) {
1919 		sk_filter_uncharge(sk, filter);
1920 		RCU_INIT_POINTER(sk->sk_filter, NULL);
1921 	}
1922 
1923 	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1924 
1925 #ifdef CONFIG_BPF_SYSCALL
1926 	bpf_sk_storage_free(sk);
1927 #endif
1928 
1929 	if (atomic_read(&sk->sk_omem_alloc))
1930 		pr_debug("%s: optmem leakage (%d bytes) detected\n",
1931 			 __func__, atomic_read(&sk->sk_omem_alloc));
1932 
1933 	if (sk->sk_frag.page) {
1934 		put_page(sk->sk_frag.page);
1935 		sk->sk_frag.page = NULL;
1936 	}
1937 
1938 	if (sk->sk_peer_cred)
1939 		put_cred(sk->sk_peer_cred);
1940 	put_pid(sk->sk_peer_pid);
1941 	if (likely(sk->sk_net_refcnt))
1942 		put_net(sock_net(sk));
1943 	sk_prot_free(sk->sk_prot_creator, sk);
1944 }
1945 
1946 void sk_destruct(struct sock *sk)
1947 {
1948 	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
1949 
1950 	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
1951 		reuseport_detach_sock(sk);
1952 		use_call_rcu = true;
1953 	}
1954 
1955 	if (use_call_rcu)
1956 		call_rcu(&sk->sk_rcu, __sk_destruct);
1957 	else
1958 		__sk_destruct(&sk->sk_rcu);
1959 }
1960 
1961 static void __sk_free(struct sock *sk)
1962 {
1963 	if (likely(sk->sk_net_refcnt))
1964 		sock_inuse_add(sock_net(sk), -1);
1965 
1966 	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
1967 		sock_diag_broadcast_destroy(sk);
1968 	else
1969 		sk_destruct(sk);
1970 }
1971 
1972 void sk_free(struct sock *sk)
1973 {
1974 	/*
1975 	 * We subtract one from sk_wmem_alloc and can know if
1976 	 * some packets are still in some tx queue.
1977 	 * If not null, sock_wfree() will call __sk_free(sk) later
1978 	 */
1979 	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
1980 		__sk_free(sk);
1981 }
1982 EXPORT_SYMBOL(sk_free);
1983 
1984 static void sk_init_common(struct sock *sk)
1985 {
1986 	skb_queue_head_init(&sk->sk_receive_queue);
1987 	skb_queue_head_init(&sk->sk_write_queue);
1988 	skb_queue_head_init(&sk->sk_error_queue);
1989 
1990 	rwlock_init(&sk->sk_callback_lock);
1991 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1992 			af_rlock_keys + sk->sk_family,
1993 			af_family_rlock_key_strings[sk->sk_family]);
1994 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1995 			af_wlock_keys + sk->sk_family,
1996 			af_family_wlock_key_strings[sk->sk_family]);
1997 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1998 			af_elock_keys + sk->sk_family,
1999 			af_family_elock_key_strings[sk->sk_family]);
2000 	lockdep_set_class_and_name(&sk->sk_callback_lock,
2001 			af_callback_keys + sk->sk_family,
2002 			af_family_clock_key_strings[sk->sk_family]);
2003 }
2004 
2005 /**
2006  *	sk_clone_lock - clone a socket, and lock its clone
2007  *	@sk: the socket to clone
2008  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2009  *
2010  *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
2011  */
2012 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
2013 {
2014 	struct proto *prot = READ_ONCE(sk->sk_prot);
2015 	struct sk_filter *filter;
2016 	bool is_charged = true;
2017 	struct sock *newsk;
2018 
2019 	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
2020 	if (!newsk)
2021 		goto out;
2022 
2023 	sock_copy(newsk, sk);
2024 
2025 	newsk->sk_prot_creator = prot;
2026 
2027 	/* SANITY */
2028 	if (likely(newsk->sk_net_refcnt))
2029 		get_net(sock_net(newsk));
2030 	sk_node_init(&newsk->sk_node);
2031 	sock_lock_init(newsk);
2032 	bh_lock_sock(newsk);
2033 	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
2034 	newsk->sk_backlog.len = 0;
2035 
2036 	atomic_set(&newsk->sk_rmem_alloc, 0);
2037 
2038 	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
2039 	refcount_set(&newsk->sk_wmem_alloc, 1);
2040 
2041 	atomic_set(&newsk->sk_omem_alloc, 0);
2042 	sk_init_common(newsk);
2043 
2044 	newsk->sk_dst_cache	= NULL;
2045 	newsk->sk_dst_pending_confirm = 0;
2046 	newsk->sk_wmem_queued	= 0;
2047 	newsk->sk_forward_alloc = 0;
2048 	atomic_set(&newsk->sk_drops, 0);
2049 	newsk->sk_send_head	= NULL;
2050 	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
2051 	atomic_set(&newsk->sk_zckey, 0);
2052 
2053 	sock_reset_flag(newsk, SOCK_DONE);
2054 
2055 	/* sk->sk_memcg will be populated at accept() time */
2056 	newsk->sk_memcg = NULL;
2057 
2058 	cgroup_sk_clone(&newsk->sk_cgrp_data);
2059 
2060 	rcu_read_lock();
2061 	filter = rcu_dereference(sk->sk_filter);
2062 	if (filter != NULL)
2063 		/* though it's an empty new sock, the charging may fail
2064 		 * if sysctl_optmem_max was changed between creation of
2065 		 * original socket and cloning
2066 		 */
2067 		is_charged = sk_filter_charge(newsk, filter);
2068 	RCU_INIT_POINTER(newsk->sk_filter, filter);
2069 	rcu_read_unlock();
2070 
2071 	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
2072 		/* We need to make sure that we don't uncharge the new
2073 		 * socket if we couldn't charge it in the first place
2074 		 * as otherwise we uncharge the parent's filter.
2075 		 */
2076 		if (!is_charged)
2077 			RCU_INIT_POINTER(newsk->sk_filter, NULL);
2078 		sk_free_unlock_clone(newsk);
2079 		newsk = NULL;
2080 		goto out;
2081 	}
2082 	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
2083 
2084 	if (bpf_sk_storage_clone(sk, newsk)) {
2085 		sk_free_unlock_clone(newsk);
2086 		newsk = NULL;
2087 		goto out;
2088 	}
2089 
2090 	/* Clear sk_user_data if parent had the pointer tagged
2091 	 * as not suitable for copying when cloning.
2092 	 */
2093 	if (sk_user_data_is_nocopy(newsk))
2094 		newsk->sk_user_data = NULL;
2095 
2096 	newsk->sk_err	   = 0;
2097 	newsk->sk_err_soft = 0;
2098 	newsk->sk_priority = 0;
2099 	newsk->sk_incoming_cpu = raw_smp_processor_id();
2100 	if (likely(newsk->sk_net_refcnt))
2101 		sock_inuse_add(sock_net(newsk), 1);
2102 
2103 	/* Before updating sk_refcnt, we must commit prior changes to memory
2104 	 * (Documentation/RCU/rculist_nulls.rst for details)
2105 	 */
2106 	smp_wmb();
2107 	refcount_set(&newsk->sk_refcnt, 2);
2108 
2109 	/* Increment the counter in the same struct proto as the master
2110 	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
2111 	 * is the same as sk->sk_prot->socks, as this field was copied
2112 	 * with memcpy).
2113 	 *
2114 	 * This _changes_ the previous behaviour, where
2115 	 * tcp_create_openreq_child always was incrementing the
2116 	 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
2117 	 * to be taken into account in all callers. -acme
2118 	 */
2119 	sk_refcnt_debug_inc(newsk);
2120 	sk_set_socket(newsk, NULL);
2121 	sk_tx_queue_clear(newsk);
2122 	RCU_INIT_POINTER(newsk->sk_wq, NULL);
2123 
2124 	if (newsk->sk_prot->sockets_allocated)
2125 		sk_sockets_allocated_inc(newsk);
2126 
2127 	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
2128 		net_enable_timestamp();
2129 out:
2130 	return newsk;
2131 }
2132 EXPORT_SYMBOL_GPL(sk_clone_lock);
2133 
2134 void sk_free_unlock_clone(struct sock *sk)
2135 {
2136 	/* It is still raw copy of parent, so invalidate
2137 	 * destructor and make plain sk_free() */
2138 	sk->sk_destruct = NULL;
2139 	bh_unlock_sock(sk);
2140 	sk_free(sk);
2141 }
2142 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
2143 
2144 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
2145 {
2146 	u32 max_segs = 1;
2147 
2148 	sk_dst_set(sk, dst);
2149 	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
2150 	if (sk->sk_route_caps & NETIF_F_GSO)
2151 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
2152 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
2153 	if (sk_can_gso(sk)) {
2154 		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
2155 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2156 		} else {
2157 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
2158 			sk->sk_gso_max_size = dst->dev->gso_max_size;
2159 			max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
2160 		}
2161 	}
2162 	sk->sk_gso_max_segs = max_segs;
2163 }
2164 EXPORT_SYMBOL_GPL(sk_setup_caps);
2165 
2166 /*
2167  *	Simple resource managers for sockets.
2168  */
2169 
2170 
2171 /*
2172  * Write buffer destructor automatically called from kfree_skb.
2173  */
2174 void sock_wfree(struct sk_buff *skb)
2175 {
2176 	struct sock *sk = skb->sk;
2177 	unsigned int len = skb->truesize;
2178 
2179 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
2180 		/*
2181 		 * Keep a reference on sk_wmem_alloc, this will be released
2182 		 * after sk_write_space() call
2183 		 */
2184 		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
2185 		sk->sk_write_space(sk);
2186 		len = 1;
2187 	}
2188 	/*
2189 	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
2190 	 * could not do because of in-flight packets
2191 	 */
2192 	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
2193 		__sk_free(sk);
2194 }
2195 EXPORT_SYMBOL(sock_wfree);
2196 
2197 /* This variant of sock_wfree() is used by TCP,
2198  * since it sets SOCK_USE_WRITE_QUEUE.
2199  */
2200 void __sock_wfree(struct sk_buff *skb)
2201 {
2202 	struct sock *sk = skb->sk;
2203 
2204 	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
2205 		__sk_free(sk);
2206 }
2207 
2208 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
2209 {
2210 	skb_orphan(skb);
2211 	skb->sk = sk;
2212 #ifdef CONFIG_INET
2213 	if (unlikely(!sk_fullsock(sk))) {
2214 		skb->destructor = sock_edemux;
2215 		sock_hold(sk);
2216 		return;
2217 	}
2218 #endif
2219 	skb->destructor = sock_wfree;
2220 	skb_set_hash_from_sk(skb, sk);
2221 	/*
2222 	 * We used to take a refcount on sk, but following operation
2223 	 * is enough to guarantee sk_free() wont free this sock until
2224 	 * all in-flight packets are completed
2225 	 */
2226 	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
2227 }
2228 EXPORT_SYMBOL(skb_set_owner_w);
2229 
2230 static bool can_skb_orphan_partial(const struct sk_buff *skb)
2231 {
2232 #ifdef CONFIG_TLS_DEVICE
2233 	/* Drivers depend on in-order delivery for crypto offload,
2234 	 * partial orphan breaks out-of-order-OK logic.
2235 	 */
2236 	if (skb->decrypted)
2237 		return false;
2238 #endif
2239 	return (skb->destructor == sock_wfree ||
2240 		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
2241 }
2242 
2243 /* This helper is used by netem, as it can hold packets in its
2244  * delay queue. We want to allow the owner socket to send more
2245  * packets, as if they were already TX completed by a typical driver.
2246  * But we also want to keep skb->sk set because some packet schedulers
2247  * rely on it (sch_fq for example).
2248  */
2249 void skb_orphan_partial(struct sk_buff *skb)
2250 {
2251 	if (skb_is_tcp_pure_ack(skb))
2252 		return;
2253 
2254 	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
2255 		return;
2256 
2257 	skb_orphan(skb);
2258 }
2259 EXPORT_SYMBOL(skb_orphan_partial);
2260 
2261 /*
2262  * Read buffer destructor automatically called from kfree_skb.
2263  */
2264 void sock_rfree(struct sk_buff *skb)
2265 {
2266 	struct sock *sk = skb->sk;
2267 	unsigned int len = skb->truesize;
2268 
2269 	atomic_sub(len, &sk->sk_rmem_alloc);
2270 	sk_mem_uncharge(sk, len);
2271 }
2272 EXPORT_SYMBOL(sock_rfree);
2273 
2274 /*
2275  * Buffer destructor for skbs that are not used directly in read or write
2276  * path, e.g. for error handler skbs. Automatically called from kfree_skb.
2277  */
2278 void sock_efree(struct sk_buff *skb)
2279 {
2280 	sock_put(skb->sk);
2281 }
2282 EXPORT_SYMBOL(sock_efree);
2283 
2284 /* Buffer destructor for prefetch/receive path where reference count may
2285  * not be held, e.g. for listen sockets.
2286  */
2287 #ifdef CONFIG_INET
2288 void sock_pfree(struct sk_buff *skb)
2289 {
2290 	if (sk_is_refcounted(skb->sk))
2291 		sock_gen_put(skb->sk);
2292 }
2293 EXPORT_SYMBOL(sock_pfree);
2294 #endif /* CONFIG_INET */
2295 
2296 kuid_t sock_i_uid(struct sock *sk)
2297 {
2298 	kuid_t uid;
2299 
2300 	read_lock_bh(&sk->sk_callback_lock);
2301 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2302 	read_unlock_bh(&sk->sk_callback_lock);
2303 	return uid;
2304 }
2305 EXPORT_SYMBOL(sock_i_uid);
2306 
2307 unsigned long sock_i_ino(struct sock *sk)
2308 {
2309 	unsigned long ino;
2310 
2311 	read_lock_bh(&sk->sk_callback_lock);
2312 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
2313 	read_unlock_bh(&sk->sk_callback_lock);
2314 	return ino;
2315 }
2316 EXPORT_SYMBOL(sock_i_ino);
2317 
2318 /*
2319  * Allocate a skb from the socket's send buffer.
2320  */
2321 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
2322 			     gfp_t priority)
2323 {
2324 	if (force ||
2325 	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
2326 		struct sk_buff *skb = alloc_skb(size, priority);
2327 
2328 		if (skb) {
2329 			skb_set_owner_w(skb, sk);
2330 			return skb;
2331 		}
2332 	}
2333 	return NULL;
2334 }
2335 EXPORT_SYMBOL(sock_wmalloc);
2336 
2337 static void sock_ofree(struct sk_buff *skb)
2338 {
2339 	struct sock *sk = skb->sk;
2340 
2341 	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
2342 }
2343 
2344 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
2345 			     gfp_t priority)
2346 {
2347 	struct sk_buff *skb;
2348 
2349 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
2350 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2351 	    sysctl_optmem_max)
2352 		return NULL;
2353 
2354 	skb = alloc_skb(size, priority);
2355 	if (!skb)
2356 		return NULL;
2357 
2358 	atomic_add(skb->truesize, &sk->sk_omem_alloc);
2359 	skb->sk = sk;
2360 	skb->destructor = sock_ofree;
2361 	return skb;
2362 }
2363 
2364 /*
2365  * Allocate a memory block from the socket's option memory buffer.
2366  */
2367 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
2368 {
2369 	if ((unsigned int)size <= sysctl_optmem_max &&
2370 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
2371 		void *mem;
2372 		/* First do the add, to avoid the race if kmalloc
2373 		 * might sleep.
2374 		 */
2375 		atomic_add(size, &sk->sk_omem_alloc);
2376 		mem = kmalloc(size, priority);
2377 		if (mem)
2378 			return mem;
2379 		atomic_sub(size, &sk->sk_omem_alloc);
2380 	}
2381 	return NULL;
2382 }
2383 EXPORT_SYMBOL(sock_kmalloc);
2384 
2385 /* Free an option memory block. Note, we actually want the inline
2386  * here as this allows gcc to detect the nullify and fold away the
2387  * condition entirely.
2388  */
2389 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
2390 				  const bool nullify)
2391 {
2392 	if (WARN_ON_ONCE(!mem))
2393 		return;
2394 	if (nullify)
2395 		kfree_sensitive(mem);
2396 	else
2397 		kfree(mem);
2398 	atomic_sub(size, &sk->sk_omem_alloc);
2399 }
2400 
/* Free an option memory block with kfree() and subtract @size from
 * sk->sk_omem_alloc (see __sock_kfree_s()).
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
2405 EXPORT_SYMBOL(sock_kfree_s);
2406 
/* Same as sock_kfree_s(), but the block is released with kfree_sensitive()
 * instead of kfree().
 */
void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
2411 EXPORT_SYMBOL(sock_kzfree_s);
2412 
2413 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
2414    I think, these locks should be removed for datagram sockets.
2415  */
2416 static long sock_wait_for_wmem(struct sock *sk, long timeo)
2417 {
2418 	DEFINE_WAIT(wait);
2419 
2420 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2421 	for (;;) {
2422 		if (!timeo)
2423 			break;
2424 		if (signal_pending(current))
2425 			break;
2426 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2427 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2428 		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
2429 			break;
2430 		if (sk->sk_shutdown & SEND_SHUTDOWN)
2431 			break;
2432 		if (sk->sk_err)
2433 			break;
2434 		timeo = schedule_timeout(timeo);
2435 	}
2436 	finish_wait(sk_sleep(sk), &wait);
2437 	return timeo;
2438 }
2439 
2440 
2441 /*
2442  *	Generic send/receive buffer handlers
2443  */
2444 
2445 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
2446 				     unsigned long data_len, int noblock,
2447 				     int *errcode, int max_page_order)
2448 {
2449 	struct sk_buff *skb;
2450 	long timeo;
2451 	int err;
2452 
2453 	timeo = sock_sndtimeo(sk, noblock);
2454 	for (;;) {
2455 		err = sock_error(sk);
2456 		if (err != 0)
2457 			goto failure;
2458 
2459 		err = -EPIPE;
2460 		if (sk->sk_shutdown & SEND_SHUTDOWN)
2461 			goto failure;
2462 
2463 		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
2464 			break;
2465 
2466 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2467 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2468 		err = -EAGAIN;
2469 		if (!timeo)
2470 			goto failure;
2471 		if (signal_pending(current))
2472 			goto interrupted;
2473 		timeo = sock_wait_for_wmem(sk, timeo);
2474 	}
2475 	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2476 				   errcode, sk->sk_allocation);
2477 	if (skb)
2478 		skb_set_owner_w(skb, sk);
2479 	return skb;
2480 
2481 interrupted:
2482 	err = sock_intr_errno(timeo);
2483 failure:
2484 	*errcode = err;
2485 	return NULL;
2486 }
2487 EXPORT_SYMBOL(sock_alloc_send_pskb);
2488 
/* Convenience wrapper around sock_alloc_send_pskb() for a purely linear skb:
 * @size linear bytes, no paged data, page order 0.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
2494 EXPORT_SYMBOL(sock_alloc_send_skb);
2495 
2496 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2497 		     struct sockcm_cookie *sockc)
2498 {
2499 	u32 tsflags;
2500 
2501 	switch (cmsg->cmsg_type) {
2502 	case SO_MARK:
2503 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2504 			return -EPERM;
2505 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2506 			return -EINVAL;
2507 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2508 		break;
2509 	case SO_TIMESTAMPING_OLD:
2510 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2511 			return -EINVAL;
2512 
2513 		tsflags = *(u32 *)CMSG_DATA(cmsg);
2514 		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2515 			return -EINVAL;
2516 
2517 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2518 		sockc->tsflags |= tsflags;
2519 		break;
2520 	case SCM_TXTIME:
2521 		if (!sock_flag(sk, SOCK_TXTIME))
2522 			return -EINVAL;
2523 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
2524 			return -EINVAL;
2525 		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
2526 		break;
2527 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2528 	case SCM_RIGHTS:
2529 	case SCM_CREDENTIALS:
2530 		break;
2531 	default:
2532 		return -EINVAL;
2533 	}
2534 	return 0;
2535 }
2536 EXPORT_SYMBOL(__sock_cmsg_send);
2537 
2538 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2539 		   struct sockcm_cookie *sockc)
2540 {
2541 	struct cmsghdr *cmsg;
2542 	int ret;
2543 
2544 	for_each_cmsghdr(cmsg, msg) {
2545 		if (!CMSG_OK(msg, cmsg))
2546 			return -EINVAL;
2547 		if (cmsg->cmsg_level != SOL_SOCKET)
2548 			continue;
2549 		ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2550 		if (ret)
2551 			return ret;
2552 	}
2553 	return 0;
2554 }
2555 EXPORT_SYMBOL(sock_cmsg_send);
2556 
2557 static void sk_enter_memory_pressure(struct sock *sk)
2558 {
2559 	if (!sk->sk_prot->enter_memory_pressure)
2560 		return;
2561 
2562 	sk->sk_prot->enter_memory_pressure(sk);
2563 }
2564 
2565 static void sk_leave_memory_pressure(struct sock *sk)
2566 {
2567 	if (sk->sk_prot->leave_memory_pressure) {
2568 		sk->sk_prot->leave_memory_pressure(sk);
2569 	} else {
2570 		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
2571 
2572 		if (memory_pressure && READ_ONCE(*memory_pressure))
2573 			WRITE_ONCE(*memory_pressure, 0);
2574 	}
2575 }
2576 
/* Page order tried first by skb_page_frag_refill(): enough for 32768 bytes. */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)
/* Static key, off by default; when on, skb_page_frag_refill() skips the
 * high-order attempt and allocates single pages only.
 */
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
2579 
2580 /**
2581  * skb_page_frag_refill - check that a page_frag contains enough room
2582  * @sz: minimum size of the fragment we want to get
2583  * @pfrag: pointer to page_frag
2584  * @gfp: priority for memory allocation
2585  *
2586  * Note: While this allocator tries to use high order pages, there is
2587  * no guarantee that allocations succeed. Therefore, @sz MUST be
2588  * less or equal than PAGE_SIZE.
2589  */
2590 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2591 {
2592 	if (pfrag->page) {
2593 		if (page_ref_count(pfrag->page) == 1) {
2594 			pfrag->offset = 0;
2595 			return true;
2596 		}
2597 		if (pfrag->offset + sz <= pfrag->size)
2598 			return true;
2599 		put_page(pfrag->page);
2600 	}
2601 
2602 	pfrag->offset = 0;
2603 	if (SKB_FRAG_PAGE_ORDER &&
2604 	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
2605 		/* Avoid direct reclaim but allow kswapd to wake */
2606 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2607 					  __GFP_COMP | __GFP_NOWARN |
2608 					  __GFP_NORETRY,
2609 					  SKB_FRAG_PAGE_ORDER);
2610 		if (likely(pfrag->page)) {
2611 			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2612 			return true;
2613 		}
2614 	}
2615 	pfrag->page = alloc_page(gfp);
2616 	if (likely(pfrag->page)) {
2617 		pfrag->size = PAGE_SIZE;
2618 		return true;
2619 	}
2620 	return false;
2621 }
2622 EXPORT_SYMBOL(skb_page_frag_refill);
2623 
2624 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2625 {
2626 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2627 		return true;
2628 
2629 	sk_enter_memory_pressure(sk);
2630 	sk_stream_moderate_sndbuf(sk);
2631 	return false;
2632 }
2633 EXPORT_SYMBOL(sk_page_frag_refill);
2634 
/* Sleep until the socket is no longer owned by a user context.
 *
 * Per the annotations below, sk_lock.slock is released and re-acquired
 * inside: it is dropped around every schedule() and held again (with BH
 * disabled) when the function returns.
 */
void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* Queue as an exclusive, uninterruptible waiter first... */
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		/* ...then drop the spinlock so the owner can release. */
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		/* Re-check ownership under the spinlock. */
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}
2652 
/* Process every skb queued on the socket backlog.
 *
 * Per the annotations below, sk_lock.slock is released and re-acquired
 * inside: each detached batch is processed with the spinlock dropped, and
 * the lock is held again on return.
 */
void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb, *next;

	while ((skb = sk->sk_backlog.head) != NULL) {
		/* Detach the whole list so producers start a fresh one. */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;

		spin_unlock_bh(&sk->sk_lock.slock);

		do {
			next = skb->next;
			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb_mark_not_on_list(skb);
			sk_backlog_rcv(sk, skb);

			/* Give the scheduler a chance between packets. */
			cond_resched();

			skb = next;
		} while (skb != NULL);

		/* Re-take the lock before looking at the backlog again. */
		spin_lock_bh(&sk->sk_lock.slock);
	}

	/*
	 * Doing the zeroing here guarantee we can not loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}
2685 
/* Run __release_sock() on @sk with sk_lock.slock taken (BH disabled) around
 * the call.
 */
void __sk_flush_backlog(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	__release_sock(sk);
	spin_unlock_bh(&sk->sk_lock.slock);
}
2692 
2693 /**
2694  * sk_wait_data - wait for data to arrive at sk_receive_queue
2695  * @sk:    sock to wait on
2696  * @timeo: for how long
2697  * @skb:   last skb seen on sk_receive_queue
2698  *
2699  * Now socket state including sk->sk_err is changed only under lock,
2700  * hence we may omit checks after joining wait queue.
2701  * We check receive queue before schedule() only as optimization;
2702  * it is very likely that release_sock() added new data.
2703  */
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int rc;

	add_wait_queue(sk_sleep(sk), &wait);
	/* SOCKWQ_ASYNC_WAITDATA stays set only for the duration of the wait. */
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	/* Condition: the tail of sk_receive_queue is no longer @skb. */
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	/* rc is whatever sk_wait_event() produced. */
	return rc;
}
2716 EXPORT_SYMBOL(sk_wait_data);
2717 
2718 /**
2719  *	__sk_mem_raise_allocated - increase memory_allocated
2720  *	@sk: socket
2721  *	@size: memory size to allocate
2722  *	@amt: pages to allocate
2723  *	@kind: allocation type
2724  *
2725  *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2726  */
/* Returns 1 when the allocation is accepted and 0 when it is refused; on
 * refusal the @amt pages added at the top (and the memcg charge, when a
 * memcg is attached) are given back before returning.
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	struct proto *prot = sk->sk_prot;
	/* Account the pages up front; 'allocated' is the resulting total. */
	long allocated = sk_memory_allocated_add(sk, amt);
	bool charged = true;

	/* A failed memcg charge refuses the allocation outright. */
	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
		goto suppress_allocation;

	/* Under limit. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. */
	if (allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
			return 1;

	} else { /* SK_MEM_SEND */
		int wmem0 = sk_get_wmem0(sk, prot);

		/* Stream sockets are judged on sk_wmem_queued, all others on
		 * sk_wmem_alloc.
		 */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < wmem0)
				return 1;
		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
				return 1;
		}
	}

	if (sk_has_memory_pressure(sk)) {
		u64 alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		/* Accept while (socket count * pages used by this socket)
		 * stays below the hard limit.
		 */
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	/* Receive-side refusals are traced only if the memcg charge succeeded. */
	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);

	/* Undo the accounting done at the top. */
	sk_memory_allocated_sub(sk, amt);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);

	return 0;
}
2802 EXPORT_SYMBOL(__sk_mem_raise_allocated);
2803 
2804 /**
2805  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2806  *	@sk: socket
2807  *	@size: memory size to allocate
2808  *	@kind: allocation type
2809  *
2810  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2811  *	rmem allocation. This function assumes that protocols which have
2812  *	memory_pressure use sk_wmem_queued as write buffer accounting.
2813  */
2814 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2815 {
2816 	int ret, amt = sk_mem_pages(size);
2817 
2818 	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2819 	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2820 	if (!ret)
2821 		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2822 	return ret;
2823 }
2824 EXPORT_SYMBOL(__sk_mem_schedule);
2825 
2826 /**
2827  *	__sk_mem_reduce_allocated - reclaim memory_allocated
2828  *	@sk: socket
2829  *	@amount: number of quanta
2830  *
2831  *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
2832  */
2833 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2834 {
2835 	sk_memory_allocated_sub(sk, amount);
2836 
2837 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2838 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2839 
2840 	if (sk_under_memory_pressure(sk) &&
2841 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2842 		sk_leave_memory_pressure(sk);
2843 }
2844 EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2845 
2846 /**
2847  *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2848  *	@sk: socket
2849  *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2850  */
2851 void __sk_mem_reclaim(struct sock *sk, int amount)
2852 {
2853 	amount >>= SK_MEM_QUANTUM_SHIFT;
2854 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2855 	__sk_mem_reduce_allocated(sk, amount);
2856 }
2857 EXPORT_SYMBOL(__sk_mem_reclaim);
2858 
/* Store @val in sk->sk_peek_off. Always returns 0. */
int sk_set_peek_off(struct sock *sk, int val)
{
	sk->sk_peek_off = val;
	return 0;
}
2864 EXPORT_SYMBOL_GPL(sk_set_peek_off);
2865 
2866 /*
2867  * Set of default routines for initialising struct proto_ops when
2868  * the protocol does not support a particular function. In certain
2869  * cases where it makes no sense for a protocol to have a "do nothing"
2870  * function, some default processing is provided.
2871  */
2872 
/* Default bind handler for protocols without bind support: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
2877 EXPORT_SYMBOL(sock_no_bind);
2878 
/* Default connect handler for protocols without connect support: always
 * fails with -EOPNOTSUPP.
 */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
2884 EXPORT_SYMBOL(sock_no_connect);
2885 
/* Default socketpair handler for protocols without socketpair support:
 * always fails with -EOPNOTSUPP.
 */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
2890 EXPORT_SYMBOL(sock_no_socketpair);
2891 
/* Default accept handler for protocols without accept support: always
 * fails with -EOPNOTSUPP.
 */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
		   bool kern)
{
	return -EOPNOTSUPP;
}
2897 EXPORT_SYMBOL(sock_no_accept);
2898 
/* Default getname handler for protocols without getname support: always
 * fails with -EOPNOTSUPP.
 */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int peer)
{
	return -EOPNOTSUPP;
}
2904 EXPORT_SYMBOL(sock_no_getname);
2905 
/* Default ioctl handler for protocols without ioctl support: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
2910 EXPORT_SYMBOL(sock_no_ioctl);
2911 
/* Default listen handler for protocols without listen support: always
 * fails with -EOPNOTSUPP.
 */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
2916 EXPORT_SYMBOL(sock_no_listen);
2917 
/* Default shutdown handler for protocols without shutdown support: always
 * fails with -EOPNOTSUPP.
 */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
2922 EXPORT_SYMBOL(sock_no_shutdown);
2923 
/* Default sendmsg handler for protocols without sendmsg support: always
 * fails with -EOPNOTSUPP.
 */
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
2928 EXPORT_SYMBOL(sock_no_sendmsg);
2929 
/* Counterpart of sock_no_sendmsg() that takes a struct sock: always fails
 * with -EOPNOTSUPP.
 */
int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
2934 EXPORT_SYMBOL(sock_no_sendmsg_locked);
2935 
/* Default recvmsg handler for protocols without recvmsg support: always
 * fails with -EOPNOTSUPP.
 */
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
2941 EXPORT_SYMBOL(sock_no_recvmsg);
2942 
/* Default mmap handler for protocols without mmap support: always fails
 * with -ENODEV (not -EOPNOTSUPP like the other sock_no_*() stubs).
 */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
2948 EXPORT_SYMBOL(sock_no_mmap);
2949 
2950 /*
2951  * When a file is received (via SCM_RIGHTS, etc), we must bump the
2952  * various sock-based usage counts.
2953  */
2954 void __receive_sock(struct file *file)
2955 {
2956 	struct socket *sock;
2957 
2958 	sock = sock_from_file(file);
2959 	if (sock) {
2960 		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
2961 		sock_update_classid(&sock->sk->sk_cgrp_data);
2962 	}
2963 }
2964 
2965 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2966 {
2967 	ssize_t res;
2968 	struct msghdr msg = {.msg_flags = flags};
2969 	struct kvec iov;
2970 	char *kaddr = kmap(page);
2971 	iov.iov_base = kaddr + offset;
2972 	iov.iov_len = size;
2973 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2974 	kunmap(page);
2975 	return res;
2976 }
2977 EXPORT_SYMBOL(sock_no_sendpage);
2978 
2979 ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
2980 				int offset, size_t size, int flags)
2981 {
2982 	ssize_t res;
2983 	struct msghdr msg = {.msg_flags = flags};
2984 	struct kvec iov;
2985 	char *kaddr = kmap(page);
2986 
2987 	iov.iov_base = kaddr + offset;
2988 	iov.iov_len = size;
2989 	res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
2990 	kunmap(page);
2991 	return res;
2992 }
2993 EXPORT_SYMBOL(sock_no_sendpage_locked);
2994 
2995 /*
2996  *	Default Socket Callbacks
2997  */
2998 
2999 static void sock_def_wakeup(struct sock *sk)
3000 {
3001 	struct socket_wq *wq;
3002 
3003 	rcu_read_lock();
3004 	wq = rcu_dereference(sk->sk_wq);
3005 	if (skwq_has_sleeper(wq))
3006 		wake_up_interruptible_all(&wq->wait);
3007 	rcu_read_unlock();
3008 }
3009 
3010 static void sock_def_error_report(struct sock *sk)
3011 {
3012 	struct socket_wq *wq;
3013 
3014 	rcu_read_lock();
3015 	wq = rcu_dereference(sk->sk_wq);
3016 	if (skwq_has_sleeper(wq))
3017 		wake_up_interruptible_poll(&wq->wait, EPOLLERR);
3018 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
3019 	rcu_read_unlock();
3020 }
3021 
3022 void sock_def_readable(struct sock *sk)
3023 {
3024 	struct socket_wq *wq;
3025 
3026 	rcu_read_lock();
3027 	wq = rcu_dereference(sk->sk_wq);
3028 	if (skwq_has_sleeper(wq))
3029 		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
3030 						EPOLLRDNORM | EPOLLRDBAND);
3031 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
3032 	rcu_read_unlock();
3033 }
3034 
3035 static void sock_def_write_space(struct sock *sk)
3036 {
3037 	struct socket_wq *wq;
3038 
3039 	rcu_read_lock();
3040 
3041 	/* Do not wake up a writer until he can make "significant"
3042 	 * progress.  --DaveM
3043 	 */
3044 	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
3045 		wq = rcu_dereference(sk->sk_wq);
3046 		if (skwq_has_sleeper(wq))
3047 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
3048 						EPOLLWRNORM | EPOLLWRBAND);
3049 
3050 		/* Should agree with poll, otherwise some programs break */
3051 		if (sock_writeable(sk))
3052 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
3053 	}
3054 
3055 	rcu_read_unlock();
3056 }
3057 
/* Default sk_destruct callback (installed by sock_init_data()): does
 * nothing.
 */
static void sock_def_destruct(struct sock *sk)
{
}
3061 
3062 void sk_send_sigurg(struct sock *sk)
3063 {
3064 	if (sk->sk_socket && sk->sk_socket->file)
3065 		if (send_sigurg(&sk->sk_socket->file->f_owner))
3066 			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
3067 }
3068 EXPORT_SYMBOL(sk_send_sigurg);
3069 
/* (Re)arm @timer to fire at @expires. A socket reference is taken only when
 * mod_timer() returns 0 (presumably meaning the timer was not already
 * pending, so an armed timer accounts for exactly one reference).
 */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (mod_timer(timer, expires))
		return;

	sock_hold(sk);
}
3076 EXPORT_SYMBOL(sk_reset_timer);
3077 
/* Cancel @timer with del_timer(); when that returns non-zero, drop one
 * socket reference with __sock_put().
 */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (!del_timer(timer))
		return;

	__sock_put(sk);
}
3083 EXPORT_SYMBOL(sk_stop_timer);
3084 
/* Like sk_stop_timer(), but cancels with del_timer_sync(). */
void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
{
	if (!del_timer_sync(timer))
		return;

	__sock_put(sk);
}
3090 EXPORT_SYMBOL(sk_stop_timer_sync);
3091 
3092 void sock_init_data(struct socket *sock, struct sock *sk)
3093 {
3094 	sk_init_common(sk);
3095 	sk->sk_send_head	=	NULL;
3096 
3097 	timer_setup(&sk->sk_timer, NULL, 0);
3098 
3099 	sk->sk_allocation	=	GFP_KERNEL;
3100 	sk->sk_rcvbuf		=	sysctl_rmem_default;
3101 	sk->sk_sndbuf		=	sysctl_wmem_default;
3102 	sk->sk_state		=	TCP_CLOSE;
3103 	sk_set_socket(sk, sock);
3104 
3105 	sock_set_flag(sk, SOCK_ZAPPED);
3106 
3107 	if (sock) {
3108 		sk->sk_type	=	sock->type;
3109 		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
3110 		sock->sk	=	sk;
3111 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
3112 	} else {
3113 		RCU_INIT_POINTER(sk->sk_wq, NULL);
3114 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
3115 	}
3116 
3117 	rwlock_init(&sk->sk_callback_lock);
3118 	if (sk->sk_kern_sock)
3119 		lockdep_set_class_and_name(
3120 			&sk->sk_callback_lock,
3121 			af_kern_callback_keys + sk->sk_family,
3122 			af_family_kern_clock_key_strings[sk->sk_family]);
3123 	else
3124 		lockdep_set_class_and_name(
3125 			&sk->sk_callback_lock,
3126 			af_callback_keys + sk->sk_family,
3127 			af_family_clock_key_strings[sk->sk_family]);
3128 
3129 	sk->sk_state_change	=	sock_def_wakeup;
3130 	sk->sk_data_ready	=	sock_def_readable;
3131 	sk->sk_write_space	=	sock_def_write_space;
3132 	sk->sk_error_report	=	sock_def_error_report;
3133 	sk->sk_destruct		=	sock_def_destruct;
3134 
3135 	sk->sk_frag.page	=	NULL;
3136 	sk->sk_frag.offset	=	0;
3137 	sk->sk_peek_off		=	-1;
3138 
3139 	sk->sk_peer_pid 	=	NULL;
3140 	sk->sk_peer_cred	=	NULL;
3141 	sk->sk_write_pending	=	0;
3142 	sk->sk_rcvlowat		=	1;
3143 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
3144 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
3145 
3146 	sk->sk_stamp = SK_DEFAULT_STAMP;
3147 #if BITS_PER_LONG==32
3148 	seqlock_init(&sk->sk_stamp_seq);
3149 #endif
3150 	atomic_set(&sk->sk_zckey, 0);
3151 
3152 #ifdef CONFIG_NET_RX_BUSY_POLL
3153 	sk->sk_napi_id		=	0;
3154 	sk->sk_ll_usec		=	sysctl_net_busy_read;
3155 #endif
3156 
3157 	sk->sk_max_pacing_rate = ~0UL;
3158 	sk->sk_pacing_rate = ~0UL;
3159 	WRITE_ONCE(sk->sk_pacing_shift, 10);
3160 	sk->sk_incoming_cpu = -1;
3161 
3162 	sk_rx_queue_clear(sk);
3163 	/*
3164 	 * Before updating sk_refcnt, we must commit prior changes to memory
3165 	 * (Documentation/RCU/rculist_nulls.rst for details)
3166 	 */
3167 	smp_wmb();
3168 	refcount_set(&sk->sk_refcnt, 1);
3169 	atomic_set(&sk->sk_drops, 0);
3170 }
3171 EXPORT_SYMBOL(sock_init_data);
3172 
/* Take ownership of the socket lock, sleeping in __lock_sock() while it is
 * already owned. @subclass is passed to lockdep's mutex_acquire().
 */
void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	/* Plain spin_unlock(): BH is re-enabled separately further down. */
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
3187 EXPORT_SYMBOL(lock_sock_nested);
3188 
/* Give up ownership of the socket lock: process any queued backlog, run the
 * protocol's release_cb() hook if present, then wake waiters on sk_lock.wq.
 * Everything happens under sk_lock.slock.
 */
void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	/* A non-NULL tail means packets were queued to the backlog. */
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
3206 EXPORT_SYMBOL(release_sock);
3207 
3208 /**
3209  * lock_sock_fast - fast version of lock_sock
3210  * @sk: socket
3211  *
3212  * This version should be used for very small section, where process wont block
3213  * return false if fast path is taken:
3214  *
3215  *   sk_lock.slock locked, owned = 0, BH disabled
3216  *
3217  * return true if slow path is taken:
3218  *
3219  *   sk_lock.slock unlocked, owned = 1, BH enabled
3220  */
bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	/* Fast path: nobody owns the socket, return holding the spinlock. */
	if (!sk->sk_lock.owned)
		/*
		 * Note : We must disable BH
		 */
		return false;

	/* Slow path: wait for the owner, then take ownership ourselves. */
	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	/* Keeps the __acquires() annotation balanced on this path too. */
	__acquire(&sk->sk_lock.slock);
	local_bh_enable();
	return true;
}
3243 EXPORT_SYMBOL(lock_sock_fast);
3244 
/* Copy the socket's timestamp to @userstamp.
 *
 * Enables SOCK_TIMESTAMP on the socket as a side effect. @timeval selects
 * microsecond instead of nanosecond resolution in the second field, and
 * @time32 selects the old 32-bit layout when CONFIG_COMPAT_32BIT_TIME is
 * set. Returns -ENOENT when the stored stamp has tv_sec == -1, otherwise
 * the result of the copy to user space.
 */
int sock_gettstamp(struct socket *sock, void __user *userstamp,
		   bool timeval, bool time32)
{
	struct sock *sk = sock->sk;
	struct timespec64 ts;

	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec64(sock_read_timestamp(sk));
	if (ts.tv_sec == -1)
		return -ENOENT;
	/* tv_sec == 0: stamp the socket with the current real time. */
	if (ts.tv_sec == 0) {
		ktime_t kt = ktime_get_real();
		sock_write_timestamp(sk, kt);
		ts = ktime_to_timespec64(kt);
	}

	/* From here on tv_nsec carries microseconds for timeval callers. */
	if (timeval)
		ts.tv_nsec /= 1000;

#ifdef CONFIG_COMPAT_32BIT_TIME
	if (time32)
		return put_old_timespec32(&ts, userstamp);
#endif
#ifdef CONFIG_SPARC64
	/* beware of padding in sparc64 timeval */
	if (timeval && !in_compat_syscall()) {
		struct __kernel_old_timeval __user tv = {
			.tv_sec = ts.tv_sec,
			.tv_usec = ts.tv_nsec,
		};
		if (copy_to_user(userstamp, &tv, sizeof(tv)))
			return -EFAULT;
		return 0;
	}
#endif
	return put_timespec64(&ts, userstamp);
}
3282 EXPORT_SYMBOL(sock_gettstamp);
3283 
3284 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
3285 {
3286 	if (!sock_flag(sk, flag)) {
3287 		unsigned long previous_flags = sk->sk_flags;
3288 
3289 		sock_set_flag(sk, flag);
3290 		/*
3291 		 * we just set one of the two flags which require net
3292 		 * time stamping, but time stamping might have been on
3293 		 * already because of the other one
3294 		 */
3295 		if (sock_needs_netstamp(sk) &&
3296 		    !(previous_flags & SK_FLAGS_TIMESTAMP))
3297 			net_enable_timestamp();
3298 	}
3299 }
3300 
3301 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
3302 		       int level, int type)
3303 {
3304 	struct sock_exterr_skb *serr;
3305 	struct sk_buff *skb;
3306 	int copied, err;
3307 
3308 	err = -EAGAIN;
3309 	skb = sock_dequeue_err_skb(sk);
3310 	if (skb == NULL)
3311 		goto out;
3312 
3313 	copied = skb->len;
3314 	if (copied > len) {
3315 		msg->msg_flags |= MSG_TRUNC;
3316 		copied = len;
3317 	}
3318 	err = skb_copy_datagram_msg(skb, 0, msg, copied);
3319 	if (err)
3320 		goto out_free_skb;
3321 
3322 	sock_recv_timestamp(msg, sk, skb);
3323 
3324 	serr = SKB_EXT_ERR(skb);
3325 	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
3326 
3327 	msg->msg_flags |= MSG_ERRQUEUE;
3328 	err = copied;
3329 
3330 out_free_skb:
3331 	kfree_skb(skb);
3332 out:
3333 	return err;
3334 }
3335 EXPORT_SYMBOL(sock_recv_errqueue);
3336 
3337 /*
3338  *	Get a socket option on an socket.
3339  *
3340  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
3341  *	asynchronous errors should be reported by getsockopt. We assume
3342  *	this means if you specify SO_ERROR (otherwise whats the point of it).
3343  */
3344 int sock_common_getsockopt(struct socket *sock, int level, int optname,
3345 			   char __user *optval, int __user *optlen)
3346 {
3347 	struct sock *sk = sock->sk;
3348 
3349 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
3350 }
3351 EXPORT_SYMBOL(sock_common_getsockopt);
3352 
3353 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3354 			int flags)
3355 {
3356 	struct sock *sk = sock->sk;
3357 	int addr_len = 0;
3358 	int err;
3359 
3360 	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
3361 				   flags & ~MSG_DONTWAIT, &addr_len);
3362 	if (err >= 0)
3363 		msg->msg_namelen = addr_len;
3364 	return err;
3365 }
3366 EXPORT_SYMBOL(sock_common_recvmsg);
3367 
3368 /*
3369  *	Set socket options on an inet socket.
3370  */
3371 int sock_common_setsockopt(struct socket *sock, int level, int optname,
3372 			   sockptr_t optval, unsigned int optlen)
3373 {
3374 	struct sock *sk = sock->sk;
3375 
3376 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
3377 }
3378 EXPORT_SYMBOL(sock_common_setsockopt);
3379 
/*
 * Common release path for a socket: run the protocol's optional destroy
 * hook, unhash the socket, orphan it, free its xfrm policy and finally drop
 * a reference with sock_put().  The order of these steps matters.
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes no longer
	 * have access to the socket, but the networking code still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but some
	 * packets may still be in flight because a CPU running the receive
	 * path did its hash table lookup before we unhashed the socket. Those
	 * packets will reach the receive queue and be purged by the socket
	 * destructor.
	 *
	 * We may also still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
3416 
3417 void sk_get_meminfo(const struct sock *sk, u32 *mem)
3418 {
3419 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
3420 
3421 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3422 	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
3423 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3424 	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
3425 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
3426 	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
3427 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
3428 	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
3429 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
3430 }
3431 
3432 #ifdef CONFIG_PROC_FS
/* Number of per-protocol "in use" counter slots available. */
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
/* One in-use counter per protocol slot; allocated per CPU for each netns. */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

/* Bitmap of counter slots currently handed out by assign_proto_idx(). */
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3439 
/*
 * Add @val (may be negative) to this CPU's in-use counter of @prot in @net.
 *
 * NOTE(review): uses the __this_cpu_add() variant, which presumably relies
 * on the caller to prevent preemption/migration - confirm against callers.
 */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3445 
3446 int sock_prot_inuse_get(struct net *net, struct proto *prot)
3447 {
3448 	int cpu, idx = prot->inuse_idx;
3449 	int res = 0;
3450 
3451 	for_each_possible_cpu(cpu)
3452 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3453 
3454 	return res >= 0 ? res : 0;
3455 }
3456 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3457 
/* Add @val (may be negative) to this CPU's socket in-use counter of @net. */
static void sock_inuse_add(struct net *net, int val)
{
	this_cpu_add(*net->core.sock_inuse, val);
}
3462 
3463 int sock_inuse_get(struct net *net)
3464 {
3465 	int cpu, res = 0;
3466 
3467 	for_each_possible_cpu(cpu)
3468 		res += *per_cpu_ptr(net->core.sock_inuse, cpu);
3469 
3470 	return res;
3471 }
3472 
3473 EXPORT_SYMBOL_GPL(sock_inuse_get);
3474 
3475 static int __net_init sock_inuse_init_net(struct net *net)
3476 {
3477 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3478 	if (net->core.prot_inuse == NULL)
3479 		return -ENOMEM;
3480 
3481 	net->core.sock_inuse = alloc_percpu(int);
3482 	if (net->core.sock_inuse == NULL)
3483 		goto out;
3484 
3485 	return 0;
3486 
3487 out:
3488 	free_percpu(net->core.prot_inuse);
3489 	return -ENOMEM;
3490 }
3491 
/* Per-netns exit: free both per-CPU counter areas set up at init time. */
static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.prot_inuse);
	free_percpu(net->core.sock_inuse);
}
3497 
3498 static struct pernet_operations net_inuse_ops = {
3499 	.init = sock_inuse_init_net,
3500 	.exit = sock_inuse_exit_net,
3501 };
3502 
3503 static __init int net_inuse_init(void)
3504 {
3505 	if (register_pernet_subsys(&net_inuse_ops))
3506 		panic("Cannot initialize net inuse counters");
3507 
3508 	return 0;
3509 }
3510 
3511 core_initcall(net_inuse_init);
3512 
3513 static int assign_proto_idx(struct proto *prot)
3514 {
3515 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
3516 
3517 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3518 		pr_err("PROTO_INUSE_NR exhausted\n");
3519 		return -ENOSPC;
3520 	}
3521 
3522 	set_bit(prot->inuse_idx, proto_inuse_idx);
3523 	return 0;
3524 }
3525 
3526 static void release_proto_idx(struct proto *prot)
3527 {
3528 	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3529 		clear_bit(prot->inuse_idx, proto_inuse_idx);
3530 }
3531 #else
/* !CONFIG_PROC_FS: no in-use counters are kept, so always succeed. */
static inline int assign_proto_idx(struct proto *prot)
{
	return 0;
}

/* !CONFIG_PROC_FS: nothing was assigned, so nothing to release. */
static inline void release_proto_idx(struct proto *prot)
{
}

/* !CONFIG_PROC_FS: socket in-use accounting is compiled out. */
static void sock_inuse_add(struct net *net, int val)
{
}
3544 #endif
3545 
3546 static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
3547 {
3548 	if (!twsk_prot)
3549 		return;
3550 	kfree(twsk_prot->twsk_slab_name);
3551 	twsk_prot->twsk_slab_name = NULL;
3552 	kmem_cache_destroy(twsk_prot->twsk_slab);
3553 	twsk_prot->twsk_slab = NULL;
3554 }
3555 
3556 static int tw_prot_init(const struct proto *prot)
3557 {
3558 	struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
3559 
3560 	if (!twsk_prot)
3561 		return 0;
3562 
3563 	twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
3564 					      prot->name);
3565 	if (!twsk_prot->twsk_slab_name)
3566 		return -ENOMEM;
3567 
3568 	twsk_prot->twsk_slab =
3569 		kmem_cache_create(twsk_prot->twsk_slab_name,
3570 				  twsk_prot->twsk_obj_size, 0,
3571 				  SLAB_ACCOUNT | prot->slab_flags,
3572 				  NULL);
3573 	if (!twsk_prot->twsk_slab) {
3574 		pr_crit("%s: Can't create timewait sock SLAB cache!\n",
3575 			prot->name);
3576 		return -ENOMEM;
3577 	}
3578 
3579 	return 0;
3580 }
3581 
3582 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3583 {
3584 	if (!rsk_prot)
3585 		return;
3586 	kfree(rsk_prot->slab_name);
3587 	rsk_prot->slab_name = NULL;
3588 	kmem_cache_destroy(rsk_prot->slab);
3589 	rsk_prot->slab = NULL;
3590 }
3591 
3592 static int req_prot_init(const struct proto *prot)
3593 {
3594 	struct request_sock_ops *rsk_prot = prot->rsk_prot;
3595 
3596 	if (!rsk_prot)
3597 		return 0;
3598 
3599 	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3600 					prot->name);
3601 	if (!rsk_prot->slab_name)
3602 		return -ENOMEM;
3603 
3604 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3605 					   rsk_prot->obj_size, 0,
3606 					   SLAB_ACCOUNT | prot->slab_flags,
3607 					   NULL);
3608 
3609 	if (!rsk_prot->slab) {
3610 		pr_crit("%s: Can't create request sock SLAB cache!\n",
3611 			prot->name);
3612 		return -ENOMEM;
3613 	}
3614 	return 0;
3615 }
3616 
3617 int proto_register(struct proto *prot, int alloc_slab)
3618 {
3619 	int ret = -ENOBUFS;
3620 
3621 	if (alloc_slab) {
3622 		prot->slab = kmem_cache_create_usercopy(prot->name,
3623 					prot->obj_size, 0,
3624 					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3625 					prot->slab_flags,
3626 					prot->useroffset, prot->usersize,
3627 					NULL);
3628 
3629 		if (prot->slab == NULL) {
3630 			pr_crit("%s: Can't create sock SLAB cache!\n",
3631 				prot->name);
3632 			goto out;
3633 		}
3634 
3635 		if (req_prot_init(prot))
3636 			goto out_free_request_sock_slab;
3637 
3638 		if (tw_prot_init(prot))
3639 			goto out_free_timewait_sock_slab;
3640 	}
3641 
3642 	mutex_lock(&proto_list_mutex);
3643 	ret = assign_proto_idx(prot);
3644 	if (ret) {
3645 		mutex_unlock(&proto_list_mutex);
3646 		goto out_free_timewait_sock_slab;
3647 	}
3648 	list_add(&prot->node, &proto_list);
3649 	mutex_unlock(&proto_list_mutex);
3650 	return ret;
3651 
3652 out_free_timewait_sock_slab:
3653 	if (alloc_slab)
3654 		tw_prot_cleanup(prot->twsk_prot);
3655 out_free_request_sock_slab:
3656 	if (alloc_slab) {
3657 		req_prot_cleanup(prot->rsk_prot);
3658 
3659 		kmem_cache_destroy(prot->slab);
3660 		prot->slab = NULL;
3661 	}
3662 out:
3663 	return ret;
3664 }
3665 EXPORT_SYMBOL(proto_register);
3666 
/*
 * Unregister a protocol: release its in-use counter slot and unlink it from
 * proto_list under proto_list_mutex, then destroy its sock, request sock and
 * timewait sock slab caches.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	/* Both helpers tolerate a NULL ops pointer. */
	req_prot_cleanup(prot->rsk_prot);
	tw_prot_cleanup(prot->twsk_prot);
}
EXPORT_SYMBOL(proto_unregister);
3681 
3682 int sock_load_diag_module(int family, int protocol)
3683 {
3684 	if (!protocol) {
3685 		if (!sock_is_registered(family))
3686 			return -ENOENT;
3687 
3688 		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
3689 				      NETLINK_SOCK_DIAG, family);
3690 	}
3691 
3692 #ifdef CONFIG_INET
3693 	if (family == AF_INET &&
3694 	    protocol != IPPROTO_RAW &&
3695 	    protocol < MAX_INET_PROTOS &&
3696 	    !rcu_access_pointer(inet_protos[protocol]))
3697 		return -ENOENT;
3698 #endif
3699 
3700 	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
3701 			      NETLINK_SOCK_DIAG, family, protocol);
3702 }
3703 EXPORT_SYMBOL(sock_load_diag_module);
3704 
3705 #ifdef CONFIG_PROC_FS
/*
 * seq_file start hook: take proto_list_mutex and position the iterator at
 * *pos, with the list head itself as element 0.  The mutex stays held until
 * proto_seq_stop().
 */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}
3712 
/* seq_file next hook: advance to the entry following @v on proto_list. */
static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}
3717 
/* seq_file stop hook: drop the mutex taken in proto_seq_start(). */
static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}
3723 
/* Map a method pointer to 'y' (set) or 'n' (NULL) for /proc output. */
static char proto_method_implemented(const void *method)
{
	if (method)
		return 'y';

	return 'n';
}
3728 static long sock_prot_memory_allocated(struct proto *proto)
3729 {
3730 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3731 }
3732 
3733 static const char *sock_prot_memory_pressure(struct proto *proto)
3734 {
3735 	return proto->memory_pressure != NULL ?
3736 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3737 }
3738 
/*
 * Emit one line describing @proto: name, object size, sockets in use in the
 * seq_file's netns, memory allocated, memory pressure, max header, whether a
 * slab cache exists and the owning module, followed by one 'y'/'n' column per
 * protocol method.  The method columns are printed in the same order as the
 * two-letter abbreviations in the header written by proto_seq_show().
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
3772 
3773 static int proto_seq_show(struct seq_file *seq, void *v)
3774 {
3775 	if (v == &proto_list)
3776 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3777 			   "protocol",
3778 			   "size",
3779 			   "sockets",
3780 			   "memory",
3781 			   "press",
3782 			   "maxhdr",
3783 			   "slab",
3784 			   "module",
3785 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3786 	else
3787 		proto_seq_printf(seq, list_entry(v, struct proto, node));
3788 	return 0;
3789 }
3790 
/* seq_file iterator over proto_list, used for the "protocols" proc entry. */
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
3797 
3798 static __net_init int proto_init_net(struct net *net)
3799 {
3800 	if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
3801 			sizeof(struct seq_net_private)))
3802 		return -ENOMEM;
3803 
3804 	return 0;
3805 }
3806 
/* Per-netns exit: remove the "protocols" proc entry created at init. */
static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}
3811 
3812 
3813 static __net_initdata struct pernet_operations proto_net_ops = {
3814 	.init = proto_init_net,
3815 	.exit = proto_exit_net,
3816 };
3817 
3818 static int __init proto_init(void)
3819 {
3820 	return register_pernet_subsys(&proto_net_ops);
3821 }
3822 
3823 subsys_initcall(proto_init);
3824 
3825 #endif /* PROC_FS */
3826 
3827 #ifdef CONFIG_NET_RX_BUSY_POLL
3828 bool sk_busy_loop_end(void *p, unsigned long start_time)
3829 {
3830 	struct sock *sk = p;
3831 
3832 	return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
3833 	       sk_busy_loop_timeout(sk, start_time);
3834 }
3835 EXPORT_SYMBOL(sk_busy_loop_end);
3836 #endif /* CONFIG_NET_RX_BUSY_POLL */
3837 
3838 int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
3839 {
3840 	if (!sk->sk_prot->bind_add)
3841 		return -EOPNOTSUPP;
3842 	return sk->sk_prot->bind_add(sk, addr, addr_len);
3843 }
3844 EXPORT_SYMBOL(sock_bind_add);
3845