xref: /linux/net/core/sock.c (revision ffb239e29518578c45f278fccd32db958ff59174)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * INET		An implementation of the TCP/IP protocol suite for the LINUX
4  *		operating system.  INET is implemented using the  BSD Socket
5  *		interface as the means of communication with the user level.
6  *
7  *		Generic socket support routines. Memory allocators, socket lock/release
8  *		handler for protocols to use and generic option handler.
9  *
10  * Authors:	Ross Biro
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Florian La Roche, <flla@stud.uni-sb.de>
13  *		Alan Cox, <A.Cox@swansea.ac.uk>
14  *
15  * Fixes:
16  *		Alan Cox	: 	Numerous verify_area() problems
17  *		Alan Cox	:	Connecting on a connecting socket
18  *					now returns an error for tcp.
19  *		Alan Cox	:	sock->protocol is set correctly.
20  *					and is not sometimes left as 0.
21  *		Alan Cox	:	connect handles icmp errors on a
22  *					connect properly. Unfortunately there
23  *					is a restart syscall nasty there. I
24  *					can't match BSD without hacking the C
25  *					library. Ideas urgently sought!
26  *		Alan Cox	:	Disallow bind() to addresses that are
27  *					not ours - especially broadcast ones!!
28  *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
29  *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
30  *					instead they leave that for the DESTROY timer.
31  *		Alan Cox	:	Clean up error flag in accept
32  *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
33  *					was buggy. Put a remove_sock() in the handler
34  *					for memory when we hit 0. Also altered the timer
35  *					code. The ACK stuff can wait and needs major
36  *					TCP layer surgery.
37  *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
38  *					and fixed timer/inet_bh race.
39  *		Alan Cox	:	Added zapped flag for TCP
40  *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
41  *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42  *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
43  *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
44  *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45  *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
46  *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
47  *	Pauline Middelink	:	identd support
48  *		Alan Cox	:	Fixed connect() taking signals I think.
49  *		Alan Cox	:	SO_LINGER supported
50  *		Alan Cox	:	Error reporting fixes
51  *		Anonymous	:	inet_create tidied up (sk->reuse setting)
52  *		Alan Cox	:	inet sockets don't set sk->type!
53  *		Alan Cox	:	Split socket option code
54  *		Alan Cox	:	Callbacks
55  *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
56  *		Alex		:	Removed restriction on inet fioctl
57  *		Alan Cox	:	Splitting INET from NET core
58  *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
59  *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
60  *		Alan Cox	:	Split IP from generic code
61  *		Alan Cox	:	New kfree_skbmem()
62  *		Alan Cox	:	Make SO_DEBUG superuser only.
63  *		Alan Cox	:	Allow anyone to clear SO_DEBUG
64  *					(compatibility fix)
65  *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
66  *		Alan Cox	:	Allocator for a socket is settable.
67  *		Alan Cox	:	SO_ERROR includes soft errors.
68  *		Alan Cox	:	Allow NULL arguments on some SO_ opts
69  *		Alan Cox	: 	Generic socket allocation to make hooks
70  *					easier (suggested by Craig Metz).
71  *		Michael Pall	:	SO_ERROR returns positive errno again
72  *              Steve Whitehouse:       Added default destructor to free
73  *                                      protocol private data.
74  *              Steve Whitehouse:       Added various other default routines
75  *                                      common to several socket families.
76  *              Chris Evans     :       Call suser() check last on F_SETOWN
77  *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78  *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
79  *		Andi Kleen	:	Fix write_space callback
80  *		Chris Evans	:	Security fixes - signedness again
81  *		Arnaldo C. Melo :       cleanups, use skb_queue_purge
82  *
83  * To Fix:
84  */
85 
86 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87 
88 #include <asm/unaligned.h>
89 #include <linux/capability.h>
90 #include <linux/errno.h>
91 #include <linux/errqueue.h>
92 #include <linux/types.h>
93 #include <linux/socket.h>
94 #include <linux/in.h>
95 #include <linux/kernel.h>
96 #include <linux/module.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/sched.h>
100 #include <linux/sched/mm.h>
101 #include <linux/timer.h>
102 #include <linux/string.h>
103 #include <linux/sockios.h>
104 #include <linux/net.h>
105 #include <linux/mm.h>
106 #include <linux/slab.h>
107 #include <linux/interrupt.h>
108 #include <linux/poll.h>
109 #include <linux/tcp.h>
110 #include <linux/init.h>
111 #include <linux/highmem.h>
112 #include <linux/user_namespace.h>
113 #include <linux/static_key.h>
114 #include <linux/memcontrol.h>
115 #include <linux/prefetch.h>
116 #include <linux/compat.h>
117 
118 #include <linux/uaccess.h>
119 
120 #include <linux/netdevice.h>
121 #include <net/protocol.h>
122 #include <linux/skbuff.h>
123 #include <net/net_namespace.h>
124 #include <net/request_sock.h>
125 #include <net/sock.h>
126 #include <linux/net_tstamp.h>
127 #include <net/xfrm.h>
128 #include <linux/ipsec.h>
129 #include <net/cls_cgroup.h>
130 #include <net/netprio_cgroup.h>
131 #include <linux/sock_diag.h>
132 
133 #include <linux/filter.h>
134 #include <net/sock_reuseport.h>
135 #include <net/bpf_sk_storage.h>
136 
137 #include <trace/events/sock.h>
138 
139 #include <net/tcp.h>
140 #include <net/busy_poll.h>
141 
142 #include <linux/ethtool.h>
143 
/* Mutex and list head for the protocol list. NOTE(review): the mutex
 * presumably guards the list; their users are outside this part of the
 * file - confirm there.
 */
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

/* Forward declaration; the definition is not in this part of the file. */
static void sock_inuse_add(struct net *net, int val);
148 
149 /**
150  * sk_ns_capable - General socket capability test
151  * @sk: Socket to use a capability on or through
152  * @user_ns: The user namespace of the capability to use
153  * @cap: The capability to use
154  *
155  * Test to see if the opener of the socket had when the socket was
156  * created and the current process has the capability @cap in the user
157  * namespace @user_ns.
158  */
159 bool sk_ns_capable(const struct sock *sk,
160 		   struct user_namespace *user_ns, int cap)
161 {
162 	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
163 		ns_capable(user_ns, cap);
164 }
165 EXPORT_SYMBOL(sk_ns_capable);
166 
167 /**
168  * sk_capable - Socket global capability test
169  * @sk: Socket to use a capability on or through
170  * @cap: The global capability to use
171  *
172  * Test to see if the opener of the socket had when the socket was
173  * created and the current process has the capability @cap in all user
174  * namespaces.
175  */
176 bool sk_capable(const struct sock *sk, int cap)
177 {
178 	return sk_ns_capable(sk, &init_user_ns, cap);
179 }
180 EXPORT_SYMBOL(sk_capable);
181 
182 /**
183  * sk_net_capable - Network namespace socket capability test
184  * @sk: Socket to use a capability on or through
185  * @cap: The capability to use
186  *
187  * Test to see if the opener of the socket had when the socket was created
188  * and the current process has the capability @cap over the network namespace
189  * the socket is a member of.
190  */
191 bool sk_net_capable(const struct sock *sk, int cap)
192 {
193 	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
194 }
195 EXPORT_SYMBOL(sk_net_capable);
196 
/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family and separate keys for internal and
 * userspace sockets.
 *
 * Every array holds AF_MAX lockdep class keys; the "kern" variants are the
 * ones for internal (kernel) sockets. NOTE(review): presumably indexed by
 * the socket's address family - the users are outside this part of the file.
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_kern_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
206 
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 *
 * _sock_locks(x) expands to a comma separated list of string literals, one
 * per address family name plus a final "AF_MAX" entry. @x must itself be a
 * string literal: it is pasted in front of every name through adjacent
 * string literal concatenation (e.g. "slock-" "AF_INET"). The entries "27"
 * and "28" carry a bare number instead of a family name.
 */

#define _sock_locks(x)						  \
  x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
  x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
  x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
  x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
  x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
  x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
  x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
  x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
  x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
  x "27"       ,	x "28"          ,	x "AF_CAN"      , \
  x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
  x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
  x "AF_IEEE802154",	x "AF_CAIF"	,	x "AF_ALG"      , \
  x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
  x "AF_QIPCRTR",	x "AF_SMC"	,	x "AF_XDP"	, \
  x "AF_MCTP"  , \
  x "AF_MAX"
231 
/*
 * Name tables built from _sock_locks(): each table has AF_MAX + 1 entries
 * (one per address family name plus the trailing "AF_MAX" entry) and differs
 * only in the prefix prepended to the family name.
 */

/* "sk_lock-", "slock-" and "clock-" prefixed names. */
static const char *const af_family_key_strings[AF_MAX+1] = {
	_sock_locks("sk_lock-")
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	_sock_locks("slock-")
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	_sock_locks("clock-")
};

/* Same three name sets with an additional "k-" prefix. */
static const char *const af_family_kern_key_strings[AF_MAX+1] = {
	_sock_locks("k-sk_lock-")
};
static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
	_sock_locks("k-slock-")
};
static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
	_sock_locks("k-clock-")
};
/* "rlock-", "wlock-" and "elock-" prefixed names. */
static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
	_sock_locks("rlock-")
};
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
	_sock_locks("wlock-")
};
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
	_sock_locks("elock-")
};
260 
/*
 * sk_callback_lock and sk queues locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 *
 * Each array provides AF_MAX lockdep class keys.
 */
static struct lock_class_key af_callback_keys[AF_MAX];
static struct lock_class_key af_rlock_keys[AF_MAX];
static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
270 
/* Run time adjustable parameters. */

/* Upper bounds applied to SO_SNDBUF / SO_RCVBUF requests in
 * sock_setsockopt().
 */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
/* Default buffer sizes; initialised to the same values as the maxima. */
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

/* Enabled (1) by default; the consumers are outside this part of the file. */
int sysctl_tstamp_allow_data __read_mostly = 1;

/* Static key, false by default; incremented by sk_set_memalloc() and
 * decremented by sk_clear_memalloc().
 */
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
EXPORT_SYMBOL_GPL(memalloc_socks_key);
287 
288 /**
289  * sk_set_memalloc - sets %SOCK_MEMALLOC
290  * @sk: socket to set it on
291  *
292  * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
293  * It's the responsibility of the admin to adjust min_free_kbytes
294  * to meet the requirements
295  */
296 void sk_set_memalloc(struct sock *sk)
297 {
298 	sock_set_flag(sk, SOCK_MEMALLOC);
299 	sk->sk_allocation |= __GFP_MEMALLOC;
300 	static_branch_inc(&memalloc_socks_key);
301 }
302 EXPORT_SYMBOL_GPL(sk_set_memalloc);
303 
304 void sk_clear_memalloc(struct sock *sk)
305 {
306 	sock_reset_flag(sk, SOCK_MEMALLOC);
307 	sk->sk_allocation &= ~__GFP_MEMALLOC;
308 	static_branch_dec(&memalloc_socks_key);
309 
310 	/*
311 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
312 	 * progress of swapping. SOCK_MEMALLOC may be cleared while
313 	 * it has rmem allocations due to the last swapfile being deactivated
314 	 * but there is a risk that the socket is unusable due to exceeding
315 	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
316 	 */
317 	sk_mem_reclaim(sk);
318 }
319 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
320 
321 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
322 {
323 	int ret;
324 	unsigned int noreclaim_flag;
325 
326 	/* these should have been dropped before queueing */
327 	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
328 
329 	noreclaim_flag = memalloc_noreclaim_save();
330 	ret = sk->sk_backlog_rcv(sk, skb);
331 	memalloc_noreclaim_restore(noreclaim_flag);
332 
333 	return ret;
334 }
335 EXPORT_SYMBOL(__sk_backlog_rcv);
336 
337 void sk_error_report(struct sock *sk)
338 {
339 	sk->sk_error_report(sk);
340 
341 	switch (sk->sk_family) {
342 	case AF_INET:
343 		fallthrough;
344 	case AF_INET6:
345 		trace_inet_sk_error_report(sk);
346 		break;
347 	default:
348 		break;
349 	}
350 }
351 EXPORT_SYMBOL(sk_error_report);
352 
353 static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
354 {
355 	struct __kernel_sock_timeval tv;
356 
357 	if (timeo == MAX_SCHEDULE_TIMEOUT) {
358 		tv.tv_sec = 0;
359 		tv.tv_usec = 0;
360 	} else {
361 		tv.tv_sec = timeo / HZ;
362 		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
363 	}
364 
365 	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
366 		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
367 		*(struct old_timeval32 *)optval = tv32;
368 		return sizeof(tv32);
369 	}
370 
371 	if (old_timeval) {
372 		struct __kernel_old_timeval old_tv;
373 		old_tv.tv_sec = tv.tv_sec;
374 		old_tv.tv_usec = tv.tv_usec;
375 		*(struct __kernel_old_timeval *)optval = old_tv;
376 		return sizeof(old_tv);
377 	}
378 
379 	*(struct __kernel_sock_timeval *)optval = tv;
380 	return sizeof(tv);
381 }
382 
383 static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
384 			    bool old_timeval)
385 {
386 	struct __kernel_sock_timeval tv;
387 
388 	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
389 		struct old_timeval32 tv32;
390 
391 		if (optlen < sizeof(tv32))
392 			return -EINVAL;
393 
394 		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
395 			return -EFAULT;
396 		tv.tv_sec = tv32.tv_sec;
397 		tv.tv_usec = tv32.tv_usec;
398 	} else if (old_timeval) {
399 		struct __kernel_old_timeval old_tv;
400 
401 		if (optlen < sizeof(old_tv))
402 			return -EINVAL;
403 		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
404 			return -EFAULT;
405 		tv.tv_sec = old_tv.tv_sec;
406 		tv.tv_usec = old_tv.tv_usec;
407 	} else {
408 		if (optlen < sizeof(tv))
409 			return -EINVAL;
410 		if (copy_from_sockptr(&tv, optval, sizeof(tv)))
411 			return -EFAULT;
412 	}
413 	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
414 		return -EDOM;
415 
416 	if (tv.tv_sec < 0) {
417 		static int warned __read_mostly;
418 
419 		*timeo_p = 0;
420 		if (warned < 10 && net_ratelimit()) {
421 			warned++;
422 			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
423 				__func__, current->comm, task_pid_nr(current));
424 		}
425 		return 0;
426 	}
427 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
428 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
429 		return 0;
430 	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
431 		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
432 	return 0;
433 }
434 
435 static bool sock_needs_netstamp(const struct sock *sk)
436 {
437 	switch (sk->sk_family) {
438 	case AF_UNSPEC:
439 	case AF_UNIX:
440 		return false;
441 	default:
442 		return true;
443 	}
444 }
445 
446 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
447 {
448 	if (sk->sk_flags & flags) {
449 		sk->sk_flags &= ~flags;
450 		if (sock_needs_netstamp(sk) &&
451 		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
452 			net_disable_timestamp();
453 	}
454 }
455 
456 
457 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
458 {
459 	unsigned long flags;
460 	struct sk_buff_head *list = &sk->sk_receive_queue;
461 
462 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
463 		atomic_inc(&sk->sk_drops);
464 		trace_sock_rcvqueue_full(sk, skb);
465 		return -ENOMEM;
466 	}
467 
468 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
469 		atomic_inc(&sk->sk_drops);
470 		return -ENOBUFS;
471 	}
472 
473 	skb->dev = NULL;
474 	skb_set_owner_r(skb, sk);
475 
476 	/* we escape from rcu protected region, make sure we dont leak
477 	 * a norefcounted dst
478 	 */
479 	skb_dst_force(skb);
480 
481 	spin_lock_irqsave(&list->lock, flags);
482 	sock_skb_set_dropcount(sk, skb);
483 	__skb_queue_tail(list, skb);
484 	spin_unlock_irqrestore(&list->lock, flags);
485 
486 	if (!sock_flag(sk, SOCK_DEAD))
487 		sk->sk_data_ready(sk);
488 	return 0;
489 }
490 EXPORT_SYMBOL(__sock_queue_rcv_skb);
491 
/*
 * Run the socket filter on @skb and, if it passes, queue it with
 * __sock_queue_rcv_skb().
 *
 * Returns the non-zero sk_filter() result if there is one, otherwise the
 * result of __sock_queue_rcv_skb().
 */
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int rc = sk_filter(sk, skb);

	return rc ? rc : __sock_queue_rcv_skb(sk, skb);
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
503 
/*
 * Deliver @skb to @sk, either directly or through the socket backlog.
 *
 * @nested:     non-zero selects bh_lock_sock_nested() instead of
 *              bh_lock_sock() for taking the socket spinlock
 * @trim_cap:   passed through to sk_filter_trim_cap()
 * @refcounted: when true, a reference on @sk is dropped with sock_put()
 *              before returning, on every path
 *
 * The skb is freed here when the filter rejects it, when the receive queues
 * are full or when it cannot be added to the backlog; the last two cases
 * also increment sk->sk_drops.
 *
 * Returns the result of sk_backlog_rcv() when the skb was processed
 * directly, NET_RX_SUCCESS otherwise (including the drop paths).
 */
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
		     const int nested, unsigned int trim_cap, bool refcounted)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter_trim_cap(sk, skb, trim_cap))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
		/* Backlog refused the skb: unlock before taking the drop path. */
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	if (refcounted)
		sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);
547 
548 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
549 							  u32));
550 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
551 							   u32));
552 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
553 {
554 	struct dst_entry *dst = __sk_dst_get(sk);
555 
556 	if (dst && dst->obsolete &&
557 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
558 			       dst, cookie) == NULL) {
559 		sk_tx_queue_clear(sk);
560 		sk->sk_dst_pending_confirm = 0;
561 		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
562 		dst_release(dst);
563 		return NULL;
564 	}
565 
566 	return dst;
567 }
568 EXPORT_SYMBOL(__sk_dst_check);
569 
570 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
571 {
572 	struct dst_entry *dst = sk_dst_get(sk);
573 
574 	if (dst && dst->obsolete &&
575 	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
576 			       dst, cookie) == NULL) {
577 		sk_dst_reset(sk);
578 		dst_release(dst);
579 		return NULL;
580 	}
581 
582 	return dst;
583 }
584 EXPORT_SYMBOL(sk_dst_check);
585 
586 static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
587 {
588 	int ret = -ENOPROTOOPT;
589 #ifdef CONFIG_NETDEVICES
590 	struct net *net = sock_net(sk);
591 
592 	/* Sorry... */
593 	ret = -EPERM;
594 	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
595 		goto out;
596 
597 	ret = -EINVAL;
598 	if (ifindex < 0)
599 		goto out;
600 
601 	sk->sk_bound_dev_if = ifindex;
602 	if (sk->sk_prot->rehash)
603 		sk->sk_prot->rehash(sk);
604 	sk_dst_reset(sk);
605 
606 	ret = 0;
607 
608 out:
609 #endif
610 
611 	return ret;
612 }
613 
614 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
615 {
616 	int ret;
617 
618 	if (lock_sk)
619 		lock_sock(sk);
620 	ret = sock_bindtoindex_locked(sk, ifindex);
621 	if (lock_sk)
622 		release_sock(sk);
623 
624 	return ret;
625 }
626 EXPORT_SYMBOL(sock_bindtoindex);
627 
628 static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
629 {
630 	int ret = -ENOPROTOOPT;
631 #ifdef CONFIG_NETDEVICES
632 	struct net *net = sock_net(sk);
633 	char devname[IFNAMSIZ];
634 	int index;
635 
636 	ret = -EINVAL;
637 	if (optlen < 0)
638 		goto out;
639 
640 	/* Bind this socket to a particular device like "eth0",
641 	 * as specified in the passed interface name. If the
642 	 * name is "" or the option length is zero the socket
643 	 * is not bound.
644 	 */
645 	if (optlen > IFNAMSIZ - 1)
646 		optlen = IFNAMSIZ - 1;
647 	memset(devname, 0, sizeof(devname));
648 
649 	ret = -EFAULT;
650 	if (copy_from_sockptr(devname, optval, optlen))
651 		goto out;
652 
653 	index = 0;
654 	if (devname[0] != '\0') {
655 		struct net_device *dev;
656 
657 		rcu_read_lock();
658 		dev = dev_get_by_name_rcu(net, devname);
659 		if (dev)
660 			index = dev->ifindex;
661 		rcu_read_unlock();
662 		ret = -ENODEV;
663 		if (!dev)
664 			goto out;
665 	}
666 
667 	return sock_bindtoindex(sk, index, true);
668 out:
669 #endif
670 
671 	return ret;
672 }
673 
674 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
675 				int __user *optlen, int len)
676 {
677 	int ret = -ENOPROTOOPT;
678 #ifdef CONFIG_NETDEVICES
679 	struct net *net = sock_net(sk);
680 	char devname[IFNAMSIZ];
681 
682 	if (sk->sk_bound_dev_if == 0) {
683 		len = 0;
684 		goto zero;
685 	}
686 
687 	ret = -EINVAL;
688 	if (len < IFNAMSIZ)
689 		goto out;
690 
691 	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
692 	if (ret)
693 		goto out;
694 
695 	len = strlen(devname) + 1;
696 
697 	ret = -EFAULT;
698 	if (copy_to_user(optval, devname, len))
699 		goto out;
700 
701 zero:
702 	ret = -EFAULT;
703 	if (put_user(len, optlen))
704 		goto out;
705 
706 	ret = 0;
707 
708 out:
709 #endif
710 
711 	return ret;
712 }
713 
714 bool sk_mc_loop(struct sock *sk)
715 {
716 	if (dev_recursion_level())
717 		return false;
718 	if (!sk)
719 		return true;
720 	switch (sk->sk_family) {
721 	case AF_INET:
722 		return inet_sk(sk)->mc_loop;
723 #if IS_ENABLED(CONFIG_IPV6)
724 	case AF_INET6:
725 		return inet6_sk(sk)->mc_loop;
726 #endif
727 	}
728 	WARN_ON_ONCE(1);
729 	return true;
730 }
731 EXPORT_SYMBOL(sk_mc_loop);
732 
/* Set sk->sk_reuse to SK_CAN_REUSE under the socket lock (the value
 * sock_setsockopt() stores for a non-zero SO_REUSEADDR).
 */
void sock_set_reuseaddr(struct sock *sk)
{
	lock_sock(sk);
	sk->sk_reuse = SK_CAN_REUSE;
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_reuseaddr);
740 
/* Set sk->sk_reuseport under the socket lock (the field sock_setsockopt()
 * writes for SO_REUSEPORT).
 */
void sock_set_reuseport(struct sock *sk)
{
	lock_sock(sk);
	sk->sk_reuseport = true;
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_reuseport);
748 
/* Enable SOCK_LINGER with a linger time of zero, under the socket lock. */
void sock_no_linger(struct sock *sk)
{
	lock_sock(sk);
	sk->sk_lingertime = 0;
	sock_set_flag(sk, SOCK_LINGER);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_no_linger);
757 
/* Store @priority in sk->sk_priority under the socket lock. No range or
 * capability check is applied here.
 */
void sock_set_priority(struct sock *sk, u32 priority)
{
	lock_sock(sk);
	sk->sk_priority = priority;
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
765 
766 void sock_set_sndtimeo(struct sock *sk, s64 secs)
767 {
768 	lock_sock(sk);
769 	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
770 		sk->sk_sndtimeo = secs * HZ;
771 	else
772 		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
773 	release_sock(sk);
774 }
775 EXPORT_SYMBOL(sock_set_sndtimeo);
776 
777 static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
778 {
779 	if (val)  {
780 		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
781 		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
782 		sock_set_flag(sk, SOCK_RCVTSTAMP);
783 		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
784 	} else {
785 		sock_reset_flag(sk, SOCK_RCVTSTAMP);
786 		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
787 	}
788 }
789 
/* Enable receive timestamps on @sk under the socket lock, with
 * SOCK_RCVTSTAMPNS set and SOCK_TSTAMP_NEW cleared (the same arguments
 * sock_set_timestamp() uses for SO_TIMESTAMPNS_OLD).
 */
void sock_enable_timestamps(struct sock *sk)
{
	lock_sock(sk);
	__sock_set_timestamps(sk, true, false, true);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_enable_timestamps);
797 
798 void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
799 {
800 	switch (optname) {
801 	case SO_TIMESTAMP_OLD:
802 		__sock_set_timestamps(sk, valbool, false, false);
803 		break;
804 	case SO_TIMESTAMP_NEW:
805 		__sock_set_timestamps(sk, valbool, true, false);
806 		break;
807 	case SO_TIMESTAMPNS_OLD:
808 		__sock_set_timestamps(sk, valbool, false, true);
809 		break;
810 	case SO_TIMESTAMPNS_NEW:
811 		__sock_set_timestamps(sk, valbool, true, true);
812 		break;
813 	}
814 }
815 
816 static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
817 {
818 	struct net *net = sock_net(sk);
819 	struct net_device *dev = NULL;
820 	bool match = false;
821 	int *vclock_index;
822 	int i, num;
823 
824 	if (sk->sk_bound_dev_if)
825 		dev = dev_get_by_index(net, sk->sk_bound_dev_if);
826 
827 	if (!dev) {
828 		pr_err("%s: sock not bind to device\n", __func__);
829 		return -EOPNOTSUPP;
830 	}
831 
832 	num = ethtool_get_phc_vclocks(dev, &vclock_index);
833 	for (i = 0; i < num; i++) {
834 		if (*(vclock_index + i) == phc_index) {
835 			match = true;
836 			break;
837 		}
838 	}
839 
840 	if (num > 0)
841 		kfree(vclock_index);
842 
843 	if (!match)
844 		return -EINVAL;
845 
846 	sk->sk_bind_phc = phc_index;
847 
848 	return 0;
849 }
850 
851 int sock_set_timestamping(struct sock *sk, int optname,
852 			  struct so_timestamping timestamping)
853 {
854 	int val = timestamping.flags;
855 	int ret;
856 
857 	if (val & ~SOF_TIMESTAMPING_MASK)
858 		return -EINVAL;
859 
860 	if (val & SOF_TIMESTAMPING_OPT_ID &&
861 	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
862 		if (sk->sk_protocol == IPPROTO_TCP &&
863 		    sk->sk_type == SOCK_STREAM) {
864 			if ((1 << sk->sk_state) &
865 			    (TCPF_CLOSE | TCPF_LISTEN))
866 				return -EINVAL;
867 			sk->sk_tskey = tcp_sk(sk)->snd_una;
868 		} else {
869 			sk->sk_tskey = 0;
870 		}
871 	}
872 
873 	if (val & SOF_TIMESTAMPING_OPT_STATS &&
874 	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
875 		return -EINVAL;
876 
877 	if (val & SOF_TIMESTAMPING_BIND_PHC) {
878 		ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
879 		if (ret)
880 			return ret;
881 	}
882 
883 	sk->sk_tsflags = val;
884 	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
885 
886 	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
887 		sock_enable_timestamp(sk,
888 				      SOCK_TIMESTAMPING_RX_SOFTWARE);
889 	else
890 		sock_disable_timestamp(sk,
891 				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
892 	return 0;
893 }
894 
895 void sock_set_keepalive(struct sock *sk)
896 {
897 	lock_sock(sk);
898 	if (sk->sk_prot->keepalive)
899 		sk->sk_prot->keepalive(sk, true);
900 	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
901 	release_sock(sk);
902 }
903 EXPORT_SYMBOL(sock_set_keepalive);
904 
905 static void __sock_set_rcvbuf(struct sock *sk, int val)
906 {
907 	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
908 	 * as a negative value.
909 	 */
910 	val = min_t(int, val, INT_MAX / 2);
911 	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
912 
913 	/* We double it on the way in to account for "struct sk_buff" etc.
914 	 * overhead.   Applications assume that the SO_RCVBUF setting they make
915 	 * will allow that much actual data to be received on that socket.
916 	 *
917 	 * Applications are unaware that "struct sk_buff" and other overheads
918 	 * allocate from the receive buffer during socket buffer allocation.
919 	 *
920 	 * And after considering the possible alternatives, returning the value
921 	 * we actually used in getsockopt is the most desirable behavior.
922 	 */
923 	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
924 }
925 
/* Set the receive buffer size of @sk from @val under the socket lock; see
 * __sock_set_rcvbuf() for how @val is doubled and clamped. Unlike the
 * SO_RCVBUF path in sock_setsockopt(), @val is not limited to
 * sysctl_rmem_max here.
 */
void sock_set_rcvbuf(struct sock *sk, int val)
{
	lock_sock(sk);
	__sock_set_rcvbuf(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_rcvbuf);
933 
934 static void __sock_set_mark(struct sock *sk, u32 val)
935 {
936 	if (val != sk->sk_mark) {
937 		sk->sk_mark = val;
938 		sk_dst_reset(sk);
939 	}
940 }
941 
/* Set the mark of @sk to @val under the socket lock; the cached dst is
 * reset if the mark changes (see __sock_set_mark()).
 */
void sock_set_mark(struct sock *sk, u32 val)
{
	lock_sock(sk);
	__sock_set_mark(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_mark);
949 
950 /*
951  *	This is meant for all protocols to use and covers goings on
952  *	at the socket level. Everything here is generic.
953  */
954 
955 int sock_setsockopt(struct socket *sock, int level, int optname,
956 		    sockptr_t optval, unsigned int optlen)
957 {
958 	struct so_timestamping timestamping;
959 	struct sock_txtime sk_txtime;
960 	struct sock *sk = sock->sk;
961 	int val;
962 	int valbool;
963 	struct linger ling;
964 	int ret = 0;
965 
966 	/*
967 	 *	Options without arguments
968 	 */
969 
970 	if (optname == SO_BINDTODEVICE)
971 		return sock_setbindtodevice(sk, optval, optlen);
972 
973 	if (optlen < sizeof(int))
974 		return -EINVAL;
975 
976 	if (copy_from_sockptr(&val, optval, sizeof(val)))
977 		return -EFAULT;
978 
979 	valbool = val ? 1 : 0;
980 
981 	lock_sock(sk);
982 
983 	switch (optname) {
984 	case SO_DEBUG:
985 		if (val && !capable(CAP_NET_ADMIN))
986 			ret = -EACCES;
987 		else
988 			sock_valbool_flag(sk, SOCK_DBG, valbool);
989 		break;
990 	case SO_REUSEADDR:
991 		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
992 		break;
993 	case SO_REUSEPORT:
994 		sk->sk_reuseport = valbool;
995 		break;
996 	case SO_TYPE:
997 	case SO_PROTOCOL:
998 	case SO_DOMAIN:
999 	case SO_ERROR:
1000 		ret = -ENOPROTOOPT;
1001 		break;
1002 	case SO_DONTROUTE:
1003 		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
1004 		sk_dst_reset(sk);
1005 		break;
1006 	case SO_BROADCAST:
1007 		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
1008 		break;
1009 	case SO_SNDBUF:
1010 		/* Don't error on this BSD doesn't and if you think
1011 		 * about it this is right. Otherwise apps have to
1012 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
1013 		 * are treated in BSD as hints
1014 		 */
1015 		val = min_t(u32, val, sysctl_wmem_max);
1016 set_sndbuf:
1017 		/* Ensure val * 2 fits into an int, to prevent max_t()
1018 		 * from treating it as a negative value.
1019 		 */
1020 		val = min_t(int, val, INT_MAX / 2);
1021 		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
1022 		WRITE_ONCE(sk->sk_sndbuf,
1023 			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
1024 		/* Wake up sending tasks if we upped the value. */
1025 		sk->sk_write_space(sk);
1026 		break;
1027 
1028 	case SO_SNDBUFFORCE:
1029 		if (!capable(CAP_NET_ADMIN)) {
1030 			ret = -EPERM;
1031 			break;
1032 		}
1033 
1034 		/* No negative values (to prevent underflow, as val will be
1035 		 * multiplied by 2).
1036 		 */
1037 		if (val < 0)
1038 			val = 0;
1039 		goto set_sndbuf;
1040 
1041 	case SO_RCVBUF:
1042 		/* Don't error on this BSD doesn't and if you think
1043 		 * about it this is right. Otherwise apps have to
1044 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
1045 		 * are treated in BSD as hints
1046 		 */
1047 		__sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
1048 		break;
1049 
1050 	case SO_RCVBUFFORCE:
1051 		if (!capable(CAP_NET_ADMIN)) {
1052 			ret = -EPERM;
1053 			break;
1054 		}
1055 
1056 		/* No negative values (to prevent underflow, as val will be
1057 		 * multiplied by 2).
1058 		 */
1059 		__sock_set_rcvbuf(sk, max(val, 0));
1060 		break;
1061 
1062 	case SO_KEEPALIVE:
1063 		if (sk->sk_prot->keepalive)
1064 			sk->sk_prot->keepalive(sk, valbool);
1065 		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
1066 		break;
1067 
1068 	case SO_OOBINLINE:
1069 		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
1070 		break;
1071 
1072 	case SO_NO_CHECK:
1073 		sk->sk_no_check_tx = valbool;
1074 		break;
1075 
1076 	case SO_PRIORITY:
1077 		if ((val >= 0 && val <= 6) ||
1078 		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1079 			sk->sk_priority = val;
1080 		else
1081 			ret = -EPERM;
1082 		break;
1083 
1084 	case SO_LINGER:
1085 		if (optlen < sizeof(ling)) {
1086 			ret = -EINVAL;	/* 1003.1g */
1087 			break;
1088 		}
1089 		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
1090 			ret = -EFAULT;
1091 			break;
1092 		}
1093 		if (!ling.l_onoff)
1094 			sock_reset_flag(sk, SOCK_LINGER);
1095 		else {
1096 #if (BITS_PER_LONG == 32)
1097 			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
1098 				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
1099 			else
1100 #endif
1101 				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
1102 			sock_set_flag(sk, SOCK_LINGER);
1103 		}
1104 		break;
1105 
1106 	case SO_BSDCOMPAT:
1107 		break;
1108 
1109 	case SO_PASSCRED:
1110 		if (valbool)
1111 			set_bit(SOCK_PASSCRED, &sock->flags);
1112 		else
1113 			clear_bit(SOCK_PASSCRED, &sock->flags);
1114 		break;
1115 
1116 	case SO_TIMESTAMP_OLD:
1117 	case SO_TIMESTAMP_NEW:
1118 	case SO_TIMESTAMPNS_OLD:
1119 	case SO_TIMESTAMPNS_NEW:
1120 		sock_set_timestamp(sk, optname, valbool);
1121 		break;
1122 
1123 	case SO_TIMESTAMPING_NEW:
1124 	case SO_TIMESTAMPING_OLD:
1125 		if (optlen == sizeof(timestamping)) {
1126 			if (copy_from_sockptr(&timestamping, optval,
1127 					      sizeof(timestamping))) {
1128 				ret = -EFAULT;
1129 				break;
1130 			}
1131 		} else {
1132 			memset(&timestamping, 0, sizeof(timestamping));
1133 			timestamping.flags = val;
1134 		}
1135 		ret = sock_set_timestamping(sk, optname, timestamping);
1136 		break;
1137 
1138 	case SO_RCVLOWAT:
1139 		if (val < 0)
1140 			val = INT_MAX;
1141 		if (sock->ops->set_rcvlowat)
1142 			ret = sock->ops->set_rcvlowat(sk, val);
1143 		else
1144 			WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1145 		break;
1146 
1147 	case SO_RCVTIMEO_OLD:
1148 	case SO_RCVTIMEO_NEW:
1149 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1150 				       optlen, optname == SO_RCVTIMEO_OLD);
1151 		break;
1152 
1153 	case SO_SNDTIMEO_OLD:
1154 	case SO_SNDTIMEO_NEW:
1155 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1156 				       optlen, optname == SO_SNDTIMEO_OLD);
1157 		break;
1158 
1159 	case SO_ATTACH_FILTER: {
1160 		struct sock_fprog fprog;
1161 
1162 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1163 		if (!ret)
1164 			ret = sk_attach_filter(&fprog, sk);
1165 		break;
1166 	}
1167 	case SO_ATTACH_BPF:
1168 		ret = -EINVAL;
1169 		if (optlen == sizeof(u32)) {
1170 			u32 ufd;
1171 
1172 			ret = -EFAULT;
1173 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1174 				break;
1175 
1176 			ret = sk_attach_bpf(ufd, sk);
1177 		}
1178 		break;
1179 
1180 	case SO_ATTACH_REUSEPORT_CBPF: {
1181 		struct sock_fprog fprog;
1182 
1183 		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1184 		if (!ret)
1185 			ret = sk_reuseport_attach_filter(&fprog, sk);
1186 		break;
1187 	}
1188 	case SO_ATTACH_REUSEPORT_EBPF:
1189 		ret = -EINVAL;
1190 		if (optlen == sizeof(u32)) {
1191 			u32 ufd;
1192 
1193 			ret = -EFAULT;
1194 			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1195 				break;
1196 
1197 			ret = sk_reuseport_attach_bpf(ufd, sk);
1198 		}
1199 		break;
1200 
1201 	case SO_DETACH_REUSEPORT_BPF:
1202 		ret = reuseport_detach_prog(sk);
1203 		break;
1204 
1205 	case SO_DETACH_FILTER:
1206 		ret = sk_detach_filter(sk);
1207 		break;
1208 
1209 	case SO_LOCK_FILTER:
1210 		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1211 			ret = -EPERM;
1212 		else
1213 			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1214 		break;
1215 
1216 	case SO_PASSSEC:
1217 		if (valbool)
1218 			set_bit(SOCK_PASSSEC, &sock->flags);
1219 		else
1220 			clear_bit(SOCK_PASSSEC, &sock->flags);
1221 		break;
1222 	case SO_MARK:
1223 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1224 			ret = -EPERM;
1225 			break;
1226 		}
1227 
1228 		__sock_set_mark(sk, val);
1229 		break;
1230 
1231 	case SO_RXQ_OVFL:
1232 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1233 		break;
1234 
1235 	case SO_WIFI_STATUS:
1236 		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1237 		break;
1238 
1239 	case SO_PEEK_OFF:
1240 		if (sock->ops->set_peek_off)
1241 			ret = sock->ops->set_peek_off(sk, val);
1242 		else
1243 			ret = -EOPNOTSUPP;
1244 		break;
1245 
1246 	case SO_NOFCS:
1247 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1248 		break;
1249 
1250 	case SO_SELECT_ERR_QUEUE:
1251 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1252 		break;
1253 
1254 #ifdef CONFIG_NET_RX_BUSY_POLL
1255 	case SO_BUSY_POLL:
1256 		/* allow unprivileged users to decrease the value */
1257 		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1258 			ret = -EPERM;
1259 		else {
1260 			if (val < 0)
1261 				ret = -EINVAL;
1262 			else
1263 				WRITE_ONCE(sk->sk_ll_usec, val);
1264 		}
1265 		break;
1266 	case SO_PREFER_BUSY_POLL:
1267 		if (valbool && !capable(CAP_NET_ADMIN))
1268 			ret = -EPERM;
1269 		else
1270 			WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
1271 		break;
1272 	case SO_BUSY_POLL_BUDGET:
1273 		if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) {
1274 			ret = -EPERM;
1275 		} else {
1276 			if (val < 0 || val > U16_MAX)
1277 				ret = -EINVAL;
1278 			else
1279 				WRITE_ONCE(sk->sk_busy_poll_budget, val);
1280 		}
1281 		break;
1282 #endif
1283 
1284 	case SO_MAX_PACING_RATE:
1285 		{
1286 		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
1287 
1288 		if (sizeof(ulval) != sizeof(val) &&
1289 		    optlen >= sizeof(ulval) &&
1290 		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
1291 			ret = -EFAULT;
1292 			break;
1293 		}
1294 		if (ulval != ~0UL)
1295 			cmpxchg(&sk->sk_pacing_status,
1296 				SK_PACING_NONE,
1297 				SK_PACING_NEEDED);
1298 		sk->sk_max_pacing_rate = ulval;
1299 		sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
1300 		break;
1301 		}
1302 	case SO_INCOMING_CPU:
1303 		WRITE_ONCE(sk->sk_incoming_cpu, val);
1304 		break;
1305 
1306 	case SO_CNX_ADVICE:
1307 		if (val == 1)
1308 			dst_negative_advice(sk);
1309 		break;
1310 
1311 	case SO_ZEROCOPY:
1312 		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
1313 			if (!((sk->sk_type == SOCK_STREAM &&
1314 			       sk->sk_protocol == IPPROTO_TCP) ||
1315 			      (sk->sk_type == SOCK_DGRAM &&
1316 			       sk->sk_protocol == IPPROTO_UDP)))
1317 				ret = -ENOTSUPP;
1318 		} else if (sk->sk_family != PF_RDS) {
1319 			ret = -ENOTSUPP;
1320 		}
1321 		if (!ret) {
1322 			if (val < 0 || val > 1)
1323 				ret = -EINVAL;
1324 			else
1325 				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
1326 		}
1327 		break;
1328 
1329 	case SO_TXTIME:
1330 		if (optlen != sizeof(struct sock_txtime)) {
1331 			ret = -EINVAL;
1332 			break;
1333 		} else if (copy_from_sockptr(&sk_txtime, optval,
1334 			   sizeof(struct sock_txtime))) {
1335 			ret = -EFAULT;
1336 			break;
1337 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
1338 			ret = -EINVAL;
1339 			break;
1340 		}
1341 		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1342 		 * scheduler has enough safe guards.
1343 		 */
1344 		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1345 		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1346 			ret = -EPERM;
1347 			break;
1348 		}
1349 		sock_valbool_flag(sk, SOCK_TXTIME, true);
1350 		sk->sk_clockid = sk_txtime.clockid;
1351 		sk->sk_txtime_deadline_mode =
1352 			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
1353 		sk->sk_txtime_report_errors =
1354 			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
1355 		break;
1356 
1357 	case SO_BINDTOIFINDEX:
1358 		ret = sock_bindtoindex_locked(sk, val);
1359 		break;
1360 
1361 	case SO_BUF_LOCK:
1362 		if (val & ~SOCK_BUF_LOCK_MASK) {
1363 			ret = -EINVAL;
1364 			break;
1365 		}
1366 		sk->sk_userlocks = val | (sk->sk_userlocks &
1367 					  ~SOCK_BUF_LOCK_MASK);
1368 		break;
1369 
1370 	default:
1371 		ret = -ENOPROTOOPT;
1372 		break;
1373 	}
1374 	release_sock(sk);
1375 	return ret;
1376 }
1377 EXPORT_SYMBOL(sock_setsockopt);
1378 
1379 
1380 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1381 			  struct ucred *ucred)
1382 {
1383 	ucred->pid = pid_vnr(pid);
1384 	ucred->uid = ucred->gid = -1;
1385 	if (cred) {
1386 		struct user_namespace *current_ns = current_user_ns();
1387 
1388 		ucred->uid = from_kuid_munged(current_ns, cred->euid);
1389 		ucred->gid = from_kgid_munged(current_ns, cred->egid);
1390 	}
1391 }
1392 
1393 static int groups_to_user(gid_t __user *dst, const struct group_info *src)
1394 {
1395 	struct user_namespace *user_ns = current_user_ns();
1396 	int i;
1397 
1398 	for (i = 0; i < src->ngroups; i++)
1399 		if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
1400 			return -EFAULT;
1401 
1402 	return 0;
1403 }
1404 
1405 int sock_getsockopt(struct socket *sock, int level, int optname,
1406 		    char __user *optval, int __user *optlen)
1407 {
1408 	struct sock *sk = sock->sk;
1409 
1410 	union {
1411 		int val;
1412 		u64 val64;
1413 		unsigned long ulval;
1414 		struct linger ling;
1415 		struct old_timeval32 tm32;
1416 		struct __kernel_old_timeval tm;
1417 		struct  __kernel_sock_timeval stm;
1418 		struct sock_txtime txtime;
1419 		struct so_timestamping timestamping;
1420 	} v;
1421 
1422 	int lv = sizeof(int);
1423 	int len;
1424 
1425 	if (get_user(len, optlen))
1426 		return -EFAULT;
1427 	if (len < 0)
1428 		return -EINVAL;
1429 
1430 	memset(&v, 0, sizeof(v));
1431 
1432 	switch (optname) {
1433 	case SO_DEBUG:
1434 		v.val = sock_flag(sk, SOCK_DBG);
1435 		break;
1436 
1437 	case SO_DONTROUTE:
1438 		v.val = sock_flag(sk, SOCK_LOCALROUTE);
1439 		break;
1440 
1441 	case SO_BROADCAST:
1442 		v.val = sock_flag(sk, SOCK_BROADCAST);
1443 		break;
1444 
1445 	case SO_SNDBUF:
1446 		v.val = sk->sk_sndbuf;
1447 		break;
1448 
1449 	case SO_RCVBUF:
1450 		v.val = sk->sk_rcvbuf;
1451 		break;
1452 
1453 	case SO_REUSEADDR:
1454 		v.val = sk->sk_reuse;
1455 		break;
1456 
1457 	case SO_REUSEPORT:
1458 		v.val = sk->sk_reuseport;
1459 		break;
1460 
1461 	case SO_KEEPALIVE:
1462 		v.val = sock_flag(sk, SOCK_KEEPOPEN);
1463 		break;
1464 
1465 	case SO_TYPE:
1466 		v.val = sk->sk_type;
1467 		break;
1468 
1469 	case SO_PROTOCOL:
1470 		v.val = sk->sk_protocol;
1471 		break;
1472 
1473 	case SO_DOMAIN:
1474 		v.val = sk->sk_family;
1475 		break;
1476 
1477 	case SO_ERROR:
1478 		v.val = -sock_error(sk);
1479 		if (v.val == 0)
1480 			v.val = xchg(&sk->sk_err_soft, 0);
1481 		break;
1482 
1483 	case SO_OOBINLINE:
1484 		v.val = sock_flag(sk, SOCK_URGINLINE);
1485 		break;
1486 
1487 	case SO_NO_CHECK:
1488 		v.val = sk->sk_no_check_tx;
1489 		break;
1490 
1491 	case SO_PRIORITY:
1492 		v.val = sk->sk_priority;
1493 		break;
1494 
1495 	case SO_LINGER:
1496 		lv		= sizeof(v.ling);
1497 		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
1498 		v.ling.l_linger	= sk->sk_lingertime / HZ;
1499 		break;
1500 
1501 	case SO_BSDCOMPAT:
1502 		break;
1503 
1504 	case SO_TIMESTAMP_OLD:
1505 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1506 				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
1507 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
1508 		break;
1509 
1510 	case SO_TIMESTAMPNS_OLD:
1511 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1512 		break;
1513 
1514 	case SO_TIMESTAMP_NEW:
1515 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1516 		break;
1517 
1518 	case SO_TIMESTAMPNS_NEW:
1519 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
1520 		break;
1521 
1522 	case SO_TIMESTAMPING_OLD:
1523 		lv = sizeof(v.timestamping);
1524 		v.timestamping.flags = sk->sk_tsflags;
1525 		v.timestamping.bind_phc = sk->sk_bind_phc;
1526 		break;
1527 
1528 	case SO_RCVTIMEO_OLD:
1529 	case SO_RCVTIMEO_NEW:
1530 		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
1531 		break;
1532 
1533 	case SO_SNDTIMEO_OLD:
1534 	case SO_SNDTIMEO_NEW:
1535 		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
1536 		break;
1537 
1538 	case SO_RCVLOWAT:
1539 		v.val = sk->sk_rcvlowat;
1540 		break;
1541 
1542 	case SO_SNDLOWAT:
1543 		v.val = 1;
1544 		break;
1545 
1546 	case SO_PASSCRED:
1547 		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1548 		break;
1549 
1550 	case SO_PEERCRED:
1551 	{
1552 		struct ucred peercred;
1553 		if (len > sizeof(peercred))
1554 			len = sizeof(peercred);
1555 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1556 		if (copy_to_user(optval, &peercred, len))
1557 			return -EFAULT;
1558 		goto lenout;
1559 	}
1560 
1561 	case SO_PEERGROUPS:
1562 	{
1563 		int ret, n;
1564 
1565 		if (!sk->sk_peer_cred)
1566 			return -ENODATA;
1567 
1568 		n = sk->sk_peer_cred->group_info->ngroups;
1569 		if (len < n * sizeof(gid_t)) {
1570 			len = n * sizeof(gid_t);
1571 			return put_user(len, optlen) ? -EFAULT : -ERANGE;
1572 		}
1573 		len = n * sizeof(gid_t);
1574 
1575 		ret = groups_to_user((gid_t __user *)optval,
1576 				     sk->sk_peer_cred->group_info);
1577 		if (ret)
1578 			return ret;
1579 		goto lenout;
1580 	}
1581 
1582 	case SO_PEERNAME:
1583 	{
1584 		char address[128];
1585 
1586 		lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
1587 		if (lv < 0)
1588 			return -ENOTCONN;
1589 		if (lv < len)
1590 			return -EINVAL;
1591 		if (copy_to_user(optval, address, len))
1592 			return -EFAULT;
1593 		goto lenout;
1594 	}
1595 
1596 	/* Dubious BSD thing... Probably nobody even uses it, but
1597 	 * the UNIX standard wants it for whatever reason... -DaveM
1598 	 */
1599 	case SO_ACCEPTCONN:
1600 		v.val = sk->sk_state == TCP_LISTEN;
1601 		break;
1602 
1603 	case SO_PASSSEC:
1604 		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1605 		break;
1606 
1607 	case SO_PEERSEC:
1608 		return security_socket_getpeersec_stream(sock, optval, optlen, len);
1609 
1610 	case SO_MARK:
1611 		v.val = sk->sk_mark;
1612 		break;
1613 
1614 	case SO_RXQ_OVFL:
1615 		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1616 		break;
1617 
1618 	case SO_WIFI_STATUS:
1619 		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1620 		break;
1621 
1622 	case SO_PEEK_OFF:
1623 		if (!sock->ops->set_peek_off)
1624 			return -EOPNOTSUPP;
1625 
1626 		v.val = sk->sk_peek_off;
1627 		break;
1628 	case SO_NOFCS:
1629 		v.val = sock_flag(sk, SOCK_NOFCS);
1630 		break;
1631 
1632 	case SO_BINDTODEVICE:
1633 		return sock_getbindtodevice(sk, optval, optlen, len);
1634 
1635 	case SO_GET_FILTER:
1636 		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1637 		if (len < 0)
1638 			return len;
1639 
1640 		goto lenout;
1641 
1642 	case SO_LOCK_FILTER:
1643 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1644 		break;
1645 
1646 	case SO_BPF_EXTENSIONS:
1647 		v.val = bpf_tell_extensions();
1648 		break;
1649 
1650 	case SO_SELECT_ERR_QUEUE:
1651 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1652 		break;
1653 
1654 #ifdef CONFIG_NET_RX_BUSY_POLL
1655 	case SO_BUSY_POLL:
1656 		v.val = sk->sk_ll_usec;
1657 		break;
1658 	case SO_PREFER_BUSY_POLL:
1659 		v.val = READ_ONCE(sk->sk_prefer_busy_poll);
1660 		break;
1661 #endif
1662 
1663 	case SO_MAX_PACING_RATE:
1664 		if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1665 			lv = sizeof(v.ulval);
1666 			v.ulval = sk->sk_max_pacing_rate;
1667 		} else {
1668 			/* 32bit version */
1669 			v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
1670 		}
1671 		break;
1672 
1673 	case SO_INCOMING_CPU:
1674 		v.val = READ_ONCE(sk->sk_incoming_cpu);
1675 		break;
1676 
1677 	case SO_MEMINFO:
1678 	{
1679 		u32 meminfo[SK_MEMINFO_VARS];
1680 
1681 		sk_get_meminfo(sk, meminfo);
1682 
1683 		len = min_t(unsigned int, len, sizeof(meminfo));
1684 		if (copy_to_user(optval, &meminfo, len))
1685 			return -EFAULT;
1686 
1687 		goto lenout;
1688 	}
1689 
1690 #ifdef CONFIG_NET_RX_BUSY_POLL
1691 	case SO_INCOMING_NAPI_ID:
1692 		v.val = READ_ONCE(sk->sk_napi_id);
1693 
1694 		/* aggregate non-NAPI IDs down to 0 */
1695 		if (v.val < MIN_NAPI_ID)
1696 			v.val = 0;
1697 
1698 		break;
1699 #endif
1700 
1701 	case SO_COOKIE:
1702 		lv = sizeof(u64);
1703 		if (len < lv)
1704 			return -EINVAL;
1705 		v.val64 = sock_gen_cookie(sk);
1706 		break;
1707 
1708 	case SO_ZEROCOPY:
1709 		v.val = sock_flag(sk, SOCK_ZEROCOPY);
1710 		break;
1711 
1712 	case SO_TXTIME:
1713 		lv = sizeof(v.txtime);
1714 		v.txtime.clockid = sk->sk_clockid;
1715 		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
1716 				  SOF_TXTIME_DEADLINE_MODE : 0;
1717 		v.txtime.flags |= sk->sk_txtime_report_errors ?
1718 				  SOF_TXTIME_REPORT_ERRORS : 0;
1719 		break;
1720 
1721 	case SO_BINDTOIFINDEX:
1722 		v.val = sk->sk_bound_dev_if;
1723 		break;
1724 
1725 	case SO_NETNS_COOKIE:
1726 		lv = sizeof(u64);
1727 		if (len != lv)
1728 			return -EINVAL;
1729 		v.val64 = sock_net(sk)->net_cookie;
1730 		break;
1731 
1732 	case SO_BUF_LOCK:
1733 		v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
1734 		break;
1735 
1736 	default:
1737 		/* We implement the SO_SNDLOWAT etc to not be settable
1738 		 * (1003.1g 7).
1739 		 */
1740 		return -ENOPROTOOPT;
1741 	}
1742 
1743 	if (len > lv)
1744 		len = lv;
1745 	if (copy_to_user(optval, &v, len))
1746 		return -EFAULT;
1747 lenout:
1748 	if (put_user(len, optlen))
1749 		return -EFAULT;
1750 	return 0;
1751 }
1752 
1753 /*
1754  * Initialize an sk_lock.
1755  *
1756  * (We also register the sk_lock with the lock validator.)
1757  */
1758 static inline void sock_lock_init(struct sock *sk)
1759 {
1760 	if (sk->sk_kern_sock)
1761 		sock_lock_init_class_and_name(
1762 			sk,
1763 			af_family_kern_slock_key_strings[sk->sk_family],
1764 			af_family_kern_slock_keys + sk->sk_family,
1765 			af_family_kern_key_strings[sk->sk_family],
1766 			af_family_kern_keys + sk->sk_family);
1767 	else
1768 		sock_lock_init_class_and_name(
1769 			sk,
1770 			af_family_slock_key_strings[sk->sk_family],
1771 			af_family_slock_keys + sk->sk_family,
1772 			af_family_key_strings[sk->sk_family],
1773 			af_family_keys + sk->sk_family);
1774 }
1775 
1776 /*
1777  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1778  * even temporarly, because of RCU lookups. sk_node should also be left as is.
1779  * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
1780  */
1781 static void sock_copy(struct sock *nsk, const struct sock *osk)
1782 {
1783 	const struct proto *prot = READ_ONCE(osk->sk_prot);
1784 #ifdef CONFIG_SECURITY_NETWORK
1785 	void *sptr = nsk->sk_security;
1786 #endif
1787 
1788 	/* If we move sk_tx_queue_mapping out of the private section,
1789 	 * we must check if sk_tx_queue_clear() is called after
1790 	 * sock_copy() in sk_clone_lock().
1791 	 */
1792 	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
1793 		     offsetof(struct sock, sk_dontcopy_begin) ||
1794 		     offsetof(struct sock, sk_tx_queue_mapping) >=
1795 		     offsetof(struct sock, sk_dontcopy_end));
1796 
1797 	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1798 
1799 	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1800 	       prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1801 
1802 #ifdef CONFIG_SECURITY_NETWORK
1803 	nsk->sk_security = sptr;
1804 	security_sk_clone(osk, nsk);
1805 #endif
1806 }
1807 
1808 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1809 		int family)
1810 {
1811 	struct sock *sk;
1812 	struct kmem_cache *slab;
1813 
1814 	slab = prot->slab;
1815 	if (slab != NULL) {
1816 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1817 		if (!sk)
1818 			return sk;
1819 		if (want_init_on_alloc(priority))
1820 			sk_prot_clear_nulls(sk, prot->obj_size);
1821 	} else
1822 		sk = kmalloc(prot->obj_size, priority);
1823 
1824 	if (sk != NULL) {
1825 		if (security_sk_alloc(sk, family, priority))
1826 			goto out_free;
1827 
1828 		if (!try_module_get(prot->owner))
1829 			goto out_free_sec;
1830 	}
1831 
1832 	return sk;
1833 
1834 out_free_sec:
1835 	security_sk_free(sk);
1836 out_free:
1837 	if (slab != NULL)
1838 		kmem_cache_free(slab, sk);
1839 	else
1840 		kfree(sk);
1841 	return NULL;
1842 }
1843 
1844 static void sk_prot_free(struct proto *prot, struct sock *sk)
1845 {
1846 	struct kmem_cache *slab;
1847 	struct module *owner;
1848 
1849 	owner = prot->owner;
1850 	slab = prot->slab;
1851 
1852 	cgroup_sk_free(&sk->sk_cgrp_data);
1853 	mem_cgroup_sk_free(sk);
1854 	security_sk_free(sk);
1855 	if (slab != NULL)
1856 		kmem_cache_free(slab, sk);
1857 	else
1858 		kfree(sk);
1859 	module_put(owner);
1860 }
1861 
1862 /**
1863  *	sk_alloc - All socket objects are allocated here
1864  *	@net: the applicable net namespace
1865  *	@family: protocol family
1866  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1867  *	@prot: struct proto associated with this new sock instance
1868  *	@kern: is this to be a kernel socket?
1869  */
1870 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1871 		      struct proto *prot, int kern)
1872 {
1873 	struct sock *sk;
1874 
1875 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1876 	if (sk) {
1877 		sk->sk_family = family;
1878 		/*
1879 		 * See comment in struct sock definition to understand
1880 		 * why we need sk_prot_creator -acme
1881 		 */
1882 		sk->sk_prot = sk->sk_prot_creator = prot;
1883 		sk->sk_kern_sock = kern;
1884 		sock_lock_init(sk);
1885 		sk->sk_net_refcnt = kern ? 0 : 1;
1886 		if (likely(sk->sk_net_refcnt)) {
1887 			get_net(net);
1888 			sock_inuse_add(net, 1);
1889 		}
1890 
1891 		sock_net_set(sk, net);
1892 		refcount_set(&sk->sk_wmem_alloc, 1);
1893 
1894 		mem_cgroup_sk_alloc(sk);
1895 		cgroup_sk_alloc(&sk->sk_cgrp_data);
1896 		sock_update_classid(&sk->sk_cgrp_data);
1897 		sock_update_netprioidx(&sk->sk_cgrp_data);
1898 		sk_tx_queue_clear(sk);
1899 	}
1900 
1901 	return sk;
1902 }
1903 EXPORT_SYMBOL(sk_alloc);
1904 
/* Final teardown of a socket, reached through sk_destruct() either
 * directly or as an RCU callback.
 *
 * Sockets having SOCK_RCU_FREE will call this function after one RCU
 * grace period. This is the case for UDP sockets and TCP listeners.
 *
 * @head is the sk_rcu member embedded in the socket being destroyed.
 */
static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);
	struct sk_filter *filter;

	/* Run the per-socket destructor first, if one is installed. */
	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	/* Detach and uncharge an attached socket filter. */
	filter = rcu_dereference_check(sk->sk_filter,
				       refcount_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

#ifdef CONFIG_BPF_SYSCALL
	bpf_sk_storage_free(sk);
#endif

	/* Option memory still accounted at this point is only reported. */
	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	/* Drop the cached page fragment, if any. */
	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	/* Release the peer credentials/pid and, for sockets that hold one,
	 * the network namespace reference.
	 */
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	if (likely(sk->sk_net_refcnt))
		put_net(sock_net(sk));
	/* Free through the protocol that created the socket. */
	sk_prot_free(sk->sk_prot_creator, sk);
}
1945 
1946 void sk_destruct(struct sock *sk)
1947 {
1948 	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
1949 
1950 	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
1951 		reuseport_detach_sock(sk);
1952 		use_call_rcu = true;
1953 	}
1954 
1955 	if (use_call_rcu)
1956 		call_rcu(&sk->sk_rcu, __sk_destruct);
1957 	else
1958 		__sk_destruct(&sk->sk_rcu);
1959 }
1960 
1961 static void __sk_free(struct sock *sk)
1962 {
1963 	if (likely(sk->sk_net_refcnt))
1964 		sock_inuse_add(sock_net(sk), -1);
1965 
1966 	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
1967 		sock_diag_broadcast_destroy(sk);
1968 	else
1969 		sk_destruct(sk);
1970 }
1971 
1972 void sk_free(struct sock *sk)
1973 {
1974 	/*
1975 	 * We subtract one from sk_wmem_alloc and can know if
1976 	 * some packets are still in some tx queue.
1977 	 * If not null, sock_wfree() will call __sk_free(sk) later
1978 	 */
1979 	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
1980 		__sk_free(sk);
1981 }
1982 EXPORT_SYMBOL(sk_free);
1983 
1984 static void sk_init_common(struct sock *sk)
1985 {
1986 	skb_queue_head_init(&sk->sk_receive_queue);
1987 	skb_queue_head_init(&sk->sk_write_queue);
1988 	skb_queue_head_init(&sk->sk_error_queue);
1989 
1990 	rwlock_init(&sk->sk_callback_lock);
1991 	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1992 			af_rlock_keys + sk->sk_family,
1993 			af_family_rlock_key_strings[sk->sk_family]);
1994 	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1995 			af_wlock_keys + sk->sk_family,
1996 			af_family_wlock_key_strings[sk->sk_family]);
1997 	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1998 			af_elock_keys + sk->sk_family,
1999 			af_family_elock_key_strings[sk->sk_family]);
2000 	lockdep_set_class_and_name(&sk->sk_callback_lock,
2001 			af_callback_keys + sk->sk_family,
2002 			af_family_clock_key_strings[sk->sk_family]);
2003 }
2004 
2005 /**
2006  *	sk_clone_lock - clone a socket, and lock its clone
2007  *	@sk: the socket to clone
2008  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
2009  *
2010  *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
2011  */
2012 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
2013 {
2014 	struct proto *prot = READ_ONCE(sk->sk_prot);
2015 	struct sk_filter *filter;
2016 	bool is_charged = true;
2017 	struct sock *newsk;
2018 
2019 	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
2020 	if (!newsk)
2021 		goto out;
2022 
2023 	sock_copy(newsk, sk);
2024 
2025 	newsk->sk_prot_creator = prot;
2026 
2027 	/* SANITY */
2028 	if (likely(newsk->sk_net_refcnt))
2029 		get_net(sock_net(newsk));
2030 	sk_node_init(&newsk->sk_node);
2031 	sock_lock_init(newsk);
2032 	bh_lock_sock(newsk);
2033 	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
2034 	newsk->sk_backlog.len = 0;
2035 
2036 	atomic_set(&newsk->sk_rmem_alloc, 0);
2037 
2038 	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
2039 	refcount_set(&newsk->sk_wmem_alloc, 1);
2040 
2041 	atomic_set(&newsk->sk_omem_alloc, 0);
2042 	sk_init_common(newsk);
2043 
2044 	newsk->sk_dst_cache	= NULL;
2045 	newsk->sk_dst_pending_confirm = 0;
2046 	newsk->sk_wmem_queued	= 0;
2047 	newsk->sk_forward_alloc = 0;
2048 	atomic_set(&newsk->sk_drops, 0);
2049 	newsk->sk_send_head	= NULL;
2050 	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
2051 	atomic_set(&newsk->sk_zckey, 0);
2052 
2053 	sock_reset_flag(newsk, SOCK_DONE);
2054 
2055 	/* sk->sk_memcg will be populated at accept() time */
2056 	newsk->sk_memcg = NULL;
2057 
2058 	cgroup_sk_clone(&newsk->sk_cgrp_data);
2059 
2060 	rcu_read_lock();
2061 	filter = rcu_dereference(sk->sk_filter);
2062 	if (filter != NULL)
2063 		/* though it's an empty new sock, the charging may fail
2064 		 * if sysctl_optmem_max was changed between creation of
2065 		 * original socket and cloning
2066 		 */
2067 		is_charged = sk_filter_charge(newsk, filter);
2068 	RCU_INIT_POINTER(newsk->sk_filter, filter);
2069 	rcu_read_unlock();
2070 
2071 	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
2072 		/* We need to make sure that we don't uncharge the new
2073 		 * socket if we couldn't charge it in the first place
2074 		 * as otherwise we uncharge the parent's filter.
2075 		 */
2076 		if (!is_charged)
2077 			RCU_INIT_POINTER(newsk->sk_filter, NULL);
2078 		sk_free_unlock_clone(newsk);
2079 		newsk = NULL;
2080 		goto out;
2081 	}
2082 	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
2083 
2084 	if (bpf_sk_storage_clone(sk, newsk)) {
2085 		sk_free_unlock_clone(newsk);
2086 		newsk = NULL;
2087 		goto out;
2088 	}
2089 
2090 	/* Clear sk_user_data if parent had the pointer tagged
2091 	 * as not suitable for copying when cloning.
2092 	 */
2093 	if (sk_user_data_is_nocopy(newsk))
2094 		newsk->sk_user_data = NULL;
2095 
2096 	newsk->sk_err	   = 0;
2097 	newsk->sk_err_soft = 0;
2098 	newsk->sk_priority = 0;
2099 	newsk->sk_incoming_cpu = raw_smp_processor_id();
2100 	if (likely(newsk->sk_net_refcnt))
2101 		sock_inuse_add(sock_net(newsk), 1);
2102 
2103 	/* Before updating sk_refcnt, we must commit prior changes to memory
2104 	 * (Documentation/RCU/rculist_nulls.rst for details)
2105 	 */
2106 	smp_wmb();
2107 	refcount_set(&newsk->sk_refcnt, 2);
2108 
2109 	/* Increment the counter in the same struct proto as the master
2110 	 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
2111 	 * is the same as sk->sk_prot->socks, as this field was copied
2112 	 * with memcpy).
2113 	 *
2114 	 * This _changes_ the previous behaviour, where
2115 	 * tcp_create_openreq_child always was incrementing the
2116 	 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
2117 	 * to be taken into account in all callers. -acme
2118 	 */
2119 	sk_refcnt_debug_inc(newsk);
2120 	sk_set_socket(newsk, NULL);
2121 	sk_tx_queue_clear(newsk);
2122 	RCU_INIT_POINTER(newsk->sk_wq, NULL);
2123 
2124 	if (newsk->sk_prot->sockets_allocated)
2125 		sk_sockets_allocated_inc(newsk);
2126 
2127 	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
2128 		net_enable_timestamp();
2129 out:
2130 	return newsk;
2131 }
2132 EXPORT_SYMBOL_GPL(sk_clone_lock);
2133 
/*
 * Dispose of a clone produced by sk_clone_lock() that is not going to be
 * used: clear its destructor, release the lock taken by sk_clone_lock()
 * and free it.
 */
void sk_free_unlock_clone(struct sock *sk)
{
	/* It is still a raw copy of the parent, so invalidate the
	 * destructor and do a plain sk_free().
	 */
	sk->sk_destruct = NULL;
	bh_unlock_sock(sk);
	sk_free(sk);
}
2142 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
2143 
2144 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
2145 {
2146 	u32 max_segs = 1;
2147 
2148 	sk_dst_set(sk, dst);
2149 	sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
2150 	if (sk->sk_route_caps & NETIF_F_GSO)
2151 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
2152 	sk->sk_route_caps &= ~sk->sk_route_nocaps;
2153 	if (sk_can_gso(sk)) {
2154 		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
2155 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2156 		} else {
2157 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
2158 			sk->sk_gso_max_size = dst->dev->gso_max_size;
2159 			max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
2160 		}
2161 	}
2162 	sk->sk_gso_max_segs = max_segs;
2163 }
2164 EXPORT_SYMBOL_GPL(sk_setup_caps);
2165 
2166 /*
2167  *	Simple resource managers for sockets.
2168  */
2169 
2170 
2171 /*
2172  * Write buffer destructor automatically called from kfree_skb.
2173  */
2174 void sock_wfree(struct sk_buff *skb)
2175 {
2176 	struct sock *sk = skb->sk;
2177 	unsigned int len = skb->truesize;
2178 
2179 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
2180 		/*
2181 		 * Keep a reference on sk_wmem_alloc, this will be released
2182 		 * after sk_write_space() call
2183 		 */
2184 		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
2185 		sk->sk_write_space(sk);
2186 		len = 1;
2187 	}
2188 	/*
2189 	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
2190 	 * could not do because of in-flight packets
2191 	 */
2192 	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
2193 		__sk_free(sk);
2194 }
2195 EXPORT_SYMBOL(sock_wfree);
2196 
2197 /* This variant of sock_wfree() is used by TCP,
2198  * since it sets SOCK_USE_WRITE_QUEUE.
2199  */
2200 void __sock_wfree(struct sk_buff *skb)
2201 {
2202 	struct sock *sk = skb->sk;
2203 
2204 	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
2205 		__sk_free(sk);
2206 }
2207 
/*
 * Make @sk the owner of @skb on the write side.
 *
 * Any previous owner is dropped via skb_orphan().  For a full socket the
 * skb gets sock_wfree() as destructor and its truesize is added to
 * sk->sk_wmem_alloc; for a non-full socket (CONFIG_INET only) a plain
 * reference is taken instead and sock_edemux() is installed as destructor.
 */
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->sk = sk;
#ifdef CONFIG_INET
	/* Non-full sockets are not charged; hold a reference on them. */
	if (unlikely(!sk_fullsock(sk))) {
		skb->destructor = sock_edemux;
		sock_hold(sk);
		return;
	}
#endif
	skb->destructor = sock_wfree;
	skb_set_hash_from_sk(skb, sk);
	/*
	 * We used to take a refcount on sk, but the following operation
	 * is enough to guarantee sk_free() won't free this sock until
	 * all in-flight packets are completed
	 */
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
2228 EXPORT_SYMBOL(skb_set_owner_w);
2229 
2230 static bool can_skb_orphan_partial(const struct sk_buff *skb)
2231 {
2232 #ifdef CONFIG_TLS_DEVICE
2233 	/* Drivers depend on in-order delivery for crypto offload,
2234 	 * partial orphan breaks out-of-order-OK logic.
2235 	 */
2236 	if (skb->decrypted)
2237 		return false;
2238 #endif
2239 	return (skb->destructor == sock_wfree ||
2240 		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
2241 }
2242 
2243 /* This helper is used by netem, as it can hold packets in its
2244  * delay queue. We want to allow the owner socket to send more
2245  * packets, as if they were already TX completed by a typical driver.
2246  * But we also want to keep skb->sk set because some packet schedulers
2247  * rely on it (sch_fq for example).
2248  */
2249 void skb_orphan_partial(struct sk_buff *skb)
2250 {
2251 	if (skb_is_tcp_pure_ack(skb))
2252 		return;
2253 
2254 	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
2255 		return;
2256 
2257 	skb_orphan(skb);
2258 }
2259 EXPORT_SYMBOL(skb_orphan_partial);
2260 
2261 /*
2262  * Read buffer destructor automatically called from kfree_skb.
2263  */
2264 void sock_rfree(struct sk_buff *skb)
2265 {
2266 	struct sock *sk = skb->sk;
2267 	unsigned int len = skb->truesize;
2268 
2269 	atomic_sub(len, &sk->sk_rmem_alloc);
2270 	sk_mem_uncharge(sk, len);
2271 }
2272 EXPORT_SYMBOL(sock_rfree);
2273 
2274 /*
2275  * Buffer destructor for skbs that are not used directly in read or write
2276  * path, e.g. for error handler skbs. Automatically called from kfree_skb.
2277  */
2278 void sock_efree(struct sk_buff *skb)
2279 {
2280 	sock_put(skb->sk);
2281 }
2282 EXPORT_SYMBOL(sock_efree);
2283 
2284 /* Buffer destructor for prefetch/receive path where reference count may
2285  * not be held, e.g. for listen sockets.
2286  */
2287 #ifdef CONFIG_INET
2288 void sock_pfree(struct sk_buff *skb)
2289 {
2290 	if (sk_is_refcounted(skb->sk))
2291 		sock_gen_put(skb->sk);
2292 }
2293 EXPORT_SYMBOL(sock_pfree);
2294 #endif /* CONFIG_INET */
2295 
2296 kuid_t sock_i_uid(struct sock *sk)
2297 {
2298 	kuid_t uid;
2299 
2300 	read_lock_bh(&sk->sk_callback_lock);
2301 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2302 	read_unlock_bh(&sk->sk_callback_lock);
2303 	return uid;
2304 }
2305 EXPORT_SYMBOL(sock_i_uid);
2306 
2307 unsigned long sock_i_ino(struct sock *sk)
2308 {
2309 	unsigned long ino;
2310 
2311 	read_lock_bh(&sk->sk_callback_lock);
2312 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
2313 	read_unlock_bh(&sk->sk_callback_lock);
2314 	return ino;
2315 }
2316 EXPORT_SYMBOL(sock_i_ino);
2317 
2318 /*
2319  * Allocate a skb from the socket's send buffer.
2320  */
2321 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
2322 			     gfp_t priority)
2323 {
2324 	if (force ||
2325 	    refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
2326 		struct sk_buff *skb = alloc_skb(size, priority);
2327 
2328 		if (skb) {
2329 			skb_set_owner_w(skb, sk);
2330 			return skb;
2331 		}
2332 	}
2333 	return NULL;
2334 }
2335 EXPORT_SYMBOL(sock_wmalloc);
2336 
2337 static void sock_ofree(struct sk_buff *skb)
2338 {
2339 	struct sock *sk = skb->sk;
2340 
2341 	atomic_sub(skb->truesize, &sk->sk_omem_alloc);
2342 }
2343 
2344 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
2345 			     gfp_t priority)
2346 {
2347 	struct sk_buff *skb;
2348 
2349 	/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
2350 	if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2351 	    sysctl_optmem_max)
2352 		return NULL;
2353 
2354 	skb = alloc_skb(size, priority);
2355 	if (!skb)
2356 		return NULL;
2357 
2358 	atomic_add(skb->truesize, &sk->sk_omem_alloc);
2359 	skb->sk = sk;
2360 	skb->destructor = sock_ofree;
2361 	return skb;
2362 }
2363 
2364 /*
2365  * Allocate a memory block from the socket's option memory buffer.
2366  */
2367 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
2368 {
2369 	if ((unsigned int)size <= sysctl_optmem_max &&
2370 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
2371 		void *mem;
2372 		/* First do the add, to avoid the race if kmalloc
2373 		 * might sleep.
2374 		 */
2375 		atomic_add(size, &sk->sk_omem_alloc);
2376 		mem = kmalloc(size, priority);
2377 		if (mem)
2378 			return mem;
2379 		atomic_sub(size, &sk->sk_omem_alloc);
2380 	}
2381 	return NULL;
2382 }
2383 EXPORT_SYMBOL(sock_kmalloc);
2384 
2385 /* Free an option memory block. Note, we actually want the inline
2386  * here as this allows gcc to detect the nullify and fold away the
2387  * condition entirely.
2388  */
2389 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
2390 				  const bool nullify)
2391 {
2392 	if (WARN_ON_ONCE(!mem))
2393 		return;
2394 	if (nullify)
2395 		kfree_sensitive(mem);
2396 	else
2397 		kfree(mem);
2398 	atomic_sub(size, &sk->sk_omem_alloc);
2399 }
2400 
2401 void sock_kfree_s(struct sock *sk, void *mem, int size)
2402 {
2403 	__sock_kfree_s(sk, mem, size, false);
2404 }
2405 EXPORT_SYMBOL(sock_kfree_s);
2406 
2407 void sock_kzfree_s(struct sock *sk, void *mem, int size)
2408 {
2409 	__sock_kfree_s(sk, mem, size, true);
2410 }
2411 EXPORT_SYMBOL(sock_kzfree_s);
2412 
2413 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
2414    I think, these locks should be removed for datagram sockets.
2415  */
2416 static long sock_wait_for_wmem(struct sock *sk, long timeo)
2417 {
2418 	DEFINE_WAIT(wait);
2419 
2420 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2421 	for (;;) {
2422 		if (!timeo)
2423 			break;
2424 		if (signal_pending(current))
2425 			break;
2426 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2427 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2428 		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
2429 			break;
2430 		if (sk->sk_shutdown & SEND_SHUTDOWN)
2431 			break;
2432 		if (sk->sk_err)
2433 			break;
2434 		timeo = schedule_timeout(timeo);
2435 	}
2436 	finish_wait(sk_sleep(sk), &wait);
2437 	return timeo;
2438 }
2439 
2440 
2441 /*
2442  *	Generic send/receive buffer handlers
2443  */
2444 
2445 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
2446 				     unsigned long data_len, int noblock,
2447 				     int *errcode, int max_page_order)
2448 {
2449 	struct sk_buff *skb;
2450 	long timeo;
2451 	int err;
2452 
2453 	timeo = sock_sndtimeo(sk, noblock);
2454 	for (;;) {
2455 		err = sock_error(sk);
2456 		if (err != 0)
2457 			goto failure;
2458 
2459 		err = -EPIPE;
2460 		if (sk->sk_shutdown & SEND_SHUTDOWN)
2461 			goto failure;
2462 
2463 		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
2464 			break;
2465 
2466 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2467 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2468 		err = -EAGAIN;
2469 		if (!timeo)
2470 			goto failure;
2471 		if (signal_pending(current))
2472 			goto interrupted;
2473 		timeo = sock_wait_for_wmem(sk, timeo);
2474 	}
2475 	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2476 				   errcode, sk->sk_allocation);
2477 	if (skb)
2478 		skb_set_owner_w(skb, sk);
2479 	return skb;
2480 
2481 interrupted:
2482 	err = sock_intr_errno(timeo);
2483 failure:
2484 	*errcode = err;
2485 	return NULL;
2486 }
2487 EXPORT_SYMBOL(sock_alloc_send_pskb);
2488 
/*
 * Convenience wrapper around sock_alloc_send_pskb(): @size bytes of linear
 * data, no paged data, page order 0.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	const unsigned long data_len = 0;
	const int max_page_order = 0;

	return sock_alloc_send_pskb(sk, size, data_len, noblock, errcode,
				    max_page_order);
}
2494 EXPORT_SYMBOL(sock_alloc_send_skb);
2495 
2496 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2497 		     struct sockcm_cookie *sockc)
2498 {
2499 	u32 tsflags;
2500 
2501 	switch (cmsg->cmsg_type) {
2502 	case SO_MARK:
2503 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2504 			return -EPERM;
2505 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2506 			return -EINVAL;
2507 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2508 		break;
2509 	case SO_TIMESTAMPING_OLD:
2510 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2511 			return -EINVAL;
2512 
2513 		tsflags = *(u32 *)CMSG_DATA(cmsg);
2514 		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2515 			return -EINVAL;
2516 
2517 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2518 		sockc->tsflags |= tsflags;
2519 		break;
2520 	case SCM_TXTIME:
2521 		if (!sock_flag(sk, SOCK_TXTIME))
2522 			return -EINVAL;
2523 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
2524 			return -EINVAL;
2525 		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
2526 		break;
2527 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2528 	case SCM_RIGHTS:
2529 	case SCM_CREDENTIALS:
2530 		break;
2531 	default:
2532 		return -EINVAL;
2533 	}
2534 	return 0;
2535 }
2536 EXPORT_SYMBOL(__sock_cmsg_send);
2537 
2538 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2539 		   struct sockcm_cookie *sockc)
2540 {
2541 	struct cmsghdr *cmsg;
2542 	int ret;
2543 
2544 	for_each_cmsghdr(cmsg, msg) {
2545 		if (!CMSG_OK(msg, cmsg))
2546 			return -EINVAL;
2547 		if (cmsg->cmsg_level != SOL_SOCKET)
2548 			continue;
2549 		ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2550 		if (ret)
2551 			return ret;
2552 	}
2553 	return 0;
2554 }
2555 EXPORT_SYMBOL(sock_cmsg_send);
2556 
2557 static void sk_enter_memory_pressure(struct sock *sk)
2558 {
2559 	if (!sk->sk_prot->enter_memory_pressure)
2560 		return;
2561 
2562 	sk->sk_prot->enter_memory_pressure(sk);
2563 }
2564 
2565 static void sk_leave_memory_pressure(struct sock *sk)
2566 {
2567 	if (sk->sk_prot->leave_memory_pressure) {
2568 		sk->sk_prot->leave_memory_pressure(sk);
2569 	} else {
2570 		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
2571 
2572 		if (memory_pressure && READ_ONCE(*memory_pressure))
2573 			WRITE_ONCE(*memory_pressure, 0);
2574 	}
2575 }
2576 
/* Page order covering 32768 bytes; skb_page_frag_refill() tries this first. */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)
/* Static key, false by default; when enabled, skb_page_frag_refill()
 * skips the high-order attempt and allocates a single page.
 */
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
2579 
2580 /**
2581  * skb_page_frag_refill - check that a page_frag contains enough room
2582  * @sz: minimum size of the fragment we want to get
2583  * @pfrag: pointer to page_frag
2584  * @gfp: priority for memory allocation
2585  *
2586  * Note: While this allocator tries to use high order pages, there is
2587  * no guarantee that allocations succeed. Therefore, @sz MUST be
2588  * less or equal than PAGE_SIZE.
2589  */
2590 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2591 {
2592 	if (pfrag->page) {
2593 		if (page_ref_count(pfrag->page) == 1) {
2594 			pfrag->offset = 0;
2595 			return true;
2596 		}
2597 		if (pfrag->offset + sz <= pfrag->size)
2598 			return true;
2599 		put_page(pfrag->page);
2600 	}
2601 
2602 	pfrag->offset = 0;
2603 	if (SKB_FRAG_PAGE_ORDER &&
2604 	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
2605 		/* Avoid direct reclaim but allow kswapd to wake */
2606 		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2607 					  __GFP_COMP | __GFP_NOWARN |
2608 					  __GFP_NORETRY,
2609 					  SKB_FRAG_PAGE_ORDER);
2610 		if (likely(pfrag->page)) {
2611 			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2612 			return true;
2613 		}
2614 	}
2615 	pfrag->page = alloc_page(gfp);
2616 	if (likely(pfrag->page)) {
2617 		pfrag->size = PAGE_SIZE;
2618 		return true;
2619 	}
2620 	return false;
2621 }
2622 EXPORT_SYMBOL(skb_page_frag_refill);
2623 
2624 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2625 {
2626 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2627 		return true;
2628 
2629 	sk_enter_memory_pressure(sk);
2630 	sk_stream_moderate_sndbuf(sk);
2631 	return false;
2632 }
2633 EXPORT_SYMBOL(sk_page_frag_refill);
2634 
2635 void __lock_sock(struct sock *sk)
2636 	__releases(&sk->sk_lock.slock)
2637 	__acquires(&sk->sk_lock.slock)
2638 {
2639 	DEFINE_WAIT(wait);
2640 
2641 	for (;;) {
2642 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2643 					TASK_UNINTERRUPTIBLE);
2644 		spin_unlock_bh(&sk->sk_lock.slock);
2645 		schedule();
2646 		spin_lock_bh(&sk->sk_lock.slock);
2647 		if (!sock_owned_by_user(sk))
2648 			break;
2649 	}
2650 	finish_wait(&sk->sk_lock.wq, &wait);
2651 }
2652 
2653 void __release_sock(struct sock *sk)
2654 	__releases(&sk->sk_lock.slock)
2655 	__acquires(&sk->sk_lock.slock)
2656 {
2657 	struct sk_buff *skb, *next;
2658 
2659 	while ((skb = sk->sk_backlog.head) != NULL) {
2660 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2661 
2662 		spin_unlock_bh(&sk->sk_lock.slock);
2663 
2664 		do {
2665 			next = skb->next;
2666 			prefetch(next);
2667 			WARN_ON_ONCE(skb_dst_is_noref(skb));
2668 			skb_mark_not_on_list(skb);
2669 			sk_backlog_rcv(sk, skb);
2670 
2671 			cond_resched();
2672 
2673 			skb = next;
2674 		} while (skb != NULL);
2675 
2676 		spin_lock_bh(&sk->sk_lock.slock);
2677 	}
2678 
2679 	/*
2680 	 * Doing the zeroing here guarantee we can not loop forever
2681 	 * while a wild producer attempts to flood us.
2682 	 */
2683 	sk->sk_backlog.len = 0;
2684 }
2685 
2686 void __sk_flush_backlog(struct sock *sk)
2687 {
2688 	spin_lock_bh(&sk->sk_lock.slock);
2689 	__release_sock(sk);
2690 	spin_unlock_bh(&sk->sk_lock.slock);
2691 }
2692 
2693 /**
2694  * sk_wait_data - wait for data to arrive at sk_receive_queue
2695  * @sk:    sock to wait on
2696  * @timeo: for how long
2697  * @skb:   last skb seen on sk_receive_queue
2698  *
2699  * Now socket state including sk->sk_err is changed only under lock,
2700  * hence we may omit checks after joining wait queue.
2701  * We check receive queue before schedule() only as optimization;
2702  * it is very likely that release_sock() added new data.
2703  */
2704 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2705 {
2706 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
2707 	int rc;
2708 
2709 	add_wait_queue(sk_sleep(sk), &wait);
2710 	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2711 	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2712 	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2713 	remove_wait_queue(sk_sleep(sk), &wait);
2714 	return rc;
2715 }
2716 EXPORT_SYMBOL(sk_wait_data);
2717 
2718 /**
2719  *	__sk_mem_raise_allocated - increase memory_allocated
2720  *	@sk: socket
2721  *	@size: memory size to allocate
2722  *	@amt: pages to allocate
2723  *	@kind: allocation type
2724  *
2725  *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2726  */
2727 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2728 {
2729 	struct proto *prot = sk->sk_prot;
2730 	long allocated = sk_memory_allocated_add(sk, amt);
2731 	bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
2732 	bool charged = true;
2733 
2734 	if (memcg_charge &&
2735 	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
2736 						gfp_memcg_charge())))
2737 		goto suppress_allocation;
2738 
2739 	/* Under limit. */
2740 	if (allocated <= sk_prot_mem_limits(sk, 0)) {
2741 		sk_leave_memory_pressure(sk);
2742 		return 1;
2743 	}
2744 
2745 	/* Under pressure. */
2746 	if (allocated > sk_prot_mem_limits(sk, 1))
2747 		sk_enter_memory_pressure(sk);
2748 
2749 	/* Over hard limit. */
2750 	if (allocated > sk_prot_mem_limits(sk, 2))
2751 		goto suppress_allocation;
2752 
2753 	/* guarantee minimum buffer size under pressure */
2754 	if (kind == SK_MEM_RECV) {
2755 		if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
2756 			return 1;
2757 
2758 	} else { /* SK_MEM_SEND */
2759 		int wmem0 = sk_get_wmem0(sk, prot);
2760 
2761 		if (sk->sk_type == SOCK_STREAM) {
2762 			if (sk->sk_wmem_queued < wmem0)
2763 				return 1;
2764 		} else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
2765 				return 1;
2766 		}
2767 	}
2768 
2769 	if (sk_has_memory_pressure(sk)) {
2770 		u64 alloc;
2771 
2772 		if (!sk_under_memory_pressure(sk))
2773 			return 1;
2774 		alloc = sk_sockets_allocated_read_positive(sk);
2775 		if (sk_prot_mem_limits(sk, 2) > alloc *
2776 		    sk_mem_pages(sk->sk_wmem_queued +
2777 				 atomic_read(&sk->sk_rmem_alloc) +
2778 				 sk->sk_forward_alloc))
2779 			return 1;
2780 	}
2781 
2782 suppress_allocation:
2783 
2784 	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2785 		sk_stream_moderate_sndbuf(sk);
2786 
2787 		/* Fail only if socket is _under_ its sndbuf.
2788 		 * In this case we cannot block, so that we have to fail.
2789 		 */
2790 		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
2791 			/* Force charge with __GFP_NOFAIL */
2792 			if (memcg_charge && !charged) {
2793 				mem_cgroup_charge_skmem(sk->sk_memcg, amt,
2794 					gfp_memcg_charge() | __GFP_NOFAIL);
2795 			}
2796 			return 1;
2797 		}
2798 	}
2799 
2800 	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
2801 		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
2802 
2803 	sk_memory_allocated_sub(sk, amt);
2804 
2805 	if (memcg_charge && charged)
2806 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2807 
2808 	return 0;
2809 }
2810 EXPORT_SYMBOL(__sk_mem_raise_allocated);
2811 
2812 /**
2813  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2814  *	@sk: socket
2815  *	@size: memory size to allocate
2816  *	@kind: allocation type
2817  *
2818  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2819  *	rmem allocation. This function assumes that protocols which have
2820  *	memory_pressure use sk_wmem_queued as write buffer accounting.
2821  */
2822 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2823 {
2824 	int ret, amt = sk_mem_pages(size);
2825 
2826 	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2827 	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2828 	if (!ret)
2829 		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2830 	return ret;
2831 }
2832 EXPORT_SYMBOL(__sk_mem_schedule);
2833 
2834 /**
2835  *	__sk_mem_reduce_allocated - reclaim memory_allocated
2836  *	@sk: socket
2837  *	@amount: number of quanta
2838  *
2839  *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
2840  */
2841 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2842 {
2843 	sk_memory_allocated_sub(sk, amount);
2844 
2845 	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2846 		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2847 
2848 	if (sk_under_memory_pressure(sk) &&
2849 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2850 		sk_leave_memory_pressure(sk);
2851 }
2852 EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2853 
2854 /**
2855  *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2856  *	@sk: socket
2857  *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2858  */
2859 void __sk_mem_reclaim(struct sock *sk, int amount)
2860 {
2861 	amount >>= SK_MEM_QUANTUM_SHIFT;
2862 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2863 	__sk_mem_reduce_allocated(sk, amount);
2864 }
2865 EXPORT_SYMBOL(__sk_mem_reclaim);
2866 
2867 int sk_set_peek_off(struct sock *sk, int val)
2868 {
2869 	sk->sk_peek_off = val;
2870 	return 0;
2871 }
2872 EXPORT_SYMBOL_GPL(sk_set_peek_off);
2873 
2874 /*
2875  * Set of default routines for initialising struct proto_ops when
2876  * the protocol does not support a particular function. In certain
2877  * cases where it makes no sense for a protocol to have a "do nothing"
2878  * function, some default processing is provided.
2879  */
2880 
2881 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2882 {
2883 	return -EOPNOTSUPP;
2884 }
2885 EXPORT_SYMBOL(sock_no_bind);
2886 
2887 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2888 		    int len, int flags)
2889 {
2890 	return -EOPNOTSUPP;
2891 }
2892 EXPORT_SYMBOL(sock_no_connect);
2893 
2894 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2895 {
2896 	return -EOPNOTSUPP;
2897 }
2898 EXPORT_SYMBOL(sock_no_socketpair);
2899 
2900 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
2901 		   bool kern)
2902 {
2903 	return -EOPNOTSUPP;
2904 }
2905 EXPORT_SYMBOL(sock_no_accept);
2906 
2907 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2908 		    int peer)
2909 {
2910 	return -EOPNOTSUPP;
2911 }
2912 EXPORT_SYMBOL(sock_no_getname);
2913 
2914 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2915 {
2916 	return -EOPNOTSUPP;
2917 }
2918 EXPORT_SYMBOL(sock_no_ioctl);
2919 
2920 int sock_no_listen(struct socket *sock, int backlog)
2921 {
2922 	return -EOPNOTSUPP;
2923 }
2924 EXPORT_SYMBOL(sock_no_listen);
2925 
2926 int sock_no_shutdown(struct socket *sock, int how)
2927 {
2928 	return -EOPNOTSUPP;
2929 }
2930 EXPORT_SYMBOL(sock_no_shutdown);
2931 
2932 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2933 {
2934 	return -EOPNOTSUPP;
2935 }
2936 EXPORT_SYMBOL(sock_no_sendmsg);
2937 
2938 int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
2939 {
2940 	return -EOPNOTSUPP;
2941 }
2942 EXPORT_SYMBOL(sock_no_sendmsg_locked);
2943 
2944 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2945 		    int flags)
2946 {
2947 	return -EOPNOTSUPP;
2948 }
2949 EXPORT_SYMBOL(sock_no_recvmsg);
2950 
2951 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2952 {
2953 	/* Mirror missing mmap method error code */
2954 	return -ENODEV;
2955 }
2956 EXPORT_SYMBOL(sock_no_mmap);
2957 
2958 /*
2959  * When a file is received (via SCM_RIGHTS, etc), we must bump the
2960  * various sock-based usage counts.
2961  */
2962 void __receive_sock(struct file *file)
2963 {
2964 	struct socket *sock;
2965 
2966 	sock = sock_from_file(file);
2967 	if (sock) {
2968 		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
2969 		sock_update_classid(&sock->sk->sk_cgrp_data);
2970 	}
2971 }
2972 
2973 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2974 {
2975 	ssize_t res;
2976 	struct msghdr msg = {.msg_flags = flags};
2977 	struct kvec iov;
2978 	char *kaddr = kmap(page);
2979 	iov.iov_base = kaddr + offset;
2980 	iov.iov_len = size;
2981 	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2982 	kunmap(page);
2983 	return res;
2984 }
2985 EXPORT_SYMBOL(sock_no_sendpage);
2986 
2987 ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
2988 				int offset, size_t size, int flags)
2989 {
2990 	ssize_t res;
2991 	struct msghdr msg = {.msg_flags = flags};
2992 	struct kvec iov;
2993 	char *kaddr = kmap(page);
2994 
2995 	iov.iov_base = kaddr + offset;
2996 	iov.iov_len = size;
2997 	res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
2998 	kunmap(page);
2999 	return res;
3000 }
3001 EXPORT_SYMBOL(sock_no_sendpage_locked);
3002 
3003 /*
3004  *	Default Socket Callbacks
3005  */
3006 
3007 static void sock_def_wakeup(struct sock *sk)
3008 {
3009 	struct socket_wq *wq;
3010 
3011 	rcu_read_lock();
3012 	wq = rcu_dereference(sk->sk_wq);
3013 	if (skwq_has_sleeper(wq))
3014 		wake_up_interruptible_all(&wq->wait);
3015 	rcu_read_unlock();
3016 }
3017 
3018 static void sock_def_error_report(struct sock *sk)
3019 {
3020 	struct socket_wq *wq;
3021 
3022 	rcu_read_lock();
3023 	wq = rcu_dereference(sk->sk_wq);
3024 	if (skwq_has_sleeper(wq))
3025 		wake_up_interruptible_poll(&wq->wait, EPOLLERR);
3026 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
3027 	rcu_read_unlock();
3028 }
3029 
3030 void sock_def_readable(struct sock *sk)
3031 {
3032 	struct socket_wq *wq;
3033 
3034 	rcu_read_lock();
3035 	wq = rcu_dereference(sk->sk_wq);
3036 	if (skwq_has_sleeper(wq))
3037 		wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
3038 						EPOLLRDNORM | EPOLLRDBAND);
3039 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
3040 	rcu_read_unlock();
3041 }
3042 
3043 static void sock_def_write_space(struct sock *sk)
3044 {
3045 	struct socket_wq *wq;
3046 
3047 	rcu_read_lock();
3048 
3049 	/* Do not wake up a writer until he can make "significant"
3050 	 * progress.  --DaveM
3051 	 */
3052 	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
3053 		wq = rcu_dereference(sk->sk_wq);
3054 		if (skwq_has_sleeper(wq))
3055 			wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
3056 						EPOLLWRNORM | EPOLLWRBAND);
3057 
3058 		/* Should agree with poll, otherwise some programs break */
3059 		if (sock_writeable(sk))
3060 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
3061 	}
3062 
3063 	rcu_read_unlock();
3064 }
3065 
/* Default sk_destruct callback installed by sock_init_data(). */
static void sock_def_destruct(struct sock *sk)
{
	/* Nothing to do. */
}
3069 
3070 void sk_send_sigurg(struct sock *sk)
3071 {
3072 	if (sk->sk_socket && sk->sk_socket->file)
3073 		if (send_sigurg(&sk->sk_socket->file->f_owner))
3074 			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
3075 }
3076 EXPORT_SYMBOL(sk_send_sigurg);
3077 
/*
 * (Re)arm @timer to fire at @expires. A socket reference is taken only
 * when mod_timer() returns 0.
 */
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	int was_pending = mod_timer(timer, expires);

	if (!was_pending)
		sock_hold(sk);
}
3084 EXPORT_SYMBOL(sk_reset_timer);
3085 
/*
 * Cancel @timer; when del_timer() returns non-zero, drop the socket
 * reference with __sock_put().
 */
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	int was_pending = del_timer(timer);

	if (was_pending)
		__sock_put(sk);
}
3091 EXPORT_SYMBOL(sk_stop_timer);
3092 
/* Same as sk_stop_timer(), but built on del_timer_sync(). */
void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
{
	int was_pending = del_timer_sync(timer);

	if (was_pending)
		__sock_put(sk);
}
3098 EXPORT_SYMBOL(sk_stop_timer_sync);
3099 
3100 void sock_init_data(struct socket *sock, struct sock *sk)
3101 {
3102 	sk_init_common(sk);
3103 	sk->sk_send_head	=	NULL;
3104 
3105 	timer_setup(&sk->sk_timer, NULL, 0);
3106 
3107 	sk->sk_allocation	=	GFP_KERNEL;
3108 	sk->sk_rcvbuf		=	sysctl_rmem_default;
3109 	sk->sk_sndbuf		=	sysctl_wmem_default;
3110 	sk->sk_state		=	TCP_CLOSE;
3111 	sk_set_socket(sk, sock);
3112 
3113 	sock_set_flag(sk, SOCK_ZAPPED);
3114 
3115 	if (sock) {
3116 		sk->sk_type	=	sock->type;
3117 		RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
3118 		sock->sk	=	sk;
3119 		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
3120 	} else {
3121 		RCU_INIT_POINTER(sk->sk_wq, NULL);
3122 		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
3123 	}
3124 
3125 	rwlock_init(&sk->sk_callback_lock);
3126 	if (sk->sk_kern_sock)
3127 		lockdep_set_class_and_name(
3128 			&sk->sk_callback_lock,
3129 			af_kern_callback_keys + sk->sk_family,
3130 			af_family_kern_clock_key_strings[sk->sk_family]);
3131 	else
3132 		lockdep_set_class_and_name(
3133 			&sk->sk_callback_lock,
3134 			af_callback_keys + sk->sk_family,
3135 			af_family_clock_key_strings[sk->sk_family]);
3136 
3137 	sk->sk_state_change	=	sock_def_wakeup;
3138 	sk->sk_data_ready	=	sock_def_readable;
3139 	sk->sk_write_space	=	sock_def_write_space;
3140 	sk->sk_error_report	=	sock_def_error_report;
3141 	sk->sk_destruct		=	sock_def_destruct;
3142 
3143 	sk->sk_frag.page	=	NULL;
3144 	sk->sk_frag.offset	=	0;
3145 	sk->sk_peek_off		=	-1;
3146 
3147 	sk->sk_peer_pid 	=	NULL;
3148 	sk->sk_peer_cred	=	NULL;
3149 	sk->sk_write_pending	=	0;
3150 	sk->sk_rcvlowat		=	1;
3151 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
3152 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
3153 
3154 	sk->sk_stamp = SK_DEFAULT_STAMP;
3155 #if BITS_PER_LONG==32
3156 	seqlock_init(&sk->sk_stamp_seq);
3157 #endif
3158 	atomic_set(&sk->sk_zckey, 0);
3159 
3160 #ifdef CONFIG_NET_RX_BUSY_POLL
3161 	sk->sk_napi_id		=	0;
3162 	sk->sk_ll_usec		=	sysctl_net_busy_read;
3163 #endif
3164 
3165 	sk->sk_max_pacing_rate = ~0UL;
3166 	sk->sk_pacing_rate = ~0UL;
3167 	WRITE_ONCE(sk->sk_pacing_shift, 10);
3168 	sk->sk_incoming_cpu = -1;
3169 
3170 	sk_rx_queue_clear(sk);
3171 	/*
3172 	 * Before updating sk_refcnt, we must commit prior changes to memory
3173 	 * (Documentation/RCU/rculist_nulls.rst for details)
3174 	 */
3175 	smp_wmb();
3176 	refcount_set(&sk->sk_refcnt, 1);
3177 	atomic_set(&sk->sk_drops, 0);
3178 }
3179 EXPORT_SYMBOL(sock_init_data);
3180 
3181 void lock_sock_nested(struct sock *sk, int subclass)
3182 {
3183 	might_sleep();
3184 	spin_lock_bh(&sk->sk_lock.slock);
3185 	if (sk->sk_lock.owned)
3186 		__lock_sock(sk);
3187 	sk->sk_lock.owned = 1;
3188 	spin_unlock(&sk->sk_lock.slock);
3189 	/*
3190 	 * The sk_lock has mutex_lock() semantics here:
3191 	 */
3192 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
3193 	local_bh_enable();
3194 }
3195 EXPORT_SYMBOL(lock_sock_nested);
3196 
3197 void release_sock(struct sock *sk)
3198 {
3199 	spin_lock_bh(&sk->sk_lock.slock);
3200 	if (sk->sk_backlog.tail)
3201 		__release_sock(sk);
3202 
3203 	/* Warning : release_cb() might need to release sk ownership,
3204 	 * ie call sock_release_ownership(sk) before us.
3205 	 */
3206 	if (sk->sk_prot->release_cb)
3207 		sk->sk_prot->release_cb(sk);
3208 
3209 	sock_release_ownership(sk);
3210 	if (waitqueue_active(&sk->sk_lock.wq))
3211 		wake_up(&sk->sk_lock.wq);
3212 	spin_unlock_bh(&sk->sk_lock.slock);
3213 }
3214 EXPORT_SYMBOL(release_sock);
3215 
3216 /**
3217  * lock_sock_fast - fast version of lock_sock
3218  * @sk: socket
3219  *
3220  * This version should be used for very small section, where process wont block
3221  * return false if fast path is taken:
3222  *
3223  *   sk_lock.slock locked, owned = 0, BH disabled
3224  *
3225  * return true if slow path is taken:
3226  *
3227  *   sk_lock.slock unlocked, owned = 1, BH enabled
3228  */
3229 bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
3230 {
3231 	might_sleep();
3232 	spin_lock_bh(&sk->sk_lock.slock);
3233 
3234 	if (!sk->sk_lock.owned)
3235 		/*
3236 		 * Note : We must disable BH
3237 		 */
3238 		return false;
3239 
3240 	__lock_sock(sk);
3241 	sk->sk_lock.owned = 1;
3242 	spin_unlock(&sk->sk_lock.slock);
3243 	/*
3244 	 * The sk_lock has mutex_lock() semantics here:
3245 	 */
3246 	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
3247 	__acquire(&sk->sk_lock.slock);
3248 	local_bh_enable();
3249 	return true;
3250 }
3251 EXPORT_SYMBOL(lock_sock_fast);
3252 
3253 int sock_gettstamp(struct socket *sock, void __user *userstamp,
3254 		   bool timeval, bool time32)
3255 {
3256 	struct sock *sk = sock->sk;
3257 	struct timespec64 ts;
3258 
3259 	sock_enable_timestamp(sk, SOCK_TIMESTAMP);
3260 	ts = ktime_to_timespec64(sock_read_timestamp(sk));
3261 	if (ts.tv_sec == -1)
3262 		return -ENOENT;
3263 	if (ts.tv_sec == 0) {
3264 		ktime_t kt = ktime_get_real();
3265 		sock_write_timestamp(sk, kt);
3266 		ts = ktime_to_timespec64(kt);
3267 	}
3268 
3269 	if (timeval)
3270 		ts.tv_nsec /= 1000;
3271 
3272 #ifdef CONFIG_COMPAT_32BIT_TIME
3273 	if (time32)
3274 		return put_old_timespec32(&ts, userstamp);
3275 #endif
3276 #ifdef CONFIG_SPARC64
3277 	/* beware of padding in sparc64 timeval */
3278 	if (timeval && !in_compat_syscall()) {
3279 		struct __kernel_old_timeval __user tv = {
3280 			.tv_sec = ts.tv_sec,
3281 			.tv_usec = ts.tv_nsec,
3282 		};
3283 		if (copy_to_user(userstamp, &tv, sizeof(tv)))
3284 			return -EFAULT;
3285 		return 0;
3286 	}
3287 #endif
3288 	return put_timespec64(&ts, userstamp);
3289 }
3290 EXPORT_SYMBOL(sock_gettstamp);
3291 
3292 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
3293 {
3294 	if (!sock_flag(sk, flag)) {
3295 		unsigned long previous_flags = sk->sk_flags;
3296 
3297 		sock_set_flag(sk, flag);
3298 		/*
3299 		 * we just set one of the two flags which require net
3300 		 * time stamping, but time stamping might have been on
3301 		 * already because of the other one
3302 		 */
3303 		if (sock_needs_netstamp(sk) &&
3304 		    !(previous_flags & SK_FLAGS_TIMESTAMP))
3305 			net_enable_timestamp();
3306 	}
3307 }
3308 
3309 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
3310 		       int level, int type)
3311 {
3312 	struct sock_exterr_skb *serr;
3313 	struct sk_buff *skb;
3314 	int copied, err;
3315 
3316 	err = -EAGAIN;
3317 	skb = sock_dequeue_err_skb(sk);
3318 	if (skb == NULL)
3319 		goto out;
3320 
3321 	copied = skb->len;
3322 	if (copied > len) {
3323 		msg->msg_flags |= MSG_TRUNC;
3324 		copied = len;
3325 	}
3326 	err = skb_copy_datagram_msg(skb, 0, msg, copied);
3327 	if (err)
3328 		goto out_free_skb;
3329 
3330 	sock_recv_timestamp(msg, sk, skb);
3331 
3332 	serr = SKB_EXT_ERR(skb);
3333 	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
3334 
3335 	msg->msg_flags |= MSG_ERRQUEUE;
3336 	err = copied;
3337 
3338 out_free_skb:
3339 	kfree_skb(skb);
3340 out:
3341 	return err;
3342 }
3343 EXPORT_SYMBOL(sock_recv_errqueue);
3344 
3345 /*
3346  *	Get a socket option on an socket.
3347  *
3348  *	FIX: POSIX 1003.1g is very ambiguous here. It states that
3349  *	asynchronous errors should be reported by getsockopt. We assume
3350  *	this means if you specify SO_ERROR (otherwise whats the point of it).
3351  */
3352 int sock_common_getsockopt(struct socket *sock, int level, int optname,
3353 			   char __user *optval, int __user *optlen)
3354 {
3355 	struct sock *sk = sock->sk;
3356 
3357 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
3358 }
3359 EXPORT_SYMBOL(sock_common_getsockopt);
3360 
3361 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3362 			int flags)
3363 {
3364 	struct sock *sk = sock->sk;
3365 	int addr_len = 0;
3366 	int err;
3367 
3368 	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
3369 				   flags & ~MSG_DONTWAIT, &addr_len);
3370 	if (err >= 0)
3371 		msg->msg_namelen = addr_len;
3372 	return err;
3373 }
3374 EXPORT_SYMBOL(sock_common_recvmsg);
3375 
3376 /*
3377  *	Set socket options on an inet socket.
3378  */
3379 int sock_common_setsockopt(struct socket *sock, int level, int optname,
3380 			   sockptr_t optval, unsigned int optlen)
3381 {
3382 	struct sock *sk = sock->sk;
3383 
3384 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
3385 }
3386 EXPORT_SYMBOL(sock_common_setsockopt);
3387 
/*
 * Common socket release path: run the protocol's optional destroy hook,
 * unhash the socket, orphan it, free its xfrm policy, and finally drop one
 * reference with sock_put().
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes no
	 * longer have access to the socket, but the network stack still
	 * does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but some
	 * packets may still be in flight because another CPU running the
	 * receive path did its hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and will be purged by
	 * the socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and,
	 * probably, our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
3424 
3425 void sk_get_meminfo(const struct sock *sk, u32 *mem)
3426 {
3427 	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
3428 
3429 	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3430 	mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
3431 	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3432 	mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
3433 	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
3434 	mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
3435 	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
3436 	mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
3437 	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
3438 }
3439 
3440 #ifdef CONFIG_PROC_FS
/* Number of per-protocol "in use" counter slots. */
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
/* One counter per slot; allocated per CPU for each network namespace. */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

/* Bitmap of slot indices currently handed out by assign_proto_idx(). */
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3447 
3448 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3449 {
3450 	__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
3451 }
3452 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3453 
3454 int sock_prot_inuse_get(struct net *net, struct proto *prot)
3455 {
3456 	int cpu, idx = prot->inuse_idx;
3457 	int res = 0;
3458 
3459 	for_each_possible_cpu(cpu)
3460 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3461 
3462 	return res >= 0 ? res : 0;
3463 }
3464 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3465 
3466 static void sock_inuse_add(struct net *net, int val)
3467 {
3468 	this_cpu_add(*net->core.sock_inuse, val);
3469 }
3470 
3471 int sock_inuse_get(struct net *net)
3472 {
3473 	int cpu, res = 0;
3474 
3475 	for_each_possible_cpu(cpu)
3476 		res += *per_cpu_ptr(net->core.sock_inuse, cpu);
3477 
3478 	return res;
3479 }
3480 
3481 EXPORT_SYMBOL_GPL(sock_inuse_get);
3482 
3483 static int __net_init sock_inuse_init_net(struct net *net)
3484 {
3485 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3486 	if (net->core.prot_inuse == NULL)
3487 		return -ENOMEM;
3488 
3489 	net->core.sock_inuse = alloc_percpu(int);
3490 	if (net->core.sock_inuse == NULL)
3491 		goto out;
3492 
3493 	return 0;
3494 
3495 out:
3496 	free_percpu(net->core.prot_inuse);
3497 	return -ENOMEM;
3498 }
3499 
/* Per-namespace exit: free both per-CPU counter areas set up at init. */
static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.prot_inuse);
	free_percpu(net->core.sock_inuse);
}
3505 
/* Per-namespace hooks that allocate and free the in-use counters. */
static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};
3510 
3511 static __init int net_inuse_init(void)
3512 {
3513 	if (register_pernet_subsys(&net_inuse_ops))
3514 		panic("Cannot initialize net inuse counters");
3515 
3516 	return 0;
3517 }
3518 
3519 core_initcall(net_inuse_init);
3520 
3521 static int assign_proto_idx(struct proto *prot)
3522 {
3523 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
3524 
3525 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3526 		pr_err("PROTO_INUSE_NR exhausted\n");
3527 		return -ENOSPC;
3528 	}
3529 
3530 	set_bit(prot->inuse_idx, proto_inuse_idx);
3531 	return 0;
3532 }
3533 
3534 static void release_proto_idx(struct proto *prot)
3535 {
3536 	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3537 		clear_bit(prot->inuse_idx, proto_inuse_idx);
3538 }
3539 #else
/* Stub for builds without CONFIG_PROC_FS: no slot is tracked, always 0. */
static inline int assign_proto_idx(struct proto *prot)
{
	return 0;
}
3544 
/* Stub for builds without CONFIG_PROC_FS: there is no slot to release. */
static inline void release_proto_idx(struct proto *prot)
{
}
3548 
/* Stub for builds without CONFIG_PROC_FS: socket counts are not kept. */
static void sock_inuse_add(struct net *net, int val)
{
}
3552 #endif
3553 
3554 static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
3555 {
3556 	if (!twsk_prot)
3557 		return;
3558 	kfree(twsk_prot->twsk_slab_name);
3559 	twsk_prot->twsk_slab_name = NULL;
3560 	kmem_cache_destroy(twsk_prot->twsk_slab);
3561 	twsk_prot->twsk_slab = NULL;
3562 }
3563 
3564 static int tw_prot_init(const struct proto *prot)
3565 {
3566 	struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
3567 
3568 	if (!twsk_prot)
3569 		return 0;
3570 
3571 	twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
3572 					      prot->name);
3573 	if (!twsk_prot->twsk_slab_name)
3574 		return -ENOMEM;
3575 
3576 	twsk_prot->twsk_slab =
3577 		kmem_cache_create(twsk_prot->twsk_slab_name,
3578 				  twsk_prot->twsk_obj_size, 0,
3579 				  SLAB_ACCOUNT | prot->slab_flags,
3580 				  NULL);
3581 	if (!twsk_prot->twsk_slab) {
3582 		pr_crit("%s: Can't create timewait sock SLAB cache!\n",
3583 			prot->name);
3584 		return -ENOMEM;
3585 	}
3586 
3587 	return 0;
3588 }
3589 
3590 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3591 {
3592 	if (!rsk_prot)
3593 		return;
3594 	kfree(rsk_prot->slab_name);
3595 	rsk_prot->slab_name = NULL;
3596 	kmem_cache_destroy(rsk_prot->slab);
3597 	rsk_prot->slab = NULL;
3598 }
3599 
3600 static int req_prot_init(const struct proto *prot)
3601 {
3602 	struct request_sock_ops *rsk_prot = prot->rsk_prot;
3603 
3604 	if (!rsk_prot)
3605 		return 0;
3606 
3607 	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3608 					prot->name);
3609 	if (!rsk_prot->slab_name)
3610 		return -ENOMEM;
3611 
3612 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3613 					   rsk_prot->obj_size, 0,
3614 					   SLAB_ACCOUNT | prot->slab_flags,
3615 					   NULL);
3616 
3617 	if (!rsk_prot->slab) {
3618 		pr_crit("%s: Can't create request sock SLAB cache!\n",
3619 			prot->name);
3620 		return -ENOMEM;
3621 	}
3622 	return 0;
3623 }
3624 
3625 int proto_register(struct proto *prot, int alloc_slab)
3626 {
3627 	int ret = -ENOBUFS;
3628 
3629 	if (alloc_slab) {
3630 		prot->slab = kmem_cache_create_usercopy(prot->name,
3631 					prot->obj_size, 0,
3632 					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3633 					prot->slab_flags,
3634 					prot->useroffset, prot->usersize,
3635 					NULL);
3636 
3637 		if (prot->slab == NULL) {
3638 			pr_crit("%s: Can't create sock SLAB cache!\n",
3639 				prot->name);
3640 			goto out;
3641 		}
3642 
3643 		if (req_prot_init(prot))
3644 			goto out_free_request_sock_slab;
3645 
3646 		if (tw_prot_init(prot))
3647 			goto out_free_timewait_sock_slab;
3648 	}
3649 
3650 	mutex_lock(&proto_list_mutex);
3651 	ret = assign_proto_idx(prot);
3652 	if (ret) {
3653 		mutex_unlock(&proto_list_mutex);
3654 		goto out_free_timewait_sock_slab;
3655 	}
3656 	list_add(&prot->node, &proto_list);
3657 	mutex_unlock(&proto_list_mutex);
3658 	return ret;
3659 
3660 out_free_timewait_sock_slab:
3661 	if (alloc_slab)
3662 		tw_prot_cleanup(prot->twsk_prot);
3663 out_free_request_sock_slab:
3664 	if (alloc_slab) {
3665 		req_prot_cleanup(prot->rsk_prot);
3666 
3667 		kmem_cache_destroy(prot->slab);
3668 		prot->slab = NULL;
3669 	}
3670 out:
3671 	return ret;
3672 }
3673 EXPORT_SYMBOL(proto_register);
3674 
/**
 * proto_unregister - remove a protocol from the global protocol list
 * @prot: protocol to unregister
 *
 * Releases the in-use counter slot and unlinks @prot from proto_list under
 * proto_list_mutex, then destroys the sock slab cache and cleans up the
 * request sock and timewait sock caches.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	/* Cache teardown happens after the list mutex has been dropped. */
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);
	tw_prot_cleanup(prot->twsk_prot);
}
EXPORT_SYMBOL(proto_unregister);
3689 
3690 int sock_load_diag_module(int family, int protocol)
3691 {
3692 	if (!protocol) {
3693 		if (!sock_is_registered(family))
3694 			return -ENOENT;
3695 
3696 		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
3697 				      NETLINK_SOCK_DIAG, family);
3698 	}
3699 
3700 #ifdef CONFIG_INET
3701 	if (family == AF_INET &&
3702 	    protocol != IPPROTO_RAW &&
3703 	    protocol < MAX_INET_PROTOS &&
3704 	    !rcu_access_pointer(inet_protos[protocol]))
3705 		return -ENOENT;
3706 #endif
3707 
3708 	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
3709 			      NETLINK_SOCK_DIAG, family, protocol);
3710 }
3711 EXPORT_SYMBOL(sock_load_diag_module);
3712 
3713 #ifdef CONFIG_PROC_FS
/*
 * seq_file ->start: take proto_list_mutex and return the iterator for
 * offset *pos in proto_list. The mutex is released in proto_seq_stop().
 * The value returned may be &proto_list itself, which proto_seq_show()
 * renders as the header row.
 */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}
3720 
/* seq_file ->next: step from @v to the following proto_list entry. */
static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}
3725 
/* seq_file ->stop: release the mutex taken in proto_seq_start(). */
static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}
3731 
/* Return 'y' if @method is a non-NULL pointer, 'n' if it is NULL. */
static char proto_method_implemented(const void *method)
{
	if (method)
		return 'y';

	return 'n';
}
3736 static long sock_prot_memory_allocated(struct proto *proto)
3737 {
3738 	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3739 }
3740 
3741 static const char *sock_prot_memory_pressure(struct proto *proto)
3742 {
3743 	return proto->memory_pressure != NULL ?
3744 	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3745 }
3746 
/*
 * Emit one row describing @proto: name, object size, in-use count for the
 * seq_file's network namespace, allocated memory, memory pressure state,
 * max header, whether a slab cache exists, and the owning module name,
 * followed by one y/n column per protocol method. The method columns are
 * in the same order as the two-letter headings printed by
 * proto_seq_show().
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
3780 
3781 static int proto_seq_show(struct seq_file *seq, void *v)
3782 {
3783 	if (v == &proto_list)
3784 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3785 			   "protocol",
3786 			   "size",
3787 			   "sockets",
3788 			   "memory",
3789 			   "press",
3790 			   "maxhdr",
3791 			   "slab",
3792 			   "module",
3793 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3794 	else
3795 		proto_seq_printf(seq, list_entry(v, struct proto, node));
3796 	return 0;
3797 }
3798 
/* seq_file iterator used for the "protocols" proc entry. */
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
3805 
3806 static __net_init int proto_init_net(struct net *net)
3807 {
3808 	if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
3809 			sizeof(struct seq_net_private)))
3810 		return -ENOMEM;
3811 
3812 	return 0;
3813 }
3814 
/* Per-namespace exit: remove the "protocols" entry created at init. */
static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}
3819 
3820 
/* Per-namespace hooks that create and remove the "protocols" proc entry. */
static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};
3825 
/*
 * Register the "protocols" proc pernet operations at subsys_initcall
 * time. Returns the result of register_pernet_subsys().
 */
static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);
3832 
3833 #endif /* PROC_FS */
3834 
3835 #ifdef CONFIG_NET_RX_BUSY_POLL
3836 bool sk_busy_loop_end(void *p, unsigned long start_time)
3837 {
3838 	struct sock *sk = p;
3839 
3840 	return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
3841 	       sk_busy_loop_timeout(sk, start_time);
3842 }
3843 EXPORT_SYMBOL(sk_busy_loop_end);
3844 #endif /* CONFIG_NET_RX_BUSY_POLL */
3845 
3846 int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
3847 {
3848 	if (!sk->sk_prot->bind_add)
3849 		return -EOPNOTSUPP;
3850 	return sk->sk_prot->bind_add(sk, addr, addr_len);
3851 }
3852 EXPORT_SYMBOL(sock_bind_add);
3853