xref: /linux/net/mptcp/sockopt.c (revision 68993ced0f618e36cf33388f1e50223e5e6e78cc)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Multipath TCP
3  *
4  * Copyright (c) 2021, Red Hat.
5  */
6 
7 #define pr_fmt(fmt) "MPTCP: " fmt
8 
9 #include <linux/kernel.h>
10 #include <linux/module.h>
11 #include <net/sock.h>
12 #include <net/protocol.h>
13 #include <net/tcp.h>
14 #include <net/mptcp.h>
15 #include "protocol.h"
16 
17 #define MIN_INFO_OPTLEN_SIZE		16
18 #define MIN_FULL_INFO_OPTLEN_SIZE	40
19 
__mptcp_tcp_fallback(struct mptcp_sock * msk)20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
21 {
22 	msk_owned_by_me(msk);
23 
24 	if (likely(!__mptcp_check_fallback(msk)))
25 		return NULL;
26 
27 	return msk->first;
28 }
29 
sockopt_seq_reset(const struct sock * sk)30 static u32 sockopt_seq_reset(const struct sock *sk)
31 {
32 	sock_owned_by_me(sk);
33 
34 	/* Highbits contain state.  Allows to distinguish sockopt_seq
35 	 * of listener and established:
36 	 * s0 = new_listener()
37 	 * sockopt(s0) - seq is 1
38 	 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
39 	 * sockopt(s0) - seq increments to 2 on s0
40 	 * sockopt(s1) // seq increments to 2 on s1 (different option)
41 	 * new ssk completes join, inherits options from s0 // seq 2
42 	 * Needs sync from mptcp join logic, but ssk->seq == msk->seq
43 	 *
44 	 * Set High order bits to sk_state so ssk->seq == msk->seq test
45 	 * will fail.
46 	 */
47 
48 	return (u32)sk->sk_state << 24u;
49 }
50 
sockopt_seq_inc(struct mptcp_sock * msk)51 static void sockopt_seq_inc(struct mptcp_sock *msk)
52 {
53 	u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;
54 
55 	msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
56 }
57 
/* Fetch an int-sized option value from @optval into @val.
 *
 * Returns 0 on success, -EINVAL when @optlen is smaller than an int and
 * -EFAULT when the copy fails.  @msk is not used.
 */
static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
				unsigned int optlen, int *val)
{
	if (optlen >= sizeof(int))
		return copy_from_sockptr(val, optval, sizeof(*val)) ? -EFAULT : 0;

	return -EINVAL;
}
69 
__mptcp_subflow_set_rcvbuf(struct sock * ssk,int val)70 static void __mptcp_subflow_set_rcvbuf(struct sock *ssk, int val)
71 {
72 	WRITE_ONCE(ssk->sk_rcvbuf, val);
73 	tcp_set_rcvbuf(ssk, val);
74 }
75 
mptcp_sol_socket_sync_intval(struct mptcp_sock * msk,int optname,int val)76 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
77 {
78 	struct mptcp_subflow_context *subflow;
79 	struct sock *sk = (struct sock *)msk;
80 
81 	lock_sock(sk);
82 	sockopt_seq_inc(msk);
83 
84 	mptcp_for_each_subflow(msk, subflow) {
85 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
86 		bool slow = lock_sock_fast(ssk);
87 
88 		switch (optname) {
89 		case SO_DEBUG:
90 			sock_valbool_flag(ssk, SOCK_DBG, !!val);
91 			break;
92 		case SO_KEEPALIVE:
93 			if (ssk->sk_prot->keepalive)
94 				ssk->sk_prot->keepalive(ssk, !!val);
95 			sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
96 			break;
97 		case SO_PRIORITY:
98 			WRITE_ONCE(ssk->sk_priority, val);
99 			break;
100 		case SO_SNDBUF:
101 		case SO_SNDBUFFORCE:
102 			ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
103 			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
104 			mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
105 			break;
106 		case SO_RCVBUF:
107 		case SO_RCVBUFFORCE:
108 			ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
109 			__mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf);
110 			break;
111 		case SO_MARK:
112 			if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
113 				WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
114 				sk_dst_reset(ssk);
115 			}
116 			break;
117 		case SO_INCOMING_CPU:
118 			WRITE_ONCE(ssk->sk_incoming_cpu, val);
119 			break;
120 		}
121 
122 		subflow->setsockopt_seq = msk->setsockopt_seq;
123 		unlock_sock_fast(ssk, slow);
124 	}
125 
126 	release_sock(sk);
127 }
128 
mptcp_sol_socket_intval(struct mptcp_sock * msk,int optname,int val)129 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
130 {
131 	sockptr_t optval = KERNEL_SOCKPTR(&val);
132 	struct sock *sk = (struct sock *)msk;
133 	int ret;
134 
135 	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
136 			      optval, sizeof(val));
137 	if (ret)
138 		return ret;
139 
140 	mptcp_sol_socket_sync_intval(msk, optname, val);
141 	return 0;
142 }
143 
mptcp_so_incoming_cpu(struct mptcp_sock * msk,int val)144 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
145 {
146 	struct sock *sk = (struct sock *)msk;
147 
148 	WRITE_ONCE(sk->sk_incoming_cpu, val);
149 
150 	mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
151 }
152 
mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock * msk,int optname,int val)153 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
154 {
155 	sockptr_t optval = KERNEL_SOCKPTR(&val);
156 	struct mptcp_subflow_context *subflow;
157 	struct sock *sk = (struct sock *)msk;
158 	int ret;
159 
160 	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
161 			      optval, sizeof(val));
162 	if (ret)
163 		return ret;
164 
165 	lock_sock(sk);
166 	mptcp_for_each_subflow(msk, subflow) {
167 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
168 
169 		lock_sock(ssk);
170 		sock_set_timestamp(ssk, optname, !!val);
171 		release_sock(ssk);
172 	}
173 
174 	release_sock(sk);
175 	return 0;
176 }
177 
/* Dispatch an integer-valued SOL_SOCKET option to its handler.
 *
 * The value is copied from @optval first; a copy or length error is returned
 * as is.  Option names without a handler here yield -ENOPROTOOPT.
 */
static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
					   sockptr_t optval,
					   unsigned int optlen)
{
	int val, err;

	err = mptcp_get_int_option(msk, optval, optlen, &val);
	if (err)
		return err;

	switch (optname) {
	case SO_KEEPALIVE:
	case SO_DEBUG:
	case SO_MARK:
	case SO_PRIORITY:
	case SO_SNDBUF:
	case SO_SNDBUFFORCE:
	case SO_RCVBUF:
	case SO_RCVBUFFORCE:
		return mptcp_sol_socket_intval(msk, optname, val);
	case SO_INCOMING_CPU:
		mptcp_so_incoming_cpu(msk, val);
		return 0;
	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
	default:
		return -ENOPROTOOPT;
	}
}
210 
/* Handle SO_TIMESTAMPING_{OLD,NEW}.
 *
 * User space may pass either a full struct so_timestamping or a bare int
 * holding only the flags; any other length is rejected with -EINVAL.  The
 * option is set on the MPTCP socket first and then on every subflow.
 *
 * Returns 0 on success, -EFAULT on copy failure, or the error reported by
 * sock_setsockopt().
 */
static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
						    int optname,
						    sockptr_t optval,
						    unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_subflow_context *subflow;
	struct so_timestamping timestamping;
	int err;

	if (optlen != sizeof(timestamping) && optlen != sizeof(int))
		return -EINVAL;

	if (optlen == sizeof(timestamping)) {
		if (copy_from_sockptr(&timestamping, optval,
				      sizeof(timestamping)))
			return -EFAULT;
	} else {
		/* int-sized argument: only the flags are provided, every
		 * other field is zeroed
		 */
		memset(&timestamping, 0, sizeof(timestamping));

		if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
			return -EFAULT;
	}

	err = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
			      KERNEL_SOCKPTR(&timestamping),
			      sizeof(timestamping));
	if (err)
		return err;

	lock_sock(sk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		lock_sock(ssk);
		sock_set_timestamping(ssk, optname, timestamping);
		release_sock(ssk);
	}
	release_sock(sk);

	return 0;
}
254 
/* Handle SO_LINGER: set the option on the MPTCP socket via sock_setsockopt(),
 * then mirror the resulting linger state on every subflow.
 *
 * Returns 0 on success, -EINVAL when @optlen is shorter than struct linger,
 * -EFAULT on copy failure, or the error reported by sock_setsockopt().
 */
static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
					      unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_subflow_context *subflow;
	struct linger ling;
	int err;

	if (optlen < sizeof(ling))
		return -EINVAL;

	if (copy_from_sockptr(&ling, optval, sizeof(ling)))
		return -EFAULT;

	err = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER,
			      KERNEL_SOCKPTR(&ling), sizeof(ling));
	if (err)
		return err;

	lock_sock(sk);
	sockopt_seq_inc(msk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast(ssk);

		if (ling.l_onoff) {
			/* reuse the linger time computed for the msk */
			ssk->sk_lingertime = sk->sk_lingertime;
			sock_set_flag(ssk, SOCK_LINGER);
		} else {
			sock_reset_flag(ssk, SOCK_LINGER);
		}

		subflow->setsockopt_seq = msk->setsockopt_seq;
		unlock_sock_fast(ssk, slow);
	}
	release_sock(sk);

	return 0;
}
295 
/* Entry point for SOL_SOCKET level options on an MPTCP socket.
 *
 * Depending on @optname the option is:
 *  - applied to the socket returned by __mptcp_nmpc_sk() and mirrored back
 *    on the msk (reuse / bind-to-device options);
 *  - handed to one of the dedicated helpers that also update the subflows;
 *  - applied to the msk only;
 *  - silently accepted (returns 0 without doing anything);
 *  - rejected with -EOPNOTSUPP.
 */
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct sock *ssk;
	int ret;

	switch (optname) {
	case SO_REUSEPORT:
	case SO_REUSEADDR:
	case SO_BINDTODEVICE:
	case SO_BINDTOIFINDEX:
		lock_sock(sk);
		ssk = __mptcp_nmpc_sk(msk);
		if (IS_ERR(ssk)) {
			ret = PTR_ERR(ssk);
		} else {
			ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval,
					    optlen);
			if (!ret) {
				/* mirror the accepted value on the msk */
				switch (optname) {
				case SO_REUSEPORT:
					sk->sk_reuseport = ssk->sk_reuseport;
					break;
				case SO_REUSEADDR:
					sk->sk_reuse = ssk->sk_reuse;
					break;
				default:
					/* SO_BINDTODEVICE, SO_BINDTOIFINDEX */
					sk->sk_bound_dev_if = ssk->sk_bound_dev_if;
					break;
				}
			}
		}
		release_sock(sk);
		return ret;
	case SO_KEEPALIVE:
	case SO_PRIORITY:
	case SO_SNDBUF:
	case SO_SNDBUFFORCE:
	case SO_RCVBUF:
	case SO_RCVBUFFORCE:
	case SO_MARK:
	case SO_INCOMING_CPU:
	case SO_DEBUG:
	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
						       optlen);
	case SO_TIMESTAMPING_OLD:
	case SO_TIMESTAMPING_NEW:
		return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
								optval, optlen);
	case SO_LINGER:
		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
	case SO_RCVLOWAT:
	case SO_RCVTIMEO_OLD:
	case SO_RCVTIMEO_NEW:
	case SO_SNDTIMEO_OLD:
	case SO_SNDTIMEO_NEW:
	case SO_BUSY_POLL:
	case SO_PREFER_BUSY_POLL:
	case SO_BUSY_POLL_BUDGET:
		/* No need to copy: only relevant for msk */
		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
				       optval, optlen);
	case SO_NO_CHECK:
	case SO_DONTROUTE:
	case SO_BROADCAST:
	case SO_BSDCOMPAT:
	case SO_PASSCRED:
	case SO_PASSPIDFD:
	case SO_PASSSEC:
	case SO_RXQ_OVFL:
	case SO_WIFI_STATUS:
	case SO_NOFCS:
	case SO_SELECT_ERR_QUEUE:
		/* accepted, but nothing to do */
		return 0;
	default:
		/* SO_OOBINLINE is not supported, let's avoid the related mess
		 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
		 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
		 * we must be careful with subflows
		 *
		 * SO_ATTACH_REUSEPORT_EBPF is not supported, as it checks
		 * explicitly the sk_protocol field
		 *
		 * SO_PEEK_OFF is unsupported, as it is for plain TCP
		 * SO_MAX_PACING_RATE is unsupported, we must be careful with
		 * subflows
		 * SO_CNX_ADVICE is currently unsupported, could possibly be
		 * relevant, but likely needs careful design
		 *
		 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
		 * SO_TXTIME is currently unsupported
		 */
		return -EOPNOTSUPP;
	}
}
392 
/* Handle the SOL_IPV6 options that are applied to the socket returned by
 * __mptcp_nmpc_sk(): IPV6_V6ONLY, IPV6_TRANSPARENT and IPV6_FREEBIND.
 *
 * On success the msk sockopt sequence number is bumped and the resulting
 * setting is mirrored on the msk.  Any other option name yields
 * -EOPNOTSUPP; failures from __mptcp_nmpc_sk() or tcp_setsockopt() are
 * returned unchanged.
 */
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct sock *ssk;
	int ret;

	if (optname != IPV6_V6ONLY && optname != IPV6_TRANSPARENT &&
	    optname != IPV6_FREEBIND)
		return -EOPNOTSUPP;

	lock_sock(sk);
	ssk = __mptcp_nmpc_sk(msk);
	if (IS_ERR(ssk)) {
		ret = PTR_ERR(ssk);
		goto unlock;
	}

	ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen);
	if (ret != 0)
		goto unlock;

	sockopt_seq_inc(msk);

	/* copy back on the msk what the subflow accepted */
	switch (optname) {
	case IPV6_V6ONLY:
		sk->sk_ipv6only = ssk->sk_ipv6only;
		break;
	case IPV6_TRANSPARENT:
		inet_assign_bit(TRANSPARENT, sk,
				inet_test_bit(TRANSPARENT, ssk));
		break;
	case IPV6_FREEBIND:
		inet_assign_bit(FREEBIND, sk,
				inet_test_bit(FREEBIND, ssk));
		break;
	}

unlock:
	release_sock(sk);
	return ret;
}
439 
mptcp_supported_sockopt(int level,int optname)440 static bool mptcp_supported_sockopt(int level, int optname)
441 {
442 	if (level == SOL_IP) {
443 		switch (optname) {
444 		/* should work fine */
445 		case IP_FREEBIND:
446 		case IP_TRANSPARENT:
447 		case IP_BIND_ADDRESS_NO_PORT:
448 		case IP_LOCAL_PORT_RANGE:
449 
450 		/* the following are control cmsg related */
451 		case IP_PKTINFO:
452 		case IP_RECVTTL:
453 		case IP_RECVTOS:
454 		case IP_RECVOPTS:
455 		case IP_RETOPTS:
456 		case IP_PASSSEC:
457 		case IP_RECVORIGDSTADDR:
458 		case IP_CHECKSUM:
459 		case IP_RECVFRAGSIZE:
460 
461 		/* common stuff that need some love */
462 		case IP_TOS:
463 		case IP_TTL:
464 		case IP_MTU_DISCOVER:
465 		case IP_RECVERR:
466 
467 		/* possibly less common may deserve some love */
468 		case IP_MINTTL:
469 
470 		/* the following is apparently a no-op for plain TCP */
471 		case IP_RECVERR_RFC4884:
472 			return true;
473 		}
474 
475 		/* IP_OPTIONS is not supported, needs subflow care */
476 		/* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
477 		/* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
478 		 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
479 		 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
480 		 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP,
481 		 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
482 		 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal
483 		 * with mcast stuff
484 		 */
485 		/* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */
486 		return false;
487 	}
488 	if (level == SOL_IPV6) {
489 		switch (optname) {
490 		case IPV6_V6ONLY:
491 
492 		/* the following are control cmsg related */
493 		case IPV6_RECVPKTINFO:
494 		case IPV6_2292PKTINFO:
495 		case IPV6_RECVHOPLIMIT:
496 		case IPV6_2292HOPLIMIT:
497 		case IPV6_RECVRTHDR:
498 		case IPV6_2292RTHDR:
499 		case IPV6_RECVHOPOPTS:
500 		case IPV6_2292HOPOPTS:
501 		case IPV6_RECVDSTOPTS:
502 		case IPV6_2292DSTOPTS:
503 		case IPV6_RECVTCLASS:
504 		case IPV6_FLOWINFO:
505 		case IPV6_RECVPATHMTU:
506 		case IPV6_RECVORIGDSTADDR:
507 		case IPV6_RECVFRAGSIZE:
508 
509 		/* the following ones need some love but are quite common */
510 		case IPV6_TCLASS:
511 		case IPV6_TRANSPARENT:
512 		case IPV6_FREEBIND:
513 		case IPV6_PKTINFO:
514 		case IPV6_2292PKTOPTIONS:
515 		case IPV6_UNICAST_HOPS:
516 		case IPV6_MTU_DISCOVER:
517 		case IPV6_MTU:
518 		case IPV6_RECVERR:
519 		case IPV6_FLOWINFO_SEND:
520 		case IPV6_FLOWLABEL_MGR:
521 		case IPV6_MINHOPCOUNT:
522 		case IPV6_DONTFRAG:
523 		case IPV6_AUTOFLOWLABEL:
524 
525 		/* the following one is a no-op for plain TCP */
526 		case IPV6_RECVERR_RFC4884:
527 			return true;
528 		}
529 
530 		/* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
531 		 * not supported
532 		 */
533 		/* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
534 		 * IPV6_MULTICAST_IF, IPV6_ADDRFORM,
535 		 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
536 		 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP,
537 		 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP,
538 		 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER
539 		 * are not supported better not deal with mcast
540 		 */
541 		/* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */
542 
543 		/* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
544 		/* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */
545 		return false;
546 	}
547 	if (level == SOL_TCP) {
548 		switch (optname) {
549 		/* the following are no-op or should work just fine */
550 		case TCP_THIN_DUPACK:
551 		case TCP_DEFER_ACCEPT:
552 
553 		/* the following need some love */
554 		case TCP_MAXSEG:
555 		case TCP_NODELAY:
556 		case TCP_THIN_LINEAR_TIMEOUTS:
557 		case TCP_CONGESTION:
558 		case TCP_CORK:
559 		case TCP_KEEPIDLE:
560 		case TCP_KEEPINTVL:
561 		case TCP_KEEPCNT:
562 		case TCP_SYNCNT:
563 		case TCP_SAVE_SYN:
564 		case TCP_LINGER2:
565 		case TCP_WINDOW_CLAMP:
566 		case TCP_QUICKACK:
567 		case TCP_USER_TIMEOUT:
568 		case TCP_TIMESTAMP:
569 		case TCP_NOTSENT_LOWAT:
570 		case TCP_TX_DELAY:
571 		case TCP_INQ:
572 		case TCP_FASTOPEN:
573 		case TCP_FASTOPEN_CONNECT:
574 		case TCP_FASTOPEN_KEY:
575 		case TCP_FASTOPEN_NO_COOKIE:
576 			return true;
577 		}
578 
579 		/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */
580 
581 		/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
582 		 * TCP_REPAIR_WINDOW are not supported, better avoid this mess
583 		 */
584 	}
585 	return false;
586 }
587 
/* Handle TCP_CONGESTION: apply the congestion control algorithm named in
 * @optval to every subflow and, when none of them fails, remember the name
 * in msk->ca_name.
 *
 * The msk sockopt sequence number is bumped and every subflow is tagged with
 * it regardless of the per-subflow outcome.
 *
 * Returns 0 on success, -EINVAL for an empty @optval, -EFAULT when the name
 * cannot be copied, or the first error reported by
 * tcp_set_congestion_control().
 */
static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
					       unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_subflow_context *subflow;
	char ca_name[TCP_CA_NAME_MAX];
	bool net_admin;
	int copied;
	int ret = 0;

	if (optlen < 1)
		return -EINVAL;

	/* copy at most TCP_CA_NAME_MAX - 1 bytes, leaving room for the
	 * terminator added below
	 */
	copied = strncpy_from_sockptr(ca_name, optval,
				      min_t(long, TCP_CA_NAME_MAX - 1, optlen));
	if (copied < 0)
		return -EFAULT;

	ca_name[copied] = 0;

	net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);

	lock_sock(sk);
	sockopt_seq_inc(msk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		int err;

		lock_sock(ssk);
		err = tcp_set_congestion_control(ssk, ca_name, true, net_admin);
		/* only the first failure is reported to the caller */
		if (!ret && err < 0)
			ret = err;
		subflow->setsockopt_seq = msk->setsockopt_seq;
		release_sock(ssk);
	}

	if (!ret)
		strscpy(msk->ca_name, ca_name, sizeof(msk->ca_name));

	release_sock(sk);
	return ret;
}
630 
__mptcp_setsockopt_set_val(struct mptcp_sock * msk,int max,int (* set_val)(struct sock *,int),int * msk_val,int val)631 static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max,
632 				      int (*set_val)(struct sock *, int),
633 				      int *msk_val, int val)
634 {
635 	struct mptcp_subflow_context *subflow;
636 	int err = 0;
637 
638 	mptcp_for_each_subflow(msk, subflow) {
639 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
640 		int ret;
641 
642 		lock_sock(ssk);
643 		ret = set_val(ssk, val);
644 		err = err ? : ret;
645 		release_sock(ssk);
646 	}
647 
648 	if (!err) {
649 		*msk_val = val;
650 		sockopt_seq_inc(msk);
651 	}
652 
653 	return err;
654 }
655 
__mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock * msk,int val)656 static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val)
657 {
658 	struct mptcp_subflow_context *subflow;
659 	struct sock *sk = (struct sock *)msk;
660 
661 	sockopt_seq_inc(msk);
662 	msk->cork = !!val;
663 	mptcp_for_each_subflow(msk, subflow) {
664 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
665 
666 		lock_sock(ssk);
667 		__tcp_sock_set_cork(ssk, !!val);
668 		release_sock(ssk);
669 	}
670 	if (!val)
671 		mptcp_check_and_set_pending(sk);
672 
673 	return 0;
674 }
675 
__mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock * msk,int val)676 static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val)
677 {
678 	struct mptcp_subflow_context *subflow;
679 	struct sock *sk = (struct sock *)msk;
680 
681 	sockopt_seq_inc(msk);
682 	msk->nodelay = !!val;
683 	mptcp_for_each_subflow(msk, subflow) {
684 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
685 
686 		lock_sock(ssk);
687 		__tcp_sock_set_nodelay(ssk, !!val);
688 		release_sock(ssk);
689 	}
690 	if (val)
691 		mptcp_check_and_set_pending(sk);
692 	return 0;
693 }
694 
/* Handle the SOL_IP options that are first set on the msk itself via
 * ip_setsockopt() and then copied onto the socket returned by
 * __mptcp_nmpc_sk(): IP_FREEBIND, IP_TRANSPARENT, IP_BIND_ADDRESS_NO_PORT
 * and IP_LOCAL_PORT_RANGE.
 *
 * Returns 0 on success, the error reported by ip_setsockopt() or
 * __mptcp_nmpc_sk(), or -EOPNOTSUPP (with a one-time warning) for any other
 * option name.
 */
static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct sock *ssk;
	int err;

	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
	if (err)
		return err;

	lock_sock(sk);

	ssk = __mptcp_nmpc_sk(msk);
	if (IS_ERR(ssk)) {
		err = PTR_ERR(ssk);
		goto unlock;
	}

	/* propagate to the subflow the value just stored on the msk */
	switch (optname) {
	case IP_FREEBIND:
		inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
		break;
	case IP_TRANSPARENT:
		inet_assign_bit(TRANSPARENT, ssk,
				inet_test_bit(TRANSPARENT, sk));
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
				inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
		break;
	case IP_LOCAL_PORT_RANGE:
		WRITE_ONCE(inet_sk(ssk)->local_port_range,
			   READ_ONCE(inet_sk(sk)->local_port_range));
		break;
	default:
		/* the visible caller only passes the options listed above */
		release_sock(sk);
		WARN_ON_ONCE(1);
		return -EOPNOTSUPP;
	}

	sockopt_seq_inc(msk);

unlock:
	release_sock(sk);
	return err;
}
740 
/* Handle IP_TOS: set the option on the msk via ip_setsockopt(), then copy
 * the resulting TOS value to every subflow.
 *
 * Returns 0 on success or the error reported by ip_setsockopt().
 */
static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_subflow_context *subflow;
	int tos;
	int err;

	err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen);
	if (err)
		return err;

	lock_sock(sk);
	sockopt_seq_inc(msk);

	/* read back what ip_setsockopt() actually stored on the msk */
	tos = READ_ONCE(inet_sk(sk)->tos);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast(ssk);

		__ip_sock_set_tos(ssk, tos);
		unlock_sock_fast(ssk, slow);
	}
	release_sock(sk);

	return 0;
}
768 
/* Dispatch a SOL_IP option to its handler; option names without a handler
 * here yield -EOPNOTSUPP.
 */
static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	switch (optname) {
	case IP_FREEBIND:
	case IP_TRANSPARENT:
	case IP_BIND_ADDRESS_NO_PORT:
	case IP_LOCAL_PORT_RANGE:
		return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
	case IP_TOS:
		return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
	default:
		return -EOPNOTSUPP;
	}
}
784 
/* Apply a socket option only to the socket returned by __mptcp_nmpc_sk(),
 * i.e. limited to the first subflow, before the connection establishment.
 *
 * Returns the tcp_setsockopt() result, or the error encoded in the pointer
 * returned by __mptcp_nmpc_sk().
 */
static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
					  sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct sock *ssk;
	int ret;

	lock_sock(sk);

	ssk = __mptcp_nmpc_sk(msk);
	if (IS_ERR(ssk))
		ret = PTR_ERR(ssk);
	else
		ret = tcp_setsockopt(ssk, level, optname, optval, optlen);

	release_sock(sk);
	return ret;
}
806 
/* Apply a socket option to every subflow of @msk through tcp_setsockopt().
 *
 * The walk stops at the first failing subflow and that error is returned;
 * the msk sockopt sequence number is bumped only when all subflows succeed
 * (including the case where there is no subflow at all).
 */
static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
				   int optname, sockptr_t optval,
				   unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		int err;

		err = tcp_setsockopt(ssk, level, optname, optval, optlen);
		if (err)
			return err;
	}

	sockopt_seq_inc(msk);
	return 0;
}
827 
/* Entry point for SOL_TCP level options on an MPTCP socket.
 *
 * A first group of options is handled before any value is parsed:
 * TCP_ULP is rejected, TCP_CONGESTION takes a string, and the
 * TCP_DEFER_ACCEPT / TCP_FASTOPEN* options are applied to the first subflow
 * only.  Every other option carries an int, which is copied from @optval
 * and then processed with the msk socket lock held.
 *
 * Returns 0 on success, the error from mptcp_get_int_option() when the
 * value cannot be fetched, the handler's error, or -ENOPROTOOPT for option
 * names without a handler.
 */
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (void *)msk;
	int ret, val;

	switch (optname) {
	case TCP_ULP:
		return -EOPNOTSUPP;
	case TCP_CONGESTION:
		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
	case TCP_DEFER_ACCEPT:
		/* See tcp.c: TCP_DEFER_ACCEPT does not fail */
		mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen);
		return 0;
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname,
						      optval, optlen);
	}

	ret = mptcp_get_int_option(msk, optval, optlen, &val);
	if (ret)
		return ret;

	lock_sock(sk);
	switch (optname) {
	case TCP_INQ:
		/* boolean option: only 0 and 1 are accepted */
		if (val < 0 || val > 1)
			ret = -EINVAL;
		else
			msk->recvmsg_inq = !!val;
		break;
	case TCP_NOTSENT_LOWAT:
		WRITE_ONCE(msk->notsent_lowat, val);
		mptcp_write_space(sk);
		break;
	case TCP_CORK:
		ret = __mptcp_setsockopt_sol_tcp_cork(msk, val);
		break;
	case TCP_NODELAY:
		ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val);
		break;
	case TCP_KEEPIDLE:
		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE,
						 &tcp_sock_set_keepidle_locked,
						 &msk->keepalive_idle, val);
		break;
	case TCP_KEEPINTVL:
		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL,
						 &tcp_sock_set_keepintvl,
						 &msk->keepalive_intvl, val);
		break;
	case TCP_KEEPCNT:
		ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT,
						 &tcp_sock_set_keepcnt,
						 &msk->keepalive_cnt,
						 val);
		break;
	case TCP_MAXSEG:
		/* Cache the value at msk level only once the subflows have
		 * accepted it, so that a rejected setting is not left behind
		 * in msk->maxseg.  With no subflow attached the value is
		 * stored without further checks here.
		 */
		ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval,
					      optlen);
		if (!ret)
			msk->maxseg = val;
		break;
	default:
		ret = -ENOPROTOOPT;
	}

	release_sock(sk);
	return ret;
}
901 
/* setsockopt() entry point for MPTCP sockets.
 *
 * SOL_SOCKET options are handled first.  For any other level the option
 * must pass mptcp_supported_sockopt(), otherwise -ENOPROTOOPT is returned.
 * When the msk is in fallback mode the option is passed straight to the
 * remaining subflow; otherwise it is dispatched to the per-level handler,
 * and levels without a handler yield -EOPNOTSUPP.
 */
int mptcp_setsockopt(struct sock *sk, int level, int optname,
		     sockptr_t optval, unsigned int optlen)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *fallback_ssk;

	pr_debug("msk=%p\n", msk);

	if (level == SOL_SOCKET)
		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);

	if (!mptcp_supported_sockopt(level, optname))
		return -ENOPROTOOPT;

	/* @@ the meaning of setsockopt() when the socket is connected and
	 * there are multiple subflows is not yet defined. It is up to the
	 * MPTCP-level socket to configure the subflows until the subflow
	 * is in TCP fallback, when TCP socket options are passed through
	 * to the one remaining subflow.
	 */
	lock_sock(sk);
	fallback_ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);
	if (fallback_ssk)
		return tcp_setsockopt(fallback_ssk, level, optname, optval,
				      optlen);

	switch (level) {
	case SOL_IP:
		return mptcp_setsockopt_v4(msk, optname, optval, optlen);
	case SOL_IPV6:
		return mptcp_setsockopt_v6(msk, optname, optval, optlen);
	case SOL_TCP:
		return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);
	default:
		return -EOPNOTSUPP;
	}
}
939 
/* Read a socket option from the first subflow of @msk.
 *
 * msk->first is used when set; otherwise the socket returned by
 * __mptcp_nmpc_sk() is queried.  Returns the tcp_getsockopt() result, or
 * the error encoded in the pointer returned by __mptcp_nmpc_sk().
 */
static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
					  char __user *optval, int __user *optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct sock *ssk;
	int ret;

	lock_sock(sk);

	ssk = msk->first;
	if (!ssk) {
		ssk = __mptcp_nmpc_sk(msk);
		if (IS_ERR(ssk)) {
			release_sock(sk);
			return PTR_ERR(ssk);
		}
	}

	ret = tcp_getsockopt(ssk, level, optname, optval, optlen);

	release_sock(sk);
	return ret;
}
965 
mptcp_diag_fill_info(struct mptcp_sock * msk,struct mptcp_info * info)966 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
967 {
968 	struct sock *sk = (struct sock *)msk;
969 	u32 flags = 0;
970 	bool slow;
971 	u32 now;
972 
973 	memset(info, 0, sizeof(*info));
974 
975 	info->mptcpi_extra_subflows = READ_ONCE(msk->pm.extra_subflows);
976 	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
977 	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
978 	info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
979 
980 	if (inet_sk_state_load(sk) == TCP_LISTEN)
981 		return;
982 
983 	/* The following limits only make sense for the in-kernel PM */
984 	if (mptcp_pm_is_kernel(msk)) {
985 		info->mptcpi_limit_extra_subflows =
986 			mptcp_pm_get_limit_extra_subflows(msk);
987 		info->mptcpi_endp_signal_max =
988 			mptcp_pm_get_endp_signal_max(msk);
989 		info->mptcpi_limit_add_addr_accepted =
990 			mptcp_pm_get_limit_add_addr_accepted(msk);
991 		info->mptcpi_endp_subflow_max =
992 			mptcp_pm_get_endp_subflow_max(msk);
993 		info->mptcpi_endp_laminar_max =
994 			mptcp_pm_get_endp_laminar_max(msk);
995 		info->mptcpi_endp_fullmesh_max =
996 			mptcp_pm_get_endp_fullmesh_max(msk);
997 	}
998 
999 	if (__mptcp_check_fallback(msk))
1000 		flags |= MPTCP_INFO_FLAG_FALLBACK;
1001 	if (READ_ONCE(msk->can_ack))
1002 		flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
1003 	info->mptcpi_flags = flags;
1004 
1005 	slow = lock_sock_fast(sk);
1006 	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
1007 	info->mptcpi_token = msk->token;
1008 	info->mptcpi_write_seq = msk->write_seq;
1009 	info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
1010 	info->mptcpi_bytes_sent = msk->bytes_sent;
1011 	info->mptcpi_bytes_received = msk->bytes_received;
1012 	info->mptcpi_bytes_retrans = msk->bytes_retrans;
1013 	info->mptcpi_subflows_total = info->mptcpi_extra_subflows +
1014 		__mptcp_has_initial_subflow(msk);
1015 	now = tcp_jiffies32;
1016 	info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent);
1017 	info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv);
1018 	unlock_sock_fast(sk, slow);
1019 
1020 	mptcp_data_lock(sk);
1021 	info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv);
1022 	info->mptcpi_snd_una = msk->snd_una;
1023 	info->mptcpi_rcv_nxt = msk->ack_seq;
1024 	info->mptcpi_bytes_acked = msk->bytes_acked;
1025 	mptcp_data_unlock(sk);
1026 }
1027 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
1028 
/* getsockopt() handler copying a struct mptcp_info snapshot to user space.
 *
 * The user-provided length is clamped to sizeof(struct mptcp_info); the
 * clamped length is written back to @optlen and that many bytes are copied
 * to @optval.  A zero length is accepted and returns 0 without copying
 * anything.
 *
 * Returns 0 on success, -EFAULT when user memory cannot be accessed.
 */
static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen)
{
	struct mptcp_info m_info;
	int len;

	if (get_user(len, optlen))
		return -EFAULT;

	/* When used only to check if a fallback to TCP happened. */
	if (!len)
		return 0;

	len = min_t(unsigned int, len, sizeof(struct mptcp_info));

	mptcp_diag_fill_info(msk, &m_info);

	if (put_user(len, optlen) || copy_to_user(optval, &m_info, len))
		return -EFAULT;

	return 0;
}
1053 
/* Write the subflow-data header and the resulting option length back to
 * user space.
 *
 * @copied is the amount of per-subflow payload already written after the
 * header. When it is non-zero the reported length is the payload plus the
 * user-declared header size, otherwise only the header bytes copied here
 * are reported.
 *
 * Returns 0 on success, -EFAULT when user memory can't be written.
 */
static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
				  char __user *optval,
				  u32 copied,
				  int __user *optlen)
{
	/* never copy more than the kernel's view of the header */
	u32 hdr_len = min_t(u32, sfd->size_subflow_data, sizeof(*sfd));
	u32 total = copied ? copied + sfd->size_subflow_data : hdr_len;

	if (put_user(total, optlen) ||
	    copy_to_user(optval, sfd, hdr_len))
		return -EFAULT;

	return 0;
}
1074 
/* Fetch and validate the struct mptcp_subflow_data header that user space
 * placed at the start of @optval.
 *
 * Returns the number of bytes available after the header (option length
 * minus the user-declared header size) on success, -EFAULT when user
 * memory can't be read, -EINVAL when the length or the header fields are
 * not acceptable.
 */
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
				  char __user *optval,
				  int __user *optlen)
{
	unsigned int hdr_len;
	int optsz;

	/* if mptcp_subflow_data size is changed, need to adjust
	 * this function to deal with programs using old version.
	 */
	BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE);

	if (get_user(optsz, optlen))
		return -EFAULT;

	if (optsz < MIN_INFO_OPTLEN_SIZE)
		return -EINVAL;

	memset(sfd, 0, sizeof(*sfd));

	hdr_len = min_t(unsigned int, optsz, sizeof(*sfd));
	if (copy_from_user(sfd, optval, hdr_len))
		return -EFAULT;

	/* the header sizes are u32, but the option length is signed */
	if (sfd->size_subflow_data > INT_MAX || sfd->size_user > INT_MAX)
		return -EINVAL;

	/* the declared header must fit inside the supplied buffer */
	if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE ||
	    sfd->size_subflow_data > optsz)
		return -EINVAL;

	/* these two fields must be zero on input */
	if (sfd->num_subflows || sfd->size_kernel)
		return -EINVAL;

	return optsz - sfd->size_subflow_data;
}
1112 
/* MPTCP_TCPINFO getsockopt handler.
 *
 * After the subflow-data header, user space receives one tcp_info entry
 * (truncated to the user-declared entry size) per subflow, for as long as
 * the remaining buffer can hold a whole entry. All subflows are counted,
 * including those that did not fit.
 *
 * Returns 0 on success or a negative errno.
 */
static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
				    int __user *optlen)
{
	struct sock *sk = (struct sock *)msk;
	unsigned int nr_subflows = 0, written = 0;
	struct mptcp_subflow_context *subflow;
	struct mptcp_subflow_data sfd;
	char __user *dst;
	int room, err = 0;

	room = mptcp_get_subflow_data(&sfd, optval, optlen);
	if (room < 0)
		return room;

	sfd.size_kernel = sizeof(struct tcp_info);
	sfd.size_user = min_t(unsigned int, sfd.size_user,
			      sizeof(struct tcp_info));

	/* the entries start right after the user-sized header */
	dst = optval + sfd.size_subflow_data;

	lock_sock(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct tcp_info info;

		nr_subflows++;

		/* keep counting, but stop copying once the buffer is full */
		if (!room || room < sfd.size_user)
			continue;

		tcp_get_info(ssk, &info);

		if (copy_to_user(dst, &info, sfd.size_user)) {
			err = -EFAULT;
			break;
		}

		dst += sfd.size_user;
		written += sfd.size_user;
		room -= sfd.size_user;
	}

	release_sock(sk);

	if (err)
		return err;

	sfd.num_subflows = nr_subflows;

	if (mptcp_put_subflow_data(&sfd, optval, written, optlen))
		return -EFAULT;

	return 0;
}
1165 
mptcp_get_sub_addrs(const struct sock * sk,struct mptcp_subflow_addrs * a)1166 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
1167 {
1168 	const struct inet_sock *inet = inet_sk(sk);
1169 
1170 	memset(a, 0, sizeof(*a));
1171 
1172 	if (sk->sk_family == AF_INET) {
1173 		a->sin_local.sin_family = AF_INET;
1174 		a->sin_local.sin_port = inet->inet_sport;
1175 		a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr;
1176 
1177 		if (!a->sin_local.sin_addr.s_addr)
1178 			a->sin_local.sin_addr.s_addr = inet->inet_saddr;
1179 
1180 		a->sin_remote.sin_family = AF_INET;
1181 		a->sin_remote.sin_port = inet->inet_dport;
1182 		a->sin_remote.sin_addr.s_addr = inet->inet_daddr;
1183 #if IS_ENABLED(CONFIG_IPV6)
1184 	} else if (sk->sk_family == AF_INET6) {
1185 		const struct ipv6_pinfo *np = inet6_sk(sk);
1186 
1187 		if (WARN_ON_ONCE(!np))
1188 			return;
1189 
1190 		a->sin6_local.sin6_family = AF_INET6;
1191 		a->sin6_local.sin6_port = inet->inet_sport;
1192 
1193 		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
1194 			a->sin6_local.sin6_addr = np->saddr;
1195 		else
1196 			a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr;
1197 
1198 		a->sin6_remote.sin6_family = AF_INET6;
1199 		a->sin6_remote.sin6_port = inet->inet_dport;
1200 		a->sin6_remote.sin6_addr = sk->sk_v6_daddr;
1201 #endif
1202 	}
1203 }
1204 
/* MPTCP_SUBFLOW_ADDRS getsockopt handler.
 *
 * After the subflow-data header, user space receives one
 * mptcp_subflow_addrs entry (truncated to the user-declared entry size)
 * per subflow, for as long as the remaining buffer can hold a whole
 * entry. All subflows are counted, including those that did not fit.
 *
 * Returns 0 on success or a negative errno.
 */
static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval,
					  int __user *optlen)
{
	struct sock *sk = (struct sock *)msk;
	unsigned int nr_subflows = 0, written = 0;
	struct mptcp_subflow_context *subflow;
	struct mptcp_subflow_data sfd;
	char __user *dst;
	int room, err = 0;

	room = mptcp_get_subflow_data(&sfd, optval, optlen);
	if (room < 0)
		return room;

	sfd.size_kernel = sizeof(struct mptcp_subflow_addrs);
	sfd.size_user = min_t(unsigned int, sfd.size_user,
			      sizeof(struct mptcp_subflow_addrs));

	/* the entries start right after the user-sized header */
	dst = optval + sfd.size_subflow_data;

	lock_sock(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct mptcp_subflow_addrs addrs;

		nr_subflows++;

		/* keep counting, but stop copying once the buffer is full */
		if (!room || room < sfd.size_user)
			continue;

		mptcp_get_sub_addrs(ssk, &addrs);

		if (copy_to_user(dst, &addrs, sfd.size_user)) {
			err = -EFAULT;
			break;
		}

		dst += sfd.size_user;
		written += sfd.size_user;
		room -= sfd.size_user;
	}

	release_sock(sk);

	if (err)
		return err;

	sfd.num_subflows = nr_subflows;

	if (mptcp_put_subflow_data(&sfd, optval, written, optlen))
		return -EFAULT;

	return 0;
}
1257 
/* Fetch and validate the fixed-size leading part of struct
 * mptcp_full_info (everything before the embedded mptcp_info) from
 * user space.
 *
 * Returns the number of bytes user space provided beyond that leading
 * part on success, -EFAULT when user memory can't be read, -EINVAL when
 * the buffer is too short or the header fields are not acceptable.
 */
static int mptcp_get_full_info(struct mptcp_full_info *mfi,
			       char __user *optval,
			       int __user *optlen)
{
	int optsz;

	BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
		     MIN_FULL_INFO_OPTLEN_SIZE);

	if (get_user(optsz, optlen))
		return -EFAULT;

	if (optsz < MIN_FULL_INFO_OPTLEN_SIZE)
		return -EINVAL;

	memset(mfi, 0, sizeof(*mfi));
	if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
		return -EFAULT;

	/* these fields must be zero on input */
	if (mfi->size_tcpinfo_kernel || mfi->size_sfinfo_kernel ||
	    mfi->num_subflows)
		return -EINVAL;

	/* the user-declared entry sizes must fit a signed int */
	if (mfi->size_sfinfo_user > INT_MAX ||
	    mfi->size_tcpinfo_user > INT_MAX)
		return -EINVAL;

	return optsz - MIN_FULL_INFO_OPTLEN_SIZE;
}
1288 
/* Copy the leading part of @mfi plus @copylen bytes of what follows it
 * back to user space, and report the total in @optlen.
 *
 * Returns 0 on success, -EFAULT when user memory can't be written.
 */
static int mptcp_put_full_info(struct mptcp_full_info *mfi,
			       char __user *optval,
			       u32 copylen,
			       int __user *optlen)
{
	u32 total = copylen + MIN_FULL_INFO_OPTLEN_SIZE;

	if (put_user(total, optlen) ||
	    copy_to_user(optval, mfi, total))
		return -EFAULT;

	return 0;
}
1302 
/* MPTCP_FULL_INFO getsockopt handler.
 *
 * Fills the embedded mptcp_info when user space left room for it, then
 * walks the subflows writing one mptcp_subflow_info and one tcp_info
 * entry per subflow into the two user arrays referenced by the header,
 * for at most size_arrays_user subflows. All subflows are counted, even
 * the ones beyond that limit.
 *
 * Returns 0 on success or a negative errno.
 */
static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
				      int __user *optlen)
{
	struct sock *sk = (struct sock *)msk;
	void __user *tcpinfo_dst, *sfinfo_dst;
	struct mptcp_subflow_context *subflow;
	unsigned int seen = 0, payload = 0;
	struct mptcp_full_info mfi;
	int room, err = 0;

	room = mptcp_get_full_info(&mfi, optval, optlen);
	if (room < 0)
		return room;

	/* don't bother filling the mptcp info if there is not enough
	 * user-space-provided storage
	 */
	if (room > 0) {
		mptcp_diag_fill_info(msk, &mfi.mptcp_info);
		payload += min_t(unsigned int, room, sizeof(struct mptcp_info));
	}

	/* report the kernel entry sizes and clamp the user ones to them */
	mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
	mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
	mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
				      sizeof(struct tcp_info));
	mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
				     sizeof(struct mptcp_subflow_info));

	tcpinfo_dst = u64_to_user_ptr(mfi.tcp_info);
	sfinfo_dst = u64_to_user_ptr(mfi.subflow_info);

	lock_sock(sk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct mptcp_subflow_info sfinfo;
		struct tcp_info tcp_info;
		unsigned int idx = seen++;

		if (idx >= mfi.size_arrays_user)
			continue;

		/* fetch addr/tcp_info only if the user space buffers
		 * are wide enough
		 */
		memset(&sfinfo, 0, sizeof(sfinfo));
		sfinfo.id = subflow->subflow_id;
		if (mfi.size_sfinfo_user >
		    offsetof(struct mptcp_subflow_info, addrs))
			mptcp_get_sub_addrs(ssk, &sfinfo.addrs);

		if (copy_to_user(sfinfo_dst, &sfinfo, mfi.size_sfinfo_user)) {
			err = -EFAULT;
			break;
		}

		if (mfi.size_tcpinfo_user) {
			tcp_get_info(ssk, &tcp_info);
			if (copy_to_user(tcpinfo_dst, &tcp_info,
					 mfi.size_tcpinfo_user)) {
				err = -EFAULT;
				break;
			}
		}

		tcpinfo_dst += mfi.size_tcpinfo_user;
		sfinfo_dst += mfi.size_sfinfo_user;
	}
	release_sock(sk);

	if (err)
		return err;

	mfi.num_subflows = seen;
	if (mptcp_put_full_info(&mfi, optval, payload, optlen))
		return -EFAULT;

	return 0;
}
1376 
/* Return the integer option value @val to user space.
 *
 * When the user buffer is shorter than an int (but not empty) and @val
 * fits in one byte, a single byte is returned. Otherwise up to
 * sizeof(int) bytes of @val are copied. The number of bytes written is
 * stored in @optlen.
 *
 * Returns 0 on success, -EINVAL on a negative length, -EFAULT when user
 * memory can't be accessed.
 */
static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
				int __user *optlen, int val)
{
	unsigned char byte;
	const void *src;
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	if (len > 0 && len < sizeof(int) && val >= 0 && val <= 255) {
		/* short buffer, small value: hand back one byte */
		byte = (unsigned char)val;
		src = &byte;
		len = 1;
	} else {
		src = &val;
		len = min_t(unsigned int, len, sizeof(int));
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, src, len))
		return -EFAULT;

	return 0;
}
1405 
/* SOL_TCP getsockopt dispatcher for an MPTCP socket.
 *
 * A first group of options is forwarded to
 * mptcp_getsockopt_first_sf_only(); the others are answered from values
 * kept in the msk. For the keepalive options a zero msk value is replaced
 * by the corresponding per-netns sysctl.
 *
 * Returns 0 on success, -EOPNOTSUPP for unhandled options, or the error
 * reported by the helper used.
 */
static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    char __user *optval, int __user *optlen)
{
	struct sock *sk = (void *)msk;
	int val;

	switch (optname) {
	case TCP_ULP:
	case TCP_CONGESTION:
	case TCP_INFO:
	case TCP_CC_INFO:
	case TCP_DEFER_ACCEPT:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
	case TCP_MAXSEG:
		return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
						      optval, optlen);
	case TCP_INQ:
		val = msk->recvmsg_inq;
		break;
	case TCP_CORK:
		val = msk->cork;
		break;
	case TCP_NODELAY:
		val = msk->nodelay;
		break;
	case TCP_KEEPIDLE:
		val = msk->keepalive_idle ? :
		      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ;
		break;
	case TCP_KEEPINTVL:
		val = msk->keepalive_intvl ? :
		      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ;
		break;
	case TCP_KEEPCNT:
		val = msk->keepalive_cnt ? :
		      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes);
		break;
	case TCP_NOTSENT_LOWAT:
		val = msk->notsent_lowat;
		break;
	case TCP_IS_MPTCP:
		val = 1;
		break;
	default:
		return -EOPNOTSUPP;
	}

	return mptcp_put_int_option(msk, optval, optlen, val);
}
1451 
/* SOL_IP getsockopt dispatcher: the supported options are answered from
 * the msk's own inet socket state.
 *
 * Returns 0 on success, -EOPNOTSUPP for unhandled options, or the error
 * reported by mptcp_put_int_option().
 */
static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
			       char __user *optval, int __user *optlen)
{
	struct sock *sk = (void *)msk;
	int val;

	switch (optname) {
	case IP_TOS:
		val = READ_ONCE(inet_sk(sk)->tos);
		break;
	case IP_FREEBIND:
		val = inet_test_bit(FREEBIND, sk);
		break;
	case IP_TRANSPARENT:
		val = inet_test_bit(TRANSPARENT, sk);
		break;
	case IP_BIND_ADDRESS_NO_PORT:
		val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
		break;
	case IP_LOCAL_PORT_RANGE:
		val = READ_ONCE(inet_sk(sk)->local_port_range);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return mptcp_put_int_option(msk, optval, optlen, val);
}
1476 
/* SOL_IPV6 getsockopt dispatcher: the supported options are answered from
 * the msk's own socket state.
 *
 * Returns 0 on success, -EOPNOTSUPP for unhandled options, or the error
 * reported by mptcp_put_int_option().
 */
static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname,
			       char __user *optval, int __user *optlen)
{
	struct sock *sk = (void *)msk;
	int val;

	switch (optname) {
	case IPV6_V6ONLY:
		val = sk->sk_ipv6only;
		break;
	case IPV6_TRANSPARENT:
		val = inet_test_bit(TRANSPARENT, sk);
		break;
	case IPV6_FREEBIND:
		val = inet_test_bit(FREEBIND, sk);
		break;
	default:
		return -EOPNOTSUPP;
	}

	return mptcp_put_int_option(msk, optval, optlen, val);
}
1496 
/* SOL_MPTCP getsockopt dispatcher: route each supported option to its
 * dedicated handler.
 *
 * Returns the handler's result, or -EOPNOTSUPP for unhandled options.
 */
static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
				      char __user *optval, int __user *optlen)
{
	switch (optname) {
	case MPTCP_INFO:
		return mptcp_getsockopt_info(msk, optval, optlen);
	case MPTCP_FULL_INFO:
		return mptcp_getsockopt_full_info(msk, optval, optlen);
	case MPTCP_TCPINFO:
		return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
	case MPTCP_SUBFLOW_ADDRS:
		return mptcp_getsockopt_subflow_addrs(msk, optval, optlen);
	default:
		return -EOPNOTSUPP;
	}
}
1513 
/* getsockopt() entry point for MPTCP sockets.
 *
 * Returns 0 on success, -EOPNOTSUPP for unsupported levels, or the error
 * reported by the handler the request was routed to.
 */
int mptcp_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *option)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *ssk;

	pr_debug("msk=%p\n", msk);

	/* When the msk has fallen back to plain TCP the request is passed
	 * through to the first subflow; otherwise it is answered at the
	 * MPTCP level by the per-level handlers below. The fallback check
	 * requires the msk socket lock.
	 */
	lock_sock(sk);
	ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);
	if (ssk)
		return tcp_getsockopt(ssk, level, optname, optval, option);

	switch (level) {
	case SOL_IP:
		return mptcp_getsockopt_v4(msk, optname, optval, option);
	case SOL_IPV6:
		return mptcp_getsockopt_v6(msk, optname, optval, option);
	case SOL_TCP:
		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
	case SOL_MPTCP:
		return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option);
	default:
		return -EOPNOTSUPP;
	}
}
1544 
sync_socket_options(struct mptcp_sock * msk,struct sock * ssk)1545 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
1546 {
1547 	static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
1548 	struct sock *sk = (struct sock *)msk;
1549 	bool keep_open;
1550 
1551 	keep_open = sock_flag(sk, SOCK_KEEPOPEN);
1552 	if (ssk->sk_prot->keepalive)
1553 		ssk->sk_prot->keepalive(ssk, keep_open);
1554 	sock_valbool_flag(ssk, SOCK_KEEPOPEN, keep_open);
1555 
1556 	ssk->sk_priority = sk->sk_priority;
1557 	ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
1558 	ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
1559 	ssk->sk_ipv6only = sk->sk_ipv6only;
1560 	__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
1561 
1562 	if (sk->sk_userlocks & tx_rx_locks) {
1563 		ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
1564 		if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) {
1565 			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
1566 			mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
1567 		}
1568 		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1569 			__mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf);
1570 	}
1571 
1572 	if (sock_flag(sk, SOCK_LINGER)) {
1573 		ssk->sk_lingertime = sk->sk_lingertime;
1574 		sock_set_flag(ssk, SOCK_LINGER);
1575 	} else {
1576 		sock_reset_flag(ssk, SOCK_LINGER);
1577 	}
1578 
1579 	if (sk->sk_mark != ssk->sk_mark) {
1580 		ssk->sk_mark = sk->sk_mark;
1581 		sk_dst_reset(ssk);
1582 	}
1583 
1584 	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
1585 
1586 	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
1587 		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
1588 	__tcp_sock_set_cork(ssk, !!msk->cork);
1589 	__tcp_sock_set_nodelay(ssk, !!msk->nodelay);
1590 	tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
1591 	tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
1592 	tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);
1593 	tcp_sock_set_maxseg(ssk, msk->maxseg);
1594 
1595 	inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
1596 	inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
1597 	inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
1598 	WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
1599 }
1600 
mptcp_sockopt_sync_locked(struct mptcp_sock * msk,struct sock * ssk)1601 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
1602 {
1603 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1604 
1605 	msk_owned_by_me(msk);
1606 
1607 	ssk->sk_rcvlowat = 0;
1608 
1609 	/* subflows must ignore any latency-related settings: will not affect
1610 	 * the user-space - only the msk is relevant - but will foul the
1611 	 * mptcp scheduler
1612 	 */
1613 	tcp_sk(ssk)->notsent_lowat = UINT_MAX;
1614 
1615 	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
1616 		sync_socket_options(msk, ssk);
1617 
1618 		subflow->setsockopt_seq = msk->setsockopt_seq;
1619 	}
1620 }
1621 
1622 /* unfortunately this is different enough from the tcp version so
1623  * that we can't factor it out
1624  */
mptcp_set_rcvlowat(struct sock * sk,int val)1625 int mptcp_set_rcvlowat(struct sock *sk, int val)
1626 {
1627 	struct mptcp_subflow_context *subflow;
1628 	int space, cap;
1629 
1630 	/* bpf can land here with a wrong sk type */
1631 	if (sk->sk_protocol == IPPROTO_TCP)
1632 		return -EINVAL;
1633 
1634 	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1635 		cap = sk->sk_rcvbuf >> 1;
1636 	else
1637 		cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
1638 	val = min(val, cap);
1639 	WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1640 
1641 	/* Check if we need to signal EPOLLIN right now */
1642 	if (mptcp_epollin_ready(sk))
1643 		sk->sk_data_ready(sk);
1644 
1645 	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1646 		return 0;
1647 
1648 	space = mptcp_space_from_win(sk, val);
1649 	if (space <= sk->sk_rcvbuf)
1650 		return 0;
1651 
1652 	/* propagate the rcvbuf changes to all the subflows */
1653 	WRITE_ONCE(sk->sk_rcvbuf, space);
1654 	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
1655 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
1656 		bool slow;
1657 
1658 		slow = lock_sock_fast(ssk);
1659 		WRITE_ONCE(ssk->sk_rcvbuf, space);
1660 		WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
1661 		unlock_sock_fast(ssk, slow);
1662 	}
1663 	return 0;
1664 }
1665