xref: /linux/net/mptcp/subflow.c (revision 2cef30d7bd8b8fbddeb74e3753c29d4248c094e0)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Multipath TCP
3  *
4  * Copyright (c) 2017 - 2019, Intel Corporation.
5  */
6 
7 #define pr_fmt(fmt) "MPTCP: " fmt
8 
9 #include <linux/kernel.h>
10 #include <linux/module.h>
11 #include <linux/netdevice.h>
12 #include <crypto/algapi.h>
13 #include <crypto/sha.h>
14 #include <net/sock.h>
15 #include <net/inet_common.h>
16 #include <net/inet_hashtables.h>
17 #include <net/protocol.h>
18 #include <net/tcp.h>
19 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
20 #include <net/ip6_route.h>
21 #endif
22 #include <net/mptcp.h>
23 #include "protocol.h"
24 #include "mib.h"
25 
26 static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
27 				  enum linux_mptcp_mib_field field)
28 {
29 	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
30 }
31 
32 static void subflow_req_destructor(struct request_sock *req)
33 {
34 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
35 
36 	pr_debug("subflow_req=%p", subflow_req);
37 
38 	if (subflow_req->msk)
39 		sock_put((struct sock *)subflow_req->msk);
40 
41 	mptcp_token_destroy_request(req);
42 	tcp_request_sock_ops.destructor(req);
43 }
44 
45 static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
46 				  void *hmac)
47 {
48 	u8 msg[8];
49 
50 	put_unaligned_be32(nonce1, &msg[0]);
51 	put_unaligned_be32(nonce2, &msg[4]);
52 
53 	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
54 }
55 
56 /* validate received token and create truncated hmac and nonce for SYN-ACK */
57 static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
58 						     const struct sk_buff *skb)
59 {
60 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
61 	u8 hmac[SHA256_DIGEST_SIZE];
62 	struct mptcp_sock *msk;
63 	int local_id;
64 
65 	msk = mptcp_token_get_sock(subflow_req->token);
66 	if (!msk) {
67 		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
68 		return NULL;
69 	}
70 
71 	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
72 	if (local_id < 0) {
73 		sock_put((struct sock *)msk);
74 		return NULL;
75 	}
76 	subflow_req->local_id = local_id;
77 
78 	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
79 
80 	subflow_generate_hmac(msk->local_key, msk->remote_key,
81 			      subflow_req->local_nonce,
82 			      subflow_req->remote_nonce, hmac);
83 
84 	subflow_req->thmac = get_unaligned_be64(hmac);
85 	return msk;
86 }
87 
88 static void subflow_init_req(struct request_sock *req,
89 			     const struct sock *sk_listener,
90 			     struct sk_buff *skb)
91 {
92 	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
93 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
94 	struct mptcp_options_received mp_opt;
95 
96 	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
97 
98 	mptcp_get_options(skb, &mp_opt);
99 
100 	subflow_req->mp_capable = 0;
101 	subflow_req->mp_join = 0;
102 	subflow_req->msk = NULL;
103 	mptcp_token_init_request(req);
104 
105 #ifdef CONFIG_TCP_MD5SIG
106 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
107 	 * TCP option space.
108 	 */
109 	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
110 		return;
111 #endif
112 
113 	if (mp_opt.mp_capable) {
114 		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
115 
116 		if (mp_opt.mp_join)
117 			return;
118 	} else if (mp_opt.mp_join) {
119 		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
120 	}
121 
122 	if (mp_opt.mp_capable && listener->request_mptcp) {
123 		int err;
124 
125 		err = mptcp_token_new_request(req);
126 		if (err == 0)
127 			subflow_req->mp_capable = 1;
128 
129 		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
130 	} else if (mp_opt.mp_join && listener->request_mptcp) {
131 		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
132 		subflow_req->mp_join = 1;
133 		subflow_req->backup = mp_opt.backup;
134 		subflow_req->remote_id = mp_opt.join_id;
135 		subflow_req->token = mp_opt.token;
136 		subflow_req->remote_nonce = mp_opt.nonce;
137 		subflow_req->msk = subflow_token_join_request(req, skb);
138 		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
139 			 subflow_req->remote_nonce, subflow_req->msk);
140 	}
141 }
142 
143 static void subflow_v4_init_req(struct request_sock *req,
144 				const struct sock *sk_listener,
145 				struct sk_buff *skb)
146 {
147 	tcp_rsk(req)->is_mptcp = 1;
148 
149 	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
150 
151 	subflow_init_req(req, sk_listener, skb);
152 }
153 
154 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
155 static void subflow_v6_init_req(struct request_sock *req,
156 				const struct sock *sk_listener,
157 				struct sk_buff *skb)
158 {
159 	tcp_rsk(req)->is_mptcp = 1;
160 
161 	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);
162 
163 	subflow_init_req(req, sk_listener, skb);
164 }
165 #endif
166 
167 /* validate received truncated hmac and create hmac for third ACK */
168 static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
169 {
170 	u8 hmac[SHA256_DIGEST_SIZE];
171 	u64 thmac;
172 
173 	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
174 			      subflow->remote_nonce, subflow->local_nonce,
175 			      hmac);
176 
177 	thmac = get_unaligned_be64(hmac);
178 	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
179 		 subflow, subflow->token,
180 		 (unsigned long long)thmac,
181 		 (unsigned long long)subflow->thmac);
182 
183 	return thmac == subflow->thmac;
184 }
185 
186 static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
187 {
188 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
189 	struct mptcp_options_received mp_opt;
190 	struct sock *parent = subflow->conn;
191 
192 	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
193 
194 	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
195 		inet_sk_state_store(parent, TCP_ESTABLISHED);
196 		parent->sk_state_change(parent);
197 	}
198 
199 	/* be sure no special action on any packet other than syn-ack */
200 	if (subflow->conn_finished)
201 		return;
202 
203 	subflow->conn_finished = 1;
204 	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
205 	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
206 
207 	mptcp_get_options(skb, &mp_opt);
208 	if (subflow->request_mptcp && mp_opt.mp_capable) {
209 		subflow->mp_capable = 1;
210 		subflow->can_ack = 1;
211 		subflow->remote_key = mp_opt.sndr_key;
212 		pr_debug("subflow=%p, remote_key=%llu", subflow,
213 			 subflow->remote_key);
214 	} else if (subflow->request_join && mp_opt.mp_join) {
215 		subflow->mp_join = 1;
216 		subflow->thmac = mp_opt.thmac;
217 		subflow->remote_nonce = mp_opt.nonce;
218 		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
219 			 subflow->thmac, subflow->remote_nonce);
220 	} else {
221 		if (subflow->request_mptcp)
222 			MPTCP_INC_STATS(sock_net(sk),
223 					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
224 		mptcp_do_fallback(sk);
225 		pr_fallback(mptcp_sk(subflow->conn));
226 	}
227 
228 	if (mptcp_check_fallback(sk))
229 		return;
230 
231 	if (subflow->mp_capable) {
232 		pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
233 			 subflow->remote_key);
234 		mptcp_finish_connect(sk);
235 	} else if (subflow->mp_join) {
236 		u8 hmac[SHA256_DIGEST_SIZE];
237 
238 		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
239 			 subflow, subflow->thmac,
240 			 subflow->remote_nonce);
241 		if (!subflow_thmac_valid(subflow)) {
242 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
243 			subflow->mp_join = 0;
244 			goto do_reset;
245 		}
246 
247 		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
248 				      subflow->local_nonce,
249 				      subflow->remote_nonce,
250 				      hmac);
251 
252 		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
253 
254 		if (!mptcp_finish_join(sk))
255 			goto do_reset;
256 
257 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
258 	} else {
259 do_reset:
260 		tcp_send_active_reset(sk, GFP_ATOMIC);
261 		tcp_done(sk);
262 	}
263 }
264 
/* Request-sock operation tables handed to tcp_conn_request() by the subflow
 * conn_request handlers. Only declared here; presumably filled in by init
 * code outside this excerpt.
 */
static struct request_sock_ops subflow_request_sock_ops;
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
267 
268 static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
269 {
270 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
271 
272 	pr_debug("subflow=%p", subflow);
273 
274 	/* Never answer to SYNs sent to broadcast or multicast */
275 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
276 		goto drop;
277 
278 	return tcp_conn_request(&subflow_request_sock_ops,
279 				&subflow_request_sock_ipv4_ops,
280 				sk, skb);
281 drop:
282 	tcp_listendrop(sk);
283 	return 0;
284 }
285 
286 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
/* IPv6 counterparts of the subflow operation tables: the request-sock ops
 * passed to tcp_conn_request(), the default af_ops for AF_INET6 subflows and
 * the af_ops selected by mptcpv6_handle_mapped() when 'mapped' is true.
 * Only declared here; presumably filled in by init code outside this
 * excerpt.
 */
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;
290 
291 static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
292 {
293 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
294 
295 	pr_debug("subflow=%p", subflow);
296 
297 	if (skb->protocol == htons(ETH_P_IP))
298 		return subflow_v4_conn_request(sk, skb);
299 
300 	if (!ipv6_unicast_destination(skb))
301 		goto drop;
302 
303 	return tcp_conn_request(&subflow_request_sock_ops,
304 				&subflow_request_sock_ipv6_ops, sk, skb);
305 
306 drop:
307 	tcp_listendrop(sk);
308 	return 0; /* don't send reset */
309 }
310 #endif
311 
312 /* validate hmac received in third ACK */
313 static bool subflow_hmac_valid(const struct request_sock *req,
314 			       const struct mptcp_options_received *mp_opt)
315 {
316 	const struct mptcp_subflow_request_sock *subflow_req;
317 	u8 hmac[SHA256_DIGEST_SIZE];
318 	struct mptcp_sock *msk;
319 
320 	subflow_req = mptcp_subflow_rsk(req);
321 	msk = subflow_req->msk;
322 	if (!msk)
323 		return false;
324 
325 	subflow_generate_hmac(msk->remote_key, msk->local_key,
326 			      subflow_req->remote_nonce,
327 			      subflow_req->local_nonce, hmac);
328 
329 	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
330 }
331 
332 static void mptcp_sock_destruct(struct sock *sk)
333 {
334 	/* if new mptcp socket isn't accepted, it is free'd
335 	 * from the tcp listener sockets request queue, linked
336 	 * from req->sk.  The tcp socket is released.
337 	 * This calls the ULP release function which will
338 	 * also remove the mptcp socket, via
339 	 * sock_put(ctx->conn).
340 	 *
341 	 * Problem is that the mptcp socket will not be in
342 	 * SYN_RECV state and doesn't have SOCK_DEAD flag.
343 	 * Both result in warnings from inet_sock_destruct.
344 	 */
345 
346 	if (sk->sk_state == TCP_SYN_RECV) {
347 		sk->sk_state = TCP_CLOSE;
348 		WARN_ON_ONCE(sk->sk_socket);
349 		sock_orphan(sk);
350 	}
351 
352 	mptcp_token_destroy(mptcp_sk(sk));
353 	inet_sock_destruct(sk);
354 }
355 
356 static void mptcp_force_close(struct sock *sk)
357 {
358 	inet_sk_state_store(sk, TCP_CLOSE);
359 	sk_common_release(sk);
360 }
361 
362 static void subflow_ulp_fallback(struct sock *sk,
363 				 struct mptcp_subflow_context *old_ctx)
364 {
365 	struct inet_connection_sock *icsk = inet_csk(sk);
366 
367 	mptcp_subflow_tcp_fallback(sk, old_ctx);
368 	icsk->icsk_ulp_ops = NULL;
369 	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
370 	tcp_sk(sk)->is_mptcp = 0;
371 }
372 
373 static void subflow_drop_ctx(struct sock *ssk)
374 {
375 	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
376 
377 	if (!ctx)
378 		return;
379 
380 	subflow_ulp_fallback(ssk, ctx);
381 	if (ctx->conn)
382 		sock_put(ctx->conn);
383 
384 	kfree_rcu(ctx, rcu);
385 }
386 
387 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
388 					  struct sk_buff *skb,
389 					  struct request_sock *req,
390 					  struct dst_entry *dst,
391 					  struct request_sock *req_unhash,
392 					  bool *own_req)
393 {
394 	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
395 	struct mptcp_subflow_request_sock *subflow_req;
396 	struct mptcp_options_received mp_opt;
397 	bool fallback, fallback_is_fatal;
398 	struct sock *new_msk = NULL;
399 	struct sock *child;
400 
401 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
402 
403 	/* After child creation we must look for 'mp_capable' even when options
404 	 * are not parsed
405 	 */
406 	mp_opt.mp_capable = 0;
407 
408 	/* hopefully temporary handling for MP_JOIN+syncookie */
409 	subflow_req = mptcp_subflow_rsk(req);
410 	fallback_is_fatal = subflow_req->mp_join;
411 	fallback = !tcp_rsk(req)->is_mptcp;
412 	if (fallback)
413 		goto create_child;
414 
415 	/* if the sk is MP_CAPABLE, we try to fetch the client key */
416 	if (subflow_req->mp_capable) {
417 		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
418 			/* here we can receive and accept an in-window,
419 			 * out-of-order pkt, which will not carry the MP_CAPABLE
420 			 * opt even on mptcp enabled paths
421 			 */
422 			goto create_msk;
423 		}
424 
425 		mptcp_get_options(skb, &mp_opt);
426 		if (!mp_opt.mp_capable) {
427 			fallback = true;
428 			goto create_child;
429 		}
430 
431 create_msk:
432 		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
433 		if (!new_msk)
434 			fallback = true;
435 	} else if (subflow_req->mp_join) {
436 		mptcp_get_options(skb, &mp_opt);
437 		if (!mp_opt.mp_join ||
438 		    !subflow_hmac_valid(req, &mp_opt)) {
439 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
440 			fallback = true;
441 		}
442 	}
443 
444 create_child:
445 	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
446 						     req_unhash, own_req);
447 
448 	if (child && *own_req) {
449 		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
450 
451 		tcp_rsk(req)->drop_req = false;
452 
453 		/* we need to fallback on ctx allocation failure and on pre-reqs
454 		 * checking above. In the latter scenario we additionally need
455 		 * to reset the context to non MPTCP status.
456 		 */
457 		if (!ctx || fallback) {
458 			if (fallback_is_fatal)
459 				goto dispose_child;
460 
461 			subflow_drop_ctx(child);
462 			goto out;
463 		}
464 
465 		if (ctx->mp_capable) {
466 			/* new mpc subflow takes ownership of the newly
467 			 * created mptcp socket
468 			 */
469 			new_msk->sk_destruct = mptcp_sock_destruct;
470 			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
471 			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
472 			ctx->conn = new_msk;
473 			new_msk = NULL;
474 
475 			/* with OoO packets we can reach here without ingress
476 			 * mpc option
477 			 */
478 			ctx->remote_key = mp_opt.sndr_key;
479 			ctx->fully_established = mp_opt.mp_capable;
480 			ctx->can_ack = mp_opt.mp_capable;
481 		} else if (ctx->mp_join) {
482 			struct mptcp_sock *owner;
483 
484 			owner = subflow_req->msk;
485 			if (!owner)
486 				goto dispose_child;
487 
488 			/* move the msk reference ownership to the subflow */
489 			subflow_req->msk = NULL;
490 			ctx->conn = (struct sock *)owner;
491 			if (!mptcp_finish_join(child))
492 				goto dispose_child;
493 
494 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
495 			tcp_rsk(req)->drop_req = true;
496 		}
497 	}
498 
499 out:
500 	/* dispose of the left over mptcp master, if any */
501 	if (unlikely(new_msk))
502 		mptcp_force_close(new_msk);
503 
504 	/* check for expected invariant - should never trigger, just help
505 	 * catching eariler subtle bugs
506 	 */
507 	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
508 		     (!mptcp_subflow_ctx(child) ||
509 		      !mptcp_subflow_ctx(child)->conn));
510 	return child;
511 
512 dispose_child:
513 	subflow_drop_ctx(child);
514 	tcp_rsk(req)->drop_req = true;
515 	tcp_send_active_reset(child, GFP_ATOMIC);
516 	inet_csk_prepare_for_destroy_sock(child);
517 	tcp_done(child);
518 
519 	/* The last child reference will be released by the caller */
520 	return child;
521 }
522 
/* Default af_ops returned by subflow_default_af_ops() for sockets that are
 * not AF_INET6. Only declared here; presumably filled in by init code
 * outside this excerpt.
 */
static struct inet_connection_sock_af_ops subflow_specific;
524 
/* Result of get_mapping_status() for the skb at the head of a subflow
 * receive queue.
 */
enum mapping_status {
	/* a usable mapping covers the current data */
	MAPPING_OK,
	/* mapping missing or inconsistent; the caller treats it as fatal */
	MAPPING_INVALID,
	/* nothing to process: empty queue, or a 0-length skb was consumed */
	MAPPING_EMPTY,
	/* DATA_FIN without payload and no mapping currently in use */
	MAPPING_DATA_FIN,
	/* the subflow is in fallback mode; the caller builds a dummy map */
	MAPPING_DUMMY
};
532 
533 static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
534 {
535 	if ((u32)seq == (u32)old_seq)
536 		return old_seq;
537 
538 	/* Assume map covers data not mapped yet. */
539 	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
540 }
541 
542 static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
543 {
544 	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
545 		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
546 }
547 
548 static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
549 {
550 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
551 	unsigned int skb_consumed;
552 
553 	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
554 	if (WARN_ON_ONCE(skb_consumed >= skb->len))
555 		return true;
556 
557 	return skb->len - skb_consumed <= subflow->map_data_len -
558 					  mptcp_subflow_get_map_offset(subflow);
559 }
560 
561 static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
562 {
563 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
564 	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
565 
566 	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
567 		/* Mapping covers data later in the subflow stream,
568 		 * currently unsupported.
569 		 */
570 		warn_bad_map(subflow, ssn);
571 		return false;
572 	}
573 	if (unlikely(!before(ssn, subflow->map_subflow_seq +
574 				  subflow->map_data_len))) {
575 		/* Mapping does covers past subflow data, invalid */
576 		warn_bad_map(subflow, ssn + skb->len);
577 		return false;
578 	}
579 	return true;
580 }
581 
582 static enum mapping_status get_mapping_status(struct sock *ssk)
583 {
584 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
585 	struct mptcp_ext *mpext;
586 	struct sk_buff *skb;
587 	u16 data_len;
588 	u64 map_seq;
589 
590 	skb = skb_peek(&ssk->sk_receive_queue);
591 	if (!skb)
592 		return MAPPING_EMPTY;
593 
594 	if (mptcp_check_fallback(ssk))
595 		return MAPPING_DUMMY;
596 
597 	mpext = mptcp_get_ext(skb);
598 	if (!mpext || !mpext->use_map) {
599 		if (!subflow->map_valid && !skb->len) {
600 			/* the TCP stack deliver 0 len FIN pkt to the receive
601 			 * queue, that is the only 0len pkts ever expected here,
602 			 * and we can admit no mapping only for 0 len pkts
603 			 */
604 			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
605 				WARN_ONCE(1, "0len seq %d:%d flags %x",
606 					  TCP_SKB_CB(skb)->seq,
607 					  TCP_SKB_CB(skb)->end_seq,
608 					  TCP_SKB_CB(skb)->tcp_flags);
609 			sk_eat_skb(ssk, skb);
610 			return MAPPING_EMPTY;
611 		}
612 
613 		if (!subflow->map_valid)
614 			return MAPPING_INVALID;
615 
616 		goto validate_seq;
617 	}
618 
619 	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
620 		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
621 		 mpext->data_len, mpext->data_fin);
622 
623 	data_len = mpext->data_len;
624 	if (data_len == 0) {
625 		pr_err("Infinite mapping not handled");
626 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
627 		return MAPPING_INVALID;
628 	}
629 
630 	if (mpext->data_fin == 1) {
631 		if (data_len == 1) {
632 			pr_debug("DATA_FIN with no payload");
633 			if (subflow->map_valid) {
634 				/* A DATA_FIN might arrive in a DSS
635 				 * option before the previous mapping
636 				 * has been fully consumed. Continue
637 				 * handling the existing mapping.
638 				 */
639 				skb_ext_del(skb, SKB_EXT_MPTCP);
640 				return MAPPING_OK;
641 			} else {
642 				return MAPPING_DATA_FIN;
643 			}
644 		}
645 
646 		/* Adjust for DATA_FIN using 1 byte of sequence space */
647 		data_len--;
648 	}
649 
650 	if (!mpext->dsn64) {
651 		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
652 				     mpext->data_seq);
653 		subflow->use_64bit_ack = 0;
654 		pr_debug("expanded seq=%llu", subflow->map_seq);
655 	} else {
656 		map_seq = mpext->data_seq;
657 		subflow->use_64bit_ack = 1;
658 	}
659 
660 	if (subflow->map_valid) {
661 		/* Allow replacing only with an identical map */
662 		if (subflow->map_seq == map_seq &&
663 		    subflow->map_subflow_seq == mpext->subflow_seq &&
664 		    subflow->map_data_len == data_len) {
665 			skb_ext_del(skb, SKB_EXT_MPTCP);
666 			return MAPPING_OK;
667 		}
668 
669 		/* If this skb data are fully covered by the current mapping,
670 		 * the new map would need caching, which is not supported
671 		 */
672 		if (skb_is_fully_mapped(ssk, skb)) {
673 			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
674 			return MAPPING_INVALID;
675 		}
676 
677 		/* will validate the next map after consuming the current one */
678 		return MAPPING_OK;
679 	}
680 
681 	subflow->map_seq = map_seq;
682 	subflow->map_subflow_seq = mpext->subflow_seq;
683 	subflow->map_data_len = data_len;
684 	subflow->map_valid = 1;
685 	subflow->mpc_map = mpext->mpc_map;
686 	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
687 		 subflow->map_seq, subflow->map_subflow_seq,
688 		 subflow->map_data_len);
689 
690 validate_seq:
691 	/* we revalidate valid mapping on new skb, because we must ensure
692 	 * the current skb is completely covered by the available mapping
693 	 */
694 	if (!validate_mapping(ssk, skb))
695 		return MAPPING_INVALID;
696 
697 	skb_ext_del(skb, SKB_EXT_MPTCP);
698 	return MAPPING_OK;
699 }
700 
701 static int subflow_read_actor(read_descriptor_t *desc,
702 			      struct sk_buff *skb,
703 			      unsigned int offset, size_t len)
704 {
705 	size_t copy_len = min(desc->count, len);
706 
707 	desc->count -= copy_len;
708 
709 	pr_debug("flushed %zu bytes, %zu left", copy_len, desc->count);
710 	return copy_len;
711 }
712 
713 static bool subflow_check_data_avail(struct sock *ssk)
714 {
715 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
716 	enum mapping_status status;
717 	struct mptcp_sock *msk;
718 	struct sk_buff *skb;
719 
720 	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
721 		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
722 	if (subflow->data_avail)
723 		return true;
724 
725 	msk = mptcp_sk(subflow->conn);
726 	for (;;) {
727 		u32 map_remaining;
728 		size_t delta;
729 		u64 ack_seq;
730 		u64 old_ack;
731 
732 		status = get_mapping_status(ssk);
733 		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
734 		if (status == MAPPING_INVALID) {
735 			ssk->sk_err = EBADMSG;
736 			goto fatal;
737 		}
738 		if (status == MAPPING_DUMMY) {
739 			__mptcp_do_fallback(msk);
740 			skb = skb_peek(&ssk->sk_receive_queue);
741 			subflow->map_valid = 1;
742 			subflow->map_seq = READ_ONCE(msk->ack_seq);
743 			subflow->map_data_len = skb->len;
744 			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
745 						   subflow->ssn_offset;
746 			return true;
747 		}
748 
749 		if (status != MAPPING_OK)
750 			return false;
751 
752 		skb = skb_peek(&ssk->sk_receive_queue);
753 		if (WARN_ON_ONCE(!skb))
754 			return false;
755 
756 		/* if msk lacks the remote key, this subflow must provide an
757 		 * MP_CAPABLE-based mapping
758 		 */
759 		if (unlikely(!READ_ONCE(msk->can_ack))) {
760 			if (!subflow->mpc_map) {
761 				ssk->sk_err = EBADMSG;
762 				goto fatal;
763 			}
764 			WRITE_ONCE(msk->remote_key, subflow->remote_key);
765 			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
766 			WRITE_ONCE(msk->can_ack, true);
767 		}
768 
769 		old_ack = READ_ONCE(msk->ack_seq);
770 		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
771 		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
772 			 ack_seq);
773 		if (ack_seq == old_ack)
774 			break;
775 
776 		/* only accept in-sequence mapping. Old values are spurious
777 		 * retransmission; we can hit "future" values on active backup
778 		 * subflow switch, we relay on retransmissions to get
779 		 * in-sequence data.
780 		 * Cuncurrent subflows support will require subflow data
781 		 * reordering
782 		 */
783 		map_remaining = subflow->map_data_len -
784 				mptcp_subflow_get_map_offset(subflow);
785 		if (before64(ack_seq, old_ack))
786 			delta = min_t(size_t, old_ack - ack_seq, map_remaining);
787 		else
788 			delta = min_t(size_t, ack_seq - old_ack, map_remaining);
789 
790 		/* discard mapped data */
791 		pr_debug("discarding %zu bytes, current map len=%d", delta,
792 			 map_remaining);
793 		if (delta) {
794 			read_descriptor_t desc = {
795 				.count = delta,
796 			};
797 			int ret;
798 
799 			ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
800 			if (ret < 0) {
801 				ssk->sk_err = -ret;
802 				goto fatal;
803 			}
804 			if (ret < delta)
805 				return false;
806 			if (delta == map_remaining)
807 				subflow->map_valid = 0;
808 		}
809 	}
810 	return true;
811 
812 fatal:
813 	/* fatal protocol error, close the socket */
814 	/* This barrier is coupled with smp_rmb() in tcp_poll() */
815 	smp_wmb();
816 	ssk->sk_error_report(ssk);
817 	tcp_set_state(ssk, TCP_CLOSE);
818 	tcp_send_active_reset(ssk, GFP_ATOMIC);
819 	return false;
820 }
821 
822 bool mptcp_subflow_data_available(struct sock *sk)
823 {
824 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
825 	struct sk_buff *skb;
826 
827 	/* check if current mapping is still valid */
828 	if (subflow->map_valid &&
829 	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
830 		subflow->map_valid = 0;
831 		subflow->data_avail = 0;
832 
833 		pr_debug("Done with mapping: seq=%u data_len=%u",
834 			 subflow->map_subflow_seq,
835 			 subflow->map_data_len);
836 	}
837 
838 	if (!subflow_check_data_avail(sk)) {
839 		subflow->data_avail = 0;
840 		return false;
841 	}
842 
843 	skb = skb_peek(&sk->sk_receive_queue);
844 	subflow->data_avail = skb &&
845 		       before(tcp_sk(sk)->copied_seq, TCP_SKB_CB(skb)->end_seq);
846 	return subflow->data_avail;
847 }
848 
849 /* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
850  * not the ssk one.
851  *
852  * In mptcp, rwin is about the mptcp-level connection data.
853  *
854  * Data that is still on the ssk rx queue can thus be ignored,
855  * as far as mptcp peer is concerened that data is still inflight.
856  * DSS ACK is updated when skb is moved to the mptcp rx queue.
857  */
858 void mptcp_space(const struct sock *ssk, int *space, int *full_space)
859 {
860 	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
861 	const struct sock *sk = subflow->conn;
862 
863 	*space = tcp_space(sk);
864 	*full_space = tcp_full_space(sk);
865 }
866 
867 static void subflow_data_ready(struct sock *sk)
868 {
869 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
870 	struct sock *parent = subflow->conn;
871 	struct mptcp_sock *msk;
872 
873 	msk = mptcp_sk(parent);
874 	if (inet_sk_state_load(sk) == TCP_LISTEN) {
875 		set_bit(MPTCP_DATA_READY, &msk->flags);
876 		parent->sk_data_ready(parent);
877 		return;
878 	}
879 
880 	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
881 		     !subflow->mp_join);
882 
883 	if (mptcp_subflow_data_available(sk))
884 		mptcp_data_ready(parent, sk);
885 }
886 
887 static void subflow_write_space(struct sock *sk)
888 {
889 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
890 	struct sock *parent = subflow->conn;
891 
892 	sk_stream_write_space(sk);
893 	if (sk_stream_is_writeable(sk)) {
894 		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
895 		smp_mb__after_atomic();
896 		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
897 		sk_stream_write_space(parent);
898 	}
899 }
900 
901 static struct inet_connection_sock_af_ops *
902 subflow_default_af_ops(struct sock *sk)
903 {
904 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
905 	if (sk->sk_family == AF_INET6)
906 		return &subflow_v6_specific;
907 #endif
908 	return &subflow_specific;
909 }
910 
911 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
912 void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
913 {
914 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
915 	struct inet_connection_sock *icsk = inet_csk(sk);
916 	struct inet_connection_sock_af_ops *target;
917 
918 	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
919 
920 	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
921 		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);
922 
923 	if (likely(icsk->icsk_af_ops == target))
924 		return;
925 
926 	subflow->icsk_af_ops = icsk->icsk_af_ops;
927 	icsk->icsk_af_ops = target;
928 }
929 #endif
930 
931 static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
932 				struct sockaddr_storage *addr)
933 {
934 	memset(addr, 0, sizeof(*addr));
935 	addr->ss_family = info->family;
936 	if (addr->ss_family == AF_INET) {
937 		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
938 
939 		in_addr->sin_addr = info->addr;
940 		in_addr->sin_port = info->port;
941 	}
942 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
943 	else if (addr->ss_family == AF_INET6) {
944 		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;
945 
946 		in6_addr->sin6_addr = info->addr6;
947 		in6_addr->sin6_port = info->port;
948 	}
949 #endif
950 }
951 
952 int __mptcp_subflow_connect(struct sock *sk, int ifindex,
953 			    const struct mptcp_addr_info *loc,
954 			    const struct mptcp_addr_info *remote)
955 {
956 	struct mptcp_sock *msk = mptcp_sk(sk);
957 	struct mptcp_subflow_context *subflow;
958 	struct sockaddr_storage addr;
959 	int local_id = loc->id;
960 	struct socket *sf;
961 	struct sock *ssk;
962 	u32 remote_token;
963 	int addrlen;
964 	int err;
965 
966 	if (sk->sk_state != TCP_ESTABLISHED)
967 		return -ENOTCONN;
968 
969 	err = mptcp_subflow_create_socket(sk, &sf);
970 	if (err)
971 		return err;
972 
973 	ssk = sf->sk;
974 	subflow = mptcp_subflow_ctx(ssk);
975 	do {
976 		get_random_bytes(&subflow->local_nonce, sizeof(u32));
977 	} while (!subflow->local_nonce);
978 
979 	if (!local_id) {
980 		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
981 		if (err < 0)
982 			goto failed;
983 
984 		local_id = err;
985 	}
986 
987 	subflow->remote_key = msk->remote_key;
988 	subflow->local_key = msk->local_key;
989 	subflow->token = msk->token;
990 	mptcp_info2sockaddr(loc, &addr);
991 
992 	addrlen = sizeof(struct sockaddr_in);
993 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
994 	if (loc->family == AF_INET6)
995 		addrlen = sizeof(struct sockaddr_in6);
996 #endif
997 	ssk->sk_bound_dev_if = ifindex;
998 	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
999 	if (err)
1000 		goto failed;
1001 
1002 	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
1003 	pr_debug("msk=%p remote_token=%u local_id=%d", msk, remote_token,
1004 		 local_id);
1005 	subflow->remote_token = remote_token;
1006 	subflow->local_id = local_id;
1007 	subflow->request_join = 1;
1008 	subflow->request_bkup = 1;
1009 	mptcp_info2sockaddr(remote, &addr);
1010 
1011 	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
1012 	if (err && err != -EINPROGRESS)
1013 		goto failed;
1014 
1015 	spin_lock_bh(&msk->join_list_lock);
1016 	list_add_tail(&subflow->node, &msk->join_list);
1017 	spin_unlock_bh(&msk->join_list_lock);
1018 
1019 	return err;
1020 
1021 failed:
1022 	sock_release(sf);
1023 	return err;
1024 }
1025 
1026 int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
1027 {
1028 	struct mptcp_subflow_context *subflow;
1029 	struct net *net = sock_net(sk);
1030 	struct socket *sf;
1031 	int err;
1032 
1033 	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
1034 			       &sf);
1035 	if (err)
1036 		return err;
1037 
1038 	lock_sock(sf->sk);
1039 
1040 	/* kernel sockets do not by default acquire net ref, but TCP timer
1041 	 * needs it.
1042 	 */
1043 	sf->sk->sk_net_refcnt = 1;
1044 	get_net(net);
1045 #ifdef CONFIG_PROC_FS
1046 	this_cpu_add(*net->core.sock_inuse, 1);
1047 #endif
1048 	err = tcp_set_ulp(sf->sk, "mptcp");
1049 	release_sock(sf->sk);
1050 
1051 	if (err) {
1052 		sock_release(sf);
1053 		return err;
1054 	}
1055 
1056 	/* the newly created socket really belongs to the owning MPTCP master
1057 	 * socket, even if for additional subflows the allocation is performed
1058 	 * by a kernel workqueue. Adjust inode references, so that the
1059 	 * procfs/diag interaces really show this one belonging to the correct
1060 	 * user.
1061 	 */
1062 	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
1063 	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
1064 	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;
1065 
1066 	subflow = mptcp_subflow_ctx(sf->sk);
1067 	pr_debug("subflow=%p", subflow);
1068 
1069 	*new_sock = sf;
1070 	sock_hold(sk);
1071 	subflow->conn = sk;
1072 
1073 	return 0;
1074 }
1075 
1076 static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
1077 							gfp_t priority)
1078 {
1079 	struct inet_connection_sock *icsk = inet_csk(sk);
1080 	struct mptcp_subflow_context *ctx;
1081 
1082 	ctx = kzalloc(sizeof(*ctx), priority);
1083 	if (!ctx)
1084 		return NULL;
1085 
1086 	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
1087 	INIT_LIST_HEAD(&ctx->node);
1088 
1089 	pr_debug("subflow=%p", ctx);
1090 
1091 	ctx->tcp_sock = sk;
1092 
1093 	return ctx;
1094 }
1095 
1096 static void __subflow_state_change(struct sock *sk)
1097 {
1098 	struct socket_wq *wq;
1099 
1100 	rcu_read_lock();
1101 	wq = rcu_dereference(sk->sk_wq);
1102 	if (skwq_has_sleeper(wq))
1103 		wake_up_interruptible_all(&wq->wait);
1104 	rcu_read_unlock();
1105 }
1106 
1107 static bool subflow_is_done(const struct sock *sk)
1108 {
1109 	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
1110 }
1111 
1112 static void subflow_state_change(struct sock *sk)
1113 {
1114 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
1115 	struct sock *parent = subflow->conn;
1116 
1117 	__subflow_state_change(sk);
1118 
1119 	if (subflow_simultaneous_connect(sk)) {
1120 		mptcp_do_fallback(sk);
1121 		pr_fallback(mptcp_sk(parent));
1122 		subflow->conn_finished = 1;
1123 		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
1124 			inet_sk_state_store(parent, TCP_ESTABLISHED);
1125 			parent->sk_state_change(parent);
1126 		}
1127 	}
1128 
1129 	/* as recvmsg() does not acquire the subflow socket for ssk selection
1130 	 * a fin packet carrying a DSS can be unnoticed if we don't trigger
1131 	 * the data available machinery here.
1132 	 */
1133 	if (mptcp_subflow_data_available(sk))
1134 		mptcp_data_ready(parent, sk);
1135 
1136 	if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
1137 	    !subflow->rx_eof && subflow_is_done(sk)) {
1138 		subflow->rx_eof = 1;
1139 		mptcp_subflow_eof(parent);
1140 	}
1141 }
1142 
1143 static int subflow_ulp_init(struct sock *sk)
1144 {
1145 	struct inet_connection_sock *icsk = inet_csk(sk);
1146 	struct mptcp_subflow_context *ctx;
1147 	struct tcp_sock *tp = tcp_sk(sk);
1148 	int err = 0;
1149 
1150 	/* disallow attaching ULP to a socket unless it has been
1151 	 * created with sock_create_kern()
1152 	 */
1153 	if (!sk->sk_kern_sock) {
1154 		err = -EOPNOTSUPP;
1155 		goto out;
1156 	}
1157 
1158 	ctx = subflow_create_ctx(sk, GFP_KERNEL);
1159 	if (!ctx) {
1160 		err = -ENOMEM;
1161 		goto out;
1162 	}
1163 
1164 	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);
1165 
1166 	tp->is_mptcp = 1;
1167 	ctx->icsk_af_ops = icsk->icsk_af_ops;
1168 	icsk->icsk_af_ops = subflow_default_af_ops(sk);
1169 	ctx->tcp_data_ready = sk->sk_data_ready;
1170 	ctx->tcp_state_change = sk->sk_state_change;
1171 	ctx->tcp_write_space = sk->sk_write_space;
1172 	sk->sk_data_ready = subflow_data_ready;
1173 	sk->sk_write_space = subflow_write_space;
1174 	sk->sk_state_change = subflow_state_change;
1175 out:
1176 	return err;
1177 }
1178 
1179 static void subflow_ulp_release(struct sock *sk)
1180 {
1181 	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
1182 
1183 	if (!ctx)
1184 		return;
1185 
1186 	if (ctx->conn)
1187 		sock_put(ctx->conn);
1188 
1189 	kfree_rcu(ctx, rcu);
1190 }
1191 
1192 static void subflow_ulp_clone(const struct request_sock *req,
1193 			      struct sock *newsk,
1194 			      const gfp_t priority)
1195 {
1196 	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
1197 	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
1198 	struct mptcp_subflow_context *new_ctx;
1199 
1200 	if (!tcp_rsk(req)->is_mptcp ||
1201 	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
1202 		subflow_ulp_fallback(newsk, old_ctx);
1203 		return;
1204 	}
1205 
1206 	new_ctx = subflow_create_ctx(newsk, priority);
1207 	if (!new_ctx) {
1208 		subflow_ulp_fallback(newsk, old_ctx);
1209 		return;
1210 	}
1211 
1212 	new_ctx->conn_finished = 1;
1213 	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
1214 	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
1215 	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
1216 	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
1217 	new_ctx->rel_write_seq = 1;
1218 	new_ctx->tcp_sock = newsk;
1219 
1220 	if (subflow_req->mp_capable) {
1221 		/* see comments in subflow_syn_recv_sock(), MPTCP connection
1222 		 * is fully established only after we receive the remote key
1223 		 */
1224 		new_ctx->mp_capable = 1;
1225 		new_ctx->local_key = subflow_req->local_key;
1226 		new_ctx->token = subflow_req->token;
1227 		new_ctx->ssn_offset = subflow_req->ssn_offset;
1228 		new_ctx->idsn = subflow_req->idsn;
1229 	} else if (subflow_req->mp_join) {
1230 		new_ctx->ssn_offset = subflow_req->ssn_offset;
1231 		new_ctx->mp_join = 1;
1232 		new_ctx->fully_established = 1;
1233 		new_ctx->backup = subflow_req->backup;
1234 		new_ctx->local_id = subflow_req->local_id;
1235 		new_ctx->token = subflow_req->token;
1236 		new_ctx->thmac = subflow_req->thmac;
1237 	}
1238 }
1239 
1240 static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
1241 	.name		= "mptcp",
1242 	.owner		= THIS_MODULE,
1243 	.init		= subflow_ulp_init,
1244 	.release	= subflow_ulp_release,
1245 	.clone		= subflow_ulp_clone,
1246 };
1247 
1248 static int subflow_ops_init(struct request_sock_ops *subflow_ops)
1249 {
1250 	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
1251 	subflow_ops->slab_name = "request_sock_subflow";
1252 
1253 	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
1254 					      subflow_ops->obj_size, 0,
1255 					      SLAB_ACCOUNT |
1256 					      SLAB_TYPESAFE_BY_RCU,
1257 					      NULL);
1258 	if (!subflow_ops->slab)
1259 		return -ENOMEM;
1260 
1261 	subflow_ops->destructor = subflow_req_destructor;
1262 
1263 	return 0;
1264 }
1265 
1266 void __init mptcp_subflow_init(void)
1267 {
1268 	subflow_request_sock_ops = tcp_request_sock_ops;
1269 	if (subflow_ops_init(&subflow_request_sock_ops) != 0)
1270 		panic("MPTCP: failed to init subflow request sock ops\n");
1271 
1272 	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
1273 	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
1274 
1275 	subflow_specific = ipv4_specific;
1276 	subflow_specific.conn_request = subflow_v4_conn_request;
1277 	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
1278 	subflow_specific.sk_rx_dst_set = subflow_finish_connect;
1279 
1280 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
1281 	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
1282 	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;
1283 
1284 	subflow_v6_specific = ipv6_specific;
1285 	subflow_v6_specific.conn_request = subflow_v6_conn_request;
1286 	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
1287 	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
1288 
1289 	subflow_v6m_specific = subflow_v6_specific;
1290 	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
1291 	subflow_v6m_specific.send_check = ipv4_specific.send_check;
1292 	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
1293 	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
1294 	subflow_v6m_specific.net_frag_header_len = 0;
1295 #endif
1296 
1297 	mptcp_diag_subflow_init(&subflow_ulp_ops);
1298 
1299 	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
1300 		panic("MPTCP: failed to register subflows to ULP\n");
1301 }
1302