// SPDX-License-Identifier: GPL-2.0
/* OpenVPN data channel offload
 *
 * Copyright (C) 2019-2025 OpenVPN, Inc.
 *
 * Author: Antonio Quartulli <antonio@openvpn.net>
 */

#include <linux/skbuff.h>
#include <net/hotdata.h>
#include <net/inet_common.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/route.h>
#include <trace/events/sock.h>

#include "ovpnpriv.h"
#include "main.h"
#include "io.h"
#include "peer.h"
#include "proto.h"
#include "skb.h"
#include "tcp.h"

#define OVPN_TCP_DEPTH_NESTING 2
#if OVPN_TCP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
#error "OVPN TCP requires its own lockdep subclass"
#endif

static struct proto ovpn_tcp_prot __ro_after_init;
static struct proto_ops ovpn_tcp_ops __ro_after_init;
static struct proto ovpn_tcp6_prot __ro_after_init;
static struct proto_ops ovpn_tcp6_ops __ro_after_init;

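/* strparser parse callback: read the 2-byte size header prepended to every
 * packet on the TCP stream and return the full record length (header plus
 * payload), 0 if more data is needed, or a negative error on malformed input.
 */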
static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb)
{
        struct strp_msg *rxm = strp_msg(skb);
        __be16 blen;
        u16 len;
        int err;

        /* when packets are written to the TCP stream, they are prepended with
         * two bytes indicating the actual packet size.
         * Parse accordingly and return the actual size (including the size
         * header)
         */

        if (skb->len < rxm->offset + 2)
                return 0;

        err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
        if (err < 0)
                return err;

        len = be16_to_cpu(blen);
        if (len < 2)
                return -EINVAL;

        return len + 2;
}

/* queue skb for sending to userspace via recvmsg on the socket */
static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk,
                                  struct sk_buff *skb)
{
        skb_set_owner_r(skb, sk);
        memset(skb->cb, 0, sizeof(skb->cb));
        skb_queue_tail(&peer->tcp.user_queue, skb);
        peer->tcp.sk_cb.sk_data_ready(sk);
}

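/* strparser receive callback: invoked for every complete record reassembled
 * from the TCP stream. DATA_V2 packets are passed to ovpn_recv() for in-kernel
 * processing, other recognized packets are queued for userspace, and errors
 * (including unsupported DATA_V1) schedule deferred deletion of the peer.
 */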
static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
{
        struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
        struct strp_msg *msg = strp_msg(skb);
        size_t pkt_len = msg->full_len - 2;
        size_t off = msg->offset + 2;
        u8 opcode;

        /* ensure skb->data points to the beginning of the openvpn packet */
        if (!pskb_pull(skb, off)) {
                net_warn_ratelimited("%s: packet too small for peer %u\n",
                                     netdev_name(peer->ovpn->dev), peer->id);
                goto err;
        }

        /* strparser does not trim the skb for us, therefore we do it now */
        if (pskb_trim(skb, pkt_len) != 0) {
                net_warn_ratelimited("%s: trimming skb failed for peer %u\n",
                                     netdev_name(peer->ovpn->dev), peer->id);
                goto err;
        }

        /* we need the first 4 bytes of data to be accessible
         * to extract the opcode and the key ID later on
         */
        if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) {
                net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n",
                                     netdev_name(peer->ovpn->dev), peer->id);
                goto err;
        }

        /* DATA_V2 packets are handled in kernel, the rest goes to user space */
        opcode = ovpn_opcode_from_skb(skb, 0);
        if (unlikely(opcode != OVPN_DATA_V2)) {
                if (opcode == OVPN_DATA_V1) {
                        net_warn_ratelimited("%s: DATA_V1 detected on the TCP stream\n",
                                             netdev_name(peer->ovpn->dev));
                        goto err;
                }

                /* The packet size header must be there when sending the packet
                 * to userspace, therefore we put it back
                 */
                skb_push(skb, 2);
                ovpn_tcp_to_userspace(peer, strp->sk, skb);
                return;
        }

        /* hold reference to peer as required by ovpn_recv().
         *
         * NOTE: in this context we should already be holding a reference to
         * this peer, therefore ovpn_peer_hold() is not expected to fail
         */
        if (WARN_ON(!ovpn_peer_hold(peer)))
                goto err_nopeer;

        ovpn_recv(peer, skb);
        return;
err:
        /* take reference for deferred peer deletion. should never fail */
        if (WARN_ON(!ovpn_peer_hold(peer)))
                goto err_nopeer;
        schedule_work(&peer->tcp.defer_del_work);
        dev_dstats_rx_dropped(peer->ovpn->dev);
err_nopeer:
        kfree_skb(skb);
}

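/* recvmsg handler installed on the attached socket: deliver packets queued
 * for userspace by ovpn_tcp_rcv() rather than reading the raw TCP stream.
 */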
static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                            int flags, int *addr_len)
{
        int err = 0, off, copied = 0, ret;
        struct ovpn_socket *sock;
        struct ovpn_peer *peer;
        struct sk_buff *skb;

        rcu_read_lock();
        sock = rcu_dereference_sk_user_data(sk);
        if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
                rcu_read_unlock();
                return -EBADF;
        }
        peer = sock->peer;
        rcu_read_unlock();

        skb = __skb_recv_datagram(sk, &peer->tcp.user_queue, flags, &off, &err);
        if (!skb) {
                if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) {
                        ret = 0;
                        goto out;
                }
                ret = err;
                goto out;
        }

        copied = len;
        if (copied > skb->len)
                copied = skb->len;
        else if (copied < skb->len)
                msg->msg_flags |= MSG_TRUNC;

        err = skb_copy_datagram_msg(skb, 0, msg, copied);
        if (unlikely(err)) {
                kfree_skb(skb);
                ret = err;
                goto out;
        }

        if (flags & MSG_TRUNC)
                copied = skb->len;
        kfree_skb(skb);
        ret = copied;
out:
        ovpn_peer_put(peer);
        return ret;
}

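/* Stop the strparser, purge packets queued for userspace and restore the
 * original socket callbacks and prot/ops saved at attach time.
 */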
void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
{
        struct ovpn_peer *peer = ovpn_sock->peer;
        struct sock *sk = ovpn_sock->sk;

        strp_stop(&peer->tcp.strp);
        skb_queue_purge(&peer->tcp.user_queue);

        /* restore CBs that were saved in ovpn_tcp_socket_attach() */
        sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
        sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
        sk->sk_prot = peer->tcp.sk_cb.prot;
        sk->sk_socket->ops = peer->tcp.sk_cb.ops;

        rcu_assign_sk_user_data(sk, NULL);
}

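/* Wait for the TX worker and the strparser to complete, then drop any packet
 * still pending transmission.
 */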
void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock)
{
        struct ovpn_peer *peer = sock->peer;

        /* NOTE: we don't wait for peer->tcp.defer_del_work to finish:
         * either the worker is not running or this function
         * was invoked by that worker.
         */

        cancel_work_sync(&sock->tcp_tx_work);
        strp_done(&peer->tcp.strp);

        skb_queue_purge(&peer->tcp.out_queue);
        kfree_skb(peer->tcp.out_msg.skb);
        peer->tcp.out_msg.skb = NULL;
}

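/* Push the pending out_msg skb to the TCP socket. Called with the socket lock
 * held. On a fatal TCP error the peer is scheduled for deletion and
 * tx_in_progress is left set to prevent further TX attempts.
 */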
static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
{
        struct sk_buff *skb = peer->tcp.out_msg.skb;
        int ret, flags;

        if (!skb)
                return;

        if (peer->tcp.tx_in_progress)
                return;

        peer->tcp.tx_in_progress = true;

        do {
                flags = ovpn_skb_cb(skb)->nosignal ? MSG_NOSIGNAL : 0;
                ret = skb_send_sock_locked_with_flags(sk, skb,
                                                      peer->tcp.out_msg.offset,
                                                      peer->tcp.out_msg.len,
                                                      flags);
                if (unlikely(ret < 0)) {
                        if (ret == -EAGAIN)
                                goto out;

                        net_warn_ratelimited("%s: TCP error to peer %u: %d\n",
                                             netdev_name(peer->ovpn->dev),
                                             peer->id, ret);

                        /* in case of TCP error we can't recover the VPN
                         * stream therefore we abort the connection
                         */
                        ovpn_peer_hold(peer);
                        schedule_work(&peer->tcp.defer_del_work);

                        /* we bail out immediately and keep tx_in_progress set
                         * to true. This way we prevent more TX attempts
                         * which would lead to more invocations of
                         * schedule_work()
                         */
                        return;
                }

                peer->tcp.out_msg.len -= ret;
                peer->tcp.out_msg.offset += ret;
        } while (peer->tcp.out_msg.len > 0);

        if (!peer->tcp.out_msg.len) {
                preempt_disable();
                dev_dstats_tx_add(peer->ovpn->dev, skb->len);
                preempt_enable();
        }

        kfree_skb(peer->tcp.out_msg.skb);
        peer->tcp.out_msg.skb = NULL;
        peer->tcp.out_msg.len = 0;
        peer->tcp.out_msg.offset = 0;

out:
        peer->tcp.tx_in_progress = false;
}

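/* Worker resuming transmission of the pending packet once the socket becomes
 * writeable again.
 */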
void ovpn_tcp_tx_work(struct work_struct *work)
{
        struct ovpn_socket *sock;

        sock = container_of(work, struct ovpn_socket, tcp_tx_work);

        lock_sock(sock->sk);
        if (sock->peer)
                ovpn_tcp_send_sock(sock->peer, sock->sk);
        release_sock(sock->sk);
}

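/* Make skb the pending outgoing message and try to transmit it. If a previous
 * message cannot be flushed first, the new packet is dropped and accounted as
 * a TX drop.
 */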
static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
                                   struct sk_buff *skb)
{
        if (peer->tcp.out_msg.skb)
                ovpn_tcp_send_sock(peer, sk);

        if (peer->tcp.out_msg.skb) {
                dev_dstats_tx_dropped(peer->ovpn->dev);
                kfree_skb(skb);
                return;
        }

        peer->tcp.out_msg.skb = skb;
        peer->tcp.out_msg.len = skb->len;
        peer->tcp.out_msg.offset = 0;
        ovpn_tcp_send_sock(peer, sk);
}

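/* Prepend the 2-byte size header required by the TCP framing and transmit the
 * packet, or queue it on out_queue (bounded by max_backlog) when the socket is
 * currently owned by userspace; ovpn_tcp_release() will flush the queue later.
 */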
void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
                       struct sk_buff *skb)
{
        u16 len = skb->len;

        *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);

        spin_lock_nested(&sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
        if (sock_owned_by_user(sk)) {
                if (skb_queue_len(&peer->tcp.out_queue) >=
                    READ_ONCE(net_hotdata.max_backlog)) {
                        dev_dstats_tx_dropped(peer->ovpn->dev);
                        kfree_skb(skb);
                        goto unlock;
                }
                __skb_queue_tail(&peer->tcp.out_queue, skb);
        } else {
                ovpn_tcp_send_sock_skb(peer, sk, skb);
        }
unlock:
        spin_unlock(&sk->sk_lock.slock);
}

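/* release_cb handler: flush packets queued on out_queue while the socket was
 * owned by userspace, then invoke the original release_cb.
 */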
static void ovpn_tcp_release(struct sock *sk)
{
        struct sk_buff_head queue;
        struct ovpn_socket *sock;
        struct ovpn_peer *peer;
        struct sk_buff *skb;

        rcu_read_lock();
        sock = rcu_dereference_sk_user_data(sk);
        if (!sock) {
                rcu_read_unlock();
                return;
        }

        peer = sock->peer;

        /* during initialization this function is called before
         * assigning sock->peer
         */
        if (unlikely(!peer || !ovpn_peer_hold(peer))) {
                rcu_read_unlock();
                return;
        }
        rcu_read_unlock();

        __skb_queue_head_init(&queue);
        skb_queue_splice_init(&peer->tcp.out_queue, &queue);

        while ((skb = __skb_dequeue(&queue)))
                ovpn_tcp_send_sock_skb(peer, sk, skb);

        peer->tcp.sk_cb.prot->release_cb(sk);
        ovpn_peer_put(peer);
}

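/* sendmsg handler installed on the attached socket: copy the userspace buffer
 * into an skb and transmit it as-is (no size header is added here). Only one
 * message may be pending at a time, otherwise -EAGAIN is returned.
 */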
static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
        struct ovpn_socket *sock;
        int ret, linear = PAGE_SIZE;
        struct ovpn_peer *peer;
        struct sk_buff *skb;

        lock_sock(sk);
        rcu_read_lock();
        sock = rcu_dereference_sk_user_data(sk);
        if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
                rcu_read_unlock();
                release_sock(sk);
                return -EIO;
        }
        rcu_read_unlock();
        peer = sock->peer;

        if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL)) {
                ret = -EOPNOTSUPP;
                goto peer_free;
        }

        if (peer->tcp.out_msg.skb) {
                ret = -EAGAIN;
                goto peer_free;
        }

        if (size < linear)
                linear = size;

        skb = sock_alloc_send_pskb(sk, linear, size - linear,
                                   msg->msg_flags & MSG_DONTWAIT, &ret, 0);
        if (!skb) {
                net_err_ratelimited("%s: skb alloc failed: %d\n",
                                    netdev_name(peer->ovpn->dev), ret);
                goto peer_free;
        }

        skb_put(skb, linear);
        skb->len = size;
        skb->data_len = size - linear;

        ret = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
        if (ret) {
                kfree_skb(skb);
                net_err_ratelimited("%s: skb copy from iter failed: %d\n",
                                    netdev_name(peer->ovpn->dev), ret);
                goto peer_free;
        }

        ovpn_skb_cb(skb)->nosignal = msg->msg_flags & MSG_NOSIGNAL;
        ovpn_tcp_send_sock_skb(peer, sk, skb);
        ret = size;
peer_free:
        release_sock(sk);
        ovpn_peer_put(peer);
        return ret;
}

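/* Disconnecting a socket while it is attached to a peer is not supported */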
static int ovpn_tcp_disconnect(struct sock *sk, int flags)
{
        return -EBUSY;
}

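/* sk_data_ready replacement: hand incoming TCP data to the strparser */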
static void ovpn_tcp_data_ready(struct sock *sk)
{
        struct ovpn_socket *sock;

        trace_sk_data_ready(sk);

        rcu_read_lock();
        sock = rcu_dereference_sk_user_data(sk);
        if (likely(sock && sock->peer))
                strp_data_ready(&sock->peer->tcp.strp);
        rcu_read_unlock();
}

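/* sk_write_space replacement: kick the TX worker to flush pending packets,
 * then notify the original callback.
 */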
static void ovpn_tcp_write_space(struct sock *sk)
{
        struct ovpn_socket *sock;

        rcu_read_lock();
        sock = rcu_dereference_sk_user_data(sk);
        if (likely(sock && sock->peer)) {
                schedule_work(&sock->tcp_tx_work);
                sock->peer->tcp.sk_cb.sk_write_space(sk);
        }
        rcu_read_unlock();
}

static void ovpn_tcp_build_protos(struct proto *new_prot,
                                  struct proto_ops *new_ops,
                                  const struct proto *orig_prot,
                                  const struct proto_ops *orig_ops);

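/* Deferred worker removing a peer after a transport error; it also drops the
 * reference taken when the work was scheduled.
 */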
static void ovpn_tcp_peer_del_work(struct work_struct *work)
{
        struct ovpn_peer *peer = container_of(work, struct ovpn_peer,
                                              tcp.defer_del_work);

        ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
        ovpn_peer_put(peer);
}

/* Set TCP encapsulation callbacks */
int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
                           struct ovpn_peer *peer)
{
        struct strp_callbacks cb = {
                .rcv_msg = ovpn_tcp_rcv,
                .parse_msg = ovpn_tcp_parse,
        };
        int ret;

        /* make sure no pre-existing encapsulation handler exists */
        if (ovpn_sock->sk->sk_user_data)
                return -EBUSY;

        /* only a fully connected socket is expected. Connection should be
         * handled in userspace
         */
        if (ovpn_sock->sk->sk_state != TCP_ESTABLISHED) {
                net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
                                    netdev_name(peer->ovpn->dev),
                                    ovpn_sock->sk->sk_state);
                return -EINVAL;
        }

        ret = strp_init(&peer->tcp.strp, ovpn_sock->sk, &cb);
        if (ret < 0) {
                DEBUG_NET_WARN_ON_ONCE(1);
                return ret;
        }

        INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);

        __sk_dst_reset(ovpn_sock->sk);
        skb_queue_head_init(&peer->tcp.user_queue);
        skb_queue_head_init(&peer->tcp.out_queue);

        /* save current CBs so that they can be restored upon socket release */
        peer->tcp.sk_cb.sk_data_ready = ovpn_sock->sk->sk_data_ready;
        peer->tcp.sk_cb.sk_write_space = ovpn_sock->sk->sk_write_space;
        peer->tcp.sk_cb.prot = ovpn_sock->sk->sk_prot;
        peer->tcp.sk_cb.ops = ovpn_sock->sk->sk_socket->ops;

        /* assign our static CBs and prot/ops */
        ovpn_sock->sk->sk_data_ready = ovpn_tcp_data_ready;
        ovpn_sock->sk->sk_write_space = ovpn_tcp_write_space;

        if (ovpn_sock->sk->sk_family == AF_INET) {
                ovpn_sock->sk->sk_prot = &ovpn_tcp_prot;
                ovpn_sock->sk->sk_socket->ops = &ovpn_tcp_ops;
        } else {
                ovpn_sock->sk->sk_prot = &ovpn_tcp6_prot;
                ovpn_sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
        }

        /* avoid using task_frag */
        ovpn_sock->sk->sk_allocation = GFP_ATOMIC;
        ovpn_sock->sk->sk_use_task_frag = false;

        /* enqueue the RX worker */
        strp_check_rcv(&peer->tcp.strp);

        return 0;
}

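/* close handler installed on the attached socket: delete the associated peer
 * before handing the socket over to the original close handler.
 */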
static void ovpn_tcp_close(struct sock *sk, long timeout)
{
        struct ovpn_socket *sock;
        struct ovpn_peer *peer;

        rcu_read_lock();
        sock = rcu_dereference_sk_user_data(sk);
        if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) {
                rcu_read_unlock();
                return;
        }
        peer = sock->peer;
        rcu_read_unlock();

        ovpn_peer_del(sock->peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
        peer->tcp.sk_cb.prot->close(sk, timeout);
        ovpn_peer_put(peer);
}

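/* poll handler: on top of datagram_poll(), report readability whenever packets
 * are waiting in the userspace queue.
 */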
static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock,
                              poll_table *wait)
{
        __poll_t mask = datagram_poll(file, sock, wait);
        struct ovpn_socket *ovpn_sock;

        rcu_read_lock();
        ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
        if (ovpn_sock && ovpn_sock->peer &&
            !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
        rcu_read_unlock();

        return mask;
}

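/* Clone the original proto and proto_ops, overriding the handlers that ovpn
 * needs to intercept.
 */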
static void ovpn_tcp_build_protos(struct proto *new_prot,
                                  struct proto_ops *new_ops,
                                  const struct proto *orig_prot,
                                  const struct proto_ops *orig_ops)
{
        memcpy(new_prot, orig_prot, sizeof(*new_prot));
        memcpy(new_ops, orig_ops, sizeof(*new_ops));
        new_prot->recvmsg = ovpn_tcp_recvmsg;
        new_prot->sendmsg = ovpn_tcp_sendmsg;
        new_prot->disconnect = ovpn_tcp_disconnect;
        new_prot->close = ovpn_tcp_close;
        new_prot->release_cb = ovpn_tcp_release;
        new_ops->poll = ovpn_tcp_poll;
}

/* Initialize TCP static objects */
void __init ovpn_tcp_init(void)
{
        ovpn_tcp_build_protos(&ovpn_tcp_prot, &ovpn_tcp_ops, &tcp_prot,
                              &inet_stream_ops);

#if IS_ENABLED(CONFIG_IPV6)
        ovpn_tcp_build_protos(&ovpn_tcp6_prot, &ovpn_tcp6_ops, &tcpv6_prot,
                              &inet6_stream_ops);
#endif
}