// SPDX-License-Identifier: GPL-2.0
/* OpenVPN data channel offload
 *
 * Copyright (C) 2019-2025 OpenVPN, Inc.
 *
 * Author: Antonio Quartulli <antonio@openvpn.net>
 */

#include <linux/skbuff.h>
#include <net/hotdata.h>
#include <net/inet_common.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/route.h>
#include <trace/events/sock.h>

#include "ovpnpriv.h"
#include "main.h"
#include "io.h"
#include "peer.h"
#include "proto.h"
#include "skb.h"
#include "tcp.h"

#define OVPN_TCP_DEPTH_NESTING	2
#if OVPN_TCP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
#error "OVPN TCP requires its own lockdep subclass"
#endif

static struct proto ovpn_tcp_prot __ro_after_init;
static struct proto_ops ovpn_tcp_ops __ro_after_init;
static struct proto ovpn_tcp6_prot __ro_after_init;
static struct proto_ops ovpn_tcp6_ops __ro_after_init;

static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb)
{
	struct strp_msg *rxm = strp_msg(skb);
	__be16 blen;
	u16 len;
	int err;

	/* when packets are written to the TCP stream, they are prepended with
	 * two bytes indicating the actual packet size.
	 * Parse accordingly and return the actual size (including the size
	 * header)
	 */

	if (skb->len < rxm->offset + 2)
		return 0;

	err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
	if (err < 0)
		return err;

	len = be16_to_cpu(blen);
	if (len < 2)
		return -EINVAL;

	return len + 2;
}
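/* Illustration of the framing parsed above (an example, not part of the
 * protocol definition): a 20 byte OpenVPN packet travels on the stream
 * as
 *
 *   0x00 0x14 | 20 bytes of packet data
 *
 * i.e. a big endian 16 bit length followed by the packet itself. For
 * such a record ovpn_tcp_parse() returns 22 (payload plus the 2 byte
 * size header), which is the full record length the strparser hands
 * to ovpn_tcp_rcv().
 */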
/* queue skb for sending to userspace via recvmsg on the socket */
static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk,
				  struct sk_buff *skb)
{
	skb_set_owner_r(skb, sk);
	memset(skb->cb, 0, sizeof(skb->cb));
	skb_queue_tail(&peer->tcp.user_queue, skb);
	peer->tcp.sk_cb.sk_data_ready(sk);
}

static struct sk_buff *ovpn_tcp_skb_packet(const struct ovpn_peer *peer,
					   struct sk_buff *orig_skb,
					   const int pkt_len, const int pkt_off)
{
	struct sk_buff *ovpn_skb;
	int err;

	/* create a new skb with only the content of the current packet */
	ovpn_skb = netdev_alloc_skb(peer->ovpn->dev, pkt_len);
	if (unlikely(!ovpn_skb))
		goto err;

	skb_copy_header(ovpn_skb, orig_skb);
	err = skb_copy_bits(orig_skb, pkt_off, skb_put(ovpn_skb, pkt_len),
			    pkt_len);
	if (unlikely(err)) {
		net_warn_ratelimited("%s: skb_copy_bits failed for peer %u\n",
				     netdev_name(peer->ovpn->dev), peer->id);
		kfree_skb(ovpn_skb);
		goto err;
	}

	consume_skb(orig_skb);
	return ovpn_skb;
err:
	kfree_skb(orig_skb);
	return NULL;
}

static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
{
	struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
	struct strp_msg *msg = strp_msg(skb);
	int pkt_len = msg->full_len - 2;
	u8 opcode;

	/* we need at least 4 bytes of data in the packet
	 * to extract the opcode and the key ID later on
	 */
	if (unlikely(pkt_len < OVPN_OPCODE_SIZE)) {
		net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n",
				     netdev_name(peer->ovpn->dev), peer->id);
		goto err;
	}

	/* extract the packet into a new skb */
	skb = ovpn_tcp_skb_packet(peer, skb, pkt_len, msg->offset + 2);
	if (unlikely(!skb))
		goto err;

	/* DATA_V2 packets are handled in kernel, the rest goes to user space */
	opcode = ovpn_opcode_from_skb(skb, 0);
	if (unlikely(opcode != OVPN_DATA_V2)) {
		if (opcode == OVPN_DATA_V1) {
			net_warn_ratelimited("%s: DATA_V1 detected on the TCP stream\n",
					     netdev_name(peer->ovpn->dev));
			goto err;
		}

		/* The packet size header must be there when sending the packet
		 * to userspace, therefore we put it back
		 */
		*(__be16 *)__skb_push(skb, sizeof(u16)) = htons(pkt_len);
		ovpn_tcp_to_userspace(peer, strp->sk, skb);
		return;
	}

	/* hold reference to peer as required by ovpn_recv().
	 *
	 * NOTE: in this context we should already be holding a reference to
	 * this peer, therefore ovpn_peer_hold() is not expected to fail
	 */
	if (WARN_ON(!ovpn_peer_hold(peer)))
		goto err_nopeer;

	ovpn_recv(peer, skb);
	return;
err:
	/* take reference for deferred peer deletion. should never fail */
	if (WARN_ON(!ovpn_peer_hold(peer)))
		goto err_nopeer;
	schedule_work(&peer->tcp.defer_del_work);
	dev_dstats_rx_dropped(peer->ovpn->dev);
err_nopeer:
	kfree_skb(skb);
}
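/* Summary of the RX demultiplexing above (illustration, no new logic):
 *
 *   TCP stream -> strparser -> ovpn_tcp_rcv()
 *       DATA_V2 -> ovpn_recv()             handled in kernel
 *       others  -> peer->tcp.user_queue    delivered via recvmsg()
 *
 * DATA_V1 is rejected outright: the offload path only implements the
 * DATA_V2 data channel format.
 */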
static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int flags, int *addr_len)
{
	int err = 0, off, copied = 0, ret;
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
		rcu_read_unlock();
		return -EBADF;
	}
	peer = sock->peer;
	rcu_read_unlock();

	skb = __skb_recv_datagram(sk, &peer->tcp.user_queue, flags, &off, &err);
	if (!skb) {
		if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) {
			ret = 0;
			goto out;
		}
		ret = err;
		goto out;
	}

	copied = len;
	if (copied > skb->len)
		copied = skb->len;
	else if (copied < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (unlikely(err)) {
		kfree_skb(skb);
		ret = err;
		goto out;
	}

	if (flags & MSG_TRUNC)
		copied = skb->len;
	kfree_skb(skb);
	ret = copied;
out:
	ovpn_peer_put(peer);
	return ret;
}

void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
{
	struct ovpn_peer *peer = ovpn_sock->peer;
	struct sock *sk = ovpn_sock->sk;

	strp_stop(&peer->tcp.strp);
	skb_queue_purge(&peer->tcp.user_queue);

	/* restore CBs that were saved in ovpn_sock_set_tcp_cb() */
	sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
	sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
	sk->sk_prot = peer->tcp.sk_cb.prot;

	/* tcp_close() may race this function and could set
	 * sk->sk_socket to NULL. It does so by invoking
	 * sock_orphan(), which holds sk_callback_lock before
	 * doing the assignment.
	 *
	 * For this reason we acquire the same lock to prevent
	 * sk_socket from disappearing under our feet
	 */
	write_lock_bh(&sk->sk_callback_lock);
	if (sk->sk_socket)
		sk->sk_socket->ops = peer->tcp.sk_cb.ops;
	write_unlock_bh(&sk->sk_callback_lock);

	rcu_assign_sk_user_data(sk, NULL);
}

void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock)
{
	struct ovpn_peer *peer = sock->peer;

	/* NOTE: we don't wait for peer->tcp.defer_del_work to finish:
	 * either the worker is not running or this function
	 * was invoked by that worker.
	 */

	cancel_work_sync(&sock->tcp_tx_work);
	strp_done(&peer->tcp.strp);

	skb_queue_purge(&peer->tcp.out_queue);
	kfree_skb(peer->tcp.out_msg.skb);
	peer->tcp.out_msg.skb = NULL;
}

static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
{
	struct sk_buff *skb = peer->tcp.out_msg.skb;
	int ret, flags;

	if (!skb)
		return;

	if (peer->tcp.tx_in_progress)
		return;

	peer->tcp.tx_in_progress = true;

	do {
		flags = ovpn_skb_cb(skb)->nosignal ? MSG_NOSIGNAL : 0;
		ret = skb_send_sock_locked_with_flags(sk, skb,
						      peer->tcp.out_msg.offset,
						      peer->tcp.out_msg.len,
						      flags);
		if (unlikely(ret < 0)) {
			if (ret == -EAGAIN)
				goto out;

			net_warn_ratelimited("%s: TCP error to peer %u: %d\n",
					     netdev_name(peer->ovpn->dev),
					     peer->id, ret);

			/* in case of TCP error we can't recover the VPN
			 * stream therefore we abort the connection
			 */
			ovpn_peer_hold(peer);
			schedule_work(&peer->tcp.defer_del_work);

			/* we bail out immediately and keep tx_in_progress set
			 * to true. This way we prevent more TX attempts
			 * which would lead to more invocations of
			 * schedule_work()
			 */
			return;
		}

		peer->tcp.out_msg.len -= ret;
		peer->tcp.out_msg.offset += ret;
	} while (peer->tcp.out_msg.len > 0);

	if (!peer->tcp.out_msg.len) {
		preempt_disable();
		dev_dstats_tx_add(peer->ovpn->dev, skb->len);
		preempt_enable();
	}

	kfree_skb(peer->tcp.out_msg.skb);
	peer->tcp.out_msg.skb = NULL;
	peer->tcp.out_msg.len = 0;
	peer->tcp.out_msg.offset = 0;

out:
	peer->tcp.tx_in_progress = false;
}

void ovpn_tcp_tx_work(struct work_struct *work)
{
	struct ovpn_socket *sock;

	sock = container_of(work, struct ovpn_socket, tcp_tx_work);

	lock_sock(sock->sk);
	if (sock->peer)
		ovpn_tcp_send_sock(sock->peer, sock->sk);
	release_sock(sock->sk);
}

static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
				   struct sk_buff *skb)
{
	if (peer->tcp.out_msg.skb)
		ovpn_tcp_send_sock(peer, sk);

	if (peer->tcp.out_msg.skb) {
		dev_dstats_tx_dropped(peer->ovpn->dev);
		kfree_skb(skb);
		return;
	}

	peer->tcp.out_msg.skb = skb;
	peer->tcp.out_msg.len = skb->len;
	peer->tcp.out_msg.offset = 0;
	ovpn_tcp_send_sock(peer, sk);
}

void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
		       struct sk_buff *skb)
{
	u16 len = skb->len;

	*(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);

	spin_lock_nested(&sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
	if (sock_owned_by_user(sk)) {
		if (skb_queue_len(&peer->tcp.out_queue) >=
		    READ_ONCE(net_hotdata.max_backlog)) {
			dev_dstats_tx_dropped(peer->ovpn->dev);
			kfree_skb(skb);
			goto unlock;
		}
		__skb_queue_tail(&peer->tcp.out_queue, skb);
	} else {
		ovpn_tcp_send_sock_skb(peer, sk, skb);
	}
unlock:
	spin_unlock(&sk->sk_lock.slock);
}
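/* Note on the TX path above (illustration, no new logic):
 * ovpn_tcp_send_skb() takes sk_lock.slock with its own lockdep subclass
 * (OVPN_TCP_DEPTH_NESTING, which the #error at the top forces to differ
 * from SINGLE_DEPTH_NESTING) so that lockdep does not report a false
 * recursion when the stack already holds another socket lock, e.g.
 * while relaying traffic between peers. If a process currently owns
 * the socket, the skb is parked in peer->tcp.out_queue and flushed
 * later by ovpn_tcp_release(), the release_cb installed further below.
 */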
static void ovpn_tcp_release(struct sock *sk)
{
	struct sk_buff_head queue;
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (!sock) {
		rcu_read_unlock();
		return;
	}

	peer = sock->peer;

	/* during initialization this function is called before
	 * assigning sock->peer
	 */
	if (unlikely(!peer || !ovpn_peer_hold(peer))) {
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&peer->tcp.out_queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		ovpn_tcp_send_sock_skb(peer, sk, skb);

	peer->tcp.sk_cb.prot->release_cb(sk);
	ovpn_peer_put(peer);
}

static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct ovpn_socket *sock;
	int ret, linear = PAGE_SIZE;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	lock_sock(sk);
	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
		rcu_read_unlock();
		release_sock(sk);
		return -EIO;
	}
	rcu_read_unlock();
	peer = sock->peer;

	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL)) {
		ret = -EOPNOTSUPP;
		goto peer_free;
	}

	if (peer->tcp.out_msg.skb) {
		ret = -EAGAIN;
		goto peer_free;
	}

	if (size < linear)
		linear = size;

	skb = sock_alloc_send_pskb(sk, linear, size - linear,
				   msg->msg_flags & MSG_DONTWAIT, &ret, 0);
	if (!skb) {
		net_err_ratelimited("%s: skb alloc failed: %d\n",
				    netdev_name(peer->ovpn->dev), ret);
		goto peer_free;
	}

	skb_put(skb, linear);
	skb->len = size;
	skb->data_len = size - linear;

	ret = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
	if (ret) {
		kfree_skb(skb);
		net_err_ratelimited("%s: skb copy from iter failed: %d\n",
				    netdev_name(peer->ovpn->dev), ret);
		goto peer_free;
	}

	ovpn_skb_cb(skb)->nosignal = msg->msg_flags & MSG_NOSIGNAL;
	ovpn_tcp_send_sock_skb(peer, sk, skb);
	ret = size;
peer_free:
	release_sock(sk);
	ovpn_peer_put(peer);
	return ret;
}

static int ovpn_tcp_disconnect(struct sock *sk, int flags)
{
	return -EBUSY;
}

static void ovpn_tcp_data_ready(struct sock *sk)
{
	struct ovpn_socket *sock;

	trace_sk_data_ready(sk);

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (likely(sock && sock->peer))
		strp_data_ready(&sock->peer->tcp.strp);
	rcu_read_unlock();
}

static void ovpn_tcp_write_space(struct sock *sk)
{
	struct ovpn_socket *sock;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (likely(sock && sock->peer)) {
		schedule_work(&sock->tcp_tx_work);
		sock->peer->tcp.sk_cb.sk_write_space(sk);
	}
	rcu_read_unlock();
}

static void ovpn_tcp_build_protos(struct proto *new_prot,
				  struct proto_ops *new_ops,
				  const struct proto *orig_prot,
				  const struct proto_ops *orig_ops);

static void ovpn_tcp_peer_del_work(struct work_struct *work)
{
	struct ovpn_peer *peer = container_of(work, struct ovpn_peer,
					      tcp.defer_del_work);

	ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
	ovpn_peer_put(peer);
}
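/* Pattern shared by the hooks above (note for readers, not new logic):
 * every callback recovers the ovpn state through
 * rcu_dereference_sk_user_data() and, when it must use the peer outside
 * the RCU section, first takes a reference with ovpn_peer_hold() and
 * drops it with ovpn_peer_put() when done. sk_user_data is cleared in
 * ovpn_tcp_socket_detach(), which is what makes the NULL checks here
 * meaningful.
 */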
/* Set TCP encapsulation callbacks */
int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
			   struct ovpn_peer *peer)
{
	struct strp_callbacks cb = {
		.rcv_msg = ovpn_tcp_rcv,
		.parse_msg = ovpn_tcp_parse,
	};
	int ret;

	/* make sure no pre-existing encapsulation handler exists */
	if (ovpn_sock->sk->sk_user_data)
		return -EBUSY;
	rcu_assign_sk_user_data(ovpn_sock->sk, ovpn_sock);

	/* only a fully connected socket is expected. Connection
	 * establishment should be handled in userspace
	 */
	if (ovpn_sock->sk->sk_state != TCP_ESTABLISHED) {
		net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
				    netdev_name(peer->ovpn->dev),
				    ovpn_sock->sk->sk_state);
		ret = -EINVAL;
		goto err;
	}

	ret = strp_init(&peer->tcp.strp, ovpn_sock->sk, &cb);
	if (ret < 0) {
		DEBUG_NET_WARN_ON_ONCE(1);
		goto err;
	}

	INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);

	__sk_dst_reset(ovpn_sock->sk);
	skb_queue_head_init(&peer->tcp.user_queue);
	skb_queue_head_init(&peer->tcp.out_queue);

	/* save current CBs so that they can be restored upon socket release */
	peer->tcp.sk_cb.sk_data_ready = ovpn_sock->sk->sk_data_ready;
	peer->tcp.sk_cb.sk_write_space = ovpn_sock->sk->sk_write_space;
	peer->tcp.sk_cb.prot = ovpn_sock->sk->sk_prot;
	peer->tcp.sk_cb.ops = ovpn_sock->sk->sk_socket->ops;

	/* assign our static CBs and prot/ops */
	ovpn_sock->sk->sk_data_ready = ovpn_tcp_data_ready;
	ovpn_sock->sk->sk_write_space = ovpn_tcp_write_space;

	if (ovpn_sock->sk->sk_family == AF_INET) {
		ovpn_sock->sk->sk_prot = &ovpn_tcp_prot;
		ovpn_sock->sk->sk_socket->ops = &ovpn_tcp_ops;
	} else {
		ovpn_sock->sk->sk_prot = &ovpn_tcp6_prot;
		ovpn_sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
	}

	/* avoid using task_frag */
	ovpn_sock->sk->sk_allocation = GFP_ATOMIC;
	ovpn_sock->sk->sk_use_task_frag = false;

	/* enqueue the RX worker */
	strp_check_rcv(&peer->tcp.strp);

	return 0;
err:
	rcu_assign_sk_user_data(ovpn_sock->sk, NULL);
	return ret;
}

static void ovpn_tcp_close(struct sock *sk, long timeout)
{
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) {
		rcu_read_unlock();
		return;
	}
	peer = sock->peer;
	rcu_read_unlock();

	ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
	peer->tcp.sk_cb.prot->close(sk, timeout);
	ovpn_peer_put(peer);
}
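/* Note on the poll hook below (for clarity, no new behaviour): packets
 * destined to userspace are diverted to peer->tcp.user_queue rather
 * than sk_receive_queue, so readiness must be computed on the former.
 * The fall back to sk_receive_queue only happens when the peer is
 * going away and can no longer be referenced.
 */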
static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sk_buff_head *queue = &sock->sk->sk_receive_queue;
	struct ovpn_socket *ovpn_sock;
	struct ovpn_peer *peer = NULL;
	__poll_t mask;

	rcu_read_lock();
	ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
	/* if we landed in this callback, we expect to have a
	 * meaningful state. The ovpn_socket lifecycle would
	 * prevent it otherwise.
	 */
	if (WARN(!ovpn_sock || !ovpn_sock->peer,
		 "ovpn: null state in ovpn_tcp_poll!")) {
		rcu_read_unlock();
		return 0;
	}

	if (ovpn_peer_hold(ovpn_sock->peer)) {
		peer = ovpn_sock->peer;
		queue = &peer->tcp.user_queue;
	}
	rcu_read_unlock();

	mask = datagram_poll_queue(file, sock, wait, queue);

	if (peer)
		ovpn_peer_put(peer);

	return mask;
}

static void ovpn_tcp_build_protos(struct proto *new_prot,
				  struct proto_ops *new_ops,
				  const struct proto *orig_prot,
				  const struct proto_ops *orig_ops)
{
	memcpy(new_prot, orig_prot, sizeof(*new_prot));
	memcpy(new_ops, orig_ops, sizeof(*new_ops));
	new_prot->recvmsg = ovpn_tcp_recvmsg;
	new_prot->sendmsg = ovpn_tcp_sendmsg;
	new_prot->disconnect = ovpn_tcp_disconnect;
	new_prot->close = ovpn_tcp_close;
	new_prot->release_cb = ovpn_tcp_release;
	new_ops->poll = ovpn_tcp_poll;
}

/* Initialize TCP static objects */
void __init ovpn_tcp_init(void)
{
	ovpn_tcp_build_protos(&ovpn_tcp_prot, &ovpn_tcp_ops, &tcp_prot,
			      &inet_stream_ops);

#if IS_ENABLED(CONFIG_IPV6)
	ovpn_tcp_build_protos(&ovpn_tcp6_prot, &ovpn_tcp6_ops, &tcpv6_prot,
			      &inet6_stream_ops);
#endif
}
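/* Illustrative lifecycle of a TCP transport socket (a sketch based on
 * the functions in this file, not an additional API):
 *
 *   ovpn_tcp_init()               clone tcp(6) prot/ops templates (boot)
 *   ovpn_tcp_socket_attach()      hook prot/ops/CBs, start strparser
 *   ... DATA_V2 handled in kernel, control packets via send/recvmsg ...
 *   ovpn_tcp_socket_detach()      restore the original callbacks
 *   ovpn_tcp_socket_wait_finish() flush pending TX state
 */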