// SPDX-License-Identifier: GPL-2.0
/* OpenVPN data channel offload
 *
 * Copyright (C) 2019-2025 OpenVPN, Inc.
 *
 * Author: Antonio Quartulli <antonio@openvpn.net>
 */

#include <linux/skbuff.h>
#include <net/hotdata.h>
#include <net/inet_common.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/route.h>
#include <trace/events/sock.h>

#include "ovpnpriv.h"
#include "main.h"
#include "io.h"
#include "peer.h"
#include "proto.h"
#include "skb.h"
#include "tcp.h"

#define OVPN_TCP_DEPTH_NESTING	2
#if OVPN_TCP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
#error "OVPN TCP requires its own lockdep subclass"
#endif

static struct proto ovpn_tcp_prot __ro_after_init;
static struct proto_ops ovpn_tcp_ops __ro_after_init;
static struct proto ovpn_tcp6_prot __ro_after_init;
static struct proto_ops ovpn_tcp6_ops __ro_after_init;

static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb)
{
	struct strp_msg *rxm = strp_msg(skb);
	__be16 blen;
	u16 len;
	int err;

	/* when packets are written to the TCP stream, they are prepended with
	 * two bytes indicating the actual packet size.
	 * Parse accordingly and return the actual size (including the size
	 * header)
	 */

	if (skb->len < rxm->offset + 2)
		return 0;

	err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
	if (err < 0)
		return err;

	len = be16_to_cpu(blen);
	if (len < 2)
		return -EINVAL;

	return len + 2;
}
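
/* Worked example of the framing parsed above (illustrative only): a
 * 1400-byte OpenVPN packet is written to the TCP stream as
 *
 *	0x05 0x78 <1400 bytes of packet data>
 *
 * ovpn_tcp_parse() reads the big-endian size prefix (0x0578 == 1400) and
 * returns 1402, i.e. payload plus size header, so that strparser hands
 * exactly one framed packet to ovpn_tcp_rcv() per invocation.
 */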
/* queue skb for sending to userspace via recvmsg on the socket */
static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk,
				  struct sk_buff *skb)
{
	skb_set_owner_r(skb, sk);
	memset(skb->cb, 0, sizeof(skb->cb));
	skb_queue_tail(&peer->tcp.user_queue, skb);
	peer->tcp.sk_cb.sk_data_ready(sk);
}

static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
{
	struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
	struct strp_msg *msg = strp_msg(skb);
	size_t pkt_len = msg->full_len - 2;
	size_t off = msg->offset + 2;
	u8 opcode;

	/* ensure skb->data points to the beginning of the openvpn packet */
	if (!pskb_pull(skb, off)) {
		net_warn_ratelimited("%s: packet too small for peer %u\n",
				     netdev_name(peer->ovpn->dev), peer->id);
		goto err;
	}

	/* strparser does not trim the skb for us, therefore we do it now */
	if (pskb_trim(skb, pkt_len) != 0) {
		net_warn_ratelimited("%s: trimming skb failed for peer %u\n",
				     netdev_name(peer->ovpn->dev), peer->id);
		goto err;
	}

	/* we need the first 4 bytes of data to be accessible
	 * to extract the opcode and the key ID later on
	 */
	if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) {
		net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n",
				     netdev_name(peer->ovpn->dev), peer->id);
		goto err;
	}

	/* DATA_V2 packets are handled in kernel, the rest goes to user space */
	opcode = ovpn_opcode_from_skb(skb, 0);
	if (unlikely(opcode != OVPN_DATA_V2)) {
		if (opcode == OVPN_DATA_V1) {
			net_warn_ratelimited("%s: DATA_V1 detected on the TCP stream\n",
					     netdev_name(peer->ovpn->dev));
			goto err;
		}

		/* The packet size header must be there when sending the packet
		 * to userspace, therefore we put it back
		 */
		skb_push(skb, 2);
		ovpn_tcp_to_userspace(peer, strp->sk, skb);
		return;
	}

	/* hold reference to peer as required by ovpn_recv().
	 *
	 * NOTE: in this context we should already be holding a reference to
	 * this peer, therefore ovpn_peer_hold() is not expected to fail
	 */
	if (WARN_ON(!ovpn_peer_hold(peer)))
		goto err_nopeer;

	ovpn_recv(peer, skb);
	return;
err:
	/* take reference for deferred peer deletion. should never fail */
	if (WARN_ON(!ovpn_peer_hold(peer)))
		goto err_nopeer;
	schedule_work(&peer->tcp.defer_del_work);
	dev_dstats_rx_dropped(peer->ovpn->dev);
err_nopeer:
	kfree_skb(skb);
}
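
/* Illustration of the dispatch above, assuming the opcode encoding of the
 * OpenVPN wire protocol (opcode in the five most significant bits of the
 * first byte, key ID in the remaining three):
 *
 *	0x48 == (9 << 3) | 0	DATA_V2, key ID 0  -> handled in kernel
 *	0x20 == (4 << 3) | 0	P_CONTROL_V1       -> queued for userspace
 *
 * The numeric opcode values are taken from the OpenVPN protocol definition
 * and are shown here only as an example.
 */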
static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int flags, int *addr_len)
{
	int err = 0, off, copied = 0, ret;
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
		rcu_read_unlock();
		return -EBADF;
	}
	peer = sock->peer;
	rcu_read_unlock();

	skb = __skb_recv_datagram(sk, &peer->tcp.user_queue, flags, &off, &err);
	if (!skb) {
		if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) {
			ret = 0;
			goto out;
		}
		ret = err;
		goto out;
	}

	copied = len;
	if (copied > skb->len)
		copied = skb->len;
	else if (copied < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (unlikely(err)) {
		kfree_skb(skb);
		ret = err;
		goto out;
	}

	if (flags & MSG_TRUNC)
		copied = skb->len;
	kfree_skb(skb);
	ret = copied;
out:
	ovpn_peer_put(peer);
	return ret;
}

void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
{
	struct ovpn_peer *peer = ovpn_sock->peer;
	struct sock *sk = ovpn_sock->sk;

	strp_stop(&peer->tcp.strp);
	skb_queue_purge(&peer->tcp.user_queue);

	/* restore CBs that were saved in ovpn_sock_set_tcp_cb() */
	sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
	sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
	sk->sk_prot = peer->tcp.sk_cb.prot;

	/* tcp_close() may race this function and could set
	 * sk->sk_socket to NULL. It does so by invoking
	 * sock_orphan(), which holds sk_callback_lock before
	 * doing the assignment.
	 *
	 * For this reason we acquire the same lock to prevent
	 * sk_socket from disappearing under our feet
	 */
	write_lock_bh(&sk->sk_callback_lock);
	if (sk->sk_socket)
		sk->sk_socket->ops = peer->tcp.sk_cb.ops;
	write_unlock_bh(&sk->sk_callback_lock);

	rcu_assign_sk_user_data(sk, NULL);
}

void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock)
{
	struct ovpn_peer *peer = sock->peer;

	/* NOTE: we don't wait for peer->tcp.defer_del_work to finish:
	 * either the worker is not running or this function
	 * was invoked by that worker.
	 */

	cancel_work_sync(&sock->tcp_tx_work);
	strp_done(&peer->tcp.strp);

	skb_queue_purge(&peer->tcp.out_queue);
	kfree_skb(peer->tcp.out_msg.skb);
	peer->tcp.out_msg.skb = NULL;
}

static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
{
	struct sk_buff *skb = peer->tcp.out_msg.skb;
	int ret, flags;

	if (!skb)
		return;

	if (peer->tcp.tx_in_progress)
		return;

	peer->tcp.tx_in_progress = true;

	do {
		flags = ovpn_skb_cb(skb)->nosignal ? MSG_NOSIGNAL : 0;
		ret = skb_send_sock_locked_with_flags(sk, skb,
						      peer->tcp.out_msg.offset,
						      peer->tcp.out_msg.len,
						      flags);
		if (unlikely(ret < 0)) {
			if (ret == -EAGAIN)
				goto out;

			net_warn_ratelimited("%s: TCP error to peer %u: %d\n",
					     netdev_name(peer->ovpn->dev),
					     peer->id, ret);

			/* in case of TCP error we can't recover the VPN
			 * stream therefore we abort the connection
			 */
			ovpn_peer_hold(peer);
			schedule_work(&peer->tcp.defer_del_work);

			/* we bail out immediately and keep tx_in_progress set
			 * to true. This way we prevent more TX attempts
			 * which would lead to more invocations of
			 * schedule_work()
			 */
			return;
		}

		peer->tcp.out_msg.len -= ret;
		peer->tcp.out_msg.offset += ret;
	} while (peer->tcp.out_msg.len > 0);

	if (!peer->tcp.out_msg.len) {
		preempt_disable();
		dev_dstats_tx_add(peer->ovpn->dev, skb->len);
		preempt_enable();
	}

	kfree_skb(peer->tcp.out_msg.skb);
	peer->tcp.out_msg.skb = NULL;
	peer->tcp.out_msg.len = 0;
	peer->tcp.out_msg.offset = 0;

out:
	peer->tcp.tx_in_progress = false;
}

void ovpn_tcp_tx_work(struct work_struct *work)
{
	struct ovpn_socket *sock;

	sock = container_of(work, struct ovpn_socket, tcp_tx_work);

	lock_sock(sock->sk);
	if (sock->peer)
		ovpn_tcp_send_sock(sock->peer, sock->sk);
	release_sock(sock->sk);
}

static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
				   struct sk_buff *skb)
{
	if (peer->tcp.out_msg.skb)
		ovpn_tcp_send_sock(peer, sk);

	if (peer->tcp.out_msg.skb) {
		dev_dstats_tx_dropped(peer->ovpn->dev);
		kfree_skb(skb);
		return;
	}

	peer->tcp.out_msg.skb = skb;
	peer->tcp.out_msg.len = skb->len;
	peer->tcp.out_msg.offset = 0;
	ovpn_tcp_send_sock(peer, sk);
}

void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
		       struct sk_buff *skb)
{
	u16 len = skb->len;

	*(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);

	spin_lock_nested(&sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
	if (sock_owned_by_user(sk)) {
		if (skb_queue_len(&peer->tcp.out_queue) >=
		    READ_ONCE(net_hotdata.max_backlog)) {
			dev_dstats_tx_dropped(peer->ovpn->dev);
			kfree_skb(skb);
			goto unlock;
		}
		__skb_queue_tail(&peer->tcp.out_queue, skb);
	} else {
		ovpn_tcp_send_sock_skb(peer, sk, skb);
	}
unlock:
	spin_unlock(&sk->sk_lock.slock);
}

static void ovpn_tcp_release(struct sock *sk)
{
	struct sk_buff_head queue;
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (!sock) {
		rcu_read_unlock();
		return;
	}

	peer = sock->peer;

	/* during initialization this function is called before
	 * assigning sock->peer
	 */
	if (unlikely(!peer || !ovpn_peer_hold(peer))) {
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&peer->tcp.out_queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		ovpn_tcp_send_sock_skb(peer, sk, skb);

	peer->tcp.sk_cb.prot->release_cb(sk);
	ovpn_peer_put(peer);
}
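
/* Summary of the TX handoff implemented above (a flow sketch, not
 * additional logic): ovpn_tcp_send_skb() may run in softirq context and
 * therefore only takes the socket spinlock. If a process currently owns
 * the socket, the skb is parked in tcp.out_queue (bounded by
 * net_hotdata.max_backlog); once that process calls release_sock(),
 * ovpn_tcp_release() drains the queue via ovpn_tcp_send_sock_skb().
 * Partial writes (-EAGAIN) are resumed by ovpn_tcp_tx_work() when
 * ovpn_tcp_write_space() fires.
 */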
static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct ovpn_socket *sock;
	int ret, linear = PAGE_SIZE;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	lock_sock(sk);
	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
		rcu_read_unlock();
		release_sock(sk);
		return -EIO;
	}
	rcu_read_unlock();
	peer = sock->peer;

	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL)) {
		ret = -EOPNOTSUPP;
		goto peer_free;
	}

	if (peer->tcp.out_msg.skb) {
		ret = -EAGAIN;
		goto peer_free;
	}

	if (size < linear)
		linear = size;

	skb = sock_alloc_send_pskb(sk, linear, size - linear,
				   msg->msg_flags & MSG_DONTWAIT, &ret, 0);
	if (!skb) {
		net_err_ratelimited("%s: skb alloc failed: %d\n",
				    netdev_name(peer->ovpn->dev), ret);
		goto peer_free;
	}

	skb_put(skb, linear);
	skb->len = size;
	skb->data_len = size - linear;

	ret = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
	if (ret) {
		kfree_skb(skb);
		net_err_ratelimited("%s: skb copy from iter failed: %d\n",
				    netdev_name(peer->ovpn->dev), ret);
		goto peer_free;
	}

	ovpn_skb_cb(skb)->nosignal = msg->msg_flags & MSG_NOSIGNAL;
	ovpn_tcp_send_sock_skb(peer, sk, skb);
	ret = size;
peer_free:
	release_sock(sk);
	ovpn_peer_put(peer);
	return ret;
}

static int ovpn_tcp_disconnect(struct sock *sk, int flags)
{
	return -EBUSY;
}

static void ovpn_tcp_data_ready(struct sock *sk)
{
	struct ovpn_socket *sock;

	trace_sk_data_ready(sk);

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (likely(sock && sock->peer))
		strp_data_ready(&sock->peer->tcp.strp);
	rcu_read_unlock();
}

static void ovpn_tcp_write_space(struct sock *sk)
{
	struct ovpn_socket *sock;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (likely(sock && sock->peer)) {
		schedule_work(&sock->tcp_tx_work);
		sock->peer->tcp.sk_cb.sk_write_space(sk);
	}
	rcu_read_unlock();
}

static void ovpn_tcp_build_protos(struct proto *new_prot,
				  struct proto_ops *new_ops,
				  const struct proto *orig_prot,
				  const struct proto_ops *orig_ops);

static void ovpn_tcp_peer_del_work(struct work_struct *work)
{
	struct ovpn_peer *peer = container_of(work, struct ovpn_peer,
					      tcp.defer_del_work);

	ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
	ovpn_peer_put(peer);
}
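
/* Reference flow for the deferred deletion above (a summary of existing
 * behaviour, not new logic): every caller that schedules
 * peer->tcp.defer_del_work first takes a peer reference via
 * ovpn_peer_hold(); the worker drops it with ovpn_peer_put() after
 * ovpn_peer_del(), keeping the peer alive until teardown has run.
 */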
/* Set TCP encapsulation callbacks */
int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
			   struct ovpn_peer *peer)
{
	struct strp_callbacks cb = {
		.rcv_msg = ovpn_tcp_rcv,
		.parse_msg = ovpn_tcp_parse,
	};
	int ret;

	/* make sure no pre-existing encapsulation handler exists */
	if (ovpn_sock->sk->sk_user_data)
		return -EBUSY;
	rcu_assign_sk_user_data(ovpn_sock->sk, ovpn_sock);

	/* only a fully connected socket is expected. Connection should be
	 * handled in userspace
	 */
	if (ovpn_sock->sk->sk_state != TCP_ESTABLISHED) {
		net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
				    netdev_name(peer->ovpn->dev),
				    ovpn_sock->sk->sk_state);
		ret = -EINVAL;
		goto err;
	}

	ret = strp_init(&peer->tcp.strp, ovpn_sock->sk, &cb);
	if (ret < 0) {
		DEBUG_NET_WARN_ON_ONCE(1);
		goto err;
	}

	INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);

	__sk_dst_reset(ovpn_sock->sk);
	skb_queue_head_init(&peer->tcp.user_queue);
	skb_queue_head_init(&peer->tcp.out_queue);

	/* save current CBs so that they can be restored upon socket release */
	peer->tcp.sk_cb.sk_data_ready = ovpn_sock->sk->sk_data_ready;
	peer->tcp.sk_cb.sk_write_space = ovpn_sock->sk->sk_write_space;
	peer->tcp.sk_cb.prot = ovpn_sock->sk->sk_prot;
	peer->tcp.sk_cb.ops = ovpn_sock->sk->sk_socket->ops;

	/* assign our static CBs and prot/ops */
	ovpn_sock->sk->sk_data_ready = ovpn_tcp_data_ready;
	ovpn_sock->sk->sk_write_space = ovpn_tcp_write_space;

	if (ovpn_sock->sk->sk_family == AF_INET) {
		ovpn_sock->sk->sk_prot = &ovpn_tcp_prot;
		ovpn_sock->sk->sk_socket->ops = &ovpn_tcp_ops;
	} else {
		ovpn_sock->sk->sk_prot = &ovpn_tcp6_prot;
		ovpn_sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
	}

	/* avoid using task_frag */
	ovpn_sock->sk->sk_allocation = GFP_ATOMIC;
	ovpn_sock->sk->sk_use_task_frag = false;

	/* enqueue the RX worker */
	strp_check_rcv(&peer->tcp.strp);

	return 0;
err:
	rcu_assign_sk_user_data(ovpn_sock->sk, NULL);
	return ret;
}

static void ovpn_tcp_close(struct sock *sk, long timeout)
{
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) {
		rcu_read_unlock();
		return;
	}
	peer = sock->peer;
	rcu_read_unlock();

	/* use the held peer reference here: sock must not be dereferenced
	 * again once the RCU read side has been left
	 */
	ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
	peer->tcp.sk_cb.prot->close(sk, timeout);
	ovpn_peer_put(peer);
}
static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	struct sk_buff_head *queue = &sock->sk->sk_receive_queue;
	struct ovpn_socket *ovpn_sock;
	struct ovpn_peer *peer = NULL;
	__poll_t mask;

	rcu_read_lock();
	ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
	/* if we landed in this callback, we expect to have a
	 * meaningful state. The ovpn_socket lifecycle would
	 * prevent it otherwise.
	 */
	if (WARN(!ovpn_sock || !ovpn_sock->peer,
		 "ovpn: null state in ovpn_tcp_poll!")) {
		rcu_read_unlock();
		return 0;
	}

	if (ovpn_peer_hold(ovpn_sock->peer)) {
		peer = ovpn_sock->peer;
		queue = &peer->tcp.user_queue;
	}
	rcu_read_unlock();

	mask = datagram_poll_queue(file, sock, wait, queue);

	if (peer)
		ovpn_peer_put(peer);

	return mask;
}

static void ovpn_tcp_build_protos(struct proto *new_prot,
				  struct proto_ops *new_ops,
				  const struct proto *orig_prot,
				  const struct proto_ops *orig_ops)
{
	memcpy(new_prot, orig_prot, sizeof(*new_prot));
	memcpy(new_ops, orig_ops, sizeof(*new_ops));
	new_prot->recvmsg = ovpn_tcp_recvmsg;
	new_prot->sendmsg = ovpn_tcp_sendmsg;
	new_prot->disconnect = ovpn_tcp_disconnect;
	new_prot->close = ovpn_tcp_close;
	new_prot->release_cb = ovpn_tcp_release;
	new_ops->poll = ovpn_tcp_poll;
}

/* Initialize TCP static objects */
void __init ovpn_tcp_init(void)
{
	ovpn_tcp_build_protos(&ovpn_tcp_prot, &ovpn_tcp_ops, &tcp_prot,
			      &inet_stream_ops);

#if IS_ENABLED(CONFIG_IPV6)
	ovpn_tcp_build_protos(&ovpn_tcp6_prot, &ovpn_tcp6_ops, &tcpv6_prot,
			      &inet6_stream_ops);
#endif
}
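
/* Userspace framing sketch (illustrative only, not part of this module):
 * a process writing a control packet to the encapsulated socket must apply
 * the same two-byte size prefix that ovpn_tcp_parse() expects, e.g.:
 *
 *	uint16_t blen = htons(pkt_len);
 *
 *	send(fd, &blen, sizeof(blen), 0);	// size header
 *	send(fd, pkt, pkt_len, 0);		// packet payload
 *
 * fd, pkt and pkt_len are hypothetical names used only for this example.
 */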