// SPDX-License-Identifier: GPL-2.0
/* OpenVPN data channel offload
 *
 *  Copyright (C) 2019-2025 OpenVPN, Inc.
 *
 *  Author:	Antonio Quartulli <antonio@openvpn.net>
 */

#include <linux/skbuff.h>
#include <net/hotdata.h>
#include <net/inet_common.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/route.h>
#include <trace/events/sock.h>

#include "ovpnpriv.h"
#include "main.h"
#include "io.h"
#include "peer.h"
#include "proto.h"
#include "skb.h"
#include "tcp.h"

#define OVPN_TCP_DEPTH_NESTING	2
#if OVPN_TCP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
#error "OVPN TCP requires its own lockdep subclass"
#endif

static struct proto ovpn_tcp_prot __ro_after_init;
static struct proto_ops ovpn_tcp_ops __ro_after_init;
static struct proto ovpn_tcp6_prot __ro_after_init;
static struct proto_ops ovpn_tcp6_ops __ro_after_init;

static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb)
{
	struct strp_msg *rxm = strp_msg(skb);
	__be16 blen;
	u16 len;
	int err;

	/* when packets are written to the TCP stream, they are prepended with
	 * two bytes indicating the actual packet size.
	 * Parse accordingly and return the actual size (including the size
	 * header)
	 */

	if (skb->len < rxm->offset + 2)
		return 0;

	err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
	if (err < 0)
		return err;

	len = be16_to_cpu(blen);
	if (len < 2)
		return -EINVAL;

	return len + 2;
}

/* queue skb for sending to userspace via recvmsg on the socket */
static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk,
				  struct sk_buff *skb)
{
	skb_set_owner_r(skb, sk);
	memset(skb->cb, 0, sizeof(skb->cb));
	skb_queue_tail(&peer->tcp.user_queue, skb);
	peer->tcp.sk_cb.sk_data_ready(sk);
}

static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
{
	struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
	struct strp_msg *msg = strp_msg(skb);
	size_t pkt_len = msg->full_len - 2;
	size_t off = msg->offset + 2;
	u8 opcode;

	/* ensure skb->data points to the beginning of the openvpn packet */
	if (!pskb_pull(skb, off)) {
		net_warn_ratelimited("%s: packet too small for peer %u\n",
				     netdev_name(peer->ovpn->dev), peer->id);
		goto err;
	}

	/* strparser does not trim the skb for us, therefore we do it now */
	if (pskb_trim(skb, pkt_len) != 0) {
		net_warn_ratelimited("%s: trimming skb failed for peer %u\n",
				     netdev_name(peer->ovpn->dev), peer->id);
		goto err;
	}

	/* we need the first 4 bytes of data to be accessible
	 * to extract the opcode and the key ID later on
	 */
	if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) {
		net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n",
				     netdev_name(peer->ovpn->dev), peer->id);
		goto err;
	}

	/* DATA_V2 packets are handled in kernel, the rest goes to user space */
	opcode = ovpn_opcode_from_skb(skb, 0);
	if (unlikely(opcode != OVPN_DATA_V2)) {
		if (opcode == OVPN_DATA_V1) {
			net_warn_ratelimited("%s: DATA_V1 detected on the TCP stream\n",
					     netdev_name(peer->ovpn->dev));
			goto err;
		}

		/* The packet size header must be there when sending the packet
		 * to userspace, therefore we put it back
		 */
		skb_push(skb, 2);
		ovpn_tcp_to_userspace(peer, strp->sk, skb);
		return;
	}

	/* hold reference to peer as required by ovpn_recv().
	 *
	 * NOTE: in this context we should already be holding a reference to
	 * this peer, therefore ovpn_peer_hold() is not expected to fail
	 */
	if (WARN_ON(!ovpn_peer_hold(peer)))
		goto err_nopeer;

	ovpn_recv(peer, skb);
	return;
err:
	/* take reference for deferred peer deletion. should never fail */
	if (WARN_ON(!ovpn_peer_hold(peer)))
		goto err_nopeer;
	schedule_work(&peer->tcp.defer_del_work);
	dev_dstats_rx_dropped(peer->ovpn->dev);
err_nopeer:
	kfree_skb(skb);
}

static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int flags, int *addr_len)
{
	int err = 0, off, copied = 0, ret;
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
		rcu_read_unlock();
		return -EBADF;
	}
	peer = sock->peer;
	rcu_read_unlock();

	skb = __skb_recv_datagram(sk, &peer->tcp.user_queue, flags, &off, &err);
	if (!skb) {
		if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) {
			ret = 0;
			goto out;
		}
		ret = err;
		goto out;
	}

	copied = len;
	if (copied > skb->len)
		copied = skb->len;
	else if (copied < skb->len)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (unlikely(err)) {
		kfree_skb(skb);
		ret = err;
		goto out;
	}

	if (flags & MSG_TRUNC)
		copied = skb->len;
	kfree_skb(skb);
	ret = copied;
out:
	ovpn_peer_put(peer);
	return ret;
}

void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
{
	struct ovpn_peer *peer = ovpn_sock->peer;
	struct sock *sk = ovpn_sock->sk;

	strp_stop(&peer->tcp.strp);
	skb_queue_purge(&peer->tcp.user_queue);

	/* restore CBs that were saved in ovpn_sock_set_tcp_cb() */
	sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
	sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
	sk->sk_prot = peer->tcp.sk_cb.prot;
	sk->sk_socket->ops = peer->tcp.sk_cb.ops;

	rcu_assign_sk_user_data(sk, NULL);
}

void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock)
{
	struct ovpn_peer *peer = sock->peer;

	/* NOTE: we don't wait for peer->tcp.defer_del_work to finish:
	 * either the worker is not running or this function
	 * was invoked by that worker.
	 */

	cancel_work_sync(&sock->tcp_tx_work);
	strp_done(&peer->tcp.strp);

	skb_queue_purge(&peer->tcp.out_queue);
	kfree_skb(peer->tcp.out_msg.skb);
	peer->tcp.out_msg.skb = NULL;
}

static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
{
	struct sk_buff *skb = peer->tcp.out_msg.skb;
	int ret, flags;

	if (!skb)
		return;

	if (peer->tcp.tx_in_progress)
		return;

	peer->tcp.tx_in_progress = true;

	do {
		flags = ovpn_skb_cb(skb)->nosignal ? MSG_NOSIGNAL : 0;
		ret = skb_send_sock_locked_with_flags(sk, skb,
						      peer->tcp.out_msg.offset,
						      peer->tcp.out_msg.len,
						      flags);
		if (unlikely(ret < 0)) {
			if (ret == -EAGAIN)
				goto out;

			net_warn_ratelimited("%s: TCP error to peer %u: %d\n",
					     netdev_name(peer->ovpn->dev),
					     peer->id, ret);

			/* in case of TCP error we can't recover the VPN
			 * stream therefore we abort the connection
			 */
			ovpn_peer_hold(peer);
			schedule_work(&peer->tcp.defer_del_work);

			/* we bail out immediately and keep tx_in_progress set
			 * to true. This way we prevent more TX attempts
			 * which would lead to more invocations of
			 * schedule_work()
			 */
			return;
		}

		peer->tcp.out_msg.len -= ret;
		peer->tcp.out_msg.offset += ret;
	} while (peer->tcp.out_msg.len > 0);

	if (!peer->tcp.out_msg.len) {
		preempt_disable();
		dev_dstats_tx_add(peer->ovpn->dev, skb->len);
		preempt_enable();
	}

	kfree_skb(peer->tcp.out_msg.skb);
	peer->tcp.out_msg.skb = NULL;
	peer->tcp.out_msg.len = 0;
	peer->tcp.out_msg.offset = 0;

out:
	peer->tcp.tx_in_progress = false;
}

void ovpn_tcp_tx_work(struct work_struct *work)
{
	struct ovpn_socket *sock;

	sock = container_of(work, struct ovpn_socket, tcp_tx_work);

	lock_sock(sock->sk);
	if (sock->peer)
		ovpn_tcp_send_sock(sock->peer, sock->sk);
	release_sock(sock->sk);
}

static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
				   struct sk_buff *skb)
{
	if (peer->tcp.out_msg.skb)
		ovpn_tcp_send_sock(peer, sk);

	if (peer->tcp.out_msg.skb) {
		dev_dstats_tx_dropped(peer->ovpn->dev);
		kfree_skb(skb);
		return;
	}

	peer->tcp.out_msg.skb = skb;
	peer->tcp.out_msg.len = skb->len;
	peer->tcp.out_msg.offset = 0;
	ovpn_tcp_send_sock(peer, sk);
}

void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct sock *sk,
		       struct sk_buff *skb)
{
	u16 len = skb->len;

	*(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);

	spin_lock_nested(&sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
	if (sock_owned_by_user(sk)) {
		if (skb_queue_len(&peer->tcp.out_queue) >=
		    READ_ONCE(net_hotdata.max_backlog)) {
			dev_dstats_tx_dropped(peer->ovpn->dev);
			kfree_skb(skb);
			goto unlock;
		}
		__skb_queue_tail(&peer->tcp.out_queue, skb);
	} else {
		ovpn_tcp_send_sock_skb(peer, sk, skb);
	}
unlock:
	spin_unlock(&sk->sk_lock.slock);
}

static void ovpn_tcp_release(struct sock *sk)
{
	struct sk_buff_head queue;
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (!sock) {
		rcu_read_unlock();
		return;
	}

	peer = sock->peer;

	/* during initialization this function is called before
	 * assigning sock->peer
	 */
	if (unlikely(!peer || !ovpn_peer_hold(peer))) {
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&peer->tcp.out_queue, &queue);

	while ((skb = __skb_dequeue(&queue)))
		ovpn_tcp_send_sock_skb(peer, sk, skb);

	peer->tcp.sk_cb.prot->release_cb(sk);
	ovpn_peer_put(peer);
}

static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct ovpn_socket *sock;
	int ret, linear = PAGE_SIZE;
	struct ovpn_peer *peer;
	struct sk_buff *skb;

	lock_sock(sk);
	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
		rcu_read_unlock();
		release_sock(sk);
		return -EIO;
	}
	rcu_read_unlock();
	peer = sock->peer;

	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL)) {
		ret = -EOPNOTSUPP;
		goto peer_free;
	}

	if (peer->tcp.out_msg.skb) {
		ret = -EAGAIN;
		goto peer_free;
	}

	if (size < linear)
		linear = size;

	skb = sock_alloc_send_pskb(sk, linear, size - linear,
				   msg->msg_flags & MSG_DONTWAIT, &ret, 0);
	if (!skb) {
		net_err_ratelimited("%s: skb alloc failed: %d\n",
				    netdev_name(peer->ovpn->dev), ret);
		goto peer_free;
	}

	skb_put(skb, linear);
	skb->len = size;
	skb->data_len = size - linear;

	ret = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
	if (ret) {
		kfree_skb(skb);
		net_err_ratelimited("%s: skb copy from iter failed: %d\n",
				    netdev_name(peer->ovpn->dev), ret);
		goto peer_free;
	}

	ovpn_skb_cb(skb)->nosignal = msg->msg_flags & MSG_NOSIGNAL;
	ovpn_tcp_send_sock_skb(peer, sk, skb);
	ret = size;
peer_free:
	release_sock(sk);
	ovpn_peer_put(peer);
	return ret;
}

static int ovpn_tcp_disconnect(struct sock *sk, int flags)
{
	return -EBUSY;
}

static void ovpn_tcp_data_ready(struct sock *sk)
{
	struct ovpn_socket *sock;

	trace_sk_data_ready(sk);

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (likely(sock && sock->peer))
		strp_data_ready(&sock->peer->tcp.strp);
	rcu_read_unlock();
}

static void ovpn_tcp_write_space(struct sock *sk)
{
	struct ovpn_socket *sock;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (likely(sock && sock->peer)) {
		schedule_work(&sock->tcp_tx_work);
		sock->peer->tcp.sk_cb.sk_write_space(sk);
	}
	rcu_read_unlock();
}

static void ovpn_tcp_build_protos(struct proto *new_prot,
				  struct proto_ops *new_ops,
				  const struct proto *orig_prot,
				  const struct proto_ops *orig_ops);

static void ovpn_tcp_peer_del_work(struct work_struct *work)
{
	struct ovpn_peer *peer = container_of(work, struct ovpn_peer,
					      tcp.defer_del_work);

	ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
	ovpn_peer_put(peer);
}

/* Set TCP encapsulation callbacks */
int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
			   struct ovpn_peer *peer)
{
	struct strp_callbacks cb = {
		.rcv_msg = ovpn_tcp_rcv,
		.parse_msg = ovpn_tcp_parse,
	};
	int ret;

	/* make sure no pre-existing encapsulation handler exists */
	if (ovpn_sock->sk->sk_user_data)
		return -EBUSY;

	/* only a fully connected socket is expected. Connection should be
	 * handled in userspace
	 */
	if (ovpn_sock->sk->sk_state != TCP_ESTABLISHED) {
		net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
				    netdev_name(peer->ovpn->dev),
				    ovpn_sock->sk->sk_state);
		return -EINVAL;
	}

	ret = strp_init(&peer->tcp.strp, ovpn_sock->sk, &cb);
	if (ret < 0) {
		DEBUG_NET_WARN_ON_ONCE(1);
		return ret;
	}

	INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);

	__sk_dst_reset(ovpn_sock->sk);
	skb_queue_head_init(&peer->tcp.user_queue);
	skb_queue_head_init(&peer->tcp.out_queue);

	/* save current CBs so that they can be restored upon socket release */
	peer->tcp.sk_cb.sk_data_ready = ovpn_sock->sk->sk_data_ready;
	peer->tcp.sk_cb.sk_write_space = ovpn_sock->sk->sk_write_space;
	peer->tcp.sk_cb.prot = ovpn_sock->sk->sk_prot;
	peer->tcp.sk_cb.ops = ovpn_sock->sk->sk_socket->ops;

	/* assign our static CBs and prot/ops */
	ovpn_sock->sk->sk_data_ready = ovpn_tcp_data_ready;
	ovpn_sock->sk->sk_write_space = ovpn_tcp_write_space;

	if (ovpn_sock->sk->sk_family == AF_INET) {
		ovpn_sock->sk->sk_prot = &ovpn_tcp_prot;
		ovpn_sock->sk->sk_socket->ops = &ovpn_tcp_ops;
	} else {
		ovpn_sock->sk->sk_prot = &ovpn_tcp6_prot;
		ovpn_sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
	}

	/* avoid using task_frag */
	ovpn_sock->sk->sk_allocation = GFP_ATOMIC;
	ovpn_sock->sk->sk_use_task_frag = false;

	/* enqueue the RX worker */
	strp_check_rcv(&peer->tcp.strp);

	return 0;
}

static void ovpn_tcp_close(struct sock *sk, long timeout)
{
	struct ovpn_socket *sock;
	struct ovpn_peer *peer;

	rcu_read_lock();
	sock = rcu_dereference_sk_user_data(sk);
	if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) {
		rcu_read_unlock();
		return;
	}
	peer = sock->peer;
	rcu_read_unlock();

	ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
	peer->tcp.sk_cb.prot->close(sk, timeout);
	ovpn_peer_put(peer);
}

static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	__poll_t mask = datagram_poll(file, sock, wait);
	struct ovpn_socket *ovpn_sock;

	rcu_read_lock();
	ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
	if (ovpn_sock && ovpn_sock->peer &&
	    !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue))
		mask |= EPOLLIN | EPOLLRDNORM;
	rcu_read_unlock();

	return mask;
}

static void ovpn_tcp_build_protos(struct proto *new_prot,
				  struct proto_ops *new_ops,
				  const struct proto *orig_prot,
				  const struct proto_ops *orig_ops)
{
	memcpy(new_prot, orig_prot, sizeof(*new_prot));
	memcpy(new_ops, orig_ops, sizeof(*new_ops));
	new_prot->recvmsg = ovpn_tcp_recvmsg;
	new_prot->sendmsg = ovpn_tcp_sendmsg;
	new_prot->disconnect = ovpn_tcp_disconnect;
	new_prot->close = ovpn_tcp_close;
	new_prot->release_cb = ovpn_tcp_release;
	new_ops->poll = ovpn_tcp_poll;
}

/* Initialize TCP static objects */
void __init ovpn_tcp_init(void)
{
	ovpn_tcp_build_protos(&ovpn_tcp_prot, &ovpn_tcp_ops, &tcp_prot,
			      &inet_stream_ops);

#if IS_ENABLED(CONFIG_IPV6)
	ovpn_tcp_build_protos(&ovpn_tcp6_prot, &ovpn_tcp6_ops, &tcpv6_prot,
			      &inet6_stream_ops);
#endif
}