183752e12SNicolas Rybowski // SPDX-License-Identifier: GPL-2.0 283752e12SNicolas Rybowski /* Copyright (c) 2020, Tessares SA. */ 383752e12SNicolas Rybowski /* Copyright (c) 2024, Kylin Software */ 483752e12SNicolas Rybowski 583752e12SNicolas Rybowski /* vmlinux.h, bpf_helpers.h and other 'define' */ 683752e12SNicolas Rybowski #include "bpf_tracing_net.h" 7*cd19b885SGeliang Tang #include "mptcp_bpf.h" 883752e12SNicolas Rybowski 983752e12SNicolas Rybowski char _license[] SEC("license") = "GPL"; 1083752e12SNicolas Rybowski 1183752e12SNicolas Rybowski char cc[TCP_CA_NAME_MAX] = "reno"; 12*cd19b885SGeliang Tang int pid; 1383752e12SNicolas Rybowski 1483752e12SNicolas Rybowski /* Associate a subflow counter to each token */ 1583752e12SNicolas Rybowski struct { 1683752e12SNicolas Rybowski __uint(type, BPF_MAP_TYPE_HASH); 1783752e12SNicolas Rybowski __uint(key_size, sizeof(__u32)); 1883752e12SNicolas Rybowski __uint(value_size, sizeof(__u32)); 1983752e12SNicolas Rybowski __uint(max_entries, 100); 2083752e12SNicolas Rybowski } mptcp_sf SEC(".maps"); 2183752e12SNicolas Rybowski 2283752e12SNicolas Rybowski SEC("sockops") 2383752e12SNicolas Rybowski int mptcp_subflow(struct bpf_sock_ops *skops) 2483752e12SNicolas Rybowski { 2583752e12SNicolas Rybowski __u32 init = 1, key, mark, *cnt; 2683752e12SNicolas Rybowski struct mptcp_sock *msk; 2783752e12SNicolas Rybowski struct bpf_sock *sk; 2883752e12SNicolas Rybowski int err; 2983752e12SNicolas Rybowski 3083752e12SNicolas Rybowski if (skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB) 3183752e12SNicolas Rybowski return 1; 3283752e12SNicolas Rybowski 3383752e12SNicolas Rybowski sk = skops->sk; 3483752e12SNicolas Rybowski if (!sk) 3583752e12SNicolas Rybowski return 1; 3683752e12SNicolas Rybowski 3783752e12SNicolas Rybowski msk = bpf_skc_to_mptcp_sock(sk); 3883752e12SNicolas Rybowski if (!msk) 3983752e12SNicolas Rybowski return 1; 4083752e12SNicolas Rybowski 4183752e12SNicolas Rybowski key = msk->token; 4283752e12SNicolas Rybowski cnt = bpf_map_lookup_elem(&mptcp_sf, &key); 4383752e12SNicolas Rybowski if (cnt) { 4483752e12SNicolas Rybowski /* A new subflow is added to an existing MPTCP connection */ 4583752e12SNicolas Rybowski __sync_fetch_and_add(cnt, 1); 4683752e12SNicolas Rybowski mark = *cnt; 4783752e12SNicolas Rybowski } else { 4883752e12SNicolas Rybowski /* A new MPTCP connection is just initiated and this is its primary subflow */ 4983752e12SNicolas Rybowski bpf_map_update_elem(&mptcp_sf, &key, &init, BPF_ANY); 5083752e12SNicolas Rybowski mark = init; 5183752e12SNicolas Rybowski } 5283752e12SNicolas Rybowski 5383752e12SNicolas Rybowski /* Set the mark of the subflow's socket based on appearance order */ 5483752e12SNicolas Rybowski err = bpf_setsockopt(skops, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); 5583752e12SNicolas Rybowski if (err < 0) 5683752e12SNicolas Rybowski return 1; 5783752e12SNicolas Rybowski if (mark == 2) 5883752e12SNicolas Rybowski err = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, TCP_CA_NAME_MAX); 5983752e12SNicolas Rybowski 6083752e12SNicolas Rybowski return 1; 6183752e12SNicolas Rybowski } 62*cd19b885SGeliang Tang 63*cd19b885SGeliang Tang static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx) 64*cd19b885SGeliang Tang { 65*cd19b885SGeliang Tang struct mptcp_subflow_context *subflow; 66*cd19b885SGeliang Tang int i = 0; 67*cd19b885SGeliang Tang 68*cd19b885SGeliang Tang mptcp_for_each_subflow(msk, subflow) { 69*cd19b885SGeliang Tang struct sock *ssk; 70*cd19b885SGeliang Tang 71*cd19b885SGeliang Tang ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, 72*cd19b885SGeliang Tang struct mptcp_subflow_context)); 73*cd19b885SGeliang Tang 74*cd19b885SGeliang Tang if (ssk->sk_mark != ++i) { 75*cd19b885SGeliang Tang ctx->retval = -2; 76*cd19b885SGeliang Tang break; 77*cd19b885SGeliang Tang } 78*cd19b885SGeliang Tang } 79*cd19b885SGeliang Tang 80*cd19b885SGeliang Tang return 1; 81*cd19b885SGeliang Tang } 82*cd19b885SGeliang Tang 83*cd19b885SGeliang Tang static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx) 84*cd19b885SGeliang Tang { 85*cd19b885SGeliang Tang struct mptcp_subflow_context *subflow; 86*cd19b885SGeliang Tang 87*cd19b885SGeliang Tang mptcp_for_each_subflow(msk, subflow) { 88*cd19b885SGeliang Tang struct inet_connection_sock *icsk; 89*cd19b885SGeliang Tang struct sock *ssk; 90*cd19b885SGeliang Tang 91*cd19b885SGeliang Tang ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, 92*cd19b885SGeliang Tang struct mptcp_subflow_context)); 93*cd19b885SGeliang Tang icsk = bpf_core_cast(ssk, struct inet_connection_sock); 94*cd19b885SGeliang Tang 95*cd19b885SGeliang Tang if (ssk->sk_mark == 2 && 96*cd19b885SGeliang Tang __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) { 97*cd19b885SGeliang Tang ctx->retval = -2; 98*cd19b885SGeliang Tang break; 99*cd19b885SGeliang Tang } 100*cd19b885SGeliang Tang } 101*cd19b885SGeliang Tang 102*cd19b885SGeliang Tang return 1; 103*cd19b885SGeliang Tang } 104*cd19b885SGeliang Tang 105*cd19b885SGeliang Tang SEC("cgroup/getsockopt") 106*cd19b885SGeliang Tang int _getsockopt_subflow(struct bpf_sockopt *ctx) 107*cd19b885SGeliang Tang { 108*cd19b885SGeliang Tang struct bpf_sock *sk = ctx->sk; 109*cd19b885SGeliang Tang struct mptcp_sock *msk; 110*cd19b885SGeliang Tang 111*cd19b885SGeliang Tang if (bpf_get_current_pid_tgid() >> 32 != pid) 112*cd19b885SGeliang Tang return 1; 113*cd19b885SGeliang Tang 114*cd19b885SGeliang Tang if (!sk || sk->protocol != IPPROTO_MPTCP || 115*cd19b885SGeliang Tang (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) && 116*cd19b885SGeliang Tang !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION))) 117*cd19b885SGeliang Tang return 1; 118*cd19b885SGeliang Tang 119*cd19b885SGeliang Tang msk = bpf_core_cast(sk, struct mptcp_sock); 120*cd19b885SGeliang Tang if (msk->pm.subflows != 1) { 121*cd19b885SGeliang Tang ctx->retval = -1; 122*cd19b885SGeliang Tang return 1; 123*cd19b885SGeliang Tang } 124*cd19b885SGeliang Tang 125*cd19b885SGeliang Tang if (ctx->optname == SO_MARK) 126*cd19b885SGeliang Tang return _check_getsockopt_subflow_mark(msk, ctx); 127*cd19b885SGeliang Tang return _check_getsockopt_subflow_cc(msk, ctx); 128*cd19b885SGeliang Tang } 129