#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
#include <errno.h>

__u32 monitored_pid = 0;

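/* Counters bumped at the test checkpoints below; the user-space part of the
 * test reads them to verify that each hook fired.
 */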
int nr_active;
int nr_snd;
int nr_passive;
int nr_sched;
int nr_txsw;
int nr_ack;

struct sk_stg {
	__u64 sendmsg_ns;	/* record ts when sendmsg is called */
};

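/* Key for time_map: the socket cookie plus the skb's tskey identify one
 * tracked transmission.
 */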
struct sk_tskey {
	u64 cookie;
	u32 tskey;
};

struct delay_info {
	u64 sendmsg_ns;		/* record ts when sendmsg is called */
	u32 sched_delay;	/* SCHED_CB - sendmsg_ns */
	u32 snd_sw_delay;	/* SND_SW_CB - SCHED_CB */
	u32 ack_delay;		/* ACK_CB - SND_SW_CB */
};

struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sk_stg);
} sk_stg_map SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct sk_tskey);
	__type(value, struct delay_info);
	__uint(max_entries, 1024);
} time_map SEC(".maps");

static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds, as an example */

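/* kfunc used to request BPF TX timestamping on the skb being sent; after the
 * call, shinfo->tskey is read and used as the time_map key below.
 */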
extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym;

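/* Set SK_BPF_CB_TX_TIMESTAMPING via bpf_setsockopt(), read it back with
 * bpf_getsockopt(), and check that both return the expected error code
 * (0 when the calls are allowed in this context).
 */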
static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected)
{
	int tmp, new = SK_BPF_CB_TX_TIMESTAMPING;
	int opt = SK_BPF_CB_FLAGS;
	int level = SOL_SOCKET;

	if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected)
		return 1;

	if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected ||
	    (!expected && tmp != new))
		return 1;

	return 0;
}

static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk)
{
	if (bpf_test_sockopt(ctx, sk, -EOPNOTSUPP))
		return true;
	return false;
}

static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops)
{
	u8 opt[3] = {0};
	int load_flags = 0;
	int ret;

	ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags);
	if (ret != -EOPNOTSUPP)
		return true;

	return false;
}

static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops)
{
	int ret;

	ret = bpf_sock_ops_cb_flags_set(skops, 0);
	if (ret != -EOPNOTSUPP)
		return true;

	return false;
}

/* In the timestamping callbacks, the following BPF calls are not allowed for
 * safety reasons. Return false if they all fail as expected, true otherwise.
 */
static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops,
				      const struct sock *sk)
{
	if (bpf_test_access_sockopt(skops, sk))
		return true;

	if (bpf_test_access_load_hdr_opt(skops))
		return true;

	if (bpf_test_access_cb_flags_set(skops))
		return true;

	return false;
}

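/* For TSTAMP_SENDMSG_CB, record the sendmsg timestamp in time_map keyed by
 * (socket cookie, tskey). For the later callbacks, compute the delay of the
 * current stage (sched, software xmit, ack) and fail if it exceeds
 * delay_tolerance_nsec.
 */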
static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk)
{
	struct bpf_sock_ops_kern *skops_kern;
	u64 timestamp = bpf_ktime_get_ns();
	struct skb_shared_info *shinfo;
	struct delay_info dinfo = {0};
	struct sk_tskey key = {0};
	struct delay_info *val;
	struct sk_buff *skb;
	struct sk_stg *stg;
	u64 prior_ts, delay;

	if (bpf_test_access_bpf_calls(skops, sk))
		return false;

	skops_kern = bpf_cast_to_kern_ctx(skops);
	skb = skops_kern->skb;
	shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info);

	key.cookie = bpf_get_socket_cookie(skops);
	if (!key.cookie)
		return false;

	if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) {
		stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0);
		if (!stg)
			return false;
		dinfo.sendmsg_ns = stg->sendmsg_ns;
		bpf_sock_ops_enable_tx_tstamp(skops_kern, 0);
		key.tskey = shinfo->tskey;
		if (!key.tskey)
			return false;
		bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY);
		return true;
	}

	key.tskey = shinfo->tskey;
	if (!key.tskey)
		return false;

	val = bpf_map_lookup_elem(&time_map, &key);
	if (!val)
		return false;

	switch (skops->op) {
	case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
		val->sched_delay = timestamp - val->sendmsg_ns;
		delay = val->sched_delay;
		break;
	case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
		prior_ts = val->sched_delay + val->sendmsg_ns;
		val->snd_sw_delay = timestamp - prior_ts;
		delay = val->snd_sw_delay;
		break;
	case BPF_SOCK_OPS_TSTAMP_ACK_CB:
		prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns;
		val->ack_delay = timestamp - prior_ts;
		delay = val->ack_delay;
		break;
	}

	if (delay >= delay_tolerance_nsec)
		return false;

	/* ACK is the last callback, so remove the entry from the map
	 * after the latency check.
	 */
	if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB)
		bpf_map_delete_elem(&time_map, &key);

	return true;
}

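/* fentry on tcp_sendmsg_locked(): for the monitored pid, and only when the
 * socket's sk_bpf_cb_flags is non-zero, stamp the sendmsg time in socket
 * storage so the timestamping callbacks can compute per-stage delays.
 */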
SEC("fentry/tcp_sendmsg_locked")
int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg,
	     size_t size)
{
	__u32 pid = bpf_get_current_pid_tgid() >> 32;
	u64 timestamp = bpf_ktime_get_ns();
	u32 flag = sk->sk_bpf_cb_flags;
	struct sk_stg *stg;

	if (pid != monitored_pid || !flag)
		return 0;

	stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
				 BPF_SK_STORAGE_GET_F_CREATE);
	if (!stg)
		return 0;

	stg->sendmsg_ns = timestamp;
	nr_snd += 1;
	return 0;
}

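/* sockops program: enable SK_BPF_CB_TX_TIMESTAMPING when the connection is
 * established, then count and latency-check each timestamping callback.
 */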
SEC("sockops")
int skops_sockopt(struct bpf_sock_ops *skops)
{
	struct bpf_sock *bpf_sk = skops->sk;
	const struct sock *sk;

	if (!bpf_sk)
		return 1;

	sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
	if (!sk)
		return 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		nr_active += !bpf_test_sockopt(skops, sk, 0);
		break;
	case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB:
		if (bpf_test_delay(skops, sk))
			nr_snd += 1;
		break;
	case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
		if (bpf_test_delay(skops, sk))
			nr_sched += 1;
		break;
	case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
		if (bpf_test_delay(skops, sk))
			nr_txsw += 1;
		break;
	case BPF_SOCK_OPS_TSTAMP_ACK_CB:
		if (bpf_test_delay(skops, sk))
			nr_ack += 1;
		break;
	}

	return 1;
}

char _license[] SEC("license") = "GPL";