1 #include "vmlinux.h"
2 #include "bpf_tracing_net.h"
3 #include <bpf/bpf_helpers.h>
4 #include <bpf/bpf_tracing.h>
5 #include "bpf_misc.h"
6 #include "bpf_kfuncs.h"
7 #include <errno.h>
8
9 __u32 monitored_pid = 0;
10
11 int nr_active;
12 int nr_snd;
13 int nr_passive;
14 int nr_sched;
15 int nr_txsw;
16 int nr_ack;
17
18 struct sk_stg {
19 __u64 sendmsg_ns; /* record ts when sendmsg is called */
20 };
21
22 struct sk_tskey {
23 u64 cookie;
24 u32 tskey;
25 };
26
27 struct delay_info {
28 u64 sendmsg_ns; /* record ts when sendmsg is called */
29 u32 sched_delay; /* SCHED_CB - sendmsg_ns */
30 u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */
31 u32 ack_delay; /* ACK_CB - SND_SW_CB */
32 };
33
34 struct {
35 __uint(type, BPF_MAP_TYPE_SK_STORAGE);
36 __uint(map_flags, BPF_F_NO_PREALLOC);
37 __type(key, int);
38 __type(value, struct sk_stg);
39 } sk_stg_map SEC(".maps");
40
41 struct {
42 __uint(type, BPF_MAP_TYPE_HASH);
43 __type(key, struct sk_tskey);
44 __type(value, struct delay_info);
45 __uint(max_entries, 1024);
46 } time_map SEC(".maps");
47
48 static u64 delay_tolerance_nsec = 10000000000; /* 10 second as an example */
49
50 extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym;
51
bpf_test_sockopt(void * ctx,const struct sock * sk,int expected)52 static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected)
53 {
54 int tmp, new = SK_BPF_CB_TX_TIMESTAMPING;
55 int opt = SK_BPF_CB_FLAGS;
56 int level = SOL_SOCKET;
57
58 if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected)
59 return 1;
60
61 if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected ||
62 (!expected && tmp != new))
63 return 1;
64
65 return 0;
66 }
67
/* Check that the sockopt helpers are refused with -EOPNOTSUPP.
 *
 * Returns true when bpf_test_sockopt() reports a mismatch, i.e. the helpers
 * did NOT both fail with -EOPNOTSUPP; false when they were refused as
 * expected.
 */
static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk)
{
	return bpf_test_sockopt(ctx, sk, -EOPNOTSUPP) != 0;
}
74
bpf_test_access_load_hdr_opt(struct bpf_sock_ops * skops)75 static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops)
76 {
77 u8 opt[3] = {0};
78 int load_flags = 0;
79 int ret;
80
81 ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags);
82 if (ret != -EOPNOTSUPP)
83 return true;
84
85 return false;
86 }
87
/* Check that bpf_sock_ops_cb_flags_set() is refused with -EOPNOTSUPP.
 *
 * Returns true when the helper returned anything other than -EOPNOTSUPP,
 * false when it was refused as expected.
 */
static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops)
{
	int rc = bpf_sock_ops_cb_flags_set(skops, 0);

	return rc != -EOPNOTSUPP;
}
98
/* The timestamping callbacks must not be able to use the BPF helpers probed
 * below; each of them is expected to fail with -EOPNOTSUPP.
 *
 * Probes run in order (sockopt, load_hdr_opt, cb_flags_set) and stop at the
 * first one that was unexpectedly allowed.
 *
 * Returns false when every helper was refused as expected, true otherwise.
 */
static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops,
				      const struct sock *sk)
{
	return bpf_test_access_sockopt(skops, sk) ||
	       bpf_test_access_load_hdr_opt(skops) ||
	       bpf_test_access_cb_flags_set(skops);
}
116
bpf_test_delay(struct bpf_sock_ops * skops,const struct sock * sk)117 static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk)
118 {
119 struct bpf_sock_ops_kern *skops_kern;
120 u64 timestamp = bpf_ktime_get_ns();
121 struct skb_shared_info *shinfo;
122 struct delay_info dinfo = {0};
123 struct sk_tskey key = {0};
124 struct delay_info *val;
125 struct sk_buff *skb;
126 struct sk_stg *stg;
127 u64 prior_ts, delay;
128
129 if (bpf_test_access_bpf_calls(skops, sk))
130 return false;
131
132 skops_kern = bpf_cast_to_kern_ctx(skops);
133 skb = skops_kern->skb;
134 shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info);
135
136 key.cookie = bpf_get_socket_cookie(skops);
137 if (!key.cookie)
138 return false;
139
140 if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) {
141 stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0);
142 if (!stg)
143 return false;
144 dinfo.sendmsg_ns = stg->sendmsg_ns;
145 bpf_sock_ops_enable_tx_tstamp(skops_kern, 0);
146 key.tskey = shinfo->tskey;
147 if (!key.tskey)
148 return false;
149 bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY);
150 return true;
151 }
152
153 key.tskey = shinfo->tskey;
154 if (!key.tskey)
155 return false;
156
157 val = bpf_map_lookup_elem(&time_map, &key);
158 if (!val)
159 return false;
160
161 switch (skops->op) {
162 case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
163 val->sched_delay = timestamp - val->sendmsg_ns;
164 delay = val->sched_delay;
165 break;
166 case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
167 prior_ts = val->sched_delay + val->sendmsg_ns;
168 val->snd_sw_delay = timestamp - prior_ts;
169 delay = val->snd_sw_delay;
170 break;
171 case BPF_SOCK_OPS_TSTAMP_ACK_CB:
172 prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns;
173 val->ack_delay = timestamp - prior_ts;
174 delay = val->ack_delay;
175 break;
176 }
177
178 if (delay >= delay_tolerance_nsec)
179 return false;
180
181 /* Since it's the last one, remove from the map after latency check */
182 if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB)
183 bpf_map_delete_elem(&time_map, &key);
184
185 return true;
186 }
187
188 SEC("fentry/tcp_sendmsg_locked")
BPF_PROG(trace_tcp_sendmsg_locked,struct sock * sk,struct msghdr * msg,size_t size)189 int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg,
190 size_t size)
191 {
192 __u32 pid = bpf_get_current_pid_tgid() >> 32;
193 u64 timestamp = bpf_ktime_get_ns();
194 u32 flag = sk->sk_bpf_cb_flags;
195 struct sk_stg *stg;
196
197 if (pid != monitored_pid || !flag)
198 return 0;
199
200 stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
201 BPF_SK_STORAGE_GET_F_CREATE);
202 if (!stg)
203 return 0;
204
205 stg->sendmsg_ns = timestamp;
206 nr_snd += 1;
207 return 0;
208 }
209
210 SEC("sockops")
skops_sockopt(struct bpf_sock_ops * skops)211 int skops_sockopt(struct bpf_sock_ops *skops)
212 {
213 struct bpf_sock *bpf_sk = skops->sk;
214 const struct sock *sk;
215
216 if (!bpf_sk)
217 return 1;
218
219 sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
220 if (!sk)
221 return 1;
222
223 switch (skops->op) {
224 case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
225 nr_active += !bpf_test_sockopt(skops, sk, 0);
226 break;
227 case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB:
228 if (bpf_test_delay(skops, sk))
229 nr_snd += 1;
230 break;
231 case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
232 if (bpf_test_delay(skops, sk))
233 nr_sched += 1;
234 break;
235 case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
236 if (bpf_test_delay(skops, sk))
237 nr_txsw += 1;
238 break;
239 case BPF_SOCK_OPS_TSTAMP_ACK_CB:
240 if (bpf_test_delay(skops, sk))
241 nr_ack += 1;
242 break;
243 }
244
245 return 1;
246 }
247
248 char _license[] SEC("license") = "GPL";
249