1 #include "vmlinux.h" 2 #include "bpf_tracing_net.h" 3 #include <bpf/bpf_helpers.h> 4 #include <bpf/bpf_tracing.h> 5 #include "bpf_misc.h" 6 #include "bpf_kfuncs.h" 7 #include <errno.h> 8 9 __u32 monitored_pid = 0; 10 11 int nr_active; 12 int nr_snd; 13 int nr_passive; 14 int nr_sched; 15 int nr_txsw; 16 int nr_ack; 17 18 struct sk_stg { 19 __u64 sendmsg_ns; /* record ts when sendmsg is called */ 20 }; 21 22 struct sk_tskey { 23 u64 cookie; 24 u32 tskey; 25 }; 26 27 struct delay_info { 28 u64 sendmsg_ns; /* record ts when sendmsg is called */ 29 u32 sched_delay; /* SCHED_CB - sendmsg_ns */ 30 u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */ 31 u32 ack_delay; /* ACK_CB - SND_SW_CB */ 32 }; 33 34 struct { 35 __uint(type, BPF_MAP_TYPE_SK_STORAGE); 36 __uint(map_flags, BPF_F_NO_PREALLOC); 37 __type(key, int); 38 __type(value, struct sk_stg); 39 } sk_stg_map SEC(".maps"); 40 41 struct { 42 __uint(type, BPF_MAP_TYPE_HASH); 43 __type(key, struct sk_tskey); 44 __type(value, struct delay_info); 45 __uint(max_entries, 1024); 46 } time_map SEC(".maps"); 47 48 static u64 delay_tolerance_nsec = 10000000000; /* 10 second as an example */ 49 50 extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym; 51 52 static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected) 53 { 54 int tmp, new = SK_BPF_CB_TX_TIMESTAMPING; 55 int opt = SK_BPF_CB_FLAGS; 56 int level = SOL_SOCKET; 57 58 if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected) 59 return 1; 60 61 if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected || 62 (!expected && tmp != new)) 63 return 1; 64 65 return 0; 66 } 67 68 static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk) 69 { 70 if (bpf_test_sockopt(ctx, sk, -EOPNOTSUPP)) 71 return true; 72 return false; 73 } 74 75 static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops) 76 { 77 u8 opt[3] = {0}; 78 int load_flags = 0; 79 int ret; 80 81 ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags); 82 if (ret != -EOPNOTSUPP) 83 return true; 84 85 return false; 86 } 87 88 static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops) 89 { 90 int ret; 91 92 ret = bpf_sock_ops_cb_flags_set(skops, 0); 93 if (ret != -EOPNOTSUPP) 94 return true; 95 96 return false; 97 } 98 99 /* In the timestamping callbacks, we're not allowed to call the following 100 * BPF CALLs for the safety concern. Return false if expected. 101 */ 102 static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops, 103 const struct sock *sk) 104 { 105 if (bpf_test_access_sockopt(skops, sk)) 106 return true; 107 108 if (bpf_test_access_load_hdr_opt(skops)) 109 return true; 110 111 if (bpf_test_access_cb_flags_set(skops)) 112 return true; 113 114 return false; 115 } 116 117 static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk) 118 { 119 struct bpf_sock_ops_kern *skops_kern; 120 u64 timestamp = bpf_ktime_get_ns(); 121 struct skb_shared_info *shinfo; 122 struct delay_info dinfo = {0}; 123 struct sk_tskey key = {0}; 124 struct delay_info *val; 125 struct sk_buff *skb; 126 struct sk_stg *stg; 127 u64 prior_ts, delay; 128 129 if (bpf_test_access_bpf_calls(skops, sk)) 130 return false; 131 132 skops_kern = bpf_cast_to_kern_ctx(skops); 133 skb = skops_kern->skb; 134 shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info); 135 136 key.cookie = bpf_get_socket_cookie(skops); 137 if (!key.cookie) 138 return false; 139 140 if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) { 141 stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0); 142 if (!stg) 143 return false; 144 dinfo.sendmsg_ns = stg->sendmsg_ns; 145 bpf_sock_ops_enable_tx_tstamp(skops_kern, 0); 146 key.tskey = shinfo->tskey; 147 if (!key.tskey) 148 return false; 149 bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY); 150 return true; 151 } 152 153 key.tskey = shinfo->tskey; 154 if (!key.tskey) 155 return false; 156 157 val = bpf_map_lookup_elem(&time_map, &key); 158 if (!val) 159 return false; 160 161 switch (skops->op) { 162 case BPF_SOCK_OPS_TSTAMP_SCHED_CB: 163 val->sched_delay = timestamp - val->sendmsg_ns; 164 delay = val->sched_delay; 165 break; 166 case BPF_SOCK_OPS_TSTAMP_SND_SW_CB: 167 prior_ts = val->sched_delay + val->sendmsg_ns; 168 val->snd_sw_delay = timestamp - prior_ts; 169 delay = val->snd_sw_delay; 170 break; 171 case BPF_SOCK_OPS_TSTAMP_ACK_CB: 172 prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns; 173 val->ack_delay = timestamp - prior_ts; 174 delay = val->ack_delay; 175 break; 176 } 177 178 if (delay >= delay_tolerance_nsec) 179 return false; 180 181 /* Since it's the last one, remove from the map after latency check */ 182 if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB) 183 bpf_map_delete_elem(&time_map, &key); 184 185 return true; 186 } 187 188 SEC("fentry/tcp_sendmsg_locked") 189 int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, 190 size_t size) 191 { 192 __u32 pid = bpf_get_current_pid_tgid() >> 32; 193 u64 timestamp = bpf_ktime_get_ns(); 194 u32 flag = sk->sk_bpf_cb_flags; 195 struct sk_stg *stg; 196 197 if (pid != monitored_pid || !flag) 198 return 0; 199 200 stg = bpf_sk_storage_get(&sk_stg_map, sk, 0, 201 BPF_SK_STORAGE_GET_F_CREATE); 202 if (!stg) 203 return 0; 204 205 stg->sendmsg_ns = timestamp; 206 nr_snd += 1; 207 return 0; 208 } 209 210 SEC("sockops") 211 int skops_sockopt(struct bpf_sock_ops *skops) 212 { 213 struct bpf_sock *bpf_sk = skops->sk; 214 const struct sock *sk; 215 216 if (!bpf_sk) 217 return 1; 218 219 sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk); 220 if (!sk) 221 return 1; 222 223 switch (skops->op) { 224 case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: 225 nr_active += !bpf_test_sockopt(skops, sk, 0); 226 break; 227 case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB: 228 if (bpf_test_delay(skops, sk)) 229 nr_snd += 1; 230 break; 231 case BPF_SOCK_OPS_TSTAMP_SCHED_CB: 232 if (bpf_test_delay(skops, sk)) 233 nr_sched += 1; 234 break; 235 case BPF_SOCK_OPS_TSTAMP_SND_SW_CB: 236 if (bpf_test_delay(skops, sk)) 237 nr_txsw += 1; 238 break; 239 case BPF_SOCK_OPS_TSTAMP_ACK_CB: 240 if (bpf_test_delay(skops, sk)) 241 nr_ack += 1; 242 break; 243 } 244 245 return 1; 246 } 247 248 char _license[] SEC("license") = "GPL"; 249