1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */ 3 4 #include "vmlinux.h" 5 #include "bpf_tracing_net.h" 6 #include <bpf/bpf_core_read.h> 7 #include <bpf/bpf_helpers.h> 8 #include <bpf/bpf_tracing.h> 9 #include "bpf_misc.h" 10 11 extern unsigned long CONFIG_HZ __kconfig; 12 13 const volatile char veth[IFNAMSIZ]; 14 const volatile int veth_ifindex; 15 16 int nr_listen; 17 int nr_passive; 18 int nr_active; 19 int nr_connect; 20 int nr_binddev; 21 int nr_socket_post_create; 22 int nr_fin_wait1; 23 24 struct sockopt_test { 25 int opt; 26 int new; 27 int restore; 28 int expected; 29 int tcp_expected; 30 unsigned int flip:1; 31 }; 32 33 static const char not_exist_cc[] = "not_exist"; 34 static const char cubic_cc[] = "cubic"; 35 static const char reno_cc[] = "reno"; 36 37 static const struct sockopt_test sol_socket_tests[] = { 38 { .opt = SO_REUSEADDR, .flip = 1, }, 39 { .opt = SO_SNDBUF, .new = 8123, .expected = 8123 * 2, }, 40 { .opt = SO_RCVBUF, .new = 8123, .expected = 8123 * 2, }, 41 { .opt = SO_KEEPALIVE, .flip = 1, }, 42 { .opt = SO_PRIORITY, .new = 0xeb9f, .expected = 0xeb9f, }, 43 { .opt = SO_REUSEPORT, .flip = 1, }, 44 { .opt = SO_RCVLOWAT, .new = 8123, .expected = 8123, }, 45 { .opt = SO_MARK, .new = 0xeb9f, .expected = 0xeb9f, }, 46 { .opt = SO_MAX_PACING_RATE, .new = 0xeb9f, .expected = 0xeb9f, }, 47 { .opt = SO_TXREHASH, .flip = 1, }, 48 { .opt = 0, }, 49 }; 50 51 static const struct sockopt_test sol_tcp_tests[] = { 52 { .opt = TCP_NODELAY, .flip = 1, }, 53 { .opt = TCP_KEEPIDLE, .new = 123, .expected = 123, .restore = 321, }, 54 { .opt = TCP_KEEPINTVL, .new = 123, .expected = 123, .restore = 321, }, 55 { .opt = TCP_KEEPCNT, .new = 123, .expected = 123, .restore = 124, }, 56 { .opt = TCP_SYNCNT, .new = 123, .expected = 123, .restore = 124, }, 57 { .opt = TCP_WINDOW_CLAMP, .new = 8123, .expected = 8123, .restore = 8124, }, 58 { .opt = TCP_CONGESTION, }, 59 { .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, }, 60 { .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, }, 61 { .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, }, 62 { .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS, 63 .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, }, 64 { .opt = TCP_BPF_DELACK_MAX, .new = 30000, .expected = 30000, }, 65 { .opt = TCP_BPF_RTO_MIN, .new = 30000, .expected = 30000, }, 66 { .opt = TCP_RTO_MAX_MS, .new = 2000, .expected = 2000, }, 67 { .opt = 0, }, 68 }; 69 70 static const struct sockopt_test sol_ip_tests[] = { 71 { .opt = IP_TOS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, }, 72 { .opt = 0, }, 73 }; 74 75 static const struct sockopt_test sol_ipv6_tests[] = { 76 { .opt = IPV6_TCLASS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, }, 77 { .opt = IPV6_AUTOFLOWLABEL, .flip = 1, }, 78 { .opt = 0, }, 79 }; 80 81 struct loop_ctx { 82 void *ctx; 83 struct sock *sk; 84 }; 85 86 static bool sk_is_tcp(struct sock *sk) 87 { 88 return (sk->__sk_common.skc_family == AF_INET || 89 sk->__sk_common.skc_family == AF_INET6) && 90 sk->sk_type == SOCK_STREAM && 91 sk->sk_protocol == IPPROTO_TCP; 92 } 93 94 static int bpf_test_sockopt_flip(void *ctx, struct sock *sk, 95 const struct sockopt_test *t, 96 int level) 97 { 98 int old, tmp, new, opt = t->opt; 99 100 opt = t->opt; 101 102 if (opt == SO_TXREHASH && !sk_is_tcp(sk)) 103 return 0; 104 105 if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old))) 106 return 1; 107 /* kernel initialized txrehash to 255 */ 108 if (level == SOL_SOCKET && opt == SO_TXREHASH && old != 0 && old != 1) 109 old = 1; 110 111 new = !old; 112 if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new))) 113 return 1; 114 if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) || 115 tmp != new) 116 return 1; 117 118 if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old))) 119 return 1; 120 121 return 0; 122 } 123 124 static int bpf_test_sockopt_int(void *ctx, struct sock *sk, 125 const struct sockopt_test *t, 126 int level) 127 { 128 int old, tmp, new, expected, opt; 129 130 opt = t->opt; 131 new = t->new; 132 if (sk->sk_type == SOCK_STREAM && t->tcp_expected) 133 expected = t->tcp_expected; 134 else 135 expected = t->expected; 136 137 if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)) || 138 old == new) 139 return 1; 140 141 if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new))) 142 return 1; 143 if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) || 144 tmp != expected) 145 return 1; 146 147 if (t->restore) 148 old = t->restore; 149 if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old))) 150 return 1; 151 152 return 0; 153 } 154 155 static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc) 156 { 157 const struct sockopt_test *t; 158 159 if (i >= ARRAY_SIZE(sol_socket_tests)) 160 return 1; 161 162 t = &sol_socket_tests[i]; 163 if (!t->opt) 164 return 1; 165 166 if (t->flip) 167 return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, SOL_SOCKET); 168 169 return bpf_test_sockopt_int(lc->ctx, lc->sk, t, SOL_SOCKET); 170 } 171 172 static int bpf_test_ip_sockopt(__u32 i, struct loop_ctx *lc) 173 { 174 const struct sockopt_test *t; 175 176 if (i >= ARRAY_SIZE(sol_ip_tests)) 177 return 1; 178 179 t = &sol_ip_tests[i]; 180 if (!t->opt) 181 return 1; 182 183 if (t->flip) 184 return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IP); 185 186 return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IP); 187 } 188 189 static int bpf_test_ipv6_sockopt(__u32 i, struct loop_ctx *lc) 190 { 191 const struct sockopt_test *t; 192 193 if (i >= ARRAY_SIZE(sol_ipv6_tests)) 194 return 1; 195 196 t = &sol_ipv6_tests[i]; 197 if (!t->opt) 198 return 1; 199 200 if (t->flip) 201 return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IPV6); 202 203 return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IPV6); 204 } 205 206 static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc) 207 { 208 const struct sockopt_test *t; 209 struct sock *sk; 210 void *ctx; 211 212 if (i >= ARRAY_SIZE(sol_tcp_tests)) 213 return 1; 214 215 t = &sol_tcp_tests[i]; 216 if (!t->opt) 217 return 1; 218 219 ctx = lc->ctx; 220 sk = lc->sk; 221 222 if (t->opt == TCP_CONGESTION) { 223 char old_cc[16], tmp_cc[16]; 224 const char *new_cc; 225 int new_cc_len; 226 227 if (!bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, 228 (void *)not_exist_cc, sizeof(not_exist_cc))) 229 return 1; 230 if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc))) 231 return 1; 232 if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc)) { 233 new_cc = reno_cc; 234 new_cc_len = sizeof(reno_cc); 235 } else { 236 new_cc = cubic_cc; 237 new_cc_len = sizeof(cubic_cc); 238 } 239 if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, (void *)new_cc, 240 new_cc_len)) 241 return 1; 242 if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, tmp_cc, sizeof(tmp_cc))) 243 return 1; 244 if (bpf_strncmp(tmp_cc, sizeof(tmp_cc), new_cc)) 245 return 1; 246 if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc))) 247 return 1; 248 return 0; 249 } 250 251 if (t->flip) 252 return bpf_test_sockopt_flip(ctx, sk, t, IPPROTO_TCP); 253 254 return bpf_test_sockopt_int(ctx, sk, t, IPPROTO_TCP); 255 } 256 257 static int bpf_test_sockopt(void *ctx, struct sock *sk) 258 { 259 struct loop_ctx lc = { .ctx = ctx, .sk = sk, }; 260 __u16 family, proto; 261 int n; 262 263 family = sk->sk_family; 264 proto = sk->sk_protocol; 265 266 n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0); 267 if (n != ARRAY_SIZE(sol_socket_tests)) 268 return -1; 269 270 if (proto == IPPROTO_TCP) { 271 n = bpf_loop(ARRAY_SIZE(sol_tcp_tests), bpf_test_tcp_sockopt, &lc, 0); 272 if (n != ARRAY_SIZE(sol_tcp_tests)) 273 return -1; 274 } 275 276 if (family == AF_INET) { 277 n = bpf_loop(ARRAY_SIZE(sol_ip_tests), bpf_test_ip_sockopt, &lc, 0); 278 if (n != ARRAY_SIZE(sol_ip_tests)) 279 return -1; 280 } else { 281 n = bpf_loop(ARRAY_SIZE(sol_ipv6_tests), bpf_test_ipv6_sockopt, &lc, 0); 282 if (n != ARRAY_SIZE(sol_ipv6_tests)) 283 return -1; 284 } 285 286 return 0; 287 } 288 289 static int binddev_test(void *ctx) 290 { 291 const char empty_ifname[] = ""; 292 int ifindex, zero = 0; 293 294 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, 295 (void *)veth, sizeof(veth))) 296 return -1; 297 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 298 &ifindex, sizeof(int)) || 299 ifindex != veth_ifindex) 300 return -1; 301 302 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, 303 (void *)empty_ifname, sizeof(empty_ifname))) 304 return -1; 305 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 306 &ifindex, sizeof(int)) || 307 ifindex != 0) 308 return -1; 309 310 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 311 (void *)&veth_ifindex, sizeof(int))) 312 return -1; 313 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 314 &ifindex, sizeof(int)) || 315 ifindex != veth_ifindex) 316 return -1; 317 318 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 319 &zero, sizeof(int))) 320 return -1; 321 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 322 &ifindex, sizeof(int)) || 323 ifindex != 0) 324 return -1; 325 326 return 0; 327 } 328 329 static int test_tcp_maxseg(void *ctx, struct sock *sk) 330 { 331 int val = 1314, tmp; 332 333 if (sk->sk_state != TCP_ESTABLISHED) 334 return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG, 335 &val, sizeof(val)); 336 337 if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG, &tmp, sizeof(tmp)) || 338 tmp > val) 339 return -1; 340 341 return 0; 342 } 343 344 static int test_tcp_saved_syn(void *ctx, struct sock *sk) 345 { 346 __u8 saved_syn[20]; 347 int one = 1; 348 349 if (sk->sk_state == TCP_LISTEN) 350 return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_SAVE_SYN, 351 &one, sizeof(one)); 352 353 return bpf_getsockopt(ctx, IPPROTO_TCP, TCP_SAVED_SYN, 354 saved_syn, sizeof(saved_syn)); 355 } 356 357 SEC("lsm_cgroup/socket_post_create") 358 int BPF_PROG(socket_post_create, struct socket *sock, int family, 359 int type, int protocol, int kern) 360 { 361 struct sock *sk = sock->sk; 362 363 if (!sk) 364 return 1; 365 366 nr_socket_post_create += !bpf_test_sockopt(sk, sk); 367 nr_binddev += !binddev_test(sk); 368 369 return 1; 370 } 371 372 SEC("cgroup/getsockopt") 373 int _getsockopt(struct bpf_sockopt *ctx) 374 { 375 struct bpf_sock *sk = ctx->sk; 376 int *optval = ctx->optval; 377 struct tcp_sock *tp; 378 379 if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS) 380 return 1; 381 382 tp = bpf_core_cast(sk, struct tcp_sock); 383 if (ctx->optval + sizeof(int) <= ctx->optval_end) { 384 *optval = tp->bpf_sock_ops_cb_flags; 385 ctx->retval = 0; 386 } 387 return 1; 388 } 389 390 SEC("sockops") 391 int skops_sockopt(struct bpf_sock_ops *skops) 392 { 393 struct bpf_sock *bpf_sk = skops->sk; 394 struct sock *sk; 395 int flags; 396 397 if (!bpf_sk) 398 return 1; 399 400 sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk); 401 if (!sk) 402 return 1; 403 404 switch (skops->op) { 405 case BPF_SOCK_OPS_TCP_LISTEN_CB: 406 nr_listen += !(bpf_test_sockopt(skops, sk) || 407 test_tcp_maxseg(skops, sk) || 408 test_tcp_saved_syn(skops, sk)); 409 break; 410 case BPF_SOCK_OPS_TCP_CONNECT_CB: 411 nr_connect += !(bpf_test_sockopt(skops, sk) || 412 test_tcp_maxseg(skops, sk)); 413 break; 414 case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: 415 nr_active += !(bpf_test_sockopt(skops, sk) || 416 test_tcp_maxseg(skops, sk)); 417 break; 418 case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: 419 nr_passive += !(bpf_test_sockopt(skops, sk) || 420 test_tcp_maxseg(skops, sk) || 421 test_tcp_saved_syn(skops, sk)); 422 flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG; 423 bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags)); 424 break; 425 case BPF_SOCK_OPS_STATE_CB: 426 if (skops->args[1] == BPF_TCP_CLOSE_WAIT) 427 nr_fin_wait1 += !bpf_test_sockopt(skops, sk); 428 break; 429 } 430 431 return 1; 432 } 433 434 char _license[] SEC("license") = "GPL"; 435