1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */ 3 4 #include "vmlinux.h" 5 #include "bpf_tracing_net.h" 6 #include <bpf/bpf_core_read.h> 7 #include <bpf/bpf_helpers.h> 8 #include <bpf/bpf_tracing.h> 9 10 #ifndef ARRAY_SIZE 11 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 12 #endif 13 14 extern unsigned long CONFIG_HZ __kconfig; 15 16 const volatile char veth[IFNAMSIZ]; 17 const volatile int veth_ifindex; 18 19 int nr_listen; 20 int nr_passive; 21 int nr_active; 22 int nr_connect; 23 int nr_binddev; 24 int nr_socket_post_create; 25 26 struct sockopt_test { 27 int opt; 28 int new; 29 int restore; 30 int expected; 31 int tcp_expected; 32 unsigned int flip:1; 33 }; 34 35 static const char cubic_cc[] = "cubic"; 36 static const char reno_cc[] = "reno"; 37 38 static const struct sockopt_test sol_socket_tests[] = { 39 { .opt = SO_REUSEADDR, .flip = 1, }, 40 { .opt = SO_SNDBUF, .new = 8123, .expected = 8123 * 2, }, 41 { .opt = SO_RCVBUF, .new = 8123, .expected = 8123 * 2, }, 42 { .opt = SO_KEEPALIVE, .flip = 1, }, 43 { .opt = SO_PRIORITY, .new = 0xeb9f, .expected = 0xeb9f, }, 44 { .opt = SO_REUSEPORT, .flip = 1, }, 45 { .opt = SO_RCVLOWAT, .new = 8123, .expected = 8123, }, 46 { .opt = SO_MARK, .new = 0xeb9f, .expected = 0xeb9f, }, 47 { .opt = SO_MAX_PACING_RATE, .new = 0xeb9f, .expected = 0xeb9f, }, 48 { .opt = SO_TXREHASH, .flip = 1, }, 49 { .opt = 0, }, 50 }; 51 52 static const struct sockopt_test sol_tcp_tests[] = { 53 { .opt = TCP_NODELAY, .flip = 1, }, 54 { .opt = TCP_MAXSEG, .new = 1314, .expected = 1314, }, 55 { .opt = TCP_KEEPIDLE, .new = 123, .expected = 123, .restore = 321, }, 56 { .opt = TCP_KEEPINTVL, .new = 123, .expected = 123, .restore = 321, }, 57 { .opt = TCP_KEEPCNT, .new = 123, .expected = 123, .restore = 124, }, 58 { .opt = TCP_SYNCNT, .new = 123, .expected = 123, .restore = 124, }, 59 { .opt = TCP_WINDOW_CLAMP, .new = 8123, .expected = 8123, .restore = 8124, }, 60 { .opt = TCP_CONGESTION, }, 61 { .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, }, 62 { .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, }, 63 { .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, }, 64 { .opt = TCP_SAVE_SYN, .new = 1, .expected = 1, }, 65 { .opt = 0, }, 66 }; 67 68 static const struct sockopt_test sol_ip_tests[] = { 69 { .opt = IP_TOS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, }, 70 { .opt = 0, }, 71 }; 72 73 static const struct sockopt_test sol_ipv6_tests[] = { 74 { .opt = IPV6_TCLASS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, }, 75 { .opt = IPV6_AUTOFLOWLABEL, .flip = 1, }, 76 { .opt = 0, }, 77 }; 78 79 struct loop_ctx { 80 void *ctx; 81 struct sock *sk; 82 }; 83 84 static int __bpf_getsockopt(void *ctx, struct sock *sk, 85 int level, int opt, int *optval, 86 int optlen) 87 { 88 if (level == SOL_SOCKET) { 89 switch (opt) { 90 case SO_REUSEADDR: 91 *optval = !!BPF_CORE_READ_BITFIELD(sk, sk_reuse); 92 break; 93 case SO_KEEPALIVE: 94 *optval = !!(sk->sk_flags & (1UL << 3)); 95 break; 96 case SO_RCVLOWAT: 97 *optval = sk->sk_rcvlowat; 98 break; 99 case SO_MAX_PACING_RATE: 100 *optval = sk->sk_max_pacing_rate; 101 break; 102 default: 103 return bpf_getsockopt(ctx, level, opt, optval, optlen); 104 } 105 return 0; 106 } 107 108 if (level == IPPROTO_TCP) { 109 struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk); 110 111 if (!tp) 112 return -1; 113 114 switch (opt) { 115 case TCP_NODELAY: 116 *optval = !!(BPF_CORE_READ_BITFIELD(tp, nonagle) & TCP_NAGLE_OFF); 117 break; 118 case TCP_MAXSEG: 119 *optval = tp->rx_opt.user_mss; 120 break; 121 case TCP_KEEPIDLE: 122 *optval = tp->keepalive_time / CONFIG_HZ; 123 break; 124 case TCP_SYNCNT: 125 *optval = tp->inet_conn.icsk_syn_retries; 126 break; 127 case TCP_KEEPINTVL: 128 *optval = tp->keepalive_intvl / CONFIG_HZ; 129 break; 130 case TCP_KEEPCNT: 131 *optval = tp->keepalive_probes; 132 break; 133 case TCP_WINDOW_CLAMP: 134 *optval = tp->window_clamp; 135 break; 136 case TCP_THIN_LINEAR_TIMEOUTS: 137 *optval = !!BPF_CORE_READ_BITFIELD(tp, thin_lto); 138 break; 139 case TCP_USER_TIMEOUT: 140 *optval = tp->inet_conn.icsk_user_timeout; 141 break; 142 case TCP_NOTSENT_LOWAT: 143 *optval = tp->notsent_lowat; 144 break; 145 case TCP_SAVE_SYN: 146 *optval = BPF_CORE_READ_BITFIELD(tp, save_syn); 147 break; 148 default: 149 return bpf_getsockopt(ctx, level, opt, optval, optlen); 150 } 151 return 0; 152 } 153 154 if (level == IPPROTO_IPV6) { 155 switch (opt) { 156 case IPV6_AUTOFLOWLABEL: { 157 __u16 proto = sk->sk_protocol; 158 struct inet_sock *inet_sk; 159 160 if (proto == IPPROTO_TCP) 161 inet_sk = (struct inet_sock *)bpf_skc_to_tcp_sock(sk); 162 else 163 inet_sk = (struct inet_sock *)bpf_skc_to_udp6_sock(sk); 164 165 if (!inet_sk) 166 return -1; 167 168 *optval = !!inet_sk->pinet6->autoflowlabel; 169 break; 170 } 171 default: 172 return bpf_getsockopt(ctx, level, opt, optval, optlen); 173 } 174 return 0; 175 } 176 177 return bpf_getsockopt(ctx, level, opt, optval, optlen); 178 } 179 180 static int bpf_test_sockopt_flip(void *ctx, struct sock *sk, 181 const struct sockopt_test *t, 182 int level) 183 { 184 int old, tmp, new, opt = t->opt; 185 186 opt = t->opt; 187 188 if (__bpf_getsockopt(ctx, sk, level, opt, &old, sizeof(old))) 189 return 1; 190 /* kernel initialized txrehash to 255 */ 191 if (level == SOL_SOCKET && opt == SO_TXREHASH && old != 0 && old != 1) 192 old = 1; 193 194 new = !old; 195 if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new))) 196 return 1; 197 if (__bpf_getsockopt(ctx, sk, level, opt, &tmp, sizeof(tmp)) || 198 tmp != new) 199 return 1; 200 201 if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old))) 202 return 1; 203 204 return 0; 205 } 206 207 static int bpf_test_sockopt_int(void *ctx, struct sock *sk, 208 const struct sockopt_test *t, 209 int level) 210 { 211 int old, tmp, new, expected, opt; 212 213 opt = t->opt; 214 new = t->new; 215 if (sk->sk_type == SOCK_STREAM && t->tcp_expected) 216 expected = t->tcp_expected; 217 else 218 expected = t->expected; 219 220 if (__bpf_getsockopt(ctx, sk, level, opt, &old, sizeof(old)) || 221 old == new) 222 return 1; 223 224 if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new))) 225 return 1; 226 if (__bpf_getsockopt(ctx, sk, level, opt, &tmp, sizeof(tmp)) || 227 tmp != expected) 228 return 1; 229 230 if (t->restore) 231 old = t->restore; 232 if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old))) 233 return 1; 234 235 return 0; 236 } 237 238 static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc) 239 { 240 const struct sockopt_test *t; 241 242 if (i >= ARRAY_SIZE(sol_socket_tests)) 243 return 1; 244 245 t = &sol_socket_tests[i]; 246 if (!t->opt) 247 return 1; 248 249 if (t->flip) 250 return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, SOL_SOCKET); 251 252 return bpf_test_sockopt_int(lc->ctx, lc->sk, t, SOL_SOCKET); 253 } 254 255 static int bpf_test_ip_sockopt(__u32 i, struct loop_ctx *lc) 256 { 257 const struct sockopt_test *t; 258 259 if (i >= ARRAY_SIZE(sol_ip_tests)) 260 return 1; 261 262 t = &sol_ip_tests[i]; 263 if (!t->opt) 264 return 1; 265 266 if (t->flip) 267 return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IP); 268 269 return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IP); 270 } 271 272 static int bpf_test_ipv6_sockopt(__u32 i, struct loop_ctx *lc) 273 { 274 const struct sockopt_test *t; 275 276 if (i >= ARRAY_SIZE(sol_ipv6_tests)) 277 return 1; 278 279 t = &sol_ipv6_tests[i]; 280 if (!t->opt) 281 return 1; 282 283 if (t->flip) 284 return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IPV6); 285 286 return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IPV6); 287 } 288 289 static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc) 290 { 291 const struct sockopt_test *t; 292 struct sock *sk; 293 void *ctx; 294 295 if (i >= ARRAY_SIZE(sol_tcp_tests)) 296 return 1; 297 298 t = &sol_tcp_tests[i]; 299 if (!t->opt) 300 return 1; 301 302 ctx = lc->ctx; 303 sk = lc->sk; 304 305 if (t->opt == TCP_CONGESTION) { 306 char old_cc[16], tmp_cc[16]; 307 const char *new_cc; 308 int new_cc_len; 309 310 if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc))) 311 return 1; 312 if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc)) { 313 new_cc = reno_cc; 314 new_cc_len = sizeof(reno_cc); 315 } else { 316 new_cc = cubic_cc; 317 new_cc_len = sizeof(cubic_cc); 318 } 319 if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, (void *)new_cc, 320 new_cc_len)) 321 return 1; 322 if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, tmp_cc, sizeof(tmp_cc))) 323 return 1; 324 if (bpf_strncmp(tmp_cc, sizeof(tmp_cc), new_cc)) 325 return 1; 326 if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc))) 327 return 1; 328 return 0; 329 } 330 331 if (t->flip) 332 return bpf_test_sockopt_flip(ctx, sk, t, IPPROTO_TCP); 333 334 return bpf_test_sockopt_int(ctx, sk, t, IPPROTO_TCP); 335 } 336 337 static int bpf_test_sockopt(void *ctx, struct sock *sk) 338 { 339 struct loop_ctx lc = { .ctx = ctx, .sk = sk, }; 340 __u16 family, proto; 341 int n; 342 343 family = sk->sk_family; 344 proto = sk->sk_protocol; 345 346 n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0); 347 if (n != ARRAY_SIZE(sol_socket_tests)) 348 return -1; 349 350 if (proto == IPPROTO_TCP) { 351 n = bpf_loop(ARRAY_SIZE(sol_tcp_tests), bpf_test_tcp_sockopt, &lc, 0); 352 if (n != ARRAY_SIZE(sol_tcp_tests)) 353 return -1; 354 } 355 356 if (family == AF_INET) { 357 n = bpf_loop(ARRAY_SIZE(sol_ip_tests), bpf_test_ip_sockopt, &lc, 0); 358 if (n != ARRAY_SIZE(sol_ip_tests)) 359 return -1; 360 } else { 361 n = bpf_loop(ARRAY_SIZE(sol_ipv6_tests), bpf_test_ipv6_sockopt, &lc, 0); 362 if (n != ARRAY_SIZE(sol_ipv6_tests)) 363 return -1; 364 } 365 366 return 0; 367 } 368 369 static int binddev_test(void *ctx) 370 { 371 const char empty_ifname[] = ""; 372 int ifindex, zero = 0; 373 374 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, 375 (void *)veth, sizeof(veth))) 376 return -1; 377 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 378 &ifindex, sizeof(int)) || 379 ifindex != veth_ifindex) 380 return -1; 381 382 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, 383 (void *)empty_ifname, sizeof(empty_ifname))) 384 return -1; 385 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 386 &ifindex, sizeof(int)) || 387 ifindex != 0) 388 return -1; 389 390 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 391 (void *)&veth_ifindex, sizeof(int))) 392 return -1; 393 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 394 &ifindex, sizeof(int)) || 395 ifindex != veth_ifindex) 396 return -1; 397 398 if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 399 &zero, sizeof(int))) 400 return -1; 401 if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, 402 &ifindex, sizeof(int)) || 403 ifindex != 0) 404 return -1; 405 406 return 0; 407 } 408 409 SEC("lsm_cgroup/socket_post_create") 410 int BPF_PROG(socket_post_create, struct socket *sock, int family, 411 int type, int protocol, int kern) 412 { 413 struct sock *sk = sock->sk; 414 415 if (!sk) 416 return 1; 417 418 nr_socket_post_create += !bpf_test_sockopt(sk, sk); 419 nr_binddev += !binddev_test(sk); 420 421 return 1; 422 } 423 424 SEC("sockops") 425 int skops_sockopt(struct bpf_sock_ops *skops) 426 { 427 struct bpf_sock *bpf_sk = skops->sk; 428 struct sock *sk; 429 430 if (!bpf_sk) 431 return 1; 432 433 sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk); 434 if (!sk) 435 return 1; 436 437 switch (skops->op) { 438 case BPF_SOCK_OPS_TCP_LISTEN_CB: 439 nr_listen += !bpf_test_sockopt(skops, sk); 440 break; 441 case BPF_SOCK_OPS_TCP_CONNECT_CB: 442 nr_connect += !bpf_test_sockopt(skops, sk); 443 break; 444 case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: 445 nr_active += !bpf_test_sockopt(skops, sk); 446 break; 447 case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: 448 nr_passive += !bpf_test_sockopt(skops, sk); 449 break; 450 } 451 452 return 1; 453 } 454 455 char _license[] SEC("license") = "GPL"; 456