// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */

/*
 * BPF sockops program for the tcp_hdr_options selftest.
 *
 * It writes a small, test-private TCP header option (struct
 * bpf_test_option, declared in test_tcp_hdr_options.h) into outgoing
 * SYN / SYNACK / FIN (and, when resending, ACK) segments using
 * bpf_reserve_hdr_opt()/bpf_store_hdr_opt(), and reads the peer's copy
 * back with bpf_load_hdr_opt().  What was sent and what was received is
 * recorded in the global *_out / *_in variables below — presumably the
 * userspace half of the selftest reads them to verify the exchange
 * (TODO confirm: the readers are not visible in this file).
 */

#include <stddef.h>
#include <errno.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/tcp.h>
#include <linux/socket.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"
#include "bpf_misc.h"

/* Option "kind" byte and (for TCPOPT_EXP) the 16-bit magic used to
 * recognize our option.  Globals so the test harness can change them
 * before load (NOTE(review): assumed — writers are outside this file).
 */
__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;
/* Snapshot of bpf_sock_ops_cb_flags taken at passive establish, to let
 * the test check which cb flags a child socket inherited.
 */
__u32 inherit_cb_flags = 0;

/* Options we wrote on the passive (server) side ... */
struct bpf_test_option passive_synack_out = {};
struct bpf_test_option passive_fin_out = {};

/* ... and options we received on the passive side. */
struct bpf_test_option passive_estab_in = {};
struct bpf_test_option passive_fin_in = {};

/* Options we wrote on the active (client) side ... */
struct bpf_test_option active_syn_out = {};
struct bpf_test_option active_fin_out = {};

/* ... and options we received on the active side. */
struct bpf_test_option active_estab_in = {};
struct bpf_test_option active_fin_in = {};

/* Per-socket state (active/passive role, syncookie/fastopen/resend_syn
 * bits of struct hdr_stg) keyed by the socket itself.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct hdr_stg);
} hdr_stg_map SEC(".maps");

/* True when the write-hdr callback is for a syncookie SYNACK. */
static bool skops_want_cookie(const struct bpf_sock_ops *skops)
{
	return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
}

/* True when the kernel is asking for option length during MSS calculation. */
static bool skops_current_mss(const struct bpf_sock_ops *skops)
{
	return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
}

/*
 * Total on-the-wire length of a bpf_test_option encoded with @flags:
 * one byte for the flags themselves, one byte per flag-controlled field
 * (OPTION_RESEND carries no payload byte), plus the kind/len (and, for
 * TCPOPT_EXP, magic) overhead.  Returns 0 when no flags are set, i.e.
 * nothing to write.
 */
static __u8 option_total_len(__u8 flags)
{
	__u8 i, len = 1; /* +1 for flags */

	if (!flags)
		return 0;

	/* RESEND bit does not use a byte */
	for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
		len += !!TEST_OPTION_FLAGS(flags, i);

	if (test_kind == TCPOPT_EXP)
		return len + TCP_BPF_EXPOPT_BASE_LEN;
	else
		return len + 2; /* +1 kind, +1 kind-len */
}

/* Serialize @test_opt into @data: flags byte, then the optional
 * max_delack_ms and rand bytes in that fixed order.  parse_test_option()
 * is the exact inverse.
 */
static void write_test_option(const struct bpf_test_option *test_opt,
			      __u8 *data)
{
	__u8 offset = 0;

	data[offset++] = test_opt->flags;
	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
		data[offset++] = test_opt->max_delack_ms;

	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
		data[offset++] = test_opt->rand;
}

/*
 * Write @test_opt into the TCP header being built, either as an
 * experimental option (kind TCPOPT_EXP + 16-bit magic) or as a plain
 * option with kind == test_kind.  Space must already have been reserved
 * by the *_opt_len callbacks.  Returns CG_OK or errors via RET_CG_ERR().
 */
static int store_option(struct bpf_sock_ops *skops,
			const struct bpf_test_option *test_opt)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} write_opt;
	int err;

	if (test_kind == TCPOPT_EXP) {
		write_opt.exprm.kind = TCPOPT_EXP;
		write_opt.exprm.len = option_total_len(test_opt->flags);
		write_opt.exprm.magic = __bpf_htons(test_magic);
		write_opt.exprm.data32 = 0;
		write_test_option(test_opt, write_opt.exprm.data);
		err = bpf_store_hdr_opt(skops, &write_opt.exprm,
					sizeof(write_opt.exprm), 0);
	} else {
		write_opt.regular.kind = test_kind;
		write_opt.regular.len = option_total_len(test_opt->flags);
		write_opt.regular.data32 = 0;
		write_test_option(test_opt, write_opt.regular.data);
		err = bpf_store_hdr_opt(skops, &write_opt.regular,
					sizeof(write_opt.regular), 0);
	}

	if (err)
		RET_CG_ERR(err);

	return CG_OK;
}

/* Decode the payload written by write_test_option() back into @opt. */
static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
{
	opt->flags = *start++;

	if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
		opt->max_delack_ms = *start++;

	if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
		opt->rand = *start++;

	return 0;
}

/*
 * Look up our option in the current skb's TCP header — or in the saved
 * SYN when @from_syn — and parse it into @test_opt.  Returns a negative
 * bpf_load_hdr_opt() error (e.g. -ENOMSG when the option is absent,
 * -ENOENT when no saved SYN exists) or the parse result (0).
 */
static int load_option(struct bpf_sock_ops *skops,
		       struct bpf_test_option *test_opt, bool from_syn)
{
	union {
		struct tcp_exprm_opt exprm;
		struct tcp_opt regular;
	} search_opt;
	int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;

	if (test_kind == TCPOPT_EXP) {
		search_opt.exprm.kind = TCPOPT_EXP;
		/* len 4 => search by kind + the 2-byte magic only */
		search_opt.exprm.len = 4;
		search_opt.exprm.magic = __bpf_htons(test_magic);
		search_opt.exprm.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
				       sizeof(search_opt.exprm), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.exprm.data);
	} else {
		search_opt.regular.kind = test_kind;
		search_opt.regular.len = 0;
		search_opt.regular.data32 = 0;
		ret = bpf_load_hdr_opt(skops, &search_opt.regular,
				       sizeof(search_opt.regular), load_flags);
		if (ret < 0)
			return ret;
		return parse_test_option(test_opt, search_opt.regular.data);
	}
}

/*
 * HDR_OPT_LEN for a SYNACK: reserve space for passive_synack_out, but
 * only if the peer's SYN actually carried our option (looked up from the
 * saved SYN); otherwise stay silent.
 */
static int synack_opt_len(struct bpf_sock_ops *skops)
{
	struct bpf_test_option test_opt = {};
	__u8 optlen;
	int err;

	if (!passive_synack_out.flags)
		return CG_OK;

	err = load_option(skops, &test_opt, true);

	/* bpf_test_option is not found */
	if (err == -ENOMSG)
		return CG_OK;

	if (err)
		RET_CG_ERR(err);

	optlen = option_total_len(passive_synack_out.flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* WRITE_HDR_OPT for a SYNACK: write passive_synack_out; on a syncookie
 * SYNACK also set OPTION_RESEND to ask the peer to repeat its option in
 * the ACK (the cookie SYNACK keeps no state on this side).
 */
static int write_synack_opt(struct bpf_sock_ops *skops)
{
	struct bpf_test_option opt;

	if (!passive_synack_out.flags)
		/* We should not even be called since no header
		 * space has been reserved.
		 */
		RET_CG_ERR(0);

	opt = passive_synack_out;
	if (skops_want_cookie(skops))
		SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);

	return store_option(skops, &opt);
}

/* HDR_OPT_LEN for the active side's SYN (and resent-in-ACK case). */
static int syn_opt_len(struct bpf_sock_ops *skops)
{
	__u8 optlen;
	int err;

	if (!active_syn_out.flags)
		return CG_OK;

	optlen = option_total_len(active_syn_out.flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* WRITE_HDR_OPT counterpart of syn_opt_len(). */
static int write_syn_opt(struct bpf_sock_ops *skops)
{
	if (!active_syn_out.flags)
		RET_CG_ERR(0);

	return store_option(skops, &active_syn_out);
}

/* HDR_OPT_LEN for a FIN: pick active_fin_out or passive_fin_out based on
 * the role stored in sk storage and reserve space for it.
 */
static int fin_opt_len(struct bpf_sock_ops *skops)
{
	struct bpf_test_option *opt;
	struct hdr_stg *hdr_stg;
	__u8 optlen;
	int err;

	if (!skops->sk)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->active)
		opt = &active_fin_out;
	else
		opt = &passive_fin_out;

	optlen = option_total_len(opt->flags);
	if (optlen) {
		err = bpf_reserve_hdr_opt(skops, optlen, 0);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* WRITE_HDR_OPT counterpart of fin_opt_len(). */
static int write_fin_opt(struct bpf_sock_ops *skops)
{
	struct bpf_test_option *opt;
	struct hdr_stg *hdr_stg;

	if (!skops->sk)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->active)
		opt = &active_fin_out;
	else
		opt = &passive_fin_out;

	if (!opt->flags)
		RET_CG_ERR(0);

	return store_option(skops, opt);
}

/* Whether this (active) socket still needs to resend its option in
 * ACKs (peer was in syncookie mode).  Returns 0/1, or -1 when the
 * socket or its storage is unavailable.
 */
static int resend_in_ack(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;

	if (!skops->sk)
		return -1;

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		return -1;

	return !!hdr_stg->resend_syn;
}

/* HDR_OPT_LEN for a pure ACK: only reserve space when the SYN option
 * must be resent (syncookie recovery), otherwise write nothing.
 */
static int nodata_opt_len(struct bpf_sock_ops *skops)
{
	int resend;

	resend = resend_in_ack(skops);
	if (resend < 0)
		RET_CG_ERR(0);

	if (resend)
		return syn_opt_len(skops);

	return CG_OK;
}

/* WRITE_HDR_OPT counterpart of nodata_opt_len(). */
static int write_nodata_opt(struct bpf_sock_ops *skops)
{
	int resend;

	resend = resend_in_ack(skops);
	if (resend < 0)
		RET_CG_ERR(0);

	if (resend)
		return write_syn_opt(skops);

	return CG_OK;
}

static int data_opt_len(struct bpf_sock_ops *skops)
{
	/* Same as the nodata version. Mostly to show
	 * an example usage on skops->skb_len.
	 */
	return nodata_opt_len(skops);
}

static int write_data_opt(struct bpf_sock_ops *skops)
{
	return write_nodata_opt(skops);
}

/* HDR_OPT_LEN while the kernel computes the MSS: reserve the worst case
 * so the MSS accounts for the largest option we might ever write.
 */
static int current_mss_opt_len(struct bpf_sock_ops *skops)
{
	/* Reserve maximum that may be needed */
	int err;

	err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
	if (err)
		RET_CG_ERR(err);

	return CG_OK;
}

/* Dispatch BPF_SOCK_OPS_HDR_OPT_LEN_CB by segment type.  SYNACK is
 * tested before SYN because a SYNACK has both SYN and ACK bits set.
 */
static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);

	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return synack_opt_len(skops);

	if (tcp_flags & TCPHDR_SYN)
		return syn_opt_len(skops);

	if (tcp_flags & TCPHDR_FIN)
		return fin_opt_len(skops);

	if (skops_current_mss(skops))
		/* The kernel is calculating the MSS */
		return current_mss_opt_len(skops);

	if (skops->skb_len)
		return data_opt_len(skops);

	return nodata_opt_len(skops);
}

/* Dispatch BPF_SOCK_OPS_WRITE_HDR_OPT_CB, mirroring handle_hdr_opt_len().
 * The data/nodata split is decided from the skb itself (skb_len vs the
 * TCP header length), after the bounds check the verifier requires.
 */
static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
{
	__u8 tcp_flags = skops_tcp_flags(skops);
	struct tcphdr *th;

	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
		return write_synack_opt(skops);

	if (tcp_flags & TCPHDR_SYN)
		return write_syn_opt(skops);

	if (tcp_flags & TCPHDR_FIN)
		return write_fin_opt(skops);

	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (skops->skb_len > tcp_hdrlen(th))
		return write_data_opt(skops);

	return write_nodata_opt(skops);
}

/* Cap our own delayed-ACK timeout, honoring the delack we advertised. */
static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
{
	__u32 max_delack_us = max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
			      &max_delack_us, sizeof(max_delack_us));
}

/* Lower our RTO floor to match the peer's advertised max delayed ACK. */
static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
{
	__u32 min_rto_us = peer_max_delack_ms * 1000;

	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
			      sizeof(min_rto_us));
}

/*
 * ACTIVE_ESTABLISHED_CB: record the option received in the SYNACK,
 * create the per-socket hdr_stg, arrange option resend if the peer asked
 * for it (OPTION_RESEND => peer was in syncookie mode), and apply the
 * delack/RTO tunings negotiated through the option.
 */
static int handle_active_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {
		.active = true,
	};
	int err;

	err = load_option(skops, &active_estab_in, false);
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
						OPTION_RESEND);
	if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
					      &init_stg,
					      BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (init_stg.resend_syn)
		/* Don't clear the write_hdr cb now because
		 * the ACK may get lost and retransmit may
		 * be needed.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn if this
		 * resend_syn option has received by the peer.
		 *
		 * The header option will be resent until a valid
		 * packet is received at handle_parse_hdr()
		 * and all hdr cb flags will be cleared in
		 * handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
	else if (!active_fin_out.flags)
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);

	if (active_syn_out.max_delack_ms) {
		err = set_delack_max(skops, active_syn_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (active_estab_in.max_delack_ms) {
		err = set_rto_min(skops, active_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/*
 * PASSIVE_ESTABLISHED_CB: the mirror of handle_active_estab() for the
 * server side.  Reads the peer's option from the saved SYN (falling back
 * to the ACK when the SYN was not saved, i.e. syncookie mode), snapshots
 * the inherited cb flags, and handles the fastopen case where the SYNACK
 * is still in flight.
 */
static int handle_passive_estab(struct bpf_sock_ops *skops)
{
	struct hdr_stg init_stg = {};
	struct tcphdr *th;
	int err;

	inherit_cb_flags = skops->bpf_sock_ops_cb_flags;

	err = load_option(skops, &passive_estab_in, true);
	if (err == -ENOENT) {
		/* saved_syn is not found. It was in syncookie mode.
		 * We have asked the active side to resend the options
		 * in ACK, so try to find the bpf_test_option from ACK now.
		 */
		err = load_option(skops, &passive_estab_in, false);
		init_stg.syncookie = true;
	}

	/* ENOMSG: The bpf_test_option is not found which is fine.
	 * Bail out now for all other errors.
	 */
	if (err && err != -ENOMSG)
		RET_CG_ERR(err);

	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		/* Fastopen */

		/* Cannot clear cb_flags to stop write_hdr cb.
		 * synack is not sent yet for fast open.
		 * Even it was, the synack may need to be retransmitted.
		 *
		 * PARSE_ALL_HDR cb flag is set to learn
		 * if synack has reached the peer.
		 * All cb_flags will be cleared in handle_parse_hdr().
		 */
		set_parse_all_hdr_cb_flags(skops);
		init_stg.fastopen = true;
	} else if (!passive_fin_out.flags) {
		/* No options will be written from now */
		clear_hdr_cb_flags(skops);
	}

	if (!skops->sk ||
	    !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
				BPF_SK_STORAGE_GET_F_CREATE))
		RET_CG_ERR(0);

	if (passive_synack_out.max_delack_ms) {
		err = set_delack_max(skops, passive_synack_out.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	if (passive_estab_in.max_delack_ms) {
		err = set_rto_min(skops, passive_estab_in.max_delack_ms);
		if (err)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/*
 * PARSE_HDR_OPT_CB: a valid post-3WHS packet arrived.  Stop the
 * resend/fastopen retransmission machinery once delivery is confirmed,
 * and capture the peer's FIN option when the packet carries a FIN.
 */
static int handle_parse_hdr(struct bpf_sock_ops *skops)
{
	struct hdr_stg *hdr_stg;
	struct tcphdr *th;

	if (!skops->sk)
		RET_CG_ERR(0);

	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
	if (!hdr_stg)
		RET_CG_ERR(0);

	if (hdr_stg->resend_syn || hdr_stg->fastopen)
		/* The PARSE_ALL_HDR cb flag was turned on
		 * to ensure that the previously written
		 * options have reached the peer.
		 * Those previously written option includes:
		 *     - Active side: resend_syn in ACK during syncookie
		 *       or
		 *     - Passive side: SYNACK during fastopen
		 *
		 * A valid packet has been received here after
		 * the 3WHS, so the PARSE_ALL_HDR cb flag
		 * can be cleared now.
		 */
		clear_parse_all_hdr_cb_flags(skops);

	if (hdr_stg->resend_syn && !active_fin_out.flags)
		/* Active side resent the syn option in ACK
		 * because the server was in syncookie mode.
		 * A valid packet has been received, so
		 * clear header cb flags if there is no
		 * more option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (hdr_stg->fastopen && !passive_fin_out.flags)
		/* Passive side was in fastopen.
		 * A valid packet has been received, so
		 * the SYNACK has reached the peer.
		 * Clear header cb flags if there is no more
		 * option to send.
		 */
		clear_hdr_cb_flags(skops);

	if (th->fin) {
		struct bpf_test_option *fin_opt;
		int err;

		if (hdr_stg->active)
			fin_opt = &active_fin_in;
		else
			fin_opt = &passive_fin_in;

		err = load_option(skops, fin_opt, false);
		if (err && err != -ENOMSG)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/*
 * Program entry point: route each sockops callback to its handler.
 * On LISTEN, enable TCP_SAVE_SYN so synack_opt_len()/handle_passive_estab()
 * can read the peer's SYN options later; on CONNECT, just enable the
 * header-option callbacks (set_hdr_cb_flags() comes from
 * test_tcp_hdr_options.h).
 */
SEC("sockops")
int estab(struct bpf_sock_ops *skops)
{
	int true_val = 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
			       &true_val, sizeof(true_val));
		set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		set_hdr_cb_flags(skops, 0);
		break;
	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
		return handle_parse_hdr(skops);
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		return handle_hdr_opt_len(skops);
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		return handle_write_hdr_opt(skops);
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		return handle_passive_estab(skops);
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
		return handle_active_estab(skops);
	}

	return CG_OK;
}

char _license[] SEC("license") = "GPL";