// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
 */

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/unaligned.h>
#include <net/tcp.h>
#include <net/netns/generic.h>
#include <linux/proc_fs.h>

#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/nf_synproxy.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_synproxy.h>

/* Held around the hook reference counters and hook (un)registration. */
static DEFINE_MUTEX(synproxy_mutex);

/* Pernet id, filled in through synproxy_net_ops.id on registration. */
unsigned int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);

/*
 * synproxy_parse_options - extract the TCP options synproxy cares about.
 * @skb:  packet containing the TCP header
 * @doff: offset of the TCP header inside @skb
 * @th:   TCP header (only th->doff is read here)
 * @opts: result; opts->options is reset and then ORed with one
 *        NF_SYNPROXY_OPT_* bit per recognised option
 *
 * Recognises MSS, window scale, timestamp and SACK-permitted. Value fields
 * in @opts are written only when the matching option is present with its
 * expected length.
 *
 * Returns false if th->doff describes a header shorter than struct tcphdr
 * or the option bytes cannot be obtained from the skb. Returns true
 * otherwise, including when parsing stops early at EOL or at a truncated
 * or malformed option (options seen before that point are kept).
 */
bool
synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
		       const struct tcphdr *th, struct synproxy_options *opts)
{
	int length = (th->doff * 4) - sizeof(*th);
	/* scratch buffer handed to skb_header_pointer() */
	u8 buf[40], *ptr;

	if (unlikely(length < 0))
		return false;

	ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
	if (ptr == NULL)
		return false;

	opts->options = 0;
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return true;
		case TCPOPT_NOP:
			/* single-byte option, no length field */
			length--;
			continue;
		default:
			/* need at least kind + length bytes */
			if (length < 2)
				return true;
			opsize = *ptr++;
			if (opsize < 2)
				return true;
			/* option claims more bytes than remain */
			if (opsize > length)
				return true;

			switch (opcode) {
			case TCPOPT_MSS:
				if (opsize == TCPOLEN_MSS) {
					opts->mss_option = get_unaligned_be16(ptr);
					opts->options |= NF_SYNPROXY_OPT_MSS;
				}
				break;
			case TCPOPT_WINDOW:
				if (opsize == TCPOLEN_WINDOW) {
					opts->wscale = *ptr;
					/* clamp oversized shift counts */
					if (opts->wscale > TCP_MAX_WSCALE)
						opts->wscale = TCP_MAX_WSCALE;
					opts->options |= NF_SYNPROXY_OPT_WSCALE;
				}
				break;
			case TCPOPT_TIMESTAMP:
				if (opsize == TCPOLEN_TIMESTAMP) {
					opts->tsval = get_unaligned_be32(ptr);
					opts->tsecr = get_unaligned_be32(ptr + 4);
					opts->options |= NF_SYNPROXY_OPT_TIMESTAMP;
				}
				break;
			case TCPOPT_SACK_PERM:
				if (opsize == TCPOLEN_SACK_PERM)
					opts->options |= NF_SYNPROXY_OPT_SACK_PERM;
				break;
			}

			/* kind and length bytes were already consumed */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return true;
}
EXPORT_SYMBOL_GPL(synproxy_parse_options);

/*
 * Number of option bytes synproxy_build_options() emits for @opts.
 *
 * SACK-permitted only adds to the size when no timestamp is present; with
 * a timestamp it is written into the same 32-bit word as the timestamp
 * kind/length (see synproxy_build_options()).
 */
static unsigned int
synproxy_options_size(const struct synproxy_options *opts)
{
	unsigned int size = 0;

	if (opts->options & NF_SYNPROXY_OPT_MSS)
		size += TCPOLEN_MSS_ALIGNED;
	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
		size += TCPOLEN_TSTAMP_ALIGNED;
	else if (opts->options & NF_SYNPROXY_OPT_SACK_PERM)
		size += TCPOLEN_SACKPERM_ALIGNED;
	if (opts->options & NF_SYNPROXY_OPT_WSCALE)
		size += TCPOLEN_WSCALE_ALIGNED;

	return size;
}

/*
 * Write the options selected in opts->options directly behind @th, one
 * 32-bit word at a time, in the order MSS, [SACK-perm +] timestamp (or
 * SACK-perm alone), window scale. The callers in this file size the header
 * with synproxy_options_size() before calling this.
 */
static void
synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
{
	__be32 *ptr = (__be32 *)(th + 1);
	u8 options = opts->options;

	if (options & NF_SYNPROXY_OPT_MSS)
		*ptr++ = htonl((TCPOPT_MSS << 24) |
			       (TCPOLEN_MSS << 16) |
			       opts->mss_option);

	if (options & NF_SYNPROXY_OPT_TIMESTAMP) {
		/* the two bytes before the timestamp carry SACK-perm or NOPs */
		if (options & NF_SYNPROXY_OPT_SACK_PERM)
			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
				       (TCPOLEN_SACK_PERM << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);
		else
			*ptr++ = htonl((TCPOPT_NOP << 24) |
				       (TCPOPT_NOP << 16) |
				       (TCPOPT_TIMESTAMP << 8) |
				       TCPOLEN_TIMESTAMP);

		*ptr++ = htonl(opts->tsval);
		*ptr++ = htonl(opts->tsecr);
	} else if (options & NF_SYNPROXY_OPT_SACK_PERM)
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_NOP << 16) |
			       (TCPOPT_SACK_PERM << 8) |
			       TCPOLEN_SACK_PERM);

	if (options & NF_SYNPROXY_OPT_WSCALE)
		*ptr++ = htonl((TCPOPT_NOP << 24) |
			       (TCPOPT_WINDOW << 16) |
			       (TCPOLEN_WINDOW << 8) |
			       opts->wscale);
}

/*
 * synproxy_init_timestamp_cookie - encode option state into the timestamp.
 *
 * Echoes the received tsval as tsecr and builds a new tsval from
 * tcp_clock_ms() with the low six bits replaced by:
 *   bits 0-3: the received window scale, or 0xf if none was offered
 *   bit 4:    SACK-permitted was offered
 *   bit 5:    NF_SYNPROXY_OPT_ECN is set
 * When a window scale was offered, opts->wscale is then overwritten with
 * info->wscale. synproxy_check_timestamp_cookie() decodes these bits.
 */
void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
				    struct synproxy_options *opts)
{
	opts->tsecr = opts->tsval;
	opts->tsval = tcp_clock_ms() & ~0x3f;

	if (opts->options & NF_SYNPROXY_OPT_WSCALE) {
		opts->tsval |= opts->wscale;
		opts->wscale = info->wscale;
	} else
		opts->tsval |= 0xf;

	if (opts->options & NF_SYNPROXY_OPT_SACK_PERM)
		opts->tsval |= 1 << 4;

	if (opts->options & NF_SYNPROXY_OPT_ECN)
		opts->tsval |= 1 << 5;
}
EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);

/*
 * Recover the option state encoded by synproxy_init_timestamp_cookie()
 * from the low six bits of opts->tsecr. Option bits are only ever added
 * to opts->options here, never cleared.
 */
static void
synproxy_check_timestamp_cookie(struct synproxy_options *opts)
{
	opts->wscale = opts->tsecr & 0xf;
	/* 0xf marks "no window scale offered" */
	if (opts->wscale != 0xf)
		opts->options |= NF_SYNPROXY_OPT_WSCALE;

	opts->options |= opts->tsecr & (1 << 4) ? NF_SYNPROXY_OPT_SACK_PERM : 0;

	opts->options |= opts->tsecr & (1 << 5) ? NF_SYNPROXY_OPT_ECN : 0;
}

/*
 * synproxy_tstamp_adjust - shift TCP timestamps by synproxy->tsoff.
 *
 * No-op when tsoff is 0. Otherwise walks the option area and, for each
 * well-formed timestamp option, rewrites one field in place and patches
 * the TCP checksum:
 *   reply direction:  tsval (op[2..5]) -= tsoff
 *   other direction:  tsecr (op[6..9]) += tsoff
 *
 * Returns false only when skb_ensure_writable() fails; malformed options
 * end the walk but still return true.
 */
static bool
synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff,
		       struct tcphdr *th, struct nf_conn *ct,
		       enum ip_conntrack_info ctinfo,
		       const struct nf_conn_synproxy *synproxy)
{
	unsigned int optoff, optend;
	u32 new, old;

	if (synproxy->tsoff == 0)
		return true;

	optoff = protoff + sizeof(struct tcphdr);
	optend = protoff + th->doff * 4;

	if (skb_ensure_writable(skb, optend))
		return false;

	/* re-derive the header pointer from skb->data after the call above */
	th = (struct tcphdr *)(skb->data + protoff);

	while (optoff < optend) {
		unsigned char *op = skb->data + optoff;

		switch (op[0]) {
		case TCPOPT_EOL:
			return true;
		case TCPOPT_NOP:
			optoff++;
			continue;
		default:
			/* no room for a length byte, overrun, or bogus length */
			if (optoff + 1 == optend ||
			    optoff + op[1] > optend ||
			    op[1] < 2)
				return true;
			if (op[0] == TCPOPT_TIMESTAMP &&
			    op[1] == TCPOLEN_TIMESTAMP) {
				if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
					old = get_unaligned_be32(&op[2]);
					new = old - synproxy->tsoff;
					put_unaligned_be32(new, &op[2]);
				} else {
					old = get_unaligned_be32(&op[6]);
					new = old + synproxy->tsoff;
					put_unaligned_be32(new, &op[6]);
				}
				inet_proto_csum_replace4(&th->check, skb,
							 cpu_to_be32(old),
							 cpu_to_be32(new), false);
			}
			optoff += op[1];
		}
	}
	return true;
}

#ifdef CONFIG_PROC_FS
/*
 * seq_file iterator over the per-CPU synproxy_stats of the file's netns.
 * Position 0 yields SEQ_START_TOKEN (the header line); position N > 0
 * corresponds to CPU N - 1. CPUs that are not possible are skipped.
 */
static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos - 1; cpu < nr_cpu_ids; cpu++) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu + 1;
		return per_cpu_ptr(snet->stats, cpu);
	}

	return NULL;
}

/* Advance to the next possible CPU; *pos is bumped even at the end. */
static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; cpu++) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu + 1;
		return per_cpu_ptr(snet->stats, cpu);
	}
	(*pos)++;
	return NULL;
}

/* Nothing to release: start/next take no locks or references. */
static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v)
{
	return;
}

/*
 * Print the header for SEQ_START_TOKEN, otherwise one line of counters
 * for the CPU whose stats @v points at. The "entries" column is always
 * printed as 0.
 */
static int synproxy_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct synproxy_stats *stats = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "entries\t\tsyn_received\t"
			      "cookie_invalid\tcookie_valid\t"
			      "cookie_retrans\tconn_reopened\n");
		return 0;
	}

	seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0,
		   stats->syn_received,
		   stats->cookie_invalid,
		   stats->cookie_valid,
		   stats->cookie_retrans,
		   stats->conn_reopened);

	return 0;
}

static const struct seq_operations synproxy_cpu_seq_ops = {
	.start = synproxy_cpu_seq_start,
	.next = synproxy_cpu_seq_next,
	.stop = synproxy_cpu_seq_stop,
	.show = synproxy_cpu_seq_show,
};

/* Create the read-only "synproxy" entry under net->proc_net_stat. */
static int __net_init synproxy_proc_init(struct net *net)
{
	if (!proc_create_net("synproxy", 0444, net->proc_net_stat,
			     &synproxy_cpu_seq_ops, sizeof(struct seq_net_private)))
		return -ENOMEM;
	return 0;
}

static void __net_exit synproxy_proc_exit(struct net *net)
{
	remove_proc_entry("synproxy", net->proc_net_stat);
}
#else
/* Without CONFIG_PROC_FS the proc helpers are no-ops. */
static int __net_init synproxy_proc_init(struct net *net)
{
	return 0;
}

static void __net_exit synproxy_proc_exit(struct net *net)
{
	return;
}
#endif /* CONFIG_PROC_FS */

/*
 * Per-netns setup: allocate a conntrack template carrying the seqadj and
 * synproxy extensions (marked confirmed), the per-CPU statistics, and the
 * proc entry. Returns 0 or a negative errno; everything allocated so far
 * is released on failure.
 */
static int __net_init synproxy_net_init(struct net *net)
{
	struct synproxy_net *snet = synproxy_pernet(net);
	struct nf_conn *ct;
	int err = -ENOMEM;

	ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL);
	if (!ct)
		goto err1;

	if (!nfct_seqadj_ext_add(ct))
		goto err2;
	if (!nfct_synproxy_ext_add(ct))
		goto err2;

	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
	snet->tmpl = ct;

	snet->stats = alloc_percpu(struct synproxy_stats);
	if (snet->stats == NULL)
		goto err2;

	err = synproxy_proc_init(net);
	if (err < 0)
		goto err3;

	return 0;

err3:
	free_percpu(snet->stats);
err2:
	nf_ct_tmpl_free(ct);
err1:
	return err;
}

/* Per-netns teardown: drop the template, proc entry and statistics. */
static void __net_exit synproxy_net_exit(struct net *net)
{
	struct synproxy_net *snet = synproxy_pernet(net);

	nf_ct_put(snet->tmpl);
	synproxy_proc_exit(net);
	free_percpu(snet->stats);
}

static struct pernet_operations synproxy_net_ops = {
	.init = synproxy_net_init,
	.exit = synproxy_net_exit,
	.id = &synproxy_net_id,
	.size = sizeof(struct synproxy_net),
};

static int __init synproxy_core_init(void)
{
	return register_pernet_subsys(&synproxy_net_ops);
}

static void __exit synproxy_core_exit(void)
{
	unregister_pernet_subsys(&synproxy_net_ops);
}

module_init(synproxy_core_init);
module_exit(synproxy_core_exit);

/*
 * Append a minimal IPv4 header (no options, DF set, id 0, TTL from the
 * netns default) to @skb and make it the network header. The checksum
 * field is left at 0 here.
 */
static struct iphdr *
synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr,
		  __be32 daddr)
{
	struct iphdr *iph;

	skb_reset_network_header(skb);
	iph = skb_put(skb, sizeof(*iph));
	iph->version = 4;
	iph->ihl = sizeof(*iph) / 4;
	iph->tos = 0;
	iph->id = 0;
	iph->frag_off = htons(IP_DF);
	iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
	iph->protocol = IPPROTO_TCP;
	iph->check = 0;
	iph->saddr = saddr;
	iph->daddr = daddr;

	return iph;
}

/*
 * Finish and transmit a synproxy-generated IPv4 TCP packet.
 *
 * Sets @nskb up for CHECKSUM_PARTIAL over the TCP header, borrows the dst
 * of the triggering @skb without taking a reference, re-routes via
 * ip_route_me_harder() and sends with ip_local_out(). If @nfct is given it
 * is attached to @nskb with @ctinfo and a reference is taken. @nskb is
 * freed here if routing fails.
 */
static void
synproxy_send_tcp(struct net *net,
		  const struct sk_buff *skb, struct sk_buff *nskb,
		  struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
		  struct iphdr *niph, struct tcphdr *nth,
		  unsigned int tcp_hdr_size)
{
	nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
	nskb->ip_summed = CHECKSUM_PARTIAL;
	nskb->csum_start = (unsigned char *)nth - nskb->head;
	nskb->csum_offset = offsetof(struct tcphdr, check);

	skb_dst_set_noref(nskb, skb_dst(skb));
	nskb->protocol = htons(ETH_P_IP);
	if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
		goto free_nskb;

	if (nfct) {
		nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
		nf_conntrack_get(nfct);
	}

	ip_local_out(net, nskb->sk, nskb);
	return;

free_nskb:
	kfree_skb(nskb);
}

/*
 * synproxy_send_client_synack - answer a client SYN with a cookie SYN/ACK.
 *
 * Builds a reply to @skb with addresses and ports swapped, the sequence
 * number taken from __cookie_v4_init_sequence() (seeded with
 * opts->mss_encode), a zero window, ECE if NF_SYNPROXY_OPT_ECN is set, and
 * the options described by @opts. Silently does nothing if the skb
 * allocation fails.
 */
void
synproxy_send_client_synack(struct net *net,
			    const struct sk_buff *skb, const struct tcphdr *th,
			    const struct synproxy_options *opts)
{
	struct sk_buff *nskb;
	struct iphdr *iph, *niph;
	struct tcphdr *nth;
	unsigned int tcp_hdr_size;
	u16 mss = opts->mss_encode;

	iph = ip_hdr(skb);

	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
			 GFP_ATOMIC);
	if (!nskb)
		return;
	skb_reserve(nskb, MAX_TCP_HEADER);

	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);

	skb_reset_transport_header(nskb);
	nth = skb_put(nskb, tcp_hdr_size);
	nth->source = th->dest;
	nth->dest = th->source;
	nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
	nth->ack_seq = htonl(ntohl(th->seq) + 1);
	tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
	if (opts->options & NF_SYNPROXY_OPT_ECN)
		tcp_flag_word(nth) |= TCP_FLAG_ECE;
	nth->doff = tcp_hdr_size / 4;
	nth->window = 0;
	nth->check = 0;
	nth->urg_ptr = 0;

	synproxy_build_options(nth, opts);

	synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
			  IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
}
EXPORT_SYMBOL_GPL(synproxy_send_client_synack);

/*
 * Send a SYN towards the server, keeping the addresses and ports of @skb.
 * The sequence number is @recv_seq - 1 and the packet is tagged with the
 * per-netns conntrack template as IP_CT_NEW. Does nothing if the skb
 * allocation fails.
 */
static void
synproxy_send_server_syn(struct net *net,
			 const struct sk_buff *skb, const struct tcphdr *th,
			 const struct synproxy_options *opts, u32 recv_seq)
{
	struct synproxy_net *snet = synproxy_pernet(net);
	struct sk_buff *nskb;
	struct iphdr *iph, *niph;
	struct tcphdr *nth;
	unsigned int tcp_hdr_size;

	iph = ip_hdr(skb);

	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
			 GFP_ATOMIC);
	if (!nskb)
		return;
	skb_reserve(nskb, MAX_TCP_HEADER);

	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);

	skb_reset_transport_header(nskb);
	nth = skb_put(nskb, tcp_hdr_size);
	nth->source = th->source;
	nth->dest = th->dest;
	nth->seq = htonl(recv_seq - 1);
	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
	 * sequence number translation once a connection tracking entry exists.
	 */
	nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
	tcp_flag_word(nth) = TCP_FLAG_SYN;
	if (opts->options & NF_SYNPROXY_OPT_ECN)
		tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
	nth->doff = tcp_hdr_size / 4;
	nth->window = th->window;
	nth->check = 0;
	nth->urg_ptr = 0;

	synproxy_build_options(nth, opts);

	synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
			  niph, nth, tcp_hdr_size);
}

/*
 * Send an ACK back to the sender of @skb (addresses and ports swapped).
 * The window comes from the original direction's td_maxwin in @state. The
 * packet is sent without a conntrack attached. Does nothing if the skb
 * allocation fails.
 */
static void
synproxy_send_server_ack(struct net *net,
			 const struct ip_ct_tcp *state,
			 const struct sk_buff *skb, const struct tcphdr *th,
			 const struct synproxy_options *opts)
{
	struct sk_buff *nskb;
	struct iphdr *iph, *niph;
	struct tcphdr *nth;
	unsigned int tcp_hdr_size;

	iph = ip_hdr(skb);

	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
			 GFP_ATOMIC);
	if (!nskb)
		return;
	skb_reserve(nskb, MAX_TCP_HEADER);

	niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr);

	skb_reset_transport_header(nskb);
	nth = skb_put(nskb, tcp_hdr_size);
	nth->source = th->dest;
	nth->dest = th->source;
	nth->seq = htonl(ntohl(th->ack_seq));
	nth->ack_seq = htonl(ntohl(th->seq) + 1);
	tcp_flag_word(nth) = TCP_FLAG_ACK;
	nth->doff = tcp_hdr_size / 4;
	nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
	nth->check = 0;
	nth->urg_ptr = 0;

	synproxy_build_options(nth, opts);

	synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
}

/*
 * Forward an ACK in the same direction as @skb (addresses and ports kept),
 * with seq = th->seq + 1, th->ack_seq unchanged and th->window shifted
 * right by opts->wscale. The packet is attached to the conntrack of @skb.
 * Does nothing if the skb allocation fails.
 */
static void
synproxy_send_client_ack(struct net *net,
			 const struct sk_buff *skb, const struct tcphdr *th,
			 const struct synproxy_options *opts)
{
	struct sk_buff *nskb;
	struct iphdr *iph, *niph;
	struct tcphdr *nth;
	unsigned int tcp_hdr_size;

	iph = ip_hdr(skb);

	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
			 GFP_ATOMIC);
	if (!nskb)
		return;
	skb_reserve(nskb, MAX_TCP_HEADER);

	niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr);

	skb_reset_transport_header(nskb);
	nth = skb_put(nskb, tcp_hdr_size);
	nth->source = th->source;
	nth->dest = th->dest;
	nth->seq = htonl(ntohl(th->seq) + 1);
	nth->ack_seq = th->ack_seq;
	tcp_flag_word(nth) = TCP_FLAG_ACK;
	nth->doff = tcp_hdr_size / 4;
	nth->window = htons(ntohs(th->window) >> opts->wscale);
	nth->check = 0;
	nth->urg_ptr = 0;

	synproxy_build_options(nth, opts);

	synproxy_send_tcp(net, skb, nskb, skb_nfct(skb),
			  IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size);
}

/*
 * synproxy_recv_client_ack - validate the cookie in a client ACK.
 *
 * Checks the packet with __cookie_v4_check(). On failure bumps
 * cookie_invalid and returns false. On success bumps cookie_valid, stores
 * the recovered MSS in @opts, decodes the timestamp cookie if a timestamp
 * option was parsed, sends a SYN to the server and returns true.
 */
bool
synproxy_recv_client_ack(struct net *net,
			 const struct sk_buff *skb, const struct tcphdr *th,
			 struct synproxy_options *opts, u32 recv_seq)
{
	struct synproxy_net *snet = synproxy_pernet(net);
	int mss;

	mss = __cookie_v4_check(ip_hdr(skb), th);
	if (mss == 0) {
		this_cpu_inc(snet->stats->cookie_invalid);
		return false;
	}

	this_cpu_inc(snet->stats->cookie_valid);
	opts->mss_option = mss;
	opts->options |= NF_SYNPROXY_OPT_MSS;

	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
		synproxy_check_timestamp_cookie(opts);

	synproxy_send_server_syn(net, skb, th, opts, recv_seq);
	return true;
}
EXPORT_SYMBOL_GPL(synproxy_recv_client_ack);

/*
 * ipv4_synproxy_hook - netfilter hook driving the IPv4 synproxy handshake.
 *
 * Packets without a conntrack entry, without the synproxy extension,
 * loopback packets and non-TCP packets are accepted untouched. A packet
 * whose TCP header cannot be read is dropped. Otherwise the action depends
 * on the conntrack TCP state:
 *
 *   CLOSE     - an RST outside the original direction re-initialises the
 *               sequence adjustment; a bare SYN in the original direction
 *               resets seqadj/tsoff, counts conn_reopened and continues as
 *               SYN_SENT. Anything else goes straight to the timestamp
 *               adjustment.
 *   SYN_SENT  - an ACK without SYN in the original direction is checked as
 *               a cookie ACK: stolen on success, dropped on failure. Any
 *               other packet records ack_seq as the ISN and, if present,
 *               tsecr as the initial timestamp.
 *   SYN_RECV  - a SYN/ACK completes the handshake: compute tsoff, ACK the
 *               server, initialise seqadj, ACK the client, steal the skb.
 *
 * Packets that are not stolen or dropped get their timestamps adjusted and
 * are accepted; an adjustment failure drops the packet with ENOMEM.
 */
unsigned int
ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
		   const struct nf_hook_state *nhs)
{
	struct net *net = nhs->net;
	struct synproxy_net *snet = synproxy_pernet(net);
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	struct nf_conn_synproxy *synproxy;
	struct synproxy_options opts = {};
	const struct ip_ct_tcp *state;
	struct tcphdr *th, _th;
	unsigned int thoff;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return NF_ACCEPT;

	synproxy = nfct_synproxy(ct);
	if (!synproxy)
		return NF_ACCEPT;

	if (nf_is_loopback_packet(skb) ||
	    ip_hdr(skb)->protocol != IPPROTO_TCP)
		return NF_ACCEPT;

	thoff = ip_hdrlen(skb);
	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
	if (!th)
		return NF_DROP;

	state = &ct->proto.tcp;
	switch (state->state) {
	case TCP_CONNTRACK_CLOSE:
		if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
						      ntohl(th->seq) + 1);
			break;
		}

		if (!th->syn || th->ack ||
		    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
			break;

		/* Reopened connection - reset the sequence number and timestamp
		 * adjustments, they will get initialized once the connection is
		 * reestablished.
		 */
		nf_ct_seqadj_init(ct, ctinfo, 0);
		synproxy->tsoff = 0;
		this_cpu_inc(snet->stats->conn_reopened);
		fallthrough;
	case TCP_CONNTRACK_SYN_SENT:
		if (!synproxy_parse_options(skb, thoff, th, &opts))
			return NF_DROP;

		if (!th->syn && th->ack &&
		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
			/* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
			 * therefore we need to add 1 to make the SYN sequence
			 * number match the one of first SYN.
			 */
			if (synproxy_recv_client_ack(net, skb, th, &opts,
						     ntohl(th->seq) + 1)) {
				this_cpu_inc(snet->stats->cookie_retrans);
				consume_skb(skb);
				return NF_STOLEN;
			} else {
				return NF_DROP;
			}
		}

		/* see synproxy_send_server_syn(): ack_seq relays the ISN */
		synproxy->isn = ntohl(th->ack_seq);
		if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP)
			synproxy->its = opts.tsecr;

		nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
		break;
	case TCP_CONNTRACK_SYN_RECV:
		if (!th->syn || !th->ack)
			break;

		if (!synproxy_parse_options(skb, thoff, th, &opts))
			return NF_DROP;

		if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) {
			synproxy->tsoff = opts.tsval - synproxy->its;
			nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
		}

		/* the ACKs below carry at most a timestamp option */
		opts.options &= ~(NF_SYNPROXY_OPT_MSS |
				  NF_SYNPROXY_OPT_WSCALE |
				  NF_SYNPROXY_OPT_SACK_PERM);

		/* echo the received tsval as tsecr in the server ACK */
		swap(opts.tsval, opts.tsecr);
		synproxy_send_server_ack(net, state, skb, th, &opts);

		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
		nf_conntrack_event_cache(IPCT_SEQADJ, ct);

		/* restore the parsed order for the client ACK */
		swap(opts.tsval, opts.tsecr);
		synproxy_send_client_ack(net, skb, th, &opts);

		consume_skb(skb);
		return NF_STOLEN;
	default:
		break;
	}

	if (!synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy))
		return NF_DROP_REASON(skb, SKB_DROP_REASON_NETFILTER_DROP, ENOMEM);

	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(ipv4_synproxy_hook);

/* Run the hook at LOCAL_IN and POST_ROUTING, one priority step before
 * NF_IP_PRI_CONNTRACK_CONFIRM.
 */
static const struct nf_hook_ops ipv4_synproxy_ops[] = {
	{
		.hook = ipv4_synproxy_hook,
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_LOCAL_IN,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
	},
	{
		.hook = ipv4_synproxy_hook,
		.pf = NFPROTO_IPV4,
		.hooknum = NF_INET_POST_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
	},
};

/*
 * Take a reference on the IPv4 hooks of @net, registering them on the
 * first reference. Returns 0 or the registration error; the count is not
 * incremented on failure.
 */
int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net)
{
	int err = 0;

	mutex_lock(&synproxy_mutex);
	if (snet->hook_ref4 == 0) {
		err = nf_register_net_hooks(net, ipv4_synproxy_ops,
					    ARRAY_SIZE(ipv4_synproxy_ops));
		if (err)
			goto out;
	}

	snet->hook_ref4++;
out:
	mutex_unlock(&synproxy_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_init);

/* Drop a reference on the IPv4 hooks; unregister them when it hits 0. */
void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net)
{
	mutex_lock(&synproxy_mutex);
	snet->hook_ref4--;
	if (snet->hook_ref4 == 0)
		nf_unregister_net_hooks(net, ipv4_synproxy_ops,
					ARRAY_SIZE(ipv4_synproxy_ops));
	mutex_unlock(&synproxy_mutex);
}
EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_fini);

#if IS_ENABLED(CONFIG_IPV6)
/*
 * Append an IPv6 header (traffic class and flow label 0, hop limit from
 * devconf_all) to @skb and make it the network header.
 */
static struct ipv6hdr *
synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb,
		       const struct in6_addr *saddr,
		       const struct in6_addr *daddr)
{
	struct ipv6hdr *iph;

	skb_reset_network_header(skb);
	iph = skb_put(skb, sizeof(*iph));
	ip6_flow_hdr(iph, 0, 0);
	iph->hop_limit = READ_ONCE(net->ipv6.devconf_all->hop_limit);
	iph->nexthdr = IPPROTO_TCP;
	iph->saddr = *saddr;
	iph->daddr = *daddr;

	return iph;
}

/*
 * Finish and transmit a synproxy-generated IPv6 TCP packet.
 *
 * Sets @nskb up for CHECKSUM_PARTIAL over the TCP header, builds a flow
 * from the new headers, resolves a route with nf_ip6_route() followed by
 * xfrm_lookup(), attaches @nfct (taking a reference) if given, and sends
 * with ip6_local_out(). @nskb is freed here if either lookup fails.
 */
static void
synproxy_send_tcp_ipv6(struct net *net,
		       const struct sk_buff *skb, struct sk_buff *nskb,
		       struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
		       struct ipv6hdr *niph, struct tcphdr *nth,
		       unsigned int tcp_hdr_size)
{
	struct dst_entry *dst;
	struct flowi6 fl6;
	int err;

	nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
	nskb->ip_summed = CHECKSUM_PARTIAL;
	nskb->csum_start = (unsigned char *)nth - nskb->head;
	nskb->csum_offset = offsetof(struct tcphdr, check);

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.saddr = niph->saddr;
	fl6.daddr = niph->daddr;
	fl6.fl6_sport = nth->source;
	fl6.fl6_dport = nth->dest;
	security_skb_classify_flow((struct sk_buff *)skb,
				   flowi6_to_flowi_common(&fl6));
	err = nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false);
	if (err) {
		goto free_nskb;
	}

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
	if (IS_ERR(dst))
		goto free_nskb;

	skb_dst_set(nskb, dst);

	if (nfct) {
		nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo);
		nf_conntrack_get(nfct);
	}

	ip6_local_out(net, nskb->sk, nskb);
	return;

free_nskb:
	kfree_skb(nskb);
}

/*
 * IPv6 counterpart of synproxy_send_client_synack(): cookie SYN/ACK with
 * the sequence number from nf_ipv6_cookie_init_sequence().
 */
void
synproxy_send_client_synack_ipv6(struct net *net,
				 const struct sk_buff *skb,
				 const struct tcphdr *th,
				 const struct synproxy_options *opts)
{
	struct sk_buff *nskb;
	struct ipv6hdr *iph, *niph;
	struct tcphdr *nth;
	unsigned int tcp_hdr_size;
	u16 mss = opts->mss_encode;

	iph = ipv6_hdr(skb);

	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
			 GFP_ATOMIC);
	if (!nskb)
		return;
	skb_reserve(nskb, MAX_TCP_HEADER);

	niph = synproxy_build_ip_ipv6(net, nskb, &iph->daddr, &iph->saddr);

	skb_reset_transport_header(nskb);
	nth = skb_put(nskb, tcp_hdr_size);
	nth->source = th->dest;
	nth->dest = th->source;
	nth->seq = htonl(nf_ipv6_cookie_init_sequence(iph, th, &mss));
	nth->ack_seq = htonl(ntohl(th->seq) + 1);
	tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
	if (opts->options & NF_SYNPROXY_OPT_ECN)
		tcp_flag_word(nth) |= TCP_FLAG_ECE;
	nth->doff = tcp_hdr_size / 4;
	nth->window = 0;
	nth->check = 0;
	nth->urg_ptr = 0;

	synproxy_build_options(nth, opts);

	synproxy_send_tcp_ipv6(net, skb, nskb, skb_nfct(skb),
			       IP_CT_ESTABLISHED_REPLY, niph, nth,
			       tcp_hdr_size);
}
EXPORT_SYMBOL_GPL(synproxy_send_client_synack_ipv6);

/* IPv6 counterpart of synproxy_send_server_syn(). */
static void
synproxy_send_server_syn_ipv6(struct net *net, const struct sk_buff *skb,
			      const struct tcphdr *th,
			      const struct synproxy_options *opts, u32 recv_seq)
{
	struct synproxy_net *snet = synproxy_pernet(net);
	struct sk_buff *nskb;
	struct ipv6hdr *iph, *niph;
	struct tcphdr *nth;
	unsigned int tcp_hdr_size;

	iph = ipv6_hdr(skb);

	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
			 GFP_ATOMIC);
	if (!nskb)
		return;
	skb_reserve(nskb, MAX_TCP_HEADER);

	niph = synproxy_build_ip_ipv6(net, nskb, &iph->saddr, &iph->daddr);

	skb_reset_transport_header(nskb);
	nth = skb_put(nskb, tcp_hdr_size);
	nth->source = th->source;
	nth->dest = th->dest;
	nth->seq = htonl(recv_seq - 1);
	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
	 * sequence number translation once a connection tracking entry exists.
	 */
	nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
	tcp_flag_word(nth) = TCP_FLAG_SYN;
	if (opts->options & NF_SYNPROXY_OPT_ECN)
		tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
	nth->doff = tcp_hdr_size / 4;
	nth->window = th->window;
	nth->check = 0;
	nth->urg_ptr = 0;

	synproxy_build_options(nth, opts);

	synproxy_send_tcp_ipv6(net, skb, nskb, &snet->tmpl->ct_general,
			       IP_CT_NEW, niph, nth, tcp_hdr_size);
}

/* IPv6 counterpart of synproxy_send_server_ack(). */
static void
synproxy_send_server_ack_ipv6(struct net *net, const struct ip_ct_tcp *state,
			      const struct sk_buff *skb,
			      const struct tcphdr *th,
			      const struct synproxy_options *opts)
{
	struct sk_buff *nskb;
	struct ipv6hdr *iph, *niph;
	struct tcphdr *nth;
	unsigned int tcp_hdr_size;

	iph = ipv6_hdr(skb);

	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
			 GFP_ATOMIC);
	if (!nskb)
		return;
	skb_reserve(nskb, MAX_TCP_HEADER);

	niph = synproxy_build_ip_ipv6(net, nskb, &iph->daddr, &iph->saddr);

	skb_reset_transport_header(nskb);
	nth = skb_put(nskb, tcp_hdr_size);
	nth->source = th->dest;
	nth->dest = th->source;
	nth->seq = htonl(ntohl(th->ack_seq));
	nth->ack_seq = htonl(ntohl(th->seq) + 1);
	tcp_flag_word(nth) = TCP_FLAG_ACK;
	nth->doff = tcp_hdr_size / 4;
	nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
	nth->check = 0;
	nth->urg_ptr = 0;

	synproxy_build_options(nth, opts);

	synproxy_send_tcp_ipv6(net, skb, nskb, NULL, 0, niph, nth,
			       tcp_hdr_size);
}

/* IPv6 counterpart of synproxy_send_client_ack(). */
static void
synproxy_send_client_ack_ipv6(struct net *net, const struct sk_buff *skb,
			      const struct tcphdr *th,
			      const struct synproxy_options *opts)
{
	struct sk_buff *nskb;
	struct ipv6hdr *iph, *niph;
	struct tcphdr *nth;
	unsigned int tcp_hdr_size;

	iph = ipv6_hdr(skb);

	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
			 GFP_ATOMIC);
	if (!nskb)
		return;
	skb_reserve(nskb, MAX_TCP_HEADER);

	niph = synproxy_build_ip_ipv6(net, nskb, &iph->saddr, &iph->daddr);

	skb_reset_transport_header(nskb);
	nth = skb_put(nskb, tcp_hdr_size);
	nth->source = th->source;
	nth->dest = th->dest;
	nth->seq = htonl(ntohl(th->seq) + 1);
	nth->ack_seq = th->ack_seq;
	tcp_flag_word(nth) = TCP_FLAG_ACK;
	nth->doff = tcp_hdr_size / 4;
	nth->window = htons(ntohs(th->window) >> opts->wscale);
	nth->check = 0;
	nth->urg_ptr = 0;

	synproxy_build_options(nth, opts);

	synproxy_send_tcp_ipv6(net, skb, nskb, skb_nfct(skb),
			       IP_CT_ESTABLISHED_REPLY, niph, nth,
			       tcp_hdr_size);
}

/*
 * IPv6 counterpart of synproxy_recv_client_ack(): the cookie is checked
 * with nf_cookie_v6_check(); statistics and return value are the same.
 */
bool
synproxy_recv_client_ack_ipv6(struct net *net,
			      const struct sk_buff *skb,
			      const struct tcphdr *th,
			      struct synproxy_options *opts, u32 recv_seq)
{
	struct synproxy_net *snet = synproxy_pernet(net);
	int mss;

	mss = nf_cookie_v6_check(ipv6_hdr(skb), th);
	if (mss == 0) {
		this_cpu_inc(snet->stats->cookie_invalid);
		return false;
	}

	this_cpu_inc(snet->stats->cookie_valid);
	opts->mss_option = mss;
	opts->options |= NF_SYNPROXY_OPT_MSS;

	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
		synproxy_check_timestamp_cookie(opts);

	synproxy_send_server_syn_ipv6(net, skb, th, opts, recv_seq);
	return true;
}
EXPORT_SYMBOL_GPL(synproxy_recv_client_ack_ipv6);

/*
 * ipv6_synproxy_hook - netfilter hook driving the IPv6 synproxy handshake.
 *
 * Same state handling as ipv4_synproxy_hook(). The TCP header is located
 * with ipv6_skip_exthdr(); packets for which that fails or whose final
 * next-header is not TCP are accepted untouched.
 */
unsigned int
ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
		   const struct nf_hook_state *nhs)
{
	struct net *net = nhs->net;
	struct synproxy_net *snet = synproxy_pernet(net);
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	struct nf_conn_synproxy *synproxy;
	struct synproxy_options opts = {};
	const struct ip_ct_tcp *state;
	struct tcphdr *th, _th;
	__be16 frag_off;
	u8 nexthdr;
	int thoff;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return NF_ACCEPT;

	synproxy = nfct_synproxy(ct);
	if (!synproxy)
		return NF_ACCEPT;

	if (nf_is_loopback_packet(skb))
		return NF_ACCEPT;

	nexthdr = ipv6_hdr(skb)->nexthdr;
	thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
				 &frag_off);
	if (thoff < 0 || nexthdr != IPPROTO_TCP)
		return NF_ACCEPT;

	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
	if (!th)
		return NF_DROP;

	state = &ct->proto.tcp;
	switch (state->state) {
	case TCP_CONNTRACK_CLOSE:
		if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
						      ntohl(th->seq) + 1);
			break;
		}

		if (!th->syn || th->ack ||
		    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
			break;

		/* Reopened connection - reset the sequence number and timestamp
		 * adjustments, they will get initialized once the connection is
		 * reestablished.
		 */
		nf_ct_seqadj_init(ct, ctinfo, 0);
		synproxy->tsoff = 0;
		this_cpu_inc(snet->stats->conn_reopened);
		fallthrough;
	case TCP_CONNTRACK_SYN_SENT:
		if (!synproxy_parse_options(skb, thoff, th, &opts))
			return NF_DROP;

		if (!th->syn && th->ack &&
		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
			/* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
			 * therefore we need to add 1 to make the SYN sequence
			 * number match the one of first SYN.
			 */
			if (synproxy_recv_client_ack_ipv6(net, skb, th, &opts,
							  ntohl(th->seq) + 1)) {
				this_cpu_inc(snet->stats->cookie_retrans);
				consume_skb(skb);
				return NF_STOLEN;
			} else {
				return NF_DROP;
			}
		}

		/* see synproxy_send_server_syn_ipv6(): ack_seq relays the ISN */
		synproxy->isn = ntohl(th->ack_seq);
		if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP)
			synproxy->its = opts.tsecr;

		nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
		break;
	case TCP_CONNTRACK_SYN_RECV:
		if (!th->syn || !th->ack)
			break;

		if (!synproxy_parse_options(skb, thoff, th, &opts))
			return NF_DROP;

		if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) {
			synproxy->tsoff = opts.tsval - synproxy->its;
			nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
		}

		/* the ACKs below carry at most a timestamp option */
		opts.options &= ~(NF_SYNPROXY_OPT_MSS |
				  NF_SYNPROXY_OPT_WSCALE |
				  NF_SYNPROXY_OPT_SACK_PERM);

		/* echo the received tsval as tsecr in the server ACK */
		swap(opts.tsval, opts.tsecr);
		synproxy_send_server_ack_ipv6(net, state, skb, th, &opts);

		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
		nf_conntrack_event_cache(IPCT_SEQADJ, ct);

		/* restore the parsed order for the client ACK */
		swap(opts.tsval, opts.tsecr);
		synproxy_send_client_ack_ipv6(net, skb, th, &opts);

		consume_skb(skb);
		return NF_STOLEN;
	default:
		break;
	}

	if (!synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy))
		return NF_DROP_REASON(skb, SKB_DROP_REASON_NETFILTER_DROP, ENOMEM);

	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(ipv6_synproxy_hook); 1186 1187 static const struct nf_hook_ops ipv6_synproxy_ops[] = { 1188 { 1189 .hook = ipv6_synproxy_hook, 1190 .pf = NFPROTO_IPV6, 1191 .hooknum = NF_INET_LOCAL_IN, 1192 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, 1193 }, 1194 { 1195 .hook = ipv6_synproxy_hook, 1196 .pf = NFPROTO_IPV6, 1197 .hooknum = NF_INET_POST_ROUTING, 1198 .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, 1199 }, 1200 }; 1201 1202 int 1203 nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net) 1204 { 1205 int err = 0; 1206 1207 mutex_lock(&synproxy_mutex); 1208 if (snet->hook_ref6 == 0) { 1209 err = nf_register_net_hooks(net, ipv6_synproxy_ops, 1210 ARRAY_SIZE(ipv6_synproxy_ops)); 1211 if (err) 1212 goto out; 1213 } 1214 1215 snet->hook_ref6++; 1216 out: 1217 mutex_unlock(&synproxy_mutex); 1218 return err; 1219 } 1220 EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_init); 1221 1222 void 1223 nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net) 1224 { 1225 mutex_lock(&synproxy_mutex); 1226 snet->hook_ref6--; 1227 if (snet->hook_ref6 == 0) 1228 nf_unregister_net_hooks(net, ipv6_synproxy_ops, 1229 ARRAY_SIZE(ipv6_synproxy_ops)); 1230 mutex_unlock(&synproxy_mutex); 1231 } 1232 EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini); 1233 #endif /* CONFIG_IPV6 */ 1234 1235 MODULE_LICENSE("GPL"); 1236 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 1237 MODULE_DESCRIPTION("nftables SYNPROXY expression support"); 1238