// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/aligned_data.h>
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/hotdata.h>
#include <net/busy_poll.h>
#include <net/rstreason.h>
#include <net/psp.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/md5.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
					      struct tcp6_sock, tcp)->inet6)
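/* Layout sketch behind the macro (field names taken from the macro itself):
 * the ipv6_pinfo instance is embedded right after the tcp_sock, so
 * container_of_const() turns tcp_sk(sk) into the enclosing tcp6_sock and
 * takes the address of its inet6 member with plain pointer arithmetic:
 *
 *	struct tcp6_sock {
 *		struct tcp_sock		tcp;	/+ has to be the first member +/
 *		struct ipv6_pinfo	inet6;
 *	};
 *
 * inet6_sk(sk) would instead load the pinet6 pointer stored in the socket.
 */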
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		rcu_assign_pointer(sk->sk_rx_dst, dst);
		sk->sk_rx_dst_ifindex = skb->skb_iif;
		sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that are
	 * out of the bound specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct in6_addr *saddr = NULL, *final_p;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 *fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	fl6 = &inet_sk(sk)->cork.fl.u.ip6;
	memset(fl6, 0, sizeof(*fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6->flowlabel);
		if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If an interface was set while binding, the indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6->flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6->flowi6_proto = IPPROTO_TCP;
	fl6->daddr = sk->sk_v6_daddr;
	fl6->saddr = saddr ? *saddr : np->saddr;
	fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6->flowi6_oif = sk->sk_bound_dev_if;
	fl6->flowi6_mark = sk->sk_mark;
	fl6->fl6_dport = usin->sin6_port;
	fl6->fl6_sport = inet->inet_sport;
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport)
		fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6->flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(fl6, opt, &np->final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));

	dst = ip6_dst_lookup_flow(net, sk, fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		saddr = &fl6->saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err)
			goto failure;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, false, false);

	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
	if (opt)
		icsk->icsk_ext_hdr_len += opt->opt_flen +
					  opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
{
	struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6;
	struct dst_entry *dst;

	dst = inet6_csk_route_socket(sk, fl6);
	if (IS_ERR(dst))
		return NULL;

	dst->ops->update_pmtu(dst, sk, NULL, mtu, true);

	dst = inet6_csk_route_socket(sk, fl6);
	return IS_ERR(dst) ? NULL : dst;
}
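/* Why inet6_csk_route_socket() runs twice above: the first lookup yields the
 * dst whose PMTU gets updated; since update_pmtu() may invalidate or replace
 * the cached route, the second lookup hands the caller a dst that reflects
 * the new MTU.
 */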
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu, dmtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	dmtu = dst6_mtu(dst);
	if (inet_csk(sk)->icsk_pmtu_cookie > dmtu) {
		tcp_sync_mss(sk, dmtu);
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* Check if this ICMP message allows reverting the backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK;
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
			       opt, tclass, READ_ONCE(sk->sk_priority));
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_ao_addr *addr;
	int l3index = 0;
	u8 prefixlen;
	bool l3flag;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
	l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* It is OK to reference dev set/not set outside of RCU;
		 * right now the device MUST be an L3 master.
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index, flags);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index, flags);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];

		/* Don't allow keys for peers that have a matching TCP-AO key.
		 * See the comment in tcp_ao_add_cmd()
		 */
		if (tcp_ao_required(sk, addr, AF_INET,
				    l3flag ? l3index : -1, false))
			return -EKEYREJECTED;
		return tcp_md5_do_add(sk, addr,
				      AF_INET, prefixlen, l3index, flags,
				      cmd.tcpm_key, cmd.tcpm_keylen);
	}

	addr = (union tcp_md5_addr *)&sin6->sin6_addr;

	/* Don't allow keys for peers that have a matching TCP-AO key.
	 * See the comment in tcp_ao_add_cmd()
	 */
	if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
		return -EKEYREJECTED;
	return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen);
}

static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx,
				    const struct in6_addr *daddr,
				    const struct in6_addr *saddr,
				    const struct tcphdr *th, int nbytes)
{
	struct {
		struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */
		struct tcphdr tcp;
	} h;

	h.ip.saddr = *saddr;
	h.ip.daddr = *daddr;
	h.ip.protocol = cpu_to_be32(IPPROTO_TCP);
	h.ip.len = cpu_to_be32(nbytes);
	h.tcp = *th;
	h.tcp.check = 0;
	md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp));
}

static noinline_for_stack void
tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct in6_addr *daddr, struct in6_addr *saddr,
		    const struct tcphdr *th)
{
	struct md5_ctx ctx;

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}

static noinline_for_stack void
tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
		    const struct sock *sk, const struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	const struct in6_addr *saddr, *daddr;
	struct md5_ctx ctx;

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);

		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	md5_init(&ctx);
	tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len);
	tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2);
	tcp_md5_hash_key(&ctx, key);
	md5_final(&ctx, md5_hash);
}
#endif
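/* The pseudo-header hashed by tcp_v6_md5_hash_headers() above mirrors the
 * RFC 2460, Section 8.1 checksum pseudo-header: source address, destination
 * address, the upper-layer packet length, and the next-header value
 * (IPPROTO_TCP), the last two zero-extended to 32 bits. Worked example:
 * signing a bare 20-byte TCP header with no options and no payload uses
 * nbytes = th->doff << 2 = 20, i.e. h.ip.len = cpu_to_be32(20).
 */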
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb,
			    u32 tw_isn)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
	ireq->ir_rmt_addr = LOOPBACK4_IPV6;
	ireq->ir_loc_addr = LOOPBACK4_IPV6;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct sk_buff *skb,
					  struct flowi *fl,
					  struct request_sock *req,
					  u32 tw_isn)
{
	tcp_v6_init_req(req, sk, skb, tw_isn);

	if (security_inet_conn_request(sk, skb, req))
		return NULL;

	return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup_rsk,
	.ao_calc_key	=	tcp_v6_ao_calc_key_rsk,
	.ao_synack_hash	=	tcp_v6_ao_synack_hash,
#endif
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, int rst, u8 tclass, __be32 label,
				 u32 priority, u32 txhash, struct tcp_key *key)
{
	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	unsigned int tot_len = sizeof(struct tcphdr);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct flowi6 fl6;
	u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
	if (tcp_key_is_md5(key))
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
	if (tcp_key_is_ao(key))
		tot_len += tcp_ao_len_aligned(key->ao_key);

#ifdef CONFIG_MPTCP
	if (rst && !tcp_key_is_md5(key)) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);
	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_key_is_md5(key)) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif
#ifdef CONFIG_TCP_AO
	if (tcp_key_is_ao(key)) {
		*topt++ = htonl((TCPOPT_AO << 24) |
				(tcp_ao_len(key->ao_key) << 16) |
				(key->ao_key->sndid << 8) |
				(key->rcv_next));

		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
				key->traffic_key,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
				t1, key->sne);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		/* unconstify the socket only to attach it to buff with care. */
		skb_set_owner_edemux(buff, (struct sock *)sk);
		psp_reply_set_decrypted(sk, buff);

		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow() even when the reply is a RST;
	 * the underlying function uses it to retrieve the network namespace.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	const __u8 *md5_hash_location = NULL;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	bool allocated_traffic_key = false;
#endif
	const struct tcp_ao_hdr *aoh;
	struct tcp_key key = {};
	u32 seq = 0, ack_seq = 0;
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;
#ifdef CONFIG_TCP_MD5SIG
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
#endif

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our
	 * dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
		return;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	rcu_read_lock();
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
	} else if (md5_hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/* The active side is lost. Try to find the listening socket
		 * through the source port, and then the MD5 key through that
		 * socket. We do not lose security here: the incoming packet
		 * is checked against the found MD5 key, and no RST is
		 * generated if the hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key.md5_key)
			goto out;
		key.type = TCP_KEY_MD5;

		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
		if (memcmp(md5_hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

#ifdef CONFIG_TCP_AO
	if (aoh) {
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
					 &key.ao_key, &key.traffic_key,
					 &allocated_traffic_key,
					 &key.rcv_next, &key.sne))
			goto out;
		key.type = TCP_KEY_AO;
	}
#endif

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			if (inet6_test_bit(REPFLOW, sk))
				label = ip6_flowlabel(ipv6h);
			priority = READ_ONCE(sk->sk_priority);
			txhash = sk->sk_txhash;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
		    FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	trace_tcp_send_reset(sk, skb, reason);

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
			     label, priority, txhash,
			     &key);

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
	if (allocated_traffic_key)
		kfree(key.traffic_key);
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0,
			     tclass, label, priority, txhash, key);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
				enum tcp_tw_status tw_status)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	u8 tclass = tw->tw_tclass;
	struct tcp_key key = {};

	if (tw_status == TCP_TW_ACK_OOW)
		tclass &= ~INET_ECN_MASK;
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info *ao_info;

	if (static_branch_unlikely(&tcp_ao_needed.key)) {

		/* FIXME: the segment to-be-acked is not verified yet */
		ao_info = rcu_dereference(tcptw->ao_info);
		if (ao_info) {
			const struct tcp_ao_hdr *aoh;

			/* Invalid TCP option size or twice included auth */
			if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
				goto out;
			if (aoh)
				key.ao_key = tcp_ao_established_key(sk, ao_info,
								    aoh->rnext_keyid, -1);
		}
	}
	if (key.ao_key) {
		struct tcp_ao_key *rnext_key;

		key.traffic_key = snd_other_key(key.ao_key);
		/* rcv_next switches to our rcv_next */
		rnext_key = READ_ONCE(ao_info->rnext_key);
		key.rcv_next = rnext_key->rcvid;
		key.sne = READ_ONCE(ao_info->snd_sne);
		key.type = TCP_KEY_AO;
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		key.md5_key = tcp_twsk_md5_key(tcptw);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
			READ_ONCE(tcptw->tw_rcv_nxt),
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_tw_tsval(tcptw),
			READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
			&key, tclass, cpu_to_be32(tw->tw_flowlabel),
			tw->tw_priority, tw->tw_txhash);

#ifdef CONFIG_TCP_AO
out:
#endif
	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	struct tcp_key key = {};

#ifdef CONFIG_TCP_AO
	if (static_branch_unlikely(&tcp_ao_needed.key) &&
	    tcp_rsk_used_ao(req)) {
		const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
		const struct tcp_ao_hdr *aoh;
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		/* Invalid TCP option size or twice included auth */
		if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
			return;
		if (!aoh)
			return;
		key.ao_key = tcp_ao_do_lookup(sk, l3index,
					      (union tcp_ao_addr *)addr,
					      AF_INET6, aoh->rnext_keyid, -1);
		if (unlikely(!key.ao_key)) {
			/* Send ACK with any matching MKT for the peer */
			key.ao_key = tcp_ao_do_lookup(sk, l3index,
						      (union tcp_ao_addr *)addr,
						      AF_INET6, -1, -1);
			/* Matching key disappeared (user removed the key?)
			 * Let the handshake time out.
			 */
			if (!key.ao_key) {
				net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
						     addr,
						     ntohs(tcp_hdr(skb)->source),
						     &ipv6_hdr(skb)->daddr,
						     ntohs(tcp_hdr(skb)->dest));
				return;
			}
		}
		key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
		if (!key.traffic_key)
			return;

		key.type = TCP_KEY_AO;
		key.rcv_next = aoh->keyid;
		tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
#else
	if (0) {
#endif
#ifdef CONFIG_TCP_MD5SIG
	} else if (static_branch_unlikely(&tcp_md5_needed.key)) {
		int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
						   l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
#endif
	}

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
			tcp_rsk_tsval(tcp_rsk(req)),
			req->ts_recent, sk->sk_bound_dev_if,
			&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
			0,
			READ_ONCE(sk->sk_priority),
			READ_ONCE(tcp_rsk(req)->txhash));
	if (tcp_key_is_ao(&key))
		kfree(key.traffic_key);
}
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move the header back to the beginning if
	 * xfrm6_policy_check() and tcp_v6_fill_cb() are going to be called
	 * again. ip6_datagram_recv_specific_ctl() also expects IP6CB to be
	 * there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

/* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */
static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk)
{
	struct inet_sock *newinet = inet_sk(newsk);
	struct ipv6_pinfo *newnp;

	newinet->pinet6 = newnp = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;

	memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo));

	newnp->saddr = newsk->sk_v6_rcv_saddr;

	inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
	if (sk_is_mptcp(newsk))
		mptcpv6_handle_mapped(newsk, true);
	newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->pktoptions = NULL;
	newnp->opt = NULL;

	/* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */
	newnp->mcast_oif = newinet->mc_index;
	newnp->mcast_hops = newinet->mc_ttl;

	newnp->rcv_flowinfo = 0;
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = 0;
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req,
					 void (*opt_child_init)(struct sock *newsk,
								const struct sock *sk))
{
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct inet_request_sock *ireq;
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct ipv6_pinfo *newnp;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_syn_recv_sock(sk, skb, req, dst,
					    req_unhash, own_req,
					    tcp_v6_mapped_child_init);

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP);
	if (!dst)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	inet6_sk_rx_dst_set(newsk, skb);

	newinet = inet_sk(newsk);
	newinet->cork.fl.u.ip6 = fl6;
	newinet->pinet6 = tcp_inet6_sk(newsk);
	newinet->ipv6_fl_list = NULL;
	newinet->inet_opt = NULL;

	newtp = tcp_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ip6_dst_store(newsk, dst, false, false);

	newnp->saddr = ireq->ir_v6_loc_addr;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (inet6_test_bit(REPFLOW, sk))
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but we do
	   one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst6_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	if (!tcp_rsk_used_ao(req)) {
		/* Copy over the MD5 key from the original socket */
		key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
		if (key) {
			const union tcp_md5_addr *addr;

			addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
			if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key))
				goto put_and_exit;
		}
	}
#endif
#ifdef CONFIG_TCP_AO
	/* Copy over tcp_ao_info if any */
	if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
		goto put_and_exit; /* OOM */
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

exit_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	tcp_listendrop(sk);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							    u32));
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;
	/* Imagine: the socket is IPv6. An IPv4 packet arrives, goes to the
	   IPv4 receive handler and is backlogged. From the backlog it always
	   ends up here. Kerboom...

	   Fortunately, tcp_rcv_established and rcv_established handle them
	   correctly, but that is not the case with tcp_v6_hnd_req and
	   tcp_v6_send_reset().  --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	reason = psp_sk_rx_policy_check(sk, skb);
	if (reason)
		goto err_discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we can handle
	   this without affecting IPv4. The rest of the code is protocol
	   independent, and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS does not look very
	   well thought out. For now we latch the options received in the
	   last packet enqueued by tcp. Feel free to propose a better
	   solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (nsk != sk) {
			if (nsk) {
				reason = tcp_child_process(sk, nsk, skb);
				if (reason)
					goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	reason = tcp_rcv_state_process(sk, skb);
	if (reason)
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	sk_skb_reason_drop(sk, skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
err_discard:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* You may ask, what is this?

	   1. skb was enqueued by tcp.
	   2. skb is added to the tail of the read queue, rather than out of
	      order.
	   3. The socket is not in a passive state.
	   4. Finally, it really contains options, which the user wants to
	      receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			WRITE_ONCE(np->mcast_hops,
				   ipv6_hdr(opt_skb)->hop_limit);
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (inet6_test_bit(REPFLOW, sk))
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th);
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
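/* Worked example for the end_seq arithmetic above: SYN and FIN each consume
 * one sequence number, and skb->len still includes the TCP header here, so
 * th->doff * 4 subtracts the header (including options). A pure SYN with
 * seq = S and no payload therefore gets end_seq = S + 1, while a plain data
 * segment carrying N payload bytes gets end_seq = seq + N.
 */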
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	enum skb_drop_reason drop_reason;
	enum tcp_tw_status tw_status;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk = NULL;
	bool refcounted;
	int ret;
	u32 isn;

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_hash(sk, req, skb,
						       &hdr->saddr, &hdr->daddr,
						       AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_skbadd(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb, &drop_reason)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
					    &drop_reason);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full-blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else {
			drop_reason = tcp_child_process(sk, nsk, skb);
			if (drop_reason) {
				enum sk_rst_reason rst_reason;

				rst_reason = sk_rst_convert_drop_reason(drop_reason);
				tcp_v6_send_reset(nsk, skb, rst_reason);
				goto discard_and_relse;
			}
			sock_put(sk);
			return 0;
		}
	}

process:
	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
				       AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	if (tcp_filter(sk, skb, &drop_reason))
		goto discard_and_relse;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;
no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	sk_skb_reason_drop(sk, skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_skbadd(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
					       &drop_reason);
	switch (tw_status) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(net, skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			__this_cpu_write(tcp_tw_isn, isn);
			goto process;
		}

		drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
		if (drop_reason)
			break;
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
	case TCP_TW_ACK_OOW:
		tcp_v6_timewait_ack(sk, skb, tw_status);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

void tcp_v6_early_demux(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(skb->dev);
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(net, &hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
};

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v6_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v6_ao_calc_key_sk,
#endif
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
#endif
#ifdef CONFIG_TCP_AO
	.ao_lookup	=	tcp_v6_ao_lookup,
	.calc_ao_hash	=	tcp_v4_ao_hash_skb,
	.ao_parse	=	tcp_v6_parse_ao,
	.ao_calc_key_sk	=	tcp_v4_ao_calc_key_sk,
#endif
};

static void tcp6_destruct_sock(struct sock *sk)
{
	tcp_md5_destruct_sock(sk);
	tcp_ao_destroy_sock(sk, false);
	inet6_sock_destruct(sk);
}
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
	sk->sk_destruct = tcp6_destruct_sock;
#endif

	return 0;
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
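/* Each record mirrors the /proc/net/tcp layout, with the 128-bit
 * addresses printed as four 32-bit hex words: one formatter each for
 * request sockets, full sockets and TIME_WAIT sockets.
 */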
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sk_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	u8 icsk_pending;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	icsk_pending = smp_load_acquire(&icsk->icsk_pending);
	if (icsk_pending == ICSK_TIME_RETRANS ||
	    icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tcp_timeout_expires(sp);
	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
		timer_active	= 2;
		timer_expires	= icsk->icsk_keepalive_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   READ_ONCE(icsk->icsk_retransmits),
		   from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
		   READ_ONCE(icsk->icsk_probes_out),
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
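
/* TIME_WAIT sockets carry no queue or retransmit state, so most
 * fields are printed as zero; only the remaining tw_timer lifetime
 * (timer slot 3) is meaningful.
 */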
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   READ_ONCE(tw->tw_substate), 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif
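
/* The TCPv6 proto table: most handlers are the family-independent
 * tcp_* functions; only the connect, socket-init and receive hooks
 * are IPv6-specific.
 */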
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.splice_eof		= tcp_splice_eof,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &net_aligned_data.tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.freeptr_offset		= offsetof(struct tcp6_sock,
					   tcp.inet_conn.icsk_inet.sk.sk_freeptr),
	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= NULL,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	int res;

	res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				   SOCK_RAW, IPPROTO_TCP, net);
	if (!res)
		net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;

	return res;
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
};

int __init tcpv6_init(void)
{
	int ret;

	net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
		.handler     = tcp_v6_rcv,
		.err_handler = tcp_v6_err,
		.flags	     = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
	};
	ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

	/* Failure unwind: undo the registrations in reverse order. */
out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}