1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/aligned_data.h> 45 #include <net/tcp.h> 46 #include <net/ndisc.h> 47 #include <net/inet6_hashtables.h> 48 #include <net/inet6_connection_sock.h> 49 #include <net/ipv6.h> 50 #include <net/transp_v6.h> 51 #include <net/addrconf.h> 52 #include <net/ip6_route.h> 53 #include <net/ip6_checksum.h> 54 #include <net/inet_ecn.h> 55 #include <net/protocol.h> 56 #include <net/xfrm.h> 57 #include <net/snmp.h> 58 #include <net/dsfield.h> 59 #include <net/timewait_sock.h> 60 #include <net/inet_common.h> 61 #include <net/secure_seq.h> 62 #include <net/hotdata.h> 63 #include <net/busy_poll.h> 64 #include <net/rstreason.h> 65 #include <net/psp.h> 66 67 #include <linux/proc_fs.h> 68 #include <linux/seq_file.h> 69 70 #include <crypto/md5.h> 71 #include <crypto/utils.h> 72 73 #include 
<trace/events/tcp.h> 74 75 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, 76 enum sk_rst_reason reason); 77 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 78 struct request_sock *req); 79 80 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 81 82 static const struct inet_connection_sock_af_ops ipv6_mapped; 83 const struct inet_connection_sock_af_ops ipv6_specific; 84 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 85 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; 86 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; 87 #endif 88 89 /* Helper returning the inet6 address from a given tcp socket. 90 * It can be used in TCP stack instead of inet6_sk(sk). 91 * This avoids a dereference and allow compiler optimizations. 92 * It is a specialized version of inet6_sk_generic(). 93 */ 94 #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ 95 struct tcp6_sock, tcp)->inet6) 96 97 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 98 { 99 struct dst_entry *dst = skb_dst(skb); 100 101 if (dst && dst_hold_safe(dst)) { 102 rcu_assign_pointer(sk->sk_rx_dst, dst); 103 sk->sk_rx_dst_ifindex = skb->skb_iif; 104 sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); 105 } 106 } 107 108 INDIRECT_CALLABLE_SCOPE union tcp_seq_and_ts_off 109 tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) 110 { 111 return secure_tcpv6_seq_and_ts_off(net, 112 ipv6_hdr(skb)->daddr.s6_addr32, 113 ipv6_hdr(skb)->saddr.s6_addr32, 114 tcp_hdr(skb)->dest, 115 tcp_hdr(skb)->source); 116 } 117 118 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, 119 int addr_len) 120 { 121 /* This check is replicated from tcp_v6_connect() and intended to 122 * prevent BPF program called below from accessing bytes that are out 123 * of the bound specified by user in addr_len. 
/* Active open: connect @sk to the AF_INET6 address in @uaddr.
 *
 * A v4-mapped destination is handed over to tcp_v4_connect() after the
 * socket's af_ops are switched to the mapped variants; the switch is rolled
 * back if that call fails. A native IPv6 destination gets a flow built in
 * inet_sk(sk)->cork.fl, a route lookup, source address selection, a hash
 * table insertion via inet6_hash_connect() and finally tcp_connect().
 *
 * Returns 0 on success or a negative errno. On the "failure" paths the
 * destination port and route capabilities of the socket are cleared.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_timewait_death_row *tcp_death_row;
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct in6_addr *saddr = NULL, *final_p;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_txoptions *opt;
	struct dst_entry *dst;
	struct flowi6 *fl6;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	/* The flow lives in the socket's cork area and is rebuilt from scratch. */
	fl6 = &inet_sk(sk)->cork.fl.u.ip6;
	memset(fl6, 0, sizeof(*fl6));

	if (inet6_test_bit(SNDFLOW, sk)) {
		fl6->flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6->flowlabel);
		if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			/* Only checks that the label can be looked up for this
			 * socket; the reference is dropped again right away.
			 */
			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		/* sin6_scope_id is only read when the caller supplied a
		 * full-size sockaddr_in6.
		 */
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Connecting to a different peer than last time: forget the cached
	 * timestamp state and the previous write sequence.
	 */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6->flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		/* Saved so it can be restored if tcp_v4_connect() fails. */
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (ipv6_only_sock(sk))
			return -ENETUNREACH;

		/* The IPv4 address is the last 32-bit word of the mapped one. */
		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr_unsized *)&sin, sizeof(sin));

		if (err) {
			/* Undo every mapped-mode switch made above. */
			icsk->icsk_ext_hdr_len = exthdrlen;
			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	/* A bound (non-any) receive address is used as the source address. */
	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	sk_set_txhash(sk);

	fl6->flowi6_proto = IPPROTO_TCP;
	fl6->daddr = sk->sk_v6_daddr;
	fl6->saddr = saddr ? *saddr : np->saddr;
	fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6->flowi6_oif = sk->sk_bound_dev_if;
	fl6->flowi6_mark = sk->sk_mark;
	fl6->fl6_dport = usin->sin6_port;
	fl6->fl6_sport = inet->inet_sport;
	/* No source port chosen yet: flag the flow accordingly when
	 * multipath routing is built in.
	 */
	if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6->fl6_sport)
		fl6->flowi6_flags = FLOWI_FLAG_ANY_SPORT;
	fl6->flowi6_uid = sk_uid(sk);

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(fl6, opt, &np->final);

	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));

	/* Non-zero mp_hash bypasses rt6_multipath_hash() in
	 * fib6_select_path(), letting txhash control ECMP path
	 * selection so that sk_rethink_txhash() rehashes onto a
	 * different path. Policies 1-3 derive a deterministic
	 * hash from the flow keys and must not be overridden.
	 */
	ip6_ecmp_set_mp_hash(net, fl6, sk->sk_txhash);

	dst = ip6_dst_lookup_flow(net, sk, fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (!saddr) {
		/* Socket was not bound to an address: adopt the source address
		 * now present in the flow after the route lookup.
		 */
		saddr = &fl6->saddr;

		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
		if (err) {
			/* dst has not been stored in the socket yet. */
			dst_release(dst);
			goto failure;
		}
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, false, false);

	/* Extension header budget: PSP overhead plus any socket tx options. */
	icsk->icsk_ext_hdr_len = psp_sk_overhead(sk);
	if (opt)
		icsk->icsk_ext_hdr_len += opt->opt_flen +
					  opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (likely(!tp->repair)) {
		union tcp_seq_and_ts_off st;

		st = secure_tcpv6_seq_and_ts_off(net,
						 np->saddr.s6_addr32,
						 sk->sk_v6_daddr.s6_addr32,
						 inet->inet_sport,
						 inet->inet_dport);
		/* Keep an already non-zero write_seq. */
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq, st.seq);
		WRITE_ONCE(tp->tsoffset, st.ts_off);
	}

	/* When the connect is deferred for fast open, return whatever the
	 * helper stored in err; otherwise a non-zero err is a failure.
	 */
	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	/* Reached once the socket has entered TCP_SYN_SENT. */
	tcp_set_state(sk, TCP_CLOSE);
	inet_bhash2_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
330 331 st = secure_tcpv6_seq_and_ts_off(net, 332 np->saddr.s6_addr32, 333 sk->sk_v6_daddr.s6_addr32, 334 inet->inet_sport, 335 inet->inet_dport); 336 if (!tp->write_seq) 337 WRITE_ONCE(tp->write_seq, st.seq); 338 WRITE_ONCE(tp->tsoffset, st.ts_off); 339 } 340 341 if (tcp_fastopen_defer_connect(sk, &err)) 342 return err; 343 if (err) 344 goto late_failure; 345 346 err = tcp_connect(sk); 347 if (err) 348 goto late_failure; 349 350 return 0; 351 352 late_failure: 353 tcp_set_state(sk, TCP_CLOSE); 354 inet_bhash2_reset_saddr(sk); 355 failure: 356 inet->inet_dport = 0; 357 sk->sk_route_caps = 0; 358 return err; 359 } 360 361 static struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) 362 { 363 struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6; 364 struct dst_entry *dst; 365 366 dst = inet6_csk_route_socket(sk, fl6); 367 368 if (IS_ERR(dst)) 369 return NULL; 370 dst->ops->update_pmtu(dst, sk, NULL, mtu, true); 371 372 dst = inet6_csk_route_socket(sk, fl6); 373 return IS_ERR(dst) ? NULL : dst; 374 } 375 376 static void tcp_v6_mtu_reduced(struct sock *sk) 377 { 378 struct dst_entry *dst; 379 u32 mtu, dmtu; 380 381 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 382 return; 383 384 mtu = READ_ONCE(tcp_sk(sk)->mtu_info); 385 386 /* Drop requests trying to increase our current mss. 387 * Check done in __ip6_rt_update_pmtu() is too late. 
/* ICMPv6 error handler for TCP.
 *
 * @skb:    ICMPv6 error packet; skb->data points at the embedded IPv6
 *          header of the offending packet
 * @opt:    unused in this function
 * @type:   ICMPv6 type
 * @code:   ICMPv6 code
 * @offset: offset of the embedded TCP header from skb->data
 * @info:   ICMPv6 info word in network byte order (the MTU for
 *          ICMPV6_PKT_TOOBIG)
 *
 * Looks up the socket the embedded segment belongs to and, depending on
 * socket state and ICMP type, handles redirects, PMTU reductions, or
 * reports/records the converted error on the socket.
 *
 * Returns -ENOENT when no socket matches, 0 otherwise.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net_rcu(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	/* The embedded header is from a packet we sent, so its destination
	 * is the remote end and its source is the local end of the socket.
	 */
	sk = __inet6_lookup_established(net, &hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		/* To increase the counter of ignored icmps for TCP-AO */
		tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		/* NOTE(review): no explicit put on this path; presumably
		 * tcp_req_err() releases the request sock reference - confirm.
		 */
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
		sock_put(sk);
		return 0;
	}

	bh_lock_sock(sk);
	/* Only counted here; the per-type code below still decides what can
	 * be done while the socket is owned by user context.
	 */
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	/* Ignore errors quoting a sequence number outside [snd_una, snd_nxt]. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		/* Redirects are only applied when the socket is not owned by
		 * user context; otherwise they are dropped.
		 */
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		/* Handle now if possible; otherwise set the deferred flag and,
		 * if it was not already set, take a socket reference.
		 */
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk))
			tcp_done_with_error(sk, err);
		else
			WRITE_ONCE(sk->sk_err_soft, err);
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	/* Hard error only when the socket is not owned by user context and
	 * has RECVERR6 set; otherwise record it as a soft error.
	 */
	if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	} else {
		WRITE_ONCE(sk->sk_err_soft, err);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
530 * (see RFC 6069) 531 */ 532 if (!fastopen && type == ICMPV6_DEST_UNREACH && 533 code == ICMPV6_NOROUTE) 534 tcp_ld_RTO_revert(sk, seq); 535 } 536 537 if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { 538 WRITE_ONCE(sk->sk_err, err); 539 sk_error_report(sk); 540 } else { 541 WRITE_ONCE(sk->sk_err_soft, err); 542 } 543 out: 544 bh_unlock_sock(sk); 545 sock_put(sk); 546 return 0; 547 } 548 549 550 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 551 struct flowi *fl, 552 struct request_sock *req, 553 struct tcp_fastopen_cookie *foc, 554 enum tcp_synack_type synack_type, 555 struct sk_buff *syn_skb) 556 { 557 struct inet_request_sock *ireq = inet_rsk(req); 558 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 559 struct ipv6_txoptions *opt; 560 struct flowi6 *fl6 = &fl->u.ip6; 561 struct sk_buff *skb; 562 int err = -ENOMEM; 563 u8 tclass; 564 565 /* First, grab a route. */ 566 if (!dst && (dst = inet6_csk_route_req(sk, NULL, fl6, req, 567 IPPROTO_TCP)) == NULL) 568 goto done; 569 570 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 571 572 if (skb) { 573 tcp_rsk(req)->syn_ect_snt = np->tclass & INET_ECN_MASK; 574 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 575 &ireq->ir_v6_rmt_addr); 576 577 fl6->daddr = ireq->ir_v6_rmt_addr; 578 if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) 579 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 580 581 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 582 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 583 (np->tclass & INET_ECN_MASK) : 584 np->tclass; 585 586 if (!INET_ECN_is_capable(tclass) && 587 tcp_bpf_ca_needs_ecn((struct sock *)req)) 588 tclass |= INET_ECN_ECT_0; 589 590 rcu_read_lock(); 591 opt = ireq->ipv6_opt; 592 if (!opt) 593 opt = rcu_dereference(np->opt); 594 err = ip6_xmit(sk, skb, fl6, skb->mark ? 
: READ_ONCE(sk->sk_mark), 595 opt, tclass, READ_ONCE(sk->sk_priority)); 596 rcu_read_unlock(); 597 err = net_xmit_eval(err); 598 } 599 600 done: 601 return err; 602 } 603 604 605 static void tcp_v6_reqsk_destructor(struct request_sock *req) 606 { 607 kfree(inet_rsk(req)->ipv6_opt); 608 consume_skb(inet_rsk(req)->pktopts); 609 } 610 611 #ifdef CONFIG_TCP_MD5SIG 612 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 613 const struct in6_addr *addr, 614 int l3index) 615 { 616 return tcp_md5_do_lookup(sk, l3index, 617 (union tcp_md5_addr *)addr, AF_INET6); 618 } 619 620 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 621 const struct sock *addr_sk) 622 { 623 int l3index; 624 625 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 626 addr_sk->sk_bound_dev_if); 627 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 628 l3index); 629 } 630 631 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 632 sockptr_t optval, int optlen) 633 { 634 struct tcp_md5sig cmd; 635 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 636 union tcp_ao_addr *addr; 637 int l3index = 0; 638 u8 prefixlen; 639 bool l3flag; 640 u8 flags; 641 642 if (optlen < sizeof(cmd)) 643 return -EINVAL; 644 645 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 646 return -EFAULT; 647 648 if (sin6->sin6_family != AF_INET6) 649 return -EINVAL; 650 651 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 652 l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 653 654 if (optname == TCP_MD5SIG_EXT && 655 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 656 prefixlen = cmd.tcpm_prefixlen; 657 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 658 prefixlen > 32)) 659 return -EINVAL; 660 } else { 661 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 
32 : 128; 662 } 663 664 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 665 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 666 struct net_device *dev; 667 668 rcu_read_lock(); 669 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 670 if (dev && netif_is_l3_master(dev)) 671 l3index = dev->ifindex; 672 rcu_read_unlock(); 673 674 /* ok to reference set/not set outside of rcu; 675 * right now device MUST be an L3 master 676 */ 677 if (!dev || !l3index) 678 return -EINVAL; 679 } 680 681 if (!cmd.tcpm_keylen) { 682 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 683 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 684 AF_INET, prefixlen, 685 l3index, flags); 686 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 687 AF_INET6, prefixlen, l3index, flags); 688 } 689 690 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 691 return -EINVAL; 692 693 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { 694 addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; 695 696 /* Don't allow keys for peers that have a matching TCP-AO key. 697 * See the comment in tcp_ao_add_cmd() 698 */ 699 if (tcp_ao_required(sk, addr, AF_INET, 700 l3flag ? l3index : -1, false)) 701 return -EKEYREJECTED; 702 return tcp_md5_do_add(sk, addr, 703 AF_INET, prefixlen, l3index, flags, 704 cmd.tcpm_key, cmd.tcpm_keylen); 705 } 706 707 addr = (union tcp_md5_addr *)&sin6->sin6_addr; 708 709 /* Don't allow keys for peers that have a matching TCP-AO key. 710 * See the comment in tcp_ao_add_cmd() 711 */ 712 if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? 
l3index : -1, false)) 713 return -EKEYREJECTED; 714 715 return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, 716 cmd.tcpm_key, cmd.tcpm_keylen); 717 } 718 719 static void tcp_v6_md5_hash_headers(struct md5_ctx *ctx, 720 const struct in6_addr *daddr, 721 const struct in6_addr *saddr, 722 const struct tcphdr *th, int nbytes) 723 { 724 struct { 725 struct tcp6_pseudohdr ip; /* TCP pseudo-header (RFC2460) */ 726 struct tcphdr tcp; 727 } h; 728 729 h.ip.saddr = *saddr; 730 h.ip.daddr = *daddr; 731 h.ip.protocol = cpu_to_be32(IPPROTO_TCP); 732 h.ip.len = cpu_to_be32(nbytes); 733 h.tcp = *th; 734 h.tcp.check = 0; 735 md5_update(ctx, (const u8 *)&h, sizeof(h.ip) + sizeof(h.tcp)); 736 } 737 738 static noinline_for_stack void 739 tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 740 const struct in6_addr *daddr, struct in6_addr *saddr, 741 const struct tcphdr *th) 742 { 743 struct md5_ctx ctx; 744 745 md5_init(&ctx); 746 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, th->doff << 2); 747 tcp_md5_hash_key(&ctx, key); 748 md5_final(&ctx, md5_hash); 749 } 750 751 static noinline_for_stack void 752 tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 753 const struct sock *sk, const struct sk_buff *skb) 754 { 755 const struct tcphdr *th = tcp_hdr(skb); 756 const struct in6_addr *saddr, *daddr; 757 struct md5_ctx ctx; 758 759 if (sk) { /* valid for establish/request sockets */ 760 saddr = &sk->sk_v6_rcv_saddr; 761 daddr = &sk->sk_v6_daddr; 762 } else { 763 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 764 saddr = &ip6h->saddr; 765 daddr = &ip6h->daddr; 766 } 767 768 md5_init(&ctx); 769 tcp_v6_md5_hash_headers(&ctx, daddr, saddr, th, skb->len); 770 tcp_md5_hash_skb_data(&ctx, skb, th->doff << 2); 771 tcp_md5_hash_key(&ctx, key); 772 md5_final(&ctx, md5_hash); 773 } 774 #endif 775 776 static void tcp_v6_init_req(struct request_sock *req, 777 const struct sock *sk_listener, 778 struct sk_buff *skb, 779 u32 tw_isn) 780 { 781 bool 
l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 782 struct inet_request_sock *ireq = inet_rsk(req); 783 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 784 785 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 786 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 787 ireq->ir_rmt_addr = LOOPBACK4_IPV6; 788 ireq->ir_loc_addr = LOOPBACK4_IPV6; 789 790 /* So that link locals have meaning */ 791 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 792 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 793 ireq->ir_iif = tcp_v6_iif(skb); 794 795 if (!tw_isn && 796 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 797 np->rxopt.bits.rxinfo || 798 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 799 np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { 800 refcount_inc(&skb->users); 801 ireq->pktopts = skb; 802 } 803 } 804 805 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 806 struct sk_buff *skb, 807 struct flowi *fl, 808 struct request_sock *req, 809 u32 tw_isn) 810 { 811 tcp_v6_init_req(req, sk, skb, tw_isn); 812 813 if (security_inet_conn_request(sk, skb, req)) 814 return NULL; 815 816 return inet6_csk_route_req(sk, NULL, &fl->u.ip6, req, IPPROTO_TCP); 817 } 818 819 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 820 .family = AF_INET6, 821 .obj_size = sizeof(struct tcp6_request_sock), 822 .send_ack = tcp_v6_reqsk_send_ack, 823 .destructor = tcp_v6_reqsk_destructor, 824 .send_reset = tcp_v6_send_reset, 825 }; 826 827 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 828 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 829 sizeof(struct ipv6hdr), 830 #ifdef CONFIG_TCP_MD5SIG 831 .req_md5_lookup = tcp_v6_md5_lookup, 832 .calc_md5_hash = tcp_v6_md5_hash_skb, 833 #endif 834 #ifdef CONFIG_TCP_AO 835 .ao_lookup = tcp_v6_ao_lookup_rsk, 836 .ao_calc_key = tcp_v6_ao_calc_key_rsk, 837 .ao_synack_hash = tcp_v6_ao_synack_hash, 838 #endif 839 #ifdef CONFIG_SYN_COOKIES 840 
/* Build and send a bare TCP control segment (RST or ACK) in reply to @skb.
 *
 * @sk:       socket the reply is sent on behalf of; may be NULL
 * @skb:      incoming segment being answered; addresses and ports of the
 *            reply are the swapped ones of this packet
 * @seq/@ack: sequence and acknowledgment numbers for the reply
 * @win:      advertised window
 * @tsval/@tsecr: timestamp option values; the option is emitted only when
 *            @tsecr is non-zero
 * @oif:      output interface, 0 to derive it from @skb
 * @rst:      non-zero to send a RST
 * @tclass, @label, @priority: passed on to the flow / ip6_xmit()
 * @txhash:   when non-zero, used as the skb hash and for ECMP selection
 * @key:      MD5 or TCP-AO key description used to sign the segment
 *
 * The segment is transmitted through the per-netns control socket. Nothing
 * is reported to the caller; on allocation or routing failure the reply is
 * simply not sent.
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, int rst, u8 tclass, __be32 label,
				 u32 priority, u32 txhash, struct tcp_key *key)
{
	struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	unsigned int tot_len = sizeof(struct tcphdr);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	struct sk_buff *buff;
	struct tcphdr *t1;
	struct flowi6 fl6;
	u32 mark = 0;

	/* First pass: size the TCP header including every option that the
	 * second pass below will write.
	 */
	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
	if (tcp_key_is_md5(key))
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
	if (tcp_key_is_ao(key))
		tot_len += tcp_ao_len_aligned(key->ao_key);

#ifdef CONFIG_MPTCP
	/* The MPTCP reset option is only added to unsigned-by-MD5 resets. */
	if (rst && !tcp_key_is_md5(key)) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!buff)
		return;

	/* Reserve all the space as headroom, then claim the TCP header. */
	skb_reserve(buff, MAX_TCP_HEADER);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	/* ACK is set on every non-RST reply, and on a RST only when the
	 * incoming segment carried no ACK.
	 */
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	/* Options start right after the fixed header. */
	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_key_is_md5(key)) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* Reply direction: the incoming source address is passed as
		 * the hash's destination and vice versa.
		 */
		tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif
#ifdef CONFIG_TCP_AO
	if (tcp_key_is_ao(key)) {
		*topt++ = htonl((TCPOPT_AO << 24) |
				(tcp_ao_len(key->ao_key) << 16) |
				(key->ao_key->sndid << 8) |
				(key->rcv_next));

		tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
				key->traffic_key,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
				(union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
				t1, key->sne);
	}
#endif

	/* Flow for the reply: addresses swapped relative to @skb. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		/* No oif given: fall back to the incoming interface when it
		 * is an L3 master device.
		 */
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		/* unconstify the socket only to attach it to buff with care. */
		skb_set_owner_edemux(buff, (struct sock *)sk);
		psp_reply_set_decrypted(sk, buff);

		/* Time-wait sockets keep their mark in the timewait struct. */
		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);

		/* Select the local ECMP path from the connection's txhash,
		 * so a control packet (RST, or ACK from a time-wait socket)
		 * uses the same nexthop as the data. Only policy 0 uses
		 * mp_hash; policies 1-3 derive a deterministic hash.
		 */
		ip6_ecmp_set_mp_hash(net, &fl6, txhash);
	}
	/* A non-zero reply mark wins over the socket mark. */
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup even if it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
	else
		dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	/* No route: drop the reply we built. */
	sk_skb_reason_drop(sk, buff, SKB_DROP_REASON_IP_OUTNOROUTES);
}
/* Send a RST in reply to @skb.
 *
 * @sk:     socket the segment was matched to, or NULL
 * @skb:    offending segment
 * @reason: reset reason, passed to the tracepoint only
 *
 * No RST is sent in reply to a RST, to a non-unicast destination when no
 * socket matched, when the authentication options cannot be parsed, or when
 * MD5 / TCP-AO validation below fails.
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
			      enum sk_rst_reason reason)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	const __u8 *md5_hash_location = NULL;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	bool allocated_traffic_key = false;
#endif
	const struct tcp_ao_hdr *aoh;
	struct tcp_key key = {};
	u32 seq = 0, ack_seq = 0;
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;
#ifdef CONFIG_TCP_MD5SIG
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
#endif

	/* Never answer a RST with a RST. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
	/* Invalid TCP option size or twice included auth */
	if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
		return;
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
	/* Held until the "out" label; covers the key lookups below. */
	rcu_read_lock();
#endif
#ifdef CONFIG_TCP_MD5SIG
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
		if (key.md5_key)
			key.type = TCP_KEY_MD5;
	} else if (md5_hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * We do not lose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		/* NOTE(review): the local port argument is ntohs(th->source),
		 * in line with the "through source port" wording above -
		 * confirm this is intended rather than th->dest.
		 */
		sk1 = inet6_lookup_listener(net, NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key.md5_key)
			goto out;
		key.type = TCP_KEY_MD5;

		/* Verify the incoming signature before answering. */
		tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
		if (crypto_memneq(md5_hash_location, newhash, 16))
			goto out;
	}
#endif

	/* With an ACK in the offending segment the RST takes its sequence
	 * number from that ACK; otherwise it acknowledges everything the
	 * segment occupied (SYN and FIN count as one each).
	 */
	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

#ifdef CONFIG_TCP_AO
	if (aoh) {
		int l3index;

		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
					 &key.ao_key, &key.traffic_key,
					 &allocated_traffic_key,
					 &key.rcv_next, &key.sne))
			goto out;
		key.type = TCP_KEY_AO;
	}
#endif

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			if (inet6_test_bit(REPFLOW, sk))
				label = ip6_flowlabel(ipv6h);
			priority = READ_ONCE(sk->sk_priority);
			txhash = sk->sk_txhash;
		}
		/* Time-wait sockets carry their own copies of these values. */
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		/* No socket: reflect the flow label only if the sysctl asks
		 * for it on TCP resets.
		 */
		if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) &
		    FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	trace_tcp_send_reset(sk, skb, reason);

	/* win, tsval and tsecr are 0; rst is 1; the ECN bits of the incoming
	 * traffic class are masked off.
	 */
	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
			     ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
			     label, priority, txhash,
			     &key);

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
	/* Free the traffic key only if tcp_ao_prepare_reset() flagged it as
	 * allocated.
	 */
	if (allocated_traffic_key)
		kfree(key.traffic_key);
	rcu_read_unlock();
#endif
}
1072 */ 1073 l3index = tcp_v6_sdif(skb) ? dif : 0; 1074 1075 key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1076 if (!key.md5_key) 1077 goto out; 1078 key.type = TCP_KEY_MD5; 1079 1080 tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); 1081 if (crypto_memneq(md5_hash_location, newhash, 16)) 1082 goto out; 1083 } 1084 #endif 1085 1086 if (th->ack) 1087 seq = ntohl(th->ack_seq); 1088 else 1089 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1090 (th->doff << 2); 1091 1092 #ifdef CONFIG_TCP_AO 1093 if (aoh) { 1094 int l3index; 1095 1096 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1097 if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, 1098 &key.ao_key, &key.traffic_key, 1099 &allocated_traffic_key, 1100 &key.rcv_next, &key.sne)) 1101 goto out; 1102 key.type = TCP_KEY_AO; 1103 } 1104 #endif 1105 1106 if (sk) { 1107 oif = sk->sk_bound_dev_if; 1108 if (sk_fullsock(sk)) { 1109 if (inet6_test_bit(REPFLOW, sk)) 1110 label = ip6_flowlabel(ipv6h); 1111 priority = READ_ONCE(sk->sk_priority); 1112 txhash = sk->sk_txhash; 1113 } 1114 if (sk->sk_state == TCP_TIME_WAIT) { 1115 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1116 priority = inet_twsk(sk)->tw_priority; 1117 txhash = inet_twsk(sk)->tw_txhash; 1118 } 1119 } else { 1120 if (READ_ONCE(net->ipv6.sysctl.flowlabel_reflect) & 1121 FLOWLABEL_REFLECT_TCP_RESET) 1122 label = ip6_flowlabel(ipv6h); 1123 } 1124 1125 trace_tcp_send_reset(sk, skb, reason); 1126 1127 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, 1128 ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK, 1129 label, priority, txhash, 1130 &key); 1131 1132 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1133 out: 1134 if (allocated_traffic_key) 1135 kfree(key.traffic_key); 1136 rcu_read_unlock(); 1137 #endif 1138 } 1139 1140 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1141 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1142 struct tcp_key *key, u8 tclass, 1143 __be32 
label, u32 priority, u32 txhash) 1144 { 1145 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, 1146 tclass, label, priority, txhash, key); 1147 } 1148 1149 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb, 1150 enum tcp_tw_status tw_status) 1151 { 1152 struct inet_timewait_sock *tw = inet_twsk(sk); 1153 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1154 u8 tclass = tw->tw_tclass; 1155 struct tcp_key key = {}; 1156 1157 if (tw_status == TCP_TW_ACK_OOW) 1158 tclass &= ~INET_ECN_MASK; 1159 #ifdef CONFIG_TCP_AO 1160 struct tcp_ao_info *ao_info; 1161 1162 if (static_branch_unlikely(&tcp_ao_needed.key)) { 1163 1164 /* FIXME: the segment to-be-acked is not verified yet */ 1165 ao_info = rcu_dereference(tcptw->ao_info); 1166 if (ao_info) { 1167 const struct tcp_ao_hdr *aoh; 1168 1169 /* Invalid TCP option size or twice included auth */ 1170 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1171 goto out; 1172 if (aoh) 1173 key.ao_key = tcp_ao_established_key(sk, ao_info, 1174 aoh->rnext_keyid, -1); 1175 } 1176 } 1177 if (key.ao_key) { 1178 struct tcp_ao_key *rnext_key; 1179 1180 key.traffic_key = snd_other_key(key.ao_key); 1181 /* rcv_next switches to our rcv_next */ 1182 rnext_key = READ_ONCE(ao_info->rnext_key); 1183 key.rcv_next = rnext_key->rcvid; 1184 key.sne = READ_ONCE(ao_info->snd_sne); 1185 key.type = TCP_KEY_AO; 1186 #else 1187 if (0) { 1188 #endif 1189 #ifdef CONFIG_TCP_MD5SIG 1190 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1191 key.md5_key = tcp_twsk_md5_key(tcptw); 1192 if (key.md5_key) 1193 key.type = TCP_KEY_MD5; 1194 #endif 1195 } 1196 1197 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, 1198 READ_ONCE(tcptw->tw_rcv_nxt), 1199 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1200 tcp_tw_tsval(tcptw), 1201 READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, 1202 &key, tclass, cpu_to_be32(tw->tw_flowlabel), 1203 tw->tw_priority, tw->tw_txhash); 1204 1205 #ifdef CONFIG_TCP_AO 1206 out: 1207 #endif 1208 
inet_twsk_put(tw); 1209 } 1210 1211 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1212 struct request_sock *req) 1213 { 1214 struct tcp_key key = {}; 1215 1216 #ifdef CONFIG_TCP_AO 1217 if (static_branch_unlikely(&tcp_ao_needed.key) && 1218 tcp_rsk_used_ao(req)) { 1219 const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; 1220 const struct tcp_ao_hdr *aoh; 1221 int l3index; 1222 1223 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1224 /* Invalid TCP option size or twice included auth */ 1225 if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) 1226 return; 1227 if (!aoh) 1228 return; 1229 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1230 (union tcp_ao_addr *)addr, 1231 AF_INET6, aoh->rnext_keyid, -1); 1232 if (unlikely(!key.ao_key)) { 1233 /* Send ACK with any matching MKT for the peer */ 1234 key.ao_key = tcp_ao_do_lookup(sk, l3index, 1235 (union tcp_ao_addr *)addr, 1236 AF_INET6, -1, -1); 1237 /* Matching key disappeared (user removed the key?) 1238 * let the handshake timeout. 1239 */ 1240 if (!key.ao_key) { 1241 net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", 1242 addr, 1243 ntohs(tcp_hdr(skb)->source), 1244 &ipv6_hdr(skb)->daddr, 1245 ntohs(tcp_hdr(skb)->dest)); 1246 return; 1247 } 1248 } 1249 key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); 1250 if (!key.traffic_key) 1251 return; 1252 1253 key.type = TCP_KEY_AO; 1254 key.rcv_next = aoh->keyid; 1255 tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); 1256 #else 1257 if (0) { 1258 #endif 1259 #ifdef CONFIG_TCP_MD5SIG 1260 } else if (static_branch_unlikely(&tcp_md5_needed.key)) { 1261 int l3index = tcp_v6_sdif(skb) ? 
tcp_v6_iif_l3_slave(skb) : 0; 1262 1263 key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, 1264 l3index); 1265 if (key.md5_key) 1266 key.type = TCP_KEY_MD5; 1267 #endif 1268 } 1269 1270 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1271 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1272 */ 1273 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 1274 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1275 tcp_rsk(req)->rcv_nxt, 1276 tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, 1277 tcp_rsk_tsval(tcp_rsk(req)), 1278 req->ts_recent, sk->sk_bound_dev_if, 1279 &key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK, 1280 0, 1281 READ_ONCE(sk->sk_priority), 1282 READ_ONCE(tcp_rsk(req)->txhash)); 1283 if (tcp_key_is_ao(&key)) 1284 kfree(key.traffic_key); 1285 } 1286 1287 1288 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1289 { 1290 #ifdef CONFIG_SYN_COOKIES 1291 const struct tcphdr *th = tcp_hdr(skb); 1292 1293 if (!th->syn) 1294 sk = cookie_v6_check(sk, skb); 1295 #endif 1296 return sk; 1297 } 1298 1299 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1300 struct tcphdr *th, u32 *cookie) 1301 { 1302 u16 mss = 0; 1303 #ifdef CONFIG_SYN_COOKIES 1304 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1305 &tcp_request_sock_ipv6_ops, sk, th); 1306 if (mss) { 1307 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1308 tcp_synq_overflow(sk); 1309 } 1310 #endif 1311 return mss; 1312 } 1313 1314 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1315 { 1316 if (skb->protocol == htons(ETH_P_IP)) 1317 return tcp_v4_conn_request(sk, skb); 1318 1319 if (!ipv6_unicast_destination(skb)) 1320 goto drop; 1321 1322 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1323 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1324 return 0; 1325 } 1326 1327 return tcp_conn_request(&tcp6_request_sock_ops, 1328 &tcp_request_sock_ipv6_ops, sk, skb); 1329 1330 drop: 1331 
tcp_listendrop(sk); 1332 return 0; /* don't send reset */ 1333 } 1334 1335 static void tcp_v6_restore_cb(struct sk_buff *skb) 1336 { 1337 /* We need to move header back to the beginning if xfrm6_policy_check() 1338 * and tcp_v6_fill_cb() are going to be called again. 1339 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 1340 */ 1341 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1342 sizeof(struct inet6_skb_parm)); 1343 } 1344 1345 /* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */ 1346 static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk) 1347 { 1348 struct inet_sock *newinet = inet_sk(newsk); 1349 struct ipv6_pinfo *newnp; 1350 1351 newinet->pinet6 = newnp = tcp_inet6_sk(newsk); 1352 newinet->ipv6_fl_list = NULL; 1353 1354 memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo)); 1355 1356 newnp->saddr = newsk->sk_v6_rcv_saddr; 1357 1358 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1359 if (sk_is_mptcp(newsk)) 1360 mptcpv6_handle_mapped(newsk, true); 1361 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1362 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 1363 tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific; 1364 #endif 1365 1366 newnp->ipv6_mc_list = NULL; 1367 newnp->ipv6_ac_list = NULL; 1368 newnp->pktoptions = NULL; 1369 newnp->opt = NULL; 1370 1371 /* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */ 1372 newnp->mcast_oif = newinet->mc_index; 1373 newnp->mcast_hops = newinet->mc_ttl; 1374 1375 newnp->rcv_flowinfo = 0; 1376 if (inet6_test_bit(REPFLOW, sk)) 1377 newnp->flow_label = 0; 1378 } 1379 1380 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1381 struct request_sock *req, 1382 struct dst_entry *dst, 1383 struct request_sock *req_unhash, 1384 bool *own_req, 1385 void (*opt_child_init)(struct sock *newsk, 1386 const struct sock *sk)) 1387 { 1388 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1389 struct inet_request_sock *ireq; 
1390 struct ipv6_txoptions *opt; 1391 struct inet_sock *newinet; 1392 bool found_dup_sk = false; 1393 struct ipv6_pinfo *newnp; 1394 struct tcp_sock *newtp; 1395 struct sock *newsk; 1396 #ifdef CONFIG_TCP_MD5SIG 1397 struct tcp_md5sig_key *key; 1398 int l3index; 1399 #endif 1400 struct flowi6 fl6; 1401 1402 if (skb->protocol == htons(ETH_P_IP)) 1403 return tcp_v4_syn_recv_sock(sk, skb, req, dst, 1404 req_unhash, own_req, 1405 tcp_v6_mapped_child_init); 1406 ireq = inet_rsk(req); 1407 1408 if (sk_acceptq_is_full(sk)) 1409 goto exit_overflow; 1410 1411 dst = inet6_csk_route_req(sk, dst, &fl6, req, IPPROTO_TCP); 1412 if (!dst) 1413 goto exit; 1414 1415 newsk = tcp_create_openreq_child(sk, req, skb); 1416 if (!newsk) 1417 goto exit_nonewsk; 1418 1419 /* 1420 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1421 * count here, tcp_create_openreq_child now does this for us, see the 1422 * comment in that function for the gory details. -acme 1423 */ 1424 1425 newsk->sk_gso_type = SKB_GSO_TCPV6; 1426 inet6_sk_rx_dst_set(newsk, skb); 1427 1428 newinet = inet_sk(newsk); 1429 newinet->cork.fl.u.ip6 = fl6; 1430 newinet->pinet6 = tcp_inet6_sk(newsk); 1431 newinet->ipv6_fl_list = NULL; 1432 newinet->inet_opt = NULL; 1433 1434 newtp = tcp_sk(newsk); 1435 newnp = tcp_inet6_sk(newsk); 1436 1437 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1438 1439 ip6_dst_store(newsk, dst, false, false); 1440 1441 newnp->saddr = ireq->ir_v6_loc_addr; 1442 1443 /* Now IPv6 options... 1444 1445 First: no IPv4 options. 
1446 */ 1447 newnp->ipv6_mc_list = NULL; 1448 newnp->ipv6_ac_list = NULL; 1449 1450 /* Clone RX bits */ 1451 newnp->rxopt.all = np->rxopt.all; 1452 1453 newnp->pktoptions = NULL; 1454 newnp->opt = NULL; 1455 newnp->mcast_oif = tcp_v6_iif(skb); 1456 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1457 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1458 if (inet6_test_bit(REPFLOW, sk)) 1459 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1460 1461 /* Set ToS of the new socket based upon the value of incoming SYN. 1462 * ECT bits are set later in tcp_init_transfer(). 1463 */ 1464 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1465 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1466 1467 /* Clone native IPv6 options from listening socket (if any) 1468 1469 Yes, keeping reference count would be much more clever, 1470 but we make one more one thing there: reattach optmem 1471 to newsk. 1472 */ 1473 opt = ireq->ipv6_opt; 1474 if (!opt) 1475 opt = rcu_dereference(np->opt); 1476 if (opt) { 1477 opt = ipv6_dup_options(newsk, opt); 1478 RCU_INIT_POINTER(newnp->opt, opt); 1479 } 1480 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1481 if (opt) 1482 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1483 opt->opt_flen; 1484 1485 tcp_ca_openreq_child(newsk, dst); 1486 1487 tcp_sync_mss(newsk, dst6_mtu(dst)); 1488 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1489 1490 tcp_initialize_rcv_mss(newsk); 1491 1492 #ifdef CONFIG_TCP_MD5SIG 1493 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1494 1495 if (!tcp_rsk_used_ao(req)) { 1496 /* Copy over the MD5 key from the original socket */ 1497 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1498 if (key) { 1499 const union tcp_md5_addr *addr; 1500 1501 addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; 1502 if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) 1503 goto put_and_exit; 1504 } 1505 } 1506 #endif 1507 #ifdef CONFIG_TCP_AO 1508 /* Copy over 
tcp_ao_info if any */ 1509 if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) 1510 goto put_and_exit; /* OOM */ 1511 #endif 1512 1513 if (__inet_inherit_port(sk, newsk) < 0) 1514 goto put_and_exit; 1515 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1516 &found_dup_sk); 1517 if (*own_req) { 1518 tcp_move_syn(newtp, req); 1519 1520 /* Clone pktoptions received with SYN, if we own the req */ 1521 if (ireq->pktopts) { 1522 newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); 1523 consume_skb(ireq->pktopts); 1524 ireq->pktopts = NULL; 1525 if (newnp->pktoptions) 1526 tcp_v6_restore_cb(newnp->pktoptions); 1527 } 1528 } else { 1529 if (!req_unhash && found_dup_sk) { 1530 /* This code path should only be executed in the 1531 * syncookie case only 1532 */ 1533 bh_unlock_sock(newsk); 1534 sock_put(newsk); 1535 newsk = NULL; 1536 } 1537 } 1538 1539 return newsk; 1540 1541 exit_overflow: 1542 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1543 exit_nonewsk: 1544 dst_release(dst); 1545 exit: 1546 tcp_listendrop(sk); 1547 return NULL; 1548 put_and_exit: 1549 inet_csk_prepare_forced_close(newsk); 1550 tcp_done(newsk); 1551 goto exit; 1552 } 1553 1554 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1555 u32)); 1556 /* The socket must have it's spinlock held when we get 1557 * here, unless it is a TCP_LISTEN socket. 1558 * 1559 * We have a potential double-lock case here, so even when 1560 * doing backlog processing we use the BH locking scheme. 1561 * This is because we cannot sleep with the original spinlock 1562 * held. 1563 */ 1564 INDIRECT_CALLABLE_SCOPE 1565 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1566 { 1567 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1568 struct sk_buff *opt_skb = NULL; 1569 enum skb_drop_reason reason; 1570 struct tcp_sock *tp; 1571 1572 /* Imagine: socket is IPv6. IPv4 packet arrives, 1573 goes to IPv4 receive handler and backlogged. 
1574 From backlog it always goes here. Kerboom... 1575 Fortunately, tcp_rcv_established and rcv_established 1576 handle them correctly, but it is not case with 1577 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1578 */ 1579 1580 if (skb->protocol == htons(ETH_P_IP)) 1581 return tcp_v4_do_rcv(sk, skb); 1582 1583 reason = psp_sk_rx_policy_check(sk, skb); 1584 if (reason) 1585 goto err_discard; 1586 1587 /* 1588 * socket locking is here for SMP purposes as backlog rcv 1589 * is currently called with bh processing disabled. 1590 */ 1591 1592 /* Do Stevens' IPV6_PKTOPTIONS. 1593 1594 Yes, guys, it is the only place in our code, where we 1595 may make it not affecting IPv4. 1596 The rest of code is protocol independent, 1597 and I do not like idea to uglify IPv4. 1598 1599 Actually, all the idea behind IPV6_PKTOPTIONS 1600 looks not very well thought. For now we latch 1601 options, received in the last packet, enqueued 1602 by tcp. Feel free to propose better solution. 1603 --ANK (980728) 1604 */ 1605 if (np->rxopt.all && sk->sk_state != TCP_LISTEN) 1606 opt_skb = skb_clone_and_charge_r(skb, sk); 1607 1608 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1609 struct dst_entry *dst; 1610 1611 dst = rcu_dereference_protected(sk->sk_rx_dst, 1612 lockdep_sock_is_held(sk)); 1613 1614 sock_rps_save_rxhash(sk, skb); 1615 sk_mark_napi_id(sk, skb); 1616 if (dst && unlikely(dst != skb_dst(skb))) { 1617 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1618 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1619 dst, sk->sk_rx_dst_cookie) == NULL) { 1620 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1621 dst_release(dst); 1622 } 1623 } 1624 1625 tcp_rcv_established(sk, skb); 1626 if (opt_skb) 1627 goto ipv6_pktoptions; 1628 return 0; 1629 } 1630 1631 if (tcp_checksum_complete(skb)) 1632 goto csum_err; 1633 1634 if (sk->sk_state == TCP_LISTEN) { 1635 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1636 1637 if (!nsk) 1638 return 0; 1639 if (nsk != sk) { 1640 reason = tcp_child_process(sk, 
nsk, skb); 1641 sock_put(nsk); 1642 if (reason) 1643 goto reset; 1644 return 0; 1645 } 1646 } else 1647 sock_rps_save_rxhash(sk, skb); 1648 1649 reason = tcp_rcv_state_process(sk, skb); 1650 if (reason) 1651 goto reset; 1652 if (opt_skb) 1653 goto ipv6_pktoptions; 1654 return 0; 1655 1656 reset: 1657 tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); 1658 discard: 1659 if (opt_skb) 1660 __kfree_skb(opt_skb); 1661 sk_skb_reason_drop(sk, skb, reason); 1662 return 0; 1663 csum_err: 1664 reason = SKB_DROP_REASON_TCP_CSUM; 1665 trace_tcp_bad_csum(skb); 1666 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1667 err_discard: 1668 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1669 goto discard; 1670 1671 1672 ipv6_pktoptions: 1673 /* Do you ask, what is it? 1674 1675 1. skb was enqueued by tcp. 1676 2. skb is added to tail of read queue, rather than out of order. 1677 3. socket is not in passive state. 1678 4. Finally, it really contains options, which user wants to receive. 1679 */ 1680 tp = tcp_sk(sk); 1681 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1682 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1683 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1684 WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); 1685 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1686 WRITE_ONCE(np->mcast_hops, 1687 ipv6_hdr(opt_skb)->hop_limit); 1688 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1689 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1690 if (inet6_test_bit(REPFLOW, sk)) 1691 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1692 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1693 tcp_v6_restore_cb(opt_skb); 1694 opt_skb = xchg(&np->pktoptions, opt_skb); 1695 } else { 1696 __kfree_skb(opt_skb); 1697 opt_skb = xchg(&np->pktoptions, NULL); 1698 } 1699 } 1700 1701 consume_skb(opt_skb); 1702 return 0; 1703 } 1704 1705 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1706 const struct 
tcphdr *th) 1707 { 1708 /* This is tricky: we move IP6CB at its correct location into 1709 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1710 * _decode_session6() uses IP6CB(). 1711 * barrier() makes sure compiler won't play aliasing games. 1712 */ 1713 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1714 sizeof(struct inet6_skb_parm)); 1715 barrier(); 1716 1717 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1718 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1719 skb->len - th->doff*4); 1720 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1721 TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); 1722 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1723 TCP_SKB_CB(skb)->sacked = 0; 1724 TCP_SKB_CB(skb)->has_rxtstamp = 1725 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1726 } 1727 1728 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1729 { 1730 struct net *net = dev_net_rcu(skb->dev); 1731 enum skb_drop_reason drop_reason; 1732 enum tcp_tw_status tw_status; 1733 int sdif = inet6_sdif(skb); 1734 int dif = inet6_iif(skb); 1735 const struct tcphdr *th; 1736 const struct ipv6hdr *hdr; 1737 struct sock *sk = NULL; 1738 bool refcounted; 1739 int ret; 1740 u32 isn; 1741 1742 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1743 if (skb->pkt_type != PACKET_HOST) 1744 goto discard_it; 1745 1746 /* 1747 * Count it even if it's bad. 
1748 */ 1749 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1750 1751 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1752 goto discard_it; 1753 1754 th = (const struct tcphdr *)skb->data; 1755 1756 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1757 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1758 goto bad_packet; 1759 } 1760 if (!pskb_may_pull(skb, th->doff*4)) 1761 goto discard_it; 1762 1763 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1764 goto csum_error; 1765 1766 th = (const struct tcphdr *)skb->data; 1767 hdr = ipv6_hdr(skb); 1768 1769 lookup: 1770 sk = __inet6_lookup_skb(skb, __tcp_hdrlen(th), 1771 th->source, th->dest, inet6_iif(skb), sdif, 1772 &refcounted); 1773 if (!sk) 1774 goto no_tcp_socket; 1775 1776 if (sk->sk_state == TCP_TIME_WAIT) 1777 goto do_time_wait; 1778 1779 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1780 struct request_sock *req = inet_reqsk(sk); 1781 bool req_stolen = false; 1782 struct sock *nsk; 1783 1784 sk = req->rsk_listener; 1785 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1786 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1787 else 1788 drop_reason = tcp_inbound_hash(sk, req, skb, 1789 &hdr->saddr, &hdr->daddr, 1790 AF_INET6, dif, sdif); 1791 if (drop_reason) { 1792 sk_drops_skbadd(sk, skb); 1793 reqsk_put(req); 1794 goto discard_it; 1795 } 1796 if (tcp_checksum_complete(skb)) { 1797 reqsk_put(req); 1798 goto csum_error; 1799 } 1800 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1801 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1802 if (!nsk) { 1803 inet_csk_reqsk_queue_drop_and_put(sk, req); 1804 goto lookup; 1805 } 1806 sk = nsk; 1807 /* reuseport_migrate_sock() has already held one sk_refcnt 1808 * before returning. 
1809 */ 1810 } else { 1811 sock_hold(sk); 1812 } 1813 refcounted = true; 1814 nsk = NULL; 1815 drop_reason = tcp_filter(sk, skb); 1816 if (!drop_reason) { 1817 th = (const struct tcphdr *)skb->data; 1818 hdr = ipv6_hdr(skb); 1819 tcp_v6_fill_cb(skb, hdr, th); 1820 nsk = tcp_check_req(sk, skb, req, false, &req_stolen, 1821 &drop_reason); 1822 } 1823 if (!nsk) { 1824 reqsk_put(req); 1825 if (req_stolen) { 1826 /* Another cpu got exclusive access to req 1827 * and created a full blown socket. 1828 * Try to feed this packet to this socket 1829 * instead of discarding it. 1830 */ 1831 tcp_v6_restore_cb(skb); 1832 sock_put(sk); 1833 goto lookup; 1834 } 1835 goto discard_and_relse; 1836 } 1837 nf_reset_ct(skb); 1838 if (nsk == sk) { 1839 reqsk_put(req); 1840 tcp_v6_restore_cb(skb); 1841 } else { 1842 drop_reason = tcp_child_process(sk, nsk, skb); 1843 if (drop_reason) { 1844 enum sk_rst_reason rst_reason; 1845 1846 rst_reason = sk_rst_convert_drop_reason(drop_reason); 1847 tcp_v6_send_reset(nsk, skb, rst_reason); 1848 sock_put(nsk); 1849 goto discard_and_relse; 1850 } 1851 sock_put(nsk); 1852 sock_put(sk); 1853 return 0; 1854 } 1855 } 1856 1857 isn = 0; 1858 process: 1859 if (static_branch_unlikely(&ip6_min_hopcount)) { 1860 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1861 if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { 1862 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1863 drop_reason = SKB_DROP_REASON_TCP_MINTTL; 1864 goto discard_and_relse; 1865 } 1866 } 1867 1868 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1869 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1870 goto discard_and_relse; 1871 } 1872 1873 drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, 1874 AF_INET6, dif, sdif); 1875 if (drop_reason) 1876 goto discard_and_relse; 1877 1878 nf_reset_ct(skb); 1879 1880 drop_reason = tcp_filter(sk, skb); 1881 if (drop_reason) 1882 goto discard_and_relse; 1883 1884 th = (const struct tcphdr 
*)skb->data; 1885 hdr = ipv6_hdr(skb); 1886 tcp_v6_fill_cb(skb, hdr, th); 1887 TCP_SKB_CB(skb)->tcp_tw_isn = isn; 1888 1889 skb->dev = NULL; 1890 1891 if (sk->sk_state == TCP_LISTEN) { 1892 ret = tcp_v6_do_rcv(sk, skb); 1893 goto put_and_return; 1894 } 1895 1896 sk_incoming_cpu_update(sk); 1897 1898 bh_lock_sock_nested(sk); 1899 tcp_segs_in(tcp_sk(sk), skb); 1900 ret = 0; 1901 if (!sock_owned_by_user(sk)) { 1902 ret = tcp_v6_do_rcv(sk, skb); 1903 } else { 1904 drop_reason = tcp_add_backlog(sk, skb); 1905 if (drop_reason) 1906 goto discard_and_relse; 1907 } 1908 bh_unlock_sock(sk); 1909 put_and_return: 1910 if (refcounted) 1911 sock_put(sk); 1912 return ret ? -1 : 0; 1913 1914 no_tcp_socket: 1915 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1916 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1917 goto discard_it; 1918 1919 tcp_v6_fill_cb(skb, hdr, th); 1920 1921 if (tcp_checksum_complete(skb)) { 1922 csum_error: 1923 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1924 trace_tcp_bad_csum(skb); 1925 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1926 bad_packet: 1927 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1928 } else { 1929 tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); 1930 } 1931 1932 discard_it: 1933 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1934 sk_skb_reason_drop(sk, skb, drop_reason); 1935 return 0; 1936 1937 discard_and_relse: 1938 sk_drops_skbadd(sk, skb); 1939 if (refcounted) 1940 sock_put(sk); 1941 goto discard_it; 1942 1943 do_time_wait: 1944 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1945 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1946 inet_twsk_put(inet_twsk(sk)); 1947 goto discard_it; 1948 } 1949 1950 tcp_v6_fill_cb(skb, hdr, th); 1951 1952 if (tcp_checksum_complete(skb)) { 1953 inet_twsk_put(inet_twsk(sk)); 1954 goto csum_error; 1955 } 1956 1957 tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn, 1958 &drop_reason); 1959 switch (tw_status) { 1960 case TCP_TW_SYN: 1961 { 1962 struct sock *sk2; 1963 1964 sk2 = 
inet6_lookup_listener(net, skb, __tcp_hdrlen(th), 1965 &ipv6_hdr(skb)->saddr, th->source, 1966 &ipv6_hdr(skb)->daddr, 1967 ntohs(th->dest), 1968 tcp_v6_iif_l3_slave(skb), 1969 sdif); 1970 if (sk2) { 1971 struct inet_timewait_sock *tw = inet_twsk(sk); 1972 inet_twsk_deschedule_put(tw); 1973 sk = sk2; 1974 tcp_v6_restore_cb(skb); 1975 refcounted = false; 1976 goto process; 1977 } 1978 1979 drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb); 1980 if (drop_reason) 1981 break; 1982 } 1983 /* to ACK */ 1984 fallthrough; 1985 case TCP_TW_ACK: 1986 case TCP_TW_ACK_OOW: 1987 tcp_v6_timewait_ack(sk, skb, tw_status); 1988 break; 1989 case TCP_TW_RST: 1990 tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); 1991 inet_twsk_deschedule_put(inet_twsk(sk)); 1992 goto discard_it; 1993 case TCP_TW_SUCCESS: 1994 ; 1995 } 1996 goto discard_it; 1997 } 1998 1999 static struct timewait_sock_ops tcp6_timewait_sock_ops = { 2000 .twsk_obj_size = sizeof(struct tcp6_timewait_sock), 2001 }; 2002 2003 const struct inet_connection_sock_af_ops ipv6_specific = { 2004 .queue_xmit = inet6_csk_xmit, 2005 .rebuild_header = inet6_sk_rebuild_header, 2006 .sk_rx_dst_set = inet6_sk_rx_dst_set, 2007 .conn_request = tcp_v6_conn_request, 2008 .syn_recv_sock = tcp_v6_syn_recv_sock, 2009 .net_header_len = sizeof(struct ipv6hdr), 2010 .setsockopt = ipv6_setsockopt, 2011 .getsockopt = ipv6_getsockopt, 2012 .mtu_reduced = tcp_v6_mtu_reduced, 2013 }; 2014 2015 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2016 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { 2017 #ifdef CONFIG_TCP_MD5SIG 2018 .md5_lookup = tcp_v6_md5_lookup, 2019 .calc_md5_hash = tcp_v6_md5_hash_skb, 2020 .md5_parse = tcp_v6_parse_md5_keys, 2021 #endif 2022 #ifdef CONFIG_TCP_AO 2023 .ao_lookup = tcp_v6_ao_lookup, 2024 .calc_ao_hash = tcp_v6_ao_hash_skb, 2025 .ao_parse = tcp_v6_parse_ao, 2026 .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, 2027 #endif 2028 }; 2029 #endif 2030 2031 /* 2032 * TCP over IPv4 via 
INET6 API 2033 */ 2034 static const struct inet_connection_sock_af_ops ipv6_mapped = { 2035 .queue_xmit = ip_queue_xmit, 2036 .rebuild_header = inet_sk_rebuild_header, 2037 .sk_rx_dst_set = inet_sk_rx_dst_set, 2038 .conn_request = tcp_v6_conn_request, 2039 .syn_recv_sock = tcp_v6_syn_recv_sock, 2040 .net_header_len = sizeof(struct iphdr), 2041 .setsockopt = ipv6_setsockopt, 2042 .getsockopt = ipv6_getsockopt, 2043 .mtu_reduced = tcp_v4_mtu_reduced, 2044 }; 2045 2046 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2047 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { 2048 #ifdef CONFIG_TCP_MD5SIG 2049 .md5_lookup = tcp_v4_md5_lookup, 2050 .calc_md5_hash = tcp_v4_md5_hash_skb, 2051 .md5_parse = tcp_v6_parse_md5_keys, 2052 #endif 2053 #ifdef CONFIG_TCP_AO 2054 .ao_lookup = tcp_v6_ao_lookup, 2055 .calc_ao_hash = tcp_v4_ao_hash_skb, 2056 .ao_parse = tcp_v6_parse_ao, 2057 .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, 2058 #endif 2059 }; 2060 2061 static void tcp6_destruct_sock(struct sock *sk) 2062 { 2063 tcp_md5_destruct_sock(sk); 2064 tcp_ao_destroy_sock(sk, false); 2065 inet6_sock_destruct(sk); 2066 } 2067 #endif 2068 2069 /* NOTE: A lot of things set to zero explicitly by call to 2070 * sk_alloc() so need not be done here. 2071 */ 2072 static int tcp_v6_init_sock(struct sock *sk) 2073 { 2074 struct inet_connection_sock *icsk = inet_csk(sk); 2075 2076 tcp_init_sock(sk); 2077 2078 icsk->icsk_af_ops = &ipv6_specific; 2079 2080 #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) 2081 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; 2082 sk->sk_destruct = tcp6_destruct_sock; 2083 #endif 2084 2085 return 0; 2086 } 2087 2088 #ifdef CONFIG_PROC_FS 2089 /* Proc filesystem TCPv6 sock list dumping. 
*/ 2090 static void get_openreq6(struct seq_file *seq, 2091 const struct request_sock *req, int i) 2092 { 2093 long ttd = req->rsk_timer.expires - jiffies; 2094 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 2095 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 2096 2097 if (ttd < 0) 2098 ttd = 0; 2099 2100 seq_printf(seq, 2101 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2102 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", 2103 i, 2104 src->s6_addr32[0], src->s6_addr32[1], 2105 src->s6_addr32[2], src->s6_addr32[3], 2106 inet_rsk(req)->ir_num, 2107 dest->s6_addr32[0], dest->s6_addr32[1], 2108 dest->s6_addr32[2], dest->s6_addr32[3], 2109 ntohs(inet_rsk(req)->ir_rmt_port), 2110 TCP_SYN_RECV, 2111 0, 0, /* could print option size, but that is af dependent. */ 2112 1, /* timers active (only the expire timer) */ 2113 jiffies_to_clock_t(ttd), 2114 req->num_timeout, 2115 from_kuid_munged(seq_user_ns(seq), 2116 sk_uid(req->rsk_listener)), 2117 0, /* non standard timer */ 2118 0, /* open_requests have no inode */ 2119 0, req); 2120 } 2121 2122 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) 2123 { 2124 const struct in6_addr *dest, *src; 2125 __u16 destp, srcp; 2126 int timer_active; 2127 unsigned long timer_expires; 2128 const struct inet_sock *inet = inet_sk(sp); 2129 const struct tcp_sock *tp = tcp_sk(sp); 2130 const struct inet_connection_sock *icsk = inet_csk(sp); 2131 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 2132 u8 icsk_pending; 2133 int rx_queue; 2134 int state; 2135 2136 dest = &sp->sk_v6_daddr; 2137 src = &sp->sk_v6_rcv_saddr; 2138 destp = ntohs(inet->inet_dport); 2139 srcp = ntohs(inet->inet_sport); 2140 2141 icsk_pending = smp_load_acquire(&icsk->icsk_pending); 2142 if (icsk_pending == ICSK_TIME_RETRANS || 2143 icsk_pending == ICSK_TIME_REO_TIMEOUT || 2144 icsk_pending == ICSK_TIME_LOSS_PROBE) { 2145 timer_active = 1; 2146 timer_expires = tcp_timeout_expires(sp); 
2147 } else if (icsk_pending == ICSK_TIME_PROBE0) { 2148 timer_active = 4; 2149 timer_expires = tcp_timeout_expires(sp); 2150 } else if (timer_pending(&icsk->icsk_keepalive_timer)) { 2151 timer_active = 2; 2152 timer_expires = icsk->icsk_keepalive_timer.expires; 2153 } else { 2154 timer_active = 0; 2155 timer_expires = jiffies; 2156 } 2157 2158 state = inet_sk_state_load(sp); 2159 if (state == TCP_LISTEN) 2160 rx_queue = READ_ONCE(sp->sk_ack_backlog); 2161 else 2162 /* Because we don't lock the socket, 2163 * we might find a transient negative value. 2164 */ 2165 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - 2166 READ_ONCE(tp->copied_seq), 0); 2167 2168 seq_printf(seq, 2169 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2170 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %lu %lu %u %u %d\n", 2171 i, 2172 src->s6_addr32[0], src->s6_addr32[1], 2173 src->s6_addr32[2], src->s6_addr32[3], srcp, 2174 dest->s6_addr32[0], dest->s6_addr32[1], 2175 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2176 state, 2177 READ_ONCE(tp->write_seq) - tp->snd_una, 2178 rx_queue, 2179 timer_active, 2180 jiffies_delta_to_clock_t(timer_expires - jiffies), 2181 READ_ONCE(icsk->icsk_retransmits), 2182 from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 2183 READ_ONCE(icsk->icsk_probes_out), 2184 sock_i_ino(sp), 2185 refcount_read(&sp->sk_refcnt), sp, 2186 jiffies_to_clock_t(icsk->icsk_rto), 2187 jiffies_to_clock_t(icsk->icsk_ack.ato), 2188 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), 2189 tcp_snd_cwnd(tp), 2190 state == TCP_LISTEN ? 2191 fastopenq->max_qlen : 2192 (tcp_in_initial_slowstart(tp) ? 
-1 : tp->snd_ssthresh) 2193 ); 2194 } 2195 2196 static void get_timewait6_sock(struct seq_file *seq, 2197 struct inet_timewait_sock *tw, int i) 2198 { 2199 long delta = tw->tw_timer.expires - jiffies; 2200 const struct in6_addr *dest, *src; 2201 __u16 destp, srcp; 2202 2203 dest = &tw->tw_v6_daddr; 2204 src = &tw->tw_v6_rcv_saddr; 2205 destp = ntohs(tw->tw_dport); 2206 srcp = ntohs(tw->tw_sport); 2207 2208 seq_printf(seq, 2209 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " 2210 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", 2211 i, 2212 src->s6_addr32[0], src->s6_addr32[1], 2213 src->s6_addr32[2], src->s6_addr32[3], srcp, 2214 dest->s6_addr32[0], dest->s6_addr32[1], 2215 dest->s6_addr32[2], dest->s6_addr32[3], destp, 2216 READ_ONCE(tw->tw_substate), 0, 0, 2217 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2218 refcount_read(&tw->tw_refcnt), tw); 2219 } 2220 2221 static int tcp6_seq_show(struct seq_file *seq, void *v) 2222 { 2223 struct tcp_iter_state *st; 2224 struct sock *sk = v; 2225 2226 if (v == SEQ_START_TOKEN) { 2227 seq_puts(seq, 2228 " sl " 2229 "local_address " 2230 "remote_address " 2231 "st tx_queue rx_queue tr tm->when retrnsmt" 2232 " uid timeout inode\n"); 2233 goto out; 2234 } 2235 st = seq->private; 2236 2237 if (sk->sk_state == TCP_TIME_WAIT) 2238 get_timewait6_sock(seq, v, st->num); 2239 else if (sk->sk_state == TCP_NEW_SYN_RECV) 2240 get_openreq6(seq, v, st->num); 2241 else 2242 get_tcp6_sock(seq, v, st->num); 2243 out: 2244 return 0; 2245 } 2246 2247 static const struct seq_operations tcp6_seq_ops = { 2248 .show = tcp6_seq_show, 2249 .start = tcp_seq_start, 2250 .next = tcp_seq_next, 2251 .stop = tcp_seq_stop, 2252 }; 2253 2254 static struct tcp_seq_afinfo tcp6_seq_afinfo = { 2255 .family = AF_INET6, 2256 }; 2257 2258 int __net_init tcp6_proc_init(struct net *net) 2259 { 2260 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, 2261 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) 2262 return -ENOMEM; 2263 return 
0; 2264 } 2265 2266 void tcp6_proc_exit(struct net *net) 2267 { 2268 remove_proc_entry("tcp6", net->proc_net); 2269 } 2270 #endif 2271 2272 struct proto tcpv6_prot = { 2273 .name = "TCPv6", 2274 .owner = THIS_MODULE, 2275 .close = tcp_close, 2276 .pre_connect = tcp_v6_pre_connect, 2277 .connect = tcp_v6_connect, 2278 .disconnect = tcp_disconnect, 2279 .accept = inet_csk_accept, 2280 .ioctl = tcp_ioctl, 2281 .init = tcp_v6_init_sock, 2282 .destroy = tcp_v4_destroy_sock, 2283 .shutdown = tcp_shutdown, 2284 .setsockopt = tcp_setsockopt, 2285 .getsockopt = tcp_getsockopt, 2286 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, 2287 .keepalive = tcp_set_keepalive, 2288 .recvmsg = tcp_recvmsg, 2289 .sendmsg = tcp_sendmsg, 2290 .splice_eof = tcp_splice_eof, 2291 .backlog_rcv = tcp_v6_do_rcv, 2292 .release_cb = tcp_release_cb, 2293 .hash = inet_hash, 2294 .unhash = inet_unhash, 2295 .get_port = inet_csk_get_port, 2296 .put_port = inet_put_port, 2297 #ifdef CONFIG_BPF_SYSCALL 2298 .psock_update_sk_prot = tcp_bpf_update_proto, 2299 #endif 2300 .enter_memory_pressure = tcp_enter_memory_pressure, 2301 .leave_memory_pressure = tcp_leave_memory_pressure, 2302 .stream_memory_free = tcp_stream_memory_free, 2303 .sockets_allocated = &tcp_sockets_allocated, 2304 2305 .memory_allocated = &net_aligned_data.tcp_memory_allocated, 2306 .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, 2307 2308 .memory_pressure = &tcp_memory_pressure, 2309 .sysctl_mem = sysctl_tcp_mem, 2310 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), 2311 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), 2312 .max_header = MAX_TCP_HEADER, 2313 .obj_size = sizeof(struct tcp6_sock), 2314 .freeptr_offset = offsetof(struct tcp6_sock, 2315 tcp.inet_conn.icsk_inet.sk.sk_freeptr), 2316 .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), 2317 .slab_flags = SLAB_TYPESAFE_BY_RCU, 2318 .twsk_prot = &tcp6_timewait_sock_ops, 2319 .rsk_prot = &tcp6_request_sock_ops, 2320 .h.hashinfo = NULL, 
2321 .no_autobind = true, 2322 .diag_destroy = tcp_abort, 2323 }; 2324 EXPORT_SYMBOL_GPL(tcpv6_prot); 2325 2326 2327 static struct inet_protosw tcpv6_protosw = { 2328 .type = SOCK_STREAM, 2329 .protocol = IPPROTO_TCP, 2330 .prot = &tcpv6_prot, 2331 .ops = &inet6_stream_ops, 2332 .flags = INET_PROTOSW_PERMANENT | 2333 INET_PROTOSW_ICSK, 2334 }; 2335 2336 static int __net_init tcpv6_net_init(struct net *net) 2337 { 2338 int res; 2339 2340 res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, 2341 SOCK_RAW, IPPROTO_TCP, net); 2342 if (!res) 2343 net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; 2344 2345 return res; 2346 } 2347 2348 static void __net_exit tcpv6_net_exit(struct net *net) 2349 { 2350 inet_ctl_sock_destroy(net->ipv6.tcp_sk); 2351 } 2352 2353 static struct pernet_operations tcpv6_net_ops = { 2354 .init = tcpv6_net_init, 2355 .exit = tcpv6_net_exit, 2356 }; 2357 2358 int __init tcpv6_init(void) 2359 { 2360 int ret; 2361 2362 net_hotdata.tcpv6_protocol = (struct inet6_protocol) { 2363 .handler = tcp_v6_rcv, 2364 .err_handler = tcp_v6_err, 2365 .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, 2366 }; 2367 ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2368 if (ret) 2369 goto out; 2370 2371 /* register inet6 protocol */ 2372 ret = inet6_register_protosw(&tcpv6_protosw); 2373 if (ret) 2374 goto out_tcpv6_protocol; 2375 2376 ret = register_pernet_subsys(&tcpv6_net_ops); 2377 if (ret) 2378 goto out_tcpv6_protosw; 2379 2380 ret = mptcpv6_init(); 2381 if (ret) 2382 goto out_tcpv6_pernet_subsys; 2383 2384 out: 2385 return ret; 2386 2387 out_tcpv6_pernet_subsys: 2388 unregister_pernet_subsys(&tcpv6_net_ops); 2389 out_tcpv6_protosw: 2390 inet6_unregister_protosw(&tcpv6_protosw); 2391 out_tcpv6_protocol: 2392 inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2393 goto out; 2394 } 2395 2396 void tcpv6_exit(void) 2397 { 2398 unregister_pernet_subsys(&tcpv6_net_ops); 2399 inet6_unregister_protosw(&tcpv6_protosw); 2400 
inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); 2401 } 2402