11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Implementation of the Transmission Control Protocol(TCP). 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * IPv4 specific functions 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * code split from: 121da177e4SLinus Torvalds * linux/ipv4/tcp.c 131da177e4SLinus Torvalds * linux/ipv4/tcp_input.c 141da177e4SLinus Torvalds * linux/ipv4/tcp_output.c 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * See tcp.c for author information 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 191da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 201da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 211da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 221da177e4SLinus Torvalds */ 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds /* 251da177e4SLinus Torvalds * Changes: 261da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 271da177e4SLinus Torvalds * This code is dedicated to John Dyson. 281da177e4SLinus Torvalds * David S. Miller : Change semantics of established hash, 291da177e4SLinus Torvalds * half is devoted to TIME_WAIT sockets 301da177e4SLinus Torvalds * and the rest go in the other half. 311da177e4SLinus Torvalds * Andi Kleen : Add support for syncookies and fixed 321da177e4SLinus Torvalds * some bugs: ip options weren't passed to 331da177e4SLinus Torvalds * the TCP layer, missed a check for an 341da177e4SLinus Torvalds * ACK bit. 351da177e4SLinus Torvalds * Andi Kleen : Implemented fast path mtu discovery. 361da177e4SLinus Torvalds * Fixed many serious bugs in the 3760236fddSArnaldo Carvalho de Melo * request_sock handling and moved 381da177e4SLinus Torvalds * most of it into the af independent code. 391da177e4SLinus Torvalds * Added tail drop and some other bugfixes. 40caa20d9aSStephen Hemminger * Added new listen semantics. 411da177e4SLinus Torvalds * Mike McLagan : Routing by source 421da177e4SLinus Torvalds * Juan Jose Ciarlante: ip_dynaddr bits 431da177e4SLinus Torvalds * Andi Kleen: various fixes. 441da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year 451da177e4SLinus Torvalds * coma. 461da177e4SLinus Torvalds * Andi Kleen : Fix new listen. 471da177e4SLinus Torvalds * Andi Kleen : Fix accept error reporting. 481da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 491da177e4SLinus Torvalds * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 501da177e4SLinus Torvalds * a single port at the same time. 511da177e4SLinus Torvalds */ 521da177e4SLinus Torvalds 53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt 541da177e4SLinus Torvalds 55eb4dea58SHerbert Xu #include <linux/bottom_half.h> 561da177e4SLinus Torvalds #include <linux/types.h> 571da177e4SLinus Torvalds #include <linux/fcntl.h> 581da177e4SLinus Torvalds #include <linux/module.h> 591da177e4SLinus Torvalds #include <linux/random.h> 601da177e4SLinus Torvalds #include <linux/cache.h> 611da177e4SLinus Torvalds #include <linux/jhash.h> 621da177e4SLinus Torvalds #include <linux/init.h> 631da177e4SLinus Torvalds #include <linux/times.h> 645a0e3ad6STejun Heo #include <linux/slab.h> 651da177e4SLinus Torvalds 66457c4cbcSEric W. Biederman #include <net/net_namespace.h> 671da177e4SLinus Torvalds #include <net/icmp.h> 68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 691da177e4SLinus Torvalds #include <net/tcp.h> 7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 711da177e4SLinus Torvalds #include <net/ipv6.h> 721da177e4SLinus Torvalds #include <net/inet_common.h> 736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 741da177e4SLinus Torvalds #include <net/xfrm.h> 756e5714eaSDavid S. Miller #include <net/secure_seq.h> 76d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h> 77076bb0c8SEliezer Tamir #include <net/busy_poll.h> 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds #include <linux/inet.h> 801da177e4SLinus Torvalds #include <linux/ipv6.h> 811da177e4SLinus Torvalds #include <linux/stddef.h> 821da177e4SLinus Torvalds #include <linux/proc_fs.h> 831da177e4SLinus Torvalds #include <linux/seq_file.h> 841da177e4SLinus Torvalds 85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h> 86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h> 87cfb6eeb4SYOSHIFUJI Hideaki 88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly; 89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly; 904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency); 911da177e4SLinus Torvalds 92cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 93a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 94318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th); 95cfb6eeb4SYOSHIFUJI Hideaki #endif 96cfb6eeb4SYOSHIFUJI Hideaki 975caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo; 984bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo); 991da177e4SLinus Torvalds 100936b8bdbSOctavian Purdila static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) 1011da177e4SLinus Torvalds { 102eddc9ec5SArnaldo Carvalho de Melo return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 103eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 104aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->dest, 105aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->source); 1061da177e4SLinus Torvalds } 1071da177e4SLinus Torvalds 1086d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1096d6ee43eSArnaldo Carvalho de Melo { 1106d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1116d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1126d6ee43eSArnaldo Carvalho de Melo 1136d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1146d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1156d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1166d6ee43eSArnaldo Carvalho de Melo 1176d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1186d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1196d6ee43eSArnaldo Carvalho de Melo holder. 1206d6ee43eSArnaldo Carvalho de Melo 1216d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1226d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 1236d6ee43eSArnaldo Carvalho de Melo */ 1246d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 12551456b29SIan Morris (!twp || (sysctl_tcp_tw_reuse && 1269d729f72SJames Morris get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 1276d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1286d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1296d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1306d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1316d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1326d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1336d6ee43eSArnaldo Carvalho de Melo return 1; 1346d6ee43eSArnaldo Carvalho de Melo } 1356d6ee43eSArnaldo Carvalho de Melo 1366d6ee43eSArnaldo Carvalho de Melo return 0; 1376d6ee43eSArnaldo Carvalho de Melo } 1386d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1396d6ee43eSArnaldo Carvalho de Melo 1401da177e4SLinus Torvalds /* This will initiate an outgoing connection. */ 1411da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1421da177e4SLinus Torvalds { 1432d7192d6SDavid S. Miller struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1441da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1451da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 146dca8b089SDavid S. Miller __be16 orig_sport, orig_dport; 147bada8adcSAl Viro __be32 daddr, nexthop; 148da905bd1SDavid S. Miller struct flowi4 *fl4; 1492d7192d6SDavid S. Miller struct rtable *rt; 1501da177e4SLinus Torvalds int err; 151f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 1521da177e4SLinus Torvalds 1531da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1541da177e4SLinus Torvalds return -EINVAL; 1551da177e4SLinus Torvalds 1561da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1571da177e4SLinus Torvalds return -EAFNOSUPPORT; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 160f6d8bd05SEric Dumazet inet_opt = rcu_dereference_protected(inet->inet_opt, 161f6d8bd05SEric Dumazet sock_owned_by_user(sk)); 162f6d8bd05SEric Dumazet if (inet_opt && inet_opt->opt.srr) { 1631da177e4SLinus Torvalds if (!daddr) 1641da177e4SLinus Torvalds return -EINVAL; 165f6d8bd05SEric Dumazet nexthop = inet_opt->opt.faddr; 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds 168dca8b089SDavid S. Miller orig_sport = inet->inet_sport; 169dca8b089SDavid S. Miller orig_dport = usin->sin_port; 170da905bd1SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 171da905bd1SDavid S. Miller rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 1721da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1731da177e4SLinus Torvalds IPPROTO_TCP, 1740e0d44abSSteffen Klassert orig_sport, orig_dport, sk); 175b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 176b23dd4feSDavid S. Miller err = PTR_ERR(rt); 177b23dd4feSDavid S. Miller if (err == -ENETUNREACH) 178f1d8cba6SEric Dumazet IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 179b23dd4feSDavid S. Miller return err; 180584bdf8cSWei Dong } 1811da177e4SLinus Torvalds 1821da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1831da177e4SLinus Torvalds ip_rt_put(rt); 1841da177e4SLinus Torvalds return -ENETUNREACH; 1851da177e4SLinus Torvalds } 1861da177e4SLinus Torvalds 187f6d8bd05SEric Dumazet if (!inet_opt || !inet_opt->opt.srr) 188da905bd1SDavid S. Miller daddr = fl4->daddr; 1891da177e4SLinus Torvalds 190c720c7e8SEric Dumazet if (!inet->inet_saddr) 191da905bd1SDavid S. Miller inet->inet_saddr = fl4->saddr; 192d1e559d0SEric Dumazet sk_rcv_saddr_set(sk, inet->inet_saddr); 1931da177e4SLinus Torvalds 194c720c7e8SEric Dumazet if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 1951da177e4SLinus Torvalds /* Reset inherited state */ 1961da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 1971da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 198ee995283SPavel Emelyanov if (likely(!tp->repair)) 1991da177e4SLinus Torvalds tp->write_seq = 0; 2001da177e4SLinus Torvalds } 2011da177e4SLinus Torvalds 202295ff7edSArnaldo Carvalho de Melo if (tcp_death_row.sysctl_tw_recycle && 20381166dd6SDavid S. Miller !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) 20481166dd6SDavid S. Miller tcp_fetch_timewait_stamp(sk, &rt->dst); 2051da177e4SLinus Torvalds 206c720c7e8SEric Dumazet inet->inet_dport = usin->sin_port; 207d1e559d0SEric Dumazet sk_daddr_set(sk, daddr); 2081da177e4SLinus Torvalds 209d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 210f6d8bd05SEric Dumazet if (inet_opt) 211f6d8bd05SEric Dumazet inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 2121da177e4SLinus Torvalds 213bee7ca9eSWilliam Allen Simpson tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 2141da177e4SLinus Torvalds 2151da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 2161da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2171da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2181da177e4SLinus Torvalds * complete initialization after this. 2191da177e4SLinus Torvalds */ 2201da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 221a7f5e7f1SArnaldo Carvalho de Melo err = inet_hash_connect(&tcp_death_row, sk); 2221da177e4SLinus Torvalds if (err) 2231da177e4SLinus Torvalds goto failure; 2241da177e4SLinus Torvalds 2259e7ceb06SSathya Perla inet_set_txhash(sk); 2269e7ceb06SSathya Perla 227da905bd1SDavid S. Miller rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 228c720c7e8SEric Dumazet inet->inet_sport, inet->inet_dport, sk); 229b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 230b23dd4feSDavid S. Miller err = PTR_ERR(rt); 231b23dd4feSDavid S. Miller rt = NULL; 2321da177e4SLinus Torvalds goto failure; 233b23dd4feSDavid S. Miller } 2341da177e4SLinus Torvalds /* OK, now commit destination to socket. */ 235bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 236d8d1f30bSChangli Gao sk_setup_caps(sk, &rt->dst); 2371da177e4SLinus Torvalds 238ee995283SPavel Emelyanov if (!tp->write_seq && likely(!tp->repair)) 239c720c7e8SEric Dumazet tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 240c720c7e8SEric Dumazet inet->inet_daddr, 241c720c7e8SEric Dumazet inet->inet_sport, 2421da177e4SLinus Torvalds usin->sin_port); 2431da177e4SLinus Torvalds 244c720c7e8SEric Dumazet inet->inet_id = tp->write_seq ^ jiffies; 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds err = tcp_connect(sk); 247ee995283SPavel Emelyanov 2481da177e4SLinus Torvalds rt = NULL; 2491da177e4SLinus Torvalds if (err) 2501da177e4SLinus Torvalds goto failure; 2511da177e4SLinus Torvalds 2521da177e4SLinus Torvalds return 0; 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds failure: 2557174259eSArnaldo Carvalho de Melo /* 2567174259eSArnaldo Carvalho de Melo * This unhashes the socket and releases the local port, 2577174259eSArnaldo Carvalho de Melo * if necessary. 2587174259eSArnaldo Carvalho de Melo */ 2591da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2601da177e4SLinus Torvalds ip_rt_put(rt); 2611da177e4SLinus Torvalds sk->sk_route_caps = 0; 262c720c7e8SEric Dumazet inet->inet_dport = 0; 2631da177e4SLinus Torvalds return err; 2641da177e4SLinus Torvalds } 2654bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect); 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds /* 268563d34d0SEric Dumazet * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. 269563d34d0SEric Dumazet * It can be called through tcp_release_cb() if socket was owned by user 270563d34d0SEric Dumazet * at the time tcp_v4_err() was called to handle ICMP message. 2711da177e4SLinus Torvalds */ 2724fab9071SNeal Cardwell void tcp_v4_mtu_reduced(struct sock *sk) 2731da177e4SLinus Torvalds { 2741da177e4SLinus Torvalds struct dst_entry *dst; 2751da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 276563d34d0SEric Dumazet u32 mtu = tcp_sk(sk)->mtu_info; 2771da177e4SLinus Torvalds 27880d0a69fSDavid S. Miller dst = inet_csk_update_pmtu(sk, mtu); 27980d0a69fSDavid S. Miller if (!dst) 2801da177e4SLinus Torvalds return; 2811da177e4SLinus Torvalds 2821da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 2831da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 2841da177e4SLinus Torvalds */ 2851da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 2861da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 2871da177e4SLinus Torvalds 2881da177e4SLinus Torvalds mtu = dst_mtu(dst); 2891da177e4SLinus Torvalds 2901da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 291482fc609SHannes Frederic Sowa ip_sk_accept_pmtu(sk) && 292d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 2931da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 2941da177e4SLinus Torvalds 2951da177e4SLinus Torvalds /* Resend the TCP packet because it's 2961da177e4SLinus Torvalds * clear that the old packet has been 2971da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 2981da177e4SLinus Torvalds * discovery. 2991da177e4SLinus Torvalds */ 3001da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3011da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3021da177e4SLinus Torvalds } 3034fab9071SNeal Cardwell EXPORT_SYMBOL(tcp_v4_mtu_reduced); 3041da177e4SLinus Torvalds 30555be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk) 30655be7a9cSDavid S. Miller { 30755be7a9cSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0); 30855be7a9cSDavid S. Miller 3091ed5c48fSDavid S. Miller if (dst) 3106700c270SDavid S. Miller dst->ops->redirect(dst, sk, skb); 31155be7a9cSDavid S. Miller } 31255be7a9cSDavid S. Miller 31326e37360SEric Dumazet 31426e37360SEric Dumazet /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ 31526e37360SEric Dumazet void tcp_req_err(struct sock *sk, u32 seq) 31626e37360SEric Dumazet { 31726e37360SEric Dumazet struct request_sock *req = inet_reqsk(sk); 31826e37360SEric Dumazet struct net *net = sock_net(sk); 31926e37360SEric Dumazet 32026e37360SEric Dumazet /* ICMPs are not backlogged, hence we cannot get 32126e37360SEric Dumazet * an established socket here. 32226e37360SEric Dumazet */ 32326e37360SEric Dumazet WARN_ON(req->sk); 32426e37360SEric Dumazet 32526e37360SEric Dumazet if (seq != tcp_rsk(req)->snt_isn) { 32626e37360SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 327c6973669SFan Du reqsk_put(req); 32826e37360SEric Dumazet } else { 32926e37360SEric Dumazet /* 33026e37360SEric Dumazet * Still in SYN_RECV, just remove it silently. 33126e37360SEric Dumazet * There is no good way to pass the error to the newly 33226e37360SEric Dumazet * created socket, and POSIX does not want network 33326e37360SEric Dumazet * errors returned from accept(). 33426e37360SEric Dumazet */ 33526e37360SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); 336c6973669SFan Du inet_csk_reqsk_queue_drop(req->rsk_listener, req); 33726e37360SEric Dumazet } 33826e37360SEric Dumazet } 33926e37360SEric Dumazet EXPORT_SYMBOL(tcp_req_err); 34026e37360SEric Dumazet 3411da177e4SLinus Torvalds /* 3421da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3431da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3441da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3451da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3461da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3471da177e4SLinus Torvalds * to find the appropriate port. 3481da177e4SLinus Torvalds * 3491da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". When 3501da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3511da177e4SLinus Torvalds * and for some paths there is no check at all. 3521da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3531da177e4SLinus Torvalds * is probably better. 3541da177e4SLinus Torvalds * 3551da177e4SLinus Torvalds */ 3561da177e4SLinus Torvalds 3574d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 3581da177e4SLinus Torvalds { 359b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; 3604d1a2d9eSDamian Lukowski struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 361f1ecd5d9SDamian Lukowski struct inet_connection_sock *icsk; 3621da177e4SLinus Torvalds struct tcp_sock *tp; 3631da177e4SLinus Torvalds struct inet_sock *inet; 3644d1a2d9eSDamian Lukowski const int type = icmp_hdr(icmp_skb)->type; 3654d1a2d9eSDamian Lukowski const int code = icmp_hdr(icmp_skb)->code; 3661da177e4SLinus Torvalds struct sock *sk; 367f1ecd5d9SDamian Lukowski struct sk_buff *skb; 3680a672f74SYuchung Cheng struct request_sock *fastopen; 3690a672f74SYuchung Cheng __u32 seq, snd_una; 370f1ecd5d9SDamian Lukowski __u32 remaining; 3711da177e4SLinus Torvalds int err; 3724d1a2d9eSDamian Lukowski struct net *net = dev_net(icmp_skb->dev); 3731da177e4SLinus Torvalds 37426e37360SEric Dumazet sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, 37526e37360SEric Dumazet th->dest, iph->saddr, ntohs(th->source), 37626e37360SEric Dumazet inet_iif(icmp_skb)); 3771da177e4SLinus Torvalds if (!sk) { 378dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3791da177e4SLinus Torvalds return; 3801da177e4SLinus Torvalds } 3811da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 3829469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 3831da177e4SLinus Torvalds return; 3841da177e4SLinus Torvalds } 38526e37360SEric Dumazet seq = ntohl(th->seq); 38626e37360SEric Dumazet if (sk->sk_state == TCP_NEW_SYN_RECV) 38726e37360SEric Dumazet return tcp_req_err(sk, seq); 3881da177e4SLinus Torvalds 3891da177e4SLinus Torvalds bh_lock_sock(sk); 3901da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 3911da177e4SLinus Torvalds * servers this needs to be solved differently. 392563d34d0SEric Dumazet * We do take care of PMTU discovery (RFC1191) special case : 393563d34d0SEric Dumazet * we can receive locally generated ICMP messages while socket is held. 3941da177e4SLinus Torvalds */ 395b74aa930SEric Dumazet if (sock_owned_by_user(sk)) { 396b74aa930SEric Dumazet if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) 397de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 398b74aa930SEric Dumazet } 3991da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 4001da177e4SLinus Torvalds goto out; 4011da177e4SLinus Torvalds 40297e3ecd1Sstephen hemminger if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 40397e3ecd1Sstephen hemminger NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 40497e3ecd1Sstephen hemminger goto out; 40597e3ecd1Sstephen hemminger } 40697e3ecd1Sstephen hemminger 407f1ecd5d9SDamian Lukowski icsk = inet_csk(sk); 4081da177e4SLinus Torvalds tp = tcp_sk(sk); 4090a672f74SYuchung Cheng /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 4100a672f74SYuchung Cheng fastopen = tp->fastopen_rsk; 4110a672f74SYuchung Cheng snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 4121da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 4130a672f74SYuchung Cheng !between(seq, snd_una, tp->snd_nxt)) { 414de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 4151da177e4SLinus Torvalds goto out; 4161da177e4SLinus Torvalds } 4171da177e4SLinus Torvalds 4181da177e4SLinus Torvalds switch (type) { 41955be7a9cSDavid S. Miller case ICMP_REDIRECT: 42055be7a9cSDavid S. Miller do_redirect(icmp_skb, sk); 42155be7a9cSDavid S. Miller goto out; 4221da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 4231da177e4SLinus Torvalds /* Just silently ignore these. */ 4241da177e4SLinus Torvalds goto out; 4251da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 4261da177e4SLinus Torvalds err = EPROTO; 4271da177e4SLinus Torvalds break; 4281da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 4291da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 4301da177e4SLinus Torvalds goto out; 4311da177e4SLinus Torvalds 4321da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 4330d4f0608SEric Dumazet /* We are not interested in TCP_LISTEN and open_requests 4340d4f0608SEric Dumazet * (SYN-ACKs send out by Linux are always <576bytes so 4350d4f0608SEric Dumazet * they should go through unfragmented). 4360d4f0608SEric Dumazet */ 4370d4f0608SEric Dumazet if (sk->sk_state == TCP_LISTEN) 4380d4f0608SEric Dumazet goto out; 4390d4f0608SEric Dumazet 440563d34d0SEric Dumazet tp->mtu_info = info; 441144d56e9SEric Dumazet if (!sock_owned_by_user(sk)) { 442563d34d0SEric Dumazet tcp_v4_mtu_reduced(sk); 443144d56e9SEric Dumazet } else { 444144d56e9SEric Dumazet if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) 445144d56e9SEric Dumazet sock_hold(sk); 446144d56e9SEric Dumazet } 4471da177e4SLinus Torvalds goto out; 4481da177e4SLinus Torvalds } 4491da177e4SLinus Torvalds 4501da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 451f1ecd5d9SDamian Lukowski /* check if icmp_skb allows revert of backoff 452f1ecd5d9SDamian Lukowski * (see draft-zimmermann-tcp-lcd) */ 453f1ecd5d9SDamian Lukowski if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) 454f1ecd5d9SDamian Lukowski break; 455f1ecd5d9SDamian Lukowski if (seq != tp->snd_una || !icsk->icsk_retransmits || 4560a672f74SYuchung Cheng !icsk->icsk_backoff || fastopen) 457f1ecd5d9SDamian Lukowski break; 458f1ecd5d9SDamian Lukowski 4598f49c270SDavid S. Miller if (sock_owned_by_user(sk)) 4608f49c270SDavid S. Miller break; 4618f49c270SDavid S. Miller 462f1ecd5d9SDamian Lukowski icsk->icsk_backoff--; 463fcdd1cf4SEric Dumazet icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : 464fcdd1cf4SEric Dumazet TCP_TIMEOUT_INIT; 465fcdd1cf4SEric Dumazet icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); 466f1ecd5d9SDamian Lukowski 467f1ecd5d9SDamian Lukowski skb = tcp_write_queue_head(sk); 468f1ecd5d9SDamian Lukowski BUG_ON(!skb); 469f1ecd5d9SDamian Lukowski 4707faee5c0SEric Dumazet remaining = icsk->icsk_rto - 4717faee5c0SEric Dumazet min(icsk->icsk_rto, 4727faee5c0SEric Dumazet tcp_time_stamp - tcp_skb_timestamp(skb)); 473f1ecd5d9SDamian Lukowski 474f1ecd5d9SDamian Lukowski if (remaining) { 475f1ecd5d9SDamian Lukowski inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 476f1ecd5d9SDamian Lukowski remaining, TCP_RTO_MAX); 477f1ecd5d9SDamian Lukowski } else { 478f1ecd5d9SDamian Lukowski /* RTO revert clocked out retransmission. 479f1ecd5d9SDamian Lukowski * Will retransmit now */ 480f1ecd5d9SDamian Lukowski tcp_retransmit_timer(sk); 481f1ecd5d9SDamian Lukowski } 482f1ecd5d9SDamian Lukowski 4831da177e4SLinus Torvalds break; 4841da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4851da177e4SLinus Torvalds err = EHOSTUNREACH; 4861da177e4SLinus Torvalds break; 4871da177e4SLinus Torvalds default: 4881da177e4SLinus Torvalds goto out; 4891da177e4SLinus Torvalds } 4901da177e4SLinus Torvalds 4911da177e4SLinus Torvalds switch (sk->sk_state) { 4921da177e4SLinus Torvalds case TCP_SYN_SENT: 4930a672f74SYuchung Cheng case TCP_SYN_RECV: 4940a672f74SYuchung Cheng /* Only in fast or simultaneous open. If a fast open socket is 4950a672f74SYuchung Cheng * is already accepted it is treated as a connected one below. 4961da177e4SLinus Torvalds */ 49751456b29SIan Morris if (fastopen && !fastopen->sk) 4980a672f74SYuchung Cheng break; 4990a672f74SYuchung Cheng 5001da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 5011da177e4SLinus Torvalds sk->sk_err = err; 5021da177e4SLinus Torvalds 5031da177e4SLinus Torvalds sk->sk_error_report(sk); 5041da177e4SLinus Torvalds 5051da177e4SLinus Torvalds tcp_done(sk); 5061da177e4SLinus Torvalds } else { 5071da177e4SLinus Torvalds sk->sk_err_soft = err; 5081da177e4SLinus Torvalds } 5091da177e4SLinus Torvalds goto out; 5101da177e4SLinus Torvalds } 5111da177e4SLinus Torvalds 5121da177e4SLinus Torvalds /* If we've already connected we will keep trying 5131da177e4SLinus Torvalds * until we time out, or the user gives up. 5141da177e4SLinus Torvalds * 5151da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 5161da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 5171da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 5181da177e4SLinus Torvalds * 5191da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 5201da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 5211da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 5221da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 5231da177e4SLinus Torvalds * 5241da177e4SLinus Torvalds * Now we are in compliance with RFCs. 5251da177e4SLinus Torvalds * --ANK (980905) 5261da177e4SLinus Torvalds */ 5271da177e4SLinus Torvalds 5281da177e4SLinus Torvalds inet = inet_sk(sk); 5291da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 5301da177e4SLinus Torvalds sk->sk_err = err; 5311da177e4SLinus Torvalds sk->sk_error_report(sk); 5321da177e4SLinus Torvalds } else { /* Only an error on timeout */ 5331da177e4SLinus Torvalds sk->sk_err_soft = err; 5341da177e4SLinus Torvalds } 5351da177e4SLinus Torvalds 5361da177e4SLinus Torvalds out: 5371da177e4SLinus Torvalds bh_unlock_sock(sk); 5381da177e4SLinus Torvalds sock_put(sk); 5391da177e4SLinus Torvalds } 5401da177e4SLinus Torvalds 54128850dc7SDaniel Borkmann void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) 5421da177e4SLinus Torvalds { 543aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 5441da177e4SLinus Torvalds 54584fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) { 546419f9f89SHerbert Xu th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); 547663ead3bSHerbert Xu skb->csum_start = skb_transport_header(skb) - skb->head; 548ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 5491da177e4SLinus Torvalds } else { 550419f9f89SHerbert Xu th->check = tcp_v4_check(skb->len, saddr, daddr, 55107f0757aSJoe Perches csum_partial(th, 5521da177e4SLinus Torvalds th->doff << 2, 5531da177e4SLinus Torvalds skb->csum)); 5541da177e4SLinus Torvalds } 5551da177e4SLinus Torvalds } 5561da177e4SLinus Torvalds 557419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */ 558bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) 559419f9f89SHerbert Xu { 560cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 561419f9f89SHerbert Xu 562419f9f89SHerbert Xu __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 563419f9f89SHerbert Xu } 5644bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check); 565419f9f89SHerbert Xu 5661da177e4SLinus Torvalds /* 5671da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 5681da177e4SLinus Torvalds * 5691da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 5701da177e4SLinus Torvalds * for reset. 5711da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 5721da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 5731da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 5741da177e4SLinus Torvalds * So that we build reply only basing on parameters 5751da177e4SLinus Torvalds * arrived with segment. 5761da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 5771da177e4SLinus Torvalds */ 5781da177e4SLinus Torvalds 579cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) 5801da177e4SLinus Torvalds { 581cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 582cfb6eeb4SYOSHIFUJI Hideaki struct { 583cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr th; 584cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 585714e85beSAl Viro __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; 586cfb6eeb4SYOSHIFUJI Hideaki #endif 587cfb6eeb4SYOSHIFUJI Hideaki } rep; 5881da177e4SLinus Torvalds struct ip_reply_arg arg; 589cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 590cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 591658ddaafSShawn Lu const __u8 *hash_location = NULL; 592658ddaafSShawn Lu unsigned char newhash[16]; 593658ddaafSShawn Lu int genhash; 594658ddaafSShawn Lu struct sock *sk1 = NULL; 595cfb6eeb4SYOSHIFUJI Hideaki #endif 596a86b1e30SPavel Emelyanov struct net *net; 5971da177e4SLinus Torvalds 5981da177e4SLinus Torvalds /* Never send a reset in response to a reset. */ 5991da177e4SLinus Torvalds if (th->rst) 6001da177e4SLinus Torvalds return; 6011da177e4SLinus Torvalds 602c3658e8dSEric Dumazet /* If sk not NULL, it means we did a successful lookup and incoming 603c3658e8dSEric Dumazet * route had to be correct. prequeue might have dropped our dst. 604c3658e8dSEric Dumazet */ 605c3658e8dSEric Dumazet if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL) 6061da177e4SLinus Torvalds return; 6071da177e4SLinus Torvalds 6081da177e4SLinus Torvalds /* Swap the send and the receive. */ 609cfb6eeb4SYOSHIFUJI Hideaki memset(&rep, 0, sizeof(rep)); 610cfb6eeb4SYOSHIFUJI Hideaki rep.th.dest = th->source; 611cfb6eeb4SYOSHIFUJI Hideaki rep.th.source = th->dest; 612cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = sizeof(struct tcphdr) / 4; 613cfb6eeb4SYOSHIFUJI Hideaki rep.th.rst = 1; 6141da177e4SLinus Torvalds 6151da177e4SLinus Torvalds if (th->ack) { 616cfb6eeb4SYOSHIFUJI Hideaki rep.th.seq = th->ack_seq; 6171da177e4SLinus Torvalds } else { 618cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack = 1; 619cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 6201da177e4SLinus Torvalds skb->len - (th->doff << 2)); 6211da177e4SLinus Torvalds } 6221da177e4SLinus Torvalds 6237174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 624cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_base = (unsigned char *)&rep; 625cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len = sizeof(rep.th); 626cfb6eeb4SYOSHIFUJI Hideaki 6270f85feaeSEric Dumazet net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 628cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 629658ddaafSShawn Lu hash_location = tcp_parse_md5sig_option(th); 630658ddaafSShawn Lu if (!sk && hash_location) { 631658ddaafSShawn Lu /* 632658ddaafSShawn Lu * active side is lost. Try to find listening socket through 633658ddaafSShawn Lu * source port, and then find md5 key through listening socket. 634658ddaafSShawn Lu * we are not loose security here: 635658ddaafSShawn Lu * Incoming packet is checked with md5 hash with finding key, 636658ddaafSShawn Lu * no RST generated if md5 hash doesn't match. 637658ddaafSShawn Lu */ 6380f85feaeSEric Dumazet sk1 = __inet_lookup_listener(net, 639da5e3630STom Herbert &tcp_hashinfo, ip_hdr(skb)->saddr, 640da5e3630STom Herbert th->source, ip_hdr(skb)->daddr, 641658ddaafSShawn Lu ntohs(th->source), inet_iif(skb)); 642658ddaafSShawn Lu /* don't send rst if it can't find key */ 643658ddaafSShawn Lu if (!sk1) 644658ddaafSShawn Lu return; 645658ddaafSShawn Lu rcu_read_lock(); 646658ddaafSShawn Lu key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) 647658ddaafSShawn Lu &ip_hdr(skb)->saddr, AF_INET); 648658ddaafSShawn Lu if (!key) 649658ddaafSShawn Lu goto release_sk1; 650658ddaafSShawn Lu 65139f8e58eSEric Dumazet genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); 652658ddaafSShawn Lu if (genhash || memcmp(hash_location, newhash, 16) != 0) 653658ddaafSShawn Lu goto release_sk1; 654658ddaafSShawn Lu } else { 655658ddaafSShawn Lu key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) 656658ddaafSShawn Lu &ip_hdr(skb)->saddr, 657a915da9bSEric Dumazet AF_INET) : NULL; 658658ddaafSShawn Lu } 659658ddaafSShawn Lu 660cfb6eeb4SYOSHIFUJI Hideaki if (key) { 661cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | 662cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 663cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 664cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 665cfb6eeb4SYOSHIFUJI Hideaki /* Update length and the length the header thinks exists */ 666cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 667cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len / 4; 668cfb6eeb4SYOSHIFUJI Hideaki 66949a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], 67078e645cbSIlpo Järvinen key, ip_hdr(skb)->saddr, 67178e645cbSIlpo Järvinen ip_hdr(skb)->daddr, &rep.th); 672cfb6eeb4SYOSHIFUJI Hideaki } 673cfb6eeb4SYOSHIFUJI Hideaki #endif 674eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 675eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 67652cd5750SIlpo Järvinen arg.iov[0].iov_len, IPPROTO_TCP, 0); 6771da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 67888ef4a5aSKOVACS Krisztian arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 679e2446eaaSShawn Lu /* When socket is gone, all binding information is lost. 6804c675258SAlexey Kuznetsov * routing might fail in this case. No choice here, if we choose to force 6814c675258SAlexey Kuznetsov * input interface, we will misroute in case of asymmetric route. 682e2446eaaSShawn Lu */ 6834c675258SAlexey Kuznetsov if (sk) 6844c675258SAlexey Kuznetsov arg.bound_dev_if = sk->sk_bound_dev_if; 6851da177e4SLinus Torvalds 68666b13d99SEric Dumazet arg.tos = ip_hdr(skb)->tos; 687bdbbb852SEric Dumazet ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 688bdbbb852SEric Dumazet skb, &TCP_SKB_CB(skb)->header.h4.opt, 68924a2d43dSEric Dumazet ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 69024a2d43dSEric Dumazet &arg, arg.iov[0].iov_len); 6911da177e4SLinus Torvalds 69263231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 69363231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 694658ddaafSShawn Lu 695658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG 696658ddaafSShawn Lu release_sk1: 697658ddaafSShawn Lu if (sk1) { 698658ddaafSShawn Lu rcu_read_unlock(); 699658ddaafSShawn Lu sock_put(sk1); 700658ddaafSShawn Lu } 701658ddaafSShawn Lu #endif 7021da177e4SLinus Torvalds } 7031da177e4SLinus Torvalds 7041da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 7051da177e4SLinus Torvalds outside socket context is ugly, certainly. What can I do? 7061da177e4SLinus Torvalds */ 7071da177e4SLinus Torvalds 7089501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 709ee684b6fSAndrey Vagin u32 win, u32 tsval, u32 tsecr, int oif, 71088ef4a5aSKOVACS Krisztian struct tcp_md5sig_key *key, 71166b13d99SEric Dumazet int reply_flags, u8 tos) 7121da177e4SLinus Torvalds { 713cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 7141da177e4SLinus Torvalds struct { 7151da177e4SLinus Torvalds struct tcphdr th; 716714e85beSAl Viro __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 717cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 718cfb6eeb4SYOSHIFUJI Hideaki + (TCPOLEN_MD5SIG_ALIGNED >> 2) 719cfb6eeb4SYOSHIFUJI Hideaki #endif 720cfb6eeb4SYOSHIFUJI Hideaki ]; 7211da177e4SLinus Torvalds } rep; 7221da177e4SLinus Torvalds struct ip_reply_arg arg; 723adf30907SEric Dumazet struct net *net = dev_net(skb_dst(skb)->dev); 7241da177e4SLinus Torvalds 7251da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 7267174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 7271da177e4SLinus Torvalds 7281da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 7291da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 730ee684b6fSAndrey Vagin if (tsecr) { 731cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 7321da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 7331da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 734ee684b6fSAndrey Vagin rep.opt[1] = htonl(tsval); 735ee684b6fSAndrey Vagin rep.opt[2] = htonl(tsecr); 736cb48cfe8SCraig Schlenter arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 7371da177e4SLinus Torvalds } 7381da177e4SLinus Torvalds 7391da177e4SLinus Torvalds /* Swap the send and the receive. */ 7401da177e4SLinus Torvalds rep.th.dest = th->source; 7411da177e4SLinus Torvalds rep.th.source = th->dest; 7421da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 7431da177e4SLinus Torvalds rep.th.seq = htonl(seq); 7441da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 7451da177e4SLinus Torvalds rep.th.ack = 1; 7461da177e4SLinus Torvalds rep.th.window = htons(win); 7471da177e4SLinus Torvalds 748cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 749cfb6eeb4SYOSHIFUJI Hideaki if (key) { 750ee684b6fSAndrey Vagin int offset = (tsecr) ? 3 : 0; 751cfb6eeb4SYOSHIFUJI Hideaki 752cfb6eeb4SYOSHIFUJI Hideaki rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 753cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 754cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 755cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 756cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 757cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len/4; 758cfb6eeb4SYOSHIFUJI Hideaki 75949a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], 76090b7e112SAdam Langley key, ip_hdr(skb)->saddr, 76190b7e112SAdam Langley ip_hdr(skb)->daddr, &rep.th); 762cfb6eeb4SYOSHIFUJI Hideaki } 763cfb6eeb4SYOSHIFUJI Hideaki #endif 76488ef4a5aSKOVACS Krisztian arg.flags = reply_flags; 765eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 766eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 7671da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 7681da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 7699501f972SYOSHIFUJI Hideaki if (oif) 7709501f972SYOSHIFUJI Hideaki arg.bound_dev_if = oif; 77166b13d99SEric Dumazet arg.tos = tos; 772bdbbb852SEric Dumazet ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 773bdbbb852SEric Dumazet skb, &TCP_SKB_CB(skb)->header.h4.opt, 77424a2d43dSEric Dumazet ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 77524a2d43dSEric Dumazet &arg, arg.iov[0].iov_len); 7761da177e4SLinus Torvalds 77763231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 7781da177e4SLinus Torvalds } 7791da177e4SLinus Torvalds 7801da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 7811da177e4SLinus Torvalds { 7828feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 783cfb6eeb4SYOSHIFUJI Hideaki struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 7841da177e4SLinus Torvalds 7859501f972SYOSHIFUJI Hideaki tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 7867174259eSArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 787ee684b6fSAndrey Vagin tcp_time_stamp + tcptw->tw_ts_offset, 7889501f972SYOSHIFUJI Hideaki tcptw->tw_ts_recent, 7899501f972SYOSHIFUJI Hideaki tw->tw_bound_dev_if, 79088ef4a5aSKOVACS Krisztian tcp_twsk_md5_key(tcptw), 79166b13d99SEric Dumazet tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, 79266b13d99SEric Dumazet tw->tw_tos 7939501f972SYOSHIFUJI Hideaki ); 7941da177e4SLinus Torvalds 7958feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 7961da177e4SLinus Torvalds } 7971da177e4SLinus Torvalds 7986edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 7997174259eSArnaldo Carvalho de Melo struct request_sock *req) 8001da177e4SLinus Torvalds { 801168a8f58SJerry Chu /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 802168a8f58SJerry Chu * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 803168a8f58SJerry Chu */ 804168a8f58SJerry Chu tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? 805168a8f58SJerry Chu tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 806168a8f58SJerry Chu tcp_rsk(req)->rcv_nxt, req->rcv_wnd, 807ee684b6fSAndrey Vagin tcp_time_stamp, 8089501f972SYOSHIFUJI Hideaki req->ts_recent, 8099501f972SYOSHIFUJI Hideaki 0, 810a915da9bSEric Dumazet tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 811a915da9bSEric Dumazet AF_INET), 81266b13d99SEric Dumazet inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, 81366b13d99SEric Dumazet ip_hdr(skb)->tos); 8141da177e4SLinus Torvalds } 8151da177e4SLinus Torvalds 8161da177e4SLinus Torvalds /* 8179bf1d83eSKris Katterjohn * Send a SYN-ACK after having received a SYN. 81860236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 8191da177e4SLinus Torvalds * socket. 8201da177e4SLinus Torvalds */ 82172659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 822d6274bd8SOctavian Purdila struct flowi *fl, 823e6b4d113SWilliam Allen Simpson struct request_sock *req, 824843f4a55SYuchung Cheng u16 queue_mapping, 825843f4a55SYuchung Cheng struct tcp_fastopen_cookie *foc) 8261da177e4SLinus Torvalds { 8272e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 8286bd023f3SDavid S. Miller struct flowi4 fl4; 8291da177e4SLinus Torvalds int err = -1; 8301da177e4SLinus Torvalds struct sk_buff *skb; 8311da177e4SLinus Torvalds 8321da177e4SLinus Torvalds /* First, grab a route. */ 833ba3f7f04SDavid S. Miller if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 834fd80eb94SDenis V. Lunev return -1; 8351da177e4SLinus Torvalds 836843f4a55SYuchung Cheng skb = tcp_make_synack(sk, dst, req, foc); 8371da177e4SLinus Torvalds 8381da177e4SLinus Torvalds if (skb) { 839634fb979SEric Dumazet __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); 8401da177e4SLinus Torvalds 841fff32699SEric Dumazet skb_set_queue_mapping(skb, queue_mapping); 842634fb979SEric Dumazet err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 843634fb979SEric Dumazet ireq->ir_rmt_addr, 8442e6599cbSArnaldo Carvalho de Melo ireq->opt); 845b9df3cb8SGerrit Renker err = net_xmit_eval(err); 8461da177e4SLinus Torvalds } 8471da177e4SLinus Torvalds 8481da177e4SLinus Torvalds return err; 8491da177e4SLinus Torvalds } 8501da177e4SLinus Torvalds 8511da177e4SLinus Torvalds /* 85260236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 8531da177e4SLinus Torvalds */ 85460236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req) 8551da177e4SLinus Torvalds { 8562e6599cbSArnaldo Carvalho de Melo kfree(inet_rsk(req)->opt); 8571da177e4SLinus Torvalds } 8581da177e4SLinus Torvalds 8591da177e4SLinus Torvalds 860cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 861cfb6eeb4SYOSHIFUJI Hideaki /* 862cfb6eeb4SYOSHIFUJI Hideaki * RFC2385 MD5 checksumming requires a mapping of 863cfb6eeb4SYOSHIFUJI Hideaki * IP address->MD5 Key. 864cfb6eeb4SYOSHIFUJI Hideaki * We need to maintain these in the sk structure. 865cfb6eeb4SYOSHIFUJI Hideaki */ 866cfb6eeb4SYOSHIFUJI Hideaki 867cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address. */ 868a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, 869a915da9bSEric Dumazet const union tcp_md5_addr *addr, 870a915da9bSEric Dumazet int family) 871cfb6eeb4SYOSHIFUJI Hideaki { 872fd3a154aSEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 873a915da9bSEric Dumazet struct tcp_md5sig_key *key; 874a915da9bSEric Dumazet unsigned int size = sizeof(struct in_addr); 875fd3a154aSEric Dumazet const struct tcp_md5sig_info *md5sig; 876cfb6eeb4SYOSHIFUJI Hideaki 877a8afca03SEric Dumazet /* caller either holds rcu_read_lock() or socket lock */ 878a8afca03SEric Dumazet md5sig = rcu_dereference_check(tp->md5sig_info, 879b4fb05eaSEric Dumazet sock_owned_by_user(sk) || 880b4fb05eaSEric Dumazet lockdep_is_held(&sk->sk_lock.slock)); 881a8afca03SEric Dumazet if (!md5sig) 882cfb6eeb4SYOSHIFUJI Hideaki return NULL; 883a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 884a915da9bSEric Dumazet if (family == AF_INET6) 885a915da9bSEric Dumazet size = sizeof(struct in6_addr); 886a915da9bSEric Dumazet #endif 887b67bfe0dSSasha Levin hlist_for_each_entry_rcu(key, &md5sig->head, node) { 888a915da9bSEric Dumazet if (key->family != family) 889a915da9bSEric Dumazet continue; 890a915da9bSEric Dumazet if (!memcmp(&key->addr, addr, size)) 891a915da9bSEric Dumazet return key; 892cfb6eeb4SYOSHIFUJI Hideaki } 893cfb6eeb4SYOSHIFUJI Hideaki return NULL; 894cfb6eeb4SYOSHIFUJI Hideaki } 895a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup); 896cfb6eeb4SYOSHIFUJI Hideaki 897cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, 898fd3a154aSEric Dumazet const struct sock *addr_sk) 899cfb6eeb4SYOSHIFUJI Hideaki { 900b52e6921SEric Dumazet const union tcp_md5_addr *addr; 901a915da9bSEric Dumazet 902b52e6921SEric Dumazet addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; 903a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 904cfb6eeb4SYOSHIFUJI Hideaki } 905cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup); 906cfb6eeb4SYOSHIFUJI Hideaki 907cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */ 908a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 909a915da9bSEric Dumazet int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) 910cfb6eeb4SYOSHIFUJI Hideaki { 911cfb6eeb4SYOSHIFUJI Hideaki /* Add Key to the list */ 912b0a713e9SMatthias M. Dellweg struct tcp_md5sig_key *key; 913cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 914f6685938SArnaldo Carvalho de Melo struct tcp_md5sig_info *md5sig; 915f6685938SArnaldo Carvalho de Melo 916c0353c7bSAydin Arik key = tcp_md5_do_lookup(sk, addr, family); 917a915da9bSEric Dumazet if (key) { 918a915da9bSEric Dumazet /* Pre-existing entry - just update that one. */ 919a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 920a915da9bSEric Dumazet key->keylen = newkeylen; 921a915da9bSEric Dumazet return 0; 922cfb6eeb4SYOSHIFUJI Hideaki } 923260fcbebSYan, Zheng 924a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 925a8afca03SEric Dumazet sock_owned_by_user(sk)); 926a915da9bSEric Dumazet if (!md5sig) { 927a915da9bSEric Dumazet md5sig = kmalloc(sizeof(*md5sig), gfp); 928a915da9bSEric Dumazet if (!md5sig) 929a915da9bSEric Dumazet return -ENOMEM; 930a915da9bSEric Dumazet 931a915da9bSEric Dumazet sk_nocaps_add(sk, NETIF_F_GSO_MASK); 932a915da9bSEric Dumazet INIT_HLIST_HEAD(&md5sig->head); 933a8afca03SEric Dumazet rcu_assign_pointer(tp->md5sig_info, md5sig); 934a915da9bSEric Dumazet } 935a915da9bSEric Dumazet 9365f3d9cb2SEric Dumazet key = sock_kmalloc(sk, sizeof(*key), gfp); 937a915da9bSEric Dumazet if (!key) 938a915da9bSEric Dumazet return -ENOMEM; 93971cea17eSEric Dumazet if (!tcp_alloc_md5sig_pool()) { 9405f3d9cb2SEric Dumazet sock_kfree_s(sk, key, sizeof(*key)); 941cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 942cfb6eeb4SYOSHIFUJI Hideaki } 943f6685938SArnaldo Carvalho de Melo 944a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 945a915da9bSEric Dumazet key->keylen = newkeylen; 946a915da9bSEric Dumazet key->family = family; 947a915da9bSEric Dumazet memcpy(&key->addr, addr, 948a915da9bSEric Dumazet (family == AF_INET6) ? sizeof(struct in6_addr) : 949a915da9bSEric Dumazet sizeof(struct in_addr)); 950a915da9bSEric Dumazet hlist_add_head_rcu(&key->node, &md5sig->head); 951cfb6eeb4SYOSHIFUJI Hideaki return 0; 952cfb6eeb4SYOSHIFUJI Hideaki } 953a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add); 954cfb6eeb4SYOSHIFUJI Hideaki 955a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) 956cfb6eeb4SYOSHIFUJI Hideaki { 957a915da9bSEric Dumazet struct tcp_md5sig_key *key; 958cfb6eeb4SYOSHIFUJI Hideaki 959c0353c7bSAydin Arik key = tcp_md5_do_lookup(sk, addr, family); 960a915da9bSEric Dumazet if (!key) 961cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 962a915da9bSEric Dumazet hlist_del_rcu(&key->node); 9635f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 964a915da9bSEric Dumazet kfree_rcu(key, rcu); 965a915da9bSEric Dumazet return 0; 966cfb6eeb4SYOSHIFUJI Hideaki } 967a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del); 968cfb6eeb4SYOSHIFUJI Hideaki 969e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk) 970cfb6eeb4SYOSHIFUJI Hideaki { 971cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 972a915da9bSEric Dumazet struct tcp_md5sig_key *key; 973b67bfe0dSSasha Levin struct hlist_node *n; 974a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 975cfb6eeb4SYOSHIFUJI Hideaki 976a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1); 977a8afca03SEric Dumazet 978b67bfe0dSSasha Levin hlist_for_each_entry_safe(key, n, &md5sig->head, node) { 979a915da9bSEric Dumazet hlist_del_rcu(&key->node); 9805f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 981a915da9bSEric Dumazet kfree_rcu(key, rcu); 982cfb6eeb4SYOSHIFUJI Hideaki } 983cfb6eeb4SYOSHIFUJI Hideaki } 984cfb6eeb4SYOSHIFUJI Hideaki 985cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, 986cfb6eeb4SYOSHIFUJI Hideaki int optlen) 987cfb6eeb4SYOSHIFUJI Hideaki { 988cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig cmd; 989cfb6eeb4SYOSHIFUJI Hideaki struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 990cfb6eeb4SYOSHIFUJI Hideaki 991cfb6eeb4SYOSHIFUJI Hideaki if (optlen < sizeof(cmd)) 992cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 993cfb6eeb4SYOSHIFUJI Hideaki 994cfb6eeb4SYOSHIFUJI Hideaki if (copy_from_user(&cmd, optval, sizeof(cmd))) 995cfb6eeb4SYOSHIFUJI Hideaki return -EFAULT; 996cfb6eeb4SYOSHIFUJI Hideaki 997cfb6eeb4SYOSHIFUJI Hideaki if (sin->sin_family != AF_INET) 998cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 999cfb6eeb4SYOSHIFUJI Hideaki 100064a124edSDmitry Popov if (!cmd.tcpm_keylen) 1001a915da9bSEric Dumazet return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1002a915da9bSEric Dumazet AF_INET); 1003cfb6eeb4SYOSHIFUJI Hideaki 1004cfb6eeb4SYOSHIFUJI Hideaki if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 1005cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1006cfb6eeb4SYOSHIFUJI Hideaki 1007a915da9bSEric Dumazet return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1008a915da9bSEric Dumazet AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, 1009a915da9bSEric Dumazet GFP_KERNEL); 1010cfb6eeb4SYOSHIFUJI Hideaki } 1011cfb6eeb4SYOSHIFUJI Hideaki 101249a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 101349a72dfbSAdam Langley __be32 daddr, __be32 saddr, int nbytes) 1014cfb6eeb4SYOSHIFUJI Hideaki { 1015cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_pseudohdr *bp; 101649a72dfbSAdam Langley struct scatterlist sg; 1017cfb6eeb4SYOSHIFUJI Hideaki 1018cfb6eeb4SYOSHIFUJI Hideaki bp = &hp->md5_blk.ip4; 1019cfb6eeb4SYOSHIFUJI Hideaki 1020cfb6eeb4SYOSHIFUJI Hideaki /* 102149a72dfbSAdam Langley * 1. the TCP pseudo-header (in the order: source IP address, 1022cfb6eeb4SYOSHIFUJI Hideaki * destination IP address, zero-padded protocol number, and 1023cfb6eeb4SYOSHIFUJI Hideaki * segment length) 1024cfb6eeb4SYOSHIFUJI Hideaki */ 1025cfb6eeb4SYOSHIFUJI Hideaki bp->saddr = saddr; 1026cfb6eeb4SYOSHIFUJI Hideaki bp->daddr = daddr; 1027cfb6eeb4SYOSHIFUJI Hideaki bp->pad = 0; 1028076fb722SYOSHIFUJI Hideaki bp->protocol = IPPROTO_TCP; 102949a72dfbSAdam Langley bp->len = cpu_to_be16(nbytes); 1030c7da57a1SDavid S. Miller 103149a72dfbSAdam Langley sg_init_one(&sg, bp, sizeof(*bp)); 103249a72dfbSAdam Langley return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 103349a72dfbSAdam Langley } 103449a72dfbSAdam Langley 1035a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1036318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th) 103749a72dfbSAdam Langley { 103849a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 103949a72dfbSAdam Langley struct hash_desc *desc; 104049a72dfbSAdam Langley 104149a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 104249a72dfbSAdam Langley if (!hp) 104349a72dfbSAdam Langley goto clear_hash_noput; 104449a72dfbSAdam Langley desc = &hp->md5_desc; 104549a72dfbSAdam Langley 104649a72dfbSAdam Langley if (crypto_hash_init(desc)) 104749a72dfbSAdam Langley goto clear_hash; 104849a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 104949a72dfbSAdam Langley goto clear_hash; 105049a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 105149a72dfbSAdam Langley goto clear_hash; 105249a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 105349a72dfbSAdam Langley goto clear_hash; 105449a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 1055cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1056cfb6eeb4SYOSHIFUJI Hideaki 1057cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1058cfb6eeb4SYOSHIFUJI Hideaki return 0; 105949a72dfbSAdam Langley 1060cfb6eeb4SYOSHIFUJI Hideaki clear_hash: 1061cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1062cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput: 1063cfb6eeb4SYOSHIFUJI Hideaki memset(md5_hash, 0, 16); 106449a72dfbSAdam Langley return 1; 1065cfb6eeb4SYOSHIFUJI Hideaki } 1066cfb6eeb4SYOSHIFUJI Hideaki 106739f8e58eSEric Dumazet int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 106839f8e58eSEric Dumazet const struct sock *sk, 1069318cf7aaSEric Dumazet const struct sk_buff *skb) 1070cfb6eeb4SYOSHIFUJI Hideaki { 107149a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 107249a72dfbSAdam Langley struct hash_desc *desc; 1073318cf7aaSEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1074cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, daddr; 1075cfb6eeb4SYOSHIFUJI Hideaki 107639f8e58eSEric Dumazet if (sk) { /* valid for establish/request sockets */ 107739f8e58eSEric Dumazet saddr = sk->sk_rcv_saddr; 107839f8e58eSEric Dumazet daddr = sk->sk_daddr; 1079cfb6eeb4SYOSHIFUJI Hideaki } else { 108049a72dfbSAdam Langley const struct iphdr *iph = ip_hdr(skb); 108149a72dfbSAdam Langley saddr = iph->saddr; 108249a72dfbSAdam Langley daddr = iph->daddr; 1083cfb6eeb4SYOSHIFUJI Hideaki } 1084cfb6eeb4SYOSHIFUJI Hideaki 108549a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 108649a72dfbSAdam Langley if (!hp) 108749a72dfbSAdam Langley goto clear_hash_noput; 108849a72dfbSAdam Langley desc = &hp->md5_desc; 108949a72dfbSAdam Langley 109049a72dfbSAdam Langley if (crypto_hash_init(desc)) 109149a72dfbSAdam Langley goto clear_hash; 109249a72dfbSAdam Langley 109349a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 109449a72dfbSAdam Langley goto clear_hash; 109549a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 109649a72dfbSAdam Langley goto clear_hash; 109749a72dfbSAdam Langley if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 109849a72dfbSAdam Langley goto clear_hash; 109949a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 110049a72dfbSAdam Langley goto clear_hash; 110149a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 110249a72dfbSAdam Langley goto clear_hash; 110349a72dfbSAdam Langley 110449a72dfbSAdam Langley tcp_put_md5sig_pool(); 110549a72dfbSAdam Langley return 0; 110649a72dfbSAdam Langley 110749a72dfbSAdam Langley clear_hash: 110849a72dfbSAdam Langley tcp_put_md5sig_pool(); 110949a72dfbSAdam Langley clear_hash_noput: 111049a72dfbSAdam Langley memset(md5_hash, 0, 16); 111149a72dfbSAdam Langley return 1; 111249a72dfbSAdam Langley } 111349a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1114cfb6eeb4SYOSHIFUJI Hideaki 1115ff74e23fSEric Dumazet /* Called with rcu_read_lock() */ 1116ff74e23fSEric Dumazet static bool tcp_v4_inbound_md5_hash(struct sock *sk, 11179ea88a15SDmitry Popov const struct sk_buff *skb) 1118cfb6eeb4SYOSHIFUJI Hideaki { 1119cfb6eeb4SYOSHIFUJI Hideaki /* 1120cfb6eeb4SYOSHIFUJI Hideaki * This gets called for each TCP segment that arrives 1121cfb6eeb4SYOSHIFUJI Hideaki * so we want to be efficient. 1122cfb6eeb4SYOSHIFUJI Hideaki * We have 3 drop cases: 1123cfb6eeb4SYOSHIFUJI Hideaki * o No MD5 hash and one expected. 1124cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and we're not expecting one. 1125cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and its wrong. 1126cfb6eeb4SYOSHIFUJI Hideaki */ 1127cf533ea5SEric Dumazet const __u8 *hash_location = NULL; 1128cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *hash_expected; 1129eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1130cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1131cfb6eeb4SYOSHIFUJI Hideaki int genhash; 1132cfb6eeb4SYOSHIFUJI Hideaki unsigned char newhash[16]; 1133cfb6eeb4SYOSHIFUJI Hideaki 1134a915da9bSEric Dumazet hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, 1135a915da9bSEric Dumazet AF_INET); 11367d5d5525SYOSHIFUJI Hideaki hash_location = tcp_parse_md5sig_option(th); 1137cfb6eeb4SYOSHIFUJI Hideaki 1138cfb6eeb4SYOSHIFUJI Hideaki /* We've parsed the options - do we have a hash? */ 1139cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && !hash_location) 1140a2a385d6SEric Dumazet return false; 1141cfb6eeb4SYOSHIFUJI Hideaki 1142cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected && !hash_location) { 1143785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 1144a2a385d6SEric Dumazet return true; 1145cfb6eeb4SYOSHIFUJI Hideaki } 1146cfb6eeb4SYOSHIFUJI Hideaki 1147cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && hash_location) { 1148785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 1149a2a385d6SEric Dumazet return true; 1150cfb6eeb4SYOSHIFUJI Hideaki } 1151cfb6eeb4SYOSHIFUJI Hideaki 1152cfb6eeb4SYOSHIFUJI Hideaki /* Okay, so this is hash_expected and hash_location - 1153cfb6eeb4SYOSHIFUJI Hideaki * so we need to calculate the checksum. 1154cfb6eeb4SYOSHIFUJI Hideaki */ 115549a72dfbSAdam Langley genhash = tcp_v4_md5_hash_skb(newhash, 1156cfb6eeb4SYOSHIFUJI Hideaki hash_expected, 115739f8e58eSEric Dumazet NULL, skb); 1158cfb6eeb4SYOSHIFUJI Hideaki 1159cfb6eeb4SYOSHIFUJI Hideaki if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1160e87cc472SJoe Perches net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", 1161673d57e7SHarvey Harrison &iph->saddr, ntohs(th->source), 1162673d57e7SHarvey Harrison &iph->daddr, ntohs(th->dest), 1163e87cc472SJoe Perches genhash ? " tcp_v4_calc_md5_hash failed" 1164e87cc472SJoe Perches : ""); 1165a2a385d6SEric Dumazet return true; 1166cfb6eeb4SYOSHIFUJI Hideaki } 1167a2a385d6SEric Dumazet return false; 1168cfb6eeb4SYOSHIFUJI Hideaki } 1169cfb6eeb4SYOSHIFUJI Hideaki #endif 1170cfb6eeb4SYOSHIFUJI Hideaki 117108d2cc3bSEric Dumazet static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, 117216bea70aSOctavian Purdila struct sk_buff *skb) 117316bea70aSOctavian Purdila { 117416bea70aSOctavian Purdila struct inet_request_sock *ireq = inet_rsk(req); 117516bea70aSOctavian Purdila 117608d2cc3bSEric Dumazet sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); 117708d2cc3bSEric Dumazet sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); 117808d2cc3bSEric Dumazet ireq->no_srccheck = inet_sk(sk_listener)->transparent; 117916bea70aSOctavian Purdila ireq->opt = tcp_v4_save_options(skb); 118016bea70aSOctavian Purdila } 118116bea70aSOctavian Purdila 1182d94e0417SOctavian Purdila static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, 1183d94e0417SOctavian Purdila const struct request_sock *req, 1184d94e0417SOctavian Purdila bool *strict) 1185d94e0417SOctavian Purdila { 1186d94e0417SOctavian Purdila struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); 1187d94e0417SOctavian Purdila 1188d94e0417SOctavian Purdila if (strict) { 1189d94e0417SOctavian Purdila if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) 1190d94e0417SOctavian Purdila *strict = true; 1191d94e0417SOctavian Purdila else 1192d94e0417SOctavian Purdila *strict = false; 1193d94e0417SOctavian Purdila } 1194d94e0417SOctavian Purdila 1195d94e0417SOctavian Purdila return dst; 1196d94e0417SOctavian Purdila } 1197d94e0417SOctavian Purdila 119872a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = { 11991da177e4SLinus Torvalds .family = PF_INET, 12002e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 12015db92c99SOctavian Purdila .rtx_syn_ack = tcp_rtx_synack, 120260236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 120360236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 12041da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 120572659eccSOctavian Purdila .syn_ack_timeout = tcp_syn_ack_timeout, 12061da177e4SLinus Torvalds }; 12071da177e4SLinus Torvalds 1208b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 12092aec4a29SOctavian Purdila .mss_clamp = TCP_MSS_DEFAULT, 121016bea70aSOctavian Purdila #ifdef CONFIG_TCP_MD5SIG 1211fd3a154aSEric Dumazet .req_md5_lookup = tcp_v4_md5_lookup, 1212e3afe7b7SJohn Dykstra .calc_md5_hash = tcp_v4_md5_hash_skb, 1213b6332e6cSAndrew Morton #endif 121416bea70aSOctavian Purdila .init_req = tcp_v4_init_req, 1215fb7b37a7SOctavian Purdila #ifdef CONFIG_SYN_COOKIES 1216fb7b37a7SOctavian Purdila .cookie_init_seq = cookie_v4_init_sequence, 1217fb7b37a7SOctavian Purdila #endif 1218d94e0417SOctavian Purdila .route_req = tcp_v4_route_req, 1219936b8bdbSOctavian Purdila .init_seq = tcp_v4_init_sequence, 1220d6274bd8SOctavian Purdila .send_synack = tcp_v4_send_synack, 1221695da14eSOctavian Purdila .queue_hash_add = inet_csk_reqsk_queue_hash_add, 122216bea70aSOctavian Purdila }; 1223cfb6eeb4SYOSHIFUJI Hideaki 12241da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 12251da177e4SLinus Torvalds { 12261da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 1227511c3f92SEric Dumazet if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 12281da177e4SLinus Torvalds goto drop; 12291da177e4SLinus Torvalds 12301fb6f159SOctavian Purdila return tcp_conn_request(&tcp_request_sock_ops, 12311fb6f159SOctavian Purdila &tcp_request_sock_ipv4_ops, sk, skb); 12321da177e4SLinus Torvalds 12331da177e4SLinus Torvalds drop: 1234848bf15fSVijay Subramanian NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 12351da177e4SLinus Torvalds return 0; 12361da177e4SLinus Torvalds } 12374bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request); 12381da177e4SLinus Torvalds 12391da177e4SLinus Torvalds 12401da177e4SLinus Torvalds /* 12411da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 12421da177e4SLinus Torvalds * now create the new socket. 12431da177e4SLinus Torvalds */ 12441da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 124560236fddSArnaldo Carvalho de Melo struct request_sock *req, 12461da177e4SLinus Torvalds struct dst_entry *dst) 12471da177e4SLinus Torvalds { 12482e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 12491da177e4SLinus Torvalds struct inet_sock *newinet; 12501da177e4SLinus Torvalds struct tcp_sock *newtp; 12511da177e4SLinus Torvalds struct sock *newsk; 1252cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1253cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 1254cfb6eeb4SYOSHIFUJI Hideaki #endif 1255f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 12561da177e4SLinus Torvalds 12571da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 12581da177e4SLinus Torvalds goto exit_overflow; 12591da177e4SLinus Torvalds 12601da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 12611da177e4SLinus Torvalds if (!newsk) 1262093d2823SBalazs Scheidler goto exit_nonewsk; 12631da177e4SLinus Torvalds 1264bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 1265fae6ef87SNeal Cardwell inet_sk_rx_dst_set(newsk, skb); 12661da177e4SLinus Torvalds 12671da177e4SLinus Torvalds newtp = tcp_sk(newsk); 12681da177e4SLinus Torvalds newinet = inet_sk(newsk); 12692e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 1270d1e559d0SEric Dumazet sk_daddr_set(newsk, ireq->ir_rmt_addr); 1271d1e559d0SEric Dumazet sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); 1272634fb979SEric Dumazet newinet->inet_saddr = ireq->ir_loc_addr; 1273f6d8bd05SEric Dumazet inet_opt = ireq->opt; 1274f6d8bd05SEric Dumazet rcu_assign_pointer(newinet->inet_opt, inet_opt); 12752e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 1276463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 1277eddc9ec5SArnaldo Carvalho de Melo newinet->mc_ttl = ip_hdr(skb)->ttl; 12784c507d28SJiri Benc newinet->rcv_tos = ip_hdr(skb)->tos; 1279d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 1280b73c3d0eSTom Herbert inet_set_txhash(newsk); 1281f6d8bd05SEric Dumazet if (inet_opt) 1282f6d8bd05SEric Dumazet inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1283c720c7e8SEric Dumazet newinet->inet_id = newtp->write_seq ^ jiffies; 12841da177e4SLinus Torvalds 1285dfd25fffSEric Dumazet if (!dst) { 1286dfd25fffSEric Dumazet dst = inet_csk_route_child_sock(sk, newsk, req); 1287dfd25fffSEric Dumazet if (!dst) 12880e734419SDavid S. Miller goto put_and_exit; 1289dfd25fffSEric Dumazet } else { 1290dfd25fffSEric Dumazet /* syncookie case : see end of cookie_v4_check() */ 1291dfd25fffSEric Dumazet } 12920e734419SDavid S. Miller sk_setup_caps(newsk, dst); 12930e734419SDavid S. Miller 129481164413SDaniel Borkmann tcp_ca_openreq_child(newsk, dst); 129581164413SDaniel Borkmann 12961da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 12970dbaee3bSDavid S. Miller newtp->advmss = dst_metric_advmss(dst); 1298f5fff5dcSTom Quetchenbach if (tcp_sk(sk)->rx_opt.user_mss && 1299f5fff5dcSTom Quetchenbach tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) 1300f5fff5dcSTom Quetchenbach newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1301f5fff5dcSTom Quetchenbach 13021da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 13031da177e4SLinus Torvalds 1304cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1305cfb6eeb4SYOSHIFUJI Hideaki /* Copy over the MD5 key from the original socket */ 1306a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, 1307a915da9bSEric Dumazet AF_INET); 130800db4124SIan Morris if (key) { 1309cfb6eeb4SYOSHIFUJI Hideaki /* 1310cfb6eeb4SYOSHIFUJI Hideaki * We're using one, so create a matching key 1311cfb6eeb4SYOSHIFUJI Hideaki * on the newsk structure. If we fail to get 1312cfb6eeb4SYOSHIFUJI Hideaki * memory, then we end up not copying the key 1313cfb6eeb4SYOSHIFUJI Hideaki * across. Shucks. 1314cfb6eeb4SYOSHIFUJI Hideaki */ 1315a915da9bSEric Dumazet tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, 1316a915da9bSEric Dumazet AF_INET, key->key, key->keylen, GFP_ATOMIC); 1317a465419bSEric Dumazet sk_nocaps_add(newsk, NETIF_F_GSO_MASK); 1318cfb6eeb4SYOSHIFUJI Hideaki } 1319cfb6eeb4SYOSHIFUJI Hideaki #endif 1320cfb6eeb4SYOSHIFUJI Hideaki 13210e734419SDavid S. Miller if (__inet_inherit_port(sk, newsk) < 0) 13220e734419SDavid S. Miller goto put_and_exit; 13239327f705SEric Dumazet __inet_hash_nolisten(newsk, NULL); 13241da177e4SLinus Torvalds 13251da177e4SLinus Torvalds return newsk; 13261da177e4SLinus Torvalds 13271da177e4SLinus Torvalds exit_overflow: 1328de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1329093d2823SBalazs Scheidler exit_nonewsk: 1330093d2823SBalazs Scheidler dst_release(dst); 13311da177e4SLinus Torvalds exit: 1332de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 13331da177e4SLinus Torvalds return NULL; 13340e734419SDavid S. Miller put_and_exit: 1335e337e24dSChristoph Paasch inet_csk_prepare_forced_close(newsk); 1336e337e24dSChristoph Paasch tcp_done(newsk); 13370e734419SDavid S. Miller goto exit; 13381da177e4SLinus Torvalds } 13394bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 13401da177e4SLinus Torvalds 13411da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 13421da177e4SLinus Torvalds { 134352452c54SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1344eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 134552452c54SEric Dumazet struct request_sock *req; 13461da177e4SLinus Torvalds struct sock *nsk; 134752452c54SEric Dumazet 134852452c54SEric Dumazet req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); 1349fa76ce73SEric Dumazet if (req) { 1350fa76ce73SEric Dumazet nsk = tcp_check_req(sk, skb, req, false); 1351b357a364SEric Dumazet if (!nsk) 1352fa76ce73SEric Dumazet reqsk_put(req); 1353fa76ce73SEric Dumazet return nsk; 1354fa76ce73SEric Dumazet } 13551da177e4SLinus Torvalds 13563b1e0a65SYOSHIFUJI Hideaki nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1357c67499c0SPavel Emelyanov th->source, iph->daddr, th->dest, inet_iif(skb)); 13581da177e4SLinus Torvalds 13591da177e4SLinus Torvalds if (nsk) { 13601da177e4SLinus Torvalds if (nsk->sk_state != TCP_TIME_WAIT) { 13611da177e4SLinus Torvalds bh_lock_sock(nsk); 13621da177e4SLinus Torvalds return nsk; 13631da177e4SLinus Torvalds } 13649469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(nsk)); 13651da177e4SLinus Torvalds return NULL; 13661da177e4SLinus Torvalds } 13671da177e4SLinus Torvalds 13681da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 1369af9b4738SFlorian Westphal if (!th->syn) 1370461b74c3SCong Wang sk = cookie_v4_check(sk, skb); 13711da177e4SLinus Torvalds #endif 13721da177e4SLinus Torvalds return sk; 13731da177e4SLinus Torvalds } 13741da177e4SLinus Torvalds 13751da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 13761da177e4SLinus Torvalds * here. 13771da177e4SLinus Torvalds * 13781da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 13791da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 13801da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 13811da177e4SLinus Torvalds * held. 13821da177e4SLinus Torvalds */ 13831da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 13841da177e4SLinus Torvalds { 1385cfb6eeb4SYOSHIFUJI Hideaki struct sock *rsk; 1386cfb6eeb4SYOSHIFUJI Hideaki 13871da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 138892101b3bSDavid S. Miller struct dst_entry *dst = sk->sk_rx_dst; 1389404e0a8bSEric Dumazet 1390404e0a8bSEric Dumazet sock_rps_save_rxhash(sk, skb); 13913d97379aSEric Dumazet sk_mark_napi_id(sk, skb); 1392404e0a8bSEric Dumazet if (dst) { 1393505fbcf0SEric Dumazet if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 139451456b29SIan Morris !dst->ops->check(dst, 0)) { 139592101b3bSDavid S. Miller dst_release(dst); 139692101b3bSDavid S. Miller sk->sk_rx_dst = NULL; 139792101b3bSDavid S. Miller } 139892101b3bSDavid S. Miller } 1399c995ae22SVijay Subramanian tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); 14001da177e4SLinus Torvalds return 0; 14011da177e4SLinus Torvalds } 14021da177e4SLinus Torvalds 1403ab6a5bb6SArnaldo Carvalho de Melo if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) 14041da177e4SLinus Torvalds goto csum_err; 14051da177e4SLinus Torvalds 14061da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 14071da177e4SLinus Torvalds struct sock *nsk = tcp_v4_hnd_req(sk, skb); 14081da177e4SLinus Torvalds if (!nsk) 14091da177e4SLinus Torvalds goto discard; 14101da177e4SLinus Torvalds 14111da177e4SLinus Torvalds if (nsk != sk) { 1412bdeab991STom Herbert sock_rps_save_rxhash(nsk, skb); 14133d97379aSEric Dumazet sk_mark_napi_id(sk, skb); 1414cfb6eeb4SYOSHIFUJI Hideaki if (tcp_child_process(sk, nsk, skb)) { 1415cfb6eeb4SYOSHIFUJI Hideaki rsk = nsk; 14161da177e4SLinus Torvalds goto reset; 1417cfb6eeb4SYOSHIFUJI Hideaki } 14181da177e4SLinus Torvalds return 0; 14191da177e4SLinus Torvalds } 1420ca55158cSEric Dumazet } else 1421bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1422ca55158cSEric Dumazet 1423aa8223c7SArnaldo Carvalho de Melo if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1424cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 14251da177e4SLinus Torvalds goto reset; 1426cfb6eeb4SYOSHIFUJI Hideaki } 14271da177e4SLinus Torvalds return 0; 14281da177e4SLinus Torvalds 14291da177e4SLinus Torvalds reset: 1430cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(rsk, skb); 14311da177e4SLinus Torvalds discard: 14321da177e4SLinus Torvalds kfree_skb(skb); 14331da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 14341da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 14351da177e4SLinus Torvalds * might be destroyed here. This current version compiles correctly, 14361da177e4SLinus Torvalds * but you have been warned. 14371da177e4SLinus Torvalds */ 14381da177e4SLinus Torvalds return 0; 14391da177e4SLinus Torvalds 14401da177e4SLinus Torvalds csum_err: 14416a5dc9e5SEric Dumazet TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); 144263231bddSPavel Emelyanov TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 14431da177e4SLinus Torvalds goto discard; 14441da177e4SLinus Torvalds } 14454bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv); 14461da177e4SLinus Torvalds 1447160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb) 144841063e9dSDavid S. Miller { 144941063e9dSDavid S. Miller const struct iphdr *iph; 145041063e9dSDavid S. Miller const struct tcphdr *th; 145141063e9dSDavid S. Miller struct sock *sk; 145241063e9dSDavid S. Miller 145341063e9dSDavid S. Miller if (skb->pkt_type != PACKET_HOST) 1454160eb5a6SDavid S. Miller return; 145541063e9dSDavid S. Miller 145645f00f99SEric Dumazet if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1457160eb5a6SDavid S. Miller return; 145841063e9dSDavid S. Miller 145941063e9dSDavid S. Miller iph = ip_hdr(skb); 146045f00f99SEric Dumazet th = tcp_hdr(skb); 146141063e9dSDavid S. Miller 146241063e9dSDavid S. Miller if (th->doff < sizeof(struct tcphdr) / 4) 1463160eb5a6SDavid S. Miller return; 146441063e9dSDavid S. Miller 146545f00f99SEric Dumazet sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 146641063e9dSDavid S. Miller iph->saddr, th->source, 14677011d085SVijay Subramanian iph->daddr, ntohs(th->dest), 14689cb429d6SEric Dumazet skb->skb_iif); 146941063e9dSDavid S. Miller if (sk) { 147041063e9dSDavid S. Miller skb->sk = sk; 147141063e9dSDavid S. Miller skb->destructor = sock_edemux; 1472f7e4eb03SEric Dumazet if (sk_fullsock(sk)) { 1473d0c294c5SMichal Kubeček struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); 1474505fbcf0SEric Dumazet 147541063e9dSDavid S. Miller if (dst) 147641063e9dSDavid S. Miller dst = dst_check(dst, 0); 147792101b3bSDavid S. Miller if (dst && 1478505fbcf0SEric Dumazet inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) 147941063e9dSDavid S. Miller skb_dst_set_noref(skb, dst); 148041063e9dSDavid S. Miller } 148141063e9dSDavid S. Miller } 148241063e9dSDavid S. Miller } 148341063e9dSDavid S. Miller 1484b2fb4f54SEric Dumazet /* Packet is added to VJ-style prequeue for processing in process 1485b2fb4f54SEric Dumazet * context, if a reader task is waiting. Apparently, this exciting 1486b2fb4f54SEric Dumazet * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) 1487b2fb4f54SEric Dumazet * failed somewhere. Latency? Burstiness? Well, at least now we will 1488b2fb4f54SEric Dumazet * see, why it failed. 8)8) --ANK 1489b2fb4f54SEric Dumazet * 1490b2fb4f54SEric Dumazet */ 1491b2fb4f54SEric Dumazet bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) 1492b2fb4f54SEric Dumazet { 1493b2fb4f54SEric Dumazet struct tcp_sock *tp = tcp_sk(sk); 1494b2fb4f54SEric Dumazet 1495b2fb4f54SEric Dumazet if (sysctl_tcp_low_latency || !tp->ucopy.task) 1496b2fb4f54SEric Dumazet return false; 1497b2fb4f54SEric Dumazet 1498b2fb4f54SEric Dumazet if (skb->len <= tcp_hdrlen(skb) && 1499b2fb4f54SEric Dumazet skb_queue_len(&tp->ucopy.prequeue) == 0) 1500b2fb4f54SEric Dumazet return false; 1501b2fb4f54SEric Dumazet 1502ca777effSEric Dumazet /* Before escaping RCU protected region, we need to take care of skb 1503ca777effSEric Dumazet * dst. Prequeue is only enabled for established sockets. 1504ca777effSEric Dumazet * For such sockets, we might need the skb dst only to set sk->sk_rx_dst 1505ca777effSEric Dumazet * Instead of doing full sk_rx_dst validity here, let's perform 1506ca777effSEric Dumazet * an optimistic check. 1507ca777effSEric Dumazet */ 1508ca777effSEric Dumazet if (likely(sk->sk_rx_dst)) 1509ca777effSEric Dumazet skb_dst_drop(skb); 1510ca777effSEric Dumazet else 151158717686SDavid S. Miller skb_dst_force(skb); 1512ca777effSEric Dumazet 1513b2fb4f54SEric Dumazet __skb_queue_tail(&tp->ucopy.prequeue, skb); 1514b2fb4f54SEric Dumazet tp->ucopy.memory += skb->truesize; 1515b2fb4f54SEric Dumazet if (tp->ucopy.memory > sk->sk_rcvbuf) { 1516b2fb4f54SEric Dumazet struct sk_buff *skb1; 1517b2fb4f54SEric Dumazet 1518b2fb4f54SEric Dumazet BUG_ON(sock_owned_by_user(sk)); 1519b2fb4f54SEric Dumazet 1520b2fb4f54SEric Dumazet while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { 1521b2fb4f54SEric Dumazet sk_backlog_rcv(sk, skb1); 1522b2fb4f54SEric Dumazet NET_INC_STATS_BH(sock_net(sk), 1523b2fb4f54SEric Dumazet LINUX_MIB_TCPPREQUEUEDROPPED); 1524b2fb4f54SEric Dumazet } 1525b2fb4f54SEric Dumazet 1526b2fb4f54SEric Dumazet tp->ucopy.memory = 0; 1527b2fb4f54SEric Dumazet } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 1528b2fb4f54SEric Dumazet wake_up_interruptible_sync_poll(sk_sleep(sk), 1529b2fb4f54SEric Dumazet POLLIN | POLLRDNORM | POLLRDBAND); 1530b2fb4f54SEric Dumazet if (!inet_csk_ack_scheduled(sk)) 1531b2fb4f54SEric Dumazet inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 1532b2fb4f54SEric Dumazet (3 * tcp_rto_min(sk)) / 4, 1533b2fb4f54SEric Dumazet TCP_RTO_MAX); 1534b2fb4f54SEric Dumazet } 1535b2fb4f54SEric Dumazet return true; 1536b2fb4f54SEric Dumazet } 1537b2fb4f54SEric Dumazet EXPORT_SYMBOL(tcp_prequeue); 1538b2fb4f54SEric Dumazet 15391da177e4SLinus Torvalds /* 15401da177e4SLinus Torvalds * From tcp_input.c 15411da177e4SLinus Torvalds */ 15421da177e4SLinus Torvalds 15431da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 15441da177e4SLinus Torvalds { 1545eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 1546cf533ea5SEric Dumazet const struct tcphdr *th; 15471da177e4SLinus Torvalds struct sock *sk; 15481da177e4SLinus Torvalds int ret; 1549a86b1e30SPavel Emelyanov struct net *net = dev_net(skb->dev); 15501da177e4SLinus Torvalds 15511da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 15521da177e4SLinus Torvalds goto discard_it; 15531da177e4SLinus Torvalds 15541da177e4SLinus Torvalds /* Count it even if it's bad */ 155563231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); 15561da177e4SLinus Torvalds 15571da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 15581da177e4SLinus Torvalds goto discard_it; 15591da177e4SLinus Torvalds 1560aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 15611da177e4SLinus Torvalds 15621da177e4SLinus Torvalds if (th->doff < sizeof(struct tcphdr) / 4) 15631da177e4SLinus Torvalds goto bad_packet; 15641da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 15651da177e4SLinus Torvalds goto discard_it; 15661da177e4SLinus Torvalds 15671da177e4SLinus Torvalds /* An explanation is required here, I think. 15681da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1569caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 15701da177e4SLinus Torvalds * So, we defer the checks. */ 1571ed70fcfcSTom Herbert 1572ed70fcfcSTom Herbert if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) 15736a5dc9e5SEric Dumazet goto csum_error; 15741da177e4SLinus Torvalds 1575aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 1576eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 1577971f10ecSEric Dumazet /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() 1578971f10ecSEric Dumazet * barrier() makes sure compiler wont play fool^Waliasing games. 1579971f10ecSEric Dumazet */ 1580971f10ecSEric Dumazet memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), 1581971f10ecSEric Dumazet sizeof(struct inet_skb_parm)); 1582971f10ecSEric Dumazet barrier(); 1583971f10ecSEric Dumazet 15841da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 15851da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 15861da177e4SLinus Torvalds skb->len - th->doff * 4); 15871da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1588e11ecddfSEric Dumazet TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 158904317dafSEric Dumazet TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1590b82d1bb4SEric Dumazet TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 15911da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 15921da177e4SLinus Torvalds 15939a1f27c4SArnaldo Carvalho de Melo sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 15941da177e4SLinus Torvalds if (!sk) 15951da177e4SLinus Torvalds goto no_tcp_socket; 15961da177e4SLinus Torvalds 1597bb134d5dSEric Dumazet process: 1598bb134d5dSEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 1599bb134d5dSEric Dumazet goto do_time_wait; 1600bb134d5dSEric Dumazet 16016cce09f8SEric Dumazet if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 16026cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1603d218d111SStephen Hemminger goto discard_and_relse; 16046cce09f8SEric Dumazet } 1605d218d111SStephen Hemminger 16061da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 16071da177e4SLinus Torvalds goto discard_and_relse; 16089ea88a15SDmitry Popov 16099ea88a15SDmitry Popov #ifdef CONFIG_TCP_MD5SIG 16109ea88a15SDmitry Popov /* 16119ea88a15SDmitry Popov * We really want to reject the packet as early as possible 16129ea88a15SDmitry Popov * if: 16139ea88a15SDmitry Popov * o We're expecting an MD5'd packet and this is no MD5 tcp option 16149ea88a15SDmitry Popov * o There is an MD5 option and we're not expecting one 16159ea88a15SDmitry Popov */ 16169ea88a15SDmitry Popov if (tcp_v4_inbound_md5_hash(sk, skb)) 16179ea88a15SDmitry Popov goto discard_and_relse; 16189ea88a15SDmitry Popov #endif 16199ea88a15SDmitry Popov 1620b59c2701SPatrick McHardy nf_reset(skb); 16211da177e4SLinus Torvalds 1622fda9ef5dSDmitry Mishin if (sk_filter(sk, skb)) 16231da177e4SLinus Torvalds goto discard_and_relse; 16241da177e4SLinus Torvalds 16252c8c56e1SEric Dumazet sk_incoming_cpu_update(sk); 16261da177e4SLinus Torvalds skb->dev = NULL; 16271da177e4SLinus Torvalds 1628c6366184SIngo Molnar bh_lock_sock_nested(sk); 1629*2efd055cSMarcelo Ricardo Leitner tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 16301da177e4SLinus Torvalds ret = 0; 16311da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 16321da177e4SLinus Torvalds if (!tcp_prequeue(sk, skb)) 16331da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 1634da882c1fSEric Dumazet } else if (unlikely(sk_add_backlog(sk, skb, 1635da882c1fSEric Dumazet sk->sk_rcvbuf + sk->sk_sndbuf))) { 16366b03a53aSZhu Yi bh_unlock_sock(sk); 16376cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 16386b03a53aSZhu Yi goto discard_and_relse; 16396b03a53aSZhu Yi } 16401da177e4SLinus Torvalds bh_unlock_sock(sk); 16411da177e4SLinus Torvalds 16421da177e4SLinus Torvalds sock_put(sk); 16431da177e4SLinus Torvalds 16441da177e4SLinus Torvalds return ret; 16451da177e4SLinus Torvalds 16461da177e4SLinus Torvalds no_tcp_socket: 16471da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 16481da177e4SLinus Torvalds goto discard_it; 16491da177e4SLinus Torvalds 16501da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 16516a5dc9e5SEric Dumazet csum_error: 16526a5dc9e5SEric Dumazet TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); 16531da177e4SLinus Torvalds bad_packet: 165463231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 16551da177e4SLinus Torvalds } else { 1656cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(NULL, skb); 16571da177e4SLinus Torvalds } 16581da177e4SLinus Torvalds 16591da177e4SLinus Torvalds discard_it: 16601da177e4SLinus Torvalds /* Discard frame. */ 16611da177e4SLinus Torvalds kfree_skb(skb); 16621da177e4SLinus Torvalds return 0; 16631da177e4SLinus Torvalds 16641da177e4SLinus Torvalds discard_and_relse: 16651da177e4SLinus Torvalds sock_put(sk); 16661da177e4SLinus Torvalds goto discard_it; 16671da177e4SLinus Torvalds 16681da177e4SLinus Torvalds do_time_wait: 16691da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 16709469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 16711da177e4SLinus Torvalds goto discard_it; 16721da177e4SLinus Torvalds } 16731da177e4SLinus Torvalds 16746a5dc9e5SEric Dumazet if (skb->len < (th->doff << 2)) { 16759469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 16766a5dc9e5SEric Dumazet goto bad_packet; 16776a5dc9e5SEric Dumazet } 16786a5dc9e5SEric Dumazet if (tcp_checksum_complete(skb)) { 16796a5dc9e5SEric Dumazet inet_twsk_put(inet_twsk(sk)); 16806a5dc9e5SEric Dumazet goto csum_error; 16811da177e4SLinus Torvalds } 16829469c7b4SYOSHIFUJI Hideaki switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 16831da177e4SLinus Torvalds case TCP_TW_SYN: { 1684c346dca1SYOSHIFUJI Hideaki struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 1685c67499c0SPavel Emelyanov &tcp_hashinfo, 1686da5e3630STom Herbert iph->saddr, th->source, 1687eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, 1688463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 16891da177e4SLinus Torvalds if (sk2) { 1690789f558cSEric Dumazet inet_twsk_deschedule(inet_twsk(sk)); 16919469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 16921da177e4SLinus Torvalds sk = sk2; 16931da177e4SLinus Torvalds goto process; 16941da177e4SLinus Torvalds } 16951da177e4SLinus Torvalds /* Fall through to ACK */ 16961da177e4SLinus Torvalds } 16971da177e4SLinus Torvalds case TCP_TW_ACK: 16981da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 16991da177e4SLinus Torvalds break; 17001da177e4SLinus Torvalds case TCP_TW_RST: 17011da177e4SLinus Torvalds goto no_tcp_socket; 17021da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 17031da177e4SLinus Torvalds } 17041da177e4SLinus Torvalds goto discard_it; 17051da177e4SLinus Torvalds } 17061da177e4SLinus Torvalds 1707ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = { 1708ccb7c410SDavid S. Miller .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1709ccb7c410SDavid S. Miller .twsk_unique = tcp_twsk_unique, 1710ccb7c410SDavid S. Miller .twsk_destructor= tcp_twsk_destructor, 1711ccb7c410SDavid S. Miller }; 17121da177e4SLinus Torvalds 171363d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 17145d299f3dSEric Dumazet { 17155d299f3dSEric Dumazet struct dst_entry *dst = skb_dst(skb); 17165d299f3dSEric Dumazet 1717ca777effSEric Dumazet if (dst) { 17185d299f3dSEric Dumazet dst_hold(dst); 17195d299f3dSEric Dumazet sk->sk_rx_dst = dst; 17205d299f3dSEric Dumazet inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 17215d299f3dSEric Dumazet } 1722ca777effSEric Dumazet } 172363d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set); 17245d299f3dSEric Dumazet 17253b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = { 17261da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 17271da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 172832519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 17295d299f3dSEric Dumazet .sk_rx_dst_set = inet_sk_rx_dst_set, 17301da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 17311da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 17321da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 17331da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 17341da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 1735543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 1736543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 1737ab1e0a13SArnaldo Carvalho de Melo .bind_conflict = inet_csk_bind_conflict, 17383fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 17393fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 17403fdadf7dSDmitry Mishin .compat_getsockopt = compat_ip_getsockopt, 17413fdadf7dSDmitry Mishin #endif 17424fab9071SNeal Cardwell .mtu_reduced = tcp_v4_mtu_reduced, 17431da177e4SLinus Torvalds }; 17444bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific); 17451da177e4SLinus Torvalds 1746cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1747b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1748cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_md5_lookup, 174949a72dfbSAdam Langley .calc_md5_hash = tcp_v4_md5_hash_skb, 1750cfb6eeb4SYOSHIFUJI Hideaki .md5_parse = tcp_v4_parse_md5_keys, 1751cfb6eeb4SYOSHIFUJI Hideaki }; 1752b6332e6cSAndrew Morton #endif 1753cfb6eeb4SYOSHIFUJI Hideaki 17541da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 17551da177e4SLinus Torvalds * sk_alloc() so need not be done here. 17561da177e4SLinus Torvalds */ 17571da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 17581da177e4SLinus Torvalds { 17596687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 17601da177e4SLinus Torvalds 1761900f65d3SNeal Cardwell tcp_init_sock(sk); 17621da177e4SLinus Torvalds 17638292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 1764900f65d3SNeal Cardwell 1765cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1766ac807fa8SDavid S. Miller tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; 1767cfb6eeb4SYOSHIFUJI Hideaki #endif 17681da177e4SLinus Torvalds 17691da177e4SLinus Torvalds return 0; 17701da177e4SLinus Torvalds } 17711da177e4SLinus Torvalds 17727d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk) 17731da177e4SLinus Torvalds { 17741da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 17751da177e4SLinus Torvalds 17761da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 17771da177e4SLinus Torvalds 17786687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 1779317a76f9SStephen Hemminger 17801da177e4SLinus Torvalds /* Cleanup up the write buffer. */ 1781fe067e8aSDavid S. Miller tcp_write_queue_purge(sk); 17821da177e4SLinus Torvalds 17831da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. */ 17841da177e4SLinus Torvalds __skb_queue_purge(&tp->out_of_order_queue); 17851da177e4SLinus Torvalds 1786cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1787cfb6eeb4SYOSHIFUJI Hideaki /* Clean up the MD5 key list, if any */ 1788cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info) { 1789a915da9bSEric Dumazet tcp_clear_md5_list(sk); 1790a8afca03SEric Dumazet kfree_rcu(tp->md5sig_info, rcu); 1791cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = NULL; 1792cfb6eeb4SYOSHIFUJI Hideaki } 1793cfb6eeb4SYOSHIFUJI Hideaki #endif 1794cfb6eeb4SYOSHIFUJI Hideaki 17951da177e4SLinus Torvalds /* Clean prequeue, it must be empty really */ 17961da177e4SLinus Torvalds __skb_queue_purge(&tp->ucopy.prequeue); 17971da177e4SLinus Torvalds 17981da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. */ 1799463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 1800ab1e0a13SArnaldo Carvalho de Melo inet_put_port(sk); 18011da177e4SLinus Torvalds 180200db4124SIan Morris BUG_ON(tp->fastopen_rsk); 1803435cf559SWilliam Allen Simpson 1804cf60af03SYuchung Cheng /* If socket is aborted during connect operation */ 1805cf60af03SYuchung Cheng tcp_free_fastopen_req(tp); 1806cd8ae852SEric Dumazet tcp_saved_syn_free(tp); 1807cf60af03SYuchung Cheng 1808180d8cd9SGlauber Costa sk_sockets_allocated_dec(sk); 1809d1a4c0b3SGlauber Costa sock_release_memcg(sk); 18101da177e4SLinus Torvalds } 18111da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 18121da177e4SLinus Torvalds 18131da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 18141da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 18151da177e4SLinus Torvalds 1816a8b690f9STom Herbert /* 1817a8b690f9STom Herbert * Get next listener socket follow cur. If cur is NULL, get first socket 1818a8b690f9STom Herbert * starting from bucket given in st->bucket; when st->bucket is zero the 1819a8b690f9STom Herbert * very first socket in the hash table is returned. 1820a8b690f9STom Herbert */ 18211da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur) 18221da177e4SLinus Torvalds { 1823463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk; 1824c25eb3bfSEric Dumazet struct hlist_nulls_node *node; 18251da177e4SLinus Torvalds struct sock *sk = cur; 18265caea4eaSEric Dumazet struct inet_listen_hashbucket *ilb; 18271da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1828a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 18291da177e4SLinus Torvalds 18301da177e4SLinus Torvalds if (!sk) { 1831a8b690f9STom Herbert ilb = &tcp_hashinfo.listening_hash[st->bucket]; 18325caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 1833c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 1834a8b690f9STom Herbert st->offset = 0; 18351da177e4SLinus Torvalds goto get_sk; 18361da177e4SLinus Torvalds } 18375caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 18381da177e4SLinus Torvalds ++st->num; 1839a8b690f9STom Herbert ++st->offset; 18401da177e4SLinus Torvalds 18411da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_OPENREQ) { 184260236fddSArnaldo Carvalho de Melo struct request_sock *req = cur; 18431da177e4SLinus Torvalds 1844463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(st->syn_wait_sk); 18451da177e4SLinus Torvalds req = req->dl_next; 18461da177e4SLinus Torvalds while (1) { 18471da177e4SLinus Torvalds while (req) { 1848bdccc4caSDaniel Lezcano if (req->rsk_ops->family == st->family) { 18491da177e4SLinus Torvalds cur = req; 18501da177e4SLinus Torvalds goto out; 18511da177e4SLinus Torvalds } 18521da177e4SLinus Torvalds req = req->dl_next; 18531da177e4SLinus Torvalds } 185472a3effaSEric Dumazet if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 18551da177e4SLinus Torvalds break; 18561da177e4SLinus Torvalds get_req: 1857463c84b9SArnaldo Carvalho de Melo req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 18581da177e4SLinus Torvalds } 18591bde5ac4SEric Dumazet sk = sk_nulls_next(st->syn_wait_sk); 18601da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 1861b2827053SEric Dumazet spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 18621da177e4SLinus Torvalds } else { 1863463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 1864b2827053SEric Dumazet spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1865463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) 18661da177e4SLinus Torvalds goto start_req; 1867b2827053SEric Dumazet spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 18681bde5ac4SEric Dumazet sk = sk_nulls_next(sk); 18691da177e4SLinus Torvalds } 18701da177e4SLinus Torvalds get_sk: 1871c25eb3bfSEric Dumazet sk_nulls_for_each_from(sk, node) { 18728475ef9fSPavel Emelyanov if (!net_eq(sock_net(sk), net)) 18738475ef9fSPavel Emelyanov continue; 18748475ef9fSPavel Emelyanov if (sk->sk_family == st->family) { 18751da177e4SLinus Torvalds cur = sk; 18761da177e4SLinus Torvalds goto out; 18771da177e4SLinus Torvalds } 1878463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 1879b2827053SEric Dumazet spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1880463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 18811da177e4SLinus Torvalds start_req: 18821da177e4SLinus Torvalds st->uid = sock_i_uid(sk); 18831da177e4SLinus Torvalds st->syn_wait_sk = sk; 18841da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_OPENREQ; 18851da177e4SLinus Torvalds st->sbucket = 0; 18861da177e4SLinus Torvalds goto get_req; 18871da177e4SLinus Torvalds } 1888b2827053SEric Dumazet spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 18891da177e4SLinus Torvalds } 18905caea4eaSEric Dumazet spin_unlock_bh(&ilb->lock); 1891a8b690f9STom Herbert st->offset = 0; 18920f7ff927SArnaldo Carvalho de Melo if (++st->bucket < INET_LHTABLE_SIZE) { 18935caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 18945caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 1895c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 18961da177e4SLinus Torvalds goto get_sk; 18971da177e4SLinus Torvalds } 18981da177e4SLinus Torvalds cur = NULL; 18991da177e4SLinus Torvalds out: 19001da177e4SLinus Torvalds return cur; 19011da177e4SLinus Torvalds } 19021da177e4SLinus Torvalds 19031da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 19041da177e4SLinus Torvalds { 1905a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 1906a8b690f9STom Herbert void *rc; 1907a8b690f9STom Herbert 1908a8b690f9STom Herbert st->bucket = 0; 1909a8b690f9STom Herbert st->offset = 0; 1910a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 19111da177e4SLinus Torvalds 19121da177e4SLinus Torvalds while (rc && *pos) { 19131da177e4SLinus Torvalds rc = listening_get_next(seq, rc); 19141da177e4SLinus Torvalds --*pos; 19151da177e4SLinus Torvalds } 19161da177e4SLinus Torvalds return rc; 19171da177e4SLinus Torvalds } 19181da177e4SLinus Torvalds 191905dbc7b5SEric Dumazet static inline bool empty_bucket(const struct tcp_iter_state *st) 19206eac5604SAndi Kleen { 192105dbc7b5SEric Dumazet return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain); 19226eac5604SAndi Kleen } 19236eac5604SAndi Kleen 1924a8b690f9STom Herbert /* 1925a8b690f9STom Herbert * Get first established socket starting from bucket given in st->bucket. 1926a8b690f9STom Herbert * If st->bucket is zero, the very first socket in the hash is returned. 1927a8b690f9STom Herbert */ 19281da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq) 19291da177e4SLinus Torvalds { 19301da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1931a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 19321da177e4SLinus Torvalds void *rc = NULL; 19331da177e4SLinus Torvalds 1934a8b690f9STom Herbert st->offset = 0; 1935a8b690f9STom Herbert for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 19361da177e4SLinus Torvalds struct sock *sk; 19373ab5aee7SEric Dumazet struct hlist_nulls_node *node; 19389db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 19391da177e4SLinus Torvalds 19406eac5604SAndi Kleen /* Lockless fast path for the common case of empty buckets */ 19416eac5604SAndi Kleen if (empty_bucket(st)) 19426eac5604SAndi Kleen continue; 19436eac5604SAndi Kleen 19449db66bdcSEric Dumazet spin_lock_bh(lock); 19453ab5aee7SEric Dumazet sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 1946f40c8174SDaniel Lezcano if (sk->sk_family != st->family || 1947878628fbSYOSHIFUJI Hideaki !net_eq(sock_net(sk), net)) { 19481da177e4SLinus Torvalds continue; 19491da177e4SLinus Torvalds } 19501da177e4SLinus Torvalds rc = sk; 19511da177e4SLinus Torvalds goto out; 19521da177e4SLinus Torvalds } 19539db66bdcSEric Dumazet spin_unlock_bh(lock); 19541da177e4SLinus Torvalds } 19551da177e4SLinus Torvalds out: 19561da177e4SLinus Torvalds return rc; 19571da177e4SLinus Torvalds } 19581da177e4SLinus Torvalds 19591da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur) 19601da177e4SLinus Torvalds { 19611da177e4SLinus Torvalds struct sock *sk = cur; 19623ab5aee7SEric Dumazet struct hlist_nulls_node *node; 19631da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1964a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 19651da177e4SLinus Torvalds 19661da177e4SLinus Torvalds ++st->num; 1967a8b690f9STom Herbert ++st->offset; 19681da177e4SLinus Torvalds 19693ab5aee7SEric Dumazet sk = sk_nulls_next(sk); 19701da177e4SLinus Torvalds 19713ab5aee7SEric Dumazet sk_nulls_for_each_from(sk, node) { 1972878628fbSYOSHIFUJI Hideaki if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 197305dbc7b5SEric Dumazet return sk; 19741da177e4SLinus Torvalds } 19751da177e4SLinus Torvalds 197605dbc7b5SEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 197705dbc7b5SEric Dumazet ++st->bucket; 197805dbc7b5SEric Dumazet return established_get_first(seq); 19791da177e4SLinus Torvalds } 19801da177e4SLinus Torvalds 19811da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos) 19821da177e4SLinus Torvalds { 1983a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 1984a8b690f9STom Herbert void *rc; 1985a8b690f9STom Herbert 1986a8b690f9STom Herbert st->bucket = 0; 1987a8b690f9STom Herbert rc = established_get_first(seq); 19881da177e4SLinus Torvalds 19891da177e4SLinus Torvalds while (rc && pos) { 19901da177e4SLinus Torvalds rc = established_get_next(seq, rc); 19911da177e4SLinus Torvalds --pos; 19921da177e4SLinus Torvalds } 19931da177e4SLinus Torvalds return rc; 19941da177e4SLinus Torvalds } 19951da177e4SLinus Torvalds 19961da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 19971da177e4SLinus Torvalds { 19981da177e4SLinus Torvalds void *rc; 19991da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 20001da177e4SLinus Torvalds 20011da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 20021da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 20031da177e4SLinus Torvalds 20041da177e4SLinus Torvalds if (!rc) { 20051da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 20061da177e4SLinus Torvalds rc = established_get_idx(seq, pos); 20071da177e4SLinus Torvalds } 20081da177e4SLinus Torvalds 20091da177e4SLinus Torvalds return rc; 20101da177e4SLinus Torvalds } 20111da177e4SLinus Torvalds 2012a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq) 2013a8b690f9STom Herbert { 2014a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2015a8b690f9STom Herbert int offset = st->offset; 2016a8b690f9STom Herbert int orig_num = st->num; 2017a8b690f9STom Herbert void *rc = NULL; 2018a8b690f9STom Herbert 2019a8b690f9STom Herbert switch (st->state) { 2020a8b690f9STom Herbert case TCP_SEQ_STATE_OPENREQ: 2021a8b690f9STom Herbert case TCP_SEQ_STATE_LISTENING: 2022a8b690f9STom Herbert if (st->bucket >= INET_LHTABLE_SIZE) 2023a8b690f9STom Herbert break; 2024a8b690f9STom Herbert st->state = TCP_SEQ_STATE_LISTENING; 2025a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 2026a8b690f9STom Herbert while (offset-- && rc) 2027a8b690f9STom Herbert rc = listening_get_next(seq, rc); 2028a8b690f9STom Herbert if (rc) 2029a8b690f9STom Herbert break; 2030a8b690f9STom Herbert st->bucket = 0; 203105dbc7b5SEric Dumazet st->state = TCP_SEQ_STATE_ESTABLISHED; 2032a8b690f9STom Herbert /* Fallthrough */ 2033a8b690f9STom Herbert case TCP_SEQ_STATE_ESTABLISHED: 2034a8b690f9STom Herbert if (st->bucket > tcp_hashinfo.ehash_mask) 2035a8b690f9STom Herbert break; 2036a8b690f9STom Herbert rc = established_get_first(seq); 2037a8b690f9STom Herbert while (offset-- && rc) 2038a8b690f9STom Herbert rc = established_get_next(seq, rc); 2039a8b690f9STom Herbert } 2040a8b690f9STom Herbert 2041a8b690f9STom Herbert st->num = orig_num; 2042a8b690f9STom Herbert 2043a8b690f9STom Herbert return rc; 2044a8b690f9STom Herbert } 2045a8b690f9STom Herbert 20461da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 20471da177e4SLinus Torvalds { 20481da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2049a8b690f9STom Herbert void *rc; 2050a8b690f9STom Herbert 2051a8b690f9STom Herbert if (*pos && *pos == st->last_pos) { 2052a8b690f9STom Herbert rc = tcp_seek_last_pos(seq); 2053a8b690f9STom Herbert if (rc) 2054a8b690f9STom Herbert goto out; 2055a8b690f9STom Herbert } 2056a8b690f9STom Herbert 20571da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 20581da177e4SLinus Torvalds st->num = 0; 2059a8b690f9STom Herbert st->bucket = 0; 2060a8b690f9STom Herbert st->offset = 0; 2061a8b690f9STom Herbert rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2062a8b690f9STom Herbert 2063a8b690f9STom Herbert out: 2064a8b690f9STom Herbert st->last_pos = *pos; 2065a8b690f9STom Herbert return rc; 20661da177e4SLinus Torvalds } 20671da177e4SLinus Torvalds 20681da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 20691da177e4SLinus Torvalds { 2070a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 20711da177e4SLinus Torvalds void *rc = NULL; 20721da177e4SLinus Torvalds 20731da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 20741da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 20751da177e4SLinus Torvalds goto out; 20761da177e4SLinus Torvalds } 20771da177e4SLinus Torvalds 20781da177e4SLinus Torvalds switch (st->state) { 20791da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 20801da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 20811da177e4SLinus Torvalds rc = listening_get_next(seq, v); 20821da177e4SLinus Torvalds if (!rc) { 20831da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 2084a8b690f9STom Herbert st->bucket = 0; 2085a8b690f9STom Herbert st->offset = 0; 20861da177e4SLinus Torvalds rc = established_get_first(seq); 20871da177e4SLinus Torvalds } 20881da177e4SLinus Torvalds break; 20891da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 20901da177e4SLinus Torvalds rc = established_get_next(seq, v); 20911da177e4SLinus Torvalds break; 20921da177e4SLinus Torvalds } 20931da177e4SLinus Torvalds out: 20941da177e4SLinus Torvalds ++*pos; 2095a8b690f9STom Herbert st->last_pos = *pos; 20961da177e4SLinus Torvalds return rc; 20971da177e4SLinus Torvalds } 20981da177e4SLinus Torvalds 20991da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 21001da177e4SLinus Torvalds { 21011da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 21021da177e4SLinus Torvalds 21031da177e4SLinus Torvalds switch (st->state) { 21041da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 21051da177e4SLinus Torvalds if (v) { 2106463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2107b2827053SEric Dumazet spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 21081da177e4SLinus Torvalds } 21091da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 21101da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 21115caea4eaSEric Dumazet spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 21121da177e4SLinus Torvalds break; 21131da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 21141da177e4SLinus Torvalds if (v) 21159db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 21161da177e4SLinus Torvalds break; 21171da177e4SLinus Torvalds } 21181da177e4SLinus Torvalds } 21191da177e4SLinus Torvalds 212073cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file) 21211da177e4SLinus Torvalds { 2122d9dda78bSAl Viro struct tcp_seq_afinfo *afinfo = PDE_DATA(inode); 21231da177e4SLinus Torvalds struct tcp_iter_state *s; 212452d6f3f1SDenis V. Lunev int err; 21251da177e4SLinus Torvalds 212652d6f3f1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 212752d6f3f1SDenis V. Lunev sizeof(struct tcp_iter_state)); 212852d6f3f1SDenis V. Lunev if (err < 0) 212952d6f3f1SDenis V. Lunev return err; 2130f40c8174SDaniel Lezcano 213152d6f3f1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 21321da177e4SLinus Torvalds s->family = afinfo->family; 2133a8b690f9STom Herbert s->last_pos = 0; 2134f40c8174SDaniel Lezcano return 0; 2135f40c8174SDaniel Lezcano } 213673cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open); 2137f40c8174SDaniel Lezcano 21386f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) 21391da177e4SLinus Torvalds { 21401da177e4SLinus Torvalds int rc = 0; 21411da177e4SLinus Torvalds struct proc_dir_entry *p; 21421da177e4SLinus Torvalds 21439427c4b3SDenis V. Lunev afinfo->seq_ops.start = tcp_seq_start; 21449427c4b3SDenis V. Lunev afinfo->seq_ops.next = tcp_seq_next; 21459427c4b3SDenis V. Lunev afinfo->seq_ops.stop = tcp_seq_stop; 21469427c4b3SDenis V. Lunev 214784841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 214873cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 214984841c3cSDenis V. Lunev if (!p) 21501da177e4SLinus Torvalds rc = -ENOMEM; 21511da177e4SLinus Torvalds return rc; 21521da177e4SLinus Torvalds } 21534bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register); 21541da177e4SLinus Torvalds 21556f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 21561da177e4SLinus Torvalds { 2157ece31ffdSGao feng remove_proc_entry(afinfo->name, net->proc_net); 21581da177e4SLinus Torvalds } 21594bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister); 21601da177e4SLinus Torvalds 2161d4f06873SEric Dumazet static void get_openreq4(const struct request_sock *req, 2162652586dfSTetsuo Handa struct seq_file *f, int i, kuid_t uid) 21631da177e4SLinus Torvalds { 21642e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 2165fa76ce73SEric Dumazet long delta = req->rsk_timer.expires - jiffies; 21661da177e4SLinus Torvalds 21675e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2168652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", 21691da177e4SLinus Torvalds i, 2170634fb979SEric Dumazet ireq->ir_loc_addr, 2171d4f06873SEric Dumazet ireq->ir_num, 2172634fb979SEric Dumazet ireq->ir_rmt_addr, 2173634fb979SEric Dumazet ntohs(ireq->ir_rmt_port), 21741da177e4SLinus Torvalds TCP_SYN_RECV, 21751da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. */ 21761da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 2177a399a805SEric Dumazet jiffies_delta_to_clock_t(delta), 2178e6c022a4SEric Dumazet req->num_timeout, 2179a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), uid), 21801da177e4SLinus Torvalds 0, /* non standard timer */ 21811da177e4SLinus Torvalds 0, /* open_requests have no inode */ 2182d4f06873SEric Dumazet 0, 2183652586dfSTetsuo Handa req); 21841da177e4SLinus Torvalds } 21851da177e4SLinus Torvalds 2186652586dfSTetsuo Handa static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) 21871da177e4SLinus Torvalds { 21881da177e4SLinus Torvalds int timer_active; 21891da177e4SLinus Torvalds unsigned long timer_expires; 2190cf533ea5SEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 2191cf4c6bf8SIlpo Järvinen const struct inet_connection_sock *icsk = inet_csk(sk); 2192cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 2193168a8f58SJerry Chu struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; 2194c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2195c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2196c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2197c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 219849d09007SEric Dumazet int rx_queue; 21991da177e4SLinus Torvalds 22006ba8a3b1SNandita Dukkipati if (icsk->icsk_pending == ICSK_TIME_RETRANS || 22016ba8a3b1SNandita Dukkipati icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 22026ba8a3b1SNandita Dukkipati icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 22031da177e4SLinus Torvalds timer_active = 1; 2204463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2205463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 22061da177e4SLinus Torvalds timer_active = 4; 2207463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2208cf4c6bf8SIlpo Järvinen } else if (timer_pending(&sk->sk_timer)) { 22091da177e4SLinus Torvalds timer_active = 2; 2210cf4c6bf8SIlpo Järvinen timer_expires = sk->sk_timer.expires; 22111da177e4SLinus Torvalds } else { 22121da177e4SLinus Torvalds timer_active = 0; 22131da177e4SLinus Torvalds timer_expires = jiffies; 22141da177e4SLinus Torvalds } 22151da177e4SLinus Torvalds 221649d09007SEric Dumazet if (sk->sk_state == TCP_LISTEN) 221749d09007SEric Dumazet rx_queue = sk->sk_ack_backlog; 221849d09007SEric Dumazet else 221949d09007SEric Dumazet /* 222049d09007SEric Dumazet * because we dont lock socket, we might find a transient negative value 222149d09007SEric Dumazet */ 222249d09007SEric Dumazet rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 222349d09007SEric Dumazet 22245e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2225652586dfSTetsuo Handa "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 2226cf4c6bf8SIlpo Järvinen i, src, srcp, dest, destp, sk->sk_state, 222747da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 222849d09007SEric Dumazet rx_queue, 22291da177e4SLinus Torvalds timer_active, 2230a399a805SEric Dumazet jiffies_delta_to_clock_t(timer_expires - jiffies), 2231463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 2232a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), 22336687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 2234cf4c6bf8SIlpo Järvinen sock_i_ino(sk), 2235cf4c6bf8SIlpo Järvinen atomic_read(&sk->sk_refcnt), sk, 22367be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_rto), 22377be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_ack.ato), 2238463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 22391da177e4SLinus Torvalds tp->snd_cwnd, 2240168a8f58SJerry Chu sk->sk_state == TCP_LISTEN ? 2241168a8f58SJerry Chu (fastopenq ? fastopenq->max_qlen : 0) : 2242652586dfSTetsuo Handa (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 22431da177e4SLinus Torvalds } 22441da177e4SLinus Torvalds 2245cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw, 2246652586dfSTetsuo Handa struct seq_file *f, int i) 22471da177e4SLinus Torvalds { 2248789f558cSEric Dumazet long delta = tw->tw_timer.expires - jiffies; 224923f33c2dSAl Viro __be32 dest, src; 22501da177e4SLinus Torvalds __u16 destp, srcp; 22511da177e4SLinus Torvalds 22521da177e4SLinus Torvalds dest = tw->tw_daddr; 22531da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 22541da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 22551da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 22561da177e4SLinus Torvalds 22575e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2258652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", 22591da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 2260a399a805SEric Dumazet 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2261652586dfSTetsuo Handa atomic_read(&tw->tw_refcnt), tw); 22621da177e4SLinus Torvalds } 22631da177e4SLinus Torvalds 22641da177e4SLinus Torvalds #define TMPSZ 150 22651da177e4SLinus Torvalds 22661da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 22671da177e4SLinus Torvalds { 22681da177e4SLinus Torvalds struct tcp_iter_state *st; 226905dbc7b5SEric Dumazet struct sock *sk = v; 22701da177e4SLinus Torvalds 2271652586dfSTetsuo Handa seq_setwidth(seq, TMPSZ - 1); 22721da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 2273652586dfSTetsuo Handa seq_puts(seq, " sl local_address rem_address st tx_queue " 22741da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 22751da177e4SLinus Torvalds "inode"); 22761da177e4SLinus Torvalds goto out; 22771da177e4SLinus Torvalds } 22781da177e4SLinus Torvalds st = seq->private; 22791da177e4SLinus Torvalds 22801da177e4SLinus Torvalds switch (st->state) { 22811da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 22821da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 228305dbc7b5SEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 2284652586dfSTetsuo Handa get_timewait4_sock(v, seq, st->num); 228505dbc7b5SEric Dumazet else 2286652586dfSTetsuo Handa get_tcp4_sock(v, seq, st->num); 22871da177e4SLinus Torvalds break; 22881da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 2289d4f06873SEric Dumazet get_openreq4(v, seq, st->num, st->uid); 22901da177e4SLinus Torvalds break; 22911da177e4SLinus Torvalds } 22921da177e4SLinus Torvalds out: 2293652586dfSTetsuo Handa seq_pad(seq, '\n'); 22941da177e4SLinus Torvalds return 0; 22951da177e4SLinus Torvalds } 22961da177e4SLinus Torvalds 229773cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = { 229873cb88ecSArjan van de Ven .owner = THIS_MODULE, 229973cb88ecSArjan van de Ven .open = tcp_seq_open, 230073cb88ecSArjan van de Ven .read = seq_read, 230173cb88ecSArjan van de Ven .llseek = seq_lseek, 230273cb88ecSArjan van de Ven .release = seq_release_net 230373cb88ecSArjan van de Ven }; 230473cb88ecSArjan van de Ven 23051da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 23061da177e4SLinus Torvalds .name = "tcp", 23071da177e4SLinus Torvalds .family = AF_INET, 230873cb88ecSArjan van de Ven .seq_fops = &tcp_afinfo_seq_fops, 23099427c4b3SDenis V. Lunev .seq_ops = { 23109427c4b3SDenis V. Lunev .show = tcp4_seq_show, 23119427c4b3SDenis V. Lunev }, 23121da177e4SLinus Torvalds }; 23131da177e4SLinus Torvalds 23142c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net) 2315757764f6SPavel Emelyanov { 2316757764f6SPavel Emelyanov return tcp_proc_register(net, &tcp4_seq_afinfo); 2317757764f6SPavel Emelyanov } 2318757764f6SPavel Emelyanov 23192c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net) 2320757764f6SPavel Emelyanov { 2321757764f6SPavel Emelyanov tcp_proc_unregister(net, &tcp4_seq_afinfo); 2322757764f6SPavel Emelyanov } 2323757764f6SPavel Emelyanov 2324757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = { 2325757764f6SPavel Emelyanov .init = tcp4_proc_init_net, 2326757764f6SPavel Emelyanov .exit = tcp4_proc_exit_net, 2327757764f6SPavel Emelyanov }; 2328757764f6SPavel Emelyanov 23291da177e4SLinus Torvalds int __init tcp4_proc_init(void) 23301da177e4SLinus Torvalds { 2331757764f6SPavel Emelyanov return register_pernet_subsys(&tcp4_net_ops); 23321da177e4SLinus Torvalds } 23331da177e4SLinus Torvalds 23341da177e4SLinus Torvalds void tcp4_proc_exit(void) 23351da177e4SLinus Torvalds { 2336757764f6SPavel Emelyanov unregister_pernet_subsys(&tcp4_net_ops); 23371da177e4SLinus Torvalds } 23381da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 23391da177e4SLinus Torvalds 23401da177e4SLinus Torvalds struct proto tcp_prot = { 23411da177e4SLinus Torvalds .name = "TCP", 23421da177e4SLinus Torvalds .owner = THIS_MODULE, 23431da177e4SLinus Torvalds .close = tcp_close, 23441da177e4SLinus Torvalds .connect = tcp_v4_connect, 23451da177e4SLinus Torvalds .disconnect = tcp_disconnect, 2346463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 23471da177e4SLinus Torvalds .ioctl = tcp_ioctl, 23481da177e4SLinus Torvalds .init = tcp_v4_init_sock, 23491da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 23501da177e4SLinus Torvalds .shutdown = tcp_shutdown, 23511da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 23521da177e4SLinus Torvalds .getsockopt = tcp_getsockopt, 23531da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 23547ba42910SChangli Gao .sendmsg = tcp_sendmsg, 23557ba42910SChangli Gao .sendpage = tcp_sendpage, 23561da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 235746d3ceabSEric Dumazet .release_cb = tcp_release_cb, 2358ab1e0a13SArnaldo Carvalho de Melo .hash = inet_hash, 2359ab1e0a13SArnaldo Carvalho de Melo .unhash = inet_unhash, 2360ab1e0a13SArnaldo Carvalho de Melo .get_port = inet_csk_get_port, 23611da177e4SLinus Torvalds .enter_memory_pressure = tcp_enter_memory_pressure, 2362c9bee3b7SEric Dumazet .stream_memory_free = tcp_stream_memory_free, 23631da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 23640a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 23651da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 23661da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 2367a4fe34bfSEric W. Biederman .sysctl_mem = sysctl_tcp_mem, 23681da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 23691da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 23701da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 23711da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 23723ab5aee7SEric Dumazet .slab_flags = SLAB_DESTROY_BY_RCU, 23736d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 237460236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 237539d8cda7SPavel Emelyanov .h.hashinfo = &tcp_hashinfo, 23767ba42910SChangli Gao .no_autobind = true, 2377543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 2378543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 2379543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 2380543d9cfeSArnaldo Carvalho de Melo #endif 2381c255a458SAndrew Morton #ifdef CONFIG_MEMCG_KMEM 2382d1a4c0b3SGlauber Costa .init_cgroup = tcp_init_cgroup, 2383d1a4c0b3SGlauber Costa .destroy_cgroup = tcp_destroy_cgroup, 2384d1a4c0b3SGlauber Costa .proto_cgroup = tcp_proto_cgroup, 2385d1a4c0b3SGlauber Costa #endif 23861da177e4SLinus Torvalds }; 23874bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot); 23881da177e4SLinus Torvalds 2389046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net) 2390046ee902SDenis V. Lunev { 2391bdbbb852SEric Dumazet int cpu; 2392bdbbb852SEric Dumazet 2393bdbbb852SEric Dumazet for_each_possible_cpu(cpu) 2394bdbbb852SEric Dumazet inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); 2395bdbbb852SEric Dumazet free_percpu(net->ipv4.tcp_sk); 2396bdbbb852SEric Dumazet } 2397bdbbb852SEric Dumazet 2398bdbbb852SEric Dumazet static int __net_init tcp_sk_init(struct net *net) 2399bdbbb852SEric Dumazet { 2400bdbbb852SEric Dumazet int res, cpu; 2401bdbbb852SEric Dumazet 2402bdbbb852SEric Dumazet net->ipv4.tcp_sk = alloc_percpu(struct sock *); 2403bdbbb852SEric Dumazet if (!net->ipv4.tcp_sk) 2404bdbbb852SEric Dumazet return -ENOMEM; 2405bdbbb852SEric Dumazet 2406bdbbb852SEric Dumazet for_each_possible_cpu(cpu) { 2407bdbbb852SEric Dumazet struct sock *sk; 2408bdbbb852SEric Dumazet 2409bdbbb852SEric Dumazet res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, 2410bdbbb852SEric Dumazet IPPROTO_TCP, net); 2411bdbbb852SEric Dumazet if (res) 2412bdbbb852SEric Dumazet goto fail; 2413bdbbb852SEric Dumazet *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; 2414bdbbb852SEric Dumazet } 241549213555SDaniel Borkmann 2416bdbbb852SEric Dumazet net->ipv4.sysctl_tcp_ecn = 2; 241749213555SDaniel Borkmann net->ipv4.sysctl_tcp_ecn_fallback = 1; 241849213555SDaniel Borkmann 2419b0f9ca53SFan Du net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; 24206b58e0a5SFan Du net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; 242105cbc0dbSFan Du net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; 2422bdbbb852SEric Dumazet 242349213555SDaniel Borkmann return 0; 2424bdbbb852SEric Dumazet fail: 2425bdbbb852SEric Dumazet tcp_sk_exit(net); 2426bdbbb852SEric Dumazet 2427bdbbb852SEric Dumazet return res; 2428b099ce26SEric W. Biederman } 2429b099ce26SEric W. Biederman 2430b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2431b099ce26SEric W. Biederman { 2432b099ce26SEric W. Biederman inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); 2433046ee902SDenis V. Lunev } 2434046ee902SDenis V. Lunev 2435046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = { 2436046ee902SDenis V. Lunev .init = tcp_sk_init, 2437046ee902SDenis V. Lunev .exit = tcp_sk_exit, 2438b099ce26SEric W. Biederman .exit_batch = tcp_sk_exit_batch, 2439046ee902SDenis V. Lunev }; 2440046ee902SDenis V. Lunev 24419b0f976fSDenis V. Lunev void __init tcp_v4_init(void) 24421da177e4SLinus Torvalds { 24435caea4eaSEric Dumazet inet_hashinfo_init(&tcp_hashinfo); 24446a1b3054SEric W. Biederman if (register_pernet_subsys(&tcp_sk_ops)) 24451da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 24461da177e4SLinus Torvalds } 2447