11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Implementation of the Transmission Control Protocol(TCP). 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * IPv4 specific functions 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * code split from: 121da177e4SLinus Torvalds * linux/ipv4/tcp.c 131da177e4SLinus Torvalds * linux/ipv4/tcp_input.c 141da177e4SLinus Torvalds * linux/ipv4/tcp_output.c 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * See tcp.c for author information 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 191da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 201da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 211da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 221da177e4SLinus Torvalds */ 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds /* 251da177e4SLinus Torvalds * Changes: 261da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 271da177e4SLinus Torvalds * This code is dedicated to John Dyson. 281da177e4SLinus Torvalds * David S. Miller : Change semantics of established hash, 291da177e4SLinus Torvalds * half is devoted to TIME_WAIT sockets 301da177e4SLinus Torvalds * and the rest go in the other half. 311da177e4SLinus Torvalds * Andi Kleen : Add support for syncookies and fixed 321da177e4SLinus Torvalds * some bugs: ip options weren't passed to 331da177e4SLinus Torvalds * the TCP layer, missed a check for an 341da177e4SLinus Torvalds * ACK bit. 351da177e4SLinus Torvalds * Andi Kleen : Implemented fast path mtu discovery. 361da177e4SLinus Torvalds * Fixed many serious bugs in the 3760236fddSArnaldo Carvalho de Melo * request_sock handling and moved 381da177e4SLinus Torvalds * most of it into the af independent code. 391da177e4SLinus Torvalds * Added tail drop and some other bugfixes. 40caa20d9aSStephen Hemminger * Added new listen semantics. 411da177e4SLinus Torvalds * Mike McLagan : Routing by source 421da177e4SLinus Torvalds * Juan Jose Ciarlante: ip_dynaddr bits 431da177e4SLinus Torvalds * Andi Kleen: various fixes. 441da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year 451da177e4SLinus Torvalds * coma. 461da177e4SLinus Torvalds * Andi Kleen : Fix new listen. 471da177e4SLinus Torvalds * Andi Kleen : Fix accept error reporting. 481da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 491da177e4SLinus Torvalds * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 501da177e4SLinus Torvalds * a single port at the same time. 511da177e4SLinus Torvalds */ 521da177e4SLinus Torvalds 53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt 541da177e4SLinus Torvalds 55eb4dea58SHerbert Xu #include <linux/bottom_half.h> 561da177e4SLinus Torvalds #include <linux/types.h> 571da177e4SLinus Torvalds #include <linux/fcntl.h> 581da177e4SLinus Torvalds #include <linux/module.h> 591da177e4SLinus Torvalds #include <linux/random.h> 601da177e4SLinus Torvalds #include <linux/cache.h> 611da177e4SLinus Torvalds #include <linux/jhash.h> 621da177e4SLinus Torvalds #include <linux/init.h> 631da177e4SLinus Torvalds #include <linux/times.h> 645a0e3ad6STejun Heo #include <linux/slab.h> 651da177e4SLinus Torvalds 66457c4cbcSEric W. Biederman #include <net/net_namespace.h> 671da177e4SLinus Torvalds #include <net/icmp.h> 68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 691da177e4SLinus Torvalds #include <net/tcp.h> 7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 711da177e4SLinus Torvalds #include <net/ipv6.h> 721da177e4SLinus Torvalds #include <net/inet_common.h> 736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 741da177e4SLinus Torvalds #include <net/xfrm.h> 756e5714eaSDavid S. Miller #include <net/secure_seq.h> 76076bb0c8SEliezer Tamir #include <net/busy_poll.h> 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds #include <linux/inet.h> 791da177e4SLinus Torvalds #include <linux/ipv6.h> 801da177e4SLinus Torvalds #include <linux/stddef.h> 811da177e4SLinus Torvalds #include <linux/proc_fs.h> 821da177e4SLinus Torvalds #include <linux/seq_file.h> 836797318eSIvan Delalande #include <linux/inetdevice.h> 841da177e4SLinus Torvalds 85cf80e0e4SHerbert Xu #include <crypto/hash.h> 86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h> 87cfb6eeb4SYOSHIFUJI Hideaki 88cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 89a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 90318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th); 91cfb6eeb4SYOSHIFUJI Hideaki #endif 92cfb6eeb4SYOSHIFUJI Hideaki 935caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo; 944bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo); 951da177e4SLinus Torvalds 9684b114b9SEric Dumazet static u32 tcp_v4_init_seq(const struct sk_buff *skb) 971da177e4SLinus Torvalds { 9884b114b9SEric Dumazet return secure_tcp_seq(ip_hdr(skb)->daddr, 99eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 100aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->dest, 10184b114b9SEric Dumazet tcp_hdr(skb)->source); 10284b114b9SEric Dumazet } 10384b114b9SEric Dumazet 1045d2ed052SEric Dumazet static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) 10584b114b9SEric Dumazet { 1065d2ed052SEric Dumazet return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); 1071da177e4SLinus Torvalds } 1081da177e4SLinus Torvalds 1096d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1106d6ee43eSArnaldo Carvalho de Melo { 1116d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1126d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1136d6ee43eSArnaldo Carvalho de Melo 1146d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1156d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1166d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1176d6ee43eSArnaldo Carvalho de Melo 1186d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1196d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1206d6ee43eSArnaldo Carvalho de Melo holder. 1216d6ee43eSArnaldo Carvalho de Melo 1226d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1236d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 1246d6ee43eSArnaldo Carvalho de Melo */ 1256d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 12656ab6b93SHaishuang Yan (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse && 1279d729f72SJames Morris get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 1286d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1296d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1306d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1316d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1326d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1336d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1346d6ee43eSArnaldo Carvalho de Melo return 1; 1356d6ee43eSArnaldo Carvalho de Melo } 1366d6ee43eSArnaldo Carvalho de Melo 1376d6ee43eSArnaldo Carvalho de Melo return 0; 1386d6ee43eSArnaldo Carvalho de Melo } 1396d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1406d6ee43eSArnaldo Carvalho de Melo 1411da177e4SLinus Torvalds /* This will initiate an outgoing connection. */ 1421da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1431da177e4SLinus Torvalds { 1442d7192d6SDavid S. Miller struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1451da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1461da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 147dca8b089SDavid S. Miller __be16 orig_sport, orig_dport; 148bada8adcSAl Viro __be32 daddr, nexthop; 149da905bd1SDavid S. Miller struct flowi4 *fl4; 1502d7192d6SDavid S. Miller struct rtable *rt; 1511da177e4SLinus Torvalds int err; 152f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 1531946e672SHaishuang Yan struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; 1541da177e4SLinus Torvalds 1551da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1561da177e4SLinus Torvalds return -EINVAL; 1571da177e4SLinus Torvalds 1581da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1591da177e4SLinus Torvalds return -EAFNOSUPPORT; 1601da177e4SLinus Torvalds 1611da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 162f6d8bd05SEric Dumazet inet_opt = rcu_dereference_protected(inet->inet_opt, 1631e1d04e6SHannes Frederic Sowa lockdep_sock_is_held(sk)); 164f6d8bd05SEric Dumazet if (inet_opt && inet_opt->opt.srr) { 1651da177e4SLinus Torvalds if (!daddr) 1661da177e4SLinus Torvalds return -EINVAL; 167f6d8bd05SEric Dumazet nexthop = inet_opt->opt.faddr; 1681da177e4SLinus Torvalds } 1691da177e4SLinus Torvalds 170dca8b089SDavid S. Miller orig_sport = inet->inet_sport; 171dca8b089SDavid S. Miller orig_dport = usin->sin_port; 172da905bd1SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 173da905bd1SDavid S. Miller rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 1741da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1751da177e4SLinus Torvalds IPPROTO_TCP, 1760e0d44abSSteffen Klassert orig_sport, orig_dport, sk); 177b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 178b23dd4feSDavid S. Miller err = PTR_ERR(rt); 179b23dd4feSDavid S. Miller if (err == -ENETUNREACH) 180f1d8cba6SEric Dumazet IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 181b23dd4feSDavid S. Miller return err; 182584bdf8cSWei Dong } 1831da177e4SLinus Torvalds 1841da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1851da177e4SLinus Torvalds ip_rt_put(rt); 1861da177e4SLinus Torvalds return -ENETUNREACH; 1871da177e4SLinus Torvalds } 1881da177e4SLinus Torvalds 189f6d8bd05SEric Dumazet if (!inet_opt || !inet_opt->opt.srr) 190da905bd1SDavid S. Miller daddr = fl4->daddr; 1911da177e4SLinus Torvalds 192c720c7e8SEric Dumazet if (!inet->inet_saddr) 193da905bd1SDavid S. Miller inet->inet_saddr = fl4->saddr; 194d1e559d0SEric Dumazet sk_rcv_saddr_set(sk, inet->inet_saddr); 1951da177e4SLinus Torvalds 196c720c7e8SEric Dumazet if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 1971da177e4SLinus Torvalds /* Reset inherited state */ 1981da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 1991da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 200ee995283SPavel Emelyanov if (likely(!tp->repair)) 2011da177e4SLinus Torvalds tp->write_seq = 0; 2021da177e4SLinus Torvalds } 2031da177e4SLinus Torvalds 204c720c7e8SEric Dumazet inet->inet_dport = usin->sin_port; 205d1e559d0SEric Dumazet sk_daddr_set(sk, daddr); 2061da177e4SLinus Torvalds 207d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 208f6d8bd05SEric Dumazet if (inet_opt) 209f6d8bd05SEric Dumazet inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 2101da177e4SLinus Torvalds 211bee7ca9eSWilliam Allen Simpson tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 2121da177e4SLinus Torvalds 2131da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 2141da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2151da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2161da177e4SLinus Torvalds * complete initialization after this. 2171da177e4SLinus Torvalds */ 2181da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 2191946e672SHaishuang Yan err = inet_hash_connect(tcp_death_row, sk); 2201da177e4SLinus Torvalds if (err) 2211da177e4SLinus Torvalds goto failure; 2221da177e4SLinus Torvalds 223877d1f62STom Herbert sk_set_txhash(sk); 2249e7ceb06SSathya Perla 225da905bd1SDavid S. Miller rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 226c720c7e8SEric Dumazet inet->inet_sport, inet->inet_dport, sk); 227b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 228b23dd4feSDavid S. Miller err = PTR_ERR(rt); 229b23dd4feSDavid S. Miller rt = NULL; 2301da177e4SLinus Torvalds goto failure; 231b23dd4feSDavid S. Miller } 2321da177e4SLinus Torvalds /* OK, now commit destination to socket. */ 233bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 234d8d1f30bSChangli Gao sk_setup_caps(sk, &rt->dst); 23519f6d3f3SWei Wang rt = NULL; 2361da177e4SLinus Torvalds 23700355fa5SAlexey Kodanev if (likely(!tp->repair)) { 23884b114b9SEric Dumazet if (!tp->write_seq) 23984b114b9SEric Dumazet tp->write_seq = secure_tcp_seq(inet->inet_saddr, 240c720c7e8SEric Dumazet inet->inet_daddr, 241c720c7e8SEric Dumazet inet->inet_sport, 24284b114b9SEric Dumazet usin->sin_port); 2435d2ed052SEric Dumazet tp->tsoffset = secure_tcp_ts_off(sock_net(sk), 2445d2ed052SEric Dumazet inet->inet_saddr, 24584b114b9SEric Dumazet inet->inet_daddr); 24600355fa5SAlexey Kodanev } 2471da177e4SLinus Torvalds 248c720c7e8SEric Dumazet inet->inet_id = tp->write_seq ^ jiffies; 2491da177e4SLinus Torvalds 25019f6d3f3SWei Wang if (tcp_fastopen_defer_connect(sk, &err)) 25119f6d3f3SWei Wang return err; 25219f6d3f3SWei Wang if (err) 25319f6d3f3SWei Wang goto failure; 25419f6d3f3SWei Wang 2551da177e4SLinus Torvalds err = tcp_connect(sk); 256ee995283SPavel Emelyanov 2571da177e4SLinus Torvalds if (err) 2581da177e4SLinus Torvalds goto failure; 2591da177e4SLinus Torvalds 2601da177e4SLinus Torvalds return 0; 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds failure: 2637174259eSArnaldo Carvalho de Melo /* 2647174259eSArnaldo Carvalho de Melo * This unhashes the socket and releases the local port, 2657174259eSArnaldo Carvalho de Melo * if necessary. 2667174259eSArnaldo Carvalho de Melo */ 2671da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2681da177e4SLinus Torvalds ip_rt_put(rt); 2691da177e4SLinus Torvalds sk->sk_route_caps = 0; 270c720c7e8SEric Dumazet inet->inet_dport = 0; 2711da177e4SLinus Torvalds return err; 2721da177e4SLinus Torvalds } 2734bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect); 2741da177e4SLinus Torvalds 2751da177e4SLinus Torvalds /* 276563d34d0SEric Dumazet * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. 277563d34d0SEric Dumazet * It can be called through tcp_release_cb() if socket was owned by user 278563d34d0SEric Dumazet * at the time tcp_v4_err() was called to handle ICMP message. 2791da177e4SLinus Torvalds */ 2804fab9071SNeal Cardwell void tcp_v4_mtu_reduced(struct sock *sk) 2811da177e4SLinus Torvalds { 2821da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 28302b2faafSEric Dumazet struct dst_entry *dst; 28402b2faafSEric Dumazet u32 mtu; 2851da177e4SLinus Torvalds 28602b2faafSEric Dumazet if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 28702b2faafSEric Dumazet return; 28802b2faafSEric Dumazet mtu = tcp_sk(sk)->mtu_info; 28980d0a69fSDavid S. Miller dst = inet_csk_update_pmtu(sk, mtu); 29080d0a69fSDavid S. Miller if (!dst) 2911da177e4SLinus Torvalds return; 2921da177e4SLinus Torvalds 2931da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 2941da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 2951da177e4SLinus Torvalds */ 2961da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 2971da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 2981da177e4SLinus Torvalds 2991da177e4SLinus Torvalds mtu = dst_mtu(dst); 3001da177e4SLinus Torvalds 3011da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 302482fc609SHannes Frederic Sowa ip_sk_accept_pmtu(sk) && 303d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 3041da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 3051da177e4SLinus Torvalds 3061da177e4SLinus Torvalds /* Resend the TCP packet because it's 3071da177e4SLinus Torvalds * clear that the old packet has been 3081da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 3091da177e4SLinus Torvalds * discovery. 3101da177e4SLinus Torvalds */ 3111da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3121da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3131da177e4SLinus Torvalds } 3144fab9071SNeal Cardwell EXPORT_SYMBOL(tcp_v4_mtu_reduced); 3151da177e4SLinus Torvalds 31655be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk) 31755be7a9cSDavid S. Miller { 31855be7a9cSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0); 31955be7a9cSDavid S. Miller 3201ed5c48fSDavid S. Miller if (dst) 3216700c270SDavid S. Miller dst->ops->redirect(dst, sk, skb); 32255be7a9cSDavid S. Miller } 32355be7a9cSDavid S. Miller 32426e37360SEric Dumazet 32526e37360SEric Dumazet /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ 3269cf74903SEric Dumazet void tcp_req_err(struct sock *sk, u32 seq, bool abort) 32726e37360SEric Dumazet { 32826e37360SEric Dumazet struct request_sock *req = inet_reqsk(sk); 32926e37360SEric Dumazet struct net *net = sock_net(sk); 33026e37360SEric Dumazet 33126e37360SEric Dumazet /* ICMPs are not backlogged, hence we cannot get 33226e37360SEric Dumazet * an established socket here. 33326e37360SEric Dumazet */ 33426e37360SEric Dumazet if (seq != tcp_rsk(req)->snt_isn) { 33502a1d6e7SEric Dumazet __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 3369cf74903SEric Dumazet } else if (abort) { 33726e37360SEric Dumazet /* 33826e37360SEric Dumazet * Still in SYN_RECV, just remove it silently. 33926e37360SEric Dumazet * There is no good way to pass the error to the newly 34026e37360SEric Dumazet * created socket, and POSIX does not want network 34126e37360SEric Dumazet * errors returned from accept(). 34226e37360SEric Dumazet */ 343c6973669SFan Du inet_csk_reqsk_queue_drop(req->rsk_listener, req); 3449caad864SEric Dumazet tcp_listendrop(req->rsk_listener); 34526e37360SEric Dumazet } 346ef84d8ceSEric Dumazet reqsk_put(req); 34726e37360SEric Dumazet } 34826e37360SEric Dumazet EXPORT_SYMBOL(tcp_req_err); 34926e37360SEric Dumazet 3501da177e4SLinus Torvalds /* 3511da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3521da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3531da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3541da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3551da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3561da177e4SLinus Torvalds * to find the appropriate port. 3571da177e4SLinus Torvalds * 3581da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". When 3591da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3601da177e4SLinus Torvalds * and for some paths there is no check at all. 3611da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3621da177e4SLinus Torvalds * is probably better. 3631da177e4SLinus Torvalds * 3641da177e4SLinus Torvalds */ 3651da177e4SLinus Torvalds 3664d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 3671da177e4SLinus Torvalds { 368b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; 3694d1a2d9eSDamian Lukowski struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 370f1ecd5d9SDamian Lukowski struct inet_connection_sock *icsk; 3711da177e4SLinus Torvalds struct tcp_sock *tp; 3721da177e4SLinus Torvalds struct inet_sock *inet; 3734d1a2d9eSDamian Lukowski const int type = icmp_hdr(icmp_skb)->type; 3744d1a2d9eSDamian Lukowski const int code = icmp_hdr(icmp_skb)->code; 3751da177e4SLinus Torvalds struct sock *sk; 376f1ecd5d9SDamian Lukowski struct sk_buff *skb; 3770a672f74SYuchung Cheng struct request_sock *fastopen; 3789a568de4SEric Dumazet u32 seq, snd_una; 3799a568de4SEric Dumazet s32 remaining; 3809a568de4SEric Dumazet u32 delta_us; 3811da177e4SLinus Torvalds int err; 3824d1a2d9eSDamian Lukowski struct net *net = dev_net(icmp_skb->dev); 3831da177e4SLinus Torvalds 38426e37360SEric Dumazet sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, 38526e37360SEric Dumazet th->dest, iph->saddr, ntohs(th->source), 3863fa6f616SDavid Ahern inet_iif(icmp_skb), 0); 3871da177e4SLinus Torvalds if (!sk) { 3885d3848bcSEric Dumazet __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); 3891da177e4SLinus Torvalds return; 3901da177e4SLinus Torvalds } 3911da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 3929469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 3931da177e4SLinus Torvalds return; 3941da177e4SLinus Torvalds } 39526e37360SEric Dumazet seq = ntohl(th->seq); 39626e37360SEric Dumazet if (sk->sk_state == TCP_NEW_SYN_RECV) 3979cf74903SEric Dumazet return tcp_req_err(sk, seq, 3989cf74903SEric Dumazet type == ICMP_PARAMETERPROB || 3999cf74903SEric Dumazet type == ICMP_TIME_EXCEEDED || 4009cf74903SEric Dumazet (type == ICMP_DEST_UNREACH && 4019cf74903SEric Dumazet (code == ICMP_NET_UNREACH || 4029cf74903SEric Dumazet code == ICMP_HOST_UNREACH))); 4031da177e4SLinus Torvalds 4041da177e4SLinus Torvalds bh_lock_sock(sk); 4051da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 4061da177e4SLinus Torvalds * servers this needs to be solved differently. 407563d34d0SEric Dumazet * We do take care of PMTU discovery (RFC1191) special case : 408563d34d0SEric Dumazet * we can receive locally generated ICMP messages while socket is held. 4091da177e4SLinus Torvalds */ 410b74aa930SEric Dumazet if (sock_owned_by_user(sk)) { 411b74aa930SEric Dumazet if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) 41202a1d6e7SEric Dumazet __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); 413b74aa930SEric Dumazet } 4141da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 4151da177e4SLinus Torvalds goto out; 4161da177e4SLinus Torvalds 41797e3ecd1Sstephen hemminger if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 41802a1d6e7SEric Dumazet __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 41997e3ecd1Sstephen hemminger goto out; 42097e3ecd1Sstephen hemminger } 42197e3ecd1Sstephen hemminger 422f1ecd5d9SDamian Lukowski icsk = inet_csk(sk); 4231da177e4SLinus Torvalds tp = tcp_sk(sk); 4240a672f74SYuchung Cheng /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 4250a672f74SYuchung Cheng fastopen = tp->fastopen_rsk; 4260a672f74SYuchung Cheng snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 4271da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 4280a672f74SYuchung Cheng !between(seq, snd_una, tp->snd_nxt)) { 42902a1d6e7SEric Dumazet __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 4301da177e4SLinus Torvalds goto out; 4311da177e4SLinus Torvalds } 4321da177e4SLinus Torvalds 4331da177e4SLinus Torvalds switch (type) { 43455be7a9cSDavid S. Miller case ICMP_REDIRECT: 43545caeaa5SJon Maxwell if (!sock_owned_by_user(sk)) 43655be7a9cSDavid S. Miller do_redirect(icmp_skb, sk); 43755be7a9cSDavid S. Miller goto out; 4381da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 4391da177e4SLinus Torvalds /* Just silently ignore these. */ 4401da177e4SLinus Torvalds goto out; 4411da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 4421da177e4SLinus Torvalds err = EPROTO; 4431da177e4SLinus Torvalds break; 4441da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 4451da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 4461da177e4SLinus Torvalds goto out; 4471da177e4SLinus Torvalds 4481da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 4490d4f0608SEric Dumazet /* We are not interested in TCP_LISTEN and open_requests 4500d4f0608SEric Dumazet * (SYN-ACKs send out by Linux are always <576bytes so 4510d4f0608SEric Dumazet * they should go through unfragmented). 4520d4f0608SEric Dumazet */ 4530d4f0608SEric Dumazet if (sk->sk_state == TCP_LISTEN) 4540d4f0608SEric Dumazet goto out; 4550d4f0608SEric Dumazet 456563d34d0SEric Dumazet tp->mtu_info = info; 457144d56e9SEric Dumazet if (!sock_owned_by_user(sk)) { 458563d34d0SEric Dumazet tcp_v4_mtu_reduced(sk); 459144d56e9SEric Dumazet } else { 4607aa5470cSEric Dumazet if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) 461144d56e9SEric Dumazet sock_hold(sk); 462144d56e9SEric Dumazet } 4631da177e4SLinus Torvalds goto out; 4641da177e4SLinus Torvalds } 4651da177e4SLinus Torvalds 4661da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 467f1ecd5d9SDamian Lukowski /* check if icmp_skb allows revert of backoff 468f1ecd5d9SDamian Lukowski * (see draft-zimmermann-tcp-lcd) */ 469f1ecd5d9SDamian Lukowski if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) 470f1ecd5d9SDamian Lukowski break; 471f1ecd5d9SDamian Lukowski if (seq != tp->snd_una || !icsk->icsk_retransmits || 4720a672f74SYuchung Cheng !icsk->icsk_backoff || fastopen) 473f1ecd5d9SDamian Lukowski break; 474f1ecd5d9SDamian Lukowski 4758f49c270SDavid S. Miller if (sock_owned_by_user(sk)) 4768f49c270SDavid S. Miller break; 4778f49c270SDavid S. Miller 478f1ecd5d9SDamian Lukowski icsk->icsk_backoff--; 479fcdd1cf4SEric Dumazet icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : 480fcdd1cf4SEric Dumazet TCP_TIMEOUT_INIT; 481fcdd1cf4SEric Dumazet icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); 482f1ecd5d9SDamian Lukowski 483f1ecd5d9SDamian Lukowski skb = tcp_write_queue_head(sk); 484f1ecd5d9SDamian Lukowski BUG_ON(!skb); 485f1ecd5d9SDamian Lukowski 4869a568de4SEric Dumazet tcp_mstamp_refresh(tp); 4879a568de4SEric Dumazet delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp); 4887faee5c0SEric Dumazet remaining = icsk->icsk_rto - 4899a568de4SEric Dumazet usecs_to_jiffies(delta_us); 490f1ecd5d9SDamian Lukowski 4919a568de4SEric Dumazet if (remaining > 0) { 492f1ecd5d9SDamian Lukowski inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 493f1ecd5d9SDamian Lukowski remaining, TCP_RTO_MAX); 494f1ecd5d9SDamian Lukowski } else { 495f1ecd5d9SDamian Lukowski /* RTO revert clocked out retransmission. 496f1ecd5d9SDamian Lukowski * Will retransmit now */ 497f1ecd5d9SDamian Lukowski tcp_retransmit_timer(sk); 498f1ecd5d9SDamian Lukowski } 499f1ecd5d9SDamian Lukowski 5001da177e4SLinus Torvalds break; 5011da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 5021da177e4SLinus Torvalds err = EHOSTUNREACH; 5031da177e4SLinus Torvalds break; 5041da177e4SLinus Torvalds default: 5051da177e4SLinus Torvalds goto out; 5061da177e4SLinus Torvalds } 5071da177e4SLinus Torvalds 5081da177e4SLinus Torvalds switch (sk->sk_state) { 5091da177e4SLinus Torvalds case TCP_SYN_SENT: 5100a672f74SYuchung Cheng case TCP_SYN_RECV: 5110a672f74SYuchung Cheng /* Only in fast or simultaneous open. If a fast open socket is 5120a672f74SYuchung Cheng * is already accepted it is treated as a connected one below. 5131da177e4SLinus Torvalds */ 51451456b29SIan Morris if (fastopen && !fastopen->sk) 5150a672f74SYuchung Cheng break; 5160a672f74SYuchung Cheng 5171da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 5181da177e4SLinus Torvalds sk->sk_err = err; 5191da177e4SLinus Torvalds 5201da177e4SLinus Torvalds sk->sk_error_report(sk); 5211da177e4SLinus Torvalds 5221da177e4SLinus Torvalds tcp_done(sk); 5231da177e4SLinus Torvalds } else { 5241da177e4SLinus Torvalds sk->sk_err_soft = err; 5251da177e4SLinus Torvalds } 5261da177e4SLinus Torvalds goto out; 5271da177e4SLinus Torvalds } 5281da177e4SLinus Torvalds 5291da177e4SLinus Torvalds /* If we've already connected we will keep trying 5301da177e4SLinus Torvalds * until we time out, or the user gives up. 5311da177e4SLinus Torvalds * 5321da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 5331da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 5341da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 5351da177e4SLinus Torvalds * 5361da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 5371da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 5381da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 5391da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 5401da177e4SLinus Torvalds * 5411da177e4SLinus Torvalds * Now we are in compliance with RFCs. 5421da177e4SLinus Torvalds * --ANK (980905) 5431da177e4SLinus Torvalds */ 5441da177e4SLinus Torvalds 5451da177e4SLinus Torvalds inet = inet_sk(sk); 5461da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 5471da177e4SLinus Torvalds sk->sk_err = err; 5481da177e4SLinus Torvalds sk->sk_error_report(sk); 5491da177e4SLinus Torvalds } else { /* Only an error on timeout */ 5501da177e4SLinus Torvalds sk->sk_err_soft = err; 5511da177e4SLinus Torvalds } 5521da177e4SLinus Torvalds 5531da177e4SLinus Torvalds out: 5541da177e4SLinus Torvalds bh_unlock_sock(sk); 5551da177e4SLinus Torvalds sock_put(sk); 5561da177e4SLinus Torvalds } 5571da177e4SLinus Torvalds 55828850dc7SDaniel Borkmann void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) 5591da177e4SLinus Torvalds { 560aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 5611da177e4SLinus Torvalds 56284fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) { 563419f9f89SHerbert Xu th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); 564663ead3bSHerbert Xu skb->csum_start = skb_transport_header(skb) - skb->head; 565ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 5661da177e4SLinus Torvalds } else { 567419f9f89SHerbert Xu th->check = tcp_v4_check(skb->len, saddr, daddr, 56807f0757aSJoe Perches csum_partial(th, 5691da177e4SLinus Torvalds th->doff << 2, 5701da177e4SLinus Torvalds skb->csum)); 5711da177e4SLinus Torvalds } 5721da177e4SLinus Torvalds } 5731da177e4SLinus Torvalds 574419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */ 575bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) 576419f9f89SHerbert Xu { 577cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 578419f9f89SHerbert Xu 579419f9f89SHerbert Xu __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 580419f9f89SHerbert Xu } 5814bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check); 582419f9f89SHerbert Xu 5831da177e4SLinus Torvalds /* 5841da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 5851da177e4SLinus Torvalds * 5861da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 5871da177e4SLinus Torvalds * for reset. 5881da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 5891da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 5901da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 5911da177e4SLinus Torvalds * So that we build reply only basing on parameters 5921da177e4SLinus Torvalds * arrived with segment. 5931da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 5941da177e4SLinus Torvalds */ 5951da177e4SLinus Torvalds 596a00e7444SEric Dumazet static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) 5971da177e4SLinus Torvalds { 598cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 599cfb6eeb4SYOSHIFUJI Hideaki struct { 600cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr th; 601cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 602714e85beSAl Viro __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; 603cfb6eeb4SYOSHIFUJI Hideaki #endif 604cfb6eeb4SYOSHIFUJI Hideaki } rep; 6051da177e4SLinus Torvalds struct ip_reply_arg arg; 606cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 607e46787f0SFlorian Westphal struct tcp_md5sig_key *key = NULL; 608658ddaafSShawn Lu const __u8 *hash_location = NULL; 609658ddaafSShawn Lu unsigned char newhash[16]; 610658ddaafSShawn Lu int genhash; 611658ddaafSShawn Lu struct sock *sk1 = NULL; 612cfb6eeb4SYOSHIFUJI Hideaki #endif 613a86b1e30SPavel Emelyanov struct net *net; 6141da177e4SLinus Torvalds 6151da177e4SLinus Torvalds /* Never send a reset in response to a reset. */ 6161da177e4SLinus Torvalds if (th->rst) 6171da177e4SLinus Torvalds return; 6181da177e4SLinus Torvalds 619c3658e8dSEric Dumazet /* If sk not NULL, it means we did a successful lookup and incoming 620c3658e8dSEric Dumazet * route had to be correct. prequeue might have dropped our dst. 621c3658e8dSEric Dumazet */ 622c3658e8dSEric Dumazet if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL) 6231da177e4SLinus Torvalds return; 6241da177e4SLinus Torvalds 6251da177e4SLinus Torvalds /* Swap the send and the receive. */ 626cfb6eeb4SYOSHIFUJI Hideaki memset(&rep, 0, sizeof(rep)); 627cfb6eeb4SYOSHIFUJI Hideaki rep.th.dest = th->source; 628cfb6eeb4SYOSHIFUJI Hideaki rep.th.source = th->dest; 629cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = sizeof(struct tcphdr) / 4; 630cfb6eeb4SYOSHIFUJI Hideaki rep.th.rst = 1; 6311da177e4SLinus Torvalds 6321da177e4SLinus Torvalds if (th->ack) { 633cfb6eeb4SYOSHIFUJI Hideaki rep.th.seq = th->ack_seq; 6341da177e4SLinus Torvalds } else { 635cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack = 1; 636cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 6371da177e4SLinus Torvalds skb->len - (th->doff << 2)); 6381da177e4SLinus Torvalds } 6391da177e4SLinus Torvalds 6407174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 641cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_base = (unsigned char *)&rep; 642cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len = sizeof(rep.th); 643cfb6eeb4SYOSHIFUJI Hideaki 6440f85feaeSEric Dumazet net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 645cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 6463b24d854SEric Dumazet rcu_read_lock(); 647658ddaafSShawn Lu hash_location = tcp_parse_md5sig_option(th); 648271c3b9bSFlorian Westphal if (sk && sk_fullsock(sk)) { 649e46787f0SFlorian Westphal key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) 650e46787f0SFlorian Westphal &ip_hdr(skb)->saddr, AF_INET); 651e46787f0SFlorian Westphal } else if (hash_location) { 652658ddaafSShawn Lu /* 653658ddaafSShawn Lu * active side is lost. Try to find listening socket through 654658ddaafSShawn Lu * source port, and then find md5 key through listening socket. 655658ddaafSShawn Lu * we are not loose security here: 656658ddaafSShawn Lu * Incoming packet is checked with md5 hash with finding key, 657658ddaafSShawn Lu * no RST generated if md5 hash doesn't match. 658658ddaafSShawn Lu */ 659a583636aSCraig Gallek sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0, 660a583636aSCraig Gallek ip_hdr(skb)->saddr, 661da5e3630STom Herbert th->source, ip_hdr(skb)->daddr, 6623fa6f616SDavid Ahern ntohs(th->source), inet_iif(skb), 6633fa6f616SDavid Ahern tcp_v4_sdif(skb)); 664658ddaafSShawn Lu /* don't send rst if it can't find key */ 665658ddaafSShawn Lu if (!sk1) 6663b24d854SEric Dumazet goto out; 6673b24d854SEric Dumazet 668658ddaafSShawn Lu key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) 669658ddaafSShawn Lu &ip_hdr(skb)->saddr, AF_INET); 670658ddaafSShawn Lu if (!key) 6713b24d854SEric Dumazet goto out; 6723b24d854SEric Dumazet 673658ddaafSShawn Lu 67439f8e58eSEric Dumazet genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); 675658ddaafSShawn Lu if (genhash || memcmp(hash_location, newhash, 16) != 0) 6763b24d854SEric Dumazet goto out; 6773b24d854SEric Dumazet 678658ddaafSShawn Lu } 679658ddaafSShawn Lu 680cfb6eeb4SYOSHIFUJI Hideaki if (key) { 681cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | 682cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 683cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 684cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 685cfb6eeb4SYOSHIFUJI Hideaki /* Update length and the length the header thinks exists */ 686cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 687cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len / 4; 688cfb6eeb4SYOSHIFUJI Hideaki 68949a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], 69078e645cbSIlpo Järvinen key, ip_hdr(skb)->saddr, 69178e645cbSIlpo Järvinen ip_hdr(skb)->daddr, &rep.th); 692cfb6eeb4SYOSHIFUJI Hideaki } 693cfb6eeb4SYOSHIFUJI Hideaki #endif 694eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 695eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 69652cd5750SIlpo Järvinen arg.iov[0].iov_len, IPPROTO_TCP, 0); 6971da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 698271c3b9bSFlorian Westphal arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0; 699271c3b9bSFlorian Westphal 700e2446eaaSShawn Lu /* When socket is gone, all binding information is lost. 7014c675258SAlexey Kuznetsov * routing might fail in this case. No choice here, if we choose to force 7024c675258SAlexey Kuznetsov * input interface, we will misroute in case of asymmetric route. 703e2446eaaSShawn Lu */ 7044c675258SAlexey Kuznetsov if (sk) 7054c675258SAlexey Kuznetsov arg.bound_dev_if = sk->sk_bound_dev_if; 7061da177e4SLinus Torvalds 707271c3b9bSFlorian Westphal BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != 708271c3b9bSFlorian Westphal offsetof(struct inet_timewait_sock, tw_bound_dev_if)); 709271c3b9bSFlorian Westphal 71066b13d99SEric Dumazet arg.tos = ip_hdr(skb)->tos; 711e2d118a1SLorenzo Colitti arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 71247dcc20aSEric Dumazet local_bh_disable(); 713bdbbb852SEric Dumazet ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 714bdbbb852SEric Dumazet skb, &TCP_SKB_CB(skb)->header.h4.opt, 71524a2d43dSEric Dumazet ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 71624a2d43dSEric Dumazet &arg, arg.iov[0].iov_len); 7171da177e4SLinus Torvalds 71890bbcc60SEric Dumazet __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 71990bbcc60SEric Dumazet __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 72047dcc20aSEric Dumazet local_bh_enable(); 721658ddaafSShawn Lu 722658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG 7233b24d854SEric Dumazet out: 724658ddaafSShawn Lu rcu_read_unlock(); 725658ddaafSShawn Lu #endif 7261da177e4SLinus Torvalds } 7271da177e4SLinus Torvalds 7281da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 7291da177e4SLinus Torvalds outside socket context is ugly, certainly. What can I do? 7301da177e4SLinus Torvalds */ 7311da177e4SLinus Torvalds 732e2d118a1SLorenzo Colitti static void tcp_v4_send_ack(const struct sock *sk, 733e62a123bSEric Dumazet struct sk_buff *skb, u32 seq, u32 ack, 734ee684b6fSAndrey Vagin u32 win, u32 tsval, u32 tsecr, int oif, 73588ef4a5aSKOVACS Krisztian struct tcp_md5sig_key *key, 73666b13d99SEric Dumazet int reply_flags, u8 tos) 7371da177e4SLinus Torvalds { 738cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 7391da177e4SLinus Torvalds struct { 7401da177e4SLinus Torvalds struct tcphdr th; 741714e85beSAl Viro __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 742cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 743cfb6eeb4SYOSHIFUJI Hideaki + (TCPOLEN_MD5SIG_ALIGNED >> 2) 744cfb6eeb4SYOSHIFUJI Hideaki #endif 745cfb6eeb4SYOSHIFUJI Hideaki ]; 7461da177e4SLinus Torvalds } rep; 747e2d118a1SLorenzo Colitti struct net *net = sock_net(sk); 7481da177e4SLinus Torvalds struct ip_reply_arg arg; 7491da177e4SLinus Torvalds 7501da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 7517174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 7521da177e4SLinus Torvalds 7531da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 7541da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 755ee684b6fSAndrey Vagin if (tsecr) { 756cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 7571da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 7581da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 759ee684b6fSAndrey Vagin rep.opt[1] = htonl(tsval); 760ee684b6fSAndrey Vagin rep.opt[2] = htonl(tsecr); 761cb48cfe8SCraig Schlenter arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 7621da177e4SLinus Torvalds } 7631da177e4SLinus Torvalds 7641da177e4SLinus Torvalds /* Swap the send and the receive. */ 7651da177e4SLinus Torvalds rep.th.dest = th->source; 7661da177e4SLinus Torvalds rep.th.source = th->dest; 7671da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 7681da177e4SLinus Torvalds rep.th.seq = htonl(seq); 7691da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 7701da177e4SLinus Torvalds rep.th.ack = 1; 7711da177e4SLinus Torvalds rep.th.window = htons(win); 7721da177e4SLinus Torvalds 773cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 774cfb6eeb4SYOSHIFUJI Hideaki if (key) { 775ee684b6fSAndrey Vagin int offset = (tsecr) ? 3 : 0; 776cfb6eeb4SYOSHIFUJI Hideaki 777cfb6eeb4SYOSHIFUJI Hideaki rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 778cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 779cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 780cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 781cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 782cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len/4; 783cfb6eeb4SYOSHIFUJI Hideaki 78449a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], 78590b7e112SAdam Langley key, ip_hdr(skb)->saddr, 78690b7e112SAdam Langley ip_hdr(skb)->daddr, &rep.th); 787cfb6eeb4SYOSHIFUJI Hideaki } 788cfb6eeb4SYOSHIFUJI Hideaki #endif 78988ef4a5aSKOVACS Krisztian arg.flags = reply_flags; 790eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 791eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 7921da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 7931da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 7949501f972SYOSHIFUJI Hideaki if (oif) 7959501f972SYOSHIFUJI Hideaki arg.bound_dev_if = oif; 79666b13d99SEric Dumazet arg.tos = tos; 797e2d118a1SLorenzo Colitti arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); 79847dcc20aSEric Dumazet local_bh_disable(); 799bdbbb852SEric Dumazet ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 800bdbbb852SEric Dumazet skb, &TCP_SKB_CB(skb)->header.h4.opt, 80124a2d43dSEric Dumazet ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 80224a2d43dSEric Dumazet &arg, arg.iov[0].iov_len); 8031da177e4SLinus Torvalds 80490bbcc60SEric Dumazet __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 80547dcc20aSEric Dumazet local_bh_enable(); 8061da177e4SLinus Torvalds } 8071da177e4SLinus Torvalds 8081da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 8091da177e4SLinus Torvalds { 8108feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 811cfb6eeb4SYOSHIFUJI Hideaki struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 8121da177e4SLinus Torvalds 813e2d118a1SLorenzo Colitti tcp_v4_send_ack(sk, skb, 814e62a123bSEric Dumazet tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 8157174259eSArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 8169a568de4SEric Dumazet tcp_time_stamp_raw() + tcptw->tw_ts_offset, 8179501f972SYOSHIFUJI Hideaki tcptw->tw_ts_recent, 8189501f972SYOSHIFUJI Hideaki tw->tw_bound_dev_if, 81988ef4a5aSKOVACS Krisztian tcp_twsk_md5_key(tcptw), 82066b13d99SEric Dumazet tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, 82166b13d99SEric Dumazet tw->tw_tos 8229501f972SYOSHIFUJI Hideaki ); 8231da177e4SLinus Torvalds 8248feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 8251da177e4SLinus Torvalds } 8261da177e4SLinus Torvalds 827a00e7444SEric Dumazet static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 8287174259eSArnaldo Carvalho de Melo struct request_sock *req) 8291da177e4SLinus Torvalds { 830168a8f58SJerry Chu /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 831168a8f58SJerry Chu * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 832168a8f58SJerry Chu */ 833e62a123bSEric Dumazet u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : 834e62a123bSEric Dumazet tcp_sk(sk)->snd_nxt; 835e62a123bSEric Dumazet 83620a2b49fSEric Dumazet /* RFC 7323 2.3 83720a2b49fSEric Dumazet * The window field (SEG.WND) of every outgoing segment, with the 83820a2b49fSEric Dumazet * exception of <SYN> segments, MUST be right-shifted by 83920a2b49fSEric Dumazet * Rcv.Wind.Shift bits: 84020a2b49fSEric Dumazet */ 841e2d118a1SLorenzo Colitti tcp_v4_send_ack(sk, skb, seq, 84220a2b49fSEric Dumazet tcp_rsk(req)->rcv_nxt, 84320a2b49fSEric Dumazet req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 8449a568de4SEric Dumazet tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 8459501f972SYOSHIFUJI Hideaki req->ts_recent, 8469501f972SYOSHIFUJI Hideaki 0, 847a915da9bSEric Dumazet tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 848a915da9bSEric Dumazet AF_INET), 84966b13d99SEric Dumazet inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, 85066b13d99SEric Dumazet ip_hdr(skb)->tos); 8511da177e4SLinus Torvalds } 8521da177e4SLinus Torvalds 8531da177e4SLinus Torvalds /* 8549bf1d83eSKris Katterjohn * Send a SYN-ACK after having received a SYN. 85560236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 8561da177e4SLinus Torvalds * socket. 8571da177e4SLinus Torvalds */ 8580f935dbeSEric Dumazet static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, 859d6274bd8SOctavian Purdila struct flowi *fl, 860e6b4d113SWilliam Allen Simpson struct request_sock *req, 861ca6fb065SEric Dumazet struct tcp_fastopen_cookie *foc, 862b3d05147SEric Dumazet enum tcp_synack_type synack_type) 8631da177e4SLinus Torvalds { 8642e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 8656bd023f3SDavid S. Miller struct flowi4 fl4; 8661da177e4SLinus Torvalds int err = -1; 8671da177e4SLinus Torvalds struct sk_buff *skb; 8681da177e4SLinus Torvalds 8691da177e4SLinus Torvalds /* First, grab a route. */ 870ba3f7f04SDavid S. Miller if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 871fd80eb94SDenis V. Lunev return -1; 8721da177e4SLinus Torvalds 873b3d05147SEric Dumazet skb = tcp_make_synack(sk, dst, req, foc, synack_type); 8741da177e4SLinus Torvalds 8751da177e4SLinus Torvalds if (skb) { 876634fb979SEric Dumazet __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); 8771da177e4SLinus Torvalds 878634fb979SEric Dumazet err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 879634fb979SEric Dumazet ireq->ir_rmt_addr, 8802e6599cbSArnaldo Carvalho de Melo ireq->opt); 881b9df3cb8SGerrit Renker err = net_xmit_eval(err); 8821da177e4SLinus Torvalds } 8831da177e4SLinus Torvalds 8841da177e4SLinus Torvalds return err; 8851da177e4SLinus Torvalds } 8861da177e4SLinus Torvalds 8871da177e4SLinus Torvalds /* 88860236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 8891da177e4SLinus Torvalds */ 89060236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req) 8911da177e4SLinus Torvalds { 8922e6599cbSArnaldo Carvalho de Melo kfree(inet_rsk(req)->opt); 8931da177e4SLinus Torvalds } 8941da177e4SLinus Torvalds 895cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 896cfb6eeb4SYOSHIFUJI Hideaki /* 897cfb6eeb4SYOSHIFUJI Hideaki * RFC2385 MD5 checksumming requires a mapping of 898cfb6eeb4SYOSHIFUJI Hideaki * IP address->MD5 Key. 899cfb6eeb4SYOSHIFUJI Hideaki * We need to maintain these in the sk structure. 900cfb6eeb4SYOSHIFUJI Hideaki */ 901cfb6eeb4SYOSHIFUJI Hideaki 902cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address. */ 903b83e3debSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, 904a915da9bSEric Dumazet const union tcp_md5_addr *addr, 905a915da9bSEric Dumazet int family) 906cfb6eeb4SYOSHIFUJI Hideaki { 907fd3a154aSEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 908a915da9bSEric Dumazet struct tcp_md5sig_key *key; 909fd3a154aSEric Dumazet const struct tcp_md5sig_info *md5sig; 9106797318eSIvan Delalande __be32 mask; 9116797318eSIvan Delalande struct tcp_md5sig_key *best_match = NULL; 9126797318eSIvan Delalande bool match; 913cfb6eeb4SYOSHIFUJI Hideaki 914a8afca03SEric Dumazet /* caller either holds rcu_read_lock() or socket lock */ 915a8afca03SEric Dumazet md5sig = rcu_dereference_check(tp->md5sig_info, 9161e1d04e6SHannes Frederic Sowa lockdep_sock_is_held(sk)); 917a8afca03SEric Dumazet if (!md5sig) 918cfb6eeb4SYOSHIFUJI Hideaki return NULL; 919083a0326SArnd Bergmann 920b67bfe0dSSasha Levin hlist_for_each_entry_rcu(key, &md5sig->head, node) { 921a915da9bSEric Dumazet if (key->family != family) 922a915da9bSEric Dumazet continue; 9236797318eSIvan Delalande 9246797318eSIvan Delalande if (family == AF_INET) { 9256797318eSIvan Delalande mask = inet_make_mask(key->prefixlen); 9266797318eSIvan Delalande match = (key->addr.a4.s_addr & mask) == 9276797318eSIvan Delalande (addr->a4.s_addr & mask); 9286797318eSIvan Delalande #if IS_ENABLED(CONFIG_IPV6) 9296797318eSIvan Delalande } else if (family == AF_INET6) { 9306797318eSIvan Delalande match = ipv6_prefix_equal(&key->addr.a6, &addr->a6, 9316797318eSIvan Delalande key->prefixlen); 9326797318eSIvan Delalande #endif 9336797318eSIvan Delalande } else { 9346797318eSIvan Delalande match = false; 9356797318eSIvan Delalande } 9366797318eSIvan Delalande 9376797318eSIvan Delalande if (match && (!best_match || 9386797318eSIvan Delalande key->prefixlen > best_match->prefixlen)) 9396797318eSIvan Delalande best_match = key; 9406797318eSIvan Delalande } 9416797318eSIvan Delalande return best_match; 9426797318eSIvan Delalande } 9436797318eSIvan Delalande EXPORT_SYMBOL(tcp_md5_do_lookup); 9446797318eSIvan Delalande 945e8f37d57SWu Fengguang static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, 9466797318eSIvan Delalande const union tcp_md5_addr *addr, 9476797318eSIvan Delalande int family, u8 prefixlen) 9486797318eSIvan Delalande { 9496797318eSIvan Delalande const struct tcp_sock *tp = tcp_sk(sk); 9506797318eSIvan Delalande struct tcp_md5sig_key *key; 9516797318eSIvan Delalande unsigned int size = sizeof(struct in_addr); 9526797318eSIvan Delalande const struct tcp_md5sig_info *md5sig; 9536797318eSIvan Delalande 9546797318eSIvan Delalande /* caller either holds rcu_read_lock() or socket lock */ 9556797318eSIvan Delalande md5sig = rcu_dereference_check(tp->md5sig_info, 9566797318eSIvan Delalande lockdep_sock_is_held(sk)); 9576797318eSIvan Delalande if (!md5sig) 9586797318eSIvan Delalande return NULL; 9596797318eSIvan Delalande #if IS_ENABLED(CONFIG_IPV6) 9606797318eSIvan Delalande if (family == AF_INET6) 9616797318eSIvan Delalande size = sizeof(struct in6_addr); 9626797318eSIvan Delalande #endif 9636797318eSIvan Delalande hlist_for_each_entry_rcu(key, &md5sig->head, node) { 9646797318eSIvan Delalande if (key->family != family) 9656797318eSIvan Delalande continue; 9666797318eSIvan Delalande if (!memcmp(&key->addr, addr, size) && 9676797318eSIvan Delalande key->prefixlen == prefixlen) 968a915da9bSEric Dumazet return key; 969cfb6eeb4SYOSHIFUJI Hideaki } 970cfb6eeb4SYOSHIFUJI Hideaki return NULL; 971cfb6eeb4SYOSHIFUJI Hideaki } 972cfb6eeb4SYOSHIFUJI Hideaki 973b83e3debSEric Dumazet struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, 974fd3a154aSEric Dumazet const struct sock *addr_sk) 975cfb6eeb4SYOSHIFUJI Hideaki { 976b52e6921SEric Dumazet const union tcp_md5_addr *addr; 977a915da9bSEric Dumazet 978b52e6921SEric Dumazet addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; 979a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 980cfb6eeb4SYOSHIFUJI Hideaki } 981cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup); 982cfb6eeb4SYOSHIFUJI Hideaki 983cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */ 984a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 9856797318eSIvan Delalande int family, u8 prefixlen, const u8 *newkey, u8 newkeylen, 9866797318eSIvan Delalande gfp_t gfp) 987cfb6eeb4SYOSHIFUJI Hideaki { 988cfb6eeb4SYOSHIFUJI Hideaki /* Add Key to the list */ 989b0a713e9SMatthias M. Dellweg struct tcp_md5sig_key *key; 990cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 991f6685938SArnaldo Carvalho de Melo struct tcp_md5sig_info *md5sig; 992f6685938SArnaldo Carvalho de Melo 9936797318eSIvan Delalande key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen); 994a915da9bSEric Dumazet if (key) { 995a915da9bSEric Dumazet /* Pre-existing entry - just update that one. */ 996a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 997a915da9bSEric Dumazet key->keylen = newkeylen; 998a915da9bSEric Dumazet return 0; 999cfb6eeb4SYOSHIFUJI Hideaki } 1000260fcbebSYan, Zheng 1001a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 10021e1d04e6SHannes Frederic Sowa lockdep_sock_is_held(sk)); 1003a915da9bSEric Dumazet if (!md5sig) { 1004a915da9bSEric Dumazet md5sig = kmalloc(sizeof(*md5sig), gfp); 1005a915da9bSEric Dumazet if (!md5sig) 1006a915da9bSEric Dumazet return -ENOMEM; 1007a915da9bSEric Dumazet 1008a915da9bSEric Dumazet sk_nocaps_add(sk, NETIF_F_GSO_MASK); 1009a915da9bSEric Dumazet INIT_HLIST_HEAD(&md5sig->head); 1010a8afca03SEric Dumazet rcu_assign_pointer(tp->md5sig_info, md5sig); 1011a915da9bSEric Dumazet } 1012a915da9bSEric Dumazet 10135f3d9cb2SEric Dumazet key = sock_kmalloc(sk, sizeof(*key), gfp); 1014a915da9bSEric Dumazet if (!key) 1015a915da9bSEric Dumazet return -ENOMEM; 101671cea17eSEric Dumazet if (!tcp_alloc_md5sig_pool()) { 10175f3d9cb2SEric Dumazet sock_kfree_s(sk, key, sizeof(*key)); 1018cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 1019cfb6eeb4SYOSHIFUJI Hideaki } 1020f6685938SArnaldo Carvalho de Melo 1021a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 1022a915da9bSEric Dumazet key->keylen = newkeylen; 1023a915da9bSEric Dumazet key->family = family; 10246797318eSIvan Delalande key->prefixlen = prefixlen; 1025a915da9bSEric Dumazet memcpy(&key->addr, addr, 1026a915da9bSEric Dumazet (family == AF_INET6) ? sizeof(struct in6_addr) : 1027a915da9bSEric Dumazet sizeof(struct in_addr)); 1028a915da9bSEric Dumazet hlist_add_head_rcu(&key->node, &md5sig->head); 1029cfb6eeb4SYOSHIFUJI Hideaki return 0; 1030cfb6eeb4SYOSHIFUJI Hideaki } 1031a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add); 1032cfb6eeb4SYOSHIFUJI Hideaki 10336797318eSIvan Delalande int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, 10346797318eSIvan Delalande u8 prefixlen) 1035cfb6eeb4SYOSHIFUJI Hideaki { 1036a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1037cfb6eeb4SYOSHIFUJI Hideaki 10386797318eSIvan Delalande key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen); 1039a915da9bSEric Dumazet if (!key) 1040cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 1041a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10425f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1043a915da9bSEric Dumazet kfree_rcu(key, rcu); 1044a915da9bSEric Dumazet return 0; 1045cfb6eeb4SYOSHIFUJI Hideaki } 1046a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del); 1047cfb6eeb4SYOSHIFUJI Hideaki 1048e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk) 1049cfb6eeb4SYOSHIFUJI Hideaki { 1050cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1051a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1052b67bfe0dSSasha Levin struct hlist_node *n; 1053a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 1054cfb6eeb4SYOSHIFUJI Hideaki 1055a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1); 1056a8afca03SEric Dumazet 1057b67bfe0dSSasha Levin hlist_for_each_entry_safe(key, n, &md5sig->head, node) { 1058a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10595f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1060a915da9bSEric Dumazet kfree_rcu(key, rcu); 1061cfb6eeb4SYOSHIFUJI Hideaki } 1062cfb6eeb4SYOSHIFUJI Hideaki } 1063cfb6eeb4SYOSHIFUJI Hideaki 10648917a777SIvan Delalande static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, 10658917a777SIvan Delalande char __user *optval, int optlen) 1066cfb6eeb4SYOSHIFUJI Hideaki { 1067cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig cmd; 1068cfb6eeb4SYOSHIFUJI Hideaki struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 10698917a777SIvan Delalande u8 prefixlen = 32; 1070cfb6eeb4SYOSHIFUJI Hideaki 1071cfb6eeb4SYOSHIFUJI Hideaki if (optlen < sizeof(cmd)) 1072cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1073cfb6eeb4SYOSHIFUJI Hideaki 1074cfb6eeb4SYOSHIFUJI Hideaki if (copy_from_user(&cmd, optval, sizeof(cmd))) 1075cfb6eeb4SYOSHIFUJI Hideaki return -EFAULT; 1076cfb6eeb4SYOSHIFUJI Hideaki 1077cfb6eeb4SYOSHIFUJI Hideaki if (sin->sin_family != AF_INET) 1078cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1079cfb6eeb4SYOSHIFUJI Hideaki 10808917a777SIvan Delalande if (optname == TCP_MD5SIG_EXT && 10818917a777SIvan Delalande cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 10828917a777SIvan Delalande prefixlen = cmd.tcpm_prefixlen; 10838917a777SIvan Delalande if (prefixlen > 32) 10848917a777SIvan Delalande return -EINVAL; 10858917a777SIvan Delalande } 10868917a777SIvan Delalande 108764a124edSDmitry Popov if (!cmd.tcpm_keylen) 1088a915da9bSEric Dumazet return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 10898917a777SIvan Delalande AF_INET, prefixlen); 1090cfb6eeb4SYOSHIFUJI Hideaki 1091cfb6eeb4SYOSHIFUJI Hideaki if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 1092cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1093cfb6eeb4SYOSHIFUJI Hideaki 1094a915da9bSEric Dumazet return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 10958917a777SIvan Delalande AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen, 1096a915da9bSEric Dumazet GFP_KERNEL); 1097cfb6eeb4SYOSHIFUJI Hideaki } 1098cfb6eeb4SYOSHIFUJI Hideaki 109919689e38SEric Dumazet static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, 110019689e38SEric Dumazet __be32 daddr, __be32 saddr, 110119689e38SEric Dumazet const struct tcphdr *th, int nbytes) 1102cfb6eeb4SYOSHIFUJI Hideaki { 1103cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_pseudohdr *bp; 110449a72dfbSAdam Langley struct scatterlist sg; 110519689e38SEric Dumazet struct tcphdr *_th; 1106cfb6eeb4SYOSHIFUJI Hideaki 110719689e38SEric Dumazet bp = hp->scratch; 1108cfb6eeb4SYOSHIFUJI Hideaki bp->saddr = saddr; 1109cfb6eeb4SYOSHIFUJI Hideaki bp->daddr = daddr; 1110cfb6eeb4SYOSHIFUJI Hideaki bp->pad = 0; 1111076fb722SYOSHIFUJI Hideaki bp->protocol = IPPROTO_TCP; 111249a72dfbSAdam Langley bp->len = cpu_to_be16(nbytes); 1113c7da57a1SDavid S. Miller 111419689e38SEric Dumazet _th = (struct tcphdr *)(bp + 1); 111519689e38SEric Dumazet memcpy(_th, th, sizeof(*th)); 111619689e38SEric Dumazet _th->check = 0; 111719689e38SEric Dumazet 111819689e38SEric Dumazet sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 111919689e38SEric Dumazet ahash_request_set_crypt(hp->md5_req, &sg, NULL, 112019689e38SEric Dumazet sizeof(*bp) + sizeof(*th)); 1121cf80e0e4SHerbert Xu return crypto_ahash_update(hp->md5_req); 112249a72dfbSAdam Langley } 112349a72dfbSAdam Langley 1124a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1125318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th) 112649a72dfbSAdam Langley { 112749a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 1128cf80e0e4SHerbert Xu struct ahash_request *req; 112949a72dfbSAdam Langley 113049a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 113149a72dfbSAdam Langley if (!hp) 113249a72dfbSAdam Langley goto clear_hash_noput; 1133cf80e0e4SHerbert Xu req = hp->md5_req; 113449a72dfbSAdam Langley 1135cf80e0e4SHerbert Xu if (crypto_ahash_init(req)) 113649a72dfbSAdam Langley goto clear_hash; 113719689e38SEric Dumazet if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 113849a72dfbSAdam Langley goto clear_hash; 113949a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 114049a72dfbSAdam Langley goto clear_hash; 1141cf80e0e4SHerbert Xu ahash_request_set_crypt(req, NULL, md5_hash, 0); 1142cf80e0e4SHerbert Xu if (crypto_ahash_final(req)) 1143cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1144cfb6eeb4SYOSHIFUJI Hideaki 1145cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1146cfb6eeb4SYOSHIFUJI Hideaki return 0; 114749a72dfbSAdam Langley 1148cfb6eeb4SYOSHIFUJI Hideaki clear_hash: 1149cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1150cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput: 1151cfb6eeb4SYOSHIFUJI Hideaki memset(md5_hash, 0, 16); 115249a72dfbSAdam Langley return 1; 1153cfb6eeb4SYOSHIFUJI Hideaki } 1154cfb6eeb4SYOSHIFUJI Hideaki 115539f8e58eSEric Dumazet int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 115639f8e58eSEric Dumazet const struct sock *sk, 1157318cf7aaSEric Dumazet const struct sk_buff *skb) 1158cfb6eeb4SYOSHIFUJI Hideaki { 115949a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 1160cf80e0e4SHerbert Xu struct ahash_request *req; 1161318cf7aaSEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1162cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, daddr; 1163cfb6eeb4SYOSHIFUJI Hideaki 116439f8e58eSEric Dumazet if (sk) { /* valid for establish/request sockets */ 116539f8e58eSEric Dumazet saddr = sk->sk_rcv_saddr; 116639f8e58eSEric Dumazet daddr = sk->sk_daddr; 1167cfb6eeb4SYOSHIFUJI Hideaki } else { 116849a72dfbSAdam Langley const struct iphdr *iph = ip_hdr(skb); 116949a72dfbSAdam Langley saddr = iph->saddr; 117049a72dfbSAdam Langley daddr = iph->daddr; 1171cfb6eeb4SYOSHIFUJI Hideaki } 1172cfb6eeb4SYOSHIFUJI Hideaki 117349a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 117449a72dfbSAdam Langley if (!hp) 117549a72dfbSAdam Langley goto clear_hash_noput; 1176cf80e0e4SHerbert Xu req = hp->md5_req; 117749a72dfbSAdam Langley 1178cf80e0e4SHerbert Xu if (crypto_ahash_init(req)) 117949a72dfbSAdam Langley goto clear_hash; 118049a72dfbSAdam Langley 118119689e38SEric Dumazet if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 118249a72dfbSAdam Langley goto clear_hash; 118349a72dfbSAdam Langley if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 118449a72dfbSAdam Langley goto clear_hash; 118549a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 118649a72dfbSAdam Langley goto clear_hash; 1187cf80e0e4SHerbert Xu ahash_request_set_crypt(req, NULL, md5_hash, 0); 1188cf80e0e4SHerbert Xu if (crypto_ahash_final(req)) 118949a72dfbSAdam Langley goto clear_hash; 119049a72dfbSAdam Langley 119149a72dfbSAdam Langley tcp_put_md5sig_pool(); 119249a72dfbSAdam Langley return 0; 119349a72dfbSAdam Langley 119449a72dfbSAdam Langley clear_hash: 119549a72dfbSAdam Langley tcp_put_md5sig_pool(); 119649a72dfbSAdam Langley clear_hash_noput: 119749a72dfbSAdam Langley memset(md5_hash, 0, 16); 119849a72dfbSAdam Langley return 1; 119949a72dfbSAdam Langley } 120049a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1201cfb6eeb4SYOSHIFUJI Hideaki 1202ba8e275aSEric Dumazet #endif 1203ba8e275aSEric Dumazet 1204ff74e23fSEric Dumazet /* Called with rcu_read_lock() */ 1205ba8e275aSEric Dumazet static bool tcp_v4_inbound_md5_hash(const struct sock *sk, 12069ea88a15SDmitry Popov const struct sk_buff *skb) 1207cfb6eeb4SYOSHIFUJI Hideaki { 1208ba8e275aSEric Dumazet #ifdef CONFIG_TCP_MD5SIG 1209cfb6eeb4SYOSHIFUJI Hideaki /* 1210cfb6eeb4SYOSHIFUJI Hideaki * This gets called for each TCP segment that arrives 1211cfb6eeb4SYOSHIFUJI Hideaki * so we want to be efficient. 1212cfb6eeb4SYOSHIFUJI Hideaki * We have 3 drop cases: 1213cfb6eeb4SYOSHIFUJI Hideaki * o No MD5 hash and one expected. 1214cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and we're not expecting one. 1215cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and its wrong. 1216cfb6eeb4SYOSHIFUJI Hideaki */ 1217cf533ea5SEric Dumazet const __u8 *hash_location = NULL; 1218cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *hash_expected; 1219eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1220cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1221cfb6eeb4SYOSHIFUJI Hideaki int genhash; 1222cfb6eeb4SYOSHIFUJI Hideaki unsigned char newhash[16]; 1223cfb6eeb4SYOSHIFUJI Hideaki 1224a915da9bSEric Dumazet hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, 1225a915da9bSEric Dumazet AF_INET); 12267d5d5525SYOSHIFUJI Hideaki hash_location = tcp_parse_md5sig_option(th); 1227cfb6eeb4SYOSHIFUJI Hideaki 1228cfb6eeb4SYOSHIFUJI Hideaki /* We've parsed the options - do we have a hash? */ 1229cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && !hash_location) 1230a2a385d6SEric Dumazet return false; 1231cfb6eeb4SYOSHIFUJI Hideaki 1232cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected && !hash_location) { 1233c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 1234a2a385d6SEric Dumazet return true; 1235cfb6eeb4SYOSHIFUJI Hideaki } 1236cfb6eeb4SYOSHIFUJI Hideaki 1237cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && hash_location) { 1238c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 1239a2a385d6SEric Dumazet return true; 1240cfb6eeb4SYOSHIFUJI Hideaki } 1241cfb6eeb4SYOSHIFUJI Hideaki 1242cfb6eeb4SYOSHIFUJI Hideaki /* Okay, so this is hash_expected and hash_location - 1243cfb6eeb4SYOSHIFUJI Hideaki * so we need to calculate the checksum. 1244cfb6eeb4SYOSHIFUJI Hideaki */ 124549a72dfbSAdam Langley genhash = tcp_v4_md5_hash_skb(newhash, 1246cfb6eeb4SYOSHIFUJI Hideaki hash_expected, 124739f8e58eSEric Dumazet NULL, skb); 1248cfb6eeb4SYOSHIFUJI Hideaki 1249cfb6eeb4SYOSHIFUJI Hideaki if (genhash || memcmp(hash_location, newhash, 16) != 0) { 125072145a68SEric Dumazet NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); 1251e87cc472SJoe Perches net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", 1252673d57e7SHarvey Harrison &iph->saddr, ntohs(th->source), 1253673d57e7SHarvey Harrison &iph->daddr, ntohs(th->dest), 1254e87cc472SJoe Perches genhash ? " tcp_v4_calc_md5_hash failed" 1255e87cc472SJoe Perches : ""); 1256a2a385d6SEric Dumazet return true; 1257cfb6eeb4SYOSHIFUJI Hideaki } 1258a2a385d6SEric Dumazet return false; 1259cfb6eeb4SYOSHIFUJI Hideaki #endif 1260ba8e275aSEric Dumazet return false; 1261ba8e275aSEric Dumazet } 1262cfb6eeb4SYOSHIFUJI Hideaki 1263b40cf18eSEric Dumazet static void tcp_v4_init_req(struct request_sock *req, 1264b40cf18eSEric Dumazet const struct sock *sk_listener, 126516bea70aSOctavian Purdila struct sk_buff *skb) 126616bea70aSOctavian Purdila { 126716bea70aSOctavian Purdila struct inet_request_sock *ireq = inet_rsk(req); 126816bea70aSOctavian Purdila 126908d2cc3bSEric Dumazet sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); 127008d2cc3bSEric Dumazet sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); 127191ed1e66SPaolo Abeni ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb); 127216bea70aSOctavian Purdila } 127316bea70aSOctavian Purdila 1274f964629eSEric Dumazet static struct dst_entry *tcp_v4_route_req(const struct sock *sk, 1275f964629eSEric Dumazet struct flowi *fl, 12764396e461SSoheil Hassas Yeganeh const struct request_sock *req) 1277d94e0417SOctavian Purdila { 12784396e461SSoheil Hassas Yeganeh return inet_csk_route_req(sk, &fl->u.ip4, req); 1279d94e0417SOctavian Purdila } 1280d94e0417SOctavian Purdila 128172a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = { 12821da177e4SLinus Torvalds .family = PF_INET, 12832e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 12845db92c99SOctavian Purdila .rtx_syn_ack = tcp_rtx_synack, 128560236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 128660236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 12871da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 128872659eccSOctavian Purdila .syn_ack_timeout = tcp_syn_ack_timeout, 12891da177e4SLinus Torvalds }; 12901da177e4SLinus Torvalds 1291b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 12922aec4a29SOctavian Purdila .mss_clamp = TCP_MSS_DEFAULT, 129316bea70aSOctavian Purdila #ifdef CONFIG_TCP_MD5SIG 1294fd3a154aSEric Dumazet .req_md5_lookup = tcp_v4_md5_lookup, 1295e3afe7b7SJohn Dykstra .calc_md5_hash = tcp_v4_md5_hash_skb, 1296b6332e6cSAndrew Morton #endif 129716bea70aSOctavian Purdila .init_req = tcp_v4_init_req, 1298fb7b37a7SOctavian Purdila #ifdef CONFIG_SYN_COOKIES 1299fb7b37a7SOctavian Purdila .cookie_init_seq = cookie_v4_init_sequence, 1300fb7b37a7SOctavian Purdila #endif 1301d94e0417SOctavian Purdila .route_req = tcp_v4_route_req, 130284b114b9SEric Dumazet .init_seq = tcp_v4_init_seq, 130384b114b9SEric Dumazet .init_ts_off = tcp_v4_init_ts_off, 1304d6274bd8SOctavian Purdila .send_synack = tcp_v4_send_synack, 130516bea70aSOctavian Purdila }; 1306cfb6eeb4SYOSHIFUJI Hideaki 13071da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 13081da177e4SLinus Torvalds { 13091da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 1310511c3f92SEric Dumazet if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 13111da177e4SLinus Torvalds goto drop; 13121da177e4SLinus Torvalds 13131fb6f159SOctavian Purdila return tcp_conn_request(&tcp_request_sock_ops, 13141fb6f159SOctavian Purdila &tcp_request_sock_ipv4_ops, sk, skb); 13151da177e4SLinus Torvalds 13161da177e4SLinus Torvalds drop: 13179caad864SEric Dumazet tcp_listendrop(sk); 13181da177e4SLinus Torvalds return 0; 13191da177e4SLinus Torvalds } 13204bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request); 13211da177e4SLinus Torvalds 13221da177e4SLinus Torvalds 13231da177e4SLinus Torvalds /* 13241da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 13251da177e4SLinus Torvalds * now create the new socket. 13261da177e4SLinus Torvalds */ 13270c27171eSEric Dumazet struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 132860236fddSArnaldo Carvalho de Melo struct request_sock *req, 13295e0724d0SEric Dumazet struct dst_entry *dst, 13305e0724d0SEric Dumazet struct request_sock *req_unhash, 13315e0724d0SEric Dumazet bool *own_req) 13321da177e4SLinus Torvalds { 13332e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 13341da177e4SLinus Torvalds struct inet_sock *newinet; 13351da177e4SLinus Torvalds struct tcp_sock *newtp; 13361da177e4SLinus Torvalds struct sock *newsk; 1337cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1338cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 1339cfb6eeb4SYOSHIFUJI Hideaki #endif 1340f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 13411da177e4SLinus Torvalds 13421da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 13431da177e4SLinus Torvalds goto exit_overflow; 13441da177e4SLinus Torvalds 13451da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 13461da177e4SLinus Torvalds if (!newsk) 1347093d2823SBalazs Scheidler goto exit_nonewsk; 13481da177e4SLinus Torvalds 1349bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 1350fae6ef87SNeal Cardwell inet_sk_rx_dst_set(newsk, skb); 13511da177e4SLinus Torvalds 13521da177e4SLinus Torvalds newtp = tcp_sk(newsk); 13531da177e4SLinus Torvalds newinet = inet_sk(newsk); 13542e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 1355d1e559d0SEric Dumazet sk_daddr_set(newsk, ireq->ir_rmt_addr); 1356d1e559d0SEric Dumazet sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); 13576dd9a14eSDavid Ahern newsk->sk_bound_dev_if = ireq->ir_iif; 1358634fb979SEric Dumazet newinet->inet_saddr = ireq->ir_loc_addr; 1359f6d8bd05SEric Dumazet inet_opt = ireq->opt; 1360f6d8bd05SEric Dumazet rcu_assign_pointer(newinet->inet_opt, inet_opt); 13612e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 1362463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 1363eddc9ec5SArnaldo Carvalho de Melo newinet->mc_ttl = ip_hdr(skb)->ttl; 13644c507d28SJiri Benc newinet->rcv_tos = ip_hdr(skb)->tos; 1365d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 1366f6d8bd05SEric Dumazet if (inet_opt) 1367f6d8bd05SEric Dumazet inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1368c720c7e8SEric Dumazet newinet->inet_id = newtp->write_seq ^ jiffies; 13691da177e4SLinus Torvalds 1370dfd25fffSEric Dumazet if (!dst) { 1371dfd25fffSEric Dumazet dst = inet_csk_route_child_sock(sk, newsk, req); 1372dfd25fffSEric Dumazet if (!dst) 13730e734419SDavid S. Miller goto put_and_exit; 1374dfd25fffSEric Dumazet } else { 1375dfd25fffSEric Dumazet /* syncookie case : see end of cookie_v4_check() */ 1376dfd25fffSEric Dumazet } 13770e734419SDavid S. Miller sk_setup_caps(newsk, dst); 13780e734419SDavid S. Miller 137981164413SDaniel Borkmann tcp_ca_openreq_child(newsk, dst); 138081164413SDaniel Borkmann 13811da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 13823541f9e8SEric Dumazet newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1383f5fff5dcSTom Quetchenbach 13841da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 13851da177e4SLinus Torvalds 1386cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1387cfb6eeb4SYOSHIFUJI Hideaki /* Copy over the MD5 key from the original socket */ 1388a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, 1389a915da9bSEric Dumazet AF_INET); 139000db4124SIan Morris if (key) { 1391cfb6eeb4SYOSHIFUJI Hideaki /* 1392cfb6eeb4SYOSHIFUJI Hideaki * We're using one, so create a matching key 1393cfb6eeb4SYOSHIFUJI Hideaki * on the newsk structure. If we fail to get 1394cfb6eeb4SYOSHIFUJI Hideaki * memory, then we end up not copying the key 1395cfb6eeb4SYOSHIFUJI Hideaki * across. Shucks. 1396cfb6eeb4SYOSHIFUJI Hideaki */ 1397a915da9bSEric Dumazet tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, 13986797318eSIvan Delalande AF_INET, 32, key->key, key->keylen, GFP_ATOMIC); 1399a465419bSEric Dumazet sk_nocaps_add(newsk, NETIF_F_GSO_MASK); 1400cfb6eeb4SYOSHIFUJI Hideaki } 1401cfb6eeb4SYOSHIFUJI Hideaki #endif 1402cfb6eeb4SYOSHIFUJI Hideaki 14030e734419SDavid S. Miller if (__inet_inherit_port(sk, newsk) < 0) 14040e734419SDavid S. Miller goto put_and_exit; 14055e0724d0SEric Dumazet *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); 1406805c4bc0SEric Dumazet if (*own_req) 140749a496c9SEric Dumazet tcp_move_syn(newtp, req); 14081da177e4SLinus Torvalds 14091da177e4SLinus Torvalds return newsk; 14101da177e4SLinus Torvalds 14111da177e4SLinus Torvalds exit_overflow: 1412c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1413093d2823SBalazs Scheidler exit_nonewsk: 1414093d2823SBalazs Scheidler dst_release(dst); 14151da177e4SLinus Torvalds exit: 14169caad864SEric Dumazet tcp_listendrop(sk); 14171da177e4SLinus Torvalds return NULL; 14180e734419SDavid S. Miller put_and_exit: 1419e337e24dSChristoph Paasch inet_csk_prepare_forced_close(newsk); 1420e337e24dSChristoph Paasch tcp_done(newsk); 14210e734419SDavid S. Miller goto exit; 14221da177e4SLinus Torvalds } 14234bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 14241da177e4SLinus Torvalds 1425079096f1SEric Dumazet static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) 14261da177e4SLinus Torvalds { 14271da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 1428079096f1SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1429079096f1SEric Dumazet 1430af9b4738SFlorian Westphal if (!th->syn) 1431461b74c3SCong Wang sk = cookie_v4_check(sk, skb); 14321da177e4SLinus Torvalds #endif 14331da177e4SLinus Torvalds return sk; 14341da177e4SLinus Torvalds } 14351da177e4SLinus Torvalds 14361da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 1437e994b2f0SEric Dumazet * here, unless it is a TCP_LISTEN socket. 14381da177e4SLinus Torvalds * 14391da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 14401da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 14411da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 14421da177e4SLinus Torvalds * held. 14431da177e4SLinus Torvalds */ 14441da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 14451da177e4SLinus Torvalds { 1446cfb6eeb4SYOSHIFUJI Hideaki struct sock *rsk; 1447cfb6eeb4SYOSHIFUJI Hideaki 14481da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 144992101b3bSDavid S. Miller struct dst_entry *dst = sk->sk_rx_dst; 1450404e0a8bSEric Dumazet 1451404e0a8bSEric Dumazet sock_rps_save_rxhash(sk, skb); 14523d97379aSEric Dumazet sk_mark_napi_id(sk, skb); 1453404e0a8bSEric Dumazet if (dst) { 1454505fbcf0SEric Dumazet if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 145551456b29SIan Morris !dst->ops->check(dst, 0)) { 145692101b3bSDavid S. Miller dst_release(dst); 145792101b3bSDavid S. Miller sk->sk_rx_dst = NULL; 145892101b3bSDavid S. Miller } 145992101b3bSDavid S. Miller } 1460e42e24c3SMatvejchikov Ilya tcp_rcv_established(sk, skb, tcp_hdr(skb)); 14611da177e4SLinus Torvalds return 0; 14621da177e4SLinus Torvalds } 14631da177e4SLinus Torvalds 146412e25e10SEric Dumazet if (tcp_checksum_complete(skb)) 14651da177e4SLinus Torvalds goto csum_err; 14661da177e4SLinus Torvalds 14671da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 1468079096f1SEric Dumazet struct sock *nsk = tcp_v4_cookie_check(sk, skb); 1469079096f1SEric Dumazet 14701da177e4SLinus Torvalds if (!nsk) 14711da177e4SLinus Torvalds goto discard; 14721da177e4SLinus Torvalds if (nsk != sk) { 1473cfb6eeb4SYOSHIFUJI Hideaki if (tcp_child_process(sk, nsk, skb)) { 1474cfb6eeb4SYOSHIFUJI Hideaki rsk = nsk; 14751da177e4SLinus Torvalds goto reset; 1476cfb6eeb4SYOSHIFUJI Hideaki } 14771da177e4SLinus Torvalds return 0; 14781da177e4SLinus Torvalds } 1479ca55158cSEric Dumazet } else 1480bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1481ca55158cSEric Dumazet 148272ab4a86SEric Dumazet if (tcp_rcv_state_process(sk, skb)) { 1483cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 14841da177e4SLinus Torvalds goto reset; 1485cfb6eeb4SYOSHIFUJI Hideaki } 14861da177e4SLinus Torvalds return 0; 14871da177e4SLinus Torvalds 14881da177e4SLinus Torvalds reset: 1489cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(rsk, skb); 14901da177e4SLinus Torvalds discard: 14911da177e4SLinus Torvalds kfree_skb(skb); 14921da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 14931da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 14941da177e4SLinus Torvalds * might be destroyed here. This current version compiles correctly, 14951da177e4SLinus Torvalds * but you have been warned. 14961da177e4SLinus Torvalds */ 14971da177e4SLinus Torvalds return 0; 14981da177e4SLinus Torvalds 14991da177e4SLinus Torvalds csum_err: 1500c10d9310SEric Dumazet TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1501c10d9310SEric Dumazet TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 15021da177e4SLinus Torvalds goto discard; 15031da177e4SLinus Torvalds } 15044bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv); 15051da177e4SLinus Torvalds 1506160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb) 150741063e9dSDavid S. Miller { 150841063e9dSDavid S. Miller const struct iphdr *iph; 150941063e9dSDavid S. Miller const struct tcphdr *th; 151041063e9dSDavid S. Miller struct sock *sk; 151141063e9dSDavid S. Miller 151241063e9dSDavid S. Miller if (skb->pkt_type != PACKET_HOST) 1513160eb5a6SDavid S. Miller return; 151441063e9dSDavid S. Miller 151545f00f99SEric Dumazet if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1516160eb5a6SDavid S. Miller return; 151741063e9dSDavid S. Miller 151841063e9dSDavid S. Miller iph = ip_hdr(skb); 151945f00f99SEric Dumazet th = tcp_hdr(skb); 152041063e9dSDavid S. Miller 152141063e9dSDavid S. Miller if (th->doff < sizeof(struct tcphdr) / 4) 1522160eb5a6SDavid S. Miller return; 152341063e9dSDavid S. Miller 152445f00f99SEric Dumazet sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 152541063e9dSDavid S. Miller iph->saddr, th->source, 15267011d085SVijay Subramanian iph->daddr, ntohs(th->dest), 15273fa6f616SDavid Ahern skb->skb_iif, inet_sdif(skb)); 152841063e9dSDavid S. Miller if (sk) { 152941063e9dSDavid S. Miller skb->sk = sk; 153041063e9dSDavid S. Miller skb->destructor = sock_edemux; 1531f7e4eb03SEric Dumazet if (sk_fullsock(sk)) { 1532d0c294c5SMichal Kubeček struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); 1533505fbcf0SEric Dumazet 153441063e9dSDavid S. Miller if (dst) 153541063e9dSDavid S. Miller dst = dst_check(dst, 0); 153692101b3bSDavid S. Miller if (dst && 1537505fbcf0SEric Dumazet inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) 153841063e9dSDavid S. Miller skb_dst_set_noref(skb, dst); 153941063e9dSDavid S. Miller } 154041063e9dSDavid S. Miller } 154141063e9dSDavid S. Miller } 154241063e9dSDavid S. Miller 1543c9c33212SEric Dumazet bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) 1544c9c33212SEric Dumazet { 1545c9c33212SEric Dumazet u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; 1546c9c33212SEric Dumazet 1547c9c33212SEric Dumazet /* Only socket owner can try to collapse/prune rx queues 1548c9c33212SEric Dumazet * to reduce memory overhead, so add a little headroom here. 1549c9c33212SEric Dumazet * Few sockets backlog are possibly concurrently non empty. 1550c9c33212SEric Dumazet */ 1551c9c33212SEric Dumazet limit += 64*1024; 1552c9c33212SEric Dumazet 1553c9c33212SEric Dumazet /* In case all data was pulled from skb frags (in __pskb_pull_tail()), 1554c9c33212SEric Dumazet * we can fix skb->truesize to its real value to avoid future drops. 1555c9c33212SEric Dumazet * This is valid because skb is not yet charged to the socket. 1556c9c33212SEric Dumazet * It has been noticed pure SACK packets were sometimes dropped 1557c9c33212SEric Dumazet * (if cooked by drivers without copybreak feature). 1558c9c33212SEric Dumazet */ 155960b1af33SEric Dumazet skb_condense(skb); 1560c9c33212SEric Dumazet 1561c9c33212SEric Dumazet if (unlikely(sk_add_backlog(sk, skb, limit))) { 1562c9c33212SEric Dumazet bh_unlock_sock(sk); 1563c9c33212SEric Dumazet __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); 1564c9c33212SEric Dumazet return true; 1565c9c33212SEric Dumazet } 1566c9c33212SEric Dumazet return false; 1567c9c33212SEric Dumazet } 1568c9c33212SEric Dumazet EXPORT_SYMBOL(tcp_add_backlog); 1569c9c33212SEric Dumazet 1570ac6e7800SEric Dumazet int tcp_filter(struct sock *sk, struct sk_buff *skb) 1571ac6e7800SEric Dumazet { 1572ac6e7800SEric Dumazet struct tcphdr *th = (struct tcphdr *)skb->data; 1573ac6e7800SEric Dumazet unsigned int eaten = skb->len; 1574ac6e7800SEric Dumazet int err; 1575ac6e7800SEric Dumazet 1576ac6e7800SEric Dumazet err = sk_filter_trim_cap(sk, skb, th->doff * 4); 1577ac6e7800SEric Dumazet if (!err) { 1578ac6e7800SEric Dumazet eaten -= skb->len; 1579ac6e7800SEric Dumazet TCP_SKB_CB(skb)->end_seq -= eaten; 1580ac6e7800SEric Dumazet } 1581ac6e7800SEric Dumazet return err; 1582ac6e7800SEric Dumazet } 1583ac6e7800SEric Dumazet EXPORT_SYMBOL(tcp_filter); 1584ac6e7800SEric Dumazet 15851da177e4SLinus Torvalds /* 15861da177e4SLinus Torvalds * From tcp_input.c 15871da177e4SLinus Torvalds */ 15881da177e4SLinus Torvalds 15891da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 15901da177e4SLinus Torvalds { 15913b24d854SEric Dumazet struct net *net = dev_net(skb->dev); 15923fa6f616SDavid Ahern int sdif = inet_sdif(skb); 1593eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 1594cf533ea5SEric Dumazet const struct tcphdr *th; 15953b24d854SEric Dumazet bool refcounted; 15961da177e4SLinus Torvalds struct sock *sk; 15971da177e4SLinus Torvalds int ret; 15981da177e4SLinus Torvalds 15991da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 16001da177e4SLinus Torvalds goto discard_it; 16011da177e4SLinus Torvalds 16021da177e4SLinus Torvalds /* Count it even if it's bad */ 160390bbcc60SEric Dumazet __TCP_INC_STATS(net, TCP_MIB_INSEGS); 16041da177e4SLinus Torvalds 16051da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 16061da177e4SLinus Torvalds goto discard_it; 16071da177e4SLinus Torvalds 1608ea1627c2SEric Dumazet th = (const struct tcphdr *)skb->data; 16091da177e4SLinus Torvalds 1610ea1627c2SEric Dumazet if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) 16111da177e4SLinus Torvalds goto bad_packet; 16121da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 16131da177e4SLinus Torvalds goto discard_it; 16141da177e4SLinus Torvalds 16151da177e4SLinus Torvalds /* An explanation is required here, I think. 16161da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1617caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 16181da177e4SLinus Torvalds * So, we defer the checks. */ 1619ed70fcfcSTom Herbert 1620ed70fcfcSTom Herbert if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) 16216a5dc9e5SEric Dumazet goto csum_error; 16221da177e4SLinus Torvalds 1623ea1627c2SEric Dumazet th = (const struct tcphdr *)skb->data; 1624eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 1625971f10ecSEric Dumazet /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() 1626971f10ecSEric Dumazet * barrier() makes sure compiler wont play fool^Waliasing games. 1627971f10ecSEric Dumazet */ 1628971f10ecSEric Dumazet memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), 1629971f10ecSEric Dumazet sizeof(struct inet_skb_parm)); 1630971f10ecSEric Dumazet barrier(); 1631971f10ecSEric Dumazet 16321da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 16331da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 16341da177e4SLinus Torvalds skb->len - th->doff * 4); 16351da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1636e11ecddfSEric Dumazet TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 163704317dafSEric Dumazet TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1638b82d1bb4SEric Dumazet TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 16391da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 164098aaa913SMike Maloney TCP_SKB_CB(skb)->has_rxtstamp = 164198aaa913SMike Maloney skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 16421da177e4SLinus Torvalds 16434bdc3d66SEric Dumazet lookup: 1644a583636aSCraig Gallek sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, 16453fa6f616SDavid Ahern th->dest, sdif, &refcounted); 16461da177e4SLinus Torvalds if (!sk) 16471da177e4SLinus Torvalds goto no_tcp_socket; 16481da177e4SLinus Torvalds 1649bb134d5dSEric Dumazet process: 1650bb134d5dSEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 1651bb134d5dSEric Dumazet goto do_time_wait; 1652bb134d5dSEric Dumazet 1653079096f1SEric Dumazet if (sk->sk_state == TCP_NEW_SYN_RECV) { 1654079096f1SEric Dumazet struct request_sock *req = inet_reqsk(sk); 16557716682cSEric Dumazet struct sock *nsk; 1656079096f1SEric Dumazet 1657079096f1SEric Dumazet sk = req->rsk_listener; 165872923555SEric Dumazet if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { 1659e65c332dSEric Dumazet sk_drops_add(sk, skb); 166072923555SEric Dumazet reqsk_put(req); 166172923555SEric Dumazet goto discard_it; 166272923555SEric Dumazet } 16637716682cSEric Dumazet if (unlikely(sk->sk_state != TCP_LISTEN)) { 1664f03f2e15SEric Dumazet inet_csk_reqsk_queue_drop_and_put(sk, req); 16654bdc3d66SEric Dumazet goto lookup; 16664bdc3d66SEric Dumazet } 16673b24d854SEric Dumazet /* We own a reference on the listener, increase it again 16683b24d854SEric Dumazet * as we might lose it too soon. 16693b24d854SEric Dumazet */ 16707716682cSEric Dumazet sock_hold(sk); 16713b24d854SEric Dumazet refcounted = true; 1672*1f3b359fSEric Dumazet nsk = NULL; 1673*1f3b359fSEric Dumazet if (!tcp_filter(sk, skb)) 16747716682cSEric Dumazet nsk = tcp_check_req(sk, skb, req, false); 1675079096f1SEric Dumazet if (!nsk) { 1676079096f1SEric Dumazet reqsk_put(req); 16777716682cSEric Dumazet goto discard_and_relse; 1678079096f1SEric Dumazet } 1679079096f1SEric Dumazet if (nsk == sk) { 1680079096f1SEric Dumazet reqsk_put(req); 1681079096f1SEric Dumazet } else if (tcp_child_process(sk, nsk, skb)) { 1682079096f1SEric Dumazet tcp_v4_send_reset(nsk, skb); 16837716682cSEric Dumazet goto discard_and_relse; 1684079096f1SEric Dumazet } else { 16857716682cSEric Dumazet sock_put(sk); 1686079096f1SEric Dumazet return 0; 1687079096f1SEric Dumazet } 1688079096f1SEric Dumazet } 16896cce09f8SEric Dumazet if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 169002a1d6e7SEric Dumazet __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1691d218d111SStephen Hemminger goto discard_and_relse; 16926cce09f8SEric Dumazet } 1693d218d111SStephen Hemminger 16941da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 16951da177e4SLinus Torvalds goto discard_and_relse; 16969ea88a15SDmitry Popov 16979ea88a15SDmitry Popov if (tcp_v4_inbound_md5_hash(sk, skb)) 16989ea88a15SDmitry Popov goto discard_and_relse; 16999ea88a15SDmitry Popov 1700b59c2701SPatrick McHardy nf_reset(skb); 17011da177e4SLinus Torvalds 1702ac6e7800SEric Dumazet if (tcp_filter(sk, skb)) 17031da177e4SLinus Torvalds goto discard_and_relse; 1704ac6e7800SEric Dumazet th = (const struct tcphdr *)skb->data; 1705ac6e7800SEric Dumazet iph = ip_hdr(skb); 17061da177e4SLinus Torvalds 17071da177e4SLinus Torvalds skb->dev = NULL; 17081da177e4SLinus Torvalds 1709e994b2f0SEric Dumazet if (sk->sk_state == TCP_LISTEN) { 1710e994b2f0SEric Dumazet ret = tcp_v4_do_rcv(sk, skb); 1711e994b2f0SEric Dumazet goto put_and_return; 1712e994b2f0SEric Dumazet } 1713e994b2f0SEric Dumazet 1714e994b2f0SEric Dumazet sk_incoming_cpu_update(sk); 1715e994b2f0SEric Dumazet 1716c6366184SIngo Molnar bh_lock_sock_nested(sk); 1717a44d6eacSMartin KaFai Lau tcp_segs_in(tcp_sk(sk), skb); 17181da177e4SLinus Torvalds ret = 0; 17191da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 17201da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 1721c9c33212SEric Dumazet } else if (tcp_add_backlog(sk, skb)) { 17226b03a53aSZhu Yi goto discard_and_relse; 17236b03a53aSZhu Yi } 17241da177e4SLinus Torvalds bh_unlock_sock(sk); 17251da177e4SLinus Torvalds 1726e994b2f0SEric Dumazet put_and_return: 17273b24d854SEric Dumazet if (refcounted) 17281da177e4SLinus Torvalds sock_put(sk); 17291da177e4SLinus Torvalds 17301da177e4SLinus Torvalds return ret; 17311da177e4SLinus Torvalds 17321da177e4SLinus Torvalds no_tcp_socket: 17331da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 17341da177e4SLinus Torvalds goto discard_it; 17351da177e4SLinus Torvalds 173612e25e10SEric Dumazet if (tcp_checksum_complete(skb)) { 17376a5dc9e5SEric Dumazet csum_error: 173890bbcc60SEric Dumazet __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 17391da177e4SLinus Torvalds bad_packet: 174090bbcc60SEric Dumazet __TCP_INC_STATS(net, TCP_MIB_INERRS); 17411da177e4SLinus Torvalds } else { 1742cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(NULL, skb); 17431da177e4SLinus Torvalds } 17441da177e4SLinus Torvalds 17451da177e4SLinus Torvalds discard_it: 17461da177e4SLinus Torvalds /* Discard frame. */ 17471da177e4SLinus Torvalds kfree_skb(skb); 17481da177e4SLinus Torvalds return 0; 17491da177e4SLinus Torvalds 17501da177e4SLinus Torvalds discard_and_relse: 1751532182cdSEric Dumazet sk_drops_add(sk, skb); 17523b24d854SEric Dumazet if (refcounted) 17531da177e4SLinus Torvalds sock_put(sk); 17541da177e4SLinus Torvalds goto discard_it; 17551da177e4SLinus Torvalds 17561da177e4SLinus Torvalds do_time_wait: 17571da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 17589469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17591da177e4SLinus Torvalds goto discard_it; 17601da177e4SLinus Torvalds } 17611da177e4SLinus Torvalds 17626a5dc9e5SEric Dumazet if (tcp_checksum_complete(skb)) { 17636a5dc9e5SEric Dumazet inet_twsk_put(inet_twsk(sk)); 17646a5dc9e5SEric Dumazet goto csum_error; 17651da177e4SLinus Torvalds } 17669469c7b4SYOSHIFUJI Hideaki switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 17671da177e4SLinus Torvalds case TCP_TW_SYN: { 1768c346dca1SYOSHIFUJI Hideaki struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 1769a583636aSCraig Gallek &tcp_hashinfo, skb, 1770a583636aSCraig Gallek __tcp_hdrlen(th), 1771da5e3630STom Herbert iph->saddr, th->source, 1772eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, 17733fa6f616SDavid Ahern inet_iif(skb), 17743fa6f616SDavid Ahern sdif); 17751da177e4SLinus Torvalds if (sk2) { 1776dbe7faa4SEric Dumazet inet_twsk_deschedule_put(inet_twsk(sk)); 17771da177e4SLinus Torvalds sk = sk2; 17783b24d854SEric Dumazet refcounted = false; 17791da177e4SLinus Torvalds goto process; 17801da177e4SLinus Torvalds } 17811da177e4SLinus Torvalds /* Fall through to ACK */ 17821da177e4SLinus Torvalds } 17831da177e4SLinus Torvalds case TCP_TW_ACK: 17841da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 17851da177e4SLinus Torvalds break; 17861da177e4SLinus Torvalds case TCP_TW_RST: 1787271c3b9bSFlorian Westphal tcp_v4_send_reset(sk, skb); 1788271c3b9bSFlorian Westphal inet_twsk_deschedule_put(inet_twsk(sk)); 1789271c3b9bSFlorian Westphal goto discard_it; 17901da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 17911da177e4SLinus Torvalds } 17921da177e4SLinus Torvalds goto discard_it; 17931da177e4SLinus Torvalds } 17941da177e4SLinus Torvalds 1795ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = { 1796ccb7c410SDavid S. Miller .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1797ccb7c410SDavid S. Miller .twsk_unique = tcp_twsk_unique, 1798ccb7c410SDavid S. Miller .twsk_destructor= tcp_twsk_destructor, 1799ccb7c410SDavid S. Miller }; 18001da177e4SLinus Torvalds 180163d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 18025d299f3dSEric Dumazet { 18035d299f3dSEric Dumazet struct dst_entry *dst = skb_dst(skb); 18045d299f3dSEric Dumazet 18055037e9efSEric Dumazet if (dst && dst_hold_safe(dst)) { 18065d299f3dSEric Dumazet sk->sk_rx_dst = dst; 18075d299f3dSEric Dumazet inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 18085d299f3dSEric Dumazet } 1809ca777effSEric Dumazet } 181063d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set); 18115d299f3dSEric Dumazet 18123b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = { 18131da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 18141da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 181532519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 18165d299f3dSEric Dumazet .sk_rx_dst_set = inet_sk_rx_dst_set, 18171da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 18181da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 18191da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 18201da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 18211da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 1822543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 1823543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 18243fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 18253fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 18263fdadf7dSDmitry Mishin .compat_getsockopt = compat_ip_getsockopt, 18273fdadf7dSDmitry Mishin #endif 18284fab9071SNeal Cardwell .mtu_reduced = tcp_v4_mtu_reduced, 18291da177e4SLinus Torvalds }; 18304bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific); 18311da177e4SLinus Torvalds 1832cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1833b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1834cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_md5_lookup, 183549a72dfbSAdam Langley .calc_md5_hash = tcp_v4_md5_hash_skb, 1836cfb6eeb4SYOSHIFUJI Hideaki .md5_parse = tcp_v4_parse_md5_keys, 1837cfb6eeb4SYOSHIFUJI Hideaki }; 1838b6332e6cSAndrew Morton #endif 1839cfb6eeb4SYOSHIFUJI Hideaki 18401da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 18411da177e4SLinus Torvalds * sk_alloc() so need not be done here. 18421da177e4SLinus Torvalds */ 18431da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 18441da177e4SLinus Torvalds { 18456687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 18461da177e4SLinus Torvalds 1847900f65d3SNeal Cardwell tcp_init_sock(sk); 18481da177e4SLinus Torvalds 18498292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 1850900f65d3SNeal Cardwell 1851cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1852ac807fa8SDavid S. Miller tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; 1853cfb6eeb4SYOSHIFUJI Hideaki #endif 18541da177e4SLinus Torvalds 18551da177e4SLinus Torvalds return 0; 18561da177e4SLinus Torvalds } 18571da177e4SLinus Torvalds 18587d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk) 18591da177e4SLinus Torvalds { 18601da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 18611da177e4SLinus Torvalds 18621da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 18631da177e4SLinus Torvalds 18646687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 1865317a76f9SStephen Hemminger 1866734942ccSDave Watson tcp_cleanup_ulp(sk); 1867734942ccSDave Watson 18681da177e4SLinus Torvalds /* Cleanup up the write buffer. */ 1869fe067e8aSDavid S. Miller tcp_write_queue_purge(sk); 18701da177e4SLinus Torvalds 1871cf1ef3f0SWei Wang /* Check if we want to disable active TFO */ 1872cf1ef3f0SWei Wang tcp_fastopen_active_disable_ofo_check(sk); 1873cf1ef3f0SWei Wang 18741da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. */ 18759f5afeaeSYaogong Wang skb_rbtree_purge(&tp->out_of_order_queue); 18761da177e4SLinus Torvalds 1877cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1878cfb6eeb4SYOSHIFUJI Hideaki /* Clean up the MD5 key list, if any */ 1879cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info) { 1880a915da9bSEric Dumazet tcp_clear_md5_list(sk); 1881a8afca03SEric Dumazet kfree_rcu(tp->md5sig_info, rcu); 1882cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = NULL; 1883cfb6eeb4SYOSHIFUJI Hideaki } 1884cfb6eeb4SYOSHIFUJI Hideaki #endif 1885cfb6eeb4SYOSHIFUJI Hideaki 18861da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. */ 1887463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 1888ab1e0a13SArnaldo Carvalho de Melo inet_put_port(sk); 18891da177e4SLinus Torvalds 189000db4124SIan Morris BUG_ON(tp->fastopen_rsk); 1891435cf559SWilliam Allen Simpson 1892cf60af03SYuchung Cheng /* If socket is aborted during connect operation */ 1893cf60af03SYuchung Cheng tcp_free_fastopen_req(tp); 1894cd8ae852SEric Dumazet tcp_saved_syn_free(tp); 1895cf60af03SYuchung Cheng 1896180d8cd9SGlauber Costa sk_sockets_allocated_dec(sk); 18971da177e4SLinus Torvalds } 18981da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 18991da177e4SLinus Torvalds 19001da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 19011da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 19021da177e4SLinus Torvalds 1903a8b690f9STom Herbert /* 1904a8b690f9STom Herbert * Get next listener socket follow cur. If cur is NULL, get first socket 1905a8b690f9STom Herbert * starting from bucket given in st->bucket; when st->bucket is zero the 1906a8b690f9STom Herbert * very first socket in the hash table is returned. 1907a8b690f9STom Herbert */ 19081da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur) 19091da177e4SLinus Torvalds { 19101da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1911a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 19123b24d854SEric Dumazet struct inet_listen_hashbucket *ilb; 19133b24d854SEric Dumazet struct sock *sk = cur; 19141da177e4SLinus Torvalds 19151da177e4SLinus Torvalds if (!sk) { 19163b24d854SEric Dumazet get_head: 1917a8b690f9STom Herbert ilb = &tcp_hashinfo.listening_hash[st->bucket]; 19189652dc2eSEric Dumazet spin_lock(&ilb->lock); 19193b24d854SEric Dumazet sk = sk_head(&ilb->head); 1920a8b690f9STom Herbert st->offset = 0; 19211da177e4SLinus Torvalds goto get_sk; 19221da177e4SLinus Torvalds } 19235caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 19241da177e4SLinus Torvalds ++st->num; 1925a8b690f9STom Herbert ++st->offset; 19261da177e4SLinus Torvalds 19273b24d854SEric Dumazet sk = sk_next(sk); 19281da177e4SLinus Torvalds get_sk: 19293b24d854SEric Dumazet sk_for_each_from(sk) { 19308475ef9fSPavel Emelyanov if (!net_eq(sock_net(sk), net)) 19318475ef9fSPavel Emelyanov continue; 19323b24d854SEric Dumazet if (sk->sk_family == st->family) 19333b24d854SEric Dumazet return sk; 19341da177e4SLinus Torvalds } 19359652dc2eSEric Dumazet spin_unlock(&ilb->lock); 1936a8b690f9STom Herbert st->offset = 0; 19373b24d854SEric Dumazet if (++st->bucket < INET_LHTABLE_SIZE) 19383b24d854SEric Dumazet goto get_head; 19393b24d854SEric Dumazet return NULL; 19401da177e4SLinus Torvalds } 19411da177e4SLinus Torvalds 19421da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 19431da177e4SLinus Torvalds { 1944a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 1945a8b690f9STom Herbert void *rc; 1946a8b690f9STom Herbert 1947a8b690f9STom Herbert st->bucket = 0; 1948a8b690f9STom Herbert st->offset = 0; 1949a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 19501da177e4SLinus Torvalds 19511da177e4SLinus Torvalds while (rc && *pos) { 19521da177e4SLinus Torvalds rc = listening_get_next(seq, rc); 19531da177e4SLinus Torvalds --*pos; 19541da177e4SLinus Torvalds } 19551da177e4SLinus Torvalds return rc; 19561da177e4SLinus Torvalds } 19571da177e4SLinus Torvalds 195805dbc7b5SEric Dumazet static inline bool empty_bucket(const struct tcp_iter_state *st) 19596eac5604SAndi Kleen { 196005dbc7b5SEric Dumazet return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain); 19616eac5604SAndi Kleen } 19626eac5604SAndi Kleen 1963a8b690f9STom Herbert /* 1964a8b690f9STom Herbert * Get first established socket starting from bucket given in st->bucket. 1965a8b690f9STom Herbert * If st->bucket is zero, the very first socket in the hash is returned. 1966a8b690f9STom Herbert */ 19671da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq) 19681da177e4SLinus Torvalds { 19691da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1970a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 19711da177e4SLinus Torvalds void *rc = NULL; 19721da177e4SLinus Torvalds 1973a8b690f9STom Herbert st->offset = 0; 1974a8b690f9STom Herbert for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 19751da177e4SLinus Torvalds struct sock *sk; 19763ab5aee7SEric Dumazet struct hlist_nulls_node *node; 19779db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 19781da177e4SLinus Torvalds 19796eac5604SAndi Kleen /* Lockless fast path for the common case of empty buckets */ 19806eac5604SAndi Kleen if (empty_bucket(st)) 19816eac5604SAndi Kleen continue; 19826eac5604SAndi Kleen 19839db66bdcSEric Dumazet spin_lock_bh(lock); 19843ab5aee7SEric Dumazet sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 1985f40c8174SDaniel Lezcano if (sk->sk_family != st->family || 1986878628fbSYOSHIFUJI Hideaki !net_eq(sock_net(sk), net)) { 19871da177e4SLinus Torvalds continue; 19881da177e4SLinus Torvalds } 19891da177e4SLinus Torvalds rc = sk; 19901da177e4SLinus Torvalds goto out; 19911da177e4SLinus Torvalds } 19929db66bdcSEric Dumazet spin_unlock_bh(lock); 19931da177e4SLinus Torvalds } 19941da177e4SLinus Torvalds out: 19951da177e4SLinus Torvalds return rc; 19961da177e4SLinus Torvalds } 19971da177e4SLinus Torvalds 19981da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur) 19991da177e4SLinus Torvalds { 20001da177e4SLinus Torvalds struct sock *sk = cur; 20013ab5aee7SEric Dumazet struct hlist_nulls_node *node; 20021da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2003a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 20041da177e4SLinus Torvalds 20051da177e4SLinus Torvalds ++st->num; 2006a8b690f9STom Herbert ++st->offset; 20071da177e4SLinus Torvalds 20083ab5aee7SEric Dumazet sk = sk_nulls_next(sk); 20091da177e4SLinus Torvalds 20103ab5aee7SEric Dumazet sk_nulls_for_each_from(sk, node) { 2011878628fbSYOSHIFUJI Hideaki if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 201205dbc7b5SEric Dumazet return sk; 20131da177e4SLinus Torvalds } 20141da177e4SLinus Torvalds 201505dbc7b5SEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 201605dbc7b5SEric Dumazet ++st->bucket; 201705dbc7b5SEric Dumazet return established_get_first(seq); 20181da177e4SLinus Torvalds } 20191da177e4SLinus Torvalds 20201da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos) 20211da177e4SLinus Torvalds { 2022a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2023a8b690f9STom Herbert void *rc; 2024a8b690f9STom Herbert 2025a8b690f9STom Herbert st->bucket = 0; 2026a8b690f9STom Herbert rc = established_get_first(seq); 20271da177e4SLinus Torvalds 20281da177e4SLinus Torvalds while (rc && pos) { 20291da177e4SLinus Torvalds rc = established_get_next(seq, rc); 20301da177e4SLinus Torvalds --pos; 20311da177e4SLinus Torvalds } 20321da177e4SLinus Torvalds return rc; 20331da177e4SLinus Torvalds } 20341da177e4SLinus Torvalds 20351da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 20361da177e4SLinus Torvalds { 20371da177e4SLinus Torvalds void *rc; 20381da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 20391da177e4SLinus Torvalds 20401da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 20411da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 20421da177e4SLinus Torvalds 20431da177e4SLinus Torvalds if (!rc) { 20441da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 20451da177e4SLinus Torvalds rc = established_get_idx(seq, pos); 20461da177e4SLinus Torvalds } 20471da177e4SLinus Torvalds 20481da177e4SLinus Torvalds return rc; 20491da177e4SLinus Torvalds } 20501da177e4SLinus Torvalds 2051a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq) 2052a8b690f9STom Herbert { 2053a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2054a8b690f9STom Herbert int offset = st->offset; 2055a8b690f9STom Herbert int orig_num = st->num; 2056a8b690f9STom Herbert void *rc = NULL; 2057a8b690f9STom Herbert 2058a8b690f9STom Herbert switch (st->state) { 2059a8b690f9STom Herbert case TCP_SEQ_STATE_LISTENING: 2060a8b690f9STom Herbert if (st->bucket >= INET_LHTABLE_SIZE) 2061a8b690f9STom Herbert break; 2062a8b690f9STom Herbert st->state = TCP_SEQ_STATE_LISTENING; 2063a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 2064a8b690f9STom Herbert while (offset-- && rc) 2065a8b690f9STom Herbert rc = listening_get_next(seq, rc); 2066a8b690f9STom Herbert if (rc) 2067a8b690f9STom Herbert break; 2068a8b690f9STom Herbert st->bucket = 0; 206905dbc7b5SEric Dumazet st->state = TCP_SEQ_STATE_ESTABLISHED; 2070a8b690f9STom Herbert /* Fallthrough */ 2071a8b690f9STom Herbert case TCP_SEQ_STATE_ESTABLISHED: 2072a8b690f9STom Herbert if (st->bucket > tcp_hashinfo.ehash_mask) 2073a8b690f9STom Herbert break; 2074a8b690f9STom Herbert rc = established_get_first(seq); 2075a8b690f9STom Herbert while (offset-- && rc) 2076a8b690f9STom Herbert rc = established_get_next(seq, rc); 2077a8b690f9STom Herbert } 2078a8b690f9STom Herbert 2079a8b690f9STom Herbert st->num = orig_num; 2080a8b690f9STom Herbert 2081a8b690f9STom Herbert return rc; 2082a8b690f9STom Herbert } 2083a8b690f9STom Herbert 20841da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 20851da177e4SLinus Torvalds { 20861da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2087a8b690f9STom Herbert void *rc; 2088a8b690f9STom Herbert 2089a8b690f9STom Herbert if (*pos && *pos == st->last_pos) { 2090a8b690f9STom Herbert rc = tcp_seek_last_pos(seq); 2091a8b690f9STom Herbert if (rc) 2092a8b690f9STom Herbert goto out; 2093a8b690f9STom Herbert } 2094a8b690f9STom Herbert 20951da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 20961da177e4SLinus Torvalds st->num = 0; 2097a8b690f9STom Herbert st->bucket = 0; 2098a8b690f9STom Herbert st->offset = 0; 2099a8b690f9STom Herbert rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2100a8b690f9STom Herbert 2101a8b690f9STom Herbert out: 2102a8b690f9STom Herbert st->last_pos = *pos; 2103a8b690f9STom Herbert return rc; 21041da177e4SLinus Torvalds } 21051da177e4SLinus Torvalds 21061da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 21071da177e4SLinus Torvalds { 2108a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 21091da177e4SLinus Torvalds void *rc = NULL; 21101da177e4SLinus Torvalds 21111da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 21121da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 21131da177e4SLinus Torvalds goto out; 21141da177e4SLinus Torvalds } 21151da177e4SLinus Torvalds 21161da177e4SLinus Torvalds switch (st->state) { 21171da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 21181da177e4SLinus Torvalds rc = listening_get_next(seq, v); 21191da177e4SLinus Torvalds if (!rc) { 21201da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 2121a8b690f9STom Herbert st->bucket = 0; 2122a8b690f9STom Herbert st->offset = 0; 21231da177e4SLinus Torvalds rc = established_get_first(seq); 21241da177e4SLinus Torvalds } 21251da177e4SLinus Torvalds break; 21261da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 21271da177e4SLinus Torvalds rc = established_get_next(seq, v); 21281da177e4SLinus Torvalds break; 21291da177e4SLinus Torvalds } 21301da177e4SLinus Torvalds out: 21311da177e4SLinus Torvalds ++*pos; 2132a8b690f9STom Herbert st->last_pos = *pos; 21331da177e4SLinus Torvalds return rc; 21341da177e4SLinus Torvalds } 21351da177e4SLinus Torvalds 21361da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 21371da177e4SLinus Torvalds { 21381da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 21391da177e4SLinus Torvalds 21401da177e4SLinus Torvalds switch (st->state) { 21411da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 21421da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 21439652dc2eSEric Dumazet spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock); 21441da177e4SLinus Torvalds break; 21451da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 21461da177e4SLinus Torvalds if (v) 21479db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 21481da177e4SLinus Torvalds break; 21491da177e4SLinus Torvalds } 21501da177e4SLinus Torvalds } 21511da177e4SLinus Torvalds 215273cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file) 21531da177e4SLinus Torvalds { 2154d9dda78bSAl Viro struct tcp_seq_afinfo *afinfo = PDE_DATA(inode); 21551da177e4SLinus Torvalds struct tcp_iter_state *s; 215652d6f3f1SDenis V. Lunev int err; 21571da177e4SLinus Torvalds 215852d6f3f1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 215952d6f3f1SDenis V. Lunev sizeof(struct tcp_iter_state)); 216052d6f3f1SDenis V. Lunev if (err < 0) 216152d6f3f1SDenis V. Lunev return err; 2162f40c8174SDaniel Lezcano 216352d6f3f1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 21641da177e4SLinus Torvalds s->family = afinfo->family; 2165a8b690f9STom Herbert s->last_pos = 0; 2166f40c8174SDaniel Lezcano return 0; 2167f40c8174SDaniel Lezcano } 216873cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open); 2169f40c8174SDaniel Lezcano 21706f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) 21711da177e4SLinus Torvalds { 21721da177e4SLinus Torvalds int rc = 0; 21731da177e4SLinus Torvalds struct proc_dir_entry *p; 21741da177e4SLinus Torvalds 21759427c4b3SDenis V. Lunev afinfo->seq_ops.start = tcp_seq_start; 21769427c4b3SDenis V. Lunev afinfo->seq_ops.next = tcp_seq_next; 21779427c4b3SDenis V. Lunev afinfo->seq_ops.stop = tcp_seq_stop; 21789427c4b3SDenis V. Lunev 217984841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 218073cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 218184841c3cSDenis V. Lunev if (!p) 21821da177e4SLinus Torvalds rc = -ENOMEM; 21831da177e4SLinus Torvalds return rc; 21841da177e4SLinus Torvalds } 21854bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register); 21861da177e4SLinus Torvalds 21876f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 21881da177e4SLinus Torvalds { 2189ece31ffdSGao feng remove_proc_entry(afinfo->name, net->proc_net); 21901da177e4SLinus Torvalds } 21914bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister); 21921da177e4SLinus Torvalds 2193d4f06873SEric Dumazet static void get_openreq4(const struct request_sock *req, 2194aa3a0c8cSEric Dumazet struct seq_file *f, int i) 21951da177e4SLinus Torvalds { 21962e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 2197fa76ce73SEric Dumazet long delta = req->rsk_timer.expires - jiffies; 21981da177e4SLinus Torvalds 21995e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2200652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", 22011da177e4SLinus Torvalds i, 2202634fb979SEric Dumazet ireq->ir_loc_addr, 2203d4f06873SEric Dumazet ireq->ir_num, 2204634fb979SEric Dumazet ireq->ir_rmt_addr, 2205634fb979SEric Dumazet ntohs(ireq->ir_rmt_port), 22061da177e4SLinus Torvalds TCP_SYN_RECV, 22071da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. */ 22081da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 2209a399a805SEric Dumazet jiffies_delta_to_clock_t(delta), 2210e6c022a4SEric Dumazet req->num_timeout, 2211aa3a0c8cSEric Dumazet from_kuid_munged(seq_user_ns(f), 2212aa3a0c8cSEric Dumazet sock_i_uid(req->rsk_listener)), 22131da177e4SLinus Torvalds 0, /* non standard timer */ 22141da177e4SLinus Torvalds 0, /* open_requests have no inode */ 2215d4f06873SEric Dumazet 0, 2216652586dfSTetsuo Handa req); 22171da177e4SLinus Torvalds } 22181da177e4SLinus Torvalds 2219652586dfSTetsuo Handa static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) 22201da177e4SLinus Torvalds { 22211da177e4SLinus Torvalds int timer_active; 22221da177e4SLinus Torvalds unsigned long timer_expires; 2223cf533ea5SEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 2224cf4c6bf8SIlpo Järvinen const struct inet_connection_sock *icsk = inet_csk(sk); 2225cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 22260536fcc0SEric Dumazet const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 2227c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2228c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2229c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2230c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 223149d09007SEric Dumazet int rx_queue; 223200fd38d9SEric Dumazet int state; 22331da177e4SLinus Torvalds 22346ba8a3b1SNandita Dukkipati if (icsk->icsk_pending == ICSK_TIME_RETRANS || 223557dde7f7SYuchung Cheng icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || 22366ba8a3b1SNandita Dukkipati icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 22371da177e4SLinus Torvalds timer_active = 1; 2238463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2239463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 22401da177e4SLinus Torvalds timer_active = 4; 2241463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2242cf4c6bf8SIlpo Järvinen } else if (timer_pending(&sk->sk_timer)) { 22431da177e4SLinus Torvalds timer_active = 2; 2244cf4c6bf8SIlpo Järvinen timer_expires = sk->sk_timer.expires; 22451da177e4SLinus Torvalds } else { 22461da177e4SLinus Torvalds timer_active = 0; 22471da177e4SLinus Torvalds timer_expires = jiffies; 22481da177e4SLinus Torvalds } 22491da177e4SLinus Torvalds 225000fd38d9SEric Dumazet state = sk_state_load(sk); 225100fd38d9SEric Dumazet if (state == TCP_LISTEN) 225249d09007SEric Dumazet rx_queue = sk->sk_ack_backlog; 225349d09007SEric Dumazet else 225400fd38d9SEric Dumazet /* Because we don't lock the socket, 225500fd38d9SEric Dumazet * we might find a transient negative value. 225649d09007SEric Dumazet */ 225749d09007SEric Dumazet rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 225849d09007SEric Dumazet 22595e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2260652586dfSTetsuo Handa "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 226100fd38d9SEric Dumazet i, src, srcp, dest, destp, state, 226247da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 226349d09007SEric Dumazet rx_queue, 22641da177e4SLinus Torvalds timer_active, 2265a399a805SEric Dumazet jiffies_delta_to_clock_t(timer_expires - jiffies), 2266463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 2267a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), 22686687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 2269cf4c6bf8SIlpo Järvinen sock_i_ino(sk), 227041c6d650SReshetova, Elena refcount_read(&sk->sk_refcnt), sk, 22717be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_rto), 22727be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_ack.ato), 2273463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 22741da177e4SLinus Torvalds tp->snd_cwnd, 227500fd38d9SEric Dumazet state == TCP_LISTEN ? 227600fd38d9SEric Dumazet fastopenq->max_qlen : 2277652586dfSTetsuo Handa (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 22781da177e4SLinus Torvalds } 22791da177e4SLinus Torvalds 2280cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw, 2281652586dfSTetsuo Handa struct seq_file *f, int i) 22821da177e4SLinus Torvalds { 2283789f558cSEric Dumazet long delta = tw->tw_timer.expires - jiffies; 228423f33c2dSAl Viro __be32 dest, src; 22851da177e4SLinus Torvalds __u16 destp, srcp; 22861da177e4SLinus Torvalds 22871da177e4SLinus Torvalds dest = tw->tw_daddr; 22881da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 22891da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 22901da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 22911da177e4SLinus Torvalds 22925e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2293652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", 22941da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 2295a399a805SEric Dumazet 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 229641c6d650SReshetova, Elena refcount_read(&tw->tw_refcnt), tw); 22971da177e4SLinus Torvalds } 22981da177e4SLinus Torvalds 22991da177e4SLinus Torvalds #define TMPSZ 150 23001da177e4SLinus Torvalds 23011da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 23021da177e4SLinus Torvalds { 23031da177e4SLinus Torvalds struct tcp_iter_state *st; 230405dbc7b5SEric Dumazet struct sock *sk = v; 23051da177e4SLinus Torvalds 2306652586dfSTetsuo Handa seq_setwidth(seq, TMPSZ - 1); 23071da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 2308652586dfSTetsuo Handa seq_puts(seq, " sl local_address rem_address st tx_queue " 23091da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 23101da177e4SLinus Torvalds "inode"); 23111da177e4SLinus Torvalds goto out; 23121da177e4SLinus Torvalds } 23131da177e4SLinus Torvalds st = seq->private; 23141da177e4SLinus Torvalds 231505dbc7b5SEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 2316652586dfSTetsuo Handa get_timewait4_sock(v, seq, st->num); 2317079096f1SEric Dumazet else if (sk->sk_state == TCP_NEW_SYN_RECV) 2318079096f1SEric Dumazet get_openreq4(v, seq, st->num); 231905dbc7b5SEric Dumazet else 2320652586dfSTetsuo Handa get_tcp4_sock(v, seq, st->num); 23211da177e4SLinus Torvalds out: 2322652586dfSTetsuo Handa seq_pad(seq, '\n'); 23231da177e4SLinus Torvalds return 0; 23241da177e4SLinus Torvalds } 23251da177e4SLinus Torvalds 232673cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = { 232773cb88ecSArjan van de Ven .owner = THIS_MODULE, 232873cb88ecSArjan van de Ven .open = tcp_seq_open, 232973cb88ecSArjan van de Ven .read = seq_read, 233073cb88ecSArjan van de Ven .llseek = seq_lseek, 233173cb88ecSArjan van de Ven .release = seq_release_net 233273cb88ecSArjan van de Ven }; 233373cb88ecSArjan van de Ven 23341da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 23351da177e4SLinus Torvalds .name = "tcp", 23361da177e4SLinus Torvalds .family = AF_INET, 233773cb88ecSArjan van de Ven .seq_fops = &tcp_afinfo_seq_fops, 23389427c4b3SDenis V. Lunev .seq_ops = { 23399427c4b3SDenis V. Lunev .show = tcp4_seq_show, 23409427c4b3SDenis V. Lunev }, 23411da177e4SLinus Torvalds }; 23421da177e4SLinus Torvalds 23432c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net) 2344757764f6SPavel Emelyanov { 2345757764f6SPavel Emelyanov return tcp_proc_register(net, &tcp4_seq_afinfo); 2346757764f6SPavel Emelyanov } 2347757764f6SPavel Emelyanov 23482c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net) 2349757764f6SPavel Emelyanov { 2350757764f6SPavel Emelyanov tcp_proc_unregister(net, &tcp4_seq_afinfo); 2351757764f6SPavel Emelyanov } 2352757764f6SPavel Emelyanov 2353757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = { 2354757764f6SPavel Emelyanov .init = tcp4_proc_init_net, 2355757764f6SPavel Emelyanov .exit = tcp4_proc_exit_net, 2356757764f6SPavel Emelyanov }; 2357757764f6SPavel Emelyanov 23581da177e4SLinus Torvalds int __init tcp4_proc_init(void) 23591da177e4SLinus Torvalds { 2360757764f6SPavel Emelyanov return register_pernet_subsys(&tcp4_net_ops); 23611da177e4SLinus Torvalds } 23621da177e4SLinus Torvalds 23631da177e4SLinus Torvalds void tcp4_proc_exit(void) 23641da177e4SLinus Torvalds { 2365757764f6SPavel Emelyanov unregister_pernet_subsys(&tcp4_net_ops); 23661da177e4SLinus Torvalds } 23671da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 23681da177e4SLinus Torvalds 23691da177e4SLinus Torvalds struct proto tcp_prot = { 23701da177e4SLinus Torvalds .name = "TCP", 23711da177e4SLinus Torvalds .owner = THIS_MODULE, 23721da177e4SLinus Torvalds .close = tcp_close, 23731da177e4SLinus Torvalds .connect = tcp_v4_connect, 23741da177e4SLinus Torvalds .disconnect = tcp_disconnect, 2375463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 23761da177e4SLinus Torvalds .ioctl = tcp_ioctl, 23771da177e4SLinus Torvalds .init = tcp_v4_init_sock, 23781da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 23791da177e4SLinus Torvalds .shutdown = tcp_shutdown, 23801da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 23811da177e4SLinus Torvalds .getsockopt = tcp_getsockopt, 23824b9d07a4SUrsula Braun .keepalive = tcp_set_keepalive, 23831da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 23847ba42910SChangli Gao .sendmsg = tcp_sendmsg, 23857ba42910SChangli Gao .sendpage = tcp_sendpage, 23861da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 238746d3ceabSEric Dumazet .release_cb = tcp_release_cb, 2388ab1e0a13SArnaldo Carvalho de Melo .hash = inet_hash, 2389ab1e0a13SArnaldo Carvalho de Melo .unhash = inet_unhash, 2390ab1e0a13SArnaldo Carvalho de Melo .get_port = inet_csk_get_port, 23911da177e4SLinus Torvalds .enter_memory_pressure = tcp_enter_memory_pressure, 239206044751SEric Dumazet .leave_memory_pressure = tcp_leave_memory_pressure, 2393c9bee3b7SEric Dumazet .stream_memory_free = tcp_stream_memory_free, 23941da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 23950a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 23961da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 23971da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 2398a4fe34bfSEric W. Biederman .sysctl_mem = sysctl_tcp_mem, 23991da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 24001da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 24011da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 24021da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 24035f0d5a3aSPaul E. McKenney .slab_flags = SLAB_TYPESAFE_BY_RCU, 24046d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 240560236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 240639d8cda7SPavel Emelyanov .h.hashinfo = &tcp_hashinfo, 24077ba42910SChangli Gao .no_autobind = true, 2408543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 2409543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 2410543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 2411543d9cfeSArnaldo Carvalho de Melo #endif 2412c1e64e29SLorenzo Colitti .diag_destroy = tcp_abort, 24131da177e4SLinus Torvalds }; 24144bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot); 24151da177e4SLinus Torvalds 2416046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net) 2417046ee902SDenis V. Lunev { 2418bdbbb852SEric Dumazet int cpu; 2419bdbbb852SEric Dumazet 2420bdbbb852SEric Dumazet for_each_possible_cpu(cpu) 2421bdbbb852SEric Dumazet inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); 2422bdbbb852SEric Dumazet free_percpu(net->ipv4.tcp_sk); 2423bdbbb852SEric Dumazet } 2424bdbbb852SEric Dumazet 2425bdbbb852SEric Dumazet static int __net_init tcp_sk_init(struct net *net) 2426bdbbb852SEric Dumazet { 2427fee83d09SHaishuang Yan int res, cpu, cnt; 2428bdbbb852SEric Dumazet 2429bdbbb852SEric Dumazet net->ipv4.tcp_sk = alloc_percpu(struct sock *); 2430bdbbb852SEric Dumazet if (!net->ipv4.tcp_sk) 2431bdbbb852SEric Dumazet return -ENOMEM; 2432bdbbb852SEric Dumazet 2433bdbbb852SEric Dumazet for_each_possible_cpu(cpu) { 2434bdbbb852SEric Dumazet struct sock *sk; 2435bdbbb852SEric Dumazet 2436bdbbb852SEric Dumazet res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, 2437bdbbb852SEric Dumazet IPPROTO_TCP, net); 2438bdbbb852SEric Dumazet if (res) 2439bdbbb852SEric Dumazet goto fail; 2440a9d6532bSEric Dumazet sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); 2441bdbbb852SEric Dumazet *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; 2442bdbbb852SEric Dumazet } 244349213555SDaniel Borkmann 2444bdbbb852SEric Dumazet net->ipv4.sysctl_tcp_ecn = 2; 244549213555SDaniel Borkmann net->ipv4.sysctl_tcp_ecn_fallback = 1; 244649213555SDaniel Borkmann 2447b0f9ca53SFan Du net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; 24486b58e0a5SFan Du net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; 244905cbc0dbSFan Du net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; 2450bdbbb852SEric Dumazet 245113b287e8SNikolay Borisov net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; 24529bd6861bSNikolay Borisov net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; 2453b840d15dSNikolay Borisov net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; 245413b287e8SNikolay Borisov 24556fa25166SNikolay Borisov net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES; 24567c083ecbSNikolay Borisov net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; 24570aca737dSDavid S. Miller net->ipv4.sysctl_tcp_syncookies = 1; 24581043e25fSNikolay Borisov net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; 2459ae5c3f40SNikolay Borisov net->ipv4.sysctl_tcp_retries1 = TCP_RETR1; 2460c6214a97SNikolay Borisov net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; 2461c402d9beSNikolay Borisov net->ipv4.sysctl_tcp_orphan_retries = 0; 24621e579caaSNikolay Borisov net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; 24634979f2d9SNikolay Borisov net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; 246456ab6b93SHaishuang Yan net->ipv4.sysctl_tcp_tw_reuse = 0; 246512ed8244SNikolay Borisov 2466fee83d09SHaishuang Yan cnt = tcp_hashinfo.ehash_mask + 1; 2467fee83d09SHaishuang Yan net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; 24681946e672SHaishuang Yan net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; 24691946e672SHaishuang Yan 2470fee83d09SHaishuang Yan net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256); 2471f9301034SEric Dumazet net->ipv4.sysctl_tcp_sack = 1; 24729bb37ef0SEric Dumazet net->ipv4.sysctl_tcp_window_scaling = 1; 24735d2ed052SEric Dumazet net->ipv4.sysctl_tcp_timestamps = 1; 2474fee83d09SHaishuang Yan 247549213555SDaniel Borkmann return 0; 2476bdbbb852SEric Dumazet fail: 2477bdbbb852SEric Dumazet tcp_sk_exit(net); 2478bdbbb852SEric Dumazet 2479bdbbb852SEric Dumazet return res; 2480b099ce26SEric W. Biederman } 2481b099ce26SEric W. Biederman 2482b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2483b099ce26SEric W. Biederman { 24841946e672SHaishuang Yan inet_twsk_purge(&tcp_hashinfo, AF_INET); 2485046ee902SDenis V. Lunev } 2486046ee902SDenis V. Lunev 2487046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = { 2488046ee902SDenis V. Lunev .init = tcp_sk_init, 2489046ee902SDenis V. Lunev .exit = tcp_sk_exit, 2490b099ce26SEric W. Biederman .exit_batch = tcp_sk_exit_batch, 2491046ee902SDenis V. Lunev }; 2492046ee902SDenis V. Lunev 24939b0f976fSDenis V. Lunev void __init tcp_v4_init(void) 24941da177e4SLinus Torvalds { 24956a1b3054SEric W. Biederman if (register_pernet_subsys(&tcp_sk_ops)) 24961da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 24971da177e4SLinus Torvalds } 2498