/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

Biederman #include <net/net_namespace.h> 671da177e4SLinus Torvalds #include <net/icmp.h> 68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 691da177e4SLinus Torvalds #include <net/tcp.h> 7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 711da177e4SLinus Torvalds #include <net/ipv6.h> 721da177e4SLinus Torvalds #include <net/inet_common.h> 736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 741da177e4SLinus Torvalds #include <net/xfrm.h> 756e5714eaSDavid S. Miller #include <net/secure_seq.h> 76d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h> 77076bb0c8SEliezer Tamir #include <net/busy_poll.h> 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds #include <linux/inet.h> 801da177e4SLinus Torvalds #include <linux/ipv6.h> 811da177e4SLinus Torvalds #include <linux/stddef.h> 821da177e4SLinus Torvalds #include <linux/proc_fs.h> 831da177e4SLinus Torvalds #include <linux/seq_file.h> 841da177e4SLinus Torvalds 85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h> 86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h> 87cfb6eeb4SYOSHIFUJI Hideaki 88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly; 89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly; 904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency); 911da177e4SLinus Torvalds 92cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 93a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 94318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th); 95cfb6eeb4SYOSHIFUJI Hideaki #endif 96cfb6eeb4SYOSHIFUJI Hideaki 975caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo; 984bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo); 991da177e4SLinus Torvalds 100936b8bdbSOctavian Purdila static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) 1011da177e4SLinus Torvalds { 102eddc9ec5SArnaldo Carvalho de Melo return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 103eddc9ec5SArnaldo Carvalho 
de Melo ip_hdr(skb)->saddr, 104aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->dest, 105aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->source); 1061da177e4SLinus Torvalds } 1071da177e4SLinus Torvalds 1086d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1096d6ee43eSArnaldo Carvalho de Melo { 1106d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1116d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1126d6ee43eSArnaldo Carvalho de Melo 1136d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1146d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1156d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1166d6ee43eSArnaldo Carvalho de Melo 1176d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1186d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1196d6ee43eSArnaldo Carvalho de Melo holder. 1206d6ee43eSArnaldo Carvalho de Melo 1216d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1226d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 
1236d6ee43eSArnaldo Carvalho de Melo */ 1246d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 12551456b29SIan Morris (!twp || (sysctl_tcp_tw_reuse && 1269d729f72SJames Morris get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 1276d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1286d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1296d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1306d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1316d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1326d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1336d6ee43eSArnaldo Carvalho de Melo return 1; 1346d6ee43eSArnaldo Carvalho de Melo } 1356d6ee43eSArnaldo Carvalho de Melo 1366d6ee43eSArnaldo Carvalho de Melo return 0; 1376d6ee43eSArnaldo Carvalho de Melo } 1386d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1396d6ee43eSArnaldo Carvalho de Melo 1401da177e4SLinus Torvalds /* This will initiate an outgoing connection. */ 1411da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1421da177e4SLinus Torvalds { 1432d7192d6SDavid S. Miller struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1441da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1451da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 146dca8b089SDavid S. Miller __be16 orig_sport, orig_dport; 147bada8adcSAl Viro __be32 daddr, nexthop; 148da905bd1SDavid S. Miller struct flowi4 *fl4; 1492d7192d6SDavid S. 
Miller struct rtable *rt; 1501da177e4SLinus Torvalds int err; 151f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 1521da177e4SLinus Torvalds 1531da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1541da177e4SLinus Torvalds return -EINVAL; 1551da177e4SLinus Torvalds 1561da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1571da177e4SLinus Torvalds return -EAFNOSUPPORT; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 160f6d8bd05SEric Dumazet inet_opt = rcu_dereference_protected(inet->inet_opt, 161f6d8bd05SEric Dumazet sock_owned_by_user(sk)); 162f6d8bd05SEric Dumazet if (inet_opt && inet_opt->opt.srr) { 1631da177e4SLinus Torvalds if (!daddr) 1641da177e4SLinus Torvalds return -EINVAL; 165f6d8bd05SEric Dumazet nexthop = inet_opt->opt.faddr; 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds 168dca8b089SDavid S. Miller orig_sport = inet->inet_sport; 169dca8b089SDavid S. Miller orig_dport = usin->sin_port; 170da905bd1SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 171da905bd1SDavid S. Miller rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 1721da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1731da177e4SLinus Torvalds IPPROTO_TCP, 1740e0d44abSSteffen Klassert orig_sport, orig_dport, sk); 175b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 176b23dd4feSDavid S. Miller err = PTR_ERR(rt); 177b23dd4feSDavid S. Miller if (err == -ENETUNREACH) 178f1d8cba6SEric Dumazet IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 179b23dd4feSDavid S. Miller return err; 180584bdf8cSWei Dong } 1811da177e4SLinus Torvalds 1821da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1831da177e4SLinus Torvalds ip_rt_put(rt); 1841da177e4SLinus Torvalds return -ENETUNREACH; 1851da177e4SLinus Torvalds } 1861da177e4SLinus Torvalds 187f6d8bd05SEric Dumazet if (!inet_opt || !inet_opt->opt.srr) 188da905bd1SDavid S. 
Miller daddr = fl4->daddr; 1891da177e4SLinus Torvalds 190c720c7e8SEric Dumazet if (!inet->inet_saddr) 191da905bd1SDavid S. Miller inet->inet_saddr = fl4->saddr; 192d1e559d0SEric Dumazet sk_rcv_saddr_set(sk, inet->inet_saddr); 1931da177e4SLinus Torvalds 194c720c7e8SEric Dumazet if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 1951da177e4SLinus Torvalds /* Reset inherited state */ 1961da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 1971da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 198ee995283SPavel Emelyanov if (likely(!tp->repair)) 1991da177e4SLinus Torvalds tp->write_seq = 0; 2001da177e4SLinus Torvalds } 2011da177e4SLinus Torvalds 202295ff7edSArnaldo Carvalho de Melo if (tcp_death_row.sysctl_tw_recycle && 20381166dd6SDavid S. Miller !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) 20481166dd6SDavid S. Miller tcp_fetch_timewait_stamp(sk, &rt->dst); 2051da177e4SLinus Torvalds 206c720c7e8SEric Dumazet inet->inet_dport = usin->sin_port; 207d1e559d0SEric Dumazet sk_daddr_set(sk, daddr); 2081da177e4SLinus Torvalds 209d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 210f6d8bd05SEric Dumazet if (inet_opt) 211f6d8bd05SEric Dumazet inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 2121da177e4SLinus Torvalds 213bee7ca9eSWilliam Allen Simpson tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 2141da177e4SLinus Torvalds 2151da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 2161da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2171da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2181da177e4SLinus Torvalds * complete initialization after this. 
2191da177e4SLinus Torvalds */ 2201da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 221a7f5e7f1SArnaldo Carvalho de Melo err = inet_hash_connect(&tcp_death_row, sk); 2221da177e4SLinus Torvalds if (err) 2231da177e4SLinus Torvalds goto failure; 2241da177e4SLinus Torvalds 225877d1f62STom Herbert sk_set_txhash(sk); 2269e7ceb06SSathya Perla 227da905bd1SDavid S. Miller rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 228c720c7e8SEric Dumazet inet->inet_sport, inet->inet_dport, sk); 229b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 230b23dd4feSDavid S. Miller err = PTR_ERR(rt); 231b23dd4feSDavid S. Miller rt = NULL; 2321da177e4SLinus Torvalds goto failure; 233b23dd4feSDavid S. Miller } 2341da177e4SLinus Torvalds /* OK, now commit destination to socket. */ 235bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 236d8d1f30bSChangli Gao sk_setup_caps(sk, &rt->dst); 2371da177e4SLinus Torvalds 238ee995283SPavel Emelyanov if (!tp->write_seq && likely(!tp->repair)) 239c720c7e8SEric Dumazet tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 240c720c7e8SEric Dumazet inet->inet_daddr, 241c720c7e8SEric Dumazet inet->inet_sport, 2421da177e4SLinus Torvalds usin->sin_port); 2431da177e4SLinus Torvalds 244c720c7e8SEric Dumazet inet->inet_id = tp->write_seq ^ jiffies; 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds err = tcp_connect(sk); 247ee995283SPavel Emelyanov 2481da177e4SLinus Torvalds rt = NULL; 2491da177e4SLinus Torvalds if (err) 2501da177e4SLinus Torvalds goto failure; 2511da177e4SLinus Torvalds 2521da177e4SLinus Torvalds return 0; 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds failure: 2557174259eSArnaldo Carvalho de Melo /* 2567174259eSArnaldo Carvalho de Melo * This unhashes the socket and releases the local port, 2577174259eSArnaldo Carvalho de Melo * if necessary. 
2587174259eSArnaldo Carvalho de Melo */ 2591da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2601da177e4SLinus Torvalds ip_rt_put(rt); 2611da177e4SLinus Torvalds sk->sk_route_caps = 0; 262c720c7e8SEric Dumazet inet->inet_dport = 0; 2631da177e4SLinus Torvalds return err; 2641da177e4SLinus Torvalds } 2654bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect); 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds /* 268563d34d0SEric Dumazet * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. 269563d34d0SEric Dumazet * It can be called through tcp_release_cb() if socket was owned by user 270563d34d0SEric Dumazet * at the time tcp_v4_err() was called to handle ICMP message. 2711da177e4SLinus Torvalds */ 2724fab9071SNeal Cardwell void tcp_v4_mtu_reduced(struct sock *sk) 2731da177e4SLinus Torvalds { 2741da177e4SLinus Torvalds struct dst_entry *dst; 2751da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 276563d34d0SEric Dumazet u32 mtu = tcp_sk(sk)->mtu_info; 2771da177e4SLinus Torvalds 27880d0a69fSDavid S. Miller dst = inet_csk_update_pmtu(sk, mtu); 27980d0a69fSDavid S. Miller if (!dst) 2801da177e4SLinus Torvalds return; 2811da177e4SLinus Torvalds 2821da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 2831da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 
2841da177e4SLinus Torvalds */ 2851da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 2861da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 2871da177e4SLinus Torvalds 2881da177e4SLinus Torvalds mtu = dst_mtu(dst); 2891da177e4SLinus Torvalds 2901da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 291482fc609SHannes Frederic Sowa ip_sk_accept_pmtu(sk) && 292d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 2931da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 2941da177e4SLinus Torvalds 2951da177e4SLinus Torvalds /* Resend the TCP packet because it's 2961da177e4SLinus Torvalds * clear that the old packet has been 2971da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 2981da177e4SLinus Torvalds * discovery. 2991da177e4SLinus Torvalds */ 3001da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3011da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3021da177e4SLinus Torvalds } 3034fab9071SNeal Cardwell EXPORT_SYMBOL(tcp_v4_mtu_reduced); 3041da177e4SLinus Torvalds 30555be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk) 30655be7a9cSDavid S. Miller { 30755be7a9cSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0); 30855be7a9cSDavid S. Miller 3091ed5c48fSDavid S. Miller if (dst) 3106700c270SDavid S. Miller dst->ops->redirect(dst, sk, skb); 31155be7a9cSDavid S. Miller } 31255be7a9cSDavid S. Miller 31326e37360SEric Dumazet 31426e37360SEric Dumazet /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ 31526e37360SEric Dumazet void tcp_req_err(struct sock *sk, u32 seq) 31626e37360SEric Dumazet { 31726e37360SEric Dumazet struct request_sock *req = inet_reqsk(sk); 31826e37360SEric Dumazet struct net *net = sock_net(sk); 31926e37360SEric Dumazet 32026e37360SEric Dumazet /* ICMPs are not backlogged, hence we cannot get 32126e37360SEric Dumazet * an established socket here. 
32226e37360SEric Dumazet */ 32326e37360SEric Dumazet WARN_ON(req->sk); 32426e37360SEric Dumazet 32526e37360SEric Dumazet if (seq != tcp_rsk(req)->snt_isn) { 32626e37360SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 32726e37360SEric Dumazet } else { 32826e37360SEric Dumazet /* 32926e37360SEric Dumazet * Still in SYN_RECV, just remove it silently. 33026e37360SEric Dumazet * There is no good way to pass the error to the newly 33126e37360SEric Dumazet * created socket, and POSIX does not want network 33226e37360SEric Dumazet * errors returned from accept(). 33326e37360SEric Dumazet */ 334c6973669SFan Du inet_csk_reqsk_queue_drop(req->rsk_listener, req); 335ef84d8ceSEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); 33626e37360SEric Dumazet } 337ef84d8ceSEric Dumazet reqsk_put(req); 33826e37360SEric Dumazet } 33926e37360SEric Dumazet EXPORT_SYMBOL(tcp_req_err); 34026e37360SEric Dumazet 3411da177e4SLinus Torvalds /* 3421da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3431da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3441da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3451da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3461da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3471da177e4SLinus Torvalds * to find the appropriate port. 3481da177e4SLinus Torvalds * 3491da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". When 3501da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3511da177e4SLinus Torvalds * and for some paths there is no check at all. 3521da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3531da177e4SLinus Torvalds * is probably better. 
3541da177e4SLinus Torvalds * 3551da177e4SLinus Torvalds */ 3561da177e4SLinus Torvalds 3574d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 3581da177e4SLinus Torvalds { 359b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; 3604d1a2d9eSDamian Lukowski struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 361f1ecd5d9SDamian Lukowski struct inet_connection_sock *icsk; 3621da177e4SLinus Torvalds struct tcp_sock *tp; 3631da177e4SLinus Torvalds struct inet_sock *inet; 3644d1a2d9eSDamian Lukowski const int type = icmp_hdr(icmp_skb)->type; 3654d1a2d9eSDamian Lukowski const int code = icmp_hdr(icmp_skb)->code; 3661da177e4SLinus Torvalds struct sock *sk; 367f1ecd5d9SDamian Lukowski struct sk_buff *skb; 3680a672f74SYuchung Cheng struct request_sock *fastopen; 3690a672f74SYuchung Cheng __u32 seq, snd_una; 370f1ecd5d9SDamian Lukowski __u32 remaining; 3711da177e4SLinus Torvalds int err; 3724d1a2d9eSDamian Lukowski struct net *net = dev_net(icmp_skb->dev); 3731da177e4SLinus Torvalds 37426e37360SEric Dumazet sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, 37526e37360SEric Dumazet th->dest, iph->saddr, ntohs(th->source), 37626e37360SEric Dumazet inet_iif(icmp_skb)); 3771da177e4SLinus Torvalds if (!sk) { 378dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3791da177e4SLinus Torvalds return; 3801da177e4SLinus Torvalds } 3811da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 3829469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 3831da177e4SLinus Torvalds return; 3841da177e4SLinus Torvalds } 38526e37360SEric Dumazet seq = ntohl(th->seq); 38626e37360SEric Dumazet if (sk->sk_state == TCP_NEW_SYN_RECV) 38726e37360SEric Dumazet return tcp_req_err(sk, seq); 3881da177e4SLinus Torvalds 3891da177e4SLinus Torvalds bh_lock_sock(sk); 3901da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 3911da177e4SLinus Torvalds * servers this needs to be solved 
differently. 392563d34d0SEric Dumazet * We do take care of PMTU discovery (RFC1191) special case : 393563d34d0SEric Dumazet * we can receive locally generated ICMP messages while socket is held. 3941da177e4SLinus Torvalds */ 395b74aa930SEric Dumazet if (sock_owned_by_user(sk)) { 396b74aa930SEric Dumazet if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) 397de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 398b74aa930SEric Dumazet } 3991da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 4001da177e4SLinus Torvalds goto out; 4011da177e4SLinus Torvalds 40297e3ecd1Sstephen hemminger if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 40397e3ecd1Sstephen hemminger NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 40497e3ecd1Sstephen hemminger goto out; 40597e3ecd1Sstephen hemminger } 40697e3ecd1Sstephen hemminger 407f1ecd5d9SDamian Lukowski icsk = inet_csk(sk); 4081da177e4SLinus Torvalds tp = tcp_sk(sk); 4090a672f74SYuchung Cheng /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 4100a672f74SYuchung Cheng fastopen = tp->fastopen_rsk; 4110a672f74SYuchung Cheng snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 4121da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 4130a672f74SYuchung Cheng !between(seq, snd_una, tp->snd_nxt)) { 414de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 4151da177e4SLinus Torvalds goto out; 4161da177e4SLinus Torvalds } 4171da177e4SLinus Torvalds 4181da177e4SLinus Torvalds switch (type) { 41955be7a9cSDavid S. Miller case ICMP_REDIRECT: 42055be7a9cSDavid S. Miller do_redirect(icmp_skb, sk); 42155be7a9cSDavid S. Miller goto out; 4221da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 4231da177e4SLinus Torvalds /* Just silently ignore these. 
*/ 4241da177e4SLinus Torvalds goto out; 4251da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 4261da177e4SLinus Torvalds err = EPROTO; 4271da177e4SLinus Torvalds break; 4281da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 4291da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 4301da177e4SLinus Torvalds goto out; 4311da177e4SLinus Torvalds 4321da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 4330d4f0608SEric Dumazet /* We are not interested in TCP_LISTEN and open_requests 4340d4f0608SEric Dumazet * (SYN-ACKs send out by Linux are always <576bytes so 4350d4f0608SEric Dumazet * they should go through unfragmented). 4360d4f0608SEric Dumazet */ 4370d4f0608SEric Dumazet if (sk->sk_state == TCP_LISTEN) 4380d4f0608SEric Dumazet goto out; 4390d4f0608SEric Dumazet 440563d34d0SEric Dumazet tp->mtu_info = info; 441144d56e9SEric Dumazet if (!sock_owned_by_user(sk)) { 442563d34d0SEric Dumazet tcp_v4_mtu_reduced(sk); 443144d56e9SEric Dumazet } else { 444144d56e9SEric Dumazet if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) 445144d56e9SEric Dumazet sock_hold(sk); 446144d56e9SEric Dumazet } 4471da177e4SLinus Torvalds goto out; 4481da177e4SLinus Torvalds } 4491da177e4SLinus Torvalds 4501da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 451f1ecd5d9SDamian Lukowski /* check if icmp_skb allows revert of backoff 452f1ecd5d9SDamian Lukowski * (see draft-zimmermann-tcp-lcd) */ 453f1ecd5d9SDamian Lukowski if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) 454f1ecd5d9SDamian Lukowski break; 455f1ecd5d9SDamian Lukowski if (seq != tp->snd_una || !icsk->icsk_retransmits || 4560a672f74SYuchung Cheng !icsk->icsk_backoff || fastopen) 457f1ecd5d9SDamian Lukowski break; 458f1ecd5d9SDamian Lukowski 4598f49c270SDavid S. Miller if (sock_owned_by_user(sk)) 4608f49c270SDavid S. Miller break; 4618f49c270SDavid S. Miller 462f1ecd5d9SDamian Lukowski icsk->icsk_backoff--; 463fcdd1cf4SEric Dumazet icsk->icsk_rto = tp->srtt_us ? 
__tcp_set_rto(tp) : 464fcdd1cf4SEric Dumazet TCP_TIMEOUT_INIT; 465fcdd1cf4SEric Dumazet icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); 466f1ecd5d9SDamian Lukowski 467f1ecd5d9SDamian Lukowski skb = tcp_write_queue_head(sk); 468f1ecd5d9SDamian Lukowski BUG_ON(!skb); 469f1ecd5d9SDamian Lukowski 4707faee5c0SEric Dumazet remaining = icsk->icsk_rto - 4717faee5c0SEric Dumazet min(icsk->icsk_rto, 4727faee5c0SEric Dumazet tcp_time_stamp - tcp_skb_timestamp(skb)); 473f1ecd5d9SDamian Lukowski 474f1ecd5d9SDamian Lukowski if (remaining) { 475f1ecd5d9SDamian Lukowski inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 476f1ecd5d9SDamian Lukowski remaining, TCP_RTO_MAX); 477f1ecd5d9SDamian Lukowski } else { 478f1ecd5d9SDamian Lukowski /* RTO revert clocked out retransmission. 479f1ecd5d9SDamian Lukowski * Will retransmit now */ 480f1ecd5d9SDamian Lukowski tcp_retransmit_timer(sk); 481f1ecd5d9SDamian Lukowski } 482f1ecd5d9SDamian Lukowski 4831da177e4SLinus Torvalds break; 4841da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4851da177e4SLinus Torvalds err = EHOSTUNREACH; 4861da177e4SLinus Torvalds break; 4871da177e4SLinus Torvalds default: 4881da177e4SLinus Torvalds goto out; 4891da177e4SLinus Torvalds } 4901da177e4SLinus Torvalds 4911da177e4SLinus Torvalds switch (sk->sk_state) { 4921da177e4SLinus Torvalds case TCP_SYN_SENT: 4930a672f74SYuchung Cheng case TCP_SYN_RECV: 4940a672f74SYuchung Cheng /* Only in fast or simultaneous open. If a fast open socket is 4950a672f74SYuchung Cheng * is already accepted it is treated as a connected one below. 
4961da177e4SLinus Torvalds */ 49751456b29SIan Morris if (fastopen && !fastopen->sk) 4980a672f74SYuchung Cheng break; 4990a672f74SYuchung Cheng 5001da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 5011da177e4SLinus Torvalds sk->sk_err = err; 5021da177e4SLinus Torvalds 5031da177e4SLinus Torvalds sk->sk_error_report(sk); 5041da177e4SLinus Torvalds 5051da177e4SLinus Torvalds tcp_done(sk); 5061da177e4SLinus Torvalds } else { 5071da177e4SLinus Torvalds sk->sk_err_soft = err; 5081da177e4SLinus Torvalds } 5091da177e4SLinus Torvalds goto out; 5101da177e4SLinus Torvalds } 5111da177e4SLinus Torvalds 5121da177e4SLinus Torvalds /* If we've already connected we will keep trying 5131da177e4SLinus Torvalds * until we time out, or the user gives up. 5141da177e4SLinus Torvalds * 5151da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 5161da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 5171da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 5181da177e4SLinus Torvalds * 5191da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 5201da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 5211da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 5221da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 5231da177e4SLinus Torvalds * 5241da177e4SLinus Torvalds * Now we are in compliance with RFCs. 
5251da177e4SLinus Torvalds * --ANK (980905) 5261da177e4SLinus Torvalds */ 5271da177e4SLinus Torvalds 5281da177e4SLinus Torvalds inet = inet_sk(sk); 5291da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 5301da177e4SLinus Torvalds sk->sk_err = err; 5311da177e4SLinus Torvalds sk->sk_error_report(sk); 5321da177e4SLinus Torvalds } else { /* Only an error on timeout */ 5331da177e4SLinus Torvalds sk->sk_err_soft = err; 5341da177e4SLinus Torvalds } 5351da177e4SLinus Torvalds 5361da177e4SLinus Torvalds out: 5371da177e4SLinus Torvalds bh_unlock_sock(sk); 5381da177e4SLinus Torvalds sock_put(sk); 5391da177e4SLinus Torvalds } 5401da177e4SLinus Torvalds 54128850dc7SDaniel Borkmann void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) 5421da177e4SLinus Torvalds { 543aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 5441da177e4SLinus Torvalds 54584fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) { 546419f9f89SHerbert Xu th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); 547663ead3bSHerbert Xu skb->csum_start = skb_transport_header(skb) - skb->head; 548ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 5491da177e4SLinus Torvalds } else { 550419f9f89SHerbert Xu th->check = tcp_v4_check(skb->len, saddr, daddr, 55107f0757aSJoe Perches csum_partial(th, 5521da177e4SLinus Torvalds th->doff << 2, 5531da177e4SLinus Torvalds skb->csum)); 5541da177e4SLinus Torvalds } 5551da177e4SLinus Torvalds } 5561da177e4SLinus Torvalds 557419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. 
*/ 558bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) 559419f9f89SHerbert Xu { 560cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 561419f9f89SHerbert Xu 562419f9f89SHerbert Xu __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 563419f9f89SHerbert Xu } 5644bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check); 565419f9f89SHerbert Xu 5661da177e4SLinus Torvalds /* 5671da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 5681da177e4SLinus Torvalds * 5691da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 5701da177e4SLinus Torvalds * for reset. 5711da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 5721da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 5731da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 5741da177e4SLinus Torvalds * So that we build reply only basing on parameters 5751da177e4SLinus Torvalds * arrived with segment. 5761da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 
5771da177e4SLinus Torvalds */ 5781da177e4SLinus Torvalds 579a00e7444SEric Dumazet static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) 5801da177e4SLinus Torvalds { 581cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 582cfb6eeb4SYOSHIFUJI Hideaki struct { 583cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr th; 584cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 585714e85beSAl Viro __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; 586cfb6eeb4SYOSHIFUJI Hideaki #endif 587cfb6eeb4SYOSHIFUJI Hideaki } rep; 5881da177e4SLinus Torvalds struct ip_reply_arg arg; 589cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 590e46787f0SFlorian Westphal struct tcp_md5sig_key *key = NULL; 591658ddaafSShawn Lu const __u8 *hash_location = NULL; 592658ddaafSShawn Lu unsigned char newhash[16]; 593658ddaafSShawn Lu int genhash; 594658ddaafSShawn Lu struct sock *sk1 = NULL; 595cfb6eeb4SYOSHIFUJI Hideaki #endif 596a86b1e30SPavel Emelyanov struct net *net; 5971da177e4SLinus Torvalds 5981da177e4SLinus Torvalds /* Never send a reset in response to a reset. */ 5991da177e4SLinus Torvalds if (th->rst) 6001da177e4SLinus Torvalds return; 6011da177e4SLinus Torvalds 602c3658e8dSEric Dumazet /* If sk not NULL, it means we did a successful lookup and incoming 603c3658e8dSEric Dumazet * route had to be correct. prequeue might have dropped our dst. 604c3658e8dSEric Dumazet */ 605c3658e8dSEric Dumazet if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL) 6061da177e4SLinus Torvalds return; 6071da177e4SLinus Torvalds 6081da177e4SLinus Torvalds /* Swap the send and the receive. 
*/ 609cfb6eeb4SYOSHIFUJI Hideaki memset(&rep, 0, sizeof(rep)); 610cfb6eeb4SYOSHIFUJI Hideaki rep.th.dest = th->source; 611cfb6eeb4SYOSHIFUJI Hideaki rep.th.source = th->dest; 612cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = sizeof(struct tcphdr) / 4; 613cfb6eeb4SYOSHIFUJI Hideaki rep.th.rst = 1; 6141da177e4SLinus Torvalds 6151da177e4SLinus Torvalds if (th->ack) { 616cfb6eeb4SYOSHIFUJI Hideaki rep.th.seq = th->ack_seq; 6171da177e4SLinus Torvalds } else { 618cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack = 1; 619cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 6201da177e4SLinus Torvalds skb->len - (th->doff << 2)); 6211da177e4SLinus Torvalds } 6221da177e4SLinus Torvalds 6237174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 624cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_base = (unsigned char *)&rep; 625cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len = sizeof(rep.th); 626cfb6eeb4SYOSHIFUJI Hideaki 6270f85feaeSEric Dumazet net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 628cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 629658ddaafSShawn Lu hash_location = tcp_parse_md5sig_option(th); 630271c3b9bSFlorian Westphal if (sk && sk_fullsock(sk)) { 631e46787f0SFlorian Westphal key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) 632e46787f0SFlorian Westphal &ip_hdr(skb)->saddr, AF_INET); 633e46787f0SFlorian Westphal } else if (hash_location) { 634658ddaafSShawn Lu /* 635658ddaafSShawn Lu * active side is lost. Try to find listening socket through 636658ddaafSShawn Lu * source port, and then find md5 key through listening socket. 637658ddaafSShawn Lu * we are not loose security here: 638658ddaafSShawn Lu * Incoming packet is checked with md5 hash with finding key, 639658ddaafSShawn Lu * no RST generated if md5 hash doesn't match. 
640658ddaafSShawn Lu */ 6410f85feaeSEric Dumazet sk1 = __inet_lookup_listener(net, 642da5e3630STom Herbert &tcp_hashinfo, ip_hdr(skb)->saddr, 643da5e3630STom Herbert th->source, ip_hdr(skb)->daddr, 644658ddaafSShawn Lu ntohs(th->source), inet_iif(skb)); 645658ddaafSShawn Lu /* don't send rst if it can't find key */ 646658ddaafSShawn Lu if (!sk1) 647658ddaafSShawn Lu return; 648658ddaafSShawn Lu rcu_read_lock(); 649658ddaafSShawn Lu key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) 650658ddaafSShawn Lu &ip_hdr(skb)->saddr, AF_INET); 651658ddaafSShawn Lu if (!key) 652658ddaafSShawn Lu goto release_sk1; 653658ddaafSShawn Lu 65439f8e58eSEric Dumazet genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); 655658ddaafSShawn Lu if (genhash || memcmp(hash_location, newhash, 16) != 0) 656658ddaafSShawn Lu goto release_sk1; 657658ddaafSShawn Lu } 658658ddaafSShawn Lu 659cfb6eeb4SYOSHIFUJI Hideaki if (key) { 660cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | 661cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 662cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 663cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 664cfb6eeb4SYOSHIFUJI Hideaki /* Update length and the length the header thinks exists */ 665cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 666cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len / 4; 667cfb6eeb4SYOSHIFUJI Hideaki 66849a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], 66978e645cbSIlpo Järvinen key, ip_hdr(skb)->saddr, 67078e645cbSIlpo Järvinen ip_hdr(skb)->daddr, &rep.th); 671cfb6eeb4SYOSHIFUJI Hideaki } 672cfb6eeb4SYOSHIFUJI Hideaki #endif 673eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 674eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 67552cd5750SIlpo Järvinen arg.iov[0].iov_len, IPPROTO_TCP, 0); 6761da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 677271c3b9bSFlorian Westphal arg.flags = (sk && 
inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0; 678271c3b9bSFlorian Westphal 679e2446eaaSShawn Lu /* When socket is gone, all binding information is lost. 6804c675258SAlexey Kuznetsov * routing might fail in this case. No choice here, if we choose to force 6814c675258SAlexey Kuznetsov * input interface, we will misroute in case of asymmetric route. 682e2446eaaSShawn Lu */ 6834c675258SAlexey Kuznetsov if (sk) 6844c675258SAlexey Kuznetsov arg.bound_dev_if = sk->sk_bound_dev_if; 6851da177e4SLinus Torvalds 686271c3b9bSFlorian Westphal BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != 687271c3b9bSFlorian Westphal offsetof(struct inet_timewait_sock, tw_bound_dev_if)); 688271c3b9bSFlorian Westphal 68966b13d99SEric Dumazet arg.tos = ip_hdr(skb)->tos; 690bdbbb852SEric Dumazet ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 691bdbbb852SEric Dumazet skb, &TCP_SKB_CB(skb)->header.h4.opt, 69224a2d43dSEric Dumazet ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 69324a2d43dSEric Dumazet &arg, arg.iov[0].iov_len); 6941da177e4SLinus Torvalds 69563231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 69663231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 697658ddaafSShawn Lu 698658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG 699658ddaafSShawn Lu release_sk1: 700658ddaafSShawn Lu if (sk1) { 701658ddaafSShawn Lu rcu_read_unlock(); 702658ddaafSShawn Lu sock_put(sk1); 703658ddaafSShawn Lu } 704658ddaafSShawn Lu #endif 7051da177e4SLinus Torvalds } 7061da177e4SLinus Torvalds 7071da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 7081da177e4SLinus Torvalds outside socket context is ugly, certainly. What can I do? 
7091da177e4SLinus Torvalds */ 7101da177e4SLinus Torvalds 711*e62a123bSEric Dumazet static void tcp_v4_send_ack(struct net *net, 712*e62a123bSEric Dumazet struct sk_buff *skb, u32 seq, u32 ack, 713ee684b6fSAndrey Vagin u32 win, u32 tsval, u32 tsecr, int oif, 71488ef4a5aSKOVACS Krisztian struct tcp_md5sig_key *key, 71566b13d99SEric Dumazet int reply_flags, u8 tos) 7161da177e4SLinus Torvalds { 717cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 7181da177e4SLinus Torvalds struct { 7191da177e4SLinus Torvalds struct tcphdr th; 720714e85beSAl Viro __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 721cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 722cfb6eeb4SYOSHIFUJI Hideaki + (TCPOLEN_MD5SIG_ALIGNED >> 2) 723cfb6eeb4SYOSHIFUJI Hideaki #endif 724cfb6eeb4SYOSHIFUJI Hideaki ]; 7251da177e4SLinus Torvalds } rep; 7261da177e4SLinus Torvalds struct ip_reply_arg arg; 7271da177e4SLinus Torvalds 7281da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 7297174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 7301da177e4SLinus Torvalds 7311da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 7321da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 733ee684b6fSAndrey Vagin if (tsecr) { 734cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 7351da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 7361da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 737ee684b6fSAndrey Vagin rep.opt[1] = htonl(tsval); 738ee684b6fSAndrey Vagin rep.opt[2] = htonl(tsecr); 739cb48cfe8SCraig Schlenter arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 7401da177e4SLinus Torvalds } 7411da177e4SLinus Torvalds 7421da177e4SLinus Torvalds /* Swap the send and the receive. 
*/ 7431da177e4SLinus Torvalds rep.th.dest = th->source; 7441da177e4SLinus Torvalds rep.th.source = th->dest; 7451da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 7461da177e4SLinus Torvalds rep.th.seq = htonl(seq); 7471da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 7481da177e4SLinus Torvalds rep.th.ack = 1; 7491da177e4SLinus Torvalds rep.th.window = htons(win); 7501da177e4SLinus Torvalds 751cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 752cfb6eeb4SYOSHIFUJI Hideaki if (key) { 753ee684b6fSAndrey Vagin int offset = (tsecr) ? 3 : 0; 754cfb6eeb4SYOSHIFUJI Hideaki 755cfb6eeb4SYOSHIFUJI Hideaki rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 756cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 757cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 758cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 759cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 760cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len/4; 761cfb6eeb4SYOSHIFUJI Hideaki 76249a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], 76390b7e112SAdam Langley key, ip_hdr(skb)->saddr, 76490b7e112SAdam Langley ip_hdr(skb)->daddr, &rep.th); 765cfb6eeb4SYOSHIFUJI Hideaki } 766cfb6eeb4SYOSHIFUJI Hideaki #endif 76788ef4a5aSKOVACS Krisztian arg.flags = reply_flags; 768eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 769eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 7701da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 7711da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 7729501f972SYOSHIFUJI Hideaki if (oif) 7739501f972SYOSHIFUJI Hideaki arg.bound_dev_if = oif; 77466b13d99SEric Dumazet arg.tos = tos; 775bdbbb852SEric Dumazet ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), 776bdbbb852SEric Dumazet skb, &TCP_SKB_CB(skb)->header.h4.opt, 77724a2d43dSEric Dumazet ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 77824a2d43dSEric Dumazet &arg, arg.iov[0].iov_len); 7791da177e4SLinus 
Torvalds 78063231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 7811da177e4SLinus Torvalds } 7821da177e4SLinus Torvalds 7831da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 7841da177e4SLinus Torvalds { 7858feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 786cfb6eeb4SYOSHIFUJI Hideaki struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 7871da177e4SLinus Torvalds 788*e62a123bSEric Dumazet tcp_v4_send_ack(sock_net(sk), skb, 789*e62a123bSEric Dumazet tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 7907174259eSArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 791ee684b6fSAndrey Vagin tcp_time_stamp + tcptw->tw_ts_offset, 7929501f972SYOSHIFUJI Hideaki tcptw->tw_ts_recent, 7939501f972SYOSHIFUJI Hideaki tw->tw_bound_dev_if, 79488ef4a5aSKOVACS Krisztian tcp_twsk_md5_key(tcptw), 79566b13d99SEric Dumazet tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, 79666b13d99SEric Dumazet tw->tw_tos 7979501f972SYOSHIFUJI Hideaki ); 7981da177e4SLinus Torvalds 7998feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 8001da177e4SLinus Torvalds } 8011da177e4SLinus Torvalds 802a00e7444SEric Dumazet static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 8037174259eSArnaldo Carvalho de Melo struct request_sock *req) 8041da177e4SLinus Torvalds { 805168a8f58SJerry Chu /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 806168a8f58SJerry Chu * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 807168a8f58SJerry Chu */ 808*e62a123bSEric Dumazet u32 seq = (sk->sk_state == TCP_LISTEN) ? 
tcp_rsk(req)->snt_isn + 1 : 809*e62a123bSEric Dumazet tcp_sk(sk)->snd_nxt; 810*e62a123bSEric Dumazet 811*e62a123bSEric Dumazet tcp_v4_send_ack(sock_net(sk), skb, seq, 812ed53d0abSEric Dumazet tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, 813ee684b6fSAndrey Vagin tcp_time_stamp, 8149501f972SYOSHIFUJI Hideaki req->ts_recent, 8159501f972SYOSHIFUJI Hideaki 0, 816a915da9bSEric Dumazet tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 817a915da9bSEric Dumazet AF_INET), 81866b13d99SEric Dumazet inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, 81966b13d99SEric Dumazet ip_hdr(skb)->tos); 8201da177e4SLinus Torvalds } 8211da177e4SLinus Torvalds 8221da177e4SLinus Torvalds /* 8239bf1d83eSKris Katterjohn * Send a SYN-ACK after having received a SYN. 82460236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 8251da177e4SLinus Torvalds * socket. 8261da177e4SLinus Torvalds */ 8270f935dbeSEric Dumazet static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, 828d6274bd8SOctavian Purdila struct flowi *fl, 829e6b4d113SWilliam Allen Simpson struct request_sock *req, 830ca6fb065SEric Dumazet struct tcp_fastopen_cookie *foc, 831ca6fb065SEric Dumazet bool attach_req) 8321da177e4SLinus Torvalds { 8332e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 8346bd023f3SDavid S. Miller struct flowi4 fl4; 8351da177e4SLinus Torvalds int err = -1; 8361da177e4SLinus Torvalds struct sk_buff *skb; 8371da177e4SLinus Torvalds 8381da177e4SLinus Torvalds /* First, grab a route. */ 839ba3f7f04SDavid S. Miller if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 840fd80eb94SDenis V. 
Lunev return -1; 8411da177e4SLinus Torvalds 842ca6fb065SEric Dumazet skb = tcp_make_synack(sk, dst, req, foc, attach_req); 8431da177e4SLinus Torvalds 8441da177e4SLinus Torvalds if (skb) { 845634fb979SEric Dumazet __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); 8461da177e4SLinus Torvalds 847634fb979SEric Dumazet err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 848634fb979SEric Dumazet ireq->ir_rmt_addr, 8492e6599cbSArnaldo Carvalho de Melo ireq->opt); 850b9df3cb8SGerrit Renker err = net_xmit_eval(err); 8511da177e4SLinus Torvalds } 8521da177e4SLinus Torvalds 8531da177e4SLinus Torvalds return err; 8541da177e4SLinus Torvalds } 8551da177e4SLinus Torvalds 8561da177e4SLinus Torvalds /* 85760236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 8581da177e4SLinus Torvalds */ 85960236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req) 8601da177e4SLinus Torvalds { 8612e6599cbSArnaldo Carvalho de Melo kfree(inet_rsk(req)->opt); 8621da177e4SLinus Torvalds } 8631da177e4SLinus Torvalds 8641da177e4SLinus Torvalds 865cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 866cfb6eeb4SYOSHIFUJI Hideaki /* 867cfb6eeb4SYOSHIFUJI Hideaki * RFC2385 MD5 checksumming requires a mapping of 868cfb6eeb4SYOSHIFUJI Hideaki * IP address->MD5 Key. 869cfb6eeb4SYOSHIFUJI Hideaki * We need to maintain these in the sk structure. 870cfb6eeb4SYOSHIFUJI Hideaki */ 871cfb6eeb4SYOSHIFUJI Hideaki 872cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address. 
*/ 873b83e3debSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, 874a915da9bSEric Dumazet const union tcp_md5_addr *addr, 875a915da9bSEric Dumazet int family) 876cfb6eeb4SYOSHIFUJI Hideaki { 877fd3a154aSEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 878a915da9bSEric Dumazet struct tcp_md5sig_key *key; 879a915da9bSEric Dumazet unsigned int size = sizeof(struct in_addr); 880fd3a154aSEric Dumazet const struct tcp_md5sig_info *md5sig; 881cfb6eeb4SYOSHIFUJI Hideaki 882a8afca03SEric Dumazet /* caller either holds rcu_read_lock() or socket lock */ 883a8afca03SEric Dumazet md5sig = rcu_dereference_check(tp->md5sig_info, 884b4fb05eaSEric Dumazet sock_owned_by_user(sk) || 885b83e3debSEric Dumazet lockdep_is_held((spinlock_t *)&sk->sk_lock.slock)); 886a8afca03SEric Dumazet if (!md5sig) 887cfb6eeb4SYOSHIFUJI Hideaki return NULL; 888a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 889a915da9bSEric Dumazet if (family == AF_INET6) 890a915da9bSEric Dumazet size = sizeof(struct in6_addr); 891a915da9bSEric Dumazet #endif 892b67bfe0dSSasha Levin hlist_for_each_entry_rcu(key, &md5sig->head, node) { 893a915da9bSEric Dumazet if (key->family != family) 894a915da9bSEric Dumazet continue; 895a915da9bSEric Dumazet if (!memcmp(&key->addr, addr, size)) 896a915da9bSEric Dumazet return key; 897cfb6eeb4SYOSHIFUJI Hideaki } 898cfb6eeb4SYOSHIFUJI Hideaki return NULL; 899cfb6eeb4SYOSHIFUJI Hideaki } 900a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup); 901cfb6eeb4SYOSHIFUJI Hideaki 902b83e3debSEric Dumazet struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, 903fd3a154aSEric Dumazet const struct sock *addr_sk) 904cfb6eeb4SYOSHIFUJI Hideaki { 905b52e6921SEric Dumazet const union tcp_md5_addr *addr; 906a915da9bSEric Dumazet 907b52e6921SEric Dumazet addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; 908a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 909cfb6eeb4SYOSHIFUJI Hideaki } 910cfb6eeb4SYOSHIFUJI Hideaki 
EXPORT_SYMBOL(tcp_v4_md5_lookup); 911cfb6eeb4SYOSHIFUJI Hideaki 912cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */ 913a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 914a915da9bSEric Dumazet int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) 915cfb6eeb4SYOSHIFUJI Hideaki { 916cfb6eeb4SYOSHIFUJI Hideaki /* Add Key to the list */ 917b0a713e9SMatthias M. Dellweg struct tcp_md5sig_key *key; 918cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 919f6685938SArnaldo Carvalho de Melo struct tcp_md5sig_info *md5sig; 920f6685938SArnaldo Carvalho de Melo 921c0353c7bSAydin Arik key = tcp_md5_do_lookup(sk, addr, family); 922a915da9bSEric Dumazet if (key) { 923a915da9bSEric Dumazet /* Pre-existing entry - just update that one. */ 924a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 925a915da9bSEric Dumazet key->keylen = newkeylen; 926a915da9bSEric Dumazet return 0; 927cfb6eeb4SYOSHIFUJI Hideaki } 928260fcbebSYan, Zheng 929a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 9301b8e6a01SEric Dumazet sock_owned_by_user(sk) || 9311b8e6a01SEric Dumazet lockdep_is_held(&sk->sk_lock.slock)); 932a915da9bSEric Dumazet if (!md5sig) { 933a915da9bSEric Dumazet md5sig = kmalloc(sizeof(*md5sig), gfp); 934a915da9bSEric Dumazet if (!md5sig) 935a915da9bSEric Dumazet return -ENOMEM; 936a915da9bSEric Dumazet 937a915da9bSEric Dumazet sk_nocaps_add(sk, NETIF_F_GSO_MASK); 938a915da9bSEric Dumazet INIT_HLIST_HEAD(&md5sig->head); 939a8afca03SEric Dumazet rcu_assign_pointer(tp->md5sig_info, md5sig); 940a915da9bSEric Dumazet } 941a915da9bSEric Dumazet 9425f3d9cb2SEric Dumazet key = sock_kmalloc(sk, sizeof(*key), gfp); 943a915da9bSEric Dumazet if (!key) 944a915da9bSEric Dumazet return -ENOMEM; 94571cea17eSEric Dumazet if (!tcp_alloc_md5sig_pool()) { 9465f3d9cb2SEric Dumazet sock_kfree_s(sk, key, sizeof(*key)); 947cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 
948cfb6eeb4SYOSHIFUJI Hideaki } 949f6685938SArnaldo Carvalho de Melo 950a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 951a915da9bSEric Dumazet key->keylen = newkeylen; 952a915da9bSEric Dumazet key->family = family; 953a915da9bSEric Dumazet memcpy(&key->addr, addr, 954a915da9bSEric Dumazet (family == AF_INET6) ? sizeof(struct in6_addr) : 955a915da9bSEric Dumazet sizeof(struct in_addr)); 956a915da9bSEric Dumazet hlist_add_head_rcu(&key->node, &md5sig->head); 957cfb6eeb4SYOSHIFUJI Hideaki return 0; 958cfb6eeb4SYOSHIFUJI Hideaki } 959a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add); 960cfb6eeb4SYOSHIFUJI Hideaki 961a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) 962cfb6eeb4SYOSHIFUJI Hideaki { 963a915da9bSEric Dumazet struct tcp_md5sig_key *key; 964cfb6eeb4SYOSHIFUJI Hideaki 965c0353c7bSAydin Arik key = tcp_md5_do_lookup(sk, addr, family); 966a915da9bSEric Dumazet if (!key) 967cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 968a915da9bSEric Dumazet hlist_del_rcu(&key->node); 9695f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 970a915da9bSEric Dumazet kfree_rcu(key, rcu); 971a915da9bSEric Dumazet return 0; 972cfb6eeb4SYOSHIFUJI Hideaki } 973a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del); 974cfb6eeb4SYOSHIFUJI Hideaki 975e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk) 976cfb6eeb4SYOSHIFUJI Hideaki { 977cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 978a915da9bSEric Dumazet struct tcp_md5sig_key *key; 979b67bfe0dSSasha Levin struct hlist_node *n; 980a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 981cfb6eeb4SYOSHIFUJI Hideaki 982a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1); 983a8afca03SEric Dumazet 984b67bfe0dSSasha Levin hlist_for_each_entry_safe(key, n, &md5sig->head, node) { 985a915da9bSEric Dumazet hlist_del_rcu(&key->node); 9865f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 
987a915da9bSEric Dumazet kfree_rcu(key, rcu); 988cfb6eeb4SYOSHIFUJI Hideaki } 989cfb6eeb4SYOSHIFUJI Hideaki } 990cfb6eeb4SYOSHIFUJI Hideaki 991cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, 992cfb6eeb4SYOSHIFUJI Hideaki int optlen) 993cfb6eeb4SYOSHIFUJI Hideaki { 994cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig cmd; 995cfb6eeb4SYOSHIFUJI Hideaki struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 996cfb6eeb4SYOSHIFUJI Hideaki 997cfb6eeb4SYOSHIFUJI Hideaki if (optlen < sizeof(cmd)) 998cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 999cfb6eeb4SYOSHIFUJI Hideaki 1000cfb6eeb4SYOSHIFUJI Hideaki if (copy_from_user(&cmd, optval, sizeof(cmd))) 1001cfb6eeb4SYOSHIFUJI Hideaki return -EFAULT; 1002cfb6eeb4SYOSHIFUJI Hideaki 1003cfb6eeb4SYOSHIFUJI Hideaki if (sin->sin_family != AF_INET) 1004cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1005cfb6eeb4SYOSHIFUJI Hideaki 100664a124edSDmitry Popov if (!cmd.tcpm_keylen) 1007a915da9bSEric Dumazet return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1008a915da9bSEric Dumazet AF_INET); 1009cfb6eeb4SYOSHIFUJI Hideaki 1010cfb6eeb4SYOSHIFUJI Hideaki if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 1011cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1012cfb6eeb4SYOSHIFUJI Hideaki 1013a915da9bSEric Dumazet return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1014a915da9bSEric Dumazet AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, 1015a915da9bSEric Dumazet GFP_KERNEL); 1016cfb6eeb4SYOSHIFUJI Hideaki } 1017cfb6eeb4SYOSHIFUJI Hideaki 101849a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 101949a72dfbSAdam Langley __be32 daddr, __be32 saddr, int nbytes) 1020cfb6eeb4SYOSHIFUJI Hideaki { 1021cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_pseudohdr *bp; 102249a72dfbSAdam Langley struct scatterlist sg; 1023cfb6eeb4SYOSHIFUJI Hideaki 1024cfb6eeb4SYOSHIFUJI Hideaki bp = &hp->md5_blk.ip4; 1025cfb6eeb4SYOSHIFUJI Hideaki 1026cfb6eeb4SYOSHIFUJI Hideaki 
/* 102749a72dfbSAdam Langley * 1. the TCP pseudo-header (in the order: source IP address, 1028cfb6eeb4SYOSHIFUJI Hideaki * destination IP address, zero-padded protocol number, and 1029cfb6eeb4SYOSHIFUJI Hideaki * segment length) 1030cfb6eeb4SYOSHIFUJI Hideaki */ 1031cfb6eeb4SYOSHIFUJI Hideaki bp->saddr = saddr; 1032cfb6eeb4SYOSHIFUJI Hideaki bp->daddr = daddr; 1033cfb6eeb4SYOSHIFUJI Hideaki bp->pad = 0; 1034076fb722SYOSHIFUJI Hideaki bp->protocol = IPPROTO_TCP; 103549a72dfbSAdam Langley bp->len = cpu_to_be16(nbytes); 1036c7da57a1SDavid S. Miller 103749a72dfbSAdam Langley sg_init_one(&sg, bp, sizeof(*bp)); 103849a72dfbSAdam Langley return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 103949a72dfbSAdam Langley } 104049a72dfbSAdam Langley 1041a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1042318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th) 104349a72dfbSAdam Langley { 104449a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 104549a72dfbSAdam Langley struct hash_desc *desc; 104649a72dfbSAdam Langley 104749a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 104849a72dfbSAdam Langley if (!hp) 104949a72dfbSAdam Langley goto clear_hash_noput; 105049a72dfbSAdam Langley desc = &hp->md5_desc; 105149a72dfbSAdam Langley 105249a72dfbSAdam Langley if (crypto_hash_init(desc)) 105349a72dfbSAdam Langley goto clear_hash; 105449a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 105549a72dfbSAdam Langley goto clear_hash; 105649a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 105749a72dfbSAdam Langley goto clear_hash; 105849a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 105949a72dfbSAdam Langley goto clear_hash; 106049a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 1061cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1062cfb6eeb4SYOSHIFUJI Hideaki 1063cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1064cfb6eeb4SYOSHIFUJI Hideaki return 0; 
106549a72dfbSAdam Langley 1066cfb6eeb4SYOSHIFUJI Hideaki clear_hash: 1067cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1068cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput: 1069cfb6eeb4SYOSHIFUJI Hideaki memset(md5_hash, 0, 16); 107049a72dfbSAdam Langley return 1; 1071cfb6eeb4SYOSHIFUJI Hideaki } 1072cfb6eeb4SYOSHIFUJI Hideaki 107339f8e58eSEric Dumazet int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, 107439f8e58eSEric Dumazet const struct sock *sk, 1075318cf7aaSEric Dumazet const struct sk_buff *skb) 1076cfb6eeb4SYOSHIFUJI Hideaki { 107749a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 107849a72dfbSAdam Langley struct hash_desc *desc; 1079318cf7aaSEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1080cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, daddr; 1081cfb6eeb4SYOSHIFUJI Hideaki 108239f8e58eSEric Dumazet if (sk) { /* valid for establish/request sockets */ 108339f8e58eSEric Dumazet saddr = sk->sk_rcv_saddr; 108439f8e58eSEric Dumazet daddr = sk->sk_daddr; 1085cfb6eeb4SYOSHIFUJI Hideaki } else { 108649a72dfbSAdam Langley const struct iphdr *iph = ip_hdr(skb); 108749a72dfbSAdam Langley saddr = iph->saddr; 108849a72dfbSAdam Langley daddr = iph->daddr; 1089cfb6eeb4SYOSHIFUJI Hideaki } 1090cfb6eeb4SYOSHIFUJI Hideaki 109149a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 109249a72dfbSAdam Langley if (!hp) 109349a72dfbSAdam Langley goto clear_hash_noput; 109449a72dfbSAdam Langley desc = &hp->md5_desc; 109549a72dfbSAdam Langley 109649a72dfbSAdam Langley if (crypto_hash_init(desc)) 109749a72dfbSAdam Langley goto clear_hash; 109849a72dfbSAdam Langley 109949a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 110049a72dfbSAdam Langley goto clear_hash; 110149a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 110249a72dfbSAdam Langley goto clear_hash; 110349a72dfbSAdam Langley if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 110449a72dfbSAdam Langley goto clear_hash; 110549a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 
110649a72dfbSAdam Langley goto clear_hash; 110749a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 110849a72dfbSAdam Langley goto clear_hash; 110949a72dfbSAdam Langley 111049a72dfbSAdam Langley tcp_put_md5sig_pool(); 111149a72dfbSAdam Langley return 0; 111249a72dfbSAdam Langley 111349a72dfbSAdam Langley clear_hash: 111449a72dfbSAdam Langley tcp_put_md5sig_pool(); 111549a72dfbSAdam Langley clear_hash_noput: 111649a72dfbSAdam Langley memset(md5_hash, 0, 16); 111749a72dfbSAdam Langley return 1; 111849a72dfbSAdam Langley } 111949a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1120cfb6eeb4SYOSHIFUJI Hideaki 1121ba8e275aSEric Dumazet #endif 1122ba8e275aSEric Dumazet 1123ff74e23fSEric Dumazet /* Called with rcu_read_lock() */ 1124ba8e275aSEric Dumazet static bool tcp_v4_inbound_md5_hash(const struct sock *sk, 11259ea88a15SDmitry Popov const struct sk_buff *skb) 1126cfb6eeb4SYOSHIFUJI Hideaki { 1127ba8e275aSEric Dumazet #ifdef CONFIG_TCP_MD5SIG 1128cfb6eeb4SYOSHIFUJI Hideaki /* 1129cfb6eeb4SYOSHIFUJI Hideaki * This gets called for each TCP segment that arrives 1130cfb6eeb4SYOSHIFUJI Hideaki * so we want to be efficient. 1131cfb6eeb4SYOSHIFUJI Hideaki * We have 3 drop cases: 1132cfb6eeb4SYOSHIFUJI Hideaki * o No MD5 hash and one expected. 1133cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and we're not expecting one. 1134cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and its wrong. 
1135cfb6eeb4SYOSHIFUJI Hideaki */ 1136cf533ea5SEric Dumazet const __u8 *hash_location = NULL; 1137cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *hash_expected; 1138eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1139cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1140cfb6eeb4SYOSHIFUJI Hideaki int genhash; 1141cfb6eeb4SYOSHIFUJI Hideaki unsigned char newhash[16]; 1142cfb6eeb4SYOSHIFUJI Hideaki 1143a915da9bSEric Dumazet hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, 1144a915da9bSEric Dumazet AF_INET); 11457d5d5525SYOSHIFUJI Hideaki hash_location = tcp_parse_md5sig_option(th); 1146cfb6eeb4SYOSHIFUJI Hideaki 1147cfb6eeb4SYOSHIFUJI Hideaki /* We've parsed the options - do we have a hash? */ 1148cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && !hash_location) 1149a2a385d6SEric Dumazet return false; 1150cfb6eeb4SYOSHIFUJI Hideaki 1151cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected && !hash_location) { 1152785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 1153a2a385d6SEric Dumazet return true; 1154cfb6eeb4SYOSHIFUJI Hideaki } 1155cfb6eeb4SYOSHIFUJI Hideaki 1156cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && hash_location) { 1157785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 1158a2a385d6SEric Dumazet return true; 1159cfb6eeb4SYOSHIFUJI Hideaki } 1160cfb6eeb4SYOSHIFUJI Hideaki 1161cfb6eeb4SYOSHIFUJI Hideaki /* Okay, so this is hash_expected and hash_location - 1162cfb6eeb4SYOSHIFUJI Hideaki * so we need to calculate the checksum. 
1163cfb6eeb4SYOSHIFUJI Hideaki */ 116449a72dfbSAdam Langley genhash = tcp_v4_md5_hash_skb(newhash, 1165cfb6eeb4SYOSHIFUJI Hideaki hash_expected, 116639f8e58eSEric Dumazet NULL, skb); 1167cfb6eeb4SYOSHIFUJI Hideaki 1168cfb6eeb4SYOSHIFUJI Hideaki if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1169e87cc472SJoe Perches net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", 1170673d57e7SHarvey Harrison &iph->saddr, ntohs(th->source), 1171673d57e7SHarvey Harrison &iph->daddr, ntohs(th->dest), 1172e87cc472SJoe Perches genhash ? " tcp_v4_calc_md5_hash failed" 1173e87cc472SJoe Perches : ""); 1174a2a385d6SEric Dumazet return true; 1175cfb6eeb4SYOSHIFUJI Hideaki } 1176a2a385d6SEric Dumazet return false; 1177cfb6eeb4SYOSHIFUJI Hideaki #endif 1178ba8e275aSEric Dumazet return false; 1179ba8e275aSEric Dumazet } 1180cfb6eeb4SYOSHIFUJI Hideaki 1181b40cf18eSEric Dumazet static void tcp_v4_init_req(struct request_sock *req, 1182b40cf18eSEric Dumazet const struct sock *sk_listener, 118316bea70aSOctavian Purdila struct sk_buff *skb) 118416bea70aSOctavian Purdila { 118516bea70aSOctavian Purdila struct inet_request_sock *ireq = inet_rsk(req); 118616bea70aSOctavian Purdila 118708d2cc3bSEric Dumazet sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); 118808d2cc3bSEric Dumazet sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); 118908d2cc3bSEric Dumazet ireq->no_srccheck = inet_sk(sk_listener)->transparent; 119016bea70aSOctavian Purdila ireq->opt = tcp_v4_save_options(skb); 119116bea70aSOctavian Purdila } 119216bea70aSOctavian Purdila 1193f964629eSEric Dumazet static struct dst_entry *tcp_v4_route_req(const struct sock *sk, 1194f964629eSEric Dumazet struct flowi *fl, 1195d94e0417SOctavian Purdila const struct request_sock *req, 1196d94e0417SOctavian Purdila bool *strict) 1197d94e0417SOctavian Purdila { 1198d94e0417SOctavian Purdila struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); 1199d94e0417SOctavian Purdila 1200d94e0417SOctavian Purdila if 
(strict) { 1201d94e0417SOctavian Purdila if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) 1202d94e0417SOctavian Purdila *strict = true; 1203d94e0417SOctavian Purdila else 1204d94e0417SOctavian Purdila *strict = false; 1205d94e0417SOctavian Purdila } 1206d94e0417SOctavian Purdila 1207d94e0417SOctavian Purdila return dst; 1208d94e0417SOctavian Purdila } 1209d94e0417SOctavian Purdila 121072a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = { 12111da177e4SLinus Torvalds .family = PF_INET, 12122e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 12135db92c99SOctavian Purdila .rtx_syn_ack = tcp_rtx_synack, 121460236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 121560236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 12161da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 121772659eccSOctavian Purdila .syn_ack_timeout = tcp_syn_ack_timeout, 12181da177e4SLinus Torvalds }; 12191da177e4SLinus Torvalds 1220b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 12212aec4a29SOctavian Purdila .mss_clamp = TCP_MSS_DEFAULT, 122216bea70aSOctavian Purdila #ifdef CONFIG_TCP_MD5SIG 1223fd3a154aSEric Dumazet .req_md5_lookup = tcp_v4_md5_lookup, 1224e3afe7b7SJohn Dykstra .calc_md5_hash = tcp_v4_md5_hash_skb, 1225b6332e6cSAndrew Morton #endif 122616bea70aSOctavian Purdila .init_req = tcp_v4_init_req, 1227fb7b37a7SOctavian Purdila #ifdef CONFIG_SYN_COOKIES 1228fb7b37a7SOctavian Purdila .cookie_init_seq = cookie_v4_init_sequence, 1229fb7b37a7SOctavian Purdila #endif 1230d94e0417SOctavian Purdila .route_req = tcp_v4_route_req, 1231936b8bdbSOctavian Purdila .init_seq = tcp_v4_init_sequence, 1232d6274bd8SOctavian Purdila .send_synack = tcp_v4_send_synack, 123316bea70aSOctavian Purdila }; 1234cfb6eeb4SYOSHIFUJI Hideaki 12351da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 12361da177e4SLinus Torvalds { 
12371da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 1238511c3f92SEric Dumazet if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 12391da177e4SLinus Torvalds goto drop; 12401da177e4SLinus Torvalds 12411fb6f159SOctavian Purdila return tcp_conn_request(&tcp_request_sock_ops, 12421fb6f159SOctavian Purdila &tcp_request_sock_ipv4_ops, sk, skb); 12431da177e4SLinus Torvalds 12441da177e4SLinus Torvalds drop: 1245848bf15fSVijay Subramanian NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 12461da177e4SLinus Torvalds return 0; 12471da177e4SLinus Torvalds } 12484bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request); 12491da177e4SLinus Torvalds 12501da177e4SLinus Torvalds 12511da177e4SLinus Torvalds /* 12521da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 12531da177e4SLinus Torvalds * now create the new socket. 12541da177e4SLinus Torvalds */ 12550c27171eSEric Dumazet struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 125660236fddSArnaldo Carvalho de Melo struct request_sock *req, 12575e0724d0SEric Dumazet struct dst_entry *dst, 12585e0724d0SEric Dumazet struct request_sock *req_unhash, 12595e0724d0SEric Dumazet bool *own_req) 12601da177e4SLinus Torvalds { 12612e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 12621da177e4SLinus Torvalds struct inet_sock *newinet; 12631da177e4SLinus Torvalds struct tcp_sock *newtp; 12641da177e4SLinus Torvalds struct sock *newsk; 1265cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1266cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 1267cfb6eeb4SYOSHIFUJI Hideaki #endif 1268f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 12691da177e4SLinus Torvalds 12701da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 12711da177e4SLinus Torvalds goto exit_overflow; 12721da177e4SLinus Torvalds 12731da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 12741da177e4SLinus Torvalds if (!newsk) 
1275093d2823SBalazs Scheidler goto exit_nonewsk; 12761da177e4SLinus Torvalds 1277bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 1278fae6ef87SNeal Cardwell inet_sk_rx_dst_set(newsk, skb); 12791da177e4SLinus Torvalds 12801da177e4SLinus Torvalds newtp = tcp_sk(newsk); 12811da177e4SLinus Torvalds newinet = inet_sk(newsk); 12822e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 1283d1e559d0SEric Dumazet sk_daddr_set(newsk, ireq->ir_rmt_addr); 1284d1e559d0SEric Dumazet sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); 12856dd9a14eSDavid Ahern newsk->sk_bound_dev_if = ireq->ir_iif; 1286634fb979SEric Dumazet newinet->inet_saddr = ireq->ir_loc_addr; 1287f6d8bd05SEric Dumazet inet_opt = ireq->opt; 1288f6d8bd05SEric Dumazet rcu_assign_pointer(newinet->inet_opt, inet_opt); 12892e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 1290463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 1291eddc9ec5SArnaldo Carvalho de Melo newinet->mc_ttl = ip_hdr(skb)->ttl; 12924c507d28SJiri Benc newinet->rcv_tos = ip_hdr(skb)->tos; 1293d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 1294f6d8bd05SEric Dumazet if (inet_opt) 1295f6d8bd05SEric Dumazet inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1296c720c7e8SEric Dumazet newinet->inet_id = newtp->write_seq ^ jiffies; 12971da177e4SLinus Torvalds 1298dfd25fffSEric Dumazet if (!dst) { 1299dfd25fffSEric Dumazet dst = inet_csk_route_child_sock(sk, newsk, req); 1300dfd25fffSEric Dumazet if (!dst) 13010e734419SDavid S. Miller goto put_and_exit; 1302dfd25fffSEric Dumazet } else { 1303dfd25fffSEric Dumazet /* syncookie case : see end of cookie_v4_check() */ 1304dfd25fffSEric Dumazet } 13050e734419SDavid S. Miller sk_setup_caps(newsk, dst); 13060e734419SDavid S. Miller 130781164413SDaniel Borkmann tcp_ca_openreq_child(newsk, dst); 130881164413SDaniel Borkmann 13091da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 13100dbaee3bSDavid S. 
Miller newtp->advmss = dst_metric_advmss(dst); 1311f5fff5dcSTom Quetchenbach if (tcp_sk(sk)->rx_opt.user_mss && 1312f5fff5dcSTom Quetchenbach tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) 1313f5fff5dcSTom Quetchenbach newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1314f5fff5dcSTom Quetchenbach 13151da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 13161da177e4SLinus Torvalds 1317cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1318cfb6eeb4SYOSHIFUJI Hideaki /* Copy over the MD5 key from the original socket */ 1319a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, 1320a915da9bSEric Dumazet AF_INET); 132100db4124SIan Morris if (key) { 1322cfb6eeb4SYOSHIFUJI Hideaki /* 1323cfb6eeb4SYOSHIFUJI Hideaki * We're using one, so create a matching key 1324cfb6eeb4SYOSHIFUJI Hideaki * on the newsk structure. If we fail to get 1325cfb6eeb4SYOSHIFUJI Hideaki * memory, then we end up not copying the key 1326cfb6eeb4SYOSHIFUJI Hideaki * across. Shucks. 1327cfb6eeb4SYOSHIFUJI Hideaki */ 1328a915da9bSEric Dumazet tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, 1329a915da9bSEric Dumazet AF_INET, key->key, key->keylen, GFP_ATOMIC); 1330a465419bSEric Dumazet sk_nocaps_add(newsk, NETIF_F_GSO_MASK); 1331cfb6eeb4SYOSHIFUJI Hideaki } 1332cfb6eeb4SYOSHIFUJI Hideaki #endif 1333cfb6eeb4SYOSHIFUJI Hideaki 13340e734419SDavid S. Miller if (__inet_inherit_port(sk, newsk) < 0) 13350e734419SDavid S. 
Miller goto put_and_exit; 13365e0724d0SEric Dumazet *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); 1337805c4bc0SEric Dumazet if (*own_req) 133849a496c9SEric Dumazet tcp_move_syn(newtp, req); 13391da177e4SLinus Torvalds 13401da177e4SLinus Torvalds return newsk; 13411da177e4SLinus Torvalds 13421da177e4SLinus Torvalds exit_overflow: 1343de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1344093d2823SBalazs Scheidler exit_nonewsk: 1345093d2823SBalazs Scheidler dst_release(dst); 13461da177e4SLinus Torvalds exit: 1347de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 13481da177e4SLinus Torvalds return NULL; 13490e734419SDavid S. Miller put_and_exit: 1350e337e24dSChristoph Paasch inet_csk_prepare_forced_close(newsk); 1351e337e24dSChristoph Paasch tcp_done(newsk); 13520e734419SDavid S. Miller goto exit; 13531da177e4SLinus Torvalds } 13544bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 13551da177e4SLinus Torvalds 1356079096f1SEric Dumazet static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) 13571da177e4SLinus Torvalds { 13581da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 1359079096f1SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1360079096f1SEric Dumazet 1361af9b4738SFlorian Westphal if (!th->syn) 1362461b74c3SCong Wang sk = cookie_v4_check(sk, skb); 13631da177e4SLinus Torvalds #endif 13641da177e4SLinus Torvalds return sk; 13651da177e4SLinus Torvalds } 13661da177e4SLinus Torvalds 13671da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 1368e994b2f0SEric Dumazet * here, unless it is a TCP_LISTEN socket. 13691da177e4SLinus Torvalds * 13701da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 13711da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 13721da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 13731da177e4SLinus Torvalds * held. 
13741da177e4SLinus Torvalds */ 13751da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 13761da177e4SLinus Torvalds { 1377cfb6eeb4SYOSHIFUJI Hideaki struct sock *rsk; 1378cfb6eeb4SYOSHIFUJI Hideaki 13791da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 138092101b3bSDavid S. Miller struct dst_entry *dst = sk->sk_rx_dst; 1381404e0a8bSEric Dumazet 1382404e0a8bSEric Dumazet sock_rps_save_rxhash(sk, skb); 13833d97379aSEric Dumazet sk_mark_napi_id(sk, skb); 1384404e0a8bSEric Dumazet if (dst) { 1385505fbcf0SEric Dumazet if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 138651456b29SIan Morris !dst->ops->check(dst, 0)) { 138792101b3bSDavid S. Miller dst_release(dst); 138892101b3bSDavid S. Miller sk->sk_rx_dst = NULL; 138992101b3bSDavid S. Miller } 139092101b3bSDavid S. Miller } 1391c995ae22SVijay Subramanian tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); 13921da177e4SLinus Torvalds return 0; 13931da177e4SLinus Torvalds } 13941da177e4SLinus Torvalds 139512e25e10SEric Dumazet if (tcp_checksum_complete(skb)) 13961da177e4SLinus Torvalds goto csum_err; 13971da177e4SLinus Torvalds 13981da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 1399079096f1SEric Dumazet struct sock *nsk = tcp_v4_cookie_check(sk, skb); 1400079096f1SEric Dumazet 14011da177e4SLinus Torvalds if (!nsk) 14021da177e4SLinus Torvalds goto discard; 14031da177e4SLinus Torvalds if (nsk != sk) { 1404bdeab991STom Herbert sock_rps_save_rxhash(nsk, skb); 140538cb5245SEric Dumazet sk_mark_napi_id(nsk, skb); 1406cfb6eeb4SYOSHIFUJI Hideaki if (tcp_child_process(sk, nsk, skb)) { 1407cfb6eeb4SYOSHIFUJI Hideaki rsk = nsk; 14081da177e4SLinus Torvalds goto reset; 1409cfb6eeb4SYOSHIFUJI Hideaki } 14101da177e4SLinus Torvalds return 0; 14111da177e4SLinus Torvalds } 1412ca55158cSEric Dumazet } else 1413bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1414ca55158cSEric Dumazet 141572ab4a86SEric Dumazet if (tcp_rcv_state_process(sk, skb)) { 1416cfb6eeb4SYOSHIFUJI 
Hideaki rsk = sk; 14171da177e4SLinus Torvalds goto reset; 1418cfb6eeb4SYOSHIFUJI Hideaki } 14191da177e4SLinus Torvalds return 0; 14201da177e4SLinus Torvalds 14211da177e4SLinus Torvalds reset: 1422cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(rsk, skb); 14231da177e4SLinus Torvalds discard: 14241da177e4SLinus Torvalds kfree_skb(skb); 14251da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 14261da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 14271da177e4SLinus Torvalds * might be destroyed here. This current version compiles correctly, 14281da177e4SLinus Torvalds * but you have been warned. 14291da177e4SLinus Torvalds */ 14301da177e4SLinus Torvalds return 0; 14311da177e4SLinus Torvalds 14321da177e4SLinus Torvalds csum_err: 14336a5dc9e5SEric Dumazet TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); 143463231bddSPavel Emelyanov TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 14351da177e4SLinus Torvalds goto discard; 14361da177e4SLinus Torvalds } 14374bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv); 14381da177e4SLinus Torvalds 1439160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb) 144041063e9dSDavid S. Miller { 144141063e9dSDavid S. Miller const struct iphdr *iph; 144241063e9dSDavid S. Miller const struct tcphdr *th; 144341063e9dSDavid S. Miller struct sock *sk; 144441063e9dSDavid S. Miller 144541063e9dSDavid S. Miller if (skb->pkt_type != PACKET_HOST) 1446160eb5a6SDavid S. Miller return; 144741063e9dSDavid S. Miller 144845f00f99SEric Dumazet if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1449160eb5a6SDavid S. Miller return; 145041063e9dSDavid S. Miller 145141063e9dSDavid S. Miller iph = ip_hdr(skb); 145245f00f99SEric Dumazet th = tcp_hdr(skb); 145341063e9dSDavid S. Miller 145441063e9dSDavid S. Miller if (th->doff < sizeof(struct tcphdr) / 4) 1455160eb5a6SDavid S. Miller return; 145641063e9dSDavid S. 
Miller 145745f00f99SEric Dumazet sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 145841063e9dSDavid S. Miller iph->saddr, th->source, 14597011d085SVijay Subramanian iph->daddr, ntohs(th->dest), 14609cb429d6SEric Dumazet skb->skb_iif); 146141063e9dSDavid S. Miller if (sk) { 146241063e9dSDavid S. Miller skb->sk = sk; 146341063e9dSDavid S. Miller skb->destructor = sock_edemux; 1464f7e4eb03SEric Dumazet if (sk_fullsock(sk)) { 1465d0c294c5SMichal Kubeček struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); 1466505fbcf0SEric Dumazet 146741063e9dSDavid S. Miller if (dst) 146841063e9dSDavid S. Miller dst = dst_check(dst, 0); 146992101b3bSDavid S. Miller if (dst && 1470505fbcf0SEric Dumazet inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) 147141063e9dSDavid S. Miller skb_dst_set_noref(skb, dst); 147241063e9dSDavid S. Miller } 147341063e9dSDavid S. Miller } 147441063e9dSDavid S. Miller } 147541063e9dSDavid S. Miller 1476b2fb4f54SEric Dumazet /* Packet is added to VJ-style prequeue for processing in process 1477b2fb4f54SEric Dumazet * context, if a reader task is waiting. Apparently, this exciting 1478b2fb4f54SEric Dumazet * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) 1479b2fb4f54SEric Dumazet * failed somewhere. Latency? Burstiness? Well, at least now we will 1480b2fb4f54SEric Dumazet * see, why it failed. 
8)8) --ANK 1481b2fb4f54SEric Dumazet * 1482b2fb4f54SEric Dumazet */ 1483b2fb4f54SEric Dumazet bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) 1484b2fb4f54SEric Dumazet { 1485b2fb4f54SEric Dumazet struct tcp_sock *tp = tcp_sk(sk); 1486b2fb4f54SEric Dumazet 1487b2fb4f54SEric Dumazet if (sysctl_tcp_low_latency || !tp->ucopy.task) 1488b2fb4f54SEric Dumazet return false; 1489b2fb4f54SEric Dumazet 1490b2fb4f54SEric Dumazet if (skb->len <= tcp_hdrlen(skb) && 1491b2fb4f54SEric Dumazet skb_queue_len(&tp->ucopy.prequeue) == 0) 1492b2fb4f54SEric Dumazet return false; 1493b2fb4f54SEric Dumazet 1494ca777effSEric Dumazet /* Before escaping RCU protected region, we need to take care of skb 1495ca777effSEric Dumazet * dst. Prequeue is only enabled for established sockets. 1496ca777effSEric Dumazet * For such sockets, we might need the skb dst only to set sk->sk_rx_dst 1497ca777effSEric Dumazet * Instead of doing full sk_rx_dst validity here, let's perform 1498ca777effSEric Dumazet * an optimistic check. 
1499ca777effSEric Dumazet */ 1500ca777effSEric Dumazet if (likely(sk->sk_rx_dst)) 1501ca777effSEric Dumazet skb_dst_drop(skb); 1502ca777effSEric Dumazet else 15035037e9efSEric Dumazet skb_dst_force_safe(skb); 1504ca777effSEric Dumazet 1505b2fb4f54SEric Dumazet __skb_queue_tail(&tp->ucopy.prequeue, skb); 1506b2fb4f54SEric Dumazet tp->ucopy.memory += skb->truesize; 1507b2fb4f54SEric Dumazet if (tp->ucopy.memory > sk->sk_rcvbuf) { 1508b2fb4f54SEric Dumazet struct sk_buff *skb1; 1509b2fb4f54SEric Dumazet 1510b2fb4f54SEric Dumazet BUG_ON(sock_owned_by_user(sk)); 1511b2fb4f54SEric Dumazet 1512b2fb4f54SEric Dumazet while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { 1513b2fb4f54SEric Dumazet sk_backlog_rcv(sk, skb1); 1514b2fb4f54SEric Dumazet NET_INC_STATS_BH(sock_net(sk), 1515b2fb4f54SEric Dumazet LINUX_MIB_TCPPREQUEUEDROPPED); 1516b2fb4f54SEric Dumazet } 1517b2fb4f54SEric Dumazet 1518b2fb4f54SEric Dumazet tp->ucopy.memory = 0; 1519b2fb4f54SEric Dumazet } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 1520b2fb4f54SEric Dumazet wake_up_interruptible_sync_poll(sk_sleep(sk), 1521b2fb4f54SEric Dumazet POLLIN | POLLRDNORM | POLLRDBAND); 1522b2fb4f54SEric Dumazet if (!inet_csk_ack_scheduled(sk)) 1523b2fb4f54SEric Dumazet inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 1524b2fb4f54SEric Dumazet (3 * tcp_rto_min(sk)) / 4, 1525b2fb4f54SEric Dumazet TCP_RTO_MAX); 1526b2fb4f54SEric Dumazet } 1527b2fb4f54SEric Dumazet return true; 1528b2fb4f54SEric Dumazet } 1529b2fb4f54SEric Dumazet EXPORT_SYMBOL(tcp_prequeue); 1530b2fb4f54SEric Dumazet 15311da177e4SLinus Torvalds /* 15321da177e4SLinus Torvalds * From tcp_input.c 15331da177e4SLinus Torvalds */ 15341da177e4SLinus Torvalds 15351da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 15361da177e4SLinus Torvalds { 1537eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 1538cf533ea5SEric Dumazet const struct tcphdr *th; 15391da177e4SLinus Torvalds struct sock *sk; 15401da177e4SLinus Torvalds int ret; 
1541a86b1e30SPavel Emelyanov struct net *net = dev_net(skb->dev); 15421da177e4SLinus Torvalds 15431da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 15441da177e4SLinus Torvalds goto discard_it; 15451da177e4SLinus Torvalds 15461da177e4SLinus Torvalds /* Count it even if it's bad */ 154763231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); 15481da177e4SLinus Torvalds 15491da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 15501da177e4SLinus Torvalds goto discard_it; 15511da177e4SLinus Torvalds 1552aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 15531da177e4SLinus Torvalds 15541da177e4SLinus Torvalds if (th->doff < sizeof(struct tcphdr) / 4) 15551da177e4SLinus Torvalds goto bad_packet; 15561da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 15571da177e4SLinus Torvalds goto discard_it; 15581da177e4SLinus Torvalds 15591da177e4SLinus Torvalds /* An explanation is required here, I think. 15601da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1561caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 15621da177e4SLinus Torvalds * So, we defer the checks. */ 1563ed70fcfcSTom Herbert 1564ed70fcfcSTom Herbert if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) 15656a5dc9e5SEric Dumazet goto csum_error; 15661da177e4SLinus Torvalds 1567aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 1568eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 1569971f10ecSEric Dumazet /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() 1570971f10ecSEric Dumazet * barrier() makes sure compiler wont play fool^Waliasing games. 
1571971f10ecSEric Dumazet */ 1572971f10ecSEric Dumazet memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), 1573971f10ecSEric Dumazet sizeof(struct inet_skb_parm)); 1574971f10ecSEric Dumazet barrier(); 1575971f10ecSEric Dumazet 15761da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 15771da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 15781da177e4SLinus Torvalds skb->len - th->doff * 4); 15791da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1580e11ecddfSEric Dumazet TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 158104317dafSEric Dumazet TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1582b82d1bb4SEric Dumazet TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 15831da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 15841da177e4SLinus Torvalds 15854bdc3d66SEric Dumazet lookup: 15869a1f27c4SArnaldo Carvalho de Melo sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 15871da177e4SLinus Torvalds if (!sk) 15881da177e4SLinus Torvalds goto no_tcp_socket; 15891da177e4SLinus Torvalds 1590bb134d5dSEric Dumazet process: 1591bb134d5dSEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 1592bb134d5dSEric Dumazet goto do_time_wait; 1593bb134d5dSEric Dumazet 1594079096f1SEric Dumazet if (sk->sk_state == TCP_NEW_SYN_RECV) { 1595079096f1SEric Dumazet struct request_sock *req = inet_reqsk(sk); 1596079096f1SEric Dumazet struct sock *nsk = NULL; 1597079096f1SEric Dumazet 1598079096f1SEric Dumazet sk = req->rsk_listener; 1599079096f1SEric Dumazet if (tcp_v4_inbound_md5_hash(sk, skb)) 1600079096f1SEric Dumazet goto discard_and_relse; 16014bdc3d66SEric Dumazet if (likely(sk->sk_state == TCP_LISTEN)) { 1602079096f1SEric Dumazet nsk = tcp_check_req(sk, skb, req, false); 16034bdc3d66SEric Dumazet } else { 1604f03f2e15SEric Dumazet inet_csk_reqsk_queue_drop_and_put(sk, req); 16054bdc3d66SEric Dumazet goto lookup; 16064bdc3d66SEric Dumazet } 1607079096f1SEric Dumazet if (!nsk) { 1608079096f1SEric Dumazet reqsk_put(req); 
1609079096f1SEric Dumazet goto discard_it; 1610079096f1SEric Dumazet } 1611079096f1SEric Dumazet if (nsk == sk) { 1612079096f1SEric Dumazet sock_hold(sk); 1613079096f1SEric Dumazet reqsk_put(req); 1614079096f1SEric Dumazet } else if (tcp_child_process(sk, nsk, skb)) { 1615079096f1SEric Dumazet tcp_v4_send_reset(nsk, skb); 1616079096f1SEric Dumazet goto discard_it; 1617079096f1SEric Dumazet } else { 1618079096f1SEric Dumazet return 0; 1619079096f1SEric Dumazet } 1620079096f1SEric Dumazet } 16216cce09f8SEric Dumazet if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 16226cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1623d218d111SStephen Hemminger goto discard_and_relse; 16246cce09f8SEric Dumazet } 1625d218d111SStephen Hemminger 16261da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 16271da177e4SLinus Torvalds goto discard_and_relse; 16289ea88a15SDmitry Popov 16299ea88a15SDmitry Popov if (tcp_v4_inbound_md5_hash(sk, skb)) 16309ea88a15SDmitry Popov goto discard_and_relse; 16319ea88a15SDmitry Popov 1632b59c2701SPatrick McHardy nf_reset(skb); 16331da177e4SLinus Torvalds 1634fda9ef5dSDmitry Mishin if (sk_filter(sk, skb)) 16351da177e4SLinus Torvalds goto discard_and_relse; 16361da177e4SLinus Torvalds 16371da177e4SLinus Torvalds skb->dev = NULL; 16381da177e4SLinus Torvalds 1639e994b2f0SEric Dumazet if (sk->sk_state == TCP_LISTEN) { 1640e994b2f0SEric Dumazet ret = tcp_v4_do_rcv(sk, skb); 1641e994b2f0SEric Dumazet goto put_and_return; 1642e994b2f0SEric Dumazet } 1643e994b2f0SEric Dumazet 1644e994b2f0SEric Dumazet sk_incoming_cpu_update(sk); 1645e994b2f0SEric Dumazet 1646c6366184SIngo Molnar bh_lock_sock_nested(sk); 16472efd055cSMarcelo Ricardo Leitner tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); 16481da177e4SLinus Torvalds ret = 0; 16491da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 16501da177e4SLinus Torvalds if (!tcp_prequeue(sk, skb)) 16511da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 
1652da882c1fSEric Dumazet } else if (unlikely(sk_add_backlog(sk, skb, 1653da882c1fSEric Dumazet sk->sk_rcvbuf + sk->sk_sndbuf))) { 16546b03a53aSZhu Yi bh_unlock_sock(sk); 16556cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 16566b03a53aSZhu Yi goto discard_and_relse; 16576b03a53aSZhu Yi } 16581da177e4SLinus Torvalds bh_unlock_sock(sk); 16591da177e4SLinus Torvalds 1660e994b2f0SEric Dumazet put_and_return: 16611da177e4SLinus Torvalds sock_put(sk); 16621da177e4SLinus Torvalds 16631da177e4SLinus Torvalds return ret; 16641da177e4SLinus Torvalds 16651da177e4SLinus Torvalds no_tcp_socket: 16661da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 16671da177e4SLinus Torvalds goto discard_it; 16681da177e4SLinus Torvalds 166912e25e10SEric Dumazet if (tcp_checksum_complete(skb)) { 16706a5dc9e5SEric Dumazet csum_error: 16716a5dc9e5SEric Dumazet TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); 16721da177e4SLinus Torvalds bad_packet: 167363231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 16741da177e4SLinus Torvalds } else { 1675cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(NULL, skb); 16761da177e4SLinus Torvalds } 16771da177e4SLinus Torvalds 16781da177e4SLinus Torvalds discard_it: 16791da177e4SLinus Torvalds /* Discard frame. 
*/ 16801da177e4SLinus Torvalds kfree_skb(skb); 16811da177e4SLinus Torvalds return 0; 16821da177e4SLinus Torvalds 16831da177e4SLinus Torvalds discard_and_relse: 16841da177e4SLinus Torvalds sock_put(sk); 16851da177e4SLinus Torvalds goto discard_it; 16861da177e4SLinus Torvalds 16871da177e4SLinus Torvalds do_time_wait: 16881da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 16899469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 16901da177e4SLinus Torvalds goto discard_it; 16911da177e4SLinus Torvalds } 16921da177e4SLinus Torvalds 16936a5dc9e5SEric Dumazet if (tcp_checksum_complete(skb)) { 16946a5dc9e5SEric Dumazet inet_twsk_put(inet_twsk(sk)); 16956a5dc9e5SEric Dumazet goto csum_error; 16961da177e4SLinus Torvalds } 16979469c7b4SYOSHIFUJI Hideaki switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 16981da177e4SLinus Torvalds case TCP_TW_SYN: { 1699c346dca1SYOSHIFUJI Hideaki struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 1700c67499c0SPavel Emelyanov &tcp_hashinfo, 1701da5e3630STom Herbert iph->saddr, th->source, 1702eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, 1703463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 17041da177e4SLinus Torvalds if (sk2) { 1705dbe7faa4SEric Dumazet inet_twsk_deschedule_put(inet_twsk(sk)); 17061da177e4SLinus Torvalds sk = sk2; 17071da177e4SLinus Torvalds goto process; 17081da177e4SLinus Torvalds } 17091da177e4SLinus Torvalds /* Fall through to ACK */ 17101da177e4SLinus Torvalds } 17111da177e4SLinus Torvalds case TCP_TW_ACK: 17121da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 17131da177e4SLinus Torvalds break; 17141da177e4SLinus Torvalds case TCP_TW_RST: 1715271c3b9bSFlorian Westphal tcp_v4_send_reset(sk, skb); 1716271c3b9bSFlorian Westphal inet_twsk_deschedule_put(inet_twsk(sk)); 1717271c3b9bSFlorian Westphal goto discard_it; 17181da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 17191da177e4SLinus Torvalds } 17201da177e4SLinus Torvalds goto discard_it; 17211da177e4SLinus 
Torvalds } 17221da177e4SLinus Torvalds 1723ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = { 1724ccb7c410SDavid S. Miller .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1725ccb7c410SDavid S. Miller .twsk_unique = tcp_twsk_unique, 1726ccb7c410SDavid S. Miller .twsk_destructor= tcp_twsk_destructor, 1727ccb7c410SDavid S. Miller }; 17281da177e4SLinus Torvalds 172963d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 17305d299f3dSEric Dumazet { 17315d299f3dSEric Dumazet struct dst_entry *dst = skb_dst(skb); 17325d299f3dSEric Dumazet 17335037e9efSEric Dumazet if (dst && dst_hold_safe(dst)) { 17345d299f3dSEric Dumazet sk->sk_rx_dst = dst; 17355d299f3dSEric Dumazet inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 17365d299f3dSEric Dumazet } 1737ca777effSEric Dumazet } 173863d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set); 17395d299f3dSEric Dumazet 17403b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = { 17411da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 17421da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 174332519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 17445d299f3dSEric Dumazet .sk_rx_dst_set = inet_sk_rx_dst_set, 17451da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 17461da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 17471da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 17481da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 17491da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 1750543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 1751543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 1752ab1e0a13SArnaldo Carvalho de Melo .bind_conflict = inet_csk_bind_conflict, 17533fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 17543fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 17553fdadf7dSDmitry Mishin .compat_getsockopt = 
compat_ip_getsockopt, 17563fdadf7dSDmitry Mishin #endif 17574fab9071SNeal Cardwell .mtu_reduced = tcp_v4_mtu_reduced, 17581da177e4SLinus Torvalds }; 17594bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific); 17601da177e4SLinus Torvalds 1761cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1762b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1763cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_md5_lookup, 176449a72dfbSAdam Langley .calc_md5_hash = tcp_v4_md5_hash_skb, 1765cfb6eeb4SYOSHIFUJI Hideaki .md5_parse = tcp_v4_parse_md5_keys, 1766cfb6eeb4SYOSHIFUJI Hideaki }; 1767b6332e6cSAndrew Morton #endif 1768cfb6eeb4SYOSHIFUJI Hideaki 17691da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 17701da177e4SLinus Torvalds * sk_alloc() so need not be done here. 17711da177e4SLinus Torvalds */ 17721da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 17731da177e4SLinus Torvalds { 17746687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 17751da177e4SLinus Torvalds 1776900f65d3SNeal Cardwell tcp_init_sock(sk); 17771da177e4SLinus Torvalds 17788292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 1779900f65d3SNeal Cardwell 1780cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1781ac807fa8SDavid S. Miller tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; 1782cfb6eeb4SYOSHIFUJI Hideaki #endif 17831da177e4SLinus Torvalds 17841da177e4SLinus Torvalds return 0; 17851da177e4SLinus Torvalds } 17861da177e4SLinus Torvalds 17877d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk) 17881da177e4SLinus Torvalds { 17891da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 17901da177e4SLinus Torvalds 17911da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 17921da177e4SLinus Torvalds 17936687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 1794317a76f9SStephen Hemminger 17951da177e4SLinus Torvalds /* Cleanup up the write buffer. 
*/ 1796fe067e8aSDavid S. Miller tcp_write_queue_purge(sk); 17971da177e4SLinus Torvalds 17981da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. */ 17991da177e4SLinus Torvalds __skb_queue_purge(&tp->out_of_order_queue); 18001da177e4SLinus Torvalds 1801cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1802cfb6eeb4SYOSHIFUJI Hideaki /* Clean up the MD5 key list, if any */ 1803cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info) { 1804a915da9bSEric Dumazet tcp_clear_md5_list(sk); 1805a8afca03SEric Dumazet kfree_rcu(tp->md5sig_info, rcu); 1806cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = NULL; 1807cfb6eeb4SYOSHIFUJI Hideaki } 1808cfb6eeb4SYOSHIFUJI Hideaki #endif 1809cfb6eeb4SYOSHIFUJI Hideaki 18101da177e4SLinus Torvalds /* Clean prequeue, it must be empty really */ 18111da177e4SLinus Torvalds __skb_queue_purge(&tp->ucopy.prequeue); 18121da177e4SLinus Torvalds 18131da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. */ 1814463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 1815ab1e0a13SArnaldo Carvalho de Melo inet_put_port(sk); 18161da177e4SLinus Torvalds 181700db4124SIan Morris BUG_ON(tp->fastopen_rsk); 1818435cf559SWilliam Allen Simpson 1819cf60af03SYuchung Cheng /* If socket is aborted during connect operation */ 1820cf60af03SYuchung Cheng tcp_free_fastopen_req(tp); 1821cd8ae852SEric Dumazet tcp_saved_syn_free(tp); 1822cf60af03SYuchung Cheng 1823180d8cd9SGlauber Costa sk_sockets_allocated_dec(sk); 18243d596f7bSJohannes Weiner 1825baac50bbSJohannes Weiner if (mem_cgroup_sockets_enabled && sk->sk_memcg) 1826d1a4c0b3SGlauber Costa sock_release_memcg(sk); 18271da177e4SLinus Torvalds } 18281da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 18291da177e4SLinus Torvalds 18301da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 18311da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 18321da177e4SLinus Torvalds 1833a8b690f9STom Herbert /* 1834a8b690f9STom Herbert * Get next listener socket follow cur. 
If cur is NULL, get first socket 1835a8b690f9STom Herbert * starting from bucket given in st->bucket; when st->bucket is zero the 1836a8b690f9STom Herbert * very first socket in the hash table is returned. 1837a8b690f9STom Herbert */ 18381da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur) 18391da177e4SLinus Torvalds { 1840463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk; 1841c25eb3bfSEric Dumazet struct hlist_nulls_node *node; 18421da177e4SLinus Torvalds struct sock *sk = cur; 18435caea4eaSEric Dumazet struct inet_listen_hashbucket *ilb; 18441da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1845a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 18461da177e4SLinus Torvalds 18471da177e4SLinus Torvalds if (!sk) { 1848a8b690f9STom Herbert ilb = &tcp_hashinfo.listening_hash[st->bucket]; 18495caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 1850c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 1851a8b690f9STom Herbert st->offset = 0; 18521da177e4SLinus Torvalds goto get_sk; 18531da177e4SLinus Torvalds } 18545caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 18551da177e4SLinus Torvalds ++st->num; 1856a8b690f9STom Herbert ++st->offset; 18571da177e4SLinus Torvalds 18581bde5ac4SEric Dumazet sk = sk_nulls_next(sk); 18591da177e4SLinus Torvalds get_sk: 1860c25eb3bfSEric Dumazet sk_nulls_for_each_from(sk, node) { 18618475ef9fSPavel Emelyanov if (!net_eq(sock_net(sk), net)) 18628475ef9fSPavel Emelyanov continue; 18638475ef9fSPavel Emelyanov if (sk->sk_family == st->family) { 18641da177e4SLinus Torvalds cur = sk; 18651da177e4SLinus Torvalds goto out; 18661da177e4SLinus Torvalds } 1867463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 18681da177e4SLinus Torvalds } 18695caea4eaSEric Dumazet spin_unlock_bh(&ilb->lock); 1870a8b690f9STom Herbert st->offset = 0; 18710f7ff927SArnaldo Carvalho de Melo if (++st->bucket < INET_LHTABLE_SIZE) { 18725caea4eaSEric Dumazet ilb = 
&tcp_hashinfo.listening_hash[st->bucket]; 18735caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 1874c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 18751da177e4SLinus Torvalds goto get_sk; 18761da177e4SLinus Torvalds } 18771da177e4SLinus Torvalds cur = NULL; 18781da177e4SLinus Torvalds out: 18791da177e4SLinus Torvalds return cur; 18801da177e4SLinus Torvalds } 18811da177e4SLinus Torvalds 18821da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 18831da177e4SLinus Torvalds { 1884a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 1885a8b690f9STom Herbert void *rc; 1886a8b690f9STom Herbert 1887a8b690f9STom Herbert st->bucket = 0; 1888a8b690f9STom Herbert st->offset = 0; 1889a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 18901da177e4SLinus Torvalds 18911da177e4SLinus Torvalds while (rc && *pos) { 18921da177e4SLinus Torvalds rc = listening_get_next(seq, rc); 18931da177e4SLinus Torvalds --*pos; 18941da177e4SLinus Torvalds } 18951da177e4SLinus Torvalds return rc; 18961da177e4SLinus Torvalds } 18971da177e4SLinus Torvalds 189805dbc7b5SEric Dumazet static inline bool empty_bucket(const struct tcp_iter_state *st) 18996eac5604SAndi Kleen { 190005dbc7b5SEric Dumazet return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain); 19016eac5604SAndi Kleen } 19026eac5604SAndi Kleen 1903a8b690f9STom Herbert /* 1904a8b690f9STom Herbert * Get first established socket starting from bucket given in st->bucket. 1905a8b690f9STom Herbert * If st->bucket is zero, the very first socket in the hash is returned. 1906a8b690f9STom Herbert */ 19071da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq) 19081da177e4SLinus Torvalds { 19091da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1910a4146b1bSDenis V. 
Lunev struct net *net = seq_file_net(seq); 19111da177e4SLinus Torvalds void *rc = NULL; 19121da177e4SLinus Torvalds 1913a8b690f9STom Herbert st->offset = 0; 1914a8b690f9STom Herbert for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 19151da177e4SLinus Torvalds struct sock *sk; 19163ab5aee7SEric Dumazet struct hlist_nulls_node *node; 19179db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 19181da177e4SLinus Torvalds 19196eac5604SAndi Kleen /* Lockless fast path for the common case of empty buckets */ 19206eac5604SAndi Kleen if (empty_bucket(st)) 19216eac5604SAndi Kleen continue; 19226eac5604SAndi Kleen 19239db66bdcSEric Dumazet spin_lock_bh(lock); 19243ab5aee7SEric Dumazet sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 1925f40c8174SDaniel Lezcano if (sk->sk_family != st->family || 1926878628fbSYOSHIFUJI Hideaki !net_eq(sock_net(sk), net)) { 19271da177e4SLinus Torvalds continue; 19281da177e4SLinus Torvalds } 19291da177e4SLinus Torvalds rc = sk; 19301da177e4SLinus Torvalds goto out; 19311da177e4SLinus Torvalds } 19329db66bdcSEric Dumazet spin_unlock_bh(lock); 19331da177e4SLinus Torvalds } 19341da177e4SLinus Torvalds out: 19351da177e4SLinus Torvalds return rc; 19361da177e4SLinus Torvalds } 19371da177e4SLinus Torvalds 19381da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur) 19391da177e4SLinus Torvalds { 19401da177e4SLinus Torvalds struct sock *sk = cur; 19413ab5aee7SEric Dumazet struct hlist_nulls_node *node; 19421da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1943a4146b1bSDenis V. 
Lunev struct net *net = seq_file_net(seq); 19441da177e4SLinus Torvalds 19451da177e4SLinus Torvalds ++st->num; 1946a8b690f9STom Herbert ++st->offset; 19471da177e4SLinus Torvalds 19483ab5aee7SEric Dumazet sk = sk_nulls_next(sk); 19491da177e4SLinus Torvalds 19503ab5aee7SEric Dumazet sk_nulls_for_each_from(sk, node) { 1951878628fbSYOSHIFUJI Hideaki if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 195205dbc7b5SEric Dumazet return sk; 19531da177e4SLinus Torvalds } 19541da177e4SLinus Torvalds 195505dbc7b5SEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 195605dbc7b5SEric Dumazet ++st->bucket; 195705dbc7b5SEric Dumazet return established_get_first(seq); 19581da177e4SLinus Torvalds } 19591da177e4SLinus Torvalds 19601da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos) 19611da177e4SLinus Torvalds { 1962a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 1963a8b690f9STom Herbert void *rc; 1964a8b690f9STom Herbert 1965a8b690f9STom Herbert st->bucket = 0; 1966a8b690f9STom Herbert rc = established_get_first(seq); 19671da177e4SLinus Torvalds 19681da177e4SLinus Torvalds while (rc && pos) { 19691da177e4SLinus Torvalds rc = established_get_next(seq, rc); 19701da177e4SLinus Torvalds --pos; 19711da177e4SLinus Torvalds } 19721da177e4SLinus Torvalds return rc; 19731da177e4SLinus Torvalds } 19741da177e4SLinus Torvalds 19751da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 19761da177e4SLinus Torvalds { 19771da177e4SLinus Torvalds void *rc; 19781da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 19791da177e4SLinus Torvalds 19801da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 19811da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 19821da177e4SLinus Torvalds 19831da177e4SLinus Torvalds if (!rc) { 19841da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 19851da177e4SLinus Torvalds rc = established_get_idx(seq, pos); 
19861da177e4SLinus Torvalds } 19871da177e4SLinus Torvalds 19881da177e4SLinus Torvalds return rc; 19891da177e4SLinus Torvalds } 19901da177e4SLinus Torvalds 1991a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq) 1992a8b690f9STom Herbert { 1993a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 1994a8b690f9STom Herbert int offset = st->offset; 1995a8b690f9STom Herbert int orig_num = st->num; 1996a8b690f9STom Herbert void *rc = NULL; 1997a8b690f9STom Herbert 1998a8b690f9STom Herbert switch (st->state) { 1999a8b690f9STom Herbert case TCP_SEQ_STATE_LISTENING: 2000a8b690f9STom Herbert if (st->bucket >= INET_LHTABLE_SIZE) 2001a8b690f9STom Herbert break; 2002a8b690f9STom Herbert st->state = TCP_SEQ_STATE_LISTENING; 2003a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 2004a8b690f9STom Herbert while (offset-- && rc) 2005a8b690f9STom Herbert rc = listening_get_next(seq, rc); 2006a8b690f9STom Herbert if (rc) 2007a8b690f9STom Herbert break; 2008a8b690f9STom Herbert st->bucket = 0; 200905dbc7b5SEric Dumazet st->state = TCP_SEQ_STATE_ESTABLISHED; 2010a8b690f9STom Herbert /* Fallthrough */ 2011a8b690f9STom Herbert case TCP_SEQ_STATE_ESTABLISHED: 2012a8b690f9STom Herbert if (st->bucket > tcp_hashinfo.ehash_mask) 2013a8b690f9STom Herbert break; 2014a8b690f9STom Herbert rc = established_get_first(seq); 2015a8b690f9STom Herbert while (offset-- && rc) 2016a8b690f9STom Herbert rc = established_get_next(seq, rc); 2017a8b690f9STom Herbert } 2018a8b690f9STom Herbert 2019a8b690f9STom Herbert st->num = orig_num; 2020a8b690f9STom Herbert 2021a8b690f9STom Herbert return rc; 2022a8b690f9STom Herbert } 2023a8b690f9STom Herbert 20241da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 20251da177e4SLinus Torvalds { 20261da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2027a8b690f9STom Herbert void *rc; 2028a8b690f9STom Herbert 2029a8b690f9STom Herbert if (*pos && *pos == st->last_pos) { 2030a8b690f9STom 
Herbert rc = tcp_seek_last_pos(seq); 2031a8b690f9STom Herbert if (rc) 2032a8b690f9STom Herbert goto out; 2033a8b690f9STom Herbert } 2034a8b690f9STom Herbert 20351da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 20361da177e4SLinus Torvalds st->num = 0; 2037a8b690f9STom Herbert st->bucket = 0; 2038a8b690f9STom Herbert st->offset = 0; 2039a8b690f9STom Herbert rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2040a8b690f9STom Herbert 2041a8b690f9STom Herbert out: 2042a8b690f9STom Herbert st->last_pos = *pos; 2043a8b690f9STom Herbert return rc; 20441da177e4SLinus Torvalds } 20451da177e4SLinus Torvalds 20461da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 20471da177e4SLinus Torvalds { 2048a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 20491da177e4SLinus Torvalds void *rc = NULL; 20501da177e4SLinus Torvalds 20511da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 20521da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 20531da177e4SLinus Torvalds goto out; 20541da177e4SLinus Torvalds } 20551da177e4SLinus Torvalds 20561da177e4SLinus Torvalds switch (st->state) { 20571da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 20581da177e4SLinus Torvalds rc = listening_get_next(seq, v); 20591da177e4SLinus Torvalds if (!rc) { 20601da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 2061a8b690f9STom Herbert st->bucket = 0; 2062a8b690f9STom Herbert st->offset = 0; 20631da177e4SLinus Torvalds rc = established_get_first(seq); 20641da177e4SLinus Torvalds } 20651da177e4SLinus Torvalds break; 20661da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 20671da177e4SLinus Torvalds rc = established_get_next(seq, v); 20681da177e4SLinus Torvalds break; 20691da177e4SLinus Torvalds } 20701da177e4SLinus Torvalds out: 20711da177e4SLinus Torvalds ++*pos; 2072a8b690f9STom Herbert st->last_pos = *pos; 20731da177e4SLinus Torvalds return rc; 20741da177e4SLinus Torvalds } 20751da177e4SLinus Torvalds 20761da177e4SLinus 
Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 20771da177e4SLinus Torvalds { 20781da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 20791da177e4SLinus Torvalds 20801da177e4SLinus Torvalds switch (st->state) { 20811da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 20821da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 20835caea4eaSEric Dumazet spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 20841da177e4SLinus Torvalds break; 20851da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 20861da177e4SLinus Torvalds if (v) 20879db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 20881da177e4SLinus Torvalds break; 20891da177e4SLinus Torvalds } 20901da177e4SLinus Torvalds } 20911da177e4SLinus Torvalds 209273cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file) 20931da177e4SLinus Torvalds { 2094d9dda78bSAl Viro struct tcp_seq_afinfo *afinfo = PDE_DATA(inode); 20951da177e4SLinus Torvalds struct tcp_iter_state *s; 209652d6f3f1SDenis V. Lunev int err; 20971da177e4SLinus Torvalds 209852d6f3f1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 209952d6f3f1SDenis V. Lunev sizeof(struct tcp_iter_state)); 210052d6f3f1SDenis V. Lunev if (err < 0) 210152d6f3f1SDenis V. Lunev return err; 2102f40c8174SDaniel Lezcano 210352d6f3f1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 21041da177e4SLinus Torvalds s->family = afinfo->family; 2105a8b690f9STom Herbert s->last_pos = 0; 2106f40c8174SDaniel Lezcano return 0; 2107f40c8174SDaniel Lezcano } 210873cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open); 2109f40c8174SDaniel Lezcano 21106f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) 21111da177e4SLinus Torvalds { 21121da177e4SLinus Torvalds int rc = 0; 21131da177e4SLinus Torvalds struct proc_dir_entry *p; 21141da177e4SLinus Torvalds 21159427c4b3SDenis V. 
Lunev afinfo->seq_ops.start = tcp_seq_start; 21169427c4b3SDenis V. Lunev afinfo->seq_ops.next = tcp_seq_next; 21179427c4b3SDenis V. Lunev afinfo->seq_ops.stop = tcp_seq_stop; 21189427c4b3SDenis V. Lunev 211984841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 212073cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 212184841c3cSDenis V. Lunev if (!p) 21221da177e4SLinus Torvalds rc = -ENOMEM; 21231da177e4SLinus Torvalds return rc; 21241da177e4SLinus Torvalds } 21254bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register); 21261da177e4SLinus Torvalds 21276f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 21281da177e4SLinus Torvalds { 2129ece31ffdSGao feng remove_proc_entry(afinfo->name, net->proc_net); 21301da177e4SLinus Torvalds } 21314bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister); 21321da177e4SLinus Torvalds 2133d4f06873SEric Dumazet static void get_openreq4(const struct request_sock *req, 2134aa3a0c8cSEric Dumazet struct seq_file *f, int i) 21351da177e4SLinus Torvalds { 21362e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 2137fa76ce73SEric Dumazet long delta = req->rsk_timer.expires - jiffies; 21381da177e4SLinus Torvalds 21395e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2140652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", 21411da177e4SLinus Torvalds i, 2142634fb979SEric Dumazet ireq->ir_loc_addr, 2143d4f06873SEric Dumazet ireq->ir_num, 2144634fb979SEric Dumazet ireq->ir_rmt_addr, 2145634fb979SEric Dumazet ntohs(ireq->ir_rmt_port), 21461da177e4SLinus Torvalds TCP_SYN_RECV, 21471da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. 
*/ 21481da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 2149a399a805SEric Dumazet jiffies_delta_to_clock_t(delta), 2150e6c022a4SEric Dumazet req->num_timeout, 2151aa3a0c8cSEric Dumazet from_kuid_munged(seq_user_ns(f), 2152aa3a0c8cSEric Dumazet sock_i_uid(req->rsk_listener)), 21531da177e4SLinus Torvalds 0, /* non standard timer */ 21541da177e4SLinus Torvalds 0, /* open_requests have no inode */ 2155d4f06873SEric Dumazet 0, 2156652586dfSTetsuo Handa req); 21571da177e4SLinus Torvalds } 21581da177e4SLinus Torvalds 2159652586dfSTetsuo Handa static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) 21601da177e4SLinus Torvalds { 21611da177e4SLinus Torvalds int timer_active; 21621da177e4SLinus Torvalds unsigned long timer_expires; 2163cf533ea5SEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 2164cf4c6bf8SIlpo Järvinen const struct inet_connection_sock *icsk = inet_csk(sk); 2165cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 21660536fcc0SEric Dumazet const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; 2167c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2168c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2169c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2170c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 217149d09007SEric Dumazet int rx_queue; 217200fd38d9SEric Dumazet int state; 21731da177e4SLinus Torvalds 21746ba8a3b1SNandita Dukkipati if (icsk->icsk_pending == ICSK_TIME_RETRANS || 21756ba8a3b1SNandita Dukkipati icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 21766ba8a3b1SNandita Dukkipati icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 21771da177e4SLinus Torvalds timer_active = 1; 2178463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2179463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 21801da177e4SLinus Torvalds timer_active = 4; 2181463c84b9SArnaldo Carvalho de Melo timer_expires = 
icsk->icsk_timeout; 2182cf4c6bf8SIlpo Järvinen } else if (timer_pending(&sk->sk_timer)) { 21831da177e4SLinus Torvalds timer_active = 2; 2184cf4c6bf8SIlpo Järvinen timer_expires = sk->sk_timer.expires; 21851da177e4SLinus Torvalds } else { 21861da177e4SLinus Torvalds timer_active = 0; 21871da177e4SLinus Torvalds timer_expires = jiffies; 21881da177e4SLinus Torvalds } 21891da177e4SLinus Torvalds 219000fd38d9SEric Dumazet state = sk_state_load(sk); 219100fd38d9SEric Dumazet if (state == TCP_LISTEN) 219249d09007SEric Dumazet rx_queue = sk->sk_ack_backlog; 219349d09007SEric Dumazet else 219400fd38d9SEric Dumazet /* Because we don't lock the socket, 219500fd38d9SEric Dumazet * we might find a transient negative value. 219649d09007SEric Dumazet */ 219749d09007SEric Dumazet rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 219849d09007SEric Dumazet 21995e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2200652586dfSTetsuo Handa "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 220100fd38d9SEric Dumazet i, src, srcp, dest, destp, state, 220247da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 220349d09007SEric Dumazet rx_queue, 22041da177e4SLinus Torvalds timer_active, 2205a399a805SEric Dumazet jiffies_delta_to_clock_t(timer_expires - jiffies), 2206463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 2207a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), 22086687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 2209cf4c6bf8SIlpo Järvinen sock_i_ino(sk), 2210cf4c6bf8SIlpo Järvinen atomic_read(&sk->sk_refcnt), sk, 22117be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_rto), 22127be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_ack.ato), 2213463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 22141da177e4SLinus Torvalds tp->snd_cwnd, 221500fd38d9SEric Dumazet state == TCP_LISTEN ? 
221600fd38d9SEric Dumazet fastopenq->max_qlen : 2217652586dfSTetsuo Handa (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 22181da177e4SLinus Torvalds } 22191da177e4SLinus Torvalds 2220cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw, 2221652586dfSTetsuo Handa struct seq_file *f, int i) 22221da177e4SLinus Torvalds { 2223789f558cSEric Dumazet long delta = tw->tw_timer.expires - jiffies; 222423f33c2dSAl Viro __be32 dest, src; 22251da177e4SLinus Torvalds __u16 destp, srcp; 22261da177e4SLinus Torvalds 22271da177e4SLinus Torvalds dest = tw->tw_daddr; 22281da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 22291da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 22301da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 22311da177e4SLinus Torvalds 22325e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2233652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", 22341da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 2235a399a805SEric Dumazet 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2236652586dfSTetsuo Handa atomic_read(&tw->tw_refcnt), tw); 22371da177e4SLinus Torvalds } 22381da177e4SLinus Torvalds 22391da177e4SLinus Torvalds #define TMPSZ 150 22401da177e4SLinus Torvalds 22411da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 22421da177e4SLinus Torvalds { 22431da177e4SLinus Torvalds struct tcp_iter_state *st; 224405dbc7b5SEric Dumazet struct sock *sk = v; 22451da177e4SLinus Torvalds 2246652586dfSTetsuo Handa seq_setwidth(seq, TMPSZ - 1); 22471da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 2248652586dfSTetsuo Handa seq_puts(seq, " sl local_address rem_address st tx_queue " 22491da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 22501da177e4SLinus Torvalds "inode"); 22511da177e4SLinus Torvalds goto out; 22521da177e4SLinus Torvalds } 22531da177e4SLinus Torvalds st = seq->private; 22541da177e4SLinus Torvalds 225505dbc7b5SEric 
Dumazet if (sk->sk_state == TCP_TIME_WAIT) 2256652586dfSTetsuo Handa get_timewait4_sock(v, seq, st->num); 2257079096f1SEric Dumazet else if (sk->sk_state == TCP_NEW_SYN_RECV) 2258079096f1SEric Dumazet get_openreq4(v, seq, st->num); 225905dbc7b5SEric Dumazet else 2260652586dfSTetsuo Handa get_tcp4_sock(v, seq, st->num); 22611da177e4SLinus Torvalds out: 2262652586dfSTetsuo Handa seq_pad(seq, '\n'); 22631da177e4SLinus Torvalds return 0; 22641da177e4SLinus Torvalds } 22651da177e4SLinus Torvalds 226673cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = { 226773cb88ecSArjan van de Ven .owner = THIS_MODULE, 226873cb88ecSArjan van de Ven .open = tcp_seq_open, 226973cb88ecSArjan van de Ven .read = seq_read, 227073cb88ecSArjan van de Ven .llseek = seq_lseek, 227173cb88ecSArjan van de Ven .release = seq_release_net 227273cb88ecSArjan van de Ven }; 227373cb88ecSArjan van de Ven 22741da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 22751da177e4SLinus Torvalds .name = "tcp", 22761da177e4SLinus Torvalds .family = AF_INET, 227773cb88ecSArjan van de Ven .seq_fops = &tcp_afinfo_seq_fops, 22789427c4b3SDenis V. Lunev .seq_ops = { 22799427c4b3SDenis V. Lunev .show = tcp4_seq_show, 22809427c4b3SDenis V. 
Lunev }, 22811da177e4SLinus Torvalds }; 22821da177e4SLinus Torvalds 22832c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net) 2284757764f6SPavel Emelyanov { 2285757764f6SPavel Emelyanov return tcp_proc_register(net, &tcp4_seq_afinfo); 2286757764f6SPavel Emelyanov } 2287757764f6SPavel Emelyanov 22882c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net) 2289757764f6SPavel Emelyanov { 2290757764f6SPavel Emelyanov tcp_proc_unregister(net, &tcp4_seq_afinfo); 2291757764f6SPavel Emelyanov } 2292757764f6SPavel Emelyanov 2293757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = { 2294757764f6SPavel Emelyanov .init = tcp4_proc_init_net, 2295757764f6SPavel Emelyanov .exit = tcp4_proc_exit_net, 2296757764f6SPavel Emelyanov }; 2297757764f6SPavel Emelyanov 22981da177e4SLinus Torvalds int __init tcp4_proc_init(void) 22991da177e4SLinus Torvalds { 2300757764f6SPavel Emelyanov return register_pernet_subsys(&tcp4_net_ops); 23011da177e4SLinus Torvalds } 23021da177e4SLinus Torvalds 23031da177e4SLinus Torvalds void tcp4_proc_exit(void) 23041da177e4SLinus Torvalds { 2305757764f6SPavel Emelyanov unregister_pernet_subsys(&tcp4_net_ops); 23061da177e4SLinus Torvalds } 23071da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 23081da177e4SLinus Torvalds 23091da177e4SLinus Torvalds struct proto tcp_prot = { 23101da177e4SLinus Torvalds .name = "TCP", 23111da177e4SLinus Torvalds .owner = THIS_MODULE, 23121da177e4SLinus Torvalds .close = tcp_close, 23131da177e4SLinus Torvalds .connect = tcp_v4_connect, 23141da177e4SLinus Torvalds .disconnect = tcp_disconnect, 2315463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 23161da177e4SLinus Torvalds .ioctl = tcp_ioctl, 23171da177e4SLinus Torvalds .init = tcp_v4_init_sock, 23181da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 23191da177e4SLinus Torvalds .shutdown = tcp_shutdown, 23201da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 23211da177e4SLinus Torvalds 
.getsockopt = tcp_getsockopt, 23221da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 23237ba42910SChangli Gao .sendmsg = tcp_sendmsg, 23247ba42910SChangli Gao .sendpage = tcp_sendpage, 23251da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 232646d3ceabSEric Dumazet .release_cb = tcp_release_cb, 2327ab1e0a13SArnaldo Carvalho de Melo .hash = inet_hash, 2328ab1e0a13SArnaldo Carvalho de Melo .unhash = inet_unhash, 2329ab1e0a13SArnaldo Carvalho de Melo .get_port = inet_csk_get_port, 23301da177e4SLinus Torvalds .enter_memory_pressure = tcp_enter_memory_pressure, 2331c9bee3b7SEric Dumazet .stream_memory_free = tcp_stream_memory_free, 23321da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 23330a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 23341da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 23351da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 2336a4fe34bfSEric W. Biederman .sysctl_mem = sysctl_tcp_mem, 23371da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 23381da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 23391da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 23401da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 23413ab5aee7SEric Dumazet .slab_flags = SLAB_DESTROY_BY_RCU, 23426d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 234360236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 234439d8cda7SPavel Emelyanov .h.hashinfo = &tcp_hashinfo, 23457ba42910SChangli Gao .no_autobind = true, 2346543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 2347543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 2348543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 2349543d9cfeSArnaldo Carvalho de Melo #endif 2350c1e64e29SLorenzo Colitti .diag_destroy = tcp_abort, 23511da177e4SLinus Torvalds }; 23524bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot); 23531da177e4SLinus Torvalds 2354046ee902SDenis V. 
Lunev static void __net_exit tcp_sk_exit(struct net *net) 2355046ee902SDenis V. Lunev { 2356bdbbb852SEric Dumazet int cpu; 2357bdbbb852SEric Dumazet 2358bdbbb852SEric Dumazet for_each_possible_cpu(cpu) 2359bdbbb852SEric Dumazet inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); 2360bdbbb852SEric Dumazet free_percpu(net->ipv4.tcp_sk); 2361bdbbb852SEric Dumazet } 2362bdbbb852SEric Dumazet 2363bdbbb852SEric Dumazet static int __net_init tcp_sk_init(struct net *net) 2364bdbbb852SEric Dumazet { 2365bdbbb852SEric Dumazet int res, cpu; 2366bdbbb852SEric Dumazet 2367bdbbb852SEric Dumazet net->ipv4.tcp_sk = alloc_percpu(struct sock *); 2368bdbbb852SEric Dumazet if (!net->ipv4.tcp_sk) 2369bdbbb852SEric Dumazet return -ENOMEM; 2370bdbbb852SEric Dumazet 2371bdbbb852SEric Dumazet for_each_possible_cpu(cpu) { 2372bdbbb852SEric Dumazet struct sock *sk; 2373bdbbb852SEric Dumazet 2374bdbbb852SEric Dumazet res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, 2375bdbbb852SEric Dumazet IPPROTO_TCP, net); 2376bdbbb852SEric Dumazet if (res) 2377bdbbb852SEric Dumazet goto fail; 2378bdbbb852SEric Dumazet *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; 2379bdbbb852SEric Dumazet } 238049213555SDaniel Borkmann 2381bdbbb852SEric Dumazet net->ipv4.sysctl_tcp_ecn = 2; 238249213555SDaniel Borkmann net->ipv4.sysctl_tcp_ecn_fallback = 1; 238349213555SDaniel Borkmann 2384b0f9ca53SFan Du net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; 23856b58e0a5SFan Du net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; 238605cbc0dbSFan Du net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; 2387bdbbb852SEric Dumazet 238813b287e8SNikolay Borisov net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; 23899bd6861bSNikolay Borisov net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; 2390b840d15dSNikolay Borisov net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; 239113b287e8SNikolay Borisov 239249213555SDaniel Borkmann return 0; 2393bdbbb852SEric Dumazet fail: 2394bdbbb852SEric Dumazet 
tcp_sk_exit(net); 2395bdbbb852SEric Dumazet 2396bdbbb852SEric Dumazet return res; 2397b099ce26SEric W. Biederman } 2398b099ce26SEric W. Biederman 2399b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2400b099ce26SEric W. Biederman { 2401b099ce26SEric W. Biederman inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); 2402046ee902SDenis V. Lunev } 2403046ee902SDenis V. Lunev 2404046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = { 2405046ee902SDenis V. Lunev .init = tcp_sk_init, 2406046ee902SDenis V. Lunev .exit = tcp_sk_exit, 2407b099ce26SEric W. Biederman .exit_batch = tcp_sk_exit_batch, 2408046ee902SDenis V. Lunev }; 2409046ee902SDenis V. Lunev 24109b0f976fSDenis V. Lunev void __init tcp_v4_init(void) 24111da177e4SLinus Torvalds { 24125caea4eaSEric Dumazet inet_hashinfo_init(&tcp_hashinfo); 24136a1b3054SEric W. Biederman if (register_pernet_subsys(&tcp_sk_ops)) 24141da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 24151da177e4SLinus Torvalds } 2416