11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Implementation of the Transmission Control Protocol(TCP). 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * IPv4 specific functions 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * code split from: 121da177e4SLinus Torvalds * linux/ipv4/tcp.c 131da177e4SLinus Torvalds * linux/ipv4/tcp_input.c 141da177e4SLinus Torvalds * linux/ipv4/tcp_output.c 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * See tcp.c for author information 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 191da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 201da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 211da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 221da177e4SLinus Torvalds */ 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds /* 251da177e4SLinus Torvalds * Changes: 261da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 271da177e4SLinus Torvalds * This code is dedicated to John Dyson. 281da177e4SLinus Torvalds * David S. Miller : Change semantics of established hash, 291da177e4SLinus Torvalds * half is devoted to TIME_WAIT sockets 301da177e4SLinus Torvalds * and the rest go in the other half. 311da177e4SLinus Torvalds * Andi Kleen : Add support for syncookies and fixed 321da177e4SLinus Torvalds * some bugs: ip options weren't passed to 331da177e4SLinus Torvalds * the TCP layer, missed a check for an 341da177e4SLinus Torvalds * ACK bit. 351da177e4SLinus Torvalds * Andi Kleen : Implemented fast path mtu discovery. 361da177e4SLinus Torvalds * Fixed many serious bugs in the 3760236fddSArnaldo Carvalho de Melo * request_sock handling and moved 381da177e4SLinus Torvalds * most of it into the af independent code. 391da177e4SLinus Torvalds * Added tail drop and some other bugfixes. 40caa20d9aSStephen Hemminger * Added new listen semantics. 411da177e4SLinus Torvalds * Mike McLagan : Routing by source 421da177e4SLinus Torvalds * Juan Jose Ciarlante: ip_dynaddr bits 431da177e4SLinus Torvalds * Andi Kleen: various fixes. 441da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year 451da177e4SLinus Torvalds * coma. 461da177e4SLinus Torvalds * Andi Kleen : Fix new listen. 471da177e4SLinus Torvalds * Andi Kleen : Fix accept error reporting. 481da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 491da177e4SLinus Torvalds * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 501da177e4SLinus Torvalds * a single port at the same time. 511da177e4SLinus Torvalds */ 521da177e4SLinus Torvalds 53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt 541da177e4SLinus Torvalds 55eb4dea58SHerbert Xu #include <linux/bottom_half.h> 561da177e4SLinus Torvalds #include <linux/types.h> 571da177e4SLinus Torvalds #include <linux/fcntl.h> 581da177e4SLinus Torvalds #include <linux/module.h> 591da177e4SLinus Torvalds #include <linux/random.h> 601da177e4SLinus Torvalds #include <linux/cache.h> 611da177e4SLinus Torvalds #include <linux/jhash.h> 621da177e4SLinus Torvalds #include <linux/init.h> 631da177e4SLinus Torvalds #include <linux/times.h> 645a0e3ad6STejun Heo #include <linux/slab.h> 651da177e4SLinus Torvalds 66457c4cbcSEric W. Biederman #include <net/net_namespace.h> 671da177e4SLinus Torvalds #include <net/icmp.h> 68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 691da177e4SLinus Torvalds #include <net/tcp.h> 7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 711da177e4SLinus Torvalds #include <net/ipv6.h> 721da177e4SLinus Torvalds #include <net/inet_common.h> 736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 741da177e4SLinus Torvalds #include <net/xfrm.h> 756e5714eaSDavid S. Miller #include <net/secure_seq.h> 76d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h> 77076bb0c8SEliezer Tamir #include <net/busy_poll.h> 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds #include <linux/inet.h> 801da177e4SLinus Torvalds #include <linux/ipv6.h> 811da177e4SLinus Torvalds #include <linux/stddef.h> 821da177e4SLinus Torvalds #include <linux/proc_fs.h> 831da177e4SLinus Torvalds #include <linux/seq_file.h> 841da177e4SLinus Torvalds 85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h> 86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h> 87cfb6eeb4SYOSHIFUJI Hideaki 88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly; 89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly; 904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency); 911da177e4SLinus Torvalds 92cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 93a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 94318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th); 95cfb6eeb4SYOSHIFUJI Hideaki #endif 96cfb6eeb4SYOSHIFUJI Hideaki 975caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo; 984bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo); 991da177e4SLinus Torvalds 100936b8bdbSOctavian Purdila static __u32 tcp_v4_init_sequence(const struct sk_buff *skb) 1011da177e4SLinus Torvalds { 102eddc9ec5SArnaldo Carvalho de Melo return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 103eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 104aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->dest, 105aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->source); 1061da177e4SLinus Torvalds } 1071da177e4SLinus Torvalds 1086d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1096d6ee43eSArnaldo Carvalho de Melo { 1106d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1116d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1126d6ee43eSArnaldo Carvalho de Melo 1136d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1146d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1156d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1166d6ee43eSArnaldo Carvalho de Melo 1176d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1186d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1196d6ee43eSArnaldo Carvalho de Melo holder. 1206d6ee43eSArnaldo Carvalho de Melo 1216d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1226d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 1236d6ee43eSArnaldo Carvalho de Melo */ 1246d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 1256d6ee43eSArnaldo Carvalho de Melo (twp == NULL || (sysctl_tcp_tw_reuse && 1269d729f72SJames Morris get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 1276d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1286d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1296d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1306d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1316d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1326d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1336d6ee43eSArnaldo Carvalho de Melo return 1; 1346d6ee43eSArnaldo Carvalho de Melo } 1356d6ee43eSArnaldo Carvalho de Melo 1366d6ee43eSArnaldo Carvalho de Melo return 0; 1376d6ee43eSArnaldo Carvalho de Melo } 1386d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1396d6ee43eSArnaldo Carvalho de Melo 1401da177e4SLinus Torvalds /* This will initiate an outgoing connection. */ 1411da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1421da177e4SLinus Torvalds { 1432d7192d6SDavid S. Miller struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1441da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1451da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 146dca8b089SDavid S. Miller __be16 orig_sport, orig_dport; 147bada8adcSAl Viro __be32 daddr, nexthop; 148da905bd1SDavid S. Miller struct flowi4 *fl4; 1492d7192d6SDavid S. Miller struct rtable *rt; 1501da177e4SLinus Torvalds int err; 151f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 1521da177e4SLinus Torvalds 1531da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1541da177e4SLinus Torvalds return -EINVAL; 1551da177e4SLinus Torvalds 1561da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1571da177e4SLinus Torvalds return -EAFNOSUPPORT; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 160f6d8bd05SEric Dumazet inet_opt = rcu_dereference_protected(inet->inet_opt, 161f6d8bd05SEric Dumazet sock_owned_by_user(sk)); 162f6d8bd05SEric Dumazet if (inet_opt && inet_opt->opt.srr) { 1631da177e4SLinus Torvalds if (!daddr) 1641da177e4SLinus Torvalds return -EINVAL; 165f6d8bd05SEric Dumazet nexthop = inet_opt->opt.faddr; 1661da177e4SLinus Torvalds } 1671da177e4SLinus Torvalds 168dca8b089SDavid S. Miller orig_sport = inet->inet_sport; 169dca8b089SDavid S. Miller orig_dport = usin->sin_port; 170da905bd1SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 171da905bd1SDavid S. Miller rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 1721da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1731da177e4SLinus Torvalds IPPROTO_TCP, 1740e0d44abSSteffen Klassert orig_sport, orig_dport, sk); 175b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 176b23dd4feSDavid S. Miller err = PTR_ERR(rt); 177b23dd4feSDavid S. Miller if (err == -ENETUNREACH) 178f1d8cba6SEric Dumazet IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 179b23dd4feSDavid S. Miller return err; 180584bdf8cSWei Dong } 1811da177e4SLinus Torvalds 1821da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1831da177e4SLinus Torvalds ip_rt_put(rt); 1841da177e4SLinus Torvalds return -ENETUNREACH; 1851da177e4SLinus Torvalds } 1861da177e4SLinus Torvalds 187f6d8bd05SEric Dumazet if (!inet_opt || !inet_opt->opt.srr) 188da905bd1SDavid S. Miller daddr = fl4->daddr; 1891da177e4SLinus Torvalds 190c720c7e8SEric Dumazet if (!inet->inet_saddr) 191da905bd1SDavid S. Miller inet->inet_saddr = fl4->saddr; 192c720c7e8SEric Dumazet inet->inet_rcv_saddr = inet->inet_saddr; 1931da177e4SLinus Torvalds 194c720c7e8SEric Dumazet if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 1951da177e4SLinus Torvalds /* Reset inherited state */ 1961da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 1971da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 198ee995283SPavel Emelyanov if (likely(!tp->repair)) 1991da177e4SLinus Torvalds tp->write_seq = 0; 2001da177e4SLinus Torvalds } 2011da177e4SLinus Torvalds 202295ff7edSArnaldo Carvalho de Melo if (tcp_death_row.sysctl_tw_recycle && 20381166dd6SDavid S. Miller !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) 20481166dd6SDavid S. Miller tcp_fetch_timewait_stamp(sk, &rt->dst); 2051da177e4SLinus Torvalds 206c720c7e8SEric Dumazet inet->inet_dport = usin->sin_port; 207c720c7e8SEric Dumazet inet->inet_daddr = daddr; 2081da177e4SLinus Torvalds 209d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 210f6d8bd05SEric Dumazet if (inet_opt) 211f6d8bd05SEric Dumazet inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 2121da177e4SLinus Torvalds 213bee7ca9eSWilliam Allen Simpson tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 2141da177e4SLinus Torvalds 2151da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 2161da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2171da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2181da177e4SLinus Torvalds * complete initialization after this. 2191da177e4SLinus Torvalds */ 2201da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 221a7f5e7f1SArnaldo Carvalho de Melo err = inet_hash_connect(&tcp_death_row, sk); 2221da177e4SLinus Torvalds if (err) 2231da177e4SLinus Torvalds goto failure; 2241da177e4SLinus Torvalds 2259e7ceb06SSathya Perla inet_set_txhash(sk); 2269e7ceb06SSathya Perla 227da905bd1SDavid S. Miller rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 228c720c7e8SEric Dumazet inet->inet_sport, inet->inet_dport, sk); 229b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 230b23dd4feSDavid S. Miller err = PTR_ERR(rt); 231b23dd4feSDavid S. Miller rt = NULL; 2321da177e4SLinus Torvalds goto failure; 233b23dd4feSDavid S. Miller } 2341da177e4SLinus Torvalds /* OK, now commit destination to socket. */ 235bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 236d8d1f30bSChangli Gao sk_setup_caps(sk, &rt->dst); 2371da177e4SLinus Torvalds 238ee995283SPavel Emelyanov if (!tp->write_seq && likely(!tp->repair)) 239c720c7e8SEric Dumazet tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 240c720c7e8SEric Dumazet inet->inet_daddr, 241c720c7e8SEric Dumazet inet->inet_sport, 2421da177e4SLinus Torvalds usin->sin_port); 2431da177e4SLinus Torvalds 244c720c7e8SEric Dumazet inet->inet_id = tp->write_seq ^ jiffies; 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds err = tcp_connect(sk); 247ee995283SPavel Emelyanov 2481da177e4SLinus Torvalds rt = NULL; 2491da177e4SLinus Torvalds if (err) 2501da177e4SLinus Torvalds goto failure; 2511da177e4SLinus Torvalds 2521da177e4SLinus Torvalds return 0; 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds failure: 2557174259eSArnaldo Carvalho de Melo /* 2567174259eSArnaldo Carvalho de Melo * This unhashes the socket and releases the local port, 2577174259eSArnaldo Carvalho de Melo * if necessary. 2587174259eSArnaldo Carvalho de Melo */ 2591da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2601da177e4SLinus Torvalds ip_rt_put(rt); 2611da177e4SLinus Torvalds sk->sk_route_caps = 0; 262c720c7e8SEric Dumazet inet->inet_dport = 0; 2631da177e4SLinus Torvalds return err; 2641da177e4SLinus Torvalds } 2654bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect); 2661da177e4SLinus Torvalds 2671da177e4SLinus Torvalds /* 268563d34d0SEric Dumazet * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. 269563d34d0SEric Dumazet * It can be called through tcp_release_cb() if socket was owned by user 270563d34d0SEric Dumazet * at the time tcp_v4_err() was called to handle ICMP message. 2711da177e4SLinus Torvalds */ 2724fab9071SNeal Cardwell void tcp_v4_mtu_reduced(struct sock *sk) 2731da177e4SLinus Torvalds { 2741da177e4SLinus Torvalds struct dst_entry *dst; 2751da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 276563d34d0SEric Dumazet u32 mtu = tcp_sk(sk)->mtu_info; 2771da177e4SLinus Torvalds 27880d0a69fSDavid S. Miller dst = inet_csk_update_pmtu(sk, mtu); 27980d0a69fSDavid S. Miller if (!dst) 2801da177e4SLinus Torvalds return; 2811da177e4SLinus Torvalds 2821da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 2831da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 2841da177e4SLinus Torvalds */ 2851da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 2861da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 2871da177e4SLinus Torvalds 2881da177e4SLinus Torvalds mtu = dst_mtu(dst); 2891da177e4SLinus Torvalds 2901da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 291482fc609SHannes Frederic Sowa ip_sk_accept_pmtu(sk) && 292d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 2931da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 2941da177e4SLinus Torvalds 2951da177e4SLinus Torvalds /* Resend the TCP packet because it's 2961da177e4SLinus Torvalds * clear that the old packet has been 2971da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 2981da177e4SLinus Torvalds * discovery. 2991da177e4SLinus Torvalds */ 3001da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3011da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3021da177e4SLinus Torvalds } 3034fab9071SNeal Cardwell EXPORT_SYMBOL(tcp_v4_mtu_reduced); 3041da177e4SLinus Torvalds 30555be7a9cSDavid S. Miller static void do_redirect(struct sk_buff *skb, struct sock *sk) 30655be7a9cSDavid S. Miller { 30755be7a9cSDavid S. Miller struct dst_entry *dst = __sk_dst_check(sk, 0); 30855be7a9cSDavid S. Miller 3091ed5c48fSDavid S. Miller if (dst) 3106700c270SDavid S. Miller dst->ops->redirect(dst, sk, skb); 31155be7a9cSDavid S. Miller } 31255be7a9cSDavid S. Miller 3131da177e4SLinus Torvalds /* 3141da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3151da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3161da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3171da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3181da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3191da177e4SLinus Torvalds * to find the appropriate port. 3201da177e4SLinus Torvalds * 3211da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". When 3221da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3231da177e4SLinus Torvalds * and for some paths there is no check at all. 3241da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3251da177e4SLinus Torvalds * is probably better. 3261da177e4SLinus Torvalds * 3271da177e4SLinus Torvalds */ 3281da177e4SLinus Torvalds 3294d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 3301da177e4SLinus Torvalds { 331b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; 3324d1a2d9eSDamian Lukowski struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 333f1ecd5d9SDamian Lukowski struct inet_connection_sock *icsk; 3341da177e4SLinus Torvalds struct tcp_sock *tp; 3351da177e4SLinus Torvalds struct inet_sock *inet; 3364d1a2d9eSDamian Lukowski const int type = icmp_hdr(icmp_skb)->type; 3374d1a2d9eSDamian Lukowski const int code = icmp_hdr(icmp_skb)->code; 3381da177e4SLinus Torvalds struct sock *sk; 339f1ecd5d9SDamian Lukowski struct sk_buff *skb; 3400a672f74SYuchung Cheng struct request_sock *fastopen; 3410a672f74SYuchung Cheng __u32 seq, snd_una; 342f1ecd5d9SDamian Lukowski __u32 remaining; 3431da177e4SLinus Torvalds int err; 3444d1a2d9eSDamian Lukowski struct net *net = dev_net(icmp_skb->dev); 3451da177e4SLinus Torvalds 346fd54d716SPavel Emelyanov sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, 3474d1a2d9eSDamian Lukowski iph->saddr, th->source, inet_iif(icmp_skb)); 3481da177e4SLinus Torvalds if (!sk) { 349dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3501da177e4SLinus Torvalds return; 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 3539469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 3541da177e4SLinus Torvalds return; 3551da177e4SLinus Torvalds } 3561da177e4SLinus Torvalds 3571da177e4SLinus Torvalds bh_lock_sock(sk); 3581da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 3591da177e4SLinus Torvalds * servers this needs to be solved differently. 360563d34d0SEric Dumazet * We do take care of PMTU discovery (RFC1191) special case : 361563d34d0SEric Dumazet * we can receive locally generated ICMP messages while socket is held. 3621da177e4SLinus Torvalds */ 363b74aa930SEric Dumazet if (sock_owned_by_user(sk)) { 364b74aa930SEric Dumazet if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)) 365de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 366b74aa930SEric Dumazet } 3671da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 3681da177e4SLinus Torvalds goto out; 3691da177e4SLinus Torvalds 37097e3ecd1Sstephen hemminger if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 37197e3ecd1Sstephen hemminger NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 37297e3ecd1Sstephen hemminger goto out; 37397e3ecd1Sstephen hemminger } 37497e3ecd1Sstephen hemminger 375f1ecd5d9SDamian Lukowski icsk = inet_csk(sk); 3761da177e4SLinus Torvalds tp = tcp_sk(sk); 3771da177e4SLinus Torvalds seq = ntohl(th->seq); 3780a672f74SYuchung Cheng /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 3790a672f74SYuchung Cheng fastopen = tp->fastopen_rsk; 3800a672f74SYuchung Cheng snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 3811da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 3820a672f74SYuchung Cheng !between(seq, snd_una, tp->snd_nxt)) { 383de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 3841da177e4SLinus Torvalds goto out; 3851da177e4SLinus Torvalds } 3861da177e4SLinus Torvalds 3871da177e4SLinus Torvalds switch (type) { 38855be7a9cSDavid S. Miller case ICMP_REDIRECT: 38955be7a9cSDavid S. Miller do_redirect(icmp_skb, sk); 39055be7a9cSDavid S. Miller goto out; 3911da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 3921da177e4SLinus Torvalds /* Just silently ignore these. */ 3931da177e4SLinus Torvalds goto out; 3941da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 3951da177e4SLinus Torvalds err = EPROTO; 3961da177e4SLinus Torvalds break; 3971da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 3981da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 3991da177e4SLinus Torvalds goto out; 4001da177e4SLinus Torvalds 4011da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 4020d4f0608SEric Dumazet /* We are not interested in TCP_LISTEN and open_requests 4030d4f0608SEric Dumazet * (SYN-ACKs send out by Linux are always <576bytes so 4040d4f0608SEric Dumazet * they should go through unfragmented). 4050d4f0608SEric Dumazet */ 4060d4f0608SEric Dumazet if (sk->sk_state == TCP_LISTEN) 4070d4f0608SEric Dumazet goto out; 4080d4f0608SEric Dumazet 409563d34d0SEric Dumazet tp->mtu_info = info; 410144d56e9SEric Dumazet if (!sock_owned_by_user(sk)) { 411563d34d0SEric Dumazet tcp_v4_mtu_reduced(sk); 412144d56e9SEric Dumazet } else { 413144d56e9SEric Dumazet if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) 414144d56e9SEric Dumazet sock_hold(sk); 415144d56e9SEric Dumazet } 4161da177e4SLinus Torvalds goto out; 4171da177e4SLinus Torvalds } 4181da177e4SLinus Torvalds 4191da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 420f1ecd5d9SDamian Lukowski /* check if icmp_skb allows revert of backoff 421f1ecd5d9SDamian Lukowski * (see draft-zimmermann-tcp-lcd) */ 422f1ecd5d9SDamian Lukowski if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) 423f1ecd5d9SDamian Lukowski break; 424f1ecd5d9SDamian Lukowski if (seq != tp->snd_una || !icsk->icsk_retransmits || 4250a672f74SYuchung Cheng !icsk->icsk_backoff || fastopen) 426f1ecd5d9SDamian Lukowski break; 427f1ecd5d9SDamian Lukowski 4288f49c270SDavid S. Miller if (sock_owned_by_user(sk)) 4298f49c270SDavid S. Miller break; 4308f49c270SDavid S. Miller 431f1ecd5d9SDamian Lukowski icsk->icsk_backoff--; 432fcdd1cf4SEric Dumazet icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : 433fcdd1cf4SEric Dumazet TCP_TIMEOUT_INIT; 434fcdd1cf4SEric Dumazet icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); 435f1ecd5d9SDamian Lukowski 436f1ecd5d9SDamian Lukowski skb = tcp_write_queue_head(sk); 437f1ecd5d9SDamian Lukowski BUG_ON(!skb); 438f1ecd5d9SDamian Lukowski 4397faee5c0SEric Dumazet remaining = icsk->icsk_rto - 4407faee5c0SEric Dumazet min(icsk->icsk_rto, 4417faee5c0SEric Dumazet tcp_time_stamp - tcp_skb_timestamp(skb)); 442f1ecd5d9SDamian Lukowski 443f1ecd5d9SDamian Lukowski if (remaining) { 444f1ecd5d9SDamian Lukowski inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 445f1ecd5d9SDamian Lukowski remaining, TCP_RTO_MAX); 446f1ecd5d9SDamian Lukowski } else { 447f1ecd5d9SDamian Lukowski /* RTO revert clocked out retransmission. 448f1ecd5d9SDamian Lukowski * Will retransmit now */ 449f1ecd5d9SDamian Lukowski tcp_retransmit_timer(sk); 450f1ecd5d9SDamian Lukowski } 451f1ecd5d9SDamian Lukowski 4521da177e4SLinus Torvalds break; 4531da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4541da177e4SLinus Torvalds err = EHOSTUNREACH; 4551da177e4SLinus Torvalds break; 4561da177e4SLinus Torvalds default: 4571da177e4SLinus Torvalds goto out; 4581da177e4SLinus Torvalds } 4591da177e4SLinus Torvalds 4601da177e4SLinus Torvalds switch (sk->sk_state) { 46160236fddSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4621da177e4SLinus Torvalds case TCP_LISTEN: 4631da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 4641da177e4SLinus Torvalds goto out; 4651da177e4SLinus Torvalds 466463c84b9SArnaldo Carvalho de Melo req = inet_csk_search_req(sk, &prev, th->dest, 4671da177e4SLinus Torvalds iph->daddr, iph->saddr); 4681da177e4SLinus Torvalds if (!req) 4691da177e4SLinus Torvalds goto out; 4701da177e4SLinus Torvalds 4711da177e4SLinus Torvalds /* ICMPs are not backlogged, hence we cannot get 4721da177e4SLinus Torvalds an established socket here. 4731da177e4SLinus Torvalds */ 474547b792cSIlpo Järvinen WARN_ON(req->sk); 4751da177e4SLinus Torvalds 4762e6599cbSArnaldo Carvalho de Melo if (seq != tcp_rsk(req)->snt_isn) { 477de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 4781da177e4SLinus Torvalds goto out; 4791da177e4SLinus Torvalds } 4801da177e4SLinus Torvalds 4811da177e4SLinus Torvalds /* 4821da177e4SLinus Torvalds * Still in SYN_RECV, just remove it silently. 4831da177e4SLinus Torvalds * There is no good way to pass the error to the newly 4841da177e4SLinus Torvalds * created socket, and POSIX does not want network 4851da177e4SLinus Torvalds * errors returned from accept(). 4861da177e4SLinus Torvalds */ 487463c84b9SArnaldo Carvalho de Melo inet_csk_reqsk_queue_drop(sk, req, prev); 488848bf15fSVijay Subramanian NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 4891da177e4SLinus Torvalds goto out; 4901da177e4SLinus Torvalds 4911da177e4SLinus Torvalds case TCP_SYN_SENT: 4920a672f74SYuchung Cheng case TCP_SYN_RECV: 4930a672f74SYuchung Cheng /* Only in fast or simultaneous open. If a fast open socket is 4940a672f74SYuchung Cheng * is already accepted it is treated as a connected one below. 4951da177e4SLinus Torvalds */ 4960a672f74SYuchung Cheng if (fastopen && fastopen->sk == NULL) 4970a672f74SYuchung Cheng break; 4980a672f74SYuchung Cheng 4991da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 5001da177e4SLinus Torvalds sk->sk_err = err; 5011da177e4SLinus Torvalds 5021da177e4SLinus Torvalds sk->sk_error_report(sk); 5031da177e4SLinus Torvalds 5041da177e4SLinus Torvalds tcp_done(sk); 5051da177e4SLinus Torvalds } else { 5061da177e4SLinus Torvalds sk->sk_err_soft = err; 5071da177e4SLinus Torvalds } 5081da177e4SLinus Torvalds goto out; 5091da177e4SLinus Torvalds } 5101da177e4SLinus Torvalds 5111da177e4SLinus Torvalds /* If we've already connected we will keep trying 5121da177e4SLinus Torvalds * until we time out, or the user gives up. 5131da177e4SLinus Torvalds * 5141da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 5151da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 5161da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 5171da177e4SLinus Torvalds * 5181da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 5191da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 5201da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 5211da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 5221da177e4SLinus Torvalds * 5231da177e4SLinus Torvalds * Now we are in compliance with RFCs. 5241da177e4SLinus Torvalds * --ANK (980905) 5251da177e4SLinus Torvalds */ 5261da177e4SLinus Torvalds 5271da177e4SLinus Torvalds inet = inet_sk(sk); 5281da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 5291da177e4SLinus Torvalds sk->sk_err = err; 5301da177e4SLinus Torvalds sk->sk_error_report(sk); 5311da177e4SLinus Torvalds } else { /* Only an error on timeout */ 5321da177e4SLinus Torvalds sk->sk_err_soft = err; 5331da177e4SLinus Torvalds } 5341da177e4SLinus Torvalds 5351da177e4SLinus Torvalds out: 5361da177e4SLinus Torvalds bh_unlock_sock(sk); 5371da177e4SLinus Torvalds sock_put(sk); 5381da177e4SLinus Torvalds } 5391da177e4SLinus Torvalds 54028850dc7SDaniel Borkmann void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) 5411da177e4SLinus Torvalds { 542aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 5431da177e4SLinus Torvalds 54484fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) { 545419f9f89SHerbert Xu th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); 546663ead3bSHerbert Xu skb->csum_start = skb_transport_header(skb) - skb->head; 547ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 5481da177e4SLinus Torvalds } else { 549419f9f89SHerbert Xu th->check = tcp_v4_check(skb->len, saddr, daddr, 55007f0757aSJoe Perches csum_partial(th, 5511da177e4SLinus Torvalds th->doff << 2, 5521da177e4SLinus Torvalds skb->csum)); 5531da177e4SLinus Torvalds } 5541da177e4SLinus Torvalds } 5551da177e4SLinus Torvalds 556419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */ 557bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) 558419f9f89SHerbert Xu { 559cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 560419f9f89SHerbert Xu 561419f9f89SHerbert Xu __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 562419f9f89SHerbert Xu } 5634bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check); 564419f9f89SHerbert Xu 5651da177e4SLinus Torvalds /* 5661da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 5671da177e4SLinus Torvalds * 5681da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 5691da177e4SLinus Torvalds * for reset. 5701da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 5711da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 5721da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 5731da177e4SLinus Torvalds * So that we build reply only basing on parameters 5741da177e4SLinus Torvalds * arrived with segment. 5751da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 5761da177e4SLinus Torvalds */ 5771da177e4SLinus Torvalds 578cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) 5791da177e4SLinus Torvalds { 580cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 581cfb6eeb4SYOSHIFUJI Hideaki struct { 582cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr th; 583cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 584714e85beSAl Viro __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; 585cfb6eeb4SYOSHIFUJI Hideaki #endif 586cfb6eeb4SYOSHIFUJI Hideaki } rep; 5871da177e4SLinus Torvalds struct ip_reply_arg arg; 588cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 589cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 590658ddaafSShawn Lu const __u8 *hash_location = NULL; 591658ddaafSShawn Lu unsigned char newhash[16]; 592658ddaafSShawn Lu int genhash; 593658ddaafSShawn Lu struct sock *sk1 = NULL; 594cfb6eeb4SYOSHIFUJI Hideaki #endif 595a86b1e30SPavel Emelyanov struct net *net; 5961da177e4SLinus Torvalds 5971da177e4SLinus Torvalds /* Never send a reset in response to a reset. */ 5981da177e4SLinus Torvalds if (th->rst) 5991da177e4SLinus Torvalds return; 6001da177e4SLinus Torvalds 601c3658e8dSEric Dumazet /* If sk not NULL, it means we did a successful lookup and incoming 602c3658e8dSEric Dumazet * route had to be correct. prequeue might have dropped our dst. 603c3658e8dSEric Dumazet */ 604c3658e8dSEric Dumazet if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL) 6051da177e4SLinus Torvalds return; 6061da177e4SLinus Torvalds 6071da177e4SLinus Torvalds /* Swap the send and the receive. */ 608cfb6eeb4SYOSHIFUJI Hideaki memset(&rep, 0, sizeof(rep)); 609cfb6eeb4SYOSHIFUJI Hideaki rep.th.dest = th->source; 610cfb6eeb4SYOSHIFUJI Hideaki rep.th.source = th->dest; 611cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = sizeof(struct tcphdr) / 4; 612cfb6eeb4SYOSHIFUJI Hideaki rep.th.rst = 1; 6131da177e4SLinus Torvalds 6141da177e4SLinus Torvalds if (th->ack) { 615cfb6eeb4SYOSHIFUJI Hideaki rep.th.seq = th->ack_seq; 6161da177e4SLinus Torvalds } else { 617cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack = 1; 618cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 6191da177e4SLinus Torvalds skb->len - (th->doff << 2)); 6201da177e4SLinus Torvalds } 6211da177e4SLinus Torvalds 6227174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 623cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_base = (unsigned char *)&rep; 624cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len = sizeof(rep.th); 625cfb6eeb4SYOSHIFUJI Hideaki 626*0f85feaeSEric Dumazet net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 627cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 628658ddaafSShawn Lu hash_location = tcp_parse_md5sig_option(th); 629658ddaafSShawn Lu if (!sk && hash_location) { 630658ddaafSShawn Lu /* 631658ddaafSShawn Lu * active side is lost. Try to find listening socket through 632658ddaafSShawn Lu * source port, and then find md5 key through listening socket. 633658ddaafSShawn Lu * we are not loose security here: 634658ddaafSShawn Lu * Incoming packet is checked with md5 hash with finding key, 635658ddaafSShawn Lu * no RST generated if md5 hash doesn't match. 636658ddaafSShawn Lu */ 637*0f85feaeSEric Dumazet sk1 = __inet_lookup_listener(net, 638da5e3630STom Herbert &tcp_hashinfo, ip_hdr(skb)->saddr, 639da5e3630STom Herbert th->source, ip_hdr(skb)->daddr, 640658ddaafSShawn Lu ntohs(th->source), inet_iif(skb)); 641658ddaafSShawn Lu /* don't send rst if it can't find key */ 642658ddaafSShawn Lu if (!sk1) 643658ddaafSShawn Lu return; 644658ddaafSShawn Lu rcu_read_lock(); 645658ddaafSShawn Lu key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) 646658ddaafSShawn Lu &ip_hdr(skb)->saddr, AF_INET); 647658ddaafSShawn Lu if (!key) 648658ddaafSShawn Lu goto release_sk1; 649658ddaafSShawn Lu 650658ddaafSShawn Lu genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); 651658ddaafSShawn Lu if (genhash || memcmp(hash_location, newhash, 16) != 0) 652658ddaafSShawn Lu goto release_sk1; 653658ddaafSShawn Lu } else { 654658ddaafSShawn Lu key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) 655658ddaafSShawn Lu &ip_hdr(skb)->saddr, 656a915da9bSEric Dumazet AF_INET) : NULL; 657658ddaafSShawn Lu } 658658ddaafSShawn Lu 659cfb6eeb4SYOSHIFUJI Hideaki if (key) { 660cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | 661cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 662cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 663cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 664cfb6eeb4SYOSHIFUJI Hideaki /* Update length and the length the header thinks exists */ 665cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 666cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len / 4; 667cfb6eeb4SYOSHIFUJI Hideaki 66849a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], 66978e645cbSIlpo Järvinen key, ip_hdr(skb)->saddr, 67078e645cbSIlpo Järvinen ip_hdr(skb)->daddr, &rep.th); 671cfb6eeb4SYOSHIFUJI Hideaki } 672cfb6eeb4SYOSHIFUJI Hideaki #endif 673eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 674eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 67552cd5750SIlpo Järvinen arg.iov[0].iov_len, IPPROTO_TCP, 0); 6761da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 67788ef4a5aSKOVACS Krisztian arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 678e2446eaaSShawn Lu /* When socket is gone, all binding information is lost. 6794c675258SAlexey Kuznetsov * routing might fail in this case. No choice here, if we choose to force 6804c675258SAlexey Kuznetsov * input interface, we will misroute in case of asymmetric route. 681e2446eaaSShawn Lu */ 6824c675258SAlexey Kuznetsov if (sk) 6834c675258SAlexey Kuznetsov arg.bound_dev_if = sk->sk_bound_dev_if; 6841da177e4SLinus Torvalds 68566b13d99SEric Dumazet arg.tos = ip_hdr(skb)->tos; 68624a2d43dSEric Dumazet ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, 68724a2d43dSEric Dumazet ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 68824a2d43dSEric Dumazet &arg, arg.iov[0].iov_len); 6891da177e4SLinus Torvalds 69063231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 69163231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 692658ddaafSShawn Lu 693658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG 694658ddaafSShawn Lu release_sk1: 695658ddaafSShawn Lu if (sk1) { 696658ddaafSShawn Lu rcu_read_unlock(); 697658ddaafSShawn Lu sock_put(sk1); 698658ddaafSShawn Lu } 699658ddaafSShawn Lu #endif 7001da177e4SLinus Torvalds } 7011da177e4SLinus Torvalds 7021da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 7031da177e4SLinus Torvalds outside socket context is ugly, certainly. What can I do? 7041da177e4SLinus Torvalds */ 7051da177e4SLinus Torvalds 7069501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 707ee684b6fSAndrey Vagin u32 win, u32 tsval, u32 tsecr, int oif, 70888ef4a5aSKOVACS Krisztian struct tcp_md5sig_key *key, 70966b13d99SEric Dumazet int reply_flags, u8 tos) 7101da177e4SLinus Torvalds { 711cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 7121da177e4SLinus Torvalds struct { 7131da177e4SLinus Torvalds struct tcphdr th; 714714e85beSAl Viro __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 715cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 716cfb6eeb4SYOSHIFUJI Hideaki + (TCPOLEN_MD5SIG_ALIGNED >> 2) 717cfb6eeb4SYOSHIFUJI Hideaki #endif 718cfb6eeb4SYOSHIFUJI Hideaki ]; 7191da177e4SLinus Torvalds } rep; 7201da177e4SLinus Torvalds struct ip_reply_arg arg; 721adf30907SEric Dumazet struct net *net = dev_net(skb_dst(skb)->dev); 7221da177e4SLinus Torvalds 7231da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 7247174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 7251da177e4SLinus Torvalds 7261da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 7271da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 728ee684b6fSAndrey Vagin if (tsecr) { 729cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 7301da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 7311da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 732ee684b6fSAndrey Vagin rep.opt[1] = htonl(tsval); 733ee684b6fSAndrey Vagin rep.opt[2] = htonl(tsecr); 734cb48cfe8SCraig Schlenter arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 7351da177e4SLinus Torvalds } 7361da177e4SLinus Torvalds 7371da177e4SLinus Torvalds /* Swap the send and the receive. */ 7381da177e4SLinus Torvalds rep.th.dest = th->source; 7391da177e4SLinus Torvalds rep.th.source = th->dest; 7401da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 7411da177e4SLinus Torvalds rep.th.seq = htonl(seq); 7421da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 7431da177e4SLinus Torvalds rep.th.ack = 1; 7441da177e4SLinus Torvalds rep.th.window = htons(win); 7451da177e4SLinus Torvalds 746cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 747cfb6eeb4SYOSHIFUJI Hideaki if (key) { 748ee684b6fSAndrey Vagin int offset = (tsecr) ? 3 : 0; 749cfb6eeb4SYOSHIFUJI Hideaki 750cfb6eeb4SYOSHIFUJI Hideaki rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 751cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 752cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 753cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 754cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 755cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len/4; 756cfb6eeb4SYOSHIFUJI Hideaki 75749a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], 75890b7e112SAdam Langley key, ip_hdr(skb)->saddr, 75990b7e112SAdam Langley ip_hdr(skb)->daddr, &rep.th); 760cfb6eeb4SYOSHIFUJI Hideaki } 761cfb6eeb4SYOSHIFUJI Hideaki #endif 76288ef4a5aSKOVACS Krisztian arg.flags = reply_flags; 763eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 764eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 7651da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 7661da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 7679501f972SYOSHIFUJI Hideaki if (oif) 7689501f972SYOSHIFUJI Hideaki arg.bound_dev_if = oif; 76966b13d99SEric Dumazet arg.tos = tos; 77024a2d43dSEric Dumazet ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, 77124a2d43dSEric Dumazet ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, 77224a2d43dSEric Dumazet &arg, arg.iov[0].iov_len); 7731da177e4SLinus Torvalds 77463231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 7751da177e4SLinus Torvalds } 7761da177e4SLinus Torvalds 7771da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 7781da177e4SLinus Torvalds { 7798feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 780cfb6eeb4SYOSHIFUJI Hideaki struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 7811da177e4SLinus Torvalds 7829501f972SYOSHIFUJI Hideaki tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 7837174259eSArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 784ee684b6fSAndrey Vagin tcp_time_stamp + tcptw->tw_ts_offset, 7859501f972SYOSHIFUJI Hideaki tcptw->tw_ts_recent, 7869501f972SYOSHIFUJI Hideaki tw->tw_bound_dev_if, 78788ef4a5aSKOVACS Krisztian tcp_twsk_md5_key(tcptw), 78866b13d99SEric Dumazet tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, 78966b13d99SEric Dumazet tw->tw_tos 7909501f972SYOSHIFUJI Hideaki ); 7911da177e4SLinus Torvalds 7928feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 7931da177e4SLinus Torvalds } 7941da177e4SLinus Torvalds 7956edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 7967174259eSArnaldo Carvalho de Melo struct request_sock *req) 7971da177e4SLinus Torvalds { 798168a8f58SJerry Chu /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 799168a8f58SJerry Chu * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 800168a8f58SJerry Chu */ 801168a8f58SJerry Chu tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? 802168a8f58SJerry Chu tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 803168a8f58SJerry Chu tcp_rsk(req)->rcv_nxt, req->rcv_wnd, 804ee684b6fSAndrey Vagin tcp_time_stamp, 8059501f972SYOSHIFUJI Hideaki req->ts_recent, 8069501f972SYOSHIFUJI Hideaki 0, 807a915da9bSEric Dumazet tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 808a915da9bSEric Dumazet AF_INET), 80966b13d99SEric Dumazet inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, 81066b13d99SEric Dumazet ip_hdr(skb)->tos); 8111da177e4SLinus Torvalds } 8121da177e4SLinus Torvalds 8131da177e4SLinus Torvalds /* 8149bf1d83eSKris Katterjohn * Send a SYN-ACK after having received a SYN. 81560236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 8161da177e4SLinus Torvalds * socket. 8171da177e4SLinus Torvalds */ 81872659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 819d6274bd8SOctavian Purdila struct flowi *fl, 820e6b4d113SWilliam Allen Simpson struct request_sock *req, 821843f4a55SYuchung Cheng u16 queue_mapping, 822843f4a55SYuchung Cheng struct tcp_fastopen_cookie *foc) 8231da177e4SLinus Torvalds { 8242e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 8256bd023f3SDavid S. Miller struct flowi4 fl4; 8261da177e4SLinus Torvalds int err = -1; 8271da177e4SLinus Torvalds struct sk_buff *skb; 8281da177e4SLinus Torvalds 8291da177e4SLinus Torvalds /* First, grab a route. */ 830ba3f7f04SDavid S. Miller if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 831fd80eb94SDenis V. Lunev return -1; 8321da177e4SLinus Torvalds 833843f4a55SYuchung Cheng skb = tcp_make_synack(sk, dst, req, foc); 8341da177e4SLinus Torvalds 8351da177e4SLinus Torvalds if (skb) { 836634fb979SEric Dumazet __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); 8371da177e4SLinus Torvalds 838fff32699SEric Dumazet skb_set_queue_mapping(skb, queue_mapping); 839634fb979SEric Dumazet err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, 840634fb979SEric Dumazet ireq->ir_rmt_addr, 8412e6599cbSArnaldo Carvalho de Melo ireq->opt); 842b9df3cb8SGerrit Renker err = net_xmit_eval(err); 8431da177e4SLinus Torvalds } 8441da177e4SLinus Torvalds 8451da177e4SLinus Torvalds return err; 8461da177e4SLinus Torvalds } 8471da177e4SLinus Torvalds 8481da177e4SLinus Torvalds /* 84960236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 8501da177e4SLinus Torvalds */ 85160236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req) 8521da177e4SLinus Torvalds { 8532e6599cbSArnaldo Carvalho de Melo kfree(inet_rsk(req)->opt); 8541da177e4SLinus Torvalds } 8551da177e4SLinus Torvalds 856946cedccSEric Dumazet /* 857a2a385d6SEric Dumazet * Return true if a syncookie should be sent 858946cedccSEric Dumazet */ 859a2a385d6SEric Dumazet bool tcp_syn_flood_action(struct sock *sk, 860946cedccSEric Dumazet const struct sk_buff *skb, 861946cedccSEric Dumazet const char *proto) 8621da177e4SLinus Torvalds { 863946cedccSEric Dumazet const char *msg = "Dropping request"; 864a2a385d6SEric Dumazet bool want_cookie = false; 865946cedccSEric Dumazet struct listen_sock *lopt; 866946cedccSEric Dumazet 8672a1d4bd4SFlorian Westphal #ifdef CONFIG_SYN_COOKIES 868946cedccSEric Dumazet if (sysctl_tcp_syncookies) { 8692a1d4bd4SFlorian Westphal msg = "Sending cookies"; 870a2a385d6SEric Dumazet want_cookie = true; 871946cedccSEric Dumazet NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); 872946cedccSEric Dumazet } else 87380e40daaSArnaldo Carvalho de Melo #endif 874946cedccSEric Dumazet NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 8752a1d4bd4SFlorian Westphal 876946cedccSEric Dumazet lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; 8775ad37d5dSHannes Frederic Sowa if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { 878946cedccSEric Dumazet lopt->synflood_warned = 1; 879afd46503SJoe Perches pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", 880946cedccSEric Dumazet proto, ntohs(tcp_hdr(skb)->dest), msg); 8812a1d4bd4SFlorian Westphal } 882946cedccSEric Dumazet return want_cookie; 883946cedccSEric Dumazet } 884946cedccSEric Dumazet EXPORT_SYMBOL(tcp_syn_flood_action); 8851da177e4SLinus Torvalds 886cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 887cfb6eeb4SYOSHIFUJI Hideaki /* 888cfb6eeb4SYOSHIFUJI Hideaki * RFC2385 MD5 checksumming requires a mapping of 889cfb6eeb4SYOSHIFUJI Hideaki * IP address->MD5 Key. 890cfb6eeb4SYOSHIFUJI Hideaki * We need to maintain these in the sk structure. 891cfb6eeb4SYOSHIFUJI Hideaki */ 892cfb6eeb4SYOSHIFUJI Hideaki 893cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address. */ 894a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, 895a915da9bSEric Dumazet const union tcp_md5_addr *addr, 896a915da9bSEric Dumazet int family) 897cfb6eeb4SYOSHIFUJI Hideaki { 898cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 899a915da9bSEric Dumazet struct tcp_md5sig_key *key; 900a915da9bSEric Dumazet unsigned int size = sizeof(struct in_addr); 901a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 902cfb6eeb4SYOSHIFUJI Hideaki 903a8afca03SEric Dumazet /* caller either holds rcu_read_lock() or socket lock */ 904a8afca03SEric Dumazet md5sig = rcu_dereference_check(tp->md5sig_info, 905b4fb05eaSEric Dumazet sock_owned_by_user(sk) || 906b4fb05eaSEric Dumazet lockdep_is_held(&sk->sk_lock.slock)); 907a8afca03SEric Dumazet if (!md5sig) 908cfb6eeb4SYOSHIFUJI Hideaki return NULL; 909a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 910a915da9bSEric Dumazet if (family == AF_INET6) 911a915da9bSEric Dumazet size = sizeof(struct in6_addr); 912a915da9bSEric Dumazet #endif 913b67bfe0dSSasha Levin hlist_for_each_entry_rcu(key, &md5sig->head, node) { 914a915da9bSEric Dumazet if (key->family != family) 915a915da9bSEric Dumazet continue; 916a915da9bSEric Dumazet if (!memcmp(&key->addr, addr, size)) 917a915da9bSEric Dumazet return key; 918cfb6eeb4SYOSHIFUJI Hideaki } 919cfb6eeb4SYOSHIFUJI Hideaki return NULL; 920cfb6eeb4SYOSHIFUJI Hideaki } 921a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup); 922cfb6eeb4SYOSHIFUJI Hideaki 923cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, 924cfb6eeb4SYOSHIFUJI Hideaki struct sock *addr_sk) 925cfb6eeb4SYOSHIFUJI Hideaki { 926a915da9bSEric Dumazet union tcp_md5_addr *addr; 927a915da9bSEric Dumazet 928a915da9bSEric Dumazet addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; 929a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 930cfb6eeb4SYOSHIFUJI Hideaki } 931cfb6eeb4SYOSHIFUJI Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup); 932cfb6eeb4SYOSHIFUJI Hideaki 933f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, 934cfb6eeb4SYOSHIFUJI Hideaki struct request_sock *req) 935cfb6eeb4SYOSHIFUJI Hideaki { 936a915da9bSEric Dumazet union tcp_md5_addr *addr; 937a915da9bSEric Dumazet 938634fb979SEric Dumazet addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr; 939a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 940cfb6eeb4SYOSHIFUJI Hideaki } 941cfb6eeb4SYOSHIFUJI Hideaki 942cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */ 943a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 944a915da9bSEric Dumazet int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) 945cfb6eeb4SYOSHIFUJI Hideaki { 946cfb6eeb4SYOSHIFUJI Hideaki /* Add Key to the list */ 947b0a713e9SMatthias M. Dellweg struct tcp_md5sig_key *key; 948cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 949f6685938SArnaldo Carvalho de Melo struct tcp_md5sig_info *md5sig; 950f6685938SArnaldo Carvalho de Melo 951c0353c7bSAydin Arik key = tcp_md5_do_lookup(sk, addr, family); 952a915da9bSEric Dumazet if (key) { 953a915da9bSEric Dumazet /* Pre-existing entry - just update that one. */ 954a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 955a915da9bSEric Dumazet key->keylen = newkeylen; 956a915da9bSEric Dumazet return 0; 957cfb6eeb4SYOSHIFUJI Hideaki } 958260fcbebSYan, Zheng 959a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 960a8afca03SEric Dumazet sock_owned_by_user(sk)); 961a915da9bSEric Dumazet if (!md5sig) { 962a915da9bSEric Dumazet md5sig = kmalloc(sizeof(*md5sig), gfp); 963a915da9bSEric Dumazet if (!md5sig) 964a915da9bSEric Dumazet return -ENOMEM; 965a915da9bSEric Dumazet 966a915da9bSEric Dumazet sk_nocaps_add(sk, NETIF_F_GSO_MASK); 967a915da9bSEric Dumazet INIT_HLIST_HEAD(&md5sig->head); 968a8afca03SEric Dumazet rcu_assign_pointer(tp->md5sig_info, md5sig); 969a915da9bSEric Dumazet } 970a915da9bSEric Dumazet 9715f3d9cb2SEric Dumazet key = sock_kmalloc(sk, sizeof(*key), gfp); 972a915da9bSEric Dumazet if (!key) 973a915da9bSEric Dumazet return -ENOMEM; 97471cea17eSEric Dumazet if (!tcp_alloc_md5sig_pool()) { 9755f3d9cb2SEric Dumazet sock_kfree_s(sk, key, sizeof(*key)); 976cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 977cfb6eeb4SYOSHIFUJI Hideaki } 978f6685938SArnaldo Carvalho de Melo 979a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 980a915da9bSEric Dumazet key->keylen = newkeylen; 981a915da9bSEric Dumazet key->family = family; 982a915da9bSEric Dumazet memcpy(&key->addr, addr, 983a915da9bSEric Dumazet (family == AF_INET6) ? sizeof(struct in6_addr) : 984a915da9bSEric Dumazet sizeof(struct in_addr)); 985a915da9bSEric Dumazet hlist_add_head_rcu(&key->node, &md5sig->head); 986cfb6eeb4SYOSHIFUJI Hideaki return 0; 987cfb6eeb4SYOSHIFUJI Hideaki } 988a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add); 989cfb6eeb4SYOSHIFUJI Hideaki 990a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) 991cfb6eeb4SYOSHIFUJI Hideaki { 992a915da9bSEric Dumazet struct tcp_md5sig_key *key; 993cfb6eeb4SYOSHIFUJI Hideaki 994c0353c7bSAydin Arik key = tcp_md5_do_lookup(sk, addr, family); 995a915da9bSEric Dumazet if (!key) 996cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 997a915da9bSEric Dumazet hlist_del_rcu(&key->node); 9985f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 999a915da9bSEric Dumazet kfree_rcu(key, rcu); 1000a915da9bSEric Dumazet return 0; 1001cfb6eeb4SYOSHIFUJI Hideaki } 1002a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del); 1003cfb6eeb4SYOSHIFUJI Hideaki 1004e0683e70Sstephen hemminger static void tcp_clear_md5_list(struct sock *sk) 1005cfb6eeb4SYOSHIFUJI Hideaki { 1006cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1007a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1008b67bfe0dSSasha Levin struct hlist_node *n; 1009a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 1010cfb6eeb4SYOSHIFUJI Hideaki 1011a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1); 1012a8afca03SEric Dumazet 1013b67bfe0dSSasha Levin hlist_for_each_entry_safe(key, n, &md5sig->head, node) { 1014a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10155f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1016a915da9bSEric Dumazet kfree_rcu(key, rcu); 1017cfb6eeb4SYOSHIFUJI Hideaki } 1018cfb6eeb4SYOSHIFUJI Hideaki } 1019cfb6eeb4SYOSHIFUJI Hideaki 1020cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, 1021cfb6eeb4SYOSHIFUJI Hideaki int optlen) 1022cfb6eeb4SYOSHIFUJI Hideaki { 1023cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig cmd; 1024cfb6eeb4SYOSHIFUJI Hideaki struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 1025cfb6eeb4SYOSHIFUJI Hideaki 1026cfb6eeb4SYOSHIFUJI Hideaki if (optlen < sizeof(cmd)) 1027cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1028cfb6eeb4SYOSHIFUJI Hideaki 1029cfb6eeb4SYOSHIFUJI Hideaki if (copy_from_user(&cmd, optval, sizeof(cmd))) 1030cfb6eeb4SYOSHIFUJI Hideaki return -EFAULT; 1031cfb6eeb4SYOSHIFUJI Hideaki 1032cfb6eeb4SYOSHIFUJI Hideaki if (sin->sin_family != AF_INET) 1033cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1034cfb6eeb4SYOSHIFUJI Hideaki 103564a124edSDmitry Popov if (!cmd.tcpm_keylen) 1036a915da9bSEric Dumazet return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1037a915da9bSEric Dumazet AF_INET); 1038cfb6eeb4SYOSHIFUJI Hideaki 1039cfb6eeb4SYOSHIFUJI Hideaki if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 1040cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1041cfb6eeb4SYOSHIFUJI Hideaki 1042a915da9bSEric Dumazet return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1043a915da9bSEric Dumazet AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, 1044a915da9bSEric Dumazet GFP_KERNEL); 1045cfb6eeb4SYOSHIFUJI Hideaki } 1046cfb6eeb4SYOSHIFUJI Hideaki 104749a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 104849a72dfbSAdam Langley __be32 daddr, __be32 saddr, int nbytes) 1049cfb6eeb4SYOSHIFUJI Hideaki { 1050cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_pseudohdr *bp; 105149a72dfbSAdam Langley struct scatterlist sg; 1052cfb6eeb4SYOSHIFUJI Hideaki 1053cfb6eeb4SYOSHIFUJI Hideaki bp = &hp->md5_blk.ip4; 1054cfb6eeb4SYOSHIFUJI Hideaki 1055cfb6eeb4SYOSHIFUJI Hideaki /* 105649a72dfbSAdam Langley * 1. the TCP pseudo-header (in the order: source IP address, 1057cfb6eeb4SYOSHIFUJI Hideaki * destination IP address, zero-padded protocol number, and 1058cfb6eeb4SYOSHIFUJI Hideaki * segment length) 1059cfb6eeb4SYOSHIFUJI Hideaki */ 1060cfb6eeb4SYOSHIFUJI Hideaki bp->saddr = saddr; 1061cfb6eeb4SYOSHIFUJI Hideaki bp->daddr = daddr; 1062cfb6eeb4SYOSHIFUJI Hideaki bp->pad = 0; 1063076fb722SYOSHIFUJI Hideaki bp->protocol = IPPROTO_TCP; 106449a72dfbSAdam Langley bp->len = cpu_to_be16(nbytes); 1065c7da57a1SDavid S. Miller 106649a72dfbSAdam Langley sg_init_one(&sg, bp, sizeof(*bp)); 106749a72dfbSAdam Langley return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 106849a72dfbSAdam Langley } 106949a72dfbSAdam Langley 1070a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1071318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th) 107249a72dfbSAdam Langley { 107349a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 107449a72dfbSAdam Langley struct hash_desc *desc; 107549a72dfbSAdam Langley 107649a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 107749a72dfbSAdam Langley if (!hp) 107849a72dfbSAdam Langley goto clear_hash_noput; 107949a72dfbSAdam Langley desc = &hp->md5_desc; 108049a72dfbSAdam Langley 108149a72dfbSAdam Langley if (crypto_hash_init(desc)) 108249a72dfbSAdam Langley goto clear_hash; 108349a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 108449a72dfbSAdam Langley goto clear_hash; 108549a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 108649a72dfbSAdam Langley goto clear_hash; 108749a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 108849a72dfbSAdam Langley goto clear_hash; 108949a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 1090cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1091cfb6eeb4SYOSHIFUJI Hideaki 1092cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1093cfb6eeb4SYOSHIFUJI Hideaki return 0; 109449a72dfbSAdam Langley 1095cfb6eeb4SYOSHIFUJI Hideaki clear_hash: 1096cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1097cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput: 1098cfb6eeb4SYOSHIFUJI Hideaki memset(md5_hash, 0, 16); 109949a72dfbSAdam Langley return 1; 1100cfb6eeb4SYOSHIFUJI Hideaki } 1101cfb6eeb4SYOSHIFUJI Hideaki 110249a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, 1103318cf7aaSEric Dumazet const struct sock *sk, const struct request_sock *req, 1104318cf7aaSEric Dumazet const struct sk_buff *skb) 1105cfb6eeb4SYOSHIFUJI Hideaki { 110649a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 110749a72dfbSAdam Langley struct hash_desc *desc; 1108318cf7aaSEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1109cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, daddr; 1110cfb6eeb4SYOSHIFUJI Hideaki 1111cfb6eeb4SYOSHIFUJI Hideaki if (sk) { 1112c720c7e8SEric Dumazet saddr = inet_sk(sk)->inet_saddr; 1113c720c7e8SEric Dumazet daddr = inet_sk(sk)->inet_daddr; 111449a72dfbSAdam Langley } else if (req) { 1115634fb979SEric Dumazet saddr = inet_rsk(req)->ir_loc_addr; 1116634fb979SEric Dumazet daddr = inet_rsk(req)->ir_rmt_addr; 1117cfb6eeb4SYOSHIFUJI Hideaki } else { 111849a72dfbSAdam Langley const struct iphdr *iph = ip_hdr(skb); 111949a72dfbSAdam Langley saddr = iph->saddr; 112049a72dfbSAdam Langley daddr = iph->daddr; 1121cfb6eeb4SYOSHIFUJI Hideaki } 1122cfb6eeb4SYOSHIFUJI Hideaki 112349a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 112449a72dfbSAdam Langley if (!hp) 112549a72dfbSAdam Langley goto clear_hash_noput; 112649a72dfbSAdam Langley desc = &hp->md5_desc; 112749a72dfbSAdam Langley 112849a72dfbSAdam Langley if (crypto_hash_init(desc)) 112949a72dfbSAdam Langley goto clear_hash; 113049a72dfbSAdam Langley 113149a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 113249a72dfbSAdam Langley goto clear_hash; 113349a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 113449a72dfbSAdam Langley goto clear_hash; 113549a72dfbSAdam Langley if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 113649a72dfbSAdam Langley goto clear_hash; 113749a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 113849a72dfbSAdam Langley goto clear_hash; 113949a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 114049a72dfbSAdam Langley goto clear_hash; 114149a72dfbSAdam Langley 114249a72dfbSAdam Langley tcp_put_md5sig_pool(); 114349a72dfbSAdam Langley return 0; 114449a72dfbSAdam Langley 114549a72dfbSAdam Langley clear_hash: 114649a72dfbSAdam Langley tcp_put_md5sig_pool(); 114749a72dfbSAdam Langley clear_hash_noput: 114849a72dfbSAdam Langley memset(md5_hash, 0, 16); 114949a72dfbSAdam Langley return 1; 115049a72dfbSAdam Langley } 115149a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1152cfb6eeb4SYOSHIFUJI Hideaki 11539ea88a15SDmitry Popov static bool __tcp_v4_inbound_md5_hash(struct sock *sk, 11549ea88a15SDmitry Popov const struct sk_buff *skb) 1155cfb6eeb4SYOSHIFUJI Hideaki { 1156cfb6eeb4SYOSHIFUJI Hideaki /* 1157cfb6eeb4SYOSHIFUJI Hideaki * This gets called for each TCP segment that arrives 1158cfb6eeb4SYOSHIFUJI Hideaki * so we want to be efficient. 1159cfb6eeb4SYOSHIFUJI Hideaki * We have 3 drop cases: 1160cfb6eeb4SYOSHIFUJI Hideaki * o No MD5 hash and one expected. 1161cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and we're not expecting one. 1162cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and its wrong. 1163cfb6eeb4SYOSHIFUJI Hideaki */ 1164cf533ea5SEric Dumazet const __u8 *hash_location = NULL; 1165cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *hash_expected; 1166eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1167cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1168cfb6eeb4SYOSHIFUJI Hideaki int genhash; 1169cfb6eeb4SYOSHIFUJI Hideaki unsigned char newhash[16]; 1170cfb6eeb4SYOSHIFUJI Hideaki 1171a915da9bSEric Dumazet hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, 1172a915da9bSEric Dumazet AF_INET); 11737d5d5525SYOSHIFUJI Hideaki hash_location = tcp_parse_md5sig_option(th); 1174cfb6eeb4SYOSHIFUJI Hideaki 1175cfb6eeb4SYOSHIFUJI Hideaki /* We've parsed the options - do we have a hash? */ 1176cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && !hash_location) 1177a2a385d6SEric Dumazet return false; 1178cfb6eeb4SYOSHIFUJI Hideaki 1179cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected && !hash_location) { 1180785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 1181a2a385d6SEric Dumazet return true; 1182cfb6eeb4SYOSHIFUJI Hideaki } 1183cfb6eeb4SYOSHIFUJI Hideaki 1184cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && hash_location) { 1185785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 1186a2a385d6SEric Dumazet return true; 1187cfb6eeb4SYOSHIFUJI Hideaki } 1188cfb6eeb4SYOSHIFUJI Hideaki 1189cfb6eeb4SYOSHIFUJI Hideaki /* Okay, so this is hash_expected and hash_location - 1190cfb6eeb4SYOSHIFUJI Hideaki * so we need to calculate the checksum. 1191cfb6eeb4SYOSHIFUJI Hideaki */ 119249a72dfbSAdam Langley genhash = tcp_v4_md5_hash_skb(newhash, 1193cfb6eeb4SYOSHIFUJI Hideaki hash_expected, 119449a72dfbSAdam Langley NULL, NULL, skb); 1195cfb6eeb4SYOSHIFUJI Hideaki 1196cfb6eeb4SYOSHIFUJI Hideaki if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1197e87cc472SJoe Perches net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", 1198673d57e7SHarvey Harrison &iph->saddr, ntohs(th->source), 1199673d57e7SHarvey Harrison &iph->daddr, ntohs(th->dest), 1200e87cc472SJoe Perches genhash ? " tcp_v4_calc_md5_hash failed" 1201e87cc472SJoe Perches : ""); 1202a2a385d6SEric Dumazet return true; 1203cfb6eeb4SYOSHIFUJI Hideaki } 1204a2a385d6SEric Dumazet return false; 1205cfb6eeb4SYOSHIFUJI Hideaki } 1206cfb6eeb4SYOSHIFUJI Hideaki 12079ea88a15SDmitry Popov static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) 12089ea88a15SDmitry Popov { 12099ea88a15SDmitry Popov bool ret; 12109ea88a15SDmitry Popov 12119ea88a15SDmitry Popov rcu_read_lock(); 12129ea88a15SDmitry Popov ret = __tcp_v4_inbound_md5_hash(sk, skb); 12139ea88a15SDmitry Popov rcu_read_unlock(); 12149ea88a15SDmitry Popov 12159ea88a15SDmitry Popov return ret; 12169ea88a15SDmitry Popov } 12179ea88a15SDmitry Popov 1218cfb6eeb4SYOSHIFUJI Hideaki #endif 1219cfb6eeb4SYOSHIFUJI Hideaki 122016bea70aSOctavian Purdila static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, 122116bea70aSOctavian Purdila struct sk_buff *skb) 122216bea70aSOctavian Purdila { 122316bea70aSOctavian Purdila struct inet_request_sock *ireq = inet_rsk(req); 122416bea70aSOctavian Purdila 122516bea70aSOctavian Purdila ireq->ir_loc_addr = ip_hdr(skb)->daddr; 122616bea70aSOctavian Purdila ireq->ir_rmt_addr = ip_hdr(skb)->saddr; 122716bea70aSOctavian Purdila ireq->no_srccheck = inet_sk(sk)->transparent; 122816bea70aSOctavian Purdila ireq->opt = tcp_v4_save_options(skb); 122916bea70aSOctavian Purdila } 123016bea70aSOctavian Purdila 1231d94e0417SOctavian Purdila static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, 1232d94e0417SOctavian Purdila const struct request_sock *req, 1233d94e0417SOctavian Purdila bool *strict) 1234d94e0417SOctavian Purdila { 1235d94e0417SOctavian Purdila struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); 1236d94e0417SOctavian Purdila 1237d94e0417SOctavian Purdila if (strict) { 1238d94e0417SOctavian Purdila if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) 1239d94e0417SOctavian Purdila *strict = true; 1240d94e0417SOctavian Purdila else 1241d94e0417SOctavian Purdila *strict = false; 1242d94e0417SOctavian Purdila } 1243d94e0417SOctavian Purdila 1244d94e0417SOctavian Purdila return dst; 1245d94e0417SOctavian Purdila } 1246d94e0417SOctavian Purdila 124772a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = { 12481da177e4SLinus Torvalds .family = PF_INET, 12492e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 12505db92c99SOctavian Purdila .rtx_syn_ack = tcp_rtx_synack, 125160236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 125260236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 12531da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 125472659eccSOctavian Purdila .syn_ack_timeout = tcp_syn_ack_timeout, 12551da177e4SLinus Torvalds }; 12561da177e4SLinus Torvalds 1257b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 12582aec4a29SOctavian Purdila .mss_clamp = TCP_MSS_DEFAULT, 125916bea70aSOctavian Purdila #ifdef CONFIG_TCP_MD5SIG 1260cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_reqsk_md5_lookup, 1261e3afe7b7SJohn Dykstra .calc_md5_hash = tcp_v4_md5_hash_skb, 1262b6332e6cSAndrew Morton #endif 126316bea70aSOctavian Purdila .init_req = tcp_v4_init_req, 1264fb7b37a7SOctavian Purdila #ifdef CONFIG_SYN_COOKIES 1265fb7b37a7SOctavian Purdila .cookie_init_seq = cookie_v4_init_sequence, 1266fb7b37a7SOctavian Purdila #endif 1267d94e0417SOctavian Purdila .route_req = tcp_v4_route_req, 1268936b8bdbSOctavian Purdila .init_seq = tcp_v4_init_sequence, 1269d6274bd8SOctavian Purdila .send_synack = tcp_v4_send_synack, 1270695da14eSOctavian Purdila .queue_hash_add = inet_csk_reqsk_queue_hash_add, 127116bea70aSOctavian Purdila }; 1272cfb6eeb4SYOSHIFUJI Hideaki 12731da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 12741da177e4SLinus Torvalds { 12751da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 1276511c3f92SEric Dumazet if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 12771da177e4SLinus Torvalds goto drop; 12781da177e4SLinus Torvalds 12791fb6f159SOctavian Purdila return tcp_conn_request(&tcp_request_sock_ops, 12801fb6f159SOctavian Purdila &tcp_request_sock_ipv4_ops, sk, skb); 12811da177e4SLinus Torvalds 12821da177e4SLinus Torvalds drop: 1283848bf15fSVijay Subramanian NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 12841da177e4SLinus Torvalds return 0; 12851da177e4SLinus Torvalds } 12864bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request); 12871da177e4SLinus Torvalds 12881da177e4SLinus Torvalds 12891da177e4SLinus Torvalds /* 12901da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 12911da177e4SLinus Torvalds * now create the new socket. 12921da177e4SLinus Torvalds */ 12931da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 129460236fddSArnaldo Carvalho de Melo struct request_sock *req, 12951da177e4SLinus Torvalds struct dst_entry *dst) 12961da177e4SLinus Torvalds { 12972e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 12981da177e4SLinus Torvalds struct inet_sock *newinet; 12991da177e4SLinus Torvalds struct tcp_sock *newtp; 13001da177e4SLinus Torvalds struct sock *newsk; 1301cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1302cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 1303cfb6eeb4SYOSHIFUJI Hideaki #endif 1304f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 13051da177e4SLinus Torvalds 13061da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 13071da177e4SLinus Torvalds goto exit_overflow; 13081da177e4SLinus Torvalds 13091da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 13101da177e4SLinus Torvalds if (!newsk) 1311093d2823SBalazs Scheidler goto exit_nonewsk; 13121da177e4SLinus Torvalds 1313bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 1314fae6ef87SNeal Cardwell inet_sk_rx_dst_set(newsk, skb); 13151da177e4SLinus Torvalds 13161da177e4SLinus Torvalds newtp = tcp_sk(newsk); 13171da177e4SLinus Torvalds newinet = inet_sk(newsk); 13182e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 1319634fb979SEric Dumazet newinet->inet_daddr = ireq->ir_rmt_addr; 1320634fb979SEric Dumazet newinet->inet_rcv_saddr = ireq->ir_loc_addr; 1321634fb979SEric Dumazet newinet->inet_saddr = ireq->ir_loc_addr; 1322f6d8bd05SEric Dumazet inet_opt = ireq->opt; 1323f6d8bd05SEric Dumazet rcu_assign_pointer(newinet->inet_opt, inet_opt); 13242e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 1325463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 1326eddc9ec5SArnaldo Carvalho de Melo newinet->mc_ttl = ip_hdr(skb)->ttl; 13274c507d28SJiri Benc newinet->rcv_tos = ip_hdr(skb)->tos; 1328d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 1329b73c3d0eSTom Herbert inet_set_txhash(newsk); 1330f6d8bd05SEric Dumazet if (inet_opt) 1331f6d8bd05SEric Dumazet inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1332c720c7e8SEric Dumazet newinet->inet_id = newtp->write_seq ^ jiffies; 13331da177e4SLinus Torvalds 1334dfd25fffSEric Dumazet if (!dst) { 1335dfd25fffSEric Dumazet dst = inet_csk_route_child_sock(sk, newsk, req); 1336dfd25fffSEric Dumazet if (!dst) 13370e734419SDavid S. Miller goto put_and_exit; 1338dfd25fffSEric Dumazet } else { 1339dfd25fffSEric Dumazet /* syncookie case : see end of cookie_v4_check() */ 1340dfd25fffSEric Dumazet } 13410e734419SDavid S. Miller sk_setup_caps(newsk, dst); 13420e734419SDavid S. Miller 13431da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 13440dbaee3bSDavid S. Miller newtp->advmss = dst_metric_advmss(dst); 1345f5fff5dcSTom Quetchenbach if (tcp_sk(sk)->rx_opt.user_mss && 1346f5fff5dcSTom Quetchenbach tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) 1347f5fff5dcSTom Quetchenbach newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1348f5fff5dcSTom Quetchenbach 13491da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 13501da177e4SLinus Torvalds 1351cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1352cfb6eeb4SYOSHIFUJI Hideaki /* Copy over the MD5 key from the original socket */ 1353a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, 1354a915da9bSEric Dumazet AF_INET); 1355c720c7e8SEric Dumazet if (key != NULL) { 1356cfb6eeb4SYOSHIFUJI Hideaki /* 1357cfb6eeb4SYOSHIFUJI Hideaki * We're using one, so create a matching key 1358cfb6eeb4SYOSHIFUJI Hideaki * on the newsk structure. If we fail to get 1359cfb6eeb4SYOSHIFUJI Hideaki * memory, then we end up not copying the key 1360cfb6eeb4SYOSHIFUJI Hideaki * across. Shucks. 1361cfb6eeb4SYOSHIFUJI Hideaki */ 1362a915da9bSEric Dumazet tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, 1363a915da9bSEric Dumazet AF_INET, key->key, key->keylen, GFP_ATOMIC); 1364a465419bSEric Dumazet sk_nocaps_add(newsk, NETIF_F_GSO_MASK); 1365cfb6eeb4SYOSHIFUJI Hideaki } 1366cfb6eeb4SYOSHIFUJI Hideaki #endif 1367cfb6eeb4SYOSHIFUJI Hideaki 13680e734419SDavid S. Miller if (__inet_inherit_port(sk, newsk) < 0) 13690e734419SDavid S. Miller goto put_and_exit; 13709327f705SEric Dumazet __inet_hash_nolisten(newsk, NULL); 13711da177e4SLinus Torvalds 13721da177e4SLinus Torvalds return newsk; 13731da177e4SLinus Torvalds 13741da177e4SLinus Torvalds exit_overflow: 1375de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1376093d2823SBalazs Scheidler exit_nonewsk: 1377093d2823SBalazs Scheidler dst_release(dst); 13781da177e4SLinus Torvalds exit: 1379de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 13801da177e4SLinus Torvalds return NULL; 13810e734419SDavid S. Miller put_and_exit: 1382e337e24dSChristoph Paasch inet_csk_prepare_forced_close(newsk); 1383e337e24dSChristoph Paasch tcp_done(newsk); 13840e734419SDavid S. Miller goto exit; 13851da177e4SLinus Torvalds } 13864bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 13871da177e4SLinus Torvalds 13881da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 13891da177e4SLinus Torvalds { 1390aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 1391eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 13921da177e4SLinus Torvalds struct sock *nsk; 139360236fddSArnaldo Carvalho de Melo struct request_sock **prev; 13941da177e4SLinus Torvalds /* Find possible connection requests. */ 1395463c84b9SArnaldo Carvalho de Melo struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, 13961da177e4SLinus Torvalds iph->saddr, iph->daddr); 13971da177e4SLinus Torvalds if (req) 13988336886fSJerry Chu return tcp_check_req(sk, skb, req, prev, false); 13991da177e4SLinus Torvalds 14003b1e0a65SYOSHIFUJI Hideaki nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1401c67499c0SPavel Emelyanov th->source, iph->daddr, th->dest, inet_iif(skb)); 14021da177e4SLinus Torvalds 14031da177e4SLinus Torvalds if (nsk) { 14041da177e4SLinus Torvalds if (nsk->sk_state != TCP_TIME_WAIT) { 14051da177e4SLinus Torvalds bh_lock_sock(nsk); 14061da177e4SLinus Torvalds return nsk; 14071da177e4SLinus Torvalds } 14089469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(nsk)); 14091da177e4SLinus Torvalds return NULL; 14101da177e4SLinus Torvalds } 14111da177e4SLinus Torvalds 14121da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 1413af9b4738SFlorian Westphal if (!th->syn) 1414461b74c3SCong Wang sk = cookie_v4_check(sk, skb); 14151da177e4SLinus Torvalds #endif 14161da177e4SLinus Torvalds return sk; 14171da177e4SLinus Torvalds } 14181da177e4SLinus Torvalds 14191da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 14201da177e4SLinus Torvalds * here. 14211da177e4SLinus Torvalds * 14221da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 14231da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 14241da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 14251da177e4SLinus Torvalds * held. 14261da177e4SLinus Torvalds */ 14271da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 14281da177e4SLinus Torvalds { 1429cfb6eeb4SYOSHIFUJI Hideaki struct sock *rsk; 1430cfb6eeb4SYOSHIFUJI Hideaki 14311da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 143292101b3bSDavid S. Miller struct dst_entry *dst = sk->sk_rx_dst; 1433404e0a8bSEric Dumazet 1434404e0a8bSEric Dumazet sock_rps_save_rxhash(sk, skb); 1435404e0a8bSEric Dumazet if (dst) { 1436505fbcf0SEric Dumazet if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || 1437505fbcf0SEric Dumazet dst->ops->check(dst, 0) == NULL) { 143892101b3bSDavid S. Miller dst_release(dst); 143992101b3bSDavid S. Miller sk->sk_rx_dst = NULL; 144092101b3bSDavid S. Miller } 144192101b3bSDavid S. Miller } 1442c995ae22SVijay Subramanian tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); 14431da177e4SLinus Torvalds return 0; 14441da177e4SLinus Torvalds } 14451da177e4SLinus Torvalds 1446ab6a5bb6SArnaldo Carvalho de Melo if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) 14471da177e4SLinus Torvalds goto csum_err; 14481da177e4SLinus Torvalds 14491da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 14501da177e4SLinus Torvalds struct sock *nsk = tcp_v4_hnd_req(sk, skb); 14511da177e4SLinus Torvalds if (!nsk) 14521da177e4SLinus Torvalds goto discard; 14531da177e4SLinus Torvalds 14541da177e4SLinus Torvalds if (nsk != sk) { 1455bdeab991STom Herbert sock_rps_save_rxhash(nsk, skb); 1456cfb6eeb4SYOSHIFUJI Hideaki if (tcp_child_process(sk, nsk, skb)) { 1457cfb6eeb4SYOSHIFUJI Hideaki rsk = nsk; 14581da177e4SLinus Torvalds goto reset; 1459cfb6eeb4SYOSHIFUJI Hideaki } 14601da177e4SLinus Torvalds return 0; 14611da177e4SLinus Torvalds } 1462ca55158cSEric Dumazet } else 1463bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1464ca55158cSEric Dumazet 1465aa8223c7SArnaldo Carvalho de Melo if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1466cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 14671da177e4SLinus Torvalds goto reset; 1468cfb6eeb4SYOSHIFUJI Hideaki } 14691da177e4SLinus Torvalds return 0; 14701da177e4SLinus Torvalds 14711da177e4SLinus Torvalds reset: 1472cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(rsk, skb); 14731da177e4SLinus Torvalds discard: 14741da177e4SLinus Torvalds kfree_skb(skb); 14751da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 14761da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 14771da177e4SLinus Torvalds * might be destroyed here. This current version compiles correctly, 14781da177e4SLinus Torvalds * but you have been warned. 14791da177e4SLinus Torvalds */ 14801da177e4SLinus Torvalds return 0; 14811da177e4SLinus Torvalds 14821da177e4SLinus Torvalds csum_err: 14836a5dc9e5SEric Dumazet TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS); 148463231bddSPavel Emelyanov TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 14851da177e4SLinus Torvalds goto discard; 14861da177e4SLinus Torvalds } 14874bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv); 14881da177e4SLinus Torvalds 1489160eb5a6SDavid S. Miller void tcp_v4_early_demux(struct sk_buff *skb) 149041063e9dSDavid S. Miller { 149141063e9dSDavid S. Miller const struct iphdr *iph; 149241063e9dSDavid S. Miller const struct tcphdr *th; 149341063e9dSDavid S. Miller struct sock *sk; 149441063e9dSDavid S. Miller 149541063e9dSDavid S. Miller if (skb->pkt_type != PACKET_HOST) 1496160eb5a6SDavid S. Miller return; 149741063e9dSDavid S. Miller 149845f00f99SEric Dumazet if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) 1499160eb5a6SDavid S. Miller return; 150041063e9dSDavid S. Miller 150141063e9dSDavid S. Miller iph = ip_hdr(skb); 150245f00f99SEric Dumazet th = tcp_hdr(skb); 150341063e9dSDavid S. Miller 150441063e9dSDavid S. Miller if (th->doff < sizeof(struct tcphdr) / 4) 1505160eb5a6SDavid S. Miller return; 150641063e9dSDavid S. Miller 150745f00f99SEric Dumazet sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, 150841063e9dSDavid S. Miller iph->saddr, th->source, 15097011d085SVijay Subramanian iph->daddr, ntohs(th->dest), 15109cb429d6SEric Dumazet skb->skb_iif); 151141063e9dSDavid S. Miller if (sk) { 151241063e9dSDavid S. Miller skb->sk = sk; 151341063e9dSDavid S. Miller skb->destructor = sock_edemux; 151441063e9dSDavid S. Miller if (sk->sk_state != TCP_TIME_WAIT) { 151541063e9dSDavid S. Miller struct dst_entry *dst = sk->sk_rx_dst; 1516505fbcf0SEric Dumazet 151741063e9dSDavid S. Miller if (dst) 151841063e9dSDavid S. Miller dst = dst_check(dst, 0); 151992101b3bSDavid S. Miller if (dst && 1520505fbcf0SEric Dumazet inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) 152141063e9dSDavid S. Miller skb_dst_set_noref(skb, dst); 152241063e9dSDavid S. Miller } 152341063e9dSDavid S. Miller } 152441063e9dSDavid S. Miller } 152541063e9dSDavid S. Miller 1526b2fb4f54SEric Dumazet /* Packet is added to VJ-style prequeue for processing in process 1527b2fb4f54SEric Dumazet * context, if a reader task is waiting. Apparently, this exciting 1528b2fb4f54SEric Dumazet * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) 1529b2fb4f54SEric Dumazet * failed somewhere. Latency? Burstiness? Well, at least now we will 1530b2fb4f54SEric Dumazet * see, why it failed. 8)8) --ANK 1531b2fb4f54SEric Dumazet * 1532b2fb4f54SEric Dumazet */ 1533b2fb4f54SEric Dumazet bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) 1534b2fb4f54SEric Dumazet { 1535b2fb4f54SEric Dumazet struct tcp_sock *tp = tcp_sk(sk); 1536b2fb4f54SEric Dumazet 1537b2fb4f54SEric Dumazet if (sysctl_tcp_low_latency || !tp->ucopy.task) 1538b2fb4f54SEric Dumazet return false; 1539b2fb4f54SEric Dumazet 1540b2fb4f54SEric Dumazet if (skb->len <= tcp_hdrlen(skb) && 1541b2fb4f54SEric Dumazet skb_queue_len(&tp->ucopy.prequeue) == 0) 1542b2fb4f54SEric Dumazet return false; 1543b2fb4f54SEric Dumazet 1544ca777effSEric Dumazet /* Before escaping RCU protected region, we need to take care of skb 1545ca777effSEric Dumazet * dst. Prequeue is only enabled for established sockets. 1546ca777effSEric Dumazet * For such sockets, we might need the skb dst only to set sk->sk_rx_dst 1547ca777effSEric Dumazet * Instead of doing full sk_rx_dst validity here, let's perform 1548ca777effSEric Dumazet * an optimistic check. 1549ca777effSEric Dumazet */ 1550ca777effSEric Dumazet if (likely(sk->sk_rx_dst)) 1551ca777effSEric Dumazet skb_dst_drop(skb); 1552ca777effSEric Dumazet else 155358717686SDavid S. Miller skb_dst_force(skb); 1554ca777effSEric Dumazet 1555b2fb4f54SEric Dumazet __skb_queue_tail(&tp->ucopy.prequeue, skb); 1556b2fb4f54SEric Dumazet tp->ucopy.memory += skb->truesize; 1557b2fb4f54SEric Dumazet if (tp->ucopy.memory > sk->sk_rcvbuf) { 1558b2fb4f54SEric Dumazet struct sk_buff *skb1; 1559b2fb4f54SEric Dumazet 1560b2fb4f54SEric Dumazet BUG_ON(sock_owned_by_user(sk)); 1561b2fb4f54SEric Dumazet 1562b2fb4f54SEric Dumazet while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { 1563b2fb4f54SEric Dumazet sk_backlog_rcv(sk, skb1); 1564b2fb4f54SEric Dumazet NET_INC_STATS_BH(sock_net(sk), 1565b2fb4f54SEric Dumazet LINUX_MIB_TCPPREQUEUEDROPPED); 1566b2fb4f54SEric Dumazet } 1567b2fb4f54SEric Dumazet 1568b2fb4f54SEric Dumazet tp->ucopy.memory = 0; 1569b2fb4f54SEric Dumazet } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { 1570b2fb4f54SEric Dumazet wake_up_interruptible_sync_poll(sk_sleep(sk), 1571b2fb4f54SEric Dumazet POLLIN | POLLRDNORM | POLLRDBAND); 1572b2fb4f54SEric Dumazet if (!inet_csk_ack_scheduled(sk)) 1573b2fb4f54SEric Dumazet inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 1574b2fb4f54SEric Dumazet (3 * tcp_rto_min(sk)) / 4, 1575b2fb4f54SEric Dumazet TCP_RTO_MAX); 1576b2fb4f54SEric Dumazet } 1577b2fb4f54SEric Dumazet return true; 1578b2fb4f54SEric Dumazet } 1579b2fb4f54SEric Dumazet EXPORT_SYMBOL(tcp_prequeue); 1580b2fb4f54SEric Dumazet 15811da177e4SLinus Torvalds /* 15821da177e4SLinus Torvalds * From tcp_input.c 15831da177e4SLinus Torvalds */ 15841da177e4SLinus Torvalds 15851da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 15861da177e4SLinus Torvalds { 1587eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 1588cf533ea5SEric Dumazet const struct tcphdr *th; 15891da177e4SLinus Torvalds struct sock *sk; 15901da177e4SLinus Torvalds int ret; 1591a86b1e30SPavel Emelyanov struct net *net = dev_net(skb->dev); 15921da177e4SLinus Torvalds 15931da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 15941da177e4SLinus Torvalds goto discard_it; 15951da177e4SLinus Torvalds 15961da177e4SLinus Torvalds /* Count it even if it's bad */ 159763231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); 15981da177e4SLinus Torvalds 15991da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 16001da177e4SLinus Torvalds goto discard_it; 16011da177e4SLinus Torvalds 1602aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 16031da177e4SLinus Torvalds 16041da177e4SLinus Torvalds if (th->doff < sizeof(struct tcphdr) / 4) 16051da177e4SLinus Torvalds goto bad_packet; 16061da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 16071da177e4SLinus Torvalds goto discard_it; 16081da177e4SLinus Torvalds 16091da177e4SLinus Torvalds /* An explanation is required here, I think. 16101da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1611caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 16121da177e4SLinus Torvalds * So, we defer the checks. */ 1613ed70fcfcSTom Herbert 1614ed70fcfcSTom Herbert if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo)) 16156a5dc9e5SEric Dumazet goto csum_error; 16161da177e4SLinus Torvalds 1617aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 1618eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 1619971f10ecSEric Dumazet /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB() 1620971f10ecSEric Dumazet * barrier() makes sure compiler wont play fool^Waliasing games. 1621971f10ecSEric Dumazet */ 1622971f10ecSEric Dumazet memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb), 1623971f10ecSEric Dumazet sizeof(struct inet_skb_parm)); 1624971f10ecSEric Dumazet barrier(); 1625971f10ecSEric Dumazet 16261da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 16271da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 16281da177e4SLinus Torvalds skb->len - th->doff * 4); 16291da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1630e11ecddfSEric Dumazet TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 163104317dafSEric Dumazet TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1632b82d1bb4SEric Dumazet TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 16331da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 16341da177e4SLinus Torvalds 16359a1f27c4SArnaldo Carvalho de Melo sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 16361da177e4SLinus Torvalds if (!sk) 16371da177e4SLinus Torvalds goto no_tcp_socket; 16381da177e4SLinus Torvalds 1639bb134d5dSEric Dumazet process: 1640bb134d5dSEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 1641bb134d5dSEric Dumazet goto do_time_wait; 1642bb134d5dSEric Dumazet 16436cce09f8SEric Dumazet if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 16446cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1645d218d111SStephen Hemminger goto discard_and_relse; 16466cce09f8SEric Dumazet } 1647d218d111SStephen Hemminger 16481da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 16491da177e4SLinus Torvalds goto discard_and_relse; 16509ea88a15SDmitry Popov 16519ea88a15SDmitry Popov #ifdef CONFIG_TCP_MD5SIG 16529ea88a15SDmitry Popov /* 16539ea88a15SDmitry Popov * We really want to reject the packet as early as possible 16549ea88a15SDmitry Popov * if: 16559ea88a15SDmitry Popov * o We're expecting an MD5'd packet and this is no MD5 tcp option 16569ea88a15SDmitry Popov * o There is an MD5 option and we're not expecting one 16579ea88a15SDmitry Popov */ 16589ea88a15SDmitry Popov if (tcp_v4_inbound_md5_hash(sk, skb)) 16599ea88a15SDmitry Popov goto discard_and_relse; 16609ea88a15SDmitry Popov #endif 16619ea88a15SDmitry Popov 1662b59c2701SPatrick McHardy nf_reset(skb); 16631da177e4SLinus Torvalds 1664fda9ef5dSDmitry Mishin if (sk_filter(sk, skb)) 16651da177e4SLinus Torvalds goto discard_and_relse; 16661da177e4SLinus Torvalds 16678b80cda5SEliezer Tamir sk_mark_napi_id(sk, skb); 16681da177e4SLinus Torvalds skb->dev = NULL; 16691da177e4SLinus Torvalds 1670c6366184SIngo Molnar bh_lock_sock_nested(sk); 16711da177e4SLinus Torvalds ret = 0; 16721da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 16731da177e4SLinus Torvalds if (!tcp_prequeue(sk, skb)) 16741da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 1675da882c1fSEric Dumazet } else if (unlikely(sk_add_backlog(sk, skb, 1676da882c1fSEric Dumazet sk->sk_rcvbuf + sk->sk_sndbuf))) { 16776b03a53aSZhu Yi bh_unlock_sock(sk); 16786cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 16796b03a53aSZhu Yi goto discard_and_relse; 16806b03a53aSZhu Yi } 16811da177e4SLinus Torvalds bh_unlock_sock(sk); 16821da177e4SLinus Torvalds 16831da177e4SLinus Torvalds sock_put(sk); 16841da177e4SLinus Torvalds 16851da177e4SLinus Torvalds return ret; 16861da177e4SLinus Torvalds 16871da177e4SLinus Torvalds no_tcp_socket: 16881da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 16891da177e4SLinus Torvalds goto discard_it; 16901da177e4SLinus Torvalds 16911da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 16926a5dc9e5SEric Dumazet csum_error: 16936a5dc9e5SEric Dumazet TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); 16941da177e4SLinus Torvalds bad_packet: 169563231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 16961da177e4SLinus Torvalds } else { 1697cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(NULL, skb); 16981da177e4SLinus Torvalds } 16991da177e4SLinus Torvalds 17001da177e4SLinus Torvalds discard_it: 17011da177e4SLinus Torvalds /* Discard frame. */ 17021da177e4SLinus Torvalds kfree_skb(skb); 17031da177e4SLinus Torvalds return 0; 17041da177e4SLinus Torvalds 17051da177e4SLinus Torvalds discard_and_relse: 17061da177e4SLinus Torvalds sock_put(sk); 17071da177e4SLinus Torvalds goto discard_it; 17081da177e4SLinus Torvalds 17091da177e4SLinus Torvalds do_time_wait: 17101da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 17119469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17121da177e4SLinus Torvalds goto discard_it; 17131da177e4SLinus Torvalds } 17141da177e4SLinus Torvalds 17156a5dc9e5SEric Dumazet if (skb->len < (th->doff << 2)) { 17169469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17176a5dc9e5SEric Dumazet goto bad_packet; 17186a5dc9e5SEric Dumazet } 17196a5dc9e5SEric Dumazet if (tcp_checksum_complete(skb)) { 17206a5dc9e5SEric Dumazet inet_twsk_put(inet_twsk(sk)); 17216a5dc9e5SEric Dumazet goto csum_error; 17221da177e4SLinus Torvalds } 17239469c7b4SYOSHIFUJI Hideaki switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 17241da177e4SLinus Torvalds case TCP_TW_SYN: { 1725c346dca1SYOSHIFUJI Hideaki struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 1726c67499c0SPavel Emelyanov &tcp_hashinfo, 1727da5e3630STom Herbert iph->saddr, th->source, 1728eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, 1729463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 17301da177e4SLinus Torvalds if (sk2) { 17319469c7b4SYOSHIFUJI Hideaki inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); 17329469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 17331da177e4SLinus Torvalds sk = sk2; 17341da177e4SLinus Torvalds goto process; 17351da177e4SLinus Torvalds } 17361da177e4SLinus Torvalds /* Fall through to ACK */ 17371da177e4SLinus Torvalds } 17381da177e4SLinus Torvalds case TCP_TW_ACK: 17391da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 17401da177e4SLinus Torvalds break; 17411da177e4SLinus Torvalds case TCP_TW_RST: 17421da177e4SLinus Torvalds goto no_tcp_socket; 17431da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 17441da177e4SLinus Torvalds } 17451da177e4SLinus Torvalds goto discard_it; 17461da177e4SLinus Torvalds } 17471da177e4SLinus Torvalds 1748ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = { 1749ccb7c410SDavid S. Miller .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1750ccb7c410SDavid S. Miller .twsk_unique = tcp_twsk_unique, 1751ccb7c410SDavid S. Miller .twsk_destructor= tcp_twsk_destructor, 1752ccb7c410SDavid S. Miller }; 17531da177e4SLinus Torvalds 175463d02d15SEric Dumazet void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 17555d299f3dSEric Dumazet { 17565d299f3dSEric Dumazet struct dst_entry *dst = skb_dst(skb); 17575d299f3dSEric Dumazet 1758ca777effSEric Dumazet if (dst) { 17595d299f3dSEric Dumazet dst_hold(dst); 17605d299f3dSEric Dumazet sk->sk_rx_dst = dst; 17615d299f3dSEric Dumazet inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; 17625d299f3dSEric Dumazet } 1763ca777effSEric Dumazet } 176463d02d15SEric Dumazet EXPORT_SYMBOL(inet_sk_rx_dst_set); 17655d299f3dSEric Dumazet 17663b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = { 17671da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 17681da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 176932519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 17705d299f3dSEric Dumazet .sk_rx_dst_set = inet_sk_rx_dst_set, 17711da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 17721da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 17731da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 17741da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 17751da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 1776543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 1777543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 1778ab1e0a13SArnaldo Carvalho de Melo .bind_conflict = inet_csk_bind_conflict, 17793fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 17803fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 17813fdadf7dSDmitry Mishin .compat_getsockopt = compat_ip_getsockopt, 17823fdadf7dSDmitry Mishin #endif 17834fab9071SNeal Cardwell .mtu_reduced = tcp_v4_mtu_reduced, 17841da177e4SLinus Torvalds }; 17854bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific); 17861da177e4SLinus Torvalds 1787cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1788b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1789cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_md5_lookup, 179049a72dfbSAdam Langley .calc_md5_hash = tcp_v4_md5_hash_skb, 1791cfb6eeb4SYOSHIFUJI Hideaki .md5_parse = tcp_v4_parse_md5_keys, 1792cfb6eeb4SYOSHIFUJI Hideaki }; 1793b6332e6cSAndrew Morton #endif 1794cfb6eeb4SYOSHIFUJI Hideaki 17951da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 17961da177e4SLinus Torvalds * sk_alloc() so need not be done here. 17971da177e4SLinus Torvalds */ 17981da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 17991da177e4SLinus Torvalds { 18006687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 18011da177e4SLinus Torvalds 1802900f65d3SNeal Cardwell tcp_init_sock(sk); 18031da177e4SLinus Torvalds 18048292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 1805900f65d3SNeal Cardwell 1806cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1807ac807fa8SDavid S. Miller tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; 1808cfb6eeb4SYOSHIFUJI Hideaki #endif 18091da177e4SLinus Torvalds 18101da177e4SLinus Torvalds return 0; 18111da177e4SLinus Torvalds } 18121da177e4SLinus Torvalds 18137d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk) 18141da177e4SLinus Torvalds { 18151da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 18161da177e4SLinus Torvalds 18171da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 18181da177e4SLinus Torvalds 18196687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 1820317a76f9SStephen Hemminger 18211da177e4SLinus Torvalds /* Cleanup up the write buffer. */ 1822fe067e8aSDavid S. Miller tcp_write_queue_purge(sk); 18231da177e4SLinus Torvalds 18241da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. */ 18251da177e4SLinus Torvalds __skb_queue_purge(&tp->out_of_order_queue); 18261da177e4SLinus Torvalds 1827cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1828cfb6eeb4SYOSHIFUJI Hideaki /* Clean up the MD5 key list, if any */ 1829cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info) { 1830a915da9bSEric Dumazet tcp_clear_md5_list(sk); 1831a8afca03SEric Dumazet kfree_rcu(tp->md5sig_info, rcu); 1832cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = NULL; 1833cfb6eeb4SYOSHIFUJI Hideaki } 1834cfb6eeb4SYOSHIFUJI Hideaki #endif 1835cfb6eeb4SYOSHIFUJI Hideaki 18361da177e4SLinus Torvalds /* Clean prequeue, it must be empty really */ 18371da177e4SLinus Torvalds __skb_queue_purge(&tp->ucopy.prequeue); 18381da177e4SLinus Torvalds 18391da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. */ 1840463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 1841ab1e0a13SArnaldo Carvalho de Melo inet_put_port(sk); 18421da177e4SLinus Torvalds 1843168a8f58SJerry Chu BUG_ON(tp->fastopen_rsk != NULL); 1844435cf559SWilliam Allen Simpson 1845cf60af03SYuchung Cheng /* If socket is aborted during connect operation */ 1846cf60af03SYuchung Cheng tcp_free_fastopen_req(tp); 1847cf60af03SYuchung Cheng 1848180d8cd9SGlauber Costa sk_sockets_allocated_dec(sk); 1849d1a4c0b3SGlauber Costa sock_release_memcg(sk); 18501da177e4SLinus Torvalds } 18511da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 18521da177e4SLinus Torvalds 18531da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 18541da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 18551da177e4SLinus Torvalds 1856a8b690f9STom Herbert /* 1857a8b690f9STom Herbert * Get next listener socket follow cur. If cur is NULL, get first socket 1858a8b690f9STom Herbert * starting from bucket given in st->bucket; when st->bucket is zero the 1859a8b690f9STom Herbert * very first socket in the hash table is returned. 1860a8b690f9STom Herbert */ 18611da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur) 18621da177e4SLinus Torvalds { 1863463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk; 1864c25eb3bfSEric Dumazet struct hlist_nulls_node *node; 18651da177e4SLinus Torvalds struct sock *sk = cur; 18665caea4eaSEric Dumazet struct inet_listen_hashbucket *ilb; 18671da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1868a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 18691da177e4SLinus Torvalds 18701da177e4SLinus Torvalds if (!sk) { 1871a8b690f9STom Herbert ilb = &tcp_hashinfo.listening_hash[st->bucket]; 18725caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 1873c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 1874a8b690f9STom Herbert st->offset = 0; 18751da177e4SLinus Torvalds goto get_sk; 18761da177e4SLinus Torvalds } 18775caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 18781da177e4SLinus Torvalds ++st->num; 1879a8b690f9STom Herbert ++st->offset; 18801da177e4SLinus Torvalds 18811da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_OPENREQ) { 188260236fddSArnaldo Carvalho de Melo struct request_sock *req = cur; 18831da177e4SLinus Torvalds 1884463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(st->syn_wait_sk); 18851da177e4SLinus Torvalds req = req->dl_next; 18861da177e4SLinus Torvalds while (1) { 18871da177e4SLinus Torvalds while (req) { 1888bdccc4caSDaniel Lezcano if (req->rsk_ops->family == st->family) { 18891da177e4SLinus Torvalds cur = req; 18901da177e4SLinus Torvalds goto out; 18911da177e4SLinus Torvalds } 18921da177e4SLinus Torvalds req = req->dl_next; 18931da177e4SLinus Torvalds } 189472a3effaSEric Dumazet if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 18951da177e4SLinus Torvalds break; 18961da177e4SLinus Torvalds get_req: 1897463c84b9SArnaldo Carvalho de Melo req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 18981da177e4SLinus Torvalds } 18991bde5ac4SEric Dumazet sk = sk_nulls_next(st->syn_wait_sk); 19001da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 1901463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 19021da177e4SLinus Torvalds } else { 1903463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 1904463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1905463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) 19061da177e4SLinus Torvalds goto start_req; 1907463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 19081bde5ac4SEric Dumazet sk = sk_nulls_next(sk); 19091da177e4SLinus Torvalds } 19101da177e4SLinus Torvalds get_sk: 1911c25eb3bfSEric Dumazet sk_nulls_for_each_from(sk, node) { 19128475ef9fSPavel Emelyanov if (!net_eq(sock_net(sk), net)) 19138475ef9fSPavel Emelyanov continue; 19148475ef9fSPavel Emelyanov if (sk->sk_family == st->family) { 19151da177e4SLinus Torvalds cur = sk; 19161da177e4SLinus Torvalds goto out; 19171da177e4SLinus Torvalds } 1918463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 1919463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1920463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 19211da177e4SLinus Torvalds start_req: 19221da177e4SLinus Torvalds st->uid = sock_i_uid(sk); 19231da177e4SLinus Torvalds st->syn_wait_sk = sk; 19241da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_OPENREQ; 19251da177e4SLinus Torvalds st->sbucket = 0; 19261da177e4SLinus Torvalds goto get_req; 19271da177e4SLinus Torvalds } 1928463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 19291da177e4SLinus Torvalds } 19305caea4eaSEric Dumazet spin_unlock_bh(&ilb->lock); 1931a8b690f9STom Herbert st->offset = 0; 19320f7ff927SArnaldo Carvalho de Melo if (++st->bucket < INET_LHTABLE_SIZE) { 19335caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 19345caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 1935c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 19361da177e4SLinus Torvalds goto get_sk; 19371da177e4SLinus Torvalds } 19381da177e4SLinus Torvalds cur = NULL; 19391da177e4SLinus Torvalds out: 19401da177e4SLinus Torvalds return cur; 19411da177e4SLinus Torvalds } 19421da177e4SLinus Torvalds 19431da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 19441da177e4SLinus Torvalds { 1945a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 1946a8b690f9STom Herbert void *rc; 1947a8b690f9STom Herbert 1948a8b690f9STom Herbert st->bucket = 0; 1949a8b690f9STom Herbert st->offset = 0; 1950a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 19511da177e4SLinus Torvalds 19521da177e4SLinus Torvalds while (rc && *pos) { 19531da177e4SLinus Torvalds rc = listening_get_next(seq, rc); 19541da177e4SLinus Torvalds --*pos; 19551da177e4SLinus Torvalds } 19561da177e4SLinus Torvalds return rc; 19571da177e4SLinus Torvalds } 19581da177e4SLinus Torvalds 195905dbc7b5SEric Dumazet static inline bool empty_bucket(const struct tcp_iter_state *st) 19606eac5604SAndi Kleen { 196105dbc7b5SEric Dumazet return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain); 19626eac5604SAndi Kleen } 19636eac5604SAndi Kleen 1964a8b690f9STom Herbert /* 1965a8b690f9STom Herbert * Get first established socket starting from bucket given in st->bucket. 1966a8b690f9STom Herbert * If st->bucket is zero, the very first socket in the hash is returned. 1967a8b690f9STom Herbert */ 19681da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq) 19691da177e4SLinus Torvalds { 19701da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 1971a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 19721da177e4SLinus Torvalds void *rc = NULL; 19731da177e4SLinus Torvalds 1974a8b690f9STom Herbert st->offset = 0; 1975a8b690f9STom Herbert for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 19761da177e4SLinus Torvalds struct sock *sk; 19773ab5aee7SEric Dumazet struct hlist_nulls_node *node; 19789db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 19791da177e4SLinus Torvalds 19806eac5604SAndi Kleen /* Lockless fast path for the common case of empty buckets */ 19816eac5604SAndi Kleen if (empty_bucket(st)) 19826eac5604SAndi Kleen continue; 19836eac5604SAndi Kleen 19849db66bdcSEric Dumazet spin_lock_bh(lock); 19853ab5aee7SEric Dumazet sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 1986f40c8174SDaniel Lezcano if (sk->sk_family != st->family || 1987878628fbSYOSHIFUJI Hideaki !net_eq(sock_net(sk), net)) { 19881da177e4SLinus Torvalds continue; 19891da177e4SLinus Torvalds } 19901da177e4SLinus Torvalds rc = sk; 19911da177e4SLinus Torvalds goto out; 19921da177e4SLinus Torvalds } 19939db66bdcSEric Dumazet spin_unlock_bh(lock); 19941da177e4SLinus Torvalds } 19951da177e4SLinus Torvalds out: 19961da177e4SLinus Torvalds return rc; 19971da177e4SLinus Torvalds } 19981da177e4SLinus Torvalds 19991da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur) 20001da177e4SLinus Torvalds { 20011da177e4SLinus Torvalds struct sock *sk = cur; 20023ab5aee7SEric Dumazet struct hlist_nulls_node *node; 20031da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2004a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 20051da177e4SLinus Torvalds 20061da177e4SLinus Torvalds ++st->num; 2007a8b690f9STom Herbert ++st->offset; 20081da177e4SLinus Torvalds 20093ab5aee7SEric Dumazet sk = sk_nulls_next(sk); 20101da177e4SLinus Torvalds 20113ab5aee7SEric Dumazet sk_nulls_for_each_from(sk, node) { 2012878628fbSYOSHIFUJI Hideaki if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 201305dbc7b5SEric Dumazet return sk; 20141da177e4SLinus Torvalds } 20151da177e4SLinus Torvalds 201605dbc7b5SEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 201705dbc7b5SEric Dumazet ++st->bucket; 201805dbc7b5SEric Dumazet return established_get_first(seq); 20191da177e4SLinus Torvalds } 20201da177e4SLinus Torvalds 20211da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos) 20221da177e4SLinus Torvalds { 2023a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2024a8b690f9STom Herbert void *rc; 2025a8b690f9STom Herbert 2026a8b690f9STom Herbert st->bucket = 0; 2027a8b690f9STom Herbert rc = established_get_first(seq); 20281da177e4SLinus Torvalds 20291da177e4SLinus Torvalds while (rc && pos) { 20301da177e4SLinus Torvalds rc = established_get_next(seq, rc); 20311da177e4SLinus Torvalds --pos; 20321da177e4SLinus Torvalds } 20331da177e4SLinus Torvalds return rc; 20341da177e4SLinus Torvalds } 20351da177e4SLinus Torvalds 20361da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 20371da177e4SLinus Torvalds { 20381da177e4SLinus Torvalds void *rc; 20391da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 20401da177e4SLinus Torvalds 20411da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 20421da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 20431da177e4SLinus Torvalds 20441da177e4SLinus Torvalds if (!rc) { 20451da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 20461da177e4SLinus Torvalds rc = established_get_idx(seq, pos); 20471da177e4SLinus Torvalds } 20481da177e4SLinus Torvalds 20491da177e4SLinus Torvalds return rc; 20501da177e4SLinus Torvalds } 20511da177e4SLinus Torvalds 2052a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq) 2053a8b690f9STom Herbert { 2054a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2055a8b690f9STom Herbert int offset = st->offset; 2056a8b690f9STom Herbert int orig_num = st->num; 2057a8b690f9STom Herbert void *rc = NULL; 2058a8b690f9STom Herbert 2059a8b690f9STom Herbert switch (st->state) { 2060a8b690f9STom Herbert case TCP_SEQ_STATE_OPENREQ: 2061a8b690f9STom Herbert case TCP_SEQ_STATE_LISTENING: 2062a8b690f9STom Herbert if (st->bucket >= INET_LHTABLE_SIZE) 2063a8b690f9STom Herbert break; 2064a8b690f9STom Herbert st->state = TCP_SEQ_STATE_LISTENING; 2065a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 2066a8b690f9STom Herbert while (offset-- && rc) 2067a8b690f9STom Herbert rc = listening_get_next(seq, rc); 2068a8b690f9STom Herbert if (rc) 2069a8b690f9STom Herbert break; 2070a8b690f9STom Herbert st->bucket = 0; 207105dbc7b5SEric Dumazet st->state = TCP_SEQ_STATE_ESTABLISHED; 2072a8b690f9STom Herbert /* Fallthrough */ 2073a8b690f9STom Herbert case TCP_SEQ_STATE_ESTABLISHED: 2074a8b690f9STom Herbert if (st->bucket > tcp_hashinfo.ehash_mask) 2075a8b690f9STom Herbert break; 2076a8b690f9STom Herbert rc = established_get_first(seq); 2077a8b690f9STom Herbert while (offset-- && rc) 2078a8b690f9STom Herbert rc = established_get_next(seq, rc); 2079a8b690f9STom Herbert } 2080a8b690f9STom Herbert 2081a8b690f9STom Herbert st->num = orig_num; 2082a8b690f9STom Herbert 2083a8b690f9STom Herbert return rc; 2084a8b690f9STom Herbert } 2085a8b690f9STom Herbert 20861da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 20871da177e4SLinus Torvalds { 20881da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2089a8b690f9STom Herbert void *rc; 2090a8b690f9STom Herbert 2091a8b690f9STom Herbert if (*pos && *pos == st->last_pos) { 2092a8b690f9STom Herbert rc = tcp_seek_last_pos(seq); 2093a8b690f9STom Herbert if (rc) 2094a8b690f9STom Herbert goto out; 2095a8b690f9STom Herbert } 2096a8b690f9STom Herbert 20971da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 20981da177e4SLinus Torvalds st->num = 0; 2099a8b690f9STom Herbert st->bucket = 0; 2100a8b690f9STom Herbert st->offset = 0; 2101a8b690f9STom Herbert rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2102a8b690f9STom Herbert 2103a8b690f9STom Herbert out: 2104a8b690f9STom Herbert st->last_pos = *pos; 2105a8b690f9STom Herbert return rc; 21061da177e4SLinus Torvalds } 21071da177e4SLinus Torvalds 21081da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 21091da177e4SLinus Torvalds { 2110a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 21111da177e4SLinus Torvalds void *rc = NULL; 21121da177e4SLinus Torvalds 21131da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 21141da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 21151da177e4SLinus Torvalds goto out; 21161da177e4SLinus Torvalds } 21171da177e4SLinus Torvalds 21181da177e4SLinus Torvalds switch (st->state) { 21191da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 21201da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 21211da177e4SLinus Torvalds rc = listening_get_next(seq, v); 21221da177e4SLinus Torvalds if (!rc) { 21231da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 2124a8b690f9STom Herbert st->bucket = 0; 2125a8b690f9STom Herbert st->offset = 0; 21261da177e4SLinus Torvalds rc = established_get_first(seq); 21271da177e4SLinus Torvalds } 21281da177e4SLinus Torvalds break; 21291da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 21301da177e4SLinus Torvalds rc = established_get_next(seq, v); 21311da177e4SLinus Torvalds break; 21321da177e4SLinus Torvalds } 21331da177e4SLinus Torvalds out: 21341da177e4SLinus Torvalds ++*pos; 2135a8b690f9STom Herbert st->last_pos = *pos; 21361da177e4SLinus Torvalds return rc; 21371da177e4SLinus Torvalds } 21381da177e4SLinus Torvalds 21391da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 21401da177e4SLinus Torvalds { 21411da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 21421da177e4SLinus Torvalds 21431da177e4SLinus Torvalds switch (st->state) { 21441da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 21451da177e4SLinus Torvalds if (v) { 2146463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2147463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 21481da177e4SLinus Torvalds } 21491da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 21501da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 21515caea4eaSEric Dumazet spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 21521da177e4SLinus Torvalds break; 21531da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 21541da177e4SLinus Torvalds if (v) 21559db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 21561da177e4SLinus Torvalds break; 21571da177e4SLinus Torvalds } 21581da177e4SLinus Torvalds } 21591da177e4SLinus Torvalds 216073cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file) 21611da177e4SLinus Torvalds { 2162d9dda78bSAl Viro struct tcp_seq_afinfo *afinfo = PDE_DATA(inode); 21631da177e4SLinus Torvalds struct tcp_iter_state *s; 216452d6f3f1SDenis V. Lunev int err; 21651da177e4SLinus Torvalds 216652d6f3f1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 216752d6f3f1SDenis V. Lunev sizeof(struct tcp_iter_state)); 216852d6f3f1SDenis V. Lunev if (err < 0) 216952d6f3f1SDenis V. Lunev return err; 2170f40c8174SDaniel Lezcano 217152d6f3f1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 21721da177e4SLinus Torvalds s->family = afinfo->family; 2173a8b690f9STom Herbert s->last_pos = 0; 2174f40c8174SDaniel Lezcano return 0; 2175f40c8174SDaniel Lezcano } 217673cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open); 2177f40c8174SDaniel Lezcano 21786f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) 21791da177e4SLinus Torvalds { 21801da177e4SLinus Torvalds int rc = 0; 21811da177e4SLinus Torvalds struct proc_dir_entry *p; 21821da177e4SLinus Torvalds 21839427c4b3SDenis V. Lunev afinfo->seq_ops.start = tcp_seq_start; 21849427c4b3SDenis V. Lunev afinfo->seq_ops.next = tcp_seq_next; 21859427c4b3SDenis V. Lunev afinfo->seq_ops.stop = tcp_seq_stop; 21869427c4b3SDenis V. Lunev 218784841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 218873cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 218984841c3cSDenis V. Lunev if (!p) 21901da177e4SLinus Torvalds rc = -ENOMEM; 21911da177e4SLinus Torvalds return rc; 21921da177e4SLinus Torvalds } 21934bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register); 21941da177e4SLinus Torvalds 21956f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 21961da177e4SLinus Torvalds { 2197ece31ffdSGao feng remove_proc_entry(afinfo->name, net->proc_net); 21981da177e4SLinus Torvalds } 21994bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister); 22001da177e4SLinus Torvalds 2201cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req, 2202652586dfSTetsuo Handa struct seq_file *f, int i, kuid_t uid) 22031da177e4SLinus Torvalds { 22042e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 2205a399a805SEric Dumazet long delta = req->expires - jiffies; 22061da177e4SLinus Torvalds 22075e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2208652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", 22091da177e4SLinus Torvalds i, 2210634fb979SEric Dumazet ireq->ir_loc_addr, 2211c720c7e8SEric Dumazet ntohs(inet_sk(sk)->inet_sport), 2212634fb979SEric Dumazet ireq->ir_rmt_addr, 2213634fb979SEric Dumazet ntohs(ireq->ir_rmt_port), 22141da177e4SLinus Torvalds TCP_SYN_RECV, 22151da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. */ 22161da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 2217a399a805SEric Dumazet jiffies_delta_to_clock_t(delta), 2218e6c022a4SEric Dumazet req->num_timeout, 2219a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), uid), 22201da177e4SLinus Torvalds 0, /* non standard timer */ 22211da177e4SLinus Torvalds 0, /* open_requests have no inode */ 22221da177e4SLinus Torvalds atomic_read(&sk->sk_refcnt), 2223652586dfSTetsuo Handa req); 22241da177e4SLinus Torvalds } 22251da177e4SLinus Torvalds 2226652586dfSTetsuo Handa static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) 22271da177e4SLinus Torvalds { 22281da177e4SLinus Torvalds int timer_active; 22291da177e4SLinus Torvalds unsigned long timer_expires; 2230cf533ea5SEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 2231cf4c6bf8SIlpo Järvinen const struct inet_connection_sock *icsk = inet_csk(sk); 2232cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 2233168a8f58SJerry Chu struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; 2234c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2235c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2236c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2237c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 223849d09007SEric Dumazet int rx_queue; 22391da177e4SLinus Torvalds 22406ba8a3b1SNandita Dukkipati if (icsk->icsk_pending == ICSK_TIME_RETRANS || 22416ba8a3b1SNandita Dukkipati icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || 22426ba8a3b1SNandita Dukkipati icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { 22431da177e4SLinus Torvalds timer_active = 1; 2244463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2245463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 22461da177e4SLinus Torvalds timer_active = 4; 2247463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2248cf4c6bf8SIlpo Järvinen } else if (timer_pending(&sk->sk_timer)) { 22491da177e4SLinus Torvalds timer_active = 2; 2250cf4c6bf8SIlpo Järvinen timer_expires = sk->sk_timer.expires; 22511da177e4SLinus Torvalds } else { 22521da177e4SLinus Torvalds timer_active = 0; 22531da177e4SLinus Torvalds timer_expires = jiffies; 22541da177e4SLinus Torvalds } 22551da177e4SLinus Torvalds 225649d09007SEric Dumazet if (sk->sk_state == TCP_LISTEN) 225749d09007SEric Dumazet rx_queue = sk->sk_ack_backlog; 225849d09007SEric Dumazet else 225949d09007SEric Dumazet /* 226049d09007SEric Dumazet * because we dont lock socket, we might find a transient negative value 226149d09007SEric Dumazet */ 226249d09007SEric Dumazet rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 226349d09007SEric Dumazet 22645e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 2265652586dfSTetsuo Handa "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", 2266cf4c6bf8SIlpo Järvinen i, src, srcp, dest, destp, sk->sk_state, 226747da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 226849d09007SEric Dumazet rx_queue, 22691da177e4SLinus Torvalds timer_active, 2270a399a805SEric Dumazet jiffies_delta_to_clock_t(timer_expires - jiffies), 2271463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 2272a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), 22736687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 2274cf4c6bf8SIlpo Järvinen sock_i_ino(sk), 2275cf4c6bf8SIlpo Järvinen atomic_read(&sk->sk_refcnt), sk, 22767be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_rto), 22777be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_ack.ato), 2278463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 22791da177e4SLinus Torvalds tp->snd_cwnd, 2280168a8f58SJerry Chu sk->sk_state == TCP_LISTEN ? 2281168a8f58SJerry Chu (fastopenq ? fastopenq->max_qlen : 0) : 2282652586dfSTetsuo Handa (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); 22831da177e4SLinus Torvalds } 22841da177e4SLinus Torvalds 2285cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw, 2286652586dfSTetsuo Handa struct seq_file *f, int i) 22871da177e4SLinus Torvalds { 228823f33c2dSAl Viro __be32 dest, src; 22891da177e4SLinus Torvalds __u16 destp, srcp; 2290e2a1d3e4SEric Dumazet s32 delta = tw->tw_ttd - inet_tw_time_stamp(); 22911da177e4SLinus Torvalds 22921da177e4SLinus Torvalds dest = tw->tw_daddr; 22931da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 22941da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 22951da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 22961da177e4SLinus Torvalds 22975e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2298652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", 22991da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 2300a399a805SEric Dumazet 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, 2301652586dfSTetsuo Handa atomic_read(&tw->tw_refcnt), tw); 23021da177e4SLinus Torvalds } 23031da177e4SLinus Torvalds 23041da177e4SLinus Torvalds #define TMPSZ 150 23051da177e4SLinus Torvalds 23061da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 23071da177e4SLinus Torvalds { 23081da177e4SLinus Torvalds struct tcp_iter_state *st; 230905dbc7b5SEric Dumazet struct sock *sk = v; 23101da177e4SLinus Torvalds 2311652586dfSTetsuo Handa seq_setwidth(seq, TMPSZ - 1); 23121da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 2313652586dfSTetsuo Handa seq_puts(seq, " sl local_address rem_address st tx_queue " 23141da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 23151da177e4SLinus Torvalds "inode"); 23161da177e4SLinus Torvalds goto out; 23171da177e4SLinus Torvalds } 23181da177e4SLinus Torvalds st = seq->private; 23191da177e4SLinus Torvalds 23201da177e4SLinus Torvalds switch (st->state) { 23211da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 23221da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 232305dbc7b5SEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 2324652586dfSTetsuo Handa get_timewait4_sock(v, seq, st->num); 232505dbc7b5SEric Dumazet else 2326652586dfSTetsuo Handa get_tcp4_sock(v, seq, st->num); 23271da177e4SLinus Torvalds break; 23281da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 2329652586dfSTetsuo Handa get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid); 23301da177e4SLinus Torvalds break; 23311da177e4SLinus Torvalds } 23321da177e4SLinus Torvalds out: 2333652586dfSTetsuo Handa seq_pad(seq, '\n'); 23341da177e4SLinus Torvalds return 0; 23351da177e4SLinus Torvalds } 23361da177e4SLinus Torvalds 233773cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = { 233873cb88ecSArjan van de Ven .owner = THIS_MODULE, 233973cb88ecSArjan van de Ven .open = tcp_seq_open, 234073cb88ecSArjan van de Ven .read = seq_read, 234173cb88ecSArjan van de Ven .llseek = seq_lseek, 234273cb88ecSArjan van de Ven .release = seq_release_net 234373cb88ecSArjan van de Ven }; 234473cb88ecSArjan van de Ven 23451da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 23461da177e4SLinus Torvalds .name = "tcp", 23471da177e4SLinus Torvalds .family = AF_INET, 234873cb88ecSArjan van de Ven .seq_fops = &tcp_afinfo_seq_fops, 23499427c4b3SDenis V. Lunev .seq_ops = { 23509427c4b3SDenis V. Lunev .show = tcp4_seq_show, 23519427c4b3SDenis V. Lunev }, 23521da177e4SLinus Torvalds }; 23531da177e4SLinus Torvalds 23542c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net) 2355757764f6SPavel Emelyanov { 2356757764f6SPavel Emelyanov return tcp_proc_register(net, &tcp4_seq_afinfo); 2357757764f6SPavel Emelyanov } 2358757764f6SPavel Emelyanov 23592c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net) 2360757764f6SPavel Emelyanov { 2361757764f6SPavel Emelyanov tcp_proc_unregister(net, &tcp4_seq_afinfo); 2362757764f6SPavel Emelyanov } 2363757764f6SPavel Emelyanov 2364757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = { 2365757764f6SPavel Emelyanov .init = tcp4_proc_init_net, 2366757764f6SPavel Emelyanov .exit = tcp4_proc_exit_net, 2367757764f6SPavel Emelyanov }; 2368757764f6SPavel Emelyanov 23691da177e4SLinus Torvalds int __init tcp4_proc_init(void) 23701da177e4SLinus Torvalds { 2371757764f6SPavel Emelyanov return register_pernet_subsys(&tcp4_net_ops); 23721da177e4SLinus Torvalds } 23731da177e4SLinus Torvalds 23741da177e4SLinus Torvalds void tcp4_proc_exit(void) 23751da177e4SLinus Torvalds { 2376757764f6SPavel Emelyanov unregister_pernet_subsys(&tcp4_net_ops); 23771da177e4SLinus Torvalds } 23781da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 23791da177e4SLinus Torvalds 23801da177e4SLinus Torvalds struct proto tcp_prot = { 23811da177e4SLinus Torvalds .name = "TCP", 23821da177e4SLinus Torvalds .owner = THIS_MODULE, 23831da177e4SLinus Torvalds .close = tcp_close, 23841da177e4SLinus Torvalds .connect = tcp_v4_connect, 23851da177e4SLinus Torvalds .disconnect = tcp_disconnect, 2386463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 23871da177e4SLinus Torvalds .ioctl = tcp_ioctl, 23881da177e4SLinus Torvalds .init = tcp_v4_init_sock, 23891da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 23901da177e4SLinus Torvalds .shutdown = tcp_shutdown, 23911da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 23921da177e4SLinus Torvalds .getsockopt = tcp_getsockopt, 23931da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 23947ba42910SChangli Gao .sendmsg = tcp_sendmsg, 23957ba42910SChangli Gao .sendpage = tcp_sendpage, 23961da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 239746d3ceabSEric Dumazet .release_cb = tcp_release_cb, 2398ab1e0a13SArnaldo Carvalho de Melo .hash = inet_hash, 2399ab1e0a13SArnaldo Carvalho de Melo .unhash = inet_unhash, 2400ab1e0a13SArnaldo Carvalho de Melo .get_port = inet_csk_get_port, 24011da177e4SLinus Torvalds .enter_memory_pressure = tcp_enter_memory_pressure, 2402c9bee3b7SEric Dumazet .stream_memory_free = tcp_stream_memory_free, 24031da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 24040a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 24051da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 24061da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 2407a4fe34bfSEric W. Biederman .sysctl_mem = sysctl_tcp_mem, 24081da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 24091da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 24101da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 24111da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 24123ab5aee7SEric Dumazet .slab_flags = SLAB_DESTROY_BY_RCU, 24136d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 241460236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 241539d8cda7SPavel Emelyanov .h.hashinfo = &tcp_hashinfo, 24167ba42910SChangli Gao .no_autobind = true, 2417543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 2418543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 2419543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 2420543d9cfeSArnaldo Carvalho de Melo #endif 2421c255a458SAndrew Morton #ifdef CONFIG_MEMCG_KMEM 2422d1a4c0b3SGlauber Costa .init_cgroup = tcp_init_cgroup, 2423d1a4c0b3SGlauber Costa .destroy_cgroup = tcp_destroy_cgroup, 2424d1a4c0b3SGlauber Costa .proto_cgroup = tcp_proto_cgroup, 2425d1a4c0b3SGlauber Costa #endif 24261da177e4SLinus Torvalds }; 24274bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot); 24281da177e4SLinus Torvalds 2429046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net) 2430046ee902SDenis V. Lunev { 24315d134f1cSHannes Frederic Sowa net->ipv4.sysctl_tcp_ecn = 2; 2432be9f4a44SEric Dumazet return 0; 2433046ee902SDenis V. Lunev } 2434046ee902SDenis V. Lunev 2435046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net) 2436046ee902SDenis V. Lunev { 2437b099ce26SEric W. Biederman } 2438b099ce26SEric W. Biederman 2439b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2440b099ce26SEric W. Biederman { 2441b099ce26SEric W. Biederman inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); 2442046ee902SDenis V. Lunev } 2443046ee902SDenis V. Lunev 2444046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = { 2445046ee902SDenis V. Lunev .init = tcp_sk_init, 2446046ee902SDenis V. Lunev .exit = tcp_sk_exit, 2447b099ce26SEric W. Biederman .exit_batch = tcp_sk_exit_batch, 2448046ee902SDenis V. Lunev }; 2449046ee902SDenis V. Lunev 24509b0f976fSDenis V. Lunev void __init tcp_v4_init(void) 24511da177e4SLinus Torvalds { 24525caea4eaSEric Dumazet inet_hashinfo_init(&tcp_hashinfo); 24536a1b3054SEric W. Biederman if (register_pernet_subsys(&tcp_sk_ops)) 24541da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 24551da177e4SLinus Torvalds } 2456