11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Implementation of the Transmission Control Protocol(TCP). 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * IPv4 specific functions 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * 111da177e4SLinus Torvalds * code split from: 121da177e4SLinus Torvalds * linux/ipv4/tcp.c 131da177e4SLinus Torvalds * linux/ipv4/tcp_input.c 141da177e4SLinus Torvalds * linux/ipv4/tcp_output.c 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * See tcp.c for author information 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 191da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 201da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 211da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 221da177e4SLinus Torvalds */ 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds /* 251da177e4SLinus Torvalds * Changes: 261da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 271da177e4SLinus Torvalds * This code is dedicated to John Dyson. 281da177e4SLinus Torvalds * David S. Miller : Change semantics of established hash, 291da177e4SLinus Torvalds * half is devoted to TIME_WAIT sockets 301da177e4SLinus Torvalds * and the rest go in the other half. 311da177e4SLinus Torvalds * Andi Kleen : Add support for syncookies and fixed 321da177e4SLinus Torvalds * some bugs: ip options weren't passed to 331da177e4SLinus Torvalds * the TCP layer, missed a check for an 341da177e4SLinus Torvalds * ACK bit. 
351da177e4SLinus Torvalds * Andi Kleen : Implemented fast path mtu discovery. 361da177e4SLinus Torvalds * Fixed many serious bugs in the 3760236fddSArnaldo Carvalho de Melo * request_sock handling and moved 381da177e4SLinus Torvalds * most of it into the af independent code. 391da177e4SLinus Torvalds * Added tail drop and some other bugfixes. 40caa20d9aSStephen Hemminger * Added new listen semantics. 411da177e4SLinus Torvalds * Mike McLagan : Routing by source 421da177e4SLinus Torvalds * Juan Jose Ciarlante: ip_dynaddr bits 431da177e4SLinus Torvalds * Andi Kleen: various fixes. 441da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year 451da177e4SLinus Torvalds * coma. 461da177e4SLinus Torvalds * Andi Kleen : Fix new listen. 471da177e4SLinus Torvalds * Andi Kleen : Fix accept error reporting. 481da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 491da177e4SLinus Torvalds * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 501da177e4SLinus Torvalds * a single port at the same time. 511da177e4SLinus Torvalds */ 521da177e4SLinus Torvalds 53afd46503SJoe Perches #define pr_fmt(fmt) "TCP: " fmt 541da177e4SLinus Torvalds 55eb4dea58SHerbert Xu #include <linux/bottom_half.h> 561da177e4SLinus Torvalds #include <linux/types.h> 571da177e4SLinus Torvalds #include <linux/fcntl.h> 581da177e4SLinus Torvalds #include <linux/module.h> 591da177e4SLinus Torvalds #include <linux/random.h> 601da177e4SLinus Torvalds #include <linux/cache.h> 611da177e4SLinus Torvalds #include <linux/jhash.h> 621da177e4SLinus Torvalds #include <linux/init.h> 631da177e4SLinus Torvalds #include <linux/times.h> 645a0e3ad6STejun Heo #include <linux/slab.h> 651da177e4SLinus Torvalds 66457c4cbcSEric W. 
Biederman #include <net/net_namespace.h> 671da177e4SLinus Torvalds #include <net/icmp.h> 68304a1618SArnaldo Carvalho de Melo #include <net/inet_hashtables.h> 691da177e4SLinus Torvalds #include <net/tcp.h> 7020380731SArnaldo Carvalho de Melo #include <net/transp_v6.h> 711da177e4SLinus Torvalds #include <net/ipv6.h> 721da177e4SLinus Torvalds #include <net/inet_common.h> 736d6ee43eSArnaldo Carvalho de Melo #include <net/timewait_sock.h> 741da177e4SLinus Torvalds #include <net/xfrm.h> 751a2449a8SChris Leech #include <net/netdma.h> 766e5714eaSDavid S. Miller #include <net/secure_seq.h> 77d1a4c0b3SGlauber Costa #include <net/tcp_memcontrol.h> 781da177e4SLinus Torvalds 791da177e4SLinus Torvalds #include <linux/inet.h> 801da177e4SLinus Torvalds #include <linux/ipv6.h> 811da177e4SLinus Torvalds #include <linux/stddef.h> 821da177e4SLinus Torvalds #include <linux/proc_fs.h> 831da177e4SLinus Torvalds #include <linux/seq_file.h> 841da177e4SLinus Torvalds 85cfb6eeb4SYOSHIFUJI Hideaki #include <linux/crypto.h> 86cfb6eeb4SYOSHIFUJI Hideaki #include <linux/scatterlist.h> 87cfb6eeb4SYOSHIFUJI Hideaki 88ab32ea5dSBrian Haley int sysctl_tcp_tw_reuse __read_mostly; 89ab32ea5dSBrian Haley int sysctl_tcp_low_latency __read_mostly; 904bc2f18bSEric Dumazet EXPORT_SYMBOL(sysctl_tcp_low_latency); 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds 93cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 94a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 95318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th); 96cfb6eeb4SYOSHIFUJI Hideaki #endif 97cfb6eeb4SYOSHIFUJI Hideaki 985caea4eaSEric Dumazet struct inet_hashinfo tcp_hashinfo; 994bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_hashinfo); 1001da177e4SLinus Torvalds 101cf533ea5SEric Dumazet static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) 1021da177e4SLinus Torvalds { 103eddc9ec5SArnaldo Carvalho de Melo return secure_tcp_sequence_number(ip_hdr(skb)->daddr, 
104eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, 105aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->dest, 106aa8223c7SArnaldo Carvalho de Melo tcp_hdr(skb)->source); 1071da177e4SLinus Torvalds } 1081da177e4SLinus Torvalds 1096d6ee43eSArnaldo Carvalho de Melo int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) 1106d6ee43eSArnaldo Carvalho de Melo { 1116d6ee43eSArnaldo Carvalho de Melo const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); 1126d6ee43eSArnaldo Carvalho de Melo struct tcp_sock *tp = tcp_sk(sk); 1136d6ee43eSArnaldo Carvalho de Melo 1146d6ee43eSArnaldo Carvalho de Melo /* With PAWS, it is safe from the viewpoint 1156d6ee43eSArnaldo Carvalho de Melo of data integrity. Even without PAWS it is safe provided sequence 1166d6ee43eSArnaldo Carvalho de Melo spaces do not overlap i.e. at data rates <= 80Mbit/sec. 1176d6ee43eSArnaldo Carvalho de Melo 1186d6ee43eSArnaldo Carvalho de Melo Actually, the idea is close to VJ's one, only timestamp cache is 1196d6ee43eSArnaldo Carvalho de Melo held not per host, but per port pair and TW bucket is used as state 1206d6ee43eSArnaldo Carvalho de Melo holder. 1216d6ee43eSArnaldo Carvalho de Melo 1226d6ee43eSArnaldo Carvalho de Melo If TW bucket has been already destroyed we fall back to VJ's scheme 1236d6ee43eSArnaldo Carvalho de Melo and use initial timestamp retrieved from peer table. 
1246d6ee43eSArnaldo Carvalho de Melo */ 1256d6ee43eSArnaldo Carvalho de Melo if (tcptw->tw_ts_recent_stamp && 1266d6ee43eSArnaldo Carvalho de Melo (twp == NULL || (sysctl_tcp_tw_reuse && 1279d729f72SJames Morris get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { 1286d6ee43eSArnaldo Carvalho de Melo tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; 1296d6ee43eSArnaldo Carvalho de Melo if (tp->write_seq == 0) 1306d6ee43eSArnaldo Carvalho de Melo tp->write_seq = 1; 1316d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent = tcptw->tw_ts_recent; 1326d6ee43eSArnaldo Carvalho de Melo tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 1336d6ee43eSArnaldo Carvalho de Melo sock_hold(sktw); 1346d6ee43eSArnaldo Carvalho de Melo return 1; 1356d6ee43eSArnaldo Carvalho de Melo } 1366d6ee43eSArnaldo Carvalho de Melo 1376d6ee43eSArnaldo Carvalho de Melo return 0; 1386d6ee43eSArnaldo Carvalho de Melo } 1396d6ee43eSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(tcp_twsk_unique); 1406d6ee43eSArnaldo Carvalho de Melo 141ee995283SPavel Emelyanov static int tcp_repair_connect(struct sock *sk) 142ee995283SPavel Emelyanov { 143ee995283SPavel Emelyanov tcp_connect_init(sk); 144ee995283SPavel Emelyanov tcp_finish_connect(sk, NULL); 145ee995283SPavel Emelyanov 146ee995283SPavel Emelyanov return 0; 147ee995283SPavel Emelyanov } 148ee995283SPavel Emelyanov 1491da177e4SLinus Torvalds /* This will initiate an outgoing connection. */ 1501da177e4SLinus Torvalds int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 1511da177e4SLinus Torvalds { 1522d7192d6SDavid S. Miller struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 1531da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 1541da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 155dca8b089SDavid S. Miller __be16 orig_sport, orig_dport; 156bada8adcSAl Viro __be32 daddr, nexthop; 157da905bd1SDavid S. Miller struct flowi4 *fl4; 1582d7192d6SDavid S. 
Miller struct rtable *rt; 1591da177e4SLinus Torvalds int err; 160f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 1611da177e4SLinus Torvalds 1621da177e4SLinus Torvalds if (addr_len < sizeof(struct sockaddr_in)) 1631da177e4SLinus Torvalds return -EINVAL; 1641da177e4SLinus Torvalds 1651da177e4SLinus Torvalds if (usin->sin_family != AF_INET) 1661da177e4SLinus Torvalds return -EAFNOSUPPORT; 1671da177e4SLinus Torvalds 1681da177e4SLinus Torvalds nexthop = daddr = usin->sin_addr.s_addr; 169f6d8bd05SEric Dumazet inet_opt = rcu_dereference_protected(inet->inet_opt, 170f6d8bd05SEric Dumazet sock_owned_by_user(sk)); 171f6d8bd05SEric Dumazet if (inet_opt && inet_opt->opt.srr) { 1721da177e4SLinus Torvalds if (!daddr) 1731da177e4SLinus Torvalds return -EINVAL; 174f6d8bd05SEric Dumazet nexthop = inet_opt->opt.faddr; 1751da177e4SLinus Torvalds } 1761da177e4SLinus Torvalds 177dca8b089SDavid S. Miller orig_sport = inet->inet_sport; 178dca8b089SDavid S. Miller orig_dport = usin->sin_port; 179da905bd1SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 180da905bd1SDavid S. Miller rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, 1811da177e4SLinus Torvalds RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 1821da177e4SLinus Torvalds IPPROTO_TCP, 183abdf7e72SDavid S. Miller orig_sport, orig_dport, sk, true); 184b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 185b23dd4feSDavid S. Miller err = PTR_ERR(rt); 186b23dd4feSDavid S. Miller if (err == -ENETUNREACH) 1877c73a6faSPavel Emelyanov IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); 188b23dd4feSDavid S. Miller return err; 189584bdf8cSWei Dong } 1901da177e4SLinus Torvalds 1911da177e4SLinus Torvalds if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 1921da177e4SLinus Torvalds ip_rt_put(rt); 1931da177e4SLinus Torvalds return -ENETUNREACH; 1941da177e4SLinus Torvalds } 1951da177e4SLinus Torvalds 196f6d8bd05SEric Dumazet if (!inet_opt || !inet_opt->opt.srr) 197da905bd1SDavid S. 
Miller daddr = fl4->daddr; 1981da177e4SLinus Torvalds 199c720c7e8SEric Dumazet if (!inet->inet_saddr) 200da905bd1SDavid S. Miller inet->inet_saddr = fl4->saddr; 201c720c7e8SEric Dumazet inet->inet_rcv_saddr = inet->inet_saddr; 2021da177e4SLinus Torvalds 203c720c7e8SEric Dumazet if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { 2041da177e4SLinus Torvalds /* Reset inherited state */ 2051da177e4SLinus Torvalds tp->rx_opt.ts_recent = 0; 2061da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = 0; 207ee995283SPavel Emelyanov if (likely(!tp->repair)) 2081da177e4SLinus Torvalds tp->write_seq = 0; 2091da177e4SLinus Torvalds } 2101da177e4SLinus Torvalds 211295ff7edSArnaldo Carvalho de Melo if (tcp_death_row.sysctl_tw_recycle && 212da905bd1SDavid S. Miller !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) { 213ed2361e6SDavid S. Miller struct inet_peer *peer = rt_get_peer(rt, fl4->daddr); 2147174259eSArnaldo Carvalho de Melo /* 2157174259eSArnaldo Carvalho de Melo * VJ's idea. We save last timestamp seen from 2167174259eSArnaldo Carvalho de Melo * the destination in peer table, when entering state 2177174259eSArnaldo Carvalho de Melo * TIME-WAIT * and initialize rx_opt.ts_recent from it, 2187174259eSArnaldo Carvalho de Melo * when trying new connection. 
2191da177e4SLinus Torvalds */ 220317fe0e6SEric Dumazet if (peer) { 221317fe0e6SEric Dumazet inet_peer_refcheck(peer); 222317fe0e6SEric Dumazet if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { 2231da177e4SLinus Torvalds tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 2241da177e4SLinus Torvalds tp->rx_opt.ts_recent = peer->tcp_ts; 2251da177e4SLinus Torvalds } 2261da177e4SLinus Torvalds } 227317fe0e6SEric Dumazet } 2281da177e4SLinus Torvalds 229c720c7e8SEric Dumazet inet->inet_dport = usin->sin_port; 230c720c7e8SEric Dumazet inet->inet_daddr = daddr; 2311da177e4SLinus Torvalds 232d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_ext_hdr_len = 0; 233f6d8bd05SEric Dumazet if (inet_opt) 234f6d8bd05SEric Dumazet inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 2351da177e4SLinus Torvalds 236bee7ca9eSWilliam Allen Simpson tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; 2371da177e4SLinus Torvalds 2381da177e4SLinus Torvalds /* Socket identity is still unknown (sport may be zero). 2391da177e4SLinus Torvalds * However we set state to SYN-SENT and not releasing socket 2401da177e4SLinus Torvalds * lock select source port, enter ourselves into the hash tables and 2411da177e4SLinus Torvalds * complete initialization after this. 2421da177e4SLinus Torvalds */ 2431da177e4SLinus Torvalds tcp_set_state(sk, TCP_SYN_SENT); 244a7f5e7f1SArnaldo Carvalho de Melo err = inet_hash_connect(&tcp_death_row, sk); 2451da177e4SLinus Torvalds if (err) 2461da177e4SLinus Torvalds goto failure; 2471da177e4SLinus Torvalds 248da905bd1SDavid S. Miller rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, 249c720c7e8SEric Dumazet inet->inet_sport, inet->inet_dport, sk); 250b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 251b23dd4feSDavid S. Miller err = PTR_ERR(rt); 252b23dd4feSDavid S. Miller rt = NULL; 2531da177e4SLinus Torvalds goto failure; 254b23dd4feSDavid S. Miller } 2551da177e4SLinus Torvalds /* OK, now commit destination to socket. 
*/ 256bcd76111SHerbert Xu sk->sk_gso_type = SKB_GSO_TCPV4; 257d8d1f30bSChangli Gao sk_setup_caps(sk, &rt->dst); 2581da177e4SLinus Torvalds 259ee995283SPavel Emelyanov if (!tp->write_seq && likely(!tp->repair)) 260c720c7e8SEric Dumazet tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 261c720c7e8SEric Dumazet inet->inet_daddr, 262c720c7e8SEric Dumazet inet->inet_sport, 2631da177e4SLinus Torvalds usin->sin_port); 2641da177e4SLinus Torvalds 265c720c7e8SEric Dumazet inet->inet_id = tp->write_seq ^ jiffies; 2661da177e4SLinus Torvalds 267ee995283SPavel Emelyanov if (likely(!tp->repair)) 2681da177e4SLinus Torvalds err = tcp_connect(sk); 269ee995283SPavel Emelyanov else 270ee995283SPavel Emelyanov err = tcp_repair_connect(sk); 271ee995283SPavel Emelyanov 2721da177e4SLinus Torvalds rt = NULL; 2731da177e4SLinus Torvalds if (err) 2741da177e4SLinus Torvalds goto failure; 2751da177e4SLinus Torvalds 2761da177e4SLinus Torvalds return 0; 2771da177e4SLinus Torvalds 2781da177e4SLinus Torvalds failure: 2797174259eSArnaldo Carvalho de Melo /* 2807174259eSArnaldo Carvalho de Melo * This unhashes the socket and releases the local port, 2817174259eSArnaldo Carvalho de Melo * if necessary. 2827174259eSArnaldo Carvalho de Melo */ 2831da177e4SLinus Torvalds tcp_set_state(sk, TCP_CLOSE); 2841da177e4SLinus Torvalds ip_rt_put(rt); 2851da177e4SLinus Torvalds sk->sk_route_caps = 0; 286c720c7e8SEric Dumazet inet->inet_dport = 0; 2871da177e4SLinus Torvalds return err; 2881da177e4SLinus Torvalds } 2894bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_connect); 2901da177e4SLinus Torvalds 2911da177e4SLinus Torvalds /* 2921da177e4SLinus Torvalds * This routine does path mtu discovery as defined in RFC1191. 
2931da177e4SLinus Torvalds */ 294b71d1d42SEric Dumazet static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) 2951da177e4SLinus Torvalds { 2961da177e4SLinus Torvalds struct dst_entry *dst; 2971da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 2981da177e4SLinus Torvalds 2991da177e4SLinus Torvalds /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs 3001da177e4SLinus Torvalds * send out by Linux are always <576bytes so they should go through 3011da177e4SLinus Torvalds * unfragmented). 3021da177e4SLinus Torvalds */ 3031da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) 3041da177e4SLinus Torvalds return; 3051da177e4SLinus Torvalds 3061da177e4SLinus Torvalds /* We don't check in the destentry if pmtu discovery is forbidden 3071da177e4SLinus Torvalds * on this route. We just assume that no packet_to_big packets 3081da177e4SLinus Torvalds * are send back when pmtu discovery is not active. 3091da177e4SLinus Torvalds * There is a small race when the user changes this flag in the 3101da177e4SLinus Torvalds * route, but I think that's acceptable. 3111da177e4SLinus Torvalds */ 3121da177e4SLinus Torvalds if ((dst = __sk_dst_check(sk, 0)) == NULL) 3131da177e4SLinus Torvalds return; 3141da177e4SLinus Torvalds 3151da177e4SLinus Torvalds dst->ops->update_pmtu(dst, mtu); 3161da177e4SLinus Torvalds 3171da177e4SLinus Torvalds /* Something is about to be wrong... Remember soft error 3181da177e4SLinus Torvalds * for the case, if this connection will not able to recover. 
3191da177e4SLinus Torvalds */ 3201da177e4SLinus Torvalds if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 3211da177e4SLinus Torvalds sk->sk_err_soft = EMSGSIZE; 3221da177e4SLinus Torvalds 3231da177e4SLinus Torvalds mtu = dst_mtu(dst); 3241da177e4SLinus Torvalds 3251da177e4SLinus Torvalds if (inet->pmtudisc != IP_PMTUDISC_DONT && 326d83d8461SArnaldo Carvalho de Melo inet_csk(sk)->icsk_pmtu_cookie > mtu) { 3271da177e4SLinus Torvalds tcp_sync_mss(sk, mtu); 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds /* Resend the TCP packet because it's 3301da177e4SLinus Torvalds * clear that the old packet has been 3311da177e4SLinus Torvalds * dropped. This is the new "fast" path mtu 3321da177e4SLinus Torvalds * discovery. 3331da177e4SLinus Torvalds */ 3341da177e4SLinus Torvalds tcp_simple_retransmit(sk); 3351da177e4SLinus Torvalds } /* else let the usual retransmit timer handle it */ 3361da177e4SLinus Torvalds } 3371da177e4SLinus Torvalds 3381da177e4SLinus Torvalds /* 3391da177e4SLinus Torvalds * This routine is called by the ICMP module when it gets some 3401da177e4SLinus Torvalds * sort of error condition. If err < 0 then the socket should 3411da177e4SLinus Torvalds * be closed and the error returned to the user. If err > 0 3421da177e4SLinus Torvalds * it's just the icmp type << 8 | icmp code. After adjustment 3431da177e4SLinus Torvalds * header points to the first 8 bytes of the tcp header. We need 3441da177e4SLinus Torvalds * to find the appropriate port. 3451da177e4SLinus Torvalds * 3461da177e4SLinus Torvalds * The locking strategy used here is very "optimistic". When 3471da177e4SLinus Torvalds * someone else accesses the socket the ICMP is just dropped 3481da177e4SLinus Torvalds * and for some paths there is no check at all. 3491da177e4SLinus Torvalds * A more general error queue to queue errors for later handling 3501da177e4SLinus Torvalds * is probably better. 
3511da177e4SLinus Torvalds * 3521da177e4SLinus Torvalds */ 3531da177e4SLinus Torvalds 3544d1a2d9eSDamian Lukowski void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) 3551da177e4SLinus Torvalds { 356b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)icmp_skb->data; 3574d1a2d9eSDamian Lukowski struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); 358f1ecd5d9SDamian Lukowski struct inet_connection_sock *icsk; 3591da177e4SLinus Torvalds struct tcp_sock *tp; 3601da177e4SLinus Torvalds struct inet_sock *inet; 3614d1a2d9eSDamian Lukowski const int type = icmp_hdr(icmp_skb)->type; 3624d1a2d9eSDamian Lukowski const int code = icmp_hdr(icmp_skb)->code; 3631da177e4SLinus Torvalds struct sock *sk; 364f1ecd5d9SDamian Lukowski struct sk_buff *skb; 3651da177e4SLinus Torvalds __u32 seq; 366f1ecd5d9SDamian Lukowski __u32 remaining; 3671da177e4SLinus Torvalds int err; 3684d1a2d9eSDamian Lukowski struct net *net = dev_net(icmp_skb->dev); 3691da177e4SLinus Torvalds 3704d1a2d9eSDamian Lukowski if (icmp_skb->len < (iph->ihl << 2) + 8) { 371dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3721da177e4SLinus Torvalds return; 3731da177e4SLinus Torvalds } 3741da177e4SLinus Torvalds 375fd54d716SPavel Emelyanov sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, 3764d1a2d9eSDamian Lukowski iph->saddr, th->source, inet_iif(icmp_skb)); 3771da177e4SLinus Torvalds if (!sk) { 378dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 3791da177e4SLinus Torvalds return; 3801da177e4SLinus Torvalds } 3811da177e4SLinus Torvalds if (sk->sk_state == TCP_TIME_WAIT) { 3829469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 3831da177e4SLinus Torvalds return; 3841da177e4SLinus Torvalds } 3851da177e4SLinus Torvalds 3861da177e4SLinus Torvalds bh_lock_sock(sk); 3871da177e4SLinus Torvalds /* If too many ICMPs get dropped on busy 3881da177e4SLinus Torvalds * servers this needs to be solved differently. 
3891da177e4SLinus Torvalds */ 3901da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 391de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); 3921da177e4SLinus Torvalds 3931da177e4SLinus Torvalds if (sk->sk_state == TCP_CLOSE) 3941da177e4SLinus Torvalds goto out; 3951da177e4SLinus Torvalds 39697e3ecd1Sstephen hemminger if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 39797e3ecd1Sstephen hemminger NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 39897e3ecd1Sstephen hemminger goto out; 39997e3ecd1Sstephen hemminger } 40097e3ecd1Sstephen hemminger 401f1ecd5d9SDamian Lukowski icsk = inet_csk(sk); 4021da177e4SLinus Torvalds tp = tcp_sk(sk); 4031da177e4SLinus Torvalds seq = ntohl(th->seq); 4041da177e4SLinus Torvalds if (sk->sk_state != TCP_LISTEN && 4051da177e4SLinus Torvalds !between(seq, tp->snd_una, tp->snd_nxt)) { 406de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 4071da177e4SLinus Torvalds goto out; 4081da177e4SLinus Torvalds } 4091da177e4SLinus Torvalds 4101da177e4SLinus Torvalds switch (type) { 4111da177e4SLinus Torvalds case ICMP_SOURCE_QUENCH: 4121da177e4SLinus Torvalds /* Just silently ignore these. 
*/ 4131da177e4SLinus Torvalds goto out; 4141da177e4SLinus Torvalds case ICMP_PARAMETERPROB: 4151da177e4SLinus Torvalds err = EPROTO; 4161da177e4SLinus Torvalds break; 4171da177e4SLinus Torvalds case ICMP_DEST_UNREACH: 4181da177e4SLinus Torvalds if (code > NR_ICMP_UNREACH) 4191da177e4SLinus Torvalds goto out; 4201da177e4SLinus Torvalds 4211da177e4SLinus Torvalds if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 4221da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) 4231da177e4SLinus Torvalds do_pmtu_discovery(sk, iph, info); 4241da177e4SLinus Torvalds goto out; 4251da177e4SLinus Torvalds } 4261da177e4SLinus Torvalds 4271da177e4SLinus Torvalds err = icmp_err_convert[code].errno; 428f1ecd5d9SDamian Lukowski /* check if icmp_skb allows revert of backoff 429f1ecd5d9SDamian Lukowski * (see draft-zimmermann-tcp-lcd) */ 430f1ecd5d9SDamian Lukowski if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) 431f1ecd5d9SDamian Lukowski break; 432f1ecd5d9SDamian Lukowski if (seq != tp->snd_una || !icsk->icsk_retransmits || 433f1ecd5d9SDamian Lukowski !icsk->icsk_backoff) 434f1ecd5d9SDamian Lukowski break; 435f1ecd5d9SDamian Lukowski 4368f49c270SDavid S. Miller if (sock_owned_by_user(sk)) 4378f49c270SDavid S. Miller break; 4388f49c270SDavid S. Miller 439f1ecd5d9SDamian Lukowski icsk->icsk_backoff--; 4409ad7c049SJerry Chu inet_csk(sk)->icsk_rto = (tp->srtt ? 
__tcp_set_rto(tp) : 4419ad7c049SJerry Chu TCP_TIMEOUT_INIT) << icsk->icsk_backoff; 442f1ecd5d9SDamian Lukowski tcp_bound_rto(sk); 443f1ecd5d9SDamian Lukowski 444f1ecd5d9SDamian Lukowski skb = tcp_write_queue_head(sk); 445f1ecd5d9SDamian Lukowski BUG_ON(!skb); 446f1ecd5d9SDamian Lukowski 447f1ecd5d9SDamian Lukowski remaining = icsk->icsk_rto - min(icsk->icsk_rto, 448f1ecd5d9SDamian Lukowski tcp_time_stamp - TCP_SKB_CB(skb)->when); 449f1ecd5d9SDamian Lukowski 450f1ecd5d9SDamian Lukowski if (remaining) { 451f1ecd5d9SDamian Lukowski inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 452f1ecd5d9SDamian Lukowski remaining, TCP_RTO_MAX); 453f1ecd5d9SDamian Lukowski } else { 454f1ecd5d9SDamian Lukowski /* RTO revert clocked out retransmission. 455f1ecd5d9SDamian Lukowski * Will retransmit now */ 456f1ecd5d9SDamian Lukowski tcp_retransmit_timer(sk); 457f1ecd5d9SDamian Lukowski } 458f1ecd5d9SDamian Lukowski 4591da177e4SLinus Torvalds break; 4601da177e4SLinus Torvalds case ICMP_TIME_EXCEEDED: 4611da177e4SLinus Torvalds err = EHOSTUNREACH; 4621da177e4SLinus Torvalds break; 4631da177e4SLinus Torvalds default: 4641da177e4SLinus Torvalds goto out; 4651da177e4SLinus Torvalds } 4661da177e4SLinus Torvalds 4671da177e4SLinus Torvalds switch (sk->sk_state) { 46860236fddSArnaldo Carvalho de Melo struct request_sock *req, **prev; 4691da177e4SLinus Torvalds case TCP_LISTEN: 4701da177e4SLinus Torvalds if (sock_owned_by_user(sk)) 4711da177e4SLinus Torvalds goto out; 4721da177e4SLinus Torvalds 473463c84b9SArnaldo Carvalho de Melo req = inet_csk_search_req(sk, &prev, th->dest, 4741da177e4SLinus Torvalds iph->daddr, iph->saddr); 4751da177e4SLinus Torvalds if (!req) 4761da177e4SLinus Torvalds goto out; 4771da177e4SLinus Torvalds 4781da177e4SLinus Torvalds /* ICMPs are not backlogged, hence we cannot get 4791da177e4SLinus Torvalds an established socket here. 
4801da177e4SLinus Torvalds */ 481547b792cSIlpo Järvinen WARN_ON(req->sk); 4821da177e4SLinus Torvalds 4832e6599cbSArnaldo Carvalho de Melo if (seq != tcp_rsk(req)->snt_isn) { 484de0744afSPavel Emelyanov NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 4851da177e4SLinus Torvalds goto out; 4861da177e4SLinus Torvalds } 4871da177e4SLinus Torvalds 4881da177e4SLinus Torvalds /* 4891da177e4SLinus Torvalds * Still in SYN_RECV, just remove it silently. 4901da177e4SLinus Torvalds * There is no good way to pass the error to the newly 4911da177e4SLinus Torvalds * created socket, and POSIX does not want network 4921da177e4SLinus Torvalds * errors returned from accept(). 4931da177e4SLinus Torvalds */ 494463c84b9SArnaldo Carvalho de Melo inet_csk_reqsk_queue_drop(sk, req, prev); 4951da177e4SLinus Torvalds goto out; 4961da177e4SLinus Torvalds 4971da177e4SLinus Torvalds case TCP_SYN_SENT: 4981da177e4SLinus Torvalds case TCP_SYN_RECV: /* Cannot happen. 4991da177e4SLinus Torvalds It can f.e. if SYNs crossed. 5001da177e4SLinus Torvalds */ 5011da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 5021da177e4SLinus Torvalds sk->sk_err = err; 5031da177e4SLinus Torvalds 5041da177e4SLinus Torvalds sk->sk_error_report(sk); 5051da177e4SLinus Torvalds 5061da177e4SLinus Torvalds tcp_done(sk); 5071da177e4SLinus Torvalds } else { 5081da177e4SLinus Torvalds sk->sk_err_soft = err; 5091da177e4SLinus Torvalds } 5101da177e4SLinus Torvalds goto out; 5111da177e4SLinus Torvalds } 5121da177e4SLinus Torvalds 5131da177e4SLinus Torvalds /* If we've already connected we will keep trying 5141da177e4SLinus Torvalds * until we time out, or the user gives up. 5151da177e4SLinus Torvalds * 5161da177e4SLinus Torvalds * rfc1122 4.2.3.9 allows to consider as hard errors 5171da177e4SLinus Torvalds * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 5181da177e4SLinus Torvalds * but it is obsoleted by pmtu discovery). 
5191da177e4SLinus Torvalds * 5201da177e4SLinus Torvalds * Note, that in modern internet, where routing is unreliable 5211da177e4SLinus Torvalds * and in each dark corner broken firewalls sit, sending random 5221da177e4SLinus Torvalds * errors ordered by their masters even this two messages finally lose 5231da177e4SLinus Torvalds * their original sense (even Linux sends invalid PORT_UNREACHs) 5241da177e4SLinus Torvalds * 5251da177e4SLinus Torvalds * Now we are in compliance with RFCs. 5261da177e4SLinus Torvalds * --ANK (980905) 5271da177e4SLinus Torvalds */ 5281da177e4SLinus Torvalds 5291da177e4SLinus Torvalds inet = inet_sk(sk); 5301da177e4SLinus Torvalds if (!sock_owned_by_user(sk) && inet->recverr) { 5311da177e4SLinus Torvalds sk->sk_err = err; 5321da177e4SLinus Torvalds sk->sk_error_report(sk); 5331da177e4SLinus Torvalds } else { /* Only an error on timeout */ 5341da177e4SLinus Torvalds sk->sk_err_soft = err; 5351da177e4SLinus Torvalds } 5361da177e4SLinus Torvalds 5371da177e4SLinus Torvalds out: 5381da177e4SLinus Torvalds bh_unlock_sock(sk); 5391da177e4SLinus Torvalds sock_put(sk); 5401da177e4SLinus Torvalds } 5411da177e4SLinus Torvalds 542419f9f89SHerbert Xu static void __tcp_v4_send_check(struct sk_buff *skb, 543419f9f89SHerbert Xu __be32 saddr, __be32 daddr) 5441da177e4SLinus Torvalds { 545aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 5461da177e4SLinus Torvalds 54784fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_PARTIAL) { 548419f9f89SHerbert Xu th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); 549663ead3bSHerbert Xu skb->csum_start = skb_transport_header(skb) - skb->head; 550ff1dcadbSAl Viro skb->csum_offset = offsetof(struct tcphdr, check); 5511da177e4SLinus Torvalds } else { 552419f9f89SHerbert Xu th->check = tcp_v4_check(skb->len, saddr, daddr, 55307f0757aSJoe Perches csum_partial(th, 5541da177e4SLinus Torvalds th->doff << 2, 5551da177e4SLinus Torvalds skb->csum)); 5561da177e4SLinus Torvalds } 5571da177e4SLinus Torvalds } 
5581da177e4SLinus Torvalds 559419f9f89SHerbert Xu /* This routine computes an IPv4 TCP checksum. */ 560bb296246SHerbert Xu void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) 561419f9f89SHerbert Xu { 562cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 563419f9f89SHerbert Xu 564419f9f89SHerbert Xu __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); 565419f9f89SHerbert Xu } 5664bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_send_check); 567419f9f89SHerbert Xu 568a430a43dSHerbert Xu int tcp_v4_gso_send_check(struct sk_buff *skb) 569a430a43dSHerbert Xu { 570eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 571a430a43dSHerbert Xu struct tcphdr *th; 572a430a43dSHerbert Xu 573a430a43dSHerbert Xu if (!pskb_may_pull(skb, sizeof(*th))) 574a430a43dSHerbert Xu return -EINVAL; 575a430a43dSHerbert Xu 576eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 577aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 578a430a43dSHerbert Xu 579a430a43dSHerbert Xu th->check = 0; 58084fa7933SPatrick McHardy skb->ip_summed = CHECKSUM_PARTIAL; 581419f9f89SHerbert Xu __tcp_v4_send_check(skb, iph->saddr, iph->daddr); 582a430a43dSHerbert Xu return 0; 583a430a43dSHerbert Xu } 584a430a43dSHerbert Xu 5851da177e4SLinus Torvalds /* 5861da177e4SLinus Torvalds * This routine will send an RST to the other tcp. 5871da177e4SLinus Torvalds * 5881da177e4SLinus Torvalds * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) 5891da177e4SLinus Torvalds * for reset. 5901da177e4SLinus Torvalds * Answer: if a packet caused RST, it is not for a socket 5911da177e4SLinus Torvalds * existing in our system, if it is matched to a socket, 5921da177e4SLinus Torvalds * it is just duplicate segment or bug in other side's TCP. 5931da177e4SLinus Torvalds * So that we build reply only basing on parameters 5941da177e4SLinus Torvalds * arrived with segment. 5951da177e4SLinus Torvalds * Exception: precedence violation. We do not implement it in any case. 
5961da177e4SLinus Torvalds */ 5971da177e4SLinus Torvalds 598cfb6eeb4SYOSHIFUJI Hideaki static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) 5991da177e4SLinus Torvalds { 600cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 601cfb6eeb4SYOSHIFUJI Hideaki struct { 602cfb6eeb4SYOSHIFUJI Hideaki struct tcphdr th; 603cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 604714e85beSAl Viro __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; 605cfb6eeb4SYOSHIFUJI Hideaki #endif 606cfb6eeb4SYOSHIFUJI Hideaki } rep; 6071da177e4SLinus Torvalds struct ip_reply_arg arg; 608cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 609cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 610658ddaafSShawn Lu const __u8 *hash_location = NULL; 611658ddaafSShawn Lu unsigned char newhash[16]; 612658ddaafSShawn Lu int genhash; 613658ddaafSShawn Lu struct sock *sk1 = NULL; 614cfb6eeb4SYOSHIFUJI Hideaki #endif 615a86b1e30SPavel Emelyanov struct net *net; 6161da177e4SLinus Torvalds 6171da177e4SLinus Torvalds /* Never send a reset in response to a reset. */ 6181da177e4SLinus Torvalds if (th->rst) 6191da177e4SLinus Torvalds return; 6201da177e4SLinus Torvalds 621511c3f92SEric Dumazet if (skb_rtable(skb)->rt_type != RTN_LOCAL) 6221da177e4SLinus Torvalds return; 6231da177e4SLinus Torvalds 6241da177e4SLinus Torvalds /* Swap the send and the receive. 
*/ 625cfb6eeb4SYOSHIFUJI Hideaki memset(&rep, 0, sizeof(rep)); 626cfb6eeb4SYOSHIFUJI Hideaki rep.th.dest = th->source; 627cfb6eeb4SYOSHIFUJI Hideaki rep.th.source = th->dest; 628cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = sizeof(struct tcphdr) / 4; 629cfb6eeb4SYOSHIFUJI Hideaki rep.th.rst = 1; 6301da177e4SLinus Torvalds 6311da177e4SLinus Torvalds if (th->ack) { 632cfb6eeb4SYOSHIFUJI Hideaki rep.th.seq = th->ack_seq; 6331da177e4SLinus Torvalds } else { 634cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack = 1; 635cfb6eeb4SYOSHIFUJI Hideaki rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + 6361da177e4SLinus Torvalds skb->len - (th->doff << 2)); 6371da177e4SLinus Torvalds } 6381da177e4SLinus Torvalds 6397174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 640cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_base = (unsigned char *)&rep; 641cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len = sizeof(rep.th); 642cfb6eeb4SYOSHIFUJI Hideaki 643cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 644658ddaafSShawn Lu hash_location = tcp_parse_md5sig_option(th); 645658ddaafSShawn Lu if (!sk && hash_location) { 646658ddaafSShawn Lu /* 647658ddaafSShawn Lu * active side is lost. Try to find listening socket through 648658ddaafSShawn Lu * source port, and then find md5 key through listening socket. 649658ddaafSShawn Lu * we are not loose security here: 650658ddaafSShawn Lu * Incoming packet is checked with md5 hash with finding key, 651658ddaafSShawn Lu * no RST generated if md5 hash doesn't match. 
652658ddaafSShawn Lu */ 653658ddaafSShawn Lu sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), 654658ddaafSShawn Lu &tcp_hashinfo, ip_hdr(skb)->daddr, 655658ddaafSShawn Lu ntohs(th->source), inet_iif(skb)); 656658ddaafSShawn Lu /* don't send rst if it can't find key */ 657658ddaafSShawn Lu if (!sk1) 658658ddaafSShawn Lu return; 659658ddaafSShawn Lu rcu_read_lock(); 660658ddaafSShawn Lu key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) 661658ddaafSShawn Lu &ip_hdr(skb)->saddr, AF_INET); 662658ddaafSShawn Lu if (!key) 663658ddaafSShawn Lu goto release_sk1; 664658ddaafSShawn Lu 665658ddaafSShawn Lu genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); 666658ddaafSShawn Lu if (genhash || memcmp(hash_location, newhash, 16) != 0) 667658ddaafSShawn Lu goto release_sk1; 668658ddaafSShawn Lu } else { 669658ddaafSShawn Lu key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) 670658ddaafSShawn Lu &ip_hdr(skb)->saddr, 671a915da9bSEric Dumazet AF_INET) : NULL; 672658ddaafSShawn Lu } 673658ddaafSShawn Lu 674cfb6eeb4SYOSHIFUJI Hideaki if (key) { 675cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | 676cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 677cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 678cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 679cfb6eeb4SYOSHIFUJI Hideaki /* Update length and the length the header thinks exists */ 680cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 681cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len / 4; 682cfb6eeb4SYOSHIFUJI Hideaki 68349a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], 68478e645cbSIlpo Järvinen key, ip_hdr(skb)->saddr, 68578e645cbSIlpo Järvinen ip_hdr(skb)->daddr, &rep.th); 686cfb6eeb4SYOSHIFUJI Hideaki } 687cfb6eeb4SYOSHIFUJI Hideaki #endif 688eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 689eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 69052cd5750SIlpo Järvinen arg.iov[0].iov_len, 
IPPROTO_TCP, 0); 6911da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 69288ef4a5aSKOVACS Krisztian arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; 693e2446eaaSShawn Lu /* When socket is gone, all binding information is lost. 694e2446eaaSShawn Lu * routing might fail in this case. using iif for oif to 695e2446eaaSShawn Lu * make sure we can deliver it 696e2446eaaSShawn Lu */ 697e2446eaaSShawn Lu arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); 6981da177e4SLinus Torvalds 699adf30907SEric Dumazet net = dev_net(skb_dst(skb)->dev); 70066b13d99SEric Dumazet arg.tos = ip_hdr(skb)->tos; 7010a5ebb80SDavid S. Miller ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, 7027feb49c8SDenis V. Lunev &arg, arg.iov[0].iov_len); 7031da177e4SLinus Torvalds 70463231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 70563231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 706658ddaafSShawn Lu 707658ddaafSShawn Lu #ifdef CONFIG_TCP_MD5SIG 708658ddaafSShawn Lu release_sk1: 709658ddaafSShawn Lu if (sk1) { 710658ddaafSShawn Lu rcu_read_unlock(); 711658ddaafSShawn Lu sock_put(sk1); 712658ddaafSShawn Lu } 713658ddaafSShawn Lu #endif 7141da177e4SLinus Torvalds } 7151da177e4SLinus Torvalds 7161da177e4SLinus Torvalds /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states 7171da177e4SLinus Torvalds outside socket context is ugly, certainly. What can I do? 
7181da177e4SLinus Torvalds */ 7191da177e4SLinus Torvalds 7209501f972SYOSHIFUJI Hideaki static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, 7219501f972SYOSHIFUJI Hideaki u32 win, u32 ts, int oif, 72288ef4a5aSKOVACS Krisztian struct tcp_md5sig_key *key, 72366b13d99SEric Dumazet int reply_flags, u8 tos) 7241da177e4SLinus Torvalds { 725cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 7261da177e4SLinus Torvalds struct { 7271da177e4SLinus Torvalds struct tcphdr th; 728714e85beSAl Viro __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) 729cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 730cfb6eeb4SYOSHIFUJI Hideaki + (TCPOLEN_MD5SIG_ALIGNED >> 2) 731cfb6eeb4SYOSHIFUJI Hideaki #endif 732cfb6eeb4SYOSHIFUJI Hideaki ]; 7331da177e4SLinus Torvalds } rep; 7341da177e4SLinus Torvalds struct ip_reply_arg arg; 735adf30907SEric Dumazet struct net *net = dev_net(skb_dst(skb)->dev); 7361da177e4SLinus Torvalds 7371da177e4SLinus Torvalds memset(&rep.th, 0, sizeof(struct tcphdr)); 7387174259eSArnaldo Carvalho de Melo memset(&arg, 0, sizeof(arg)); 7391da177e4SLinus Torvalds 7401da177e4SLinus Torvalds arg.iov[0].iov_base = (unsigned char *)&rep; 7411da177e4SLinus Torvalds arg.iov[0].iov_len = sizeof(rep.th); 7421da177e4SLinus Torvalds if (ts) { 743cfb6eeb4SYOSHIFUJI Hideaki rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 7441da177e4SLinus Torvalds (TCPOPT_TIMESTAMP << 8) | 7451da177e4SLinus Torvalds TCPOLEN_TIMESTAMP); 746cfb6eeb4SYOSHIFUJI Hideaki rep.opt[1] = htonl(tcp_time_stamp); 747cfb6eeb4SYOSHIFUJI Hideaki rep.opt[2] = htonl(ts); 748cb48cfe8SCraig Schlenter arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; 7491da177e4SLinus Torvalds } 7501da177e4SLinus Torvalds 7511da177e4SLinus Torvalds /* Swap the send and the receive. 
*/ 7521da177e4SLinus Torvalds rep.th.dest = th->source; 7531da177e4SLinus Torvalds rep.th.source = th->dest; 7541da177e4SLinus Torvalds rep.th.doff = arg.iov[0].iov_len / 4; 7551da177e4SLinus Torvalds rep.th.seq = htonl(seq); 7561da177e4SLinus Torvalds rep.th.ack_seq = htonl(ack); 7571da177e4SLinus Torvalds rep.th.ack = 1; 7581da177e4SLinus Torvalds rep.th.window = htons(win); 7591da177e4SLinus Torvalds 760cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 761cfb6eeb4SYOSHIFUJI Hideaki if (key) { 762cfb6eeb4SYOSHIFUJI Hideaki int offset = (ts) ? 3 : 0; 763cfb6eeb4SYOSHIFUJI Hideaki 764cfb6eeb4SYOSHIFUJI Hideaki rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | 765cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_NOP << 16) | 766cfb6eeb4SYOSHIFUJI Hideaki (TCPOPT_MD5SIG << 8) | 767cfb6eeb4SYOSHIFUJI Hideaki TCPOLEN_MD5SIG); 768cfb6eeb4SYOSHIFUJI Hideaki arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; 769cfb6eeb4SYOSHIFUJI Hideaki rep.th.doff = arg.iov[0].iov_len/4; 770cfb6eeb4SYOSHIFUJI Hideaki 77149a72dfbSAdam Langley tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], 77290b7e112SAdam Langley key, ip_hdr(skb)->saddr, 77390b7e112SAdam Langley ip_hdr(skb)->daddr, &rep.th); 774cfb6eeb4SYOSHIFUJI Hideaki } 775cfb6eeb4SYOSHIFUJI Hideaki #endif 77688ef4a5aSKOVACS Krisztian arg.flags = reply_flags; 777eddc9ec5SArnaldo Carvalho de Melo arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, 778eddc9ec5SArnaldo Carvalho de Melo ip_hdr(skb)->saddr, /* XXX */ 7791da177e4SLinus Torvalds arg.iov[0].iov_len, IPPROTO_TCP, 0); 7801da177e4SLinus Torvalds arg.csumoffset = offsetof(struct tcphdr, check) / 2; 7819501f972SYOSHIFUJI Hideaki if (oif) 7829501f972SYOSHIFUJI Hideaki arg.bound_dev_if = oif; 78366b13d99SEric Dumazet arg.tos = tos; 7840a5ebb80SDavid S. Miller ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, 7857feb49c8SDenis V. 
Lunev &arg, arg.iov[0].iov_len); 7861da177e4SLinus Torvalds 78763231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 7881da177e4SLinus Torvalds } 7891da177e4SLinus Torvalds 7901da177e4SLinus Torvalds static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) 7911da177e4SLinus Torvalds { 7928feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw = inet_twsk(sk); 793cfb6eeb4SYOSHIFUJI Hideaki struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 7941da177e4SLinus Torvalds 7959501f972SYOSHIFUJI Hideaki tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 7967174259eSArnaldo Carvalho de Melo tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 7979501f972SYOSHIFUJI Hideaki tcptw->tw_ts_recent, 7989501f972SYOSHIFUJI Hideaki tw->tw_bound_dev_if, 79988ef4a5aSKOVACS Krisztian tcp_twsk_md5_key(tcptw), 80066b13d99SEric Dumazet tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, 80166b13d99SEric Dumazet tw->tw_tos 8029501f972SYOSHIFUJI Hideaki ); 8031da177e4SLinus Torvalds 8048feaf0c0SArnaldo Carvalho de Melo inet_twsk_put(tw); 8051da177e4SLinus Torvalds } 8061da177e4SLinus Torvalds 8076edafaafSGui Jianfeng static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, 8087174259eSArnaldo Carvalho de Melo struct request_sock *req) 8091da177e4SLinus Torvalds { 8109501f972SYOSHIFUJI Hideaki tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, 811cfb6eeb4SYOSHIFUJI Hideaki tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, 8129501f972SYOSHIFUJI Hideaki req->ts_recent, 8139501f972SYOSHIFUJI Hideaki 0, 814a915da9bSEric Dumazet tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, 815a915da9bSEric Dumazet AF_INET), 81666b13d99SEric Dumazet inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, 81766b13d99SEric Dumazet ip_hdr(skb)->tos); 8181da177e4SLinus Torvalds } 8191da177e4SLinus Torvalds 8201da177e4SLinus Torvalds /* 8219bf1d83eSKris Katterjohn * Send a SYN-ACK after having received a SYN. 
82260236fddSArnaldo Carvalho de Melo * This still operates on a request_sock only, not on a big 8231da177e4SLinus Torvalds * socket. 8241da177e4SLinus Torvalds */ 82572659eccSOctavian Purdila static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 826e6b4d113SWilliam Allen Simpson struct request_sock *req, 827fff32699SEric Dumazet struct request_values *rvp, 828*7586ecebSEric Dumazet u16 queue_mapping, 829*7586ecebSEric Dumazet bool nocache) 8301da177e4SLinus Torvalds { 8312e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 8326bd023f3SDavid S. Miller struct flowi4 fl4; 8331da177e4SLinus Torvalds int err = -1; 8341da177e4SLinus Torvalds struct sk_buff * skb; 8351da177e4SLinus Torvalds 8361da177e4SLinus Torvalds /* First, grab a route. */ 837*7586ecebSEric Dumazet if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) 838fd80eb94SDenis V. Lunev return -1; 8391da177e4SLinus Torvalds 840e6b4d113SWilliam Allen Simpson skb = tcp_make_synack(sk, dst, req, rvp); 8411da177e4SLinus Torvalds 8421da177e4SLinus Torvalds if (skb) { 843419f9f89SHerbert Xu __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); 8441da177e4SLinus Torvalds 845fff32699SEric Dumazet skb_set_queue_mapping(skb, queue_mapping); 8462e6599cbSArnaldo Carvalho de Melo err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 8472e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 8482e6599cbSArnaldo Carvalho de Melo ireq->opt); 849b9df3cb8SGerrit Renker err = net_xmit_eval(err); 8501da177e4SLinus Torvalds } 8511da177e4SLinus Torvalds 8521da177e4SLinus Torvalds return err; 8531da177e4SLinus Torvalds } 8541da177e4SLinus Torvalds 85572659eccSOctavian Purdila static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, 856e6b4d113SWilliam Allen Simpson struct request_values *rvp) 857fd80eb94SDenis V. 
Lunev { 85872659eccSOctavian Purdila TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 859*7586ecebSEric Dumazet return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); 860fd80eb94SDenis V. Lunev } 861fd80eb94SDenis V. Lunev 8621da177e4SLinus Torvalds /* 86360236fddSArnaldo Carvalho de Melo * IPv4 request_sock destructor. 8641da177e4SLinus Torvalds */ 86560236fddSArnaldo Carvalho de Melo static void tcp_v4_reqsk_destructor(struct request_sock *req) 8661da177e4SLinus Torvalds { 8672e6599cbSArnaldo Carvalho de Melo kfree(inet_rsk(req)->opt); 8681da177e4SLinus Torvalds } 8691da177e4SLinus Torvalds 870946cedccSEric Dumazet /* 871a2a385d6SEric Dumazet * Return true if a syncookie should be sent 872946cedccSEric Dumazet */ 873a2a385d6SEric Dumazet bool tcp_syn_flood_action(struct sock *sk, 874946cedccSEric Dumazet const struct sk_buff *skb, 875946cedccSEric Dumazet const char *proto) 8761da177e4SLinus Torvalds { 877946cedccSEric Dumazet const char *msg = "Dropping request"; 878a2a385d6SEric Dumazet bool want_cookie = false; 879946cedccSEric Dumazet struct listen_sock *lopt; 880946cedccSEric Dumazet 881946cedccSEric Dumazet 8821da177e4SLinus Torvalds 8832a1d4bd4SFlorian Westphal #ifdef CONFIG_SYN_COOKIES 884946cedccSEric Dumazet if (sysctl_tcp_syncookies) { 8852a1d4bd4SFlorian Westphal msg = "Sending cookies"; 886a2a385d6SEric Dumazet want_cookie = true; 887946cedccSEric Dumazet NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); 888946cedccSEric Dumazet } else 88980e40daaSArnaldo Carvalho de Melo #endif 890946cedccSEric Dumazet NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 8912a1d4bd4SFlorian Westphal 892946cedccSEric Dumazet lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; 893946cedccSEric Dumazet if (!lopt->synflood_warned) { 894946cedccSEric Dumazet lopt->synflood_warned = 1; 895afd46503SJoe Perches pr_info("%s: Possible SYN flooding on port %d. %s. 
Check SNMP counters.\n", 896946cedccSEric Dumazet proto, ntohs(tcp_hdr(skb)->dest), msg); 8972a1d4bd4SFlorian Westphal } 898946cedccSEric Dumazet return want_cookie; 899946cedccSEric Dumazet } 900946cedccSEric Dumazet EXPORT_SYMBOL(tcp_syn_flood_action); 9011da177e4SLinus Torvalds 9021da177e4SLinus Torvalds /* 90360236fddSArnaldo Carvalho de Melo * Save and compile IPv4 options into the request_sock if needed. 9041da177e4SLinus Torvalds */ 905f6d8bd05SEric Dumazet static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, 9061da177e4SLinus Torvalds struct sk_buff *skb) 9071da177e4SLinus Torvalds { 908f6d8bd05SEric Dumazet const struct ip_options *opt = &(IPCB(skb)->opt); 909f6d8bd05SEric Dumazet struct ip_options_rcu *dopt = NULL; 9101da177e4SLinus Torvalds 9111da177e4SLinus Torvalds if (opt && opt->optlen) { 912f6d8bd05SEric Dumazet int opt_size = sizeof(*dopt) + opt->optlen; 913f6d8bd05SEric Dumazet 9141da177e4SLinus Torvalds dopt = kmalloc(opt_size, GFP_ATOMIC); 9151da177e4SLinus Torvalds if (dopt) { 916f6d8bd05SEric Dumazet if (ip_options_echo(&dopt->opt, skb)) { 9171da177e4SLinus Torvalds kfree(dopt); 9181da177e4SLinus Torvalds dopt = NULL; 9191da177e4SLinus Torvalds } 9201da177e4SLinus Torvalds } 9211da177e4SLinus Torvalds } 9221da177e4SLinus Torvalds return dopt; 9231da177e4SLinus Torvalds } 9241da177e4SLinus Torvalds 925cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 926cfb6eeb4SYOSHIFUJI Hideaki /* 927cfb6eeb4SYOSHIFUJI Hideaki * RFC2385 MD5 checksumming requires a mapping of 928cfb6eeb4SYOSHIFUJI Hideaki * IP address->MD5 Key. 929cfb6eeb4SYOSHIFUJI Hideaki * We need to maintain these in the sk structure. 930cfb6eeb4SYOSHIFUJI Hideaki */ 931cfb6eeb4SYOSHIFUJI Hideaki 932cfb6eeb4SYOSHIFUJI Hideaki /* Find the Key structure for an address. 
*/ 933a915da9bSEric Dumazet struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, 934a915da9bSEric Dumazet const union tcp_md5_addr *addr, 935a915da9bSEric Dumazet int family) 936cfb6eeb4SYOSHIFUJI Hideaki { 937cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 938a915da9bSEric Dumazet struct tcp_md5sig_key *key; 939a915da9bSEric Dumazet struct hlist_node *pos; 940a915da9bSEric Dumazet unsigned int size = sizeof(struct in_addr); 941a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 942cfb6eeb4SYOSHIFUJI Hideaki 943a8afca03SEric Dumazet /* caller either holds rcu_read_lock() or socket lock */ 944a8afca03SEric Dumazet md5sig = rcu_dereference_check(tp->md5sig_info, 945b4fb05eaSEric Dumazet sock_owned_by_user(sk) || 946b4fb05eaSEric Dumazet lockdep_is_held(&sk->sk_lock.slock)); 947a8afca03SEric Dumazet if (!md5sig) 948cfb6eeb4SYOSHIFUJI Hideaki return NULL; 949a915da9bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6) 950a915da9bSEric Dumazet if (family == AF_INET6) 951a915da9bSEric Dumazet size = sizeof(struct in6_addr); 952a915da9bSEric Dumazet #endif 953a8afca03SEric Dumazet hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { 954a915da9bSEric Dumazet if (key->family != family) 955a915da9bSEric Dumazet continue; 956a915da9bSEric Dumazet if (!memcmp(&key->addr, addr, size)) 957a915da9bSEric Dumazet return key; 958cfb6eeb4SYOSHIFUJI Hideaki } 959cfb6eeb4SYOSHIFUJI Hideaki return NULL; 960cfb6eeb4SYOSHIFUJI Hideaki } 961a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_lookup); 962cfb6eeb4SYOSHIFUJI Hideaki 963cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, 964cfb6eeb4SYOSHIFUJI Hideaki struct sock *addr_sk) 965cfb6eeb4SYOSHIFUJI Hideaki { 966a915da9bSEric Dumazet union tcp_md5_addr *addr; 967a915da9bSEric Dumazet 968a915da9bSEric Dumazet addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; 969a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 970cfb6eeb4SYOSHIFUJI Hideaki } 971cfb6eeb4SYOSHIFUJI 
Hideaki EXPORT_SYMBOL(tcp_v4_md5_lookup); 972cfb6eeb4SYOSHIFUJI Hideaki 973f5b99bcdSAdrian Bunk static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, 974cfb6eeb4SYOSHIFUJI Hideaki struct request_sock *req) 975cfb6eeb4SYOSHIFUJI Hideaki { 976a915da9bSEric Dumazet union tcp_md5_addr *addr; 977a915da9bSEric Dumazet 978a915da9bSEric Dumazet addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr; 979a915da9bSEric Dumazet return tcp_md5_do_lookup(sk, addr, AF_INET); 980cfb6eeb4SYOSHIFUJI Hideaki } 981cfb6eeb4SYOSHIFUJI Hideaki 982cfb6eeb4SYOSHIFUJI Hideaki /* This can be called on a newly created socket, from other files */ 983a915da9bSEric Dumazet int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, 984a915da9bSEric Dumazet int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) 985cfb6eeb4SYOSHIFUJI Hideaki { 986cfb6eeb4SYOSHIFUJI Hideaki /* Add Key to the list */ 987b0a713e9SMatthias M. Dellweg struct tcp_md5sig_key *key; 988cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 989f6685938SArnaldo Carvalho de Melo struct tcp_md5sig_info *md5sig; 990f6685938SArnaldo Carvalho de Melo 991a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); 992a915da9bSEric Dumazet if (key) { 993a915da9bSEric Dumazet /* Pre-existing entry - just update that one. 
*/ 994a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 995a915da9bSEric Dumazet key->keylen = newkeylen; 996a915da9bSEric Dumazet return 0; 997cfb6eeb4SYOSHIFUJI Hideaki } 998260fcbebSYan, Zheng 999a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1000a8afca03SEric Dumazet sock_owned_by_user(sk)); 1001a915da9bSEric Dumazet if (!md5sig) { 1002a915da9bSEric Dumazet md5sig = kmalloc(sizeof(*md5sig), gfp); 1003a915da9bSEric Dumazet if (!md5sig) 1004a915da9bSEric Dumazet return -ENOMEM; 1005a915da9bSEric Dumazet 1006a915da9bSEric Dumazet sk_nocaps_add(sk, NETIF_F_GSO_MASK); 1007a915da9bSEric Dumazet INIT_HLIST_HEAD(&md5sig->head); 1008a8afca03SEric Dumazet rcu_assign_pointer(tp->md5sig_info, md5sig); 1009a915da9bSEric Dumazet } 1010a915da9bSEric Dumazet 10115f3d9cb2SEric Dumazet key = sock_kmalloc(sk, sizeof(*key), gfp); 1012a915da9bSEric Dumazet if (!key) 1013a915da9bSEric Dumazet return -ENOMEM; 1014a915da9bSEric Dumazet if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { 10155f3d9cb2SEric Dumazet sock_kfree_s(sk, key, sizeof(*key)); 1016cfb6eeb4SYOSHIFUJI Hideaki return -ENOMEM; 1017cfb6eeb4SYOSHIFUJI Hideaki } 1018f6685938SArnaldo Carvalho de Melo 1019a915da9bSEric Dumazet memcpy(key->key, newkey, newkeylen); 1020a915da9bSEric Dumazet key->keylen = newkeylen; 1021a915da9bSEric Dumazet key->family = family; 1022a915da9bSEric Dumazet memcpy(&key->addr, addr, 1023a915da9bSEric Dumazet (family == AF_INET6) ? 
sizeof(struct in6_addr) : 1024a915da9bSEric Dumazet sizeof(struct in_addr)); 1025a915da9bSEric Dumazet hlist_add_head_rcu(&key->node, &md5sig->head); 1026cfb6eeb4SYOSHIFUJI Hideaki return 0; 1027cfb6eeb4SYOSHIFUJI Hideaki } 1028a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_add); 1029cfb6eeb4SYOSHIFUJI Hideaki 1030a915da9bSEric Dumazet int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) 1031cfb6eeb4SYOSHIFUJI Hideaki { 1032cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1033a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1034a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 1035cfb6eeb4SYOSHIFUJI Hideaki 1036a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); 1037a915da9bSEric Dumazet if (!key) 1038cfb6eeb4SYOSHIFUJI Hideaki return -ENOENT; 1039a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10405f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1041a915da9bSEric Dumazet kfree_rcu(key, rcu); 1042a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1043a8afca03SEric Dumazet sock_owned_by_user(sk)); 1044a8afca03SEric Dumazet if (hlist_empty(&md5sig->head)) 1045a915da9bSEric Dumazet tcp_free_md5sig_pool(); 1046a915da9bSEric Dumazet return 0; 1047cfb6eeb4SYOSHIFUJI Hideaki } 1048a915da9bSEric Dumazet EXPORT_SYMBOL(tcp_md5_do_del); 1049cfb6eeb4SYOSHIFUJI Hideaki 1050a915da9bSEric Dumazet void tcp_clear_md5_list(struct sock *sk) 1051cfb6eeb4SYOSHIFUJI Hideaki { 1052cfb6eeb4SYOSHIFUJI Hideaki struct tcp_sock *tp = tcp_sk(sk); 1053a915da9bSEric Dumazet struct tcp_md5sig_key *key; 1054a915da9bSEric Dumazet struct hlist_node *pos, *n; 1055a8afca03SEric Dumazet struct tcp_md5sig_info *md5sig; 1056cfb6eeb4SYOSHIFUJI Hideaki 1057a8afca03SEric Dumazet md5sig = rcu_dereference_protected(tp->md5sig_info, 1); 1058a8afca03SEric Dumazet 1059a8afca03SEric Dumazet if (!hlist_empty(&md5sig->head)) 1060cfb6eeb4SYOSHIFUJI Hideaki tcp_free_md5sig_pool(); 
1061a8afca03SEric Dumazet hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { 1062a915da9bSEric Dumazet hlist_del_rcu(&key->node); 10635f3d9cb2SEric Dumazet atomic_sub(sizeof(*key), &sk->sk_omem_alloc); 1064a915da9bSEric Dumazet kfree_rcu(key, rcu); 1065cfb6eeb4SYOSHIFUJI Hideaki } 1066cfb6eeb4SYOSHIFUJI Hideaki } 1067cfb6eeb4SYOSHIFUJI Hideaki 1068cfb6eeb4SYOSHIFUJI Hideaki static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, 1069cfb6eeb4SYOSHIFUJI Hideaki int optlen) 1070cfb6eeb4SYOSHIFUJI Hideaki { 1071cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig cmd; 1072cfb6eeb4SYOSHIFUJI Hideaki struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; 1073cfb6eeb4SYOSHIFUJI Hideaki 1074cfb6eeb4SYOSHIFUJI Hideaki if (optlen < sizeof(cmd)) 1075cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1076cfb6eeb4SYOSHIFUJI Hideaki 1077cfb6eeb4SYOSHIFUJI Hideaki if (copy_from_user(&cmd, optval, sizeof(cmd))) 1078cfb6eeb4SYOSHIFUJI Hideaki return -EFAULT; 1079cfb6eeb4SYOSHIFUJI Hideaki 1080cfb6eeb4SYOSHIFUJI Hideaki if (sin->sin_family != AF_INET) 1081cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1082cfb6eeb4SYOSHIFUJI Hideaki 1083a8afca03SEric Dumazet if (!cmd.tcpm_key || !cmd.tcpm_keylen) 1084a915da9bSEric Dumazet return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1085a915da9bSEric Dumazet AF_INET); 1086cfb6eeb4SYOSHIFUJI Hideaki 1087cfb6eeb4SYOSHIFUJI Hideaki if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 1088cfb6eeb4SYOSHIFUJI Hideaki return -EINVAL; 1089cfb6eeb4SYOSHIFUJI Hideaki 1090a915da9bSEric Dumazet return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, 1091a915da9bSEric Dumazet AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, 1092a915da9bSEric Dumazet GFP_KERNEL); 1093cfb6eeb4SYOSHIFUJI Hideaki } 1094cfb6eeb4SYOSHIFUJI Hideaki 109549a72dfbSAdam Langley static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, 109649a72dfbSAdam Langley __be32 daddr, __be32 saddr, int nbytes) 1097cfb6eeb4SYOSHIFUJI Hideaki { 
1098cfb6eeb4SYOSHIFUJI Hideaki struct tcp4_pseudohdr *bp; 109949a72dfbSAdam Langley struct scatterlist sg; 1100cfb6eeb4SYOSHIFUJI Hideaki 1101cfb6eeb4SYOSHIFUJI Hideaki bp = &hp->md5_blk.ip4; 1102cfb6eeb4SYOSHIFUJI Hideaki 1103cfb6eeb4SYOSHIFUJI Hideaki /* 110449a72dfbSAdam Langley * 1. the TCP pseudo-header (in the order: source IP address, 1105cfb6eeb4SYOSHIFUJI Hideaki * destination IP address, zero-padded protocol number, and 1106cfb6eeb4SYOSHIFUJI Hideaki * segment length) 1107cfb6eeb4SYOSHIFUJI Hideaki */ 1108cfb6eeb4SYOSHIFUJI Hideaki bp->saddr = saddr; 1109cfb6eeb4SYOSHIFUJI Hideaki bp->daddr = daddr; 1110cfb6eeb4SYOSHIFUJI Hideaki bp->pad = 0; 1111076fb722SYOSHIFUJI Hideaki bp->protocol = IPPROTO_TCP; 111249a72dfbSAdam Langley bp->len = cpu_to_be16(nbytes); 1113c7da57a1SDavid S. Miller 111449a72dfbSAdam Langley sg_init_one(&sg, bp, sizeof(*bp)); 111549a72dfbSAdam Langley return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); 111649a72dfbSAdam Langley } 111749a72dfbSAdam Langley 1118a915da9bSEric Dumazet static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 1119318cf7aaSEric Dumazet __be32 daddr, __be32 saddr, const struct tcphdr *th) 112049a72dfbSAdam Langley { 112149a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 112249a72dfbSAdam Langley struct hash_desc *desc; 112349a72dfbSAdam Langley 112449a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 112549a72dfbSAdam Langley if (!hp) 112649a72dfbSAdam Langley goto clear_hash_noput; 112749a72dfbSAdam Langley desc = &hp->md5_desc; 112849a72dfbSAdam Langley 112949a72dfbSAdam Langley if (crypto_hash_init(desc)) 113049a72dfbSAdam Langley goto clear_hash; 113149a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) 113249a72dfbSAdam Langley goto clear_hash; 113349a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 113449a72dfbSAdam Langley goto clear_hash; 113549a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 113649a72dfbSAdam Langley goto clear_hash; 
113749a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 1138cfb6eeb4SYOSHIFUJI Hideaki goto clear_hash; 1139cfb6eeb4SYOSHIFUJI Hideaki 1140cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1141cfb6eeb4SYOSHIFUJI Hideaki return 0; 114249a72dfbSAdam Langley 1143cfb6eeb4SYOSHIFUJI Hideaki clear_hash: 1144cfb6eeb4SYOSHIFUJI Hideaki tcp_put_md5sig_pool(); 1145cfb6eeb4SYOSHIFUJI Hideaki clear_hash_noput: 1146cfb6eeb4SYOSHIFUJI Hideaki memset(md5_hash, 0, 16); 114749a72dfbSAdam Langley return 1; 1148cfb6eeb4SYOSHIFUJI Hideaki } 1149cfb6eeb4SYOSHIFUJI Hideaki 115049a72dfbSAdam Langley int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, 1151318cf7aaSEric Dumazet const struct sock *sk, const struct request_sock *req, 1152318cf7aaSEric Dumazet const struct sk_buff *skb) 1153cfb6eeb4SYOSHIFUJI Hideaki { 115449a72dfbSAdam Langley struct tcp_md5sig_pool *hp; 115549a72dfbSAdam Langley struct hash_desc *desc; 1156318cf7aaSEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1157cfb6eeb4SYOSHIFUJI Hideaki __be32 saddr, daddr; 1158cfb6eeb4SYOSHIFUJI Hideaki 1159cfb6eeb4SYOSHIFUJI Hideaki if (sk) { 1160c720c7e8SEric Dumazet saddr = inet_sk(sk)->inet_saddr; 1161c720c7e8SEric Dumazet daddr = inet_sk(sk)->inet_daddr; 116249a72dfbSAdam Langley } else if (req) { 116349a72dfbSAdam Langley saddr = inet_rsk(req)->loc_addr; 116449a72dfbSAdam Langley daddr = inet_rsk(req)->rmt_addr; 1165cfb6eeb4SYOSHIFUJI Hideaki } else { 116649a72dfbSAdam Langley const struct iphdr *iph = ip_hdr(skb); 116749a72dfbSAdam Langley saddr = iph->saddr; 116849a72dfbSAdam Langley daddr = iph->daddr; 1169cfb6eeb4SYOSHIFUJI Hideaki } 1170cfb6eeb4SYOSHIFUJI Hideaki 117149a72dfbSAdam Langley hp = tcp_get_md5sig_pool(); 117249a72dfbSAdam Langley if (!hp) 117349a72dfbSAdam Langley goto clear_hash_noput; 117449a72dfbSAdam Langley desc = &hp->md5_desc; 117549a72dfbSAdam Langley 117649a72dfbSAdam Langley if (crypto_hash_init(desc)) 117749a72dfbSAdam Langley goto clear_hash; 117849a72dfbSAdam Langley 
117949a72dfbSAdam Langley if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) 118049a72dfbSAdam Langley goto clear_hash; 118149a72dfbSAdam Langley if (tcp_md5_hash_header(hp, th)) 118249a72dfbSAdam Langley goto clear_hash; 118349a72dfbSAdam Langley if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 118449a72dfbSAdam Langley goto clear_hash; 118549a72dfbSAdam Langley if (tcp_md5_hash_key(hp, key)) 118649a72dfbSAdam Langley goto clear_hash; 118749a72dfbSAdam Langley if (crypto_hash_final(desc, md5_hash)) 118849a72dfbSAdam Langley goto clear_hash; 118949a72dfbSAdam Langley 119049a72dfbSAdam Langley tcp_put_md5sig_pool(); 119149a72dfbSAdam Langley return 0; 119249a72dfbSAdam Langley 119349a72dfbSAdam Langley clear_hash: 119449a72dfbSAdam Langley tcp_put_md5sig_pool(); 119549a72dfbSAdam Langley clear_hash_noput: 119649a72dfbSAdam Langley memset(md5_hash, 0, 16); 119749a72dfbSAdam Langley return 1; 119849a72dfbSAdam Langley } 119949a72dfbSAdam Langley EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1200cfb6eeb4SYOSHIFUJI Hideaki 1201a2a385d6SEric Dumazet static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) 1202cfb6eeb4SYOSHIFUJI Hideaki { 1203cfb6eeb4SYOSHIFUJI Hideaki /* 1204cfb6eeb4SYOSHIFUJI Hideaki * This gets called for each TCP segment that arrives 1205cfb6eeb4SYOSHIFUJI Hideaki * so we want to be efficient. 1206cfb6eeb4SYOSHIFUJI Hideaki * We have 3 drop cases: 1207cfb6eeb4SYOSHIFUJI Hideaki * o No MD5 hash and one expected. 1208cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and we're not expecting one. 1209cfb6eeb4SYOSHIFUJI Hideaki * o MD5 hash and its wrong. 
1210cfb6eeb4SYOSHIFUJI Hideaki */ 1211cf533ea5SEric Dumazet const __u8 *hash_location = NULL; 1212cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *hash_expected; 1213eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1214cf533ea5SEric Dumazet const struct tcphdr *th = tcp_hdr(skb); 1215cfb6eeb4SYOSHIFUJI Hideaki int genhash; 1216cfb6eeb4SYOSHIFUJI Hideaki unsigned char newhash[16]; 1217cfb6eeb4SYOSHIFUJI Hideaki 1218a915da9bSEric Dumazet hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, 1219a915da9bSEric Dumazet AF_INET); 12207d5d5525SYOSHIFUJI Hideaki hash_location = tcp_parse_md5sig_option(th); 1221cfb6eeb4SYOSHIFUJI Hideaki 1222cfb6eeb4SYOSHIFUJI Hideaki /* We've parsed the options - do we have a hash? */ 1223cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && !hash_location) 1224a2a385d6SEric Dumazet return false; 1225cfb6eeb4SYOSHIFUJI Hideaki 1226cfb6eeb4SYOSHIFUJI Hideaki if (hash_expected && !hash_location) { 1227785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); 1228a2a385d6SEric Dumazet return true; 1229cfb6eeb4SYOSHIFUJI Hideaki } 1230cfb6eeb4SYOSHIFUJI Hideaki 1231cfb6eeb4SYOSHIFUJI Hideaki if (!hash_expected && hash_location) { 1232785957d3SDavid S. Miller NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); 1233a2a385d6SEric Dumazet return true; 1234cfb6eeb4SYOSHIFUJI Hideaki } 1235cfb6eeb4SYOSHIFUJI Hideaki 1236cfb6eeb4SYOSHIFUJI Hideaki /* Okay, so this is hash_expected and hash_location - 1237cfb6eeb4SYOSHIFUJI Hideaki * so we need to calculate the checksum. 
1238cfb6eeb4SYOSHIFUJI Hideaki */ 123949a72dfbSAdam Langley genhash = tcp_v4_md5_hash_skb(newhash, 1240cfb6eeb4SYOSHIFUJI Hideaki hash_expected, 124149a72dfbSAdam Langley NULL, NULL, skb); 1242cfb6eeb4SYOSHIFUJI Hideaki 1243cfb6eeb4SYOSHIFUJI Hideaki if (genhash || memcmp(hash_location, newhash, 16) != 0) { 1244e87cc472SJoe Perches net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", 1245673d57e7SHarvey Harrison &iph->saddr, ntohs(th->source), 1246673d57e7SHarvey Harrison &iph->daddr, ntohs(th->dest), 1247e87cc472SJoe Perches genhash ? " tcp_v4_calc_md5_hash failed" 1248e87cc472SJoe Perches : ""); 1249a2a385d6SEric Dumazet return true; 1250cfb6eeb4SYOSHIFUJI Hideaki } 1251a2a385d6SEric Dumazet return false; 1252cfb6eeb4SYOSHIFUJI Hideaki } 1253cfb6eeb4SYOSHIFUJI Hideaki 1254cfb6eeb4SYOSHIFUJI Hideaki #endif 1255cfb6eeb4SYOSHIFUJI Hideaki 125672a3effaSEric Dumazet struct request_sock_ops tcp_request_sock_ops __read_mostly = { 12571da177e4SLinus Torvalds .family = PF_INET, 12582e6599cbSArnaldo Carvalho de Melo .obj_size = sizeof(struct tcp_request_sock), 125972659eccSOctavian Purdila .rtx_syn_ack = tcp_v4_rtx_synack, 126060236fddSArnaldo Carvalho de Melo .send_ack = tcp_v4_reqsk_send_ack, 126160236fddSArnaldo Carvalho de Melo .destructor = tcp_v4_reqsk_destructor, 12621da177e4SLinus Torvalds .send_reset = tcp_v4_send_reset, 126372659eccSOctavian Purdila .syn_ack_timeout = tcp_syn_ack_timeout, 12641da177e4SLinus Torvalds }; 12651da177e4SLinus Torvalds 1266cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1267b2e4b3deSStephen Hemminger static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 1268cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_reqsk_md5_lookup, 1269e3afe7b7SJohn Dykstra .calc_md5_hash = tcp_v4_md5_hash_skb, 1270cfb6eeb4SYOSHIFUJI Hideaki }; 1271b6332e6cSAndrew Morton #endif 1272cfb6eeb4SYOSHIFUJI Hideaki 12731da177e4SLinus Torvalds int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 12741da177e4SLinus Torvalds 
{ 12754957faadSWilliam Allen Simpson struct tcp_extend_values tmp_ext; 12761da177e4SLinus Torvalds struct tcp_options_received tmp_opt; 1277cf533ea5SEric Dumazet const u8 *hash_location; 127860236fddSArnaldo Carvalho de Melo struct request_sock *req; 1279e6b4d113SWilliam Allen Simpson struct inet_request_sock *ireq; 12804957faadSWilliam Allen Simpson struct tcp_sock *tp = tcp_sk(sk); 1281e6b4d113SWilliam Allen Simpson struct dst_entry *dst = NULL; 1282eddc9ec5SArnaldo Carvalho de Melo __be32 saddr = ip_hdr(skb)->saddr; 1283eddc9ec5SArnaldo Carvalho de Melo __be32 daddr = ip_hdr(skb)->daddr; 12841da177e4SLinus Torvalds __u32 isn = TCP_SKB_CB(skb)->when; 1285a2a385d6SEric Dumazet bool want_cookie = false; 12861da177e4SLinus Torvalds 12871da177e4SLinus Torvalds /* Never answer to SYNs send to broadcast or multicast */ 1288511c3f92SEric Dumazet if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) 12891da177e4SLinus Torvalds goto drop; 12901da177e4SLinus Torvalds 12911da177e4SLinus Torvalds /* TW buckets are converted to open requests without 12921da177e4SLinus Torvalds * limitations, they conserve resources and peer is 12931da177e4SLinus Torvalds * evidently real one. 12941da177e4SLinus Torvalds */ 1295463c84b9SArnaldo Carvalho de Melo if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1296946cedccSEric Dumazet want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); 1297946cedccSEric Dumazet if (!want_cookie) 12981da177e4SLinus Torvalds goto drop; 12991da177e4SLinus Torvalds } 13001da177e4SLinus Torvalds 13011da177e4SLinus Torvalds /* Accept backlog is full. If we have already queued enough 13021da177e4SLinus Torvalds * of warm entries in syn queue, drop request. It is better than 13031da177e4SLinus Torvalds * clogging syn queue with openreqs with exponentially increasing 13041da177e4SLinus Torvalds * timeout. 
13051da177e4SLinus Torvalds */ 1306463c84b9SArnaldo Carvalho de Melo if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 13071da177e4SLinus Torvalds goto drop; 13081da177e4SLinus Torvalds 1309ce4a7d0dSArnaldo Carvalho de Melo req = inet_reqsk_alloc(&tcp_request_sock_ops); 13101da177e4SLinus Torvalds if (!req) 13111da177e4SLinus Torvalds goto drop; 13121da177e4SLinus Torvalds 1313cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1314cfb6eeb4SYOSHIFUJI Hideaki tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; 1315cfb6eeb4SYOSHIFUJI Hideaki #endif 1316cfb6eeb4SYOSHIFUJI Hideaki 13171da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 1318bee7ca9eSWilliam Allen Simpson tmp_opt.mss_clamp = TCP_MSS_DEFAULT; 13194957faadSWilliam Allen Simpson tmp_opt.user_mss = tp->rx_opt.user_mss; 1320bb5b7c11SDavid S. Miller tcp_parse_options(skb, &tmp_opt, &hash_location, 0); 13211da177e4SLinus Torvalds 13224957faadSWilliam Allen Simpson if (tmp_opt.cookie_plus > 0 && 13234957faadSWilliam Allen Simpson tmp_opt.saw_tstamp && 13244957faadSWilliam Allen Simpson !tp->rx_opt.cookie_out_never && 13254957faadSWilliam Allen Simpson (sysctl_tcp_cookie_size > 0 || 13264957faadSWilliam Allen Simpson (tp->cookie_values != NULL && 13274957faadSWilliam Allen Simpson tp->cookie_values->cookie_desired > 0))) { 13284957faadSWilliam Allen Simpson u8 *c; 13294957faadSWilliam Allen Simpson u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; 13304957faadSWilliam Allen Simpson int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; 13314957faadSWilliam Allen Simpson 13324957faadSWilliam Allen Simpson if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) 13334957faadSWilliam Allen Simpson goto drop_and_release; 13344957faadSWilliam Allen Simpson 13354957faadSWilliam Allen Simpson /* Secret recipe starts with IP addresses */ 13360eae88f3SEric Dumazet *mess++ ^= (__force u32)daddr; 13370eae88f3SEric Dumazet *mess++ ^= (__force u32)saddr; 13384957faadSWilliam Allen Simpson 
13394957faadSWilliam Allen Simpson /* plus variable length Initiator Cookie */ 13404957faadSWilliam Allen Simpson c = (u8 *)mess; 13414957faadSWilliam Allen Simpson while (l-- > 0) 13424957faadSWilliam Allen Simpson *c++ ^= *hash_location++; 13434957faadSWilliam Allen Simpson 1344a2a385d6SEric Dumazet want_cookie = false; /* not our kind of cookie */ 13454957faadSWilliam Allen Simpson tmp_ext.cookie_out_never = 0; /* false */ 13464957faadSWilliam Allen Simpson tmp_ext.cookie_plus = tmp_opt.cookie_plus; 13474957faadSWilliam Allen Simpson } else if (!tp->rx_opt.cookie_in_always) { 13484957faadSWilliam Allen Simpson /* redundant indications, but ensure initialization. */ 13494957faadSWilliam Allen Simpson tmp_ext.cookie_out_never = 1; /* true */ 13504957faadSWilliam Allen Simpson tmp_ext.cookie_plus = 0; 13514957faadSWilliam Allen Simpson } else { 13524957faadSWilliam Allen Simpson goto drop_and_release; 13534957faadSWilliam Allen Simpson } 13544957faadSWilliam Allen Simpson tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; 13551da177e4SLinus Torvalds 13564dfc2817SFlorian Westphal if (want_cookie && !tmp_opt.saw_tstamp) 13571da177e4SLinus Torvalds tcp_clear_options(&tmp_opt); 13581da177e4SLinus Torvalds 13591da177e4SLinus Torvalds tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 13601da177e4SLinus Torvalds tcp_openreq_init(req, &tmp_opt, skb); 13611da177e4SLinus Torvalds 1362bb5b7c11SDavid S. Miller ireq = inet_rsk(req); 1363bb5b7c11SDavid S. Miller ireq->loc_addr = daddr; 1364bb5b7c11SDavid S. Miller ireq->rmt_addr = saddr; 1365bb5b7c11SDavid S. Miller ireq->no_srccheck = inet_sk(sk)->transparent; 1366bb5b7c11SDavid S. Miller ireq->opt = tcp_v4_save_options(sk, skb); 1367bb5b7c11SDavid S. Miller 1368284904aaSPaul Moore if (security_inet_conn_request(sk, skb, req)) 1369bb5b7c11SDavid S. 
Miller goto drop_and_free; 1370284904aaSPaul Moore 1371172d69e6SFlorian Westphal if (!want_cookie || tmp_opt.tstamp_ok) 1372bd14b1b2SEric Dumazet TCP_ECN_create_request(req, skb); 13731da177e4SLinus Torvalds 13741da177e4SLinus Torvalds if (want_cookie) { 13751da177e4SLinus Torvalds isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1376172d69e6SFlorian Westphal req->cookie_ts = tmp_opt.tstamp_ok; 13771da177e4SLinus Torvalds } else if (!isn) { 13781da177e4SLinus Torvalds struct inet_peer *peer = NULL; 13796bd023f3SDavid S. Miller struct flowi4 fl4; 13801da177e4SLinus Torvalds 13811da177e4SLinus Torvalds /* VJ's idea. We save last timestamp seen 13821da177e4SLinus Torvalds * from the destination in peer table, when entering 13831da177e4SLinus Torvalds * state TIME-WAIT, and check against it before 13841da177e4SLinus Torvalds * accepting new connection request. 13851da177e4SLinus Torvalds * 13861da177e4SLinus Torvalds * If "isn" is not zero, this request hit alive 13871da177e4SLinus Torvalds * timewait bucket, so that all the necessary checks 13881da177e4SLinus Torvalds * are made in the function processing timewait state. 13891da177e4SLinus Torvalds */ 13901da177e4SLinus Torvalds if (tmp_opt.saw_tstamp && 1391295ff7edSArnaldo Carvalho de Melo tcp_death_row.sysctl_tw_recycle && 1392*7586ecebSEric Dumazet (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && 1393ed2361e6SDavid S. Miller fl4.daddr == saddr && 1394ed2361e6SDavid S. Miller (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { 1395317fe0e6SEric Dumazet inet_peer_refcheck(peer); 13962c1409a0SEric Dumazet if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 13971da177e4SLinus Torvalds (s32)(peer->tcp_ts - req->ts_recent) > 13981da177e4SLinus Torvalds TCP_PAWS_WINDOW) { 1399de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); 14007cd04fa7SDenis V. 
Lunev goto drop_and_release; 14011da177e4SLinus Torvalds } 14021da177e4SLinus Torvalds } 14031da177e4SLinus Torvalds /* Kill the following clause, if you dislike this way. */ 14041da177e4SLinus Torvalds else if (!sysctl_tcp_syncookies && 1405463c84b9SArnaldo Carvalho de Melo (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 14061da177e4SLinus Torvalds (sysctl_max_syn_backlog >> 2)) && 14071da177e4SLinus Torvalds (!peer || !peer->tcp_ts_stamp) && 14081da177e4SLinus Torvalds (!dst || !dst_metric(dst, RTAX_RTT))) { 14091da177e4SLinus Torvalds /* Without syncookies last quarter of 14101da177e4SLinus Torvalds * backlog is filled with destinations, 14111da177e4SLinus Torvalds * proven to be alive. 14121da177e4SLinus Torvalds * It means that we continue to communicate 14131da177e4SLinus Torvalds * to destinations, already remembered 14141da177e4SLinus Torvalds * to the moment of synflood. 14151da177e4SLinus Torvalds */ 1416afd46503SJoe Perches LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), 1417673d57e7SHarvey Harrison &saddr, ntohs(tcp_hdr(skb)->source)); 14187cd04fa7SDenis V. Lunev goto drop_and_release; 14191da177e4SLinus Torvalds } 14201da177e4SLinus Torvalds 1421a94f723dSGerrit Renker isn = tcp_v4_init_sequence(skb); 14221da177e4SLinus Torvalds } 14232e6599cbSArnaldo Carvalho de Melo tcp_rsk(req)->snt_isn = isn; 14249ad7c049SJerry Chu tcp_rsk(req)->snt_synack = tcp_time_stamp; 14251da177e4SLinus Torvalds 142672659eccSOctavian Purdila if (tcp_v4_send_synack(sk, dst, req, 1427fff32699SEric Dumazet (struct request_values *)&tmp_ext, 1428*7586ecebSEric Dumazet skb_get_queue_mapping(skb), 1429*7586ecebSEric Dumazet want_cookie) || 14304957faadSWilliam Allen Simpson want_cookie) 14311da177e4SLinus Torvalds goto drop_and_free; 14321da177e4SLinus Torvalds 14333f421baaSArnaldo Carvalho de Melo inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 14341da177e4SLinus Torvalds return 0; 14351da177e4SLinus Torvalds 14367cd04fa7SDenis V. 
Lunev drop_and_release: 14377cd04fa7SDenis V. Lunev dst_release(dst); 14381da177e4SLinus Torvalds drop_and_free: 143960236fddSArnaldo Carvalho de Melo reqsk_free(req); 14401da177e4SLinus Torvalds drop: 14411da177e4SLinus Torvalds return 0; 14421da177e4SLinus Torvalds } 14434bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_conn_request); 14441da177e4SLinus Torvalds 14451da177e4SLinus Torvalds 14461da177e4SLinus Torvalds /* 14471da177e4SLinus Torvalds * The three way handshake has completed - we got a valid synack - 14481da177e4SLinus Torvalds * now create the new socket. 14491da177e4SLinus Torvalds */ 14501da177e4SLinus Torvalds struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, 145160236fddSArnaldo Carvalho de Melo struct request_sock *req, 14521da177e4SLinus Torvalds struct dst_entry *dst) 14531da177e4SLinus Torvalds { 14542e6599cbSArnaldo Carvalho de Melo struct inet_request_sock *ireq; 14551da177e4SLinus Torvalds struct inet_sock *newinet; 14561da177e4SLinus Torvalds struct tcp_sock *newtp; 14571da177e4SLinus Torvalds struct sock *newsk; 1458cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1459cfb6eeb4SYOSHIFUJI Hideaki struct tcp_md5sig_key *key; 1460cfb6eeb4SYOSHIFUJI Hideaki #endif 1461f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 14621da177e4SLinus Torvalds 14631da177e4SLinus Torvalds if (sk_acceptq_is_full(sk)) 14641da177e4SLinus Torvalds goto exit_overflow; 14651da177e4SLinus Torvalds 14661da177e4SLinus Torvalds newsk = tcp_create_openreq_child(sk, req, skb); 14671da177e4SLinus Torvalds if (!newsk) 1468093d2823SBalazs Scheidler goto exit_nonewsk; 14691da177e4SLinus Torvalds 1470bcd76111SHerbert Xu newsk->sk_gso_type = SKB_GSO_TCPV4; 14711da177e4SLinus Torvalds 14721da177e4SLinus Torvalds newtp = tcp_sk(newsk); 14731da177e4SLinus Torvalds newinet = inet_sk(newsk); 14742e6599cbSArnaldo Carvalho de Melo ireq = inet_rsk(req); 1475c720c7e8SEric Dumazet newinet->inet_daddr = ireq->rmt_addr; 1476c720c7e8SEric Dumazet 
newinet->inet_rcv_saddr = ireq->loc_addr; 1477c720c7e8SEric Dumazet newinet->inet_saddr = ireq->loc_addr; 1478f6d8bd05SEric Dumazet inet_opt = ireq->opt; 1479f6d8bd05SEric Dumazet rcu_assign_pointer(newinet->inet_opt, inet_opt); 14802e6599cbSArnaldo Carvalho de Melo ireq->opt = NULL; 1481463c84b9SArnaldo Carvalho de Melo newinet->mc_index = inet_iif(skb); 1482eddc9ec5SArnaldo Carvalho de Melo newinet->mc_ttl = ip_hdr(skb)->ttl; 14834c507d28SJiri Benc newinet->rcv_tos = ip_hdr(skb)->tos; 1484d83d8461SArnaldo Carvalho de Melo inet_csk(newsk)->icsk_ext_hdr_len = 0; 1485f6d8bd05SEric Dumazet if (inet_opt) 1486f6d8bd05SEric Dumazet inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; 1487c720c7e8SEric Dumazet newinet->inet_id = newtp->write_seq ^ jiffies; 14881da177e4SLinus Torvalds 1489dfd25fffSEric Dumazet if (!dst) { 1490dfd25fffSEric Dumazet dst = inet_csk_route_child_sock(sk, newsk, req); 1491dfd25fffSEric Dumazet if (!dst) 14920e734419SDavid S. Miller goto put_and_exit; 1493dfd25fffSEric Dumazet } else { 1494dfd25fffSEric Dumazet /* syncookie case : see end of cookie_v4_check() */ 1495dfd25fffSEric Dumazet } 14960e734419SDavid S. Miller sk_setup_caps(newsk, dst); 14970e734419SDavid S. Miller 14985d424d5aSJohn Heffner tcp_mtup_init(newsk); 14991da177e4SLinus Torvalds tcp_sync_mss(newsk, dst_mtu(dst)); 15000dbaee3bSDavid S. 
Miller newtp->advmss = dst_metric_advmss(dst); 1501f5fff5dcSTom Quetchenbach if (tcp_sk(sk)->rx_opt.user_mss && 1502f5fff5dcSTom Quetchenbach tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) 1503f5fff5dcSTom Quetchenbach newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1504f5fff5dcSTom Quetchenbach 15051da177e4SLinus Torvalds tcp_initialize_rcv_mss(newsk); 15069ad7c049SJerry Chu if (tcp_rsk(req)->snt_synack) 15079ad7c049SJerry Chu tcp_valid_rtt_meas(newsk, 15089ad7c049SJerry Chu tcp_time_stamp - tcp_rsk(req)->snt_synack); 15099ad7c049SJerry Chu newtp->total_retrans = req->retrans; 15101da177e4SLinus Torvalds 1511cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1512cfb6eeb4SYOSHIFUJI Hideaki /* Copy over the MD5 key from the original socket */ 1513a915da9bSEric Dumazet key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, 1514a915da9bSEric Dumazet AF_INET); 1515c720c7e8SEric Dumazet if (key != NULL) { 1516cfb6eeb4SYOSHIFUJI Hideaki /* 1517cfb6eeb4SYOSHIFUJI Hideaki * We're using one, so create a matching key 1518cfb6eeb4SYOSHIFUJI Hideaki * on the newsk structure. If we fail to get 1519cfb6eeb4SYOSHIFUJI Hideaki * memory, then we end up not copying the key 1520cfb6eeb4SYOSHIFUJI Hideaki * across. Shucks. 1521cfb6eeb4SYOSHIFUJI Hideaki */ 1522a915da9bSEric Dumazet tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, 1523a915da9bSEric Dumazet AF_INET, key->key, key->keylen, GFP_ATOMIC); 1524a465419bSEric Dumazet sk_nocaps_add(newsk, NETIF_F_GSO_MASK); 1525cfb6eeb4SYOSHIFUJI Hideaki } 1526cfb6eeb4SYOSHIFUJI Hideaki #endif 1527cfb6eeb4SYOSHIFUJI Hideaki 15280e734419SDavid S. Miller if (__inet_inherit_port(sk, newsk) < 0) 15290e734419SDavid S. 
Miller goto put_and_exit; 15309327f705SEric Dumazet __inet_hash_nolisten(newsk, NULL); 15311da177e4SLinus Torvalds 15321da177e4SLinus Torvalds return newsk; 15331da177e4SLinus Torvalds 15341da177e4SLinus Torvalds exit_overflow: 1535de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1536093d2823SBalazs Scheidler exit_nonewsk: 1537093d2823SBalazs Scheidler dst_release(dst); 15381da177e4SLinus Torvalds exit: 1539de0744afSPavel Emelyanov NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 15401da177e4SLinus Torvalds return NULL; 15410e734419SDavid S. Miller put_and_exit: 1542709e8697SEric Dumazet tcp_clear_xmit_timers(newsk); 1543d8a6e65fSEric Dumazet tcp_cleanup_congestion_control(newsk); 1544918eb399SEric Dumazet bh_unlock_sock(newsk); 15450e734419SDavid S. Miller sock_put(newsk); 15460e734419SDavid S. Miller goto exit; 15471da177e4SLinus Torvalds } 15484bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 15491da177e4SLinus Torvalds 15501da177e4SLinus Torvalds static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 15511da177e4SLinus Torvalds { 1552aa8223c7SArnaldo Carvalho de Melo struct tcphdr *th = tcp_hdr(skb); 1553eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 15541da177e4SLinus Torvalds struct sock *nsk; 155560236fddSArnaldo Carvalho de Melo struct request_sock **prev; 15561da177e4SLinus Torvalds /* Find possible connection requests. 
*/ 1557463c84b9SArnaldo Carvalho de Melo struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, 15581da177e4SLinus Torvalds iph->saddr, iph->daddr); 15591da177e4SLinus Torvalds if (req) 15601da177e4SLinus Torvalds return tcp_check_req(sk, skb, req, prev); 15611da177e4SLinus Torvalds 15623b1e0a65SYOSHIFUJI Hideaki nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1563c67499c0SPavel Emelyanov th->source, iph->daddr, th->dest, inet_iif(skb)); 15641da177e4SLinus Torvalds 15651da177e4SLinus Torvalds if (nsk) { 15661da177e4SLinus Torvalds if (nsk->sk_state != TCP_TIME_WAIT) { 15671da177e4SLinus Torvalds bh_lock_sock(nsk); 15681da177e4SLinus Torvalds return nsk; 15691da177e4SLinus Torvalds } 15709469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(nsk)); 15711da177e4SLinus Torvalds return NULL; 15721da177e4SLinus Torvalds } 15731da177e4SLinus Torvalds 15741da177e4SLinus Torvalds #ifdef CONFIG_SYN_COOKIES 1575af9b4738SFlorian Westphal if (!th->syn) 15761da177e4SLinus Torvalds sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 15771da177e4SLinus Torvalds #endif 15781da177e4SLinus Torvalds return sk; 15791da177e4SLinus Torvalds } 15801da177e4SLinus Torvalds 1581b51655b9SAl Viro static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) 15821da177e4SLinus Torvalds { 1583eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph = ip_hdr(skb); 1584eddc9ec5SArnaldo Carvalho de Melo 158584fa7933SPatrick McHardy if (skb->ip_summed == CHECKSUM_COMPLETE) { 1586eddc9ec5SArnaldo Carvalho de Melo if (!tcp_v4_check(skb->len, iph->saddr, 1587eddc9ec5SArnaldo Carvalho de Melo iph->daddr, skb->csum)) { 15881da177e4SLinus Torvalds skb->ip_summed = CHECKSUM_UNNECESSARY; 1589fb286bb2SHerbert Xu return 0; 1590fb286bb2SHerbert Xu } 1591fb286bb2SHerbert Xu } 1592fb286bb2SHerbert Xu 1593eddc9ec5SArnaldo Carvalho de Melo skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 1594fb286bb2SHerbert Xu skb->len, IPPROTO_TCP, 0); 1595fb286bb2SHerbert Xu 
1596fb286bb2SHerbert Xu if (skb->len <= 76) { 1597fb286bb2SHerbert Xu return __skb_checksum_complete(skb); 15981da177e4SLinus Torvalds } 15991da177e4SLinus Torvalds return 0; 16001da177e4SLinus Torvalds } 16011da177e4SLinus Torvalds 16021da177e4SLinus Torvalds 16031da177e4SLinus Torvalds /* The socket must have it's spinlock held when we get 16041da177e4SLinus Torvalds * here. 16051da177e4SLinus Torvalds * 16061da177e4SLinus Torvalds * We have a potential double-lock case here, so even when 16071da177e4SLinus Torvalds * doing backlog processing we use the BH locking scheme. 16081da177e4SLinus Torvalds * This is because we cannot sleep with the original spinlock 16091da177e4SLinus Torvalds * held. 16101da177e4SLinus Torvalds */ 16111da177e4SLinus Torvalds int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 16121da177e4SLinus Torvalds { 1613cfb6eeb4SYOSHIFUJI Hideaki struct sock *rsk; 1614cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1615cfb6eeb4SYOSHIFUJI Hideaki /* 1616cfb6eeb4SYOSHIFUJI Hideaki * We really want to reject the packet as early as possible 1617cfb6eeb4SYOSHIFUJI Hideaki * if: 1618cfb6eeb4SYOSHIFUJI Hideaki * o We're expecting an MD5'd packet and this is no MD5 tcp option 1619cfb6eeb4SYOSHIFUJI Hideaki * o There is an MD5 option and we're not expecting one 1620cfb6eeb4SYOSHIFUJI Hideaki */ 1621cfb6eeb4SYOSHIFUJI Hideaki if (tcp_v4_inbound_md5_hash(sk, skb)) 1622cfb6eeb4SYOSHIFUJI Hideaki goto discard; 1623cfb6eeb4SYOSHIFUJI Hideaki #endif 1624cfb6eeb4SYOSHIFUJI Hideaki 16251da177e4SLinus Torvalds if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1626bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1627aa8223c7SArnaldo Carvalho de Melo if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1628cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 16291da177e4SLinus Torvalds goto reset; 1630cfb6eeb4SYOSHIFUJI Hideaki } 16311da177e4SLinus Torvalds return 0; 16321da177e4SLinus Torvalds } 16331da177e4SLinus Torvalds 1634ab6a5bb6SArnaldo Carvalho de 
Melo if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) 16351da177e4SLinus Torvalds goto csum_err; 16361da177e4SLinus Torvalds 16371da177e4SLinus Torvalds if (sk->sk_state == TCP_LISTEN) { 16381da177e4SLinus Torvalds struct sock *nsk = tcp_v4_hnd_req(sk, skb); 16391da177e4SLinus Torvalds if (!nsk) 16401da177e4SLinus Torvalds goto discard; 16411da177e4SLinus Torvalds 16421da177e4SLinus Torvalds if (nsk != sk) { 1643bdeab991STom Herbert sock_rps_save_rxhash(nsk, skb); 1644cfb6eeb4SYOSHIFUJI Hideaki if (tcp_child_process(sk, nsk, skb)) { 1645cfb6eeb4SYOSHIFUJI Hideaki rsk = nsk; 16461da177e4SLinus Torvalds goto reset; 1647cfb6eeb4SYOSHIFUJI Hideaki } 16481da177e4SLinus Torvalds return 0; 16491da177e4SLinus Torvalds } 1650ca55158cSEric Dumazet } else 1651bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1652ca55158cSEric Dumazet 1653aa8223c7SArnaldo Carvalho de Melo if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1654cfb6eeb4SYOSHIFUJI Hideaki rsk = sk; 16551da177e4SLinus Torvalds goto reset; 1656cfb6eeb4SYOSHIFUJI Hideaki } 16571da177e4SLinus Torvalds return 0; 16581da177e4SLinus Torvalds 16591da177e4SLinus Torvalds reset: 1660cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(rsk, skb); 16611da177e4SLinus Torvalds discard: 16621da177e4SLinus Torvalds kfree_skb(skb); 16631da177e4SLinus Torvalds /* Be careful here. If this function gets more complicated and 16641da177e4SLinus Torvalds * gcc suffers from register pressure on the x86, sk (in %ebx) 16651da177e4SLinus Torvalds * might be destroyed here. This current version compiles correctly, 16661da177e4SLinus Torvalds * but you have been warned. 
16671da177e4SLinus Torvalds */ 16681da177e4SLinus Torvalds return 0; 16691da177e4SLinus Torvalds 16701da177e4SLinus Torvalds csum_err: 167163231bddSPavel Emelyanov TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 16721da177e4SLinus Torvalds goto discard; 16731da177e4SLinus Torvalds } 16744bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_v4_do_rcv); 16751da177e4SLinus Torvalds 167641063e9dSDavid S. Miller int tcp_v4_early_demux(struct sk_buff *skb) 167741063e9dSDavid S. Miller { 167841063e9dSDavid S. Miller struct net *net = dev_net(skb->dev); 167941063e9dSDavid S. Miller const struct iphdr *iph; 168041063e9dSDavid S. Miller const struct tcphdr *th; 1681fd62e09bSDavid S. Miller struct net_device *dev; 168241063e9dSDavid S. Miller struct sock *sk; 168341063e9dSDavid S. Miller int err; 168441063e9dSDavid S. Miller 168541063e9dSDavid S. Miller err = -ENOENT; 168641063e9dSDavid S. Miller if (skb->pkt_type != PACKET_HOST) 168741063e9dSDavid S. Miller goto out_err; 168841063e9dSDavid S. Miller 168941063e9dSDavid S. Miller if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) 169041063e9dSDavid S. Miller goto out_err; 169141063e9dSDavid S. Miller 169241063e9dSDavid S. Miller iph = ip_hdr(skb); 169341063e9dSDavid S. Miller th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); 169441063e9dSDavid S. Miller 169541063e9dSDavid S. Miller if (th->doff < sizeof(struct tcphdr) / 4) 169641063e9dSDavid S. Miller goto out_err; 169741063e9dSDavid S. Miller 169841063e9dSDavid S. Miller if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) 169941063e9dSDavid S. Miller goto out_err; 170041063e9dSDavid S. Miller 1701fd62e09bSDavid S. Miller dev = skb->dev; 170241063e9dSDavid S. Miller sk = __inet_lookup_established(net, &tcp_hashinfo, 170341063e9dSDavid S. Miller iph->saddr, th->source, 170441063e9dSDavid S. Miller iph->daddr, th->dest, 1705fd62e09bSDavid S. Miller dev->ifindex); 170641063e9dSDavid S. Miller if (sk) { 170741063e9dSDavid S. Miller skb->sk = sk; 170841063e9dSDavid S. 
Miller skb->destructor = sock_edemux; 170941063e9dSDavid S. Miller if (sk->sk_state != TCP_TIME_WAIT) { 171041063e9dSDavid S. Miller struct dst_entry *dst = sk->sk_rx_dst; 171141063e9dSDavid S. Miller if (dst) 171241063e9dSDavid S. Miller dst = dst_check(dst, 0); 171341063e9dSDavid S. Miller if (dst) { 1714fd62e09bSDavid S. Miller struct rtable *rt = (struct rtable *) dst; 1715fd62e09bSDavid S. Miller 1716fd62e09bSDavid S. Miller if (rt->rt_iif == dev->ifindex) { 171741063e9dSDavid S. Miller skb_dst_set_noref(skb, dst); 171841063e9dSDavid S. Miller err = 0; 171941063e9dSDavid S. Miller } 172041063e9dSDavid S. Miller } 172141063e9dSDavid S. Miller } 1722fd62e09bSDavid S. Miller } 172341063e9dSDavid S. Miller 172441063e9dSDavid S. Miller out_err: 172541063e9dSDavid S. Miller return err; 172641063e9dSDavid S. Miller } 172741063e9dSDavid S. Miller 17281da177e4SLinus Torvalds /* 17291da177e4SLinus Torvalds * From tcp_input.c 17301da177e4SLinus Torvalds */ 17311da177e4SLinus Torvalds 17321da177e4SLinus Torvalds int tcp_v4_rcv(struct sk_buff *skb) 17331da177e4SLinus Torvalds { 1734eddc9ec5SArnaldo Carvalho de Melo const struct iphdr *iph; 1735cf533ea5SEric Dumazet const struct tcphdr *th; 17361da177e4SLinus Torvalds struct sock *sk; 17371da177e4SLinus Torvalds int ret; 1738a86b1e30SPavel Emelyanov struct net *net = dev_net(skb->dev); 17391da177e4SLinus Torvalds 17401da177e4SLinus Torvalds if (skb->pkt_type != PACKET_HOST) 17411da177e4SLinus Torvalds goto discard_it; 17421da177e4SLinus Torvalds 17431da177e4SLinus Torvalds /* Count it even if it's bad */ 174463231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INSEGS); 17451da177e4SLinus Torvalds 17461da177e4SLinus Torvalds if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 17471da177e4SLinus Torvalds goto discard_it; 17481da177e4SLinus Torvalds 1749aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 17501da177e4SLinus Torvalds 17511da177e4SLinus Torvalds if (th->doff < sizeof(struct tcphdr) / 4) 17521da177e4SLinus Torvalds 
goto bad_packet; 17531da177e4SLinus Torvalds if (!pskb_may_pull(skb, th->doff * 4)) 17541da177e4SLinus Torvalds goto discard_it; 17551da177e4SLinus Torvalds 17561da177e4SLinus Torvalds /* An explanation is required here, I think. 17571da177e4SLinus Torvalds * Packet length and doff are validated by header prediction, 1758caa20d9aSStephen Hemminger * provided case of th->doff==0 is eliminated. 17591da177e4SLinus Torvalds * So, we defer the checks. */ 176060476372SHerbert Xu if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) 17611da177e4SLinus Torvalds goto bad_packet; 17621da177e4SLinus Torvalds 1763aa8223c7SArnaldo Carvalho de Melo th = tcp_hdr(skb); 1764eddc9ec5SArnaldo Carvalho de Melo iph = ip_hdr(skb); 17651da177e4SLinus Torvalds TCP_SKB_CB(skb)->seq = ntohl(th->seq); 17661da177e4SLinus Torvalds TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 17671da177e4SLinus Torvalds skb->len - th->doff * 4); 17681da177e4SLinus Torvalds TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 17691da177e4SLinus Torvalds TCP_SKB_CB(skb)->when = 0; 1770b82d1bb4SEric Dumazet TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 17711da177e4SLinus Torvalds TCP_SKB_CB(skb)->sacked = 0; 17721da177e4SLinus Torvalds 17739a1f27c4SArnaldo Carvalho de Melo sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 17741da177e4SLinus Torvalds if (!sk) 17751da177e4SLinus Torvalds goto no_tcp_socket; 17761da177e4SLinus Torvalds 1777bb134d5dSEric Dumazet process: 1778bb134d5dSEric Dumazet if (sk->sk_state == TCP_TIME_WAIT) 1779bb134d5dSEric Dumazet goto do_time_wait; 1780bb134d5dSEric Dumazet 17816cce09f8SEric Dumazet if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 17826cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1783d218d111SStephen Hemminger goto discard_and_relse; 17846cce09f8SEric Dumazet } 1785d218d111SStephen Hemminger 17861da177e4SLinus Torvalds if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 17871da177e4SLinus Torvalds goto 
discard_and_relse; 1788b59c2701SPatrick McHardy nf_reset(skb); 17891da177e4SLinus Torvalds 1790fda9ef5dSDmitry Mishin if (sk_filter(sk, skb)) 17911da177e4SLinus Torvalds goto discard_and_relse; 17921da177e4SLinus Torvalds 17931da177e4SLinus Torvalds skb->dev = NULL; 17941da177e4SLinus Torvalds 1795c6366184SIngo Molnar bh_lock_sock_nested(sk); 17961da177e4SLinus Torvalds ret = 0; 17971da177e4SLinus Torvalds if (!sock_owned_by_user(sk)) { 17981a2449a8SChris Leech #ifdef CONFIG_NET_DMA 17991a2449a8SChris Leech struct tcp_sock *tp = tcp_sk(sk); 18001a2449a8SChris Leech if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 1801a2bd1140SDave Jiang tp->ucopy.dma_chan = net_dma_find_channel(); 18021a2449a8SChris Leech if (tp->ucopy.dma_chan) 18031a2449a8SChris Leech ret = tcp_v4_do_rcv(sk, skb); 18041a2449a8SChris Leech else 18051a2449a8SChris Leech #endif 18061a2449a8SChris Leech { 18071da177e4SLinus Torvalds if (!tcp_prequeue(sk, skb)) 18081da177e4SLinus Torvalds ret = tcp_v4_do_rcv(sk, skb); 18091a2449a8SChris Leech } 1810da882c1fSEric Dumazet } else if (unlikely(sk_add_backlog(sk, skb, 1811da882c1fSEric Dumazet sk->sk_rcvbuf + sk->sk_sndbuf))) { 18126b03a53aSZhu Yi bh_unlock_sock(sk); 18136cce09f8SEric Dumazet NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP); 18146b03a53aSZhu Yi goto discard_and_relse; 18156b03a53aSZhu Yi } 18161da177e4SLinus Torvalds bh_unlock_sock(sk); 18171da177e4SLinus Torvalds 18181da177e4SLinus Torvalds sock_put(sk); 18191da177e4SLinus Torvalds 18201da177e4SLinus Torvalds return ret; 18211da177e4SLinus Torvalds 18221da177e4SLinus Torvalds no_tcp_socket: 18231da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 18241da177e4SLinus Torvalds goto discard_it; 18251da177e4SLinus Torvalds 18261da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 18271da177e4SLinus Torvalds bad_packet: 182863231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 18291da177e4SLinus Torvalds } else { 
1830cfb6eeb4SYOSHIFUJI Hideaki tcp_v4_send_reset(NULL, skb); 18311da177e4SLinus Torvalds } 18321da177e4SLinus Torvalds 18331da177e4SLinus Torvalds discard_it: 18341da177e4SLinus Torvalds /* Discard frame. */ 18351da177e4SLinus Torvalds kfree_skb(skb); 18361da177e4SLinus Torvalds return 0; 18371da177e4SLinus Torvalds 18381da177e4SLinus Torvalds discard_and_relse: 18391da177e4SLinus Torvalds sock_put(sk); 18401da177e4SLinus Torvalds goto discard_it; 18411da177e4SLinus Torvalds 18421da177e4SLinus Torvalds do_time_wait: 18431da177e4SLinus Torvalds if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 18449469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 18451da177e4SLinus Torvalds goto discard_it; 18461da177e4SLinus Torvalds } 18471da177e4SLinus Torvalds 18481da177e4SLinus Torvalds if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 184963231bddSPavel Emelyanov TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 18509469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 18511da177e4SLinus Torvalds goto discard_it; 18521da177e4SLinus Torvalds } 18539469c7b4SYOSHIFUJI Hideaki switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 18541da177e4SLinus Torvalds case TCP_TW_SYN: { 1855c346dca1SYOSHIFUJI Hideaki struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), 1856c67499c0SPavel Emelyanov &tcp_hashinfo, 1857eddc9ec5SArnaldo Carvalho de Melo iph->daddr, th->dest, 1858463c84b9SArnaldo Carvalho de Melo inet_iif(skb)); 18591da177e4SLinus Torvalds if (sk2) { 18609469c7b4SYOSHIFUJI Hideaki inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); 18619469c7b4SYOSHIFUJI Hideaki inet_twsk_put(inet_twsk(sk)); 18621da177e4SLinus Torvalds sk = sk2; 18631da177e4SLinus Torvalds goto process; 18641da177e4SLinus Torvalds } 18651da177e4SLinus Torvalds /* Fall through to ACK */ 18661da177e4SLinus Torvalds } 18671da177e4SLinus Torvalds case TCP_TW_ACK: 18681da177e4SLinus Torvalds tcp_v4_timewait_ack(sk, skb); 18691da177e4SLinus Torvalds break; 18701da177e4SLinus Torvalds 
case TCP_TW_RST: 18711da177e4SLinus Torvalds goto no_tcp_socket; 18721da177e4SLinus Torvalds case TCP_TW_SUCCESS:; 18731da177e4SLinus Torvalds } 18741da177e4SLinus Torvalds goto discard_it; 18751da177e4SLinus Torvalds } 18761da177e4SLinus Torvalds 18774670fd81SDavid S. Miller struct inet_peer *tcp_v4_get_peer(struct sock *sk) 18781da177e4SLinus Torvalds { 18791da177e4SLinus Torvalds struct rtable *rt = (struct rtable *) __sk_dst_get(sk); 18803f419d2dSDavid S. Miller struct inet_sock *inet = inet_sk(sk); 18811da177e4SLinus Torvalds 18824670fd81SDavid S. Miller /* If we don't have a valid cached route, or we're doing IP 18834670fd81SDavid S. Miller * options which make the IPv4 header destination address 18844670fd81SDavid S. Miller * different from our peer's, do not bother with this. 18854670fd81SDavid S. Miller */ 18864670fd81SDavid S. Miller if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) 18874670fd81SDavid S. Miller return NULL; 18884670fd81SDavid S. Miller return rt_get_peer_create(rt, inet->inet_daddr); 18891da177e4SLinus Torvalds } 18903f419d2dSDavid S. Miller EXPORT_SYMBOL(tcp_v4_get_peer); 18911da177e4SLinus Torvalds 1892ccb7c410SDavid S. Miller static struct timewait_sock_ops tcp_timewait_sock_ops = { 1893ccb7c410SDavid S. Miller .twsk_obj_size = sizeof(struct tcp_timewait_sock), 1894ccb7c410SDavid S. Miller .twsk_unique = tcp_twsk_unique, 1895ccb7c410SDavid S. Miller .twsk_destructor= tcp_twsk_destructor, 1896ccb7c410SDavid S. Miller }; 18971da177e4SLinus Torvalds 18983b401a81SStephen Hemminger const struct inet_connection_sock_af_ops ipv4_specific = { 18991da177e4SLinus Torvalds .queue_xmit = ip_queue_xmit, 19001da177e4SLinus Torvalds .send_check = tcp_v4_send_check, 190132519f11SArnaldo Carvalho de Melo .rebuild_header = inet_sk_rebuild_header, 19021da177e4SLinus Torvalds .conn_request = tcp_v4_conn_request, 19031da177e4SLinus Torvalds .syn_recv_sock = tcp_v4_syn_recv_sock, 19043f419d2dSDavid S. 
Miller .get_peer = tcp_v4_get_peer, 19051da177e4SLinus Torvalds .net_header_len = sizeof(struct iphdr), 19061da177e4SLinus Torvalds .setsockopt = ip_setsockopt, 19071da177e4SLinus Torvalds .getsockopt = ip_getsockopt, 1908543d9cfeSArnaldo Carvalho de Melo .addr2sockaddr = inet_csk_addr2sockaddr, 1909543d9cfeSArnaldo Carvalho de Melo .sockaddr_len = sizeof(struct sockaddr_in), 1910ab1e0a13SArnaldo Carvalho de Melo .bind_conflict = inet_csk_bind_conflict, 19113fdadf7dSDmitry Mishin #ifdef CONFIG_COMPAT 19123fdadf7dSDmitry Mishin .compat_setsockopt = compat_ip_setsockopt, 19133fdadf7dSDmitry Mishin .compat_getsockopt = compat_ip_getsockopt, 19143fdadf7dSDmitry Mishin #endif 19151da177e4SLinus Torvalds }; 19164bc2f18bSEric Dumazet EXPORT_SYMBOL(ipv4_specific); 19171da177e4SLinus Torvalds 1918cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1919b2e4b3deSStephen Hemminger static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1920cfb6eeb4SYOSHIFUJI Hideaki .md5_lookup = tcp_v4_md5_lookup, 192149a72dfbSAdam Langley .calc_md5_hash = tcp_v4_md5_hash_skb, 1922cfb6eeb4SYOSHIFUJI Hideaki .md5_parse = tcp_v4_parse_md5_keys, 1923cfb6eeb4SYOSHIFUJI Hideaki }; 1924b6332e6cSAndrew Morton #endif 1925cfb6eeb4SYOSHIFUJI Hideaki 19261da177e4SLinus Torvalds /* NOTE: A lot of things set to zero explicitly by call to 19271da177e4SLinus Torvalds * sk_alloc() so need not be done here. 19281da177e4SLinus Torvalds */ 19291da177e4SLinus Torvalds static int tcp_v4_init_sock(struct sock *sk) 19301da177e4SLinus Torvalds { 19316687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk); 19321da177e4SLinus Torvalds 1933900f65d3SNeal Cardwell tcp_init_sock(sk); 19341da177e4SLinus Torvalds 19358292a17aSArnaldo Carvalho de Melo icsk->icsk_af_ops = &ipv4_specific; 1936900f65d3SNeal Cardwell 1937cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1938ac807fa8SDavid S. 
Miller tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; 1939cfb6eeb4SYOSHIFUJI Hideaki #endif 19401da177e4SLinus Torvalds 19411da177e4SLinus Torvalds return 0; 19421da177e4SLinus Torvalds } 19431da177e4SLinus Torvalds 19447d06b2e0SBrian Haley void tcp_v4_destroy_sock(struct sock *sk) 19451da177e4SLinus Torvalds { 19461da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk); 19471da177e4SLinus Torvalds 19481da177e4SLinus Torvalds tcp_clear_xmit_timers(sk); 19491da177e4SLinus Torvalds 19506687e988SArnaldo Carvalho de Melo tcp_cleanup_congestion_control(sk); 1951317a76f9SStephen Hemminger 19521da177e4SLinus Torvalds /* Cleanup up the write buffer. */ 1953fe067e8aSDavid S. Miller tcp_write_queue_purge(sk); 19541da177e4SLinus Torvalds 19551da177e4SLinus Torvalds /* Cleans up our, hopefully empty, out_of_order_queue. */ 19561da177e4SLinus Torvalds __skb_queue_purge(&tp->out_of_order_queue); 19571da177e4SLinus Torvalds 1958cfb6eeb4SYOSHIFUJI Hideaki #ifdef CONFIG_TCP_MD5SIG 1959cfb6eeb4SYOSHIFUJI Hideaki /* Clean up the MD5 key list, if any */ 1960cfb6eeb4SYOSHIFUJI Hideaki if (tp->md5sig_info) { 1961a915da9bSEric Dumazet tcp_clear_md5_list(sk); 1962a8afca03SEric Dumazet kfree_rcu(tp->md5sig_info, rcu); 1963cfb6eeb4SYOSHIFUJI Hideaki tp->md5sig_info = NULL; 1964cfb6eeb4SYOSHIFUJI Hideaki } 1965cfb6eeb4SYOSHIFUJI Hideaki #endif 1966cfb6eeb4SYOSHIFUJI Hideaki 19671a2449a8SChris Leech #ifdef CONFIG_NET_DMA 19681a2449a8SChris Leech /* Cleans up our sk_async_wait_queue */ 19691a2449a8SChris Leech __skb_queue_purge(&sk->sk_async_wait_queue); 19701a2449a8SChris Leech #endif 19711a2449a8SChris Leech 19721da177e4SLinus Torvalds /* Clean prequeue, it must be empty really */ 19731da177e4SLinus Torvalds __skb_queue_purge(&tp->ucopy.prequeue); 19741da177e4SLinus Torvalds 19751da177e4SLinus Torvalds /* Clean up a referenced TCP bind bucket. 
*/ 1976463c84b9SArnaldo Carvalho de Melo if (inet_csk(sk)->icsk_bind_hash) 1977ab1e0a13SArnaldo Carvalho de Melo inet_put_port(sk); 19781da177e4SLinus Torvalds 19791da177e4SLinus Torvalds /* 19801da177e4SLinus Torvalds * If sendmsg cached page exists, toss it. 19811da177e4SLinus Torvalds */ 19821da177e4SLinus Torvalds if (sk->sk_sndmsg_page) { 19831da177e4SLinus Torvalds __free_page(sk->sk_sndmsg_page); 19841da177e4SLinus Torvalds sk->sk_sndmsg_page = NULL; 19851da177e4SLinus Torvalds } 19861da177e4SLinus Torvalds 1987435cf559SWilliam Allen Simpson /* TCP Cookie Transactions */ 1988435cf559SWilliam Allen Simpson if (tp->cookie_values != NULL) { 1989435cf559SWilliam Allen Simpson kref_put(&tp->cookie_values->kref, 1990435cf559SWilliam Allen Simpson tcp_cookie_values_release); 1991435cf559SWilliam Allen Simpson tp->cookie_values = NULL; 1992435cf559SWilliam Allen Simpson } 1993435cf559SWilliam Allen Simpson 1994180d8cd9SGlauber Costa sk_sockets_allocated_dec(sk); 1995d1a4c0b3SGlauber Costa sock_release_memcg(sk); 19961da177e4SLinus Torvalds } 19971da177e4SLinus Torvalds EXPORT_SYMBOL(tcp_v4_destroy_sock); 19981da177e4SLinus Torvalds 19991da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 20001da177e4SLinus Torvalds /* Proc filesystem TCP sock list dumping. */ 20011da177e4SLinus Torvalds 20023ab5aee7SEric Dumazet static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) 20031da177e4SLinus Torvalds { 20043ab5aee7SEric Dumazet return hlist_nulls_empty(head) ? NULL : 20058feaf0c0SArnaldo Carvalho de Melo list_entry(head->first, struct inet_timewait_sock, tw_node); 20061da177e4SLinus Torvalds } 20071da177e4SLinus Torvalds 20088feaf0c0SArnaldo Carvalho de Melo static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) 20091da177e4SLinus Torvalds { 20103ab5aee7SEric Dumazet return !is_a_nulls(tw->tw_node.next) ? 
20113ab5aee7SEric Dumazet hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 20121da177e4SLinus Torvalds } 20131da177e4SLinus Torvalds 2014a8b690f9STom Herbert /* 2015a8b690f9STom Herbert * Get next listener socket follow cur. If cur is NULL, get first socket 2016a8b690f9STom Herbert * starting from bucket given in st->bucket; when st->bucket is zero the 2017a8b690f9STom Herbert * very first socket in the hash table is returned. 2018a8b690f9STom Herbert */ 20191da177e4SLinus Torvalds static void *listening_get_next(struct seq_file *seq, void *cur) 20201da177e4SLinus Torvalds { 2021463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk; 2022c25eb3bfSEric Dumazet struct hlist_nulls_node *node; 20231da177e4SLinus Torvalds struct sock *sk = cur; 20245caea4eaSEric Dumazet struct inet_listen_hashbucket *ilb; 20251da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2026a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 20271da177e4SLinus Torvalds 20281da177e4SLinus Torvalds if (!sk) { 2029a8b690f9STom Herbert ilb = &tcp_hashinfo.listening_hash[st->bucket]; 20305caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 2031c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 2032a8b690f9STom Herbert st->offset = 0; 20331da177e4SLinus Torvalds goto get_sk; 20341da177e4SLinus Torvalds } 20355caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 20361da177e4SLinus Torvalds ++st->num; 2037a8b690f9STom Herbert ++st->offset; 20381da177e4SLinus Torvalds 20391da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_OPENREQ) { 204060236fddSArnaldo Carvalho de Melo struct request_sock *req = cur; 20411da177e4SLinus Torvalds 2042463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(st->syn_wait_sk); 20431da177e4SLinus Torvalds req = req->dl_next; 20441da177e4SLinus Torvalds while (1) { 20451da177e4SLinus Torvalds while (req) { 2046bdccc4caSDaniel Lezcano if (req->rsk_ops->family == st->family) { 20471da177e4SLinus Torvalds cur = req; 
20481da177e4SLinus Torvalds goto out; 20491da177e4SLinus Torvalds } 20501da177e4SLinus Torvalds req = req->dl_next; 20511da177e4SLinus Torvalds } 205272a3effaSEric Dumazet if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 20531da177e4SLinus Torvalds break; 20541da177e4SLinus Torvalds get_req: 2055463c84b9SArnaldo Carvalho de Melo req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 20561da177e4SLinus Torvalds } 20571bde5ac4SEric Dumazet sk = sk_nulls_next(st->syn_wait_sk); 20581da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 2059463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 20601da177e4SLinus Torvalds } else { 2061463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 2062463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2063463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) 20641da177e4SLinus Torvalds goto start_req; 2065463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 20661bde5ac4SEric Dumazet sk = sk_nulls_next(sk); 20671da177e4SLinus Torvalds } 20681da177e4SLinus Torvalds get_sk: 2069c25eb3bfSEric Dumazet sk_nulls_for_each_from(sk, node) { 20708475ef9fSPavel Emelyanov if (!net_eq(sock_net(sk), net)) 20718475ef9fSPavel Emelyanov continue; 20728475ef9fSPavel Emelyanov if (sk->sk_family == st->family) { 20731da177e4SLinus Torvalds cur = sk; 20741da177e4SLinus Torvalds goto out; 20751da177e4SLinus Torvalds } 2076463c84b9SArnaldo Carvalho de Melo icsk = inet_csk(sk); 2077463c84b9SArnaldo Carvalho de Melo read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2078463c84b9SArnaldo Carvalho de Melo if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 20791da177e4SLinus Torvalds start_req: 20801da177e4SLinus Torvalds st->uid = sock_i_uid(sk); 20811da177e4SLinus Torvalds st->syn_wait_sk = sk; 20821da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_OPENREQ; 20831da177e4SLinus 
Torvalds st->sbucket = 0; 20841da177e4SLinus Torvalds goto get_req; 20851da177e4SLinus Torvalds } 2086463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 20871da177e4SLinus Torvalds } 20885caea4eaSEric Dumazet spin_unlock_bh(&ilb->lock); 2089a8b690f9STom Herbert st->offset = 0; 20900f7ff927SArnaldo Carvalho de Melo if (++st->bucket < INET_LHTABLE_SIZE) { 20915caea4eaSEric Dumazet ilb = &tcp_hashinfo.listening_hash[st->bucket]; 20925caea4eaSEric Dumazet spin_lock_bh(&ilb->lock); 2093c25eb3bfSEric Dumazet sk = sk_nulls_head(&ilb->head); 20941da177e4SLinus Torvalds goto get_sk; 20951da177e4SLinus Torvalds } 20961da177e4SLinus Torvalds cur = NULL; 20971da177e4SLinus Torvalds out: 20981da177e4SLinus Torvalds return cur; 20991da177e4SLinus Torvalds } 21001da177e4SLinus Torvalds 21011da177e4SLinus Torvalds static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 21021da177e4SLinus Torvalds { 2103a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2104a8b690f9STom Herbert void *rc; 2105a8b690f9STom Herbert 2106a8b690f9STom Herbert st->bucket = 0; 2107a8b690f9STom Herbert st->offset = 0; 2108a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 21091da177e4SLinus Torvalds 21101da177e4SLinus Torvalds while (rc && *pos) { 21111da177e4SLinus Torvalds rc = listening_get_next(seq, rc); 21121da177e4SLinus Torvalds --*pos; 21131da177e4SLinus Torvalds } 21141da177e4SLinus Torvalds return rc; 21151da177e4SLinus Torvalds } 21161da177e4SLinus Torvalds 2117a2a385d6SEric Dumazet static inline bool empty_bucket(struct tcp_iter_state *st) 21186eac5604SAndi Kleen { 21193ab5aee7SEric Dumazet return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && 21203ab5aee7SEric Dumazet hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 21216eac5604SAndi Kleen } 21226eac5604SAndi Kleen 2123a8b690f9STom Herbert /* 2124a8b690f9STom Herbert * Get first established socket starting from bucket given in st->bucket. 
2125a8b690f9STom Herbert * If st->bucket is zero, the very first socket in the hash is returned. 2126a8b690f9STom Herbert */ 21271da177e4SLinus Torvalds static void *established_get_first(struct seq_file *seq) 21281da177e4SLinus Torvalds { 21291da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2130a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 21311da177e4SLinus Torvalds void *rc = NULL; 21321da177e4SLinus Torvalds 2133a8b690f9STom Herbert st->offset = 0; 2134a8b690f9STom Herbert for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 21351da177e4SLinus Torvalds struct sock *sk; 21363ab5aee7SEric Dumazet struct hlist_nulls_node *node; 21378feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 21389db66bdcSEric Dumazet spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 21391da177e4SLinus Torvalds 21406eac5604SAndi Kleen /* Lockless fast path for the common case of empty buckets */ 21416eac5604SAndi Kleen if (empty_bucket(st)) 21426eac5604SAndi Kleen continue; 21436eac5604SAndi Kleen 21449db66bdcSEric Dumazet spin_lock_bh(lock); 21453ab5aee7SEric Dumazet sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 2146f40c8174SDaniel Lezcano if (sk->sk_family != st->family || 2147878628fbSYOSHIFUJI Hideaki !net_eq(sock_net(sk), net)) { 21481da177e4SLinus Torvalds continue; 21491da177e4SLinus Torvalds } 21501da177e4SLinus Torvalds rc = sk; 21511da177e4SLinus Torvalds goto out; 21521da177e4SLinus Torvalds } 21531da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_TIME_WAIT; 21548feaf0c0SArnaldo Carvalho de Melo inet_twsk_for_each(tw, node, 2155dbca9b27SEric Dumazet &tcp_hashinfo.ehash[st->bucket].twchain) { 215628518fc1SPavel Emelyanov if (tw->tw_family != st->family || 2157878628fbSYOSHIFUJI Hideaki !net_eq(twsk_net(tw), net)) { 21581da177e4SLinus Torvalds continue; 21591da177e4SLinus Torvalds } 21601da177e4SLinus Torvalds rc = tw; 21611da177e4SLinus Torvalds goto out; 21621da177e4SLinus Torvalds } 
21639db66bdcSEric Dumazet spin_unlock_bh(lock); 21641da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 21651da177e4SLinus Torvalds } 21661da177e4SLinus Torvalds out: 21671da177e4SLinus Torvalds return rc; 21681da177e4SLinus Torvalds } 21691da177e4SLinus Torvalds 21701da177e4SLinus Torvalds static void *established_get_next(struct seq_file *seq, void *cur) 21711da177e4SLinus Torvalds { 21721da177e4SLinus Torvalds struct sock *sk = cur; 21738feaf0c0SArnaldo Carvalho de Melo struct inet_timewait_sock *tw; 21743ab5aee7SEric Dumazet struct hlist_nulls_node *node; 21751da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2176a4146b1bSDenis V. Lunev struct net *net = seq_file_net(seq); 21771da177e4SLinus Torvalds 21781da177e4SLinus Torvalds ++st->num; 2179a8b690f9STom Herbert ++st->offset; 21801da177e4SLinus Torvalds 21811da177e4SLinus Torvalds if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 21821da177e4SLinus Torvalds tw = cur; 21831da177e4SLinus Torvalds tw = tw_next(tw); 21841da177e4SLinus Torvalds get_tw: 2185878628fbSYOSHIFUJI Hideaki while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { 21861da177e4SLinus Torvalds tw = tw_next(tw); 21871da177e4SLinus Torvalds } 21881da177e4SLinus Torvalds if (tw) { 21891da177e4SLinus Torvalds cur = tw; 21901da177e4SLinus Torvalds goto out; 21911da177e4SLinus Torvalds } 21929db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 21931da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 21941da177e4SLinus Torvalds 21956eac5604SAndi Kleen /* Look for next non empty bucket */ 2196a8b690f9STom Herbert st->offset = 0; 2197f373b53bSEric Dumazet while (++st->bucket <= tcp_hashinfo.ehash_mask && 21986eac5604SAndi Kleen empty_bucket(st)) 21996eac5604SAndi Kleen ; 2200f373b53bSEric Dumazet if (st->bucket > tcp_hashinfo.ehash_mask) 22016eac5604SAndi Kleen return NULL; 22026eac5604SAndi Kleen 22039db66bdcSEric Dumazet spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, 
st->bucket)); 22043ab5aee7SEric Dumazet sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); 22051da177e4SLinus Torvalds } else 22063ab5aee7SEric Dumazet sk = sk_nulls_next(sk); 22071da177e4SLinus Torvalds 22083ab5aee7SEric Dumazet sk_nulls_for_each_from(sk, node) { 2209878628fbSYOSHIFUJI Hideaki if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 22101da177e4SLinus Torvalds goto found; 22111da177e4SLinus Torvalds } 22121da177e4SLinus Torvalds 22131da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_TIME_WAIT; 2214dbca9b27SEric Dumazet tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); 22151da177e4SLinus Torvalds goto get_tw; 22161da177e4SLinus Torvalds found: 22171da177e4SLinus Torvalds cur = sk; 22181da177e4SLinus Torvalds out: 22191da177e4SLinus Torvalds return cur; 22201da177e4SLinus Torvalds } 22211da177e4SLinus Torvalds 22221da177e4SLinus Torvalds static void *established_get_idx(struct seq_file *seq, loff_t pos) 22231da177e4SLinus Torvalds { 2224a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2225a8b690f9STom Herbert void *rc; 2226a8b690f9STom Herbert 2227a8b690f9STom Herbert st->bucket = 0; 2228a8b690f9STom Herbert rc = established_get_first(seq); 22291da177e4SLinus Torvalds 22301da177e4SLinus Torvalds while (rc && pos) { 22311da177e4SLinus Torvalds rc = established_get_next(seq, rc); 22321da177e4SLinus Torvalds --pos; 22331da177e4SLinus Torvalds } 22341da177e4SLinus Torvalds return rc; 22351da177e4SLinus Torvalds } 22361da177e4SLinus Torvalds 22371da177e4SLinus Torvalds static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 22381da177e4SLinus Torvalds { 22391da177e4SLinus Torvalds void *rc; 22401da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 22411da177e4SLinus Torvalds 22421da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 22431da177e4SLinus Torvalds rc = listening_get_idx(seq, &pos); 22441da177e4SLinus Torvalds 22451da177e4SLinus Torvalds if (!rc) { 22461da177e4SLinus Torvalds st->state = 
TCP_SEQ_STATE_ESTABLISHED; 22471da177e4SLinus Torvalds rc = established_get_idx(seq, pos); 22481da177e4SLinus Torvalds } 22491da177e4SLinus Torvalds 22501da177e4SLinus Torvalds return rc; 22511da177e4SLinus Torvalds } 22521da177e4SLinus Torvalds 2253a8b690f9STom Herbert static void *tcp_seek_last_pos(struct seq_file *seq) 2254a8b690f9STom Herbert { 2255a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 2256a8b690f9STom Herbert int offset = st->offset; 2257a8b690f9STom Herbert int orig_num = st->num; 2258a8b690f9STom Herbert void *rc = NULL; 2259a8b690f9STom Herbert 2260a8b690f9STom Herbert switch (st->state) { 2261a8b690f9STom Herbert case TCP_SEQ_STATE_OPENREQ: 2262a8b690f9STom Herbert case TCP_SEQ_STATE_LISTENING: 2263a8b690f9STom Herbert if (st->bucket >= INET_LHTABLE_SIZE) 2264a8b690f9STom Herbert break; 2265a8b690f9STom Herbert st->state = TCP_SEQ_STATE_LISTENING; 2266a8b690f9STom Herbert rc = listening_get_next(seq, NULL); 2267a8b690f9STom Herbert while (offset-- && rc) 2268a8b690f9STom Herbert rc = listening_get_next(seq, rc); 2269a8b690f9STom Herbert if (rc) 2270a8b690f9STom Herbert break; 2271a8b690f9STom Herbert st->bucket = 0; 2272a8b690f9STom Herbert /* Fallthrough */ 2273a8b690f9STom Herbert case TCP_SEQ_STATE_ESTABLISHED: 2274a8b690f9STom Herbert case TCP_SEQ_STATE_TIME_WAIT: 2275a8b690f9STom Herbert st->state = TCP_SEQ_STATE_ESTABLISHED; 2276a8b690f9STom Herbert if (st->bucket > tcp_hashinfo.ehash_mask) 2277a8b690f9STom Herbert break; 2278a8b690f9STom Herbert rc = established_get_first(seq); 2279a8b690f9STom Herbert while (offset-- && rc) 2280a8b690f9STom Herbert rc = established_get_next(seq, rc); 2281a8b690f9STom Herbert } 2282a8b690f9STom Herbert 2283a8b690f9STom Herbert st->num = orig_num; 2284a8b690f9STom Herbert 2285a8b690f9STom Herbert return rc; 2286a8b690f9STom Herbert } 2287a8b690f9STom Herbert 22881da177e4SLinus Torvalds static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 22891da177e4SLinus Torvalds { 
22901da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 2291a8b690f9STom Herbert void *rc; 2292a8b690f9STom Herbert 2293a8b690f9STom Herbert if (*pos && *pos == st->last_pos) { 2294a8b690f9STom Herbert rc = tcp_seek_last_pos(seq); 2295a8b690f9STom Herbert if (rc) 2296a8b690f9STom Herbert goto out; 2297a8b690f9STom Herbert } 2298a8b690f9STom Herbert 22991da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_LISTENING; 23001da177e4SLinus Torvalds st->num = 0; 2301a8b690f9STom Herbert st->bucket = 0; 2302a8b690f9STom Herbert st->offset = 0; 2303a8b690f9STom Herbert rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2304a8b690f9STom Herbert 2305a8b690f9STom Herbert out: 2306a8b690f9STom Herbert st->last_pos = *pos; 2307a8b690f9STom Herbert return rc; 23081da177e4SLinus Torvalds } 23091da177e4SLinus Torvalds 23101da177e4SLinus Torvalds static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 23111da177e4SLinus Torvalds { 2312a8b690f9STom Herbert struct tcp_iter_state *st = seq->private; 23131da177e4SLinus Torvalds void *rc = NULL; 23141da177e4SLinus Torvalds 23151da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 23161da177e4SLinus Torvalds rc = tcp_get_idx(seq, 0); 23171da177e4SLinus Torvalds goto out; 23181da177e4SLinus Torvalds } 23191da177e4SLinus Torvalds 23201da177e4SLinus Torvalds switch (st->state) { 23211da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 23221da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 23231da177e4SLinus Torvalds rc = listening_get_next(seq, v); 23241da177e4SLinus Torvalds if (!rc) { 23251da177e4SLinus Torvalds st->state = TCP_SEQ_STATE_ESTABLISHED; 2326a8b690f9STom Herbert st->bucket = 0; 2327a8b690f9STom Herbert st->offset = 0; 23281da177e4SLinus Torvalds rc = established_get_first(seq); 23291da177e4SLinus Torvalds } 23301da177e4SLinus Torvalds break; 23311da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 23321da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 23331da177e4SLinus Torvalds rc = 
established_get_next(seq, v); 23341da177e4SLinus Torvalds break; 23351da177e4SLinus Torvalds } 23361da177e4SLinus Torvalds out: 23371da177e4SLinus Torvalds ++*pos; 2338a8b690f9STom Herbert st->last_pos = *pos; 23391da177e4SLinus Torvalds return rc; 23401da177e4SLinus Torvalds } 23411da177e4SLinus Torvalds 23421da177e4SLinus Torvalds static void tcp_seq_stop(struct seq_file *seq, void *v) 23431da177e4SLinus Torvalds { 23441da177e4SLinus Torvalds struct tcp_iter_state *st = seq->private; 23451da177e4SLinus Torvalds 23461da177e4SLinus Torvalds switch (st->state) { 23471da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 23481da177e4SLinus Torvalds if (v) { 2349463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2350463c84b9SArnaldo Carvalho de Melo read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 23511da177e4SLinus Torvalds } 23521da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 23531da177e4SLinus Torvalds if (v != SEQ_START_TOKEN) 23545caea4eaSEric Dumazet spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 23551da177e4SLinus Torvalds break; 23561da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 23571da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 23581da177e4SLinus Torvalds if (v) 23599db66bdcSEric Dumazet spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 23601da177e4SLinus Torvalds break; 23611da177e4SLinus Torvalds } 23621da177e4SLinus Torvalds } 23631da177e4SLinus Torvalds 236473cb88ecSArjan van de Ven int tcp_seq_open(struct inode *inode, struct file *file) 23651da177e4SLinus Torvalds { 23661da177e4SLinus Torvalds struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 23671da177e4SLinus Torvalds struct tcp_iter_state *s; 236852d6f3f1SDenis V. Lunev int err; 23691da177e4SLinus Torvalds 237052d6f3f1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 237152d6f3f1SDenis V. Lunev sizeof(struct tcp_iter_state)); 237252d6f3f1SDenis V. Lunev if (err < 0) 237352d6f3f1SDenis V. 
Lunev return err; 2374f40c8174SDaniel Lezcano 237552d6f3f1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 23761da177e4SLinus Torvalds s->family = afinfo->family; 2377a8b690f9STom Herbert s->last_pos = 0; 2378f40c8174SDaniel Lezcano return 0; 2379f40c8174SDaniel Lezcano } 238073cb88ecSArjan van de Ven EXPORT_SYMBOL(tcp_seq_open); 2381f40c8174SDaniel Lezcano 23826f8b13bcSDaniel Lezcano int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) 23831da177e4SLinus Torvalds { 23841da177e4SLinus Torvalds int rc = 0; 23851da177e4SLinus Torvalds struct proc_dir_entry *p; 23861da177e4SLinus Torvalds 23879427c4b3SDenis V. Lunev afinfo->seq_ops.start = tcp_seq_start; 23889427c4b3SDenis V. Lunev afinfo->seq_ops.next = tcp_seq_next; 23899427c4b3SDenis V. Lunev afinfo->seq_ops.stop = tcp_seq_stop; 23909427c4b3SDenis V. Lunev 239184841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 239273cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 239384841c3cSDenis V. 
Lunev if (!p) 23941da177e4SLinus Torvalds rc = -ENOMEM; 23951da177e4SLinus Torvalds return rc; 23961da177e4SLinus Torvalds } 23974bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_register); 23981da177e4SLinus Torvalds 23996f8b13bcSDaniel Lezcano void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 24001da177e4SLinus Torvalds { 24016f8b13bcSDaniel Lezcano proc_net_remove(net, afinfo->name); 24021da177e4SLinus Torvalds } 24034bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_proc_unregister); 24041da177e4SLinus Torvalds 2405cf533ea5SEric Dumazet static void get_openreq4(const struct sock *sk, const struct request_sock *req, 24065e659e4cSPavel Emelyanov struct seq_file *f, int i, int uid, int *len) 24071da177e4SLinus Torvalds { 24082e6599cbSArnaldo Carvalho de Melo const struct inet_request_sock *ireq = inet_rsk(req); 24091da177e4SLinus Torvalds int ttd = req->expires - jiffies; 24101da177e4SLinus Torvalds 24115e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 241271338aa7SDan Rosenberg " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", 24131da177e4SLinus Torvalds i, 24142e6599cbSArnaldo Carvalho de Melo ireq->loc_addr, 2415c720c7e8SEric Dumazet ntohs(inet_sk(sk)->inet_sport), 24162e6599cbSArnaldo Carvalho de Melo ireq->rmt_addr, 24172e6599cbSArnaldo Carvalho de Melo ntohs(ireq->rmt_port), 24181da177e4SLinus Torvalds TCP_SYN_RECV, 24191da177e4SLinus Torvalds 0, 0, /* could print option size, but that is af dependent. 
*/ 24201da177e4SLinus Torvalds 1, /* timers active (only the expire timer) */ 24211da177e4SLinus Torvalds jiffies_to_clock_t(ttd), 24221da177e4SLinus Torvalds req->retrans, 24231da177e4SLinus Torvalds uid, 24241da177e4SLinus Torvalds 0, /* non standard timer */ 24251da177e4SLinus Torvalds 0, /* open_requests have no inode */ 24261da177e4SLinus Torvalds atomic_read(&sk->sk_refcnt), 24275e659e4cSPavel Emelyanov req, 24285e659e4cSPavel Emelyanov len); 24291da177e4SLinus Torvalds } 24301da177e4SLinus Torvalds 24315e659e4cSPavel Emelyanov static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) 24321da177e4SLinus Torvalds { 24331da177e4SLinus Torvalds int timer_active; 24341da177e4SLinus Torvalds unsigned long timer_expires; 2435cf533ea5SEric Dumazet const struct tcp_sock *tp = tcp_sk(sk); 2436cf4c6bf8SIlpo Järvinen const struct inet_connection_sock *icsk = inet_csk(sk); 2437cf533ea5SEric Dumazet const struct inet_sock *inet = inet_sk(sk); 2438c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2439c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2440c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2441c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 244249d09007SEric Dumazet int rx_queue; 24431da177e4SLinus Torvalds 2444463c84b9SArnaldo Carvalho de Melo if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 24451da177e4SLinus Torvalds timer_active = 1; 2446463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2447463c84b9SArnaldo Carvalho de Melo } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 24481da177e4SLinus Torvalds timer_active = 4; 2449463c84b9SArnaldo Carvalho de Melo timer_expires = icsk->icsk_timeout; 2450cf4c6bf8SIlpo Järvinen } else if (timer_pending(&sk->sk_timer)) { 24511da177e4SLinus Torvalds timer_active = 2; 2452cf4c6bf8SIlpo Järvinen timer_expires = sk->sk_timer.expires; 24531da177e4SLinus Torvalds } else { 24541da177e4SLinus Torvalds timer_active = 0; 24551da177e4SLinus Torvalds 
timer_expires = jiffies; 24561da177e4SLinus Torvalds } 24571da177e4SLinus Torvalds 245849d09007SEric Dumazet if (sk->sk_state == TCP_LISTEN) 245949d09007SEric Dumazet rx_queue = sk->sk_ack_backlog; 246049d09007SEric Dumazet else 246149d09007SEric Dumazet /* 246249d09007SEric Dumazet * because we dont lock socket, we might find a transient negative value 246349d09007SEric Dumazet */ 246449d09007SEric Dumazet rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); 246549d09007SEric Dumazet 24665e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " 246771338aa7SDan Rosenberg "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n", 2468cf4c6bf8SIlpo Järvinen i, src, srcp, dest, destp, sk->sk_state, 246947da8ee6SSridhar Samudrala tp->write_seq - tp->snd_una, 247049d09007SEric Dumazet rx_queue, 24711da177e4SLinus Torvalds timer_active, 24721da177e4SLinus Torvalds jiffies_to_clock_t(timer_expires - jiffies), 2473463c84b9SArnaldo Carvalho de Melo icsk->icsk_retransmits, 2474cf4c6bf8SIlpo Järvinen sock_i_uid(sk), 24756687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out, 2476cf4c6bf8SIlpo Järvinen sock_i_ino(sk), 2477cf4c6bf8SIlpo Järvinen atomic_read(&sk->sk_refcnt), sk, 24787be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_rto), 24797be87351SStephen Hemminger jiffies_to_clock_t(icsk->icsk_ack.ato), 2480463c84b9SArnaldo Carvalho de Melo (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, 24811da177e4SLinus Torvalds tp->snd_cwnd, 24820b6a05c1SIlpo Järvinen tcp_in_initial_slowstart(tp) ? 
-1 : tp->snd_ssthresh, 24835e659e4cSPavel Emelyanov len); 24841da177e4SLinus Torvalds } 24851da177e4SLinus Torvalds 2486cf533ea5SEric Dumazet static void get_timewait4_sock(const struct inet_timewait_sock *tw, 24875e659e4cSPavel Emelyanov struct seq_file *f, int i, int *len) 24881da177e4SLinus Torvalds { 248923f33c2dSAl Viro __be32 dest, src; 24901da177e4SLinus Torvalds __u16 destp, srcp; 24911da177e4SLinus Torvalds int ttd = tw->tw_ttd - jiffies; 24921da177e4SLinus Torvalds 24931da177e4SLinus Torvalds if (ttd < 0) 24941da177e4SLinus Torvalds ttd = 0; 24951da177e4SLinus Torvalds 24961da177e4SLinus Torvalds dest = tw->tw_daddr; 24971da177e4SLinus Torvalds src = tw->tw_rcv_saddr; 24981da177e4SLinus Torvalds destp = ntohs(tw->tw_dport); 24991da177e4SLinus Torvalds srcp = ntohs(tw->tw_sport); 25001da177e4SLinus Torvalds 25015e659e4cSPavel Emelyanov seq_printf(f, "%4d: %08X:%04X %08X:%04X" 250271338aa7SDan Rosenberg " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", 25031da177e4SLinus Torvalds i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 25041da177e4SLinus Torvalds 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, 25055e659e4cSPavel Emelyanov atomic_read(&tw->tw_refcnt), tw, len); 25061da177e4SLinus Torvalds } 25071da177e4SLinus Torvalds 25081da177e4SLinus Torvalds #define TMPSZ 150 25091da177e4SLinus Torvalds 25101da177e4SLinus Torvalds static int tcp4_seq_show(struct seq_file *seq, void *v) 25111da177e4SLinus Torvalds { 25121da177e4SLinus Torvalds struct tcp_iter_state *st; 25135e659e4cSPavel Emelyanov int len; 25141da177e4SLinus Torvalds 25151da177e4SLinus Torvalds if (v == SEQ_START_TOKEN) { 25161da177e4SLinus Torvalds seq_printf(seq, "%-*s\n", TMPSZ - 1, 25171da177e4SLinus Torvalds " sl local_address rem_address st tx_queue " 25181da177e4SLinus Torvalds "rx_queue tr tm->when retrnsmt uid timeout " 25191da177e4SLinus Torvalds "inode"); 25201da177e4SLinus Torvalds goto out; 25211da177e4SLinus Torvalds } 25221da177e4SLinus Torvalds st = seq->private; 25231da177e4SLinus 
Torvalds /* pick the row helper for the iterator state; each helper receives &len as its last argument */ 25241da177e4SLinus Torvalds switch (st->state) { 25251da177e4SLinus Torvalds case TCP_SEQ_STATE_LISTENING: 25261da177e4SLinus Torvalds case TCP_SEQ_STATE_ESTABLISHED: 25275e659e4cSPavel Emelyanov get_tcp4_sock(v, seq, st->num, &len); 25281da177e4SLinus Torvalds break; 25291da177e4SLinus Torvalds case TCP_SEQ_STATE_OPENREQ: 25305e659e4cSPavel Emelyanov get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); 25311da177e4SLinus Torvalds break; 25321da177e4SLinus Torvalds case TCP_SEQ_STATE_TIME_WAIT: 25335e659e4cSPavel Emelyanov get_timewait4_sock(v, seq, st->num, &len); 25341da177e4SLinus Torvalds break; 25351da177e4SLinus Torvalds } /* pad with an empty string printed in a field of TMPSZ - 1 - len columns, then end the row with a newline */ 25365e659e4cSPavel Emelyanov seq_printf(seq, "%*s\n", TMPSZ - 1 - len, ""); 25371da177e4SLinus Torvalds out: 25381da177e4SLinus Torvalds return 0; 25391da177e4SLinus Torvalds } 25401da177e4SLinus Torvalds /* tcp_afinfo_seq_fops: file operations referenced by tcp4_seq_afinfo.seq_fops; open goes to tcp_seq_open, read and llseek to the seq_file helpers, release to seq_release_net */ 254173cb88ecSArjan van de Ven static const struct file_operations tcp_afinfo_seq_fops = { 254273cb88ecSArjan van de Ven .owner = THIS_MODULE, 254373cb88ecSArjan van de Ven .open = tcp_seq_open, 254473cb88ecSArjan van de Ven .read = seq_read, 254573cb88ecSArjan van de Ven .llseek = seq_lseek, 254673cb88ecSArjan van de Ven .release = seq_release_net 254773cb88ecSArjan van de Ven }; 254873cb88ecSArjan van de Ven /* tcp4_seq_afinfo: descriptor handed to tcp_proc_register and tcp_proc_unregister; carries the entry name, the AF_INET family, the file operations and tcp4_seq_show as the show callback */ 25491da177e4SLinus Torvalds static struct tcp_seq_afinfo tcp4_seq_afinfo = { 25501da177e4SLinus Torvalds .name = "tcp", 25511da177e4SLinus Torvalds .family = AF_INET, 255273cb88ecSArjan van de Ven .seq_fops = &tcp_afinfo_seq_fops, 25539427c4b3SDenis V. Lunev .seq_ops = { 25549427c4b3SDenis V. Lunev .show = tcp4_seq_show, 25559427c4b3SDenis V.
Lunev }, 25561da177e4SLinus Torvalds }; 25571da177e4SLinus Torvalds /* tcp4_proc_init_net: per-namespace init hook; registers tcp4_seq_afinfo for net through tcp_proc_register and returns that result */ 25582c8c1e72SAlexey Dobriyan static int __net_init tcp4_proc_init_net(struct net *net) 2559757764f6SPavel Emelyanov { 2560757764f6SPavel Emelyanov return tcp_proc_register(net, &tcp4_seq_afinfo); 2561757764f6SPavel Emelyanov } 2562757764f6SPavel Emelyanov /* tcp4_proc_exit_net: per-namespace exit hook; passes the same descriptor to tcp_proc_unregister */ 25632c8c1e72SAlexey Dobriyan static void __net_exit tcp4_proc_exit_net(struct net *net) 2564757764f6SPavel Emelyanov { 2565757764f6SPavel Emelyanov tcp_proc_unregister(net, &tcp4_seq_afinfo); 2566757764f6SPavel Emelyanov } 2567757764f6SPavel Emelyanov /* tcp4_net_ops: pairs the init and exit hooks defined just before it */ 2568757764f6SPavel Emelyanov static struct pernet_operations tcp4_net_ops = { 2569757764f6SPavel Emelyanov .init = tcp4_proc_init_net, 2570757764f6SPavel Emelyanov .exit = tcp4_proc_exit_net, 2571757764f6SPavel Emelyanov }; 2572757764f6SPavel Emelyanov /* tcp4_proc_init: registers tcp4_net_ops with register_pernet_subsys and returns that result */ 25731da177e4SLinus Torvalds int __init tcp4_proc_init(void) 25741da177e4SLinus Torvalds { 2575757764f6SPavel Emelyanov return register_pernet_subsys(&tcp4_net_ops); 25761da177e4SLinus Torvalds } 25771da177e4SLinus Torvalds /* tcp4_proc_exit: unregisters tcp4_net_ops with unregister_pernet_subsys */ 25781da177e4SLinus Torvalds void tcp4_proc_exit(void) 25791da177e4SLinus Torvalds { 2580757764f6SPavel Emelyanov unregister_pernet_subsys(&tcp4_net_ops); 25811da177e4SLinus Torvalds } 25821da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 25831da177e4SLinus Torvalds /* tcp4_gro_receive: inspects skb->ip_summed before handing the packet to tcp_gro_receive. For CHECKSUM_COMPLETE, a zero result from tcp_v4_check over the GRO length, the IPv4 source and destination addresses and skb->csum upgrades the skb to CHECKSUM_UNNECESSARY; a nonzero result falls through to the CHECKSUM_NONE case. Any other ip_summed value skips the switch body and goes straight to tcp_gro_receive. */ 2584bf296b12SHerbert Xu struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2585bf296b12SHerbert Xu { 2586b71d1d42SEric Dumazet const struct iphdr *iph = skb_gro_network_header(skb); 2587bf296b12SHerbert Xu 2588bf296b12SHerbert Xu switch (skb->ip_summed) { 2589bf296b12SHerbert Xu case CHECKSUM_COMPLETE: 259086911732SHerbert Xu if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, 2591bf296b12SHerbert Xu skb->csum)) { 2592bf296b12SHerbert Xu skb->ip_summed = CHECKSUM_UNNECESSARY; 2593bf296b12SHerbert Xu break; 2594bf296b12SHerbert Xu } 2595bf296b12SHerbert Xu 2596bf296b12SHerbert Xu /* fall through */ 2597bf296b12SHerbert Xu case
CHECKSUM_NONE: /* no verified checksum: set the flush flag in the GRO control block and return NULL without calling tcp_gro_receive */ 2598bf296b12SHerbert Xu NAPI_GRO_CB(skb)->flush = 1; 2599bf296b12SHerbert Xu return NULL; 2600bf296b12SHerbert Xu } 2601bf296b12SHerbert Xu 2602bf296b12SHerbert Xu return tcp_gro_receive(head, skb); 2603bf296b12SHerbert Xu } 2604bf296b12SHerbert Xu /* tcp4_gro_complete: rewrites th->check as the bitwise complement of tcp_v4_check computed over the length from the transport offset to the end of the skb, the IPv4 source and destination addresses and a zero base sum; sets gso_type to SKB_GSO_TCPV4; then returns the result of tcp_gro_complete */ 2605bf296b12SHerbert Xu int tcp4_gro_complete(struct sk_buff *skb) 2606bf296b12SHerbert Xu { 2607b71d1d42SEric Dumazet const struct iphdr *iph = ip_hdr(skb); 2608bf296b12SHerbert Xu struct tcphdr *th = tcp_hdr(skb); 2609bf296b12SHerbert Xu 2610bf296b12SHerbert Xu th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), 2611bf296b12SHerbert Xu iph->saddr, iph->daddr, 0); 2612bf296b12SHerbert Xu skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; 2613bf296b12SHerbert Xu 2614bf296b12SHerbert Xu return tcp_gro_complete(skb); 2615bf296b12SHerbert Xu } 2616bf296b12SHerbert Xu /* tcp_prot: the struct proto instance for TCP over IPv4; a table of handler pointers, accounting pointers and sizing fields, exported with EXPORT_SYMBOL after the initializer */ 26171da177e4SLinus Torvalds struct proto tcp_prot = { 26181da177e4SLinus Torvalds .name = "TCP", 26191da177e4SLinus Torvalds .owner = THIS_MODULE, 26201da177e4SLinus Torvalds .close = tcp_close, 26211da177e4SLinus Torvalds .connect = tcp_v4_connect, 26221da177e4SLinus Torvalds .disconnect = tcp_disconnect, 2623463c84b9SArnaldo Carvalho de Melo .accept = inet_csk_accept, 26241da177e4SLinus Torvalds .ioctl = tcp_ioctl, 26251da177e4SLinus Torvalds .init = tcp_v4_init_sock, 26261da177e4SLinus Torvalds .destroy = tcp_v4_destroy_sock, 26271da177e4SLinus Torvalds .shutdown = tcp_shutdown, 26281da177e4SLinus Torvalds .setsockopt = tcp_setsockopt, 26291da177e4SLinus Torvalds .getsockopt = tcp_getsockopt, 26301da177e4SLinus Torvalds .recvmsg = tcp_recvmsg, 26317ba42910SChangli Gao .sendmsg = tcp_sendmsg, 26327ba42910SChangli Gao .sendpage = tcp_sendpage, 26331da177e4SLinus Torvalds .backlog_rcv = tcp_v4_do_rcv, 2634ab1e0a13SArnaldo Carvalho de Melo .hash = inet_hash, 2635ab1e0a13SArnaldo Carvalho de Melo .unhash = inet_unhash, 2636ab1e0a13SArnaldo Carvalho de Melo .get_port = inet_csk_get_port, 26371da177e4SLinus Torvalds .enter_memory_pressure =
tcp_enter_memory_pressure, /* the following members point at the shared TCP accounting objects and sysctl arrays rather than copying them */ 26381da177e4SLinus Torvalds .sockets_allocated = &tcp_sockets_allocated, 26390a5578cfSArnaldo Carvalho de Melo .orphan_count = &tcp_orphan_count, 26401da177e4SLinus Torvalds .memory_allocated = &tcp_memory_allocated, 26411da177e4SLinus Torvalds .memory_pressure = &tcp_memory_pressure, 26421da177e4SLinus Torvalds .sysctl_wmem = sysctl_tcp_wmem, 26431da177e4SLinus Torvalds .sysctl_rmem = sysctl_tcp_rmem, 26441da177e4SLinus Torvalds .max_header = MAX_TCP_HEADER, 26451da177e4SLinus Torvalds .obj_size = sizeof(struct tcp_sock), 26463ab5aee7SEric Dumazet .slab_flags = SLAB_DESTROY_BY_RCU, 26476d6ee43eSArnaldo Carvalho de Melo .twsk_prot = &tcp_timewait_sock_ops, 264860236fddSArnaldo Carvalho de Melo .rsk_prot = &tcp_request_sock_ops, 264939d8cda7SPavel Emelyanov .h.hashinfo = &tcp_hashinfo, 26507ba42910SChangli Gao .no_autobind = true, /* compat socket-option handlers are wired only when CONFIG_COMPAT is defined */ 2651543d9cfeSArnaldo Carvalho de Melo #ifdef CONFIG_COMPAT 2652543d9cfeSArnaldo Carvalho de Melo .compat_setsockopt = compat_tcp_setsockopt, 2653543d9cfeSArnaldo Carvalho de Melo .compat_getsockopt = compat_tcp_getsockopt, 2654543d9cfeSArnaldo Carvalho de Melo #endif /* cgroup hooks are wired only when CONFIG_CGROUP_MEM_RES_CTLR_KMEM is defined */ 2655d1a4c0b3SGlauber Costa #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 2656d1a4c0b3SGlauber Costa .init_cgroup = tcp_init_cgroup, 2657d1a4c0b3SGlauber Costa .destroy_cgroup = tcp_destroy_cgroup, 2658d1a4c0b3SGlauber Costa .proto_cgroup = tcp_proto_cgroup, 2659d1a4c0b3SGlauber Costa #endif 26601da177e4SLinus Torvalds }; 26614bc2f18bSEric Dumazet EXPORT_SYMBOL(tcp_prot); 26621da177e4SLinus Torvalds /* tcp_sk_init: per-namespace init hook; asks inet_ctl_sock_create for a PF_INET, SOCK_RAW, IPPROTO_TCP socket stored in net->ipv4.tcp_sock and returns the result of that call */ 2663046ee902SDenis V. Lunev static int __net_init tcp_sk_init(struct net *net) 2664046ee902SDenis V. Lunev { 2665046ee902SDenis V. Lunev return inet_ctl_sock_create(&net->ipv4.tcp_sock, 2666046ee902SDenis V. Lunev PF_INET, SOCK_RAW, IPPROTO_TCP, net); 2667046ee902SDenis V. Lunev } 2668046ee902SDenis V. Lunev /* tcp_sk_exit: per-namespace exit hook; hands net->ipv4.tcp_sock to inet_ctl_sock_destroy */ 2669046ee902SDenis V. Lunev static void __net_exit tcp_sk_exit(struct net *net) 2670046ee902SDenis V. Lunev { 2671046ee902SDenis V.
Lunev inet_ctl_sock_destroy(net->ipv4.tcp_sock); 2672b099ce26SEric W. Biederman } 2673b099ce26SEric W. Biederman /* tcp_sk_exit_batch: batched exit hook; calls inet_twsk_purge on tcp_hashinfo and tcp_death_row for AF_INET. The net_exit_list argument is not read in the body. */ 2674b099ce26SEric W. Biederman static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) 2675b099ce26SEric W. Biederman { 2676b099ce26SEric W. Biederman inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); 2677046ee902SDenis V. Lunev } 2678046ee902SDenis V. Lunev /* tcp_sk_ops: pernet_operations wiring tcp_sk_init, tcp_sk_exit and tcp_sk_exit_batch */ 2679046ee902SDenis V. Lunev static struct pernet_operations __net_initdata tcp_sk_ops = { 2680046ee902SDenis V. Lunev .init = tcp_sk_init, 2681046ee902SDenis V. Lunev .exit = tcp_sk_exit, 2682b099ce26SEric W. Biederman .exit_batch = tcp_sk_exit_batch, 2683046ee902SDenis V. Lunev }; 2684046ee902SDenis V. Lunev /* tcp_v4_init: initialises tcp_hashinfo with inet_hashinfo_init, then registers tcp_sk_ops; a nonzero return from register_pernet_subsys is treated as fatal and ends in panic */ 26859b0f976fSDenis V. Lunev void __init tcp_v4_init(void) 26861da177e4SLinus Torvalds { 26875caea4eaSEric Dumazet inet_hashinfo_init(&tcp_hashinfo); 26886a1b3054SEric W. Biederman if (register_pernet_subsys(&tcp_sk_ops)) 26891da177e4SLinus Torvalds panic("Failed to create the TCP control socket.\n"); 26901da177e4SLinus Torvalds } 2691