xref: /linux/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c (revision 9410645520e9b820069761f3450ef6661418e279)
1a7471224SKuniyuki Iwashima // SPDX-License-Identifier: GPL-2.0
2a7471224SKuniyuki Iwashima /* Copyright Amazon.com Inc. or its affiliates. */
3a7471224SKuniyuki Iwashima 
4a7471224SKuniyuki Iwashima #include "vmlinux.h"
5a7471224SKuniyuki Iwashima 
6a7471224SKuniyuki Iwashima #include <bpf/bpf_helpers.h>
7a7471224SKuniyuki Iwashima #include <bpf/bpf_endian.h>
8a7471224SKuniyuki Iwashima #include "bpf_tracing_net.h"
9a7471224SKuniyuki Iwashima #include "bpf_kfuncs.h"
10a7471224SKuniyuki Iwashima #include "test_siphash.h"
11a7471224SKuniyuki Iwashima #include "test_tcp_custom_syncookie.h"
12a1297871SJiri Olsa #include "bpf_misc.h"
13a7471224SKuniyuki Iwashima 
14b546b575SEduard Zingerman #define MAX_PACKET_OFF 0xffff
15b546b575SEduard Zingerman 
16a7471224SKuniyuki Iwashima /* Hash is calculated for each client and split into ISN and TS.
17a7471224SKuniyuki Iwashima  *
18a7471224SKuniyuki Iwashima  *       MSB                                   LSB
19a7471224SKuniyuki Iwashima  * ISN:  | 31 ... 8 | 7 6 |   5 |    4 | 3 2 1 0 |
20a7471224SKuniyuki Iwashima  *       |   Hash_1 | MSS | ECN | SACK |  WScale |
21a7471224SKuniyuki Iwashima  *
22a7471224SKuniyuki Iwashima  * TS:   | 31 ... 8 |          7 ... 0           |
23a7471224SKuniyuki Iwashima  *       |   Random |           Hash_2           |
24a7471224SKuniyuki Iwashima  */
25a7471224SKuniyuki Iwashima #define COOKIE_BITS	8
26a7471224SKuniyuki Iwashima #define COOKIE_MASK	(((__u32)1 << COOKIE_BITS) - 1)
27a7471224SKuniyuki Iwashima 
/* Option bits kept in the low six bits of the ISN cookie (see the layout
 * comment above): bits 3..0 hold the window scale, bit 4 SACK, bit 5 ECN.
 */
enum {
	/* 0xf is invalid thus means that SYN did not have WScale. */
	BPF_SYNCOOKIE_WSCALE_MASK	= 0x0f,
	BPF_SYNCOOKIE_SACK		= 0x10,
	BPF_SYNCOOKIE_ECN		= 0x20,
};
34a7471224SKuniyuki Iwashima 
35a7471224SKuniyuki Iwashima #define MSS_LOCAL_IPV4	65495
36a7471224SKuniyuki Iwashima #define MSS_LOCAL_IPV6	65476
37a7471224SKuniyuki Iwashima 
38a7471224SKuniyuki Iwashima const __u16 msstab4[] = {
39a7471224SKuniyuki Iwashima 	536,
40a7471224SKuniyuki Iwashima 	1300,
41a7471224SKuniyuki Iwashima 	1460,
42a7471224SKuniyuki Iwashima 	MSS_LOCAL_IPV4,
43a7471224SKuniyuki Iwashima };
44a7471224SKuniyuki Iwashima 
45a7471224SKuniyuki Iwashima const __u16 msstab6[] = {
46a7471224SKuniyuki Iwashima 	1280 - 60, /* IPV6_MIN_MTU - 60 */
47a7471224SKuniyuki Iwashima 	1480 - 60,
48a7471224SKuniyuki Iwashima 	9000 - 60,
49a7471224SKuniyuki Iwashima 	MSS_LOCAL_IPV6,
50a7471224SKuniyuki Iwashima };
51a7471224SKuniyuki Iwashima 
52a7471224SKuniyuki Iwashima static siphash_key_t test_key_siphash = {
53a7471224SKuniyuki Iwashima 	{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }
54a7471224SKuniyuki Iwashima };
55a7471224SKuniyuki Iwashima 
56a7471224SKuniyuki Iwashima struct tcp_syncookie {
57a7471224SKuniyuki Iwashima 	struct __sk_buff *skb;
58b546b575SEduard Zingerman 	void *data;
59a7471224SKuniyuki Iwashima 	void *data_end;
60a7471224SKuniyuki Iwashima 	struct ethhdr *eth;
61a7471224SKuniyuki Iwashima 	struct iphdr *ipv4;
62a7471224SKuniyuki Iwashima 	struct ipv6hdr *ipv6;
63a7471224SKuniyuki Iwashima 	struct tcphdr *tcp;
64a7471224SKuniyuki Iwashima 	__be32 *ptr32;
65a7471224SKuniyuki Iwashima 	struct bpf_tcp_req_attrs attrs;
66b546b575SEduard Zingerman 	u32 off;
67a7471224SKuniyuki Iwashima 	u32 cookie;
68a7471224SKuniyuki Iwashima 	u64 first;
69a7471224SKuniyuki Iwashima };
70a7471224SKuniyuki Iwashima 
71a7471224SKuniyuki Iwashima bool handled_syn, handled_ack;
72a7471224SKuniyuki Iwashima 
tcp_load_headers(struct tcp_syncookie * ctx)73a7471224SKuniyuki Iwashima static int tcp_load_headers(struct tcp_syncookie *ctx)
74a7471224SKuniyuki Iwashima {
75b546b575SEduard Zingerman 	ctx->data = (void *)(long)ctx->skb->data;
76a7471224SKuniyuki Iwashima 	ctx->data_end = (void *)(long)ctx->skb->data_end;
77a7471224SKuniyuki Iwashima 	ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
78a7471224SKuniyuki Iwashima 
79a7471224SKuniyuki Iwashima 	if (ctx->eth + 1 > ctx->data_end)
80a7471224SKuniyuki Iwashima 		goto err;
81a7471224SKuniyuki Iwashima 
82a7471224SKuniyuki Iwashima 	switch (bpf_ntohs(ctx->eth->h_proto)) {
83a7471224SKuniyuki Iwashima 	case ETH_P_IP:
84a7471224SKuniyuki Iwashima 		ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
85a7471224SKuniyuki Iwashima 
86a7471224SKuniyuki Iwashima 		if (ctx->ipv4 + 1 > ctx->data_end)
87a7471224SKuniyuki Iwashima 			goto err;
88a7471224SKuniyuki Iwashima 
89a7471224SKuniyuki Iwashima 		if (ctx->ipv4->ihl != sizeof(*ctx->ipv4) / 4)
90a7471224SKuniyuki Iwashima 			goto err;
91a7471224SKuniyuki Iwashima 
92a7471224SKuniyuki Iwashima 		if (ctx->ipv4->version != 4)
93a7471224SKuniyuki Iwashima 			goto err;
94a7471224SKuniyuki Iwashima 
95a7471224SKuniyuki Iwashima 		if (ctx->ipv4->protocol != IPPROTO_TCP)
96a7471224SKuniyuki Iwashima 			goto err;
97a7471224SKuniyuki Iwashima 
98a7471224SKuniyuki Iwashima 		ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
99a7471224SKuniyuki Iwashima 		break;
100a7471224SKuniyuki Iwashima 	case ETH_P_IPV6:
101a7471224SKuniyuki Iwashima 		ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
102a7471224SKuniyuki Iwashima 
103a7471224SKuniyuki Iwashima 		if (ctx->ipv6 + 1 > ctx->data_end)
104a7471224SKuniyuki Iwashima 			goto err;
105a7471224SKuniyuki Iwashima 
106a7471224SKuniyuki Iwashima 		if (ctx->ipv6->version != 6)
107a7471224SKuniyuki Iwashima 			goto err;
108a7471224SKuniyuki Iwashima 
109a7471224SKuniyuki Iwashima 		if (ctx->ipv6->nexthdr != NEXTHDR_TCP)
110a7471224SKuniyuki Iwashima 			goto err;
111a7471224SKuniyuki Iwashima 
112a7471224SKuniyuki Iwashima 		ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
113a7471224SKuniyuki Iwashima 		break;
114a7471224SKuniyuki Iwashima 	default:
115a7471224SKuniyuki Iwashima 		goto err;
116a7471224SKuniyuki Iwashima 	}
117a7471224SKuniyuki Iwashima 
118a7471224SKuniyuki Iwashima 	if (ctx->tcp + 1 > ctx->data_end)
119a7471224SKuniyuki Iwashima 		goto err;
120a7471224SKuniyuki Iwashima 
121a7471224SKuniyuki Iwashima 	return 0;
122a7471224SKuniyuki Iwashima err:
123a7471224SKuniyuki Iwashima 	return -1;
124a7471224SKuniyuki Iwashima }
125a7471224SKuniyuki Iwashima 
tcp_reload_headers(struct tcp_syncookie * ctx)126a7471224SKuniyuki Iwashima static int tcp_reload_headers(struct tcp_syncookie *ctx)
127a7471224SKuniyuki Iwashima {
128a7471224SKuniyuki Iwashima 	/* Without volatile,
129a7471224SKuniyuki Iwashima 	 * R3 32-bit pointer arithmetic prohibited
130a7471224SKuniyuki Iwashima 	 */
131a7471224SKuniyuki Iwashima 	volatile u64 data_len = ctx->skb->data_end - ctx->skb->data;
132a7471224SKuniyuki Iwashima 
133a7471224SKuniyuki Iwashima 	if (ctx->tcp->doff < sizeof(*ctx->tcp) / 4)
134a7471224SKuniyuki Iwashima 		goto err;
135a7471224SKuniyuki Iwashima 
136a7471224SKuniyuki Iwashima 	/* Needed to calculate csum and parse TCP options. */
137a7471224SKuniyuki Iwashima 	if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0))
138a7471224SKuniyuki Iwashima 		goto err;
139a7471224SKuniyuki Iwashima 
140b546b575SEduard Zingerman 	ctx->data = (void *)(long)ctx->skb->data;
141a7471224SKuniyuki Iwashima 	ctx->data_end = (void *)(long)ctx->skb->data_end;
142a7471224SKuniyuki Iwashima 	ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
143a7471224SKuniyuki Iwashima 	if (ctx->ipv4) {
144a7471224SKuniyuki Iwashima 		ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
145a7471224SKuniyuki Iwashima 		ctx->ipv6 = NULL;
146a7471224SKuniyuki Iwashima 		ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
147a7471224SKuniyuki Iwashima 	} else {
148a7471224SKuniyuki Iwashima 		ctx->ipv4 = NULL;
149a7471224SKuniyuki Iwashima 		ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
150a7471224SKuniyuki Iwashima 		ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
151a7471224SKuniyuki Iwashima 	}
152a7471224SKuniyuki Iwashima 
153a7471224SKuniyuki Iwashima 	if ((void *)ctx->tcp + 60 > ctx->data_end)
154a7471224SKuniyuki Iwashima 		goto err;
155a7471224SKuniyuki Iwashima 
156a7471224SKuniyuki Iwashima 	return 0;
157a7471224SKuniyuki Iwashima err:
158a7471224SKuniyuki Iwashima 	return -1;
159a7471224SKuniyuki Iwashima }
160a7471224SKuniyuki Iwashima 
tcp_v4_csum(struct tcp_syncookie * ctx,__wsum csum)161a7471224SKuniyuki Iwashima static __sum16 tcp_v4_csum(struct tcp_syncookie *ctx, __wsum csum)
162a7471224SKuniyuki Iwashima {
163a7471224SKuniyuki Iwashima 	return csum_tcpudp_magic(ctx->ipv4->saddr, ctx->ipv4->daddr,
164a7471224SKuniyuki Iwashima 				 ctx->tcp->doff * 4, IPPROTO_TCP, csum);
165a7471224SKuniyuki Iwashima }
166a7471224SKuniyuki Iwashima 
tcp_v6_csum(struct tcp_syncookie * ctx,__wsum csum)167a7471224SKuniyuki Iwashima static __sum16 tcp_v6_csum(struct tcp_syncookie *ctx, __wsum csum)
168a7471224SKuniyuki Iwashima {
169a7471224SKuniyuki Iwashima 	return csum_ipv6_magic(&ctx->ipv6->saddr, &ctx->ipv6->daddr,
170a7471224SKuniyuki Iwashima 			       ctx->tcp->doff * 4, IPPROTO_TCP, csum);
171a7471224SKuniyuki Iwashima }
172a7471224SKuniyuki Iwashima 
tcp_validate_header(struct tcp_syncookie * ctx)173a7471224SKuniyuki Iwashima static int tcp_validate_header(struct tcp_syncookie *ctx)
174a7471224SKuniyuki Iwashima {
175a7471224SKuniyuki Iwashima 	s64 csum;
176a7471224SKuniyuki Iwashima 
177a7471224SKuniyuki Iwashima 	if (tcp_reload_headers(ctx))
178a7471224SKuniyuki Iwashima 		goto err;
179a7471224SKuniyuki Iwashima 
180a7471224SKuniyuki Iwashima 	csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
181a7471224SKuniyuki Iwashima 	if (csum < 0)
182a7471224SKuniyuki Iwashima 		goto err;
183a7471224SKuniyuki Iwashima 
184a7471224SKuniyuki Iwashima 	if (ctx->ipv4) {
185a7471224SKuniyuki Iwashima 		/* check tcp_v4_csum(csum) is 0 if not on lo. */
186a7471224SKuniyuki Iwashima 
187a7471224SKuniyuki Iwashima 		csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0);
188a7471224SKuniyuki Iwashima 		if (csum < 0)
189a7471224SKuniyuki Iwashima 			goto err;
190a7471224SKuniyuki Iwashima 
191a7471224SKuniyuki Iwashima 		if (csum_fold(csum) != 0)
192a7471224SKuniyuki Iwashima 			goto err;
193a7471224SKuniyuki Iwashima 	} else if (ctx->ipv6) {
194a7471224SKuniyuki Iwashima 		/* check tcp_v6_csum(csum) is 0 if not on lo. */
195a7471224SKuniyuki Iwashima 	}
196a7471224SKuniyuki Iwashima 
197a7471224SKuniyuki Iwashima 	return 0;
198a7471224SKuniyuki Iwashima err:
199a7471224SKuniyuki Iwashima 	return -1;
200a7471224SKuniyuki Iwashima }
201a7471224SKuniyuki Iwashima 
next(struct tcp_syncookie * ctx,__u32 sz)202b546b575SEduard Zingerman static __always_inline void *next(struct tcp_syncookie *ctx, __u32 sz)
203b546b575SEduard Zingerman {
204b546b575SEduard Zingerman 	__u64 off = ctx->off;
205b546b575SEduard Zingerman 	__u8 *data;
206b546b575SEduard Zingerman 
207b546b575SEduard Zingerman 	/* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
208b546b575SEduard Zingerman 	if (off > MAX_PACKET_OFF - sz)
209b546b575SEduard Zingerman 		return NULL;
210b546b575SEduard Zingerman 
211b546b575SEduard Zingerman 	data = ctx->data + off;
212b546b575SEduard Zingerman 	barrier_var(data);
213b546b575SEduard Zingerman 	if (data + sz >= ctx->data_end)
214b546b575SEduard Zingerman 		return NULL;
215b546b575SEduard Zingerman 
216b546b575SEduard Zingerman 	ctx->off += sz;
217b546b575SEduard Zingerman 	return data;
218b546b575SEduard Zingerman }
219b546b575SEduard Zingerman 
/* bpf_loop() callback: parse one TCP option at ctx->off.
 *
 * Recognised options (MSS, window scale, timestamp, SACK-permitted) are
 * recorded in ctx->attrs; MSS, window scale and SACK-permitted are only
 * honoured on SYN segments.  After a sized option ctx->off is moved to the
 * option's start plus its length byte, regardless of how many bytes were
 * consumed while decoding it.
 *
 * @index: iteration number supplied by bpf_loop(); unused.
 * @ctx:   parser state.
 *
 * Return: 0 to continue with the next option, 1 to stop (end of packet,
 * TCPOPT_EOL, or an option length below 2).
 */
static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
{
	__u32 start = ctx->off;
	__u8 *kind, *len;

	kind = next(ctx, 1);
	if (!kind)
		return 1;

	if (*kind == TCPOPT_EOL)
		return 1;

	/* NOP is a single byte without a length field. */
	if (*kind == TCPOPT_NOP)
		return 0;

	len = next(ctx, 1);
	if (!len)
		return 1;

	if (*len < 2)
		return 1;

	if (*kind == TCPOPT_MSS) {
		__u16 *mss = next(ctx, 2);

		if (*len == TCPOLEN_MSS && ctx->tcp->syn && mss)
			ctx->attrs.mss = get_unaligned_be16(mss);
	} else if (*kind == TCPOPT_WINDOW) {
		__u8 *wscale = next(ctx, 1);

		if (*len == TCPOLEN_WINDOW && ctx->tcp->syn && wscale) {
			ctx->attrs.wscale_ok = 1;
			ctx->attrs.snd_wscale = *wscale;
		}
	} else if (*kind == TCPOPT_TIMESTAMP) {
		__u32 *tsval = next(ctx, 4);
		__u32 *tsecr = next(ctx, 4);

		if (*len == TCPOLEN_TIMESTAMP && tsval && tsecr) {
			ctx->attrs.rcv_tsval = get_unaligned_be32(tsval);
			ctx->attrs.rcv_tsecr = get_unaligned_be32(tsecr);

			/* A SYN echoing a non-zero tsecr disables timestamps. */
			ctx->attrs.tstamp_ok = !(ctx->tcp->syn && ctx->attrs.rcv_tsecr);
		}
	} else if (*kind == TCPOPT_SACK_PERM) {
		if (*len == TCPOLEN_SACK_PERM && ctx->tcp->syn)
			ctx->attrs.sack_ok = 1;
	}

	/* Trust the length byte, not the number of bytes decoded above. */
	ctx->off = start + *len;
	return 0;
}
283a7471224SKuniyuki Iwashima 
tcp_parse_options(struct tcp_syncookie * ctx)284a7471224SKuniyuki Iwashima static void tcp_parse_options(struct tcp_syncookie *ctx)
285a7471224SKuniyuki Iwashima {
286b546b575SEduard Zingerman 	ctx->off = (__u8 *)(ctx->tcp + 1) - (__u8 *)ctx->data,
287a7471224SKuniyuki Iwashima 
288a7471224SKuniyuki Iwashima 	bpf_loop(40, tcp_parse_option, ctx, 0);
289a7471224SKuniyuki Iwashima }
290a7471224SKuniyuki Iwashima 
tcp_validate_sysctl(struct tcp_syncookie * ctx)291a7471224SKuniyuki Iwashima static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
292a7471224SKuniyuki Iwashima {
293a7471224SKuniyuki Iwashima 	if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) ||
294a7471224SKuniyuki Iwashima 	    (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
295a7471224SKuniyuki Iwashima 		goto err;
296a7471224SKuniyuki Iwashima 
297a7471224SKuniyuki Iwashima 	if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
298a7471224SKuniyuki Iwashima 		goto err;
299a7471224SKuniyuki Iwashima 
300a7471224SKuniyuki Iwashima 	if (!ctx->attrs.tstamp_ok)
301a7471224SKuniyuki Iwashima 		goto err;
302a7471224SKuniyuki Iwashima 
303a7471224SKuniyuki Iwashima 	if (!ctx->attrs.sack_ok)
304a7471224SKuniyuki Iwashima 		goto err;
305a7471224SKuniyuki Iwashima 
306a7471224SKuniyuki Iwashima 	if (!ctx->tcp->ece || !ctx->tcp->cwr)
307a7471224SKuniyuki Iwashima 		goto err;
308a7471224SKuniyuki Iwashima 
309a7471224SKuniyuki Iwashima 	return 0;
310a7471224SKuniyuki Iwashima err:
311a7471224SKuniyuki Iwashima 	return -1;
312a7471224SKuniyuki Iwashima }
313a7471224SKuniyuki Iwashima 
tcp_prepare_cookie(struct tcp_syncookie * ctx)314a7471224SKuniyuki Iwashima static void tcp_prepare_cookie(struct tcp_syncookie *ctx)
315a7471224SKuniyuki Iwashima {
316a7471224SKuniyuki Iwashima 	u32 seq = bpf_ntohl(ctx->tcp->seq);
317a7471224SKuniyuki Iwashima 	u64 first = 0, second;
318a7471224SKuniyuki Iwashima 	int mssind = 0;
319a7471224SKuniyuki Iwashima 	u32 hash;
320a7471224SKuniyuki Iwashima 
321a7471224SKuniyuki Iwashima 	if (ctx->ipv4) {
322a7471224SKuniyuki Iwashima 		for (mssind = ARRAY_SIZE(msstab4) - 1; mssind; mssind--)
323a7471224SKuniyuki Iwashima 			if (ctx->attrs.mss >= msstab4[mssind])
324a7471224SKuniyuki Iwashima 				break;
325a7471224SKuniyuki Iwashima 
326a7471224SKuniyuki Iwashima 		ctx->attrs.mss = msstab4[mssind];
327a7471224SKuniyuki Iwashima 
328a7471224SKuniyuki Iwashima 		first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
329a7471224SKuniyuki Iwashima 	} else if (ctx->ipv6) {
330a7471224SKuniyuki Iwashima 		for (mssind = ARRAY_SIZE(msstab6) - 1; mssind; mssind--)
331a7471224SKuniyuki Iwashima 			if (ctx->attrs.mss >= msstab6[mssind])
332a7471224SKuniyuki Iwashima 				break;
333a7471224SKuniyuki Iwashima 
334a7471224SKuniyuki Iwashima 		ctx->attrs.mss = msstab6[mssind];
335a7471224SKuniyuki Iwashima 
336a7471224SKuniyuki Iwashima 		first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
337a7471224SKuniyuki Iwashima 			ctx->ipv6->daddr.in6_u.u6_addr32[0];
338a7471224SKuniyuki Iwashima 	}
339a7471224SKuniyuki Iwashima 
340a7471224SKuniyuki Iwashima 	second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
341a7471224SKuniyuki Iwashima 	hash = siphash_2u64(first, second, &test_key_siphash);
342a7471224SKuniyuki Iwashima 
343a7471224SKuniyuki Iwashima 	if (ctx->attrs.tstamp_ok) {
344a7471224SKuniyuki Iwashima 		ctx->attrs.rcv_tsecr = bpf_get_prandom_u32();
345a7471224SKuniyuki Iwashima 		ctx->attrs.rcv_tsecr &= ~COOKIE_MASK;
346a7471224SKuniyuki Iwashima 		ctx->attrs.rcv_tsecr |= hash & COOKIE_MASK;
347a7471224SKuniyuki Iwashima 	}
348a7471224SKuniyuki Iwashima 
349a7471224SKuniyuki Iwashima 	hash &= ~COOKIE_MASK;
350a7471224SKuniyuki Iwashima 	hash |= mssind << 6;
351a7471224SKuniyuki Iwashima 
352a7471224SKuniyuki Iwashima 	if (ctx->attrs.wscale_ok)
353a7471224SKuniyuki Iwashima 		hash |= ctx->attrs.snd_wscale & BPF_SYNCOOKIE_WSCALE_MASK;
354a7471224SKuniyuki Iwashima 
355a7471224SKuniyuki Iwashima 	if (ctx->attrs.sack_ok)
356a7471224SKuniyuki Iwashima 		hash |= BPF_SYNCOOKIE_SACK;
357a7471224SKuniyuki Iwashima 
358a7471224SKuniyuki Iwashima 	if (ctx->attrs.tstamp_ok && ctx->tcp->ece && ctx->tcp->cwr)
359a7471224SKuniyuki Iwashima 		hash |= BPF_SYNCOOKIE_ECN;
360a7471224SKuniyuki Iwashima 
361a7471224SKuniyuki Iwashima 	ctx->cookie = hash;
362a7471224SKuniyuki Iwashima }
363a7471224SKuniyuki Iwashima 
tcp_write_options(struct tcp_syncookie * ctx)364a7471224SKuniyuki Iwashima static void tcp_write_options(struct tcp_syncookie *ctx)
365a7471224SKuniyuki Iwashima {
366a7471224SKuniyuki Iwashima 	ctx->ptr32 = (__be32 *)(ctx->tcp + 1);
367a7471224SKuniyuki Iwashima 
368a7471224SKuniyuki Iwashima 	*ctx->ptr32++ = bpf_htonl(TCPOPT_MSS << 24 | TCPOLEN_MSS << 16 |
369a7471224SKuniyuki Iwashima 				  ctx->attrs.mss);
370a7471224SKuniyuki Iwashima 
371a7471224SKuniyuki Iwashima 	if (ctx->attrs.wscale_ok)
372a7471224SKuniyuki Iwashima 		*ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
373a7471224SKuniyuki Iwashima 					  TCPOPT_WINDOW << 16 |
374a7471224SKuniyuki Iwashima 					  TCPOLEN_WINDOW << 8 |
375a7471224SKuniyuki Iwashima 					  ctx->attrs.snd_wscale);
376a7471224SKuniyuki Iwashima 
377a7471224SKuniyuki Iwashima 	if (ctx->attrs.tstamp_ok) {
378a7471224SKuniyuki Iwashima 		if (ctx->attrs.sack_ok)
379a7471224SKuniyuki Iwashima 			*ctx->ptr32++ = bpf_htonl(TCPOPT_SACK_PERM << 24 |
380a7471224SKuniyuki Iwashima 						  TCPOLEN_SACK_PERM << 16 |
381a7471224SKuniyuki Iwashima 						  TCPOPT_TIMESTAMP << 8 |
382a7471224SKuniyuki Iwashima 						  TCPOLEN_TIMESTAMP);
383a7471224SKuniyuki Iwashima 		else
384a7471224SKuniyuki Iwashima 			*ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
385a7471224SKuniyuki Iwashima 						  TCPOPT_NOP << 16 |
386a7471224SKuniyuki Iwashima 						  TCPOPT_TIMESTAMP << 8 |
387a7471224SKuniyuki Iwashima 						  TCPOLEN_TIMESTAMP);
388a7471224SKuniyuki Iwashima 
389a7471224SKuniyuki Iwashima 		*ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsecr);
390a7471224SKuniyuki Iwashima 		*ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsval);
391a7471224SKuniyuki Iwashima 	} else if (ctx->attrs.sack_ok) {
392a7471224SKuniyuki Iwashima 		*ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
393a7471224SKuniyuki Iwashima 					  TCPOPT_NOP << 16 |
394a7471224SKuniyuki Iwashima 					  TCPOPT_SACK_PERM << 8 |
395a7471224SKuniyuki Iwashima 					  TCPOLEN_SACK_PERM);
396a7471224SKuniyuki Iwashima 	}
397a7471224SKuniyuki Iwashima }
398a7471224SKuniyuki Iwashima 
tcp_handle_syn(struct tcp_syncookie * ctx)399a7471224SKuniyuki Iwashima static int tcp_handle_syn(struct tcp_syncookie *ctx)
400a7471224SKuniyuki Iwashima {
401a7471224SKuniyuki Iwashima 	s64 csum;
402a7471224SKuniyuki Iwashima 
403a7471224SKuniyuki Iwashima 	if (tcp_validate_header(ctx))
404a7471224SKuniyuki Iwashima 		goto err;
405a7471224SKuniyuki Iwashima 
406a7471224SKuniyuki Iwashima 	tcp_parse_options(ctx);
407a7471224SKuniyuki Iwashima 
408a7471224SKuniyuki Iwashima 	if (tcp_validate_sysctl(ctx))
409a7471224SKuniyuki Iwashima 		goto err;
410a7471224SKuniyuki Iwashima 
411a7471224SKuniyuki Iwashima 	tcp_prepare_cookie(ctx);
412a7471224SKuniyuki Iwashima 	tcp_write_options(ctx);
413a7471224SKuniyuki Iwashima 
414a7471224SKuniyuki Iwashima 	swap(ctx->tcp->source, ctx->tcp->dest);
415a7471224SKuniyuki Iwashima 	ctx->tcp->check = 0;
416a7471224SKuniyuki Iwashima 	ctx->tcp->ack_seq = bpf_htonl(bpf_ntohl(ctx->tcp->seq) + 1);
417a7471224SKuniyuki Iwashima 	ctx->tcp->seq = bpf_htonl(ctx->cookie);
418a7471224SKuniyuki Iwashima 	ctx->tcp->doff = ((long)ctx->ptr32 - (long)ctx->tcp) >> 2;
419a7471224SKuniyuki Iwashima 	ctx->tcp->ack = 1;
420a7471224SKuniyuki Iwashima 	if (!ctx->attrs.tstamp_ok || !ctx->tcp->ece || !ctx->tcp->cwr)
421a7471224SKuniyuki Iwashima 		ctx->tcp->ece = 0;
422a7471224SKuniyuki Iwashima 	ctx->tcp->cwr = 0;
423a7471224SKuniyuki Iwashima 
424a7471224SKuniyuki Iwashima 	csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
425a7471224SKuniyuki Iwashima 	if (csum < 0)
426a7471224SKuniyuki Iwashima 		goto err;
427a7471224SKuniyuki Iwashima 
428a7471224SKuniyuki Iwashima 	if (ctx->ipv4) {
429a7471224SKuniyuki Iwashima 		swap(ctx->ipv4->saddr, ctx->ipv4->daddr);
430a7471224SKuniyuki Iwashima 		ctx->tcp->check = tcp_v4_csum(ctx, csum);
431a7471224SKuniyuki Iwashima 
432a7471224SKuniyuki Iwashima 		ctx->ipv4->check = 0;
433a7471224SKuniyuki Iwashima 		ctx->ipv4->tos = 0;
434a7471224SKuniyuki Iwashima 		ctx->ipv4->tot_len = bpf_htons((long)ctx->ptr32 - (long)ctx->ipv4);
435a7471224SKuniyuki Iwashima 		ctx->ipv4->id = 0;
436a7471224SKuniyuki Iwashima 		ctx->ipv4->ttl = 64;
437a7471224SKuniyuki Iwashima 
438a7471224SKuniyuki Iwashima 		csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, sizeof(*ctx->ipv4), 0);
439a7471224SKuniyuki Iwashima 		if (csum < 0)
440a7471224SKuniyuki Iwashima 			goto err;
441a7471224SKuniyuki Iwashima 
442a7471224SKuniyuki Iwashima 		ctx->ipv4->check = csum_fold(csum);
443a7471224SKuniyuki Iwashima 	} else if (ctx->ipv6) {
444a7471224SKuniyuki Iwashima 		swap(ctx->ipv6->saddr, ctx->ipv6->daddr);
445a7471224SKuniyuki Iwashima 		ctx->tcp->check = tcp_v6_csum(ctx, csum);
446a7471224SKuniyuki Iwashima 
447a7471224SKuniyuki Iwashima 		*(__be32 *)ctx->ipv6 = bpf_htonl(0x60000000);
448a7471224SKuniyuki Iwashima 		ctx->ipv6->payload_len = bpf_htons((long)ctx->ptr32 - (long)ctx->tcp);
449a7471224SKuniyuki Iwashima 		ctx->ipv6->hop_limit = 64;
450a7471224SKuniyuki Iwashima 	}
451a7471224SKuniyuki Iwashima 
452a7471224SKuniyuki Iwashima 	swap_array(ctx->eth->h_source, ctx->eth->h_dest);
453a7471224SKuniyuki Iwashima 
454a7471224SKuniyuki Iwashima 	if (bpf_skb_change_tail(ctx->skb, (long)ctx->ptr32 - (long)ctx->eth, 0))
455a7471224SKuniyuki Iwashima 		goto err;
456a7471224SKuniyuki Iwashima 
457a7471224SKuniyuki Iwashima 	return bpf_redirect(ctx->skb->ifindex, 0);
458a7471224SKuniyuki Iwashima err:
459a7471224SKuniyuki Iwashima 	return TC_ACT_SHOT;
460a7471224SKuniyuki Iwashima }
461a7471224SKuniyuki Iwashima 
tcp_validate_cookie(struct tcp_syncookie * ctx)462a7471224SKuniyuki Iwashima static int tcp_validate_cookie(struct tcp_syncookie *ctx)
463a7471224SKuniyuki Iwashima {
464a7471224SKuniyuki Iwashima 	u32 cookie = bpf_ntohl(ctx->tcp->ack_seq) - 1;
465a7471224SKuniyuki Iwashima 	u32 seq = bpf_ntohl(ctx->tcp->seq) - 1;
466a7471224SKuniyuki Iwashima 	u64 first = 0, second;
467a7471224SKuniyuki Iwashima 	int mssind;
468a7471224SKuniyuki Iwashima 	u32 hash;
469a7471224SKuniyuki Iwashima 
470a7471224SKuniyuki Iwashima 	if (ctx->ipv4)
471a7471224SKuniyuki Iwashima 		first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
472a7471224SKuniyuki Iwashima 	else if (ctx->ipv6)
473a7471224SKuniyuki Iwashima 		first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
474a7471224SKuniyuki Iwashima 			ctx->ipv6->daddr.in6_u.u6_addr32[0];
475a7471224SKuniyuki Iwashima 
476a7471224SKuniyuki Iwashima 	second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
477a7471224SKuniyuki Iwashima 	hash = siphash_2u64(first, second, &test_key_siphash);
478a7471224SKuniyuki Iwashima 
479a7471224SKuniyuki Iwashima 	if (ctx->attrs.tstamp_ok)
480a7471224SKuniyuki Iwashima 		hash -= ctx->attrs.rcv_tsecr & COOKIE_MASK;
481a7471224SKuniyuki Iwashima 	else
482a7471224SKuniyuki Iwashima 		hash &= ~COOKIE_MASK;
483a7471224SKuniyuki Iwashima 
484a7471224SKuniyuki Iwashima 	hash -= cookie & ~COOKIE_MASK;
485a7471224SKuniyuki Iwashima 	if (hash)
486a7471224SKuniyuki Iwashima 		goto err;
487a7471224SKuniyuki Iwashima 
488a7471224SKuniyuki Iwashima 	mssind = (cookie & (3 << 6)) >> 6;
489*af8a066fSKuniyuki Iwashima 	if (ctx->ipv4)
490a7471224SKuniyuki Iwashima 		ctx->attrs.mss = msstab4[mssind];
491*af8a066fSKuniyuki Iwashima 	else
492a7471224SKuniyuki Iwashima 		ctx->attrs.mss = msstab6[mssind];
493a7471224SKuniyuki Iwashima 
494a7471224SKuniyuki Iwashima 	ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK;
495a7471224SKuniyuki Iwashima 	ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale;
496a7471224SKuniyuki Iwashima 	ctx->attrs.wscale_ok = ctx->attrs.snd_wscale == BPF_SYNCOOKIE_WSCALE_MASK;
497a7471224SKuniyuki Iwashima 	ctx->attrs.sack_ok = cookie & BPF_SYNCOOKIE_SACK;
498a7471224SKuniyuki Iwashima 	ctx->attrs.ecn_ok = cookie & BPF_SYNCOOKIE_ECN;
499a7471224SKuniyuki Iwashima 
500a7471224SKuniyuki Iwashima 	return 0;
501a7471224SKuniyuki Iwashima err:
502a7471224SKuniyuki Iwashima 	return -1;
503a7471224SKuniyuki Iwashima }
504a7471224SKuniyuki Iwashima 
tcp_handle_ack(struct tcp_syncookie * ctx)505a7471224SKuniyuki Iwashima static int tcp_handle_ack(struct tcp_syncookie *ctx)
506a7471224SKuniyuki Iwashima {
507a7471224SKuniyuki Iwashima 	struct bpf_sock_tuple tuple;
508a7471224SKuniyuki Iwashima 	struct bpf_sock *skc;
509a7471224SKuniyuki Iwashima 	int ret = TC_ACT_OK;
510a7471224SKuniyuki Iwashima 	struct sock *sk;
511a7471224SKuniyuki Iwashima 	u32 tuple_size;
512a7471224SKuniyuki Iwashima 
513a7471224SKuniyuki Iwashima 	if (ctx->ipv4) {
514a7471224SKuniyuki Iwashima 		tuple.ipv4.saddr = ctx->ipv4->saddr;
515a7471224SKuniyuki Iwashima 		tuple.ipv4.daddr = ctx->ipv4->daddr;
516a7471224SKuniyuki Iwashima 		tuple.ipv4.sport = ctx->tcp->source;
517a7471224SKuniyuki Iwashima 		tuple.ipv4.dport = ctx->tcp->dest;
518a7471224SKuniyuki Iwashima 		tuple_size = sizeof(tuple.ipv4);
519a7471224SKuniyuki Iwashima 	} else if (ctx->ipv6) {
520a7471224SKuniyuki Iwashima 		__builtin_memcpy(tuple.ipv6.saddr, &ctx->ipv6->saddr, sizeof(tuple.ipv6.saddr));
521a7471224SKuniyuki Iwashima 		__builtin_memcpy(tuple.ipv6.daddr, &ctx->ipv6->daddr, sizeof(tuple.ipv6.daddr));
522a7471224SKuniyuki Iwashima 		tuple.ipv6.sport = ctx->tcp->source;
523a7471224SKuniyuki Iwashima 		tuple.ipv6.dport = ctx->tcp->dest;
524a7471224SKuniyuki Iwashima 		tuple_size = sizeof(tuple.ipv6);
525a7471224SKuniyuki Iwashima 	} else {
526a7471224SKuniyuki Iwashima 		goto out;
527a7471224SKuniyuki Iwashima 	}
528a7471224SKuniyuki Iwashima 
529a7471224SKuniyuki Iwashima 	skc = bpf_skc_lookup_tcp(ctx->skb, &tuple, tuple_size, -1, 0);
530a7471224SKuniyuki Iwashima 	if (!skc)
531a7471224SKuniyuki Iwashima 		goto out;
532a7471224SKuniyuki Iwashima 
533a7471224SKuniyuki Iwashima 	if (skc->state != TCP_LISTEN)
534a7471224SKuniyuki Iwashima 		goto release;
535a7471224SKuniyuki Iwashima 
536a7471224SKuniyuki Iwashima 	sk = (struct sock *)bpf_skc_to_tcp_sock(skc);
537a7471224SKuniyuki Iwashima 	if (!sk)
538a7471224SKuniyuki Iwashima 		goto err;
539a7471224SKuniyuki Iwashima 
540a7471224SKuniyuki Iwashima 	if (tcp_validate_header(ctx))
541a7471224SKuniyuki Iwashima 		goto err;
542a7471224SKuniyuki Iwashima 
543a7471224SKuniyuki Iwashima 	tcp_parse_options(ctx);
544a7471224SKuniyuki Iwashima 
545a7471224SKuniyuki Iwashima 	if (tcp_validate_cookie(ctx))
546a7471224SKuniyuki Iwashima 		goto err;
547a7471224SKuniyuki Iwashima 
548a7471224SKuniyuki Iwashima 	ret = bpf_sk_assign_tcp_reqsk(ctx->skb, sk, &ctx->attrs, sizeof(ctx->attrs));
549a7471224SKuniyuki Iwashima 	if (ret < 0)
550a7471224SKuniyuki Iwashima 		goto err;
551a7471224SKuniyuki Iwashima 
552a7471224SKuniyuki Iwashima release:
553a7471224SKuniyuki Iwashima 	bpf_sk_release(skc);
554a7471224SKuniyuki Iwashima out:
555a7471224SKuniyuki Iwashima 	return ret;
556a7471224SKuniyuki Iwashima 
557a7471224SKuniyuki Iwashima err:
558a7471224SKuniyuki Iwashima 	ret = TC_ACT_SHOT;
559a7471224SKuniyuki Iwashima 	goto release;
560a7471224SKuniyuki Iwashima }
561a7471224SKuniyuki Iwashima 
562a7471224SKuniyuki Iwashima SEC("tc")
tcp_custom_syncookie(struct __sk_buff * skb)563a7471224SKuniyuki Iwashima int tcp_custom_syncookie(struct __sk_buff *skb)
564a7471224SKuniyuki Iwashima {
565a7471224SKuniyuki Iwashima 	struct tcp_syncookie ctx = {
566a7471224SKuniyuki Iwashima 		.skb = skb,
567a7471224SKuniyuki Iwashima 	};
568a7471224SKuniyuki Iwashima 
569a7471224SKuniyuki Iwashima 	if (tcp_load_headers(&ctx))
570a7471224SKuniyuki Iwashima 		return TC_ACT_OK;
571a7471224SKuniyuki Iwashima 
572a7471224SKuniyuki Iwashima 	if (ctx.tcp->rst)
573a7471224SKuniyuki Iwashima 		return TC_ACT_OK;
574a7471224SKuniyuki Iwashima 
575a7471224SKuniyuki Iwashima 	if (ctx.tcp->syn) {
576a7471224SKuniyuki Iwashima 		if (ctx.tcp->ack)
577a7471224SKuniyuki Iwashima 			return TC_ACT_OK;
578a7471224SKuniyuki Iwashima 
579a7471224SKuniyuki Iwashima 		handled_syn = true;
580a7471224SKuniyuki Iwashima 
581a7471224SKuniyuki Iwashima 		return tcp_handle_syn(&ctx);
582a7471224SKuniyuki Iwashima 	}
583a7471224SKuniyuki Iwashima 
584a7471224SKuniyuki Iwashima 	handled_ack = true;
585a7471224SKuniyuki Iwashima 
586a7471224SKuniyuki Iwashima 	return tcp_handle_ack(&ctx);
587a7471224SKuniyuki Iwashima }
588a7471224SKuniyuki Iwashima 
589a7471224SKuniyuki Iwashima char _license[] SEC("license") = "GPL";
590