/* xref: linux/tools/testing/selftests/bpf/progs/test_tcp_estats.c (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11) */
/* Copyright (c) 2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */

/* This program shows clang/llvm is able to generate code pattern
 * like:
 *   _tcp_send_active_reset:
 *      0:       bf 16 00 00 00 00 00 00         r6 = r1
 *    ......
 *    335:       b7 01 00 00 0f 00 00 00         r1 = 15
 *    336:       05 00 48 00 00 00 00 00         goto 72
 *
 *   LBB0_3:
 *    337:       b7 01 00 00 01 00 00 00         r1 = 1
 *    338:       63 1a d0 ff 00 00 00 00         *(u32 *)(r10 - 48) = r1
 *    408:       b7 01 00 00 03 00 00 00         r1 = 3
 *
 *   LBB0_4:
 *    409:       71 a2 fe ff 00 00 00 00         r2 = *(u8 *)(r10 - 2)
 *    410:       bf a7 00 00 00 00 00 00         r7 = r10
 *    411:       07 07 00 00 b8 ff ff ff         r7 += -72
 *    412:       bf 73 00 00 00 00 00 00         r3 = r7
 *    413:       0f 13 00 00 00 00 00 00         r3 += r1
 *    414:       73 23 2d 00 00 00 00 00         *(u8 *)(r3 + 45) = r2
 *
 * From the above code snippet, the code generated by the compiler
 * is reasonable. The "r1" is assigned to different values in basic
 * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4".
 * The verifier should be able to handle such code patterns.
 */
#include <string.h>
#include <linux/bpf.h>
#include <linux/ipv6.h>
#include <linux/version.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>

/* Copy the object designated by lvalue P with bpf_probe_read_kernel() and
 * evaluate to the copied value. The temporary is zero-initialised before
 * the call and the helper's return code is not checked. The argument is
 * parenthesised so that any lvalue expression is addressed as a whole.
 */
#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &(P)); val;})
/* Marker value stored in tcp_estats_event.magic by tcp_estats_ev_init(). */
#define TCP_ESTATS_MAGIC 0xBAADBEEF

/* This test case needs "sock" and "pt_regs" data structure.
 * Recursively, "sock" needs "sock_common" and "inet_sock".
 * However, this is a unit test case only for
 * verifier purpose without bpf program execution.
 * We can safely mock much simpler data structures, basically
 * only taking the necessary fields from kernel headers.
 */
51bd4aed0eSJiong Wang typedef __u32 __bitwise __portpair;
52bd4aed0eSJiong Wang typedef __u64 __bitwise __addrpair;
53bd4aed0eSJiong Wang 
54bd4aed0eSJiong Wang struct sock_common {
55bd4aed0eSJiong Wang 	unsigned short		skc_family;
56bd4aed0eSJiong Wang 	union {
57bd4aed0eSJiong Wang 		__addrpair	skc_addrpair;
58bd4aed0eSJiong Wang 		struct {
59bd4aed0eSJiong Wang 			__be32	skc_daddr;
60bd4aed0eSJiong Wang 			__be32	skc_rcv_saddr;
61bd4aed0eSJiong Wang 		};
62bd4aed0eSJiong Wang 	};
63bd4aed0eSJiong Wang 	union {
64bd4aed0eSJiong Wang 		__portpair	skc_portpair;
65bd4aed0eSJiong Wang 		struct {
66bd4aed0eSJiong Wang 			__be16	skc_dport;
67bd4aed0eSJiong Wang 			__u16	skc_num;
68bd4aed0eSJiong Wang 		};
69bd4aed0eSJiong Wang 	};
70bd4aed0eSJiong Wang 	struct in6_addr		skc_v6_daddr;
71bd4aed0eSJiong Wang 	struct in6_addr		skc_v6_rcv_saddr;
72bd4aed0eSJiong Wang };
73bd4aed0eSJiong Wang 
74bd4aed0eSJiong Wang struct sock {
75bd4aed0eSJiong Wang 	struct sock_common	__sk_common;
76bd4aed0eSJiong Wang #define sk_family		__sk_common.skc_family
77bd4aed0eSJiong Wang #define sk_v6_daddr		__sk_common.skc_v6_daddr
78bd4aed0eSJiong Wang #define sk_v6_rcv_saddr		__sk_common.skc_v6_rcv_saddr
79bd4aed0eSJiong Wang };
80bd4aed0eSJiong Wang 
81bd4aed0eSJiong Wang struct inet_sock {
82bd4aed0eSJiong Wang 	struct sock		sk;
83bd4aed0eSJiong Wang #define inet_daddr		sk.__sk_common.skc_daddr
84bd4aed0eSJiong Wang #define inet_dport		sk.__sk_common.skc_dport
85bd4aed0eSJiong Wang 	__be32			inet_saddr;
86bd4aed0eSJiong Wang 	__be16			inet_sport;
87bd4aed0eSJiong Wang };
88bd4aed0eSJiong Wang 
/* Mock of pt_regs with a single slot. Nothing else visible in this file
 * refers to it.
 */
struct pt_regs {
	long di;
};

/* Reinterpret @sk as the struct inet_sock that embeds it as its first
 * member. This is a pure pointer cast with no memory access; the const
 * qualifier of the argument is dropped by the cast.
 */
static inline struct inet_sock *inet_sk(const struct sock *sk)
{
	return (struct inet_sock *)sk;
}

/* Define various data structures for state recording.
 * Some fields are not used due to test simplification.
 */
/* Address family tag written to tcp_estats_conn_id.localaddressType. */
enum tcp_estats_addrtype {
	TCP_ESTATS_ADDRTYPE_IPV4 = 1,
	TCP_ESTATS_ADDRTYPE_IPV6 = 2
};

/* Event kinds recorded in tcp_estats_event.event_type. Values are
 * implicit, starting at 0; TCP_ESTATS_NEVENTS is last and therefore equals
 * the number of event kinds. Only TCP_ESTATS_TX_RESET is used by the
 * program in this file.
 */
enum tcp_estats_event_type {
	TCP_ESTATS_ESTABLISH,
	TCP_ESTATS_PERIODIC,
	TCP_ESTATS_TIMEOUT,
	TCP_ESTATS_RETRANSMIT_TIMEOUT,
	TCP_ESTATS_RETRANSMIT_OTHER,
	TCP_ESTATS_SYN_RETRANSMIT,
	TCP_ESTATS_SYNACK_RETRANSMIT,
	TCP_ESTATS_TERM,
	TCP_ESTATS_TX_RESET,
	TCP_ESTATS_RX_RESET,
	TCP_ESTATS_WRITE_TIMEOUT,
	TCP_ESTATS_CONN_TIMEOUT,
	TCP_ESTATS_ACK_LATENCY,
	TCP_ESTATS_NEVENTS,
};

123bd4aed0eSJiong Wang struct tcp_estats_event {
124bd4aed0eSJiong Wang 	int pid;
125bd4aed0eSJiong Wang 	int cpu;
126bd4aed0eSJiong Wang 	unsigned long ts;
127bd4aed0eSJiong Wang 	unsigned int magic;
128bd4aed0eSJiong Wang 	enum tcp_estats_event_type event_type;
129bd4aed0eSJiong Wang };
130bd4aed0eSJiong Wang 
/* The below data structure is packed in order for
 * llvm compiler to generate expected code.
 *
 * Connection identity: an address-type tag, 16-byte local and remote
 * address buffers (an IPv4 address fills only the first 4 bytes), and the
 * local and remote ports.
 */
struct tcp_estats_conn_id {
	unsigned int localaddressType;
	struct {
		unsigned char data[16];
	} localaddress;
	struct {
		unsigned char data[16];
	} remaddress;
	unsigned short    localport;
	unsigned short    remport;
} __attribute__((__packed__));

146bd4aed0eSJiong Wang struct tcp_estats_basic_event {
147bd4aed0eSJiong Wang 	struct tcp_estats_event event;
148bd4aed0eSJiong Wang 	struct tcp_estats_conn_id conn_id;
149bd4aed0eSJiong Wang };
150bd4aed0eSJiong Wang 
151df0b7792SAndrii Nakryiko struct {
152bc7430ccSAndrii Nakryiko 	__uint(type, BPF_MAP_TYPE_HASH);
153bc7430ccSAndrii Nakryiko 	__uint(max_entries, 1024);
154bc7430ccSAndrii Nakryiko 	__type(key, __u32);
155bc7430ccSAndrii Nakryiko 	__type(value, struct tcp_estats_basic_event);
156bc7430ccSAndrii Nakryiko } ev_record_map SEC(".maps");
157bd4aed0eSJiong Wang 
/* Context layout handed to _dummy_tracepoint(): 8 bytes of padding
 * followed by the socket pointer the program reads.
 */
struct dummy_tracepoint_args {
	unsigned long long pad;
	struct sock *sock;
};

tcp_estats_ev_init(struct tcp_estats_event * event,enum tcp_estats_event_type type)163bd4aed0eSJiong Wang static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event,
164bd4aed0eSJiong Wang 					       enum tcp_estats_event_type type)
165bd4aed0eSJiong Wang {
166bd4aed0eSJiong Wang 	event->magic = TCP_ESTATS_MAGIC;
167bd4aed0eSJiong Wang 	event->ts = bpf_ktime_get_ns();
168bd4aed0eSJiong Wang 	event->event_type = type;
169bd4aed0eSJiong Wang }
170bd4aed0eSJiong Wang 
unaligned_u32_set(unsigned char * to,__u8 * from)171bd4aed0eSJiong Wang static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from)
172bd4aed0eSJiong Wang {
173bd4aed0eSJiong Wang 	to[0] = _(from[0]);
174bd4aed0eSJiong Wang 	to[1] = _(from[1]);
175bd4aed0eSJiong Wang 	to[2] = _(from[2]);
176bd4aed0eSJiong Wang 	to[3] = _(from[3]);
177bd4aed0eSJiong Wang }
178bd4aed0eSJiong Wang 
conn_id_ipv4_init(struct tcp_estats_conn_id * conn_id,__be32 * saddr,__be32 * daddr)179bd4aed0eSJiong Wang static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id,
180bd4aed0eSJiong Wang 					      __be32 *saddr, __be32 *daddr)
181bd4aed0eSJiong Wang {
182bd4aed0eSJiong Wang 	conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4;
183bd4aed0eSJiong Wang 
184bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
185bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr);
186bd4aed0eSJiong Wang }
187bd4aed0eSJiong Wang 
conn_id_ipv6_init(struct tcp_estats_conn_id * conn_id,__be32 * saddr,__be32 * daddr)188bd4aed0eSJiong Wang static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id,
189bd4aed0eSJiong Wang 					      __be32 *saddr, __be32 *daddr)
190bd4aed0eSJiong Wang {
191bd4aed0eSJiong Wang 	conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6;
192bd4aed0eSJiong Wang 
193bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
194bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32),
195bd4aed0eSJiong Wang 			  (__u8 *)(saddr + 1));
196bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2,
197bd4aed0eSJiong Wang 			  (__u8 *)(saddr + 2));
198bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3,
199bd4aed0eSJiong Wang 			  (__u8 *)(saddr + 3));
200bd4aed0eSJiong Wang 
201bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->remaddress.data,
202bd4aed0eSJiong Wang 			  (__u8 *)(daddr));
203bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32),
204bd4aed0eSJiong Wang 			  (__u8 *)(daddr + 1));
205bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2,
206bd4aed0eSJiong Wang 			  (__u8 *)(daddr + 2));
207bd4aed0eSJiong Wang 	unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3,
208bd4aed0eSJiong Wang 			  (__u8 *)(daddr + 3));
209bd4aed0eSJiong Wang }
210bd4aed0eSJiong Wang 
tcp_estats_conn_id_init(struct tcp_estats_conn_id * conn_id,struct sock * sk)211bd4aed0eSJiong Wang static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id,
212bd4aed0eSJiong Wang 						    struct sock *sk)
213bd4aed0eSJiong Wang {
214bd4aed0eSJiong Wang 	conn_id->localport = _(inet_sk(sk)->inet_sport);
215bd4aed0eSJiong Wang 	conn_id->remport = _(inet_sk(sk)->inet_dport);
216bd4aed0eSJiong Wang 
217bd4aed0eSJiong Wang 	if (_(sk->sk_family) == AF_INET6)
218bd4aed0eSJiong Wang 		conn_id_ipv6_init(conn_id,
219bd4aed0eSJiong Wang 				  sk->sk_v6_rcv_saddr.s6_addr32,
220bd4aed0eSJiong Wang 				  sk->sk_v6_daddr.s6_addr32);
221bd4aed0eSJiong Wang 	else
222bd4aed0eSJiong Wang 		conn_id_ipv4_init(conn_id,
223bd4aed0eSJiong Wang 				  &inet_sk(sk)->inet_saddr,
224bd4aed0eSJiong Wang 				  &inet_sk(sk)->inet_daddr);
225bd4aed0eSJiong Wang }
226bd4aed0eSJiong Wang 
tcp_estats_init(struct sock * sk,struct tcp_estats_event * event,struct tcp_estats_conn_id * conn_id,enum tcp_estats_event_type type)227bd4aed0eSJiong Wang static __always_inline void tcp_estats_init(struct sock *sk,
228bd4aed0eSJiong Wang 					    struct tcp_estats_event *event,
229bd4aed0eSJiong Wang 					    struct tcp_estats_conn_id *conn_id,
230bd4aed0eSJiong Wang 					    enum tcp_estats_event_type type)
231bd4aed0eSJiong Wang {
232bd4aed0eSJiong Wang 	tcp_estats_ev_init(event, type);
233bd4aed0eSJiong Wang 	tcp_estats_conn_id_init(conn_id, sk);
234bd4aed0eSJiong Wang }
235bd4aed0eSJiong Wang 
send_basic_event(struct sock * sk,enum tcp_estats_event_type type)236bd4aed0eSJiong Wang static __always_inline void send_basic_event(struct sock *sk,
237bd4aed0eSJiong Wang 					     enum tcp_estats_event_type type)
238bd4aed0eSJiong Wang {
239bd4aed0eSJiong Wang 	struct tcp_estats_basic_event ev;
240bd4aed0eSJiong Wang 	__u32 key = bpf_get_prandom_u32();
241bd4aed0eSJiong Wang 
242bd4aed0eSJiong Wang 	memset(&ev, 0, sizeof(ev));
243bd4aed0eSJiong Wang 	tcp_estats_init(sk, &ev.event, &ev.conn_id, type);
244bd4aed0eSJiong Wang 	bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
245bd4aed0eSJiong Wang }
246bd4aed0eSJiong Wang 
247*3d1d6239SAndrii Nakryiko SEC("tp/dummy/tracepoint")
_dummy_tracepoint(struct dummy_tracepoint_args * arg)248bd4aed0eSJiong Wang int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
249bd4aed0eSJiong Wang {
250bd4aed0eSJiong Wang 	if (!arg->sock)
251bd4aed0eSJiong Wang 		return 0;
252bd4aed0eSJiong Wang 
253bd4aed0eSJiong Wang 	send_basic_event(arg->sock, TCP_ESTATS_TX_RESET);
254bd4aed0eSJiong Wang 	return 0;
255bd4aed0eSJiong Wang }
256bd4aed0eSJiong Wang 
257bd4aed0eSJiong Wang char _license[] SEC("license") = "GPL";
258