xref: /linux/samples/bpf/hbm_kern.h (revision 4201c9260a8d3c4ef238e51692a7e9b4e1e29efe)
1 /* SPDX-License-Identifier: GPL-2.0
2  *
3  * Copyright (c) 2019 Facebook
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of version 2 of the GNU General Public
7  * License as published by the Free Software Foundation.
8  *
9  * Include file for sample Host Bandwidth Manager (HBM) BPF programs
10  */
11 #define KBUILD_MODNAME "foo"
12 #include <stddef.h>
13 #include <stdbool.h>
14 #include <uapi/linux/bpf.h>
15 #include <uapi/linux/if_ether.h>
16 #include <uapi/linux/if_packet.h>
17 #include <uapi/linux/ip.h>
18 #include <uapi/linux/ipv6.h>
19 #include <uapi/linux/in.h>
20 #include <uapi/linux/tcp.h>
21 #include <uapi/linux/filter.h>
22 #include <uapi/linux/pkt_cls.h>
23 #include <net/ipv6.h>
24 #include <net/inet_ecn.h>
25 #include "bpf_endian.h"
26 #include "bpf_helpers.h"
27 #include "hbm.h"
28 
/* Verdict values. hbm_update_stats() in this file starts from ALLOW_PKT,
 * switches to DROP_PKT when the drop flag is set, and ORs in 2 when the
 * cwr flag is set, giving the four values 0..3.
 */
#define DROP_PKT	0
#define ALLOW_PKT	1
/* Not referenced in this header; presumably used by the HBM programs that
 * include it -- TODO confirm.
 */
#define TCP_ECN_OK	1

/* Unless HBM_DEBUG is defined, bpf_printk() is redefined to expand to
 * nothing, so every debug print (and its arguments) is discarded.
 */
#ifndef HBM_DEBUG  // Define HBM_DEBUG to enable debugging
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
37 
/* Credit and threshold constants. All but LARGE_PKT_THRESH are multiples
 * of MAX_BYTES_PER_PACKET (so presumably byte counts -- confirm against
 * the programs that use them). Resolved values:
 *   MARK_THRESH           =  40 * 1500          =  60000
 *   DROP_THRESH           =  80 * 5 * 1500      = 600000
 *   LARGE_PKT_DROP_THRESH = 600000 - 15 * 1500  = 577500
 *   MARK_REGION_SIZE      = 577500 - 60000      = 517500
 *   MAX_CREDIT            = 100 * 1500          = 150000
 *   INIT_CREDIT           = 100 * 1500          = 150000
 * Only INIT_CREDIT is used in this header (by hbm_init_vqueue()).
 */
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
#define LARGE_PKT_THRESH	120
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)

// rate in bytes per ns << 20
// delta is widened to u64 before the multiply; the final >> 20 removes the
// rate's << 20 scaling. delta is presumably elapsed nanoseconds (going by
// the macro name) -- confirm at the call sites.
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
50 
/* Map of type BPF_MAP_TYPE_CGROUP_STORAGE whose value is one
 * struct hbm_vqueue and whose key is a struct bpf_cgroup_storage_key.
 * No max_entries is given for this map.
 */
struct bpf_map_def SEC("maps") queue_state = {
	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
	.key_size = sizeof(struct bpf_cgroup_storage_key),
	.value_size = sizeof(struct hbm_vqueue),
};
/* Names the C types of this map's key and value. */
BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key,
		     struct hbm_vqueue);
58 
/* Array map with a single slot (max_entries = 1) holding one
 * struct hbm_queue_stats.
 */
struct bpf_map_def SEC("maps") queue_stats = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct hbm_queue_stats),
	.max_entries = 1,
};
/* Names the C types of this map's key and value.
 * NOTE(review): the key is annotated as int while key_size above uses u32;
 * the two agree only where int is 32 bits wide.
 */
BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats);
66 
/* Per-packet facts gathered by hbm_get_pkt_info() and get_tcp_info(). */
struct hbm_pkt_info {
	int	cwnd;	/* tp->snd_cwnd of the owning TCP socket, else 0 */
	int	rtt;	/* tp->srtt_us >> 3 of the owning TCP socket, else 0 */
	bool	is_ip;	/* true when the header's version field is 4 or 6 */
	bool	is_tcp;	/* true when protocol (v4) / nexthdr (v6) equals 6 */
	short	ecn;	/* header bits selected by INET_ECN_MASK; 0 if not IP */
};
74 
75 static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
76 {
77 	struct bpf_sock *sk;
78 	struct bpf_tcp_sock *tp;
79 
80 	sk = skb->sk;
81 	if (sk) {
82 		sk = bpf_sk_fullsock(sk);
83 		if (sk) {
84 			if (sk->protocol == IPPROTO_TCP) {
85 				tp = bpf_tcp_sock(sk);
86 				if (tp) {
87 					pkti->cwnd = tp->snd_cwnd;
88 					pkti->rtt = tp->srtt_us >> 3;
89 					return 0;
90 				}
91 			}
92 		}
93 	}
94 	return 1;
95 }
96 
97 static __always_inline void hbm_get_pkt_info(struct __sk_buff *skb,
98 					     struct hbm_pkt_info *pkti)
99 {
100 	struct iphdr iph;
101 	struct ipv6hdr *ip6h;
102 
103 	pkti->cwnd = 0;
104 	pkti->rtt = 0;
105 	bpf_skb_load_bytes(skb, 0, &iph, 12);
106 	if (iph.version == 6) {
107 		ip6h = (struct ipv6hdr *)&iph;
108 		pkti->is_ip = true;
109 		pkti->is_tcp = (ip6h->nexthdr == 6);
110 		pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
111 	} else if (iph.version == 4) {
112 		pkti->is_ip = true;
113 		pkti->is_tcp = (iph.protocol == 6);
114 		pkti->ecn = iph.tos & INET_ECN_MASK;
115 	} else {
116 		pkti->is_ip = false;
117 		pkti->is_tcp = false;
118 		pkti->ecn = 0;
119 	}
120 	if (pkti->is_tcp)
121 		get_tcp_info(skb, pkti);
122 }
123 
124 static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
125 {
126 		bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
127 		qdp->lasttime = bpf_ktime_get_ns();
128 		qdp->credit = INIT_CREDIT;
129 		qdp->rate = rate * 128;
130 }
131 
132 static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
133 					     int len,
134 					     unsigned long long curtime,
135 					     bool congestion_flag,
136 					     bool drop_flag,
137 					     bool cwr_flag,
138 					     bool ecn_ce_flag,
139 					     struct hbm_pkt_info *pkti,
140 					     int credit)
141 {
142 	int rv = ALLOW_PKT;
143 
144 	if (qsp != NULL) {
145 		// Following is needed for work conserving
146 		__sync_add_and_fetch(&(qsp->bytes_total), len);
147 		if (qsp->stats) {
148 			// Optionally update statistics
149 			if (qsp->firstPacketTime == 0)
150 				qsp->firstPacketTime = curtime;
151 			qsp->lastPacketTime = curtime;
152 			__sync_add_and_fetch(&(qsp->pkts_total), 1);
153 			if (congestion_flag) {
154 				__sync_add_and_fetch(&(qsp->pkts_marked), 1);
155 				__sync_add_and_fetch(&(qsp->bytes_marked), len);
156 			}
157 			if (drop_flag) {
158 				__sync_add_and_fetch(&(qsp->pkts_dropped), 1);
159 				__sync_add_and_fetch(&(qsp->bytes_dropped),
160 						     len);
161 			}
162 			if (ecn_ce_flag)
163 				__sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
164 			if (pkti->cwnd) {
165 				__sync_add_and_fetch(&(qsp->sum_cwnd),
166 						     pkti->cwnd);
167 				__sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
168 			}
169 			if (pkti->rtt)
170 				__sync_add_and_fetch(&(qsp->sum_rtt),
171 						     pkti->rtt);
172 			__sync_add_and_fetch(&(qsp->sum_credit), credit);
173 
174 			if (drop_flag)
175 				rv = DROP_PKT;
176 			if (cwr_flag)
177 				rv |= 2;
178 			if (rv == DROP_PKT)
179 				__sync_add_and_fetch(&(qsp->returnValCount[0]),
180 						     1);
181 			else if (rv == ALLOW_PKT)
182 				__sync_add_and_fetch(&(qsp->returnValCount[1]),
183 						     1);
184 			else if (rv == 2)
185 				__sync_add_and_fetch(&(qsp->returnValCount[2]),
186 						     1);
187 			else if (rv == 3)
188 				__sync_add_and_fetch(&(qsp->returnValCount[3]),
189 						     1);
190 		}
191 	}
192 }
193