1 // SPDX-License-Identifier: GPL-2.0 2 #include <string.h> 3 #include <linux/tcp.h> 4 #include <linux/bpf.h> 5 #include <netinet/in.h> 6 #include <bpf/bpf_helpers.h> 7 8 char _license[] SEC("license") = "GPL"; 9 10 int page_size = 0; /* userspace should set it */ 11 12 #ifndef SOL_TCP 13 #define SOL_TCP IPPROTO_TCP 14 #endif 15 16 #define SOL_CUSTOM 0xdeadbeef 17 18 struct sockopt_sk { 19 __u8 val; 20 }; 21 22 struct { 23 __uint(type, BPF_MAP_TYPE_SK_STORAGE); 24 __uint(map_flags, BPF_F_NO_PREALLOC); 25 __type(key, int); 26 __type(value, struct sockopt_sk); 27 } socket_storage_map SEC(".maps"); 28 29 SEC("cgroup/getsockopt") 30 int _getsockopt(struct bpf_sockopt *ctx) 31 { 32 __u8 *optval_end = ctx->optval_end; 33 __u8 *optval = ctx->optval; 34 struct sockopt_sk *storage; 35 struct bpf_sock *sk; 36 37 /* Bypass AF_NETLINK. */ 38 sk = ctx->sk; 39 if (sk && sk->family == AF_NETLINK) 40 goto out; 41 42 /* Make sure bpf_get_netns_cookie is callable. 43 */ 44 if (bpf_get_netns_cookie(NULL) == 0) 45 return 0; 46 47 if (bpf_get_netns_cookie(ctx) == 0) 48 return 0; 49 50 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { 51 /* Not interested in SOL_IP:IP_TOS; 52 * let next BPF program in the cgroup chain or kernel 53 * handle it. 54 */ 55 goto out; 56 } 57 58 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { 59 /* Not interested in SOL_SOCKET:SO_SNDBUF; 60 * let next BPF program in the cgroup chain or kernel 61 * handle it. 62 */ 63 goto out; 64 } 65 66 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { 67 /* Not interested in SOL_TCP:TCP_CONGESTION; 68 * let next BPF program in the cgroup chain or kernel 69 * handle it. 70 */ 71 goto out; 72 } 73 74 if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { 75 /* Verify that TCP_ZEROCOPY_RECEIVE triggers. 76 * It has a custom implementation for performance 77 * reasons. 78 */ 79 80 /* Check that optval contains address (__u64) */ 81 if (optval + sizeof(__u64) > optval_end) 82 return 0; /* bounds check */ 83 84 if (((struct tcp_zerocopy_receive *)optval)->address != 0) 85 return 0; /* unexpected data */ 86 87 goto out; 88 } 89 90 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { 91 if (optval + 1 > optval_end) 92 return 0; /* bounds check */ 93 94 ctx->retval = 0; /* Reset system call return value to zero */ 95 96 /* Always export 0x55 */ 97 optval[0] = 0x55; 98 ctx->optlen = 1; 99 100 /* Userspace buffer is PAGE_SIZE * 2, but BPF 101 * program can only see the first PAGE_SIZE 102 * bytes of data. 103 */ 104 if (optval_end - optval != page_size) 105 return 0; /* unexpected data size */ 106 107 return 1; 108 } 109 110 if (ctx->level != SOL_CUSTOM) 111 return 0; /* deny everything except custom level */ 112 113 if (optval + 1 > optval_end) 114 return 0; /* bounds check */ 115 116 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, 117 BPF_SK_STORAGE_GET_F_CREATE); 118 if (!storage) 119 return 0; /* couldn't get sk storage */ 120 121 if (!ctx->retval) 122 return 0; /* kernel should not have handled 123 * SOL_CUSTOM, something is wrong! 124 */ 125 ctx->retval = 0; /* Reset system call return value to zero */ 126 127 optval[0] = storage->val; 128 ctx->optlen = 1; 129 130 return 1; 131 132 out: 133 /* optval larger than PAGE_SIZE use kernel's buffer. */ 134 if (ctx->optlen > page_size) 135 ctx->optlen = 0; 136 return 1; 137 } 138 139 SEC("cgroup/setsockopt") 140 int _setsockopt(struct bpf_sockopt *ctx) 141 { 142 __u8 *optval_end = ctx->optval_end; 143 __u8 *optval = ctx->optval; 144 struct sockopt_sk *storage; 145 struct bpf_sock *sk; 146 147 /* Bypass AF_NETLINK. */ 148 sk = ctx->sk; 149 if (sk && sk->family == AF_NETLINK) 150 goto out; 151 152 if (sk && sk->family == AF_INET && sk->type == SOCK_RAW) { 153 struct bpf_tcp_sock *tp = bpf_tcp_sock(sk); 154 155 if (tp) { 156 char saved_syn[60]; 157 158 bpf_getsockopt(sk, SOL_TCP, TCP_SAVED_SYN, 159 &saved_syn, sizeof(saved_syn)); 160 goto consumed; 161 } 162 163 goto out; 164 } 165 166 /* Make sure bpf_get_netns_cookie is callable. 167 */ 168 if (bpf_get_netns_cookie(NULL) == 0) 169 return 0; 170 171 if (bpf_get_netns_cookie(ctx) == 0) 172 return 0; 173 174 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { 175 /* Not interested in SOL_IP:IP_TOS; 176 * let next BPF program in the cgroup chain or kernel 177 * handle it. 178 */ 179 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ 180 return 1; 181 } 182 183 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { 184 /* Overwrite SO_SNDBUF value */ 185 186 if (optval + sizeof(__u32) > optval_end) 187 return 0; /* bounds check */ 188 189 *(__u32 *)optval = 0x55AA; 190 ctx->optlen = 4; 191 192 return 1; 193 } 194 195 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { 196 /* Always use cubic */ 197 198 if (optval + 5 > optval_end) 199 return 0; /* bounds check */ 200 201 memcpy(optval, "cubic", 5); 202 ctx->optlen = 5; 203 204 return 1; 205 } 206 207 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { 208 /* Original optlen is larger than PAGE_SIZE. */ 209 if (ctx->optlen != page_size * 2) 210 return 0; /* unexpected data size */ 211 212 if (optval + 1 > optval_end) 213 return 0; /* bounds check */ 214 215 /* Make sure we can trim the buffer. */ 216 optval[0] = 0; 217 ctx->optlen = 1; 218 219 /* Usepace buffer is PAGE_SIZE * 2, but BPF 220 * program can only see the first PAGE_SIZE 221 * bytes of data. 222 */ 223 if (optval_end - optval != page_size) 224 return 0; /* unexpected data size */ 225 226 return 1; 227 } 228 229 if (ctx->level != SOL_CUSTOM) 230 return 0; /* deny everything except custom level */ 231 232 if (optval + 1 > optval_end) 233 return 0; /* bounds check */ 234 235 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, 236 BPF_SK_STORAGE_GET_F_CREATE); 237 if (!storage) 238 return 0; /* couldn't get sk storage */ 239 240 storage->val = optval[0]; 241 242 consumed: 243 ctx->optlen = -1; /* BPF has consumed this option, don't call kernel 244 * setsockopt handler. 245 */ 246 247 return 1; 248 249 out: 250 /* optval larger than PAGE_SIZE use kernel's buffer. */ 251 if (ctx->optlen > page_size) 252 ctx->optlen = 0; 253 return 1; 254 } 255