/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#ifndef __USDT_BPF_H__
#define __USDT_BPF_H__

#include <linux/errno.h>
#include "bpf_helpers.h"
#include "bpf_tracing.h"

/* The types and maps below are internal implementation details of libbpf's
 * USDT support and are subject to change. Also, bpf_usdt_xxx() API helpers
 * should be considered an unstable API as well and might be adjusted based
 * on user feedback from using libbpf's USDT support in production.
 */

/* User can override BPF_USDT_MAX_SPEC_CNT to change the default size of the
 * internal map that keeps track of USDT argument specifications. This might
 * be necessary if there are a lot of USDT attachments.
 */
#ifndef BPF_USDT_MAX_SPEC_CNT
#define BPF_USDT_MAX_SPEC_CNT 256
#endif
/* User can override BPF_USDT_MAX_IP_CNT to change the default size of the
 * internal map that keeps track of the mapping from IP (memory address) to
 * USDT argument specification.
 * Note, if the kernel supports BPF cookies, this map is not used and could
 * be resized all the way down to 1 to save a bit of memory.
 */
#ifndef BPF_USDT_MAX_IP_CNT
#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
#endif
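
/* For illustration only (the values below are hypothetical): a BPF object
 * with many USDT attachments could raise these limits by defining the
 * macros before including this header, e.g.:
 *
 *	#define BPF_USDT_MAX_SPEC_CNT 1024
 *	#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
 *	#include "usdt.bpf.h"
 */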

enum __bpf_usdt_arg_type {
	BPF_USDT_ARG_CONST,
	BPF_USDT_ARG_REG,
	BPF_USDT_ARG_REG_DEREF,
	BPF_USDT_ARG_SIB,
};

/*
 * This struct layout is designed specifically to be backwards/forward
 * compatible between libbpf versions for ARG_CONST, ARG_REG, and
 * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
 */
struct __bpf_usdt_arg_spec {
	/* u64 scalar interpreted depending on arg_type, see below */
	__u64 val_off;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* arg location case, see bpf_usdt_arg() for details */
	enum __bpf_usdt_arg_type arg_type: 8;
	/* index register offset within struct pt_regs */
	__u16 idx_reg_off: 12;
	/* scale factor for index register (1, 2, 4, or 8) */
	__u16 scale_bitshift: 4;
	/* reserved for future use, keeps reg_off offset stable */
	__u8 __reserved: 8;
#else
	__u8 __reserved: 8;
	__u16 idx_reg_off: 12;
	__u16 scale_bitshift: 4;
	enum __bpf_usdt_arg_type arg_type: 8;
#endif
	/* offset of referenced register within struct pt_regs */
	short reg_off;
	/* whether arg should be interpreted as signed value */
	bool arg_signed;
	/* number of bits that need to be cleared and, optionally,
	 * sign-extended to cast arguments that are 1, 2, or 4 bytes
	 * long into final 8-byte u64/s64 value returned to user
	 */
	char arg_bitshift;
};

/* should match USDT_MAX_ARG_CNT in usdt.c exactly */
#define BPF_USDT_MAX_ARG_CNT 12
struct __bpf_usdt_spec {
	struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT];
	__u64 usdt_cookie;
	short arg_cnt;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, BPF_USDT_MAX_SPEC_CNT);
	__type(key, int);
	__type(value, struct __bpf_usdt_spec);
} __bpf_usdt_specs SEC(".maps") __weak;

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, BPF_USDT_MAX_IP_CNT);
	__type(key, long);
	__type(value, __u32);
} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;

extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig;

static __always_inline
int __bpf_usdt_spec_id(struct pt_regs *ctx)
{
	if (!LINUX_HAS_BPF_COOKIE) {
		long ip = PT_REGS_IP(ctx);
		int *spec_id_ptr;

		spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip);
		return spec_id_ptr ? *spec_id_ptr : -ESRCH;
	}

	return bpf_get_attach_cookie(ctx);
}

/* Return the number of USDT arguments defined for the currently traced USDT. */
__weak __hidden
int bpf_usdt_arg_cnt(struct pt_regs *ctx)
{
	struct __bpf_usdt_spec *spec;
	int spec_id;

	spec_id = __bpf_usdt_spec_id(ctx);
	if (spec_id < 0)
		return -ESRCH;

	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
	if (!spec)
		return -ESRCH;

	return spec->arg_cnt;
}

/* Return the size in bytes of USDT argument #*arg_num* (zero-indexed).
 * Returns a negative error if the argument is not found or arg_num is invalid.
 */
static __always_inline
int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num)
{
	struct __bpf_usdt_arg_spec *arg_spec;
	struct __bpf_usdt_spec *spec;
	int spec_id;

	spec_id = __bpf_usdt_spec_id(ctx);
	if (spec_id < 0)
		return -ESRCH;

	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
	if (!spec)
		return -ESRCH;

	if (arg_num >= BPF_USDT_MAX_ARG_CNT)
		return -ENOENT;
	barrier_var(arg_num);
	if (arg_num >= spec->arg_cnt)
		return -ENOENT;

	arg_spec = &spec->args[arg_num];

	/* arg_spec->arg_bitshift = 64 - arg_sz * 8
	 * so: arg_sz = (64 - arg_spec->arg_bitshift) / 8
	 */
	return (unsigned int)(64 - arg_spec->arg_bitshift) / 8;
}

/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
 * Returns 0 on success; a negative error otherwise.
 * On error, *res is guaranteed to be set to zero.
 */
__weak __hidden
int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
{
	struct __bpf_usdt_spec *spec;
	struct __bpf_usdt_arg_spec *arg_spec;
	unsigned long val, idx;
	int err, spec_id;

	*res = 0;

	spec_id = __bpf_usdt_spec_id(ctx);
	if (spec_id < 0)
		return -ESRCH;

	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
	if (!spec)
		return -ESRCH;

	if (arg_num >= BPF_USDT_MAX_ARG_CNT)
		return -ENOENT;
	barrier_var(arg_num);
	if (arg_num >= spec->arg_cnt)
		return -ENOENT;

	arg_spec = &spec->args[arg_num];
	switch (arg_spec->arg_type) {
	case BPF_USDT_ARG_CONST:
		/* Arg is just a constant ("-4@$-9" in USDT arg spec).
		 * The value is recorded in arg_spec->val_off directly.
		 */
		val = arg_spec->val_off;
		break;
	case BPF_USDT_ARG_REG:
		/* Arg is in a register (e.g., "8@%rax" in USDT arg spec),
		 * so we read the contents of that register directly from
		 * struct pt_regs. To keep things simple, the user-space parts
		 * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
		 */
		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
		if (err)
			return err;
		break;
	case BPF_USDT_ARG_REG_DEREF:
		/* Arg is in memory addressed by a register, plus some offset
		 * (e.g., "-4@-1204(%rbp)" in USDT arg spec). The register is
		 * identified as in the BPF_USDT_ARG_REG case, and the offset
		 * is in arg_spec->val_off. We first fetch the register contents
		 * from pt_regs, then do another, user-space probe read to
		 * fetch the argument value itself.
		 */
		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
		if (err)
			return err;
		err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
		if (err)
			return err;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		val >>= arg_spec->arg_bitshift;
#endif
		break;
	case BPF_USDT_ARG_SIB:
		/* Arg is in memory addressed in SIB (Scale-Index-Base) mode
		 * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
		 * fetch the base register contents and the index register
		 * contents from pt_regs. Then we calculate the final address
		 * as base + (index * scale) + offset, and do a user-space
		 * probe read to fetch the argument value.
		 */
		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
		if (err)
			return err;
		err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
		if (err)
			return err;
		err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
		if (err)
			return err;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		val >>= arg_spec->arg_bitshift;
#endif
		break;
	default:
		return -EINVAL;
	}

	/* Cast arg from 1, 2, or 4 bytes to the final 8-byte size, clearing
	 * the upper arg_bitshift bits, with sign extension if the argument
	 * is signed.
	 */
	val <<= arg_spec->arg_bitshift;
	if (arg_spec->arg_signed)
		val = ((long)val) >> arg_spec->arg_bitshift;
	else
		val = val >> arg_spec->arg_bitshift;
	*res = val;
	return 0;
}
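
/* A minimal usage sketch (the SEC() spec, probe, and handler names below are
 * hypothetical): the helpers above can be called directly from a USDT
 * handler, without the BPF_USDT() convenience macro defined at the end of
 * this header:
 *
 *	SEC("usdt/./my_app:my_provider:my_probe")
 *	int handle_my_probe(struct pt_regs *ctx)
 *	{
 *		long arg0;
 *
 *		if (bpf_usdt_arg_cnt(ctx) < 1)
 *			return 0;
 *		if (bpf_usdt_arg(ctx, 0, &arg0))
 *			return 0;
 *		bpf_printk("arg0=%ld size=%d", arg0, bpf_usdt_arg_size(ctx, 0));
 *		return 0;
 *	}
 */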

/* Retrieve the user-specified cookie value provided during attach as
 * bpf_usdt_opts.usdt_cookie. This serves the same purpose as the BPF cookie
 * returned by bpf_get_attach_cookie(). Libbpf's USDT support itself uses
 * BPF cookies internally, so the user can't use the BPF cookie directly
 * for USDT programs and has to use bpf_usdt_cookie() instead.
 */
__weak __hidden
long bpf_usdt_cookie(struct pt_regs *ctx)
{
	struct __bpf_usdt_spec *spec;
	int spec_id;

	spec_id = __bpf_usdt_spec_id(ctx);
	if (spec_id < 0)
		return 0;

	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
	if (!spec)
		return 0;

	return spec->usdt_cookie;
}

/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */
#define ___bpf_usdt_args0() ctx
#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); _x; })
#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); _x; })
#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); _x; })
#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); _x; })
#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); _x; })
#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); _x; })
#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); _x; })
#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); _x; })
#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); _x; })
#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); _x; })
#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); _x; })
#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); _x; })
#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)

/*
 * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for
 * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes.
 * Original struct pt_regs * context is preserved as 'ctx' argument.
 */
#define BPF_USDT(name, args...)						\
name(struct pt_regs *ctx);						\
static __always_inline typeof(name(0))					\
____##name(struct pt_regs *ctx, ##args);				\
typeof(name(0)) name(struct pt_regs *ctx)				\
{									\
	_Pragma("GCC diagnostic push")					\
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		\
	return ____##name(___bpf_usdt_args(args));			\
	_Pragma("GCC diagnostic pop")					\
}									\
static __always_inline typeof(name(0))					\
____##name(struct pt_regs *ctx, ##args)
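
/* A minimal usage sketch for BPF_USDT() (the SEC() spec, probe, and argument
 * names below are hypothetical). Arguments are fetched via bpf_usdt_arg()
 * behind the scenes, and the original struct pt_regs * context is still
 * available as 'ctx':
 *
 *	SEC("usdt/./my_app:my_provider:my_probe")
 *	int BPF_USDT(handle_my_probe, int fd, long sz)
 *	{
 *		bpf_printk("cookie=%ld fd=%d sz=%ld", bpf_usdt_cookie(ctx), fd, sz);
 *		return 0;
 *	}
 */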

#endif /* __USDT_BPF_H__ */