// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/poison.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/cnum.h>
#include <linux/bpf_mem_alloc.h>
#include <net/xdp.h>
#include <linux/trace_events.h>
#include <linux/kallsyms.h>

#include "disasm.h"

/* Table of verifier ops, one designated entry "[_id] = &<_name>_verifier_ops"
 * per BPF_PROG_TYPE() line of <linux/bpf_types.h>. BPF_MAP_TYPE() and
 * BPF_LINK_TYPE() expand to nothing here, so only program types contribute.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

enum bpf_features {
	BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
	BPF_FEAT_STREAMS = 1,
	__MAX_BPF_FEAT,
};

struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k.
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * Registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* link to the next stack element */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};

#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512

#define BPF_PRIV_STACK_MIN_SIZE		64

/* Forward declarations of helpers that are defined later in this file. */
static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id);
static int release_reference_nomark(struct bpf_verifier_state *state, int id);
static int release_reference(struct bpf_verifier_env *env, int id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
			      struct bpf_reg_state *reg);
static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg);
static inline bool
in_sleepable_context(struct bpf_verifier_env *env); 212 static const char *non_sleepable_context_description(struct bpf_verifier_env *env); 213 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg); 214 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg); 215 216 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, 217 struct bpf_map *map, 218 bool unpriv, bool poison) 219 { 220 unpriv |= bpf_map_ptr_unpriv(aux); 221 aux->map_ptr_state.unpriv = unpriv; 222 aux->map_ptr_state.poison = poison; 223 aux->map_ptr_state.map_ptr = map; 224 } 225 226 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state) 227 { 228 bool poisoned = bpf_map_key_poisoned(aux); 229 230 aux->map_key_state = state | BPF_MAP_KEY_SEEN | 231 (poisoned ? BPF_MAP_KEY_POISON : 0ULL); 232 } 233 234 static void update_ref_obj(struct ref_obj_desc *ref_obj, struct bpf_reg_state *reg) 235 { 236 ref_obj->id = reg->id; 237 ref_obj->parent_id = reg->parent_id; 238 ref_obj->cnt++; 239 } 240 241 static int validate_ref_obj(struct bpf_verifier_env *env, struct ref_obj_desc *ref_obj) 242 { 243 if (ref_obj->cnt > 1) { 244 verifier_bug(env, "function expects only one referenced object but got %d\n", 245 ref_obj->cnt); 246 return -EFAULT; 247 } 248 249 return 0; 250 } 251 252 struct bpf_call_arg_meta { 253 struct bpf_map_desc map; 254 struct bpf_dynptr_desc dynptr; 255 struct ref_obj_desc ref_obj; 256 bool raw_mode; 257 bool pkt_access; 258 u8 release_regno; 259 int regno; 260 int access_size; 261 int mem_size; 262 u64 msize_max_value; 263 int func_id; 264 struct btf *btf; 265 u32 btf_id; 266 struct btf *ret_btf; 267 u32 ret_btf_id; 268 u32 subprogno; 269 struct btf_field *kptr_field; 270 s64 const_map_key; 271 }; 272 273 struct bpf_kfunc_meta { 274 struct btf *btf; 275 const struct btf_type *proto; 276 const char *name; 277 const u32 *flags; 278 s32 id; 279 }; 280 281 struct btf *btf_vmlinux; 282 283 typedef 
struct argno { 284 int argno; 285 } argno_t; 286 287 static argno_t argno_from_reg(u32 regno) 288 { 289 return (argno_t){ .argno = regno }; 290 } 291 292 static argno_t argno_from_arg(u32 arg) 293 { 294 return (argno_t){ .argno = -arg }; 295 } 296 297 static int reg_from_argno(argno_t a) 298 { 299 if (a.argno >= 0) 300 return a.argno; 301 if (a.argno >= -MAX_BPF_FUNC_REG_ARGS) 302 return -a.argno; 303 return -1; 304 } 305 306 static int arg_from_argno(argno_t a) 307 { 308 if (a.argno < 0) 309 return -a.argno; 310 return -1; 311 } 312 313 static int arg_idx_from_argno(argno_t a) 314 { 315 return arg_from_argno(a) - 1; 316 } 317 318 static const char *btf_type_name(const struct btf *btf, u32 id) 319 { 320 return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); 321 } 322 323 static DEFINE_MUTEX(bpf_verifier_lock); 324 static DEFINE_MUTEX(bpf_percpu_ma_lock); 325 326 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) 327 { 328 struct bpf_verifier_env *env = private_data; 329 va_list args; 330 331 if (!bpf_verifier_log_needed(&env->log)) 332 return; 333 334 va_start(args, fmt); 335 bpf_verifier_vlog(&env->log, fmt, args); 336 va_end(args); 337 } 338 339 static void verbose_invalid_scalar(struct bpf_verifier_env *env, 340 struct bpf_reg_state *reg, 341 struct bpf_retval_range range, const char *ctx, 342 const char *reg_name) 343 { 344 bool unknown = true; 345 346 verbose(env, "%s the register %s has", ctx, reg_name); 347 if (reg_smin(reg) > S64_MIN) { 348 verbose(env, " smin=%lld", reg_smin(reg)); 349 unknown = false; 350 } 351 if (reg_smax(reg) < S64_MAX) { 352 verbose(env, " smax=%lld", reg_smax(reg)); 353 unknown = false; 354 } 355 if (unknown) 356 verbose(env, " unknown scalar value"); 357 verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); 358 } 359 360 static bool reg_not_null(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) 361 { 362 enum bpf_reg_type type; 363 364 type = reg->type; 365 if 
(type_may_be_null(type)) 366 return false; 367 368 type = base_type(type); 369 return type == PTR_TO_SOCKET || 370 type == PTR_TO_TCP_SOCK || 371 type == PTR_TO_MAP_VALUE || 372 type == PTR_TO_MAP_KEY || 373 type == PTR_TO_SOCK_COMMON || 374 (type == PTR_TO_BTF_ID && is_trusted_reg(env, reg)) || 375 (type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) || 376 type == CONST_PTR_TO_MAP; 377 } 378 379 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) 380 { 381 struct btf_record *rec = NULL; 382 struct btf_struct_meta *meta; 383 384 if (reg->type == PTR_TO_MAP_VALUE) { 385 rec = reg->map_ptr->record; 386 } else if (type_is_ptr_alloc_obj(reg->type)) { 387 meta = btf_find_struct_meta(reg->btf, reg->btf_id); 388 if (meta) 389 rec = meta->record; 390 } 391 return rec; 392 } 393 394 bool bpf_subprog_is_global(const struct bpf_verifier_env *env, int subprog) 395 { 396 struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux; 397 398 return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL; 399 } 400 401 static bool subprog_returns_void(struct bpf_verifier_env *env, int subprog) 402 { 403 const struct btf_type *type, *func, *func_proto; 404 const struct btf *btf = env->prog->aux->btf; 405 u32 btf_id; 406 407 btf_id = env->prog->aux->func_info[subprog].type_id; 408 409 func = btf_type_by_id(btf, btf_id); 410 if (verifier_bug_if(!func, env, "btf_id %u not found", btf_id)) 411 return false; 412 413 func_proto = btf_type_by_id(btf, func->type); 414 if (!func_proto) 415 return false; 416 417 type = btf_type_skip_modifiers(btf, func_proto->type, NULL); 418 if (!type) 419 return false; 420 421 return btf_type_is_void(type); 422 } 423 424 static const char *subprog_name(const struct bpf_verifier_env *env, int subprog) 425 { 426 struct bpf_func_info *info; 427 428 if (!env->prog->aux->func_info) 429 return ""; 430 431 info = &env->prog->aux->func_info[subprog]; 432 return btf_type_name(env->prog->aux->btf, info->type_id); 433 } 434 435 void 
bpf_mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog) 436 { 437 struct bpf_subprog_info *info = subprog_info(env, subprog); 438 439 info->is_cb = true; 440 info->is_async_cb = true; 441 info->is_exception_cb = true; 442 } 443 444 static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog) 445 { 446 return subprog_info(env, subprog)->is_exception_cb; 447 } 448 449 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) 450 { 451 return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK); 452 } 453 454 static bool type_is_rdonly_mem(u32 type) 455 { 456 return type & MEM_RDONLY; 457 } 458 459 static bool is_acquire_function(enum bpf_func_id func_id, 460 const struct bpf_map *map) 461 { 462 enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC; 463 464 if (func_id == BPF_FUNC_sk_lookup_tcp || 465 func_id == BPF_FUNC_sk_lookup_udp || 466 func_id == BPF_FUNC_skc_lookup_tcp || 467 func_id == BPF_FUNC_ringbuf_reserve || 468 func_id == BPF_FUNC_kptr_xchg) 469 return true; 470 471 if (func_id == BPF_FUNC_map_lookup_elem && 472 (map_type == BPF_MAP_TYPE_SOCKMAP || 473 map_type == BPF_MAP_TYPE_SOCKHASH)) 474 return true; 475 476 return false; 477 } 478 479 static bool is_ptr_cast_function(enum bpf_func_id func_id) 480 { 481 return func_id == BPF_FUNC_tcp_sock || 482 func_id == BPF_FUNC_sk_fullsock || 483 func_id == BPF_FUNC_skc_to_tcp_sock || 484 func_id == BPF_FUNC_skc_to_tcp6_sock || 485 func_id == BPF_FUNC_skc_to_udp6_sock || 486 func_id == BPF_FUNC_skc_to_mptcp_sock || 487 func_id == BPF_FUNC_skc_to_tcp_timewait_sock || 488 func_id == BPF_FUNC_skc_to_tcp_request_sock; 489 } 490 491 static bool is_sync_callback_calling_kfunc(u32 btf_id); 492 static bool is_async_callback_calling_kfunc(u32 btf_id); 493 static bool is_callback_calling_kfunc(u32 btf_id); 494 495 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id); 496 static bool is_task_work_add_kfunc(u32 func_id); 497 498 static bool 
is_sync_callback_calling_function(enum bpf_func_id func_id) 499 { 500 return func_id == BPF_FUNC_for_each_map_elem || 501 func_id == BPF_FUNC_find_vma || 502 func_id == BPF_FUNC_loop || 503 func_id == BPF_FUNC_user_ringbuf_drain; 504 } 505 506 static bool is_async_callback_calling_function(enum bpf_func_id func_id) 507 { 508 return func_id == BPF_FUNC_timer_set_callback; 509 } 510 511 static bool is_callback_calling_function(enum bpf_func_id func_id) 512 { 513 return is_sync_callback_calling_function(func_id) || 514 is_async_callback_calling_function(func_id); 515 } 516 517 bool bpf_is_sync_callback_calling_insn(struct bpf_insn *insn) 518 { 519 return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) || 520 (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm)); 521 } 522 523 bool bpf_is_async_callback_calling_insn(struct bpf_insn *insn) 524 { 525 return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) || 526 (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm)); 527 } 528 529 static bool is_async_cb_sleepable(struct bpf_verifier_env *env, struct bpf_insn *insn) 530 { 531 /* bpf_timer callbacks are never sleepable. */ 532 if (bpf_helper_call(insn) && insn->imm == BPF_FUNC_timer_set_callback) 533 return false; 534 535 /* bpf_wq and bpf_task_work callbacks are always sleepable. 
*/ 536 if (bpf_pseudo_kfunc_call(insn) && insn->off == 0 && 537 (is_bpf_wq_set_callback_kfunc(insn->imm) || is_task_work_add_kfunc(insn->imm))) 538 return true; 539 540 verifier_bug(env, "unhandled async callback in is_async_cb_sleepable"); 541 return false; 542 } 543 544 bool bpf_is_may_goto_insn(struct bpf_insn *insn) 545 { 546 return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO; 547 } 548 549 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) 550 { 551 int allocated_slots = state->allocated_stack / BPF_REG_SIZE; 552 553 /* We need to check that slots between [spi - nr_slots + 1, spi] are 554 * within [0, allocated_stack). 555 * 556 * Please note that the spi grows downwards. For example, a dynptr 557 * takes the size of two stack slots; the first slot will be at 558 * spi and the second slot will be at spi - 1. 559 */ 560 return spi - nr_slots + 1 >= 0 && spi < allocated_slots; 561 } 562 563 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 564 const char *obj_kind, int nr_slots) 565 { 566 int off, spi; 567 568 if (!tnum_is_const(reg->var_off)) { 569 verbose(env, "%s has to be at a constant offset\n", obj_kind); 570 return -EINVAL; 571 } 572 573 off = reg->var_off.value; 574 if (off % BPF_REG_SIZE) { 575 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off); 576 return -EINVAL; 577 } 578 579 spi = bpf_get_spi(off); 580 if (spi + 1 < nr_slots) { 581 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off); 582 return -EINVAL; 583 } 584 585 if (!is_spi_bounds_valid(bpf_func(env, reg), spi, nr_slots)) 586 return -ERANGE; 587 return spi; 588 } 589 590 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 591 { 592 return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS); 593 } 594 595 static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots) 596 { 597 return 
stack_slot_obj_get_spi(env, reg, "iter", nr_slots); 598 } 599 600 static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 601 { 602 return stack_slot_obj_get_spi(env, reg, "irq_flag", 1); 603 } 604 605 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) 606 { 607 switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { 608 case DYNPTR_TYPE_LOCAL: 609 return BPF_DYNPTR_TYPE_LOCAL; 610 case DYNPTR_TYPE_RINGBUF: 611 return BPF_DYNPTR_TYPE_RINGBUF; 612 case DYNPTR_TYPE_SKB: 613 return BPF_DYNPTR_TYPE_SKB; 614 case DYNPTR_TYPE_XDP: 615 return BPF_DYNPTR_TYPE_XDP; 616 case DYNPTR_TYPE_SKB_META: 617 return BPF_DYNPTR_TYPE_SKB_META; 618 case DYNPTR_TYPE_FILE: 619 return BPF_DYNPTR_TYPE_FILE; 620 default: 621 return BPF_DYNPTR_TYPE_INVALID; 622 } 623 } 624 625 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) 626 { 627 switch (type) { 628 case BPF_DYNPTR_TYPE_LOCAL: 629 return DYNPTR_TYPE_LOCAL; 630 case BPF_DYNPTR_TYPE_RINGBUF: 631 return DYNPTR_TYPE_RINGBUF; 632 case BPF_DYNPTR_TYPE_SKB: 633 return DYNPTR_TYPE_SKB; 634 case BPF_DYNPTR_TYPE_XDP: 635 return DYNPTR_TYPE_XDP; 636 case BPF_DYNPTR_TYPE_SKB_META: 637 return DYNPTR_TYPE_SKB_META; 638 case BPF_DYNPTR_TYPE_FILE: 639 return DYNPTR_TYPE_FILE; 640 default: 641 return 0; 642 } 643 } 644 645 static bool dynptr_type_referenced(enum bpf_dynptr_type type) 646 { 647 return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE; 648 } 649 650 static void __mark_dynptr_reg(struct bpf_reg_state *reg, 651 enum bpf_dynptr_type type, 652 bool first_slot, int id, int parent_id); 653 654 655 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env, 656 struct bpf_reg_state *sreg1, 657 struct bpf_reg_state *sreg2, 658 enum bpf_dynptr_type type, int parent_id) 659 { 660 int id = ++env->id_gen; 661 662 __mark_dynptr_reg(sreg1, type, true, id, parent_id); 663 __mark_dynptr_reg(sreg2, type, false, id, parent_id); 664 } 665 666 static void 
mark_dynptr_cb_reg(struct bpf_verifier_env *env,
		   struct bpf_reg_state *reg,
		   enum bpf_dynptr_type type)
{
	/* first_slot = true, fresh id, parent id 0 */
	__mark_dynptr_reg(reg, type, true, ++env->id_gen, 0);
}

static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
					struct bpf_func_state *state, int spi);

/* Turn the two stack slots addressed by @reg into a dynptr of the type
 * encoded in @arg_type.
 *
 * Any dynptr already occupying either slot is destroyed first. When
 * @dynptr->type is BPF_DYNPTR_TYPE_INVALID the new dynptr's parent is
 * @ref_obj->id, or, for referenced dynptr types, a reference acquired on
 * top of it; otherwise the parent id is copied from @dynptr.
 *
 * Returns 0 on success or a negative error from the slot lookup, the
 * destruction of existing dynptrs, the type/ref_obj validation or
 * acquire_reference().
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx,
				   struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, err, parent_id = 0;
	enum bpf_dynptr_type type;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
	 * to ensure that for the following example:
	 *	[d1][d1][d2][d2]
	 * spi    3   2   1   0
	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
	 * case they do belong to same dynptr, second call won't see slot_type
	 * as STACK_DYNPTR and will simply skip destruction.
	 */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
	if (err)
		return err;

	/* NOTE(review): the slot types are written before arg_type is
	 * validated below, so the -EINVAL path leaves them set.
	 */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	if (dynptr->type == BPF_DYNPTR_TYPE_INVALID) { /* dynptr constructors */
		err = validate_ref_obj(env, ref_obj);
		if (err)
			return err;

		/* Track parent's id if the parent is a referenced object */
		parent_id = ref_obj->id;

		if (dynptr_type_referenced(type)) {
			int id;

			/*
			 * Create an intermediate reference that tracks the referenced
			 * object for the referenced dynptr. Freeing a referenced dynptr
			 * through helpers/kfuncs will invalidate all clones.
			 */
			id = acquire_reference(env, insn_idx, parent_id);
			if (id < 0)
				return id;

			parent_id = id;
		}
	} else { /* bpf_dynptr_clone() */
		parent_id = dynptr->parent_id;
	}

	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
			       &state->stack[spi - 1].spilled_ptr, type, parent_id);

	return 0;
}

/* Reset the two stack slots stack[0] and stack[1]: every slot_type byte
 * becomes STACK_INVALID and both spilled registers are marked not-init.
 */
static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_stack_state *stack)
{
	int i;

	for (i = 0; i < BPF_REG_SIZE; i++) {
		stack[0].slot_type[i] = STACK_INVALID;
		stack[1].slot_type[i] = STACK_INVALID;
	}

	bpf_mark_reg_not_init(env, &stack[0].spilled_ptr);
	bpf_mark_reg_not_init(env, &stack[1].spilled_ptr);
}

/* Release the dynptr stored at the stack slots addressed by @reg.
 * Returns a negative error from the slot lookup, otherwise the result of
 * release_reference().
 */
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/*
	 * For referenced dynptr, release the parent ref which cascades to
	 * all clones and derived slices. For non-referenced dynptr, only
	 * the dynptr and slices derived from it will be invalidated.
	 */
	reg = &state->stack[spi].spilled_ptr;
	return release_reference(env, dynptr_type_referenced(reg->dynptr.type)
				      ? reg->parent_id
				      : reg->id);
}

static void __mark_reg_unknown(const struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg);

/* Invalidate @reg: mark it not-init unless env->allow_ptr_leaks is set,
 * in which case it is marked as an unknown value instead.
 */
static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	if (!env->allow_ptr_leaks)
		bpf_mark_reg_not_init(env, reg);
	else
		__mark_reg_unknown(env, reg);
}

/* Count the dynptrs in env->cur_state whose parent_id equals
 * @v_parent_id. Only first slots of STACK_DYNPTR stack slots are counted,
 * so each dynptr contributes once.
 */
static int dynptr_ref_cnt(struct bpf_verifier_env *env, int v_parent_id)
{
	struct bpf_stack_state *stack;
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;
	int ref_cnt = 0;

	bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, 1 << STACK_DYNPTR, ({
		if (!stack || stack->slot_type[0] != STACK_DYNPTR)
			continue;
		if (!stack->spilled_ptr.dynptr.first_slot)
			continue;
		if (stack->spilled_ptr.parent_id == v_parent_id)
			ref_cnt++;
	}));

	return ref_cnt;
}

/* If stack slot @spi of @state holds (part of) a dynptr, destroy that
 * dynptr by releasing its id.
 *
 * Returns 0 when the slot holds no dynptr or the release succeeded,
 * -EINVAL when the dynptr is referenced and no other dynptr shares its
 * parent, or the error returned by release_reference().
 */
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
					struct bpf_func_state *state, int spi)
{
	int err = 0;

	/* We always ensure that STACK_DYNPTR is never set partially,
	 * hence just checking for slot_type[0] is enough. This is
	 * different for STACK_SPILL, where it may be only set for
	 * 1 byte, so code has to use is_spilled_reg.
	 */
	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
		return 0;

	/* Reposition spi to first slot */
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		spi = spi + 1;

	/*
	 * A referenced dynptr can be overwritten only if there is at
	 * least one other dynptr sharing the same virtual ref parent,
	 * ensuring the reference can still be properly released.
	 */
	if (dynptr_type_referenced(state->stack[spi].spilled_ptr.dynptr.type) &&
	    dynptr_ref_cnt(env, state->stack[spi].spilled_ptr.parent_id) <= 1) {
		verbose(env, "cannot overwrite referenced dynptr\n");
		return -EINVAL;
	}

	/* Invalidate the dynptr and any derived slices */
	err = release_reference(env, state->stack[spi].spilled_ptr.id);
	if (!err) {
		mark_stack_slot_scratched(env, spi);
		mark_stack_slot_scratched(env, spi - 1);
	}

	return err;
}

/* Check whether @reg may be used as the destination of a dynptr
 * constructor. A CONST_PTR_TO_DYNPTR register never qualifies; a stack
 * pointer qualifies unless the slot lookup fails with an error other than
 * -ERANGE.
 */
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi;

	if (reg->type == CONST_PTR_TO_DYNPTR)
		return false;

	spi = dynptr_get_spi(env, reg);

	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
	 * error because this just means the stack state hasn't been updated yet.
	 * We will do check_mem_access to check and update stack bounds later.
	 */
	if (spi < 0 && spi != -ERANGE)
		return false;

	/* We don't need to check if the stack slots are marked by previous
	 * dynptr initializations because we allow overwriting existing unreferenced
	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
	 * touching are completely destructed before we reinitialize them for a new
	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
	 * instead of delaying it until the end where the user will get "Unreleased
	 * reference" error.
	 */
	return true;
}

/* Check whether @reg refers to an initialized dynptr: either a
 * CONST_PTR_TO_DYNPTR register, or a stack pointer to the first slot of a
 * dynptr whose two slots are STACK_DYNPTR in every byte.
 */
static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int i, spi;

	/* This already represents first slot of initialized bpf_dynptr.
	 *
	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
	 * check_func_arg_reg_off's logic, so we don't need to check its
	 * offset and alignment.
	 */
	if (reg->type == CONST_PTR_TO_DYNPTR)
		return true;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return false;
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		return false;

	for (i = 0; i < BPF_REG_SIZE; i++) {
		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
			return false;
	}

	return true;
}

/* Check that the dynptr referred to by @reg has the type demanded by
 * @arg_type. Returns false when the stack slot lookup fails.
 */
static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				    enum bpf_arg_type arg_type)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	enum bpf_dynptr_type dynptr_type;
	int spi;

	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
	if (arg_type == ARG_PTR_TO_DYNPTR)
		return true;

	dynptr_type = arg_to_dynptr_type(arg_type);
	if (reg->type == CONST_PTR_TO_DYNPTR) {
		return reg->dynptr.type == dynptr_type;
	} else {
		spi = dynptr_get_spi(env, reg);
		if (spi < 0)
			return false;
		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
	}
}

static void __mark_reg_known_zero(struct bpf_reg_state *reg);

static bool in_rcu_cs(struct bpf_verifier_env *env);

static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);

/* Turn the @nr_slots stack slots addressed by @reg into an active
 * iterator of BTF type @btf / @btf_id.
 *
 * A reference is acquired for the iterator and its id is stored in the
 * first slot only; the remaining slots get id 0. For RCU protected kfuncs
 * the slots are tagged MEM_RCU inside an RCU critical section and
 * PTR_UNTRUSTED outside of one.
 *
 * Returns 0 on success or a negative error from the slot lookup or
 * acquire_reference().
 */
static int mark_stack_slots_iter(struct bpf_verifier_env *env,
				 struct bpf_kfunc_call_arg_meta *meta,
				 struct bpf_reg_state *reg, int insn_idx,
				 struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j, id;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	id = acquire_reference(env, insn_idx, 0);
	if (id < 0)
		return id;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		__mark_reg_known_zero(st);
		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
		if (is_kfunc_rcu_protected(meta)) {
			if (in_rcu_cs(env))
				st->type |= MEM_RCU;
			else
				st->type |= PTR_UNTRUSTED;
		}
		st->id = i == 0 ? id : 0;
		st->iter.btf = btf;
		st->iter.btf_id = btf_id;
		st->iter.state = BPF_ITER_STATE_ACTIVE;
		st->iter.depth = 0;

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_ITER;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}

/* Undo mark_stack_slots_iter(): release the reference whose id is stored
 * in the first slot and reset all @nr_slots slots to STACK_INVALID.
 * A failing release_reference() only triggers a one-time warning.
 *
 * Returns 0, or a negative error from the slot lookup.
 */
static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		if (i == 0)
			WARN_ON_ONCE(release_reference(env, st->id));

		bpf_mark_reg_not_init(env, st);

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_INVALID;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}

/* Check that none of the @nr_slots stack slots addressed by @reg
 * currently holds any STACK_ITER byte, i.e. that an iterator may be
 * constructed there.
 */
static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
				     struct bpf_reg_state *reg, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j;

	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
	 * will do check_mem_access to check and update stack bounds later, so
	 * return true for that case.
	 */
	spi = iter_get_spi(env, reg, nr_slots);
	if (spi == -ERANGE)
		return true;
	if (spi < 0)
		return false;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];

		for (j = 0; j < BPF_REG_SIZE; j++)
			if (slot->slot_type[j] == STACK_ITER)
				return false;
	}

	return true;
}

/* Check that the @nr_slots stack slots addressed by @reg hold an
 * initialized iterator of BTF type @btf / @btf_id.
 *
 * Returns 0 when valid, -EPROTO when a slot is marked PTR_UNTRUSTED and
 * -EINVAL for every other mismatch (bad slot index, id layout, BTF type
 * or slot type).
 */
static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return -EINVAL;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		if (st->type & PTR_UNTRUSTED)
			return -EPROTO;
		/* only main (first) slot has id set */
		if (i == 0 && !st->id)
			return -EINVAL;
		if (i != 0 && st->id)
			return -EINVAL;
		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
			return -EINVAL;

		for (j = 0; j < BPF_REG_SIZE; j++)
			if (slot->slot_type[j] != STACK_ITER)
				return -EINVAL;
	}

	return 0;
}

static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
static int release_irq_state(struct bpf_verifier_state *state, int id);

/* Turn the stack slot addressed by @reg into an irq flag slot: acquire an
 * irq state id, store it together with @kfunc_class in the slot's spilled
 * register and mark every byte of the slot STACK_IRQ_FLAG.
 *
 * Returns 0 on success or a negative error from the slot lookup or
 * acquire_irq_state().
 */
static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
				     struct bpf_kfunc_call_arg_meta *meta,
				     struct bpf_reg_state *reg, int insn_idx,
				     int kfunc_class)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	struct bpf_stack_state *slot;
	struct bpf_reg_state *st;
	int spi, i, id;

	spi = irq_flag_get_spi(env, reg);
	if (spi < 0)
		return spi;

	id = acquire_irq_state(env, insn_idx);
	if (id < 0)
		return id;

	slot = &state->stack[spi];
	st = &slot->spilled_ptr;

	__mark_reg_known_zero(st);
	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
	st->id = id;
	st->irq.kfunc_class = kfunc_class;

	for (i = 0; i < BPF_REG_SIZE; i++)
		slot->slot_type[i] = STACK_IRQ_FLAG;

	mark_stack_slot_scratched(env, spi);
	return 0;
}

/* Undo mark_stack_slot_irq_flag() for the slot addressed by @reg.
 *
 * Returns 0 on success, a negative error from the slot lookup, -EINVAL
 * when the flag was saved by a different kfunc class than @kfunc_class,
 * or the error from release_irq_state(), which is logged as an
 * out-of-order restore.
 */
static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				      int kfunc_class)
{
	struct bpf_func_state *state = bpf_func(env, reg);
	struct bpf_stack_state *slot;
	struct bpf_reg_state *st;
	int spi, i, err;

	spi = irq_flag_get_spi(env, reg);
	if (spi < 0)
		return spi;

	slot = &state->stack[spi];
	st = &slot->spilled_ptr;

	if (st->irq.kfunc_class != kfunc_class) {
		const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";
		const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock";

		verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
			flag_kfunc, used_kfunc);
		return -EINVAL;
	}

	err = release_irq_state(env->cur_state, st->id);
	/* -EACCES is the only failure that does not trigger the warning */
	WARN_ON_ONCE(err && err != -EACCES);
	if (err) {
		int insn_idx = 0;

		/* Find where the currently active irq id was acquired, for
		 * the log message; insn_idx stays 0 if it is not found.
		 * NOTE(review): this 'i' shadows the function-scope 'i'.
		 */
		for (int i = 0; i < env->cur_state->acquired_refs; i++) {
			if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
				insn_idx = env->cur_state->refs[i].insn_idx;
				break;
			}
		}

		verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
			env->cur_state->active_irq_id, insn_idx);
		return err;
	}

	bpf_mark_reg_not_init(env, st);

	for (i = 0; i < BPF_REG_SIZE; i++)
		slot->slot_type[i] = STACK_INVALID;

	mark_stack_slot_scratched(env, spi);
	return 0;
}

static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct
bpf_func_state *state = bpf_func(env, reg); 1150 struct bpf_stack_state *slot; 1151 int spi, i; 1152 1153 /* For -ERANGE (i.e. spi not falling into allocated stack slots), we 1154 * will do check_mem_access to check and update stack bounds later, so 1155 * return true for that case. 1156 */ 1157 spi = irq_flag_get_spi(env, reg); 1158 if (spi == -ERANGE) 1159 return true; 1160 if (spi < 0) 1161 return false; 1162 1163 slot = &state->stack[spi]; 1164 1165 for (i = 0; i < BPF_REG_SIZE; i++) 1166 if (slot->slot_type[i] == STACK_IRQ_FLAG) 1167 return false; 1168 return true; 1169 } 1170 1171 static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 1172 { 1173 struct bpf_func_state *state = bpf_func(env, reg); 1174 struct bpf_stack_state *slot; 1175 struct bpf_reg_state *st; 1176 int spi, i; 1177 1178 spi = irq_flag_get_spi(env, reg); 1179 if (spi < 0) 1180 return -EINVAL; 1181 1182 slot = &state->stack[spi]; 1183 st = &slot->spilled_ptr; 1184 1185 if (!st->id) 1186 return -EINVAL; 1187 1188 for (i = 0; i < BPF_REG_SIZE; i++) 1189 if (slot->slot_type[i] != STACK_IRQ_FLAG) 1190 return -EINVAL; 1191 return 0; 1192 } 1193 1194 /* Check if given stack slot is "special": 1195 * - spilled register state (STACK_SPILL); 1196 * - dynptr state (STACK_DYNPTR); 1197 * - iter state (STACK_ITER). 
1198 * - irq flag state (STACK_IRQ_FLAG) 1199 */ 1200 static bool is_stack_slot_special(const struct bpf_stack_state *stack) 1201 { 1202 enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1]; 1203 1204 switch (type) { 1205 case STACK_SPILL: 1206 case STACK_DYNPTR: 1207 case STACK_ITER: 1208 case STACK_IRQ_FLAG: 1209 return true; 1210 case STACK_INVALID: 1211 case STACK_POISON: 1212 case STACK_MISC: 1213 case STACK_ZERO: 1214 return false; 1215 default: 1216 WARN_ONCE(1, "unknown stack slot type %d\n", type); 1217 return true; 1218 } 1219 } 1220 1221 /* The reg state of a pointer or a bounded scalar was saved when 1222 * it was spilled to the stack. 1223 */ 1224 1225 /* 1226 * Mark stack slot as STACK_MISC, unless it is already: 1227 * - STACK_INVALID, in which case they are equivalent. 1228 * - STACK_ZERO, in which case we preserve more precise STACK_ZERO. 1229 * - STACK_POISON, which truly forbids access to the slot. 1230 * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged 1231 * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is 1232 * unnecessary as both are considered equivalent when loading data and pruning, 1233 * in case of unprivileged mode it will be incorrect to allow reads of invalid 1234 * slots. 1235 */ 1236 static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype) 1237 { 1238 if (*stype == STACK_ZERO) 1239 return; 1240 if (*stype == STACK_INVALID || *stype == STACK_POISON) 1241 return; 1242 *stype = STACK_MISC; 1243 } 1244 1245 static void scrub_spilled_slot(u8 *stype) 1246 { 1247 if (*stype != STACK_INVALID && *stype != STACK_POISON) 1248 *stype = STACK_MISC; 1249 } 1250 1251 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too 1252 * small to hold src. This is different from krealloc since we don't want to preserve 1253 * the contents of dst. 1254 * 1255 * Leaves dst untouched if src is NULL or length is zero. 
Returns NULL if memory could 1256 * not be allocated. 1257 */ 1258 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) 1259 { 1260 size_t alloc_bytes; 1261 void *orig = dst; 1262 size_t bytes; 1263 1264 if (ZERO_OR_NULL_PTR(src)) 1265 goto out; 1266 1267 if (unlikely(check_mul_overflow(n, size, &bytes))) 1268 return NULL; 1269 1270 alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes)); 1271 dst = krealloc(orig, alloc_bytes, flags); 1272 if (!dst) { 1273 kfree(orig); 1274 return NULL; 1275 } 1276 1277 memcpy(dst, src, bytes); 1278 out: 1279 return dst ? dst : ZERO_SIZE_PTR; 1280 } 1281 1282 /* resize an array from old_n items to new_n items. the array is reallocated if it's too 1283 * small to hold new_n items. new items are zeroed out if the array grows. 1284 * 1285 * Contrary to krealloc_array, does not free arr if new_n is zero. 1286 */ 1287 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) 1288 { 1289 size_t alloc_size; 1290 void *new_arr; 1291 1292 if (!new_n || old_n == new_n) 1293 goto out; 1294 1295 alloc_size = kmalloc_size_roundup(size_mul(new_n, size)); 1296 new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT); 1297 if (!new_arr) { 1298 kfree(arr); 1299 return NULL; 1300 } 1301 arr = new_arr; 1302 1303 if (new_n > old_n) 1304 memset(arr + old_n * size, 0, (new_n - old_n) * size); 1305 1306 out: 1307 return arr ? 
arr : ZERO_SIZE_PTR; 1308 } 1309 1310 static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src) 1311 { 1312 dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, 1313 sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT); 1314 if (!dst->refs) 1315 return -ENOMEM; 1316 1317 dst->acquired_refs = src->acquired_refs; 1318 dst->active_locks = src->active_locks; 1319 dst->active_preempt_locks = src->active_preempt_locks; 1320 dst->active_rcu_locks = src->active_rcu_locks; 1321 dst->active_irq_id = src->active_irq_id; 1322 dst->active_lock_id = src->active_lock_id; 1323 dst->active_lock_ptr = src->active_lock_ptr; 1324 return 0; 1325 } 1326 1327 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src) 1328 { 1329 size_t n = src->allocated_stack / BPF_REG_SIZE; 1330 1331 dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state), 1332 GFP_KERNEL_ACCOUNT); 1333 if (!dst->stack) 1334 return -ENOMEM; 1335 1336 dst->allocated_stack = src->allocated_stack; 1337 1338 /* copy stack args state */ 1339 n = src->out_stack_arg_cnt; 1340 if (n) { 1341 dst->stack_arg_regs = copy_array(dst->stack_arg_regs, src->stack_arg_regs, n, 1342 sizeof(struct bpf_reg_state), 1343 GFP_KERNEL_ACCOUNT); 1344 if (!dst->stack_arg_regs) 1345 return -ENOMEM; 1346 } 1347 1348 dst->out_stack_arg_cnt = src->out_stack_arg_cnt; 1349 return 0; 1350 } 1351 1352 static int resize_reference_state(struct bpf_verifier_state *state, size_t n) 1353 { 1354 state->refs = realloc_array(state->refs, state->acquired_refs, n, 1355 sizeof(struct bpf_reference_state)); 1356 if (!state->refs) 1357 return -ENOMEM; 1358 1359 state->acquired_refs = n; 1360 return 0; 1361 } 1362 1363 /* Possibly update state->allocated_stack to be at least size bytes. Also 1364 * possibly update the function's high-water mark in its bpf_subprog_info. 
1365 */ 1366 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size) 1367 { 1368 size_t old_n = state->allocated_stack / BPF_REG_SIZE, n; 1369 1370 /* The stack size is always a multiple of BPF_REG_SIZE. */ 1371 size = round_up(size, BPF_REG_SIZE); 1372 n = size / BPF_REG_SIZE; 1373 1374 if (old_n >= n) 1375 return 0; 1376 1377 state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state)); 1378 if (!state->stack) 1379 return -ENOMEM; 1380 1381 state->allocated_stack = size; 1382 1383 /* update known max for given subprogram */ 1384 if (env->subprog_info[state->subprogno].stack_depth < size) 1385 env->subprog_info[state->subprogno].stack_depth = size; 1386 1387 return 0; 1388 } 1389 1390 static int grow_stack_arg_slots(struct bpf_verifier_env *env, 1391 struct bpf_func_state *state, int cnt) 1392 { 1393 size_t old_n = state->out_stack_arg_cnt; 1394 1395 if (old_n >= cnt) 1396 return 0; 1397 1398 state->stack_arg_regs = realloc_array(state->stack_arg_regs, old_n, cnt, 1399 sizeof(struct bpf_reg_state)); 1400 if (!state->stack_arg_regs) 1401 return -ENOMEM; 1402 1403 state->out_stack_arg_cnt = cnt; 1404 return 0; 1405 } 1406 1407 /* Acquire a pointer id from the env and update the state->refs to include 1408 * this new pointer reference. 1409 * On success, returns a valid pointer id to associate with the register 1410 * On failure, returns a negative errno. 
1411 */ 1412 static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) 1413 { 1414 struct bpf_verifier_state *state = env->cur_state; 1415 int new_ofs = state->acquired_refs; 1416 int err; 1417 1418 err = resize_reference_state(state, state->acquired_refs + 1); 1419 if (err) 1420 return NULL; 1421 state->refs[new_ofs].insn_idx = insn_idx; 1422 1423 return &state->refs[new_ofs]; 1424 } 1425 1426 static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, int parent_id) 1427 { 1428 struct bpf_reference_state *s; 1429 1430 s = acquire_reference_state(env, insn_idx); 1431 if (!s) 1432 return -ENOMEM; 1433 s->type = REF_TYPE_PTR; 1434 s->id = ++env->id_gen; 1435 s->parent_id = parent_id; 1436 return s->id; 1437 } 1438 1439 static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type, 1440 int id, void *ptr) 1441 { 1442 struct bpf_verifier_state *state = env->cur_state; 1443 struct bpf_reference_state *s; 1444 1445 s = acquire_reference_state(env, insn_idx); 1446 if (!s) 1447 return -ENOMEM; 1448 s->type = type; 1449 s->id = id; 1450 s->ptr = ptr; 1451 1452 state->active_locks++; 1453 state->active_lock_id = id; 1454 state->active_lock_ptr = ptr; 1455 return 0; 1456 } 1457 1458 static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx) 1459 { 1460 struct bpf_verifier_state *state = env->cur_state; 1461 struct bpf_reference_state *s; 1462 1463 s = acquire_reference_state(env, insn_idx); 1464 if (!s) 1465 return -ENOMEM; 1466 s->type = REF_TYPE_IRQ; 1467 s->id = ++env->id_gen; 1468 1469 state->active_irq_id = s->id; 1470 return s->id; 1471 } 1472 1473 static void release_reference_state(struct bpf_verifier_state *state, int idx) 1474 { 1475 int last_idx; 1476 size_t rem; 1477 1478 /* IRQ state requires the relative ordering of elements remaining the 1479 * same, since it relies on the refs array to behave as a stack, so that 1480 * it can detect out-of-order IRQ 
restore. Hence use memmove to shift 1481 * the array instead of swapping the final element into the deleted idx. 1482 */ 1483 last_idx = state->acquired_refs - 1; 1484 rem = state->acquired_refs - idx - 1; 1485 if (last_idx && idx != last_idx) 1486 memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem); 1487 memset(&state->refs[last_idx], 0, sizeof(*state->refs)); 1488 state->acquired_refs--; 1489 return; 1490 } 1491 1492 static bool find_reference_state(struct bpf_verifier_state *state, int id) 1493 { 1494 int i; 1495 1496 for (i = 0; i < state->acquired_refs; i++) { 1497 if (state->refs[i].type != REF_TYPE_PTR) 1498 continue; 1499 if (state->refs[i].id == id) 1500 return true; 1501 } 1502 1503 return false; 1504 } 1505 1506 static bool reg_is_referenced(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) 1507 { 1508 return find_reference_state(env->cur_state, reg->id); 1509 } 1510 1511 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr) 1512 { 1513 void *prev_ptr = NULL; 1514 u32 prev_id = 0; 1515 int i; 1516 1517 for (i = 0; i < state->acquired_refs; i++) { 1518 if (state->refs[i].type == type && state->refs[i].id == id && 1519 state->refs[i].ptr == ptr) { 1520 release_reference_state(state, i); 1521 state->active_locks--; 1522 /* Reassign active lock (id, ptr). 
*/ 1523 state->active_lock_id = prev_id; 1524 state->active_lock_ptr = prev_ptr; 1525 return 0; 1526 } 1527 if (state->refs[i].type & REF_TYPE_LOCK_MASK) { 1528 prev_id = state->refs[i].id; 1529 prev_ptr = state->refs[i].ptr; 1530 } 1531 } 1532 return -EINVAL; 1533 } 1534 1535 static int release_irq_state(struct bpf_verifier_state *state, int id) 1536 { 1537 u32 prev_id = 0; 1538 int i; 1539 1540 if (id != state->active_irq_id) 1541 return -EACCES; 1542 1543 for (i = 0; i < state->acquired_refs; i++) { 1544 if (state->refs[i].type != REF_TYPE_IRQ) 1545 continue; 1546 if (state->refs[i].id == id) { 1547 release_reference_state(state, i); 1548 state->active_irq_id = prev_id; 1549 return 0; 1550 } else { 1551 prev_id = state->refs[i].id; 1552 } 1553 } 1554 return -EINVAL; 1555 } 1556 1557 static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type, 1558 int id, void *ptr) 1559 { 1560 int i; 1561 1562 for (i = 0; i < state->acquired_refs; i++) { 1563 struct bpf_reference_state *s = &state->refs[i]; 1564 1565 if (!(s->type & type)) 1566 continue; 1567 1568 if (s->id == id && s->ptr == ptr) 1569 return s; 1570 } 1571 return NULL; 1572 } 1573 1574 static void free_func_state(struct bpf_func_state *state) 1575 { 1576 if (!state) 1577 return; 1578 kfree(state->stack_arg_regs); 1579 kfree(state->stack); 1580 kfree(state); 1581 } 1582 1583 void bpf_clear_jmp_history(struct bpf_verifier_state *state) 1584 { 1585 kfree(state->jmp_history); 1586 state->jmp_history = NULL; 1587 state->jmp_history_cnt = 0; 1588 } 1589 1590 void bpf_free_verifier_state(struct bpf_verifier_state *state, 1591 bool free_self) 1592 { 1593 int i; 1594 1595 for (i = 0; i <= state->curframe; i++) { 1596 free_func_state(state->frame[i]); 1597 state->frame[i] = NULL; 1598 } 1599 kfree(state->refs); 1600 bpf_clear_jmp_history(state); 1601 if (free_self) 1602 kfree(state); 1603 } 1604 1605 /* copy verifier state from src to dst growing dst stack space 1606 * 
when necessary to accommodate larger src stack 1607 */ 1608 static int copy_func_state(struct bpf_func_state *dst, 1609 const struct bpf_func_state *src) 1610 { 1611 memcpy(dst, src, offsetof(struct bpf_func_state, stack)); 1612 return copy_stack_state(dst, src); 1613 } 1614 1615 int bpf_copy_verifier_state(struct bpf_verifier_state *dst_state, 1616 const struct bpf_verifier_state *src) 1617 { 1618 struct bpf_func_state *dst; 1619 int i, err; 1620 1621 dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, 1622 src->jmp_history_cnt, sizeof(*dst_state->jmp_history), 1623 GFP_KERNEL_ACCOUNT); 1624 if (!dst_state->jmp_history) 1625 return -ENOMEM; 1626 dst_state->jmp_history_cnt = src->jmp_history_cnt; 1627 1628 /* if dst has more stack frames then src frame, free them, this is also 1629 * necessary in case of exceptional exits using bpf_throw. 1630 */ 1631 for (i = src->curframe + 1; i <= dst_state->curframe; i++) { 1632 free_func_state(dst_state->frame[i]); 1633 dst_state->frame[i] = NULL; 1634 } 1635 err = copy_reference_state(dst_state, src); 1636 if (err) 1637 return err; 1638 dst_state->speculative = src->speculative; 1639 dst_state->in_sleepable = src->in_sleepable; 1640 dst_state->curframe = src->curframe; 1641 dst_state->branches = src->branches; 1642 dst_state->parent = src->parent; 1643 dst_state->first_insn_idx = src->first_insn_idx; 1644 dst_state->last_insn_idx = src->last_insn_idx; 1645 dst_state->dfs_depth = src->dfs_depth; 1646 dst_state->callback_unroll_depth = src->callback_unroll_depth; 1647 dst_state->may_goto_depth = src->may_goto_depth; 1648 dst_state->equal_state = src->equal_state; 1649 for (i = 0; i <= src->curframe; i++) { 1650 dst = dst_state->frame[i]; 1651 if (!dst) { 1652 dst = kzalloc_obj(*dst, GFP_KERNEL_ACCOUNT); 1653 if (!dst) 1654 return -ENOMEM; 1655 dst_state->frame[i] = dst; 1656 } 1657 err = copy_func_state(dst, src->frame[i]); 1658 if (err) 1659 return err; 1660 } 1661 return 0; 1662 } 1663 1664 static 
u32 state_htab_size(struct bpf_verifier_env *env) 1665 { 1666 return env->prog->len; 1667 } 1668 1669 struct list_head *bpf_explored_state(struct bpf_verifier_env *env, int idx) 1670 { 1671 struct bpf_verifier_state *cur = env->cur_state; 1672 struct bpf_func_state *state = cur->frame[cur->curframe]; 1673 1674 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; 1675 } 1676 1677 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b) 1678 { 1679 int fr; 1680 1681 if (a->curframe != b->curframe) 1682 return false; 1683 1684 for (fr = a->curframe; fr >= 0; fr--) 1685 if (a->frame[fr]->callsite != b->frame[fr]->callsite) 1686 return false; 1687 1688 return true; 1689 } 1690 1691 1692 void bpf_free_backedges(struct bpf_scc_visit *visit) 1693 { 1694 struct bpf_scc_backedge *backedge, *next; 1695 1696 for (backedge = visit->backedges; backedge; backedge = next) { 1697 bpf_free_verifier_state(&backedge->state, false); 1698 next = backedge->next; 1699 kfree(backedge); 1700 } 1701 visit->backedges = NULL; 1702 } 1703 1704 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, 1705 int *insn_idx, bool pop_log) 1706 { 1707 struct bpf_verifier_state *cur = env->cur_state; 1708 struct bpf_verifier_stack_elem *elem, *head = env->head; 1709 int err; 1710 1711 if (env->head == NULL) 1712 return -ENOENT; 1713 1714 if (cur) { 1715 err = bpf_copy_verifier_state(cur, &head->st); 1716 if (err) 1717 return err; 1718 } 1719 if (pop_log) 1720 bpf_vlog_reset(&env->log, head->log_pos); 1721 if (insn_idx) 1722 *insn_idx = head->insn_idx; 1723 if (prev_insn_idx) 1724 *prev_insn_idx = head->prev_insn_idx; 1725 elem = head->next; 1726 bpf_free_verifier_state(&head->st, false); 1727 kfree(head); 1728 env->head = elem; 1729 env->stack_size--; 1730 return 0; 1731 } 1732 1733 static bool error_recoverable_with_nospec(int err) 1734 { 1735 /* Should only return true for non-fatal errors that are allowed to 1736 * occur during 
speculative verification. For these we can insert a 1737 * nospec and the program might still be accepted. Do not include 1738 * something like ENOMEM because it is likely to re-occur for the next 1739 * architectural path once it has been recovered-from in all speculative 1740 * paths. 1741 */ 1742 return err == -EPERM || err == -EACCES || err == -EINVAL; 1743 } 1744 1745 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, 1746 int insn_idx, int prev_insn_idx, 1747 bool speculative) 1748 { 1749 struct bpf_verifier_state *cur = env->cur_state; 1750 struct bpf_verifier_stack_elem *elem; 1751 int err; 1752 1753 elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT); 1754 if (!elem) 1755 return ERR_PTR(-ENOMEM); 1756 1757 elem->insn_idx = insn_idx; 1758 elem->prev_insn_idx = prev_insn_idx; 1759 elem->next = env->head; 1760 elem->log_pos = env->log.end_pos; 1761 env->head = elem; 1762 env->stack_size++; 1763 err = bpf_copy_verifier_state(&elem->st, cur); 1764 if (err) 1765 return ERR_PTR(-ENOMEM); 1766 elem->st.speculative |= speculative; 1767 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { 1768 verbose(env, "The sequence of %d jumps is too complex.\n", 1769 env->stack_size); 1770 return ERR_PTR(-E2BIG); 1771 } 1772 if (elem->st.parent) { 1773 ++elem->st.parent->branches; 1774 /* WARN_ON(branches > 2) technically makes sense here, 1775 * but 1776 * 1. speculative states will bump 'branches' for non-branch 1777 * instructions 1778 * 2. is_state_visited() heuristics may decide not to create 1779 * a new state for a sequence of branches and all such current 1780 * and cloned states will be pointing to a single parent state 1781 * which might have large 'branches' count. 
1782 */ 1783 } 1784 return &elem->st; 1785 } 1786 1787 static const char *reg_arg_name(struct bpf_verifier_env *env, argno_t argno) 1788 { 1789 char *buf = env->tmp_arg_name; 1790 int len = sizeof(env->tmp_arg_name); 1791 int arg, regno = reg_from_argno(argno); 1792 1793 if (regno >= 0) { 1794 snprintf(buf, len, "R%d", regno); 1795 } else { 1796 arg = arg_from_argno(argno); 1797 snprintf(buf, len, "*(R11-%u)", (arg - MAX_BPF_FUNC_REG_ARGS) * BPF_REG_SIZE); 1798 } 1799 1800 return buf; 1801 } 1802 1803 static const int caller_saved[CALLER_SAVED_REGS] = { 1804 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 1805 }; 1806 1807 /* This helper doesn't clear reg->id */ 1808 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) 1809 { 1810 reg->var_off = tnum_const(imm); 1811 reg->r64 = cnum64_from_urange(imm, imm); 1812 reg->r32 = cnum32_from_urange((u32)imm, (u32)imm); 1813 } 1814 1815 /* Mark the unknown part of a register (variable offset or scalar value) as 1816 * known to have the value @imm. 1817 */ 1818 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) 1819 { 1820 /* Clear off and union(map_ptr, range) */ 1821 memset(((u8 *)reg) + sizeof(reg->type), 0, 1822 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); 1823 reg->id = 0; 1824 reg->parent_id = 0; 1825 ___mark_reg_known(reg, imm); 1826 } 1827 1828 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm) 1829 { 1830 reg->var_off = tnum_const_subreg(reg->var_off, imm); 1831 reg->r32 = cnum32_from_urange((u32)imm, (u32)imm); 1832 } 1833 1834 /* Mark the 'variable offset' part of a register as zero. This should be 1835 * used only on registers holding a pointer type. 
1836 */ 1837 static void __mark_reg_known_zero(struct bpf_reg_state *reg) 1838 { 1839 __mark_reg_known(reg, 0); 1840 } 1841 1842 static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) 1843 { 1844 __mark_reg_known(reg, 0); 1845 reg->type = SCALAR_VALUE; 1846 /* all scalars are assumed imprecise initially (unless unprivileged, 1847 * in which case everything is forced to be precise) 1848 */ 1849 reg->precise = !env->bpf_capable; 1850 } 1851 1852 static void mark_reg_known_zero(struct bpf_verifier_env *env, 1853 struct bpf_reg_state *regs, u32 regno) 1854 { 1855 __mark_reg_known_zero(regs + regno); 1856 } 1857 1858 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type, 1859 bool first_slot, int id, int parent_id) 1860 { 1861 /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for 1862 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply 1863 * set it unconditionally as it is ignored for STACK_DYNPTR anyway. 1864 */ 1865 __mark_reg_known_zero(reg); 1866 reg->type = CONST_PTR_TO_DYNPTR; 1867 /* Give each dynptr a unique id to uniquely associate slices to it. */ 1868 reg->id = id; 1869 reg->parent_id = parent_id; 1870 reg->dynptr.type = type; 1871 reg->dynptr.first_slot = first_slot; 1872 } 1873 1874 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) 1875 { 1876 if (base_type(reg->type) == PTR_TO_MAP_VALUE) { 1877 const struct bpf_map *map = reg->map_ptr; 1878 1879 if (map->inner_map_meta) { 1880 reg->type = CONST_PTR_TO_MAP; 1881 reg->map_ptr = map->inner_map_meta; 1882 /* transfer reg's id which is unique for every map_lookup_elem 1883 * as UID of the inner map. 
1884 */ 1885 if (btf_record_has_field(map->inner_map_meta->record, 1886 BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) { 1887 reg->map_uid = reg->id; 1888 } 1889 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { 1890 reg->type = PTR_TO_XDP_SOCK; 1891 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || 1892 map->map_type == BPF_MAP_TYPE_SOCKHASH) { 1893 reg->type = PTR_TO_SOCKET; 1894 } else { 1895 reg->type = PTR_TO_MAP_VALUE; 1896 } 1897 return; 1898 } 1899 1900 reg->type &= ~PTR_MAYBE_NULL; 1901 } 1902 1903 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno, 1904 struct btf_field_graph_root *ds_head) 1905 { 1906 __mark_reg_known(®s[regno], ds_head->node_offset); 1907 regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC; 1908 regs[regno].btf = ds_head->btf; 1909 regs[regno].btf_id = ds_head->value_btf_id; 1910 } 1911 1912 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) 1913 { 1914 return type_is_pkt_pointer(reg->type); 1915 } 1916 1917 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) 1918 { 1919 return reg_is_pkt_pointer(reg) || 1920 reg->type == PTR_TO_PACKET_END; 1921 } 1922 1923 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg) 1924 { 1925 return base_type(reg->type) == PTR_TO_MEM && 1926 (reg->type & 1927 (DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)); 1928 } 1929 1930 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ 1931 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, 1932 enum bpf_reg_type which) 1933 { 1934 /* The register can already have a range from prior markings. 1935 * This is fine as long as it hasn't been advanced from its 1936 * origin. 
1937 */ 1938 return reg->type == which && 1939 reg->id == 0 && 1940 tnum_equals_const(reg->var_off, 0); 1941 } 1942 1943 static void __mark_reg32_unbounded(struct bpf_reg_state *reg) 1944 { 1945 reg->r32 = CNUM32_UNBOUNDED; 1946 } 1947 1948 static void __mark_reg64_unbounded(struct bpf_reg_state *reg) 1949 { 1950 reg->r64 = CNUM64_UNBOUNDED; 1951 } 1952 1953 /* Reset the min/max bounds of a register */ 1954 static void __mark_reg_unbounded(struct bpf_reg_state *reg) 1955 { 1956 __mark_reg64_unbounded(reg); 1957 __mark_reg32_unbounded(reg); 1958 } 1959 1960 static void reset_reg64_and_tnum(struct bpf_reg_state *reg) 1961 { 1962 __mark_reg64_unbounded(reg); 1963 reg->var_off = tnum_unknown; 1964 } 1965 1966 static void reset_reg32_and_tnum(struct bpf_reg_state *reg) 1967 { 1968 __mark_reg32_unbounded(reg); 1969 reg->var_off = tnum_unknown; 1970 } 1971 1972 static struct cnum32 cnum32_from_tnum(struct tnum tnum) 1973 { 1974 tnum = tnum_subreg(tnum); 1975 if ((tnum.mask & S32_MIN) || (tnum.value & S32_MIN)) 1976 /* min signed is max(sign bit) | min(other bits) */ 1977 /* max signed is min(sign bit) | max(other bits) */ 1978 return cnum32_from_srange(tnum.value | (tnum.mask & S32_MIN), 1979 tnum.value | (tnum.mask & S32_MAX)); 1980 else 1981 return cnum32_from_urange(tnum.value, (tnum.value | tnum.mask)); 1982 } 1983 1984 static struct cnum64 cnum64_from_tnum(struct tnum tnum) 1985 { 1986 if ((tnum.mask & S64_MIN) || (tnum.value & S64_MIN)) 1987 /* min signed is max(sign bit) | min(other bits) */ 1988 /* max signed is min(sign bit) | max(other bits) */ 1989 return cnum64_from_srange(tnum.value | (tnum.mask & S64_MIN), 1990 tnum.value | (tnum.mask & S64_MAX)); 1991 else 1992 return cnum64_from_urange(tnum.value, (tnum.value | tnum.mask)); 1993 } 1994 1995 static void __update_reg32_bounds(struct bpf_reg_state *reg) 1996 { 1997 cnum32_intersect_with(®->r32, cnum32_from_tnum(reg->var_off)); 1998 } 1999 2000 static void __update_reg64_bounds(struct bpf_reg_state *reg) 2001 
{ 2002 u64 tnum_next, tmax; 2003 bool umin_in_tnum; 2004 2005 cnum64_intersect_with(®->r64, cnum64_from_tnum(reg->var_off)); 2006 2007 /* Check if u64 and tnum overlap in a single value */ 2008 tnum_next = tnum_step(reg->var_off, reg_umin(reg)); 2009 umin_in_tnum = (reg_umin(reg) & ~reg->var_off.mask) == reg->var_off.value; 2010 tmax = reg->var_off.value | reg->var_off.mask; 2011 if (umin_in_tnum && tnum_next > reg_umax(reg)) { 2012 /* The u64 range and the tnum only overlap in umin. 2013 * u64: ---[xxxxxx]----- 2014 * tnum: --xx----------x- 2015 */ 2016 ___mark_reg_known(reg, reg_umin(reg)); 2017 } else if (!umin_in_tnum && tnum_next == tmax) { 2018 /* The u64 range and the tnum only overlap in the maximum value 2019 * represented by the tnum, called tmax. 2020 * u64: ---[xxxxxx]----- 2021 * tnum: xx-----x-------- 2022 */ 2023 ___mark_reg_known(reg, tmax); 2024 } else if (!umin_in_tnum && tnum_next <= reg_umax(reg) && 2025 tnum_step(reg->var_off, tnum_next) > reg_umax(reg)) { 2026 /* The u64 range and the tnum only overlap in between umin 2027 * (excluded) and umax. 
2028 * u64: ---[xxxxxx]----- 2029 * tnum: xx----x-------x- 2030 */ 2031 ___mark_reg_known(reg, tnum_next); 2032 } 2033 } 2034 2035 static void __update_reg_bounds(struct bpf_reg_state *reg) 2036 { 2037 __update_reg32_bounds(reg); 2038 __update_reg64_bounds(reg); 2039 } 2040 2041 static void deduce_bounds_32_from_64(struct bpf_reg_state *reg) 2042 { 2043 cnum32_intersect_with(®->r32, cnum32_from_cnum64(reg->r64)); 2044 } 2045 2046 static void deduce_bounds_64_from_32(struct bpf_reg_state *reg) 2047 { 2048 reg->r64 = cnum64_cnum32_intersect(reg->r64, reg->r32); 2049 } 2050 2051 static void __reg_deduce_bounds(struct bpf_reg_state *reg) 2052 { 2053 deduce_bounds_32_from_64(reg); 2054 deduce_bounds_64_from_32(reg); 2055 } 2056 2057 /* Attempts to improve var_off based on unsigned min/max information */ 2058 static void __reg_bound_offset(struct bpf_reg_state *reg) 2059 { 2060 struct tnum var64_off = tnum_intersect(reg->var_off, 2061 tnum_range(reg_umin(reg), 2062 reg_umax(reg))); 2063 struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off), 2064 tnum_range(reg_u32_min(reg), 2065 reg_u32_max(reg))); 2066 2067 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); 2068 } 2069 2070 static bool range_bounds_violation(struct bpf_reg_state *reg); 2071 2072 static void reg_bounds_sync(struct bpf_reg_state *reg) 2073 { 2074 /* If the input reg_state is invalid, we can exit early */ 2075 if (range_bounds_violation(reg)) 2076 return; 2077 /* We might have learned new bounds from the var_off. */ 2078 __update_reg_bounds(reg); 2079 /* We might have learned something about the sign bit. */ 2080 __reg_deduce_bounds(reg); 2081 __reg_deduce_bounds(reg); 2082 /* We might have learned some bits from the bounds. */ 2083 __reg_bound_offset(reg); 2084 /* Intersecting with the old var_off might have improved our bounds 2085 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc), 2086 * then new var_off is (0; 0x7f...fc) which improves our umax. 
2087 */ 2088 __update_reg_bounds(reg); 2089 } 2090 2091 static bool const_tnum_range_mismatch(struct bpf_reg_state *reg) 2092 { 2093 if (!tnum_is_const(reg->var_off)) 2094 return false; 2095 2096 return !cnum64_is_const(reg->r64) || reg->r64.base != reg->var_off.value; 2097 } 2098 2099 static bool const_tnum_range_mismatch_32(struct bpf_reg_state *reg) 2100 { 2101 if (!tnum_subreg_is_const(reg->var_off)) 2102 return false; 2103 2104 return !cnum32_is_const(reg->r32) || reg->r32.base != tnum_subreg(reg->var_off).value; 2105 } 2106 2107 static bool range_bounds_violation(struct bpf_reg_state *reg) 2108 { 2109 return cnum32_is_empty(reg->r32) || cnum64_is_empty(reg->r64); 2110 } 2111 2112 static int reg_bounds_sanity_check(struct bpf_verifier_env *env, 2113 struct bpf_reg_state *reg, const char *ctx) 2114 { 2115 const char *msg; 2116 2117 if (range_bounds_violation(reg)) { 2118 msg = "range bounds violation"; 2119 goto out; 2120 } 2121 2122 if (const_tnum_range_mismatch(reg)) { 2123 msg = "const tnum out of sync with range bounds"; 2124 goto out; 2125 } 2126 2127 if (const_tnum_range_mismatch_32(reg)) { 2128 msg = "const subreg tnum out of sync with range bounds"; 2129 goto out; 2130 } 2131 2132 return 0; 2133 out: 2134 verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s r64={.base=%#llx, .size=%#llx} " 2135 "r32={.base=%#x, .size=%#x} var_off=(%#llx, %#llx)", 2136 ctx, msg, 2137 reg->r64.base, reg->r64.size, 2138 reg->r32.base, reg->r32.size, 2139 reg->var_off.value, reg->var_off.mask); 2140 if (env->test_reg_invariants) 2141 return -EFAULT; 2142 __mark_reg_unbounded(reg); 2143 return 0; 2144 } 2145 2146 /* Mark a register as having a completely unknown (scalar) value. 
*/ 2147 void bpf_mark_reg_unknown_imprecise(struct bpf_reg_state *reg) 2148 { 2149 s32 subreg_def = reg->subreg_def; 2150 2151 memset(reg, 0, sizeof(*reg)); 2152 reg->type = SCALAR_VALUE; 2153 reg->var_off = tnum_unknown; 2154 reg->subreg_def = subreg_def; 2155 __mark_reg_unbounded(reg); 2156 } 2157 2158 /* Mark a register as having a completely unknown (scalar) value, 2159 * initialize .precise as true when not bpf capable. 2160 */ 2161 static void __mark_reg_unknown(const struct bpf_verifier_env *env, 2162 struct bpf_reg_state *reg) 2163 { 2164 bpf_mark_reg_unknown_imprecise(reg); 2165 reg->precise = !env->bpf_capable; 2166 } 2167 2168 static void mark_reg_unknown(struct bpf_verifier_env *env, 2169 struct bpf_reg_state *regs, u32 regno) 2170 { 2171 __mark_reg_unknown(env, regs + regno); 2172 } 2173 2174 static int __mark_reg_s32_range(struct bpf_verifier_env *env, 2175 struct bpf_reg_state *regs, 2176 u32 regno, 2177 s32 s32_min, 2178 s32 s32_max) 2179 { 2180 struct bpf_reg_state *reg = regs + regno; 2181 2182 reg_set_srange32(reg, 2183 max_t(s32, reg_s32_min(reg), s32_min), 2184 min_t(s32, reg_s32_max(reg), s32_max)); 2185 reg_set_srange64(reg, 2186 max_t(s64, reg_smin(reg), s32_min), 2187 min_t(s64, reg_smax(reg), s32_max)); 2188 2189 reg_bounds_sync(reg); 2190 2191 return reg_bounds_sanity_check(env, reg, "s32_range"); 2192 } 2193 2194 void bpf_mark_reg_not_init(const struct bpf_verifier_env *env, 2195 struct bpf_reg_state *reg) 2196 { 2197 __mark_reg_unknown(env, reg); 2198 reg->type = NOT_INIT; 2199 } 2200 2201 static int mark_btf_ld_reg(struct bpf_verifier_env *env, 2202 struct bpf_reg_state *regs, u32 regno, 2203 enum bpf_reg_type reg_type, 2204 struct btf *btf, u32 btf_id, 2205 enum bpf_type_flag flag) 2206 { 2207 switch (reg_type) { 2208 case SCALAR_VALUE: 2209 mark_reg_unknown(env, regs, regno); 2210 return 0; 2211 case PTR_TO_BTF_ID: 2212 mark_reg_known_zero(env, regs, regno); 2213 regs[regno].type = PTR_TO_BTF_ID | flag; 2214 regs[regno].btf = btf; 
2215 regs[regno].btf_id = btf_id; 2216 if (type_may_be_null(flag)) 2217 regs[regno].id = ++env->id_gen; 2218 return 0; 2219 case PTR_TO_MEM: 2220 mark_reg_known_zero(env, regs, regno); 2221 regs[regno].type = PTR_TO_MEM | flag; 2222 regs[regno].mem_size = 0; 2223 return 0; 2224 default: 2225 verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__); 2226 return -EFAULT; 2227 } 2228 } 2229 2230 #define DEF_NOT_SUBREG (0) 2231 static void init_reg_state(struct bpf_verifier_env *env, 2232 struct bpf_func_state *state) 2233 { 2234 struct bpf_reg_state *regs = state->regs; 2235 int i; 2236 2237 for (i = 0; i < MAX_BPF_REG; i++) { 2238 bpf_mark_reg_not_init(env, ®s[i]); 2239 regs[i].subreg_def = DEF_NOT_SUBREG; 2240 } 2241 2242 /* frame pointer */ 2243 regs[BPF_REG_FP].type = PTR_TO_STACK; 2244 mark_reg_known_zero(env, regs, BPF_REG_FP); 2245 regs[BPF_REG_FP].frameno = state->frameno; 2246 } 2247 2248 static struct bpf_retval_range retval_range(s32 minval, s32 maxval) 2249 { 2250 /* 2251 * return_32bit is set to false by default and set explicitly 2252 * by the caller when necessary. 
2253 */ 2254 return (struct bpf_retval_range){ minval, maxval, false }; 2255 } 2256 2257 static void init_func_state(struct bpf_verifier_env *env, 2258 struct bpf_func_state *state, 2259 int callsite, int frameno, int subprogno) 2260 { 2261 state->callsite = callsite; 2262 state->frameno = frameno; 2263 state->subprogno = subprogno; 2264 state->callback_ret_range = retval_range(0, 0); 2265 init_reg_state(env, state); 2266 mark_verifier_state_scratched(env); 2267 } 2268 2269 /* Similar to push_stack(), but for async callbacks */ 2270 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, 2271 int insn_idx, int prev_insn_idx, 2272 int subprog, bool is_sleepable) 2273 { 2274 struct bpf_verifier_stack_elem *elem; 2275 struct bpf_func_state *frame; 2276 2277 elem = kzalloc_obj(struct bpf_verifier_stack_elem, GFP_KERNEL_ACCOUNT); 2278 if (!elem) 2279 return ERR_PTR(-ENOMEM); 2280 2281 elem->insn_idx = insn_idx; 2282 elem->prev_insn_idx = prev_insn_idx; 2283 elem->next = env->head; 2284 elem->log_pos = env->log.end_pos; 2285 env->head = elem; 2286 env->stack_size++; 2287 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) { 2288 verbose(env, 2289 "The sequence of %d jumps is too complex for async cb.\n", 2290 env->stack_size); 2291 return ERR_PTR(-E2BIG); 2292 } 2293 /* Unlike push_stack() do not bpf_copy_verifier_state(). 2294 * The caller state doesn't matter. 2295 * This is async callback. It starts in a fresh stack. 2296 * Initialize it similar to do_check_common(). 
2297 */ 2298 elem->st.branches = 1; 2299 elem->st.in_sleepable = is_sleepable; 2300 frame = kzalloc_obj(*frame, GFP_KERNEL_ACCOUNT); 2301 if (!frame) 2302 return ERR_PTR(-ENOMEM); 2303 init_func_state(env, frame, 2304 BPF_MAIN_FUNC /* callsite */, 2305 0 /* frameno within this callchain */, 2306 subprog /* subprog number within this prog */); 2307 elem->st.frame[0] = frame; 2308 return &elem->st; 2309 } 2310 2311 2312 static int cmp_subprogs(const void *a, const void *b) 2313 { 2314 return ((struct bpf_subprog_info *)a)->start - 2315 ((struct bpf_subprog_info *)b)->start; 2316 } 2317 2318 /* Find subprogram that contains instruction at 'off' */ 2319 struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off) 2320 { 2321 struct bpf_subprog_info *vals = env->subprog_info; 2322 int l, r, m; 2323 2324 if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0) 2325 return NULL; 2326 2327 l = 0; 2328 r = env->subprog_cnt - 1; 2329 while (l < r) { 2330 m = l + (r - l + 1) / 2; 2331 if (vals[m].start <= off) 2332 l = m; 2333 else 2334 r = m - 1; 2335 } 2336 return &vals[l]; 2337 } 2338 2339 /* Find subprogram that starts exactly at 'off' */ 2340 int bpf_find_subprog(struct bpf_verifier_env *env, int off) 2341 { 2342 struct bpf_subprog_info *p; 2343 2344 p = bpf_find_containing_subprog(env, off); 2345 if (!p || p->start != off) 2346 return -ENOENT; 2347 return p - env->subprog_info; 2348 } 2349 2350 static int add_subprog(struct bpf_verifier_env *env, int off) 2351 { 2352 int insn_cnt = env->prog->len; 2353 int ret; 2354 2355 if (off >= insn_cnt || off < 0) { 2356 verbose(env, "call to invalid destination\n"); 2357 return -EINVAL; 2358 } 2359 ret = bpf_find_subprog(env, off); 2360 if (ret >= 0) 2361 return ret; 2362 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { 2363 verbose(env, "too many subprograms\n"); 2364 return -E2BIG; 2365 } 2366 /* determine subprog starts. 
The end is one before the next starts */ 2367 env->subprog_info[env->subprog_cnt++].start = off; 2368 sort(env->subprog_info, env->subprog_cnt, 2369 sizeof(env->subprog_info[0]), cmp_subprogs, NULL); 2370 return env->subprog_cnt - 1; 2371 } 2372 2373 static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env) 2374 { 2375 struct bpf_prog_aux *aux = env->prog->aux; 2376 struct btf *btf = aux->btf; 2377 const struct btf_type *t; 2378 u32 main_btf_id, id; 2379 const char *name; 2380 int ret, i; 2381 2382 /* Non-zero func_info_cnt implies valid btf */ 2383 if (!aux->func_info_cnt) 2384 return 0; 2385 main_btf_id = aux->func_info[0].type_id; 2386 2387 t = btf_type_by_id(btf, main_btf_id); 2388 if (!t) { 2389 verbose(env, "invalid btf id for main subprog in func_info\n"); 2390 return -EINVAL; 2391 } 2392 2393 name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:"); 2394 if (IS_ERR(name)) { 2395 ret = PTR_ERR(name); 2396 /* If there is no tag present, there is no exception callback */ 2397 if (ret == -ENOENT) 2398 ret = 0; 2399 else if (ret == -EEXIST) 2400 verbose(env, "multiple exception callback tags for main subprog\n"); 2401 return ret; 2402 } 2403 2404 ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC); 2405 if (ret < 0) { 2406 verbose(env, "exception callback '%s' could not be found in BTF\n", name); 2407 return ret; 2408 } 2409 id = ret; 2410 t = btf_type_by_id(btf, id); 2411 if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) { 2412 verbose(env, "exception callback '%s' must have global linkage\n", name); 2413 return -EINVAL; 2414 } 2415 ret = 0; 2416 for (i = 0; i < aux->func_info_cnt; i++) { 2417 if (aux->func_info[i].type_id != id) 2418 continue; 2419 ret = aux->func_info[i].insn_off; 2420 /* Further func_info and subprog checks will also happen 2421 * later, so assume this is the right insn_off for now. 
2422 */ 2423 if (!ret) { 2424 verbose(env, "invalid exception callback insn_off in func_info: 0\n"); 2425 ret = -EINVAL; 2426 } 2427 } 2428 if (!ret) { 2429 verbose(env, "exception callback type id not found in func_info\n"); 2430 ret = -EINVAL; 2431 } 2432 return ret; 2433 } 2434 2435 #define MAX_KFUNC_BTFS 256 2436 2437 struct bpf_kfunc_btf { 2438 struct btf *btf; 2439 struct module *module; 2440 u16 offset; 2441 }; 2442 2443 struct bpf_kfunc_btf_tab { 2444 struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS]; 2445 u32 nr_descs; 2446 }; 2447 2448 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) 2449 { 2450 const struct bpf_kfunc_desc *d0 = a; 2451 const struct bpf_kfunc_desc *d1 = b; 2452 2453 /* func_id is not greater than BTF_MAX_TYPE */ 2454 return d0->func_id - d1->func_id ?: d0->offset - d1->offset; 2455 } 2456 2457 static int kfunc_btf_cmp_by_off(const void *a, const void *b) 2458 { 2459 const struct bpf_kfunc_btf *d0 = a; 2460 const struct bpf_kfunc_btf *d1 = b; 2461 2462 return d0->offset - d1->offset; 2463 } 2464 2465 static struct bpf_kfunc_desc * 2466 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) 2467 { 2468 struct bpf_kfunc_desc desc = { 2469 .func_id = func_id, 2470 .offset = offset, 2471 }; 2472 struct bpf_kfunc_desc_tab *tab; 2473 2474 tab = prog->aux->kfunc_tab; 2475 return bsearch(&desc, tab->descs, tab->nr_descs, 2476 sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); 2477 } 2478 2479 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id, 2480 u16 btf_fd_idx, u8 **func_addr) 2481 { 2482 const struct bpf_kfunc_desc *desc; 2483 2484 desc = find_kfunc_desc(prog, func_id, btf_fd_idx); 2485 if (!desc) 2486 return -EFAULT; 2487 2488 *func_addr = (u8 *)desc->addr; 2489 return 0; 2490 } 2491 2492 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, 2493 s16 offset) 2494 { 2495 struct bpf_kfunc_btf kf_btf = { .offset = offset }; 2496 struct bpf_kfunc_btf_tab *tab; 2497 struct bpf_kfunc_btf *b; 
2498 struct module *mod; 2499 struct btf *btf; 2500 int btf_fd; 2501 2502 tab = env->prog->aux->kfunc_btf_tab; 2503 b = bsearch(&kf_btf, tab->descs, tab->nr_descs, 2504 sizeof(tab->descs[0]), kfunc_btf_cmp_by_off); 2505 if (!b) { 2506 if (tab->nr_descs == MAX_KFUNC_BTFS) { 2507 verbose(env, "too many different module BTFs\n"); 2508 return ERR_PTR(-E2BIG); 2509 } 2510 2511 if (bpfptr_is_null(env->fd_array)) { 2512 verbose(env, "kfunc offset > 0 without fd_array is invalid\n"); 2513 return ERR_PTR(-EPROTO); 2514 } 2515 2516 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array, 2517 offset * sizeof(btf_fd), 2518 sizeof(btf_fd))) 2519 return ERR_PTR(-EFAULT); 2520 2521 btf = btf_get_by_fd(btf_fd); 2522 if (IS_ERR(btf)) { 2523 verbose(env, "invalid module BTF fd specified\n"); 2524 return btf; 2525 } 2526 2527 if (!btf_is_module(btf)) { 2528 verbose(env, "BTF fd for kfunc is not a module BTF\n"); 2529 btf_put(btf); 2530 return ERR_PTR(-EINVAL); 2531 } 2532 2533 mod = btf_try_get_module(btf); 2534 if (!mod) { 2535 btf_put(btf); 2536 return ERR_PTR(-ENXIO); 2537 } 2538 2539 b = &tab->descs[tab->nr_descs++]; 2540 b->btf = btf; 2541 b->module = mod; 2542 b->offset = offset; 2543 2544 /* sort() reorders entries by value, so b may no longer point 2545 * to the right entry after this 2546 */ 2547 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 2548 kfunc_btf_cmp_by_off, NULL); 2549 } else { 2550 btf = b->btf; 2551 } 2552 2553 return btf; 2554 } 2555 2556 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) 2557 { 2558 if (!tab) 2559 return; 2560 2561 while (tab->nr_descs--) { 2562 module_put(tab->descs[tab->nr_descs].module); 2563 btf_put(tab->descs[tab->nr_descs].btf); 2564 } 2565 kfree(tab); 2566 } 2567 2568 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset) 2569 { 2570 if (offset) { 2571 if (offset < 0) { 2572 /* In the future, this can be allowed to increase limit 2573 * of fd index into fd_array, interpreted as u16. 
2574 */ 2575 verbose(env, "negative offset disallowed for kernel module function call\n"); 2576 return ERR_PTR(-EINVAL); 2577 } 2578 2579 return __find_kfunc_desc_btf(env, offset); 2580 } 2581 return btf_vmlinux ?: ERR_PTR(-ENOENT); 2582 } 2583 2584 #define KF_IMPL_SUFFIX "_impl" 2585 2586 static const struct btf_type *find_kfunc_impl_proto(struct bpf_verifier_env *env, 2587 struct btf *btf, 2588 const char *func_name) 2589 { 2590 char *buf = env->tmp_str_buf; 2591 const struct btf_type *func; 2592 s32 impl_id; 2593 int len; 2594 2595 len = snprintf(buf, TMP_STR_BUF_LEN, "%s%s", func_name, KF_IMPL_SUFFIX); 2596 if (len < 0 || len >= TMP_STR_BUF_LEN) { 2597 verbose(env, "function name %s%s is too long\n", func_name, KF_IMPL_SUFFIX); 2598 return NULL; 2599 } 2600 2601 impl_id = btf_find_by_name_kind(btf, buf, BTF_KIND_FUNC); 2602 if (impl_id <= 0) { 2603 verbose(env, "cannot find function %s in BTF\n", buf); 2604 return NULL; 2605 } 2606 2607 func = btf_type_by_id(btf, impl_id); 2608 2609 return btf_type_by_id(btf, func->type); 2610 } 2611 2612 static int fetch_kfunc_meta(struct bpf_verifier_env *env, 2613 s32 func_id, 2614 s16 offset, 2615 struct bpf_kfunc_meta *kfunc) 2616 { 2617 const struct btf_type *func, *func_proto; 2618 const char *func_name; 2619 u32 *kfunc_flags; 2620 struct btf *btf; 2621 2622 if (func_id <= 0) { 2623 verbose(env, "invalid kernel function btf_id %d\n", func_id); 2624 return -EINVAL; 2625 } 2626 2627 btf = find_kfunc_desc_btf(env, offset); 2628 if (IS_ERR(btf)) { 2629 verbose(env, "failed to find BTF for kernel function\n"); 2630 return PTR_ERR(btf); 2631 } 2632 2633 /* 2634 * Note that kfunc_flags may be NULL at this point, which 2635 * means that we couldn't find func_id in any relevant 2636 * kfunc_id_set. This most likely indicates an invalid kfunc 2637 * call. However we don't fail with an error here, 2638 * and let the caller decide what to do with NULL kfunc->flags. 
2639 */ 2640 kfunc_flags = btf_kfunc_flags(btf, func_id, env->prog); 2641 2642 func = btf_type_by_id(btf, func_id); 2643 if (!func || !btf_type_is_func(func)) { 2644 verbose(env, "kernel btf_id %d is not a function\n", func_id); 2645 return -EINVAL; 2646 } 2647 2648 func_name = btf_name_by_offset(btf, func->name_off); 2649 2650 /* 2651 * An actual prototype of a kfunc with KF_IMPLICIT_ARGS flag 2652 * can be found through the counterpart _impl kfunc. 2653 */ 2654 if (kfunc_flags && (*kfunc_flags & KF_IMPLICIT_ARGS)) 2655 func_proto = find_kfunc_impl_proto(env, btf, func_name); 2656 else 2657 func_proto = btf_type_by_id(btf, func->type); 2658 2659 if (!func_proto || !btf_type_is_func_proto(func_proto)) { 2660 verbose(env, "kernel function btf_id %d does not have a valid func_proto\n", 2661 func_id); 2662 return -EINVAL; 2663 } 2664 2665 memset(kfunc, 0, sizeof(*kfunc)); 2666 kfunc->btf = btf; 2667 kfunc->id = func_id; 2668 kfunc->name = func_name; 2669 kfunc->proto = func_proto; 2670 kfunc->flags = kfunc_flags; 2671 2672 return 0; 2673 } 2674 2675 int bpf_add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, u16 offset) 2676 { 2677 struct bpf_kfunc_btf_tab *btf_tab; 2678 struct btf_func_model func_model; 2679 struct bpf_kfunc_desc_tab *tab; 2680 struct bpf_prog_aux *prog_aux; 2681 struct bpf_kfunc_meta kfunc; 2682 struct bpf_kfunc_desc *desc; 2683 unsigned long addr; 2684 int err; 2685 2686 prog_aux = env->prog->aux; 2687 tab = prog_aux->kfunc_tab; 2688 btf_tab = prog_aux->kfunc_btf_tab; 2689 if (!tab) { 2690 if (!btf_vmlinux) { 2691 verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); 2692 return -ENOTSUPP; 2693 } 2694 2695 if (!env->prog->jit_requested) { 2696 verbose(env, "JIT is required for calling kernel function\n"); 2697 return -ENOTSUPP; 2698 } 2699 2700 if (!bpf_jit_supports_kfunc_call()) { 2701 verbose(env, "JIT does not support calling kernel function\n"); 2702 return -ENOTSUPP; 2703 } 2704 2705 if 
(!env->prog->gpl_compatible) { 2706 verbose(env, "cannot call kernel function from non-GPL compatible program\n"); 2707 return -EINVAL; 2708 } 2709 2710 tab = kzalloc_obj(*tab, GFP_KERNEL_ACCOUNT); 2711 if (!tab) 2712 return -ENOMEM; 2713 prog_aux->kfunc_tab = tab; 2714 } 2715 2716 /* func_id == 0 is always invalid, but instead of returning an error, be 2717 * conservative and wait until the code elimination pass before returning 2718 * error, so that invalid calls that get pruned out can be in BPF programs 2719 * loaded from userspace. It is also required that offset be untouched 2720 * for such calls. 2721 */ 2722 if (!func_id && !offset) 2723 return 0; 2724 2725 if (!btf_tab && offset) { 2726 btf_tab = kzalloc_obj(*btf_tab, GFP_KERNEL_ACCOUNT); 2727 if (!btf_tab) 2728 return -ENOMEM; 2729 prog_aux->kfunc_btf_tab = btf_tab; 2730 } 2731 2732 if (find_kfunc_desc(env->prog, func_id, offset)) 2733 return 0; 2734 2735 if (tab->nr_descs == MAX_KFUNC_DESCS) { 2736 verbose(env, "too many different kernel function calls\n"); 2737 return -E2BIG; 2738 } 2739 2740 err = fetch_kfunc_meta(env, func_id, offset, &kfunc); 2741 if (err) 2742 return err; 2743 2744 addr = kallsyms_lookup_name(kfunc.name); 2745 if (!addr) { 2746 verbose(env, "cannot find address for kernel function %s\n", kfunc.name); 2747 return -EINVAL; 2748 } 2749 2750 if (bpf_dev_bound_kfunc_id(func_id)) { 2751 err = bpf_dev_bound_kfunc_check(&env->log, prog_aux); 2752 if (err) 2753 return err; 2754 } 2755 2756 err = btf_distill_func_proto(&env->log, kfunc.btf, kfunc.proto, kfunc.name, &func_model); 2757 if (err) 2758 return err; 2759 2760 desc = &tab->descs[tab->nr_descs++]; 2761 desc->func_id = func_id; 2762 desc->offset = offset; 2763 desc->addr = addr; 2764 desc->func_model = func_model; 2765 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 2766 kfunc_desc_cmp_by_id_off, NULL); 2767 return 0; 2768 } 2769 2770 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) 2771 { 2772 return 
!!prog->aux->kfunc_tab; 2773 } 2774 2775 static int add_subprog_and_kfunc(struct bpf_verifier_env *env) 2776 { 2777 struct bpf_subprog_info *subprog = env->subprog_info; 2778 int i, ret, insn_cnt = env->prog->len, ex_cb_insn; 2779 struct bpf_insn *insn = env->prog->insnsi; 2780 2781 /* Add entry function. */ 2782 ret = add_subprog(env, 0); 2783 if (ret) 2784 return ret; 2785 2786 for (i = 0; i < insn_cnt; i++, insn++) { 2787 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) && 2788 !bpf_pseudo_kfunc_call(insn)) 2789 continue; 2790 2791 if (!env->bpf_capable) { 2792 verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); 2793 return -EPERM; 2794 } 2795 2796 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn)) 2797 ret = add_subprog(env, i + insn->imm + 1); 2798 else 2799 ret = bpf_add_kfunc_call(env, insn->imm, insn->off); 2800 2801 if (ret < 0) 2802 return ret; 2803 } 2804 2805 ret = bpf_find_exception_callback_insn_off(env); 2806 if (ret < 0) 2807 return ret; 2808 ex_cb_insn = ret; 2809 2810 /* If ex_cb_insn > 0, this means that the main program has a subprog 2811 * marked using BTF decl tag to serve as the exception callback. 2812 */ 2813 if (ex_cb_insn) { 2814 ret = add_subprog(env, ex_cb_insn); 2815 if (ret < 0) 2816 return ret; 2817 for (i = 1; i < env->subprog_cnt; i++) { 2818 if (env->subprog_info[i].start != ex_cb_insn) 2819 continue; 2820 env->exception_callback_subprog = i; 2821 bpf_mark_subprog_exc_cb(env, i); 2822 break; 2823 } 2824 } 2825 2826 /* Add a fake 'exit' subprog which could simplify subprog iteration 2827 * logic. 'subprog_cnt' should not be increased. 
2828 */ 2829 subprog[env->subprog_cnt].start = insn_cnt; 2830 2831 if (env->log.level & BPF_LOG_LEVEL2) 2832 for (i = 0; i < env->subprog_cnt; i++) 2833 verbose(env, "func#%d @%d\n", i, subprog[i].start); 2834 2835 return 0; 2836 } 2837 2838 static int check_subprogs(struct bpf_verifier_env *env) 2839 { 2840 int i, subprog_start, subprog_end, off, cur_subprog = 0; 2841 struct bpf_subprog_info *subprog = env->subprog_info; 2842 struct bpf_insn *insn = env->prog->insnsi; 2843 int insn_cnt = env->prog->len; 2844 2845 /* now check that all jumps are within the same subprog */ 2846 subprog_start = subprog[cur_subprog].start; 2847 subprog_end = subprog[cur_subprog + 1].start; 2848 for (i = 0; i < insn_cnt; i++) { 2849 u8 code = insn[i].code; 2850 2851 if (code == (BPF_JMP | BPF_CALL) && 2852 insn[i].src_reg == 0 && 2853 insn[i].imm == BPF_FUNC_tail_call) { 2854 subprog[cur_subprog].has_tail_call = true; 2855 subprog[cur_subprog].tail_call_reachable = true; 2856 } 2857 if (BPF_CLASS(code) == BPF_LD && 2858 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) 2859 subprog[cur_subprog].has_ld_abs = true; 2860 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) 2861 goto next; 2862 if (BPF_OP(code) == BPF_CALL) 2863 goto next; 2864 if (BPF_OP(code) == BPF_EXIT) { 2865 subprog[cur_subprog].exit_idx = i; 2866 goto next; 2867 } 2868 off = i + bpf_jmp_offset(&insn[i]) + 1; 2869 if (off < subprog_start || off >= subprog_end) { 2870 verbose(env, "jump out of range from insn %d to %d\n", i, off); 2871 return -EINVAL; 2872 } 2873 next: 2874 if (i == subprog_end - 1) { 2875 /* to avoid fall-through from one subprog into another 2876 * the last insn of the subprog should be either exit 2877 * or unconditional jump back or bpf_throw call 2878 */ 2879 if (code != (BPF_JMP | BPF_EXIT) && 2880 code != (BPF_JMP32 | BPF_JA) && 2881 code != (BPF_JMP | BPF_JA)) { 2882 verbose(env, "last insn is not an exit or jmp\n"); 2883 return -EINVAL; 2884 } 2885 subprog_start = 
subprog_end; 2886 cur_subprog++; 2887 if (cur_subprog < env->subprog_cnt) 2888 subprog_end = subprog[cur_subprog + 1].start; 2889 } 2890 } 2891 return 0; 2892 } 2893 2894 /* 2895 * Sort subprogs in topological order so that leaf subprogs come first and 2896 * their callers come later. This is a DFS post-order traversal of the call 2897 * graph. Scan only reachable instructions (those in the computed postorder) of 2898 * the current subprog to discover callees (direct subprogs and sync 2899 * callbacks). 2900 */ 2901 static int sort_subprogs_topo(struct bpf_verifier_env *env) 2902 { 2903 struct bpf_subprog_info *si = env->subprog_info; 2904 int *insn_postorder = env->cfg.insn_postorder; 2905 struct bpf_insn *insn = env->prog->insnsi; 2906 int cnt = env->subprog_cnt; 2907 int *dfs_stack = NULL; 2908 int top = 0, order = 0; 2909 int i, ret = 0; 2910 u8 *color = NULL; 2911 2912 color = kvzalloc_objs(*color, cnt, GFP_KERNEL_ACCOUNT); 2913 dfs_stack = kvmalloc_objs(*dfs_stack, cnt, GFP_KERNEL_ACCOUNT); 2914 if (!color || !dfs_stack) { 2915 ret = -ENOMEM; 2916 goto out; 2917 } 2918 2919 /* 2920 * DFS post-order traversal. 2921 * Color values: 0 = unvisited, 1 = on stack, 2 = done. 
2922 */ 2923 for (i = 0; i < cnt; i++) { 2924 if (color[i]) 2925 continue; 2926 color[i] = 1; 2927 dfs_stack[top++] = i; 2928 2929 while (top > 0) { 2930 int cur = dfs_stack[top - 1]; 2931 int po_start = si[cur].postorder_start; 2932 int po_end = si[cur + 1].postorder_start; 2933 bool pushed = false; 2934 int j; 2935 2936 for (j = po_start; j < po_end; j++) { 2937 int idx = insn_postorder[j]; 2938 int callee; 2939 2940 if (!bpf_pseudo_call(&insn[idx]) && !bpf_pseudo_func(&insn[idx])) 2941 continue; 2942 callee = bpf_find_subprog(env, idx + insn[idx].imm + 1); 2943 if (callee < 0) { 2944 ret = -EFAULT; 2945 goto out; 2946 } 2947 if (color[callee] == 2) 2948 continue; 2949 if (color[callee] == 1) { 2950 if (bpf_pseudo_func(&insn[idx])) 2951 continue; 2952 verbose(env, "recursive call from %s() to %s()\n", 2953 subprog_name(env, cur), 2954 subprog_name(env, callee)); 2955 ret = -EINVAL; 2956 goto out; 2957 } 2958 color[callee] = 1; 2959 dfs_stack[top++] = callee; 2960 pushed = true; 2961 break; 2962 } 2963 2964 if (!pushed) { 2965 color[cur] = 2; 2966 env->subprog_topo_order[order++] = cur; 2967 top--; 2968 } 2969 } 2970 } 2971 2972 if (env->log.level & BPF_LOG_LEVEL2) 2973 for (i = 0; i < cnt; i++) 2974 verbose(env, "topo_order[%d] = %s\n", 2975 i, subprog_name(env, env->subprog_topo_order[i])); 2976 out: 2977 kvfree(dfs_stack); 2978 kvfree(color); 2979 return ret; 2980 } 2981 2982 static void mark_stack_slots_scratched(struct bpf_verifier_env *env, 2983 int spi, int nr_slots) 2984 { 2985 int i; 2986 2987 for (i = 0; i < nr_slots; i++) 2988 mark_stack_slot_scratched(env, spi - i); 2989 } 2990 2991 /* This function is supposed to be used by the following 32-bit optimization 2992 * code only. It returns TRUE if the source or destination register operates 2993 * on 64-bit, otherwise return FALSE. 
2994 */ 2995 bool bpf_is_reg64(struct bpf_insn *insn, 2996 u32 regno, struct bpf_reg_state *reg, enum bpf_reg_arg_type t) 2997 { 2998 u8 code, class, op; 2999 3000 code = insn->code; 3001 class = BPF_CLASS(code); 3002 op = BPF_OP(code); 3003 if (class == BPF_JMP) { 3004 /* BPF_EXIT for "main" will reach here. Return TRUE 3005 * conservatively. 3006 */ 3007 if (op == BPF_EXIT) 3008 return true; 3009 if (op == BPF_CALL) { 3010 /* BPF to BPF call will reach here because of marking 3011 * caller saved clobber with DST_OP_NO_MARK for which we 3012 * don't care the register def because they are anyway 3013 * marked as NOT_INIT already. 3014 */ 3015 if (insn->src_reg == BPF_PSEUDO_CALL) 3016 return false; 3017 /* Helper call will reach here because of arg type 3018 * check, conservatively return TRUE. 3019 */ 3020 if (t == SRC_OP) 3021 return true; 3022 3023 return false; 3024 } 3025 } 3026 3027 if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32)) 3028 return false; 3029 3030 if (class == BPF_ALU64 || class == BPF_JMP || 3031 (class == BPF_ALU && op == BPF_END && insn->imm == 64)) 3032 return true; 3033 3034 if (class == BPF_ALU || class == BPF_JMP32) 3035 return false; 3036 3037 if (class == BPF_LDX) { 3038 if (t != SRC_OP) 3039 return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX; 3040 /* LDX source must be ptr. */ 3041 return true; 3042 } 3043 3044 if (class == BPF_STX) { 3045 /* BPF_STX (including atomic variants) has one or more source 3046 * operands, one of which is a ptr. Check whether the caller is 3047 * asking about it. 3048 */ 3049 if (t == SRC_OP && reg->type != SCALAR_VALUE) 3050 return true; 3051 return BPF_SIZE(code) == BPF_DW; 3052 } 3053 3054 if (class == BPF_LD) { 3055 u8 mode = BPF_MODE(code); 3056 3057 /* LD_IMM64 */ 3058 if (mode == BPF_IMM) 3059 return true; 3060 3061 /* Both LD_IND and LD_ABS return 32-bit data. */ 3062 if (t != SRC_OP) 3063 return false; 3064 3065 /* Implicit ctx ptr. 
*/ 3066 if (regno == BPF_REG_6) 3067 return true; 3068 3069 /* Explicit source could be any width. */ 3070 return true; 3071 } 3072 3073 if (class == BPF_ST) 3074 /* The only source register for BPF_ST is a ptr. */ 3075 return true; 3076 3077 /* Conservatively return true at default. */ 3078 return true; 3079 } 3080 3081 static void mark_insn_zext(struct bpf_verifier_env *env, 3082 struct bpf_reg_state *reg) 3083 { 3084 s32 def_idx = reg->subreg_def; 3085 3086 if (def_idx == DEF_NOT_SUBREG) 3087 return; 3088 3089 env->insn_aux_data[def_idx - 1].zext_dst = true; 3090 /* The dst will be zero extended, so won't be sub-register anymore. */ 3091 reg->subreg_def = DEF_NOT_SUBREG; 3092 } 3093 3094 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, 3095 enum bpf_reg_arg_type t) 3096 { 3097 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; 3098 struct bpf_reg_state *reg; 3099 bool rw64; 3100 3101 mark_reg_scratched(env, regno); 3102 3103 reg = ®s[regno]; 3104 rw64 = bpf_is_reg64(insn, regno, reg, t); 3105 if (t == SRC_OP) { 3106 /* check whether register used as source operand can be read */ 3107 if (reg->type == NOT_INIT) { 3108 verbose(env, "R%d !read_ok\n", regno); 3109 return -EACCES; 3110 } 3111 /* We don't need to worry about FP liveness because it's read-only */ 3112 if (regno == BPF_REG_FP) 3113 return 0; 3114 3115 if (rw64) 3116 mark_insn_zext(env, reg); 3117 3118 return 0; 3119 } else { 3120 /* check whether register used as dest operand can be written to */ 3121 if (regno == BPF_REG_FP) { 3122 verbose(env, "frame pointer is read only\n"); 3123 return -EACCES; 3124 } 3125 reg->subreg_def = rw64 ? 
DEF_NOT_SUBREG : env->insn_idx + 1; 3126 if (t == DST_OP) 3127 mark_reg_unknown(env, regs, regno); 3128 } 3129 return 0; 3130 } 3131 3132 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, 3133 enum bpf_reg_arg_type t) 3134 { 3135 struct bpf_verifier_state *vstate = env->cur_state; 3136 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 3137 3138 return __check_reg_arg(env, state->regs, regno, t); 3139 } 3140 3141 static void mark_indirect_target(struct bpf_verifier_env *env, int idx) 3142 { 3143 env->insn_aux_data[idx].indirect_target = true; 3144 } 3145 3146 #define LR_FRAMENO_BITS 3 3147 #define LR_SPI_BITS 6 3148 #define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1) 3149 #define LR_SIZE_BITS 4 3150 #define LR_FRAMENO_MASK ((1ull << LR_FRAMENO_BITS) - 1) 3151 #define LR_SPI_MASK ((1ull << LR_SPI_BITS) - 1) 3152 #define LR_SIZE_MASK ((1ull << LR_SIZE_BITS) - 1) 3153 #define LR_SPI_OFF LR_FRAMENO_BITS 3154 #define LR_IS_REG_OFF (LR_SPI_BITS + LR_FRAMENO_BITS) 3155 #define LINKED_REGS_MAX 6 3156 3157 struct linked_reg { 3158 u8 frameno; 3159 union { 3160 u8 spi; 3161 u8 regno; 3162 }; 3163 bool is_reg; 3164 }; 3165 3166 struct linked_regs { 3167 int cnt; 3168 struct linked_reg entries[LINKED_REGS_MAX]; 3169 }; 3170 3171 static struct linked_reg *linked_regs_push(struct linked_regs *s) 3172 { 3173 if (s->cnt < LINKED_REGS_MAX) 3174 return &s->entries[s->cnt++]; 3175 3176 return NULL; 3177 } 3178 3179 /* Use u64 as a vector of 6 10-bit values, use first 4-bits to track 3180 * number of elements currently in stack. 
3181 * Pack one history entry for linked registers as 10 bits in the following format: 3182 * - 3-bits frameno 3183 * - 6-bits spi_or_reg 3184 * - 1-bit is_reg 3185 */ 3186 static u64 linked_regs_pack(struct linked_regs *s) 3187 { 3188 u64 val = 0; 3189 int i; 3190 3191 for (i = 0; i < s->cnt; ++i) { 3192 struct linked_reg *e = &s->entries[i]; 3193 u64 tmp = 0; 3194 3195 tmp |= e->frameno; 3196 tmp |= e->spi << LR_SPI_OFF; 3197 tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF; 3198 3199 val <<= LR_ENTRY_BITS; 3200 val |= tmp; 3201 } 3202 val <<= LR_SIZE_BITS; 3203 val |= s->cnt; 3204 return val; 3205 } 3206 3207 static void linked_regs_unpack(u64 val, struct linked_regs *s) 3208 { 3209 int i; 3210 3211 s->cnt = val & LR_SIZE_MASK; 3212 val >>= LR_SIZE_BITS; 3213 3214 for (i = 0; i < s->cnt; ++i) { 3215 struct linked_reg *e = &s->entries[i]; 3216 3217 e->frameno = val & LR_FRAMENO_MASK; 3218 e->spi = (val >> LR_SPI_OFF) & LR_SPI_MASK; 3219 e->is_reg = (val >> LR_IS_REG_OFF) & 0x1; 3220 val >>= LR_ENTRY_BITS; 3221 } 3222 } 3223 3224 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) 3225 { 3226 const struct btf_type *func; 3227 struct btf *desc_btf; 3228 3229 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) 3230 return NULL; 3231 3232 desc_btf = find_kfunc_desc_btf(data, insn->off); 3233 if (IS_ERR(desc_btf)) 3234 return "<error>"; 3235 3236 func = btf_type_by_id(desc_btf, insn->imm); 3237 return btf_name_by_offset(desc_btf, func->name_off); 3238 } 3239 3240 void bpf_verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn) 3241 { 3242 const struct bpf_insn_cbs cbs = { 3243 .cb_call = disasm_kfunc_name, 3244 .cb_print = verbose, 3245 .private_data = env, 3246 }; 3247 3248 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); 3249 } 3250 3251 /* If any register R in hist->linked_regs is marked as precise in bt, 3252 * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs. 
3253 */ 3254 void bpf_bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist) 3255 { 3256 struct linked_regs linked_regs; 3257 bool some_precise = false; 3258 int i; 3259 3260 if (!hist || hist->linked_regs == 0) 3261 return; 3262 3263 linked_regs_unpack(hist->linked_regs, &linked_regs); 3264 for (i = 0; i < linked_regs.cnt; ++i) { 3265 struct linked_reg *e = &linked_regs.entries[i]; 3266 3267 if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) || 3268 (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) { 3269 some_precise = true; 3270 break; 3271 } 3272 } 3273 3274 if (!some_precise) 3275 return; 3276 3277 for (i = 0; i < linked_regs.cnt; ++i) { 3278 struct linked_reg *e = &linked_regs.entries[i]; 3279 3280 if (e->is_reg) 3281 bpf_bt_set_frame_reg(bt, e->frameno, e->regno); 3282 else 3283 bpf_bt_set_frame_slot(bt, e->frameno, e->spi); 3284 } 3285 } 3286 3287 int mark_chain_precision(struct bpf_verifier_env *env, int regno) 3288 { 3289 return bpf_mark_chain_precision(env, env->cur_state, regno, NULL); 3290 } 3291 3292 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to 3293 * desired reg and stack masks across all relevant frames 3294 */ 3295 static int mark_chain_precision_batch(struct bpf_verifier_env *env, 3296 struct bpf_verifier_state *starting_state) 3297 { 3298 return bpf_mark_chain_precision(env, starting_state, -1, NULL); 3299 } 3300 3301 static bool is_spillable_regtype(enum bpf_reg_type type) 3302 { 3303 switch (base_type(type)) { 3304 case PTR_TO_MAP_VALUE: 3305 case PTR_TO_STACK: 3306 case PTR_TO_CTX: 3307 case PTR_TO_PACKET: 3308 case PTR_TO_PACKET_META: 3309 case PTR_TO_PACKET_END: 3310 case PTR_TO_FLOW_KEYS: 3311 case CONST_PTR_TO_MAP: 3312 case PTR_TO_SOCKET: 3313 case PTR_TO_SOCK_COMMON: 3314 case PTR_TO_TCP_SOCK: 3315 case PTR_TO_XDP_SOCK: 3316 case PTR_TO_BTF_ID: 3317 case PTR_TO_BUF: 3318 case PTR_TO_MEM: 3319 case PTR_TO_FUNC: 3320 case PTR_TO_MAP_KEY: 3321 case 
PTR_TO_ARENA: 3322 return true; 3323 default: 3324 return false; 3325 } 3326 } 3327 3328 3329 /* check if register is a constant scalar value */ 3330 static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32) 3331 { 3332 return reg->type == SCALAR_VALUE && 3333 tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off); 3334 } 3335 3336 /* assuming is_reg_const() is true, return constant value of a register */ 3337 static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) 3338 { 3339 return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; 3340 } 3341 3342 static bool __is_pointer_value(bool allow_ptr_leaks, 3343 const struct bpf_reg_state *reg) 3344 { 3345 if (allow_ptr_leaks) 3346 return false; 3347 3348 return reg->type != SCALAR_VALUE; 3349 } 3350 3351 static void clear_scalar_id(struct bpf_reg_state *reg) 3352 { 3353 reg->id = 0; 3354 reg->delta = 0; 3355 } 3356 3357 static void assign_scalar_id_before_mov(struct bpf_verifier_env *env, 3358 struct bpf_reg_state *src_reg) 3359 { 3360 if (src_reg->type != SCALAR_VALUE) 3361 return; 3362 /* 3363 * The verifier is processing rX = rY insn and 3364 * rY->id has special linked register already. 3365 * Cleared it, since multiple rX += const are not supported. 3366 */ 3367 if (src_reg->id & BPF_ADD_CONST) 3368 clear_scalar_id(src_reg); 3369 /* 3370 * Ensure that src_reg has a valid ID that will be copied to 3371 * dst_reg and then will be used by sync_linked_regs() to 3372 * propagate min/max range. 
3373 */ 3374 if (!src_reg->id && !tnum_is_const(src_reg->var_off)) 3375 src_reg->id = ++env->id_gen; 3376 } 3377 3378 static void save_register_state(struct bpf_verifier_env *env, 3379 struct bpf_func_state *state, 3380 int spi, struct bpf_reg_state *reg, 3381 int size) 3382 { 3383 int i; 3384 3385 state->stack[spi].spilled_ptr = *reg; 3386 3387 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--) 3388 state->stack[spi].slot_type[i - 1] = STACK_SPILL; 3389 3390 /* size < 8 bytes spill */ 3391 for (; i; i--) 3392 mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]); 3393 } 3394 3395 static bool is_bpf_st_mem(struct bpf_insn *insn) 3396 { 3397 return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM; 3398 } 3399 3400 static int get_reg_width(struct bpf_reg_state *reg) 3401 { 3402 return fls64(reg_umax(reg)); 3403 } 3404 3405 /* See comment for mark_fastcall_pattern_for_call() */ 3406 static void check_fastcall_stack_contract(struct bpf_verifier_env *env, 3407 struct bpf_func_state *state, int insn_idx, int off) 3408 { 3409 struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno]; 3410 struct bpf_insn_aux_data *aux = env->insn_aux_data; 3411 int i; 3412 3413 if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern) 3414 return; 3415 /* access to the region [max_stack_depth .. fastcall_stack_off) 3416 * from something that is not a part of the fastcall pattern, 3417 * disable fastcall rewrites for current subprogram by setting 3418 * fastcall_stack_off to a value smaller than any possible offset. 
3419 */ 3420 subprog->fastcall_stack_off = S16_MIN; 3421 /* reset fastcall aux flags within subprogram, 3422 * happens at most once per subprogram 3423 */ 3424 for (i = subprog->start; i < (subprog + 1)->start; ++i) { 3425 aux[i].fastcall_spills_num = 0; 3426 aux[i].fastcall_pattern = 0; 3427 } 3428 } 3429 3430 static void scrub_special_slot(struct bpf_func_state *state, int spi) 3431 { 3432 int i; 3433 3434 /* regular write of data into stack destroys any spilled ptr */ 3435 state->stack[spi].spilled_ptr.type = NOT_INIT; 3436 /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */ 3437 if (is_stack_slot_special(&state->stack[spi])) 3438 for (i = 0; i < BPF_REG_SIZE; i++) 3439 scrub_spilled_slot(&state->stack[spi].slot_type[i]); 3440 } 3441 3442 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers, 3443 * stack boundary and alignment are checked in check_mem_access() 3444 */ 3445 static int check_stack_write_fixed_off(struct bpf_verifier_env *env, 3446 /* stack frame we're writing to */ 3447 struct bpf_func_state *state, 3448 int off, int size, int value_regno, 3449 int insn_idx) 3450 { 3451 struct bpf_func_state *cur; /* state of the current function */ 3452 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; 3453 struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; 3454 struct bpf_reg_state *reg = NULL; 3455 int insn_flags = INSN_F_STACK_ACCESS; 3456 int hist_spi = spi, hist_frame = state->frameno; 3457 3458 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, 3459 * so it's aligned access and [off, off + size) are within stack limits 3460 */ 3461 if (!env->allow_ptr_leaks && 3462 bpf_is_spilled_reg(&state->stack[spi]) && 3463 !bpf_is_spilled_scalar_reg(&state->stack[spi]) && 3464 size != BPF_REG_SIZE) { 3465 verbose(env, "attempt to corrupt spilled pointer on stack\n"); 3466 return -EACCES; 3467 } 3468 3469 cur = env->cur_state->frame[env->cur_state->curframe]; 3470 if (value_regno >= 0) 3471 reg 
= &cur->regs[value_regno]; 3472 if (!env->bypass_spec_v4) { 3473 bool sanitize = reg && is_spillable_regtype(reg->type); 3474 3475 for (i = 0; i < size; i++) { 3476 u8 type = state->stack[spi].slot_type[i]; 3477 3478 if (type != STACK_MISC && type != STACK_ZERO) { 3479 sanitize = true; 3480 break; 3481 } 3482 } 3483 3484 if (sanitize) 3485 env->insn_aux_data[insn_idx].nospec_result = true; 3486 } 3487 3488 err = destroy_if_dynptr_stack_slot(env, state, spi); 3489 if (err) 3490 return err; 3491 3492 check_fastcall_stack_contract(env, state, insn_idx, off); 3493 mark_stack_slot_scratched(env, spi); 3494 if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) { 3495 bool reg_value_fits; 3496 3497 reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size; 3498 /* Make sure that reg had an ID to build a relation on spill. */ 3499 if (reg_value_fits) 3500 assign_scalar_id_before_mov(env, reg); 3501 save_register_state(env, state, spi, reg, size); 3502 /* Break the relation on a narrowing spill. 
*/ 3503 if (!reg_value_fits) 3504 state->stack[spi].spilled_ptr.id = 0; 3505 } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && 3506 env->bpf_capable) { 3507 struct bpf_reg_state *tmp_reg = &env->fake_reg[0]; 3508 3509 memset(tmp_reg, 0, sizeof(*tmp_reg)); 3510 __mark_reg_known(tmp_reg, insn->imm); 3511 tmp_reg->type = SCALAR_VALUE; 3512 save_register_state(env, state, spi, tmp_reg, size); 3513 } else if (reg && is_spillable_regtype(reg->type)) { 3514 /* register containing pointer is being spilled into stack */ 3515 if (size != BPF_REG_SIZE) { 3516 verbose_linfo(env, insn_idx, "; "); 3517 verbose(env, "invalid size of register spill\n"); 3518 return -EACCES; 3519 } 3520 if (state != cur && reg->type == PTR_TO_STACK) { 3521 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); 3522 return -EINVAL; 3523 } 3524 save_register_state(env, state, spi, reg, size); 3525 } else { 3526 u8 type = STACK_MISC; 3527 3528 scrub_special_slot(state, spi); 3529 3530 /* when we zero initialize stack slots mark them as such */ 3531 if ((reg && bpf_register_is_null(reg)) || 3532 (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) { 3533 /* STACK_ZERO case happened because register spill 3534 * wasn't properly aligned at the stack slot boundary, 3535 * so it's not a register spill anymore; force 3536 * originating register to be precise to make 3537 * STACK_ZERO correct for subsequent states 3538 */ 3539 err = mark_chain_precision(env, value_regno); 3540 if (err) 3541 return err; 3542 type = STACK_ZERO; 3543 } 3544 3545 /* Mark slots affected by this stack write. */ 3546 for (i = 0; i < size; i++) 3547 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type; 3548 insn_flags = 0; /* not a register spill */ 3549 } 3550 3551 if (insn_flags) 3552 return bpf_push_jmp_history(env, env->cur_state, insn_flags, 3553 hist_spi, hist_frame, 0); 3554 return 0; 3555 } 3556 3557 /* Write the stack: 'stack[ptr_reg + off] = value_regno'. 
'ptr_reg' is 3558 * known to contain a variable offset. 3559 * This function checks whether the write is permitted and conservatively 3560 * tracks the effects of the write, considering that each stack slot in the 3561 * dynamic range is potentially written to. 3562 * 3563 * 'value_regno' can be -1, meaning that an unknown value is being written to 3564 * the stack. 3565 * 3566 * Spilled pointers in range are not marked as written because we don't know 3567 * what's going to be actually written. This means that read propagation for 3568 * future reads cannot be terminated by this write. 3569 * 3570 * For privileged programs, uninitialized stack slots are considered 3571 * initialized by this write (even though we don't know exactly what offsets 3572 * are going to be written to). The idea is that we don't want the verifier to 3573 * reject future reads that access slots written to through variable offsets. 3574 */ 3575 static int check_stack_write_var_off(struct bpf_verifier_env *env, 3576 /* func where register points to */ 3577 struct bpf_func_state *state, 3578 struct bpf_reg_state *ptr_reg, int off, int size, 3579 int value_regno, int insn_idx) 3580 { 3581 struct bpf_func_state *cur; /* state of the current function */ 3582 int min_off, max_off; 3583 int i, err; 3584 struct bpf_reg_state *value_reg = NULL; 3585 struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; 3586 bool writing_zero = false; 3587 /* set if the fact that we're writing a zero is used to let any 3588 * stack slots remain STACK_ZERO 3589 */ 3590 bool zero_used = false; 3591 3592 cur = env->cur_state->frame[env->cur_state->curframe]; 3593 min_off = reg_smin(ptr_reg) + off; 3594 max_off = reg_smax(ptr_reg) + off + size; 3595 if (value_regno >= 0) 3596 value_reg = &cur->regs[value_regno]; 3597 if ((value_reg && bpf_register_is_null(value_reg)) || 3598 (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0)) 3599 writing_zero = true; 3600 3601 for (i = min_off; i < max_off; i++) { 3602 int spi; 3603 
3604 spi = bpf_get_spi(i); 3605 err = destroy_if_dynptr_stack_slot(env, state, spi); 3606 if (err) 3607 return err; 3608 } 3609 3610 check_fastcall_stack_contract(env, state, insn_idx, min_off); 3611 /* Variable offset writes destroy any spilled pointers in range. */ 3612 for (i = min_off; i < max_off; i++) { 3613 u8 new_type, *stype; 3614 int slot, spi; 3615 3616 slot = -i - 1; 3617 spi = slot / BPF_REG_SIZE; 3618 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; 3619 mark_stack_slot_scratched(env, spi); 3620 3621 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) { 3622 /* Reject the write if range we may write to has not 3623 * been initialized beforehand. If we didn't reject 3624 * here, the ptr status would be erased below (even 3625 * though not all slots are actually overwritten), 3626 * possibly opening the door to leaks. 3627 * 3628 * We do however catch STACK_INVALID case below, and 3629 * only allow reading possibly uninitialized memory 3630 * later for CAP_PERFMON, as the write may not happen to 3631 * that slot. 3632 */ 3633 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", 3634 insn_idx, i); 3635 return -EINVAL; 3636 } 3637 3638 /* If writing_zero and the spi slot contains a spill of value 0, 3639 * maintain the spill type. 3640 */ 3641 if (writing_zero && *stype == STACK_SPILL && 3642 bpf_is_spilled_scalar_reg(&state->stack[spi])) { 3643 struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr; 3644 3645 if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) { 3646 zero_used = true; 3647 continue; 3648 } 3649 } 3650 3651 /* 3652 * Scrub slots if variable-offset stack write goes over spilled pointers. 3653 * Otherwise bpf_is_spilled_reg() may == true && spilled_ptr.type == NOT_INIT 3654 * and valid program is rejected by check_stack_read_fixed_off() 3655 * with obscure "invalid size of register fill" message. 
3656 */ 3657 scrub_special_slot(state, spi); 3658 3659 /* Update the slot type. */ 3660 new_type = STACK_MISC; 3661 if (writing_zero && *stype == STACK_ZERO) { 3662 new_type = STACK_ZERO; 3663 zero_used = true; 3664 } 3665 /* If the slot is STACK_INVALID, we check whether it's OK to 3666 * pretend that it will be initialized by this write. The slot 3667 * might not actually be written to, and so if we mark it as 3668 * initialized future reads might leak uninitialized memory. 3669 * For privileged programs, we will accept such reads to slots 3670 * that may or may not be written because, if we're reject 3671 * them, the error would be too confusing. 3672 * Conservatively, treat STACK_POISON in a similar way. 3673 */ 3674 if ((*stype == STACK_INVALID || *stype == STACK_POISON) && 3675 !env->allow_uninit_stack) { 3676 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d", 3677 insn_idx, i); 3678 return -EINVAL; 3679 } 3680 *stype = new_type; 3681 } 3682 if (zero_used) { 3683 /* backtracking doesn't work for STACK_ZERO yet. */ 3684 err = mark_chain_precision(env, value_regno); 3685 if (err) 3686 return err; 3687 } 3688 return 0; 3689 } 3690 3691 /* When register 'dst_regno' is assigned some values from stack[min_off, 3692 * max_off), we set the register's type according to the types of the 3693 * respective stack slots. If all the stack values are known to be zeros, then 3694 * so is the destination reg. Otherwise, the register is considered to be 3695 * SCALAR. This function does not deal with register filling; the caller must 3696 * ensure that all spilled registers in the stack range have been marked as 3697 * read. 
3698 */ 3699 static void mark_reg_stack_read(struct bpf_verifier_env *env, 3700 /* func where src register points to */ 3701 struct bpf_func_state *ptr_state, 3702 int min_off, int max_off, int dst_regno) 3703 { 3704 struct bpf_verifier_state *vstate = env->cur_state; 3705 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 3706 int i, slot, spi; 3707 u8 *stype; 3708 int zeros = 0; 3709 3710 for (i = min_off; i < max_off; i++) { 3711 slot = -i - 1; 3712 spi = slot / BPF_REG_SIZE; 3713 mark_stack_slot_scratched(env, spi); 3714 stype = ptr_state->stack[spi].slot_type; 3715 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) 3716 break; 3717 zeros++; 3718 } 3719 if (zeros == max_off - min_off) { 3720 /* Any access_size read into register is zero extended, 3721 * so the whole register == const_zero. 3722 */ 3723 __mark_reg_const_zero(env, &state->regs[dst_regno]); 3724 } else { 3725 /* have read misc data from the stack */ 3726 mark_reg_unknown(env, state->regs, dst_regno); 3727 } 3728 } 3729 3730 /* Read the stack at 'off' and put the results into the register indicated by 3731 * 'dst_regno'. It handles reg filling if the addressed stack slot is a 3732 * spilled reg. 3733 * 3734 * 'dst_regno' can be -1, meaning that the read value is not going to a 3735 * register. 3736 * 3737 * The access is assumed to be within the current stack bounds. 
3738 */ 3739 static int check_stack_read_fixed_off(struct bpf_verifier_env *env, 3740 /* func where src register points to */ 3741 struct bpf_func_state *reg_state, 3742 int off, int size, int dst_regno) 3743 { 3744 struct bpf_verifier_state *vstate = env->cur_state; 3745 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 3746 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; 3747 struct bpf_reg_state *reg; 3748 u8 *stype, type; 3749 int insn_flags = INSN_F_STACK_ACCESS; 3750 int hist_spi = spi, hist_frame = reg_state->frameno; 3751 3752 stype = reg_state->stack[spi].slot_type; 3753 reg = ®_state->stack[spi].spilled_ptr; 3754 3755 mark_stack_slot_scratched(env, spi); 3756 check_fastcall_stack_contract(env, state, env->insn_idx, off); 3757 3758 if (bpf_is_spilled_reg(®_state->stack[spi])) { 3759 u8 spill_size = 1; 3760 3761 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--) 3762 spill_size++; 3763 3764 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) { 3765 if (reg->type != SCALAR_VALUE) { 3766 verbose_linfo(env, env->insn_idx, "; "); 3767 verbose(env, "invalid size of register fill\n"); 3768 return -EACCES; 3769 } 3770 3771 if (dst_regno < 0) 3772 return 0; 3773 3774 if (size <= spill_size && 3775 bpf_stack_narrow_access_ok(off, size, spill_size)) { 3776 /* The earlier check_reg_arg() has decided the 3777 * subreg_def for this insn. Save it first. 3778 */ 3779 s32 subreg_def = state->regs[dst_regno].subreg_def; 3780 3781 if (env->bpf_capable && size == 4 && spill_size == 4 && 3782 get_reg_width(reg) <= 32) 3783 /* Ensure stack slot has an ID to build a relation 3784 * with the destination register on fill. 3785 */ 3786 assign_scalar_id_before_mov(env, reg); 3787 state->regs[dst_regno] = *reg; 3788 state->regs[dst_regno].subreg_def = subreg_def; 3789 3790 /* Break the relation on a narrowing fill. 3791 * coerce_reg_to_size will adjust the boundaries. 
3792 */ 3793 if (get_reg_width(reg) > size * BITS_PER_BYTE) 3794 clear_scalar_id(&state->regs[dst_regno]); 3795 } else { 3796 int spill_cnt = 0, zero_cnt = 0; 3797 3798 for (i = 0; i < size; i++) { 3799 type = stype[(slot - i) % BPF_REG_SIZE]; 3800 if (type == STACK_SPILL) { 3801 spill_cnt++; 3802 continue; 3803 } 3804 if (type == STACK_MISC) 3805 continue; 3806 if (type == STACK_ZERO) { 3807 zero_cnt++; 3808 continue; 3809 } 3810 if (type == STACK_INVALID && env->allow_uninit_stack) 3811 continue; 3812 if (type == STACK_POISON) { 3813 verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n", 3814 off, i, size); 3815 } else { 3816 verbose(env, "invalid read from stack off %d+%d size %d\n", 3817 off, i, size); 3818 } 3819 return -EACCES; 3820 } 3821 3822 if (spill_cnt == size && 3823 tnum_is_const(reg->var_off) && reg->var_off.value == 0) { 3824 __mark_reg_const_zero(env, &state->regs[dst_regno]); 3825 /* this IS register fill, so keep insn_flags */ 3826 } else if (zero_cnt == size) { 3827 /* similarly to mark_reg_stack_read(), preserve zeroes */ 3828 __mark_reg_const_zero(env, &state->regs[dst_regno]); 3829 insn_flags = 0; /* not restoring original register state */ 3830 } else { 3831 mark_reg_unknown(env, state->regs, dst_regno); 3832 insn_flags = 0; /* not restoring original register state */ 3833 } 3834 } 3835 } else if (dst_regno >= 0) { 3836 /* restore register state from stack */ 3837 if (env->bpf_capable) 3838 /* Ensure stack slot has an ID to build a relation 3839 * with the destination register on fill. 
3840 */ 3841 assign_scalar_id_before_mov(env, reg); 3842 state->regs[dst_regno] = *reg; 3843 /* mark reg as written since spilled pointer state likely 3844 * has its liveness marks cleared by is_state_visited() 3845 * which resets stack/reg liveness for state transitions 3846 */ 3847 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { 3848 /* If dst_regno==-1, the caller is asking us whether 3849 * it is acceptable to use this value as a SCALAR_VALUE 3850 * (e.g. for XADD). 3851 * We must not allow unprivileged callers to do that 3852 * with spilled pointers. 3853 */ 3854 verbose(env, "leaking pointer from stack off %d\n", 3855 off); 3856 return -EACCES; 3857 } 3858 } else { 3859 for (i = 0; i < size; i++) { 3860 type = stype[(slot - i) % BPF_REG_SIZE]; 3861 if (type == STACK_MISC) 3862 continue; 3863 if (type == STACK_ZERO) 3864 continue; 3865 if (type == STACK_INVALID && env->allow_uninit_stack) 3866 continue; 3867 if (type == STACK_POISON) { 3868 verbose(env, "reading from stack off %d+%d size %d, slot poisoned by dead code elimination\n", 3869 off, i, size); 3870 } else { 3871 verbose(env, "invalid read from stack off %d+%d size %d\n", 3872 off, i, size); 3873 } 3874 return -EACCES; 3875 } 3876 if (dst_regno >= 0) 3877 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno); 3878 insn_flags = 0; /* we are not restoring spilled register */ 3879 } 3880 if (insn_flags) 3881 return bpf_push_jmp_history(env, env->cur_state, insn_flags, 3882 hist_spi, hist_frame, 0); 3883 return 0; 3884 } 3885 3886 enum bpf_access_src { 3887 ACCESS_DIRECT = 1, /* the access is performed by an instruction */ 3888 ACCESS_HELPER = 2, /* the access is performed by a helper */ 3889 }; 3890 3891 static int check_stack_range_initialized(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 3892 argno_t argno, int off, int access_size, 3893 bool zero_size_allowed, 3894 enum bpf_access_type type, 3895 struct bpf_call_arg_meta *meta); 3896 3897 static struct bpf_reg_state 
*reg_state(struct bpf_verifier_env *env, int regno) 3898 { 3899 return cur_regs(env) + regno; 3900 } 3901 3902 /* Read the stack at 'reg + off' and put the result into the register 3903 * 'dst_regno'. 3904 * 'off' includes the pointer register's fixed offset(i.e. 'reg->off'), 3905 * but not its variable offset. 3906 * 'size' is assumed to be <= reg size and the access is assumed to be aligned. 3907 * 3908 * As opposed to check_stack_read_fixed_off, this function doesn't deal with 3909 * filling registers (i.e. reads of spilled register cannot be detected when 3910 * the offset is not fixed). We conservatively mark 'dst_regno' as containing 3911 * SCALAR_VALUE. That's why we assert that the 'reg' has a variable 3912 * offset; for a fixed offset check_stack_read_fixed_off should be used 3913 * instead. 3914 */ 3915 static int check_stack_read_var_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 3916 argno_t ptr_argno, int off, int size, int dst_regno) 3917 { 3918 struct bpf_func_state *ptr_state = bpf_func(env, reg); 3919 int err; 3920 int min_off, max_off; 3921 3922 /* Note that we pass a NULL meta, so raw access will not be permitted. 3923 */ 3924 err = check_stack_range_initialized(env, reg, ptr_argno, off, size, 3925 false, BPF_READ, NULL); 3926 if (err) 3927 return err; 3928 3929 min_off = reg_smin(reg) + off; 3930 max_off = reg_smax(reg) + off; 3931 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); 3932 check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off); 3933 return 0; 3934 } 3935 3936 /* check_stack_read dispatches to check_stack_read_fixed_off or 3937 * check_stack_read_var_off. 3938 * 3939 * The caller must ensure that the offset falls within the allocated stack 3940 * bounds. 3941 * 3942 * 'dst_regno' is a register which will receive the value from the stack. It 3943 * can be -1, meaning that the read value is not going to a register. 
3944 */ 3945 static int check_stack_read(struct bpf_verifier_env *env, 3946 struct bpf_reg_state *reg, argno_t ptr_argno, int off, int size, 3947 int dst_regno) 3948 { 3949 struct bpf_func_state *state = bpf_func(env, reg); 3950 int err; 3951 /* Some accesses are only permitted with a static offset. */ 3952 bool var_off = !tnum_is_const(reg->var_off); 3953 3954 /* The offset is required to be static when reads don't go to a 3955 * register, in order to not leak pointers (see 3956 * check_stack_read_fixed_off). 3957 */ 3958 if (dst_regno < 0 && var_off) { 3959 char tn_buf[48]; 3960 3961 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 3962 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n", 3963 tn_buf, off, size); 3964 return -EACCES; 3965 } 3966 /* Variable offset is prohibited for unprivileged mode for simplicity 3967 * since it requires corresponding support in Spectre masking for stack 3968 * ALU. See also retrieve_ptr_limit(). The check in 3969 * check_stack_access_for_ptr_arithmetic() called by 3970 * adjust_ptr_min_max_vals() prevents users from creating stack pointers 3971 * with variable offsets, therefore no check is required here. Further, 3972 * just checking it here would be insufficient as speculative stack 3973 * writes could still lead to unsafe speculative behaviour. 3974 */ 3975 if (!var_off) { 3976 off += reg->var_off.value; 3977 err = check_stack_read_fixed_off(env, state, off, size, 3978 dst_regno); 3979 } else { 3980 /* Variable offset stack reads need more conservative handling 3981 * than fixed offset ones. Note that dst_regno >= 0 on this 3982 * branch. 3983 */ 3984 err = check_stack_read_var_off(env, reg, ptr_argno, off, size, 3985 dst_regno); 3986 } 3987 return err; 3988 } 3989 3990 3991 /* check_stack_write dispatches to check_stack_write_fixed_off or 3992 * check_stack_write_var_off. 3993 * 3994 * 'reg' is the register used as a pointer into the stack. 
3995 * 'value_regno' is the register whose value we're writing to the stack. It can 3996 * be -1, meaning that we're not writing from a register. 3997 * 3998 * The caller must ensure that the offset falls within the maximum stack size. 3999 */ 4000 static int check_stack_write(struct bpf_verifier_env *env, 4001 struct bpf_reg_state *reg, int off, int size, 4002 int value_regno, int insn_idx) 4003 { 4004 struct bpf_func_state *state = bpf_func(env, reg); 4005 int err; 4006 4007 if (tnum_is_const(reg->var_off)) { 4008 off += reg->var_off.value; 4009 err = check_stack_write_fixed_off(env, state, off, size, 4010 value_regno, insn_idx); 4011 } else { 4012 /* Variable offset stack reads need more conservative handling 4013 * than fixed offset ones. 4014 */ 4015 err = check_stack_write_var_off(env, state, 4016 reg, off, size, 4017 value_regno, insn_idx); 4018 } 4019 return err; 4020 } 4021 4022 /* 4023 * Write a value to the outgoing stack arg area. 4024 * off is a negative offset from r11 (e.g. -8 for arg6, -16 for arg7). 4025 */ 4026 static int check_stack_arg_write(struct bpf_verifier_env *env, struct bpf_func_state *state, 4027 int off, struct bpf_reg_state *value_reg) 4028 { 4029 int max_stack_arg_regs = MAX_BPF_FUNC_ARGS - MAX_BPF_FUNC_REG_ARGS; 4030 struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno]; 4031 int spi = -off / BPF_REG_SIZE - 1; 4032 struct bpf_reg_state *arg; 4033 int err; 4034 4035 if (spi >= max_stack_arg_regs) { 4036 verbose(env, "stack arg write offset %d exceeds max %d stack args\n", 4037 off, max_stack_arg_regs); 4038 return -EINVAL; 4039 } 4040 4041 err = grow_stack_arg_slots(env, state, spi + 1); 4042 if (err) 4043 return err; 4044 4045 /* Track the max outgoing stack arg slot count. 
*/ 4046 if (spi + 1 > subprog->max_out_stack_arg_cnt) 4047 subprog->max_out_stack_arg_cnt = spi + 1; 4048 4049 if (value_reg) { 4050 state->stack_arg_regs[spi] = *value_reg; 4051 } else { 4052 /* BPF_ST: store immediate, treat as scalar */ 4053 arg = &state->stack_arg_regs[spi]; 4054 arg->type = SCALAR_VALUE; 4055 __mark_reg_known(arg, env->prog->insnsi[env->insn_idx].imm); 4056 } 4057 state->no_stack_arg_load = true; 4058 return bpf_push_jmp_history(env, env->cur_state, 4059 INSN_F_STACK_ARG_ACCESS, spi, 0, 0); 4060 } 4061 4062 /* 4063 * Read a value from the incoming stack arg area. 4064 * off is a positive offset from r11 (e.g. +8 for arg6, +16 for arg7). 4065 */ 4066 static int check_stack_arg_read(struct bpf_verifier_env *env, struct bpf_func_state *state, 4067 int off, int dst_regno) 4068 { 4069 struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno]; 4070 struct bpf_verifier_state *vstate = env->cur_state; 4071 int spi = off / BPF_REG_SIZE - 1; 4072 struct bpf_func_state *caller, *cur; 4073 struct bpf_reg_state *arg; 4074 4075 if (state->no_stack_arg_load) { 4076 verbose(env, "r11 load must be before any r11 store or call insn\n"); 4077 return -EINVAL; 4078 } 4079 4080 if (spi + 1 > bpf_in_stack_arg_cnt(subprog)) { 4081 verbose(env, "invalid read from stack arg off %d depth %d\n", 4082 off, bpf_in_stack_arg_cnt(subprog) * BPF_REG_SIZE); 4083 return -EACCES; 4084 } 4085 4086 caller = vstate->frame[vstate->curframe - 1]; 4087 arg = &caller->stack_arg_regs[spi]; 4088 cur = vstate->frame[vstate->curframe]; 4089 cur->regs[dst_regno] = *arg; 4090 return bpf_push_jmp_history(env, env->cur_state, 4091 INSN_F_STACK_ARG_ACCESS, spi, 0, 0); 4092 } 4093 4094 static int mark_stack_arg_precision(struct bpf_verifier_env *env, int arg_idx) 4095 { 4096 struct bpf_func_state *caller = cur_func(env); 4097 int spi = arg_idx - MAX_BPF_FUNC_REG_ARGS; 4098 4099 bt_set_frame_stack_arg_slot(&env->bt, caller->frameno, spi); 4100 return mark_chain_precision_batch(env, 
env->cur_state); 4101 } 4102 4103 static int check_outgoing_stack_args(struct bpf_verifier_env *env, struct bpf_func_state *caller, 4104 int nargs) 4105 { 4106 int i, spi; 4107 4108 for (i = MAX_BPF_FUNC_REG_ARGS; i < nargs; i++) { 4109 spi = i - MAX_BPF_FUNC_REG_ARGS; 4110 if (spi >= caller->out_stack_arg_cnt || 4111 caller->stack_arg_regs[spi].type == NOT_INIT) { 4112 verbose(env, "callee expects %d args, stack arg%d is not initialized\n", 4113 nargs, spi + 1); 4114 return -EFAULT; 4115 } 4116 } 4117 4118 return 0; 4119 } 4120 4121 static struct bpf_reg_state *get_func_arg_reg(struct bpf_func_state *caller, 4122 struct bpf_reg_state *regs, int arg) 4123 { 4124 if (arg < MAX_BPF_FUNC_REG_ARGS) 4125 return ®s[arg + 1]; 4126 4127 return &caller->stack_arg_regs[arg - MAX_BPF_FUNC_REG_ARGS]; 4128 } 4129 4130 static int check_map_access_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 4131 int off, int size, enum bpf_access_type type) 4132 { 4133 struct bpf_map *map = reg->map_ptr; 4134 u32 cap = bpf_map_flags_to_cap(map); 4135 4136 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { 4137 verbose(env, "write into map forbidden, value_size=%d off=%lld size=%d\n", 4138 map->value_size, reg_smin(reg) + off, size); 4139 return -EACCES; 4140 } 4141 4142 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { 4143 verbose(env, "read from map forbidden, value_size=%d off=%lld size=%d\n", 4144 map->value_size, reg_smin(reg) + off, size); 4145 return -EACCES; 4146 } 4147 4148 return 0; 4149 } 4150 4151 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */ 4152 static int __check_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 4153 int off, int size, u32 mem_size, 4154 bool zero_size_allowed) 4155 { 4156 bool size_ok = size > 0 || (size == 0 && zero_size_allowed); 4157 4158 if (off >= 0 && size_ok && (u64)off + size <= mem_size) 4159 return 0; 4160 4161 switch (reg->type) { 4162 case PTR_TO_MAP_KEY: 4163 
verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n", 4164 mem_size, off, size); 4165 break; 4166 case PTR_TO_MAP_VALUE: 4167 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", 4168 mem_size, off, size); 4169 break; 4170 case PTR_TO_PACKET: 4171 case PTR_TO_PACKET_META: 4172 case PTR_TO_PACKET_END: 4173 verbose(env, "invalid access to packet, off=%d size=%d, %s(id=%d,off=%d,r=%d)\n", 4174 off, size, reg_arg_name(env, argno), reg->id, off, mem_size); 4175 break; 4176 case PTR_TO_CTX: 4177 verbose(env, "invalid access to context, ctx_size=%d off=%d size=%d\n", 4178 mem_size, off, size); 4179 break; 4180 case PTR_TO_MEM: 4181 default: 4182 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n", 4183 mem_size, off, size); 4184 } 4185 4186 return -EACCES; 4187 } 4188 4189 /* check read/write into a memory region with possible variable offset */ 4190 static int check_mem_region_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 4191 int off, int size, u32 mem_size, 4192 bool zero_size_allowed) 4193 { 4194 int err; 4195 4196 /* We may have adjusted the register pointing to memory region, so we 4197 * need to try adding each of min_value and max_value to off 4198 * to make sure our theoretical access will be safe. 4199 * 4200 * The minimum value is only important with signed 4201 * comparisons where we can't assume the floor of a 4202 * value is 0. If we are using signed variables for our 4203 * index'es we need to make sure that whatever we use 4204 * will have a set floor within our range. 
4205 */ 4206 if (reg_smin(reg) < 0 && 4207 (reg_smin(reg) == S64_MIN || 4208 (off + reg_smin(reg) != (s64)(s32)(off + reg_smin(reg))) || 4209 reg_smin(reg) + off < 0)) { 4210 verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n", 4211 reg_arg_name(env, argno)); 4212 return -EACCES; 4213 } 4214 err = __check_mem_access(env, reg, argno, reg_smin(reg) + off, size, 4215 mem_size, zero_size_allowed); 4216 if (err) { 4217 verbose(env, "%s min value is outside of the allowed memory range\n", 4218 reg_arg_name(env, argno)); 4219 return err; 4220 } 4221 4222 /* If we haven't set a max value then we need to bail since we can't be 4223 * sure we won't do bad things. 4224 * If reg_umax(reg) + off could overflow, treat that as unbounded too. 4225 */ 4226 if (reg_umax(reg) >= BPF_MAX_VAR_OFF) { 4227 verbose(env, "%s unbounded memory access, make sure to bounds check any such access\n", 4228 reg_arg_name(env, argno)); 4229 return -EACCES; 4230 } 4231 err = __check_mem_access(env, reg, argno, reg_umax(reg) + off, size, 4232 mem_size, zero_size_allowed); 4233 if (err) { 4234 verbose(env, "%s max value is outside of the allowed memory range\n", 4235 reg_arg_name(env, argno)); 4236 return err; 4237 } 4238 4239 return 0; 4240 } 4241 4242 static int __check_ptr_off_reg(struct bpf_verifier_env *env, 4243 const struct bpf_reg_state *reg, argno_t argno, 4244 bool fixed_off_ok) 4245 { 4246 /* Access to this pointer-typed register or passing it to a helper 4247 * is only allowed in its original, unmodified form. 
4248 */ 4249 4250 if (!tnum_is_const(reg->var_off)) { 4251 char tn_buf[48]; 4252 4253 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 4254 verbose(env, "variable %s access var_off=%s disallowed\n", 4255 reg_type_str(env, reg->type), tn_buf); 4256 return -EACCES; 4257 } 4258 4259 if (reg_smin(reg) < 0) { 4260 verbose(env, "negative offset %s ptr %s off=%lld disallowed\n", 4261 reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value); 4262 return -EACCES; 4263 } 4264 4265 if (!fixed_off_ok && reg->var_off.value != 0) { 4266 verbose(env, "dereference of modified %s ptr %s off=%lld disallowed\n", 4267 reg_type_str(env, reg->type), reg_arg_name(env, argno), reg->var_off.value); 4268 return -EACCES; 4269 } 4270 4271 return 0; 4272 } 4273 4274 static int check_ptr_off_reg(struct bpf_verifier_env *env, 4275 const struct bpf_reg_state *reg, int regno) 4276 { 4277 return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false); 4278 } 4279 4280 static int map_kptr_match_type(struct bpf_verifier_env *env, 4281 struct btf_field *kptr_field, 4282 struct bpf_reg_state *reg, u32 regno) 4283 { 4284 const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); 4285 int perm_flags; 4286 const char *reg_name = ""; 4287 4288 if (base_type(reg->type) != PTR_TO_BTF_ID) 4289 goto bad_type; 4290 4291 if (btf_is_kernel(reg->btf)) { 4292 perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; 4293 4294 /* Only unreferenced case accepts untrusted pointers */ 4295 if (kptr_field->type == BPF_KPTR_UNREF) 4296 perm_flags |= PTR_UNTRUSTED; 4297 } else { 4298 perm_flags = PTR_MAYBE_NULL | MEM_ALLOC; 4299 if (kptr_field->type == BPF_KPTR_PERCPU) 4300 perm_flags |= MEM_PERCPU; 4301 } 4302 4303 if (type_flag(reg->type) & ~perm_flags) 4304 goto bad_type; 4305 4306 /* We need to verify reg->type and reg->btf, before accessing reg->btf */ 4307 reg_name = btf_type_name(reg->btf, reg->btf_id); 4308 4309 /* For ref_ptr case, release function check should ensure 
we get one 4310 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the 4311 * normal store of unreferenced kptr, we must ensure var_off is zero. 4312 * Since ref_ptr cannot be accessed directly by BPF insns, check for 4313 * reg->id is not needed here. 4314 */ 4315 if (__check_ptr_off_reg(env, reg, argno_from_reg(regno), true)) 4316 return -EACCES; 4317 4318 /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and 4319 * we also need to take into account the reg->var_off. 4320 * 4321 * We want to support cases like: 4322 * 4323 * struct foo { 4324 * struct bar br; 4325 * struct baz bz; 4326 * }; 4327 * 4328 * struct foo *v; 4329 * v = func(); // PTR_TO_BTF_ID 4330 * val->foo = v; // reg->var_off is zero, btf and btf_id match type 4331 * val->bar = &v->br; // reg->var_off is still zero, but we need to retry with 4332 * // first member type of struct after comparison fails 4333 * val->baz = &v->bz; // reg->var_off is non-zero, so struct needs to be walked 4334 * // to match type 4335 * 4336 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->var_off 4337 * is zero. We must also ensure that btf_struct_ids_match does not walk 4338 * the struct to match type against first member of struct, i.e. reject 4339 * second case from above. Hence, when type is BPF_KPTR_REF, we set 4340 * strict mode to true for type match. 
4341 */ 4342 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->var_off.value, 4343 kptr_field->kptr.btf, kptr_field->kptr.btf_id, 4344 kptr_field->type != BPF_KPTR_UNREF)) 4345 goto bad_type; 4346 return 0; 4347 bad_type: 4348 verbose(env, "invalid kptr access, R%d type=%s%s ", regno, 4349 reg_type_str(env, reg->type), reg_name); 4350 verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name); 4351 if (kptr_field->type == BPF_KPTR_UNREF) 4352 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED), 4353 targ_name); 4354 else 4355 verbose(env, "\n"); 4356 return -EINVAL; 4357 } 4358 4359 static bool in_sleepable(struct bpf_verifier_env *env) 4360 { 4361 return env->cur_state->in_sleepable; 4362 } 4363 4364 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock() 4365 * can dereference RCU protected pointers and result is PTR_TRUSTED. 4366 */ 4367 static bool in_rcu_cs(struct bpf_verifier_env *env) 4368 { 4369 return env->cur_state->active_rcu_locks || 4370 env->cur_state->active_locks || 4371 !in_sleepable(env); 4372 } 4373 4374 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ 4375 BTF_SET_START(rcu_protected_types) 4376 #ifdef CONFIG_NET 4377 BTF_ID(struct, prog_test_ref_kfunc) 4378 #endif 4379 #ifdef CONFIG_CGROUPS 4380 BTF_ID(struct, cgroup) 4381 #endif 4382 #ifdef CONFIG_BPF_JIT 4383 BTF_ID(struct, bpf_cpumask) 4384 #endif 4385 BTF_ID(struct, task_struct) 4386 #ifdef CONFIG_CRYPTO 4387 BTF_ID(struct, bpf_crypto_ctx) 4388 #endif 4389 BTF_SET_END(rcu_protected_types) 4390 4391 static bool rcu_protected_object(const struct btf *btf, u32 btf_id) 4392 { 4393 if (!btf_is_kernel(btf)) 4394 return true; 4395 return btf_id_set_contains(&rcu_protected_types, btf_id); 4396 } 4397 4398 static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field) 4399 { 4400 struct btf_struct_meta *meta; 4401 4402 if 
(btf_is_kernel(kptr_field->kptr.btf)) 4403 return NULL; 4404 4405 meta = btf_find_struct_meta(kptr_field->kptr.btf, 4406 kptr_field->kptr.btf_id); 4407 4408 return meta ? meta->record : NULL; 4409 } 4410 4411 static bool rcu_safe_kptr(const struct btf_field *field) 4412 { 4413 const struct btf_field_kptr *kptr = &field->kptr; 4414 4415 return field->type == BPF_KPTR_PERCPU || 4416 (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id)); 4417 } 4418 4419 static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field) 4420 { 4421 struct btf_record *rec; 4422 u32 ret; 4423 4424 ret = PTR_MAYBE_NULL; 4425 if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) { 4426 ret |= MEM_RCU; 4427 if (kptr_field->type == BPF_KPTR_PERCPU) 4428 ret |= MEM_PERCPU; 4429 else if (!btf_is_kernel(kptr_field->kptr.btf)) 4430 ret |= MEM_ALLOC; 4431 4432 rec = kptr_pointee_btf_record(kptr_field); 4433 if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE)) 4434 ret |= NON_OWN_REF; 4435 } else { 4436 ret |= PTR_UNTRUSTED; 4437 } 4438 4439 return ret; 4440 } 4441 4442 static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno, 4443 struct btf_field *field) 4444 { 4445 struct bpf_reg_state *reg; 4446 const struct btf_type *t; 4447 4448 t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id); 4449 mark_reg_known_zero(env, cur_regs(env), regno); 4450 reg = reg_state(env, regno); 4451 reg->type = PTR_TO_MEM | PTR_MAYBE_NULL; 4452 reg->mem_size = t->size; 4453 reg->id = ++env->id_gen; 4454 4455 return 0; 4456 } 4457 4458 static int check_map_kptr_access(struct bpf_verifier_env *env, 4459 int value_regno, int insn_idx, 4460 struct btf_field *kptr_field) 4461 { 4462 struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; 4463 int class = BPF_CLASS(insn->code); 4464 struct bpf_reg_state *val_reg; 4465 int ret; 4466 4467 /* Things we already checked for in check_map_access and caller: 4468 * - Reject cases where variable offset may touch kptr 4469 * - 
size of access (must be BPF_DW) 4470 * - tnum_is_const(reg->var_off) 4471 * - kptr_field->offset == off + reg->var_off.value 4472 */ 4473 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */ 4474 if (BPF_MODE(insn->code) != BPF_MEM) { 4475 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n"); 4476 return -EACCES; 4477 } 4478 4479 /* We only allow loading referenced kptr, since it will be marked as 4480 * untrusted, similar to unreferenced kptr. 4481 */ 4482 if (class != BPF_LDX && 4483 (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) { 4484 verbose(env, "store to referenced kptr disallowed\n"); 4485 return -EACCES; 4486 } 4487 if (class != BPF_LDX && kptr_field->type == BPF_UPTR) { 4488 verbose(env, "store to uptr disallowed\n"); 4489 return -EACCES; 4490 } 4491 4492 if (class == BPF_LDX) { 4493 if (kptr_field->type == BPF_UPTR) 4494 return mark_uptr_ld_reg(env, value_regno, kptr_field); 4495 4496 /* We can simply mark the value_regno receiving the pointer 4497 * value from map as PTR_TO_BTF_ID, with the correct type. 4498 */ 4499 ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, 4500 kptr_field->kptr.btf, kptr_field->kptr.btf_id, 4501 btf_ld_kptr_type(env, kptr_field)); 4502 if (ret < 0) 4503 return ret; 4504 } else if (class == BPF_STX) { 4505 val_reg = reg_state(env, value_regno); 4506 if (!bpf_register_is_null(val_reg) && 4507 map_kptr_match_type(env, kptr_field, val_reg, value_regno)) 4508 return -EACCES; 4509 } else if (class == BPF_ST) { 4510 if (insn->imm) { 4511 verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n", 4512 kptr_field->offset); 4513 return -EACCES; 4514 } 4515 } else { 4516 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n"); 4517 return -EACCES; 4518 } 4519 return 0; 4520 } 4521 4522 /* 4523 * Return the size of the memory region accessible from a pointer to map value. 
4524 * For INSN_ARRAY maps whole bpf_insn_array->ips array is accessible. 4525 */ 4526 static u32 map_mem_size(const struct bpf_map *map) 4527 { 4528 if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) 4529 return map->max_entries * sizeof(long); 4530 4531 return map->value_size; 4532 } 4533 4534 /* check read/write into a map element with possible variable offset */ 4535 static int check_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 4536 int off, int size, bool zero_size_allowed, 4537 enum bpf_access_src src) 4538 { 4539 struct bpf_map *map = reg->map_ptr; 4540 u32 mem_size = map_mem_size(map); 4541 struct btf_record *rec; 4542 int err, i; 4543 4544 err = check_mem_region_access(env, reg, argno, off, size, mem_size, zero_size_allowed); 4545 if (err) 4546 return err; 4547 4548 if (IS_ERR_OR_NULL(map->record)) 4549 return 0; 4550 rec = map->record; 4551 for (i = 0; i < rec->cnt; i++) { 4552 struct btf_field *field = &rec->fields[i]; 4553 u32 p = field->offset; 4554 4555 /* If any part of a field can be touched by load/store, reject 4556 * this program. To check that [x1, x2) overlaps with [y1, y2), 4557 * it is sufficient to check x1 < y2 && y1 < x2. 
4558 */ 4559 if (reg_smin(reg) + off < p + field->size && 4560 p < reg_umax(reg) + off + size) { 4561 switch (field->type) { 4562 case BPF_KPTR_UNREF: 4563 case BPF_KPTR_REF: 4564 case BPF_KPTR_PERCPU: 4565 case BPF_UPTR: 4566 if (src != ACCESS_DIRECT) { 4567 verbose(env, "%s cannot be accessed indirectly by helper\n", 4568 btf_field_type_name(field->type)); 4569 return -EACCES; 4570 } 4571 if (!tnum_is_const(reg->var_off)) { 4572 verbose(env, "%s access cannot have variable offset\n", 4573 btf_field_type_name(field->type)); 4574 return -EACCES; 4575 } 4576 if (p != off + reg->var_off.value) { 4577 verbose(env, "%s access misaligned expected=%u off=%llu\n", 4578 btf_field_type_name(field->type), 4579 p, off + reg->var_off.value); 4580 return -EACCES; 4581 } 4582 if (size != bpf_size_to_bytes(BPF_DW)) { 4583 verbose(env, "%s access size must be BPF_DW\n", 4584 btf_field_type_name(field->type)); 4585 return -EACCES; 4586 } 4587 break; 4588 default: 4589 verbose(env, "%s cannot be accessed directly by load/store\n", 4590 btf_field_type_name(field->type)); 4591 return -EACCES; 4592 } 4593 } 4594 } 4595 return 0; 4596 } 4597 4598 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, 4599 const struct bpf_call_arg_meta *meta, 4600 enum bpf_access_type t) 4601 { 4602 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 4603 4604 switch (prog_type) { 4605 /* Program types only with direct read access go here! */ 4606 case BPF_PROG_TYPE_LWT_IN: 4607 case BPF_PROG_TYPE_LWT_OUT: 4608 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 4609 case BPF_PROG_TYPE_SK_REUSEPORT: 4610 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4611 case BPF_PROG_TYPE_CGROUP_SKB: 4612 if (t == BPF_WRITE) 4613 return false; 4614 fallthrough; 4615 4616 /* Program types with direct read + write access go here! 
*/ 4617 case BPF_PROG_TYPE_SCHED_CLS: 4618 case BPF_PROG_TYPE_SCHED_ACT: 4619 case BPF_PROG_TYPE_XDP: 4620 case BPF_PROG_TYPE_LWT_XMIT: 4621 case BPF_PROG_TYPE_SK_SKB: 4622 case BPF_PROG_TYPE_SK_MSG: 4623 if (meta) 4624 return meta->pkt_access; 4625 4626 env->seen_direct_write = true; 4627 return true; 4628 4629 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4630 if (t == BPF_WRITE) 4631 env->seen_direct_write = true; 4632 4633 return true; 4634 4635 default: 4636 return false; 4637 } 4638 } 4639 4640 static int check_packet_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off, 4641 int size, bool zero_size_allowed) 4642 { 4643 int err; 4644 4645 if (reg->range < 0) { 4646 verbose(env, "%s offset is outside of the packet\n", reg_arg_name(env, argno)); 4647 return -EINVAL; 4648 } 4649 4650 err = check_mem_region_access(env, reg, argno, off, size, reg->range, zero_size_allowed); 4651 if (err) 4652 return err; 4653 4654 /* __check_mem_access has made sure "off + size - 1" is within u16. 4655 * reg_umax(reg) can't be bigger than MAX_PACKET_OFF which is 0xffff, 4656 * otherwise find_good_pkt_pointers would have refused to set range info 4657 * that __check_mem_access would have rejected this pkt access. 4658 * Therefore, "off + reg_umax(reg) + size - 1" won't overflow u32. 4659 */ 4660 env->prog->aux->max_pkt_offset = 4661 max_t(u32, env->prog->aux->max_pkt_offset, 4662 off + reg_umax(reg) + size - 1); 4663 4664 return 0; 4665 } 4666 4667 static bool is_var_ctx_off_allowed(struct bpf_prog *prog) 4668 { 4669 return resolve_prog_type(prog) == BPF_PROG_TYPE_SYSCALL; 4670 } 4671 4672 /* check access to 'struct bpf_context' fields. 
Supports fixed offsets only */ 4673 static int __check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, 4674 enum bpf_access_type t, struct bpf_insn_access_aux *info) 4675 { 4676 if (env->ops->is_valid_access && 4677 env->ops->is_valid_access(off, size, t, env->prog, info)) { 4678 /* A non zero info.ctx_field_size indicates that this field is a 4679 * candidate for later verifier transformation to load the whole 4680 * field and then apply a mask when accessed with a narrower 4681 * access than actual ctx access size. A zero info.ctx_field_size 4682 * will only allow for whole field access and rejects any other 4683 * type of narrower access. 4684 */ 4685 if (base_type(info->reg_type) == PTR_TO_BTF_ID) { 4686 if (info->ref_id && 4687 !find_reference_state(env->cur_state, info->ref_id)) { 4688 verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n", 4689 off); 4690 return -EACCES; 4691 } 4692 } else { 4693 env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size; 4694 } 4695 /* remember the offset of last byte accessed in ctx */ 4696 if (env->prog->aux->max_ctx_offset < off + size) 4697 env->prog->aux->max_ctx_offset = off + size; 4698 return 0; 4699 } 4700 4701 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size); 4702 return -EACCES; 4703 } 4704 4705 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno, 4706 int off, int access_size, enum bpf_access_type t, 4707 struct bpf_insn_access_aux *info) 4708 { 4709 /* 4710 * Program types that don't rewrite ctx accesses can safely 4711 * dereference ctx pointers with fixed offsets. 
4712 */ 4713 bool var_off_ok = is_var_ctx_off_allowed(env->prog); 4714 bool fixed_off_ok = !env->ops->convert_ctx_access; 4715 int err; 4716 4717 if (var_off_ok) 4718 err = check_mem_region_access(env, reg, argno, off, access_size, U16_MAX, false); 4719 else 4720 err = __check_ptr_off_reg(env, reg, argno, fixed_off_ok); 4721 if (err) 4722 return err; 4723 off += reg_umax(reg); 4724 4725 err = __check_ctx_access(env, insn_idx, off, access_size, t, info); 4726 if (err) 4727 verbose_linfo(env, insn_idx, "; "); 4728 return err; 4729 } 4730 4731 static int check_flow_keys_access(struct bpf_verifier_env *env, 4732 struct bpf_reg_state *reg, argno_t argno, 4733 int off, int size) 4734 { 4735 /* Only a constant offset is allowed here; fold it into off. */ 4736 if (!tnum_is_const(reg->var_off)) { 4737 char tn_buf[48]; 4738 4739 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 4740 verbose(env, "%s invalid variable offset to flow keys: off=%d, var_off=%s\n", 4741 reg_arg_name(env, argno), off, tn_buf); 4742 return -EACCES; 4743 } 4744 off += reg->var_off.value; 4745 4746 if (size < 0 || off < 0 || 4747 (u64)off + size > sizeof(struct bpf_flow_keys)) { 4748 verbose(env, "invalid access to flow keys off=%d size=%d\n", 4749 off, size); 4750 return -EACCES; 4751 } 4752 return 0; 4753 } 4754 4755 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, 4756 struct bpf_reg_state *reg, argno_t argno, int off, int size, 4757 enum bpf_access_type t) 4758 { 4759 struct bpf_insn_access_aux info = {}; 4760 bool valid; 4761 4762 if (reg_smin(reg) < 0) { 4763 verbose(env, "%s min value is negative, either use unsigned index or do a if (index >=0) check.\n", 4764 reg_arg_name(env, argno)); 4765 return -EACCES; 4766 } 4767 4768 switch (reg->type) { 4769 case PTR_TO_SOCK_COMMON: 4770 valid = bpf_sock_common_is_valid_access(off, size, t, &info); 4771 break; 4772 case PTR_TO_SOCKET: 4773 valid = bpf_sock_is_valid_access(off, size, t, &info); 4774 break; 4775 case 
PTR_TO_TCP_SOCK: 4776 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info); 4777 break; 4778 case PTR_TO_XDP_SOCK: 4779 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info); 4780 break; 4781 default: 4782 valid = false; 4783 } 4784 4785 4786 if (valid) { 4787 env->insn_aux_data[insn_idx].ctx_field_size = 4788 info.ctx_field_size; 4789 return 0; 4790 } 4791 4792 verbose(env, "%s invalid %s access off=%d size=%d\n", 4793 reg_arg_name(env, argno), reg_type_str(env, reg->type), off, size); 4794 4795 return -EACCES; 4796 } 4797 4798 static bool is_pointer_value(struct bpf_verifier_env *env, int regno) 4799 { 4800 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); 4801 } 4802 4803 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) 4804 { 4805 const struct bpf_reg_state *reg = reg_state(env, regno); 4806 4807 return reg->type == PTR_TO_CTX; 4808 } 4809 4810 static bool is_sk_reg(struct bpf_verifier_env *env, int regno) 4811 { 4812 const struct bpf_reg_state *reg = reg_state(env, regno); 4813 4814 return type_is_sk_pointer(reg->type); 4815 } 4816 4817 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) 4818 { 4819 const struct bpf_reg_state *reg = reg_state(env, regno); 4820 4821 return type_is_pkt_pointer(reg->type); 4822 } 4823 4824 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) 4825 { 4826 const struct bpf_reg_state *reg = reg_state(env, regno); 4827 4828 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */ 4829 return reg->type == PTR_TO_FLOW_KEYS; 4830 } 4831 4832 static bool is_arena_reg(struct bpf_verifier_env *env, int regno) 4833 { 4834 const struct bpf_reg_state *reg = reg_state(env, regno); 4835 4836 return reg->type == PTR_TO_ARENA; 4837 } 4838 4839 /* Return false if @regno contains a pointer whose type isn't supported for 4840 * atomic instruction @insn. 
4841 */ 4842 static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno, 4843 struct bpf_insn *insn) 4844 { 4845 if (is_ctx_reg(env, regno)) 4846 return false; 4847 if (is_pkt_reg(env, regno)) 4848 return false; 4849 if (is_flow_key_reg(env, regno)) 4850 return false; 4851 if (is_sk_reg(env, regno)) 4852 return false; 4853 if (is_arena_reg(env, regno)) 4854 return bpf_jit_supports_insn(insn, true); 4855 4856 return true; 4857 } 4858 4859 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { 4860 #ifdef CONFIG_NET 4861 [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], 4862 [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], 4863 [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], 4864 #endif 4865 [CONST_PTR_TO_MAP] = btf_bpf_map_id, 4866 }; 4867 4868 static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) 4869 { 4870 /* A referenced register is always trusted. */ 4871 if (reg_is_referenced(env, reg)) 4872 return true; 4873 4874 /* Types listed in the reg2btf_ids are always trusted */ 4875 if (reg2btf_ids[base_type(reg->type)] && 4876 !bpf_type_has_unsafe_modifiers(reg->type)) 4877 return true; 4878 4879 /* If a register is not referenced, it is trusted if it has the 4880 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the 4881 * other type modifiers may be safe, but we elect to take an opt-in 4882 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are 4883 * not. 4884 * 4885 * Eventually, we should make PTR_TRUSTED the single source of truth 4886 * for whether a register is trusted. 
4887 */ 4888 return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS && 4889 !bpf_type_has_unsafe_modifiers(reg->type); 4890 } 4891 4892 static bool is_rcu_reg(const struct bpf_reg_state *reg) 4893 { 4894 return reg->type & MEM_RCU; 4895 } 4896 4897 static void clear_trusted_flags(enum bpf_type_flag *flag) 4898 { 4899 *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU); 4900 } 4901 4902 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, 4903 const struct bpf_reg_state *reg, 4904 int off, int size, bool strict) 4905 { 4906 struct tnum reg_off; 4907 int ip_align; 4908 4909 /* Byte size accesses are always allowed. */ 4910 if (!strict || size == 1) 4911 return 0; 4912 4913 /* For platforms that do not have a Kconfig enabling 4914 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of 4915 * NET_IP_ALIGN is universally set to '2'. And on platforms 4916 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get 4917 * to this code only in strict mode where we want to emulate 4918 * the NET_IP_ALIGN==2 checking. Therefore use an 4919 * unconditional IP align value of '2'. 4920 */ 4921 ip_align = 2; 4922 4923 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + off)); 4924 if (!tnum_is_aligned(reg_off, size)) { 4925 char tn_buf[48]; 4926 4927 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 4928 verbose(env, 4929 "misaligned packet access off %d+%s+%d size %d\n", 4930 ip_align, tn_buf, off, size); 4931 return -EACCES; 4932 } 4933 4934 return 0; 4935 } 4936 4937 static int check_generic_ptr_alignment(struct bpf_verifier_env *env, 4938 const struct bpf_reg_state *reg, 4939 const char *pointer_desc, 4940 int off, int size, bool strict) 4941 { 4942 struct tnum reg_off; 4943 4944 /* Byte size accesses are always allowed. 
*/ 4945 if (!strict || size == 1) 4946 return 0; 4947 4948 reg_off = tnum_add(reg->var_off, tnum_const(off)); 4949 if (!tnum_is_aligned(reg_off, size)) { 4950 char tn_buf[48]; 4951 4952 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 4953 verbose(env, "misaligned %saccess off %s+%d size %d\n", 4954 pointer_desc, tn_buf, off, size); 4955 return -EACCES; 4956 } 4957 4958 return 0; 4959 } 4960 4961 static int check_ptr_alignment(struct bpf_verifier_env *env, 4962 const struct bpf_reg_state *reg, int off, 4963 int size, bool strict_alignment_once) 4964 { 4965 bool strict = env->strict_alignment || strict_alignment_once; 4966 const char *pointer_desc = ""; 4967 4968 switch (reg->type) { 4969 case PTR_TO_PACKET: 4970 case PTR_TO_PACKET_META: 4971 /* Special case, because of NET_IP_ALIGN. Given metadata sits 4972 * right in front, treat it the very same way. 4973 */ 4974 return check_pkt_ptr_alignment(env, reg, off, size, strict); 4975 case PTR_TO_FLOW_KEYS: 4976 pointer_desc = "flow keys "; 4977 break; 4978 case PTR_TO_MAP_KEY: 4979 pointer_desc = "key "; 4980 break; 4981 case PTR_TO_MAP_VALUE: 4982 pointer_desc = "value "; 4983 if (reg->map_ptr->map_type == BPF_MAP_TYPE_INSN_ARRAY) 4984 strict = true; 4985 break; 4986 case PTR_TO_CTX: 4987 pointer_desc = "context "; 4988 break; 4989 case PTR_TO_STACK: 4990 pointer_desc = "stack "; 4991 /* The stack spill tracking logic in check_stack_write_fixed_off() 4992 * and check_stack_read_fixed_off() relies on stack accesses being 4993 * aligned. 
4994 */ 4995 strict = true; 4996 break; 4997 case PTR_TO_SOCKET: 4998 pointer_desc = "sock "; 4999 break; 5000 case PTR_TO_SOCK_COMMON: 5001 pointer_desc = "sock_common "; 5002 break; 5003 case PTR_TO_TCP_SOCK: 5004 pointer_desc = "tcp_sock "; 5005 break; 5006 case PTR_TO_XDP_SOCK: 5007 pointer_desc = "xdp_sock "; 5008 break; 5009 case PTR_TO_ARENA: 5010 return 0; 5011 default: 5012 break; 5013 } 5014 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, 5015 strict); 5016 } 5017 5018 static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog) 5019 { 5020 if (!bpf_jit_supports_private_stack()) 5021 return NO_PRIV_STACK; 5022 5023 /* bpf_prog_check_recur() checks all prog types that use bpf trampoline 5024 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked 5025 * explicitly. 5026 */ 5027 switch (prog->type) { 5028 case BPF_PROG_TYPE_KPROBE: 5029 case BPF_PROG_TYPE_TRACEPOINT: 5030 case BPF_PROG_TYPE_PERF_EVENT: 5031 case BPF_PROG_TYPE_RAW_TRACEPOINT: 5032 return PRIV_STACK_ADAPTIVE; 5033 case BPF_PROG_TYPE_TRACING: 5034 case BPF_PROG_TYPE_LSM: 5035 case BPF_PROG_TYPE_STRUCT_OPS: 5036 if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog)) 5037 return PRIV_STACK_ADAPTIVE; 5038 fallthrough; 5039 default: 5040 break; 5041 } 5042 5043 return NO_PRIV_STACK; 5044 } 5045 5046 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) 5047 { 5048 if (env->prog->jit_requested) 5049 return round_up(stack_depth, 16); 5050 5051 /* round up to 32-bytes, since this is granularity 5052 * of interpreter stack size 5053 */ 5054 return round_up(max_t(u32, stack_depth, 1), 32); 5055 } 5056 5057 /* temporary state used for call frame depth calculation */ 5058 struct bpf_subprog_call_depth_info { 5059 int ret_insn; /* caller instruction where we return to. 
*/ 5060 int caller; /* caller subprogram idx */ 5061 int frame; /* # of consecutive static call stack frames on top of stack */ 5062 }; 5063 5064 /* starting from main bpf function walk all instructions of the function 5065 * and recursively walk all callees that given function can call. 5066 * Ignore jump and exit insns. 5067 */ 5068 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, 5069 struct bpf_subprog_call_depth_info *dinfo, 5070 bool priv_stack_supported) 5071 { 5072 struct bpf_subprog_info *subprog = env->subprog_info; 5073 struct bpf_insn *insn = env->prog->insnsi; 5074 int depth = 0, frame = 0, i, subprog_end, subprog_depth; 5075 bool tail_call_reachable = false; 5076 int total; 5077 int tmp; 5078 5079 /* no caller idx */ 5080 dinfo[idx].caller = -1; 5081 5082 i = subprog[idx].start; 5083 if (!priv_stack_supported) 5084 subprog[idx].priv_stack_mode = NO_PRIV_STACK; 5085 process_func: 5086 /* protect against potential stack overflow that might happen when 5087 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack 5088 * depth for such case down to 256 so that the worst case scenario 5089 * would result in 8k stack size (32 which is tailcall limit * 256 = 5090 * 8k). 5091 * 5092 * To get the idea what might happen, see an example: 5093 * func1 -> sub rsp, 128 5094 * subfunc1 -> sub rsp, 256 5095 * tailcall1 -> add rsp, 256 5096 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320) 5097 * subfunc2 -> sub rsp, 64 5098 * subfunc22 -> sub rsp, 128 5099 * tailcall2 -> add rsp, 128 5100 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416) 5101 * 5102 * tailcall will unwind the current stack frame but it will not get rid 5103 * of caller's stack as shown on the example above. 5104 */ 5105 if (idx && subprog[idx].has_tail_call && depth >= 256) { 5106 verbose(env, 5107 "tail_calls are not allowed when call stack of previous frames is %d bytes. 
Too large\n", 5108 depth); 5109 return -EACCES; 5110 } 5111 5112 subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth); 5113 if (IS_ENABLED(CONFIG_X86_64) && subprog[idx].stack_arg_cnt) { 5114 /* x86-64 uses R9 for both private stack frame pointer and arg6. */ 5115 subprog[idx].priv_stack_mode = NO_PRIV_STACK; 5116 } else if (priv_stack_supported) { 5117 /* Request private stack support only if the subprog stack 5118 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to 5119 * avoid jit penalty if the stack usage is small. 5120 */ 5121 if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN && 5122 subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) 5123 subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE; 5124 } 5125 5126 if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) { 5127 if (subprog_depth > env->max_stack_depth) 5128 env->max_stack_depth = subprog_depth; 5129 if (subprog_depth > MAX_BPF_STACK) { 5130 verbose(env, "stack size of subprog %d is %d. Too large\n", 5131 idx, subprog_depth); 5132 return -EACCES; 5133 } 5134 } else { 5135 depth += subprog_depth; 5136 if (depth > env->max_stack_depth) 5137 env->max_stack_depth = depth; 5138 if (depth > MAX_BPF_STACK) { 5139 total = 0; 5140 for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) 5141 total++; 5142 5143 verbose(env, "combined stack size of %d calls is %d. 
Too large\n", 5144 total, depth); 5145 return -EACCES; 5146 } 5147 } 5148 continue_func: 5149 subprog_end = subprog[idx + 1].start; 5150 for (; i < subprog_end; i++) { 5151 int next_insn, sidx; 5152 5153 if (bpf_pseudo_kfunc_call(insn + i) && !insn[i].off) { 5154 bool err = false; 5155 5156 if (!bpf_is_throw_kfunc(insn + i)) 5157 continue; 5158 for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) { 5159 if (subprog[tmp].is_cb) { 5160 err = true; 5161 break; 5162 } 5163 } 5164 if (!err) 5165 continue; 5166 verbose(env, 5167 "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n", 5168 i, idx); 5169 return -EINVAL; 5170 } 5171 5172 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) 5173 continue; 5174 /* remember insn and function to return to */ 5175 5176 /* find the callee */ 5177 next_insn = i + insn[i].imm + 1; 5178 sidx = bpf_find_subprog(env, next_insn); 5179 if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn)) 5180 return -EFAULT; 5181 if (subprog[sidx].is_async_cb) { 5182 if (subprog[sidx].has_tail_call) { 5183 verifier_bug(env, "subprog has tail_call and async cb"); 5184 return -EFAULT; 5185 } 5186 /* async callbacks don't increase bpf prog stack size unless called directly */ 5187 if (!bpf_pseudo_call(insn + i)) 5188 continue; 5189 if (subprog[sidx].is_exception_cb) { 5190 verbose(env, "insn %d cannot call exception cb directly", i); 5191 return -EINVAL; 5192 } 5193 } 5194 5195 /* store caller info for after we return from callee */ 5196 dinfo[idx].frame = frame; 5197 dinfo[idx].ret_insn = i + 1; 5198 5199 /* push caller idx into callee's dinfo */ 5200 dinfo[sidx].caller = idx; 5201 5202 i = next_insn; 5203 5204 idx = sidx; 5205 if (!priv_stack_supported) 5206 subprog[idx].priv_stack_mode = NO_PRIV_STACK; 5207 5208 if (subprog[idx].has_tail_call) 5209 tail_call_reachable = true; 5210 5211 frame = bpf_subprog_is_global(env, idx) ? 
0 : frame + 1; 5212 if (frame >= MAX_CALL_FRAMES) { 5213 verbose(env, "the call stack of %d frames is too deep !\n", 5214 frame); 5215 return -E2BIG; 5216 } 5217 goto process_func; 5218 } 5219 /* if tail call got detected across bpf2bpf calls then mark each of the 5220 * currently present subprog frames as tail call reachable subprogs; 5221 * this info will be utilized by JIT so that we will be preserving the 5222 * tail call counter throughout bpf2bpf calls combined with tailcalls 5223 */ 5224 if (tail_call_reachable) { 5225 for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) { 5226 if (subprog[tmp].is_exception_cb) { 5227 verbose(env, "cannot tail call within exception cb\n"); 5228 return -EINVAL; 5229 } 5230 if (subprog[tmp].stack_arg_cnt) { 5231 verbose(env, "tail_calls are not allowed in programs with stack args\n"); 5232 return -EINVAL; 5233 } 5234 subprog[tmp].tail_call_reachable = true; 5235 } 5236 } else if (!idx && subprog[0].has_tail_call && subprog[0].stack_arg_cnt) { 5237 verbose(env, "tail_calls are not allowed in programs with stack args\n"); 5238 return -EINVAL; 5239 } 5240 5241 if (subprog[0].tail_call_reachable) 5242 env->prog->aux->tail_call_reachable = true; 5243 5244 /* end of for() loop means the last insn of the 'subprog' 5245 * was reached. 
Doesn't matter whether it was JA or EXIT 5246 */ 5247 if (frame == 0 && dinfo[idx].caller < 0) 5248 return 0; 5249 if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE) 5250 depth -= round_up_stack_depth(env, subprog[idx].stack_depth); 5251 5252 /* pop caller idx from callee */ 5253 idx = dinfo[idx].caller; 5254 5255 /* retrieve caller state from its frame */ 5256 frame = dinfo[idx].frame; 5257 i = dinfo[idx].ret_insn; 5258 5259 /* reset tail_call_reachable to the parent's actual state */ 5260 tail_call_reachable = subprog[idx].tail_call_reachable; 5261 5262 goto continue_func; 5263 } 5264 5265 static int check_max_stack_depth(struct bpf_verifier_env *env) 5266 { 5267 enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN; 5268 struct bpf_subprog_call_depth_info *dinfo; 5269 struct bpf_subprog_info *si = env->subprog_info; 5270 bool priv_stack_supported; 5271 int ret; 5272 5273 dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT); 5274 if (!dinfo) 5275 return -ENOMEM; 5276 5277 for (int i = 0; i < env->subprog_cnt; i++) { 5278 if (si[i].has_tail_call) { 5279 priv_stack_mode = NO_PRIV_STACK; 5280 break; 5281 } 5282 } 5283 5284 if (priv_stack_mode == PRIV_STACK_UNKNOWN) 5285 priv_stack_mode = bpf_enable_priv_stack(env->prog); 5286 5287 /* All async_cb subprogs use normal kernel stack. If a particular 5288 * subprog appears in both main prog and async_cb subtree, that 5289 * subprog will use normal kernel stack to avoid potential nesting. 5290 * The reverse subprog traversal ensures when main prog subtree is 5291 * checked, the subprogs appearing in async_cb subtrees are already 5292 * marked as using normal kernel stack, so stack size checking can 5293 * be done properly. 
5294 */ 5295 for (int i = env->subprog_cnt - 1; i >= 0; i--) { 5296 if (!i || si[i].is_async_cb) { 5297 priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE; 5298 ret = check_max_stack_depth_subprog(env, i, dinfo, 5299 priv_stack_supported); 5300 if (ret < 0) { 5301 kvfree(dinfo); 5302 return ret; 5303 } 5304 } 5305 } 5306 5307 for (int i = 0; i < env->subprog_cnt; i++) { 5308 if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) { 5309 env->prog->aux->jits_use_priv_stack = true; 5310 break; 5311 } 5312 } 5313 5314 kvfree(dinfo); 5315 5316 return 0; 5317 } 5318 5319 static int __check_buffer_access(struct bpf_verifier_env *env, 5320 const char *buf_info, 5321 const struct bpf_reg_state *reg, 5322 argno_t argno, int off, int size) 5323 { 5324 if (off < 0) { 5325 verbose(env, 5326 "%s invalid %s buffer access: off=%d, size=%d\n", 5327 reg_arg_name(env, argno), buf_info, off, size); 5328 return -EACCES; 5329 } 5330 if (!tnum_is_const(reg->var_off)) { 5331 char tn_buf[48]; 5332 5333 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 5334 verbose(env, 5335 "%s invalid variable buffer offset: off=%d, var_off=%s\n", 5336 reg_arg_name(env, argno), off, tn_buf); 5337 return -EACCES; 5338 } 5339 5340 return 0; 5341 } 5342 5343 static int check_tp_buffer_access(struct bpf_verifier_env *env, 5344 const struct bpf_reg_state *reg, 5345 argno_t argno, int off, int size) 5346 { 5347 int err; 5348 5349 err = __check_buffer_access(env, "tracepoint", reg, argno, off, size); 5350 if (err) 5351 return err; 5352 5353 env->prog->aux->max_tp_access = max(reg->var_off.value + off + size, 5354 env->prog->aux->max_tp_access); 5355 5356 return 0; 5357 } 5358 5359 static int check_buffer_access(struct bpf_verifier_env *env, 5360 const struct bpf_reg_state *reg, 5361 argno_t argno, int off, int size, 5362 bool zero_size_allowed, 5363 u32 *max_access) 5364 { 5365 const char *buf_info = type_is_rdonly_mem(reg->type) ? 
"rdonly" : "rdwr"; 5366 int err; 5367 5368 err = __check_buffer_access(env, buf_info, reg, argno, off, size); 5369 if (err) 5370 return err; 5371 5372 *max_access = max(reg->var_off.value + off + size, *max_access); 5373 5374 return 0; 5375 } 5376 5377 /* BPF architecture zero extends alu32 ops into 64-bit registesr */ 5378 static void zext_32_to_64(struct bpf_reg_state *reg) 5379 { 5380 reg->var_off = tnum_subreg(reg->var_off); 5381 reg_set_urange64(reg, reg_u32_min(reg), reg_u32_max(reg)); 5382 } 5383 5384 /* truncate register to smaller size (in bytes) 5385 * must be called with size < BPF_REG_SIZE 5386 */ 5387 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) 5388 { 5389 u64 mask; 5390 5391 /* clear high bits in bit representation */ 5392 reg->var_off = tnum_cast(reg->var_off, size); 5393 5394 /* fix arithmetic bounds */ 5395 mask = ((u64)1 << (size * 8)) - 1; 5396 if ((reg_umin(reg) & ~mask) == (reg_umax(reg) & ~mask)) 5397 reg_set_urange64(reg, reg_umin(reg) & mask, reg_umax(reg) & mask); 5398 else 5399 reg_set_urange64(reg, 0, mask); 5400 5401 /* If size is smaller than 32bit register the 32bit register 5402 * values are also truncated so we push 64-bit bounds into 5403 * 32-bit bounds. Above were truncated < 32-bits already. 
5404 */ 5405 if (size < 4) 5406 __mark_reg32_unbounded(reg); 5407 5408 reg_bounds_sync(reg); 5409 } 5410 5411 static void set_sext64_default_val(struct bpf_reg_state *reg, int size) 5412 { 5413 if (size == 1) { 5414 reg_set_srange64(reg, S8_MIN, S8_MAX); 5415 reg_set_srange32(reg, S8_MIN, S8_MAX); 5416 } else if (size == 2) { 5417 reg_set_srange64(reg, S16_MIN, S16_MAX); 5418 reg_set_srange32(reg, S16_MIN, S16_MAX); 5419 } else { 5420 /* size == 4 */ 5421 reg_set_srange64(reg, S32_MIN, S32_MAX); 5422 reg_set_srange32(reg, S32_MIN, S32_MAX); 5423 } 5424 reg->var_off = tnum_unknown; 5425 } 5426 5427 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size) 5428 { 5429 s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval; 5430 u64 top_smax_value, top_smin_value; 5431 u64 num_bits = size * 8; 5432 5433 if (tnum_is_const(reg->var_off)) { 5434 u64_cval = reg->var_off.value; 5435 if (size == 1) 5436 reg->var_off = tnum_const((s8)u64_cval); 5437 else if (size == 2) 5438 reg->var_off = tnum_const((s16)u64_cval); 5439 else 5440 /* size == 4 */ 5441 reg->var_off = tnum_const((s32)u64_cval); 5442 5443 u64_cval = reg->var_off.value; 5444 reg->r64 = cnum64_from_urange(u64_cval, u64_cval); 5445 reg->r32 = cnum32_from_urange((u32)u64_cval, (u32)u64_cval); 5446 return; 5447 } 5448 5449 top_smax_value = ((u64)reg_smax(reg) >> num_bits) << num_bits; 5450 top_smin_value = ((u64)reg_smin(reg) >> num_bits) << num_bits; 5451 5452 if (top_smax_value != top_smin_value) 5453 goto out; 5454 5455 /* find the s64_min and s64_min after sign extension */ 5456 if (size == 1) { 5457 init_s64_max = (s8)reg_smax(reg); 5458 init_s64_min = (s8)reg_smin(reg); 5459 } else if (size == 2) { 5460 init_s64_max = (s16)reg_smax(reg); 5461 init_s64_min = (s16)reg_smin(reg); 5462 } else { 5463 init_s64_max = (s32)reg_smax(reg); 5464 init_s64_min = (s32)reg_smin(reg); 5465 } 5466 5467 s64_max = max(init_s64_max, init_s64_min); 5468 s64_min = min(init_s64_max, init_s64_min); 5469 5470 /* both 
of s64_max/s64_min positive or negative */ 5471 if ((s64_max >= 0) == (s64_min >= 0)) { 5472 reg_set_srange64(reg, s64_min, s64_max); 5473 reg_set_srange32(reg, s64_min, s64_max); 5474 reg->var_off = tnum_range(s64_min, s64_max); 5475 return; 5476 } 5477 5478 out: 5479 set_sext64_default_val(reg, size); 5480 } 5481 5482 static void set_sext32_default_val(struct bpf_reg_state *reg, int size) 5483 { 5484 if (size == 1) 5485 reg_set_srange32(reg, S8_MIN, S8_MAX); 5486 else 5487 /* size == 2 */ 5488 reg_set_srange32(reg, S16_MIN, S16_MAX); 5489 reg->var_off = tnum_subreg(tnum_unknown); 5490 } 5491 5492 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) 5493 { 5494 s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val; 5495 u32 top_smax_value, top_smin_value; 5496 u32 num_bits = size * 8; 5497 5498 if (tnum_is_const(reg->var_off)) { 5499 u32_val = reg->var_off.value; 5500 if (size == 1) 5501 reg->var_off = tnum_const((s8)u32_val); 5502 else 5503 reg->var_off = tnum_const((s16)u32_val); 5504 5505 u32_val = reg->var_off.value; 5506 reg_set_srange32(reg, u32_val, u32_val); 5507 return; 5508 } 5509 5510 top_smax_value = ((u32)reg_s32_max(reg) >> num_bits) << num_bits; 5511 top_smin_value = ((u32)reg_s32_min(reg) >> num_bits) << num_bits; 5512 5513 if (top_smax_value != top_smin_value) 5514 goto out; 5515 5516 /* find the s32_min and s32_min after sign extension */ 5517 if (size == 1) { 5518 init_s32_max = (s8)reg_s32_max(reg); 5519 init_s32_min = (s8)reg_s32_min(reg); 5520 } else { 5521 /* size == 2 */ 5522 init_s32_max = (s16)reg_s32_max(reg); 5523 init_s32_min = (s16)reg_s32_min(reg); 5524 } 5525 s32_max = max(init_s32_max, init_s32_min); 5526 s32_min = min(init_s32_max, init_s32_min); 5527 5528 if ((s32_min >= 0) == (s32_max >= 0)) { 5529 reg_set_srange32(reg, s32_min, s32_max); 5530 reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max)); 5531 return; 5532 } 5533 5534 out: 5535 set_sext32_default_val(reg, size); 5536 } 5537 5538 bool 
bpf_map_is_rdonly(const struct bpf_map *map) 5539 { 5540 /* A map is considered read-only if the following condition are true: 5541 * 5542 * 1) BPF program side cannot change any of the map content. The 5543 * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map 5544 * and was set at map creation time. 5545 * 2) The map value(s) have been initialized from user space by a 5546 * loader and then "frozen", such that no new map update/delete 5547 * operations from syscall side are possible for the rest of 5548 * the map's lifetime from that point onwards. 5549 * 3) Any parallel/pending map update/delete operations from syscall 5550 * side have been completed. Only after that point, it's safe to 5551 * assume that map value(s) are immutable. 5552 */ 5553 return (map->map_flags & BPF_F_RDONLY_PROG) && 5554 READ_ONCE(map->frozen) && 5555 !bpf_map_write_active(map); 5556 } 5557 5558 int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val, 5559 bool is_ldsx) 5560 { 5561 void *ptr; 5562 u64 addr; 5563 int err; 5564 5565 err = map->ops->map_direct_value_addr(map, &addr, off); 5566 if (err) 5567 return err; 5568 ptr = (void *)(long)addr + off; 5569 5570 switch (size) { 5571 case sizeof(u8): 5572 *val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr; 5573 break; 5574 case sizeof(u16): 5575 *val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr; 5576 break; 5577 case sizeof(u32): 5578 *val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr; 5579 break; 5580 case sizeof(u64): 5581 *val = *(u64 *)ptr; 5582 break; 5583 default: 5584 return -EINVAL; 5585 } 5586 return 0; 5587 } 5588 5589 #define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu) 5590 #define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null) 5591 #define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted) 5592 #define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type) __PASTE(__type, __safe_trusted_or_null) 5593 5594 /* 5595 * Allow list few fields as RCU trusted or full trusted. 
 * This logic doesn't allow mix tagging and will be removed once GCC supports
 * btf_type_tag.
 *
 * Each definition below declares a separate type whose name is the original
 * type name with a suffix pasted on by the BTF_TYPE_SAFE_* macros, and whose
 * members are only the allow-listed fields of the original type.
 */

/* RCU trusted: these fields are trusted in RCU CS and never NULL */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
	struct cgroup *cgroup;
};

/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
#ifdef CONFIG_MEMCG
	/* only listed when the kernel is built with CONFIG_MEMCG */
	struct task_struct __rcu *owner;
#endif
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};

/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};

/* full trusted, but the field may be NULL (see the _OR_NULL suffix) */
BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
	struct inode *d_inode;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) {
	struct sock *sk;
};

BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) {
	struct mm_struct *vm_mm;
	struct file *vm_file;
};

/* Return true if @field_name of the type in @reg is on the "__safe_rcu"
 * allow list. The BTF_TYPE_EMIT() lines reference the allow-list types
 * (presumably so that they are present in BTF for the lookup; confirm
 * against the BTF_TYPE_EMIT definition).
 */
static bool type_is_rcu(struct bpf_verifier_env *env,
			struct bpf_reg_state *reg,
			const char *field_name, u32 btf_id)
{
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
}

/* Same as type_is_rcu(), for the "__safe_rcu_or_null" allow list. */
static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
				struct bpf_reg_state *reg,
				const char *field_name, u32 btf_id)
{
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
}

/* Same as type_is_rcu(), for the "__safe_trusted" allow list. */
static bool type_is_trusted(struct bpf_verifier_env *env,
			    struct bpf_reg_state *reg,
			    const char *field_name, u32 btf_id)
{
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
}

/* Same as type_is_rcu(), for the "__safe_trusted_or_null" allow list. */
static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
				    struct bpf_reg_state *reg,
				    const char *field_name, u32 btf_id)
{
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
					  "__safe_trusted_or_null");
}

/* Check a load/store through a PTR_TO_BTF_ID register.
 *
 * @regs:  register file in which @value_regno is marked on a successful read.
 * @reg:   the pointer register being dereferenced.
 * @argno: only used to name the register/argument in log messages.
 * @off, @size: fixed offset and width of the access; the constant part of
 *         reg->var_off is added to @off here.
 * @atype: BPF_READ or BPF_WRITE.
 * @value_regno: destination register for reads; negative means none.
 *
 * Returns 0 on success, -EPERM/-EINVAL when the program is not allowed to
 * touch the type at all, -EACCES for a disallowed offset, memory kind or
 * write, -EFAULT on internal inconsistencies, or the negative value returned
 * by the struct access walk.
 */
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
				   argno_t argno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
	const char *field_name = NULL;
	enum bpf_type_flag flag = 0;
	u32 btf_id = 0;
	int ret;

	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}
	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
		verbose(env,
			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
			tname);
		return -EINVAL;
	}

	/* the walk below needs an exact byte offset into the type */
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"%s is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
			reg_arg_name(env, argno), tname, off, tn_buf);
		return -EACCES;
	}

	off += reg->var_off.value;

	if (off < 0) {
		verbose(env,
			"%s is ptr_%s invalid negative access: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_USER) {
		verbose(env,
			"%s is ptr_%s access user memory: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	if (reg->type & MEM_PERCPU) {
		verbose(env,
			"%s is ptr_%s access percpu memory: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	/* writes to non-allocated objects go through the prog type's own
	 * btf_struct_access callback when it provides one
	 */
	if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
		if (!btf_is_kernel(reg->btf)) {
			verifier_bug(env, "reg->btf must be kernel btf");
			return -EFAULT;
		}
		ret = env->ops->btf_struct_access(&env->log, reg, off, size);
	} else {
		/* Writes are permitted with default btf_struct_access for
		 * program allocated objects (which always have id > 0),
		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
		 */
		if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
			verbose(env, "only read is supported\n");
			return -EACCES;
		}

		if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
		    !(reg->type & MEM_RCU) && !reg_is_referenced(env, reg)) {
			verifier_bug(env, "allocated object must have a referenced id");
			return -EFAULT;
		}

		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
	}

	if (ret < 0)
		return ret;

	/* From here on a non-negative ret is the type of the accessed field;
	 * the chain below only decides which flags a loaded PTR_TO_BTF_ID gets.
	 */
	if (ret != PTR_TO_BTF_ID) {
		/* just mark; */

	} else if (type_flag(reg->type) & PTR_UNTRUSTED) {
		/* If this is an untrusted pointer, all pointers formed by walking it
		 * also inherit the untrusted flag.
		 */
		flag = PTR_UNTRUSTED;

	} else if (is_trusted_reg(env, reg) || is_rcu_reg(reg)) {
		/* By default any pointer obtained from walking a trusted pointer is no
		 * longer trusted, unless the field being accessed has explicitly been
		 * marked as inheriting its parent's state of trust (either full or RCU).
		 * For example:
		 * 'cgroups' pointer is untrusted if task->cgroups dereference
		 * happened in a sleepable program outside of bpf_rcu_read_lock()
		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
		 *
		 * A regular RCU-protected pointer with __rcu tag can also be deemed
		 * trusted if we are in an RCU CS. Such pointer can be NULL.
		 */
		if (type_is_trusted(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED;
		} else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) {
			flag |= PTR_TRUSTED | PTR_MAYBE_NULL;
		} else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
			if (type_is_rcu(env, reg, field_name, btf_id)) {
				/* ignore __rcu tag and mark it MEM_RCU */
				flag |= MEM_RCU;
			} else if (flag & MEM_RCU ||
				   type_is_rcu_or_null(env, reg, field_name, btf_id)) {
				/* __rcu tagged pointers can be NULL */
				flag |= MEM_RCU | PTR_MAYBE_NULL;

				/* We always trust them */
				if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
				    flag & PTR_UNTRUSTED)
					flag &= ~PTR_UNTRUSTED;
			} else if (flag & (MEM_PERCPU | MEM_USER)) {
				/* keep as-is */
			} else {
				/* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
				clear_trusted_flags(&flag);
			}
		} else {
			/*
			 * If not in RCU CS or MEM_RCU pointer can be NULL then
			 * aggressively mark as untrusted otherwise such
			 * pointers will be plain PTR_TO_BTF_ID without flags
			 * and will be allowed to be passed into helpers for
			 * compat reasons.
			 */
			flag = PTR_UNTRUSTED;
		}
	} else {
		/* Old compat. Deprecated */
		clear_trusted_flags(&flag);
	}

	/* only a read into a real register updates register state */
	if (atype == BPF_READ && value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/* Check a read through a map pointer register by treating the map as a
 * PTR_TO_BTF_ID of the kernel type named by map->ops->map_btf_id.
 *
 * Returns 0 on success, -ENOTSUPP when vmlinux BTF or the map's BTF id is
 * not available, -EPERM without allow_ptr_leaks, -EACCES for a negative
 * offset or any non-read access, or the negative value returned by the
 * struct access walk.
 */
static int check_ptr_to_map_access(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs, struct bpf_reg_state *reg,
				   argno_t argno, int off, int size,
				   enum bpf_access_type atype,
				   int value_regno)
{
	struct bpf_map *map = reg->map_ptr;
	struct bpf_reg_state map_reg;
	enum bpf_type_flag flag = 0;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;
	int ret;

	if (!btf_vmlinux) {
		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
		return -ENOTSUPP;
	}

	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
		verbose(env, "map_ptr access not supported for map type %d\n",
			map->map_type);
		return -ENOTSUPP;
	}

	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
	tname = btf_name_by_offset(btf_vmlinux, t->name_off);

	if (!env->allow_ptr_leaks) {
		verbose(env,
			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
			tname);
		return -EPERM;
	}

	if (off < 0) {
		verbose(env, "%s is %s invalid negative access: off=%d\n",
			reg_arg_name(env, argno), tname, off);
		return -EACCES;
	}

	if (atype != BPF_READ) {
		verbose(env, "only read from %s is supported\n", tname);
		return -EACCES;
	}

	/* Simulate access to a PTR_TO_BTF_ID */
	memset(&map_reg, 0, sizeof(map_reg));
	ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
			      btf_vmlinux, *map->ops->map_btf_id, 0);
	if (ret < 0)
		return ret;
	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
	if (ret < 0)
		return ret;

	if (value_regno >= 0) {
		ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/* Check that the stack access at the given offset is within bounds. The
 * maximum valid offset is -1.
 *
 * The minimum valid offset is -MAX_BPF_STACK for writes, and
 * -state->allocated_stack for reads.
 *
 * When env->allow_uninit_stack is set, reads also get the -MAX_BPF_STACK
 * lower bound.
 *
 * Returns 0 if @off is within bounds, -EACCES otherwise.
 */
static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
                                          s64 off,
                                          struct bpf_func_state *state,
                                          enum bpf_access_type t)
{
	int min_valid_off;

	if (t == BPF_WRITE || env->allow_uninit_stack)
		min_valid_off = -MAX_BPF_STACK;
	else
		min_valid_off = -state->allocated_stack;

	if (off < min_valid_off || off > -1)
		return -EACCES;
	return 0;
}

/* Check that the stack access at 'regno + off' falls within the maximum stack
 * bounds.
 *
 * 'off' includes `regno->offset`, but not its dynamic part (if any).
5966 */ 5967 static int check_stack_access_within_bounds( 5968 struct bpf_verifier_env *env, struct bpf_reg_state *reg, 5969 argno_t argno, int off, int access_size, 5970 enum bpf_access_type type) 5971 { 5972 struct bpf_func_state *state = bpf_func(env, reg); 5973 s64 min_off, max_off; 5974 int err; 5975 char *err_extra; 5976 5977 if (type == BPF_READ) 5978 err_extra = " read from"; 5979 else 5980 err_extra = " write to"; 5981 5982 if (tnum_is_const(reg->var_off)) { 5983 min_off = (s64)reg->var_off.value + off; 5984 max_off = min_off + access_size; 5985 } else { 5986 if (reg_smax(reg) >= BPF_MAX_VAR_OFF || 5987 reg_smin(reg) <= -BPF_MAX_VAR_OFF) { 5988 verbose(env, "invalid unbounded variable-offset%s stack %s\n", 5989 err_extra, reg_arg_name(env, argno)); 5990 return -EACCES; 5991 } 5992 min_off = reg_smin(reg) + off; 5993 max_off = reg_smax(reg) + off + access_size; 5994 } 5995 5996 err = check_stack_slot_within_bounds(env, min_off, state, type); 5997 if (!err && max_off > 0) 5998 err = -EINVAL; /* out of stack access into non-negative offsets */ 5999 if (!err && access_size < 0) 6000 /* access_size should not be negative (or overflow an int); others checks 6001 * along the way should have prevented such an access. 6002 */ 6003 err = -EFAULT; /* invalid negative access size; integer overflow? */ 6004 6005 if (err) { 6006 if (tnum_is_const(reg->var_off)) { 6007 verbose(env, "invalid%s stack %s off=%lld size=%d\n", 6008 err_extra, reg_arg_name(env, argno), min_off, access_size); 6009 } else { 6010 char tn_buf[48]; 6011 6012 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 6013 verbose(env, "invalid variable-offset%s stack %s var_off=%s off=%d size=%d\n", 6014 err_extra, reg_arg_name(env, argno), tn_buf, off, access_size); 6015 } 6016 return err; 6017 } 6018 6019 /* Note that there is no stack access with offset zero, so the needed stack 6020 * size is -min_off, not -min_off+1. 
6021 */ 6022 return grow_stack_state(env, state, -min_off /* size */); 6023 } 6024 6025 static bool get_func_retval_range(struct bpf_prog *prog, 6026 struct bpf_retval_range *range) 6027 { 6028 if (prog->type == BPF_PROG_TYPE_LSM && 6029 prog->expected_attach_type == BPF_LSM_MAC && 6030 !bpf_lsm_get_retval_range(prog, range)) { 6031 return true; 6032 } 6033 return false; 6034 } 6035 6036 static void add_scalar_to_reg(struct bpf_reg_state *dst_reg, s64 val) 6037 { 6038 struct bpf_reg_state fake_reg; 6039 6040 if (!val) 6041 return; 6042 6043 fake_reg.type = SCALAR_VALUE; 6044 __mark_reg_known(&fake_reg, val); 6045 6046 scalar32_min_max_add(dst_reg, &fake_reg); 6047 scalar_min_max_add(dst_reg, &fake_reg); 6048 dst_reg->var_off = tnum_add(dst_reg->var_off, fake_reg.var_off); 6049 6050 reg_bounds_sync(dst_reg); 6051 } 6052 6053 /* check whether memory at (regno + off) is accessible for t = (read | write) 6054 * if t==write, value_regno is a register which value is stored into memory 6055 * if t==read, value_regno is a register which will receive the value from memory 6056 * if t==write && value_regno==-1, some unknown value is stored into memory 6057 * if t==read && value_regno==-1, don't care what we read from memory 6058 */ 6059 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, struct bpf_reg_state *reg, argno_t argno, 6060 int off, int bpf_size, enum bpf_access_type t, 6061 int value_regno, bool strict_alignment_once, bool is_ldsx) 6062 { 6063 struct bpf_reg_state *regs = cur_regs(env); 6064 int size, err = 0; 6065 6066 size = bpf_size_to_bytes(bpf_size); 6067 if (size < 0) 6068 return size; 6069 6070 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); 6071 if (err) 6072 return err; 6073 6074 if (reg->type == PTR_TO_MAP_KEY) { 6075 if (t == BPF_WRITE) { 6076 verbose(env, "write to change key %s not allowed\n", 6077 reg_arg_name(env, argno)); 6078 return -EACCES; 6079 } 6080 6081 err = check_mem_region_access(env, reg, argno, 
off, size, 6082 reg->map_ptr->key_size, false); 6083 if (err) 6084 return err; 6085 if (value_regno >= 0) 6086 mark_reg_unknown(env, regs, value_regno); 6087 } else if (reg->type == PTR_TO_MAP_VALUE) { 6088 struct btf_field *kptr_field = NULL; 6089 6090 if (t == BPF_WRITE && value_regno >= 0 && 6091 is_pointer_value(env, value_regno)) { 6092 verbose(env, "R%d leaks addr into map\n", value_regno); 6093 return -EACCES; 6094 } 6095 err = check_map_access_type(env, reg, off, size, t); 6096 if (err) 6097 return err; 6098 err = check_map_access(env, reg, argno, off, size, false, ACCESS_DIRECT); 6099 if (err) 6100 return err; 6101 if (tnum_is_const(reg->var_off)) 6102 kptr_field = btf_record_find(reg->map_ptr->record, 6103 off + reg->var_off.value, BPF_KPTR | BPF_UPTR); 6104 if (kptr_field) { 6105 err = check_map_kptr_access(env, value_regno, insn_idx, kptr_field); 6106 } else if (t == BPF_READ && value_regno >= 0) { 6107 struct bpf_map *map = reg->map_ptr; 6108 6109 /* 6110 * If map is read-only, track its contents as scalars, 6111 * unless it is an insn array (see the special case below) 6112 */ 6113 if (tnum_is_const(reg->var_off) && 6114 bpf_map_is_rdonly(map) && 6115 map->ops->map_direct_value_addr && 6116 map->map_type != BPF_MAP_TYPE_INSN_ARRAY) { 6117 int map_off = off + reg->var_off.value; 6118 u64 val = 0; 6119 6120 err = bpf_map_direct_read(map, map_off, size, 6121 &val, is_ldsx); 6122 if (err) 6123 return err; 6124 6125 regs[value_regno].type = SCALAR_VALUE; 6126 __mark_reg_known(®s[value_regno], val); 6127 } else if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { 6128 if (bpf_size != BPF_DW) { 6129 verbose(env, "Invalid read of %d bytes from insn_array\n", 6130 size); 6131 return -EACCES; 6132 } 6133 regs[value_regno] = *reg; 6134 add_scalar_to_reg(®s[value_regno], off); 6135 regs[value_regno].type = PTR_TO_INSN; 6136 } else { 6137 mark_reg_unknown(env, regs, value_regno); 6138 } 6139 } 6140 } else if (base_type(reg->type) == PTR_TO_MEM) { 6141 bool rdonly_mem = 
type_is_rdonly_mem(reg->type); 6142 bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED); 6143 6144 if (type_may_be_null(reg->type)) { 6145 verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno), 6146 reg_type_str(env, reg->type)); 6147 return -EACCES; 6148 } 6149 6150 if (t == BPF_WRITE && rdonly_mem) { 6151 verbose(env, "%s cannot write into %s\n", 6152 reg_arg_name(env, argno), reg_type_str(env, reg->type)); 6153 return -EACCES; 6154 } 6155 6156 if (t == BPF_WRITE && value_regno >= 0 && 6157 is_pointer_value(env, value_regno)) { 6158 verbose(env, "R%d leaks addr into mem\n", value_regno); 6159 return -EACCES; 6160 } 6161 6162 /* 6163 * Accesses to untrusted PTR_TO_MEM are done through probe 6164 * instructions, hence no need to check bounds in that case. 6165 */ 6166 if (!rdonly_untrusted) 6167 err = check_mem_region_access(env, reg, argno, off, size, 6168 reg->mem_size, false); 6169 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) 6170 mark_reg_unknown(env, regs, value_regno); 6171 } else if (reg->type == PTR_TO_CTX) { 6172 struct bpf_insn_access_aux info = { 6173 .reg_type = SCALAR_VALUE, 6174 .is_ldsx = is_ldsx, 6175 .log = &env->log, 6176 }; 6177 struct bpf_retval_range range; 6178 6179 if (t == BPF_WRITE && value_regno >= 0 && 6180 is_pointer_value(env, value_regno)) { 6181 verbose(env, "R%d leaks addr into ctx\n", value_regno); 6182 return -EACCES; 6183 } 6184 6185 err = check_ctx_access(env, insn_idx, reg, argno, off, size, t, &info); 6186 if (!err && t == BPF_READ && value_regno >= 0) { 6187 /* ctx access returns either a scalar, or a 6188 * PTR_TO_PACKET[_META,_END]. In the latter 6189 * case, we know the offset is zero. 
6190 */ 6191 if (info.reg_type == SCALAR_VALUE) { 6192 if (info.is_retval && get_func_retval_range(env->prog, &range)) { 6193 err = __mark_reg_s32_range(env, regs, value_regno, 6194 range.minval, range.maxval); 6195 if (err) 6196 return err; 6197 } else { 6198 mark_reg_unknown(env, regs, value_regno); 6199 } 6200 } else { 6201 mark_reg_known_zero(env, regs, 6202 value_regno); 6203 /* A load of ctx field could have different 6204 * actual load size with the one encoded in the 6205 * insn. When the dst is PTR, it is for sure not 6206 * a sub-register. 6207 */ 6208 regs[value_regno].subreg_def = DEF_NOT_SUBREG; 6209 if (base_type(info.reg_type) == PTR_TO_BTF_ID) { 6210 regs[value_regno].btf = info.btf; 6211 regs[value_regno].btf_id = info.btf_id; 6212 regs[value_regno].id = info.ref_id; 6213 } 6214 if (type_may_be_null(info.reg_type) && !regs[value_regno].id) 6215 regs[value_regno].id = ++env->id_gen; 6216 } 6217 regs[value_regno].type = info.reg_type; 6218 } 6219 6220 } else if (reg->type == PTR_TO_STACK) { 6221 /* Basic bounds checks. 
*/ 6222 err = check_stack_access_within_bounds(env, reg, argno, off, size, t); 6223 if (err) 6224 return err; 6225 6226 if (t == BPF_READ) 6227 err = check_stack_read(env, reg, argno, off, size, 6228 value_regno); 6229 else 6230 err = check_stack_write(env, reg, off, size, 6231 value_regno, insn_idx); 6232 } else if (reg_is_pkt_pointer(reg)) { 6233 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { 6234 verbose(env, "cannot write into packet\n"); 6235 return -EACCES; 6236 } 6237 if (t == BPF_WRITE && value_regno >= 0 && 6238 is_pointer_value(env, value_regno)) { 6239 verbose(env, "R%d leaks addr into packet\n", 6240 value_regno); 6241 return -EACCES; 6242 } 6243 err = check_packet_access(env, reg, argno, off, size, false); 6244 if (!err && t == BPF_READ && value_regno >= 0) 6245 mark_reg_unknown(env, regs, value_regno); 6246 } else if (reg->type == PTR_TO_FLOW_KEYS) { 6247 if (t == BPF_WRITE && value_regno >= 0 && 6248 is_pointer_value(env, value_regno)) { 6249 verbose(env, "R%d leaks addr into flow keys\n", 6250 value_regno); 6251 return -EACCES; 6252 } 6253 6254 err = check_flow_keys_access(env, reg, argno, off, size); 6255 if (!err && t == BPF_READ && value_regno >= 0) 6256 mark_reg_unknown(env, regs, value_regno); 6257 } else if (type_is_sk_pointer(reg->type)) { 6258 if (t == BPF_WRITE) { 6259 verbose(env, "%s cannot write into %s\n", 6260 reg_arg_name(env, argno), reg_type_str(env, reg->type)); 6261 return -EACCES; 6262 } 6263 err = check_sock_access(env, insn_idx, reg, argno, off, size, t); 6264 if (!err && value_regno >= 0) 6265 mark_reg_unknown(env, regs, value_regno); 6266 } else if (reg->type == PTR_TO_TP_BUFFER) { 6267 err = check_tp_buffer_access(env, reg, argno, off, size); 6268 if (!err && t == BPF_READ && value_regno >= 0) 6269 mark_reg_unknown(env, regs, value_regno); 6270 } else if (base_type(reg->type) == PTR_TO_BTF_ID && 6271 !type_may_be_null(reg->type)) { 6272 err = check_ptr_to_btf_access(env, regs, reg, argno, off, size, t, 
6273 value_regno); 6274 } else if (reg->type == CONST_PTR_TO_MAP) { 6275 err = check_ptr_to_map_access(env, regs, reg, argno, off, size, t, 6276 value_regno); 6277 } else if (base_type(reg->type) == PTR_TO_BUF && 6278 !type_may_be_null(reg->type)) { 6279 bool rdonly_mem = type_is_rdonly_mem(reg->type); 6280 u32 *max_access; 6281 6282 if (rdonly_mem) { 6283 if (t == BPF_WRITE) { 6284 verbose(env, "%s cannot write into %s\n", 6285 reg_arg_name(env, argno), reg_type_str(env, reg->type)); 6286 return -EACCES; 6287 } 6288 max_access = &env->prog->aux->max_rdonly_access; 6289 } else { 6290 max_access = &env->prog->aux->max_rdwr_access; 6291 } 6292 6293 err = check_buffer_access(env, reg, argno, off, size, false, 6294 max_access); 6295 6296 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) 6297 mark_reg_unknown(env, regs, value_regno); 6298 } else if (reg->type == PTR_TO_ARENA) { 6299 if (t == BPF_READ && value_regno >= 0) 6300 mark_reg_unknown(env, regs, value_regno); 6301 } else { 6302 verbose(env, "%s invalid mem access '%s'\n", reg_arg_name(env, argno), 6303 reg_type_str(env, reg->type)); 6304 return -EACCES; 6305 } 6306 6307 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 6308 regs[value_regno].type == SCALAR_VALUE) { 6309 if (!is_ldsx) 6310 /* b/h/w load zero-extends, mark upper bits as known 0 */ 6311 coerce_reg_to_size(®s[value_regno], size); 6312 else 6313 coerce_reg_to_size_sx(®s[value_regno], size); 6314 } 6315 return err; 6316 } 6317 6318 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, 6319 bool allow_trust_mismatch); 6320 6321 static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn, 6322 bool strict_alignment_once, bool is_ldsx, 6323 bool allow_trust_mismatch, const char *ctx) 6324 { 6325 struct bpf_verifier_state *vstate = env->cur_state; 6326 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 6327 struct bpf_reg_state *regs = cur_regs(env); 6328 enum 
bpf_reg_type src_reg_type; 6329 int err; 6330 6331 /* Handle stack arg read */ 6332 if (is_stack_arg_ldx(insn)) { 6333 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 6334 if (err) 6335 return err; 6336 return check_stack_arg_read(env, state, insn->off, insn->dst_reg); 6337 } 6338 6339 /* check src operand */ 6340 err = check_reg_arg(env, insn->src_reg, SRC_OP); 6341 if (err) 6342 return err; 6343 6344 /* check dst operand */ 6345 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 6346 if (err) 6347 return err; 6348 6349 src_reg_type = regs[insn->src_reg].type; 6350 6351 /* Check if (src_reg + off) is readable. The state of dst_reg will be 6352 * updated by this call. 6353 */ 6354 err = check_mem_access(env, env->insn_idx, regs + insn->src_reg, argno_from_reg(insn->src_reg), insn->off, 6355 BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, 6356 strict_alignment_once, is_ldsx); 6357 err = err ?: save_aux_ptr_type(env, src_reg_type, 6358 allow_trust_mismatch); 6359 err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], ctx); 6360 6361 return err; 6362 } 6363 6364 static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn, 6365 bool strict_alignment_once) 6366 { 6367 struct bpf_verifier_state *vstate = env->cur_state; 6368 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 6369 struct bpf_reg_state *regs = cur_regs(env); 6370 enum bpf_reg_type dst_reg_type; 6371 int err; 6372 6373 /* Handle stack arg write */ 6374 if (is_stack_arg_stx(insn)) { 6375 err = check_reg_arg(env, insn->src_reg, SRC_OP); 6376 if (err) 6377 return err; 6378 return check_stack_arg_write(env, state, insn->off, regs + insn->src_reg); 6379 } 6380 6381 /* check src1 operand */ 6382 err = check_reg_arg(env, insn->src_reg, SRC_OP); 6383 if (err) 6384 return err; 6385 6386 /* check src2 operand */ 6387 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 6388 if (err) 6389 return err; 6390 6391 dst_reg_type = regs[insn->dst_reg].type; 6392 6393 /* Check 
if (dst_reg + off) is writeable. */ 6394 err = check_mem_access(env, env->insn_idx, regs + insn->dst_reg, argno_from_reg(insn->dst_reg), insn->off, 6395 BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg, 6396 strict_alignment_once, false); 6397 err = err ?: save_aux_ptr_type(env, dst_reg_type, false); 6398 6399 return err; 6400 } 6401 6402 static int check_atomic_rmw(struct bpf_verifier_env *env, 6403 struct bpf_insn *insn) 6404 { 6405 struct bpf_reg_state *dst_reg; 6406 int load_reg; 6407 int err; 6408 6409 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) { 6410 verbose(env, "invalid atomic operand size\n"); 6411 return -EINVAL; 6412 } 6413 6414 /* check src1 operand */ 6415 err = check_reg_arg(env, insn->src_reg, SRC_OP); 6416 if (err) 6417 return err; 6418 6419 /* check src2 operand */ 6420 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 6421 if (err) 6422 return err; 6423 6424 if (insn->imm == BPF_CMPXCHG) { 6425 /* Check comparison of R0 with memory location */ 6426 const u32 aux_reg = BPF_REG_0; 6427 6428 err = check_reg_arg(env, aux_reg, SRC_OP); 6429 if (err) 6430 return err; 6431 6432 if (is_pointer_value(env, aux_reg)) { 6433 verbose(env, "R%d leaks addr into mem\n", aux_reg); 6434 return -EACCES; 6435 } 6436 } 6437 6438 if (is_pointer_value(env, insn->src_reg)) { 6439 verbose(env, "R%d leaks addr into mem\n", insn->src_reg); 6440 return -EACCES; 6441 } 6442 6443 if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { 6444 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", 6445 insn->dst_reg, 6446 reg_type_str(env, reg_state(env, insn->dst_reg)->type)); 6447 return -EACCES; 6448 } 6449 6450 if (insn->imm & BPF_FETCH) { 6451 if (insn->imm == BPF_CMPXCHG) 6452 load_reg = BPF_REG_0; 6453 else 6454 load_reg = insn->src_reg; 6455 6456 /* check and record load of old value */ 6457 err = check_reg_arg(env, load_reg, DST_OP); 6458 if (err) 6459 return err; 6460 } else { 6461 /* This instruction accesses a memory location but doesn't 
6462 * actually load it into a register. 6463 */ 6464 load_reg = -1; 6465 } 6466 6467 dst_reg = cur_regs(env) + insn->dst_reg; 6468 6469 /* Check whether we can read the memory, with second call for fetch 6470 * case to simulate the register fill. 6471 */ 6472 err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off, 6473 BPF_SIZE(insn->code), BPF_READ, -1, true, false); 6474 if (!err && load_reg >= 0) 6475 err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), 6476 insn->off, BPF_SIZE(insn->code), 6477 BPF_READ, load_reg, true, false); 6478 if (err) 6479 return err; 6480 6481 if (is_arena_reg(env, insn->dst_reg)) { 6482 err = save_aux_ptr_type(env, PTR_TO_ARENA, false); 6483 if (err) 6484 return err; 6485 } 6486 /* Check whether we can write into the same memory. */ 6487 err = check_mem_access(env, env->insn_idx, dst_reg, argno_from_reg(insn->dst_reg), insn->off, 6488 BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); 6489 if (err) 6490 return err; 6491 return 0; 6492 } 6493 6494 static int check_atomic_load(struct bpf_verifier_env *env, 6495 struct bpf_insn *insn) 6496 { 6497 int err; 6498 6499 err = check_load_mem(env, insn, true, false, false, "atomic_load"); 6500 if (err) 6501 return err; 6502 6503 if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) { 6504 verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n", 6505 insn->src_reg, 6506 reg_type_str(env, reg_state(env, insn->src_reg)->type)); 6507 return -EACCES; 6508 } 6509 6510 return 0; 6511 } 6512 6513 static int check_atomic_store(struct bpf_verifier_env *env, 6514 struct bpf_insn *insn) 6515 { 6516 int err; 6517 6518 err = check_store_reg(env, insn, true); 6519 if (err) 6520 return err; 6521 6522 if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) { 6523 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", 6524 insn->dst_reg, 6525 reg_type_str(env, reg_state(env, insn->dst_reg)->type)); 6526 return -EACCES; 6527 } 6528 6529 
return 0; 6530 } 6531 6532 static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn) 6533 { 6534 switch (insn->imm) { 6535 case BPF_ADD: 6536 case BPF_ADD | BPF_FETCH: 6537 case BPF_AND: 6538 case BPF_AND | BPF_FETCH: 6539 case BPF_OR: 6540 case BPF_OR | BPF_FETCH: 6541 case BPF_XOR: 6542 case BPF_XOR | BPF_FETCH: 6543 case BPF_XCHG: 6544 case BPF_CMPXCHG: 6545 return check_atomic_rmw(env, insn); 6546 case BPF_LOAD_ACQ: 6547 if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) { 6548 verbose(env, 6549 "64-bit load-acquires are only supported on 64-bit arches\n"); 6550 return -EOPNOTSUPP; 6551 } 6552 return check_atomic_load(env, insn); 6553 case BPF_STORE_REL: 6554 if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) { 6555 verbose(env, 6556 "64-bit store-releases are only supported on 64-bit arches\n"); 6557 return -EOPNOTSUPP; 6558 } 6559 return check_atomic_store(env, insn); 6560 default: 6561 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", 6562 insn->imm); 6563 return -EINVAL; 6564 } 6565 } 6566 6567 /* When register 'regno' is used to read the stack (either directly or through 6568 * a helper function) make sure that it's within stack boundary and, depending 6569 * on the access type and privileges, that all elements of the stack are 6570 * initialized. 6571 * 6572 * All registers that have been spilled on the stack in the slots within the 6573 * read offsets are marked as read. 6574 */ 6575 static int check_stack_range_initialized( 6576 struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int off, 6577 int access_size, bool zero_size_allowed, 6578 enum bpf_access_type type, struct bpf_call_arg_meta *meta) 6579 { 6580 struct bpf_func_state *state = bpf_func(env, reg); 6581 int err, min_off, max_off, i, j, slot, spi; 6582 /* Some accesses can write anything into the stack, others are 6583 * read-only. 
6584 */ 6585 bool clobber = type == BPF_WRITE; 6586 /* 6587 * Negative access_size signals global subprog/kfunc arg check where 6588 * STACK_POISON slots are acceptable. static stack liveness 6589 * might have determined that subprog doesn't read them, 6590 * but BTF based global subprog validation isn't accurate enough. 6591 */ 6592 bool allow_poison = access_size < 0 || clobber; 6593 6594 access_size = abs(access_size); 6595 6596 if (access_size == 0 && !zero_size_allowed) { 6597 verbose(env, "invalid zero-sized read\n"); 6598 return -EACCES; 6599 } 6600 6601 err = check_stack_access_within_bounds(env, reg, argno, off, access_size, type); 6602 if (err) 6603 return err; 6604 6605 6606 if (tnum_is_const(reg->var_off)) { 6607 min_off = max_off = reg->var_off.value + off; 6608 } else { 6609 /* Variable offset is prohibited for unprivileged mode for 6610 * simplicity since it requires corresponding support in 6611 * Spectre masking for stack ALU. 6612 * See also retrieve_ptr_limit(). 6613 */ 6614 if (!env->bypass_spec_v1) { 6615 char tn_buf[48]; 6616 6617 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 6618 verbose(env, "%s variable offset stack access prohibited for !root, var_off=%s\n", 6619 reg_arg_name(env, argno), tn_buf); 6620 return -EACCES; 6621 } 6622 /* Only initialized buffer on stack is allowed to be accessed 6623 * with variable offset. With uninitialized buffer it's hard to 6624 * guarantee that whole memory is marked as initialized on 6625 * helper return since specific bounds are unknown what may 6626 * cause uninitialized stack leaking. 6627 */ 6628 if (meta && meta->raw_mode) 6629 meta = NULL; 6630 6631 min_off = reg_smin(reg) + off; 6632 max_off = reg_smax(reg) + off; 6633 } 6634 6635 if (meta && meta->raw_mode) { 6636 /* Ensure we won't be overwriting dynptrs when simulating byte 6637 * by byte access in check_helper_call using meta.access_size. 
6638 * This would be a problem if we have a helper in the future 6639 * which takes: 6640 * 6641 * helper(uninit_mem, len, dynptr) 6642 * 6643 * Now, uninint_mem may overlap with dynptr pointer. Hence, it 6644 * may end up writing to dynptr itself when touching memory from 6645 * arg 1. This can be relaxed on a case by case basis for known 6646 * safe cases, but reject due to the possibilitiy of aliasing by 6647 * default. 6648 */ 6649 for (i = min_off; i < max_off + access_size; i++) { 6650 int stack_off = -i - 1; 6651 6652 spi = bpf_get_spi(i); 6653 /* raw_mode may write past allocated_stack */ 6654 if (state->allocated_stack <= stack_off) 6655 continue; 6656 if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) { 6657 verbose(env, "potential write to dynptr at off=%d disallowed\n", i); 6658 return -EACCES; 6659 } 6660 } 6661 meta->access_size = access_size; 6662 meta->regno = reg_from_argno(argno); 6663 return 0; 6664 } 6665 6666 for (i = min_off; i < max_off + access_size; i++) { 6667 u8 *stype; 6668 6669 slot = -i - 1; 6670 spi = slot / BPF_REG_SIZE; 6671 if (state->allocated_stack <= slot) { 6672 verbose(env, "allocated_stack too small\n"); 6673 return -EFAULT; 6674 } 6675 6676 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; 6677 if (*stype == STACK_MISC) 6678 goto mark; 6679 if ((*stype == STACK_ZERO) || 6680 (*stype == STACK_INVALID && env->allow_uninit_stack)) { 6681 if (clobber) { 6682 /* helper can write anything into the stack */ 6683 *stype = STACK_MISC; 6684 } 6685 goto mark; 6686 } 6687 6688 if (bpf_is_spilled_reg(&state->stack[spi]) && 6689 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || 6690 env->allow_ptr_leaks)) { 6691 if (clobber) { 6692 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); 6693 for (j = 0; j < BPF_REG_SIZE; j++) 6694 scrub_spilled_slot(&state->stack[spi].slot_type[j]); 6695 } 6696 goto mark; 6697 } 6698 6699 if (*stype == STACK_POISON) { 6700 if (allow_poison) 6701 goto mark; 6702 
verbose(env, "reading from stack %s off %d+%d size %d, slot poisoned by dead code elimination\n", 6703 reg_arg_name(env, argno), min_off, i - min_off, access_size); 6704 } else if (tnum_is_const(reg->var_off)) { 6705 verbose(env, "invalid read from stack %s off %d+%d size %d\n", 6706 reg_arg_name(env, argno), min_off, i - min_off, access_size); 6707 } else { 6708 char tn_buf[48]; 6709 6710 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 6711 verbose(env, "invalid read from stack %s var_off %s+%d size %d\n", 6712 reg_arg_name(env, argno), tn_buf, i - min_off, access_size); 6713 } 6714 return -EACCES; 6715 mark: 6716 ; 6717 } 6718 return 0; 6719 } 6720 6721 static int check_helper_mem_access(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 6722 int access_size, enum bpf_access_type access_type, 6723 bool zero_size_allowed, 6724 struct bpf_call_arg_meta *meta) 6725 { 6726 struct bpf_reg_state *regs = cur_regs(env); 6727 u32 *max_access; 6728 6729 switch (base_type(reg->type)) { 6730 case PTR_TO_PACKET: 6731 case PTR_TO_PACKET_META: 6732 return check_packet_access(env, reg, argno, 0, access_size, 6733 zero_size_allowed); 6734 case PTR_TO_MAP_KEY: 6735 if (access_type == BPF_WRITE) { 6736 verbose(env, "%s cannot write into %s\n", 6737 reg_arg_name(env, argno), reg_type_str(env, reg->type)); 6738 return -EACCES; 6739 } 6740 return check_mem_region_access(env, reg, argno, 0, access_size, 6741 reg->map_ptr->key_size, false); 6742 case PTR_TO_MAP_VALUE: 6743 if (check_map_access_type(env, reg, 0, access_size, access_type)) 6744 return -EACCES; 6745 return check_map_access(env, reg, argno, 0, access_size, 6746 zero_size_allowed, ACCESS_HELPER); 6747 case PTR_TO_MEM: 6748 if (type_is_rdonly_mem(reg->type)) { 6749 if (access_type == BPF_WRITE) { 6750 verbose(env, "%s cannot write into %s\n", 6751 reg_arg_name(env, argno), reg_type_str(env, reg->type)); 6752 return -EACCES; 6753 } 6754 } 6755 return check_mem_region_access(env, reg, argno, 0, 6756 
access_size, reg->mem_size, 6757 zero_size_allowed); 6758 case PTR_TO_BUF: 6759 if (type_is_rdonly_mem(reg->type)) { 6760 if (access_type == BPF_WRITE) { 6761 verbose(env, "%s cannot write into %s\n", 6762 reg_arg_name(env, argno), reg_type_str(env, reg->type)); 6763 return -EACCES; 6764 } 6765 6766 max_access = &env->prog->aux->max_rdonly_access; 6767 } else { 6768 max_access = &env->prog->aux->max_rdwr_access; 6769 } 6770 return check_buffer_access(env, reg, argno, 0, 6771 access_size, zero_size_allowed, 6772 max_access); 6773 case PTR_TO_STACK: 6774 return check_stack_range_initialized( 6775 env, reg, 6776 argno, 0, access_size, 6777 zero_size_allowed, access_type, meta); 6778 case PTR_TO_BTF_ID: 6779 return check_ptr_to_btf_access(env, regs, reg, argno, 0, 6780 access_size, access_type, -1); 6781 case PTR_TO_CTX: 6782 /* Only permit reading or writing syscall context using helper calls. */ 6783 if (is_var_ctx_off_allowed(env->prog)) { 6784 int err = check_mem_region_access(env, reg, argno, 0, access_size, U16_MAX, 6785 zero_size_allowed); 6786 if (err) 6787 return err; 6788 if (env->prog->aux->max_ctx_offset < reg_umax(reg) + access_size) 6789 env->prog->aux->max_ctx_offset = reg_umax(reg) + access_size; 6790 return 0; 6791 } 6792 fallthrough; 6793 default: /* scalar_value or invalid ptr */ 6794 /* Allow zero-byte read from NULL, regardless of pointer type */ 6795 if (zero_size_allowed && access_size == 0 && 6796 bpf_register_is_null(reg)) 6797 return 0; 6798 6799 verbose(env, "%s type=%s ", reg_arg_name(env, argno), 6800 reg_type_str(env, reg->type)); 6801 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); 6802 return -EACCES; 6803 } 6804 } 6805 6806 /* verify arguments to helpers or kfuncs consisting of a pointer and an access 6807 * size. 6808 * 6809 * @mem_reg contains the pointer, @size_reg contains the access size. 
6810 */ 6811 static int check_mem_size_reg(struct bpf_verifier_env *env, 6812 struct bpf_reg_state *mem_reg, 6813 struct bpf_reg_state *size_reg, argno_t mem_argno, 6814 argno_t size_argno, enum bpf_access_type access_type, 6815 bool zero_size_allowed, 6816 struct bpf_call_arg_meta *meta) 6817 { 6818 int err; 6819 6820 /* This is used to refine r0 return value bounds for helpers 6821 * that enforce this value as an upper bound on return values. 6822 * See do_refine_retval_range() for helpers that can refine 6823 * the return value. C type of helper is u32 so we pull register 6824 * bound from umax_value however, if negative verifier errors 6825 * out. Only upper bounds can be learned because retval is an 6826 * int type and negative retvals are allowed. 6827 */ 6828 meta->msize_max_value = reg_umax(size_reg); 6829 6830 /* The register is SCALAR_VALUE; the access check happens using 6831 * its boundaries. For unprivileged variable accesses, disable 6832 * raw mode so that the program is required to initialize all 6833 * the memory that the helper could just partially fill up. 
6834 */ 6835 if (!tnum_is_const(size_reg->var_off)) 6836 meta = NULL; 6837 6838 if (reg_smin(size_reg) < 0) { 6839 verbose(env, "%s min value is negative, either use unsigned or 'var &= const'\n", 6840 reg_arg_name(env, size_argno)); 6841 return -EACCES; 6842 } 6843 6844 if (reg_umin(size_reg) == 0 && !zero_size_allowed) { 6845 verbose(env, "%s invalid zero-sized read: u64=[%lld,%lld]\n", 6846 reg_arg_name(env, size_argno), reg_umin(size_reg), reg_umax(size_reg)); 6847 return -EACCES; 6848 } 6849 6850 if (reg_umax(size_reg) >= BPF_MAX_VAR_SIZ) { 6851 verbose(env, "%s unbounded memory access, use 'var &= const' or 'if (var < const)'\n", 6852 reg_arg_name(env, size_argno)); 6853 return -EACCES; 6854 } 6855 err = check_helper_mem_access(env, mem_reg, mem_argno, reg_umax(size_reg), 6856 access_type, zero_size_allowed, meta); 6857 if (!err) { 6858 int regno = reg_from_argno(size_argno); 6859 6860 if (regno >= 0) 6861 err = mark_chain_precision(env, regno); 6862 else 6863 err = mark_stack_arg_precision(env, arg_idx_from_argno(size_argno)); 6864 } 6865 return err; 6866 } 6867 6868 static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 6869 argno_t argno, u32 mem_size) 6870 { 6871 bool may_be_null = type_may_be_null(reg->type); 6872 struct bpf_reg_state saved_reg; 6873 int err; 6874 6875 if (bpf_register_is_null(reg)) 6876 return 0; 6877 6878 if (mem_size > S32_MAX) { 6879 verbose(env, "%s memory size %u is too large\n", 6880 reg_arg_name(env, argno), mem_size); 6881 return -EACCES; 6882 } 6883 6884 /* Assuming that the register contains a value check if the memory 6885 * access is safe. Temporarily save and restore the register's state as 6886 * the conversion shouldn't be visible to a caller. 6887 */ 6888 if (may_be_null) { 6889 saved_reg = *reg; 6890 mark_ptr_not_null_reg(reg); 6891 } 6892 6893 int size = base_type(reg->type) == PTR_TO_STACK ? 
-(int)mem_size : mem_size; 6894 6895 err = check_helper_mem_access(env, reg, argno, size, BPF_READ, true, NULL); 6896 err = err ?: check_helper_mem_access(env, reg, argno, size, BPF_WRITE, true, NULL); 6897 6898 if (may_be_null) 6899 *reg = saved_reg; 6900 6901 return err; 6902 } 6903 6904 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *mem_reg, 6905 struct bpf_reg_state *size_reg, argno_t mem_argno, argno_t size_argno) 6906 { 6907 bool may_be_null = type_may_be_null(mem_reg->type); 6908 struct bpf_reg_state saved_reg; 6909 struct bpf_call_arg_meta meta; 6910 int err; 6911 6912 memset(&meta, 0, sizeof(meta)); 6913 6914 if (may_be_null) { 6915 saved_reg = *mem_reg; 6916 mark_ptr_not_null_reg(mem_reg); 6917 } 6918 6919 err = check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_READ, true, &meta); 6920 err = err ?: check_mem_size_reg(env, mem_reg, size_reg, mem_argno, size_argno, BPF_WRITE, true, &meta); 6921 6922 if (may_be_null) 6923 *mem_reg = saved_reg; 6924 6925 return err; 6926 } 6927 6928 enum { 6929 PROCESS_SPIN_LOCK = (1 << 0), 6930 PROCESS_RES_LOCK = (1 << 1), 6931 PROCESS_LOCK_IRQ = (1 << 2), 6932 }; 6933 6934 /* Implementation details: 6935 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL. 6936 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL. 6937 * Two bpf_map_lookups (even with the same key) will have different reg->id. 6938 * Two separate bpf_obj_new will also have different reg->id. 6939 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier 6940 * clears reg->id after value_or_null->value transition, since the verifier only 6941 * cares about the range of access to valid map value pointer and doesn't care 6942 * about actual address of the map element. 6943 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps 6944 * reg->id > 0 after value_or_null->value transition. 
By doing so 6945 * two bpf_map_lookups will be considered two different pointers that 6946 * point to different bpf_spin_locks. Likewise for pointers to allocated objects 6947 * returned from bpf_obj_new. 6948 * The verifier allows taking only one bpf_spin_lock at a time to avoid 6949 * dead-locks. 6950 * Since only one bpf_spin_lock is allowed the checks are simpler than 6951 * reg_is_refcounted() logic. The verifier needs to remember only 6952 * one spin_lock instead of array of acquired_refs. 6953 * env->cur_state->active_locks remembers which map value element or allocated 6954 * object got locked and clears it after bpf_spin_unlock. 6955 */ 6956 static int process_spin_lock(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int flags) 6957 { 6958 bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK; 6959 const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin"; 6960 struct bpf_verifier_state *cur = env->cur_state; 6961 bool is_const = tnum_is_const(reg->var_off); 6962 bool is_irq = flags & PROCESS_LOCK_IRQ; 6963 u64 val = reg->var_off.value; 6964 struct bpf_map *map = NULL; 6965 struct btf *btf = NULL; 6966 struct btf_record *rec; 6967 u32 spin_lock_off; 6968 int err; 6969 6970 if (!is_const) { 6971 verbose(env, 6972 "%s doesn't have constant offset. %s_lock has to be at the constant offset\n", 6973 reg_arg_name(env, argno), lock_str); 6974 return -EINVAL; 6975 } 6976 if (reg->type == PTR_TO_MAP_VALUE) { 6977 map = reg->map_ptr; 6978 if (!map->btf) { 6979 verbose(env, 6980 "map '%s' has to have BTF in order to use %s_lock\n", 6981 map->name, lock_str); 6982 return -EINVAL; 6983 } 6984 } else { 6985 btf = reg->btf; 6986 } 6987 6988 rec = reg_btf_record(reg); 6989 if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) { 6990 verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local", 6991 map ? 
map->name : "kptr", lock_str); 6992 return -EINVAL; 6993 } 6994 spin_lock_off = is_res_lock ? rec->res_spin_lock_off : rec->spin_lock_off; 6995 if (spin_lock_off != val) { 6996 verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n", 6997 val, lock_str, spin_lock_off); 6998 return -EINVAL; 6999 } 7000 if (is_lock) { 7001 void *ptr; 7002 int type; 7003 7004 if (map) 7005 ptr = map; 7006 else 7007 ptr = btf; 7008 7009 if (!is_res_lock && cur->active_locks) { 7010 if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) { 7011 verbose(env, 7012 "Locking two bpf_spin_locks are not allowed\n"); 7013 return -EINVAL; 7014 } 7015 } else if (is_res_lock && cur->active_locks) { 7016 if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) { 7017 verbose(env, "Acquiring the same lock again, AA deadlock detected\n"); 7018 return -EINVAL; 7019 } 7020 } 7021 7022 if (is_res_lock && is_irq) 7023 type = REF_TYPE_RES_LOCK_IRQ; 7024 else if (is_res_lock) 7025 type = REF_TYPE_RES_LOCK; 7026 else 7027 type = REF_TYPE_LOCK; 7028 err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr); 7029 if (err < 0) { 7030 verbose(env, "Failed to acquire lock state\n"); 7031 return err; 7032 } 7033 } else { 7034 void *ptr; 7035 int type; 7036 7037 if (map) 7038 ptr = map; 7039 else 7040 ptr = btf; 7041 7042 if (!cur->active_locks) { 7043 verbose(env, "%s_unlock without taking a lock\n", lock_str); 7044 return -EINVAL; 7045 } 7046 7047 if (is_res_lock && is_irq) 7048 type = REF_TYPE_RES_LOCK_IRQ; 7049 else if (is_res_lock) 7050 type = REF_TYPE_RES_LOCK; 7051 else 7052 type = REF_TYPE_LOCK; 7053 if (!find_lock_state(cur, type, reg->id, ptr)) { 7054 verbose(env, "%s_unlock of different lock\n", lock_str); 7055 return -EINVAL; 7056 } 7057 if (reg->id != cur->active_lock_id || ptr != cur->active_lock_ptr) { 7058 verbose(env, "%s_unlock cannot be out of order\n", lock_str); 7059 return -EINVAL; 7060 } 7061 if (release_lock_state(cur, 
type, reg->id, ptr)) { 7062 verbose(env, "%s_unlock of different lock\n", lock_str); 7063 return -EINVAL; 7064 } 7065 7066 invalidate_non_owning_refs(env); 7067 } 7068 return 0; 7069 } 7070 7071 /* Check if @regno is a pointer to a specific field in a map value */ 7072 static int check_map_field_pointer(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 7073 enum btf_field_type field_type, 7074 struct bpf_map_desc *map_desc) 7075 { 7076 bool is_const = tnum_is_const(reg->var_off); 7077 struct bpf_map *map = reg->map_ptr; 7078 u64 val = reg->var_off.value; 7079 const char *struct_name = btf_field_type_name(field_type); 7080 int field_off = -1; 7081 7082 if (!is_const) { 7083 verbose(env, 7084 "%s doesn't have constant offset. %s has to be at the constant offset\n", 7085 reg_arg_name(env, argno), struct_name); 7086 return -EINVAL; 7087 } 7088 if (!map->btf) { 7089 verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name, 7090 struct_name); 7091 return -EINVAL; 7092 } 7093 if (!btf_record_has_field(map->record, field_type)) { 7094 verbose(env, "map '%s' has no valid %s\n", map->name, struct_name); 7095 return -EINVAL; 7096 } 7097 switch (field_type) { 7098 case BPF_TIMER: 7099 field_off = map->record->timer_off; 7100 break; 7101 case BPF_TASK_WORK: 7102 field_off = map->record->task_work_off; 7103 break; 7104 case BPF_WORKQUEUE: 7105 field_off = map->record->wq_off; 7106 break; 7107 default: 7108 verifier_bug(env, "unsupported BTF field type: %s\n", struct_name); 7109 return -EINVAL; 7110 } 7111 if (field_off != val) { 7112 verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n", 7113 val, struct_name, field_off); 7114 return -EINVAL; 7115 } 7116 if (map_desc->ptr) { 7117 verifier_bug(env, "Two map pointers in a %s helper", struct_name); 7118 return -EFAULT; 7119 } 7120 map_desc->uid = reg->map_uid; 7121 map_desc->ptr = map; 7122 return 0; 7123 } 7124 7125 static int process_timer_func(struct bpf_verifier_env *env, 
struct bpf_reg_state *reg, argno_t argno, 7126 struct bpf_map_desc *map) 7127 { 7128 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 7129 verbose(env, "bpf_timer cannot be used for PREEMPT_RT.\n"); 7130 return -EOPNOTSUPP; 7131 } 7132 return check_map_field_pointer(env, reg, argno, BPF_TIMER, map); 7133 } 7134 7135 static int process_timer_helper(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 7136 struct bpf_call_arg_meta *meta) 7137 { 7138 return process_timer_func(env, reg, argno, &meta->map); 7139 } 7140 7141 static int process_timer_kfunc(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 7142 struct bpf_kfunc_call_arg_meta *meta) 7143 { 7144 return process_timer_func(env, reg, argno, &meta->map); 7145 } 7146 7147 static int process_kptr_func(struct bpf_verifier_env *env, int regno, 7148 struct bpf_call_arg_meta *meta) 7149 { 7150 struct bpf_reg_state *reg = reg_state(env, regno); 7151 struct btf_field *kptr_field; 7152 struct bpf_map *map_ptr; 7153 struct btf_record *rec; 7154 u32 kptr_off; 7155 7156 if (type_is_ptr_alloc_obj(reg->type)) { 7157 rec = reg_btf_record(reg); 7158 } else { /* PTR_TO_MAP_VALUE */ 7159 map_ptr = reg->map_ptr; 7160 if (!map_ptr->btf) { 7161 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", 7162 map_ptr->name); 7163 return -EINVAL; 7164 } 7165 rec = map_ptr->record; 7166 meta->map.ptr = map_ptr; 7167 } 7168 7169 if (!tnum_is_const(reg->var_off)) { 7170 verbose(env, 7171 "R%d doesn't have constant offset. 
kptr has to be at the constant offset\n", 7172 regno); 7173 return -EINVAL; 7174 } 7175 7176 if (!btf_record_has_field(rec, BPF_KPTR)) { 7177 verbose(env, "R%d has no valid kptr\n", regno); 7178 return -EINVAL; 7179 } 7180 7181 kptr_off = reg->var_off.value; 7182 kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR); 7183 if (!kptr_field) { 7184 verbose(env, "off=%d doesn't point to kptr\n", kptr_off); 7185 return -EACCES; 7186 } 7187 if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) { 7188 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off); 7189 return -EACCES; 7190 } 7191 meta->kptr_field = kptr_field; 7192 return 0; 7193 } 7194 7195 /* 7196 * Validate dynptr arguments for helper, kfunc and subprog. 7197 * 7198 * @dynptr is both input and output. It is populated when the argument is 7199 * tagged with MEM_UNINIT (i.e., the dynptr argument that will be constructed) 7200 * and consumed when the argument is expecting to be an initialized dynptr. 7201 * @parent_id is used to track the referenced parent object (e.g., file or skb in 7202 * qdisc program) when constructing a dynptr. 7203 * 7204 * There are two register types representing a bpf_dynptr, one is PTR_TO_STACK 7205 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR. 7206 * 7207 * In both cases we deal with the first 8 bytes, but need to mark the next 8 7208 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of 7209 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object. 7210 * 7211 * Mutability of bpf_dynptr is at two levels: the dynptr and the memory the 7212 * dynptr points to. At the first level, the verifier will make sure a 7213 * CONST_PTR_TO_DYNPTR cannot be reinitialized or destroyed. The mutability of 7214 * a dynptr's view (i.e., start and offset) is not tracked as there is not such 7215 * use case. The second level is tracked using the upper bit of bpf_dynptr->size 7216 * and checked dynamically during runtime. 
7217 */ 7218 static int process_dynptr_func(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 7219 argno_t argno, int insn_idx, enum bpf_arg_type arg_type, 7220 struct ref_obj_desc *ref_obj, struct bpf_dynptr_desc *dynptr) 7221 { 7222 int spi, err = 0; 7223 7224 if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) { 7225 verbose(env, 7226 "%s expected pointer to stack or const struct bpf_dynptr\n", 7227 reg_arg_name(env, argno)); 7228 return -EINVAL; 7229 } 7230 7231 /* MEM_UNINIT - Points to memory that is an appropriate candidate for 7232 * constructing a mutable bpf_dynptr object. 7233 * 7234 * Currently, this is only possible with PTR_TO_STACK 7235 * pointing to a region of at least 16 bytes which doesn't 7236 * contain an existing bpf_dynptr. 7237 * 7238 * OBJ_RELEASE - Points to a initialized bpf_dynptr that will be 7239 * destroyed. 7240 * 7241 * None - Points to a initialized dynptr that cannot be 7242 * reinitialized or destroyed. However, the view of the 7243 * dynptr and the memory it points to may be mutated. 
7244 */ 7245 if (arg_type & MEM_UNINIT) { 7246 int i; 7247 7248 if (!is_dynptr_reg_valid_uninit(env, reg)) { 7249 verbose(env, "Dynptr has to be an uninitialized dynptr\n"); 7250 return -EINVAL; 7251 } 7252 7253 /* we write BPF_DW bits (8 bytes) at a time */ 7254 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { 7255 err = check_mem_access(env, insn_idx, reg, argno, 7256 i, BPF_DW, BPF_WRITE, -1, false, false); 7257 if (err) 7258 return err; 7259 } 7260 7261 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, ref_obj, dynptr); 7262 } else /* OBJ_RELEASE and None case from above */ { 7263 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */ 7264 if (reg->type == CONST_PTR_TO_DYNPTR && (arg_type & OBJ_RELEASE)) { 7265 verbose(env, "CONST_PTR_TO_DYNPTR cannot be released\n"); 7266 return -EINVAL; 7267 } 7268 7269 if (!is_dynptr_reg_valid_init(env, reg)) { 7270 verbose(env, "Expected an initialized dynptr as %s\n", 7271 reg_arg_name(env, argno)); 7272 return -EINVAL; 7273 } 7274 7275 /* Fold modifiers (in this case, OBJ_RELEASE) when checking expected type */ 7276 if (!is_dynptr_type_expected(env, reg, arg_type & ~OBJ_RELEASE)) { 7277 verbose(env, 7278 "Expected a dynptr of type %s as %s\n", 7279 dynptr_type_str(arg_to_dynptr_type(arg_type)), 7280 reg_arg_name(env, argno)); 7281 return -EINVAL; 7282 } 7283 7284 if (reg->type != CONST_PTR_TO_DYNPTR) { 7285 struct bpf_func_state *state = bpf_func(env, reg); 7286 7287 spi = dynptr_get_spi(env, reg); 7288 if (spi < 0) 7289 return spi; 7290 7291 /* 7292 * For CONST_PTR_TO_DYNPTR, reg is already scratched by check_reg_arg 7293 * in check_helper_call and mark_btf_func_reg_size in check_kfunc_call. 
7294 */ 7295 mark_stack_slots_scratched(env, spi, BPF_DYNPTR_NR_SLOTS); 7296 7297 reg = &state->stack[spi].spilled_ptr; 7298 } 7299 7300 if (dynptr) { 7301 dynptr->type = reg->dynptr.type; 7302 dynptr->id = reg->id; 7303 dynptr->parent_id = reg->parent_id; 7304 } 7305 } 7306 return err; 7307 } 7308 7309 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta) 7310 { 7311 return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); 7312 } 7313 7314 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta) 7315 { 7316 return meta->kfunc_flags & KF_ITER_NEW; 7317 } 7318 7319 7320 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta) 7321 { 7322 return meta->kfunc_flags & KF_ITER_DESTROY; 7323 } 7324 7325 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx, 7326 const struct btf_param *arg) 7327 { 7328 /* btf_check_iter_kfuncs() guarantees that first argument of any iter 7329 * kfunc is iter state pointer 7330 */ 7331 if (is_iter_kfunc(meta)) 7332 return arg_idx == 0; 7333 7334 /* iter passed as an argument to a generic kfunc */ 7335 return btf_param_match_suffix(meta->btf, arg, "__iter"); 7336 } 7337 7338 static int process_iter_arg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, int insn_idx, 7339 struct bpf_kfunc_call_arg_meta *meta) 7340 { 7341 struct bpf_func_state *state = bpf_func(env, reg); 7342 const struct btf_type *t; 7343 u32 arg_idx = arg_idx_from_argno(argno); 7344 int spi, err, i, nr_slots, btf_id; 7345 7346 if (reg->type != PTR_TO_STACK) { 7347 verbose(env, "%s expected pointer to an iterator on stack\n", 7348 reg_arg_name(env, argno)); 7349 return -EINVAL; 7350 } 7351 7352 /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs() 7353 * ensures struct convention, so we wouldn't need to do any BTF 7354 * validation here. 
But given iter state can be passed as a parameter 7355 * to any kfunc, if arg has "__iter" suffix, we need to be a bit more 7356 * conservative here. 7357 */ 7358 btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, arg_idx); 7359 if (btf_id < 0) { 7360 verbose(env, "expected valid iter pointer as %s\n", 7361 reg_arg_name(env, argno)); 7362 return -EINVAL; 7363 } 7364 t = btf_type_by_id(meta->btf, btf_id); 7365 nr_slots = t->size / BPF_REG_SIZE; 7366 7367 if (is_iter_new_kfunc(meta)) { 7368 /* bpf_iter_<type>_new() expects pointer to uninit iter state */ 7369 if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) { 7370 verbose(env, "expected uninitialized iter_%s as %s\n", 7371 iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno)); 7372 return -EINVAL; 7373 } 7374 7375 for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) { 7376 err = check_mem_access(env, insn_idx, reg, argno, 7377 i, BPF_DW, BPF_WRITE, -1, false, false); 7378 if (err) 7379 return err; 7380 } 7381 7382 err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots); 7383 if (err) 7384 return err; 7385 } else { 7386 /* iter_next() or iter_destroy(), as well as any kfunc 7387 * accepting iter argument, expect initialized iter state 7388 */ 7389 err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots); 7390 switch (err) { 7391 case 0: 7392 break; 7393 case -EINVAL: 7394 verbose(env, "expected an initialized iter_%s as %s\n", 7395 iter_type_str(meta->btf, btf_id), reg_arg_name(env, argno)); 7396 return err; 7397 case -EPROTO: 7398 verbose(env, "expected an RCU CS when using %s\n", meta->func_name); 7399 return err; 7400 default: 7401 return err; 7402 } 7403 7404 spi = iter_get_spi(env, reg, nr_slots); 7405 if (spi < 0) 7406 return spi; 7407 7408 mark_stack_slots_scratched(env, spi, nr_slots); 7409 7410 /* remember meta->iter info for process_iter_next_call() */ 7411 meta->iter.spi = spi; 7412 meta->iter.frameno = reg->frameno; 7413 update_ref_obj(&meta->ref_obj, 
&state->stack[spi].spilled_ptr); 7414 7415 if (is_iter_destroy_kfunc(meta)) { 7416 err = unmark_stack_slots_iter(env, reg, nr_slots); 7417 if (err) 7418 return err; 7419 } 7420 } 7421 7422 return 0; 7423 } 7424 7425 /* Look for a previous loop entry at insn_idx: nearest parent state 7426 * stopped at insn_idx with callsites matching those in cur->frame. 7427 */ 7428 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env, 7429 struct bpf_verifier_state *cur, 7430 int insn_idx) 7431 { 7432 struct bpf_verifier_state_list *sl; 7433 struct bpf_verifier_state *st; 7434 struct list_head *pos, *head; 7435 7436 /* Explored states are pushed in stack order, most recent states come first */ 7437 head = bpf_explored_state(env, insn_idx); 7438 list_for_each(pos, head) { 7439 sl = container_of(pos, struct bpf_verifier_state_list, node); 7440 /* If st->branches != 0 state is a part of current DFS verification path, 7441 * hence cur & st for a loop. 7442 */ 7443 st = &sl->state; 7444 if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) && 7445 st->dfs_depth < cur->dfs_depth) 7446 return st; 7447 } 7448 7449 return NULL; 7450 } 7451 7452 /* 7453 * Check if scalar registers are exact for the purpose of not widening. 
7454 * More lenient than regs_exact() 7455 */ 7456 static bool scalars_exact_for_widen(const struct bpf_reg_state *rold, 7457 const struct bpf_reg_state *rcur) 7458 { 7459 return !memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)); 7460 } 7461 7462 static void maybe_widen_reg(struct bpf_verifier_env *env, 7463 struct bpf_reg_state *rold, struct bpf_reg_state *rcur) 7464 { 7465 if (rold->type != SCALAR_VALUE) 7466 return; 7467 if (rold->type != rcur->type) 7468 return; 7469 if (rold->precise || rcur->precise || scalars_exact_for_widen(rold, rcur)) 7470 return; 7471 __mark_reg_unknown(env, rcur); 7472 } 7473 7474 static int widen_imprecise_scalars(struct bpf_verifier_env *env, 7475 struct bpf_verifier_state *old, 7476 struct bpf_verifier_state *cur) 7477 { 7478 struct bpf_func_state *fold, *fcur; 7479 int i, fr, num_slots; 7480 7481 for (fr = old->curframe; fr >= 0; fr--) { 7482 fold = old->frame[fr]; 7483 fcur = cur->frame[fr]; 7484 7485 for (i = 0; i < MAX_BPF_REG; i++) 7486 maybe_widen_reg(env, 7487 &fold->regs[i], 7488 &fcur->regs[i]); 7489 7490 num_slots = min(fold->allocated_stack / BPF_REG_SIZE, 7491 fcur->allocated_stack / BPF_REG_SIZE); 7492 for (i = 0; i < num_slots; i++) { 7493 if (!bpf_is_spilled_reg(&fold->stack[i]) || 7494 !bpf_is_spilled_reg(&fcur->stack[i])) 7495 continue; 7496 7497 maybe_widen_reg(env, 7498 &fold->stack[i].spilled_ptr, 7499 &fcur->stack[i].spilled_ptr); 7500 } 7501 } 7502 return 0; 7503 } 7504 7505 static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st, 7506 struct bpf_kfunc_call_arg_meta *meta) 7507 { 7508 int iter_frameno = meta->iter.frameno; 7509 int iter_spi = meta->iter.spi; 7510 7511 return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; 7512 } 7513 7514 /* process_iter_next_call() is called when verifier gets to iterator's next 7515 * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer 7516 * to it as just "iter_next()" in comments below. 
 *
 * BPF verifier relies on a crucial contract for any iter_next()
 * implementation: it should *eventually* return NULL, and once that happens
 * it should keep returning NULL. That is, once iterator exhausts elements to
 * iterate, it should never reset or spuriously return new elements.
 *
 * With the assumption of such contract, process_iter_next_call() simulates
 * a fork in the verifier state to validate loop logic correctness and safety
 * without having to simulate an infinite amount of iterations.
 *
 * In current state, we first assume that iter_next() returned NULL and
 * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
 * conditions we should not form an infinite loop and should eventually reach
 * exit.
 *
 * Besides that, we also fork current state and enqueue it for later
 * verification. In a forked state we keep iterator state as ACTIVE
 * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
 * also bump iteration depth to prevent erroneous infinite loop detection
 * later on (see iter_active_depths_differ() comment for details). In this
 * state we assume that we'll eventually loop back to another iter_next()
 * call (it could be in exactly same location or in some other instruction,
 * it doesn't matter, we don't make any unnecessary assumptions about this,
 * everything revolves around iterator state in a stack slot, not which
 * instruction is calling iter_next()). When that happens, we either will come
 * to iter_next() with equivalent state and can conclude that next iteration
 * will proceed in exactly the same way as we just verified, so it's safe to
 * assume that loop converges. If not, we'll go on another iteration
 * simulation with a different input state, until all possible starting states
 * are validated or we reach maximum number of instructions limit.
 *
 * This way, we will either exhaustively discover all possible input states
 * that iterator loop can start with and eventually will converge, or we'll
 * effectively regress into bounded loop simulation logic and either reach
 * maximum number of instructions if loop is not provably convergent, or there
 * is some statically known limit on number of iterations (e.g., if there is
 * an explicit `if n > 100 then break;` statement somewhere in the loop).
 *
 * Iteration convergence logic in is_state_visited() relies on exact
 * states comparison, which ignores read and precision marks.
 * This is necessary because read and precision marks are not finalized
 * while in the loop. Exact comparison might preclude convergence for
 * simple programs like below:
 *
 *     i = 0;
 *     while(iter_next(&it))
 *       i++;
 *
 * At each iteration step i++ would produce a new distinct state and
 * eventually instruction processing limit would be reached.
 *
 * To avoid such behavior, speculatively forget (widen) range for
 * imprecise scalar registers, if those registers were not precise at the
 * end of the previous iteration and do not match exactly.
 *
 * This is a conservative heuristic that allows verifying a wide range of
 * programs, however it precludes verification of programs that conjure an
 * imprecise value on the first loop iteration and use it as precise on a second.
 * For example, the following safe program would fail to verify:
 *
 *     struct bpf_num_iter it;
 *     int arr[10];
 *     int i = 0, a = 0;
 *     bpf_iter_num_new(&it, 0, 10);
 *     while (bpf_iter_num_next(&it)) {
 *       if (a == 0) {
 *         a = 1;
 *         i = 7; // Because i changed verifier would forget
 *                // its range on second loop entry.
 *       } else {
 *         arr[i] = 42; // This would fail to verify.
7588 * } 7589 * } 7590 * bpf_iter_num_destroy(&it); 7591 */ 7592 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, 7593 struct bpf_kfunc_call_arg_meta *meta) 7594 { 7595 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; 7596 struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; 7597 struct bpf_reg_state *cur_iter, *queued_iter; 7598 7599 BTF_TYPE_EMIT(struct bpf_iter); 7600 7601 cur_iter = get_iter_from_state(cur_st, meta); 7602 7603 if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE && 7604 cur_iter->iter.state != BPF_ITER_STATE_DRAINED) { 7605 verifier_bug(env, "unexpected iterator state %d (%s)", 7606 cur_iter->iter.state, iter_state_str(cur_iter->iter.state)); 7607 return -EFAULT; 7608 } 7609 7610 if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) { 7611 /* Because iter_next() call is a checkpoint is_state_visitied() 7612 * should guarantee parent state with same call sites and insn_idx. 7613 */ 7614 if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx || 7615 !same_callsites(cur_st->parent, cur_st)) { 7616 verifier_bug(env, "bad parent state for iter next call"); 7617 return -EFAULT; 7618 } 7619 /* Note cur_st->parent in the call below, it is necessary to skip 7620 * checkpoint created for cur_st by is_state_visited() 7621 * right at this instruction. 
7622 */ 7623 prev_st = find_prev_entry(env, cur_st->parent, insn_idx); 7624 /* branch out active iter state */ 7625 queued_st = push_stack(env, insn_idx + 1, insn_idx, false); 7626 if (IS_ERR(queued_st)) 7627 return PTR_ERR(queued_st); 7628 7629 queued_iter = get_iter_from_state(queued_st, meta); 7630 queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; 7631 queued_iter->iter.depth++; 7632 if (prev_st) 7633 widen_imprecise_scalars(env, prev_st, queued_st); 7634 7635 queued_fr = queued_st->frame[queued_st->curframe]; 7636 mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]); 7637 } 7638 7639 /* switch to DRAINED state, but keep the depth unchanged */ 7640 /* mark current iter state as drained and assume returned NULL */ 7641 cur_iter->iter.state = BPF_ITER_STATE_DRAINED; 7642 __mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]); 7643 7644 return 0; 7645 } 7646 7647 static bool arg_type_is_mem_size(enum bpf_arg_type type) 7648 { 7649 return type == ARG_CONST_SIZE || 7650 type == ARG_CONST_SIZE_OR_ZERO; 7651 } 7652 7653 static bool arg_type_is_raw_mem(enum bpf_arg_type type) 7654 { 7655 return base_type(type) == ARG_PTR_TO_MEM && 7656 type & MEM_UNINIT; 7657 } 7658 7659 static bool arg_type_is_release(enum bpf_arg_type type) 7660 { 7661 return type & OBJ_RELEASE; 7662 } 7663 7664 static bool arg_type_is_dynptr(enum bpf_arg_type type) 7665 { 7666 return base_type(type) == ARG_PTR_TO_DYNPTR; 7667 } 7668 7669 static int resolve_map_arg_type(struct bpf_verifier_env *env, 7670 const struct bpf_call_arg_meta *meta, 7671 enum bpf_arg_type *arg_type) 7672 { 7673 if (!meta->map.ptr) { 7674 /* kernel subsystem misconfigured verifier */ 7675 verifier_bug(env, "invalid map_ptr to access map->type"); 7676 return -EFAULT; 7677 } 7678 7679 switch (meta->map.ptr->map_type) { 7680 case BPF_MAP_TYPE_SOCKMAP: 7681 case BPF_MAP_TYPE_SOCKHASH: 7682 if (*arg_type == ARG_PTR_TO_MAP_VALUE) { 7683 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON; 7684 } else { 7685 verbose(env, "invalid arg_type for 
sockmap/sockhash\n"); 7686 return -EINVAL; 7687 } 7688 break; 7689 case BPF_MAP_TYPE_BLOOM_FILTER: 7690 if (meta->func_id == BPF_FUNC_map_peek_elem) 7691 *arg_type = ARG_PTR_TO_MAP_VALUE; 7692 break; 7693 default: 7694 break; 7695 } 7696 return 0; 7697 } 7698 7699 struct bpf_reg_types { 7700 const enum bpf_reg_type types[10]; 7701 u32 *btf_id; 7702 }; 7703 7704 static const struct bpf_reg_types sock_types = { 7705 .types = { 7706 PTR_TO_SOCK_COMMON, 7707 PTR_TO_SOCKET, 7708 PTR_TO_TCP_SOCK, 7709 PTR_TO_XDP_SOCK, 7710 }, 7711 }; 7712 7713 #ifdef CONFIG_NET 7714 static const struct bpf_reg_types btf_id_sock_common_types = { 7715 .types = { 7716 PTR_TO_SOCK_COMMON, 7717 PTR_TO_SOCKET, 7718 PTR_TO_TCP_SOCK, 7719 PTR_TO_XDP_SOCK, 7720 PTR_TO_BTF_ID, 7721 PTR_TO_BTF_ID | PTR_TRUSTED, 7722 }, 7723 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], 7724 }; 7725 #endif 7726 7727 static const struct bpf_reg_types mem_types = { 7728 .types = { 7729 PTR_TO_STACK, 7730 PTR_TO_PACKET, 7731 PTR_TO_PACKET_META, 7732 PTR_TO_MAP_KEY, 7733 PTR_TO_MAP_VALUE, 7734 PTR_TO_MEM, 7735 PTR_TO_MEM | MEM_RINGBUF, 7736 PTR_TO_BUF, 7737 PTR_TO_BTF_ID | PTR_TRUSTED, 7738 PTR_TO_CTX, 7739 }, 7740 }; 7741 7742 static const struct bpf_reg_types spin_lock_types = { 7743 .types = { 7744 PTR_TO_MAP_VALUE, 7745 PTR_TO_BTF_ID | MEM_ALLOC, 7746 } 7747 }; 7748 7749 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; 7750 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; 7751 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; 7752 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } }; 7753 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; 7754 static const struct bpf_reg_types btf_ptr_types = { 7755 .types = { 7756 PTR_TO_BTF_ID, 7757 PTR_TO_BTF_ID | PTR_TRUSTED, 7758 PTR_TO_BTF_ID | MEM_RCU, 7759 }, 7760 }; 7761 static const struct 
bpf_reg_types percpu_btf_ptr_types = { 7762 .types = { 7763 PTR_TO_BTF_ID | MEM_PERCPU, 7764 PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU, 7765 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED, 7766 } 7767 }; 7768 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; 7769 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; 7770 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; 7771 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; 7772 static const struct bpf_reg_types kptr_xchg_dest_types = { 7773 .types = { 7774 PTR_TO_MAP_VALUE, 7775 PTR_TO_BTF_ID | MEM_ALLOC, 7776 PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF, 7777 PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU, 7778 } 7779 }; 7780 static const struct bpf_reg_types dynptr_types = { 7781 .types = { 7782 PTR_TO_STACK, 7783 CONST_PTR_TO_DYNPTR, 7784 } 7785 }; 7786 7787 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { 7788 [ARG_PTR_TO_MAP_KEY] = &mem_types, 7789 [ARG_PTR_TO_MAP_VALUE] = &mem_types, 7790 [ARG_CONST_SIZE] = &scalar_types, 7791 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, 7792 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, 7793 [ARG_CONST_MAP_PTR] = &const_map_ptr_types, 7794 [ARG_PTR_TO_CTX] = &context_types, 7795 [ARG_PTR_TO_SOCK_COMMON] = &sock_types, 7796 #ifdef CONFIG_NET 7797 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, 7798 #endif 7799 [ARG_PTR_TO_SOCKET] = &fullsock_types, 7800 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, 7801 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, 7802 [ARG_PTR_TO_MEM] = &mem_types, 7803 [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, 7804 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, 7805 [ARG_PTR_TO_FUNC] = &func_ptr_types, 7806 [ARG_PTR_TO_STACK] = &stack_ptr_types, 7807 [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, 7808 [ARG_PTR_TO_TIMER] = &timer_types, 7809 [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types, 7810 
[ARG_PTR_TO_DYNPTR] = &dynptr_types, 7811 }; 7812 7813 static int check_reg_type(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 7814 enum bpf_arg_type arg_type, 7815 const u32 *arg_btf_id, 7816 struct bpf_call_arg_meta *meta) 7817 { 7818 enum bpf_reg_type expected, type = reg->type; 7819 const struct bpf_reg_types *compatible; 7820 int i, j, err; 7821 7822 compatible = compatible_reg_types[base_type(arg_type)]; 7823 if (!compatible) { 7824 verifier_bug(env, "unsupported arg type %d", arg_type); 7825 return -EFAULT; 7826 } 7827 7828 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY, 7829 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY 7830 * 7831 * Same for MAYBE_NULL: 7832 * 7833 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, 7834 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL 7835 * 7836 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type. 7837 * 7838 * Therefore we fold these flags depending on the arg_type before comparison. 
7839 */ 7840 if (arg_type & MEM_RDONLY) 7841 type &= ~MEM_RDONLY; 7842 if (arg_type & PTR_MAYBE_NULL) 7843 type &= ~PTR_MAYBE_NULL; 7844 if (base_type(arg_type) == ARG_PTR_TO_MEM) 7845 type &= ~DYNPTR_TYPE_FLAG_MASK; 7846 7847 /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */ 7848 if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && reg_from_argno(argno) == BPF_REG_2) { 7849 type &= ~MEM_ALLOC; 7850 type &= ~MEM_PERCPU; 7851 } 7852 7853 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { 7854 expected = compatible->types[i]; 7855 if (expected == NOT_INIT) 7856 break; 7857 7858 if (type == expected) 7859 goto found; 7860 } 7861 7862 verbose(env, "%s type=%s expected=", reg_arg_name(env, argno), reg_type_str(env, reg->type)); 7863 for (j = 0; j + 1 < i; j++) 7864 verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); 7865 verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); 7866 return -EACCES; 7867 7868 found: 7869 if (base_type(reg->type) != PTR_TO_BTF_ID) 7870 return 0; 7871 7872 if (compatible == &mem_types) { 7873 if (!(arg_type & MEM_RDONLY)) { 7874 verbose(env, 7875 "%s() may write into memory pointed by %s type=%s\n", 7876 func_id_name(meta->func_id), 7877 reg_arg_name(env, argno), reg_type_str(env, reg->type)); 7878 return -EACCES; 7879 } 7880 return 0; 7881 } 7882 7883 switch ((int)reg->type) { 7884 case PTR_TO_BTF_ID: 7885 case PTR_TO_BTF_ID | PTR_TRUSTED: 7886 case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL: 7887 case PTR_TO_BTF_ID | MEM_RCU: 7888 case PTR_TO_BTF_ID | PTR_MAYBE_NULL: 7889 case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU: 7890 { 7891 /* For bpf_sk_release, it needs to match against first member 7892 * 'struct sock_common', hence make an exception for it. This 7893 * allows bpf_sk_release to work for multiple socket types. 
7894 */ 7895 bool strict_type_match = arg_type_is_release(arg_type) && 7896 meta->func_id != BPF_FUNC_sk_release; 7897 7898 if (type_may_be_null(reg->type) && 7899 (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) { 7900 verbose(env, "Possibly NULL pointer passed to helper %s\n", 7901 reg_arg_name(env, argno)); 7902 return -EACCES; 7903 } 7904 7905 if (!arg_btf_id) { 7906 if (!compatible->btf_id) { 7907 verifier_bug(env, "missing arg compatible BTF ID"); 7908 return -EFAULT; 7909 } 7910 arg_btf_id = compatible->btf_id; 7911 } 7912 7913 if (meta->func_id == BPF_FUNC_kptr_xchg) { 7914 if (map_kptr_match_type(env, meta->kptr_field, reg, reg_from_argno(argno))) 7915 return -EACCES; 7916 } else { 7917 if (arg_btf_id == BPF_PTR_POISON) { 7918 verbose(env, "verifier internal error:"); 7919 verbose(env, "%s has non-overwritten BPF_PTR_POISON type\n", 7920 reg_arg_name(env, argno)); 7921 return -EACCES; 7922 } 7923 7924 err = __check_ptr_off_reg(env, reg, argno, true); 7925 if (err) 7926 return err; 7927 7928 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 7929 reg->var_off.value, btf_vmlinux, *arg_btf_id, 7930 strict_type_match)) { 7931 verbose(env, "%s is of type %s but %s is expected\n", 7932 reg_arg_name(env, argno), 7933 btf_type_name(reg->btf, reg->btf_id), 7934 btf_type_name(btf_vmlinux, *arg_btf_id)); 7935 return -EACCES; 7936 } 7937 } 7938 break; 7939 } 7940 case PTR_TO_BTF_ID | MEM_ALLOC: 7941 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC: 7942 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF: 7943 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU: 7944 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock && 7945 meta->func_id != BPF_FUNC_kptr_xchg) { 7946 verifier_bug(env, "unimplemented handling of MEM_ALLOC"); 7947 return -EFAULT; 7948 } 7949 /* Check if local kptr in src arg matches kptr in dst arg */ 7950 if (meta->func_id == BPF_FUNC_kptr_xchg) { 7951 int regno = reg_from_argno(argno); 7952 7953 if (regno 
== BPF_REG_2 && 7954 map_kptr_match_type(env, meta->kptr_field, reg, regno)) 7955 return -EACCES; 7956 } 7957 break; 7958 case PTR_TO_BTF_ID | MEM_PERCPU: 7959 case PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU: 7960 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED: 7961 /* Handled by helper specific checks */ 7962 break; 7963 default: 7964 verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match"); 7965 return -EFAULT; 7966 } 7967 return 0; 7968 } 7969 7970 static struct btf_field * 7971 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields) 7972 { 7973 struct btf_field *field; 7974 struct btf_record *rec; 7975 7976 rec = reg_btf_record(reg); 7977 if (!rec) 7978 return NULL; 7979 7980 field = btf_record_find(rec, off, fields); 7981 if (!field) 7982 return NULL; 7983 7984 return field; 7985 } 7986 7987 static int check_func_arg_reg_off(struct bpf_verifier_env *env, 7988 const struct bpf_reg_state *reg, argno_t argno, 7989 enum bpf_arg_type arg_type) 7990 { 7991 u32 type = reg->type; 7992 7993 /* When referenced register is passed to release function, its fixed 7994 * offset must be 0. 7995 * 7996 * We will check arg_type_is_release reg has id when storing 7997 * meta->release_regno. 7998 */ 7999 if (arg_type_is_release(arg_type)) { 8000 /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it 8001 * may not directly point to the object being released, but to 8002 * dynptr pointing to such object, which might be at some offset 8003 * on the stack. In that case, we simply to fallback to the 8004 * default handling. 8005 */ 8006 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK) 8007 return 0; 8008 8009 /* Doing check_ptr_off_reg check for the offset will catch this 8010 * because fixed_off_ok is false, but checking here allows us 8011 * to give the user a better error message. 
8012 */ 8013 if (!tnum_is_const(reg->var_off) || reg->var_off.value != 0) { 8014 verbose(env, "%s must have zero offset when passed to release func or trusted arg to kfunc\n", 8015 reg_arg_name(env, argno)); 8016 return -EINVAL; 8017 } 8018 } 8019 8020 switch (type) { 8021 /* Pointer types where both fixed and variable offset is explicitly allowed: */ 8022 case PTR_TO_STACK: 8023 case PTR_TO_PACKET: 8024 case PTR_TO_PACKET_META: 8025 case PTR_TO_MAP_KEY: 8026 case PTR_TO_MAP_VALUE: 8027 case PTR_TO_MEM: 8028 case PTR_TO_MEM | MEM_RDONLY: 8029 case PTR_TO_MEM | MEM_RINGBUF: 8030 case PTR_TO_BUF: 8031 case PTR_TO_BUF | MEM_RDONLY: 8032 case PTR_TO_ARENA: 8033 case SCALAR_VALUE: 8034 return 0; 8035 /* All the rest must be rejected, except PTR_TO_BTF_ID which allows 8036 * fixed offset. 8037 */ 8038 case PTR_TO_BTF_ID: 8039 case PTR_TO_BTF_ID | MEM_ALLOC: 8040 case PTR_TO_BTF_ID | PTR_TRUSTED: 8041 case PTR_TO_BTF_ID | MEM_RCU: 8042 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF: 8043 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU: 8044 /* When referenced PTR_TO_BTF_ID is passed to release function, 8045 * its fixed offset must be 0. In the other cases, fixed offset 8046 * can be non-zero. This was already checked above. So pass 8047 * fixed_off_ok as true to allow fixed offset for all other 8048 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we 8049 * still need to do checks instead of returning. 8050 */ 8051 return __check_ptr_off_reg(env, reg, argno, true); 8052 case PTR_TO_CTX: 8053 /* 8054 * Allow fixed and variable offsets for syscall context, but 8055 * only when the argument is passed as memory, not ctx, 8056 * otherwise we may get modified ctx in tail called programs and 8057 * global subprogs (that may act as extension prog hooks). 
8058 */ 8059 if (arg_type != ARG_PTR_TO_CTX && is_var_ctx_off_allowed(env->prog)) 8060 return 0; 8061 fallthrough; 8062 default: 8063 return __check_ptr_off_reg(env, reg, argno, false); 8064 } 8065 } 8066 8067 static int check_arg_const_str(struct bpf_verifier_env *env, 8068 struct bpf_reg_state *reg, argno_t argno) 8069 { 8070 struct bpf_map *map = reg->map_ptr; 8071 int err; 8072 int map_off; 8073 u64 map_addr; 8074 char *str_ptr; 8075 8076 if (reg->type != PTR_TO_MAP_VALUE) 8077 return -EINVAL; 8078 8079 if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { 8080 verbose(env, "%s points to insn_array map which cannot be used as const string\n", 8081 reg_arg_name(env, argno)); 8082 return -EACCES; 8083 } 8084 8085 if (!bpf_map_is_rdonly(map)) { 8086 verbose(env, "%s does not point to a readonly map'\n", reg_arg_name(env, argno)); 8087 return -EACCES; 8088 } 8089 8090 if (!tnum_is_const(reg->var_off)) { 8091 verbose(env, "%s is not a constant address'\n", reg_arg_name(env, argno)); 8092 return -EACCES; 8093 } 8094 8095 if (!map->ops->map_direct_value_addr) { 8096 verbose(env, "no direct value access support for this map type\n"); 8097 return -EACCES; 8098 } 8099 8100 err = check_map_access(env, reg, argno, 0, 8101 map->value_size - reg->var_off.value, false, 8102 ACCESS_HELPER); 8103 if (err) 8104 return err; 8105 8106 map_off = reg->var_off.value; 8107 err = map->ops->map_direct_value_addr(map, &map_addr, map_off); 8108 if (err) { 8109 verbose(env, "direct value access on string failed\n"); 8110 return err; 8111 } 8112 8113 str_ptr = (char *)(long)(map_addr); 8114 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { 8115 verbose(env, "string is not zero-terminated\n"); 8116 return -EINVAL; 8117 } 8118 return 0; 8119 } 8120 8121 /* Returns constant key value in `value` if possible, else negative error */ 8122 static int get_constant_map_key(struct bpf_verifier_env *env, 8123 struct bpf_reg_state *key, 8124 u32 key_size, 8125 s64 *value) 8126 { 8127 struct 
bpf_func_state *state = bpf_func(env, key); 8128 struct bpf_reg_state *reg; 8129 int slot, spi, off; 8130 int spill_size = 0; 8131 int zero_size = 0; 8132 int stack_off; 8133 int i, err; 8134 u8 *stype; 8135 8136 if (!env->bpf_capable) 8137 return -EOPNOTSUPP; 8138 if (key->type != PTR_TO_STACK) 8139 return -EOPNOTSUPP; 8140 if (!tnum_is_const(key->var_off)) 8141 return -EOPNOTSUPP; 8142 8143 stack_off = key->var_off.value; 8144 slot = -stack_off - 1; 8145 spi = slot / BPF_REG_SIZE; 8146 off = slot % BPF_REG_SIZE; 8147 stype = state->stack[spi].slot_type; 8148 8149 /* First handle precisely tracked STACK_ZERO */ 8150 for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--) 8151 zero_size++; 8152 if (zero_size >= key_size) { 8153 *value = 0; 8154 return 0; 8155 } 8156 8157 /* Check that stack contains a scalar spill of expected size */ 8158 if (!bpf_is_spilled_scalar_reg(&state->stack[spi])) 8159 return -EOPNOTSUPP; 8160 for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--) 8161 spill_size++; 8162 if (spill_size != key_size) 8163 return -EOPNOTSUPP; 8164 8165 reg = &state->stack[spi].spilled_ptr; 8166 if (!tnum_is_const(reg->var_off)) 8167 /* Stack value not statically known */ 8168 return -EOPNOTSUPP; 8169 8170 /* We are relying on a constant value. So mark as precise 8171 * to prevent pruning on it. 
8172 */ 8173 bpf_bt_set_frame_slot(&env->bt, key->frameno, spi); 8174 err = mark_chain_precision_batch(env, env->cur_state); 8175 if (err < 0) 8176 return err; 8177 8178 *value = reg->var_off.value; 8179 return 0; 8180 } 8181 8182 static bool can_elide_value_nullness(const struct bpf_map *map); 8183 8184 static int check_func_arg(struct bpf_verifier_env *env, u32 arg, 8185 struct bpf_call_arg_meta *meta, 8186 const struct bpf_func_proto *fn, 8187 int insn_idx) 8188 { 8189 u32 regno = BPF_REG_1 + arg; 8190 struct bpf_reg_state *reg = reg_state(env, regno); 8191 enum bpf_arg_type arg_type = fn->arg_type[arg]; 8192 argno_t argno = argno_from_arg(arg + 1); 8193 enum bpf_reg_type type = reg->type; 8194 u32 *arg_btf_id = NULL; 8195 u32 key_size; 8196 int err = 0; 8197 8198 if (arg_type == ARG_DONTCARE) 8199 return 0; 8200 8201 err = check_reg_arg(env, regno, SRC_OP); 8202 if (err) 8203 return err; 8204 8205 if (arg_type == ARG_ANYTHING) { 8206 if (is_pointer_value(env, regno)) { 8207 verbose(env, "R%d leaks addr into helper function\n", 8208 regno); 8209 return -EACCES; 8210 } 8211 return 0; 8212 } 8213 8214 if (type_is_pkt_pointer(type) && 8215 !may_access_direct_pkt_data(env, meta, BPF_READ)) { 8216 verbose(env, "helper access to the packet is not allowed\n"); 8217 return -EACCES; 8218 } 8219 8220 if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) { 8221 err = resolve_map_arg_type(env, meta, &arg_type); 8222 if (err) 8223 return err; 8224 } 8225 8226 if (bpf_register_is_null(reg) && type_may_be_null(arg_type)) 8227 /* A NULL register has a SCALAR_VALUE type, so skip 8228 * type checking. 8229 */ 8230 goto skip_type_check; 8231 8232 /* arg_btf_id and arg_size are in a union. 
*/ 8233 if (base_type(arg_type) == ARG_PTR_TO_BTF_ID || 8234 base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK) 8235 arg_btf_id = fn->arg_btf_id[arg]; 8236 8237 err = check_reg_type(env, reg, argno_from_reg(regno), arg_type, arg_btf_id, meta); 8238 if (err) 8239 return err; 8240 8241 err = check_func_arg_reg_off(env, reg, argno_from_reg(regno), arg_type); 8242 if (err) 8243 return err; 8244 8245 skip_type_check: 8246 if (arg_type_is_release(arg_type) && !arg_type_is_dynptr(arg_type) && 8247 !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) { 8248 verbose(env, "release helper %s expects referenced PTR_TO_BTF_ID passed to %s\n", 8249 func_id_name(meta->func_id), reg_arg_name(env, argno)); 8250 return -EINVAL; 8251 } 8252 8253 if (reg_is_referenced(env, reg)) 8254 update_ref_obj(&meta->ref_obj, reg); 8255 8256 switch (base_type(arg_type)) { 8257 case ARG_CONST_MAP_PTR: 8258 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ 8259 if (meta->map.ptr) { 8260 /* Use map_uid (which is unique id of inner map) to reject: 8261 * inner_map1 = bpf_map_lookup_elem(outer_map, key1) 8262 * inner_map2 = bpf_map_lookup_elem(outer_map, key2) 8263 * if (inner_map1 && inner_map2) { 8264 * timer = bpf_map_lookup_elem(inner_map1); 8265 * if (timer) 8266 * // mismatch would have been allowed 8267 * bpf_timer_init(timer, inner_map2); 8268 * } 8269 * 8270 * Comparing map_ptr is enough to distinguish normal and outer maps. 
8271 */ 8272 if (meta->map.ptr != reg->map_ptr || 8273 meta->map.uid != reg->map_uid) { 8274 verbose(env, 8275 "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n", 8276 meta->map.uid, reg->map_uid); 8277 return -EINVAL; 8278 } 8279 } 8280 meta->map.ptr = reg->map_ptr; 8281 meta->map.uid = reg->map_uid; 8282 break; 8283 case ARG_PTR_TO_MAP_KEY: 8284 /* bpf_map_xxx(..., map_ptr, ..., key) call: 8285 * check that [key, key + map->key_size) are within 8286 * stack limits and initialized 8287 */ 8288 if (!meta->map.ptr) { 8289 /* in function declaration map_ptr must come before 8290 * map_key, so that it's verified and known before 8291 * we have to check map_key here. Otherwise it means 8292 * that kernel subsystem misconfigured verifier 8293 */ 8294 verifier_bug(env, "invalid map_ptr to access map->key"); 8295 return -EFAULT; 8296 } 8297 key_size = meta->map.ptr->key_size; 8298 err = check_helper_mem_access(env, reg, argno_from_reg(regno), key_size, BPF_READ, false, NULL); 8299 if (err) 8300 return err; 8301 if (can_elide_value_nullness(meta->map.ptr)) { 8302 err = get_constant_map_key(env, reg, key_size, &meta->const_map_key); 8303 if (err < 0) { 8304 meta->const_map_key = -1; 8305 if (err == -EOPNOTSUPP) 8306 err = 0; 8307 else 8308 return err; 8309 } 8310 } 8311 break; 8312 case ARG_PTR_TO_MAP_VALUE: 8313 if (type_may_be_null(arg_type) && bpf_register_is_null(reg)) 8314 return 0; 8315 8316 /* bpf_map_xxx(..., map_ptr, ..., value) call: 8317 * check [value, value + map->value_size) validity 8318 */ 8319 if (!meta->map.ptr) { 8320 /* kernel subsystem misconfigured verifier */ 8321 verifier_bug(env, "invalid map_ptr to access map->value"); 8322 return -EFAULT; 8323 } 8324 meta->raw_mode = arg_type & MEM_UNINIT; 8325 err = check_helper_mem_access(env, reg, argno_from_reg(regno), meta->map.ptr->value_size, 8326 arg_type & MEM_WRITE ? 
BPF_WRITE : BPF_READ, 8327 false, meta); 8328 break; 8329 case ARG_PTR_TO_PERCPU_BTF_ID: 8330 if (!reg->btf_id) { 8331 verbose(env, "Helper has invalid btf_id in R%d\n", regno); 8332 return -EACCES; 8333 } 8334 meta->ret_btf = reg->btf; 8335 meta->ret_btf_id = reg->btf_id; 8336 break; 8337 case ARG_PTR_TO_SPIN_LOCK: 8338 if (in_rbtree_lock_required_cb(env)) { 8339 verbose(env, "can't spin_{lock,unlock} in rbtree cb\n"); 8340 return -EACCES; 8341 } 8342 if (meta->func_id == BPF_FUNC_spin_lock) { 8343 err = process_spin_lock(env, reg, argno_from_reg(regno), PROCESS_SPIN_LOCK); 8344 if (err) 8345 return err; 8346 } else if (meta->func_id == BPF_FUNC_spin_unlock) { 8347 err = process_spin_lock(env, reg, argno_from_reg(regno), 0); 8348 if (err) 8349 return err; 8350 } else { 8351 verifier_bug(env, "spin lock arg on unexpected helper"); 8352 return -EFAULT; 8353 } 8354 break; 8355 case ARG_PTR_TO_TIMER: 8356 err = process_timer_helper(env, reg, argno_from_reg(regno), meta); 8357 if (err) 8358 return err; 8359 break; 8360 case ARG_PTR_TO_FUNC: 8361 meta->subprogno = reg->subprogno; 8362 break; 8363 case ARG_PTR_TO_MEM: 8364 /* The access to this pointer is only checked when we hit the 8365 * next is_mem_size argument below. 8366 */ 8367 meta->raw_mode = arg_type & MEM_UNINIT; 8368 if (arg_type & MEM_FIXED_SIZE) { 8369 err = check_helper_mem_access(env, reg, argno_from_reg(regno), fn->arg_size[arg], 8370 arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ, 8371 false, meta); 8372 if (err) 8373 return err; 8374 if (arg_type & MEM_ALIGNED) 8375 err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true); 8376 } 8377 break; 8378 case ARG_CONST_SIZE: 8379 err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1), 8380 argno_from_reg(regno), 8381 fn->arg_type[arg - 1] & MEM_WRITE ? 
8382 BPF_WRITE : BPF_READ, 8383 false, meta); 8384 break; 8385 case ARG_CONST_SIZE_OR_ZERO: 8386 err = check_mem_size_reg(env, reg_state(env, regno - 1), reg, argno_from_reg(regno - 1), 8387 argno_from_reg(regno), 8388 fn->arg_type[arg - 1] & MEM_WRITE ? 8389 BPF_WRITE : BPF_READ, 8390 true, meta); 8391 break; 8392 case ARG_PTR_TO_DYNPTR: 8393 err = process_dynptr_func(env, reg, argno_from_reg(regno), insn_idx, arg_type, &meta->ref_obj, 8394 &meta->dynptr); 8395 if (err) 8396 return err; 8397 break; 8398 case ARG_CONST_ALLOC_SIZE_OR_ZERO: 8399 if (!tnum_is_const(reg->var_off)) { 8400 verbose(env, "R%d is not a known constant'\n", 8401 regno); 8402 return -EACCES; 8403 } 8404 meta->mem_size = reg->var_off.value; 8405 err = mark_chain_precision(env, regno); 8406 if (err) 8407 return err; 8408 break; 8409 case ARG_PTR_TO_CONST_STR: 8410 { 8411 err = check_arg_const_str(env, reg, argno_from_reg(regno)); 8412 if (err) 8413 return err; 8414 break; 8415 } 8416 case ARG_KPTR_XCHG_DEST: 8417 err = process_kptr_func(env, regno, meta); 8418 if (err) 8419 return err; 8420 break; 8421 } 8422 8423 return err; 8424 } 8425 8426 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) 8427 { 8428 enum bpf_attach_type eatype = env->prog->expected_attach_type; 8429 enum bpf_prog_type type = resolve_prog_type(env->prog); 8430 8431 if (func_id != BPF_FUNC_map_update_elem && 8432 func_id != BPF_FUNC_map_delete_elem) 8433 return false; 8434 8435 /* It's not possible to get access to a locked struct sock in these 8436 * contexts, so updating is safe. 
8437 */ 8438 switch (type) { 8439 case BPF_PROG_TYPE_TRACING: 8440 if (eatype == BPF_TRACE_ITER) 8441 return true; 8442 break; 8443 case BPF_PROG_TYPE_SOCK_OPS: 8444 /* map_update allowed only via dedicated helpers with event type checks */ 8445 if (func_id == BPF_FUNC_map_delete_elem) 8446 return true; 8447 break; 8448 case BPF_PROG_TYPE_SOCKET_FILTER: 8449 case BPF_PROG_TYPE_SCHED_CLS: 8450 case BPF_PROG_TYPE_SCHED_ACT: 8451 case BPF_PROG_TYPE_XDP: 8452 case BPF_PROG_TYPE_SK_REUSEPORT: 8453 case BPF_PROG_TYPE_FLOW_DISSECTOR: 8454 case BPF_PROG_TYPE_SK_LOOKUP: 8455 return true; 8456 default: 8457 break; 8458 } 8459 8460 verbose(env, "cannot update sockmap in this context\n"); 8461 return false; 8462 } 8463 8464 bool bpf_allow_tail_call_in_subprogs(struct bpf_verifier_env *env) 8465 { 8466 return env->prog->jit_requested && 8467 bpf_jit_supports_subprog_tailcalls(); 8468 } 8469 8470 static int check_map_func_compatibility(struct bpf_verifier_env *env, 8471 struct bpf_map *map, int func_id) 8472 { 8473 if (!map) 8474 return 0; 8475 8476 /* We need a two way check, first is from map perspective ... 
*/ 8477 switch (map->map_type) { 8478 case BPF_MAP_TYPE_PROG_ARRAY: 8479 if (func_id != BPF_FUNC_tail_call) 8480 goto error; 8481 break; 8482 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 8483 if (func_id != BPF_FUNC_perf_event_read && 8484 func_id != BPF_FUNC_perf_event_output && 8485 func_id != BPF_FUNC_skb_output && 8486 func_id != BPF_FUNC_perf_event_read_value && 8487 func_id != BPF_FUNC_xdp_output) 8488 goto error; 8489 break; 8490 case BPF_MAP_TYPE_RINGBUF: 8491 if (func_id != BPF_FUNC_ringbuf_output && 8492 func_id != BPF_FUNC_ringbuf_reserve && 8493 func_id != BPF_FUNC_ringbuf_query && 8494 func_id != BPF_FUNC_ringbuf_reserve_dynptr && 8495 func_id != BPF_FUNC_ringbuf_submit_dynptr && 8496 func_id != BPF_FUNC_ringbuf_discard_dynptr) 8497 goto error; 8498 break; 8499 case BPF_MAP_TYPE_USER_RINGBUF: 8500 if (func_id != BPF_FUNC_user_ringbuf_drain) 8501 goto error; 8502 break; 8503 case BPF_MAP_TYPE_STACK_TRACE: 8504 if (func_id != BPF_FUNC_get_stackid) 8505 goto error; 8506 break; 8507 case BPF_MAP_TYPE_CGROUP_ARRAY: 8508 if (func_id != BPF_FUNC_skb_under_cgroup && 8509 func_id != BPF_FUNC_current_task_under_cgroup) 8510 goto error; 8511 break; 8512 case BPF_MAP_TYPE_CGROUP_STORAGE: 8513 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: 8514 if (func_id != BPF_FUNC_get_local_storage) 8515 goto error; 8516 break; 8517 case BPF_MAP_TYPE_DEVMAP: 8518 case BPF_MAP_TYPE_DEVMAP_HASH: 8519 if (func_id != BPF_FUNC_redirect_map && 8520 func_id != BPF_FUNC_map_lookup_elem) 8521 goto error; 8522 break; 8523 /* Restrict bpf side of cpumap and xskmap, open when use-cases 8524 * appear. 
8525 */ 8526 case BPF_MAP_TYPE_CPUMAP: 8527 if (func_id != BPF_FUNC_redirect_map) 8528 goto error; 8529 break; 8530 case BPF_MAP_TYPE_XSKMAP: 8531 if (func_id != BPF_FUNC_redirect_map && 8532 func_id != BPF_FUNC_map_lookup_elem) 8533 goto error; 8534 break; 8535 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 8536 case BPF_MAP_TYPE_HASH_OF_MAPS: 8537 if (func_id != BPF_FUNC_map_lookup_elem) 8538 goto error; 8539 break; 8540 case BPF_MAP_TYPE_SOCKMAP: 8541 if (func_id != BPF_FUNC_sk_redirect_map && 8542 func_id != BPF_FUNC_sock_map_update && 8543 func_id != BPF_FUNC_msg_redirect_map && 8544 func_id != BPF_FUNC_sk_select_reuseport && 8545 func_id != BPF_FUNC_map_lookup_elem && 8546 !may_update_sockmap(env, func_id)) 8547 goto error; 8548 break; 8549 case BPF_MAP_TYPE_SOCKHASH: 8550 if (func_id != BPF_FUNC_sk_redirect_hash && 8551 func_id != BPF_FUNC_sock_hash_update && 8552 func_id != BPF_FUNC_msg_redirect_hash && 8553 func_id != BPF_FUNC_sk_select_reuseport && 8554 func_id != BPF_FUNC_map_lookup_elem && 8555 !may_update_sockmap(env, func_id)) 8556 goto error; 8557 break; 8558 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 8559 if (func_id != BPF_FUNC_sk_select_reuseport) 8560 goto error; 8561 break; 8562 case BPF_MAP_TYPE_QUEUE: 8563 case BPF_MAP_TYPE_STACK: 8564 if (func_id != BPF_FUNC_map_peek_elem && 8565 func_id != BPF_FUNC_map_pop_elem && 8566 func_id != BPF_FUNC_map_push_elem) 8567 goto error; 8568 break; 8569 case BPF_MAP_TYPE_SK_STORAGE: 8570 if (func_id != BPF_FUNC_sk_storage_get && 8571 func_id != BPF_FUNC_sk_storage_delete && 8572 func_id != BPF_FUNC_kptr_xchg) 8573 goto error; 8574 break; 8575 case BPF_MAP_TYPE_INODE_STORAGE: 8576 if (func_id != BPF_FUNC_inode_storage_get && 8577 func_id != BPF_FUNC_inode_storage_delete && 8578 func_id != BPF_FUNC_kptr_xchg) 8579 goto error; 8580 break; 8581 case BPF_MAP_TYPE_TASK_STORAGE: 8582 if (func_id != BPF_FUNC_task_storage_get && 8583 func_id != BPF_FUNC_task_storage_delete && 8584 func_id != BPF_FUNC_kptr_xchg) 8585 goto error; 8586 
break; 8587 case BPF_MAP_TYPE_CGRP_STORAGE: 8588 if (func_id != BPF_FUNC_cgrp_storage_get && 8589 func_id != BPF_FUNC_cgrp_storage_delete && 8590 func_id != BPF_FUNC_kptr_xchg) 8591 goto error; 8592 break; 8593 case BPF_MAP_TYPE_BLOOM_FILTER: 8594 if (func_id != BPF_FUNC_map_peek_elem && 8595 func_id != BPF_FUNC_map_push_elem) 8596 goto error; 8597 break; 8598 case BPF_MAP_TYPE_INSN_ARRAY: 8599 goto error; 8600 default: 8601 break; 8602 } 8603 8604 /* ... and second from the function itself. */ 8605 switch (func_id) { 8606 case BPF_FUNC_tail_call: 8607 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) 8608 goto error; 8609 if (env->subprog_cnt > 1 && !bpf_allow_tail_call_in_subprogs(env)) { 8610 verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n"); 8611 return -EINVAL; 8612 } 8613 break; 8614 case BPF_FUNC_perf_event_read: 8615 case BPF_FUNC_perf_event_output: 8616 case BPF_FUNC_perf_event_read_value: 8617 case BPF_FUNC_skb_output: 8618 case BPF_FUNC_xdp_output: 8619 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 8620 goto error; 8621 break; 8622 case BPF_FUNC_ringbuf_output: 8623 case BPF_FUNC_ringbuf_reserve: 8624 case BPF_FUNC_ringbuf_query: 8625 case BPF_FUNC_ringbuf_reserve_dynptr: 8626 case BPF_FUNC_ringbuf_submit_dynptr: 8627 case BPF_FUNC_ringbuf_discard_dynptr: 8628 if (map->map_type != BPF_MAP_TYPE_RINGBUF) 8629 goto error; 8630 break; 8631 case BPF_FUNC_user_ringbuf_drain: 8632 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF) 8633 goto error; 8634 break; 8635 case BPF_FUNC_get_stackid: 8636 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) 8637 goto error; 8638 break; 8639 case BPF_FUNC_current_task_under_cgroup: 8640 case BPF_FUNC_skb_under_cgroup: 8641 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) 8642 goto error; 8643 break; 8644 case BPF_FUNC_redirect_map: 8645 if (map->map_type != BPF_MAP_TYPE_DEVMAP && 8646 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && 8647 map->map_type != BPF_MAP_TYPE_CPUMAP && 8648 map->map_type != 
BPF_MAP_TYPE_XSKMAP) 8649 goto error; 8650 break; 8651 case BPF_FUNC_sk_redirect_map: 8652 case BPF_FUNC_msg_redirect_map: 8653 case BPF_FUNC_sock_map_update: 8654 if (map->map_type != BPF_MAP_TYPE_SOCKMAP) 8655 goto error; 8656 break; 8657 case BPF_FUNC_sk_redirect_hash: 8658 case BPF_FUNC_msg_redirect_hash: 8659 case BPF_FUNC_sock_hash_update: 8660 if (map->map_type != BPF_MAP_TYPE_SOCKHASH) 8661 goto error; 8662 break; 8663 case BPF_FUNC_get_local_storage: 8664 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && 8665 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 8666 goto error; 8667 break; 8668 case BPF_FUNC_sk_select_reuseport: 8669 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && 8670 map->map_type != BPF_MAP_TYPE_SOCKMAP && 8671 map->map_type != BPF_MAP_TYPE_SOCKHASH) 8672 goto error; 8673 break; 8674 case BPF_FUNC_map_pop_elem: 8675 if (map->map_type != BPF_MAP_TYPE_QUEUE && 8676 map->map_type != BPF_MAP_TYPE_STACK) 8677 goto error; 8678 break; 8679 case BPF_FUNC_map_peek_elem: 8680 case BPF_FUNC_map_push_elem: 8681 if (map->map_type != BPF_MAP_TYPE_QUEUE && 8682 map->map_type != BPF_MAP_TYPE_STACK && 8683 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) 8684 goto error; 8685 break; 8686 case BPF_FUNC_map_lookup_percpu_elem: 8687 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && 8688 map->map_type != BPF_MAP_TYPE_PERCPU_HASH && 8689 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH) 8690 goto error; 8691 break; 8692 case BPF_FUNC_sk_storage_get: 8693 case BPF_FUNC_sk_storage_delete: 8694 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) 8695 goto error; 8696 break; 8697 case BPF_FUNC_inode_storage_get: 8698 case BPF_FUNC_inode_storage_delete: 8699 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) 8700 goto error; 8701 break; 8702 case BPF_FUNC_task_storage_get: 8703 case BPF_FUNC_task_storage_delete: 8704 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) 8705 goto error; 8706 break; 8707 case BPF_FUNC_cgrp_storage_get: 8708 case BPF_FUNC_cgrp_storage_delete: 
8709 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) 8710 goto error; 8711 break; 8712 default: 8713 break; 8714 } 8715 8716 return 0; 8717 error: 8718 verbose(env, "cannot pass map_type %d into func %s#%d\n", 8719 map->map_type, func_id_name(func_id), func_id); 8720 return -EINVAL; 8721 } 8722 8723 static bool check_raw_mode_ok(const struct bpf_func_proto *fn) 8724 { 8725 int count = 0; 8726 8727 if (arg_type_is_raw_mem(fn->arg1_type)) 8728 count++; 8729 if (arg_type_is_raw_mem(fn->arg2_type)) 8730 count++; 8731 if (arg_type_is_raw_mem(fn->arg3_type)) 8732 count++; 8733 if (arg_type_is_raw_mem(fn->arg4_type)) 8734 count++; 8735 if (arg_type_is_raw_mem(fn->arg5_type)) 8736 count++; 8737 8738 /* We only support one arg being in raw mode at the moment, 8739 * which is sufficient for the helper functions we have 8740 * right now. 8741 */ 8742 return count <= 1; 8743 } 8744 8745 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg) 8746 { 8747 bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE; 8748 bool has_size = fn->arg_size[arg] != 0; 8749 bool is_next_size = false; 8750 8751 if (arg + 1 < ARRAY_SIZE(fn->arg_type)) 8752 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]); 8753 8754 if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM) 8755 return is_next_size; 8756 8757 return has_size == is_next_size || is_next_size == is_fixed; 8758 } 8759 8760 static bool check_arg_pair_ok(const struct bpf_func_proto *fn) 8761 { 8762 /* bpf_xxx(..., buf, len) call will access 'len' 8763 * bytes from memory 'buf'. Both arg types need 8764 * to be paired, so make sure there's no buggy 8765 * helper function specification. 
8766 */ 8767 if (arg_type_is_mem_size(fn->arg1_type) || 8768 check_args_pair_invalid(fn, 0) || 8769 check_args_pair_invalid(fn, 1) || 8770 check_args_pair_invalid(fn, 2) || 8771 check_args_pair_invalid(fn, 3) || 8772 check_args_pair_invalid(fn, 4)) 8773 return false; 8774 8775 return true; 8776 } 8777 8778 static bool check_btf_id_ok(const struct bpf_func_proto *fn) 8779 { 8780 int i; 8781 8782 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { 8783 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID) 8784 return !!fn->arg_btf_id[i]; 8785 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK) 8786 return fn->arg_btf_id[i] == BPF_PTR_POISON; 8787 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] && 8788 /* arg_btf_id and arg_size are in a union. */ 8789 (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM || 8790 !(fn->arg_type[i] & MEM_FIXED_SIZE))) 8791 return false; 8792 } 8793 8794 return true; 8795 } 8796 8797 static bool check_mem_arg_rw_flag_ok(const struct bpf_func_proto *fn) 8798 { 8799 int i; 8800 8801 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { 8802 enum bpf_arg_type arg_type = fn->arg_type[i]; 8803 8804 if (base_type(arg_type) != ARG_PTR_TO_MEM) 8805 continue; 8806 if (!(arg_type & (MEM_WRITE | MEM_RDONLY))) 8807 return false; 8808 } 8809 8810 return true; 8811 } 8812 8813 static bool check_proto_release_reg(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta) 8814 { 8815 int i; 8816 8817 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { 8818 enum bpf_arg_type arg_type = fn->arg_type[i]; 8819 8820 if (arg_type_is_release(arg_type)) { 8821 if (meta->release_regno) 8822 return false; 8823 meta->release_regno = i + 1; 8824 } 8825 } 8826 8827 return true; 8828 } 8829 8830 static int check_func_proto(const struct bpf_func_proto *fn, struct bpf_call_arg_meta *meta) 8831 { 8832 return check_raw_mode_ok(fn) && 8833 check_arg_pair_ok(fn) && 8834 check_mem_arg_rw_flag_ok(fn) && 8835 check_proto_release_reg(fn, meta) && 8836 
check_btf_id_ok(fn) ? 0 : -EINVAL; 8837 } 8838 8839 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] 8840 * are now invalid, so turn them into unknown SCALAR_VALUE. 8841 * 8842 * This also applies to dynptr slices belonging to skb and xdp dynptrs, 8843 * since these slices point to packet data. 8844 */ 8845 static void clear_all_pkt_pointers(struct bpf_verifier_env *env) 8846 { 8847 struct bpf_func_state *state; 8848 struct bpf_reg_state *reg; 8849 8850 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ 8851 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg)) 8852 mark_reg_invalid(env, reg); 8853 })); 8854 } 8855 8856 enum { 8857 AT_PKT_END = -1, 8858 BEYOND_PKT_END = -2, 8859 }; 8860 8861 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open) 8862 { 8863 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 8864 struct bpf_reg_state *reg = &state->regs[regn]; 8865 8866 if (reg->type != PTR_TO_PACKET) 8867 /* PTR_TO_PACKET_META is not supported yet */ 8868 return; 8869 8870 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end. 8871 * How far beyond pkt_end it goes is unknown. 
8872 * if (!range_open) it's the case of pkt >= pkt_end 8873 * if (range_open) it's the case of pkt > pkt_end 8874 * hence this pointer is at least 1 byte bigger than pkt_end 8875 */ 8876 if (range_open) 8877 reg->range = BEYOND_PKT_END; 8878 else 8879 reg->range = AT_PKT_END; 8880 } 8881 8882 static int release_reference_nomark(struct bpf_verifier_state *state, int id) 8883 { 8884 int i; 8885 8886 for (i = 0; i < state->acquired_refs; i++) { 8887 if (state->refs[i].type != REF_TYPE_PTR) 8888 continue; 8889 if (state->refs[i].id == id) { 8890 release_reference_state(state, i); 8891 return 0; 8892 } 8893 } 8894 return -EINVAL; 8895 } 8896 8897 static int idstack_push(struct bpf_idmap *idmap, u32 id) 8898 { 8899 int i; 8900 8901 if (!id) 8902 return 0; 8903 8904 for (i = 0; i < idmap->cnt; i++) 8905 if (idmap->map[i].old == id) 8906 return 0; 8907 8908 if (WARN_ON_ONCE(idmap->cnt >= BPF_ID_MAP_SIZE)) 8909 return -EFAULT; 8910 8911 idmap->map[idmap->cnt++].old = id; 8912 return 0; 8913 } 8914 8915 static int idstack_pop(struct bpf_idmap *idmap) 8916 { 8917 if (!idmap->cnt) 8918 return 0; 8919 8920 return idmap->map[--idmap->cnt].old; 8921 } 8922 8923 /* Release id and objects derived from it iteratively in a DFS manner */ 8924 static int release_reference(struct bpf_verifier_env *env, int id) 8925 { 8926 u32 mask = (1 << STACK_SPILL) | (1 << STACK_DYNPTR); 8927 struct bpf_verifier_state *vstate = env->cur_state; 8928 struct bpf_idmap *idstack = &env->idmap_scratch; 8929 struct bpf_stack_state *stack; 8930 struct bpf_func_state *state; 8931 struct bpf_reg_state *reg; 8932 int i, err; 8933 8934 idstack->cnt = 0; 8935 err = idstack_push(idstack, id); 8936 if (err) 8937 return err; 8938 8939 if (find_reference_state(vstate, id)) 8940 WARN_ON_ONCE(release_reference_nomark(vstate, id)); 8941 8942 while ((id = idstack_pop(idstack))) { 8943 /* 8944 * Child references are inaccessible after parent is released, 8945 * any child references that exist at this point are a leak. 
8946 */ 8947 for (i = 0; i < vstate->acquired_refs; i++) { 8948 if (vstate->refs[i].type != REF_TYPE_PTR) 8949 continue; 8950 if (vstate->refs[i].parent_id != id) 8951 continue; 8952 verbose(env, "Leaking reference id=%d alloc_insn=%d. Release it first.\n", 8953 vstate->refs[i].id, vstate->refs[i].insn_idx); 8954 return -EINVAL; 8955 } 8956 8957 bpf_for_each_reg_in_vstate_mask(vstate, state, reg, stack, mask, ({ 8958 if (reg->id != id && reg->parent_id != id) 8959 continue; 8960 8961 /* Free objects derived from the current object */ 8962 if (reg->parent_id == id) { 8963 err = idstack_push(idstack, reg->id); 8964 if (err) 8965 return err; 8966 } 8967 8968 if (!stack || stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL) 8969 mark_reg_invalid(env, reg); 8970 else if (stack->slot_type[BPF_REG_SIZE - 1] == STACK_DYNPTR) 8971 invalidate_dynptr(env, stack); 8972 })); 8973 } 8974 8975 return 0; 8976 } 8977 8978 static void invalidate_non_owning_refs(struct bpf_verifier_env *env) 8979 { 8980 struct bpf_func_state *unused; 8981 struct bpf_reg_state *reg; 8982 8983 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({ 8984 if (type_is_non_owning_ref(reg->type)) 8985 mark_reg_invalid(env, reg); 8986 })); 8987 } 8988 8989 static void invalidate_rcu_protected_refs(struct bpf_verifier_env *env) 8990 { 8991 struct bpf_stack_state *stack; 8992 struct bpf_func_state *state; 8993 struct bpf_reg_state *reg; 8994 u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER); 8995 8996 bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, stack, clear_mask, ({ 8997 if (reg->type & MEM_RCU) { 8998 reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL); 8999 reg->type |= PTR_UNTRUSTED; 9000 } 9001 })); 9002 } 9003 9004 static int ref_convert_alloc_rcu_protected(struct bpf_verifier_env *env, u32 id) 9005 { 9006 struct bpf_func_state *state; 9007 struct bpf_reg_state *reg; 9008 int err; 9009 9010 err = release_reference_nomark(env->cur_state, id); 9011 9012 
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ 9013 if (reg->id != id) 9014 continue; 9015 if ((reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) { 9016 reg->id = 0; 9017 reg->type &= ~MEM_ALLOC; 9018 reg->type |= MEM_RCU; 9019 } 9020 })); 9021 9022 return err; 9023 } 9024 9025 static void clear_caller_saved_regs(struct bpf_verifier_env *env, 9026 struct bpf_reg_state *regs) 9027 { 9028 int i; 9029 9030 /* after the call registers r0 - r5 were scratched */ 9031 for (i = 0; i < CALLER_SAVED_REGS; i++) { 9032 bpf_mark_reg_not_init(env, ®s[caller_saved[i]]); 9033 __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK); 9034 } 9035 } 9036 9037 static void invalidate_outgoing_stack_args(const struct bpf_verifier_env *env, 9038 struct bpf_func_state *state) 9039 { 9040 int i, nslots = state->out_stack_arg_cnt; 9041 9042 for (i = 0; i < nslots; i++) 9043 bpf_mark_reg_not_init(env, &state->stack_arg_regs[i]); 9044 } 9045 9046 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env, 9047 struct bpf_func_state *caller, 9048 struct bpf_func_state *callee, 9049 int insn_idx); 9050 9051 static int set_callee_state(struct bpf_verifier_env *env, 9052 struct bpf_func_state *caller, 9053 struct bpf_func_state *callee, int insn_idx); 9054 9055 static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite, 9056 set_callee_state_fn set_callee_state_cb, 9057 struct bpf_verifier_state *state) 9058 { 9059 struct bpf_func_state *caller, *callee; 9060 int err; 9061 9062 if (state->curframe + 1 >= MAX_CALL_FRAMES) { 9063 verbose(env, "the call stack of %d frames is too deep\n", 9064 state->curframe + 2); 9065 return -E2BIG; 9066 } 9067 9068 if (state->frame[state->curframe + 1]) { 9069 verifier_bug(env, "Frame %d already allocated", state->curframe + 1); 9070 return -EFAULT; 9071 } 9072 9073 caller = state->frame[state->curframe]; 9074 callee = kzalloc_obj(*callee, GFP_KERNEL_ACCOUNT); 9075 if (!callee) 9076 return -ENOMEM; 9077 
state->frame[state->curframe + 1] = callee; 9078 9079 /* callee cannot access r0, r6 - r9 for reading and has to write 9080 * into its own stack before reading from it. 9081 * callee can read/write into caller's stack 9082 */ 9083 init_func_state(env, callee, 9084 /* remember the callsite, it will be used by bpf_exit */ 9085 callsite, 9086 state->curframe + 1 /* frameno within this callchain */, 9087 subprog /* subprog number within this prog */); 9088 err = set_callee_state_cb(env, caller, callee, callsite); 9089 if (err) 9090 goto err_out; 9091 9092 /* only increment it after check_reg_arg() finished */ 9093 state->curframe++; 9094 9095 return 0; 9096 9097 err_out: 9098 free_func_state(callee); 9099 state->frame[state->curframe + 1] = NULL; 9100 return err; 9101 } 9102 9103 static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, 9104 const struct btf *btf, 9105 struct bpf_reg_state *regs) 9106 { 9107 struct bpf_subprog_info *sub = subprog_info(env, subprog); 9108 struct bpf_func_state *caller = cur_func(env); 9109 struct bpf_verifier_log *log = &env->log; 9110 struct ref_obj_desc ref_obj = {}; 9111 u32 i; 9112 int ret, err; 9113 9114 ret = btf_prepare_func_args(env, subprog); 9115 if (ret) { 9116 if (bpf_in_stack_arg_cnt(sub) > 0) { 9117 err = check_outgoing_stack_args(env, caller, sub->arg_cnt); 9118 if (err) 9119 return err; 9120 } 9121 return ret; 9122 } 9123 9124 ret = check_outgoing_stack_args(env, caller, sub->arg_cnt); 9125 if (ret) 9126 return ret; 9127 9128 /* check that BTF function arguments match actual types that the 9129 * verifier sees. 
9130 */ 9131 for (i = 0; i < sub->arg_cnt; i++) { 9132 argno_t argno = argno_from_arg(i + 1); 9133 struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i); 9134 struct bpf_subprog_arg_info *arg = &sub->args[i]; 9135 9136 if (arg->arg_type == ARG_ANYTHING) { 9137 if (reg->type != SCALAR_VALUE) { 9138 bpf_log(log, "%s is not a scalar\n", reg_arg_name(env, argno)); 9139 return -EINVAL; 9140 } 9141 } else if (arg->arg_type & PTR_UNTRUSTED) { 9142 /* 9143 * Anything is allowed for untrusted arguments, as these are 9144 * read-only and probe read instructions would protect against 9145 * invalid memory access. 9146 */ 9147 } else if (arg->arg_type == ARG_PTR_TO_CTX) { 9148 ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_CTX); 9149 if (ret < 0) 9150 return ret; 9151 /* If function expects ctx type in BTF check that caller 9152 * is passing PTR_TO_CTX. 9153 */ 9154 if (reg->type != PTR_TO_CTX) { 9155 bpf_log(log, "%s expects pointer to ctx\n", 9156 reg_arg_name(env, argno)); 9157 return -EINVAL; 9158 } 9159 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { 9160 ret = check_func_arg_reg_off(env, reg, argno, ARG_DONTCARE); 9161 if (ret < 0) 9162 return ret; 9163 if (check_mem_reg(env, reg, argno, arg->mem_size)) 9164 return -EINVAL; 9165 if (!(arg->arg_type & PTR_MAYBE_NULL) && (reg->type & PTR_MAYBE_NULL)) { 9166 bpf_log(log, "%s is expected to be non-NULL\n", 9167 reg_arg_name(env, argno)); 9168 return -EINVAL; 9169 } 9170 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) { 9171 /* 9172 * Can pass any value and the kernel won't crash, but 9173 * only PTR_TO_ARENA or SCALAR make sense. Everything 9174 * else is a bug in the bpf program. Point it out to 9175 * the user at the verification time instead of 9176 * run-time debug nightmare. 
9177 */ 9178 if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) { 9179 bpf_log(log, "%s is not a pointer to arena or scalar.\n", 9180 reg_arg_name(env, argno)); 9181 return -EINVAL; 9182 } 9183 } else if (arg->arg_type == ARG_PTR_TO_DYNPTR) { 9184 ret = check_func_arg_reg_off(env, reg, argno, ARG_PTR_TO_DYNPTR); 9185 if (ret) 9186 return ret; 9187 9188 ret = process_dynptr_func(env, reg, argno, -1, arg->arg_type, &ref_obj, NULL); 9189 if (ret) 9190 return ret; 9191 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { 9192 struct bpf_call_arg_meta meta; 9193 int err; 9194 9195 if (bpf_register_is_null(reg) && type_may_be_null(arg->arg_type)) 9196 continue; 9197 9198 memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */ 9199 err = check_reg_type(env, reg, argno, arg->arg_type, &arg->btf_id, &meta); 9200 err = err ?: check_func_arg_reg_off(env, reg, argno, arg->arg_type); 9201 if (err) 9202 return err; 9203 } else { 9204 verifier_bug(env, "unrecognized %s type %d", 9205 reg_arg_name(env, argno), arg->arg_type); 9206 return -EFAULT; 9207 } 9208 } 9209 9210 return 0; 9211 } 9212 9213 /* Compare BTF of a function call with given bpf_reg_state. 9214 * Returns: 9215 * EFAULT - there is a verifier bug. Abort verification. 9216 * EINVAL - there is a type mismatch or BTF is not available. 9217 * 0 - BTF matches with what bpf_reg_state expects. 9218 * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. 
9219 */ 9220 static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, 9221 struct bpf_reg_state *regs) 9222 { 9223 struct bpf_prog *prog = env->prog; 9224 struct btf *btf = prog->aux->btf; 9225 u32 btf_id; 9226 int err; 9227 9228 if (!prog->aux->func_info) 9229 return -EINVAL; 9230 9231 btf_id = prog->aux->func_info[subprog].type_id; 9232 if (!btf_id) 9233 return -EFAULT; 9234 9235 if (prog->aux->func_info_aux[subprog].unreliable) 9236 return -EINVAL; 9237 9238 err = btf_check_func_arg_match(env, subprog, btf, regs); 9239 /* Compiler optimizations can remove arguments from static functions 9240 * or mismatched type can be passed into a global function. 9241 * In such cases mark the function as unreliable from BTF point of view. 9242 */ 9243 if (err) 9244 prog->aux->func_info_aux[subprog].unreliable = true; 9245 return err; 9246 } 9247 9248 static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 9249 int insn_idx, int subprog, 9250 set_callee_state_fn set_callee_state_cb) 9251 { 9252 struct bpf_verifier_state *state = env->cur_state, *callback_state; 9253 struct bpf_func_state *caller, *callee; 9254 int err; 9255 9256 caller = state->frame[state->curframe]; 9257 err = btf_check_subprog_call(env, subprog, caller->regs); 9258 if (err == -EFAULT) 9259 return err; 9260 9261 /* set_callee_state is used for direct subprog calls, but we are 9262 * interested in validating only BPF helpers that can call subprogs as 9263 * callbacks 9264 */ 9265 env->subprog_info[subprog].is_cb = true; 9266 if (bpf_pseudo_kfunc_call(insn) && 9267 !is_callback_calling_kfunc(insn->imm)) { 9268 verifier_bug(env, "kfunc %s#%d not marked as callback-calling", 9269 func_id_name(insn->imm), insn->imm); 9270 return -EFAULT; 9271 } else if (!bpf_pseudo_kfunc_call(insn) && 9272 !is_callback_calling_function(insn->imm)) { /* helper */ 9273 verifier_bug(env, "helper %s#%d not marked as callback-calling", 9274 func_id_name(insn->imm), insn->imm); 9275 return 
-EFAULT; 9276 } 9277 9278 if (bpf_is_async_callback_calling_insn(insn)) { 9279 struct bpf_verifier_state *async_cb; 9280 9281 /* there is no real recursion here. timer and workqueue callbacks are async */ 9282 env->subprog_info[subprog].is_async_cb = true; 9283 async_cb = push_async_cb(env, env->subprog_info[subprog].start, 9284 insn_idx, subprog, 9285 is_async_cb_sleepable(env, insn)); 9286 if (IS_ERR(async_cb)) 9287 return PTR_ERR(async_cb); 9288 callee = async_cb->frame[0]; 9289 callee->async_entry_cnt = caller->async_entry_cnt + 1; 9290 9291 /* Convert bpf_timer_set_callback() args into timer callback args */ 9292 err = set_callee_state_cb(env, caller, callee, insn_idx); 9293 if (err) 9294 return err; 9295 9296 return 0; 9297 } 9298 9299 /* for callback functions enqueue entry to callback and 9300 * proceed with next instruction within current frame. 9301 */ 9302 callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false); 9303 if (IS_ERR(callback_state)) 9304 return PTR_ERR(callback_state); 9305 9306 err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb, 9307 callback_state); 9308 if (err) 9309 return err; 9310 9311 callback_state->callback_unroll_depth++; 9312 callback_state->frame[callback_state->curframe - 1]->callback_depth++; 9313 caller->callback_depth = 0; 9314 return 0; 9315 } 9316 9317 static int process_bpf_exit_full(struct bpf_verifier_env *env, 9318 bool *do_print_state, bool exception_exit); 9319 9320 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 9321 int *insn_idx) 9322 { 9323 struct bpf_verifier_state *state = env->cur_state; 9324 struct bpf_subprog_info *caller_info; 9325 u16 callee_incoming, stack_arg_cnt; 9326 struct bpf_func_state *caller; 9327 int err, subprog, target_insn; 9328 9329 target_insn = *insn_idx + insn->imm + 1; 9330 subprog = bpf_find_subprog(env, target_insn); 9331 if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program", 9332 
target_insn)) 9333 return -EFAULT; 9334 9335 caller = state->frame[state->curframe]; 9336 err = btf_check_subprog_call(env, subprog, caller->regs); 9337 if (err == -EFAULT) 9338 return err; 9339 if (bpf_subprog_is_global(env, subprog)) { 9340 const char *sub_name = subprog_name(env, subprog); 9341 9342 if (env->cur_state->active_locks) { 9343 verbose(env, "global function calls are not allowed while holding a lock,\n" 9344 "use static function instead\n"); 9345 return -EINVAL; 9346 } 9347 9348 if (env->subprog_info[subprog].might_sleep && !in_sleepable_context(env)) { 9349 verbose(env, "sleepable global function %s() called in %s\n", 9350 sub_name, non_sleepable_context_description(env)); 9351 return -EINVAL; 9352 } 9353 9354 if (err) { 9355 verbose(env, "Caller passes invalid args into func#%d ('%s')\n", 9356 subprog, sub_name); 9357 return err; 9358 } 9359 9360 if (env->log.level & BPF_LOG_LEVEL) 9361 verbose(env, "Func#%d ('%s') is global and assumed valid.\n", 9362 subprog, sub_name); 9363 if (env->subprog_info[subprog].changes_pkt_data) 9364 clear_all_pkt_pointers(env); 9365 /* mark global subprog for verifying after main prog */ 9366 subprog_aux(env, subprog)->called = true; 9367 clear_caller_saved_regs(env, caller->regs); 9368 invalidate_outgoing_stack_args(env, cur_func(env)); 9369 9370 /* All non-void global functions return a 64-bit SCALAR_VALUE. 
*/ 9371 if (!subprog_returns_void(env, subprog)) { 9372 mark_reg_unknown(env, caller->regs, BPF_REG_0); 9373 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; 9374 } 9375 9376 if (env->subprog_info[subprog].might_throw) { 9377 struct bpf_verifier_state *branch; 9378 9379 branch = push_stack(env, *insn_idx + 1, *insn_idx, false); 9380 if (IS_ERR(branch)) { 9381 verbose(env, "failed to push state for global subprog exception path\n"); 9382 return PTR_ERR(branch); 9383 } 9384 return process_bpf_exit_full(env, NULL, true); 9385 } 9386 9387 /* continue with next insn after call */ 9388 return 0; 9389 } 9390 9391 /* 9392 * Track caller's total stack arg count (incoming + max outgoing). 9393 * This is needed so the JIT knows how much stack arg space to allocate. 9394 */ 9395 caller_info = &env->subprog_info[caller->subprogno]; 9396 callee_incoming = bpf_in_stack_arg_cnt(&env->subprog_info[subprog]); 9397 stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + callee_incoming; 9398 if (stack_arg_cnt > caller_info->stack_arg_cnt) 9399 caller_info->stack_arg_cnt = stack_arg_cnt; 9400 9401 /* for regular function entry setup new frame and continue 9402 * from that frame. 
9403 */ 9404 err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state); 9405 if (err) 9406 return err; 9407 9408 clear_caller_saved_regs(env, caller->regs); 9409 9410 /* and go analyze first insn of the callee */ 9411 *insn_idx = env->subprog_info[subprog].start - 1; 9412 9413 if (env->log.level & BPF_LOG_LEVEL) { 9414 verbose(env, "caller:\n"); 9415 print_verifier_state(env, state, caller->frameno, true); 9416 verbose(env, "callee:\n"); 9417 print_verifier_state(env, state, state->curframe, true); 9418 } 9419 9420 return 0; 9421 } 9422 9423 int map_set_for_each_callback_args(struct bpf_verifier_env *env, 9424 struct bpf_func_state *caller, 9425 struct bpf_func_state *callee) 9426 { 9427 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, 9428 * void *callback_ctx, u64 flags); 9429 * callback_fn(struct bpf_map *map, void *key, void *value, 9430 * void *callback_ctx); 9431 */ 9432 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; 9433 9434 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; 9435 __mark_reg_known_zero(&callee->regs[BPF_REG_2]); 9436 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr; 9437 9438 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; 9439 __mark_reg_known_zero(&callee->regs[BPF_REG_3]); 9440 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr; 9441 9442 /* pointer to stack or null */ 9443 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3]; 9444 9445 /* unused */ 9446 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 9447 return 0; 9448 } 9449 9450 static int set_callee_state(struct bpf_verifier_env *env, 9451 struct bpf_func_state *caller, 9452 struct bpf_func_state *callee, int insn_idx) 9453 { 9454 int i; 9455 9456 /* copy r1 - r5 args that callee can access. 
The copy includes parent 9457 * pointers, which connects us up to the liveness chain 9458 */ 9459 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 9460 callee->regs[i] = caller->regs[i]; 9461 return 0; 9462 } 9463 9464 static int set_map_elem_callback_state(struct bpf_verifier_env *env, 9465 struct bpf_func_state *caller, 9466 struct bpf_func_state *callee, 9467 int insn_idx) 9468 { 9469 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx]; 9470 struct bpf_map *map; 9471 int err; 9472 9473 /* valid map_ptr and poison value does not matter */ 9474 map = insn_aux->map_ptr_state.map_ptr; 9475 if (!map->ops->map_set_for_each_callback_args || 9476 !map->ops->map_for_each_callback) { 9477 verbose(env, "callback function not allowed for map\n"); 9478 return -ENOTSUPP; 9479 } 9480 9481 err = map->ops->map_set_for_each_callback_args(env, caller, callee); 9482 if (err) 9483 return err; 9484 9485 callee->in_callback_fn = true; 9486 callee->callback_ret_range = retval_range(0, 1); 9487 return 0; 9488 } 9489 9490 static int set_loop_callback_state(struct bpf_verifier_env *env, 9491 struct bpf_func_state *caller, 9492 struct bpf_func_state *callee, 9493 int insn_idx) 9494 { 9495 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, 9496 * u64 flags); 9497 * callback_fn(u64 index, void *callback_ctx); 9498 */ 9499 callee->regs[BPF_REG_1].type = SCALAR_VALUE; 9500 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; 9501 9502 /* unused */ 9503 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]); 9504 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 9505 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 9506 9507 callee->in_callback_fn = true; 9508 callee->callback_ret_range = retval_range(0, 1); 9509 return 0; 9510 } 9511 9512 static int set_timer_callback_state(struct bpf_verifier_env *env, 9513 struct bpf_func_state *caller, 9514 struct bpf_func_state *callee, 9515 int insn_idx) 9516 { 9517 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr; 9518 
9519 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn); 9520 * callback_fn(struct bpf_map *map, void *key, void *value); 9521 */ 9522 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP; 9523 __mark_reg_known_zero(&callee->regs[BPF_REG_1]); 9524 callee->regs[BPF_REG_1].map_ptr = map_ptr; 9525 9526 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; 9527 __mark_reg_known_zero(&callee->regs[BPF_REG_2]); 9528 callee->regs[BPF_REG_2].map_ptr = map_ptr; 9529 9530 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; 9531 __mark_reg_known_zero(&callee->regs[BPF_REG_3]); 9532 callee->regs[BPF_REG_3].map_ptr = map_ptr; 9533 9534 /* unused */ 9535 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 9536 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 9537 callee->in_async_callback_fn = true; 9538 callee->callback_ret_range = retval_range(0, 0); 9539 return 0; 9540 } 9541 9542 static int set_find_vma_callback_state(struct bpf_verifier_env *env, 9543 struct bpf_func_state *caller, 9544 struct bpf_func_state *callee, 9545 int insn_idx) 9546 { 9547 /* bpf_find_vma(struct task_struct *task, u64 addr, 9548 * void *callback_fn, void *callback_ctx, u64 flags) 9549 * (callback_fn)(struct task_struct *task, 9550 * struct vm_area_struct *vma, void *callback_ctx); 9551 */ 9552 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; 9553 9554 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID; 9555 __mark_reg_known_zero(&callee->regs[BPF_REG_2]); 9556 callee->regs[BPF_REG_2].btf = btf_vmlinux; 9557 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA]; 9558 9559 /* pointer to stack or null */ 9560 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4]; 9561 9562 /* unused */ 9563 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 9564 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 9565 callee->in_callback_fn = true; 9566 callee->callback_ret_range = retval_range(0, 1); 9567 return 0; 9568 } 9569 9570 static int set_user_ringbuf_callback_state(struct 
bpf_verifier_env *env, 9571 struct bpf_func_state *caller, 9572 struct bpf_func_state *callee, 9573 int insn_idx) 9574 { 9575 /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void 9576 * callback_ctx, u64 flags); 9577 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx); 9578 */ 9579 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_0]); 9580 mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL); 9581 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; 9582 9583 /* unused */ 9584 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]); 9585 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 9586 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 9587 9588 callee->in_callback_fn = true; 9589 callee->callback_ret_range = retval_range(0, 1); 9590 return 0; 9591 } 9592 9593 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env, 9594 struct bpf_func_state *caller, 9595 struct bpf_func_state *callee, 9596 int insn_idx) 9597 { 9598 /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node, 9599 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b)); 9600 * 9601 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset 9602 * that 'less' callback args will be receiving. 
However, 'node' arg was release_reference'd 9603 * by this point, so look at 'root' 9604 */ 9605 struct btf_field *field; 9606 9607 field = reg_find_field_offset(&caller->regs[BPF_REG_1], 9608 caller->regs[BPF_REG_1].var_off.value, 9609 BPF_RB_ROOT); 9610 if (!field || !field->graph_root.value_btf_id) 9611 return -EFAULT; 9612 9613 mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root); 9614 ref_set_non_owning(env, &callee->regs[BPF_REG_1]); 9615 mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root); 9616 ref_set_non_owning(env, &callee->regs[BPF_REG_2]); 9617 9618 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_3]); 9619 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 9620 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 9621 callee->in_callback_fn = true; 9622 callee->callback_ret_range = retval_range(0, 1); 9623 return 0; 9624 } 9625 9626 static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env, 9627 struct bpf_func_state *caller, 9628 struct bpf_func_state *callee, 9629 int insn_idx) 9630 { 9631 struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr; 9632 9633 /* 9634 * callback_fn(struct bpf_map *map, void *key, void *value); 9635 */ 9636 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP; 9637 __mark_reg_known_zero(&callee->regs[BPF_REG_1]); 9638 callee->regs[BPF_REG_1].map_ptr = map_ptr; 9639 9640 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; 9641 __mark_reg_known_zero(&callee->regs[BPF_REG_2]); 9642 callee->regs[BPF_REG_2].map_ptr = map_ptr; 9643 9644 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; 9645 __mark_reg_known_zero(&callee->regs[BPF_REG_3]); 9646 callee->regs[BPF_REG_3].map_ptr = map_ptr; 9647 9648 /* unused */ 9649 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_4]); 9650 bpf_mark_reg_not_init(env, &callee->regs[BPF_REG_5]); 9651 callee->in_async_callback_fn = true; 9652 callee->callback_ret_range = retval_range(S32_MIN, S32_MAX); 9653 return 0; 9654 } 9655 9656 static bool 
is_rbtree_lock_required_kfunc(u32 btf_id); 9657 9658 /* Are we currently verifying the callback for a rbtree helper that must 9659 * be called with lock held? If so, no need to complain about unreleased 9660 * lock 9661 */ 9662 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env) 9663 { 9664 struct bpf_verifier_state *state = env->cur_state; 9665 struct bpf_insn *insn = env->prog->insnsi; 9666 struct bpf_func_state *callee; 9667 int kfunc_btf_id; 9668 9669 if (!state->curframe) 9670 return false; 9671 9672 callee = state->frame[state->curframe]; 9673 9674 if (!callee->in_callback_fn) 9675 return false; 9676 9677 kfunc_btf_id = insn[callee->callsite].imm; 9678 return is_rbtree_lock_required_kfunc(kfunc_btf_id); 9679 } 9680 9681 static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg) 9682 { 9683 if (range.return_32bit) 9684 return range.minval <= reg_s32_min(reg) && reg_s32_max(reg) <= range.maxval; 9685 else 9686 return range.minval <= reg_smin(reg) && reg_smax(reg) <= range.maxval; 9687 } 9688 9689 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) 9690 { 9691 struct bpf_verifier_state *state = env->cur_state, *prev_st; 9692 struct bpf_func_state *caller, *callee; 9693 struct bpf_reg_state *r0; 9694 bool in_callback_fn; 9695 int err; 9696 9697 callee = state->frame[state->curframe]; 9698 r0 = &callee->regs[BPF_REG_0]; 9699 if (r0->type == PTR_TO_STACK) { 9700 /* technically it's ok to return caller's stack pointer 9701 * (or caller's caller's pointer) back to the caller, 9702 * since these pointers are valid. 
Only current stack 9703 * pointer will be invalid as soon as function exits, 9704 * but let's be conservative 9705 */ 9706 verbose(env, "cannot return stack pointer to the caller\n"); 9707 return -EINVAL; 9708 } 9709 9710 caller = state->frame[state->curframe - 1]; 9711 if (callee->in_callback_fn) { 9712 if (r0->type != SCALAR_VALUE) { 9713 verbose(env, "R0 not a scalar value\n"); 9714 return -EACCES; 9715 } 9716 9717 /* we are going to rely on register's precise value */ 9718 err = mark_chain_precision(env, BPF_REG_0); 9719 if (err) 9720 return err; 9721 9722 /* enforce R0 return value range, and bpf_callback_t returns 64bit */ 9723 if (!retval_range_within(callee->callback_ret_range, r0)) { 9724 verbose_invalid_scalar(env, r0, callee->callback_ret_range, 9725 "At callback return", "R0"); 9726 return -EINVAL; 9727 } 9728 if (!bpf_calls_callback(env, callee->callsite)) { 9729 verifier_bug(env, "in callback at %d, callsite %d !calls_callback", 9730 *insn_idx, callee->callsite); 9731 return -EFAULT; 9732 } 9733 } else { 9734 /* return to the caller whatever r0 had in the callee */ 9735 caller->regs[BPF_REG_0] = *r0; 9736 } 9737 9738 /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, 9739 * there function call logic would reschedule callback visit. If iteration 9740 * converges is_state_visited() would prune that visit eventually. 9741 */ 9742 in_callback_fn = callee->in_callback_fn; 9743 if (in_callback_fn) 9744 *insn_idx = callee->callsite; 9745 else 9746 *insn_idx = callee->callsite + 1; 9747 9748 if (env->log.level & BPF_LOG_LEVEL) { 9749 verbose(env, "returning from callee:\n"); 9750 print_verifier_state(env, state, callee->frameno, true); 9751 verbose(env, "to caller at %d:\n", *insn_idx); 9752 print_verifier_state(env, state, caller->frameno, true); 9753 } 9754 /* clear everything in the callee. In case of exceptional exits using 9755 * bpf_throw, this will be done by copy_verifier_state for extra frames. 
*/ 9756 free_func_state(callee); 9757 state->frame[state->curframe--] = NULL; 9758 invalidate_outgoing_stack_args(env, caller); 9759 9760 /* for callbacks widen imprecise scalars to make programs like below verify: 9761 * 9762 * struct ctx { int i; } 9763 * void cb(int idx, struct ctx *ctx) { ctx->i++; ... } 9764 * ... 9765 * struct ctx = { .i = 0; } 9766 * bpf_loop(100, cb, &ctx, 0); 9767 * 9768 * This is similar to what is done in process_iter_next_call() for open 9769 * coded iterators. 9770 */ 9771 prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL; 9772 if (prev_st) { 9773 err = widen_imprecise_scalars(env, prev_st, state); 9774 if (err) 9775 return err; 9776 } 9777 return 0; 9778 } 9779 9780 static int do_refine_retval_range(struct bpf_verifier_env *env, 9781 struct bpf_reg_state *regs, int ret_type, 9782 int func_id, 9783 struct bpf_call_arg_meta *meta) 9784 { 9785 struct bpf_retval_range range; 9786 struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; 9787 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 9788 9789 if (ret_type != RET_INTEGER) 9790 return 0; 9791 9792 switch (func_id) { 9793 case BPF_FUNC_get_stack: 9794 case BPF_FUNC_get_task_stack: 9795 case BPF_FUNC_probe_read_str: 9796 case BPF_FUNC_probe_read_kernel_str: 9797 case BPF_FUNC_probe_read_user_str: 9798 reg_set_srange64(ret_reg, -MAX_ERRNO, meta->msize_max_value); 9799 reg_set_srange32(ret_reg, -MAX_ERRNO, meta->msize_max_value); 9800 reg_bounds_sync(ret_reg); 9801 break; 9802 case BPF_FUNC_get_smp_processor_id: 9803 reg_set_urange64(ret_reg, 0, nr_cpu_ids - 1); 9804 reg_set_urange32(ret_reg, 0, nr_cpu_ids - 1); 9805 reg_bounds_sync(ret_reg); 9806 break; 9807 case BPF_FUNC_get_retval: 9808 /* 9809 * bpf_get_retval may see arbitrary value passed by bpf_prog_run_array_cg for 9810 * CGROUP_GETSOCKOPT type. 
9811 */ 9812 if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT && 9813 env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT) 9814 break; 9815 9816 if (prog_type == BPF_PROG_TYPE_LSM && 9817 env->prog->expected_attach_type == BPF_LSM_CGROUP) { 9818 if (!env->prog->aux->attach_func_proto->type) 9819 break; 9820 bpf_lsm_get_retval_range(env->prog, &range); 9821 } else { 9822 range.minval = -MAX_ERRNO; 9823 range.maxval = 0; 9824 } 9825 9826 reg_set_srange64(ret_reg, range.minval, range.maxval); 9827 reg_set_srange32(ret_reg, range.minval, range.maxval); 9828 reg_bounds_sync(ret_reg); 9829 break; 9830 } 9831 9832 return reg_bounds_sanity_check(env, ret_reg, "retval"); 9833 } 9834 9835 static int 9836 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 9837 int func_id, int insn_idx) 9838 { 9839 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 9840 struct bpf_map *map = meta->map.ptr; 9841 9842 if (func_id != BPF_FUNC_tail_call && 9843 func_id != BPF_FUNC_map_lookup_elem && 9844 func_id != BPF_FUNC_map_update_elem && 9845 func_id != BPF_FUNC_map_delete_elem && 9846 func_id != BPF_FUNC_map_push_elem && 9847 func_id != BPF_FUNC_map_pop_elem && 9848 func_id != BPF_FUNC_map_peek_elem && 9849 func_id != BPF_FUNC_for_each_map_elem && 9850 func_id != BPF_FUNC_redirect_map && 9851 func_id != BPF_FUNC_map_lookup_percpu_elem) 9852 return 0; 9853 9854 if (map == NULL) { 9855 verifier_bug(env, "expected map for helper call"); 9856 return -EFAULT; 9857 } 9858 9859 /* In case of read-only, some additional restrictions 9860 * need to be applied in order to prevent altering the 9861 * state of the map from program side. 
9862 */ 9863 if ((map->map_flags & BPF_F_RDONLY_PROG) && 9864 (func_id == BPF_FUNC_map_delete_elem || 9865 func_id == BPF_FUNC_map_update_elem || 9866 func_id == BPF_FUNC_map_push_elem || 9867 func_id == BPF_FUNC_map_pop_elem)) { 9868 verbose(env, "write into map forbidden\n"); 9869 return -EACCES; 9870 } 9871 9872 if (!aux->map_ptr_state.map_ptr) 9873 bpf_map_ptr_store(aux, meta->map.ptr, 9874 !meta->map.ptr->bypass_spec_v1, false); 9875 else if (aux->map_ptr_state.map_ptr != meta->map.ptr) 9876 bpf_map_ptr_store(aux, meta->map.ptr, 9877 !meta->map.ptr->bypass_spec_v1, true); 9878 return 0; 9879 } 9880 9881 static int 9882 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, 9883 int func_id, int insn_idx) 9884 { 9885 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 9886 struct bpf_reg_state *reg; 9887 struct bpf_map *map = meta->map.ptr; 9888 u64 val, max; 9889 int err; 9890 9891 if (func_id != BPF_FUNC_tail_call) 9892 return 0; 9893 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) { 9894 verbose(env, "expected prog array map for tail call"); 9895 return -EINVAL; 9896 } 9897 9898 reg = reg_state(env, BPF_REG_3); 9899 val = reg->var_off.value; 9900 max = map->max_entries; 9901 9902 if (!(is_reg_const(reg, false) && val < max)) { 9903 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 9904 return 0; 9905 } 9906 9907 err = mark_chain_precision(env, BPF_REG_3); 9908 if (err) 9909 return err; 9910 if (bpf_map_key_unseen(aux)) 9911 bpf_map_key_store(aux, val); 9912 else if (!bpf_map_key_poisoned(aux) && 9913 bpf_map_key_immediate(aux) != val) 9914 bpf_map_key_store(aux, BPF_MAP_KEY_POISON); 9915 return 0; 9916 } 9917 9918 static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) 9919 { 9920 struct bpf_verifier_state *state = env->cur_state; 9921 enum bpf_prog_type type = resolve_prog_type(env->prog); 9922 struct bpf_reg_state *reg = reg_state(env, BPF_REG_0); 9923 bool refs_lingering = false; 9924 int i; 
9925 9926 if (!exception_exit && cur_func(env)->frameno) 9927 return 0; 9928 9929 for (i = 0; i < state->acquired_refs; i++) { 9930 if (state->refs[i].type != REF_TYPE_PTR) 9931 continue; 9932 /* Allow struct_ops programs to return a referenced kptr back to 9933 * kernel. Type checks are performed later in check_return_code. 9934 */ 9935 if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit && 9936 reg->id == state->refs[i].id) 9937 continue; 9938 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", 9939 state->refs[i].id, state->refs[i].insn_idx); 9940 refs_lingering = true; 9941 } 9942 return refs_lingering ? -EINVAL : 0; 9943 } 9944 9945 static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix) 9946 { 9947 int err; 9948 9949 if (check_lock && env->cur_state->active_locks) { 9950 verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix); 9951 return -EINVAL; 9952 } 9953 9954 err = check_reference_leak(env, exception_exit); 9955 if (err) { 9956 verbose(env, "%s would lead to reference leak\n", prefix); 9957 return err; 9958 } 9959 9960 if (check_lock && env->cur_state->active_irq_id) { 9961 verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix); 9962 return -EINVAL; 9963 } 9964 9965 if (check_lock && env->cur_state->active_rcu_locks) { 9966 verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix); 9967 return -EINVAL; 9968 } 9969 9970 if (check_lock && env->cur_state->active_preempt_locks) { 9971 verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix); 9972 return -EINVAL; 9973 } 9974 9975 return 0; 9976 } 9977 9978 static int check_bpf_snprintf_call(struct bpf_verifier_env *env, 9979 struct bpf_reg_state *regs) 9980 { 9981 struct bpf_reg_state *fmt_reg = ®s[BPF_REG_3]; 9982 struct bpf_reg_state *data_len_reg = ®s[BPF_REG_5]; 9983 struct bpf_map *fmt_map = fmt_reg->map_ptr; 9984 struct bpf_bprintf_data data = 
{}; 9985 int err, fmt_map_off, num_args; 9986 u64 fmt_addr; 9987 char *fmt; 9988 9989 /* data must be an array of u64 */ 9990 if (data_len_reg->var_off.value % 8) 9991 return -EINVAL; 9992 num_args = data_len_reg->var_off.value / 8; 9993 9994 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const 9995 * and map_direct_value_addr is set. 9996 */ 9997 fmt_map_off = fmt_reg->var_off.value; 9998 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr, 9999 fmt_map_off); 10000 if (err) { 10001 verbose(env, "failed to retrieve map value address\n"); 10002 return -EFAULT; 10003 } 10004 fmt = (char *)(long)fmt_addr + fmt_map_off; 10005 10006 /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we 10007 * can focus on validating the format specifiers. 10008 */ 10009 err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data); 10010 if (err < 0) 10011 verbose(env, "Invalid format string\n"); 10012 10013 return err; 10014 } 10015 10016 static int check_get_func_ip(struct bpf_verifier_env *env) 10017 { 10018 enum bpf_prog_type type = resolve_prog_type(env->prog); 10019 int func_id = BPF_FUNC_get_func_ip; 10020 10021 if (type == BPF_PROG_TYPE_TRACING) { 10022 if (!bpf_prog_has_trampoline(env->prog)) { 10023 verbose(env, "func %s#%d supported only for fentry/fexit/fsession/fmod_ret programs\n", 10024 func_id_name(func_id), func_id); 10025 return -ENOTSUPP; 10026 } 10027 return 0; 10028 } else if (type == BPF_PROG_TYPE_KPROBE) { 10029 return 0; 10030 } 10031 10032 verbose(env, "func %s#%d not supported for program type %d\n", 10033 func_id_name(func_id), func_id, type); 10034 return -ENOTSUPP; 10035 } 10036 10037 static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env) 10038 { 10039 return &env->insn_aux_data[env->insn_idx]; 10040 } 10041 10042 static bool loop_flag_is_zero(struct bpf_verifier_env *env) 10043 { 10044 struct bpf_reg_state *reg = reg_state(env, BPF_REG_4); 10045 bool reg_is_null = bpf_register_is_null(reg); 
10046 10047 if (reg_is_null) 10048 mark_chain_precision(env, BPF_REG_4); 10049 10050 return reg_is_null; 10051 } 10052 10053 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno) 10054 { 10055 struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state; 10056 10057 if (!state->initialized) { 10058 state->initialized = 1; 10059 state->fit_for_inline = loop_flag_is_zero(env); 10060 state->callback_subprogno = subprogno; 10061 return; 10062 } 10063 10064 if (!state->fit_for_inline) 10065 return; 10066 10067 state->fit_for_inline = (loop_flag_is_zero(env) && 10068 state->callback_subprogno == subprogno); 10069 } 10070 10071 /* Returns whether or not the given map can potentially elide 10072 * lookup return value nullness check. This is possible if the key 10073 * is statically known. 10074 */ 10075 static bool can_elide_value_nullness(const struct bpf_map *map) 10076 { 10077 if (map->map_flags & BPF_F_INNER_MAP) 10078 return false; 10079 10080 switch (map->map_type) { 10081 case BPF_MAP_TYPE_ARRAY: 10082 case BPF_MAP_TYPE_PERCPU_ARRAY: 10083 return true; 10084 default: 10085 return false; 10086 } 10087 } 10088 10089 int bpf_get_helper_proto(struct bpf_verifier_env *env, int func_id, 10090 const struct bpf_func_proto **ptr) 10091 { 10092 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) 10093 return -ERANGE; 10094 10095 if (!env->ops->get_func_proto) 10096 return -EINVAL; 10097 10098 *ptr = env->ops->get_func_proto(func_id, env->prog); 10099 return *ptr && (*ptr)->func ? 0 : -EINVAL; 10100 } 10101 10102 /* Check if we're in a sleepable context. 
*/ 10103 static inline bool in_sleepable_context(struct bpf_verifier_env *env) 10104 { 10105 return !env->cur_state->active_rcu_locks && 10106 !env->cur_state->active_preempt_locks && 10107 !env->cur_state->active_locks && 10108 !env->cur_state->active_irq_id && 10109 in_sleepable(env); 10110 } 10111 10112 static const char *non_sleepable_context_description(struct bpf_verifier_env *env) 10113 { 10114 if (env->cur_state->active_rcu_locks) 10115 return "rcu_read_lock region"; 10116 if (env->cur_state->active_preempt_locks) 10117 return "non-preemptible region"; 10118 if (env->cur_state->active_irq_id) 10119 return "IRQ-disabled region"; 10120 if (env->cur_state->active_locks) 10121 return "lock region"; 10122 return "non-sleepable prog"; 10123 } 10124 10125 static int release_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 10126 bool convert_rcu, bool release_dynptr) 10127 { 10128 int err = -EINVAL; 10129 10130 if (bpf_register_is_null(reg)) 10131 return 0; 10132 10133 if (release_dynptr) 10134 err = unmark_stack_slots_dynptr(env, reg); 10135 else if (convert_rcu) 10136 err = ref_convert_alloc_rcu_protected(env, reg->id); 10137 else if (reg_is_referenced(env, reg)) 10138 err = release_reference(env, reg->id); 10139 10140 return err; 10141 } 10142 10143 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 10144 int *insn_idx_p) 10145 { 10146 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 10147 bool returns_cpu_specific_alloc_ptr = false; 10148 const struct bpf_func_proto *fn = NULL; 10149 enum bpf_return_type ret_type; 10150 enum bpf_type_flag ret_flag; 10151 struct bpf_reg_state *regs; 10152 struct bpf_call_arg_meta meta; 10153 int insn_idx = *insn_idx_p; 10154 bool changes_data; 10155 int i, err, func_id; 10156 10157 /* find function prototype */ 10158 func_id = insn->imm; 10159 err = bpf_get_helper_proto(env, insn->imm, &fn); 10160 if (err == -ERANGE) { 10161 verbose(env, "invalid func %s#%d\n", 
func_id_name(func_id), func_id); 10162 return -EINVAL; 10163 } 10164 10165 if (err) { 10166 verbose(env, "program of this type cannot use helper %s#%d\n", 10167 func_id_name(func_id), func_id); 10168 return err; 10169 } 10170 10171 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 10172 if (!env->prog->gpl_compatible && fn->gpl_only) { 10173 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n"); 10174 return -EINVAL; 10175 } 10176 10177 if (fn->allowed && !fn->allowed(env->prog)) { 10178 verbose(env, "helper call is not allowed in probe\n"); 10179 return -EINVAL; 10180 } 10181 10182 /* With LD_ABS/IND some JITs save/restore skb from r1. */ 10183 changes_data = bpf_helper_changes_pkt_data(func_id); 10184 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { 10185 verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id); 10186 return -EFAULT; 10187 } 10188 10189 memset(&meta, 0, sizeof(meta)); 10190 meta.pkt_access = fn->pkt_access; 10191 10192 err = check_func_proto(fn, &meta); 10193 if (err) { 10194 verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id); 10195 return err; 10196 } 10197 10198 if (fn->might_sleep && !in_sleepable_context(env)) { 10199 verbose(env, "sleepable helper %s#%d in %s\n", func_id_name(func_id), func_id, 10200 non_sleepable_context_description(env)); 10201 return -EINVAL; 10202 } 10203 10204 /* Track non-sleepable context for helpers. 
*/ 10205 if (!in_sleepable_context(env)) 10206 env->insn_aux_data[insn_idx].non_sleepable = true; 10207 10208 meta.func_id = func_id; 10209 /* check args */ 10210 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { 10211 err = check_func_arg(env, i, &meta, fn, insn_idx); 10212 if (err) 10213 return err; 10214 } 10215 10216 err = record_func_map(env, &meta, func_id, insn_idx); 10217 if (err) 10218 return err; 10219 10220 err = record_func_key(env, &meta, func_id, insn_idx); 10221 if (err) 10222 return err; 10223 10224 regs = cur_regs(env); 10225 10226 /* Mark slots with STACK_MISC in case of raw mode, stack offset 10227 * is inferred from register state. 10228 */ 10229 for (i = 0; i < meta.access_size; i++) { 10230 err = check_mem_access(env, insn_idx, regs + meta.regno, argno_from_reg(meta.regno), i, BPF_B, 10231 BPF_WRITE, -1, false, false); 10232 if (err) 10233 return err; 10234 } 10235 10236 if (meta.release_regno) { 10237 struct bpf_reg_state *reg = ®s[meta.release_regno]; 10238 bool convert_rcu = (func_id == BPF_FUNC_kptr_xchg) && in_rcu_cs(env) && 10239 (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU); 10240 10241 err = release_reg(env, reg, convert_rcu, !!meta.dynptr.id); 10242 if (err) 10243 return err; 10244 } 10245 10246 switch (func_id) { 10247 case BPF_FUNC_tail_call: 10248 err = check_resource_leak(env, false, true, "tail_call"); 10249 if (err) 10250 return err; 10251 break; 10252 case BPF_FUNC_get_local_storage: 10253 /* check that flags argument in get_local_storage(map, flags) is 0, 10254 * this is required because get_local_storage() can't return an error. 
10255 */ 10256 if (!bpf_register_is_null(®s[BPF_REG_2])) { 10257 verbose(env, "get_local_storage() doesn't support non-zero flags\n"); 10258 return -EINVAL; 10259 } 10260 break; 10261 case BPF_FUNC_for_each_map_elem: 10262 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 10263 set_map_elem_callback_state); 10264 break; 10265 case BPF_FUNC_timer_set_callback: 10266 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 10267 set_timer_callback_state); 10268 break; 10269 case BPF_FUNC_find_vma: 10270 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 10271 set_find_vma_callback_state); 10272 break; 10273 case BPF_FUNC_snprintf: 10274 err = check_bpf_snprintf_call(env, regs); 10275 break; 10276 case BPF_FUNC_loop: 10277 update_loop_inline_state(env, meta.subprogno); 10278 /* Verifier relies on R1 value to determine if bpf_loop() iteration 10279 * is finished, thus mark it precise. 10280 */ 10281 err = mark_chain_precision(env, BPF_REG_1); 10282 if (err) 10283 return err; 10284 if (cur_func(env)->callback_depth < reg_umax(®s[BPF_REG_1])) { 10285 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 10286 set_loop_callback_state); 10287 } else { 10288 cur_func(env)->callback_depth = 0; 10289 if (env->log.level & BPF_LOG_LEVEL2) 10290 verbose(env, "frame%d bpf_loop iteration limit reached\n", 10291 env->cur_state->curframe); 10292 } 10293 break; 10294 case BPF_FUNC_dynptr_from_mem: 10295 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { 10296 verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n", 10297 reg_type_str(env, regs[BPF_REG_1].type)); 10298 return -EACCES; 10299 } 10300 break; 10301 case BPF_FUNC_set_retval: 10302 { 10303 struct bpf_retval_range range = { 10304 .minval = -MAX_ERRNO, 10305 .maxval = 0, 10306 .return_32bit = true 10307 }; 10308 struct bpf_reg_state *r1 = ®s[BPF_REG_1]; 10309 10310 if (r1->type != SCALAR_VALUE) { 10311 verbose(env, "R1 is not a scalar\n"); 10312 return -EINVAL; 10313 } 10314 
10315 /* CGROUP_GETSOCKOPT is allowed to return arbitrary value */ 10316 if (prog_type == BPF_PROG_TYPE_CGROUP_SOCKOPT && 10317 env->prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT) 10318 break; 10319 10320 if (prog_type == BPF_PROG_TYPE_LSM && 10321 env->prog->expected_attach_type == BPF_LSM_CGROUP) { 10322 if (!env->prog->aux->attach_func_proto->type) { 10323 /* Make sure programs that attach to void 10324 * hooks don't try to modify return value. 10325 */ 10326 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); 10327 return -EINVAL; 10328 } 10329 bpf_lsm_get_retval_range(env->prog, &range); 10330 } 10331 10332 err = mark_chain_precision(env, BPF_REG_1); 10333 if (err) 10334 return err; 10335 10336 if (!retval_range_within(range, r1)) { 10337 verbose_invalid_scalar(env, r1, range, "At bpf_set_retval", "R1"); 10338 return -EINVAL; 10339 } 10340 10341 break; 10342 } 10343 case BPF_FUNC_dynptr_write: 10344 { 10345 enum bpf_dynptr_type dynptr_type = meta.dynptr.type; 10346 10347 if (dynptr_type == BPF_DYNPTR_TYPE_INVALID) 10348 return -EFAULT; 10349 10350 if (dynptr_type == BPF_DYNPTR_TYPE_SKB || 10351 dynptr_type == BPF_DYNPTR_TYPE_SKB_META) 10352 /* this will trigger clear_all_pkt_pointers(), which will 10353 * invalidate all dynptr slices associated with the skb 10354 */ 10355 changes_data = true; 10356 10357 break; 10358 } 10359 case BPF_FUNC_per_cpu_ptr: 10360 case BPF_FUNC_this_cpu_ptr: 10361 { 10362 struct bpf_reg_state *reg = ®s[BPF_REG_1]; 10363 const struct btf_type *type; 10364 10365 if (reg->type & MEM_RCU) { 10366 type = btf_type_by_id(reg->btf, reg->btf_id); 10367 if (!type || !btf_type_is_struct(type)) { 10368 verbose(env, "Helper has invalid btf/btf_id in R1\n"); 10369 return -EFAULT; 10370 } 10371 returns_cpu_specific_alloc_ptr = true; 10372 env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true; 10373 } 10374 break; 10375 } 10376 case BPF_FUNC_user_ringbuf_drain: 10377 err = push_callback_call(env, 
insn, insn_idx, meta.subprogno, 10378 set_user_ringbuf_callback_state); 10379 break; 10380 } 10381 10382 if (err) 10383 return err; 10384 10385 /* reset caller saved regs */ 10386 for (i = 0; i < CALLER_SAVED_REGS; i++) { 10387 bpf_mark_reg_not_init(env, ®s[caller_saved[i]]); 10388 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 10389 } 10390 invalidate_outgoing_stack_args(env, cur_func(env)); 10391 10392 /* helper call returns 64-bit value. */ 10393 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; 10394 10395 /* update return register (already marked as written above) */ 10396 ret_type = fn->ret_type; 10397 ret_flag = type_flag(ret_type); 10398 10399 switch (base_type(ret_type)) { 10400 case RET_INTEGER: 10401 /* sets type to SCALAR_VALUE */ 10402 mark_reg_unknown(env, regs, BPF_REG_0); 10403 break; 10404 case RET_VOID: 10405 regs[BPF_REG_0].type = NOT_INIT; 10406 break; 10407 case RET_PTR_TO_MAP_VALUE: 10408 /* There is no offset yet applied, variable or fixed */ 10409 mark_reg_known_zero(env, regs, BPF_REG_0); 10410 /* remember map_ptr, so that check_map_access() 10411 * can check 'value_size' boundary of memory access 10412 * to map element returned from bpf_map_lookup_elem() 10413 */ 10414 if (meta.map.ptr == NULL) { 10415 verifier_bug(env, "unexpected null map_ptr"); 10416 return -EFAULT; 10417 } 10418 10419 if (func_id == BPF_FUNC_map_lookup_elem && 10420 can_elide_value_nullness(meta.map.ptr) && 10421 meta.const_map_key >= 0 && 10422 meta.const_map_key < meta.map.ptr->max_entries) 10423 ret_flag &= ~PTR_MAYBE_NULL; 10424 10425 regs[BPF_REG_0].map_ptr = meta.map.ptr; 10426 regs[BPF_REG_0].map_uid = meta.map.uid; 10427 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; 10428 if (!type_may_be_null(ret_flag) && 10429 btf_record_has_field(meta.map.ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) { 10430 regs[BPF_REG_0].id = ++env->id_gen; 10431 } 10432 break; 10433 case RET_PTR_TO_SOCKET: 10434 mark_reg_known_zero(env, regs, BPF_REG_0); 10435 
regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; 10436 break; 10437 case RET_PTR_TO_SOCK_COMMON: 10438 mark_reg_known_zero(env, regs, BPF_REG_0); 10439 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag; 10440 break; 10441 case RET_PTR_TO_TCP_SOCK: 10442 mark_reg_known_zero(env, regs, BPF_REG_0); 10443 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; 10444 break; 10445 case RET_PTR_TO_MEM: 10446 mark_reg_known_zero(env, regs, BPF_REG_0); 10447 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; 10448 regs[BPF_REG_0].mem_size = meta.mem_size; 10449 break; 10450 case RET_PTR_TO_MEM_OR_BTF_ID: 10451 { 10452 const struct btf_type *t; 10453 10454 mark_reg_known_zero(env, regs, BPF_REG_0); 10455 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL); 10456 if (!btf_type_is_struct(t)) { 10457 u32 tsize; 10458 const struct btf_type *ret; 10459 const char *tname; 10460 10461 /* resolve the type size of ksym. */ 10462 ret = btf_resolve_size(meta.ret_btf, t, &tsize); 10463 if (IS_ERR(ret)) { 10464 tname = btf_name_by_offset(meta.ret_btf, t->name_off); 10465 verbose(env, "unable to resolve the size of type '%s': %ld\n", 10466 tname, PTR_ERR(ret)); 10467 return -EINVAL; 10468 } 10469 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; 10470 regs[BPF_REG_0].mem_size = tsize; 10471 } else { 10472 if (returns_cpu_specific_alloc_ptr) { 10473 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU; 10474 } else { 10475 /* MEM_RDONLY may be carried from ret_flag, but it 10476 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise 10477 * it will confuse the check of PTR_TO_BTF_ID in 10478 * check_mem_access(). 
10479 */ 10480 ret_flag &= ~MEM_RDONLY; 10481 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; 10482 } 10483 10484 regs[BPF_REG_0].btf = meta.ret_btf; 10485 regs[BPF_REG_0].btf_id = meta.ret_btf_id; 10486 } 10487 break; 10488 } 10489 case RET_PTR_TO_BTF_ID: 10490 { 10491 struct btf *ret_btf; 10492 int ret_btf_id; 10493 10494 mark_reg_known_zero(env, regs, BPF_REG_0); 10495 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; 10496 if (func_id == BPF_FUNC_kptr_xchg) { 10497 ret_btf = meta.kptr_field->kptr.btf; 10498 ret_btf_id = meta.kptr_field->kptr.btf_id; 10499 if (!btf_is_kernel(ret_btf)) { 10500 regs[BPF_REG_0].type |= MEM_ALLOC; 10501 if (meta.kptr_field->type == BPF_KPTR_PERCPU) 10502 regs[BPF_REG_0].type |= MEM_PERCPU; 10503 } 10504 } else { 10505 if (fn->ret_btf_id == BPF_PTR_POISON) { 10506 verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type", 10507 func_id_name(func_id)); 10508 return -EFAULT; 10509 } 10510 ret_btf = btf_vmlinux; 10511 ret_btf_id = *fn->ret_btf_id; 10512 } 10513 if (ret_btf_id == 0) { 10514 verbose(env, "invalid return type %u of func %s#%d\n", 10515 base_type(ret_type), func_id_name(func_id), 10516 func_id); 10517 return -EINVAL; 10518 } 10519 regs[BPF_REG_0].btf = ret_btf; 10520 regs[BPF_REG_0].btf_id = ret_btf_id; 10521 break; 10522 } 10523 default: 10524 verbose(env, "unknown return type %u of func %s#%d\n", 10525 base_type(ret_type), func_id_name(func_id), func_id); 10526 return -EINVAL; 10527 } 10528 10529 if (type_may_be_null(regs[BPF_REG_0].type)) 10530 regs[BPF_REG_0].id = ++env->id_gen; 10531 10532 if (is_ptr_cast_function(func_id) && 10533 find_reference_state(env->cur_state, meta.ref_obj.id)) { 10534 struct bpf_verifier_state *branch; 10535 struct bpf_reg_state *r0; 10536 10537 err = validate_ref_obj(env, &meta.ref_obj); 10538 if (err) 10539 return err; 10540 10541 /* 10542 * In order for a release of any of the original or cast pointers 10543 * to invalidate all other pointers, reuse the same reference id 
for 10544 * the cast result. 10545 * This reference id can't be used for nullness propagation, 10546 * as cast might return NULL for a non-NULL input. 10547 * Hence, explore the NULL case as a separate branch. 10548 */ 10549 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); 10550 if (IS_ERR(branch)) 10551 return PTR_ERR(branch); 10552 10553 r0 = &branch->frame[branch->curframe]->regs[BPF_REG_0]; 10554 __mark_reg_known_zero(r0); 10555 r0->type = SCALAR_VALUE; 10556 10557 regs[BPF_REG_0].type &= ~PTR_MAYBE_NULL; 10558 regs[BPF_REG_0].id = meta.ref_obj.id; 10559 } else if (is_acquire_function(func_id, meta.map.ptr)) { 10560 int id = acquire_reference(env, insn_idx, 0); 10561 10562 if (id < 0) 10563 return id; 10564 10565 regs[BPF_REG_0].id = id; 10566 } 10567 10568 if (func_id == BPF_FUNC_dynptr_data) 10569 regs[BPF_REG_0].parent_id = meta.dynptr.id; 10570 10571 err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); 10572 if (err) 10573 return err; 10574 10575 err = check_map_func_compatibility(env, meta.map.ptr, func_id); 10576 if (err) 10577 return err; 10578 10579 if ((func_id == BPF_FUNC_get_stack || 10580 func_id == BPF_FUNC_get_task_stack) && 10581 !env->prog->has_callchain_buf) { 10582 const char *err_str; 10583 10584 #ifdef CONFIG_PERF_EVENTS 10585 err = get_callchain_buffers(sysctl_perf_event_max_stack); 10586 err_str = "cannot get callchain buffer for func %s#%d\n"; 10587 #else 10588 err = -ENOTSUPP; 10589 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; 10590 #endif 10591 if (err) { 10592 verbose(env, err_str, func_id_name(func_id), func_id); 10593 return err; 10594 } 10595 10596 env->prog->has_callchain_buf = true; 10597 } 10598 10599 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) 10600 env->prog->call_get_stack = true; 10601 10602 if (func_id == BPF_FUNC_get_func_ip) { 10603 if (check_get_func_ip(env)) 10604 return -ENOTSUPP; 10605 env->prog->call_get_func_ip = true; 10606 } 10607 
10608 if (func_id == BPF_FUNC_tail_call) { 10609 if (env->cur_state->curframe) { 10610 struct bpf_verifier_state *branch; 10611 10612 mark_reg_scratched(env, BPF_REG_0); 10613 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); 10614 if (IS_ERR(branch)) 10615 return PTR_ERR(branch); 10616 clear_all_pkt_pointers(env); 10617 mark_reg_unknown(env, regs, BPF_REG_0); 10618 err = prepare_func_exit(env, &env->insn_idx); 10619 if (err) 10620 return err; 10621 env->insn_idx--; 10622 } else { 10623 changes_data = false; 10624 } 10625 } 10626 10627 if (changes_data) 10628 clear_all_pkt_pointers(env); 10629 return 0; 10630 } 10631 10632 /* mark_btf_func_reg_size() is used when the reg size is determined by 10633 * the BTF func_proto's return value size and argument. 10634 */ 10635 static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs, 10636 u32 regno, size_t reg_size) 10637 { 10638 struct bpf_reg_state *reg = ®s[regno]; 10639 10640 if (regno == BPF_REG_0) { 10641 /* Function return value */ 10642 reg->subreg_def = reg_size == sizeof(u64) ? 
10643 DEF_NOT_SUBREG : env->insn_idx + 1; 10644 } else if (reg_size == sizeof(u64)) { 10645 /* Function argument */ 10646 mark_insn_zext(env, reg); 10647 } 10648 } 10649 10650 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, 10651 size_t reg_size) 10652 { 10653 return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size); 10654 } 10655 10656 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) 10657 { 10658 return meta->kfunc_flags & KF_ACQUIRE; 10659 } 10660 10661 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) 10662 { 10663 return meta->kfunc_flags & KF_RELEASE; 10664 } 10665 10666 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta) 10667 { 10668 return meta->kfunc_flags & KF_DESTRUCTIVE; 10669 } 10670 10671 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta) 10672 { 10673 return meta->kfunc_flags & KF_RCU; 10674 } 10675 10676 static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta) 10677 { 10678 return meta->kfunc_flags & KF_RCU_PROTECTED; 10679 } 10680 10681 static bool is_kfunc_arg_mem_size(const struct btf *btf, 10682 const struct btf_param *arg, 10683 const struct bpf_reg_state *reg) 10684 { 10685 const struct btf_type *t; 10686 10687 t = btf_type_skip_modifiers(btf, arg->type, NULL); 10688 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) 10689 return false; 10690 10691 return btf_param_match_suffix(btf, arg, "__sz"); 10692 } 10693 10694 static bool is_kfunc_arg_const_mem_size(const struct btf *btf, 10695 const struct btf_param *arg, 10696 const struct bpf_reg_state *reg) 10697 { 10698 const struct btf_type *t; 10699 10700 t = btf_type_skip_modifiers(btf, arg->type, NULL); 10701 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) 10702 return false; 10703 10704 return btf_param_match_suffix(btf, arg, "__szk"); 10705 } 10706 10707 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) 10708 { 10709 
return btf_param_match_suffix(btf, arg, "__k"); 10710 } 10711 10712 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg) 10713 { 10714 return btf_param_match_suffix(btf, arg, "__ign"); 10715 } 10716 10717 static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg) 10718 { 10719 return btf_param_match_suffix(btf, arg, "__map"); 10720 } 10721 10722 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg) 10723 { 10724 return btf_param_match_suffix(btf, arg, "__alloc"); 10725 } 10726 10727 static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg) 10728 { 10729 return btf_param_match_suffix(btf, arg, "__uninit"); 10730 } 10731 10732 static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg) 10733 { 10734 return btf_param_match_suffix(btf, arg, "__refcounted_kptr"); 10735 } 10736 10737 static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg) 10738 { 10739 return btf_param_match_suffix(btf, arg, "__nullable"); 10740 } 10741 10742 static bool is_kfunc_arg_nonown_allowed(const struct btf *btf, const struct btf_param *arg) 10743 { 10744 return btf_param_match_suffix(btf, arg, "__nonown_allowed"); 10745 } 10746 10747 static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg) 10748 { 10749 return btf_param_match_suffix(btf, arg, "__str"); 10750 } 10751 10752 static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg) 10753 { 10754 return btf_param_match_suffix(btf, arg, "__irq_flag"); 10755 } 10756 10757 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, 10758 const struct btf_param *arg, 10759 const char *name) 10760 { 10761 int len, target_len = strlen(name); 10762 const char *param_name; 10763 10764 param_name = btf_name_by_offset(btf, arg->name_off); 10765 if (str_is_empty(param_name)) 10766 return false; 10767 len = strlen(param_name); 
10768 if (len != target_len) 10769 return false; 10770 if (strcmp(param_name, name)) 10771 return false; 10772 10773 return true; 10774 } 10775 10776 enum { 10777 KF_ARG_DYNPTR_ID, 10778 KF_ARG_LIST_HEAD_ID, 10779 KF_ARG_LIST_NODE_ID, 10780 KF_ARG_RB_ROOT_ID, 10781 KF_ARG_RB_NODE_ID, 10782 KF_ARG_WORKQUEUE_ID, 10783 KF_ARG_RES_SPIN_LOCK_ID, 10784 KF_ARG_TASK_WORK_ID, 10785 KF_ARG_PROG_AUX_ID, 10786 KF_ARG_TIMER_ID 10787 }; 10788 10789 BTF_ID_LIST(kf_arg_btf_ids) 10790 BTF_ID(struct, bpf_dynptr) 10791 BTF_ID(struct, bpf_list_head) 10792 BTF_ID(struct, bpf_list_node) 10793 BTF_ID(struct, bpf_rb_root) 10794 BTF_ID(struct, bpf_rb_node) 10795 BTF_ID(struct, bpf_wq) 10796 BTF_ID(struct, bpf_res_spin_lock) 10797 BTF_ID(struct, bpf_task_work) 10798 BTF_ID(struct, bpf_prog_aux) 10799 BTF_ID(struct, bpf_timer) 10800 10801 static bool __is_kfunc_ptr_arg_type(const struct btf *btf, 10802 const struct btf_param *arg, int type) 10803 { 10804 const struct btf_type *t; 10805 u32 res_id; 10806 10807 t = btf_type_skip_modifiers(btf, arg->type, NULL); 10808 if (!t) 10809 return false; 10810 if (!btf_type_is_ptr(t)) 10811 return false; 10812 t = btf_type_skip_modifiers(btf, t->type, &res_id); 10813 if (!t) 10814 return false; 10815 return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]); 10816 } 10817 10818 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg) 10819 { 10820 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID); 10821 } 10822 10823 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg) 10824 { 10825 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID); 10826 } 10827 10828 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg) 10829 { 10830 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID); 10831 } 10832 10833 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg) 10834 { 10835 return 
__is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID); 10836 } 10837 10838 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg) 10839 { 10840 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID); 10841 } 10842 10843 static bool is_kfunc_arg_timer(const struct btf *btf, const struct btf_param *arg) 10844 { 10845 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TIMER_ID); 10846 } 10847 10848 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg) 10849 { 10850 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID); 10851 } 10852 10853 static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg) 10854 { 10855 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID); 10856 } 10857 10858 static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg) 10859 { 10860 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID); 10861 } 10862 10863 static bool is_rbtree_node_type(const struct btf_type *t) 10864 { 10865 return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]); 10866 } 10867 10868 static bool is_list_node_type(const struct btf_type *t) 10869 { 10870 return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]); 10871 } 10872 10873 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf, 10874 const struct btf_param *arg) 10875 { 10876 const struct btf_type *t; 10877 10878 t = btf_type_resolve_func_ptr(btf, arg->type, NULL); 10879 if (!t) 10880 return false; 10881 10882 return true; 10883 } 10884 10885 static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg) 10886 { 10887 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_PROG_AUX_ID); 10888 } 10889 10890 /* 10891 * A kfunc with KF_IMPLICIT_ARGS has two prototypes in BTF: 10892 * - the _impl prototype with full arg list (meta->func_proto) 10893 * - the BPF API prototype w/o implicit args 
(func->type in BTF) 10894 * To determine whether an argument is implicit, we compare its position 10895 * against the number of arguments in the prototype w/o implicit args. 10896 */ 10897 static bool is_kfunc_arg_implicit(const struct bpf_kfunc_call_arg_meta *meta, u32 arg_idx) 10898 { 10899 const struct btf_type *func, *func_proto; 10900 u32 argn; 10901 10902 if (!(meta->kfunc_flags & KF_IMPLICIT_ARGS)) 10903 return false; 10904 10905 func = btf_type_by_id(meta->btf, meta->func_id); 10906 func_proto = btf_type_by_id(meta->btf, func->type); 10907 argn = btf_type_vlen(func_proto); 10908 10909 return argn <= arg_idx; 10910 } 10911 10912 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ 10913 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env, 10914 const struct btf *btf, 10915 const struct btf_type *t, int rec) 10916 { 10917 const struct btf_type *member_type; 10918 const struct btf_member *member; 10919 u32 i; 10920 10921 if (!btf_type_is_struct(t)) 10922 return false; 10923 10924 for_each_member(i, t, member) { 10925 const struct btf_array *array; 10926 10927 member_type = btf_type_skip_modifiers(btf, member->type, NULL); 10928 if (btf_type_is_struct(member_type)) { 10929 if (rec >= 3) { 10930 verbose(env, "max struct nesting depth exceeded\n"); 10931 return false; 10932 } 10933 if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1)) 10934 return false; 10935 continue; 10936 } 10937 if (btf_type_is_array(member_type)) { 10938 array = btf_array(member_type); 10939 if (!array->nelems) 10940 return false; 10941 member_type = btf_type_skip_modifiers(btf, array->type, NULL); 10942 if (!btf_type_is_scalar(member_type)) 10943 return false; 10944 continue; 10945 } 10946 if (!btf_type_is_scalar(member_type)) 10947 return false; 10948 } 10949 return true; 10950 } 10951 10952 enum kfunc_ptr_arg_type { 10953 KF_ARG_PTR_TO_CTX, 10954 KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */ 10955 
/*
 * Indices into special_kfunc_list[]. Each enumerator selects the BTF id of
 * the kfunc it is named after, so the order here must stay identical to the
 * order of the BTF_ID() / BTF_ID_UNUSED entries in
 * BTF_ID_LIST(special_kfunc_list). Add new entries to both, at the same
 * position.
 */
enum special_kfunc_type {
	KF_bpf_obj_new_impl,
	KF_bpf_obj_new,
	KF_bpf_obj_drop_impl,
	KF_bpf_obj_drop,
	KF_bpf_refcount_acquire_impl,
	KF_bpf_refcount_acquire,
	KF_bpf_list_push_front_impl,
	KF_bpf_list_push_front,
	KF_bpf_list_push_back_impl,
	KF_bpf_list_push_back,
	KF_bpf_list_add,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_list_del,
	KF_bpf_list_front,
	KF_bpf_list_back,
	KF_bpf_list_is_first,
	KF_bpf_list_is_last,
	KF_bpf_list_empty,
	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
	KF_bpf_rcu_read_unlock,
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_add,
	KF_bpf_rbtree_first,
	KF_bpf_rbtree_root,
	KF_bpf_rbtree_left,
	KF_bpf_rbtree_right,
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_from_skb_meta,
	KF_bpf_xdp_pull_data,
	KF_bpf_dynptr_slice,
	KF_bpf_dynptr_slice_rdwr,
	KF_bpf_dynptr_clone,
	KF_bpf_percpu_obj_new_impl,
	KF_bpf_percpu_obj_new,
	KF_bpf_percpu_obj_drop_impl,
	KF_bpf_percpu_obj_drop,
	KF_bpf_throw,
	KF_bpf_wq_set_callback,
	KF_bpf_preempt_disable,
	KF_bpf_preempt_enable,
	KF_bpf_iter_css_task_new,
	KF_bpf_session_cookie,
	KF_bpf_get_kmem_cache,
	KF_bpf_local_irq_save,
	KF_bpf_local_irq_restore,
	KF_bpf_iter_num_new,
	KF_bpf_iter_num_next,
	KF_bpf_iter_num_destroy,
	KF_bpf_set_dentry_xattr,
	KF_bpf_remove_dentry_xattr,
	KF_bpf_res_spin_lock,
	KF_bpf_res_spin_unlock,
	KF_bpf_res_spin_lock_irqsave,
	KF_bpf_res_spin_unlock_irqrestore,
	KF_bpf_dynptr_from_file,
	KF_bpf_dynptr_file_discard,
	KF___bpf_trap,
	KF_bpf_task_work_schedule_signal,
	KF_bpf_task_work_schedule_resume,
	KF_bpf_arena_alloc_pages,
	KF_bpf_arena_free_pages,
	KF_bpf_arena_reserve_pages,
	KF_bpf_session_is_return,
	KF_bpf_stream_vprintk,
	KF_bpf_stream_print_stack,
};
/*
 * BTF ids of the kfuncs that get special treatment, indexed by
 * enum special_kfunc_type. Entry N of this list corresponds to enumerator N,
 * so the two must be kept in the same order. Where a kfunc is only listed
 * under a config option, the #else branch emits one BTF_ID_UNUSED per
 * omitted entry so that later indices do not shift.
 */
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_new)
BTF_ID(func, bpf_obj_drop_impl)
BTF_ID(func, bpf_obj_drop)
BTF_ID(func, bpf_refcount_acquire_impl)
BTF_ID(func, bpf_refcount_acquire)
BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_front)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_push_back)
BTF_ID(func, bpf_list_add)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_list_del)
BTF_ID(func, bpf_list_front)
BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_list_is_first)
BTF_ID(func, bpf_list_is_last)
BTF_ID(func, bpf_list_empty)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_rbtree_root)
BTF_ID(func, bpf_rbtree_left)
BTF_ID(func, bpf_rbtree_right)
/* Four networking entries, or four placeholders. */
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_from_skb_meta)
BTF_ID(func, bpf_xdp_pull_data)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_new)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_percpu_obj_drop)
BTF_ID(func, bpf_throw)
BTF_ID(func, bpf_wq_set_callback)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#else
BTF_ID_UNUSED
#endif
#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_cookie)
#else
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_get_kmem_cache)
BTF_ID(func, bpf_local_irq_save)
BTF_ID(func, bpf_local_irq_restore)
BTF_ID(func, bpf_iter_num_new)
BTF_ID(func, bpf_iter_num_next)
BTF_ID(func, bpf_iter_num_destroy)
/* Two LSM entries, or two placeholders. */
#ifdef CONFIG_BPF_LSM
BTF_ID(func, bpf_set_dentry_xattr)
BTF_ID(func, bpf_remove_dentry_xattr)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_res_spin_lock)
BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
BTF_ID(func, bpf_dynptr_from_file)
BTF_ID(func, bpf_dynptr_file_discard)
BTF_ID(func, __bpf_trap)
BTF_ID(func, bpf_task_work_schedule_signal)
BTF_ID(func, bpf_task_work_schedule_resume)
BTF_ID(func, bpf_arena_alloc_pages)
BTF_ID(func, bpf_arena_free_pages)
BTF_ID(func, bpf_arena_reserve_pages)
#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_is_return)
#else
BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_stream_vprintk)
BTF_ID(func, bpf_stream_print_stack)
BTF_ID(func, bpf_stream_vprintk) 11143 BTF_ID(func, bpf_stream_print_stack) 11144 11145 static bool is_bpf_obj_new_kfunc(u32 func_id) 11146 { 11147 return func_id == special_kfunc_list[KF_bpf_obj_new] || 11148 func_id == special_kfunc_list[KF_bpf_obj_new_impl]; 11149 } 11150 11151 static bool is_bpf_percpu_obj_new_kfunc(u32 func_id) 11152 { 11153 return func_id == special_kfunc_list[KF_bpf_percpu_obj_new] || 11154 func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]; 11155 } 11156 11157 static bool is_bpf_obj_drop_kfunc(u32 func_id) 11158 { 11159 return func_id == special_kfunc_list[KF_bpf_obj_drop] || 11160 func_id == special_kfunc_list[KF_bpf_obj_drop_impl]; 11161 } 11162 11163 static bool is_bpf_percpu_obj_drop_kfunc(u32 func_id) 11164 { 11165 return func_id == special_kfunc_list[KF_bpf_percpu_obj_drop] || 11166 func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]; 11167 } 11168 11169 static bool is_bpf_refcount_acquire_kfunc(u32 func_id) 11170 { 11171 return func_id == special_kfunc_list[KF_bpf_refcount_acquire] || 11172 func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; 11173 } 11174 11175 static bool is_bpf_list_push_kfunc(u32 func_id) 11176 { 11177 return func_id == special_kfunc_list[KF_bpf_list_push_front] || 11178 func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || 11179 func_id == special_kfunc_list[KF_bpf_list_push_back] || 11180 func_id == special_kfunc_list[KF_bpf_list_push_back_impl] || 11181 func_id == special_kfunc_list[KF_bpf_list_add]; 11182 } 11183 11184 static bool is_bpf_rbtree_add_kfunc(u32 func_id) 11185 { 11186 return func_id == special_kfunc_list[KF_bpf_rbtree_add] || 11187 func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; 11188 } 11189 11190 static bool is_task_work_add_kfunc(u32 func_id) 11191 { 11192 return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] || 11193 func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume]; 11194 } 11195 11196 static bool is_kfunc_ret_null(struct 
bpf_kfunc_call_arg_meta *meta) 11197 { 11198 if (is_bpf_refcount_acquire_kfunc(meta->func_id) && meta->arg_owning_ref) 11199 return false; 11200 11201 return meta->kfunc_flags & KF_RET_NULL; 11202 } 11203 11204 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) 11205 { 11206 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock]; 11207 } 11208 11209 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta) 11210 { 11211 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock]; 11212 } 11213 11214 static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta) 11215 { 11216 return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable]; 11217 } 11218 11219 static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta) 11220 { 11221 return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable]; 11222 } 11223 11224 bool bpf_is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta) 11225 { 11226 return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data]; 11227 } 11228 11229 static enum kfunc_ptr_arg_type 11230 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_func_state *caller, 11231 struct bpf_reg_state *regs, struct bpf_kfunc_call_arg_meta *meta, 11232 const struct btf_type *t, const struct btf_type *ref_t, 11233 const char *ref_tname, const struct btf_param *args, 11234 int arg, int nargs, argno_t argno, struct bpf_reg_state *reg) 11235 { 11236 bool arg_mem_size = false; 11237 11238 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || 11239 meta->func_id == special_kfunc_list[KF_bpf_session_is_return] || 11240 meta->func_id == special_kfunc_list[KF_bpf_session_cookie]) 11241 return KF_ARG_PTR_TO_CTX; 11242 11243 if (arg + 1 < nargs && 11244 (is_kfunc_arg_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, arg + 1)) || 11245 is_kfunc_arg_const_mem_size(meta->btf, &args[arg + 1], get_func_arg_reg(caller, regs, 
arg + 1)))) 11246 arg_mem_size = true; 11247 11248 /* In this function, we verify the kfunc's BTF as per the argument type, 11249 * leaving the rest of the verification with respect to the register 11250 * type to our caller. When a set of conditions hold in the BTF type of 11251 * arguments, we resolve it to a known kfunc_ptr_arg_type. 11252 */ 11253 if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), arg)) 11254 return KF_ARG_PTR_TO_CTX; 11255 11256 if (is_kfunc_arg_nullable(meta->btf, &args[arg]) && bpf_register_is_null(reg) && 11257 !arg_mem_size) 11258 return KF_ARG_PTR_TO_NULL; 11259 11260 if (is_kfunc_arg_alloc_obj(meta->btf, &args[arg])) 11261 return KF_ARG_PTR_TO_ALLOC_BTF_ID; 11262 11263 if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[arg])) 11264 return KF_ARG_PTR_TO_REFCOUNTED_KPTR; 11265 11266 if (is_kfunc_arg_dynptr(meta->btf, &args[arg])) 11267 return KF_ARG_PTR_TO_DYNPTR; 11268 11269 if (is_kfunc_arg_iter(meta, arg, &args[arg])) 11270 return KF_ARG_PTR_TO_ITER; 11271 11272 if (is_kfunc_arg_list_head(meta->btf, &args[arg])) 11273 return KF_ARG_PTR_TO_LIST_HEAD; 11274 11275 if (is_kfunc_arg_list_node(meta->btf, &args[arg])) 11276 return KF_ARG_PTR_TO_LIST_NODE; 11277 11278 if (is_kfunc_arg_rbtree_root(meta->btf, &args[arg])) 11279 return KF_ARG_PTR_TO_RB_ROOT; 11280 11281 if (is_kfunc_arg_rbtree_node(meta->btf, &args[arg])) 11282 return KF_ARG_PTR_TO_RB_NODE; 11283 11284 if (is_kfunc_arg_const_str(meta->btf, &args[arg])) 11285 return KF_ARG_PTR_TO_CONST_STR; 11286 11287 if (is_kfunc_arg_map(meta->btf, &args[arg])) 11288 return KF_ARG_PTR_TO_MAP; 11289 11290 if (is_kfunc_arg_wq(meta->btf, &args[arg])) 11291 return KF_ARG_PTR_TO_WORKQUEUE; 11292 11293 if (is_kfunc_arg_timer(meta->btf, &args[arg])) 11294 return KF_ARG_PTR_TO_TIMER; 11295 11296 if (is_kfunc_arg_task_work(meta->btf, &args[arg])) 11297 return KF_ARG_PTR_TO_TASK_WORK; 11298 11299 if (is_kfunc_arg_irq_flag(meta->btf, &args[arg])) 11300 return 
KF_ARG_PTR_TO_IRQ_FLAG; 11301 11302 if (is_kfunc_arg_res_spin_lock(meta->btf, &args[arg])) 11303 return KF_ARG_PTR_TO_RES_SPIN_LOCK; 11304 11305 if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { 11306 if (!btf_type_is_struct(ref_t)) { 11307 verbose(env, "kernel function %s %s pointer type %s %s is not supported\n", 11308 meta->func_name, reg_arg_name(env, argno), 11309 btf_type_str(ref_t), ref_tname); 11310 return -EINVAL; 11311 } 11312 return KF_ARG_PTR_TO_BTF_ID; 11313 } 11314 11315 if (is_kfunc_arg_callback(env, meta->btf, &args[arg])) 11316 return KF_ARG_PTR_TO_CALLBACK; 11317 11318 /* This is the catch all argument type of register types supported by 11319 * check_helper_mem_access. However, we only allow when argument type is 11320 * pointer to scalar, or struct composed (recursively) of scalars. When 11321 * arg_mem_size is true, the pointer can be void *. 11322 */ 11323 if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) && 11324 (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { 11325 verbose(env, "%s pointer type %s %s must point to %sscalar, or struct with scalar\n", 11326 reg_arg_name(env, argno), 11327 btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : ""); 11328 return -EINVAL; 11329 } 11330 return arg_mem_size ? 
KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM; 11331 } 11332 11333 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, 11334 struct bpf_reg_state *reg, 11335 const struct btf_type *ref_t, 11336 const char *ref_tname, u32 ref_id, 11337 struct bpf_kfunc_call_arg_meta *meta, 11338 int arg, argno_t argno) 11339 { 11340 const struct btf_type *reg_ref_t; 11341 bool strict_type_match = false; 11342 const struct btf *reg_btf; 11343 const char *reg_ref_tname; 11344 bool taking_projection; 11345 bool struct_same; 11346 u32 reg_ref_id; 11347 11348 if (base_type(reg->type) == PTR_TO_BTF_ID) { 11349 reg_btf = reg->btf; 11350 reg_ref_id = reg->btf_id; 11351 } else { 11352 reg_btf = btf_vmlinux; 11353 reg_ref_id = *reg2btf_ids[base_type(reg->type)]; 11354 } 11355 11356 /* Enforce strict type matching for calls to kfuncs that are acquiring 11357 * or releasing a reference, or are no-cast aliases. We do _not_ 11358 * enforce strict matching for kfuncs by default, 11359 * as we want to enable BPF programs to pass types that are bitwise 11360 * equivalent without forcing them to explicitly cast with something 11361 * like bpf_cast_to_kern_ctx(). 11362 * 11363 * For example, say we had a type like the following: 11364 * 11365 * struct bpf_cpumask { 11366 * cpumask_t cpumask; 11367 * refcount_t usage; 11368 * }; 11369 * 11370 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed 11371 * to a struct cpumask, so it would be safe to pass a struct 11372 * bpf_cpumask * to a kfunc expecting a struct cpumask *. 11373 * 11374 * The philosophy here is similar to how we allow scalars of different 11375 * types to be passed to kfuncs as long as the size is the same. The 11376 * only difference here is that we're simply allowing 11377 * btf_struct_ids_match() to walk the struct at the 0th offset, and 11378 * resolve types. 
11379 */ 11380 if ((is_kfunc_release(meta) && reg_is_referenced(env, reg)) || 11381 btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id)) 11382 strict_type_match = true; 11383 11384 WARN_ON_ONCE(is_kfunc_release(meta) && !tnum_is_const(reg->var_off)); 11385 11386 reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); 11387 reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); 11388 struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->var_off.value, 11389 meta->btf, ref_id, strict_type_match); 11390 /* If kfunc is accepting a projection type (ie. __sk_buff), it cannot 11391 * actually use it -- it must cast to the underlying type. So we allow 11392 * caller to pass in the underlying type. 11393 */ 11394 taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname); 11395 if (!taking_projection && !struct_same) { 11396 verbose(env, "kernel function %s %s expected pointer to %s %s but %s has a pointer to %s %s\n", 11397 meta->func_name, reg_arg_name(env, argno), 11398 btf_type_str(ref_t), ref_tname, reg_arg_name(env, argno), 11399 btf_type_str(reg_ref_t), reg_ref_tname); 11400 return -EINVAL; 11401 } 11402 return 0; 11403 } 11404 11405 static int process_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, argno_t argno, 11406 struct bpf_kfunc_call_arg_meta *meta) 11407 { 11408 int err, spi, kfunc_class = IRQ_NATIVE_KFUNC; 11409 bool irq_save; 11410 11411 if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] || 11412 meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) { 11413 irq_save = true; 11414 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) 11415 kfunc_class = IRQ_LOCK_KFUNC; 11416 } else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore] || 11417 meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) { 11418 irq_save = false; 11419 if (meta->func_id == 
special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) 11420 kfunc_class = IRQ_LOCK_KFUNC; 11421 } else { 11422 verifier_bug(env, "unknown irq flags kfunc"); 11423 return -EFAULT; 11424 } 11425 11426 if (irq_save) { 11427 if (!is_irq_flag_reg_valid_uninit(env, reg)) { 11428 verbose(env, "expected uninitialized irq flag as %s\n", 11429 reg_arg_name(env, argno)); 11430 return -EINVAL; 11431 } 11432 11433 err = check_mem_access(env, env->insn_idx, reg, argno, 0, BPF_DW, 11434 BPF_WRITE, -1, false, false); 11435 if (err) 11436 return err; 11437 11438 err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class); 11439 if (err) 11440 return err; 11441 } else { 11442 err = is_irq_flag_reg_valid_init(env, reg); 11443 if (err) { 11444 verbose(env, "expected an initialized irq flag as %s\n", 11445 reg_arg_name(env, argno)); 11446 return err; 11447 } 11448 11449 spi = irq_flag_get_spi(env, reg); 11450 if (spi < 0) 11451 return spi; 11452 11453 mark_stack_slots_scratched(env, spi, 1); 11454 11455 err = unmark_stack_slot_irq_flag(env, reg, kfunc_class); 11456 if (err) 11457 return err; 11458 } 11459 return 0; 11460 } 11461 11462 11463 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 11464 { 11465 struct btf_record *rec = reg_btf_record(reg); 11466 11467 if (!env->cur_state->active_locks) { 11468 verifier_bug(env, "%s w/o active lock", __func__); 11469 return -EFAULT; 11470 } 11471 11472 if (type_flag(reg->type) & NON_OWN_REF) { 11473 verifier_bug(env, "NON_OWN_REF already set"); 11474 return -EFAULT; 11475 } 11476 11477 reg->type |= NON_OWN_REF; 11478 if (rec->refcount_off >= 0) 11479 reg->type |= MEM_RCU; 11480 11481 return 0; 11482 } 11483 11484 static void ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 id) 11485 { 11486 struct bpf_func_state *unused; 11487 struct bpf_reg_state *reg; 11488 11489 WARN_ON_ONCE(release_reference_nomark(env->cur_state, id)); 11490 11491 bpf_for_each_reg_in_vstate(env->cur_state, 
unused, reg, ({ 11492 if (reg->id == id) { 11493 reg->id = 0; 11494 ref_set_non_owning(env, reg); 11495 } 11496 })); 11497 11498 return; 11499 } 11500 11501 /* Implementation details: 11502 * 11503 * Each register points to some region of memory, which we define as an 11504 * allocation. Each allocation may embed a bpf_spin_lock which protects any 11505 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same 11506 * allocation. The lock and the data it protects are colocated in the same 11507 * memory region. 11508 * 11509 * Hence, everytime a register holds a pointer value pointing to such 11510 * allocation, the verifier preserves a unique reg->id for it. 11511 * 11512 * The verifier remembers the lock 'ptr' and the lock 'id' whenever 11513 * bpf_spin_lock is called. 11514 * 11515 * To enable this, lock state in the verifier captures two values: 11516 * active_lock.ptr = Register's type specific pointer 11517 * active_lock.id = A unique ID for each register pointer value 11518 * 11519 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two 11520 * supported register types. 11521 * 11522 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of 11523 * allocated objects is the reg->btf pointer. 11524 * 11525 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we 11526 * can establish the provenance of the map value statically for each distinct 11527 * lookup into such maps. They always contain a single map value hence unique 11528 * IDs for each pseudo load pessimizes the algorithm and rejects valid programs. 11529 * 11530 * So, in case of global variables, they use array maps with max_entries = 1, 11531 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point 11532 * into the same map value as max_entries is 1, as described above). 
11533 * 11534 * In case of inner map lookups, the inner map pointer has same map_ptr as the 11535 * outer map pointer (in verifier context), but each lookup into an inner map 11536 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner 11537 * maps from the same outer map share the same map_ptr as active_lock.ptr, they 11538 * will get different reg->id assigned to each lookup, hence different 11539 * active_lock.id. 11540 * 11541 * In case of allocated objects, active_lock.ptr is the reg->btf, and the 11542 * reg->id is a unique ID preserved after the NULL pointer check on the pointer 11543 * returned from bpf_obj_new. Each allocation receives a new reg->id. 11544 */ 11545 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 11546 { 11547 struct bpf_reference_state *s; 11548 void *ptr; 11549 u32 id; 11550 11551 switch ((int)reg->type) { 11552 case PTR_TO_MAP_VALUE: 11553 ptr = reg->map_ptr; 11554 break; 11555 case PTR_TO_BTF_ID | MEM_ALLOC: 11556 ptr = reg->btf; 11557 break; 11558 default: 11559 verifier_bug(env, "unknown reg type for lock check"); 11560 return -EFAULT; 11561 } 11562 id = reg->id; 11563 11564 if (!env->cur_state->active_locks) 11565 return -EINVAL; 11566 s = find_lock_state(env->cur_state, REF_TYPE_LOCK_MASK, id, ptr); 11567 if (!s) { 11568 verbose(env, "held lock and object are not in the same allocation\n"); 11569 return -EINVAL; 11570 } 11571 return 0; 11572 } 11573 11574 static bool is_bpf_list_api_kfunc(u32 btf_id) 11575 { 11576 return is_bpf_list_push_kfunc(btf_id) || 11577 btf_id == special_kfunc_list[KF_bpf_list_pop_front] || 11578 btf_id == special_kfunc_list[KF_bpf_list_pop_back] || 11579 btf_id == special_kfunc_list[KF_bpf_list_del] || 11580 btf_id == special_kfunc_list[KF_bpf_list_front] || 11581 btf_id == special_kfunc_list[KF_bpf_list_back] || 11582 btf_id == special_kfunc_list[KF_bpf_list_is_first] || 11583 btf_id == special_kfunc_list[KF_bpf_list_is_last] || 11584 
btf_id == special_kfunc_list[KF_bpf_list_empty]; 11585 } 11586 11587 static bool is_bpf_rbtree_api_kfunc(u32 btf_id) 11588 { 11589 return is_bpf_rbtree_add_kfunc(btf_id) || 11590 btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || 11591 btf_id == special_kfunc_list[KF_bpf_rbtree_first] || 11592 btf_id == special_kfunc_list[KF_bpf_rbtree_root] || 11593 btf_id == special_kfunc_list[KF_bpf_rbtree_left] || 11594 btf_id == special_kfunc_list[KF_bpf_rbtree_right]; 11595 } 11596 11597 static bool is_bpf_iter_num_api_kfunc(u32 btf_id) 11598 { 11599 return btf_id == special_kfunc_list[KF_bpf_iter_num_new] || 11600 btf_id == special_kfunc_list[KF_bpf_iter_num_next] || 11601 btf_id == special_kfunc_list[KF_bpf_iter_num_destroy]; 11602 } 11603 11604 static bool is_bpf_graph_api_kfunc(u32 btf_id) 11605 { 11606 return is_bpf_list_api_kfunc(btf_id) || 11607 is_bpf_rbtree_api_kfunc(btf_id) || 11608 is_bpf_refcount_acquire_kfunc(btf_id); 11609 } 11610 11611 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id) 11612 { 11613 return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] || 11614 btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] || 11615 btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] || 11616 btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]; 11617 } 11618 11619 static bool is_bpf_arena_kfunc(u32 btf_id) 11620 { 11621 return btf_id == special_kfunc_list[KF_bpf_arena_alloc_pages] || 11622 btf_id == special_kfunc_list[KF_bpf_arena_free_pages] || 11623 btf_id == special_kfunc_list[KF_bpf_arena_reserve_pages]; 11624 } 11625 11626 static bool is_bpf_stream_kfunc(u32 btf_id) 11627 { 11628 return btf_id == special_kfunc_list[KF_bpf_stream_vprintk] || 11629 btf_id == special_kfunc_list[KF_bpf_stream_print_stack]; 11630 } 11631 11632 static bool kfunc_spin_allowed(u32 btf_id) 11633 { 11634 return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) || 11635 is_bpf_res_spin_lock_kfunc(btf_id) || is_bpf_arena_kfunc(btf_id) || 11636 
is_bpf_stream_kfunc(btf_id); 11637 } 11638 11639 static bool is_sync_callback_calling_kfunc(u32 btf_id) 11640 { 11641 return is_bpf_rbtree_add_kfunc(btf_id); 11642 } 11643 11644 static bool is_async_callback_calling_kfunc(u32 btf_id) 11645 { 11646 return is_bpf_wq_set_callback_kfunc(btf_id) || 11647 is_task_work_add_kfunc(btf_id); 11648 } 11649 11650 bool bpf_is_throw_kfunc(struct bpf_insn *insn) 11651 { 11652 return bpf_pseudo_kfunc_call(insn) && insn->off == 0 && 11653 insn->imm == special_kfunc_list[KF_bpf_throw]; 11654 } 11655 11656 static bool is_bpf_wq_set_callback_kfunc(u32 btf_id) 11657 { 11658 return btf_id == special_kfunc_list[KF_bpf_wq_set_callback]; 11659 } 11660 11661 static bool is_callback_calling_kfunc(u32 btf_id) 11662 { 11663 return is_sync_callback_calling_kfunc(btf_id) || 11664 is_async_callback_calling_kfunc(btf_id); 11665 } 11666 11667 static bool is_rbtree_lock_required_kfunc(u32 btf_id) 11668 { 11669 return is_bpf_rbtree_api_kfunc(btf_id); 11670 } 11671 11672 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env, 11673 enum btf_field_type head_field_type, 11674 u32 kfunc_btf_id) 11675 { 11676 bool ret; 11677 11678 switch (head_field_type) { 11679 case BPF_LIST_HEAD: 11680 ret = is_bpf_list_api_kfunc(kfunc_btf_id); 11681 break; 11682 case BPF_RB_ROOT: 11683 ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id); 11684 break; 11685 default: 11686 verbose(env, "verifier internal error: unexpected graph root argument type %s\n", 11687 btf_field_type_name(head_field_type)); 11688 return false; 11689 } 11690 11691 if (!ret) 11692 verbose(env, "verifier internal error: %s head arg for unknown kfunc\n", 11693 btf_field_type_name(head_field_type)); 11694 return ret; 11695 } 11696 11697 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env, 11698 enum btf_field_type node_field_type, 11699 u32 kfunc_btf_id) 11700 { 11701 bool ret; 11702 11703 switch (node_field_type) { 11704 case BPF_LIST_NODE: 11705 ret = 
is_bpf_list_push_kfunc(kfunc_btf_id) || 11706 kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] || 11707 kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_first] || 11708 kfunc_btf_id == special_kfunc_list[KF_bpf_list_is_last]; 11709 break; 11710 case BPF_RB_NODE: 11711 ret = (is_bpf_rbtree_add_kfunc(kfunc_btf_id) || 11712 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || 11713 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] || 11714 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]); 11715 break; 11716 default: 11717 verbose(env, "verifier internal error: unexpected graph node argument type %s\n", 11718 btf_field_type_name(node_field_type)); 11719 return false; 11720 } 11721 11722 if (!ret) 11723 verbose(env, "verifier internal error: %s node arg for unknown kfunc\n", 11724 btf_field_type_name(node_field_type)); 11725 return ret; 11726 } 11727 11728 static int 11729 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env, 11730 struct bpf_reg_state *reg, argno_t argno, 11731 struct bpf_kfunc_call_arg_meta *meta, 11732 enum btf_field_type head_field_type, 11733 struct btf_field **head_field) 11734 { 11735 const char *head_type_name; 11736 struct btf_field *field; 11737 struct btf_record *rec; 11738 u32 head_off; 11739 11740 if (meta->btf != btf_vmlinux) { 11741 verifier_bug(env, "unexpected btf mismatch in kfunc call"); 11742 return -EFAULT; 11743 } 11744 11745 if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id)) 11746 return -EFAULT; 11747 11748 head_type_name = btf_field_type_name(head_field_type); 11749 if (!tnum_is_const(reg->var_off)) { 11750 verbose(env, 11751 "%s doesn't have constant offset. 
%s has to be at the constant offset\n", 11752 reg_arg_name(env, argno), head_type_name); 11753 return -EINVAL; 11754 } 11755 11756 rec = reg_btf_record(reg); 11757 head_off = reg->var_off.value; 11758 field = btf_record_find(rec, head_off, head_field_type); 11759 if (!field) { 11760 verbose(env, "%s not found at offset=%u\n", head_type_name, head_off); 11761 return -EINVAL; 11762 } 11763 11764 /* All functions require bpf_list_head to be protected using a bpf_spin_lock */ 11765 if (check_reg_allocation_locked(env, reg)) { 11766 verbose(env, "bpf_spin_lock at off=%d must be held for %s\n", 11767 rec->spin_lock_off, head_type_name); 11768 return -EINVAL; 11769 } 11770 11771 if (*head_field) { 11772 verifier_bug(env, "repeating %s arg", head_type_name); 11773 return -EFAULT; 11774 } 11775 *head_field = field; 11776 return 0; 11777 } 11778 11779 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env, 11780 struct bpf_reg_state *reg, argno_t argno, 11781 struct bpf_kfunc_call_arg_meta *meta) 11782 { 11783 return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_LIST_HEAD, 11784 &meta->arg_list_head.field); 11785 } 11786 11787 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env, 11788 struct bpf_reg_state *reg, argno_t argno, 11789 struct bpf_kfunc_call_arg_meta *meta) 11790 { 11791 return __process_kf_arg_ptr_to_graph_root(env, reg, argno, meta, BPF_RB_ROOT, 11792 &meta->arg_rbtree_root.field); 11793 } 11794 11795 static int 11796 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, 11797 struct bpf_reg_state *reg, argno_t argno, 11798 struct bpf_kfunc_call_arg_meta *meta, 11799 enum btf_field_type head_field_type, 11800 enum btf_field_type node_field_type, 11801 struct btf_field **node_field) 11802 { 11803 const char *node_type_name; 11804 const struct btf_type *et, *t; 11805 struct btf_field *field; 11806 u32 node_off; 11807 11808 if (meta->btf != btf_vmlinux) { 11809 verifier_bug(env, "unexpected btf 
mismatch in kfunc call"); 11810 return -EFAULT; 11811 } 11812 11813 if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id)) 11814 return -EFAULT; 11815 11816 node_type_name = btf_field_type_name(node_field_type); 11817 if (!tnum_is_const(reg->var_off)) { 11818 verbose(env, 11819 "%s doesn't have constant offset. %s has to be at the constant offset\n", 11820 reg_arg_name(env, argno), node_type_name); 11821 return -EINVAL; 11822 } 11823 11824 node_off = reg->var_off.value; 11825 field = reg_find_field_offset(reg, node_off, node_field_type); 11826 if (!field) { 11827 verbose(env, "%s not found at offset=%u\n", node_type_name, node_off); 11828 return -EINVAL; 11829 } 11830 11831 field = *node_field; 11832 11833 et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id); 11834 t = btf_type_by_id(reg->btf, reg->btf_id); 11835 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf, 11836 field->graph_root.value_btf_id, true)) { 11837 verbose(env, "operation on %s expects arg#1 %s at offset=%d " 11838 "in struct %s, but arg is at offset=%d in struct %s\n", 11839 btf_field_type_name(head_field_type), 11840 btf_field_type_name(node_field_type), 11841 field->graph_root.node_offset, 11842 btf_name_by_offset(field->graph_root.btf, et->name_off), 11843 node_off, btf_name_by_offset(reg->btf, t->name_off)); 11844 return -EINVAL; 11845 } 11846 meta->arg_btf = reg->btf; 11847 meta->arg_btf_id = reg->btf_id; 11848 11849 if (node_off != field->graph_root.node_offset) { 11850 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n", 11851 node_off, btf_field_type_name(node_field_type), 11852 field->graph_root.node_offset, 11853 btf_name_by_offset(field->graph_root.btf, et->name_off)); 11854 return -EINVAL; 11855 } 11856 11857 return 0; 11858 } 11859 11860 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env, 11861 struct bpf_reg_state *reg, argno_t argno, 11862 struct 
bpf_kfunc_call_arg_meta *meta) 11863 { 11864 return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta, 11865 BPF_LIST_HEAD, BPF_LIST_NODE, 11866 &meta->arg_list_head.field); 11867 } 11868 11869 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, 11870 struct bpf_reg_state *reg, argno_t argno, 11871 struct bpf_kfunc_call_arg_meta *meta) 11872 { 11873 return __process_kf_arg_ptr_to_graph_node(env, reg, argno, meta, 11874 BPF_RB_ROOT, BPF_RB_NODE, 11875 &meta->arg_rbtree_root.field); 11876 } 11877 11878 /* 11879 * css_task iter allowlist is needed to avoid dead locking on css_set_lock. 11880 * LSM hooks and iters (both sleepable and non-sleepable) are safe. 11881 * Any sleepable progs are also safe since bpf_check_attach_target() enforce 11882 * them can only be attached to some specific hook points. 11883 */ 11884 static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) 11885 { 11886 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 11887 11888 switch (prog_type) { 11889 case BPF_PROG_TYPE_LSM: 11890 return true; 11891 case BPF_PROG_TYPE_TRACING: 11892 if (env->prog->expected_attach_type == BPF_TRACE_ITER) 11893 return true; 11894 fallthrough; 11895 default: 11896 return in_sleepable(env); 11897 } 11898 } 11899 11900 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, 11901 int insn_idx) 11902 { 11903 const char *func_name = meta->func_name, *ref_tname; 11904 struct bpf_func_state *caller = cur_func(env); 11905 struct bpf_reg_state *regs = cur_regs(env); 11906 const struct btf *btf = meta->btf; 11907 const struct btf_param *args; 11908 struct btf_record *rec; 11909 u32 i, nargs; 11910 int ret; 11911 11912 args = (const struct btf_param *)(meta->func_proto + 1); 11913 nargs = btf_type_vlen(meta->func_proto); 11914 if (nargs > MAX_BPF_FUNC_ARGS) { 11915 verbose(env, "Function %s has %d > %d args\n", func_name, nargs, 11916 MAX_BPF_FUNC_ARGS); 11917 return -EINVAL; 11918 
} 11919 if (nargs > MAX_BPF_FUNC_REG_ARGS && !bpf_jit_supports_stack_args()) { 11920 verbose(env, "JIT does not support kfunc %s() with %d args\n", 11921 func_name, nargs); 11922 return -ENOTSUPP; 11923 } 11924 11925 ret = check_outgoing_stack_args(env, caller, nargs); 11926 if (ret) 11927 return ret; 11928 11929 /* Check that BTF function arguments match actual types that the 11930 * verifier sees. 11931 */ 11932 for (i = 0; i < nargs; i++) { 11933 struct bpf_reg_state *reg = get_func_arg_reg(caller, regs, i); 11934 const struct btf_type *t, *ref_t, *resolve_ret; 11935 enum bpf_arg_type arg_type = ARG_DONTCARE; 11936 argno_t argno = argno_from_arg(i + 1); 11937 int regno = reg_from_argno(argno); 11938 u32 ref_id, type_size; 11939 bool is_ret_buf_sz = false; 11940 int kf_arg_type; 11941 11942 if (is_kfunc_arg_prog_aux(btf, &args[i])) { 11943 /* Reject repeated use bpf_prog_aux */ 11944 if (meta->arg_prog) { 11945 verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc"); 11946 return -EFAULT; 11947 } 11948 if (regno < 0) { 11949 verbose(env, "%s prog->aux cannot be a stack argument\n", 11950 reg_arg_name(env, argno)); 11951 return -EINVAL; 11952 } 11953 meta->arg_prog = true; 11954 cur_aux(env)->arg_prog = regno; 11955 continue; 11956 } 11957 11958 if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i)) 11959 continue; 11960 11961 t = btf_type_skip_modifiers(btf, args[i].type, NULL); 11962 11963 if (btf_type_is_scalar(t)) { 11964 if (reg->type != SCALAR_VALUE) { 11965 verbose(env, "%s is not a scalar\n", reg_arg_name(env, argno)); 11966 return -EINVAL; 11967 } 11968 11969 if (is_kfunc_arg_constant(meta->btf, &args[i])) { 11970 if (meta->arg_constant.found) { 11971 verifier_bug(env, "only one constant argument permitted"); 11972 return -EFAULT; 11973 } 11974 if (!tnum_is_const(reg->var_off)) { 11975 verbose(env, "%s must be a known constant\n", 11976 reg_arg_name(env, argno)); 11977 return -EINVAL; 11978 } 11979 if (regno >= 0) 11980 ret = 
mark_chain_precision(env, regno); 11981 else 11982 ret = mark_stack_arg_precision(env, i); 11983 if (ret < 0) 11984 return ret; 11985 meta->arg_constant.found = true; 11986 meta->arg_constant.value = reg->var_off.value; 11987 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) { 11988 meta->r0_rdonly = true; 11989 is_ret_buf_sz = true; 11990 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) { 11991 is_ret_buf_sz = true; 11992 } 11993 11994 if (is_ret_buf_sz) { 11995 if (meta->r0_size) { 11996 verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc"); 11997 return -EINVAL; 11998 } 11999 12000 if (!tnum_is_const(reg->var_off)) { 12001 verbose(env, "%s is not a const\n", 12002 reg_arg_name(env, argno)); 12003 return -EINVAL; 12004 } 12005 12006 meta->r0_size = reg->var_off.value; 12007 if (regno >= 0) 12008 ret = mark_chain_precision(env, regno); 12009 else 12010 ret = mark_stack_arg_precision(env, i); 12011 if (ret) 12012 return ret; 12013 } 12014 continue; 12015 } 12016 12017 if (!btf_type_is_ptr(t)) { 12018 verbose(env, "Unrecognized %s type %s\n", 12019 reg_arg_name(env, argno), btf_type_str(t)); 12020 return -EINVAL; 12021 } 12022 12023 if ((bpf_register_is_null(reg) || type_may_be_null(reg->type)) && 12024 !is_kfunc_arg_nullable(meta->btf, &args[i])) { 12025 verbose(env, "Possibly NULL pointer passed to trusted %s\n", 12026 reg_arg_name(env, argno)); 12027 return -EACCES; 12028 } 12029 12030 if (regno == meta->release_regno && !is_kfunc_arg_dynptr(meta->btf, &args[i]) && 12031 !reg_is_referenced(env, reg) && !bpf_register_is_null(reg)) { 12032 verbose(env, "release kfunc %s expects referenced PTR_TO_BTF_ID passed to %s\n", 12033 func_name, reg_arg_name(env, argno)); 12034 return -EINVAL; 12035 } 12036 12037 if (reg_is_referenced(env, reg)) 12038 update_ref_obj(&meta->ref_obj, reg); 12039 12040 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); 12041 ref_tname = btf_name_by_offset(btf, 
ref_t->name_off); 12042 12043 kf_arg_type = get_kfunc_ptr_arg_type(env, caller, regs, meta, t, ref_t, ref_tname, 12044 args, i, nargs, argno, reg); 12045 if (kf_arg_type < 0) 12046 return kf_arg_type; 12047 12048 switch (kf_arg_type) { 12049 case KF_ARG_PTR_TO_NULL: 12050 continue; 12051 case KF_ARG_PTR_TO_MAP: 12052 if (!reg->map_ptr) { 12053 verbose(env, "pointer in %s isn't map pointer\n", 12054 reg_arg_name(env, argno)); 12055 return -EINVAL; 12056 } 12057 if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 || 12058 reg->map_ptr->record->task_work_off >= 0)) { 12059 /* Use map_uid (which is unique id of inner map) to reject: 12060 * inner_map1 = bpf_map_lookup_elem(outer_map, key1) 12061 * inner_map2 = bpf_map_lookup_elem(outer_map, key2) 12062 * if (inner_map1 && inner_map2) { 12063 * wq = bpf_map_lookup_elem(inner_map1); 12064 * if (wq) 12065 * // mismatch would have been allowed 12066 * bpf_wq_init(wq, inner_map2); 12067 * } 12068 * 12069 * Comparing map_ptr is enough to distinguish normal and outer maps. 
12070 */ 12071 if (meta->map.ptr != reg->map_ptr || 12072 meta->map.uid != reg->map_uid) { 12073 if (reg->map_ptr->record->task_work_off >= 0) { 12074 verbose(env, 12075 "bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n", 12076 meta->map.uid, reg->map_uid); 12077 return -EINVAL; 12078 } 12079 verbose(env, 12080 "workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n", 12081 meta->map.uid, reg->map_uid); 12082 return -EINVAL; 12083 } 12084 } 12085 meta->map.ptr = reg->map_ptr; 12086 meta->map.uid = reg->map_uid; 12087 fallthrough; 12088 case KF_ARG_PTR_TO_ALLOC_BTF_ID: 12089 case KF_ARG_PTR_TO_BTF_ID: 12090 if (!is_trusted_reg(env, reg)) { 12091 if (!is_kfunc_rcu(meta)) { 12092 verbose(env, "%s must be referenced or trusted\n", 12093 reg_arg_name(env, argno)); 12094 return -EINVAL; 12095 } 12096 if (!is_rcu_reg(reg)) { 12097 verbose(env, "%s must be a rcu pointer\n", 12098 reg_arg_name(env, argno)); 12099 return -EINVAL; 12100 } 12101 } 12102 fallthrough; 12103 case KF_ARG_PTR_TO_ITER: 12104 case KF_ARG_PTR_TO_LIST_HEAD: 12105 case KF_ARG_PTR_TO_LIST_NODE: 12106 case KF_ARG_PTR_TO_RB_ROOT: 12107 case KF_ARG_PTR_TO_RB_NODE: 12108 case KF_ARG_PTR_TO_MEM: 12109 case KF_ARG_PTR_TO_MEM_SIZE: 12110 case KF_ARG_PTR_TO_CALLBACK: 12111 case KF_ARG_PTR_TO_REFCOUNTED_KPTR: 12112 case KF_ARG_PTR_TO_CONST_STR: 12113 case KF_ARG_PTR_TO_WORKQUEUE: 12114 case KF_ARG_PTR_TO_TIMER: 12115 case KF_ARG_PTR_TO_TASK_WORK: 12116 case KF_ARG_PTR_TO_IRQ_FLAG: 12117 case KF_ARG_PTR_TO_RES_SPIN_LOCK: 12118 break; 12119 case KF_ARG_PTR_TO_DYNPTR: 12120 arg_type = ARG_PTR_TO_DYNPTR; 12121 break; 12122 case KF_ARG_PTR_TO_CTX: 12123 arg_type = ARG_PTR_TO_CTX; 12124 break; 12125 default: 12126 verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type); 12127 return -EFAULT; 12128 } 12129 12130 if (regno == meta->release_regno) 12131 arg_type |= OBJ_RELEASE; 12132 ret = check_func_arg_reg_off(env, reg, argno, arg_type); 12133 if (ret < 0) 
12134 return ret; 12135 12136 switch (kf_arg_type) { 12137 case KF_ARG_PTR_TO_CTX: 12138 if (reg->type != PTR_TO_CTX) { 12139 verbose(env, "%s expected pointer to ctx, but got %s\n", 12140 reg_arg_name(env, argno), reg_type_str(env, reg->type)); 12141 return -EINVAL; 12142 } 12143 12144 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { 12145 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog)); 12146 if (ret < 0) 12147 return -EINVAL; 12148 meta->ret_btf_id = ret; 12149 } 12150 break; 12151 case KF_ARG_PTR_TO_ALLOC_BTF_ID: 12152 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) { 12153 if (!is_bpf_obj_drop_kfunc(meta->func_id)) { 12154 verbose(env, "%s expected for bpf_obj_drop()\n", 12155 reg_arg_name(env, argno)); 12156 return -EINVAL; 12157 } 12158 } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) { 12159 if (!is_bpf_percpu_obj_drop_kfunc(meta->func_id)) { 12160 verbose(env, "%s expected for bpf_percpu_obj_drop()\n", 12161 reg_arg_name(env, argno)); 12162 return -EINVAL; 12163 } 12164 } else { 12165 verbose(env, "%s expected pointer to allocated object\n", 12166 reg_arg_name(env, argno)); 12167 return -EINVAL; 12168 } 12169 if (!reg_is_referenced(env, reg)) { 12170 verbose(env, "allocated object must be referenced\n"); 12171 return -EINVAL; 12172 } 12173 if (meta->btf == btf_vmlinux) { 12174 meta->arg_btf = reg->btf; 12175 meta->arg_btf_id = reg->btf_id; 12176 } 12177 break; 12178 case KF_ARG_PTR_TO_DYNPTR: 12179 { 12180 enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; 12181 12182 if (is_kfunc_arg_uninit(btf, &args[i])) 12183 dynptr_arg_type |= MEM_UNINIT; 12184 12185 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { 12186 dynptr_arg_type |= DYNPTR_TYPE_SKB; 12187 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) { 12188 dynptr_arg_type |= DYNPTR_TYPE_XDP; 12189 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { 12190 dynptr_arg_type |= 
DYNPTR_TYPE_SKB_META; 12191 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { 12192 dynptr_arg_type |= DYNPTR_TYPE_FILE; 12193 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { 12194 dynptr_arg_type |= DYNPTR_TYPE_FILE | OBJ_RELEASE; 12195 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && 12196 (dynptr_arg_type & MEM_UNINIT)) { 12197 enum bpf_dynptr_type parent_type = meta->dynptr.type; 12198 12199 if (parent_type == BPF_DYNPTR_TYPE_INVALID) { 12200 verifier_bug(env, "no dynptr type for parent of clone"); 12201 return -EFAULT; 12202 } 12203 12204 dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type); 12205 } 12206 12207 ret = process_dynptr_func(env, reg, argno, insn_idx, dynptr_arg_type, 12208 &meta->ref_obj, &meta->dynptr); 12209 if (ret < 0) 12210 return ret; 12211 break; 12212 } 12213 case KF_ARG_PTR_TO_ITER: 12214 if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) { 12215 if (!check_css_task_iter_allowlist(env)) { 12216 verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n"); 12217 return -EINVAL; 12218 } 12219 } 12220 ret = process_iter_arg(env, reg, argno, insn_idx, meta); 12221 if (ret < 0) 12222 return ret; 12223 break; 12224 case KF_ARG_PTR_TO_LIST_HEAD: 12225 if (reg->type != PTR_TO_MAP_VALUE && 12226 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 12227 verbose(env, "%s expected pointer to map value or allocated object\n", 12228 reg_arg_name(env, argno)); 12229 return -EINVAL; 12230 } 12231 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && 12232 !reg_is_referenced(env, reg)) { 12233 verbose(env, "allocated object must be referenced\n"); 12234 return -EINVAL; 12235 } 12236 ret = process_kf_arg_ptr_to_list_head(env, reg, argno, meta); 12237 if (ret < 0) 12238 return ret; 12239 break; 12240 case KF_ARG_PTR_TO_RB_ROOT: 12241 if (reg->type != PTR_TO_MAP_VALUE && 12242 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 12243 verbose(env, 
"%s expected pointer to map value or allocated object\n", 12244 reg_arg_name(env, argno)); 12245 return -EINVAL; 12246 } 12247 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && 12248 !reg_is_referenced(env, reg)) { 12249 verbose(env, "allocated object must be referenced\n"); 12250 return -EINVAL; 12251 } 12252 ret = process_kf_arg_ptr_to_rbtree_root(env, reg, argno, meta); 12253 if (ret < 0) 12254 return ret; 12255 break; 12256 case KF_ARG_PTR_TO_LIST_NODE: 12257 if (is_kfunc_arg_nonown_allowed(btf, &args[i]) && 12258 type_is_non_owning_ref(reg->type) && !reg_is_referenced(env, reg)) { 12259 /* Allow bpf_list_front/back return value for 12260 * __nonown_allowed list-node arguments. 12261 */ 12262 goto check_ok; 12263 } 12264 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 12265 verbose(env, "%s expected pointer to allocated object\n", 12266 reg_arg_name(env, argno)); 12267 return -EINVAL; 12268 } 12269 if (!reg_is_referenced(env, reg)) { 12270 verbose(env, "allocated object must be referenced\n"); 12271 return -EINVAL; 12272 } 12273 check_ok: 12274 ret = process_kf_arg_ptr_to_list_node(env, reg, argno, meta); 12275 if (ret < 0) 12276 return ret; 12277 break; 12278 case KF_ARG_PTR_TO_RB_NODE: 12279 if (is_bpf_rbtree_add_kfunc(meta->func_id)) { 12280 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 12281 verbose(env, "%s expected pointer to allocated object\n", 12282 reg_arg_name(env, argno)); 12283 return -EINVAL; 12284 } 12285 if (!reg_is_referenced(env, reg)) { 12286 verbose(env, "allocated object must be referenced\n"); 12287 return -EINVAL; 12288 } 12289 } else { 12290 if (!type_is_non_owning_ref(reg->type) && 12291 !reg_is_referenced(env, reg)) { 12292 verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name); 12293 return -EINVAL; 12294 } 12295 if (in_rbtree_lock_required_cb(env)) { 12296 verbose(env, "%s not allowed in rbtree cb\n", func_name); 12297 return -EINVAL; 12298 } 12299 } 12300 12301 ret = 
process_kf_arg_ptr_to_rbtree_node(env, reg, argno, meta); 12302 if (ret < 0) 12303 return ret; 12304 break; 12305 case KF_ARG_PTR_TO_MAP: 12306 /* If argument has '__map' suffix expect 'struct bpf_map *' */ 12307 ref_id = *reg2btf_ids[CONST_PTR_TO_MAP]; 12308 ref_t = btf_type_by_id(btf_vmlinux, ref_id); 12309 ref_tname = btf_name_by_offset(btf, ref_t->name_off); 12310 fallthrough; 12311 case KF_ARG_PTR_TO_BTF_ID: 12312 /* Only base_type is checked, further checks are done here */ 12313 if ((base_type(reg->type) != PTR_TO_BTF_ID || 12314 (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) && 12315 !reg2btf_ids[base_type(reg->type)]) { 12316 verbose(env, "%s is %s ", reg_arg_name(env, argno), 12317 reg_type_str(env, reg->type)); 12318 verbose(env, "expected %s or socket\n", 12319 reg_type_str(env, base_type(reg->type) | 12320 (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS))); 12321 return -EINVAL; 12322 } 12323 ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i, argno); 12324 if (ret < 0) 12325 return ret; 12326 break; 12327 case KF_ARG_PTR_TO_MEM: 12328 resolve_ret = btf_resolve_size(btf, ref_t, &type_size); 12329 if (IS_ERR(resolve_ret)) { 12330 verbose(env, "%s reference type('%s %s') size cannot be determined: %ld\n", 12331 reg_arg_name(env, argno), btf_type_str(ref_t), 12332 ref_tname, PTR_ERR(resolve_ret)); 12333 return -EINVAL; 12334 } 12335 ret = check_mem_reg(env, reg, argno, type_size); 12336 if (ret < 0) 12337 return ret; 12338 break; 12339 case KF_ARG_PTR_TO_MEM_SIZE: 12340 { 12341 struct bpf_reg_state *buff_reg = reg; 12342 const struct btf_param *buff_arg = &args[i]; 12343 struct bpf_reg_state *size_reg = get_func_arg_reg(caller, regs, i + 1); 12344 const struct btf_param *size_arg = &args[i + 1]; 12345 argno_t next_argno = argno_from_arg(i + 2); 12346 12347 if (!bpf_register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) { 12348 ret = check_kfunc_mem_size_reg(env, buff_reg, size_reg, 12349 
argno, next_argno); 12350 if (ret < 0) { 12351 verbose(env, "%s and ", reg_arg_name(env, argno)); 12352 verbose(env, "%s memory, len pair leads to invalid memory access\n", 12353 reg_arg_name(env, next_argno)); 12354 return ret; 12355 } 12356 } 12357 12358 if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) { 12359 if (meta->arg_constant.found) { 12360 verifier_bug(env, "only one constant argument permitted"); 12361 return -EFAULT; 12362 } 12363 if (!tnum_is_const(size_reg->var_off)) { 12364 verbose(env, "%s must be a known constant\n", 12365 reg_arg_name(env, next_argno)); 12366 return -EINVAL; 12367 } 12368 meta->arg_constant.found = true; 12369 meta->arg_constant.value = size_reg->var_off.value; 12370 } 12371 12372 /* Skip next '__sz' or '__szk' argument */ 12373 i++; 12374 break; 12375 } 12376 case KF_ARG_PTR_TO_CALLBACK: 12377 if (reg->type != PTR_TO_FUNC) { 12378 verbose(env, "%s expected pointer to func\n", reg_arg_name(env, argno)); 12379 return -EINVAL; 12380 } 12381 meta->subprogno = reg->subprogno; 12382 break; 12383 case KF_ARG_PTR_TO_REFCOUNTED_KPTR: 12384 if (!type_is_ptr_alloc_obj(reg->type)) { 12385 verbose(env, "%s is neither owning or non-owning ref\n", 12386 reg_arg_name(env, argno)); 12387 return -EINVAL; 12388 } 12389 if (!type_is_non_owning_ref(reg->type)) 12390 meta->arg_owning_ref = true; 12391 12392 rec = reg_btf_record(reg); 12393 if (!rec) { 12394 verifier_bug(env, "Couldn't find btf_record"); 12395 return -EFAULT; 12396 } 12397 12398 if (rec->refcount_off < 0) { 12399 verbose(env, "%s doesn't point to a type with bpf_refcount field\n", 12400 reg_arg_name(env, argno)); 12401 return -EINVAL; 12402 } 12403 12404 meta->arg_btf = reg->btf; 12405 meta->arg_btf_id = reg->btf_id; 12406 break; 12407 case KF_ARG_PTR_TO_CONST_STR: 12408 if (reg->type != PTR_TO_MAP_VALUE) { 12409 verbose(env, "%s doesn't point to a const string\n", 12410 reg_arg_name(env, argno)); 12411 return -EINVAL; 12412 } 12413 ret = check_arg_const_str(env, reg, 
argno); 12414 if (ret) 12415 return ret; 12416 break; 12417 case KF_ARG_PTR_TO_WORKQUEUE: 12418 if (reg->type != PTR_TO_MAP_VALUE) { 12419 verbose(env, "%s doesn't point to a map value\n", 12420 reg_arg_name(env, argno)); 12421 return -EINVAL; 12422 } 12423 ret = check_map_field_pointer(env, reg, argno, BPF_WORKQUEUE, &meta->map); 12424 if (ret < 0) 12425 return ret; 12426 break; 12427 case KF_ARG_PTR_TO_TIMER: 12428 if (reg->type != PTR_TO_MAP_VALUE) { 12429 verbose(env, "%s doesn't point to a map value\n", 12430 reg_arg_name(env, argno)); 12431 return -EINVAL; 12432 } 12433 ret = process_timer_kfunc(env, reg, argno, meta); 12434 if (ret < 0) 12435 return ret; 12436 break; 12437 case KF_ARG_PTR_TO_TASK_WORK: 12438 if (reg->type != PTR_TO_MAP_VALUE) { 12439 verbose(env, "%s doesn't point to a map value\n", 12440 reg_arg_name(env, argno)); 12441 return -EINVAL; 12442 } 12443 ret = check_map_field_pointer(env, reg, argno, BPF_TASK_WORK, &meta->map); 12444 if (ret < 0) 12445 return ret; 12446 break; 12447 case KF_ARG_PTR_TO_IRQ_FLAG: 12448 if (reg->type != PTR_TO_STACK) { 12449 verbose(env, "%s doesn't point to an irq flag on stack\n", 12450 reg_arg_name(env, argno)); 12451 return -EINVAL; 12452 } 12453 ret = process_irq_flag(env, reg, argno, meta); 12454 if (ret < 0) 12455 return ret; 12456 break; 12457 case KF_ARG_PTR_TO_RES_SPIN_LOCK: 12458 { 12459 int flags = PROCESS_RES_LOCK; 12460 12461 if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 12462 verbose(env, "%s doesn't point to map value or allocated object\n", 12463 reg_arg_name(env, argno)); 12464 return -EINVAL; 12465 } 12466 12467 if (!is_bpf_res_spin_lock_kfunc(meta->func_id)) 12468 return -EFAULT; 12469 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] || 12470 meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) 12471 flags |= PROCESS_SPIN_LOCK; 12472 if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] || 12473 meta->func_id == 
special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) 12474 flags |= PROCESS_LOCK_IRQ; 12475 ret = process_spin_lock(env, reg, argno, flags); 12476 if (ret < 0) 12477 return ret; 12478 break; 12479 } 12480 } 12481 } 12482 12483 return 0; 12484 } 12485 12486 int bpf_fetch_kfunc_arg_meta(struct bpf_verifier_env *env, 12487 s32 func_id, 12488 s16 offset, 12489 struct bpf_kfunc_call_arg_meta *meta) 12490 { 12491 struct bpf_kfunc_meta kfunc; 12492 int err; 12493 12494 err = fetch_kfunc_meta(env, func_id, offset, &kfunc); 12495 if (err) 12496 return err; 12497 12498 memset(meta, 0, sizeof(*meta)); 12499 meta->btf = kfunc.btf; 12500 meta->func_id = kfunc.id; 12501 meta->func_proto = kfunc.proto; 12502 meta->func_name = kfunc.name; 12503 12504 if (!kfunc.flags || !btf_kfunc_is_allowed(kfunc.btf, kfunc.id, env->prog)) 12505 return -EACCES; 12506 12507 meta->kfunc_flags = *kfunc.flags; 12508 12509 /* Only support release referenced argument passed by register */ 12510 if (is_kfunc_release(meta)) 12511 meta->release_regno = BPF_REG_1; 12512 12513 return 0; 12514 } 12515 12516 /* 12517 * Determine how many bytes a helper accesses through a stack pointer at 12518 * argument position @arg (0-based, corresponding to R1-R5). 
12519 * 12520 * Returns: 12521 * > 0 known read access size in bytes 12522 * 0 doesn't read anything directly 12523 * S64_MIN unknown 12524 * < 0 known write access of (-return) bytes 12525 */ 12526 s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn, 12527 int arg, int insn_idx) 12528 { 12529 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 12530 const struct bpf_func_proto *fn; 12531 enum bpf_arg_type at; 12532 s64 size; 12533 12534 if (bpf_get_helper_proto(env, insn->imm, &fn) < 0) 12535 return S64_MIN; 12536 12537 at = fn->arg_type[arg]; 12538 12539 switch (base_type(at)) { 12540 case ARG_PTR_TO_MAP_KEY: 12541 case ARG_PTR_TO_MAP_VALUE: { 12542 bool is_key = base_type(at) == ARG_PTR_TO_MAP_KEY; 12543 u64 val; 12544 int i, map_reg; 12545 12546 for (i = 0; i < arg; i++) { 12547 if (base_type(fn->arg_type[i]) == ARG_CONST_MAP_PTR) 12548 break; 12549 } 12550 if (i >= arg) 12551 goto scan_all_maps; 12552 12553 map_reg = BPF_REG_1 + i; 12554 12555 if (!(aux->const_reg_map_mask & BIT(map_reg))) 12556 goto scan_all_maps; 12557 12558 i = aux->const_reg_vals[map_reg]; 12559 if (i < env->used_map_cnt) { 12560 size = is_key ? env->used_maps[i]->key_size 12561 : env->used_maps[i]->value_size; 12562 goto out; 12563 } 12564 scan_all_maps: 12565 /* 12566 * Map pointer is not known at this call site (e.g. different 12567 * maps on merged paths). Conservatively return the largest 12568 * key_size or value_size across all maps used by the program. 12569 */ 12570 val = 0; 12571 for (i = 0; i < env->used_map_cnt; i++) { 12572 struct bpf_map *map = env->used_maps[i]; 12573 u32 sz = is_key ? map->key_size : map->value_size; 12574 12575 if (sz > val) 12576 val = sz; 12577 if (map->inner_map_meta) { 12578 sz = is_key ? 
map->inner_map_meta->key_size 12579 : map->inner_map_meta->value_size; 12580 if (sz > val) 12581 val = sz; 12582 } 12583 } 12584 if (!val) 12585 return S64_MIN; 12586 size = val; 12587 goto out; 12588 } 12589 case ARG_PTR_TO_MEM: 12590 if (at & MEM_FIXED_SIZE) { 12591 size = fn->arg_size[arg]; 12592 goto out; 12593 } 12594 if (arg + 1 < ARRAY_SIZE(fn->arg_type) && 12595 arg_type_is_mem_size(fn->arg_type[arg + 1])) { 12596 int size_reg = BPF_REG_1 + arg + 1; 12597 12598 if (aux->const_reg_mask & BIT(size_reg)) { 12599 size = (s64)aux->const_reg_vals[size_reg]; 12600 goto out; 12601 } 12602 /* 12603 * Size arg is const on each path but differs across merged 12604 * paths. MAX_BPF_STACK is a safe upper bound for reads. 12605 */ 12606 if (at & MEM_UNINIT) 12607 return 0; 12608 return MAX_BPF_STACK; 12609 } 12610 return S64_MIN; 12611 case ARG_PTR_TO_DYNPTR: 12612 size = BPF_DYNPTR_SIZE; 12613 break; 12614 case ARG_PTR_TO_STACK: 12615 /* 12616 * Only used by bpf_calls_callback() helpers. The helper itself 12617 * doesn't access stack. The callback subprog does and it's 12618 * analyzed separately. 12619 */ 12620 return 0; 12621 default: 12622 return S64_MIN; 12623 } 12624 out: 12625 /* 12626 * MEM_UNINIT args are write-only: the helper initializes the 12627 * buffer without reading it. 12628 */ 12629 if (at & MEM_UNINIT) 12630 return -size; 12631 return size; 12632 } 12633 12634 /* 12635 * Determine how many bytes a kfunc accesses through a stack pointer at 12636 * argument position @arg (0-based, corresponding to R1-R5). 
12637 * 12638 * Returns: 12639 * > 0 known read access size in bytes 12640 * 0 doesn't access memory through that argument (ex: not a pointer) 12641 * S64_MIN unknown 12642 * < 0 known write access of (-return) bytes 12643 */ 12644 s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env, struct bpf_insn *insn, 12645 int arg, int insn_idx) 12646 { 12647 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; 12648 struct bpf_kfunc_call_arg_meta meta; 12649 const struct btf_param *args; 12650 const struct btf_type *t, *ref_t; 12651 const struct btf *btf; 12652 u32 nargs, type_size; 12653 s64 size; 12654 12655 if (bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta) < 0) 12656 return S64_MIN; 12657 12658 btf = meta.btf; 12659 args = btf_params(meta.func_proto); 12660 nargs = btf_type_vlen(meta.func_proto); 12661 if (arg >= nargs) 12662 return 0; 12663 12664 t = btf_type_skip_modifiers(btf, args[arg].type, NULL); 12665 if (!btf_type_is_ptr(t)) 12666 return 0; 12667 12668 /* dynptr: fixed 16-byte on-stack representation */ 12669 if (is_kfunc_arg_dynptr(btf, &args[arg])) { 12670 size = BPF_DYNPTR_SIZE; 12671 goto out; 12672 } 12673 12674 /* ptr + __sz/__szk pair: size is in the next register */ 12675 if (arg + 1 < nargs && 12676 (btf_param_match_suffix(btf, &args[arg + 1], "__sz") || 12677 btf_param_match_suffix(btf, &args[arg + 1], "__szk"))) { 12678 int size_reg = BPF_REG_1 + arg + 1; 12679 12680 if (aux->const_reg_mask & BIT(size_reg)) { 12681 size = (s64)aux->const_reg_vals[size_reg]; 12682 goto out; 12683 } 12684 return MAX_BPF_STACK; 12685 } 12686 12687 /* fixed-size pointed-to type: resolve via BTF */ 12688 ref_t = btf_type_skip_modifiers(btf, t->type, NULL); 12689 if (!IS_ERR(btf_resolve_size(btf, ref_t, &type_size))) { 12690 size = type_size; 12691 goto out; 12692 } 12693 12694 return S64_MIN; 12695 out: 12696 /* KF_ITER_NEW kfuncs initialize the iterator state at arg 0 */ 12697 if (arg == 0 && meta.kfunc_flags & KF_ITER_NEW) 12698 return 
-size; 12699 if (is_kfunc_arg_uninit(btf, &args[arg])) 12700 return -size; 12701 return size; 12702 } 12703 12704 /* check special kfuncs and return: 12705 * 1 - not fall-through to 'else' branch, continue verification 12706 * 0 - fall-through to 'else' branch 12707 * < 0 - not fall-through to 'else' branch, return error 12708 */ 12709 static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, 12710 struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux, 12711 const struct btf_type *ptr_type, struct btf *desc_btf) 12712 { 12713 const struct btf_type *ret_t; 12714 int err = 0; 12715 12716 if (meta->btf != btf_vmlinux) 12717 return 0; 12718 12719 if (is_bpf_obj_new_kfunc(meta->func_id) || is_bpf_percpu_obj_new_kfunc(meta->func_id)) { 12720 struct btf_struct_meta *struct_meta; 12721 struct btf *ret_btf; 12722 u32 ret_btf_id; 12723 12724 if (is_bpf_obj_new_kfunc(meta->func_id) && !bpf_global_ma_set) 12725 return -ENOMEM; 12726 12727 if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) { 12728 verbose(env, "local type ID argument must be in range [0, U32_MAX]\n"); 12729 return -EINVAL; 12730 } 12731 12732 ret_btf = env->prog->aux->btf; 12733 ret_btf_id = meta->arg_constant.value; 12734 12735 /* This may be NULL due to user not supplying a BTF */ 12736 if (!ret_btf) { 12737 verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n"); 12738 return -EINVAL; 12739 } 12740 12741 ret_t = btf_type_by_id(ret_btf, ret_btf_id); 12742 if (!ret_t || !__btf_type_is_struct(ret_t)) { 12743 verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n"); 12744 return -EINVAL; 12745 } 12746 12747 if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) { 12748 if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) { 12749 verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n", 12750 ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE); 12751 return -EINVAL; 12752 } 12753 12754 if 
(!bpf_global_percpu_ma_set) { 12755 mutex_lock(&bpf_percpu_ma_lock); 12756 if (!bpf_global_percpu_ma_set) { 12757 /* Charge memory allocated with bpf_global_percpu_ma to 12758 * root memcg. The obj_cgroup for root memcg is NULL. 12759 */ 12760 err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL); 12761 if (!err) 12762 bpf_global_percpu_ma_set = true; 12763 } 12764 mutex_unlock(&bpf_percpu_ma_lock); 12765 if (err) 12766 return err; 12767 } 12768 12769 mutex_lock(&bpf_percpu_ma_lock); 12770 err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size); 12771 mutex_unlock(&bpf_percpu_ma_lock); 12772 if (err) 12773 return err; 12774 } 12775 12776 struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id); 12777 if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) { 12778 if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) { 12779 verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n"); 12780 return -EINVAL; 12781 } 12782 12783 if (struct_meta) { 12784 verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n"); 12785 return -EINVAL; 12786 } 12787 } 12788 12789 mark_reg_known_zero(env, regs, BPF_REG_0); 12790 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; 12791 regs[BPF_REG_0].btf = ret_btf; 12792 regs[BPF_REG_0].btf_id = ret_btf_id; 12793 if (is_bpf_percpu_obj_new_kfunc(meta->func_id)) 12794 regs[BPF_REG_0].type |= MEM_PERCPU; 12795 12796 insn_aux->obj_new_size = ret_t->size; 12797 insn_aux->kptr_struct_meta = struct_meta; 12798 } else if (is_bpf_refcount_acquire_kfunc(meta->func_id)) { 12799 mark_reg_known_zero(env, regs, BPF_REG_0); 12800 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; 12801 regs[BPF_REG_0].btf = meta->arg_btf; 12802 regs[BPF_REG_0].btf_id = meta->arg_btf_id; 12803 12804 insn_aux->kptr_struct_meta = 12805 btf_find_struct_meta(meta->arg_btf, 12806 meta->arg_btf_id); 12807 } else if (is_list_node_type(ptr_type)) { 12808 struct btf_field *field = meta->arg_list_head.field; 
12809 12810 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root); 12811 } else if (is_rbtree_node_type(ptr_type)) { 12812 struct btf_field *field = meta->arg_rbtree_root.field; 12813 12814 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root); 12815 } else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) { 12816 mark_reg_known_zero(env, regs, BPF_REG_0); 12817 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED; 12818 regs[BPF_REG_0].btf = desc_btf; 12819 regs[BPF_REG_0].btf_id = meta->ret_btf_id; 12820 } else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { 12821 ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value); 12822 if (!ret_t) { 12823 verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n", 12824 meta->arg_constant.value); 12825 return -EINVAL; 12826 } else if (btf_type_is_struct(ret_t)) { 12827 mark_reg_known_zero(env, regs, BPF_REG_0); 12828 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED; 12829 regs[BPF_REG_0].btf = desc_btf; 12830 regs[BPF_REG_0].btf_id = meta->arg_constant.value; 12831 } else if (btf_type_is_void(ret_t)) { 12832 mark_reg_known_zero(env, regs, BPF_REG_0); 12833 regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED; 12834 regs[BPF_REG_0].mem_size = 0; 12835 } else { 12836 verbose(env, 12837 "kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n"); 12838 return -EINVAL; 12839 } 12840 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] || 12841 meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) { 12842 enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->dynptr.type); 12843 12844 mark_reg_known_zero(env, regs, BPF_REG_0); 12845 12846 if (!meta->arg_constant.found) { 12847 verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size"); 12848 return -EFAULT; 12849 } 12850 12851 regs[BPF_REG_0].mem_size = meta->arg_constant.value; 12852 12853 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */ 12854 
regs[BPF_REG_0].type = PTR_TO_MEM | type_flag; 12855 12856 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) { 12857 regs[BPF_REG_0].type |= MEM_RDONLY; 12858 } else { 12859 /* this will set env->seen_direct_write to true */ 12860 if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) { 12861 verbose(env, "the prog does not allow writes to packet data\n"); 12862 return -EINVAL; 12863 } 12864 } 12865 12866 if (!meta->dynptr.id) { 12867 verifier_bug(env, "no dynptr id"); 12868 return -EFAULT; 12869 } 12870 regs[BPF_REG_0].parent_id = meta->dynptr.id; 12871 } else { 12872 return 0; 12873 } 12874 12875 return 1; 12876 } 12877 12878 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name); 12879 12880 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 12881 int *insn_idx_p) 12882 { 12883 bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable; 12884 struct bpf_reg_state *regs = cur_regs(env); 12885 const char *func_name, *ptr_type_name; 12886 const struct btf_type *t, *ptr_type; 12887 struct bpf_kfunc_call_arg_meta meta; 12888 struct bpf_insn_aux_data *insn_aux; 12889 int err, insn_idx = *insn_idx_p; 12890 const struct btf_param *args; 12891 u32 i, nargs, ptr_type_id; 12892 struct btf *desc_btf; 12893 int id; 12894 12895 /* skip for now, but return error when we find this in fixup_kfunc_call */ 12896 if (!insn->imm) 12897 return 0; 12898 12899 err = bpf_fetch_kfunc_arg_meta(env, insn->imm, insn->off, &meta); 12900 if (err == -EACCES && meta.func_name) 12901 verbose(env, "calling kernel function %s is not allowed\n", meta.func_name); 12902 if (err) 12903 return err; 12904 desc_btf = meta.btf; 12905 func_name = meta.func_name; 12906 insn_aux = &env->insn_aux_data[insn_idx]; 12907 12908 insn_aux->is_iter_next = bpf_is_iter_next_kfunc(&meta); 12909 12910 if (!insn->off && 12911 (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] || 12912 insn->imm == 
special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) { 12913 struct bpf_verifier_state *branch; 12914 struct bpf_reg_state *regs; 12915 12916 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); 12917 if (IS_ERR(branch)) { 12918 verbose(env, "failed to push state for failed lock acquisition\n"); 12919 return PTR_ERR(branch); 12920 } 12921 12922 regs = branch->frame[branch->curframe]->regs; 12923 12924 /* Clear r0-r5 registers in forked state */ 12925 for (i = 0; i < CALLER_SAVED_REGS; i++) 12926 bpf_mark_reg_not_init(env, ®s[caller_saved[i]]); 12927 12928 mark_reg_unknown(env, regs, BPF_REG_0); 12929 err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1); 12930 if (err) { 12931 verbose(env, "failed to mark s32 range for retval in forked state for lock\n"); 12932 return err; 12933 } 12934 __mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32)); 12935 } else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) { 12936 verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n"); 12937 return -EFAULT; 12938 } 12939 12940 if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { 12941 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); 12942 return -EACCES; 12943 } 12944 12945 sleepable = bpf_is_kfunc_sleepable(&meta); 12946 if (sleepable && !in_sleepable(env)) { 12947 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name); 12948 return -EACCES; 12949 } 12950 12951 /* Track non-sleepable context for kfuncs, same as for helpers. 
*/ 12952 if (!in_sleepable_context(env)) 12953 insn_aux->non_sleepable = true; 12954 12955 /* Check the arguments */ 12956 err = check_kfunc_args(env, &meta, insn_idx); 12957 if (err < 0) 12958 return err; 12959 12960 if (is_bpf_rbtree_add_kfunc(meta.func_id)) { 12961 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 12962 set_rbtree_add_callback_state); 12963 if (err) { 12964 verbose(env, "kfunc %s#%d failed callback verification\n", 12965 func_name, meta.func_id); 12966 return err; 12967 } 12968 } 12969 12970 if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) { 12971 meta.r0_size = sizeof(u64); 12972 meta.r0_rdonly = false; 12973 } 12974 12975 if (is_bpf_wq_set_callback_kfunc(meta.func_id)) { 12976 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 12977 set_timer_callback_state); 12978 if (err) { 12979 verbose(env, "kfunc %s#%d failed callback verification\n", 12980 func_name, meta.func_id); 12981 return err; 12982 } 12983 } 12984 12985 if (is_task_work_add_kfunc(meta.func_id)) { 12986 err = push_callback_call(env, insn, insn_idx, meta.subprogno, 12987 set_task_work_schedule_callback_state); 12988 if (err) { 12989 verbose(env, "kfunc %s#%d failed callback verification\n", 12990 func_name, meta.func_id); 12991 return err; 12992 } 12993 } 12994 12995 rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); 12996 rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); 12997 12998 preempt_disable = is_kfunc_bpf_preempt_disable(&meta); 12999 preempt_enable = is_kfunc_bpf_preempt_enable(&meta); 13000 13001 if (rcu_lock) { 13002 env->cur_state->active_rcu_locks++; 13003 } else if (rcu_unlock) { 13004 if (env->cur_state->active_rcu_locks == 0) { 13005 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name); 13006 return -EINVAL; 13007 } 13008 if (--env->cur_state->active_rcu_locks == 0) 13009 invalidate_rcu_protected_refs(env); 13010 } else if (preempt_disable) { 13011 env->cur_state->active_preempt_locks++; 13012 } else if 
(preempt_enable) { 13013 if (env->cur_state->active_preempt_locks == 0) { 13014 verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name); 13015 return -EINVAL; 13016 } 13017 env->cur_state->active_preempt_locks--; 13018 } 13019 13020 if (sleepable && !in_sleepable_context(env)) { 13021 verbose(env, "kernel func %s is sleepable within %s\n", 13022 func_name, non_sleepable_context_description(env)); 13023 return -EACCES; 13024 } 13025 13026 if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) { 13027 verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n"); 13028 return -EACCES; 13029 } 13030 13031 if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) { 13032 verbose(env, "kernel func %s requires RCU critical section protection\n", func_name); 13033 return -EACCES; 13034 } 13035 13036 /* In case of release function, we get register number of refcounted 13037 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. 13038 */ 13039 if (meta.release_regno) { 13040 err = release_reg(env, ®s[meta.release_regno], false, !!meta.dynptr.id); 13041 if (err) 13042 return err; 13043 } 13044 13045 if (is_bpf_list_push_kfunc(meta.func_id) || is_bpf_rbtree_add_kfunc(meta.func_id)) { 13046 id = regs[BPF_REG_2].id; 13047 insn_aux->insert_off = regs[BPF_REG_2].var_off.value; 13048 insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); 13049 ref_convert_owning_non_owning(env, id); 13050 } 13051 13052 if (meta.func_id == special_kfunc_list[KF_bpf_throw]) { 13053 if (!bpf_jit_supports_exceptions()) { 13054 verbose(env, "JIT does not support calling kfunc %s#%d\n", 13055 func_name, meta.func_id); 13056 return -ENOTSUPP; 13057 } 13058 env->seen_exception = true; 13059 13060 /* In the case of the default callback, the cookie value passed 13061 * to bpf_throw becomes the return value of the program. 
13062 */ 13063 if (!env->exception_callback_subprog) { 13064 err = check_return_code(env, BPF_REG_1, "R1"); 13065 if (err < 0) 13066 return err; 13067 } 13068 } 13069 13070 for (i = 0; i < CALLER_SAVED_REGS; i++) { 13071 u32 regno = caller_saved[i]; 13072 13073 bpf_mark_reg_not_init(env, ®s[regno]); 13074 regs[regno].subreg_def = DEF_NOT_SUBREG; 13075 } 13076 invalidate_outgoing_stack_args(env, cur_func(env)); 13077 13078 /* Check return type */ 13079 t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL); 13080 13081 if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) { 13082 if (meta.btf != btf_vmlinux || 13083 (!is_bpf_obj_new_kfunc(meta.func_id) && 13084 !is_bpf_percpu_obj_new_kfunc(meta.func_id) && 13085 !is_bpf_refcount_acquire_kfunc(meta.func_id))) { 13086 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); 13087 return -EINVAL; 13088 } 13089 } 13090 13091 if (btf_type_is_scalar(t)) { 13092 mark_reg_unknown(env, regs, BPF_REG_0); 13093 if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] || 13094 meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) 13095 __mark_reg_const_zero(env, ®s[BPF_REG_0]); 13096 mark_btf_func_reg_size(env, BPF_REG_0, t->size); 13097 } else if (btf_type_is_ptr(t)) { 13098 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); 13099 err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf); 13100 if (err) { 13101 if (err < 0) 13102 return err; 13103 } else if (btf_type_is_void(ptr_type)) { 13104 /* kfunc returning 'void *' is equivalent to returning scalar */ 13105 mark_reg_unknown(env, regs, BPF_REG_0); 13106 } else if (!__btf_type_is_struct(ptr_type)) { 13107 if (!meta.r0_size) { 13108 __u32 sz; 13109 13110 if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) { 13111 meta.r0_size = sz; 13112 meta.r0_rdonly = true; 13113 } 13114 } 13115 if (!meta.r0_size) { 13116 ptr_type_name = 
btf_name_by_offset(desc_btf, 13117 ptr_type->name_off); 13118 verbose(env, 13119 "kernel function %s returns pointer type %s %s is not supported\n", 13120 func_name, 13121 btf_type_str(ptr_type), 13122 ptr_type_name); 13123 return -EINVAL; 13124 } 13125 13126 mark_reg_known_zero(env, regs, BPF_REG_0); 13127 regs[BPF_REG_0].type = PTR_TO_MEM; 13128 regs[BPF_REG_0].mem_size = meta.r0_size; 13129 13130 if (meta.r0_rdonly) 13131 regs[BPF_REG_0].type |= MEM_RDONLY; 13132 13133 /* Ensures we don't access the memory after a release_reference() */ 13134 if (meta.ref_obj.id) { 13135 err = validate_ref_obj(env, &meta.ref_obj); 13136 if (err) 13137 return err; 13138 regs[BPF_REG_0].parent_id = meta.ref_obj.id; 13139 } 13140 13141 if (is_kfunc_rcu_protected(&meta)) 13142 regs[BPF_REG_0].type |= MEM_RCU; 13143 } else { 13144 enum bpf_reg_type type = PTR_TO_BTF_ID; 13145 13146 if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache]) 13147 type |= PTR_UNTRUSTED; 13148 else if (is_kfunc_rcu_protected(&meta) || 13149 (bpf_is_iter_next_kfunc(&meta) && 13150 (get_iter_from_state(env->cur_state, &meta) 13151 ->type & MEM_RCU))) { 13152 /* 13153 * If the iterator's constructor (the _new 13154 * function e.g., bpf_iter_task_new) has been 13155 * annotated with BPF kfunc flag 13156 * KF_RCU_PROTECTED and was called within a RCU 13157 * read-side critical section, also propagate 13158 * the MEM_RCU flag to the pointer returned from 13159 * the iterator's next function (e.g., 13160 * bpf_iter_task_next). 13161 */ 13162 type |= MEM_RCU; 13163 } else { 13164 /* 13165 * Any PTR_TO_BTF_ID that is returned from a BPF 13166 * kfunc should by default be treated as 13167 * implicitly trusted. 
13168 */ 13169 type |= PTR_TRUSTED; 13170 } 13171 13172 mark_reg_known_zero(env, regs, BPF_REG_0); 13173 regs[BPF_REG_0].btf = desc_btf; 13174 regs[BPF_REG_0].type = type; 13175 regs[BPF_REG_0].btf_id = ptr_type_id; 13176 } 13177 13178 if (is_kfunc_ret_null(&meta)) { 13179 regs[BPF_REG_0].type |= PTR_MAYBE_NULL; 13180 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ 13181 regs[BPF_REG_0].id = ++env->id_gen; 13182 } 13183 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); 13184 if (is_kfunc_acquire(&meta)) { 13185 id = acquire_reference(env, insn_idx, 0); 13186 if (id < 0) 13187 return id; 13188 regs[BPF_REG_0].id = id; 13189 } else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) { 13190 ref_set_non_owning(env, ®s[BPF_REG_0]); 13191 } 13192 13193 if (reg_may_point_to_spin_lock(®s[BPF_REG_0]) && !regs[BPF_REG_0].id) 13194 regs[BPF_REG_0].id = ++env->id_gen; 13195 } else if (btf_type_is_void(t)) { 13196 if (meta.btf == btf_vmlinux) { 13197 if (is_bpf_obj_drop_kfunc(meta.func_id) || 13198 is_bpf_percpu_obj_drop_kfunc(meta.func_id)) { 13199 insn_aux->kptr_struct_meta = 13200 btf_find_struct_meta(meta.arg_btf, 13201 meta.arg_btf_id); 13202 } 13203 } 13204 } 13205 13206 if (bpf_is_kfunc_pkt_changing(&meta)) 13207 clear_all_pkt_pointers(env); 13208 13209 nargs = btf_type_vlen(meta.func_proto); 13210 if (nargs > MAX_BPF_FUNC_REG_ARGS) { 13211 struct bpf_func_state *caller = cur_func(env); 13212 struct bpf_subprog_info *caller_info = &env->subprog_info[caller->subprogno]; 13213 u16 out_stack_arg_cnt = nargs - MAX_BPF_FUNC_REG_ARGS; 13214 u16 stack_arg_cnt = bpf_in_stack_arg_cnt(caller_info) + out_stack_arg_cnt; 13215 13216 if (stack_arg_cnt > caller_info->stack_arg_cnt) 13217 caller_info->stack_arg_cnt = stack_arg_cnt; 13218 } 13219 13220 args = (const struct btf_param *)(meta.func_proto + 1); 13221 for (i = 0; i < min_t(int, nargs, MAX_BPF_FUNC_REG_ARGS); i++) { 13222 u32 regno = i + 1; 13223 13224 t = btf_type_skip_modifiers(desc_btf, args[i].type, 
NULL); 13225 if (btf_type_is_ptr(t)) 13226 mark_btf_func_reg_size(env, regno, sizeof(void *)); 13227 else 13228 /* scalar. ensured by check_kfunc_args() */ 13229 mark_btf_func_reg_size(env, regno, t->size); 13230 } 13231 13232 if (bpf_is_iter_next_kfunc(&meta)) { 13233 err = process_iter_next_call(env, insn_idx, &meta); 13234 if (err) 13235 return err; 13236 } 13237 13238 if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) 13239 env->prog->call_session_cookie = true; 13240 13241 if (bpf_is_throw_kfunc(insn)) 13242 return process_bpf_exit_full(env, NULL, true); 13243 13244 return 0; 13245 }

/* Check that the scalar operand @reg is usable as an offset in pointer
 * arithmetic on a pointer of type @type.
 *
 * Rejected (with a log message) when:
 *  - the scalar is a known constant whose magnitude reaches BPF_MAX_VAR_OFF,
 *  - its signed minimum is S64_MIN, i.e. unbounded below, or
 *  - its signed minimum has a magnitude that reaches BPF_MAX_VAR_OFF.
 *
 * @type is only used to name the pointer type in the message.
 * Returns true when the operand is acceptable, false otherwise.
 */
static bool check_reg_sane_offset_scalar(struct bpf_verifier_env *env,
					 const struct bpf_reg_state *reg,
					 enum bpf_reg_type type)
{
	bool known = tnum_is_const(reg->var_off);
	s64 val = reg->var_off.value;
	s64 smin = reg_smin(reg);

	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
		verbose(env, "math between %s pointer and %lld is not allowed\n",
			reg_type_str(env, type), val);
		return false;
	}

	if (smin == S64_MIN) {
		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
			reg_type_str(env, type));
		return false;
	}

	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
		verbose(env, "value %lld makes %s pointer be out of bounds\n",
			smin, reg_type_str(env, type));
		return false;
	}

	return true;
}

/* Check that the offset carried by the pointer register @reg itself is sane.
 *
 * Same magnitude limits as check_reg_sane_offset_scalar(), applied to the
 * constant value (when known) and to the signed minimum, but without the
 * S64_MIN test. @type is only used in the log message.
 * Returns true when the offset is acceptable, false otherwise.
 */
static bool check_reg_sane_offset_ptr(struct bpf_verifier_env *env,
				      const struct bpf_reg_state *reg,
				      enum bpf_reg_type type)
{
	bool known = tnum_is_const(reg->var_off);
	s64 val = reg->var_off.value;
	s64 smin = reg_smin(reg);

	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
		verbose(env, "%s pointer offset %lld is not allowed\n",
			reg_type_str(env, type), val);
		return false;
	}

	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
		verbose(env, "%s pointer offset %lld is not allowed\n",
			reg_type_str(env, type), smin);
		return false;
	}

	return true;
}

/* Failure reasons produced by the ALU sanitation helpers below. They are
 * negative so they can share an int return with 0 == success, and are
 * turned into a log message plus errno by sanitize_err().
 */
enum {
	REASON_BOUNDS	= -1,
	REASON_TYPE	= -2,
	REASON_PATHS	= -3,
	REASON_LIMIT	= -4,
	REASON_STACK	= -5,
};

/* Compute the masking limit for pointer arithmetic on @ptr_reg.
 *
 * Only PTR_TO_STACK and PTR_TO_MAP_VALUE are handled; any other type yields
 * REASON_TYPE. On success the limit is stored in *@alu_limit and 0 is
 * returned. REASON_LIMIT is returned when the limit is not below the
 * maximum allowed for the pointer type. @mask_to_left selects the direction
 * in which the pointer is about to move.
 */
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
			      u32 *alu_limit, bool mask_to_left)
{
	u32 max = 0, ptr_limit = 0;

	switch (ptr_reg->type) {
	case PTR_TO_STACK:
		/* Offset 0 is out-of-bounds, but acceptable start for the
		 * left direction, see BPF_REG_FP. Also, unknown scalar
		 * offset where we would need to deal with min/max bounds is
		 * currently prohibited for unprivileged.
		 */
		max = MAX_BPF_STACK + mask_to_left;
		ptr_limit = -ptr_reg->var_off.value;
		break;
	case PTR_TO_MAP_VALUE:
		max = ptr_reg->map_ptr->value_size;
		ptr_limit = mask_to_left ? reg_smin(ptr_reg) : reg_umax(ptr_reg);
		break;
	default:
		return REASON_TYPE;
	}

	if (ptr_limit >= max)
		return REASON_LIMIT;
	*alu_limit = ptr_limit;
	return 0;
}

/* Sanitation is skipped when Spectre v1 mitigations are bypassed for this
 * program, when the instruction takes an immediate (BPF_K) source operand,
 * or when the current instruction's aux data has 'nospec' set.
 */
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
				    const struct bpf_insn *insn)
{
	return env->bypass_spec_v1 ||
		BPF_SRC(insn->code) == BPF_K ||
		cur_aux(env)->nospec;
}

/* Record the masking state and limit for an instruction in @aux.
 *
 * Returns REASON_PATHS when a state was already recorded and it disagrees
 * with the new @alu_state / @alu_limit, 0 otherwise.
 */
static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
				       u32 alu_state, u32 alu_limit)
{
	/* If we arrived here from different branches with different
	 * state or limits to sanitize, then this won't work.
	 */
	if (aux->alu_state &&
	    (aux->alu_state != alu_state ||
	     aux->alu_limit != alu_limit))
		return REASON_PATHS;

	/* Corresponding fixup done in do_misc_fixups(). */
	aux->alu_state = alu_state;
	aux->alu_limit = alu_limit;
	return 0;
}

/* Mark the current instruction as an ALU operation on a non-pointer value
 * (BPF_ALU_NON_POINTER, limit 0) unless sanitation can be skipped.
 * Returns 0 or REASON_PATHS, see update_alu_sanitation_state().
 */
static int sanitize_val_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn)
{
	struct bpf_insn_aux_data *aux = cur_aux(env);

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
}

/* Only additions and subtractions are subject to pointer ALU sanitation. */
static bool sanitize_needed(u8 opcode)
{
	return opcode == BPF_ADD || opcode == BPF_SUB;
}

/* Scratch state handed from the first sanitize_ptr_alu() call
 * (commit_window == false) to the second one (commit_window == true).
 */
struct bpf_sanitize_info {
	struct bpf_insn_aux_data aux;	/* alu_state/alu_limit seen before the op */
	bool mask_to_left;		/* direction chosen before the op */
};

/* Push a branch state starting at @next_idx onto the verification stack.
 *
 * When @insn is given, the registers it names are marked unknown in the
 * pushed state: dst for a BPF_K source, dst and src for a BPF_X source.
 * NOTE(review): the final 'true' passed to push_stack() presumably marks
 * the branch as speculative - confirm against push_stack().
 *
 * Returns 0 on success or the error encoded in the pointer returned by
 * push_stack().
 */
static int sanitize_speculative_path(struct bpf_verifier_env *env,
				     const struct bpf_insn *insn,
				     u32 next_idx, u32 curr_idx)
{
	struct bpf_verifier_state *branch;
	struct bpf_reg_state *regs;

	branch = push_stack(env, next_idx, curr_idx, true);
	if (!IS_ERR(branch) && insn) {
		regs = branch->frame[branch->curframe]->regs;
		if (BPF_SRC(insn->code) == BPF_K) {
			mark_reg_unknown(env, regs, insn->dst_reg);
		} else if (BPF_SRC(insn->code) == BPF_X) {
			mark_reg_unknown(env, regs, insn->dst_reg);
			mark_reg_unknown(env, regs, insn->src_reg);
		}
	}
	return PTR_ERR_OR_ZERO(branch);
}

/* Work out and record the masking needed for a pointer +/- scalar operation.
 *
 * Called twice per instruction:
 *  - with @commit_window == false before the operation is applied; the
 *    state and limit are stored in @info->aux and the masking direction in
 *    @info->mask_to_left;
 *  - with @commit_window == true afterwards; the limit is narrowed to the
 *    distance between the two observations and committed to the
 *    instruction's own aux data.
 *
 * Returns 0 on success or one of the negative REASON_* codes.
 */
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn,
			    const struct bpf_reg_state *ptr_reg,
			    const struct bpf_reg_state *off_reg,
			    struct bpf_reg_state *dst_reg,
			    struct bpf_sanitize_info *info,
			    const bool commit_window)
{
	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
	struct bpf_verifier_state *vstate = env->cur_state;
	bool off_is_imm = tnum_is_const(off_reg->var_off);
	bool off_is_neg = reg_smin(off_reg) < 0;
	bool ptr_is_dst_reg = ptr_reg == dst_reg;
	u8 opcode = BPF_OP(insn->code);
	u32 alu_state, alu_limit;
	struct bpf_reg_state tmp;
	int err;

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	/* We already marked aux for masking from non-speculative
	 * paths, thus we got here in the first place. We only care
	 * to explore bad access from here.
	 */
	if (vstate->speculative)
		goto do_sim;

	if (!commit_window) {
		/* An unknown offset whose signed bounds straddle zero gives
		 * no single direction to mask in.
		 */
		if (!off_is_imm && off_is_neg != (reg_smax(off_reg) < 0))
			return REASON_BOUNDS;

		info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
				     (opcode == BPF_SUB && !off_is_neg);
	}

	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
	if (err < 0)
		return err;

	if (commit_window) {
		/* In commit phase we narrow the masking window based on
		 * the observed pointer move after the simulated operation.
		 */
		alu_state = info->aux.alu_state;
		alu_limit = abs(info->aux.alu_limit - alu_limit);
	} else {
		alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
		alu_state |= ptr_is_dst_reg ?
			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;

		/* Limit pruning on unknown scalars to enable deep search for
		 * potential masking differences from other program paths.
		 */
		if (!off_is_imm)
			env->explore_alu_limits = true;
	}

	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
	if (err < 0)
		return err;
do_sim:
	/* If we're in commit phase, we're done here given we already
	 * pushed the truncated dst_reg into the speculative verification
	 * stack.
	 *
	 * Also, when register is a known constant, we rewrite register-based
	 * operation to immediate-based, and thus do not need masking (and as
	 * a consequence, do not need to simulate the zero-truncation either).
	 */
	if (commit_window || off_is_imm)
		return 0;

	/* Simulate and find potential out-of-bounds access under
	 * speculative execution from truncation as a result of
	 * masking when off was not within expected range. If off
	 * sits in dst, then we temporarily need to move ptr there
	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
	 * for cases where we use K-based arithmetic in one direction
	 * and truncated reg-based in the other in order to explore
	 * bad access.
	 */
	if (!ptr_is_dst_reg) {
		tmp = *dst_reg;
		*dst_reg = *ptr_reg;
	}
	err = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
	if (err < 0)
		return REASON_STACK;
	if (!ptr_is_dst_reg)
		*dst_reg = tmp;
	return 0;
}

/* Stamp the current instruction as seen in this pass, unless the current
 * state is speculative.
 */
static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state *vstate = env->cur_state;

	/* If we simulate paths under speculation, we don't update the
	 * insn as 'seen' such that when we verify unreachable paths in
	 * the non-speculative domain, sanitize_dead_code() can still
	 * rewrite/sanitize them.
	 */
	if (!vstate->speculative)
		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
}

/* Translate a REASON_* code into a verifier log message and an errno.
 *
 * @off_reg and @dst_reg are only compared with each other to decide whether
 * the offending register is the instruction's dst or src.
 * Returns -ENOMEM for REASON_STACK and -EACCES for everything else; an
 * unknown reason is additionally reported through verifier_bug().
 */
static int sanitize_err(struct bpf_verifier_env *env,
			const struct bpf_insn *insn, int reason,
			const struct bpf_reg_state *off_reg,
			const struct bpf_reg_state *dst_reg)
{
	static const char *err = "pointer arithmetic with it prohibited for !root";
	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
	u32 dst = insn->dst_reg, src = insn->src_reg;

	switch (reason) {
	case REASON_BOUNDS:
		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
			off_reg == dst_reg ? dst : src, err);
		break;
	case REASON_TYPE:
		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
			off_reg == dst_reg ? src : dst, err);
		break;
	case REASON_PATHS:
		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
			dst, op, err);
		break;
	case REASON_LIMIT:
		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
			dst, op, err);
		break;
	case REASON_STACK:
		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
			dst, err);
		return -ENOMEM;
	default:
		verifier_bug(env, "unknown reason (%d)", reason);
		break;
	}

	return -EACCES;
}

/* check that stack access falls within stack limits and that 'reg' doesn't
 * have a variable offset.
 *
 * Variable offset is prohibited for unprivileged mode for simplicity since it
 * requires corresponding support in Spectre masking for stack ALU. See also
 * retrieve_ptr_limit().
13555 */ 13556 static int check_stack_access_for_ptr_arithmetic( 13557 struct bpf_verifier_env *env, 13558 int regno, 13559 const struct bpf_reg_state *reg, 13560 int off) 13561 { 13562 if (!tnum_is_const(reg->var_off)) { 13563 char tn_buf[48]; 13564 13565 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 13566 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", 13567 regno, tn_buf, off); 13568 return -EACCES; 13569 } 13570 13571 if (off >= 0 || off < -MAX_BPF_STACK) { 13572 verbose(env, "R%d stack pointer arithmetic goes out of range, " 13573 "prohibited for !root; off=%d\n", regno, off); 13574 return -EACCES; 13575 } 13576 13577 return 0; 13578 } 13579 13580 static int sanitize_check_bounds(struct bpf_verifier_env *env, 13581 const struct bpf_insn *insn, 13582 struct bpf_reg_state *dst_reg) 13583 { 13584 u32 dst = insn->dst_reg; 13585 13586 /* For unprivileged we require that resulting offset must be in bounds 13587 * in order to be able to sanitize access later on. 13588 */ 13589 if (env->bypass_spec_v1) 13590 return 0; 13591 13592 switch (dst_reg->type) { 13593 case PTR_TO_STACK: 13594 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, 13595 dst_reg->var_off.value)) 13596 return -EACCES; 13597 break; 13598 case PTR_TO_MAP_VALUE: 13599 if (check_map_access(env, dst_reg, argno_from_reg(dst), 0, 1, false, ACCESS_HELPER)) { 13600 verbose(env, "R%d pointer arithmetic of map value goes out of range, " 13601 "prohibited for !root\n", dst); 13602 return -EACCES; 13603 } 13604 break; 13605 default: 13606 return -EOPNOTSUPP; 13607 } 13608 13609 return 0; 13610 } 13611 13612 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 13613 * Caller should also handle BPF_MOV case separately. 13614 * If we return -EACCES, caller may want to try again treating pointer as a 13615 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks. 
13616 */ 13617 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, 13618 struct bpf_insn *insn, 13619 const struct bpf_reg_state *ptr_reg, 13620 const struct bpf_reg_state *off_reg) 13621 { 13622 struct bpf_verifier_state *vstate = env->cur_state; 13623 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 13624 struct bpf_reg_state *regs = state->regs, *dst_reg; 13625 bool known = tnum_is_const(off_reg->var_off); 13626 s64 smin_val = reg_smin(off_reg), smax_val = reg_smax(off_reg); 13627 u64 umin_val = reg_umin(off_reg), umax_val = reg_umax(off_reg); 13628 struct bpf_sanitize_info info = {}; 13629 u8 opcode = BPF_OP(insn->code); 13630 u32 dst = insn->dst_reg; 13631 int ret, bounds_ret; 13632 13633 dst_reg = ®s[dst]; 13634 13635 if ((known && (smin_val != smax_val || umin_val != umax_val)) || 13636 smin_val > smax_val || umin_val > umax_val) { 13637 /* Taint dst register if offset had invalid bounds derived from 13638 * e.g. dead branches. 13639 */ 13640 __mark_reg_unknown(env, dst_reg); 13641 return 0; 13642 } 13643 13644 if (BPF_CLASS(insn->code) != BPF_ALU64) { 13645 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 13646 if (opcode == BPF_SUB && env->allow_ptr_leaks) { 13647 __mark_reg_unknown(env, dst_reg); 13648 return 0; 13649 } 13650 13651 verbose(env, 13652 "R%d 32-bit pointer arithmetic prohibited\n", 13653 dst); 13654 return -EACCES; 13655 } 13656 13657 if (ptr_reg->type & PTR_MAYBE_NULL) { 13658 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", 13659 dst, reg_type_str(env, ptr_reg->type)); 13660 return -EACCES; 13661 } 13662 13663 /* 13664 * Accesses to untrusted PTR_TO_MEM are done through probe 13665 * instructions, hence no need to track offsets. 
13666 */ 13667 if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED)) 13668 return 0; 13669 13670 switch (base_type(ptr_reg->type)) { 13671 case PTR_TO_CTX: 13672 case PTR_TO_MAP_VALUE: 13673 case PTR_TO_MAP_KEY: 13674 case PTR_TO_STACK: 13675 case PTR_TO_PACKET_META: 13676 case PTR_TO_PACKET: 13677 case PTR_TO_TP_BUFFER: 13678 case PTR_TO_BTF_ID: 13679 case PTR_TO_MEM: 13680 case PTR_TO_BUF: 13681 case PTR_TO_FUNC: 13682 case CONST_PTR_TO_DYNPTR: 13683 break; 13684 case PTR_TO_FLOW_KEYS: 13685 if (known) 13686 break; 13687 fallthrough; 13688 case CONST_PTR_TO_MAP: 13689 /* smin_val represents the known value */ 13690 if (known && smin_val == 0 && opcode == BPF_ADD) 13691 break; 13692 fallthrough; 13693 default: 13694 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 13695 dst, reg_type_str(env, ptr_reg->type)); 13696 return -EACCES; 13697 } 13698 13699 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. 13700 * The id may be overwritten later if we create a new variable offset. 13701 */ 13702 dst_reg->type = ptr_reg->type; 13703 dst_reg->id = ptr_reg->id; 13704 13705 if (!check_reg_sane_offset_scalar(env, off_reg, ptr_reg->type) || 13706 !check_reg_sane_offset_ptr(env, ptr_reg, ptr_reg->type)) 13707 return -EINVAL; 13708 13709 /* pointer types do not carry 32-bit bounds at the moment. */ 13710 __mark_reg32_unbounded(dst_reg); 13711 13712 if (sanitize_needed(opcode)) { 13713 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, 13714 &info, false); 13715 if (ret < 0) 13716 return sanitize_err(env, insn, ret, off_reg, dst_reg); 13717 } 13718 13719 switch (opcode) { 13720 case BPF_ADD: 13721 /* 13722 * dst_reg gets the pointer type and since some positive 13723 * integer value was added to the pointer, give it a new 'id' 13724 * if it's a PTR_TO_PACKET. 13725 * this creates a new 'base' pointer, off_reg (variable) gets 13726 * added into the variable offset, and we copy the fixed offset 13727 * from ptr_reg. 
13728 */ 13729 dst_reg->r64 = cnum64_add(ptr_reg->r64, off_reg->r64); 13730 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); 13731 dst_reg->raw = ptr_reg->raw; 13732 if (reg_is_pkt_pointer(ptr_reg)) { 13733 if (!known) 13734 dst_reg->id = ++env->id_gen; 13735 /* 13736 * Clear range for unknown addends since we can't know 13737 * where the pkt pointer ended up. Also clear AT_PKT_END / 13738 * BEYOND_PKT_END from prior comparison as any pointer 13739 * arithmetic invalidates them. 13740 */ 13741 if (!known || dst_reg->range < 0) 13742 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw)); 13743 } 13744 break; 13745 case BPF_SUB: 13746 if (dst_reg == off_reg) { 13747 /* scalar -= pointer. Creates an unknown scalar */ 13748 verbose(env, "R%d tried to subtract pointer from scalar\n", 13749 dst); 13750 return -EACCES; 13751 } 13752 /* We don't allow subtraction from FP, because (according to 13753 * test_verifier.c test "invalid fp arithmetic", JITs might not 13754 * be able to deal with it. 13755 */ 13756 if (ptr_reg->type == PTR_TO_STACK) { 13757 verbose(env, "R%d subtraction from stack pointer prohibited\n", 13758 dst); 13759 return -EACCES; 13760 } 13761 dst_reg->r64 = cnum64_add(ptr_reg->r64, cnum64_negate(off_reg->r64)); 13762 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); 13763 dst_reg->raw = ptr_reg->raw; 13764 if (reg_is_pkt_pointer(ptr_reg)) { 13765 if (!known) 13766 dst_reg->id = ++env->id_gen; 13767 /* 13768 * Clear range if the subtrahend may be negative since 13769 * pkt pointer could move past its bounds. A positive 13770 * subtrahend moves it backwards keeping positive range 13771 * intact. Also clear AT_PKT_END / BEYOND_PKT_END from 13772 * prior comparison as arithmetic invalidates them. 
13773 */ 13774 if ((!known && smin_val < 0) || dst_reg->range < 0) 13775 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw)); 13776 } 13777 break; 13778 case BPF_AND: 13779 case BPF_OR: 13780 case BPF_XOR: 13781 /* bitwise ops on pointers are troublesome, prohibit. */ 13782 verbose(env, "R%d bitwise operator %s on pointer prohibited\n", 13783 dst, bpf_alu_string[opcode >> 4]); 13784 return -EACCES; 13785 default: 13786 /* other operators (e.g. MUL,LSH) produce non-pointer results */ 13787 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 13788 dst, bpf_alu_string[opcode >> 4]); 13789 return -EACCES; 13790 } 13791 13792 if (!check_reg_sane_offset_ptr(env, dst_reg, ptr_reg->type)) 13793 return -EINVAL; 13794 reg_bounds_sync(dst_reg); 13795 bounds_ret = sanitize_check_bounds(env, insn, dst_reg); 13796 if (bounds_ret == -EACCES) 13797 return bounds_ret; 13798 if (sanitize_needed(opcode)) { 13799 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, 13800 &info, true); 13801 if (verifier_bug_if(!can_skip_alu_sanitation(env, insn) 13802 && !env->cur_state->speculative 13803 && bounds_ret 13804 && !ret, 13805 env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) { 13806 return -EFAULT; 13807 } 13808 if (ret < 0) 13809 return sanitize_err(env, insn, ret, off_reg, dst_reg); 13810 } 13811 13812 return 0; 13813 } 13814 13815 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, 13816 struct bpf_reg_state *src_reg) 13817 { 13818 dst_reg->r32 = cnum32_add(dst_reg->r32, src_reg->r32); 13819 } 13820 13821 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, 13822 struct bpf_reg_state *src_reg) 13823 { 13824 dst_reg->r64 = cnum64_add(dst_reg->r64, src_reg->r64); 13825 } 13826 13827 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, 13828 struct bpf_reg_state *src_reg) 13829 { 13830 dst_reg->r32 = cnum32_add(dst_reg->r32, cnum32_negate(src_reg->r32)); 13831 } 13832 13833 
static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, 13834 struct bpf_reg_state *src_reg) 13835 { 13836 dst_reg->r64 = cnum64_add(dst_reg->r64, cnum64_negate(src_reg->r64)); 13837 } 13838 13839 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, 13840 struct bpf_reg_state *src_reg) 13841 { 13842 s32 smin = reg_s32_min(dst_reg); 13843 s32 smax = reg_s32_max(dst_reg); 13844 u32 umin = reg_u32_min(dst_reg); 13845 u32 umax = reg_u32_max(dst_reg); 13846 s32 tmp_prod[4]; 13847 13848 if (check_mul_overflow(umax, reg_u32_max(src_reg), &umax) || 13849 check_mul_overflow(umin, reg_u32_min(src_reg), &umin)) { 13850 /* Overflow possible, we know nothing */ 13851 umin = 0; 13852 umax = U32_MAX; 13853 } 13854 if (check_mul_overflow(smin, reg_s32_min(src_reg), &tmp_prod[0]) || 13855 check_mul_overflow(smin, reg_s32_max(src_reg), &tmp_prod[1]) || 13856 check_mul_overflow(smax, reg_s32_min(src_reg), &tmp_prod[2]) || 13857 check_mul_overflow(smax, reg_s32_max(src_reg), &tmp_prod[3])) { 13858 /* Overflow possible, we know nothing */ 13859 smin = S32_MIN; 13860 smax = S32_MAX; 13861 } else { 13862 smin = min_array(tmp_prod, 4); 13863 smax = max_array(tmp_prod, 4); 13864 } 13865 13866 dst_reg->r32 = cnum32_intersect(cnum32_from_urange(umin, umax), 13867 cnum32_from_srange(smin, smax)); 13868 } 13869 13870 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, 13871 struct bpf_reg_state *src_reg) 13872 { 13873 s64 smin = reg_smin(dst_reg); 13874 s64 smax = reg_smax(dst_reg); 13875 u64 umin = reg_umin(dst_reg); 13876 u64 umax = reg_umax(dst_reg); 13877 s64 tmp_prod[4]; 13878 13879 if (check_mul_overflow(umax, reg_umax(src_reg), &umax) || 13880 check_mul_overflow(umin, reg_umin(src_reg), &umin)) { 13881 /* Overflow possible, we know nothing */ 13882 umin = 0; 13883 umax = U64_MAX; 13884 } 13885 if (check_mul_overflow(smin, reg_smin(src_reg), &tmp_prod[0]) || 13886 check_mul_overflow(smin, reg_smax(src_reg), &tmp_prod[1]) || 13887 check_mul_overflow(smax, 
reg_smin(src_reg), &tmp_prod[2]) || 13888 check_mul_overflow(smax, reg_smax(src_reg), &tmp_prod[3])) { 13889 /* Overflow possible, we know nothing */ 13890 smin = S64_MIN; 13891 smax = S64_MAX; 13892 } else { 13893 smin = min_array(tmp_prod, 4); 13894 smax = max_array(tmp_prod, 4); 13895 } 13896 13897 dst_reg->r64 = cnum64_intersect(cnum64_from_urange(umin, umax), 13898 cnum64_from_srange(smin, smax)); 13899 } 13900 13901 static void scalar32_min_max_udiv(struct bpf_reg_state *dst_reg, 13902 struct bpf_reg_state *src_reg) 13903 { 13904 u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */ 13905 13906 reg_set_urange32(dst_reg, reg_u32_min(dst_reg) / src_val, 13907 reg_u32_max(dst_reg) / src_val); 13908 13909 /* Reset other ranges/tnum to unbounded/unknown. */ 13910 reset_reg64_and_tnum(dst_reg); 13911 } 13912 13913 static void scalar_min_max_udiv(struct bpf_reg_state *dst_reg, 13914 struct bpf_reg_state *src_reg) 13915 { 13916 u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */ 13917 13918 reg_set_urange64(dst_reg, div64_u64(reg_umin(dst_reg), src_val), 13919 div64_u64(reg_umax(dst_reg), src_val)); 13920 13921 /* Reset other ranges/tnum to unbounded/unknown. */ 13922 reset_reg32_and_tnum(dst_reg); 13923 } 13924 13925 static void scalar32_min_max_sdiv(struct bpf_reg_state *dst_reg, 13926 struct bpf_reg_state *src_reg) 13927 { 13928 s32 smin = reg_s32_min(dst_reg); 13929 s32 smax = reg_s32_max(dst_reg); 13930 s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */ 13931 s32 res1, res2; 13932 13933 /* BPF div specification: S32_MIN / -1 = S32_MIN */ 13934 if (smin == S32_MIN && src_val == -1) { 13935 /* 13936 * If the dividend range contains more than just S32_MIN, 13937 * we cannot precisely track the result, so it becomes unbounded. 
13938 * e.g., [S32_MIN, S32_MIN+10]/(-1), 13939 * = {S32_MIN} U [-(S32_MIN+10), -(S32_MIN+1)] 13940 * = {S32_MIN} U [S32_MAX-9, S32_MAX] = [S32_MIN, S32_MAX] 13941 * Otherwise (if dividend is exactly S32_MIN), result remains S32_MIN. 13942 */ 13943 if (smax != S32_MIN) { 13944 smin = S32_MIN; 13945 smax = S32_MAX; 13946 } 13947 goto reset; 13948 } 13949 13950 res1 = smin / src_val; 13951 res2 = smax / src_val; 13952 smin = min(res1, res2); 13953 smax = max(res1, res2); 13954 13955 reset: 13956 reg_set_srange32(dst_reg, smin, smax); 13957 /* Reset other ranges/tnum to unbounded/unknown. */ 13958 reset_reg64_and_tnum(dst_reg); 13959 } 13960 13961 static void scalar_min_max_sdiv(struct bpf_reg_state *dst_reg, 13962 struct bpf_reg_state *src_reg) 13963 { 13964 s64 smin = reg_smin(dst_reg); 13965 s64 smax = reg_smax(dst_reg); 13966 s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */ 13967 s64 res1, res2; 13968 13969 /* BPF div specification: S64_MIN / -1 = S64_MIN */ 13970 if (smin == S64_MIN && src_val == -1) { 13971 /* 13972 * If the dividend range contains more than just S64_MIN, 13973 * we cannot precisely track the result, so it becomes unbounded. 13974 * e.g., [S64_MIN, S64_MIN+10]/(-1), 13975 * = {S64_MIN} U [-(S64_MIN+10), -(S64_MIN+1)] 13976 * = {S64_MIN} U [S64_MAX-9, S64_MAX] = [S64_MIN, S64_MAX] 13977 * Otherwise (if dividend is exactly S64_MIN), result remains S64_MIN. 13978 */ 13979 if (smax != S64_MIN) { 13980 smin = S64_MIN; 13981 smax = S64_MAX; 13982 } 13983 goto reset; 13984 } 13985 13986 res1 = div64_s64(smin, src_val); 13987 res2 = div64_s64(smax, src_val); 13988 smin = min(res1, res2); 13989 smax = max(res1, res2); 13990 13991 reset: 13992 reg_set_srange64(dst_reg, smin, smax); 13993 /* Reset other ranges/tnum to unbounded/unknown. 
*/ 13994 reset_reg32_and_tnum(dst_reg); 13995 } 13996 13997 static void scalar32_min_max_umod(struct bpf_reg_state *dst_reg, 13998 struct bpf_reg_state *src_reg) 13999 { 14000 u32 src_val = reg_u32_min(src_reg); /* non-zero, const divisor */ 14001 u32 res_max = src_val - 1; 14002 14003 /* 14004 * If dst_umax <= res_max, the result remains unchanged. 14005 * e.g., [2, 5] % 10 = [2, 5]. 14006 */ 14007 if (reg_u32_max(dst_reg) <= res_max) 14008 return; 14009 14010 reg_set_urange32(dst_reg, 0, min(reg_u32_max(dst_reg), res_max)); 14011 14012 /* Reset other ranges/tnum to unbounded/unknown. */ 14013 reset_reg64_and_tnum(dst_reg); 14014 } 14015 14016 static void scalar_min_max_umod(struct bpf_reg_state *dst_reg, 14017 struct bpf_reg_state *src_reg) 14018 { 14019 u64 src_val = reg_umin(src_reg); /* non-zero, const divisor */ 14020 u64 res_max = src_val - 1; 14021 14022 /* 14023 * If dst_umax <= res_max, the result remains unchanged. 14024 * e.g., [2, 5] % 10 = [2, 5]. 14025 */ 14026 if (reg_umax(dst_reg) <= res_max) 14027 return; 14028 14029 reg_set_urange64(dst_reg, 0, min(reg_umax(dst_reg), res_max)); 14030 14031 /* Reset other ranges/tnum to unbounded/unknown. */ 14032 reset_reg32_and_tnum(dst_reg); 14033 } 14034 14035 static void scalar32_min_max_smod(struct bpf_reg_state *dst_reg, 14036 struct bpf_reg_state *src_reg) 14037 { 14038 s32 src_val = reg_s32_min(src_reg); /* non-zero, const divisor */ 14039 14040 /* 14041 * Safe absolute value calculation: 14042 * If src_val == S32_MIN (-2147483648), src_abs becomes 2147483648. 14043 * Here use unsigned integer to avoid overflow. 14044 */ 14045 u32 src_abs = (src_val > 0) ? (u32)src_val : -(u32)src_val; 14046 14047 /* 14048 * Calculate the maximum possible absolute value of the result. 14049 * Even if src_abs is 2147483648 (S32_MIN), subtracting 1 gives 14050 * 2147483647 (S32_MAX), which fits perfectly in s32. 
14051 */ 14052 s32 res_max_abs = src_abs - 1; 14053 14054 /* 14055 * If the dividend is already within the result range, 14056 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5]. 14057 */ 14058 if (reg_s32_min(dst_reg) >= -res_max_abs && reg_s32_max(dst_reg) <= res_max_abs) 14059 return; 14060 14061 /* General case: result has the same sign as the dividend. */ 14062 if (reg_s32_min(dst_reg) >= 0) { 14063 reg_set_srange32(dst_reg, 0, min(reg_s32_max(dst_reg), res_max_abs)); 14064 } else if (reg_s32_max(dst_reg) <= 0) { 14065 reg_set_srange32(dst_reg, max(reg_s32_min(dst_reg), -res_max_abs), 0); 14066 } else { 14067 reg_set_srange32(dst_reg, -res_max_abs, res_max_abs); 14068 } 14069 14070 /* Reset other ranges/tnum to unbounded/unknown. */ 14071 reset_reg64_and_tnum(dst_reg); 14072 } 14073 14074 static void scalar_min_max_smod(struct bpf_reg_state *dst_reg, 14075 struct bpf_reg_state *src_reg) 14076 { 14077 s64 src_val = reg_smin(src_reg); /* non-zero, const divisor */ 14078 14079 /* 14080 * Safe absolute value calculation: 14081 * If src_val == S64_MIN (-2^63), src_abs becomes 2^63. 14082 * Here use unsigned integer to avoid overflow. 14083 */ 14084 u64 src_abs = (src_val > 0) ? (u64)src_val : -(u64)src_val; 14085 14086 /* 14087 * Calculate the maximum possible absolute value of the result. 14088 * Even if src_abs is 2^63 (S64_MIN), subtracting 1 gives 14089 * 2^63 - 1 (S64_MAX), which fits perfectly in s64. 14090 */ 14091 s64 res_max_abs = src_abs - 1; 14092 14093 /* 14094 * If the dividend is already within the result range, 14095 * the result remains unchanged. e.g., [-2, 5] % 10 = [-2, 5]. 14096 */ 14097 if (reg_smin(dst_reg) >= -res_max_abs && reg_smax(dst_reg) <= res_max_abs) 14098 return; 14099 14100 /* General case: result has the same sign as the dividend. 
*/ 14101 if (reg_smin(dst_reg) >= 0) { 14102 reg_set_srange64(dst_reg, 0, min(reg_smax(dst_reg), res_max_abs)); 14103 } else if (reg_smax(dst_reg) <= 0) { 14104 reg_set_srange64(dst_reg, max(reg_smin(dst_reg), -res_max_abs), 0); 14105 } else { 14106 reg_set_srange64(dst_reg, -res_max_abs, res_max_abs); 14107 } 14108 14109 /* Reset other ranges/tnum to unbounded/unknown. */ 14110 reset_reg32_and_tnum(dst_reg); 14111 } 14112 14113 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, 14114 struct bpf_reg_state *src_reg) 14115 { 14116 bool src_known = tnum_subreg_is_const(src_reg->var_off); 14117 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 14118 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 14119 u32 umax_val = reg_u32_max(src_reg); 14120 14121 if (src_known && dst_known) { 14122 __mark_reg32_known(dst_reg, var32_off.value); 14123 return; 14124 } 14125 14126 /* We get our minimum from the var_off, since that's inherently 14127 * bitwise. Our maximum is the minimum of the operands' maxima. 14128 */ 14129 reg_set_urange32(dst_reg, 14130 var32_off.value, 14131 min(reg_u32_max(dst_reg), umax_val)); 14132 } 14133 14134 static void scalar_min_max_and(struct bpf_reg_state *dst_reg, 14135 struct bpf_reg_state *src_reg) 14136 { 14137 bool src_known = tnum_is_const(src_reg->var_off); 14138 bool dst_known = tnum_is_const(dst_reg->var_off); 14139 u64 umax_val = reg_umax(src_reg); 14140 14141 if (src_known && dst_known) { 14142 __mark_reg_known(dst_reg, dst_reg->var_off.value); 14143 return; 14144 } 14145 14146 /* We get our minimum from the var_off, since that's inherently 14147 * bitwise. Our maximum is the minimum of the operands' maxima. 
14148 */ 14149 reg_set_urange64(dst_reg, 14150 dst_reg->var_off.value, 14151 min(reg_umax(dst_reg), umax_val)); 14152 14153 /* We may learn something more from the var_off */ 14154 __update_reg_bounds(dst_reg); 14155 } 14156 14157 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, 14158 struct bpf_reg_state *src_reg) 14159 { 14160 bool src_known = tnum_subreg_is_const(src_reg->var_off); 14161 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 14162 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 14163 u32 umin_val = reg_u32_min(src_reg); 14164 14165 if (src_known && dst_known) { 14166 __mark_reg32_known(dst_reg, var32_off.value); 14167 return; 14168 } 14169 14170 /* We get our maximum from the var_off, and our minimum is the 14171 * maximum of the operands' minima 14172 */ 14173 reg_set_urange32(dst_reg, 14174 max(reg_u32_min(dst_reg), umin_val), 14175 var32_off.value | var32_off.mask); 14176 } 14177 14178 static void scalar_min_max_or(struct bpf_reg_state *dst_reg, 14179 struct bpf_reg_state *src_reg) 14180 { 14181 bool src_known = tnum_is_const(src_reg->var_off); 14182 bool dst_known = tnum_is_const(dst_reg->var_off); 14183 u64 umin_val = reg_umin(src_reg); 14184 14185 if (src_known && dst_known) { 14186 __mark_reg_known(dst_reg, dst_reg->var_off.value); 14187 return; 14188 } 14189 14190 /* We get our maximum from the var_off, and our minimum is the 14191 * maximum of the operands' minima 14192 */ 14193 reg_set_urange64(dst_reg, 14194 max(reg_umin(dst_reg), umin_val), 14195 dst_reg->var_off.value | dst_reg->var_off.mask); 14196 14197 /* We may learn something more from the var_off */ 14198 __update_reg_bounds(dst_reg); 14199 } 14200 14201 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, 14202 struct bpf_reg_state *src_reg) 14203 { 14204 bool src_known = tnum_subreg_is_const(src_reg->var_off); 14205 bool dst_known = tnum_subreg_is_const(dst_reg->var_off); 14206 struct tnum var32_off = tnum_subreg(dst_reg->var_off); 14207 
14208 if (src_known && dst_known) { 14209 __mark_reg32_known(dst_reg, var32_off.value); 14210 return; 14211 } 14212 14213 /* We get both minimum and maximum from the var32_off. */ 14214 reg_set_urange32(dst_reg, var32_off.value, var32_off.value | var32_off.mask); 14215 } 14216 14217 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, 14218 struct bpf_reg_state *src_reg) 14219 { 14220 bool src_known = tnum_is_const(src_reg->var_off); 14221 bool dst_known = tnum_is_const(dst_reg->var_off); 14222 14223 if (src_known && dst_known) { 14224 /* dst_reg->var_off.value has been updated earlier */ 14225 __mark_reg_known(dst_reg, dst_reg->var_off.value); 14226 return; 14227 } 14228 14229 /* We get both minimum and maximum from the var_off. */ 14230 reg_set_urange64(dst_reg, 14231 dst_reg->var_off.value, 14232 dst_reg->var_off.value | dst_reg->var_off.mask); 14233 } 14234 14235 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, 14236 u64 umin_val, u64 umax_val) 14237 { 14238 /* If we might shift our top bit out, then we know nothing */ 14239 if (umax_val > 31 || reg_u32_max(dst_reg) > 1ULL << (31 - umax_val)) 14240 reg_set_urange32(dst_reg, 0, U32_MAX); 14241 else 14242 /* We lose all sign bit information (except what we can pick 14243 * up from var_off) 14244 */ 14245 reg_set_urange32(dst_reg, reg_u32_min(dst_reg) << umin_val, 14246 reg_u32_max(dst_reg) << umax_val); 14247 } 14248 14249 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, 14250 struct bpf_reg_state *src_reg) 14251 { 14252 u32 umax_val = reg_u32_max(src_reg); 14253 u32 umin_val = reg_u32_min(src_reg); 14254 /* u32 alu operation will zext upper bits */ 14255 struct tnum subreg = tnum_subreg(dst_reg->var_off); 14256 14257 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val); 14258 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val)); 14259 /* Not required but being careful mark reg64 bounds as unknown so 14260 * that we are forced to pick them up from tnum and zext 
later and 14261 * if some path skips this step we are still safe. 14262 */ 14263 __mark_reg64_unbounded(dst_reg); 14264 __update_reg32_bounds(dst_reg); 14265 } 14266 14267 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg, 14268 u64 umin_val, u64 umax_val) 14269 { 14270 struct cnum64 u, s; 14271 14272 /* Special case <<32 because it is a common compiler pattern to sign 14273 * extend subreg by doing <<32 s>>32. smin/smax assignments are correct 14274 * because s32 bounds don't flip sign when shifting to the left by 14275 * 32bits. 14276 */ 14277 if (umin_val == 32 && umax_val == 32) 14278 s = cnum64_from_srange((s64)reg_s32_min(dst_reg) << 32, 14279 (s64)reg_s32_max(dst_reg) << 32); 14280 else 14281 s = CNUM64_UNBOUNDED; 14282 14283 /* If we might shift our top bit out, then we know nothing */ 14284 if (reg_umax(dst_reg) > 1ULL << (63 - umax_val)) 14285 u = CNUM64_UNBOUNDED; 14286 else 14287 u = cnum64_from_urange(reg_umin(dst_reg) << umin_val, 14288 reg_umax(dst_reg) << umax_val); 14289 14290 dst_reg->r64 = cnum64_intersect(u, s); 14291 } 14292 14293 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg, 14294 struct bpf_reg_state *src_reg) 14295 { 14296 u64 umax_val = reg_umax(src_reg); 14297 u64 umin_val = reg_umin(src_reg); 14298 14299 /* scalar64 calc uses 32bit unshifted bounds so must be called first */ 14300 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val); 14301 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val); 14302 14303 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); 14304 /* We may learn something more from the var_off */ 14305 __update_reg_bounds(dst_reg); 14306 } 14307 14308 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg, 14309 struct bpf_reg_state *src_reg) 14310 { 14311 struct tnum subreg = tnum_subreg(dst_reg->var_off); 14312 u32 umax_val = reg_u32_max(src_reg); 14313 u32 umin_val = reg_u32_min(src_reg); 14314 14315 /* BPF_RSH is an unsigned shift. 
If the value in dst_reg might 14316 * be negative, then either: 14317 * 1) src_reg might be zero, so the sign bit of the result is 14318 * unknown, so we lose our signed bounds 14319 * 2) it's known negative, thus the unsigned bounds capture the 14320 * signed bounds 14321 * 3) the signed bounds cross zero, so they tell us nothing 14322 * about the result 14323 * If the value in dst_reg is known nonnegative, then again the 14324 * unsigned bounds capture the signed bounds. 14325 * Thus, in all cases it suffices to blow away our signed bounds 14326 * and rely on inferring new ones from the unsigned bounds and 14327 * var_off of the result. 14328 */ 14329 14330 dst_reg->var_off = tnum_rshift(subreg, umin_val); 14331 reg_set_urange32(dst_reg, reg_u32_min(dst_reg) >> umax_val, 14332 reg_u32_max(dst_reg) >> umin_val); 14333 14334 __mark_reg64_unbounded(dst_reg); 14335 __update_reg32_bounds(dst_reg); 14336 } 14337 14338 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg, 14339 struct bpf_reg_state *src_reg) 14340 { 14341 u64 umax_val = reg_umax(src_reg); 14342 u64 umin_val = reg_umin(src_reg); 14343 14344 /* BPF_RSH is an unsigned shift. If the value in dst_reg might 14345 * be negative, then either: 14346 * 1) src_reg might be zero, so the sign bit of the result is 14347 * unknown, so we lose our signed bounds 14348 * 2) it's known negative, thus the unsigned bounds capture the 14349 * signed bounds 14350 * 3) the signed bounds cross zero, so they tell us nothing 14351 * about the result 14352 * If the value in dst_reg is known nonnegative, then again the 14353 * unsigned bounds capture the signed bounds. 14354 * Thus, in all cases it suffices to blow away our signed bounds 14355 * and rely on inferring new ones from the unsigned bounds and 14356 * var_off of the result. 
14357 */ 14358 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); 14359 reg_set_urange64(dst_reg, reg_umin(dst_reg) >> umax_val, 14360 reg_umax(dst_reg) >> umin_val); 14361 14362 /* Its not easy to operate on alu32 bounds here because it depends 14363 * on bits being shifted in. Take easy way out and mark unbounded 14364 * so we can recalculate later from tnum. 14365 */ 14366 __mark_reg32_unbounded(dst_reg); 14367 __update_reg_bounds(dst_reg); 14368 } 14369 14370 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, 14371 struct bpf_reg_state *src_reg) 14372 { 14373 u64 umin_val = reg_u32_min(src_reg); 14374 14375 /* Upon reaching here, src_known is true and 14376 * umax_val is equal to umin_val. 14377 * Blow away the dst_reg umin_value/umax_value and rely on 14378 * dst_reg var_off to refine the result. 14379 */ 14380 reg_set_srange32(dst_reg, 14381 (u32)(((s32)reg_s32_min(dst_reg)) >> umin_val), 14382 (u32)(((s32)reg_s32_max(dst_reg)) >> umin_val)); 14383 14384 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32); 14385 14386 __mark_reg64_unbounded(dst_reg); 14387 __update_reg32_bounds(dst_reg); 14388 } 14389 14390 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, 14391 struct bpf_reg_state *src_reg) 14392 { 14393 u64 umin_val = reg_umin(src_reg); 14394 14395 /* Upon reaching here, src_known is true and umax_val is equal 14396 * to umin_val. 14397 */ 14398 reg_set_srange64(dst_reg, reg_smin(dst_reg) >> umin_val, 14399 reg_smax(dst_reg) >> umin_val); 14400 14401 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64); 14402 14403 /* Its not easy to operate on alu32 bounds here because it depends 14404 * on bits being shifted in from upper 32-bits. Take easy way out 14405 * and mark unbounded so we can recalculate later from tnum. 
14406 */ 14407 __mark_reg32_unbounded(dst_reg); 14408 __update_reg_bounds(dst_reg); 14409 } 14410 14411 static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn) 14412 { 14413 /* 14414 * Byte swap operation - update var_off using tnum_bswap. 14415 * Three cases: 14416 * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE) 14417 * unconditional swap 14418 * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE) 14419 * swap on big-endian, truncation or no-op on little-endian 14420 * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE) 14421 * swap on little-endian, truncation or no-op on big-endian 14422 */ 14423 14424 bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64; 14425 bool to_le = BPF_SRC(insn->code) == BPF_TO_LE; 14426 bool is_big_endian; 14427 #ifdef CONFIG_CPU_BIG_ENDIAN 14428 is_big_endian = true; 14429 #else 14430 is_big_endian = false; 14431 #endif 14432 /* Apply bswap if alu64 or switch between big-endian and little-endian machines */ 14433 bool need_bswap = alu64 || (to_le == is_big_endian); 14434 14435 /* 14436 * If the register is mutated, manually reset its scalar ID to break 14437 * any existing ties and avoid incorrect bounds propagation. 14438 */ 14439 if (need_bswap || insn->imm == 16 || insn->imm == 32) 14440 clear_scalar_id(dst_reg); 14441 14442 if (need_bswap) { 14443 if (insn->imm == 16) 14444 dst_reg->var_off = tnum_bswap16(dst_reg->var_off); 14445 else if (insn->imm == 32) 14446 dst_reg->var_off = tnum_bswap32(dst_reg->var_off); 14447 else if (insn->imm == 64) 14448 dst_reg->var_off = tnum_bswap64(dst_reg->var_off); 14449 /* 14450 * Byteswap scrambles the range, so we must reset bounds. 14451 * Bounds will be re-derived from the new tnum later. 
14452 */ 14453 __mark_reg_unbounded(dst_reg); 14454 } 14455 /* For bswap16/32, truncate dst register to match the swapped size */ 14456 if (insn->imm == 16 || insn->imm == 32) 14457 coerce_reg_to_size(dst_reg, insn->imm / 8); 14458 } 14459 14460 static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn, 14461 const struct bpf_reg_state *src_reg) 14462 { 14463 bool src_is_const = false; 14464 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; 14465 14466 if (insn_bitness == 32) { 14467 if (tnum_subreg_is_const(src_reg->var_off) 14468 && reg_s32_min(src_reg) == reg_s32_max(src_reg) 14469 && reg_u32_min(src_reg) == reg_u32_max(src_reg)) 14470 src_is_const = true; 14471 } else { 14472 if (tnum_is_const(src_reg->var_off) 14473 && reg_smin(src_reg) == reg_smax(src_reg) 14474 && reg_umin(src_reg) == reg_umax(src_reg)) 14475 src_is_const = true; 14476 } 14477 14478 switch (BPF_OP(insn->code)) { 14479 case BPF_ADD: 14480 case BPF_SUB: 14481 case BPF_NEG: 14482 case BPF_AND: 14483 case BPF_XOR: 14484 case BPF_OR: 14485 case BPF_MUL: 14486 case BPF_END: 14487 return true; 14488 14489 /* 14490 * Division and modulo operators range is only safe to compute when the 14491 * divisor is a constant. 14492 */ 14493 case BPF_DIV: 14494 case BPF_MOD: 14495 return src_is_const; 14496 14497 /* Shift operators range is only computable if shift dimension operand 14498 * is a constant. Shifts greater than 31 or 63 are undefined. This 14499 * includes shifts by a negative number. 
14500 */ 14501 case BPF_LSH: 14502 case BPF_RSH: 14503 case BPF_ARSH: 14504 return (src_is_const && reg_umax(src_reg) < insn_bitness); 14505 default: 14506 return false; 14507 } 14508 } 14509 14510 static int maybe_fork_scalars(struct bpf_verifier_env *env, struct bpf_insn *insn, 14511 struct bpf_reg_state *dst_reg) 14512 { 14513 struct bpf_verifier_state *branch; 14514 struct bpf_reg_state *regs; 14515 bool alu32; 14516 14517 if (reg_smin(dst_reg) == -1 && reg_smax(dst_reg) == 0) 14518 alu32 = false; 14519 else if (reg_s32_min(dst_reg) == -1 && reg_s32_max(dst_reg) == 0) 14520 alu32 = true; 14521 else 14522 return 0; 14523 14524 branch = push_stack(env, env->insn_idx, env->insn_idx, false); 14525 if (IS_ERR(branch)) 14526 return PTR_ERR(branch); 14527 14528 regs = branch->frame[branch->curframe]->regs; 14529 if (alu32) { 14530 __mark_reg32_known(®s[insn->dst_reg], 0); 14531 __mark_reg32_known(dst_reg, -1ull); 14532 } else { 14533 __mark_reg_known(®s[insn->dst_reg], 0); 14534 __mark_reg_known(dst_reg, -1ull); 14535 } 14536 return 0; 14537 } 14538 14539 /* WARNING: This function does calculations on 64-bit values, but the actual 14540 * execution may occur on 32-bit values. Therefore, things like bitshifts 14541 * need extra checks in the 32-bit case. 14542 */ 14543 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 14544 struct bpf_insn *insn, 14545 struct bpf_reg_state *dst_reg, 14546 struct bpf_reg_state src_reg) 14547 { 14548 u8 opcode = BPF_OP(insn->code); 14549 s16 off = insn->off; 14550 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); 14551 int ret; 14552 14553 if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) { 14554 __mark_reg_unknown(env, dst_reg); 14555 return 0; 14556 } 14557 14558 if (sanitize_needed(opcode)) { 14559 ret = sanitize_val_alu(env, insn); 14560 if (ret < 0) 14561 return sanitize_err(env, insn, ret, NULL, NULL); 14562 } 14563 14564 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops. 
14565 * There are two classes of instructions: The first class we track both 14566 * alu32 and alu64 sign/unsigned bounds independently this provides the 14567 * greatest amount of precision when alu operations are mixed with jmp32 14568 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD, 14569 * and BPF_OR. This is possible because these ops have fairly easy to 14570 * understand and calculate behavior in both 32-bit and 64-bit alu ops. 14571 * See alu32 verifier tests for examples. The second class of 14572 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy 14573 * with regards to tracking sign/unsigned bounds because the bits may 14574 * cross subreg boundaries in the alu64 case. When this happens we mark 14575 * the reg unbounded in the subreg bound space and use the resulting 14576 * tnum to calculate an approximation of the sign/unsigned bounds. 14577 */ 14578 switch (opcode) { 14579 case BPF_ADD: 14580 scalar32_min_max_add(dst_reg, &src_reg); 14581 scalar_min_max_add(dst_reg, &src_reg); 14582 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); 14583 break; 14584 case BPF_SUB: 14585 scalar32_min_max_sub(dst_reg, &src_reg); 14586 scalar_min_max_sub(dst_reg, &src_reg); 14587 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off); 14588 break; 14589 case BPF_NEG: 14590 env->fake_reg[0] = *dst_reg; 14591 __mark_reg_known(dst_reg, 0); 14592 scalar32_min_max_sub(dst_reg, &env->fake_reg[0]); 14593 scalar_min_max_sub(dst_reg, &env->fake_reg[0]); 14594 dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off); 14595 break; 14596 case BPF_MUL: 14597 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off); 14598 scalar32_min_max_mul(dst_reg, &src_reg); 14599 scalar_min_max_mul(dst_reg, &src_reg); 14600 break; 14601 case BPF_DIV: 14602 /* BPF div specification: x / 0 = 0 */ 14603 if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0)) { 14604 ___mark_reg_known(dst_reg, 0); 14605 break; 
14606 } 14607 if (alu32) 14608 if (off == 1) 14609 scalar32_min_max_sdiv(dst_reg, &src_reg); 14610 else 14611 scalar32_min_max_udiv(dst_reg, &src_reg); 14612 else 14613 if (off == 1) 14614 scalar_min_max_sdiv(dst_reg, &src_reg); 14615 else 14616 scalar_min_max_udiv(dst_reg, &src_reg); 14617 break; 14618 case BPF_MOD: 14619 /* BPF mod specification: x % 0 = x */ 14620 if ((alu32 && reg_u32_min(&src_reg) == 0) || (!alu32 && reg_umin(&src_reg) == 0)) 14621 break; 14622 if (alu32) 14623 if (off == 1) 14624 scalar32_min_max_smod(dst_reg, &src_reg); 14625 else 14626 scalar32_min_max_umod(dst_reg, &src_reg); 14627 else 14628 if (off == 1) 14629 scalar_min_max_smod(dst_reg, &src_reg); 14630 else 14631 scalar_min_max_umod(dst_reg, &src_reg); 14632 break; 14633 case BPF_AND: 14634 if (tnum_is_const(src_reg.var_off)) { 14635 ret = maybe_fork_scalars(env, insn, dst_reg); 14636 if (ret) 14637 return ret; 14638 } 14639 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off); 14640 scalar32_min_max_and(dst_reg, &src_reg); 14641 scalar_min_max_and(dst_reg, &src_reg); 14642 break; 14643 case BPF_OR: 14644 if (tnum_is_const(src_reg.var_off)) { 14645 ret = maybe_fork_scalars(env, insn, dst_reg); 14646 if (ret) 14647 return ret; 14648 } 14649 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off); 14650 scalar32_min_max_or(dst_reg, &src_reg); 14651 scalar_min_max_or(dst_reg, &src_reg); 14652 break; 14653 case BPF_XOR: 14654 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off); 14655 scalar32_min_max_xor(dst_reg, &src_reg); 14656 scalar_min_max_xor(dst_reg, &src_reg); 14657 break; 14658 case BPF_LSH: 14659 if (alu32) 14660 scalar32_min_max_lsh(dst_reg, &src_reg); 14661 else 14662 scalar_min_max_lsh(dst_reg, &src_reg); 14663 break; 14664 case BPF_RSH: 14665 if (alu32) 14666 scalar32_min_max_rsh(dst_reg, &src_reg); 14667 else 14668 scalar_min_max_rsh(dst_reg, &src_reg); 14669 break; 14670 case BPF_ARSH: 14671 if (alu32) 14672 scalar32_min_max_arsh(dst_reg, 
&src_reg); 14673 else 14674 scalar_min_max_arsh(dst_reg, &src_reg); 14675 break; 14676 case BPF_END: 14677 scalar_byte_swap(dst_reg, insn); 14678 break; 14679 default: 14680 break; 14681 } 14682 14683 /* 14684 * ALU32 ops are zero extended into 64bit register. 14685 * 14686 * BPF_END is already handled inside the helper (truncation), 14687 * so skip zext here to avoid unexpected zero extension. 14688 * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40 14689 * This is a 64bit byte swap operation with alu32==true, 14690 * but we should not zero extend the result. 14691 */ 14692 if (alu32 && opcode != BPF_END) 14693 zext_32_to_64(dst_reg); 14694 reg_bounds_sync(dst_reg); 14695 return 0; 14696 } 14697 14698 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max 14699 * and var_off. 14700 */ 14701 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, 14702 struct bpf_insn *insn) 14703 { 14704 struct bpf_verifier_state *vstate = env->cur_state; 14705 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 14706 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; 14707 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 14708 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); 14709 u8 opcode = BPF_OP(insn->code); 14710 int err; 14711 14712 dst_reg = ®s[insn->dst_reg]; 14713 if (BPF_SRC(insn->code) == BPF_X) 14714 src_reg = ®s[insn->src_reg]; 14715 else 14716 src_reg = NULL; 14717 14718 /* Case where at least one operand is an arena. */ 14719 if (dst_reg->type == PTR_TO_ARENA || (src_reg && src_reg->type == PTR_TO_ARENA)) { 14720 struct bpf_insn_aux_data *aux = cur_aux(env); 14721 14722 if (dst_reg->type != PTR_TO_ARENA) 14723 *dst_reg = *src_reg; 14724 14725 dst_reg->subreg_def = env->insn_idx + 1; 14726 14727 if (BPF_CLASS(insn->code) == BPF_ALU64) 14728 /* 14729 * 32-bit operations zero upper bits automatically. 14730 * 64-bit operations need to be converted to 32. 
14731 */ 14732 aux->needs_zext = true; 14733 14734 /* Any arithmetic operations are allowed on arena pointers */ 14735 return 0; 14736 } 14737 14738 if (dst_reg->type != SCALAR_VALUE) 14739 ptr_reg = dst_reg; 14740 14741 if (BPF_SRC(insn->code) == BPF_X) { 14742 if (src_reg->type != SCALAR_VALUE) { 14743 if (dst_reg->type != SCALAR_VALUE) { 14744 /* Combining two pointers by any ALU op yields 14745 * an arbitrary scalar. Disallow all math except 14746 * pointer subtraction 14747 */ 14748 if (opcode == BPF_SUB && env->allow_ptr_leaks) { 14749 mark_reg_unknown(env, regs, insn->dst_reg); 14750 return 0; 14751 } 14752 verbose(env, "R%d pointer %s pointer prohibited\n", 14753 insn->dst_reg, 14754 bpf_alu_string[opcode >> 4]); 14755 return -EACCES; 14756 } else { 14757 /* scalar += pointer 14758 * This is legal, but we have to reverse our 14759 * src/dest handling in computing the range 14760 */ 14761 err = mark_chain_precision(env, insn->dst_reg); 14762 if (err) 14763 return err; 14764 return adjust_ptr_min_max_vals(env, insn, 14765 src_reg, dst_reg); 14766 } 14767 } else if (ptr_reg) { 14768 /* pointer += scalar */ 14769 err = mark_chain_precision(env, insn->src_reg); 14770 if (err) 14771 return err; 14772 return adjust_ptr_min_max_vals(env, insn, 14773 dst_reg, src_reg); 14774 } else if (dst_reg->precise) { 14775 /* if dst_reg is precise, src_reg should be precise as well */ 14776 err = mark_chain_precision(env, insn->src_reg); 14777 if (err) 14778 return err; 14779 } 14780 } else { 14781 /* Pretend the src is a reg with a known value, since we only 14782 * need to be able to read from this state. 
14783 */ 14784 off_reg.type = SCALAR_VALUE; 14785 __mark_reg_known(&off_reg, insn->imm); 14786 src_reg = &off_reg; 14787 if (ptr_reg) /* pointer += K */ 14788 return adjust_ptr_min_max_vals(env, insn, 14789 ptr_reg, src_reg); 14790 } 14791 14792 /* Got here implies adding two SCALAR_VALUEs */ 14793 if (WARN_ON_ONCE(ptr_reg)) { 14794 print_verifier_state(env, vstate, vstate->curframe, true); 14795 verbose(env, "verifier internal error: unexpected ptr_reg\n"); 14796 return -EFAULT; 14797 } 14798 if (WARN_ON(!src_reg)) { 14799 print_verifier_state(env, vstate, vstate->curframe, true); 14800 verbose(env, "verifier internal error: no src_reg\n"); 14801 return -EFAULT; 14802 } 14803 /* 14804 * For alu32 linked register tracking, we need to check dst_reg's 14805 * umax_value before the ALU operation. After adjust_scalar_min_max_vals(), 14806 * alu32 ops will have zero-extended the result, making umax_value <= U32_MAX. 14807 */ 14808 u64 dst_umax = reg_umax(dst_reg); 14809 14810 err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); 14811 if (err) 14812 return err; 14813 /* 14814 * Compilers can generate the code 14815 * r1 = r2 14816 * r1 += 0x1 14817 * if r2 < 1000 goto ... 14818 * use r1 in memory access 14819 * So remember constant delta between r2 and r1 and update r1 after 14820 * 'if' condition. 
14821 */ 14822 if (env->bpf_capable && 14823 (BPF_OP(insn->code) == BPF_ADD || BPF_OP(insn->code) == BPF_SUB) && 14824 dst_reg->id && is_reg_const(src_reg, alu32) && 14825 !(BPF_SRC(insn->code) == BPF_X && insn->src_reg == insn->dst_reg)) { 14826 u64 val = reg_const_value(src_reg, alu32); 14827 s32 off; 14828 14829 if (!alu32 && ((s64)val < S32_MIN || (s64)val > S32_MAX)) 14830 goto clear_id; 14831 14832 if (alu32 && (dst_umax > U32_MAX)) 14833 goto clear_id; 14834 14835 off = (s32)val; 14836 14837 if (BPF_OP(insn->code) == BPF_SUB) { 14838 /* Negating S32_MIN would overflow */ 14839 if (off == S32_MIN) 14840 goto clear_id; 14841 off = -off; 14842 } 14843 14844 if (dst_reg->id & BPF_ADD_CONST) { 14845 /* 14846 * If the register already went through rX += val 14847 * we cannot accumulate another val into rx->off. 14848 */ 14849 clear_id: 14850 clear_scalar_id(dst_reg); 14851 } else { 14852 if (alu32) 14853 dst_reg->id |= BPF_ADD_CONST32; 14854 else 14855 dst_reg->id |= BPF_ADD_CONST64; 14856 dst_reg->delta = off; 14857 } 14858 } else { 14859 /* 14860 * Make sure ID is cleared otherwise dst_reg min/max could be 14861 * incorrectly propagated into other registers by sync_linked_regs() 14862 */ 14863 clear_scalar_id(dst_reg); 14864 } 14865 return 0; 14866 } 14867 14868 /* check validity of 32-bit and 64-bit arithmetic operations */ 14869 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) 14870 { 14871 struct bpf_reg_state *regs = cur_regs(env); 14872 u8 opcode = BPF_OP(insn->code); 14873 int err; 14874 14875 if (opcode == BPF_END || opcode == BPF_NEG) { 14876 /* check src operand */ 14877 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 14878 if (err) 14879 return err; 14880 14881 if (is_pointer_value(env, insn->dst_reg)) { 14882 verbose(env, "R%d pointer arithmetic prohibited\n", 14883 insn->dst_reg); 14884 return -EACCES; 14885 } 14886 14887 /* check dest operand */ 14888 if (regs[insn->dst_reg].type == SCALAR_VALUE) { 14889 err = 
check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 14890 err = err ?: adjust_scalar_min_max_vals(env, insn, 14891 ®s[insn->dst_reg], 14892 regs[insn->dst_reg]); 14893 } else { 14894 err = check_reg_arg(env, insn->dst_reg, DST_OP); 14895 } 14896 if (err) 14897 return err; 14898 14899 } else if (opcode == BPF_MOV) { 14900 14901 if (BPF_SRC(insn->code) == BPF_X) { 14902 if (insn->off == BPF_ADDR_SPACE_CAST) { 14903 if (!env->prog->aux->arena) { 14904 verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n"); 14905 return -EINVAL; 14906 } 14907 } 14908 14909 /* check src operand */ 14910 err = check_reg_arg(env, insn->src_reg, SRC_OP); 14911 if (err) 14912 return err; 14913 } 14914 14915 /* check dest operand, mark as required later */ 14916 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 14917 if (err) 14918 return err; 14919 14920 if (BPF_SRC(insn->code) == BPF_X) { 14921 struct bpf_reg_state *src_reg = regs + insn->src_reg; 14922 struct bpf_reg_state *dst_reg = regs + insn->dst_reg; 14923 14924 if (BPF_CLASS(insn->code) == BPF_ALU64) { 14925 if (insn->imm) { 14926 /* off == BPF_ADDR_SPACE_CAST */ 14927 mark_reg_unknown(env, regs, insn->dst_reg); 14928 if (insn->imm == 1) { /* cast from as(1) to as(0) */ 14929 dst_reg->type = PTR_TO_ARENA; 14930 /* PTR_TO_ARENA is 32-bit */ 14931 dst_reg->subreg_def = env->insn_idx + 1; 14932 } 14933 } else if (insn->off == 0) { 14934 /* case: R1 = R2 14935 * copy register state to dest reg 14936 */ 14937 assign_scalar_id_before_mov(env, src_reg); 14938 *dst_reg = *src_reg; 14939 dst_reg->subreg_def = DEF_NOT_SUBREG; 14940 } else { 14941 /* case: R1 = (s8, s16 s32)R2 */ 14942 if (is_pointer_value(env, insn->src_reg)) { 14943 verbose(env, 14944 "R%d sign-extension part of pointer\n", 14945 insn->src_reg); 14946 return -EACCES; 14947 } else if (src_reg->type == SCALAR_VALUE) { 14948 bool no_sext; 14949 14950 no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1)); 14951 if (no_sext) 
14952 assign_scalar_id_before_mov(env, src_reg); 14953 *dst_reg = *src_reg; 14954 if (!no_sext) 14955 clear_scalar_id(dst_reg); 14956 coerce_reg_to_size_sx(dst_reg, insn->off >> 3); 14957 dst_reg->subreg_def = DEF_NOT_SUBREG; 14958 } else { 14959 mark_reg_unknown(env, regs, insn->dst_reg); 14960 } 14961 } 14962 } else { 14963 /* R1 = (u32) R2 */ 14964 if (is_pointer_value(env, insn->src_reg)) { 14965 verbose(env, 14966 "R%d partial copy of pointer\n", 14967 insn->src_reg); 14968 return -EACCES; 14969 } else if (src_reg->type == SCALAR_VALUE) { 14970 if (insn->off == 0) { 14971 bool is_src_reg_u32 = get_reg_width(src_reg) <= 32; 14972 14973 if (is_src_reg_u32) 14974 assign_scalar_id_before_mov(env, src_reg); 14975 *dst_reg = *src_reg; 14976 /* Make sure ID is cleared if src_reg is not in u32 14977 * range otherwise dst_reg min/max could be incorrectly 14978 * propagated into src_reg by sync_linked_regs() 14979 */ 14980 if (!is_src_reg_u32) 14981 clear_scalar_id(dst_reg); 14982 dst_reg->subreg_def = env->insn_idx + 1; 14983 } else { 14984 /* case: W1 = (s8, s16)W2 */ 14985 bool no_sext = reg_umax(src_reg) < (1ULL << (insn->off - 1)); 14986 14987 if (no_sext) 14988 assign_scalar_id_before_mov(env, src_reg); 14989 *dst_reg = *src_reg; 14990 if (!no_sext) 14991 clear_scalar_id(dst_reg); 14992 dst_reg->subreg_def = env->insn_idx + 1; 14993 coerce_subreg_to_size_sx(dst_reg, insn->off >> 3); 14994 } 14995 } else { 14996 mark_reg_unknown(env, regs, 14997 insn->dst_reg); 14998 } 14999 zext_32_to_64(dst_reg); 15000 reg_bounds_sync(dst_reg); 15001 } 15002 } else { 15003 /* case: R = imm 15004 * remember the value we stored into this reg 15005 */ 15006 /* clear any state __mark_reg_known doesn't set */ 15007 mark_reg_unknown(env, regs, insn->dst_reg); 15008 regs[insn->dst_reg].type = SCALAR_VALUE; 15009 if (BPF_CLASS(insn->code) == BPF_ALU64) { 15010 __mark_reg_known(regs + insn->dst_reg, 15011 insn->imm); 15012 } else { 15013 __mark_reg_known(regs + insn->dst_reg, 15014 
(u32)insn->imm); 15015 } 15016 } 15017 15018 } else { /* all other ALU ops: and, sub, xor, add, ... */ 15019 15020 if (BPF_SRC(insn->code) == BPF_X) { 15021 /* check src1 operand */ 15022 err = check_reg_arg(env, insn->src_reg, SRC_OP); 15023 if (err) 15024 return err; 15025 } 15026 15027 /* check src2 operand */ 15028 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 15029 if (err) 15030 return err; 15031 15032 if ((opcode == BPF_MOD || opcode == BPF_DIV) && 15033 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) { 15034 verbose(env, "div by zero\n"); 15035 return -EINVAL; 15036 } 15037 15038 if ((opcode == BPF_LSH || opcode == BPF_RSH || 15039 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { 15040 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; 15041 15042 if (insn->imm < 0 || insn->imm >= size) { 15043 verbose(env, "invalid shift %d\n", insn->imm); 15044 return -EINVAL; 15045 } 15046 } 15047 15048 /* check dest operand */ 15049 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); 15050 err = err ?: adjust_reg_min_max_vals(env, insn); 15051 if (err) 15052 return err; 15053 } 15054 15055 return reg_bounds_sanity_check(env, ®s[insn->dst_reg], "alu"); 15056 } 15057 15058 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, 15059 struct bpf_reg_state *dst_reg, 15060 enum bpf_reg_type type, 15061 bool range_right_open) 15062 { 15063 struct bpf_func_state *state; 15064 struct bpf_reg_state *reg; 15065 int new_range; 15066 15067 if (reg_umax(dst_reg) == 0 && range_right_open) 15068 /* This doesn't give us any range */ 15069 return; 15070 15071 if (reg_umax(dst_reg) > MAX_PACKET_OFF) 15072 /* Risk of overflow. For instance, ptr + (1<<63) may be less 15073 * than pkt_end, but that's because it's also less than pkt. 
15074 */ 15075 return; 15076 15077 new_range = reg_umax(dst_reg); 15078 if (range_right_open) 15079 new_range++; 15080 15081 /* Examples for register markings: 15082 * 15083 * pkt_data in dst register: 15084 * 15085 * r2 = r3; 15086 * r2 += 8; 15087 * if (r2 > pkt_end) goto <handle exception> 15088 * <access okay> 15089 * 15090 * r2 = r3; 15091 * r2 += 8; 15092 * if (r2 < pkt_end) goto <access okay> 15093 * <handle exception> 15094 * 15095 * Where: 15096 * r2 == dst_reg, pkt_end == src_reg 15097 * r2=pkt(id=n,off=8,r=0) 15098 * r3=pkt(id=n,off=0,r=0) 15099 * 15100 * pkt_data in src register: 15101 * 15102 * r2 = r3; 15103 * r2 += 8; 15104 * if (pkt_end >= r2) goto <access okay> 15105 * <handle exception> 15106 * 15107 * r2 = r3; 15108 * r2 += 8; 15109 * if (pkt_end <= r2) goto <handle exception> 15110 * <access okay> 15111 * 15112 * Where: 15113 * pkt_end == dst_reg, r2 == src_reg 15114 * r2=pkt(id=n,off=8,r=0) 15115 * r3=pkt(id=n,off=0,r=0) 15116 * 15117 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) 15118 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) 15119 * and [r3, r3 + 8-1) respectively is safe to access depending on 15120 * the check. 15121 */ 15122 15123 /* If our ids match, then we must have the same max_value. And we 15124 * don't care about the other reg's fixed offset, since if it's too big 15125 * the range won't allow anything. 15126 * reg_umax(dst_reg) is known < MAX_PACKET_OFF, therefore it fits in a u16. 15127 */ 15128 bpf_for_each_reg_in_vstate(vstate, state, reg, ({ 15129 if (reg->type == type && reg->id == dst_reg->id) 15130 /* keep the maximum range already checked */ 15131 reg->range = max(reg->range, new_range); 15132 })); 15133 } 15134 15135 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, 15136 u8 opcode, bool is_jmp32); 15137 static u8 rev_opcode(u8 opcode); 15138 15139 /* 15140 * Learn more information about live branches by simulating refinement on both branches. 
15141 * regs_refine_cond_op() is sound, so producing ill-formed register bounds for the branch means 15142 * that branch is dead. 15143 */ 15144 static int simulate_both_branches_taken(struct bpf_verifier_env *env, u8 opcode, bool is_jmp32) 15145 { 15146 /* Fallthrough (FALSE) branch */ 15147 regs_refine_cond_op(&env->false_reg1, &env->false_reg2, rev_opcode(opcode), is_jmp32); 15148 reg_bounds_sync(&env->false_reg1); 15149 reg_bounds_sync(&env->false_reg2); 15150 /* 15151 * If there is a range bounds violation in *any* of the abstract values in either 15152 * reg_states in the FALSE branch (i.e. reg1, reg2), the FALSE branch must be dead. Only 15153 * TRUE branch will be taken. 15154 */ 15155 if (range_bounds_violation(&env->false_reg1) || range_bounds_violation(&env->false_reg2)) 15156 return 1; 15157 15158 /* Jump (TRUE) branch */ 15159 regs_refine_cond_op(&env->true_reg1, &env->true_reg2, opcode, is_jmp32); 15160 reg_bounds_sync(&env->true_reg1); 15161 reg_bounds_sync(&env->true_reg2); 15162 /* 15163 * If there is a range bounds violation in *any* of the abstract values in either 15164 * reg_states in the TRUE branch (i.e. true_reg1, true_reg2), the TRUE branch must be dead. 15165 * Only FALSE branch will be taken. 15166 */ 15167 if (range_bounds_violation(&env->true_reg1) || range_bounds_violation(&env->true_reg2)) 15168 return 0; 15169 15170 /* Both branches are possible, we can't determine which one will be taken. */ 15171 return -1; 15172 } 15173 15174 /* 15175 * <reg1> <op> <reg2>, currently assuming reg2 is a constant 15176 */ 15177 static int is_scalar_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1, 15178 struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32) 15179 { 15180 struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off; 15181 struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off; 15182 u64 umin1 = is_jmp32 ? (u64)reg_u32_min(reg1) : reg_umin(reg1); 15183 u64 umax1 = is_jmp32 ? 
(u64)reg_u32_max(reg1) : reg_umax(reg1); 15184 s64 smin1 = is_jmp32 ? (s64)reg_s32_min(reg1) : reg_smin(reg1); 15185 s64 smax1 = is_jmp32 ? (s64)reg_s32_max(reg1) : reg_smax(reg1); 15186 u64 umin2 = is_jmp32 ? (u64)reg_u32_min(reg2) : reg_umin(reg2); 15187 u64 umax2 = is_jmp32 ? (u64)reg_u32_max(reg2) : reg_umax(reg2); 15188 s64 smin2 = is_jmp32 ? (s64)reg_s32_min(reg2) : reg_smin(reg2); 15189 s64 smax2 = is_jmp32 ? (s64)reg_s32_max(reg2) : reg_smax(reg2); 15190 15191 if (reg1 == reg2) { 15192 switch (opcode) { 15193 case BPF_JGE: 15194 case BPF_JLE: 15195 case BPF_JSGE: 15196 case BPF_JSLE: 15197 case BPF_JEQ: 15198 return 1; 15199 case BPF_JGT: 15200 case BPF_JLT: 15201 case BPF_JSGT: 15202 case BPF_JSLT: 15203 case BPF_JNE: 15204 return 0; 15205 case BPF_JSET: 15206 if (tnum_is_const(t1)) 15207 return t1.value != 0; 15208 else 15209 return (smin1 <= 0 && smax1 >= 0) ? -1 : 1; 15210 default: 15211 return -1; 15212 } 15213 } 15214 15215 switch (opcode) { 15216 case BPF_JEQ: 15217 /* constants, umin/umax and smin/smax checks would be 15218 * redundant in this case because they all should match 15219 */ 15220 if (tnum_is_const(t1) && tnum_is_const(t2)) 15221 return t1.value == t2.value; 15222 if (!tnum_overlap(t1, t2)) 15223 return 0; 15224 /* non-overlapping ranges */ 15225 if (umin1 > umax2 || umax1 < umin2) 15226 return 0; 15227 if (smin1 > smax2 || smax1 < smin2) 15228 return 0; 15229 if (!is_jmp32) { 15230 /* if 64-bit ranges are inconclusive, see if we can 15231 * utilize 32-bit subrange knowledge to eliminate 15232 * branches that can't be taken a priori 15233 */ 15234 if (reg_u32_min(reg1) > reg_u32_max(reg2) || 15235 reg_u32_max(reg1) < reg_u32_min(reg2)) 15236 return 0; 15237 if (reg_s32_min(reg1) > reg_s32_max(reg2) || 15238 reg_s32_max(reg1) < reg_s32_min(reg2)) 15239 return 0; 15240 } 15241 break; 15242 case BPF_JNE: 15243 /* constants, umin/umax and smin/smax checks would be 15244 * redundant in this case because they all should match 15245 */ 15246 if 
(tnum_is_const(t1) && tnum_is_const(t2)) 15247 return t1.value != t2.value; 15248 if (!tnum_overlap(t1, t2)) 15249 return 1; 15250 /* non-overlapping ranges */ 15251 if (umin1 > umax2 || umax1 < umin2) 15252 return 1; 15253 if (smin1 > smax2 || smax1 < smin2) 15254 return 1; 15255 if (!is_jmp32) { 15256 /* if 64-bit ranges are inconclusive, see if we can 15257 * utilize 32-bit subrange knowledge to eliminate 15258 * branches that can't be taken a priori 15259 */ 15260 if (reg_u32_min(reg1) > reg_u32_max(reg2) || 15261 reg_u32_max(reg1) < reg_u32_min(reg2)) 15262 return 1; 15263 if (reg_s32_min(reg1) > reg_s32_max(reg2) || 15264 reg_s32_max(reg1) < reg_s32_min(reg2)) 15265 return 1; 15266 } 15267 break; 15268 case BPF_JSET: 15269 if (!is_reg_const(reg2, is_jmp32)) { 15270 swap(reg1, reg2); 15271 swap(t1, t2); 15272 } 15273 if (!is_reg_const(reg2, is_jmp32)) 15274 return -1; 15275 if ((~t1.mask & t1.value) & t2.value) 15276 return 1; 15277 if (!((t1.mask | t1.value) & t2.value)) 15278 return 0; 15279 break; 15280 case BPF_JGT: 15281 if (umin1 > umax2) 15282 return 1; 15283 else if (umax1 <= umin2) 15284 return 0; 15285 break; 15286 case BPF_JSGT: 15287 if (smin1 > smax2) 15288 return 1; 15289 else if (smax1 <= smin2) 15290 return 0; 15291 break; 15292 case BPF_JLT: 15293 if (umax1 < umin2) 15294 return 1; 15295 else if (umin1 >= umax2) 15296 return 0; 15297 break; 15298 case BPF_JSLT: 15299 if (smax1 < smin2) 15300 return 1; 15301 else if (smin1 >= smax2) 15302 return 0; 15303 break; 15304 case BPF_JGE: 15305 if (umin1 >= umax2) 15306 return 1; 15307 else if (umax1 < umin2) 15308 return 0; 15309 break; 15310 case BPF_JSGE: 15311 if (smin1 >= smax2) 15312 return 1; 15313 else if (smax1 < smin2) 15314 return 0; 15315 break; 15316 case BPF_JLE: 15317 if (umax1 <= umin2) 15318 return 1; 15319 else if (umin1 > umax2) 15320 return 0; 15321 break; 15322 case BPF_JSLE: 15323 if (smax1 <= smin2) 15324 return 1; 15325 else if (smin1 > smax2) 15326 return 0; 15327 break; 15328 
} 15329 15330 return simulate_both_branches_taken(env, opcode, is_jmp32); 15331 } 15332 15333 static int flip_opcode(u32 opcode) 15334 { 15335 /* How can we transform "a <op> b" into "b <op> a"? */ 15336 static const u8 opcode_flip[16] = { 15337 /* these stay the same */ 15338 [BPF_JEQ >> 4] = BPF_JEQ, 15339 [BPF_JNE >> 4] = BPF_JNE, 15340 [BPF_JSET >> 4] = BPF_JSET, 15341 /* these swap "lesser" and "greater" (L and G in the opcodes) */ 15342 [BPF_JGE >> 4] = BPF_JLE, 15343 [BPF_JGT >> 4] = BPF_JLT, 15344 [BPF_JLE >> 4] = BPF_JGE, 15345 [BPF_JLT >> 4] = BPF_JGT, 15346 [BPF_JSGE >> 4] = BPF_JSLE, 15347 [BPF_JSGT >> 4] = BPF_JSLT, 15348 [BPF_JSLE >> 4] = BPF_JSGE, 15349 [BPF_JSLT >> 4] = BPF_JSGT 15350 }; 15351 return opcode_flip[opcode >> 4]; 15352 } 15353 15354 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, 15355 struct bpf_reg_state *src_reg, 15356 u8 opcode) 15357 { 15358 struct bpf_reg_state *pkt; 15359 15360 if (src_reg->type == PTR_TO_PACKET_END) { 15361 pkt = dst_reg; 15362 } else if (dst_reg->type == PTR_TO_PACKET_END) { 15363 pkt = src_reg; 15364 opcode = flip_opcode(opcode); 15365 } else { 15366 return -1; 15367 } 15368 15369 if (pkt->range >= 0) 15370 return -1; 15371 15372 switch (opcode) { 15373 case BPF_JLE: 15374 /* pkt <= pkt_end */ 15375 fallthrough; 15376 case BPF_JGT: 15377 /* pkt > pkt_end */ 15378 if (pkt->range == BEYOND_PKT_END) 15379 /* pkt has at last one extra byte beyond pkt_end */ 15380 return opcode == BPF_JGT; 15381 break; 15382 case BPF_JLT: 15383 /* pkt < pkt_end */ 15384 fallthrough; 15385 case BPF_JGE: 15386 /* pkt >= pkt_end */ 15387 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END) 15388 return opcode == BPF_JGE; 15389 break; 15390 } 15391 return -1; 15392 } 15393 15394 /* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;" 15395 * and return: 15396 * 1 - branch will be taken and "goto target" will be executed 15397 * 0 - branch will not be taken and fall-through to 
next insn 15398 * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value 15399 * range [0,10] 15400 */ 15401 static int is_branch_taken(struct bpf_verifier_env *env, struct bpf_reg_state *reg1, 15402 struct bpf_reg_state *reg2, u8 opcode, bool is_jmp32) 15403 { 15404 if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32) 15405 return is_pkt_ptr_branch_taken(reg1, reg2, opcode); 15406 15407 if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) { 15408 u64 val; 15409 15410 /* arrange that reg2 is a scalar, and reg1 is a pointer */ 15411 if (!is_reg_const(reg2, is_jmp32)) { 15412 opcode = flip_opcode(opcode); 15413 swap(reg1, reg2); 15414 } 15415 /* and ensure that reg2 is a constant */ 15416 if (!is_reg_const(reg2, is_jmp32)) 15417 return -1; 15418 15419 if (!reg_not_null(env, reg1)) 15420 return -1; 15421 15422 /* If pointer is valid tests against zero will fail so we can 15423 * use this to direct branch taken. 15424 */ 15425 val = reg_const_value(reg2, is_jmp32); 15426 if (val != 0) 15427 return -1; 15428 15429 switch (opcode) { 15430 case BPF_JEQ: 15431 return 0; 15432 case BPF_JNE: 15433 return 1; 15434 default: 15435 return -1; 15436 } 15437 } 15438 15439 /* now deal with two scalars, but not necessarily constants */ 15440 return is_scalar_branch_taken(env, reg1, reg2, opcode, is_jmp32); 15441 } 15442 15443 /* Opcode that corresponds to a *false* branch condition. 
15444 * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2 15445 */ 15446 static u8 rev_opcode(u8 opcode) 15447 { 15448 switch (opcode) { 15449 case BPF_JEQ: return BPF_JNE; 15450 case BPF_JNE: return BPF_JEQ; 15451 /* JSET doesn't have it's reverse opcode in BPF, so add 15452 * BPF_X flag to denote the reverse of that operation 15453 */ 15454 case BPF_JSET: return BPF_JSET | BPF_X; 15455 case BPF_JSET | BPF_X: return BPF_JSET; 15456 case BPF_JGE: return BPF_JLT; 15457 case BPF_JGT: return BPF_JLE; 15458 case BPF_JLE: return BPF_JGT; 15459 case BPF_JLT: return BPF_JGE; 15460 case BPF_JSGE: return BPF_JSLT; 15461 case BPF_JSGT: return BPF_JSLE; 15462 case BPF_JSLE: return BPF_JSGT; 15463 case BPF_JSLT: return BPF_JSGE; 15464 default: return 0; 15465 } 15466 } 15467 15468 /* Refine range knowledge for <reg1> <op> <reg>2 conditional operation. */ 15469 static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, 15470 u8 opcode, bool is_jmp32) 15471 { 15472 struct tnum t; 15473 u64 val; 15474 15475 /* In case of GE/GT/SGE/JST, reuse LE/LT/SLE/SLT logic from below */ 15476 switch (opcode) { 15477 case BPF_JGE: 15478 case BPF_JGT: 15479 case BPF_JSGE: 15480 case BPF_JSGT: 15481 opcode = flip_opcode(opcode); 15482 swap(reg1, reg2); 15483 break; 15484 default: 15485 break; 15486 } 15487 15488 switch (opcode) { 15489 case BPF_JEQ: 15490 if (is_jmp32) { 15491 reg1->r32 = cnum32_intersect(reg1->r32, reg2->r32); 15492 reg2->r32 = reg1->r32; 15493 15494 t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off)); 15495 reg1->var_off = tnum_with_subreg(reg1->var_off, t); 15496 reg2->var_off = tnum_with_subreg(reg2->var_off, t); 15497 } else { 15498 reg1->r64 = cnum64_intersect(reg1->r64, reg2->r64); 15499 reg2->r64 = reg1->r64; 15500 15501 reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off); 15502 reg2->var_off = reg1->var_off; 15503 } 15504 break; 15505 case BPF_JNE: 15506 if (!is_reg_const(reg2, is_jmp32)) 15507 
swap(reg1, reg2); 15508 if (!is_reg_const(reg2, is_jmp32)) 15509 break; 15510 15511 /* try to recompute the bound of reg1 if reg2 is a const and 15512 * is exactly the edge of reg1. 15513 */ 15514 val = reg_const_value(reg2, is_jmp32); 15515 if (is_jmp32) { 15516 /* Complement of the range [val, val] as cnum32. */ 15517 cnum32_intersect_with(®1->r32, (struct cnum32){ val + 1, U32_MAX - 1 }); 15518 } else { 15519 /* Complement of the range [val, val] as cnum64. */ 15520 cnum64_intersect_with(®1->r64, (struct cnum64){ val + 1, U64_MAX - 1 }); 15521 } 15522 break; 15523 case BPF_JSET: 15524 if (!is_reg_const(reg2, is_jmp32)) 15525 swap(reg1, reg2); 15526 if (!is_reg_const(reg2, is_jmp32)) 15527 break; 15528 val = reg_const_value(reg2, is_jmp32); 15529 /* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X) 15530 * requires single bit to learn something useful. E.g., if we 15531 * know that `r1 & 0x3` is true, then which bits (0, 1, or both) 15532 * are actually set? We can learn something definite only if 15533 * it's a single-bit value to begin with. 15534 * 15535 * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have 15536 * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor 15537 * bit 1 is set, which we can readily use in adjustments. 15538 */ 15539 if (!is_power_of_2(val)) 15540 break; 15541 if (is_jmp32) { 15542 t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val)); 15543 reg1->var_off = tnum_with_subreg(reg1->var_off, t); 15544 } else { 15545 reg1->var_off = tnum_or(reg1->var_off, tnum_const(val)); 15546 } 15547 break; 15548 case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */ 15549 if (!is_reg_const(reg2, is_jmp32)) 15550 swap(reg1, reg2); 15551 if (!is_reg_const(reg2, is_jmp32)) 15552 break; 15553 val = reg_const_value(reg2, is_jmp32); 15554 /* Forget the ranges before narrowing tnums, to avoid invariant 15555 * violations if we're on a dead branch. 
15556 */ 15557 __mark_reg_unbounded(reg1); 15558 if (is_jmp32) { 15559 t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val)); 15560 reg1->var_off = tnum_with_subreg(reg1->var_off, t); 15561 } else { 15562 reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val)); 15563 } 15564 break; 15565 case BPF_JLE: 15566 if (is_jmp32) { 15567 cnum32_intersect_with_urange(®1->r32, 0, reg_u32_max(reg2)); 15568 cnum32_intersect_with_urange(®2->r32, reg_u32_min(reg1), U32_MAX); 15569 } else { 15570 cnum64_intersect_with_urange(®1->r64, 0, reg_umax(reg2)); 15571 cnum64_intersect_with_urange(®2->r64, reg_umin(reg1), U64_MAX); 15572 } 15573 break; 15574 case BPF_JLT: 15575 if (is_jmp32) { 15576 cnum32_intersect_with_urange(®1->r32, 0, reg_u32_max(reg2) - 1); 15577 cnum32_intersect_with_urange(®2->r32, reg_u32_min(reg1) + 1, U32_MAX); 15578 } else { 15579 cnum64_intersect_with_urange(®1->r64, 0, reg_umax(reg2) - 1); 15580 cnum64_intersect_with_urange(®2->r64, reg_umin(reg1) + 1, U64_MAX); 15581 } 15582 break; 15583 case BPF_JSLE: 15584 if (is_jmp32) { 15585 cnum32_intersect_with_srange(®1->r32, S32_MIN, reg_s32_max(reg2)); 15586 cnum32_intersect_with_srange(®2->r32, reg_s32_min(reg1), S32_MAX); 15587 } else { 15588 cnum64_intersect_with_srange(®1->r64, S64_MIN, reg_smax(reg2)); 15589 cnum64_intersect_with_srange(®2->r64, reg_smin(reg1), S64_MAX); 15590 } 15591 break; 15592 case BPF_JSLT: 15593 if (is_jmp32) { 15594 cnum32_intersect_with_srange(®1->r32, S32_MIN, reg_s32_max(reg2) - 1); 15595 cnum32_intersect_with_srange(®2->r32, reg_s32_min(reg1) + 1, S32_MAX); 15596 } else { 15597 cnum64_intersect_with_srange(®1->r64, S64_MIN, reg_smax(reg2) - 1); 15598 cnum64_intersect_with_srange(®2->r64, reg_smin(reg1) + 1, S64_MAX); 15599 } 15600 break; 15601 default: 15602 return; 15603 } 15604 } 15605 15606 /* Check for invariant violations on the registers for both branches of a condition */ 15607 static int regs_bounds_sanity_check_branches(struct bpf_verifier_env *env) 15608 { 15609 int 
err; 15610 15611 err = reg_bounds_sanity_check(env, &env->true_reg1, "true_reg1"); 15612 err = err ?: reg_bounds_sanity_check(env, &env->true_reg2, "true_reg2"); 15613 err = err ?: reg_bounds_sanity_check(env, &env->false_reg1, "false_reg1"); 15614 err = err ?: reg_bounds_sanity_check(env, &env->false_reg2, "false_reg2"); 15615 return err; 15616 } 15617 15618 static void mark_ptr_or_null_reg(struct bpf_func_state *state, 15619 struct bpf_reg_state *reg, u32 id, 15620 bool is_null) 15621 { 15622 if (type_may_be_null(reg->type) && reg->id == id && 15623 (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) { 15624 /* Old offset should have been known-zero, because we don't 15625 * allow pointer arithmetic on pointers that might be NULL. 15626 * If we see this happening, don't convert the register. 15627 * 15628 * But in some cases, some helpers that return local kptrs 15629 * advance offset for the returned pointer. In those cases, 15630 * it is fine to expect to see reg->var_off. 15631 */ 15632 if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) && 15633 WARN_ON_ONCE(!tnum_equals_const(reg->var_off, 0))) 15634 return; 15635 if (is_null) { 15636 /* We don't need id from this point 15637 * onwards anymore, thus we should better reset it, 15638 * so that state pruning has chances to take effect. 15639 */ 15640 __mark_reg_known_zero(reg); 15641 reg->type = SCALAR_VALUE; 15642 15643 return; 15644 } 15645 15646 mark_ptr_not_null_reg(reg); 15647 15648 /* 15649 * reg->id is preserved for object relationship tracking 15650 * and spin_lock lock state tracking 15651 */ 15652 } 15653 } 15654 15655 /* The logic is similar to find_good_pkt_pointers(), both could eventually 15656 * be folded together at some point. 
15657 */ 15658 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, 15659 bool is_null) 15660 { 15661 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 15662 struct bpf_reg_state *regs = state->regs, *reg; 15663 u32 id = regs[regno].id; 15664 15665 if (is_null && find_reference_state(vstate, id)) 15666 /* regs[regno] is in the " == NULL" branch. 15667 * No one could have freed the reference state before 15668 * doing the NULL check. 15669 */ 15670 WARN_ON_ONCE(release_reference_nomark(vstate, id)); 15671 15672 bpf_for_each_reg_in_vstate(vstate, state, reg, ({ 15673 mark_ptr_or_null_reg(state, reg, id, is_null); 15674 })); 15675 } 15676 15677 static bool try_match_pkt_pointers(const struct bpf_insn *insn, 15678 struct bpf_reg_state *dst_reg, 15679 struct bpf_reg_state *src_reg, 15680 struct bpf_verifier_state *this_branch, 15681 struct bpf_verifier_state *other_branch) 15682 { 15683 if (BPF_SRC(insn->code) != BPF_X) 15684 return false; 15685 15686 /* Pointers are always 64-bit. 
*/ 15687 if (BPF_CLASS(insn->code) == BPF_JMP32) 15688 return false; 15689 15690 switch (BPF_OP(insn->code)) { 15691 case BPF_JGT: 15692 if ((dst_reg->type == PTR_TO_PACKET && 15693 src_reg->type == PTR_TO_PACKET_END) || 15694 (dst_reg->type == PTR_TO_PACKET_META && 15695 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 15696 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ 15697 find_good_pkt_pointers(this_branch, dst_reg, 15698 dst_reg->type, false); 15699 mark_pkt_end(other_branch, insn->dst_reg, true); 15700 } else if ((dst_reg->type == PTR_TO_PACKET_END && 15701 src_reg->type == PTR_TO_PACKET) || 15702 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 15703 src_reg->type == PTR_TO_PACKET_META)) { 15704 /* pkt_end > pkt_data', pkt_data > pkt_meta' */ 15705 find_good_pkt_pointers(other_branch, src_reg, 15706 src_reg->type, true); 15707 mark_pkt_end(this_branch, insn->src_reg, false); 15708 } else { 15709 return false; 15710 } 15711 break; 15712 case BPF_JLT: 15713 if ((dst_reg->type == PTR_TO_PACKET && 15714 src_reg->type == PTR_TO_PACKET_END) || 15715 (dst_reg->type == PTR_TO_PACKET_META && 15716 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 15717 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ 15718 find_good_pkt_pointers(other_branch, dst_reg, 15719 dst_reg->type, true); 15720 mark_pkt_end(this_branch, insn->dst_reg, false); 15721 } else if ((dst_reg->type == PTR_TO_PACKET_END && 15722 src_reg->type == PTR_TO_PACKET) || 15723 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 15724 src_reg->type == PTR_TO_PACKET_META)) { 15725 /* pkt_end < pkt_data', pkt_data > pkt_meta' */ 15726 find_good_pkt_pointers(this_branch, src_reg, 15727 src_reg->type, false); 15728 mark_pkt_end(other_branch, insn->src_reg, true); 15729 } else { 15730 return false; 15731 } 15732 break; 15733 case BPF_JGE: 15734 if ((dst_reg->type == PTR_TO_PACKET && 15735 src_reg->type == PTR_TO_PACKET_END) || 15736 (dst_reg->type == PTR_TO_PACKET_META && 15737 
reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 15738 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ 15739 find_good_pkt_pointers(this_branch, dst_reg, 15740 dst_reg->type, true); 15741 mark_pkt_end(other_branch, insn->dst_reg, false); 15742 } else if ((dst_reg->type == PTR_TO_PACKET_END && 15743 src_reg->type == PTR_TO_PACKET) || 15744 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 15745 src_reg->type == PTR_TO_PACKET_META)) { 15746 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ 15747 find_good_pkt_pointers(other_branch, src_reg, 15748 src_reg->type, false); 15749 mark_pkt_end(this_branch, insn->src_reg, true); 15750 } else { 15751 return false; 15752 } 15753 break; 15754 case BPF_JLE: 15755 if ((dst_reg->type == PTR_TO_PACKET && 15756 src_reg->type == PTR_TO_PACKET_END) || 15757 (dst_reg->type == PTR_TO_PACKET_META && 15758 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) { 15759 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ 15760 find_good_pkt_pointers(other_branch, dst_reg, 15761 dst_reg->type, false); 15762 mark_pkt_end(this_branch, insn->dst_reg, true); 15763 } else if ((dst_reg->type == PTR_TO_PACKET_END && 15764 src_reg->type == PTR_TO_PACKET) || 15765 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && 15766 src_reg->type == PTR_TO_PACKET_META)) { 15767 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ 15768 find_good_pkt_pointers(this_branch, src_reg, 15769 src_reg->type, true); 15770 mark_pkt_end(other_branch, insn->src_reg, false); 15771 } else { 15772 return false; 15773 } 15774 break; 15775 default: 15776 return false; 15777 } 15778 15779 return true; 15780 } 15781 15782 static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg, 15783 u32 id, u32 frameno, u32 spi_or_reg, bool is_reg) 15784 { 15785 struct linked_reg *e; 15786 15787 if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id) 15788 return; 15789 15790 e = linked_regs_push(reg_set); 15791 if (e) { 15792 e->frameno = frameno; 15793 
e->is_reg = is_reg; 15794 e->regno = spi_or_reg; 15795 } else { 15796 clear_scalar_id(reg); 15797 } 15798 } 15799 15800 /* For all R being scalar registers or spilled scalar registers 15801 * in verifier state, save R in linked_regs if R->id == id. 15802 * If there are too many Rs sharing same id, reset id for leftover Rs. 15803 */ 15804 static void collect_linked_regs(struct bpf_verifier_env *env, 15805 struct bpf_verifier_state *vstate, 15806 u32 id, 15807 struct linked_regs *linked_regs) 15808 { 15809 struct bpf_insn_aux_data *aux = env->insn_aux_data; 15810 struct bpf_func_state *func; 15811 struct bpf_reg_state *reg; 15812 u16 live_regs; 15813 int i, j; 15814 15815 id = id & ~BPF_ADD_CONST; 15816 for (i = vstate->curframe; i >= 0; i--) { 15817 live_regs = aux[bpf_frame_insn_idx(vstate, i)].live_regs_before; 15818 func = vstate->frame[i]; 15819 for (j = 0; j < BPF_REG_FP; j++) { 15820 if (!(live_regs & BIT(j))) 15821 continue; 15822 reg = &func->regs[j]; 15823 __collect_linked_regs(linked_regs, reg, id, i, j, true); 15824 } 15825 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { 15826 if (!bpf_is_spilled_reg(&func->stack[j])) 15827 continue; 15828 reg = &func->stack[j].spilled_ptr; 15829 __collect_linked_regs(linked_regs, reg, id, i, j, false); 15830 } 15831 } 15832 } 15833 15834 /* For all R in linked_regs, copy known_reg range into R 15835 * if R->id == known_reg->id. 15836 */ 15837 static void sync_linked_regs(struct bpf_verifier_env *env, struct bpf_verifier_state *vstate, 15838 struct bpf_reg_state *known_reg, struct linked_regs *linked_regs) 15839 { 15840 struct bpf_reg_state fake_reg; 15841 struct bpf_reg_state *reg; 15842 struct linked_reg *e; 15843 int i; 15844 15845 for (i = 0; i < linked_regs->cnt; ++i) { 15846 e = &linked_regs->entries[i]; 15847 reg = e->is_reg ? 
&vstate->frame[e->frameno]->regs[e->regno] 15848 : &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr; 15849 if (reg->type != SCALAR_VALUE || reg == known_reg) 15850 continue; 15851 if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST)) 15852 continue; 15853 /* 15854 * Skip mixed 32/64-bit links: the delta relationship doesn't 15855 * hold across different ALU widths. 15856 */ 15857 if (((reg->id ^ known_reg->id) & BPF_ADD_CONST) == BPF_ADD_CONST) 15858 continue; 15859 if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) || 15860 reg->delta == known_reg->delta) { 15861 s32 saved_subreg_def = reg->subreg_def; 15862 15863 *reg = *known_reg; 15864 reg->subreg_def = saved_subreg_def; 15865 } else { 15866 s32 saved_subreg_def = reg->subreg_def; 15867 s32 saved_off = reg->delta; 15868 u32 saved_id = reg->id; 15869 15870 fake_reg.type = SCALAR_VALUE; 15871 __mark_reg_known(&fake_reg, (s64)reg->delta - (s64)known_reg->delta); 15872 15873 /* reg = known_reg; reg += delta */ 15874 *reg = *known_reg; 15875 /* 15876 * Must preserve off, id and subreg_def flag, 15877 * otherwise another sync_linked_regs() will be incorrect. 
15878 */ 15879 reg->delta = saved_off; 15880 reg->id = saved_id; 15881 reg->subreg_def = saved_subreg_def; 15882 15883 scalar32_min_max_add(reg, &fake_reg); 15884 scalar_min_max_add(reg, &fake_reg); 15885 reg->var_off = tnum_add(reg->var_off, fake_reg.var_off); 15886 if ((reg->id | known_reg->id) & BPF_ADD_CONST32) 15887 zext_32_to_64(reg); 15888 reg_bounds_sync(reg); 15889 } 15890 if (e->is_reg) 15891 mark_reg_scratched(env, e->regno); 15892 else 15893 mark_stack_slot_scratched(env, e->spi); 15894 } 15895 } 15896 15897 static int check_cond_jmp_op(struct bpf_verifier_env *env, 15898 struct bpf_insn *insn, int *insn_idx) 15899 { 15900 struct bpf_verifier_state *this_branch = env->cur_state; 15901 struct bpf_verifier_state *other_branch; 15902 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; 15903 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; 15904 struct bpf_reg_state *eq_branch_regs; 15905 struct linked_regs linked_regs = {}; 15906 u8 opcode = BPF_OP(insn->code); 15907 int insn_flags = 0; 15908 bool is_jmp32; 15909 int pred = -1; 15910 int err; 15911 15912 /* Only conditional jumps are expected to reach here. 
*/ 15913 if (opcode == BPF_JA || opcode > BPF_JCOND) { 15914 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); 15915 return -EINVAL; 15916 } 15917 15918 if (opcode == BPF_JCOND) { 15919 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; 15920 int idx = *insn_idx; 15921 15922 prev_st = find_prev_entry(env, cur_st->parent, idx); 15923 15924 /* branch out 'fallthrough' insn as a new state to explore */ 15925 queued_st = push_stack(env, idx + 1, idx, false); 15926 if (IS_ERR(queued_st)) 15927 return PTR_ERR(queued_st); 15928 15929 queued_st->may_goto_depth++; 15930 if (prev_st) 15931 widen_imprecise_scalars(env, prev_st, queued_st); 15932 *insn_idx += insn->off; 15933 return 0; 15934 } 15935 15936 /* check src2 operand */ 15937 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 15938 if (err) 15939 return err; 15940 15941 dst_reg = ®s[insn->dst_reg]; 15942 if (BPF_SRC(insn->code) == BPF_X) { 15943 /* check src1 operand */ 15944 err = check_reg_arg(env, insn->src_reg, SRC_OP); 15945 if (err) 15946 return err; 15947 15948 src_reg = ®s[insn->src_reg]; 15949 if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) && 15950 is_pointer_value(env, insn->src_reg)) { 15951 verbose(env, "R%d pointer comparison prohibited\n", 15952 insn->src_reg); 15953 return -EACCES; 15954 } 15955 15956 if (src_reg->type == PTR_TO_STACK) 15957 insn_flags |= INSN_F_SRC_REG_STACK; 15958 if (dst_reg->type == PTR_TO_STACK) 15959 insn_flags |= INSN_F_DST_REG_STACK; 15960 } else { 15961 src_reg = &env->fake_reg[0]; 15962 memset(src_reg, 0, sizeof(*src_reg)); 15963 src_reg->type = SCALAR_VALUE; 15964 __mark_reg_known(src_reg, insn->imm); 15965 15966 if (dst_reg->type == PTR_TO_STACK) 15967 insn_flags |= INSN_F_DST_REG_STACK; 15968 } 15969 15970 if (insn_flags) { 15971 err = bpf_push_jmp_history(env, this_branch, insn_flags, 0, 0, 0); 15972 if (err) 15973 return err; 15974 } 15975 15976 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 15977 env->false_reg1 = 
*dst_reg; 15978 env->false_reg2 = *src_reg; 15979 env->true_reg1 = *dst_reg; 15980 env->true_reg2 = *src_reg; 15981 pred = is_branch_taken(env, dst_reg, src_reg, opcode, is_jmp32); 15982 if (pred >= 0) { 15983 /* If we get here with a dst_reg pointer type it is because 15984 * above is_branch_taken() special cased the 0 comparison. 15985 */ 15986 if (!__is_pointer_value(false, dst_reg)) 15987 err = mark_chain_precision(env, insn->dst_reg); 15988 if (BPF_SRC(insn->code) == BPF_X && !err && 15989 !__is_pointer_value(false, src_reg)) 15990 err = mark_chain_precision(env, insn->src_reg); 15991 if (err) 15992 return err; 15993 } 15994 15995 if (pred == 1) { 15996 /* Only follow the goto, ignore fall-through. If needed, push 15997 * the fall-through branch for simulation under speculative 15998 * execution. 15999 */ 16000 if (!env->bypass_spec_v1) { 16001 err = sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx); 16002 if (err < 0) 16003 return err; 16004 } 16005 if (env->log.level & BPF_LOG_LEVEL) 16006 print_insn_state(env, this_branch, this_branch->curframe); 16007 *insn_idx += insn->off; 16008 return 0; 16009 } else if (pred == 0) { 16010 /* Only follow the fall-through branch, since that's where the 16011 * program will go. If needed, push the goto branch for 16012 * simulation under speculative execution. 16013 */ 16014 if (!env->bypass_spec_v1) { 16015 err = sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1, 16016 *insn_idx); 16017 if (err < 0) 16018 return err; 16019 } 16020 if (env->log.level & BPF_LOG_LEVEL) 16021 print_insn_state(env, this_branch, this_branch->curframe); 16022 return 0; 16023 } 16024 16025 /* Push scalar registers sharing same ID to jump history, 16026 * do this before creating 'other_branch', so that both 16027 * 'this_branch' and 'other_branch' share this history 16028 * if parent state is created. 
16029 */ 16030 if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id) 16031 collect_linked_regs(env, this_branch, src_reg->id, &linked_regs); 16032 if (dst_reg->type == SCALAR_VALUE && dst_reg->id) 16033 collect_linked_regs(env, this_branch, dst_reg->id, &linked_regs); 16034 if (linked_regs.cnt > 1) { 16035 err = bpf_push_jmp_history(env, this_branch, 0, 0, 0, linked_regs_pack(&linked_regs)); 16036 if (err) 16037 return err; 16038 } 16039 16040 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false); 16041 if (IS_ERR(other_branch)) 16042 return PTR_ERR(other_branch); 16043 other_branch_regs = other_branch->frame[other_branch->curframe]->regs; 16044 16045 err = regs_bounds_sanity_check_branches(env); 16046 if (err) 16047 return err; 16048 16049 *dst_reg = env->false_reg1; 16050 *src_reg = env->false_reg2; 16051 other_branch_regs[insn->dst_reg] = env->true_reg1; 16052 if (BPF_SRC(insn->code) == BPF_X) 16053 other_branch_regs[insn->src_reg] = env->true_reg2; 16054 16055 if (BPF_SRC(insn->code) == BPF_X && 16056 src_reg->type == SCALAR_VALUE && src_reg->id && 16057 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { 16058 sync_linked_regs(env, this_branch, src_reg, &linked_regs); 16059 sync_linked_regs(env, other_branch, &other_branch_regs[insn->src_reg], 16060 &linked_regs); 16061 } 16062 if (dst_reg->type == SCALAR_VALUE && dst_reg->id && 16063 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { 16064 sync_linked_regs(env, this_branch, dst_reg, &linked_regs); 16065 sync_linked_regs(env, other_branch, &other_branch_regs[insn->dst_reg], 16066 &linked_regs); 16067 } 16068 16069 /* if one pointer register is compared to another pointer 16070 * register check if PTR_MAYBE_NULL could be lifted. 16071 * E.g. register A - maybe null 16072 * register B - not null 16073 * for JNE A, B, ... - A is not null in the false branch; 16074 * for JEQ A, B, ... - A is not null in the true branch. 
16075 * 16076 * Since PTR_TO_BTF_ID points to a kernel struct that does 16077 * not need to be null checked by the BPF program, i.e., 16078 * could be null even without PTR_MAYBE_NULL marking, so 16079 * only propagate nullness when neither reg is that type. 16080 */ 16081 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X && 16082 __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) && 16083 type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) && 16084 base_type(src_reg->type) != PTR_TO_BTF_ID && 16085 base_type(dst_reg->type) != PTR_TO_BTF_ID) { 16086 eq_branch_regs = NULL; 16087 switch (opcode) { 16088 case BPF_JEQ: 16089 eq_branch_regs = other_branch_regs; 16090 break; 16091 case BPF_JNE: 16092 eq_branch_regs = regs; 16093 break; 16094 default: 16095 /* do nothing */ 16096 break; 16097 } 16098 if (eq_branch_regs) { 16099 if (type_may_be_null(src_reg->type)) 16100 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]); 16101 else 16102 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]); 16103 } 16104 } 16105 16106 /* detect if R == 0 where R is returned from bpf_map_lookup_elem(). 16107 * Also does the same detection for a register whose the value is 16108 * known to be 0. 16109 * NOTE: these optimizations below are related with pointer comparison 16110 * which will never be JMP32. 16111 */ 16112 if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE) && 16113 type_may_be_null(dst_reg->type) && 16114 ((BPF_SRC(insn->code) == BPF_K && insn->imm == 0) || 16115 (BPF_SRC(insn->code) == BPF_X && bpf_register_is_null(src_reg)))) { 16116 /* Mark all identical registers in each branch as either 16117 * safe or unknown depending R == 0 or R != 0 conditional. 
16118 */ 16119 mark_ptr_or_null_regs(this_branch, insn->dst_reg, 16120 opcode == BPF_JNE); 16121 mark_ptr_or_null_regs(other_branch, insn->dst_reg, 16122 opcode == BPF_JEQ); 16123 } else if (!try_match_pkt_pointers(insn, dst_reg, ®s[insn->src_reg], 16124 this_branch, other_branch) && 16125 is_pointer_value(env, insn->dst_reg)) { 16126 verbose(env, "R%d pointer comparison prohibited\n", 16127 insn->dst_reg); 16128 return -EACCES; 16129 } 16130 if (env->log.level & BPF_LOG_LEVEL) 16131 print_insn_state(env, this_branch, this_branch->curframe); 16132 return 0; 16133 } 16134 16135 /* verify BPF_LD_IMM64 instruction */ 16136 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) 16137 { 16138 struct bpf_insn_aux_data *aux = cur_aux(env); 16139 struct bpf_reg_state *regs = cur_regs(env); 16140 struct bpf_reg_state *dst_reg; 16141 struct bpf_map *map; 16142 int err; 16143 16144 if (BPF_SIZE(insn->code) != BPF_DW) { 16145 verbose(env, "invalid BPF_LD_IMM insn\n"); 16146 return -EINVAL; 16147 } 16148 16149 err = check_reg_arg(env, insn->dst_reg, DST_OP); 16150 if (err) 16151 return err; 16152 16153 dst_reg = ®s[insn->dst_reg]; 16154 if (insn->src_reg == 0) { 16155 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; 16156 16157 dst_reg->type = SCALAR_VALUE; 16158 __mark_reg_known(®s[insn->dst_reg], imm); 16159 return 0; 16160 } 16161 16162 /* All special src_reg cases are listed below. From this point onwards 16163 * we either succeed and assign a corresponding dst_reg->type after 16164 * zeroing the offset, or fail and reject the program. 
16165 */ 16166 mark_reg_known_zero(env, regs, insn->dst_reg); 16167 16168 if (insn->src_reg == BPF_PSEUDO_BTF_ID) { 16169 dst_reg->type = aux->btf_var.reg_type; 16170 switch (base_type(dst_reg->type)) { 16171 case PTR_TO_MEM: 16172 dst_reg->mem_size = aux->btf_var.mem_size; 16173 break; 16174 case PTR_TO_BTF_ID: 16175 dst_reg->btf = aux->btf_var.btf; 16176 dst_reg->btf_id = aux->btf_var.btf_id; 16177 break; 16178 default: 16179 verifier_bug(env, "pseudo btf id: unexpected dst reg type"); 16180 return -EFAULT; 16181 } 16182 return 0; 16183 } 16184 16185 if (insn->src_reg == BPF_PSEUDO_FUNC) { 16186 struct bpf_prog_aux *aux = env->prog->aux; 16187 u32 subprogno = bpf_find_subprog(env, 16188 env->insn_idx + insn->imm + 1); 16189 16190 if (!aux->func_info) { 16191 verbose(env, "missing btf func_info\n"); 16192 return -EINVAL; 16193 } 16194 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) { 16195 verbose(env, "callback function not static\n"); 16196 return -EINVAL; 16197 } 16198 16199 dst_reg->type = PTR_TO_FUNC; 16200 dst_reg->subprogno = subprogno; 16201 return 0; 16202 } 16203 16204 map = env->used_maps[aux->map_index]; 16205 16206 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || 16207 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { 16208 if (map->map_type == BPF_MAP_TYPE_ARENA) { 16209 __mark_reg_unknown(env, dst_reg); 16210 dst_reg->map_ptr = map; 16211 return 0; 16212 } 16213 __mark_reg_known(dst_reg, aux->map_off); 16214 dst_reg->type = PTR_TO_MAP_VALUE; 16215 dst_reg->map_ptr = map; 16216 WARN_ON_ONCE(map->map_type != BPF_MAP_TYPE_INSN_ARRAY && 16217 map->max_entries != 1); 16218 /* We want reg->id to be same (0) as map_value is not distinct */ 16219 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || 16220 insn->src_reg == BPF_PSEUDO_MAP_IDX) { 16221 dst_reg->type = CONST_PTR_TO_MAP; 16222 dst_reg->map_ptr = map; 16223 } else { 16224 verifier_bug(env, "unexpected src reg value for ldimm64"); 16225 return -EFAULT; 16226 } 16227 16228 return 0; 16229 } 16230 
16231 static bool may_access_skb(enum bpf_prog_type type) 16232 { 16233 switch (type) { 16234 case BPF_PROG_TYPE_SOCKET_FILTER: 16235 case BPF_PROG_TYPE_SCHED_CLS: 16236 case BPF_PROG_TYPE_SCHED_ACT: 16237 return true; 16238 default: 16239 return false; 16240 } 16241 } 16242 16243 /* verify safety of LD_ABS|LD_IND instructions: 16244 * - they can only appear in the programs where ctx == skb 16245 * - since they are wrappers of function calls, they scratch R1-R5 registers, 16246 * preserve R6-R9, and store return value into R0 16247 * 16248 * Implicit input: 16249 * ctx == skb == R6 == CTX 16250 * 16251 * Explicit input: 16252 * SRC == any register 16253 * IMM == 32-bit immediate 16254 * 16255 * Output: 16256 * R0 - 8/16/32-bit skb data converted to cpu endianness 16257 */ 16258 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) 16259 { 16260 struct bpf_reg_state *regs = cur_regs(env); 16261 static const int ctx_reg = BPF_REG_6; 16262 u8 mode = BPF_MODE(insn->code); 16263 int i, err; 16264 16265 if (!may_access_skb(resolve_prog_type(env->prog))) { 16266 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); 16267 return -EINVAL; 16268 } 16269 16270 if (!env->ops->gen_ld_abs) { 16271 verifier_bug(env, "gen_ld_abs is null"); 16272 return -EFAULT; 16273 } 16274 16275 /* check whether implicit source operand (register R6) is readable */ 16276 err = check_reg_arg(env, ctx_reg, SRC_OP); 16277 if (err) 16278 return err; 16279 16280 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as 16281 * gen_ld_abs() may terminate the program at runtime, leading to 16282 * reference leak. 
16283 */ 16284 err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]"); 16285 if (err) 16286 return err; 16287 16288 if (regs[ctx_reg].type != PTR_TO_CTX) { 16289 verbose(env, 16290 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); 16291 return -EINVAL; 16292 } 16293 16294 if (mode == BPF_IND) { 16295 /* check explicit source operand */ 16296 err = check_reg_arg(env, insn->src_reg, SRC_OP); 16297 if (err) 16298 return err; 16299 } 16300 16301 err = check_ptr_off_reg(env, ®s[ctx_reg], ctx_reg); 16302 if (err < 0) 16303 return err; 16304 16305 /* reset caller saved regs to unreadable */ 16306 for (i = 0; i < CALLER_SAVED_REGS; i++) { 16307 bpf_mark_reg_not_init(env, ®s[caller_saved[i]]); 16308 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 16309 } 16310 16311 /* mark destination R0 register as readable, since it contains 16312 * the value fetched from the packet. 16313 * Already marked as written above. 16314 */ 16315 mark_reg_unknown(env, regs, BPF_REG_0); 16316 /* ld_abs load up to 32-bit skb data. */ 16317 regs[BPF_REG_0].subreg_def = env->insn_idx + 1; 16318 /* 16319 * See bpf_gen_ld_abs() which emits a hidden BPF_EXIT with r0=0 16320 * which must be explored by the verifier when in a subprog. 16321 */ 16322 if (env->cur_state->curframe) { 16323 struct bpf_verifier_state *branch; 16324 16325 mark_reg_scratched(env, BPF_REG_0); 16326 branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); 16327 if (IS_ERR(branch)) 16328 return PTR_ERR(branch); 16329 mark_reg_known_zero(env, regs, BPF_REG_0); 16330 err = prepare_func_exit(env, &env->insn_idx); 16331 if (err) 16332 return err; 16333 env->insn_idx--; 16334 } 16335 return 0; 16336 } 16337 16338 16339 static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range) 16340 { 16341 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 16342 16343 /* Default return value range. 
*/ 16344 *range = retval_range(0, 1); 16345 16346 switch (prog_type) { 16347 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 16348 switch (env->prog->expected_attach_type) { 16349 case BPF_CGROUP_UDP4_RECVMSG: 16350 case BPF_CGROUP_UDP6_RECVMSG: 16351 case BPF_CGROUP_UNIX_RECVMSG: 16352 case BPF_CGROUP_INET4_GETPEERNAME: 16353 case BPF_CGROUP_INET6_GETPEERNAME: 16354 case BPF_CGROUP_UNIX_GETPEERNAME: 16355 case BPF_CGROUP_INET4_GETSOCKNAME: 16356 case BPF_CGROUP_INET6_GETSOCKNAME: 16357 case BPF_CGROUP_UNIX_GETSOCKNAME: 16358 *range = retval_range(1, 1); 16359 break; 16360 case BPF_CGROUP_INET4_BIND: 16361 case BPF_CGROUP_INET6_BIND: 16362 *range = retval_range(0, 3); 16363 break; 16364 default: 16365 break; 16366 } 16367 break; 16368 case BPF_PROG_TYPE_CGROUP_SKB: 16369 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) 16370 *range = retval_range(0, 3); 16371 break; 16372 case BPF_PROG_TYPE_CGROUP_SOCK: 16373 case BPF_PROG_TYPE_SOCK_OPS: 16374 case BPF_PROG_TYPE_CGROUP_DEVICE: 16375 case BPF_PROG_TYPE_CGROUP_SYSCTL: 16376 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 16377 break; 16378 case BPF_PROG_TYPE_RAW_TRACEPOINT: 16379 if (!env->prog->aux->attach_btf_id) 16380 return false; 16381 *range = retval_range(0, 0); 16382 break; 16383 case BPF_PROG_TYPE_TRACING: 16384 switch (env->prog->expected_attach_type) { 16385 case BPF_TRACE_FENTRY: 16386 case BPF_TRACE_FEXIT: 16387 case BPF_TRACE_FSESSION: 16388 case BPF_TRACE_FENTRY_MULTI: 16389 case BPF_TRACE_FEXIT_MULTI: 16390 case BPF_TRACE_FSESSION_MULTI: 16391 *range = retval_range(0, 0); 16392 break; 16393 case BPF_TRACE_RAW_TP: 16394 case BPF_MODIFY_RETURN: 16395 return false; 16396 case BPF_TRACE_ITER: 16397 default: 16398 break; 16399 } 16400 break; 16401 case BPF_PROG_TYPE_KPROBE: 16402 switch (env->prog->expected_attach_type) { 16403 case BPF_TRACE_KPROBE_SESSION: 16404 case BPF_TRACE_UPROBE_SESSION: 16405 break; 16406 default: 16407 return false; 16408 } 16409 break; 16410 case BPF_PROG_TYPE_SK_LOOKUP: 16411 *range = 
retval_range(SK_DROP, SK_PASS); 16412 break; 16413 16414 case BPF_PROG_TYPE_LSM: 16415 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) { 16416 /* no range found, any return value is allowed */ 16417 if (!get_func_retval_range(env->prog, range)) 16418 return false; 16419 /* no restricted range, any return value is allowed */ 16420 if (range->minval == S32_MIN && range->maxval == S32_MAX) 16421 return false; 16422 range->return_32bit = true; 16423 } else if (!env->prog->aux->attach_func_proto->type) { 16424 /* Make sure programs that attach to void 16425 * hooks don't try to modify return value. 16426 */ 16427 *range = retval_range(1, 1); 16428 } 16429 break; 16430 16431 case BPF_PROG_TYPE_NETFILTER: 16432 *range = retval_range(NF_DROP, NF_ACCEPT); 16433 break; 16434 case BPF_PROG_TYPE_STRUCT_OPS: 16435 *range = retval_range(0, 0); 16436 break; 16437 case BPF_PROG_TYPE_EXT: 16438 /* freplace program can return anything as its return value 16439 * depends on the to-be-replaced kernel func or bpf program. 16440 */ 16441 default: 16442 return false; 16443 } 16444 16445 /* Continue calculating. */ 16446 16447 return true; 16448 } 16449 16450 static bool program_returns_void(struct bpf_verifier_env *env) 16451 { 16452 const struct bpf_prog *prog = env->prog; 16453 enum bpf_prog_type prog_type = prog->type; 16454 16455 switch (prog_type) { 16456 case BPF_PROG_TYPE_LSM: 16457 /* See return_retval_range, for BPF_LSM_CGROUP can be 0 or 0-1 depending on hook. 
*/ 16458 if (prog->expected_attach_type != BPF_LSM_CGROUP && 16459 !prog->aux->attach_func_proto->type) 16460 return true; 16461 break; 16462 case BPF_PROG_TYPE_STRUCT_OPS: 16463 if (!prog->aux->attach_func_proto->type) 16464 return true; 16465 break; 16466 case BPF_PROG_TYPE_EXT: 16467 /* 16468 * If the actual program is an extension, let it 16469 * return void - attaching will succeed only if the 16470 * program being replaced also returns void, and since 16471 * it has passed verification its actual type doesn't matter. 16472 */ 16473 if (subprog_returns_void(env, 0)) 16474 return true; 16475 break; 16476 default: 16477 break; 16478 } 16479 return false; 16480 } 16481 16482 static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name) 16483 { 16484 const char *exit_ctx = "At program exit"; 16485 struct tnum enforce_attach_type_range = tnum_unknown; 16486 const struct bpf_prog *prog = env->prog; 16487 struct bpf_reg_state *reg = reg_state(env, regno); 16488 struct bpf_retval_range range = retval_range(0, 1); 16489 enum bpf_prog_type prog_type = resolve_prog_type(env->prog); 16490 struct bpf_func_state *frame = env->cur_state->frame[0]; 16491 const struct btf_type *reg_type, *ret_type = NULL; 16492 int err; 16493 16494 /* LSM and struct_ops func-ptr's return type could be "void" */ 16495 if (!frame->in_async_callback_fn && program_returns_void(env)) 16496 return 0; 16497 16498 if (prog_type == BPF_PROG_TYPE_STRUCT_OPS) { 16499 /* Allow a struct_ops program to return a referenced kptr if it 16500 * matches the operator's return type and is in its unmodified 16501 * form. A scalar zero (i.e., a null pointer) is also allowed. 16502 */ 16503 reg_type = reg->btf ? 
btf_type_by_id(reg->btf, reg->btf_id) : NULL; 16504 ret_type = btf_type_resolve_ptr(prog->aux->attach_btf, 16505 prog->aux->attach_func_proto->type, 16506 NULL); 16507 if (ret_type && ret_type == reg_type && reg_is_referenced(env, reg)) 16508 return __check_ptr_off_reg(env, reg, argno_from_reg(regno), false); 16509 } 16510 16511 /* eBPF calling convention is such that R0 is used 16512 * to return the value from eBPF program. 16513 * Make sure that it's readable at this time 16514 * of bpf_exit, which means that program wrote 16515 * something into it earlier 16516 */ 16517 err = check_reg_arg(env, regno, SRC_OP); 16518 if (err) 16519 return err; 16520 16521 if (is_pointer_value(env, regno)) { 16522 verbose(env, "R%d leaks addr as return value\n", regno); 16523 return -EACCES; 16524 } 16525 16526 if (frame->in_async_callback_fn) { 16527 exit_ctx = "At async callback return"; 16528 range = frame->callback_ret_range; 16529 goto enforce_retval; 16530 } 16531 16532 if (prog_type == BPF_PROG_TYPE_STRUCT_OPS && !ret_type) 16533 return 0; 16534 16535 if (prog_type == BPF_PROG_TYPE_CGROUP_SKB && (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS)) 16536 enforce_attach_type_range = tnum_range(2, 3); 16537 16538 if (!return_retval_range(env, &range)) 16539 return 0; 16540 16541 enforce_retval: 16542 if (reg->type != SCALAR_VALUE) { 16543 verbose(env, "%s the register R%d is not a known value (%s)\n", 16544 exit_ctx, regno, reg_type_str(env, reg->type)); 16545 return -EINVAL; 16546 } 16547 16548 err = mark_chain_precision(env, regno); 16549 if (err) 16550 return err; 16551 16552 if (!retval_range_within(range, reg)) { 16553 verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name); 16554 if (prog->expected_attach_type == BPF_LSM_CGROUP && 16555 prog_type == BPF_PROG_TYPE_LSM && 16556 !prog->aux->attach_func_proto->type) 16557 verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); 16558 return -EINVAL; 16559 } 16560 16561 if 
(!tnum_is_unknown(enforce_attach_type_range) && 16562 tnum_in(enforce_attach_type_range, reg->var_off)) 16563 env->prog->enforce_expected_attach_type = 1; 16564 return 0; 16565 } 16566 16567 static int check_global_subprog_return_code(struct bpf_verifier_env *env) 16568 { 16569 struct bpf_reg_state *reg = reg_state(env, BPF_REG_0); 16570 struct bpf_func_state *cur_frame = cur_func(env); 16571 int err; 16572 16573 if (subprog_returns_void(env, cur_frame->subprogno)) 16574 return 0; 16575 16576 err = check_reg_arg(env, BPF_REG_0, SRC_OP); 16577 if (err) 16578 return err; 16579 16580 /* Pointers to arena are safe to pass between subprograms. */ 16581 if (is_arena_reg(env, BPF_REG_0)) 16582 return 0; 16583 16584 if (is_pointer_value(env, BPF_REG_0)) { 16585 verbose(env, "R%d leaks addr as return value\n", BPF_REG_0); 16586 return -EACCES; 16587 } 16588 16589 if (reg->type != SCALAR_VALUE) { 16590 verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", 16591 reg_type_str(env, reg->type)); 16592 return -EINVAL; 16593 } 16594 16595 return 0; 16596 } 16597 16598 /* Bitmask with 1s for all caller saved registers */ 16599 #define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1) 16600 16601 /* True if do_misc_fixups() replaces calls to helper number 'imm', 16602 * replacement patch is presumed to follow bpf_fastcall contract 16603 * (see mark_fastcall_pattern_for_call() below). 16604 */ 16605 bool bpf_verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) 16606 { 16607 switch (imm) { 16608 #ifdef CONFIG_X86_64 16609 case BPF_FUNC_get_smp_processor_id: 16610 #ifdef CONFIG_SMP 16611 case BPF_FUNC_get_current_task_btf: 16612 case BPF_FUNC_get_current_task: 16613 #endif 16614 return env->prog->jit_requested && bpf_jit_supports_percpu_insn(); 16615 #endif 16616 default: 16617 return false; 16618 } 16619 } 16620 16621 /* If @call is a kfunc or helper call, fills @cs and returns true, 16622 * otherwise returns false. 
16623 */ 16624 bool bpf_get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call, 16625 struct bpf_call_summary *cs) 16626 { 16627 struct bpf_kfunc_call_arg_meta meta; 16628 const struct bpf_func_proto *fn; 16629 int i; 16630 16631 if (bpf_helper_call(call)) { 16632 16633 if (bpf_get_helper_proto(env, call->imm, &fn) < 0) 16634 /* error would be reported later */ 16635 return false; 16636 cs->fastcall = fn->allow_fastcall && 16637 (bpf_verifier_inlines_helper_call(env, call->imm) || 16638 bpf_jit_inlines_helper_call(call->imm)); 16639 cs->is_void = fn->ret_type == RET_VOID; 16640 cs->num_params = 0; 16641 for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) { 16642 if (fn->arg_type[i] == ARG_DONTCARE) 16643 break; 16644 cs->num_params++; 16645 } 16646 return true; 16647 } 16648 16649 if (bpf_pseudo_kfunc_call(call)) { 16650 int err; 16651 16652 err = bpf_fetch_kfunc_arg_meta(env, call->imm, call->off, &meta); 16653 if (err < 0) 16654 /* error would be reported later */ 16655 return false; 16656 cs->num_params = btf_type_vlen(meta.func_proto); 16657 cs->fastcall = meta.kfunc_flags & KF_FASTCALL; 16658 cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type)); 16659 return true; 16660 } 16661 16662 return false; 16663 } 16664 16665 /* LLVM define a bpf_fastcall function attribute. 16666 * This attribute means that function scratches only some of 16667 * the caller saved registers defined by ABI. 16668 * For BPF the set of such registers could be defined as follows: 16669 * - R0 is scratched only if function is non-void; 16670 * - R1-R5 are scratched only if corresponding parameter type is defined 16671 * in the function prototype. 
16672 * 16673 * The contract between kernel and clang allows to simultaneously use 16674 * such functions and maintain backwards compatibility with old 16675 * kernels that don't understand bpf_fastcall calls: 16676 * 16677 * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5 16678 * registers are not scratched by the call; 16679 * 16680 * - as a post-processing step, clang visits each bpf_fastcall call and adds 16681 * spill/fill for every live r0-r5; 16682 * 16683 * - stack offsets used for the spill/fill are allocated as lowest 16684 * stack offsets in whole function and are not used for any other 16685 * purposes; 16686 * 16687 * - when kernel loads a program, it looks for such patterns 16688 * (bpf_fastcall function surrounded by spills/fills) and checks if 16689 * spill/fill stack offsets are used exclusively in fastcall patterns; 16690 * 16691 * - if so, and if verifier or current JIT inlines the call to the 16692 * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary 16693 * spill/fill pairs; 16694 * 16695 * - when old kernel loads a program, presence of spill/fill pairs 16696 * keeps BPF program valid, albeit slightly less efficient. 
16697 * 16698 * For example: 16699 * 16700 * r1 = 1; 16701 * r2 = 2; 16702 * *(u64 *)(r10 - 8) = r1; r1 = 1; 16703 * *(u64 *)(r10 - 16) = r2; r2 = 2; 16704 * call %[to_be_inlined] --> call %[to_be_inlined] 16705 * r2 = *(u64 *)(r10 - 16); r0 = r1; 16706 * r1 = *(u64 *)(r10 - 8); r0 += r2; 16707 * r0 = r1; exit; 16708 * r0 += r2; 16709 * exit; 16710 * 16711 * The purpose of mark_fastcall_pattern_for_call is to: 16712 * - look for such patterns; 16713 * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern; 16714 * - mark set env->insn_aux_data[*].fastcall_spills_num for call instruction; 16715 * - update env->subprog_info[*]->fastcall_stack_off to find an offset 16716 * at which bpf_fastcall spill/fill stack slots start; 16717 * - update env->subprog_info[*]->keep_fastcall_stack. 16718 * 16719 * The .fastcall_pattern and .fastcall_stack_off are used by 16720 * check_fastcall_stack_contract() to check if every stack access to 16721 * fastcall spill/fill stack slot originates from spill/fill 16722 * instructions, members of fastcall patterns. 16723 * 16724 * If such condition holds true for a subprogram, fastcall patterns could 16725 * be rewritten by remove_fastcall_spills_fills(). 16726 * Otherwise bpf_fastcall patterns are not changed in the subprogram 16727 * (code, presumably, generated by an older clang version). 16728 * 16729 * For example, it is *not* safe to remove spill/fill below: 16730 * 16731 * r1 = 1; 16732 * *(u64 *)(r10 - 8) = r1; r1 = 1; 16733 * call %[to_be_inlined] --> call %[to_be_inlined] 16734 * r1 = *(u64 *)(r10 - 8); r0 = *(u64 *)(r10 - 8); <---- wrong !!! 
16735 * r0 = *(u64 *)(r10 - 8); r0 += r1; 16736 * r0 += r1; exit; 16737 * exit; 16738 */ 16739 static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env, 16740 struct bpf_subprog_info *subprog, 16741 int insn_idx, s16 lowest_off) 16742 { 16743 struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx; 16744 struct bpf_insn *call = &env->prog->insnsi[insn_idx]; 16745 u32 clobbered_regs_mask; 16746 struct bpf_call_summary cs; 16747 u32 expected_regs_mask; 16748 s16 off; 16749 int i; 16750 16751 if (!bpf_get_call_summary(env, call, &cs)) 16752 return; 16753 16754 /* A bitmask specifying which caller saved registers are clobbered 16755 * by a call to a helper/kfunc *as if* this helper/kfunc follows 16756 * bpf_fastcall contract: 16757 * - includes R0 if function is non-void; 16758 * - includes R1-R5 if corresponding parameter has is described 16759 * in the function prototype. 16760 */ 16761 clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0); 16762 /* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */ 16763 expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS; 16764 16765 /* match pairs of form: 16766 * 16767 * *(u64 *)(r10 - Y) = rX (where Y % 8 == 0) 16768 * ... 16769 * call %[to_be_inlined] 16770 * ... 
16771 * rX = *(u64 *)(r10 - Y) 16772 */ 16773 for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) { 16774 if (insn_idx - i < 0 || insn_idx + i >= env->prog->len) 16775 break; 16776 stx = &insns[insn_idx - i]; 16777 ldx = &insns[insn_idx + i]; 16778 /* must be a stack spill/fill pair */ 16779 if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) || 16780 ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) || 16781 stx->dst_reg != BPF_REG_10 || 16782 ldx->src_reg != BPF_REG_10) 16783 break; 16784 /* must be a spill/fill for the same reg */ 16785 if (stx->src_reg != ldx->dst_reg) 16786 break; 16787 /* must be one of the previously unseen registers */ 16788 if ((BIT(stx->src_reg) & expected_regs_mask) == 0) 16789 break; 16790 /* must be a spill/fill for the same expected offset, 16791 * no need to check offset alignment, BPF_DW stack access 16792 * is always 8-byte aligned. 16793 */ 16794 if (stx->off != off || ldx->off != off) 16795 break; 16796 expected_regs_mask &= ~BIT(stx->src_reg); 16797 env->insn_aux_data[insn_idx - i].fastcall_pattern = 1; 16798 env->insn_aux_data[insn_idx + i].fastcall_pattern = 1; 16799 } 16800 if (i == 1) 16801 return; 16802 16803 /* Conditionally set 'fastcall_spills_num' to allow forward 16804 * compatibility when more helper functions are marked as 16805 * bpf_fastcall at compile time than current kernel supports, e.g: 16806 * 16807 * 1: *(u64 *)(r10 - 8) = r1 16808 * 2: call A ;; assume A is bpf_fastcall for current kernel 16809 * 3: r1 = *(u64 *)(r10 - 8) 16810 * 4: *(u64 *)(r10 - 8) = r1 16811 * 5: call B ;; assume B is not bpf_fastcall for current kernel 16812 * 6: r1 = *(u64 *)(r10 - 8) 16813 * 16814 * There is no need to block bpf_fastcall rewrite for such program. 16815 * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy, 16816 * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills() 16817 * does not remove spill/fill pair {4,6}. 
16818 */ 16819 if (cs.fastcall) 16820 env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1; 16821 else 16822 subprog->keep_fastcall_stack = 1; 16823 subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off); 16824 } 16825 16826 static int mark_fastcall_patterns(struct bpf_verifier_env *env) 16827 { 16828 struct bpf_subprog_info *subprog = env->subprog_info; 16829 struct bpf_insn *insn; 16830 s16 lowest_off; 16831 int s, i; 16832 16833 for (s = 0; s < env->subprog_cnt; ++s, ++subprog) { 16834 /* find lowest stack spill offset used in this subprog */ 16835 lowest_off = 0; 16836 for (i = subprog->start; i < (subprog + 1)->start; ++i) { 16837 insn = env->prog->insnsi + i; 16838 if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) || 16839 insn->dst_reg != BPF_REG_10) 16840 continue; 16841 lowest_off = min(lowest_off, insn->off); 16842 } 16843 /* use this offset to find fastcall patterns */ 16844 for (i = subprog->start; i < (subprog + 1)->start; ++i) { 16845 insn = env->prog->insnsi + i; 16846 if (insn->code != (BPF_JMP | BPF_CALL)) 16847 continue; 16848 mark_fastcall_pattern_for_call(env, subprog, i, lowest_off); 16849 } 16850 } 16851 return 0; 16852 } 16853 16854 static void adjust_btf_func(struct bpf_verifier_env *env) 16855 { 16856 struct bpf_prog_aux *aux = env->prog->aux; 16857 int i; 16858 16859 if (!aux->func_info) 16860 return; 16861 16862 /* func_info is not available for hidden subprogs */ 16863 for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++) 16864 aux->func_info[i].insn_off = env->subprog_info[i].start; 16865 } 16866 16867 /* Find id in idset and increment its count, or add new entry */ 16868 static void idset_cnt_inc(struct bpf_idset *idset, u32 id) 16869 { 16870 u32 i; 16871 16872 for (i = 0; i < idset->num_ids; i++) { 16873 if (idset->entries[i].id == id) { 16874 idset->entries[i].cnt++; 16875 return; 16876 } 16877 } 16878 /* New id */ 16879 if (idset->num_ids < BPF_ID_MAP_SIZE) { 16880 idset->entries[idset->num_ids].id = id; 
16881 idset->entries[idset->num_ids].cnt = 1; 16882 idset->num_ids++; 16883 } 16884 } 16885 16886 /* Find id in idset and return its count, or 0 if not found */ 16887 static u32 idset_cnt_get(struct bpf_idset *idset, u32 id) 16888 { 16889 u32 i; 16890 16891 for (i = 0; i < idset->num_ids; i++) { 16892 if (idset->entries[i].id == id) 16893 return idset->entries[i].cnt; 16894 } 16895 return 0; 16896 } 16897 16898 /* 16899 * Clear singular scalar ids in a state. 16900 * A register with a non-zero id is called singular if no other register shares 16901 * the same base id. Such registers can be treated as independent (id=0). 16902 */ 16903 void bpf_clear_singular_ids(struct bpf_verifier_env *env, 16904 struct bpf_verifier_state *st) 16905 { 16906 struct bpf_idset *idset = &env->idset_scratch; 16907 struct bpf_func_state *func; 16908 struct bpf_reg_state *reg; 16909 16910 idset->num_ids = 0; 16911 16912 bpf_for_each_reg_in_vstate(st, func, reg, ({ 16913 if (reg->type != SCALAR_VALUE) 16914 continue; 16915 if (!reg->id) 16916 continue; 16917 idset_cnt_inc(idset, reg->id & ~BPF_ADD_CONST); 16918 })); 16919 16920 bpf_for_each_reg_in_vstate(st, func, reg, ({ 16921 if (reg->type != SCALAR_VALUE) 16922 continue; 16923 if (!reg->id) 16924 continue; 16925 if (idset_cnt_get(idset, reg->id & ~BPF_ADD_CONST) == 1) 16926 clear_scalar_id(reg); 16927 })); 16928 } 16929 16930 /* Return true if it's OK to have the same insn return a different type. 
*/ 16931 static bool reg_type_mismatch_ok(enum bpf_reg_type type) 16932 { 16933 switch (base_type(type)) { 16934 case PTR_TO_CTX: 16935 case PTR_TO_SOCKET: 16936 case PTR_TO_SOCK_COMMON: 16937 case PTR_TO_TCP_SOCK: 16938 case PTR_TO_XDP_SOCK: 16939 case PTR_TO_BTF_ID: 16940 case PTR_TO_ARENA: 16941 return false; 16942 default: 16943 return true; 16944 } 16945 } 16946 16947 /* If an instruction was previously used with particular pointer types, then we 16948 * need to be careful to avoid cases such as the below, where it may be ok 16949 * for one branch accessing the pointer, but not ok for the other branch: 16950 * 16951 * R1 = sock_ptr 16952 * goto X; 16953 * ... 16954 * R1 = some_other_valid_ptr; 16955 * goto X; 16956 * ... 16957 * R2 = *(u32 *)(R1 + 0); 16958 */ 16959 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) 16960 { 16961 return src != prev && (!reg_type_mismatch_ok(src) || 16962 !reg_type_mismatch_ok(prev)); 16963 } 16964 16965 static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type) 16966 { 16967 switch (base_type(type)) { 16968 case PTR_TO_MEM: 16969 case PTR_TO_BTF_ID: 16970 return true; 16971 default: 16972 return false; 16973 } 16974 } 16975 16976 static bool is_ptr_to_mem(enum bpf_reg_type type) 16977 { 16978 return base_type(type) == PTR_TO_MEM; 16979 } 16980 16981 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, 16982 bool allow_trust_mismatch) 16983 { 16984 enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type; 16985 enum bpf_reg_type merged_type; 16986 16987 if (*prev_type == NOT_INIT) { 16988 /* Saw a valid insn 16989 * dst_reg = *(u32 *)(src_reg + off) 16990 * save type to validate intersecting paths 16991 */ 16992 *prev_type = type; 16993 } else if (reg_type_mismatch(type, *prev_type)) { 16994 /* Abuser program is trying to use the same insn 16995 * dst_reg = *(u32*) (src_reg + off) 16996 * with different pointer types: 16997 * src_reg == ctx in one branch 
and 16998 * src_reg == stack|map in some other branch. 16999 * Reject it. 17000 */ 17001 if (allow_trust_mismatch && 17002 is_ptr_to_mem_or_btf_id(type) && 17003 is_ptr_to_mem_or_btf_id(*prev_type)) { 17004 /* 17005 * Have to support a use case when one path through 17006 * the program yields TRUSTED pointer while another 17007 * is UNTRUSTED. Fallback to UNTRUSTED to generate 17008 * BPF_PROBE_MEM/BPF_PROBE_MEMSX. 17009 * Same behavior of MEM_RDONLY flag. 17010 */ 17011 if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type)) 17012 merged_type = PTR_TO_MEM; 17013 else 17014 merged_type = PTR_TO_BTF_ID; 17015 if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED)) 17016 merged_type |= PTR_UNTRUSTED; 17017 if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY)) 17018 merged_type |= MEM_RDONLY; 17019 *prev_type = merged_type; 17020 } else { 17021 verbose(env, "same insn cannot be used with different pointers\n"); 17022 return -EINVAL; 17023 } 17024 } 17025 17026 return 0; 17027 } 17028 17029 enum { 17030 PROCESS_BPF_EXIT = 1, 17031 INSN_IDX_UPDATED = 2, 17032 }; 17033 17034 static int process_bpf_exit_full(struct bpf_verifier_env *env, 17035 bool *do_print_state, 17036 bool exception_exit) 17037 { 17038 struct bpf_func_state *cur_frame = cur_func(env); 17039 17040 /* We must do check_reference_leak here before 17041 * prepare_func_exit to handle the case when 17042 * state->curframe > 0, it may be a callback function, 17043 * for which reference_state must match caller reference 17044 * state when it exits. 17045 */ 17046 int err = check_resource_leak(env, exception_exit, 17047 exception_exit || !env->cur_state->curframe, 17048 exception_exit ? "bpf_throw" : 17049 "BPF_EXIT instruction in main prog"); 17050 if (err) 17051 return err; 17052 17053 /* The side effect of the prepare_func_exit which is 17054 * being skipped is that it frees bpf_func_state. 17055 * Typically, process_bpf_exit will only be hit with 17056 * outermost exit. 
copy_verifier_state in pop_stack will 17057 * handle freeing of any extra bpf_func_state left over 17058 * from not processing all nested function exits. We 17059 * also skip return code checks as they are not needed 17060 * for exceptional exits. 17061 */ 17062 if (exception_exit) 17063 return PROCESS_BPF_EXIT; 17064 17065 if (env->cur_state->curframe) { 17066 /* exit from nested function */ 17067 err = prepare_func_exit(env, &env->insn_idx); 17068 if (err) 17069 return err; 17070 *do_print_state = true; 17071 return INSN_IDX_UPDATED; 17072 } 17073 17074 /* 17075 * Return from a regular global subprogram differs from return 17076 * from the main program or async/exception callback. 17077 * Main program exit implies return code restrictions 17078 * that depend on program type. 17079 * Exit from exception callback is equivalent to main program exit. 17080 * Exit from async callback implies return code restrictions 17081 * that depend on async scheduling mechanism. 17082 */ 17083 if (cur_frame->subprogno && 17084 !cur_frame->in_async_callback_fn && 17085 !cur_frame->in_exception_callback_fn) 17086 err = check_global_subprog_return_code(env); 17087 else 17088 err = check_return_code(env, BPF_REG_0, "R0"); 17089 if (err) 17090 return err; 17091 return PROCESS_BPF_EXIT; 17092 } 17093 17094 static int indirect_jump_min_max_index(struct bpf_verifier_env *env, 17095 int regno, 17096 struct bpf_map *map, 17097 u32 *pmin_index, u32 *pmax_index) 17098 { 17099 struct bpf_reg_state *reg = reg_state(env, regno); 17100 u64 min_index = reg_umin(reg); 17101 u64 max_index = reg_umax(reg); 17102 const u32 size = 8; 17103 17104 if (min_index > (u64) U32_MAX * size) { 17105 verbose(env, "the sum of R%u umin_value %llu is too big\n", regno, reg_umin(reg)); 17106 return -ERANGE; 17107 } 17108 if (max_index > (u64) U32_MAX * size) { 17109 verbose(env, "the sum of R%u umax_value %llu is too big\n", regno, reg_umax(reg)); 17110 return -ERANGE; 17111 } 17112 17113 min_index /= size; 17114 
max_index /= size; 17115 17116 if (max_index >= map->max_entries) { 17117 verbose(env, "R%u points to outside of jump table: [%llu,%llu] max_entries %u\n", 17118 regno, min_index, max_index, map->max_entries); 17119 return -EINVAL; 17120 } 17121 17122 *pmin_index = min_index; 17123 *pmax_index = max_index; 17124 return 0; 17125 } 17126 17127 /* gotox *dst_reg */ 17128 static int check_indirect_jump(struct bpf_verifier_env *env, struct bpf_insn *insn) 17129 { 17130 struct bpf_verifier_state *other_branch; 17131 struct bpf_reg_state *dst_reg; 17132 struct bpf_map *map; 17133 u32 min_index, max_index; 17134 int err = 0; 17135 int n; 17136 int i; 17137 17138 dst_reg = reg_state(env, insn->dst_reg); 17139 if (dst_reg->type != PTR_TO_INSN) { 17140 verbose(env, "R%d has type %s, expected PTR_TO_INSN\n", 17141 insn->dst_reg, reg_type_str(env, dst_reg->type)); 17142 return -EINVAL; 17143 } 17144 17145 map = dst_reg->map_ptr; 17146 if (verifier_bug_if(!map, env, "R%d has an empty map pointer", insn->dst_reg)) 17147 return -EFAULT; 17148 17149 if (verifier_bug_if(map->map_type != BPF_MAP_TYPE_INSN_ARRAY, env, 17150 "R%d has incorrect map type %d", insn->dst_reg, map->map_type)) 17151 return -EFAULT; 17152 17153 err = indirect_jump_min_max_index(env, insn->dst_reg, map, &min_index, &max_index); 17154 if (err) 17155 return err; 17156 17157 /* Ensure that the buffer is large enough */ 17158 if (!env->gotox_tmp_buf || env->gotox_tmp_buf->cnt < max_index - min_index + 1) { 17159 env->gotox_tmp_buf = bpf_iarray_realloc(env->gotox_tmp_buf, 17160 max_index - min_index + 1); 17161 if (!env->gotox_tmp_buf) 17162 return -ENOMEM; 17163 } 17164 17165 n = bpf_copy_insn_array_uniq(map, min_index, max_index, env->gotox_tmp_buf->items); 17166 if (n < 0) 17167 return n; 17168 if (n == 0) { 17169 verbose(env, "register R%d doesn't point to any offset in map id=%d\n", 17170 insn->dst_reg, map->id); 17171 return -EINVAL; 17172 } 17173 17174 for (i = 0; i < n - 1; i++) { 17175 
mark_indirect_target(env, env->gotox_tmp_buf->items[i]); 17176 other_branch = push_stack(env, env->gotox_tmp_buf->items[i], 17177 env->insn_idx, env->cur_state->speculative); 17178 if (IS_ERR(other_branch)) 17179 return PTR_ERR(other_branch); 17180 } 17181 env->insn_idx = env->gotox_tmp_buf->items[n-1]; 17182 mark_indirect_target(env, env->insn_idx); 17183 return INSN_IDX_UPDATED; 17184 } 17185 17186 static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state) 17187 { 17188 int err; 17189 struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx]; 17190 u8 class = BPF_CLASS(insn->code); 17191 17192 switch (class) { 17193 case BPF_ALU: 17194 case BPF_ALU64: 17195 return check_alu_op(env, insn); 17196 17197 case BPF_LDX: 17198 return check_load_mem(env, insn, false, 17199 BPF_MODE(insn->code) == BPF_MEMSX, 17200 true, "ldx"); 17201 17202 case BPF_STX: 17203 if (BPF_MODE(insn->code) == BPF_ATOMIC) 17204 return check_atomic(env, insn); 17205 return check_store_reg(env, insn, false); 17206 17207 case BPF_ST: { 17208 /* Handle stack arg write (store immediate) */ 17209 if (is_stack_arg_st(insn)) { 17210 struct bpf_verifier_state *vstate = env->cur_state; 17211 struct bpf_func_state *state = vstate->frame[vstate->curframe]; 17212 17213 return check_stack_arg_write(env, state, insn->off, NULL); 17214 } 17215 17216 enum bpf_reg_type dst_reg_type; 17217 17218 err = check_reg_arg(env, insn->dst_reg, SRC_OP); 17219 if (err) 17220 return err; 17221 17222 dst_reg_type = cur_regs(env)[insn->dst_reg].type; 17223 17224 err = check_mem_access(env, env->insn_idx, cur_regs(env) + insn->dst_reg, argno_from_reg(insn->dst_reg), 17225 insn->off, BPF_SIZE(insn->code), 17226 BPF_WRITE, -1, false, false); 17227 if (err) 17228 return err; 17229 17230 return save_aux_ptr_type(env, dst_reg_type, false); 17231 } 17232 case BPF_JMP: 17233 case BPF_JMP32: { 17234 u8 opcode = BPF_OP(insn->code); 17235 17236 env->jmps_processed++; 17237 if (opcode == BPF_CALL) { 17238 if 
(env->cur_state->active_locks) { 17239 if ((insn->src_reg == BPF_REG_0 && 17240 insn->imm != BPF_FUNC_spin_unlock && 17241 insn->imm != BPF_FUNC_kptr_xchg) || 17242 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && 17243 (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) { 17244 verbose(env, 17245 "function calls are not allowed while holding a lock\n"); 17246 return -EINVAL; 17247 } 17248 } 17249 mark_reg_scratched(env, BPF_REG_0); 17250 if (bpf_in_stack_arg_cnt(&env->subprog_info[cur_func(env)->subprogno])) 17251 cur_func(env)->no_stack_arg_load = true; 17252 if (insn->src_reg == BPF_PSEUDO_CALL) 17253 return check_func_call(env, insn, &env->insn_idx); 17254 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) 17255 return check_kfunc_call(env, insn, &env->insn_idx); 17256 return check_helper_call(env, insn, &env->insn_idx); 17257 } else if (opcode == BPF_JA) { 17258 if (BPF_SRC(insn->code) == BPF_X) 17259 return check_indirect_jump(env, insn); 17260 17261 if (class == BPF_JMP) 17262 env->insn_idx += insn->off + 1; 17263 else 17264 env->insn_idx += insn->imm + 1; 17265 return INSN_IDX_UPDATED; 17266 } else if (opcode == BPF_EXIT) { 17267 return process_bpf_exit_full(env, do_print_state, false); 17268 } 17269 return check_cond_jmp_op(env, insn, &env->insn_idx); 17270 } 17271 case BPF_LD: { 17272 u8 mode = BPF_MODE(insn->code); 17273 17274 if (mode == BPF_ABS || mode == BPF_IND) 17275 return check_ld_abs(env, insn); 17276 17277 if (mode == BPF_IMM) { 17278 err = check_ld_imm(env, insn); 17279 if (err) 17280 return err; 17281 17282 env->insn_idx++; 17283 sanitize_mark_insn_seen(env); 17284 } 17285 return 0; 17286 } 17287 } 17288 /* all class values are handled above. 
silence compiler warning */ 17289 return -EFAULT; 17290 } 17291 17292 static int do_check(struct bpf_verifier_env *env) 17293 { 17294 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); 17295 struct bpf_verifier_state *state = env->cur_state; 17296 struct bpf_insn *insns = env->prog->insnsi; 17297 int insn_cnt = env->prog->len; 17298 bool do_print_state = false; 17299 int prev_insn_idx = -1; 17300 17301 for (;;) { 17302 struct bpf_insn *insn; 17303 struct bpf_insn_aux_data *insn_aux; 17304 int err; 17305 17306 /* reset current history entry on each new instruction */ 17307 env->cur_hist_ent = NULL; 17308 17309 env->prev_insn_idx = prev_insn_idx; 17310 if (env->insn_idx >= insn_cnt) { 17311 verbose(env, "invalid insn idx %d insn_cnt %d\n", 17312 env->insn_idx, insn_cnt); 17313 return -EFAULT; 17314 } 17315 17316 insn = &insns[env->insn_idx]; 17317 insn_aux = &env->insn_aux_data[env->insn_idx]; 17318 17319 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { 17320 verbose(env, 17321 "BPF program is too large. Processed %d insn\n", 17322 env->insn_processed); 17323 return -E2BIG; 17324 } 17325 17326 state->last_insn_idx = env->prev_insn_idx; 17327 state->insn_idx = env->insn_idx; 17328 17329 if (bpf_is_prune_point(env, env->insn_idx)) { 17330 err = bpf_is_state_visited(env, env->insn_idx); 17331 if (err < 0) 17332 return err; 17333 if (err == 1) { 17334 /* found equivalent state, can prune the search */ 17335 if (env->log.level & BPF_LOG_LEVEL) { 17336 if (do_print_state) 17337 verbose(env, "\nfrom %d to %d%s: safe\n", 17338 env->prev_insn_idx, env->insn_idx, 17339 env->cur_state->speculative ? 
17340 " (speculative execution)" : ""); 17341 else 17342 verbose(env, "%d: safe\n", env->insn_idx); 17343 } 17344 goto process_bpf_exit; 17345 } 17346 } 17347 17348 if (bpf_is_jmp_point(env, env->insn_idx)) { 17349 err = bpf_push_jmp_history(env, state, 0, 0, 0, 0); 17350 if (err) 17351 return err; 17352 } 17353 17354 if (signal_pending(current)) 17355 return -EAGAIN; 17356 17357 if (need_resched()) 17358 cond_resched(); 17359 17360 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) { 17361 verbose(env, "\nfrom %d to %d%s:", 17362 env->prev_insn_idx, env->insn_idx, 17363 env->cur_state->speculative ? 17364 " (speculative execution)" : ""); 17365 print_verifier_state(env, state, state->curframe, true); 17366 do_print_state = false; 17367 } 17368 17369 if (env->log.level & BPF_LOG_LEVEL) { 17370 if (verifier_state_scratched(env)) 17371 print_insn_state(env, state, state->curframe); 17372 17373 verbose_linfo(env, env->insn_idx, "; "); 17374 env->prev_log_pos = env->log.end_pos; 17375 verbose(env, "%d: ", env->insn_idx); 17376 bpf_verbose_insn(env, insn); 17377 env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos; 17378 env->prev_log_pos = env->log.end_pos; 17379 } 17380 17381 if (bpf_prog_is_offloaded(env->prog->aux)) { 17382 err = bpf_prog_offload_verify_insn(env, env->insn_idx, 17383 env->prev_insn_idx); 17384 if (err) 17385 return err; 17386 } 17387 17388 sanitize_mark_insn_seen(env); 17389 prev_insn_idx = env->insn_idx; 17390 17391 /* Sanity check: precomputed constants must match verifier state */ 17392 if (!state->speculative && insn_aux->const_reg_mask) { 17393 struct bpf_reg_state *regs = cur_regs(env); 17394 u16 mask = insn_aux->const_reg_mask; 17395 17396 for (int r = 0; r < ARRAY_SIZE(insn_aux->const_reg_vals); r++) { 17397 u32 cval = insn_aux->const_reg_vals[r]; 17398 17399 if (!(mask & BIT(r))) 17400 continue; 17401 if (regs[r].type != SCALAR_VALUE) 17402 continue; 17403 if (!tnum_is_const(regs[r].var_off)) 17404 continue; 17405 if 
(verifier_bug_if((u32)regs[r].var_off.value != cval, 17406 env, "const R%d: %u != %llu", 17407 r, cval, regs[r].var_off.value)) 17408 return -EFAULT; 17409 } 17410 } 17411 17412 /* Reduce verification complexity by stopping speculative path 17413 * verification when a nospec is encountered. 17414 */ 17415 if (state->speculative && insn_aux->nospec) 17416 goto process_bpf_exit; 17417 17418 err = do_check_insn(env, &do_print_state); 17419 if (error_recoverable_with_nospec(err) && state->speculative) { 17420 /* Prevent this speculative path from ever reaching the 17421 * insn that would have been unsafe to execute. 17422 */ 17423 insn_aux->nospec = true; 17424 /* If it was an ADD/SUB insn, potentially remove any 17425 * markings for alu sanitization. 17426 */ 17427 insn_aux->alu_state = 0; 17428 goto process_bpf_exit; 17429 } else if (err < 0) { 17430 return err; 17431 } else if (err == PROCESS_BPF_EXIT) { 17432 goto process_bpf_exit; 17433 } else if (err == INSN_IDX_UPDATED) { 17434 } else if (err == 0) { 17435 env->insn_idx++; 17436 } 17437 17438 if (state->speculative && insn_aux->nospec_result) { 17439 /* If we are on a path that performed a jump-op, this 17440 * may skip a nospec patched-in after the jump. This can 17441 * currently never happen because nospec_result is only 17442 * used for the write-ops 17443 * `*(size*)(dst_reg+off)=src_reg|imm32` and helper 17444 * calls. These must never skip the following insn 17445 * (i.e., bpf_insn_successors()'s opcode_info.can_jump 17446 * is false). Still, add a warning to document this in 17447 * case nospec_result is used elsewhere in the future. 17448 * 17449 * All non-branch instructions have a single 17450 * fall-through edge. For these, nospec_result should 17451 * already work. 
17452 */ 17453 if (verifier_bug_if((BPF_CLASS(insn->code) == BPF_JMP || 17454 BPF_CLASS(insn->code) == BPF_JMP32) && 17455 BPF_OP(insn->code) != BPF_CALL, env, 17456 "speculation barrier after jump instruction may not have the desired effect")) 17457 return -EFAULT; 17458 process_bpf_exit: 17459 mark_verifier_state_scratched(env); 17460 err = bpf_update_branch_counts(env, env->cur_state); 17461 if (err) 17462 return err; 17463 err = pop_stack(env, &prev_insn_idx, &env->insn_idx, 17464 pop_log); 17465 if (err < 0) { 17466 if (err != -ENOENT) 17467 return err; 17468 break; 17469 } else { 17470 do_print_state = true; 17471 continue; 17472 } 17473 } 17474 } 17475 17476 return 0; 17477 } 17478 17479 static int find_btf_percpu_datasec(struct btf *btf) 17480 { 17481 const struct btf_type *t; 17482 const char *tname; 17483 int i, n; 17484 17485 /* 17486 * Both vmlinux and module each have their own ".data..percpu" 17487 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF 17488 * types to look at only module's own BTF types. 17489 */ 17490 n = btf_nr_types(btf); 17491 for (i = btf_named_start_id(btf, true); i < n; i++) { 17492 t = btf_type_by_id(btf, i); 17493 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC) 17494 continue; 17495 17496 tname = btf_name_by_offset(btf, t->name_off); 17497 if (!strcmp(tname, ".data..percpu")) 17498 return i; 17499 } 17500 17501 return -ENOENT; 17502 } 17503 17504 /* 17505 * Add btf to the env->used_btfs array. If needed, refcount the 17506 * corresponding kernel module. To simplify caller's logic 17507 * in case of error or if btf was added before the function 17508 * decreases the btf refcount. 
17509 */ 17510 static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf) 17511 { 17512 struct btf_mod_pair *btf_mod; 17513 int ret = 0; 17514 int i; 17515 17516 /* check whether we recorded this BTF (and maybe module) already */ 17517 for (i = 0; i < env->used_btf_cnt; i++) 17518 if (env->used_btfs[i].btf == btf) 17519 goto ret_put; 17520 17521 if (env->used_btf_cnt >= MAX_USED_BTFS) { 17522 verbose(env, "The total number of btfs per program has reached the limit of %u\n", 17523 MAX_USED_BTFS); 17524 ret = -E2BIG; 17525 goto ret_put; 17526 } 17527 17528 btf_mod = &env->used_btfs[env->used_btf_cnt]; 17529 btf_mod->btf = btf; 17530 btf_mod->module = NULL; 17531 17532 /* if we reference variables from kernel module, bump its refcount */ 17533 if (btf_is_module(btf)) { 17534 btf_mod->module = btf_try_get_module(btf); 17535 if (!btf_mod->module) { 17536 ret = -ENXIO; 17537 goto ret_put; 17538 } 17539 } 17540 17541 env->used_btf_cnt++; 17542 return 0; 17543 17544 ret_put: 17545 /* Either error or this BTF was already added */ 17546 btf_put(btf); 17547 return ret; 17548 } 17549 17550 /* replace pseudo btf_id with kernel symbol address */ 17551 static int __check_pseudo_btf_id(struct bpf_verifier_env *env, 17552 struct bpf_insn *insn, 17553 struct bpf_insn_aux_data *aux, 17554 struct btf *btf) 17555 { 17556 const struct btf_var_secinfo *vsi; 17557 const struct btf_type *datasec; 17558 const struct btf_type *t; 17559 const char *sym_name; 17560 bool percpu = false; 17561 u32 type, id = insn->imm; 17562 s32 datasec_id; 17563 u64 addr; 17564 int i; 17565 17566 t = btf_type_by_id(btf, id); 17567 if (!t) { 17568 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id); 17569 return -ENOENT; 17570 } 17571 17572 if (!btf_type_is_var(t) && !btf_type_is_func(t)) { 17573 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id); 17574 return -EINVAL; 17575 } 17576 17577 sym_name = btf_name_by_offset(btf, t->name_off); 17578 addr = 
kallsyms_lookup_name(sym_name); 17579 if (!addr) { 17580 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", 17581 sym_name); 17582 return -ENOENT; 17583 } 17584 insn[0].imm = (u32)addr; 17585 insn[1].imm = addr >> 32; 17586 17587 if (btf_type_is_func(t)) { 17588 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; 17589 aux->btf_var.mem_size = 0; 17590 return 0; 17591 } 17592 17593 datasec_id = find_btf_percpu_datasec(btf); 17594 if (datasec_id > 0) { 17595 datasec = btf_type_by_id(btf, datasec_id); 17596 for_each_vsi(i, datasec, vsi) { 17597 if (vsi->type == id) { 17598 percpu = true; 17599 break; 17600 } 17601 } 17602 } 17603 17604 type = t->type; 17605 t = btf_type_skip_modifiers(btf, type, NULL); 17606 if (percpu) { 17607 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU; 17608 aux->btf_var.btf = btf; 17609 aux->btf_var.btf_id = type; 17610 } else if (!btf_type_is_struct(t)) { 17611 const struct btf_type *ret; 17612 const char *tname; 17613 u32 tsize; 17614 17615 /* resolve the type size of ksym. 
*/ 17616 ret = btf_resolve_size(btf, t, &tsize); 17617 if (IS_ERR(ret)) { 17618 tname = btf_name_by_offset(btf, t->name_off); 17619 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", 17620 tname, PTR_ERR(ret)); 17621 return -EINVAL; 17622 } 17623 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; 17624 aux->btf_var.mem_size = tsize; 17625 } else { 17626 aux->btf_var.reg_type = PTR_TO_BTF_ID; 17627 aux->btf_var.btf = btf; 17628 aux->btf_var.btf_id = type; 17629 } 17630 17631 return 0; 17632 } 17633 17634 static int check_pseudo_btf_id(struct bpf_verifier_env *env, 17635 struct bpf_insn *insn, 17636 struct bpf_insn_aux_data *aux) 17637 { 17638 struct btf *btf; 17639 int btf_fd; 17640 int err; 17641 17642 btf_fd = insn[1].imm; 17643 if (btf_fd) { 17644 btf = btf_get_by_fd(btf_fd); 17645 if (IS_ERR(btf)) { 17646 verbose(env, "invalid module BTF object FD specified.\n"); 17647 return -EINVAL; 17648 } 17649 } else { 17650 if (!btf_vmlinux) { 17651 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); 17652 return -EINVAL; 17653 } 17654 btf_get(btf_vmlinux); 17655 btf = btf_vmlinux; 17656 } 17657 17658 err = __check_pseudo_btf_id(env, insn, aux, btf); 17659 if (err) { 17660 btf_put(btf); 17661 return err; 17662 } 17663 17664 return __add_used_btf(env, btf); 17665 } 17666 17667 static bool is_tracing_prog_type(enum bpf_prog_type type) 17668 { 17669 switch (type) { 17670 case BPF_PROG_TYPE_KPROBE: 17671 case BPF_PROG_TYPE_TRACEPOINT: 17672 case BPF_PROG_TYPE_PERF_EVENT: 17673 case BPF_PROG_TYPE_RAW_TRACEPOINT: 17674 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 17675 return true; 17676 default: 17677 return false; 17678 } 17679 } 17680 17681 static bool bpf_map_is_cgroup_storage(struct bpf_map *map) 17682 { 17683 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || 17684 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); 17685 } 17686 17687 static int check_map_prog_compatibility(struct bpf_verifier_env 
*env, 17688 struct bpf_map *map, 17689 struct bpf_prog *prog) 17690 17691 { 17692 enum bpf_prog_type prog_type = resolve_prog_type(prog); 17693 17694 if (map->excl_prog_sha && 17695 memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) { 17696 verbose(env, "program's hash doesn't match map's excl_prog_hash\n"); 17697 return -EACCES; 17698 } 17699 17700 if (btf_record_has_field(map->record, BPF_LIST_HEAD) || 17701 btf_record_has_field(map->record, BPF_RB_ROOT)) { 17702 if (is_tracing_prog_type(prog_type)) { 17703 verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n"); 17704 return -EINVAL; 17705 } 17706 } 17707 17708 if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) { 17709 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { 17710 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n"); 17711 return -EINVAL; 17712 } 17713 17714 if (is_tracing_prog_type(prog_type)) { 17715 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); 17716 return -EINVAL; 17717 } 17718 } 17719 17720 if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) && 17721 !bpf_offload_prog_map_match(prog, map)) { 17722 verbose(env, "offload device mismatch between prog and map\n"); 17723 return -EINVAL; 17724 } 17725 17726 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 17727 verbose(env, "bpf_struct_ops map cannot be used in prog\n"); 17728 return -EINVAL; 17729 } 17730 17731 if (prog->sleepable) 17732 switch (map->map_type) { 17733 case BPF_MAP_TYPE_HASH: 17734 case BPF_MAP_TYPE_RHASH: 17735 case BPF_MAP_TYPE_LRU_HASH: 17736 case BPF_MAP_TYPE_ARRAY: 17737 case BPF_MAP_TYPE_PERCPU_HASH: 17738 case BPF_MAP_TYPE_PERCPU_ARRAY: 17739 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 17740 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 17741 case BPF_MAP_TYPE_HASH_OF_MAPS: 17742 case BPF_MAP_TYPE_RINGBUF: 17743 case BPF_MAP_TYPE_USER_RINGBUF: 17744 case BPF_MAP_TYPE_INODE_STORAGE: 17745 case BPF_MAP_TYPE_SK_STORAGE: 17746 case BPF_MAP_TYPE_TASK_STORAGE: 17747 
case BPF_MAP_TYPE_CGRP_STORAGE: 17748 case BPF_MAP_TYPE_QUEUE: 17749 case BPF_MAP_TYPE_STACK: 17750 case BPF_MAP_TYPE_ARENA: 17751 case BPF_MAP_TYPE_INSN_ARRAY: 17752 case BPF_MAP_TYPE_PROG_ARRAY: 17753 break; 17754 default: 17755 verbose(env, 17756 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n"); 17757 return -EINVAL; 17758 } 17759 17760 if (bpf_map_is_cgroup_storage(map) && 17761 bpf_cgroup_storage_assign(env->prog->aux, map)) { 17762 verbose(env, "only one cgroup storage of each type is allowed\n"); 17763 return -EBUSY; 17764 } 17765 17766 if (map->map_type == BPF_MAP_TYPE_ARENA) { 17767 if (env->prog->aux->arena) { 17768 verbose(env, "Only one arena per program\n"); 17769 return -EBUSY; 17770 } 17771 if (!env->allow_ptr_leaks || !env->bpf_capable) { 17772 verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n"); 17773 return -EPERM; 17774 } 17775 if (!env->prog->jit_requested) { 17776 verbose(env, "JIT is required to use arena\n"); 17777 return -EOPNOTSUPP; 17778 } 17779 if (!bpf_jit_supports_arena()) { 17780 verbose(env, "JIT doesn't support arena\n"); 17781 return -EOPNOTSUPP; 17782 } 17783 env->prog->aux->arena = (void *)map; 17784 if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { 17785 verbose(env, "arena's user address must be set via map_extra or mmap()\n"); 17786 return -EINVAL; 17787 } 17788 } 17789 17790 return 0; 17791 } 17792 17793 static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) 17794 { 17795 int i, err; 17796 17797 /* check whether we recorded this map already */ 17798 for (i = 0; i < env->used_map_cnt; i++) 17799 if (env->used_maps[i] == map) 17800 return i; 17801 17802 if (env->used_map_cnt >= MAX_USED_MAPS) { 17803 verbose(env, "The total number of maps per program has reached the limit of %u\n", 17804 MAX_USED_MAPS); 17805 return -E2BIG; 17806 } 17807 17808 err = check_map_prog_compatibility(env, map, env->prog); 17809 if (err) 17810 return err; 17811 17812 if 
(env->prog->sleepable) 17813 atomic64_inc(&map->sleepable_refcnt); 17814 17815 /* hold the map. If the program is rejected by verifier, 17816 * the map will be released by release_maps() or it 17817 * will be used by the valid program until it's unloaded 17818 * and all maps are released in bpf_free_used_maps() 17819 */ 17820 bpf_map_inc(map); 17821 17822 env->used_maps[env->used_map_cnt++] = map; 17823 17824 if (map->map_type == BPF_MAP_TYPE_INSN_ARRAY) { 17825 err = bpf_insn_array_init(map, env->prog); 17826 if (err) { 17827 verbose(env, "Failed to properly initialize insn array\n"); 17828 return err; 17829 } 17830 env->insn_array_maps[env->insn_array_map_cnt++] = map; 17831 } 17832 17833 return env->used_map_cnt - 1; 17834 } 17835 17836 /* Add map behind fd to used maps list, if it's not already there, and return 17837 * its index. 17838 * Returns <0 on error, or >= 0 index, on success. 17839 */ 17840 static int add_used_map(struct bpf_verifier_env *env, int fd) 17841 { 17842 struct bpf_map *map; 17843 CLASS(fd, f)(fd); 17844 17845 map = __bpf_map_get(f); 17846 if (IS_ERR(map)) { 17847 verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); 17848 return PTR_ERR(map); 17849 } 17850 17851 return __add_used_map(env, map); 17852 } 17853 17854 static int check_alu_fields(struct bpf_verifier_env *env, struct bpf_insn *insn) 17855 { 17856 u8 class = BPF_CLASS(insn->code); 17857 u8 opcode = BPF_OP(insn->code); 17858 17859 switch (opcode) { 17860 case BPF_NEG: 17861 if (BPF_SRC(insn->code) != BPF_K || insn->src_reg != BPF_REG_0 || 17862 insn->off != 0 || insn->imm != 0) { 17863 verbose(env, "BPF_NEG uses reserved fields\n"); 17864 return -EINVAL; 17865 } 17866 return 0; 17867 case BPF_END: 17868 if (insn->src_reg != BPF_REG_0 || insn->off != 0 || 17869 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || 17870 (class == BPF_ALU64 && BPF_SRC(insn->code) != BPF_TO_LE)) { 17871 verbose(env, "BPF_END uses reserved fields\n"); 17872 return -EINVAL; 17873 } 17874 
return 0; 17875 case BPF_MOV: 17876 if (BPF_SRC(insn->code) == BPF_X) { 17877 if (class == BPF_ALU) { 17878 if ((insn->off != 0 && insn->off != 8 && insn->off != 16) || 17879 insn->imm) { 17880 verbose(env, "BPF_MOV uses reserved fields\n"); 17881 return -EINVAL; 17882 } 17883 } else if (insn->off == BPF_ADDR_SPACE_CAST) { 17884 if (insn->imm != 1 && insn->imm != 1u << 16) { 17885 verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n"); 17886 return -EINVAL; 17887 } 17888 } else if ((insn->off != 0 && insn->off != 8 && 17889 insn->off != 16 && insn->off != 32) || insn->imm) { 17890 verbose(env, "BPF_MOV uses reserved fields\n"); 17891 return -EINVAL; 17892 } 17893 } else if (insn->src_reg != BPF_REG_0 || insn->off != 0) { 17894 verbose(env, "BPF_MOV uses reserved fields\n"); 17895 return -EINVAL; 17896 } 17897 return 0; 17898 case BPF_ADD: 17899 case BPF_SUB: 17900 case BPF_AND: 17901 case BPF_OR: 17902 case BPF_XOR: 17903 case BPF_LSH: 17904 case BPF_RSH: 17905 case BPF_ARSH: 17906 case BPF_MUL: 17907 case BPF_DIV: 17908 case BPF_MOD: 17909 if (BPF_SRC(insn->code) == BPF_X) { 17910 if (insn->imm != 0 || (insn->off != 0 && insn->off != 1) || 17911 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) { 17912 verbose(env, "BPF_ALU uses reserved fields\n"); 17913 return -EINVAL; 17914 } 17915 } else if (insn->src_reg != BPF_REG_0 || 17916 (insn->off != 0 && insn->off != 1) || 17917 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) { 17918 verbose(env, "BPF_ALU uses reserved fields\n"); 17919 return -EINVAL; 17920 } 17921 return 0; 17922 default: 17923 verbose(env, "invalid BPF_ALU opcode %x\n", opcode); 17924 return -EINVAL; 17925 } 17926 } 17927 17928 static int check_jmp_fields(struct bpf_verifier_env *env, struct bpf_insn *insn) 17929 { 17930 u8 class = BPF_CLASS(insn->code); 17931 u8 opcode = BPF_OP(insn->code); 17932 17933 switch (opcode) { 17934 case BPF_CALL: 17935 if (BPF_SRC(insn->code) != BPF_K || 17936 
(insn->src_reg != BPF_PSEUDO_KFUNC_CALL && insn->off != 0) || 17937 (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL && 17938 insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || 17939 insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) { 17940 verbose(env, "BPF_CALL uses reserved fields\n"); 17941 return -EINVAL; 17942 } 17943 return 0; 17944 case BPF_JA: 17945 if (BPF_SRC(insn->code) == BPF_X) { 17946 if (insn->src_reg != BPF_REG_0 || insn->imm != 0 || insn->off != 0) { 17947 verbose(env, "BPF_JA|BPF_X uses reserved fields\n"); 17948 return -EINVAL; 17949 } 17950 } else if (insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 || 17951 (class == BPF_JMP && insn->imm != 0) || 17952 (class == BPF_JMP32 && insn->off != 0)) { 17953 verbose(env, "BPF_JA uses reserved fields\n"); 17954 return -EINVAL; 17955 } 17956 return 0; 17957 case BPF_EXIT: 17958 if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || 17959 insn->src_reg != BPF_REG_0 || insn->dst_reg != BPF_REG_0 || 17960 class == BPF_JMP32) { 17961 verbose(env, "BPF_EXIT uses reserved fields\n"); 17962 return -EINVAL; 17963 } 17964 return 0; 17965 case BPF_JCOND: 17966 if (insn->code != (BPF_JMP | BPF_JCOND) || insn->src_reg != BPF_MAY_GOTO || 17967 insn->dst_reg || insn->imm) { 17968 verbose(env, "invalid may_goto imm %d\n", insn->imm); 17969 return -EINVAL; 17970 } 17971 return 0; 17972 default: 17973 if (BPF_SRC(insn->code) == BPF_X) { 17974 if (insn->imm != 0) { 17975 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 17976 return -EINVAL; 17977 } 17978 } else if (insn->src_reg != BPF_REG_0) { 17979 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); 17980 return -EINVAL; 17981 } 17982 return 0; 17983 } 17984 } 17985 17986 static int check_insn_fields(struct bpf_verifier_env *env, struct bpf_insn *insn) 17987 { 17988 switch (BPF_CLASS(insn->code)) { 17989 case BPF_ALU: 17990 case BPF_ALU64: 17991 return check_alu_fields(env, insn); 17992 case BPF_LDX: 17993 if ((BPF_MODE(insn->code) != BPF_MEM && 
BPF_MODE(insn->code) != BPF_MEMSX) || 17994 insn->imm != 0) { 17995 verbose(env, "BPF_LDX uses reserved fields\n"); 17996 return -EINVAL; 17997 } 17998 return 0; 17999 case BPF_STX: 18000 if (BPF_MODE(insn->code) == BPF_ATOMIC) 18001 return 0; 18002 if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) { 18003 verbose(env, "BPF_STX uses reserved fields\n"); 18004 return -EINVAL; 18005 } 18006 return 0; 18007 case BPF_ST: 18008 if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) { 18009 verbose(env, "BPF_ST uses reserved fields\n"); 18010 return -EINVAL; 18011 } 18012 return 0; 18013 case BPF_JMP: 18014 case BPF_JMP32: 18015 return check_jmp_fields(env, insn); 18016 case BPF_LD: { 18017 u8 mode = BPF_MODE(insn->code); 18018 18019 if (mode == BPF_ABS || mode == BPF_IND) { 18020 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || 18021 BPF_SIZE(insn->code) == BPF_DW || 18022 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { 18023 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n"); 18024 return -EINVAL; 18025 } 18026 } else if (mode != BPF_IMM) { 18027 verbose(env, "invalid BPF_LD mode\n"); 18028 return -EINVAL; 18029 } 18030 return 0; 18031 } 18032 default: 18033 verbose(env, "unknown insn class %d\n", BPF_CLASS(insn->code)); 18034 return -EINVAL; 18035 } 18036 } 18037 18038 /* 18039 * Check that insns are sane and rewrite pseudo imm in ld_imm64 instructions: 18040 * 18041 * 1. if it accesses map FD, replace it with actual map pointer. 18042 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var. 18043 * 18044 * NOTE: btf_vmlinux is required for converting pseudo btf_id. 
18045 */ 18046 static int check_and_resolve_insns(struct bpf_verifier_env *env) 18047 { 18048 struct bpf_insn *insn = env->prog->insnsi; 18049 int insn_cnt = env->prog->len; 18050 int i, err; 18051 18052 err = bpf_prog_calc_tag(env->prog); 18053 if (err) 18054 return err; 18055 18056 for (i = 0; i < insn_cnt; i++, insn++) { 18057 if (insn->dst_reg >= MAX_BPF_REG && 18058 !is_stack_arg_st(insn) && !is_stack_arg_stx(insn)) { 18059 verbose(env, "R%d is invalid\n", insn->dst_reg); 18060 return -EINVAL; 18061 } 18062 if (insn->src_reg >= MAX_BPF_REG && !is_stack_arg_ldx(insn)) { 18063 verbose(env, "R%d is invalid\n", insn->src_reg); 18064 return -EINVAL; 18065 } 18066 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { 18067 struct bpf_insn_aux_data *aux; 18068 struct bpf_map *map; 18069 int map_idx; 18070 u64 addr; 18071 u32 fd; 18072 18073 if (i == insn_cnt - 1 || insn[1].code != 0 || 18074 insn[1].dst_reg != 0 || insn[1].src_reg != 0 || 18075 insn[1].off != 0) { 18076 verbose(env, "invalid bpf_ld_imm64 insn\n"); 18077 return -EINVAL; 18078 } 18079 18080 if (insn[0].off != 0) { 18081 verbose(env, "BPF_LD_IMM64 uses reserved fields\n"); 18082 return -EINVAL; 18083 } 18084 18085 if (insn[0].src_reg == 0) 18086 /* valid generic load 64-bit imm */ 18087 goto next_insn; 18088 18089 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) { 18090 aux = &env->insn_aux_data[i]; 18091 err = check_pseudo_btf_id(env, insn, aux); 18092 if (err) 18093 return err; 18094 goto next_insn; 18095 } 18096 18097 if (insn[0].src_reg == BPF_PSEUDO_FUNC) { 18098 aux = &env->insn_aux_data[i]; 18099 aux->ptr_type = PTR_TO_FUNC; 18100 goto next_insn; 18101 } 18102 18103 /* In final convert_pseudo_ld_imm64() step, this is 18104 * converted into regular 64-bit imm load insn. 
18105 */ 18106 switch (insn[0].src_reg) { 18107 case BPF_PSEUDO_MAP_VALUE: 18108 case BPF_PSEUDO_MAP_IDX_VALUE: 18109 break; 18110 case BPF_PSEUDO_MAP_FD: 18111 case BPF_PSEUDO_MAP_IDX: 18112 if (insn[1].imm == 0) 18113 break; 18114 fallthrough; 18115 default: 18116 verbose(env, "unrecognized bpf_ld_imm64 insn\n"); 18117 return -EINVAL; 18118 } 18119 18120 switch (insn[0].src_reg) { 18121 case BPF_PSEUDO_MAP_IDX_VALUE: 18122 case BPF_PSEUDO_MAP_IDX: 18123 if (bpfptr_is_null(env->fd_array)) { 18124 verbose(env, "fd_idx without fd_array is invalid\n"); 18125 return -EPROTO; 18126 } 18127 if (copy_from_bpfptr_offset(&fd, env->fd_array, 18128 insn[0].imm * sizeof(fd), 18129 sizeof(fd))) 18130 return -EFAULT; 18131 break; 18132 default: 18133 fd = insn[0].imm; 18134 break; 18135 } 18136 18137 map_idx = add_used_map(env, fd); 18138 if (map_idx < 0) 18139 return map_idx; 18140 map = env->used_maps[map_idx]; 18141 18142 aux = &env->insn_aux_data[i]; 18143 aux->map_index = map_idx; 18144 18145 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || 18146 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { 18147 addr = (unsigned long)map; 18148 } else { 18149 u32 off = insn[1].imm; 18150 18151 if (!map->ops->map_direct_value_addr) { 18152 verbose(env, "no direct value access support for this map type\n"); 18153 return -EINVAL; 18154 } 18155 18156 err = map->ops->map_direct_value_addr(map, &addr, off); 18157 if (err) { 18158 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", 18159 map->value_size, off); 18160 return err; 18161 } 18162 18163 aux->map_off = off; 18164 addr += off; 18165 } 18166 18167 insn[0].imm = (u32)addr; 18168 insn[1].imm = addr >> 32; 18169 18170 next_insn: 18171 insn++; 18172 i++; 18173 continue; 18174 } 18175 18176 /* Basic sanity check before we invest more work here. 
*/ 18177 if (!bpf_opcode_in_insntable(insn->code)) { 18178 verbose(env, "unknown opcode %02x\n", insn->code); 18179 return -EINVAL; 18180 } 18181 18182 err = check_insn_fields(env, insn); 18183 if (err) 18184 return err; 18185 } 18186 18187 /* now all pseudo BPF_LD_IMM64 instructions load valid 18188 * 'struct bpf_map *' into a register instead of user map_fd. 18189 * These pointers will be used later by verifier to validate map access. 18190 */ 18191 return 0; 18192 } 18193 18194 /* drop refcnt of maps used by the rejected program */ 18195 static void release_maps(struct bpf_verifier_env *env) 18196 { 18197 __bpf_free_used_maps(env->prog->aux, env->used_maps, 18198 env->used_map_cnt); 18199 } 18200 18201 /* drop refcnt of maps used by the rejected program */ 18202 static void release_btfs(struct bpf_verifier_env *env) 18203 { 18204 __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt); 18205 } 18206 18207 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ 18208 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) 18209 { 18210 struct bpf_insn *insn = env->prog->insnsi; 18211 int insn_cnt = env->prog->len; 18212 int i; 18213 18214 for (i = 0; i < insn_cnt; i++, insn++) { 18215 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) 18216 continue; 18217 if (insn->src_reg == BPF_PSEUDO_FUNC) 18218 continue; 18219 insn->src_reg = 0; 18220 } 18221 } 18222 18223 static void release_insn_arrays(struct bpf_verifier_env *env) 18224 { 18225 int i; 18226 18227 for (i = 0; i < env->insn_array_map_cnt; i++) 18228 bpf_insn_array_release(env->insn_array_maps[i]); 18229 } 18230 18231 18232 18233 /* The verifier does more data flow analysis than llvm and will not 18234 * explore branches that are dead at run time. Malicious programs can 18235 * have dead code too. Therefore replace all dead at-run-time code 18236 * with 'ja -1'. 18237 * 18238 * Just nops are not optimal, e.g. 
if they would sit at the end of the 18239 * program and through another bug we would manage to jump there, then 18240 * we'd execute beyond program memory otherwise. Returning exception 18241 * code also wouldn't work since we can have subprogs where the dead 18242 * code could be located. 18243 */ 18244 static void sanitize_dead_code(struct bpf_verifier_env *env) 18245 { 18246 struct bpf_insn_aux_data *aux_data = env->insn_aux_data; 18247 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1); 18248 struct bpf_insn *insn = env->prog->insnsi; 18249 const int insn_cnt = env->prog->len; 18250 int i; 18251 18252 for (i = 0; i < insn_cnt; i++) { 18253 if (aux_data[i].seen) 18254 continue; 18255 memcpy(insn + i, &trap, sizeof(trap)); 18256 aux_data[i].zext_dst = false; 18257 } 18258 } 18259 18260 18261 18262 static void free_states(struct bpf_verifier_env *env) 18263 { 18264 struct bpf_verifier_state_list *sl; 18265 struct list_head *head, *pos, *tmp; 18266 struct bpf_scc_info *info; 18267 int i, j; 18268 18269 bpf_free_verifier_state(env->cur_state, true); 18270 env->cur_state = NULL; 18271 while (!pop_stack(env, NULL, NULL, false)); 18272 18273 list_for_each_safe(pos, tmp, &env->free_list) { 18274 sl = container_of(pos, struct bpf_verifier_state_list, node); 18275 bpf_free_verifier_state(&sl->state, false); 18276 kfree(sl); 18277 } 18278 INIT_LIST_HEAD(&env->free_list); 18279 18280 for (i = 0; i < env->scc_cnt; ++i) { 18281 info = env->scc_info[i]; 18282 if (!info) 18283 continue; 18284 for (j = 0; j < info->num_visits; j++) 18285 bpf_free_backedges(&info->visits[j]); 18286 kvfree(info); 18287 env->scc_info[i] = NULL; 18288 } 18289 18290 if (!env->explored_states) 18291 return; 18292 18293 for (i = 0; i < state_htab_size(env); i++) { 18294 head = &env->explored_states[i]; 18295 18296 list_for_each_safe(pos, tmp, head) { 18297 sl = container_of(pos, struct bpf_verifier_state_list, node); 18298 bpf_free_verifier_state(&sl->state, false); 18299 kfree(sl); 18300 } 18301 
INIT_LIST_HEAD(&env->explored_states[i]); 18302 } 18303 } 18304 18305 static int do_check_common(struct bpf_verifier_env *env, int subprog) 18306 { 18307 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); 18308 struct bpf_subprog_info *sub = subprog_info(env, subprog); 18309 struct bpf_prog_aux *aux = env->prog->aux; 18310 struct bpf_verifier_state *state; 18311 struct bpf_reg_state *regs; 18312 int ret, i; 18313 18314 env->prev_linfo = NULL; 18315 env->pass_cnt++; 18316 18317 state = kzalloc_obj(struct bpf_verifier_state, GFP_KERNEL_ACCOUNT); 18318 if (!state) 18319 return -ENOMEM; 18320 state->curframe = 0; 18321 state->speculative = false; 18322 state->branches = 1; 18323 state->in_sleepable = env->prog->sleepable; 18324 state->frame[0] = kzalloc_obj(struct bpf_func_state, GFP_KERNEL_ACCOUNT); 18325 if (!state->frame[0]) { 18326 kfree(state); 18327 return -ENOMEM; 18328 } 18329 env->cur_state = state; 18330 init_func_state(env, state->frame[0], 18331 BPF_MAIN_FUNC /* callsite */, 18332 0 /* frameno */, 18333 subprog); 18334 state->first_insn_idx = env->subprog_info[subprog].start; 18335 state->last_insn_idx = -1; 18336 18337 regs = state->frame[state->curframe]->regs; 18338 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { 18339 const char *sub_name = subprog_name(env, subprog); 18340 struct bpf_subprog_arg_info *arg; 18341 struct bpf_reg_state *reg; 18342 18343 if (env->log.level & BPF_LOG_LEVEL) 18344 verbose(env, "Validating %s() func#%d...\n", sub_name, subprog); 18345 ret = btf_prepare_func_args(env, subprog); 18346 if (ret) 18347 goto out; 18348 18349 if (subprog_is_exc_cb(env, subprog)) { 18350 state->frame[0]->in_exception_callback_fn = true; 18351 18352 /* 18353 * Global functions are scalar or void, make sure 18354 * we return a scalar. 
18355 */ 18356 if (subprog_returns_void(env, subprog)) { 18357 verbose(env, "exception cb cannot return void\n"); 18358 ret = -EINVAL; 18359 goto out; 18360 } 18361 18362 /* Also ensure the callback only has a single scalar argument. */ 18363 if (sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_ANYTHING) { 18364 verbose(env, "exception cb only supports single integer argument\n"); 18365 ret = -EINVAL; 18366 goto out; 18367 } 18368 } 18369 for (i = BPF_REG_1; i <= min_t(u32, sub->arg_cnt, MAX_BPF_FUNC_REG_ARGS); i++) { 18370 arg = &sub->args[i - BPF_REG_1]; 18371 reg = ®s[i]; 18372 18373 if (arg->arg_type == ARG_PTR_TO_CTX) { 18374 reg->type = PTR_TO_CTX; 18375 mark_reg_known_zero(env, regs, i); 18376 } else if (arg->arg_type == ARG_ANYTHING) { 18377 reg->type = SCALAR_VALUE; 18378 mark_reg_unknown(env, regs, i); 18379 } else if (arg->arg_type == ARG_PTR_TO_DYNPTR) { 18380 /* assume unspecial LOCAL dynptr type */ 18381 __mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen, 0); 18382 } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { 18383 reg->type = PTR_TO_MEM; 18384 reg->type |= arg->arg_type & 18385 (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY); 18386 mark_reg_known_zero(env, regs, i); 18387 reg->mem_size = arg->mem_size; 18388 if (arg->arg_type & PTR_MAYBE_NULL) 18389 reg->id = ++env->id_gen; 18390 } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { 18391 reg->type = PTR_TO_BTF_ID; 18392 if (arg->arg_type & PTR_MAYBE_NULL) 18393 reg->type |= PTR_MAYBE_NULL; 18394 if (arg->arg_type & PTR_UNTRUSTED) 18395 reg->type |= PTR_UNTRUSTED; 18396 if (arg->arg_type & PTR_TRUSTED) 18397 reg->type |= PTR_TRUSTED; 18398 mark_reg_known_zero(env, regs, i); 18399 reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */ 18400 reg->btf_id = arg->btf_id; 18401 reg->id = ++env->id_gen; 18402 } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) { 18403 /* caller can pass either PTR_TO_ARENA or SCALAR */ 18404 mark_reg_unknown(env, regs, i); 
18405 } else { 18406 verifier_bug(env, "unhandled arg#%d type %d", 18407 i - BPF_REG_1 + 1, arg->arg_type); 18408 ret = -EFAULT; 18409 goto out; 18410 } 18411 } 18412 if (env->prog->type == BPF_PROG_TYPE_EXT && sub->arg_cnt > MAX_BPF_FUNC_REG_ARGS) { 18413 verbose(env, "freplace programs with >%d args not supported yet\n", 18414 MAX_BPF_FUNC_REG_ARGS); 18415 ret = -EINVAL; 18416 goto out; 18417 } 18418 } else { 18419 /* if main BPF program has associated BTF info, validate that 18420 * it's matching expected signature, and otherwise mark BTF 18421 * info for main program as unreliable 18422 */ 18423 if (env->prog->aux->func_info_aux) { 18424 ret = btf_prepare_func_args(env, 0); 18425 if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) { 18426 env->prog->aux->func_info_aux[0].unreliable = true; 18427 sub->arg_cnt = 1; 18428 sub->stack_arg_cnt = 0; 18429 } 18430 } 18431 18432 /* 1st arg to a function */ 18433 regs[BPF_REG_1].type = PTR_TO_CTX; 18434 mark_reg_known_zero(env, regs, BPF_REG_1); 18435 } 18436 18437 /* Acquire references for struct_ops program arguments tagged with "__ref" */ 18438 if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) { 18439 for (i = 0; i < aux->ctx_arg_info_size; i++) { 18440 ret = aux->ctx_arg_info[i].refcounted ? acquire_reference(env, 0, 0) : 0; 18441 if (ret < 0) 18442 goto out; 18443 18444 aux->ctx_arg_info[i].ref_id = ret; 18445 } 18446 } 18447 18448 ret = do_check(env); 18449 out: 18450 if (!ret && pop_log) 18451 bpf_vlog_reset(&env->log, 0); 18452 free_states(env); 18453 return ret; 18454 } 18455 18456 /* Lazily verify all global functions based on their BTF, if they are called 18457 * from main BPF program or any of subprograms transitively. 18458 * BPF global subprogs called from dead code are not validated. 18459 * All callable global functions must pass verification. 18460 * Otherwise the whole program is rejected. 
18461 * Consider: 18462 * int bar(int); 18463 * int foo(int f) 18464 * { 18465 * return bar(f); 18466 * } 18467 * int bar(int b) 18468 * { 18469 * ... 18470 * } 18471 * foo() will be verified first for R1=any_scalar_value. During verification it 18472 * will be assumed that bar() already verified successfully and call to bar() 18473 * from foo() will be checked for type match only. Later bar() will be verified 18474 * independently to check that it's safe for R1=any_scalar_value. 18475 */ 18476 static int do_check_subprogs(struct bpf_verifier_env *env) 18477 { 18478 struct bpf_prog_aux *aux = env->prog->aux; 18479 struct bpf_func_info_aux *sub_aux; 18480 int i, ret, new_cnt; 18481 u32 insn_processed; 18482 18483 if (!aux->func_info) 18484 return 0; 18485 18486 /* exception callback is presumed to be always called */ 18487 if (env->exception_callback_subprog) 18488 subprog_aux(env, env->exception_callback_subprog)->called = true; 18489 18490 again: 18491 new_cnt = 0; 18492 for (i = 1; i < env->subprog_cnt; i++) { 18493 if (!bpf_subprog_is_global(env, i)) 18494 continue; 18495 18496 insn_processed = env->insn_processed; 18497 18498 sub_aux = subprog_aux(env, i); 18499 if (!sub_aux->called || sub_aux->verified) 18500 continue; 18501 18502 env->insn_idx = env->subprog_info[i].start; 18503 WARN_ON_ONCE(env->insn_idx == 0); 18504 ret = do_check_common(env, i); 18505 env->subprog_info[i].insn_processed = env->insn_processed - insn_processed; 18506 if (ret) { 18507 return ret; 18508 } else if (env->log.level & BPF_LOG_LEVEL) { 18509 verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n", 18510 i, subprog_name(env, i)); 18511 } 18512 18513 /* We verified new global subprog, it might have called some 18514 * more global subprogs that we haven't verified yet, so we 18515 * need to do another pass over subprogs to verify those. 
18516 */ 18517 sub_aux->verified = true; 18518 new_cnt++; 18519 } 18520 18521 /* We can't loop forever as we verify at least one global subprog on 18522 * each pass. 18523 */ 18524 if (new_cnt) 18525 goto again; 18526 18527 return 0; 18528 } 18529 18530 static int do_check_main(struct bpf_verifier_env *env) 18531 { 18532 u32 insn_processed = env->insn_processed; 18533 int ret; 18534 18535 env->insn_idx = 0; 18536 ret = do_check_common(env, 0); 18537 env->subprog_info[0].insn_processed = env->insn_processed - insn_processed; 18538 if (!ret) 18539 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 18540 return ret; 18541 } 18542 18543 18544 static void print_verification_stats(struct bpf_verifier_env *env) 18545 { 18546 /* Skip over hidden subprogs which are not verified. */ 18547 int i, subprog_cnt = env->subprog_cnt - env->hidden_subprog_cnt; 18548 18549 if (env->log.level & BPF_LOG_STATS) { 18550 verbose(env, "verification time %lld usec\n", 18551 div_u64(env->verification_time, 1000)); 18552 verbose(env, "stack depth %d", env->subprog_info[0].stack_depth); 18553 for (i = 1; i < subprog_cnt; i++) 18554 verbose(env, "+%d", env->subprog_info[i].stack_depth); 18555 verbose(env, " max %d\n", env->max_stack_depth); 18556 verbose(env, "insns processed %d", env->subprog_info[0].insn_processed); 18557 for (i = 1; i < subprog_cnt; i++) 18558 if (bpf_subprog_is_global(env, i)) 18559 verbose(env, "+%d", env->subprog_info[i].insn_processed); 18560 verbose(env, "\n"); 18561 } 18562 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " 18563 "total_states %d peak_states %d mark_read %d\n", 18564 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, 18565 env->max_states_per_insn, env->total_states, 18566 env->peak_states, env->longest_mark_read_walk); 18567 } 18568 18569 int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, 18570 const struct bpf_ctx_arg_aux *info, u32 cnt) 18571 { 18572 prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), 
GFP_KERNEL_ACCOUNT); 18573 prog->aux->ctx_arg_info_size = cnt; 18574 18575 return prog->aux->ctx_arg_info ? 0 : -ENOMEM; 18576 } 18577 18578 static int check_struct_ops_btf_id(struct bpf_verifier_env *env) 18579 { 18580 const struct btf_type *t, *func_proto; 18581 const struct bpf_struct_ops_desc *st_ops_desc; 18582 const struct bpf_struct_ops *st_ops; 18583 const struct btf_member *member; 18584 struct bpf_prog *prog = env->prog; 18585 bool has_refcounted_arg = false; 18586 u32 btf_id, member_idx, member_off; 18587 struct btf *btf; 18588 const char *mname; 18589 int i, err; 18590 18591 if (!prog->gpl_compatible) { 18592 verbose(env, "struct ops programs must have a GPL compatible license\n"); 18593 return -EINVAL; 18594 } 18595 18596 if (!prog->aux->attach_btf_id) 18597 return -ENOTSUPP; 18598 18599 btf = prog->aux->attach_btf; 18600 if (btf_is_module(btf)) { 18601 /* Make sure st_ops is valid through the lifetime of env */ 18602 env->attach_btf_mod = btf_try_get_module(btf); 18603 if (!env->attach_btf_mod) { 18604 verbose(env, "struct_ops module %s is not found\n", 18605 btf_get_name(btf)); 18606 return -ENOTSUPP; 18607 } 18608 } 18609 18610 btf_id = prog->aux->attach_btf_id; 18611 st_ops_desc = bpf_struct_ops_find(btf, btf_id); 18612 if (!st_ops_desc) { 18613 verbose(env, "attach_btf_id %u is not a supported struct\n", 18614 btf_id); 18615 return -ENOTSUPP; 18616 } 18617 st_ops = st_ops_desc->st_ops; 18618 18619 t = st_ops_desc->type; 18620 member_idx = prog->expected_attach_type; 18621 if (member_idx >= btf_type_vlen(t)) { 18622 verbose(env, "attach to invalid member idx %u of struct %s\n", 18623 member_idx, st_ops->name); 18624 return -EINVAL; 18625 } 18626 18627 member = &btf_type_member(t)[member_idx]; 18628 mname = btf_name_by_offset(btf, member->name_off); 18629 func_proto = btf_type_resolve_func_ptr(btf, member->type, 18630 NULL); 18631 if (!func_proto) { 18632 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", 18633 mname, member_idx, 
st_ops->name); 18634 return -EINVAL; 18635 } 18636 18637 member_off = __btf_member_bit_offset(t, member) / 8; 18638 err = bpf_struct_ops_supported(st_ops, member_off); 18639 if (err) { 18640 verbose(env, "attach to unsupported member %s of struct %s\n", 18641 mname, st_ops->name); 18642 return err; 18643 } 18644 18645 if (st_ops->check_member) { 18646 err = st_ops->check_member(t, member, prog); 18647 18648 if (err) { 18649 verbose(env, "attach to unsupported member %s of struct %s\n", 18650 mname, st_ops->name); 18651 return err; 18652 } 18653 } 18654 18655 if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) { 18656 verbose(env, "Private stack not supported by jit\n"); 18657 return -EACCES; 18658 } 18659 18660 for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) { 18661 if (st_ops_desc->arg_info[member_idx].info[i].refcounted) { 18662 has_refcounted_arg = true; 18663 break; 18664 } 18665 } 18666 18667 /* Tail call is not allowed for programs with refcounted arguments since we 18668 * cannot guarantee that valid refcounted kptrs will be passed to the callee. 
18669 */ 18670 for (i = 0; i < env->subprog_cnt; i++) { 18671 if (has_refcounted_arg && env->subprog_info[i].has_tail_call) { 18672 verbose(env, "program with __ref argument cannot tail call\n"); 18673 return -EINVAL; 18674 } 18675 } 18676 18677 prog->aux->st_ops = st_ops; 18678 prog->aux->attach_st_ops_member_off = member_off; 18679 18680 prog->aux->attach_func_proto = func_proto; 18681 prog->aux->attach_func_name = mname; 18682 env->ops = st_ops->verifier_ops; 18683 18684 return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info, 18685 st_ops_desc->arg_info[member_idx].cnt); 18686 } 18687 #define SECURITY_PREFIX "security_" 18688 18689 #ifdef CONFIG_FUNCTION_ERROR_INJECTION 18690 18691 /* list of non-sleepable functions that are otherwise on 18692 * ALLOW_ERROR_INJECTION list 18693 */ 18694 BTF_SET_START(btf_non_sleepable_error_inject) 18695 /* Three functions below can be called from sleepable and non-sleepable context. 18696 * Assume non-sleepable from bpf safety point of view. 18697 */ 18698 BTF_ID(func, __filemap_add_folio) 18699 #ifdef CONFIG_FAIL_PAGE_ALLOC 18700 BTF_ID(func, should_fail_alloc_page) 18701 #endif 18702 #ifdef CONFIG_FAILSLAB 18703 BTF_ID(func, should_failslab) 18704 #endif 18705 BTF_SET_END(btf_non_sleepable_error_inject) 18706 18707 static int check_non_sleepable_error_inject(u32 btf_id) 18708 { 18709 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id); 18710 } 18711 18712 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name) 18713 { 18714 /* fentry/fexit/fmod_ret progs can be sleepable if they are 18715 * attached to ALLOW_ERROR_INJECTION and are not in denylist. 
18716 */ 18717 if (!check_non_sleepable_error_inject(btf_id) && 18718 within_error_injection_list(addr)) 18719 return 0; 18720 18721 return -EINVAL; 18722 } 18723 18724 static int check_attach_modify_return(unsigned long addr, const char *func_name) 18725 { 18726 if (within_error_injection_list(addr) || 18727 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) 18728 return 0; 18729 18730 return -EINVAL; 18731 } 18732 18733 #else 18734 18735 /* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code 18736 * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name() 18737 * but that just compares two concrete function names. 18738 */ 18739 static bool has_arch_syscall_prefix(const char *func_name) 18740 { 18741 #if defined(__x86_64__) 18742 return !strncmp(func_name, "__x64_", 6); 18743 #elif defined(__i386__) 18744 return !strncmp(func_name, "__ia32_", 7); 18745 #elif defined(__s390x__) 18746 return !strncmp(func_name, "__s390x_", 8); 18747 #elif defined(__aarch64__) 18748 return !strncmp(func_name, "__arm64_", 8); 18749 #elif defined(__riscv) 18750 return !strncmp(func_name, "__riscv_", 8); 18751 #elif defined(__powerpc__) || defined(__powerpc64__) 18752 return !strncmp(func_name, "sys_", 4); 18753 #elif defined(__loongarch__) 18754 return !strncmp(func_name, "sys_", 4); 18755 #else 18756 return false; 18757 #endif 18758 } 18759 18760 /* Without error injection, allow sleepable and fmod_ret progs on syscalls. 
*/ 18761 18762 static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name) 18763 { 18764 if (has_arch_syscall_prefix(func_name)) 18765 return 0; 18766 18767 return -EINVAL; 18768 } 18769 18770 static int check_attach_modify_return(unsigned long addr, const char *func_name) 18771 { 18772 if (has_arch_syscall_prefix(func_name) || 18773 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) 18774 return 0; 18775 18776 return -EINVAL; 18777 } 18778 18779 #endif /* CONFIG_FUNCTION_ERROR_INJECTION */ 18780 18781 static bool is_tracing_multi_id(const struct bpf_prog *prog, u32 btf_id) 18782 { 18783 return is_tracing_multi(prog->expected_attach_type) && bpf_multi_func_btf_id[0] == btf_id; 18784 } 18785 18786 static int btf_id_allow_sleepable(u32 btf_id, unsigned long addr, const struct bpf_prog *prog, 18787 const struct btf *btf) 18788 { 18789 const struct btf_type *t; 18790 const char *tname; 18791 18792 switch (prog->type) { 18793 case BPF_PROG_TYPE_TRACING: 18794 t = btf_type_by_id(btf, btf_id); 18795 if (!t) 18796 return -EINVAL; 18797 tname = btf_name_by_offset(btf, t->name_off); 18798 if (!tname) 18799 return -EINVAL; 18800 18801 /* 18802 * *.multi sleepable programs will pass initial sleepable check, 18803 * the actual attached btf ids are checked later during the link 18804 * attachment. 18805 */ 18806 if (is_tracing_multi_id(prog, btf_id)) 18807 return 0; 18808 if (!check_attach_sleepable(btf_id, addr, tname)) 18809 return 0; 18810 /* 18811 * fentry/fexit/fmod_ret progs can also be sleepable if they are 18812 * in the fmodret id set with the KF_SLEEPABLE flag. 18813 */ 18814 else { 18815 u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, prog); 18816 18817 if (flags && (*flags & KF_SLEEPABLE)) 18818 return 0; 18819 } 18820 break; 18821 case BPF_PROG_TYPE_LSM: 18822 /* 18823 * LSM progs check that they are attached to bpf_lsm_*() funcs. 18824 * Only some of them are sleepable. 
18825 */ 18826 if (bpf_lsm_is_sleepable_hook(btf_id)) 18827 return 0; 18828 break; 18829 default: 18830 break; 18831 } 18832 return -EINVAL; 18833 } 18834 18835 int bpf_check_attach_target(struct bpf_verifier_log *log, 18836 const struct bpf_prog *prog, 18837 const struct bpf_prog *tgt_prog, 18838 u32 btf_id, 18839 struct bpf_attach_target_info *tgt_info) 18840 { 18841 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; 18842 bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING; 18843 char trace_symbol[KSYM_SYMBOL_LEN]; 18844 const char prefix[] = "btf_trace_"; 18845 struct bpf_raw_event_map *btp; 18846 int ret = 0, subprog = -1, i; 18847 const struct btf_type *t; 18848 bool conservative = true; 18849 const char *tname, *fname; 18850 struct btf *btf; 18851 long addr = 0; 18852 struct module *mod = NULL; 18853 18854 if (!btf_id) { 18855 bpf_log(log, "Tracing programs must provide btf_id\n"); 18856 return -EINVAL; 18857 } 18858 btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf; 18859 if (!btf) { 18860 bpf_log(log, 18861 "Tracing program can only be attached to another program annotated with BTF\n"); 18862 return -EINVAL; 18863 } 18864 t = btf_type_by_id(btf, btf_id); 18865 if (!t) { 18866 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id); 18867 return -EINVAL; 18868 } 18869 tname = btf_name_by_offset(btf, t->name_off); 18870 if (!tname) { 18871 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id); 18872 return -EINVAL; 18873 } 18874 if (tgt_prog) { 18875 struct bpf_prog_aux *aux = tgt_prog->aux; 18876 bool tgt_changes_pkt_data; 18877 bool tgt_might_sleep; 18878 18879 if (bpf_prog_is_dev_bound(prog->aux) && 18880 !bpf_prog_dev_bound_match(prog, tgt_prog)) { 18881 bpf_log(log, "Target program bound device mismatch"); 18882 return -EINVAL; 18883 } 18884 18885 for (i = 0; i < aux->func_info_cnt; i++) 18886 if (aux->func_info[i].type_id == btf_id) { 18887 subprog = i; 18888 break; 18889 } 18890 if (subprog == -1) { 18891 bpf_log(log, "Subprog 
%s doesn't exist\n", tname); 18892 return -EINVAL; 18893 } 18894 if (aux->func && aux->func[subprog]->aux->exception_cb) { 18895 bpf_log(log, 18896 "%s programs cannot attach to exception callback\n", 18897 prog_extension ? "Extension" : "Tracing"); 18898 return -EINVAL; 18899 } 18900 conservative = aux->func_info_aux[subprog].unreliable; 18901 if (prog_extension) { 18902 if (conservative) { 18903 bpf_log(log, 18904 "Cannot replace static functions\n"); 18905 return -EINVAL; 18906 } 18907 if (!prog->jit_requested) { 18908 bpf_log(log, 18909 "Extension programs should be JITed\n"); 18910 return -EINVAL; 18911 } 18912 tgt_changes_pkt_data = aux->func 18913 ? aux->func[subprog]->aux->changes_pkt_data 18914 : aux->changes_pkt_data; 18915 if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) { 18916 bpf_log(log, 18917 "Extension program changes packet data, while original does not\n"); 18918 return -EINVAL; 18919 } 18920 18921 tgt_might_sleep = aux->func 18922 ? aux->func[subprog]->aux->might_sleep 18923 : aux->might_sleep; 18924 if (prog->aux->might_sleep && !tgt_might_sleep) { 18925 bpf_log(log, 18926 "Extension program may sleep, while original does not\n"); 18927 return -EINVAL; 18928 } 18929 } 18930 if (!tgt_prog->jited) { 18931 bpf_log(log, "Can attach to only JITed progs\n"); 18932 return -EINVAL; 18933 } 18934 if (prog_tracing) { 18935 if (aux->attach_tracing_prog) { 18936 /* 18937 * Target program is an fentry/fexit which is already attached 18938 * to another tracing program. More levels of nesting 18939 * attachment are not allowed. 18940 */ 18941 bpf_log(log, "Cannot nest tracing program attach more than once\n"); 18942 return -EINVAL; 18943 } 18944 } else if (tgt_prog->type == prog->type) { 18945 /* 18946 * To avoid potential call chain cycles, prevent attaching of a 18947 * program extension to another extension. It's ok to attach 18948 * fentry/fexit to extension program. 
18949 */ 18950 bpf_log(log, "Cannot recursively attach\n"); 18951 return -EINVAL; 18952 } 18953 if (tgt_prog->type == BPF_PROG_TYPE_TRACING && 18954 prog_extension && 18955 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || 18956 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT || 18957 tgt_prog->expected_attach_type == BPF_TRACE_FENTRY_MULTI || 18958 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI || 18959 tgt_prog->expected_attach_type == BPF_TRACE_FSESSION || 18960 tgt_prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) { 18961 /* Program extensions can extend all program types 18962 * except fentry/fexit. The reason is the following. 18963 * The fentry/fexit programs are used for performance 18964 * analysis, stats and can be attached to any program 18965 * type. When extension program is replacing XDP function 18966 * it is necessary to allow performance analysis of all 18967 * functions. Both original XDP program and its program 18968 * extension. Hence attaching fentry/fexit to 18969 * BPF_PROG_TYPE_EXT is allowed. If extending of 18970 * fentry/fexit was allowed it would be possible to create 18971 * long call chain fentry->extension->fentry->extension 18972 * beyond reasonable stack size. Hence extending fentry 18973 * is not allowed. 
18974 */ 18975 bpf_log(log, "Cannot extend fentry/fexit/fsession\n"); 18976 return -EINVAL; 18977 } 18978 } else { 18979 if (prog_extension) { 18980 bpf_log(log, "Cannot replace kernel functions\n"); 18981 return -EINVAL; 18982 } 18983 } 18984 18985 switch (prog->expected_attach_type) { 18986 case BPF_TRACE_RAW_TP: 18987 if (tgt_prog) { 18988 bpf_log(log, 18989 "Only FENTRY/FEXIT/FSESSION progs are attachable to another BPF prog\n"); 18990 return -EINVAL; 18991 } 18992 if (!btf_type_is_typedef(t)) { 18993 bpf_log(log, "attach_btf_id %u is not a typedef\n", 18994 btf_id); 18995 return -EINVAL; 18996 } 18997 if (strncmp(prefix, tname, sizeof(prefix) - 1)) { 18998 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n", 18999 btf_id, tname); 19000 return -EINVAL; 19001 } 19002 tname += sizeof(prefix) - 1; 19003 19004 /* The func_proto of "btf_trace_##tname" is generated from typedef without argument 19005 * names. Thus using bpf_raw_event_map to get argument names. 19006 */ 19007 btp = bpf_get_raw_tracepoint(tname); 19008 if (!btp) 19009 return -EINVAL; 19010 if (prog->sleepable && !tracepoint_is_faultable(btp->tp)) { 19011 bpf_log(log, "Sleepable program cannot attach to non-faultable tracepoint %s\n", 19012 tname); 19013 bpf_put_raw_tracepoint(btp); 19014 return -EINVAL; 19015 } 19016 fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL, 19017 trace_symbol); 19018 bpf_put_raw_tracepoint(btp); 19019 19020 if (fname) 19021 ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC); 19022 19023 if (!fname || ret < 0) { 19024 bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n", 19025 prefix, tname); 19026 t = btf_type_by_id(btf, t->type); 19027 if (!btf_type_is_ptr(t)) 19028 /* should never happen in valid vmlinux build */ 19029 return -EINVAL; 19030 } else { 19031 t = btf_type_by_id(btf, ret); 19032 if (!btf_type_is_func(t)) 19033 /* should never happen in valid vmlinux build */ 19034 return -EINVAL; 19035 } 19036 19037 t = 
btf_type_by_id(btf, t->type); 19038 if (!btf_type_is_func_proto(t)) 19039 /* should never happen in valid vmlinux build */ 19040 return -EINVAL; 19041 19042 break; 19043 case BPF_TRACE_ITER: 19044 if (!btf_type_is_func(t)) { 19045 bpf_log(log, "attach_btf_id %u is not a function\n", 19046 btf_id); 19047 return -EINVAL; 19048 } 19049 t = btf_type_by_id(btf, t->type); 19050 if (!btf_type_is_func_proto(t)) 19051 return -EINVAL; 19052 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel); 19053 if (ret) 19054 return ret; 19055 break; 19056 default: 19057 if (!prog_extension) 19058 return -EINVAL; 19059 fallthrough; 19060 case BPF_MODIFY_RETURN: 19061 case BPF_LSM_MAC: 19062 case BPF_LSM_CGROUP: 19063 case BPF_TRACE_FENTRY: 19064 case BPF_TRACE_FEXIT: 19065 case BPF_TRACE_FSESSION: 19066 case BPF_TRACE_FSESSION_MULTI: 19067 case BPF_TRACE_FENTRY_MULTI: 19068 case BPF_TRACE_FEXIT_MULTI: 19069 if ((prog->expected_attach_type == BPF_TRACE_FSESSION || 19070 prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) && 19071 !bpf_jit_supports_fsession()) { 19072 bpf_log(log, "JIT does not support fsession\n"); 19073 return -EOPNOTSUPP; 19074 } 19075 if (!btf_type_is_func(t)) { 19076 bpf_log(log, "attach_btf_id %u is not a function\n", 19077 btf_id); 19078 return -EINVAL; 19079 } 19080 if (prog_extension && 19081 btf_check_type_match(log, prog, btf, t)) 19082 return -EINVAL; 19083 t = btf_type_by_id(btf, t->type); 19084 if (!btf_type_is_func_proto(t)) 19085 return -EINVAL; 19086 19087 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) && 19088 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type || 19089 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type)) 19090 return -EINVAL; 19091 19092 if (tgt_prog && conservative) 19093 t = NULL; 19094 19095 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel); 19096 if (ret < 0) 19097 return ret; 19098 19099 /* 19100 * *.multi programs don't need an address during 
program 19101 * verification, we just take the module ref if needed. 19102 */ 19103 if (is_tracing_multi_id(prog, btf_id)) { 19104 if (btf_is_module(btf)) { 19105 mod = btf_try_get_module(btf); 19106 if (!mod) 19107 return -ENOENT; 19108 } 19109 addr = 0; 19110 } else if (tgt_prog) { 19111 if (subprog == 0) 19112 addr = (long) tgt_prog->bpf_func; 19113 else 19114 addr = (long) tgt_prog->aux->func[subprog]->bpf_func; 19115 } else { 19116 if (btf_is_module(btf)) { 19117 mod = btf_try_get_module(btf); 19118 if (mod) 19119 addr = find_kallsyms_symbol_value(mod, tname); 19120 else 19121 addr = 0; 19122 } else { 19123 addr = kallsyms_lookup_name(tname); 19124 } 19125 if (!addr) { 19126 module_put(mod); 19127 bpf_log(log, 19128 "The address of function %s cannot be found\n", 19129 tname); 19130 return -ENOENT; 19131 } 19132 } 19133 19134 if (prog->sleepable) { 19135 ret = btf_id_allow_sleepable(btf_id, addr, prog, btf); 19136 if (ret) { 19137 module_put(mod); 19138 bpf_log(log, "%s is not sleepable\n", tname); 19139 return ret; 19140 } 19141 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { 19142 if (tgt_prog) { 19143 module_put(mod); 19144 bpf_log(log, "can't modify return codes of BPF programs\n"); 19145 return -EINVAL; 19146 } 19147 ret = -EINVAL; 19148 if (btf_kfunc_is_modify_return(btf, btf_id, prog) || 19149 !check_attach_modify_return(addr, tname)) 19150 ret = 0; 19151 if (ret) { 19152 module_put(mod); 19153 bpf_log(log, "%s() is not modifiable\n", tname); 19154 return ret; 19155 } 19156 } 19157 19158 break; 19159 } 19160 tgt_info->tgt_addr = addr; 19161 tgt_info->tgt_name = tname; 19162 tgt_info->tgt_type = t; 19163 tgt_info->tgt_mod = mod; 19164 return 0; 19165 } 19166 19167 BTF_SET_START(btf_id_deny) 19168 BTF_ID_UNUSED 19169 #ifdef CONFIG_SMP 19170 BTF_ID(func, ___migrate_enable) 19171 BTF_ID(func, migrate_disable) 19172 BTF_ID(func, migrate_enable) 19173 #endif 19174 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU 19175 BTF_ID(func, 
rcu_read_unlock_strict) 19176 #endif 19177 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE) 19178 BTF_ID(func, preempt_count_add) 19179 BTF_ID(func, preempt_count_sub) 19180 #endif 19181 #ifdef CONFIG_PREEMPT_RCU 19182 BTF_ID(func, __rcu_read_lock) 19183 BTF_ID(func, __rcu_read_unlock) 19184 #endif 19185 BTF_SET_END(btf_id_deny) 19186 19187 /* fexit and fmod_ret can't be used to attach to __noreturn functions. 19188 * Currently, we must manually list all __noreturn functions here. Once a more 19189 * robust solution is implemented, this workaround can be removed. 19190 */ 19191 BTF_SET_START(noreturn_deny) 19192 #ifdef CONFIG_IA32_EMULATION 19193 BTF_ID(func, __ia32_sys_exit) 19194 BTF_ID(func, __ia32_sys_exit_group) 19195 #endif 19196 #ifdef CONFIG_KUNIT 19197 BTF_ID(func, __kunit_abort) 19198 BTF_ID(func, kunit_try_catch_throw) 19199 #endif 19200 #ifdef CONFIG_MODULES 19201 BTF_ID(func, __module_put_and_kthread_exit) 19202 #endif 19203 #ifdef CONFIG_X86_64 19204 BTF_ID(func, __x64_sys_exit) 19205 BTF_ID(func, __x64_sys_exit_group) 19206 #endif 19207 BTF_ID(func, do_exit) 19208 BTF_ID(func, do_group_exit) 19209 BTF_ID(func, kthread_complete_and_exit) 19210 BTF_ID(func, make_task_dead) 19211 BTF_SET_END(noreturn_deny) 19212 19213 static bool can_be_sleepable(struct bpf_prog *prog) 19214 { 19215 if (prog->type == BPF_PROG_TYPE_TRACING) { 19216 switch (prog->expected_attach_type) { 19217 case BPF_TRACE_FENTRY: 19218 case BPF_TRACE_FEXIT: 19219 case BPF_MODIFY_RETURN: 19220 case BPF_TRACE_ITER: 19221 case BPF_TRACE_FSESSION: 19222 case BPF_TRACE_RAW_TP: 19223 case BPF_TRACE_FENTRY_MULTI: 19224 case BPF_TRACE_FEXIT_MULTI: 19225 case BPF_TRACE_FSESSION_MULTI: 19226 return true; 19227 default: 19228 return false; 19229 } 19230 } 19231 if (prog->type == BPF_PROG_TYPE_LSM) 19232 return prog->expected_attach_type != BPF_LSM_CGROUP; 19233 19234 return prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ || 19235 prog->type == 
BPF_PROG_TYPE_STRUCT_OPS || 19236 prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT || 19237 prog->type == BPF_PROG_TYPE_TRACEPOINT; 19238 } 19239 19240 static int check_attach_btf_id(struct bpf_verifier_env *env) 19241 { 19242 struct bpf_prog *prog = env->prog; 19243 struct bpf_prog *tgt_prog = prog->aux->dst_prog; 19244 struct bpf_attach_target_info tgt_info = {}; 19245 u32 btf_id = prog->aux->attach_btf_id; 19246 struct bpf_trampoline *tr; 19247 int ret; 19248 u64 key; 19249 19250 if (prog->type == BPF_PROG_TYPE_SYSCALL) { 19251 if (prog->sleepable) 19252 /* attach_btf_id checked to be zero already */ 19253 return 0; 19254 verbose(env, "Syscall programs can only be sleepable\n"); 19255 return -EINVAL; 19256 } 19257 19258 if (prog->sleepable && !can_be_sleepable(prog)) { 19259 verbose(env, "Program of this type cannot be sleepable\n"); 19260 return -EINVAL; 19261 } 19262 19263 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) 19264 return check_struct_ops_btf_id(env); 19265 19266 if (prog->type != BPF_PROG_TYPE_TRACING && 19267 prog->type != BPF_PROG_TYPE_LSM && 19268 prog->type != BPF_PROG_TYPE_EXT) 19269 return 0; 19270 19271 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info); 19272 if (ret) 19273 return ret; 19274 19275 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) { 19276 /* to make freplace equivalent to their targets, they need to 19277 * inherit env->ops and expected_attach_type for the rest of the 19278 * verification 19279 */ 19280 env->ops = bpf_verifier_ops[tgt_prog->type]; 19281 prog->expected_attach_type = tgt_prog->expected_attach_type; 19282 } 19283 19284 /* store info about the attachment target that will be used later */ 19285 prog->aux->attach_func_proto = tgt_info.tgt_type; 19286 prog->aux->attach_func_name = tgt_info.tgt_name; 19287 prog->aux->mod = tgt_info.tgt_mod; 19288 19289 if (tgt_prog) { 19290 prog->aux->saved_dst_prog_type = tgt_prog->type; 19291 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type; 19292 } 
19293 19294 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { 19295 prog->aux->attach_btf_trace = true; 19296 return 0; 19297 } else if (prog->expected_attach_type == BPF_TRACE_ITER) { 19298 return bpf_iter_prog_supported(prog); 19299 } 19300 19301 if (prog->type == BPF_PROG_TYPE_LSM) { 19302 ret = bpf_lsm_verify_prog(&env->log, prog); 19303 if (ret < 0) 19304 return ret; 19305 } else if (prog->type == BPF_PROG_TYPE_TRACING && 19306 btf_id_set_contains(&btf_id_deny, btf_id)) { 19307 verbose(env, "Attaching tracing programs to function '%s' is rejected.\n", 19308 tgt_info.tgt_name); 19309 return -EINVAL; 19310 } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT || 19311 prog->expected_attach_type == BPF_TRACE_FSESSION || 19312 prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI || 19313 prog->expected_attach_type == BPF_MODIFY_RETURN) && 19314 btf_id_set_contains(&noreturn_deny, btf_id)) { 19315 verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n", 19316 tgt_info.tgt_name); 19317 return -EINVAL; 19318 } 19319 19320 /* 19321 * We don't get trampoline for tracing_multi programs at this point, 19322 * it's done when tracing_multi link is created. 
19323 */ 19324 if (prog->type == BPF_PROG_TYPE_TRACING && 19325 is_tracing_multi(prog->expected_attach_type)) 19326 return 0; 19327 19328 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); 19329 tr = bpf_trampoline_get(key, &tgt_info); 19330 if (!tr) 19331 return -ENOMEM; 19332 19333 if (tgt_prog && tgt_prog->aux->tail_call_reachable) 19334 tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX; 19335 19336 prog->aux->dst_trampoline = tr; 19337 return 0; 19338 } 19339 19340 int bpf_check_attach_btf_id_multi(struct btf *btf, struct bpf_prog *prog, u32 btf_id, 19341 struct bpf_attach_target_info *tgt_info) 19342 { 19343 const struct btf_type *t; 19344 unsigned long addr; 19345 const char *tname; 19346 int err; 19347 19348 if (!btf_id || !btf) 19349 return -EINVAL; 19350 19351 /* Check noreturn attachment. */ 19352 if ((prog->expected_attach_type == BPF_TRACE_FEXIT_MULTI || 19353 prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) && 19354 btf_id_set_contains(&noreturn_deny, btf_id)) 19355 return -EINVAL; 19356 /* Check denied attachment. */ 19357 if (btf_id_set_contains(&btf_id_deny, btf_id)) 19358 return -EINVAL; 19359 19360 /* Check and get function target data. */ 19361 t = btf_type_by_id(btf, btf_id); 19362 if (!t) 19363 return -EINVAL; 19364 tname = btf_name_by_offset(btf, t->name_off); 19365 if (!tname) 19366 return -EINVAL; 19367 if (!btf_type_is_func(t)) 19368 return -EINVAL; 19369 t = btf_type_by_id(btf, t->type); 19370 if (!btf_type_is_func_proto(t)) 19371 return -EINVAL; 19372 err = btf_distill_func_proto(NULL, btf, t, tname, &tgt_info->fmodel); 19373 if (err < 0) 19374 return err; 19375 if (btf_is_module(btf)) { 19376 /* The bpf program already holds reference to module. 
*/ 19377 if (WARN_ON_ONCE(!prog->aux->mod)) 19378 return -EINVAL; 19379 addr = find_kallsyms_symbol_value(prog->aux->mod, tname); 19380 } else { 19381 addr = kallsyms_lookup_name(tname); 19382 } 19383 if (!addr || !ftrace_location(addr)) 19384 return -ENOENT; 19385 19386 /* Check sleepable program attachment. */ 19387 if (prog->sleepable) { 19388 err = btf_id_allow_sleepable(btf_id, addr, prog, btf); 19389 if (err) 19390 return err; 19391 } 19392 tgt_info->tgt_addr = addr; 19393 return 0; 19394 } 19395 19396 struct btf *bpf_get_btf_vmlinux(void) 19397 { 19398 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { 19399 mutex_lock(&bpf_verifier_lock); 19400 if (!btf_vmlinux) 19401 btf_vmlinux = btf_parse_vmlinux(); 19402 mutex_unlock(&bpf_verifier_lock); 19403 } 19404 return btf_vmlinux; 19405 } 19406 19407 /* 19408 * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In 19409 * this case expect that every file descriptor in the array is either a map or 19410 * a BTF. Everything else is considered to be trash. 
*/
static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
{
	struct bpf_map *map;
	struct btf *btf;
	CLASS(fd, f)(fd);
	int err;

	/* first try to interpret the fd as a map ... */
	map = __bpf_map_get(f);
	if (!IS_ERR(map)) {
		err = __add_used_map(env, map);
		if (err < 0)
			return err;
		return 0;
	}

	/* ... then as a BTF object */
	btf = __btf_get_by_fd(f);
	if (!IS_ERR(btf)) {
		btf_get(btf);
		return __add_used_btf(env, btf);
	}

	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
	/* report the error from the map lookup */
	return PTR_ERR(map);
}

/* Record attr->fd_array in env->fd_array and, when attr->fd_array_cnt is
 * non-zero, copy each of the fd_array_cnt descriptors and register it via
 * add_fd_from_fd_array().
 *
 * Returns 0 on success, -EINVAL if fd_array_cnt is too big, -EFAULT if a
 * descriptor cannot be copied, or the non-zero value returned by
 * add_fd_from_fd_array().
 */
static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
{
	size_t size = sizeof(int);
	int ret;
	int fd;
	u32 i;

	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);

	/*
	 * The only difference between old (no fd_array_cnt is given) and new
	 * APIs is that in the latter case the fd_array is expected to be
	 * contiguous and is scanned for map fds right away
	 */
	if (!attr->fd_array_cnt)
		return 0;

	/* Check for integer overflow */
	if (attr->fd_array_cnt >= (U32_MAX / size)) {
		verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt);
		return -EINVAL;
	}

	for (i = 0; i < attr->fd_array_cnt; i++) {
		if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size))
			return -EFAULT;

		ret = add_fd_from_fd_array(env, fd);
		if (ret)
			return ret;
	}

	return 0;
}

/* replace a generic kfunc with a specialized version if necessary
 *
 * Only desc->addr is updated; descriptors with a non-zero offset are left
 * untouched. Always returns 0.
 */
static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx)
{
	struct bpf_prog *prog = env->prog;
	bool seen_direct_write;
	void *xdp_kfunc;
	bool is_rdonly;
	u32 func_id = desc->func_id;
	u16 offset = desc->offset;
	unsigned long addr = desc->addr;

	if (offset) /* return if module BTF is used */
		return 0;

	if (bpf_dev_bound_kfunc_id(func_id)) {
		xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
		if (xdp_kfunc)
			addr = (unsigned long)xdp_kfunc;
		/* fallback to default kfunc when not supported by netdev */
	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
		seen_direct_write = env->seen_direct_write;
		is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);

		if (is_rdonly)
			addr = (unsigned long)bpf_dynptr_from_skb_rdonly;

		/* restore env->seen_direct_write to its original value, since
		 * may_access_direct_pkt_data mutates it
		 */
		env->seen_direct_write = seen_direct_write;
	} else if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr]) {
		if (bpf_lsm_has_d_inode_locked(prog))
			addr = (unsigned long)bpf_set_dentry_xattr_locked;
	} else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) {
		if (bpf_lsm_has_d_inode_locked(prog))
			addr = (unsigned long)bpf_remove_dentry_xattr_locked;
	} else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) {
		if (!env->insn_aux_data[insn_idx].non_sleepable)
			addr = (unsigned long)bpf_dynptr_from_file_sleepable;
	} else if (func_id == special_kfunc_list[KF_bpf_arena_alloc_pages]) {
		if (env->insn_aux_data[insn_idx].non_sleepable)
			addr = (unsigned long)bpf_arena_alloc_pages_non_sleepable;
	} else if (func_id == special_kfunc_list[KF_bpf_arena_free_pages]) {
		if (env->insn_aux_data[insn_idx].non_sleepable)
			addr = (unsigned long)bpf_arena_free_pages_non_sleepable;
	}
	desc->addr = addr;
	return 0;
}

/* Emit the patch for a collection-insert kfunc call into @insn_buf: a
 * two-slot BPF_LD_IMM64 loading insn_aux->kptr_struct_meta into
 * @struct_meta_reg, a move of insn_aux->insert_off into @node_offset_reg,
 * and then the original call @insn. *@cnt is set to 4.
 */
static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
					    u16 struct_meta_reg,
					    u16 node_offset_reg,
					    struct bpf_insn *insn,
					    struct bpf_insn *insn_buf,
					    int *cnt)
{
	struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
	struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };

	insn_buf[0] = addr[0];
	insn_buf[1] = addr[1];
	insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
	insn_buf[3] = *insn;
	*cnt = 4;
}

/* Rewrite a kfunc call instruction after verification.
 *
 * @env:      verifier environment
 * @insn:     the call instruction; its imm is rewritten in place unless the
 *            JIT supports far kfunc calls
 * @insn_buf: output buffer for the replacement instruction sequence
 * @insn_idx: index of @insn, used to look up env->insn_aux_data
 * @cnt:      output; number of instructions written to @insn_buf, left at 0
 *            when no replacement sequence was written
 *
 * Returns 0 on success, -EINVAL if insn->imm is zero, -EFAULT if the kfunc
 * descriptor is missing or the recorded kptr_struct_meta is inconsistent
 * with the kfunc, or the error returned by specialize_kfunc().
 */
int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			 struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
	struct bpf_kfunc_desc *desc;
	int err;

	if (!insn->imm) {
		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
		return -EINVAL;
	}

	*cnt = 0;

	/* insn->imm has the btf func_id. Replace it with an offset relative to
	 * __bpf_call_base, unless the JIT needs to call functions that are
	 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
	 */
	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
	if (!desc) {
		verifier_bug(env, "kernel function descriptor not found for func_id %u",
			     insn->imm);
		return -EFAULT;
	}

	err = specialize_kfunc(env, desc, insn_idx);
	if (err)
		return err;

	if (!bpf_jit_supports_far_kfunc_call())
		insn->imm = BPF_CALL_IMM(desc->addr);

	if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;

		if (is_bpf_percpu_obj_new_kfunc(desc->func_id) && kptr_struct_meta) {
			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		/* R1 = object size, R2 = struct meta, then the call itself */
		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
		insn_buf[1] = addr[0];
		insn_buf[2] = addr[1];
		insn_buf[3] = *insn;
		*cnt = 4;
	} else if (is_bpf_obj_drop_kfunc(desc->func_id) ||
		   is_bpf_percpu_obj_drop_kfunc(desc->func_id) ||
		   is_bpf_refcount_acquire_kfunc(desc->func_id)) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };

		if (is_bpf_percpu_obj_drop_kfunc(desc->func_id) && kptr_struct_meta) {
			verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		if (is_bpf_refcount_acquire_kfunc(desc->func_id) && !kptr_struct_meta) {
			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		/* R2 = struct meta, then the call itself */
		insn_buf[0] = addr[0];
		insn_buf[1] = addr[1];
		insn_buf[2] = *insn;
		*cnt = 3;
	} else if (is_bpf_list_push_kfunc(desc->func_id) ||
		   is_bpf_rbtree_add_kfunc(desc->func_id)) {
		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
		int struct_meta_reg = BPF_REG_3;
		int node_offset_reg = BPF_REG_4;

		/* list_add/rbtree_add have an extra arg (prev/less),
		 * so args-to-fixup are in diff regs.
		 */
		if (desc->func_id == special_kfunc_list[KF_bpf_list_add] ||
		    is_bpf_rbtree_add_kfunc(desc->func_id)) {
			struct_meta_reg = BPF_REG_4;
			node_offset_reg = BPF_REG_5;
		}

		if (!kptr_struct_meta) {
			verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
				     insn_idx);
			return -EFAULT;
		}

		__fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
						node_offset_reg, insn, insn_buf, cnt);
	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
		/* the call is replaced by a plain R0 = R1 move */
		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
		*cnt = 1;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
		   (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
		    env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {

		/*
		 * inline the bpf_session_is_return() for fsession:
		 *   bool bpf_session_is_return(void *ctx)
		 *   {
		 *       return (((u64 *)ctx)[-1] >> BPF_TRAMP_IS_RETURN_SHIFT) & 1;
		 *   }
		 */
		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_IS_RETURN_SHIFT);
		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
		*cnt = 3;
	} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
		   (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
		    env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
		/*
		 * inline bpf_session_cookie() for fsession:
		 *   __u64 *bpf_session_cookie(void *ctx)
		 *   {
		 *       u64 off = (((u64 *)ctx)[-1] >> BPF_TRAMP_COOKIE_INDEX_SHIFT) & 0xFF;
		 *       return &((u64 *)ctx)[-off];
		 *   }
		 */
		insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
		insn_buf[1] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, BPF_TRAMP_COOKIE_INDEX_SHIFT);
		insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFF);
		/* R0 = off * 8, then R0 = -(R0 - R1) = ctx - off * 8 */
		insn_buf[3] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
		insn_buf[4] = BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1);
		insn_buf[5] = BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0);
		*cnt = 6;
	}

	if (env->insn_aux_data[insn_idx].arg_prog) {
		u32 regno = env->insn_aux_data[insn_idx].arg_prog;
		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
		int idx = *cnt;

		/* load env->prog->aux into the recorded register, then call */
		insn_buf[idx++] = ld_addrs[0];
		insn_buf[idx++] = ld_addrs[1];
		insn_buf[idx++] = *insn;
		*cnt = idx;
	}
	return 0;
}

int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr,
	      struct bpf_log_attr *attr_log)
{
	u64 start_time = ktime_get_ns();
	struct bpf_verifier_env *env;
	int i, len, ret = -EINVAL, err;
	bool is_priv;

	BTF_TYPE_EMIT(enum bpf_features);

	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;

	/* 'struct bpf_verifier_env' can be global, but since it's not small,
	 * allocate/free it every time bpf_check() is called
	 */
	env = kvzalloc_obj(struct bpf_verifier_env, GFP_KERNEL_ACCOUNT);
	if (!env)
		return -ENOMEM;

	env->bt.env = env;

	len = (*prog)->len;
	env->insn_aux_data =
		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
	ret = -ENOMEM;
	if (!env->insn_aux_data)
		goto err_free_env;
	for (i = 0; i < len; i++)
		env->insn_aux_data[i].orig_idx = i;
	env->succ = bpf_iarray_realloc(NULL, 2);
	if (!env->succ)
		goto err_free_env;
	env->prog = *prog;
	env->ops = bpf_verifier_ops[env->prog->type];

	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
	env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
	env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
	env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);

	bpf_get_btf_vmlinux();

	/* grab the mutex to protect few globals used by verifier */
	if (!is_priv)
		mutex_lock(&bpf_verifier_lock);

	/* user could have requested verbose verifier output
	 * and supplied buffer to store the verification trace
	 */
	ret = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size);
	if (ret)
		goto err_unlock;

	ret = process_fd_array(env, attr, uattr);
	if (ret)
		goto skip_full_check;

	mark_verifier_state_clean(env);

	if (IS_ERR(btf_vmlinux)) {
		/* Either gcc or pahole or kernel are broken. */
		verbose(env, "in-kernel BTF is malformed\n");
		ret = PTR_ERR(btf_vmlinux);
		goto skip_full_check;
	}

	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		env->strict_alignment = true;
	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
		env->strict_alignment = false;

	if (is_priv)
		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;

	env->explored_states = kvzalloc_objs(struct list_head,
					     state_htab_size(env),
					     GFP_KERNEL_ACCOUNT);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

	for (i = 0; i < state_htab_size(env); i++)
		INIT_LIST_HEAD(&env->explored_states[i]);
	INIT_LIST_HEAD(&env->free_list);

	ret = bpf_check_btf_info_early(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = add_subprog_and_kfunc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_subprogs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_check_btf_info(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = check_and_resolve_insns(env);
	if (ret < 0)
		goto skip_full_check;

	if (bpf_prog_is_offloaded(env->prog->aux)) {
		ret = bpf_prog_offload_verifier_prep(env->prog);
		if (ret)
			goto skip_full_check;
	}

	ret = bpf_check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_postorder(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_stack_liveness_init(env);
	if (ret)
		goto skip_full_check;

	ret = check_attach_btf_id(env);
	if (ret)
		goto skip_full_check;

	ret = bpf_compute_const_regs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_prune_dead_branches(env);
	if (ret < 0)
		goto skip_full_check;

	ret = sort_subprogs_topo(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_scc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = bpf_compute_live_registers(env);
	if (ret < 0)
		goto skip_full_check;

	ret = mark_fastcall_patterns(env);
	if (ret < 0)
		goto skip_full_check;

	ret = do_check_main(env);
	ret = ret ?: do_check_subprogs(env);

	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
		ret = bpf_prog_offload_finalize(env);

skip_full_check:
	kvfree(env->explored_states);

	/* might decrease stack depth, keep it before passes that
	 * allocate additional slots.
19850 */ 19851 if (ret == 0) 19852 ret = bpf_remove_fastcall_spills_fills(env); 19853 19854 if (ret == 0) 19855 ret = check_max_stack_depth(env); 19856 19857 /* instruction rewrites happen after this point */ 19858 if (ret == 0) 19859 ret = bpf_optimize_bpf_loop(env); 19860 19861 if (is_priv) { 19862 if (ret == 0) 19863 bpf_opt_hard_wire_dead_code_branches(env); 19864 if (ret == 0) 19865 ret = bpf_opt_remove_dead_code(env); 19866 if (ret == 0) 19867 ret = bpf_opt_remove_nops(env); 19868 } else { 19869 if (ret == 0) 19870 sanitize_dead_code(env); 19871 } 19872 19873 if (ret == 0) 19874 /* program is valid, convert *(u32*)(ctx + off) accesses */ 19875 ret = bpf_convert_ctx_accesses(env); 19876 19877 if (ret == 0) 19878 ret = bpf_do_misc_fixups(env); 19879 19880 /* do 32-bit optimization after insn patching has done so those patched 19881 * insns could be handled correctly. 19882 */ 19883 if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) { 19884 ret = bpf_opt_subreg_zext_lo32_rnd_hi32(env, attr); 19885 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? 
!ret 19886 : false; 19887 } 19888 19889 if (ret == 0) 19890 ret = bpf_fixup_call_args(env); 19891 19892 env->verification_time = ktime_get_ns() - start_time; 19893 print_verification_stats(env); 19894 env->prog->aux->verified_insns = env->insn_processed; 19895 19896 /* preserve original error even if log finalization is successful */ 19897 err = bpf_log_attr_finalize(attr_log, &env->log); 19898 if (err) 19899 ret = err; 19900 19901 if (ret) 19902 goto err_release_maps; 19903 19904 if (env->used_map_cnt) { 19905 /* if program passed verifier, update used_maps in bpf_prog_info */ 19906 env->prog->aux->used_maps = kmalloc_objs(env->used_maps[0], 19907 env->used_map_cnt, 19908 GFP_KERNEL_ACCOUNT); 19909 19910 if (!env->prog->aux->used_maps) { 19911 ret = -ENOMEM; 19912 goto err_release_maps; 19913 } 19914 19915 memcpy(env->prog->aux->used_maps, env->used_maps, 19916 sizeof(env->used_maps[0]) * env->used_map_cnt); 19917 env->prog->aux->used_map_cnt = env->used_map_cnt; 19918 } 19919 if (env->used_btf_cnt) { 19920 /* if program passed verifier, update used_btfs in bpf_prog_aux */ 19921 env->prog->aux->used_btfs = kmalloc_objs(env->used_btfs[0], 19922 env->used_btf_cnt, 19923 GFP_KERNEL_ACCOUNT); 19924 if (!env->prog->aux->used_btfs) { 19925 ret = -ENOMEM; 19926 goto err_release_maps; 19927 } 19928 19929 memcpy(env->prog->aux->used_btfs, env->used_btfs, 19930 sizeof(env->used_btfs[0]) * env->used_btf_cnt); 19931 env->prog->aux->used_btf_cnt = env->used_btf_cnt; 19932 } 19933 if (env->used_map_cnt || env->used_btf_cnt) { 19934 /* program is valid. 
Convert pseudo bpf_ld_imm64 into generic 19935 * bpf_ld_imm64 instructions 19936 */ 19937 convert_pseudo_ld_imm64(env); 19938 } 19939 19940 adjust_btf_func(env); 19941 19942 /* extension progs temporarily inherit the attach_type of their targets 19943 for verification purposes, so set it back to zero before returning 19944 */ 19945 if (env->prog->type == BPF_PROG_TYPE_EXT) 19946 env->prog->expected_attach_type = 0; 19947 19948 env->prog = __bpf_prog_select_runtime(env, env->prog, &ret); 19949 19950 err_release_maps: 19951 if (ret) 19952 release_insn_arrays(env); 19953 if (!env->prog->aux->used_maps) 19954 /* if we didn't copy map pointers into bpf_prog_info, release 19955 * them now. Otherwise free_used_maps() will release them. 19956 */ 19957 release_maps(env); 19958 if (!env->prog->aux->used_btfs) 19959 release_btfs(env); 19960 19961 *prog = env->prog; 19962 19963 module_put(env->attach_btf_mod); 19964 err_unlock: 19965 if (!is_priv) 19966 mutex_unlock(&bpf_verifier_lock); 19967 bpf_clear_insn_aux_data(env, 0, env->prog->len); 19968 vfree(env->insn_aux_data); 19969 err_free_env: 19970 bpf_stack_liveness_free(env); 19971 kvfree(env->cfg.insn_postorder); 19972 kvfree(env->scc_info); 19973 kvfree(env->succ); 19974 kvfree(env->gotox_tmp_buf); 19975 kvfree(env); 19976 return ret; 19977 } 19978